// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#define pr_fmt(fmt)	"habanalabs: " fmt

#include <uapi/drm/habanalabs_accel.h>
#include "habanalabs.h"

#include <linux/pci.h>
#include <linux/hwmon.h>
#include <linux/vmalloc.h>

#include <trace/events/habanalabs.h>

#define HL_RESET_DELAY_USEC			10000	/* 10ms */

#define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC	5

enum dma_alloc_type {
	DMA_ALLOC_COHERENT,
	DMA_ALLOC_POOL,
};

#define MEM_SCRUB_DEFAULT_VAL 0x1122334455667788

/*
 * hl_set_dram_bar - sets the bar to allow later access to address
 *
 * @hdev: pointer to habanalabs device structure.
 * @addr: the address the caller wants to access.
 * @region: the PCI region.
 * @new_bar_region_base: the new BAR region base address.
 *
 * @return: the old BAR base address on success, U64_MAX for failure.
 *	    The caller should set it back to the old address after use.
 *
 * In case the bar space does not cover the whole address space,
 * the bar base address should be set to allow access to a given address.
 * This function can be called also if the bar doesn't need to be set,
 * in that case it just won't change the base.
 */
static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_region *region,
				u64 *new_bar_region_base)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 bar_base_addr, old_base;

	if (is_power_of_2(prop->dram_pci_bar_size))
		bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull);
	else
		bar_base_addr = DIV_ROUND_DOWN_ULL(addr, prop->dram_pci_bar_size) *
				prop->dram_pci_bar_size;

	old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr);

	/* in case of success we need to update the new BAR base */
	if ((old_base != U64_MAX) && new_bar_region_base)
		*new_bar_region_base = bar_base_addr;

	return old_base;
}

int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
	enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar)
{
	struct pci_mem_region *region = &hdev->pci_mem_region[region_type];
	u64 old_base = 0, rc, bar_region_base = region->region_base;
	void __iomem *acc_addr;

	if (set_dram_bar) {
		old_base = hl_set_dram_bar(hdev, addr, region, &bar_region_base);
		if (old_base == U64_MAX)
			return -EIO;
	}

	acc_addr = hdev->pcie_bar[region->bar_id] + region->offset_in_bar +
			(addr - bar_region_base);

	switch (acc_type) {
	case DEBUGFS_READ8:
		*val = readb(acc_addr);
		break;
	case DEBUGFS_WRITE8:
		writeb(*val, acc_addr);
		break;
	case DEBUGFS_READ32:
		*val = readl(acc_addr);
		break;
	case DEBUGFS_WRITE32:
		writel(*val, acc_addr);
		break;
	case DEBUGFS_READ64:
		*val = readq(acc_addr);
		break;
	case DEBUGFS_WRITE64:
		writeq(*val, acc_addr);
		break;
	}

	if (set_dram_bar) {
		rc = hl_set_dram_bar(hdev, old_base, region, NULL);
		if (rc == U64_MAX)
			return -EIO;
	}

	return 0;
}

static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
					gfp_t flag, enum dma_alloc_type alloc_type,
					const char *caller)
{
	void *ptr = NULL;

	switch (alloc_type) {
	case DMA_ALLOC_COHERENT:
		ptr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, size, dma_handle, flag);
		break;
	case DMA_ALLOC_POOL:
		ptr =
hdev->asic_funcs->asic_dma_pool_zalloc(hdev, size, flag, dma_handle); 125 break; 126 } 127 128 if (trace_habanalabs_dma_alloc_enabled() && !ZERO_OR_NULL_PTR(ptr)) 129 trace_habanalabs_dma_alloc(hdev->dev, (u64) (uintptr_t) ptr, *dma_handle, size, 130 caller); 131 132 return ptr; 133 } 134 135 static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *cpu_addr, 136 dma_addr_t dma_handle, enum dma_alloc_type alloc_type, 137 const char *caller) 138 { 139 /* this is needed to avoid warning on using freed pointer */ 140 u64 store_cpu_addr = (u64) (uintptr_t) cpu_addr; 141 142 switch (alloc_type) { 143 case DMA_ALLOC_COHERENT: 144 hdev->asic_funcs->asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle); 145 break; 146 case DMA_ALLOC_POOL: 147 hdev->asic_funcs->asic_dma_pool_free(hdev, cpu_addr, dma_handle); 148 break; 149 } 150 151 trace_habanalabs_dma_free(hdev->dev, store_cpu_addr, dma_handle, size, caller); 152 } 153 154 void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, 155 gfp_t flag, const char *caller) 156 { 157 return hl_dma_alloc_common(hdev, size, dma_handle, flag, DMA_ALLOC_COHERENT, caller); 158 } 159 160 void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr, 161 dma_addr_t dma_handle, const char *caller) 162 { 163 hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT, caller); 164 } 165 166 void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags, 167 dma_addr_t *dma_handle, const char *caller) 168 { 169 return hl_dma_alloc_common(hdev, size, dma_handle, mem_flags, DMA_ALLOC_POOL, caller); 170 } 171 172 void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr, 173 const char *caller) 174 { 175 hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL, caller); 176 } 177 178 void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle) 179 { 180 return hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 181 } 182 183 void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr) 184 { 185 hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, size, vaddr); 186 } 187 188 int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir) 189 { 190 struct asic_fixed_properties *prop = &hdev->asic_prop; 191 struct scatterlist *sg; 192 int rc, i; 193 194 rc = dma_map_sgtable(&hdev->pdev->dev, sgt, dir, 0); 195 if (rc) 196 return rc; 197 198 /* Shift to the device's base physical address of host memory if necessary */ 199 if (prop->device_dma_offset_for_host_access) 200 for_each_sgtable_dma_sg(sgt, sg, i) 201 sg->dma_address += prop->device_dma_offset_for_host_access; 202 203 return 0; 204 } 205 206 void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir) 207 { 208 struct asic_fixed_properties *prop = &hdev->asic_prop; 209 struct scatterlist *sg; 210 int i; 211 212 /* Cancel the device's base physical address of host memory if necessary */ 213 if (prop->device_dma_offset_for_host_access) 214 for_each_sgtable_dma_sg(sgt, sg, i) 215 sg->dma_address -= prop->device_dma_offset_for_host_access; 216 217 dma_unmap_sgtable(&hdev->pdev->dev, sgt, dir, 0); 218 } 219 220 /* 221 * hl_access_cfg_region - access the config region 222 * 223 * @hdev: pointer to habanalabs device structure 224 * @addr: the address to access 225 * @val: the value to 
write from or read to 226 * @acc_type: the type of access (read/write 64/32) 227 */ 228 int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val, 229 enum debugfs_access_type acc_type) 230 { 231 struct pci_mem_region *cfg_region = &hdev->pci_mem_region[PCI_REGION_CFG]; 232 u32 val_h, val_l; 233 234 if (!IS_ALIGNED(addr, sizeof(u32))) { 235 dev_err(hdev->dev, "address %#llx not a multiple of %zu\n", addr, sizeof(u32)); 236 return -EINVAL; 237 } 238 239 switch (acc_type) { 240 case DEBUGFS_READ32: 241 *val = RREG32(addr - cfg_region->region_base); 242 break; 243 case DEBUGFS_WRITE32: 244 WREG32(addr - cfg_region->region_base, *val); 245 break; 246 case DEBUGFS_READ64: 247 val_l = RREG32(addr - cfg_region->region_base); 248 val_h = RREG32(addr + sizeof(u32) - cfg_region->region_base); 249 250 *val = (((u64) val_h) << 32) | val_l; 251 break; 252 case DEBUGFS_WRITE64: 253 WREG32(addr - cfg_region->region_base, lower_32_bits(*val)); 254 WREG32(addr + sizeof(u32) - cfg_region->region_base, upper_32_bits(*val)); 255 break; 256 default: 257 dev_err(hdev->dev, "access type %d is not supported\n", acc_type); 258 return -EOPNOTSUPP; 259 } 260 261 return 0; 262 } 263 264 /* 265 * hl_access_dev_mem - access device memory 266 * 267 * @hdev: pointer to habanalabs device structure 268 * @region_type: the type of the region the address belongs to 269 * @addr: the address to access 270 * @val: the value to write from or read to 271 * @acc_type: the type of access (r/w, 32/64) 272 */ 273 int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type, 274 u64 addr, u64 *val, enum debugfs_access_type acc_type) 275 { 276 switch (region_type) { 277 case PCI_REGION_CFG: 278 return hl_access_cfg_region(hdev, addr, val, acc_type); 279 case PCI_REGION_SRAM: 280 case PCI_REGION_DRAM: 281 return hl_access_sram_dram_region(hdev, addr, val, acc_type, 282 region_type, (region_type == PCI_REGION_DRAM)); 283 default: 284 return -EFAULT; 285 } 286 287 return 0; 288 } 289 290 void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...) 291 { 292 va_list args; 293 int str_size; 294 295 va_start(args, fmt); 296 /* Calculate formatted string length. 
Assuming each string is null terminated, hence 297 * increment result by 1 298 */ 299 str_size = vsnprintf(NULL, 0, fmt, args) + 1; 300 va_end(args); 301 302 if ((e->actual_size + str_size) < e->allocated_buf_size) { 303 va_start(args, fmt); 304 vsnprintf(e->buf + e->actual_size, str_size, fmt, args); 305 va_end(args); 306 } 307 308 /* Need to update the size even when not updating destination buffer to get the exact size 309 * of all input strings 310 */ 311 e->actual_size += str_size; 312 } 313 314 enum hl_device_status hl_device_status(struct hl_device *hdev) 315 { 316 enum hl_device_status status; 317 318 if (hdev->reset_info.in_reset) { 319 if (hdev->reset_info.in_compute_reset) 320 status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE; 321 else 322 status = HL_DEVICE_STATUS_IN_RESET; 323 } else if (hdev->reset_info.needs_reset) { 324 status = HL_DEVICE_STATUS_NEEDS_RESET; 325 } else if (hdev->disabled) { 326 status = HL_DEVICE_STATUS_MALFUNCTION; 327 } else if (!hdev->init_done) { 328 status = HL_DEVICE_STATUS_IN_DEVICE_CREATION; 329 } else { 330 status = HL_DEVICE_STATUS_OPERATIONAL; 331 } 332 333 return status; 334 } 335 336 bool hl_device_operational(struct hl_device *hdev, 337 enum hl_device_status *status) 338 { 339 enum hl_device_status current_status; 340 341 current_status = hl_device_status(hdev); 342 if (status) 343 *status = current_status; 344 345 switch (current_status) { 346 case HL_DEVICE_STATUS_IN_RESET: 347 case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE: 348 case HL_DEVICE_STATUS_MALFUNCTION: 349 case HL_DEVICE_STATUS_NEEDS_RESET: 350 return false; 351 case HL_DEVICE_STATUS_OPERATIONAL: 352 case HL_DEVICE_STATUS_IN_DEVICE_CREATION: 353 default: 354 return true; 355 } 356 } 357 358 bool hl_ctrl_device_operational(struct hl_device *hdev, 359 enum hl_device_status *status) 360 { 361 enum hl_device_status current_status; 362 363 current_status = hl_device_status(hdev); 364 if (status) 365 *status = current_status; 366 367 switch (current_status) { 368 case HL_DEVICE_STATUS_MALFUNCTION: 369 return false; 370 case HL_DEVICE_STATUS_IN_RESET: 371 case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE: 372 case HL_DEVICE_STATUS_NEEDS_RESET: 373 case HL_DEVICE_STATUS_OPERATIONAL: 374 case HL_DEVICE_STATUS_IN_DEVICE_CREATION: 375 default: 376 return true; 377 } 378 } 379 380 static void print_idle_status_mask(struct hl_device *hdev, const char *message, 381 u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE]) 382 { 383 if (idle_mask[3]) 384 dev_err(hdev->dev, "%s (mask %#llx_%016llx_%016llx_%016llx)\n", 385 message, idle_mask[3], idle_mask[2], idle_mask[1], idle_mask[0]); 386 else if (idle_mask[2]) 387 dev_err(hdev->dev, "%s (mask %#llx_%016llx_%016llx)\n", 388 message, idle_mask[2], idle_mask[1], idle_mask[0]); 389 else if (idle_mask[1]) 390 dev_err(hdev->dev, "%s (mask %#llx_%016llx)\n", 391 message, idle_mask[1], idle_mask[0]); 392 else 393 dev_err(hdev->dev, "%s (mask %#llx)\n", message, idle_mask[0]); 394 } 395 396 static void hpriv_release(struct kref *ref) 397 { 398 u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; 399 bool reset_device, device_is_idle = true; 400 struct hl_fpriv *hpriv; 401 struct hl_device *hdev; 402 403 hpriv = container_of(ref, struct hl_fpriv, refcount); 404 405 hdev = hpriv->hdev; 406 407 hdev->asic_funcs->send_device_activity(hdev, false); 408 409 put_pid(hpriv->taskpid); 410 411 hl_debugfs_remove_file(hpriv); 412 413 mutex_destroy(&hpriv->ctx_lock); 414 mutex_destroy(&hpriv->restore_phase_mutex); 415 416 /* Device should be reset if 
reset-upon-device-release is enabled, or if there is a pending
	 * reset that waits for device release.
	 */
	reset_device = hdev->reset_upon_device_release || hdev->reset_info.watchdog_active;

	/* Check the device idle status and reset if not idle.
	 * Skip it if already in reset, or if device is going to be reset in any case.
	 */
	if (!hdev->reset_info.in_reset && !reset_device && hdev->pdev && !hdev->pldm)
		device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask,
							HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
	if (!device_is_idle) {
		print_idle_status_mask(hdev, "device is not idle after user context is closed",
					idle_mask);
		reset_device = true;
	}

	/* We need to remove the user from the list to make sure the reset process won't
	 * try to kill the user process. Because, if we got here, it means there are no
	 * more driver/device resources that the user process is occupying so there is
	 * no need to kill it.
	 *
	 * However, we can't set the compute_ctx to NULL at this stage. This is to prevent
	 * a race between the release and opening the device again. We don't want to let
	 * a user open the device while a reset is about to happen.
	 */
	mutex_lock(&hdev->fpriv_list_lock);
	list_del(&hpriv->dev_node);
	mutex_unlock(&hdev->fpriv_list_lock);

	if (reset_device) {
		hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
	} else {
		/* Scrubbing is handled within hl_device_reset(), so here we need to do it directly */
		int rc = hdev->asic_funcs->scrub_device_mem(hdev);

		if (rc)
			dev_err(hdev->dev, "failed to scrub memory from hpriv release (%d)\n", rc);
	}

	/* Now we can mark the compute_ctx as not active. Even if a reset is running in a different
	 * thread, we don't care because the in_reset is marked so if a user tries to open
	 * the device it will fail on that, even if compute_ctx is false.
	 */
	mutex_lock(&hdev->fpriv_list_lock);
	hdev->is_compute_ctx_active = false;
	mutex_unlock(&hdev->fpriv_list_lock);

	hdev->compute_ctx_in_release = 0;

	/* release the eventfd */
	if (hpriv->notifier_event.eventfd)
		eventfd_ctx_put(hpriv->notifier_event.eventfd);

	mutex_destroy(&hpriv->notifier_event.lock);

	kfree(hpriv);
}

void hl_hpriv_get(struct hl_fpriv *hpriv)
{
	kref_get(&hpriv->refcount);
}

int hl_hpriv_put(struct hl_fpriv *hpriv)
{
	return kref_put(&hpriv->refcount, hpriv_release);
}

static void compose_device_in_use_info(char **buf, size_t *buf_size, const char *fmt, ...)
486 { 487 struct va_format vaf; 488 va_list args; 489 int size; 490 491 va_start(args, fmt); 492 vaf.fmt = fmt; 493 vaf.va = &args; 494 495 size = snprintf(*buf, *buf_size, "%pV", &vaf); 496 if (size >= *buf_size) 497 size = *buf_size; 498 499 *buf += size; 500 *buf_size -= size; 501 502 va_end(args); 503 } 504 505 static void print_device_in_use_info(struct hl_device *hdev, const char *message) 506 { 507 u32 active_cs_num, dmabuf_export_cnt; 508 char buf[64], *buf_ptr = buf; 509 size_t buf_size = sizeof(buf); 510 bool unknown_reason = true; 511 512 active_cs_num = hl_get_active_cs_num(hdev); 513 if (active_cs_num) { 514 unknown_reason = false; 515 compose_device_in_use_info(&buf_ptr, &buf_size, " [%u active CS]", active_cs_num); 516 } 517 518 dmabuf_export_cnt = atomic_read(&hdev->dmabuf_export_cnt); 519 if (dmabuf_export_cnt) { 520 unknown_reason = false; 521 compose_device_in_use_info(&buf_ptr, &buf_size, " [%u exported dma-buf]", 522 dmabuf_export_cnt); 523 } 524 525 if (unknown_reason) 526 compose_device_in_use_info(&buf_ptr, &buf_size, " [unknown reason]"); 527 528 dev_notice(hdev->dev, "%s%s\n", message, buf); 529 } 530 531 /* 532 * hl_device_release - release function for habanalabs device 533 * 534 * @inode: pointer to inode structure 535 * @filp: pointer to file structure 536 * 537 * Called when process closes an habanalabs device 538 */ 539 static int hl_device_release(struct inode *inode, struct file *filp) 540 { 541 struct hl_fpriv *hpriv = filp->private_data; 542 struct hl_device *hdev = hpriv->hdev; 543 544 filp->private_data = NULL; 545 546 if (!hdev) { 547 pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n"); 548 put_pid(hpriv->taskpid); 549 return 0; 550 } 551 552 hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr); 553 hl_mem_mgr_fini(&hpriv->mem_mgr); 554 555 hdev->compute_ctx_in_release = 1; 556 557 if (!hl_hpriv_put(hpriv)) { 558 print_device_in_use_info(hdev, "User process closed FD but device still in use"); 559 hl_device_reset(hdev, HL_DRV_RESET_HARD); 560 } 561 562 hdev->last_open_session_duration_jif = jiffies - hdev->last_successful_open_jif; 563 564 return 0; 565 } 566 567 static int hl_device_release_ctrl(struct inode *inode, struct file *filp) 568 { 569 struct hl_fpriv *hpriv = filp->private_data; 570 struct hl_device *hdev = hpriv->hdev; 571 572 filp->private_data = NULL; 573 574 if (!hdev) { 575 pr_err("Closing FD after device was removed\n"); 576 goto out; 577 } 578 579 mutex_lock(&hdev->fpriv_ctrl_list_lock); 580 list_del(&hpriv->dev_node); 581 mutex_unlock(&hdev->fpriv_ctrl_list_lock); 582 out: 583 /* release the eventfd */ 584 if (hpriv->notifier_event.eventfd) 585 eventfd_ctx_put(hpriv->notifier_event.eventfd); 586 587 mutex_destroy(&hpriv->notifier_event.lock); 588 put_pid(hpriv->taskpid); 589 590 kfree(hpriv); 591 592 return 0; 593 } 594 595 /* 596 * hl_mmap - mmap function for habanalabs device 597 * 598 * @*filp: pointer to file structure 599 * @*vma: pointer to vm_area_struct of the process 600 * 601 * Called when process does an mmap on habanalabs device. Call the relevant mmap 602 * function at the end of the common code. 603 */ 604 static int hl_mmap(struct file *filp, struct vm_area_struct *vma) 605 { 606 struct hl_fpriv *hpriv = filp->private_data; 607 struct hl_device *hdev = hpriv->hdev; 608 unsigned long vm_pgoff; 609 610 if (!hdev) { 611 pr_err_ratelimited("Trying to mmap after device was removed! 
Please close FD\n"); 612 return -ENODEV; 613 } 614 615 vm_pgoff = vma->vm_pgoff; 616 617 switch (vm_pgoff & HL_MMAP_TYPE_MASK) { 618 case HL_MMAP_TYPE_BLOCK: 619 vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff); 620 return hl_hw_block_mmap(hpriv, vma); 621 622 case HL_MMAP_TYPE_CB: 623 case HL_MMAP_TYPE_TS_BUFF: 624 return hl_mem_mgr_mmap(&hpriv->mem_mgr, vma, NULL); 625 } 626 return -EINVAL; 627 } 628 629 static const struct file_operations hl_ops = { 630 .owner = THIS_MODULE, 631 .open = hl_device_open, 632 .release = hl_device_release, 633 .mmap = hl_mmap, 634 .unlocked_ioctl = hl_ioctl, 635 .compat_ioctl = hl_ioctl 636 }; 637 638 static const struct file_operations hl_ctrl_ops = { 639 .owner = THIS_MODULE, 640 .open = hl_device_open_ctrl, 641 .release = hl_device_release_ctrl, 642 .unlocked_ioctl = hl_ioctl_control, 643 .compat_ioctl = hl_ioctl_control 644 }; 645 646 static void device_release_func(struct device *dev) 647 { 648 kfree(dev); 649 } 650 651 /* 652 * device_init_cdev - Initialize cdev and device for habanalabs device 653 * 654 * @hdev: pointer to habanalabs device structure 655 * @class: pointer to the class object of the device 656 * @minor: minor number of the specific device 657 * @fpos: file operations to install for this device 658 * @name: name of the device as it will appear in the filesystem 659 * @cdev: pointer to the char device object that will be initialized 660 * @dev: pointer to the device object that will be initialized 661 * 662 * Initialize a cdev and a Linux device for habanalabs's device. 663 */ 664 static int device_init_cdev(struct hl_device *hdev, struct class *class, 665 int minor, const struct file_operations *fops, 666 char *name, struct cdev *cdev, 667 struct device **dev) 668 { 669 cdev_init(cdev, fops); 670 cdev->owner = THIS_MODULE; 671 672 *dev = kzalloc(sizeof(**dev), GFP_KERNEL); 673 if (!*dev) 674 return -ENOMEM; 675 676 device_initialize(*dev); 677 (*dev)->devt = MKDEV(hdev->major, minor); 678 (*dev)->class = class; 679 (*dev)->release = device_release_func; 680 dev_set_drvdata(*dev, hdev); 681 dev_set_name(*dev, "%s", name); 682 683 return 0; 684 } 685 686 static int device_cdev_sysfs_add(struct hl_device *hdev) 687 { 688 int rc; 689 690 rc = cdev_device_add(&hdev->cdev, hdev->dev); 691 if (rc) { 692 dev_err(hdev->dev, 693 "failed to add a char device to the system\n"); 694 return rc; 695 } 696 697 rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl); 698 if (rc) { 699 dev_err(hdev->dev, 700 "failed to add a control char device to the system\n"); 701 goto delete_cdev_device; 702 } 703 704 /* hl_sysfs_init() must be done after adding the device to the system */ 705 rc = hl_sysfs_init(hdev); 706 if (rc) { 707 dev_err(hdev->dev, "failed to initialize sysfs\n"); 708 goto delete_ctrl_cdev_device; 709 } 710 711 hdev->cdev_sysfs_created = true; 712 713 return 0; 714 715 delete_ctrl_cdev_device: 716 cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); 717 delete_cdev_device: 718 cdev_device_del(&hdev->cdev, hdev->dev); 719 return rc; 720 } 721 722 static void device_cdev_sysfs_del(struct hl_device *hdev) 723 { 724 if (!hdev->cdev_sysfs_created) 725 goto put_devices; 726 727 hl_sysfs_fini(hdev); 728 cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); 729 cdev_device_del(&hdev->cdev, hdev->dev); 730 731 put_devices: 732 put_device(hdev->dev); 733 put_device(hdev->dev_ctrl); 734 } 735 736 static void device_hard_reset_pending(struct work_struct *work) 737 { 738 struct hl_device_reset_work *device_reset_work = 739 container_of(work, struct 
			hl_device_reset_work, reset_work.work);
	struct hl_device *hdev = device_reset_work->hdev;
	u32 flags;
	int rc;

	flags = device_reset_work->flags | HL_DRV_RESET_FROM_RESET_THR;

	rc = hl_device_reset(hdev, flags);

	if ((rc == -EBUSY) && !hdev->device_fini_pending) {
		struct hl_ctx *ctx = hl_get_compute_ctx(hdev);

		if (ctx) {
			/* The read refcount value should be subtracted by one, because the read is
			 * protected with hl_get_compute_ctx().
			 */
			dev_info(hdev->dev,
				"Could not reset device (compute_ctx refcount %u). will try again in %u seconds",
				kref_read(&ctx->refcount) - 1, HL_PENDING_RESET_PER_SEC);
			hl_ctx_put(ctx);
		} else {
			dev_info(hdev->dev, "Could not reset device. will try again in %u seconds",
				HL_PENDING_RESET_PER_SEC);
		}

		queue_delayed_work(hdev->reset_wq, &device_reset_work->reset_work,
					msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
	}
}

static void device_release_watchdog_func(struct work_struct *work)
{
	struct hl_device_reset_work *watchdog_work =
			container_of(work, struct hl_device_reset_work, reset_work.work);
	struct hl_device *hdev = watchdog_work->hdev;
	u32 flags;

	dev_dbg(hdev->dev, "Device wasn't released in time. Initiate hard-reset.\n");

	flags = watchdog_work->flags | HL_DRV_RESET_HARD | HL_DRV_RESET_FROM_WD_THR;

	hl_device_reset(hdev, flags);
}

/*
 * device_early_init - do some early initialization for the habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Install the relevant function pointers and call the early_init function,
 * if such a function exists
 */
static int device_early_init(struct hl_device *hdev)
{
	int i, rc;
	char workq_name[32];

	switch (hdev->asic_type) {
	case ASIC_GOYA:
		goya_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI:
		gaudi_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI_SEC:
		gaudi_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI2:
		gaudi2_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GAUDI2", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI2B:
		gaudi2_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GAUDI2B", sizeof(hdev->asic_name));
		break;
	default:
		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
			hdev->asic_type);
		return -EINVAL;
	}

	rc = hdev->asic_funcs->early_init(hdev);
	if (rc)
		return rc;

	rc = hl_asid_init(hdev);
	if (rc)
		goto early_fini;

	if (hdev->asic_prop.completion_queues_count) {
		hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
					sizeof(struct workqueue_struct *),
					GFP_KERNEL);
		if (!hdev->cq_wq) {
			rc = -ENOMEM;
			goto asid_fini;
		}
	}

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
		snprintf(workq_name, 32, "hl%u-free-jobs-%u", hdev->cdev_idx, (u32) i);
		hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
		if (hdev->cq_wq[i] == NULL) {
			dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
			rc = -ENOMEM;
			goto free_cq_wq;
		}
	}

	snprintf(workq_name, 32, "hl%u-events", hdev->cdev_idx);
	hdev->eq_wq = create_singlethread_workqueue(workq_name);
	if (hdev->eq_wq == NULL) {
dev_err(hdev->dev, "Failed to allocate EQ workqueue\n"); 856 rc = -ENOMEM; 857 goto free_cq_wq; 858 } 859 860 snprintf(workq_name, 32, "hl%u-cs-completions", hdev->cdev_idx); 861 hdev->cs_cmplt_wq = alloc_workqueue(workq_name, WQ_UNBOUND, 0); 862 if (!hdev->cs_cmplt_wq) { 863 dev_err(hdev->dev, 864 "Failed to allocate CS completions workqueue\n"); 865 rc = -ENOMEM; 866 goto free_eq_wq; 867 } 868 869 snprintf(workq_name, 32, "hl%u-ts-free-obj", hdev->cdev_idx); 870 hdev->ts_free_obj_wq = alloc_workqueue(workq_name, WQ_UNBOUND, 0); 871 if (!hdev->ts_free_obj_wq) { 872 dev_err(hdev->dev, 873 "Failed to allocate Timestamp registration free workqueue\n"); 874 rc = -ENOMEM; 875 goto free_cs_cmplt_wq; 876 } 877 878 snprintf(workq_name, 32, "hl%u-prefetch", hdev->cdev_idx); 879 hdev->prefetch_wq = alloc_workqueue(workq_name, WQ_UNBOUND, 0); 880 if (!hdev->prefetch_wq) { 881 dev_err(hdev->dev, "Failed to allocate MMU prefetch workqueue\n"); 882 rc = -ENOMEM; 883 goto free_ts_free_wq; 884 } 885 886 hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), GFP_KERNEL); 887 if (!hdev->hl_chip_info) { 888 rc = -ENOMEM; 889 goto free_prefetch_wq; 890 } 891 892 rc = hl_mmu_if_set_funcs(hdev); 893 if (rc) 894 goto free_chip_info; 895 896 hl_mem_mgr_init(hdev->dev, &hdev->kernel_mem_mgr); 897 898 snprintf(workq_name, 32, "hl%u_device_reset", hdev->cdev_idx); 899 hdev->reset_wq = create_singlethread_workqueue(workq_name); 900 if (!hdev->reset_wq) { 901 rc = -ENOMEM; 902 dev_err(hdev->dev, "Failed to create device reset WQ\n"); 903 goto free_cb_mgr; 904 } 905 906 INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work, device_hard_reset_pending); 907 hdev->device_reset_work.hdev = hdev; 908 hdev->device_fini_pending = 0; 909 910 INIT_DELAYED_WORK(&hdev->device_release_watchdog_work.reset_work, 911 device_release_watchdog_func); 912 hdev->device_release_watchdog_work.hdev = hdev; 913 914 mutex_init(&hdev->send_cpu_message_lock); 915 mutex_init(&hdev->debug_lock); 916 INIT_LIST_HEAD(&hdev->cs_mirror_list); 917 spin_lock_init(&hdev->cs_mirror_lock); 918 spin_lock_init(&hdev->reset_info.lock); 919 INIT_LIST_HEAD(&hdev->fpriv_list); 920 INIT_LIST_HEAD(&hdev->fpriv_ctrl_list); 921 mutex_init(&hdev->fpriv_list_lock); 922 mutex_init(&hdev->fpriv_ctrl_list_lock); 923 mutex_init(&hdev->clk_throttling.lock); 924 925 return 0; 926 927 free_cb_mgr: 928 hl_mem_mgr_fini(&hdev->kernel_mem_mgr); 929 free_chip_info: 930 kfree(hdev->hl_chip_info); 931 free_prefetch_wq: 932 destroy_workqueue(hdev->prefetch_wq); 933 free_ts_free_wq: 934 destroy_workqueue(hdev->ts_free_obj_wq); 935 free_cs_cmplt_wq: 936 destroy_workqueue(hdev->cs_cmplt_wq); 937 free_eq_wq: 938 destroy_workqueue(hdev->eq_wq); 939 free_cq_wq: 940 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 941 if (hdev->cq_wq[i]) 942 destroy_workqueue(hdev->cq_wq[i]); 943 kfree(hdev->cq_wq); 944 asid_fini: 945 hl_asid_fini(hdev); 946 early_fini: 947 if (hdev->asic_funcs->early_fini) 948 hdev->asic_funcs->early_fini(hdev); 949 950 return rc; 951 } 952 953 /* 954 * device_early_fini - finalize all that was done in device_early_init 955 * 956 * @hdev: pointer to habanalabs device structure 957 * 958 */ 959 static void device_early_fini(struct hl_device *hdev) 960 { 961 int i; 962 963 mutex_destroy(&hdev->debug_lock); 964 mutex_destroy(&hdev->send_cpu_message_lock); 965 966 mutex_destroy(&hdev->fpriv_list_lock); 967 mutex_destroy(&hdev->fpriv_ctrl_list_lock); 968 969 mutex_destroy(&hdev->clk_throttling.lock); 970 971 hl_mem_mgr_fini(&hdev->kernel_mem_mgr); 972 973 
kfree(hdev->hl_chip_info); 974 975 destroy_workqueue(hdev->prefetch_wq); 976 destroy_workqueue(hdev->ts_free_obj_wq); 977 destroy_workqueue(hdev->cs_cmplt_wq); 978 destroy_workqueue(hdev->eq_wq); 979 destroy_workqueue(hdev->reset_wq); 980 981 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 982 destroy_workqueue(hdev->cq_wq[i]); 983 kfree(hdev->cq_wq); 984 985 hl_asid_fini(hdev); 986 987 if (hdev->asic_funcs->early_fini) 988 hdev->asic_funcs->early_fini(hdev); 989 } 990 991 static void hl_device_heartbeat(struct work_struct *work) 992 { 993 struct hl_device *hdev = container_of(work, struct hl_device, 994 work_heartbeat.work); 995 struct hl_info_fw_err_info info = {0}; 996 u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE; 997 998 if (!hl_device_operational(hdev, NULL)) 999 goto reschedule; 1000 1001 if (!hdev->asic_funcs->send_heartbeat(hdev)) 1002 goto reschedule; 1003 1004 if (hl_device_operational(hdev, NULL)) 1005 dev_err(hdev->dev, "Device heartbeat failed!\n"); 1006 1007 info.err_type = HL_INFO_FW_HEARTBEAT_ERR; 1008 info.event_mask = &event_mask; 1009 hl_handle_fw_err(hdev, &info); 1010 hl_device_cond_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_HEARTBEAT, event_mask); 1011 1012 return; 1013 1014 reschedule: 1015 /* 1016 * prev_reset_trigger tracks consecutive fatal h/w errors until first 1017 * heartbeat immediately post reset. 1018 * If control reached here, then at least one heartbeat work has been 1019 * scheduled since last reset/init cycle. 1020 * So if the device is not already in reset cycle, reset the flag 1021 * prev_reset_trigger as no reset occurred with HL_DRV_RESET_FW_FATAL_ERR 1022 * status for at least one heartbeat. From this point driver restarts 1023 * tracking future consecutive fatal errors. 
1024 */ 1025 if (!hdev->reset_info.in_reset) 1026 hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT; 1027 1028 schedule_delayed_work(&hdev->work_heartbeat, 1029 usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); 1030 } 1031 1032 /* 1033 * device_late_init - do late stuff initialization for the habanalabs device 1034 * 1035 * @hdev: pointer to habanalabs device structure 1036 * 1037 * Do stuff that either needs the device H/W queues to be active or needs 1038 * to happen after all the rest of the initialization is finished 1039 */ 1040 static int device_late_init(struct hl_device *hdev) 1041 { 1042 int rc; 1043 1044 if (hdev->asic_funcs->late_init) { 1045 rc = hdev->asic_funcs->late_init(hdev); 1046 if (rc) { 1047 dev_err(hdev->dev, 1048 "failed late initialization for the H/W\n"); 1049 return rc; 1050 } 1051 } 1052 1053 hdev->high_pll = hdev->asic_prop.high_pll; 1054 1055 if (hdev->heartbeat) { 1056 INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat); 1057 schedule_delayed_work(&hdev->work_heartbeat, 1058 usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); 1059 } 1060 1061 hdev->late_init_done = true; 1062 1063 return 0; 1064 } 1065 1066 /* 1067 * device_late_fini - finalize all that was done in device_late_init 1068 * 1069 * @hdev: pointer to habanalabs device structure 1070 * 1071 */ 1072 static void device_late_fini(struct hl_device *hdev) 1073 { 1074 if (!hdev->late_init_done) 1075 return; 1076 1077 if (hdev->heartbeat) 1078 cancel_delayed_work_sync(&hdev->work_heartbeat); 1079 1080 if (hdev->asic_funcs->late_fini) 1081 hdev->asic_funcs->late_fini(hdev); 1082 1083 hdev->late_init_done = false; 1084 } 1085 1086 int hl_device_utilization(struct hl_device *hdev, u32 *utilization) 1087 { 1088 u64 max_power, curr_power, dc_power, dividend, divisor; 1089 int rc; 1090 1091 max_power = hdev->max_power; 1092 dc_power = hdev->asic_prop.dc_power_default; 1093 divisor = max_power - dc_power; 1094 if (!divisor) { 1095 dev_warn(hdev->dev, "device utilization is not supported\n"); 1096 return -EOPNOTSUPP; 1097 } 1098 rc = hl_fw_cpucp_power_get(hdev, &curr_power); 1099 1100 if (rc) 1101 return rc; 1102 1103 curr_power = clamp(curr_power, dc_power, max_power); 1104 1105 dividend = (curr_power - dc_power) * 100; 1106 *utilization = (u32) div_u64(dividend, divisor); 1107 1108 return 0; 1109 } 1110 1111 int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool enable) 1112 { 1113 int rc = 0; 1114 1115 mutex_lock(&hdev->debug_lock); 1116 1117 if (!enable) { 1118 if (!hdev->in_debug) { 1119 dev_err(hdev->dev, 1120 "Failed to disable debug mode because device was not in debug mode\n"); 1121 rc = -EFAULT; 1122 goto out; 1123 } 1124 1125 if (!hdev->reset_info.hard_reset_pending) 1126 hdev->asic_funcs->halt_coresight(hdev, ctx); 1127 1128 hdev->in_debug = 0; 1129 1130 goto out; 1131 } 1132 1133 if (hdev->in_debug) { 1134 dev_err(hdev->dev, 1135 "Failed to enable debug mode because device is already in debug mode\n"); 1136 rc = -EFAULT; 1137 goto out; 1138 } 1139 1140 hdev->in_debug = 1; 1141 1142 out: 1143 mutex_unlock(&hdev->debug_lock); 1144 1145 return rc; 1146 } 1147 1148 static void take_release_locks(struct hl_device *hdev) 1149 { 1150 /* Flush anyone that is inside the critical section of enqueue 1151 * jobs to the H/W 1152 */ 1153 hdev->asic_funcs->hw_queues_lock(hdev); 1154 hdev->asic_funcs->hw_queues_unlock(hdev); 1155 1156 /* Flush processes that are sending message to CPU */ 1157 mutex_lock(&hdev->send_cpu_message_lock); 1158 mutex_unlock(&hdev->send_cpu_message_lock); 1159 
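	/* Note that each lock in this function is taken and immediately
	 * released. The pair acts as a barrier: anyone already inside the
	 * critical section is flushed out, and new callers will observe the
	 * disabled/in_reset indication that was set before calling here.
	 */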
1160 /* Flush anyone that is inside device open */ 1161 mutex_lock(&hdev->fpriv_list_lock); 1162 mutex_unlock(&hdev->fpriv_list_lock); 1163 mutex_lock(&hdev->fpriv_ctrl_list_lock); 1164 mutex_unlock(&hdev->fpriv_ctrl_list_lock); 1165 } 1166 1167 static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset, 1168 bool skip_wq_flush) 1169 { 1170 if (hard_reset) 1171 device_late_fini(hdev); 1172 1173 /* 1174 * Halt the engines and disable interrupts so we won't get any more 1175 * completions from H/W and we won't have any accesses from the 1176 * H/W to the host machine 1177 */ 1178 hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset); 1179 1180 /* Go over all the queues, release all CS and their jobs */ 1181 hl_cs_rollback_all(hdev, skip_wq_flush); 1182 1183 /* flush the MMU prefetch workqueue */ 1184 flush_workqueue(hdev->prefetch_wq); 1185 1186 /* Release all pending user interrupts, each pending user interrupt 1187 * holds a reference to user context 1188 */ 1189 hl_release_pending_user_interrupts(hdev); 1190 } 1191 1192 /* 1193 * hl_device_suspend - initiate device suspend 1194 * 1195 * @hdev: pointer to habanalabs device structure 1196 * 1197 * Puts the hw in the suspend state (all asics). 1198 * Returns 0 for success or an error on failure. 1199 * Called at driver suspend. 1200 */ 1201 int hl_device_suspend(struct hl_device *hdev) 1202 { 1203 int rc; 1204 1205 pci_save_state(hdev->pdev); 1206 1207 /* Block future CS/VM/JOB completion operations */ 1208 spin_lock(&hdev->reset_info.lock); 1209 if (hdev->reset_info.in_reset) { 1210 spin_unlock(&hdev->reset_info.lock); 1211 dev_err(hdev->dev, "Can't suspend while in reset\n"); 1212 return -EIO; 1213 } 1214 hdev->reset_info.in_reset = 1; 1215 spin_unlock(&hdev->reset_info.lock); 1216 1217 /* This blocks all other stuff that is not blocked by in_reset */ 1218 hdev->disabled = true; 1219 1220 take_release_locks(hdev); 1221 1222 rc = hdev->asic_funcs->suspend(hdev); 1223 if (rc) 1224 dev_err(hdev->dev, 1225 "Failed to disable PCI access of device CPU\n"); 1226 1227 /* Shut down the device */ 1228 pci_disable_device(hdev->pdev); 1229 pci_set_power_state(hdev->pdev, PCI_D3hot); 1230 1231 return 0; 1232 } 1233 1234 /* 1235 * hl_device_resume - initiate device resume 1236 * 1237 * @hdev: pointer to habanalabs device structure 1238 * 1239 * Bring the hw back to operating state (all asics). 1240 * Returns 0 for success or an error on failure. 1241 * Called at driver resume. 
1242 */ 1243 int hl_device_resume(struct hl_device *hdev) 1244 { 1245 int rc; 1246 1247 pci_set_power_state(hdev->pdev, PCI_D0); 1248 pci_restore_state(hdev->pdev); 1249 rc = pci_enable_device_mem(hdev->pdev); 1250 if (rc) { 1251 dev_err(hdev->dev, 1252 "Failed to enable PCI device in resume\n"); 1253 return rc; 1254 } 1255 1256 pci_set_master(hdev->pdev); 1257 1258 rc = hdev->asic_funcs->resume(hdev); 1259 if (rc) { 1260 dev_err(hdev->dev, "Failed to resume device after suspend\n"); 1261 goto disable_device; 1262 } 1263 1264 1265 /* 'in_reset' was set to true during suspend, now we must clear it in order 1266 * for hard reset to be performed 1267 */ 1268 spin_lock(&hdev->reset_info.lock); 1269 hdev->reset_info.in_reset = 0; 1270 spin_unlock(&hdev->reset_info.lock); 1271 1272 rc = hl_device_reset(hdev, HL_DRV_RESET_HARD); 1273 if (rc) { 1274 dev_err(hdev->dev, "Failed to reset device during resume\n"); 1275 goto disable_device; 1276 } 1277 1278 return 0; 1279 1280 disable_device: 1281 pci_clear_master(hdev->pdev); 1282 pci_disable_device(hdev->pdev); 1283 1284 return rc; 1285 } 1286 1287 static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev) 1288 { 1289 struct task_struct *task = NULL; 1290 struct list_head *fd_list; 1291 struct hl_fpriv *hpriv; 1292 struct mutex *fd_lock; 1293 u32 pending_cnt; 1294 1295 fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock; 1296 fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list; 1297 1298 /* Giving time for user to close FD, and for processes that are inside 1299 * hl_device_open to finish 1300 */ 1301 if (!list_empty(fd_list)) 1302 ssleep(1); 1303 1304 if (timeout) { 1305 pending_cnt = timeout; 1306 } else { 1307 if (hdev->process_kill_trial_cnt) { 1308 /* Processes have been already killed */ 1309 pending_cnt = 1; 1310 goto wait_for_processes; 1311 } else { 1312 /* Wait a small period after process kill */ 1313 pending_cnt = HL_PENDING_RESET_PER_SEC; 1314 } 1315 } 1316 1317 mutex_lock(fd_lock); 1318 1319 /* This section must be protected because we are dereferencing 1320 * pointers that are freed if the process exits 1321 */ 1322 list_for_each_entry(hpriv, fd_list, dev_node) { 1323 task = get_pid_task(hpriv->taskpid, PIDTYPE_PID); 1324 if (task) { 1325 dev_info(hdev->dev, "Killing user process pid=%d\n", 1326 task_pid_nr(task)); 1327 send_sig(SIGKILL, task, 1); 1328 usleep_range(1000, 10000); 1329 1330 put_task_struct(task); 1331 } else { 1332 /* 1333 * If we got here, it means that process was killed from outside the driver 1334 * right after it started looping on fd_list and before get_pid_task, thus 1335 * we don't need to kill it. 1336 */ 1337 dev_dbg(hdev->dev, 1338 "Can't get task struct for user process, assuming process was killed from outside the driver\n"); 1339 } 1340 } 1341 1342 mutex_unlock(fd_lock); 1343 1344 /* 1345 * We killed the open users, but that doesn't mean they are closed. 1346 * It could be that they are running a long cleanup phase in the driver 1347 * e.g. MMU unmappings, or running other long teardown flow even before 1348 * our cleanup. 1349 * Therefore we need to wait again to make sure they are closed before 1350 * continuing with the reset. 
	 */

wait_for_processes:
	while ((!list_empty(fd_list)) && (pending_cnt)) {
		dev_dbg(hdev->dev,
			"Waiting for all unmap operations to finish before hard reset\n");

		pending_cnt--;

		ssleep(1);
	}

	/* All processes exited successfully */
	if (list_empty(fd_list))
		return 0;

	/* Give up waiting for processes to exit */
	if (hdev->process_kill_trial_cnt == HL_PENDING_RESET_MAX_TRIALS)
		return -ETIME;

	hdev->process_kill_trial_cnt++;

	return -EBUSY;
}

static void device_disable_open_processes(struct hl_device *hdev, bool control_dev)
{
	struct list_head *fd_list;
	struct hl_fpriv *hpriv;
	struct mutex *fd_lock;

	fd_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
	fd_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;

	mutex_lock(fd_lock);
	list_for_each_entry(hpriv, fd_list, dev_node)
		hpriv->hdev = NULL;
	mutex_unlock(fd_lock);
}

static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
{
	u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT;

	/* No consecutive mechanism when user context exists */
	if (hdev->is_compute_ctx_active)
		return;

	/*
	 * 'reset cause' is being updated here, because getting here
	 * means that it's the 1st time and the last time we're here
	 * ('in_reset' makes sure of it). This makes sure that
	 * 'reset_cause' will continue holding its 1st recorded reason!
	 */
	if (flags & HL_DRV_RESET_HEARTBEAT) {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
		cur_reset_trigger = HL_DRV_RESET_HEARTBEAT;
	} else if (flags & HL_DRV_RESET_TDR) {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_TDR;
		cur_reset_trigger = HL_DRV_RESET_TDR;
	} else if (flags & HL_DRV_RESET_FW_FATAL_ERR) {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
		cur_reset_trigger = HL_DRV_RESET_FW_FATAL_ERR;
	} else {
		hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
	}

	/*
	 * If reset cause is same twice, then reset_trigger_repeated
	 * is set and if this reset is due to a fatal FW error
	 * device is set to an unstable state.
	 */
	if (hdev->reset_info.prev_reset_trigger != cur_reset_trigger) {
		hdev->reset_info.prev_reset_trigger = cur_reset_trigger;
		hdev->reset_info.reset_trigger_repeated = 0;
	} else {
		hdev->reset_info.reset_trigger_repeated = 1;
	}

	/* If reset is due to heartbeat, device CPU is not responsive,
	 * in which case there is no point sending a PCI disable message to it.
	 *
	 * If F/W is performing the reset, no need to send it a message to disable
	 * PCI access
	 */
	if ((flags & HL_DRV_RESET_HARD) &&
			!(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) {
		/* Disable PCI access from device F/W so it won't send
		 * us additional interrupts. We disable MSI/MSI-X at
		 * the halt_engines function and we can't have the F/W
		 * sending us interrupts after that. We need to disable
		 * the access here because if the device is marked as
		 * disabled, the message won't be sent.
Also, in case 1444 * of heartbeat, the device CPU is marked as disable 1445 * so this message won't be sent 1446 */ 1447 if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0)) 1448 dev_warn(hdev->dev, 1449 "Failed to disable FW's PCI access\n"); 1450 } 1451 } 1452 1453 /* 1454 * hl_device_reset - reset the device 1455 * 1456 * @hdev: pointer to habanalabs device structure 1457 * @flags: reset flags. 1458 * 1459 * Block future CS and wait for pending CS to be enqueued 1460 * Call ASIC H/W fini 1461 * Flush all completions 1462 * Re-initialize all internal data structures 1463 * Call ASIC H/W init, late_init 1464 * Test queues 1465 * Enable device 1466 * 1467 * Returns 0 for success or an error on failure. 1468 */ 1469 int hl_device_reset(struct hl_device *hdev, u32 flags) 1470 { 1471 bool hard_reset, from_hard_reset_thread, fw_reset, reset_upon_device_release, 1472 schedule_hard_reset = false, delay_reset, from_dev_release, from_watchdog_thread; 1473 u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; 1474 struct hl_ctx *ctx; 1475 int i, rc, hw_fini_rc; 1476 1477 if (!hdev->init_done) { 1478 dev_err(hdev->dev, "Can't reset before initialization is done\n"); 1479 return 0; 1480 } 1481 1482 hard_reset = !!(flags & HL_DRV_RESET_HARD); 1483 from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR); 1484 fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW); 1485 from_dev_release = !!(flags & HL_DRV_RESET_DEV_RELEASE); 1486 delay_reset = !!(flags & HL_DRV_RESET_DELAY); 1487 from_watchdog_thread = !!(flags & HL_DRV_RESET_FROM_WD_THR); 1488 reset_upon_device_release = hdev->reset_upon_device_release && from_dev_release; 1489 1490 if (!hard_reset && (hl_device_status(hdev) == HL_DEVICE_STATUS_MALFUNCTION)) { 1491 dev_dbg(hdev->dev, "soft-reset isn't supported on a malfunctioning device\n"); 1492 return 0; 1493 } 1494 1495 if (!hard_reset && !hdev->asic_prop.supports_compute_reset) { 1496 dev_dbg(hdev->dev, "asic doesn't support compute reset - do hard-reset instead\n"); 1497 hard_reset = true; 1498 } 1499 1500 if (reset_upon_device_release) { 1501 if (hard_reset) { 1502 dev_crit(hdev->dev, 1503 "Aborting reset because hard-reset is mutually exclusive with reset-on-device-release\n"); 1504 return -EINVAL; 1505 } 1506 1507 goto do_reset; 1508 } 1509 1510 if (!hard_reset && !hdev->asic_prop.allow_inference_soft_reset) { 1511 dev_dbg(hdev->dev, 1512 "asic doesn't allow inference soft reset - do hard-reset instead\n"); 1513 hard_reset = true; 1514 } 1515 1516 do_reset: 1517 /* Re-entry of reset thread */ 1518 if (from_hard_reset_thread && hdev->process_kill_trial_cnt) 1519 goto kill_processes; 1520 1521 /* 1522 * Prevent concurrency in this function - only one reset should be 1523 * done at any given time. We need to perform this only if we didn't 1524 * get here from a dedicated hard reset thread. 
1525 */ 1526 if (!from_hard_reset_thread) { 1527 /* Block future CS/VM/JOB completion operations */ 1528 spin_lock(&hdev->reset_info.lock); 1529 if (hdev->reset_info.in_reset) { 1530 /* We allow scheduling of a hard reset only during a compute reset */ 1531 if (hard_reset && hdev->reset_info.in_compute_reset) 1532 hdev->reset_info.hard_reset_schedule_flags = flags; 1533 spin_unlock(&hdev->reset_info.lock); 1534 return 0; 1535 } 1536 1537 /* This still allows the completion of some KDMA ops 1538 * Update this before in_reset because in_compute_reset implies we are in reset 1539 */ 1540 hdev->reset_info.in_compute_reset = !hard_reset; 1541 1542 hdev->reset_info.in_reset = 1; 1543 1544 spin_unlock(&hdev->reset_info.lock); 1545 1546 /* Cancel the device release watchdog work if required. 1547 * In case of reset-upon-device-release while the release watchdog work is 1548 * scheduled due to a hard-reset, do hard-reset instead of compute-reset. 1549 */ 1550 if ((hard_reset || from_dev_release) && hdev->reset_info.watchdog_active) { 1551 struct hl_device_reset_work *watchdog_work = 1552 &hdev->device_release_watchdog_work; 1553 1554 hdev->reset_info.watchdog_active = 0; 1555 if (!from_watchdog_thread) 1556 cancel_delayed_work_sync(&watchdog_work->reset_work); 1557 1558 if (from_dev_release && (watchdog_work->flags & HL_DRV_RESET_HARD)) { 1559 hdev->reset_info.in_compute_reset = 0; 1560 flags |= HL_DRV_RESET_HARD; 1561 flags &= ~HL_DRV_RESET_DEV_RELEASE; 1562 hard_reset = true; 1563 } 1564 } 1565 1566 if (delay_reset) 1567 usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1); 1568 1569 escalate_reset_flow: 1570 handle_reset_trigger(hdev, flags); 1571 1572 /* This also blocks future CS/VM/JOB completion operations */ 1573 hdev->disabled = true; 1574 1575 take_release_locks(hdev); 1576 1577 if (hard_reset) 1578 dev_info(hdev->dev, "Going to reset device\n"); 1579 else if (reset_upon_device_release) 1580 dev_dbg(hdev->dev, "Going to reset device after release by user\n"); 1581 else 1582 dev_dbg(hdev->dev, "Going to reset engines of inference device\n"); 1583 } 1584 1585 if ((hard_reset) && (!from_hard_reset_thread)) { 1586 hdev->reset_info.hard_reset_pending = true; 1587 1588 hdev->process_kill_trial_cnt = 0; 1589 1590 hdev->device_reset_work.flags = flags; 1591 1592 /* 1593 * Because the reset function can't run from heartbeat work, 1594 * we need to call the reset function from a dedicated work. 1595 */ 1596 queue_delayed_work(hdev->reset_wq, &hdev->device_reset_work.reset_work, 0); 1597 1598 return 0; 1599 } 1600 1601 cleanup_resources(hdev, hard_reset, fw_reset, from_dev_release); 1602 1603 kill_processes: 1604 if (hard_reset) { 1605 /* Kill processes here after CS rollback. 
This is because the
		 * process can't really exit until all its CSs are done, which
		 * is what we do in cs rollback
		 */
		rc = device_kill_open_processes(hdev, 0, false);

		if (rc == -EBUSY) {
			if (hdev->device_fini_pending) {
				dev_crit(hdev->dev,
					"%s Failed to kill all open processes, stopping hard reset\n",
					dev_name(&(hdev)->pdev->dev));
				goto out_err;
			}

			/* signal reset thread to reschedule */
			return rc;
		}

		if (rc) {
			dev_crit(hdev->dev,
				"%s Failed to kill all open processes, stopping hard reset\n",
				dev_name(&(hdev)->pdev->dev));
			goto out_err;
		}

		/* Flush the Event queue workers to make sure no other thread is
		 * reading or writing to registers during the reset
		 */
		flush_workqueue(hdev->eq_wq);
	}

	/* Reset the H/W. It will be in idle state after this returns */
	hw_fini_rc = hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);

	if (hard_reset) {
		hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE;

		/* Release kernel context */
		if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
			hdev->kernel_ctx = NULL;

		hl_vm_fini(hdev);
		hl_mmu_fini(hdev);
		hl_eq_reset(hdev, &hdev->event_queue);
	}

	/* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
	hl_hw_queue_reset(hdev, hard_reset);
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_cq_reset(hdev, &hdev->completion_queue[i]);

	/* Make sure the context switch phase will run again */
	ctx = hl_get_compute_ctx(hdev);
	if (ctx) {
		atomic_set(&ctx->thread_ctx_switch_token, 1);
		ctx->thread_ctx_switch_wait_token = 0;
		hl_ctx_put(ctx);
	}

	if (hw_fini_rc) {
		rc = hw_fini_rc;
		goto out_err;
	}
	/* Finished tear-down, starting to re-initialize */

	if (hard_reset) {
		hdev->device_cpu_disabled = false;
		hdev->reset_info.hard_reset_pending = false;

		if (hdev->reset_info.reset_trigger_repeated &&
				(hdev->reset_info.prev_reset_trigger ==
						HL_DRV_RESET_FW_FATAL_ERR)) {
			/* if there are 2 back-to-back resets from FW,
			 * ensure the driver puts the device in an unusable state
			 */
			dev_crit(hdev->dev,
				"%s Consecutive FW fatal errors received, stopping hard reset\n",
				dev_name(&(hdev)->pdev->dev));
			rc = -EIO;
			goto out_err;
		}

		if (hdev->kernel_ctx) {
			dev_crit(hdev->dev,
				"%s kernel ctx was alive during hard reset, something is terribly wrong\n",
				dev_name(&(hdev)->pdev->dev));
			rc = -EBUSY;
			goto out_err;
		}

		rc = hl_mmu_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed to initialize MMU S/W after hard reset\n");
			goto out_err;
		}

		/* Allocate the kernel context */
		hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
						GFP_KERNEL);
		if (!hdev->kernel_ctx) {
			rc = -ENOMEM;
			hl_mmu_fini(hdev);
			goto out_err;
		}

		hdev->is_compute_ctx_active = false;

		rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
		if (rc) {
			dev_err(hdev->dev,
				"failed to init kernel ctx in hard reset\n");
			kfree(hdev->kernel_ctx);
			hdev->kernel_ctx = NULL;
			hl_mmu_fini(hdev);
			goto out_err;
		}
	}

	/* Device is now enabled as part of the initialization requires
	 * communication with the device firmware to get information that
	 * is required for the initialization itself
1727 */ 1728 hdev->disabled = false; 1729 1730 /* F/W security enabled indication might be updated after hard-reset */ 1731 if (hard_reset) { 1732 rc = hl_fw_read_preboot_status(hdev); 1733 if (rc) 1734 goto out_err; 1735 } 1736 1737 rc = hdev->asic_funcs->hw_init(hdev); 1738 if (rc) { 1739 dev_err(hdev->dev, "failed to initialize the H/W after reset\n"); 1740 goto out_err; 1741 } 1742 1743 /* If device is not idle fail the reset process */ 1744 if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask, 1745 HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) { 1746 print_idle_status_mask(hdev, "device is not idle after reset", idle_mask); 1747 rc = -EIO; 1748 goto out_err; 1749 } 1750 1751 /* Check that the communication with the device is working */ 1752 rc = hdev->asic_funcs->test_queues(hdev); 1753 if (rc) { 1754 dev_err(hdev->dev, "Failed to detect if device is alive after reset\n"); 1755 goto out_err; 1756 } 1757 1758 if (hard_reset) { 1759 rc = device_late_init(hdev); 1760 if (rc) { 1761 dev_err(hdev->dev, "Failed late init after hard reset\n"); 1762 goto out_err; 1763 } 1764 1765 rc = hl_vm_init(hdev); 1766 if (rc) { 1767 dev_err(hdev->dev, "Failed to init memory module after hard reset\n"); 1768 goto out_err; 1769 } 1770 1771 if (!hdev->asic_prop.fw_security_enabled) 1772 hl_fw_set_max_power(hdev); 1773 } else { 1774 rc = hdev->asic_funcs->compute_reset_late_init(hdev); 1775 if (rc) { 1776 if (reset_upon_device_release) 1777 dev_err(hdev->dev, 1778 "Failed late init in reset after device release\n"); 1779 else 1780 dev_err(hdev->dev, "Failed late init after compute reset\n"); 1781 goto out_err; 1782 } 1783 } 1784 1785 rc = hdev->asic_funcs->scrub_device_mem(hdev); 1786 if (rc) { 1787 dev_err(hdev->dev, "scrub mem failed from device reset (%d)\n", rc); 1788 goto out_err; 1789 } 1790 1791 spin_lock(&hdev->reset_info.lock); 1792 hdev->reset_info.in_compute_reset = 0; 1793 1794 /* Schedule hard reset only if requested and if not already in hard reset. 1795 * We keep 'in_reset' enabled, so no other reset can go in during the hard 1796 * reset schedule 1797 */ 1798 if (!hard_reset && hdev->reset_info.hard_reset_schedule_flags) 1799 schedule_hard_reset = true; 1800 else 1801 hdev->reset_info.in_reset = 0; 1802 1803 spin_unlock(&hdev->reset_info.lock); 1804 1805 hdev->reset_info.needs_reset = false; 1806 1807 if (hard_reset) 1808 dev_info(hdev->dev, 1809 "Successfully finished resetting the %s device\n", 1810 dev_name(&(hdev)->pdev->dev)); 1811 else 1812 dev_dbg(hdev->dev, 1813 "Successfully finished resetting the %s device\n", 1814 dev_name(&(hdev)->pdev->dev)); 1815 1816 if (hard_reset) { 1817 hdev->reset_info.hard_reset_cnt++; 1818 1819 /* After reset is done, we are ready to receive events from 1820 * the F/W. 
We can't do it before because we will ignore events 1821 * and if those events are fatal, we won't know about it and 1822 * the device will be operational although it shouldn't be 1823 */ 1824 hdev->asic_funcs->enable_events_from_fw(hdev); 1825 } else { 1826 if (!reset_upon_device_release) 1827 hdev->reset_info.compute_reset_cnt++; 1828 1829 if (schedule_hard_reset) { 1830 dev_info(hdev->dev, "Performing hard reset scheduled during compute reset\n"); 1831 flags = hdev->reset_info.hard_reset_schedule_flags; 1832 hdev->reset_info.hard_reset_schedule_flags = 0; 1833 hdev->disabled = true; 1834 hard_reset = true; 1835 handle_reset_trigger(hdev, flags); 1836 goto escalate_reset_flow; 1837 } 1838 } 1839 1840 return 0; 1841 1842 out_err: 1843 hdev->disabled = true; 1844 1845 spin_lock(&hdev->reset_info.lock); 1846 hdev->reset_info.in_compute_reset = 0; 1847 1848 if (hard_reset) { 1849 dev_err(hdev->dev, 1850 "%s Failed to reset! Device is NOT usable\n", 1851 dev_name(&(hdev)->pdev->dev)); 1852 hdev->reset_info.hard_reset_cnt++; 1853 } else { 1854 if (reset_upon_device_release) { 1855 dev_err(hdev->dev, "Failed to reset device after user release\n"); 1856 flags &= ~HL_DRV_RESET_DEV_RELEASE; 1857 } else { 1858 dev_err(hdev->dev, "Failed to do compute reset\n"); 1859 hdev->reset_info.compute_reset_cnt++; 1860 } 1861 1862 spin_unlock(&hdev->reset_info.lock); 1863 flags |= HL_DRV_RESET_HARD; 1864 hard_reset = true; 1865 goto escalate_reset_flow; 1866 } 1867 1868 hdev->reset_info.in_reset = 0; 1869 1870 spin_unlock(&hdev->reset_info.lock); 1871 1872 return rc; 1873 } 1874 1875 /* 1876 * hl_device_cond_reset() - conditionally reset the device. 1877 * @hdev: pointer to habanalabs device structure. 1878 * @reset_flags: reset flags. 1879 * @event_mask: events to notify user about. 1880 * 1881 * Conditionally reset the device, or alternatively schedule a watchdog work to reset the device 1882 * unless another reset precedes it. 1883 */ 1884 int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask) 1885 { 1886 struct hl_ctx *ctx = NULL; 1887 1888 /* F/W reset cannot be postponed */ 1889 if (flags & HL_DRV_RESET_BYPASS_REQ_TO_FW) 1890 goto device_reset; 1891 1892 /* Device release watchdog is relevant only if user exists and gets a reset notification */ 1893 if (!(event_mask & HL_NOTIFIER_EVENT_DEVICE_RESET)) { 1894 dev_err(hdev->dev, "Resetting device without a reset indication to user\n"); 1895 goto device_reset; 1896 } 1897 1898 ctx = hl_get_compute_ctx(hdev); 1899 if (!ctx || !ctx->hpriv->notifier_event.eventfd) 1900 goto device_reset; 1901 1902 /* Schedule the device release watchdog work unless reset is already in progress or if the 1903 * work is already scheduled. 
/*
 * hl_device_cond_reset() - conditionally reset the device.
 * @hdev: pointer to habanalabs device structure.
 * @flags: reset flags.
 * @event_mask: events to notify user about.
 *
 * Conditionally reset the device, or alternatively schedule a watchdog work to reset the device
 * unless another reset precedes it.
 */
int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
{
	struct hl_ctx *ctx = NULL;

	/* F/W reset cannot be postponed */
	if (flags & HL_DRV_RESET_BYPASS_REQ_TO_FW)
		goto device_reset;

	/* Device release watchdog is relevant only if user exists and gets a reset notification */
	if (!(event_mask & HL_NOTIFIER_EVENT_DEVICE_RESET)) {
		dev_err(hdev->dev, "Resetting device without a reset indication to user\n");
		goto device_reset;
	}

	ctx = hl_get_compute_ctx(hdev);
	if (!ctx || !ctx->hpriv->notifier_event.eventfd)
		goto device_reset;

	/* Schedule the device release watchdog work unless reset is already in progress or if the
	 * work is already scheduled.
	 */
	spin_lock(&hdev->reset_info.lock);
	if (hdev->reset_info.in_reset) {
		spin_unlock(&hdev->reset_info.lock);
		goto device_reset;
	}

	if (hdev->reset_info.watchdog_active)
		goto out;

	hdev->device_release_watchdog_work.flags = flags;
	dev_dbg(hdev->dev, "Device is going to be hard-reset in %u sec unless being released\n",
		hdev->device_release_watchdog_timeout_sec);
	schedule_delayed_work(&hdev->device_release_watchdog_work.reset_work,
				msecs_to_jiffies(hdev->device_release_watchdog_timeout_sec * 1000));
	hdev->reset_info.watchdog_active = 1;
out:
	spin_unlock(&hdev->reset_info.lock);

	hl_notifier_event_send_all(hdev, event_mask);

	hl_ctx_put(ctx);

	hl_abort_waitings_for_completion(hdev);

	return 0;

device_reset:
	if (event_mask)
		hl_notifier_event_send_all(hdev, event_mask);
	if (ctx)
		hl_ctx_put(ctx);

	return hl_device_reset(hdev, flags);
}

static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event_mask)
{
	mutex_lock(&notifier_event->lock);
	notifier_event->events_mask |= event_mask;

	if (notifier_event->eventfd)
		eventfd_signal(notifier_event->eventfd, 1);

	mutex_unlock(&notifier_event->lock);
}

/*
 * hl_notifier_event_send_all - notify all user processes via eventfd
 *
 * @hdev: pointer to habanalabs device structure
 * @event_mask: the occurred event(s)
 */
void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask)
{
	struct hl_fpriv *hpriv;

	if (!event_mask) {
		dev_warn(hdev->dev, "Skip sending zero event");
		return;
	}

	mutex_lock(&hdev->fpriv_list_lock);

	list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node)
		hl_notifier_event_send(&hpriv->notifier_event, event_mask);

	mutex_unlock(&hdev->fpriv_list_lock);

	/* control device */
	mutex_lock(&hdev->fpriv_ctrl_list_lock);

	list_for_each_entry(hpriv, &hdev->fpriv_ctrl_list, dev_node)
		hl_notifier_event_send(&hpriv->notifier_event, event_mask);

	mutex_unlock(&hdev->fpriv_ctrl_list_lock);
}

static int create_cdev(struct hl_device *hdev)
{
	char *name;
	int rc;

	hdev->cdev_idx = hdev->id / 2;

	name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx);
	if (!name) {
		rc = -ENOMEM;
		goto out_err;
	}

	/* Initialize cdev and device structures */
	rc = device_init_cdev(hdev, hdev->hclass, hdev->id, &hl_ops, name,
				&hdev->cdev, &hdev->dev);

	kfree(name);

	if (rc)
		goto out_err;

	name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx);
	if (!name) {
		rc = -ENOMEM;
		goto free_dev;
	}

	/* Initialize cdev and device structures for control device */
	rc = device_init_cdev(hdev, hdev->hclass, hdev->id_control, &hl_ctrl_ops,
				name, &hdev->cdev_ctrl, &hdev->dev_ctrl);

	kfree(name);

	if (rc)
		goto free_dev;

	return 0;

free_dev:
	put_device(hdev->dev);
out_err:
	return rc;
}

/*
 * hl_device_init - main initialization function for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Allocate an id for the device, do early initialization and then call the
 * ASIC specific
initialization functions. Finally, create the cdev and the 2035 * Linux device to expose it to the user 2036 */ 2037 int hl_device_init(struct hl_device *hdev) 2038 { 2039 int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt; 2040 bool add_cdev_sysfs_on_err = false; 2041 2042 rc = create_cdev(hdev); 2043 if (rc) 2044 goto out_disabled; 2045 2046 /* Initialize ASIC function pointers and perform early init */ 2047 rc = device_early_init(hdev); 2048 if (rc) 2049 goto free_dev; 2050 2051 user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count + 2052 hdev->asic_prop.user_interrupt_count; 2053 2054 if (user_interrupt_cnt) { 2055 hdev->user_interrupt = kcalloc(user_interrupt_cnt, sizeof(*hdev->user_interrupt), 2056 GFP_KERNEL); 2057 if (!hdev->user_interrupt) { 2058 rc = -ENOMEM; 2059 goto early_fini; 2060 } 2061 } 2062 2063 /* 2064 * Start calling ASIC initialization. First S/W then H/W and finally 2065 * late init 2066 */ 2067 rc = hdev->asic_funcs->sw_init(hdev); 2068 if (rc) 2069 goto free_usr_intr_mem; 2070 2071 2072 /* initialize completion structure for multi CS wait */ 2073 hl_multi_cs_completion_init(hdev); 2074 2075 /* 2076 * Initialize the H/W queues. Must be done before hw_init, because 2077 * there the addresses of the kernel queue are being written to the 2078 * registers of the device 2079 */ 2080 rc = hl_hw_queues_create(hdev); 2081 if (rc) { 2082 dev_err(hdev->dev, "failed to initialize kernel queues\n"); 2083 goto sw_fini; 2084 } 2085 2086 cq_cnt = hdev->asic_prop.completion_queues_count; 2087 2088 /* 2089 * Initialize the completion queues. Must be done before hw_init, 2090 * because there the addresses of the completion queues are being 2091 * passed as arguments to request_irq 2092 */ 2093 if (cq_cnt) { 2094 hdev->completion_queue = kcalloc(cq_cnt, 2095 sizeof(*hdev->completion_queue), 2096 GFP_KERNEL); 2097 2098 if (!hdev->completion_queue) { 2099 dev_err(hdev->dev, 2100 "failed to allocate completion queues\n"); 2101 rc = -ENOMEM; 2102 goto hw_queues_destroy; 2103 } 2104 } 2105 2106 for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) { 2107 rc = hl_cq_init(hdev, &hdev->completion_queue[i], 2108 hdev->asic_funcs->get_queue_id_for_cq(hdev, i)); 2109 if (rc) { 2110 dev_err(hdev->dev, 2111 "failed to initialize completion queue\n"); 2112 goto cq_fini; 2113 } 2114 hdev->completion_queue[i].cq_idx = i; 2115 } 2116 2117 hdev->shadow_cs_queue = kcalloc(hdev->asic_prop.max_pending_cs, 2118 sizeof(struct hl_cs *), GFP_KERNEL); 2119 if (!hdev->shadow_cs_queue) { 2120 rc = -ENOMEM; 2121 goto cq_fini; 2122 } 2123 2124 /* 2125 * Initialize the event queue. 
Must be done before hw_init, 2126 * because there the address of the event queue is being 2127 * passed as argument to request_irq 2128 */ 2129 rc = hl_eq_init(hdev, &hdev->event_queue); 2130 if (rc) { 2131 dev_err(hdev->dev, "failed to initialize event queue\n"); 2132 goto free_shadow_cs_queue; 2133 } 2134 2135 /* MMU S/W must be initialized before kernel context is created */ 2136 rc = hl_mmu_init(hdev); 2137 if (rc) { 2138 dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n"); 2139 goto eq_fini; 2140 } 2141 2142 /* Allocate the kernel context */ 2143 hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL); 2144 if (!hdev->kernel_ctx) { 2145 rc = -ENOMEM; 2146 goto mmu_fini; 2147 } 2148 2149 hdev->is_compute_ctx_active = false; 2150 2151 hdev->asic_funcs->state_dump_init(hdev); 2152 2153 hdev->device_release_watchdog_timeout_sec = HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC; 2154 2155 hdev->memory_scrub_val = MEM_SCRUB_DEFAULT_VAL; 2156 hl_debugfs_add_device(hdev); 2157 2158 /* debugfs nodes are created in hl_ctx_init so it must be called after 2159 * hl_debugfs_add_device. 2160 */ 2161 rc = hl_ctx_init(hdev, hdev->kernel_ctx, true); 2162 if (rc) { 2163 dev_err(hdev->dev, "failed to initialize kernel context\n"); 2164 kfree(hdev->kernel_ctx); 2165 goto remove_device_from_debugfs; 2166 } 2167 2168 rc = hl_cb_pool_init(hdev); 2169 if (rc) { 2170 dev_err(hdev->dev, "failed to initialize CB pool\n"); 2171 goto release_ctx; 2172 } 2173 2174 rc = hl_dec_init(hdev); 2175 if (rc) { 2176 dev_err(hdev->dev, "Failed to initialize the decoder module\n"); 2177 goto cb_pool_fini; 2178 } 2179 2180 /* 2181 * From this point, override rc (=0) in case of an error to allow 2182 * debugging (by adding char devices and create sysfs nodes as part of 2183 * the error flow). 2184 */ 2185 add_cdev_sysfs_on_err = true; 2186 2187 /* Device is now enabled as part of the initialization requires 2188 * communication with the device firmware to get information that 2189 * is required for the initialization itself 2190 */ 2191 hdev->disabled = false; 2192 2193 rc = hdev->asic_funcs->hw_init(hdev); 2194 if (rc) { 2195 dev_err(hdev->dev, "failed to initialize the H/W\n"); 2196 rc = 0; 2197 goto out_disabled; 2198 } 2199 2200 /* Check that the communication with the device is working */ 2201 rc = hdev->asic_funcs->test_queues(hdev); 2202 if (rc) { 2203 dev_err(hdev->dev, "Failed to detect if device is alive\n"); 2204 rc = 0; 2205 goto out_disabled; 2206 } 2207 2208 rc = device_late_init(hdev); 2209 if (rc) { 2210 dev_err(hdev->dev, "Failed late initialization\n"); 2211 rc = 0; 2212 goto out_disabled; 2213 } 2214 2215 dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n", 2216 hdev->asic_name, 2217 hdev->asic_prop.dram_size / SZ_1G); 2218 2219 rc = hl_vm_init(hdev); 2220 if (rc) { 2221 dev_err(hdev->dev, "Failed to initialize memory module\n"); 2222 rc = 0; 2223 goto out_disabled; 2224 } 2225 2226 /* 2227 * Expose devices and sysfs nodes to user. 2228 * From here there is no need to add char devices and create sysfs nodes 2229 * in case of an error. 
2230 */ 2231 add_cdev_sysfs_on_err = false; 2232 rc = device_cdev_sysfs_add(hdev); 2233 if (rc) { 2234 dev_err(hdev->dev, 2235 "Failed to add char devices and sysfs nodes\n"); 2236 rc = 0; 2237 goto out_disabled; 2238 } 2239 2240 /* Need to call this again because the max power might change, 2241 * depending on card type for certain ASICs 2242 */ 2243 if (hdev->asic_prop.set_max_power_on_device_init && 2244 !hdev->asic_prop.fw_security_enabled) 2245 hl_fw_set_max_power(hdev); 2246 2247 /* 2248 * hl_hwmon_init() must be called after device_late_init(), because only 2249 * there we get the information from the device about which 2250 * hwmon-related sensors the device supports. 2251 * Furthermore, it must be done after adding the device to the system. 2252 */ 2253 rc = hl_hwmon_init(hdev); 2254 if (rc) { 2255 dev_err(hdev->dev, "Failed to initialize hwmon\n"); 2256 rc = 0; 2257 goto out_disabled; 2258 } 2259 2260 dev_notice(hdev->dev, 2261 "Successfully added device %s to habanalabs driver\n", 2262 dev_name(&(hdev)->pdev->dev)); 2263 2264 hdev->init_done = true; 2265 2266 /* After initialization is done, we are ready to receive events from 2267 * the F/W. We can't do it before because we will ignore events and if 2268 * those events are fatal, we won't know about it and the device will 2269 * be operational although it shouldn't be 2270 */ 2271 hdev->asic_funcs->enable_events_from_fw(hdev); 2272 2273 return 0; 2274 2275 cb_pool_fini: 2276 hl_cb_pool_fini(hdev); 2277 release_ctx: 2278 if (hl_ctx_put(hdev->kernel_ctx) != 1) 2279 dev_err(hdev->dev, 2280 "kernel ctx is still alive on initialization failure\n"); 2281 remove_device_from_debugfs: 2282 hl_debugfs_remove_device(hdev); 2283 mmu_fini: 2284 hl_mmu_fini(hdev); 2285 eq_fini: 2286 hl_eq_fini(hdev, &hdev->event_queue); 2287 free_shadow_cs_queue: 2288 kfree(hdev->shadow_cs_queue); 2289 cq_fini: 2290 for (i = 0 ; i < cq_ready_cnt ; i++) 2291 hl_cq_fini(hdev, &hdev->completion_queue[i]); 2292 kfree(hdev->completion_queue); 2293 hw_queues_destroy: 2294 hl_hw_queues_destroy(hdev); 2295 sw_fini: 2296 hdev->asic_funcs->sw_fini(hdev); 2297 free_usr_intr_mem: 2298 kfree(hdev->user_interrupt); 2299 early_fini: 2300 device_early_fini(hdev); 2301 free_dev: 2302 put_device(hdev->dev_ctrl); 2303 put_device(hdev->dev); 2304 out_disabled: 2305 hdev->disabled = true; 2306 if (add_cdev_sysfs_on_err) 2307 device_cdev_sysfs_add(hdev); 2308 if (hdev->pdev) 2309 dev_err(&hdev->pdev->dev, 2310 "Failed to initialize hl%d. Device %s is NOT usable !\n", 2311 hdev->cdev_idx, dev_name(&(hdev)->pdev->dev)); 2312 else 2313 pr_err("Failed to initialize hl%d. 
Device %s is NOT usable !\n",
			hdev->cdev_idx, dev_name(&(hdev)->pdev->dev));

	return rc;
}

/*
 * hl_device_fini - main tear-down function for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Destroy the device, call ASIC fini functions and release the id
 */
void hl_device_fini(struct hl_device *hdev)
{
	bool device_in_reset;
	ktime_t timeout;
	u64 reset_sec;
	int i, rc;

	dev_info(hdev->dev, "Removing device\n");

	hdev->device_fini_pending = 1;
	flush_delayed_work(&hdev->device_reset_work.reset_work);

	if (hdev->pldm)
		reset_sec = HL_PLDM_HARD_RESET_MAX_TIMEOUT;
	else
		reset_sec = HL_HARD_RESET_MAX_TIMEOUT;

	/*
	 * This function competes with the reset function, so try to take the
	 * reset atomic and, if we are already in the middle of a reset, wait
	 * until the reset function is finished. The reset function is designed
	 * to always finish. However, in Gaudi, because of all the network
	 * ports, the hard reset could take between 10-30 seconds
	 */

	timeout = ktime_add_us(ktime_get(), reset_sec * 1000 * 1000);

	spin_lock(&hdev->reset_info.lock);
	device_in_reset = !!hdev->reset_info.in_reset;
	if (!device_in_reset)
		hdev->reset_info.in_reset = 1;
	spin_unlock(&hdev->reset_info.lock);

	while (device_in_reset) {
		usleep_range(50, 200);

		spin_lock(&hdev->reset_info.lock);
		device_in_reset = !!hdev->reset_info.in_reset;
		if (!device_in_reset)
			hdev->reset_info.in_reset = 1;
		spin_unlock(&hdev->reset_info.lock);

		if (ktime_compare(ktime_get(), timeout) > 0) {
			dev_crit(hdev->dev,
				"%s Failed to remove device because reset function did not finish\n",
				dev_name(&(hdev)->pdev->dev));
			return;
		}
	}

	cancel_delayed_work_sync(&hdev->device_release_watchdog_work.reset_work);

	/* Disable PCI access from device F/W so it won't send us additional
	 * interrupts. We disable MSI/MSI-X at the halt_engines function and we
	 * can't have the F/W sending us interrupts after that. We need to
	 * disable the access here because if the device is marked disabled, the
	 * message won't be sent. Also, in case of heartbeat, the device CPU is
	 * marked as disabled so this message won't be sent
	 */
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);

	/* Mark device as disabled */
	hdev->disabled = true;

	take_release_locks(hdev);

	hdev->reset_info.hard_reset_pending = true;

	hl_hwmon_fini(hdev);

	cleanup_resources(hdev, true, false, false);

	/* Kill processes here after CS rollback.
This is because the process 2399 * can't really exit until all its CSs are done, which is what we 2400 * do in cs rollback 2401 */ 2402 dev_info(hdev->dev, 2403 "Waiting for all processes to exit (timeout of %u seconds)", 2404 HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI); 2405 2406 hdev->process_kill_trial_cnt = 0; 2407 rc = device_kill_open_processes(hdev, HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI, false); 2408 if (rc) { 2409 dev_crit(hdev->dev, "Failed to kill all open processes\n"); 2410 device_disable_open_processes(hdev, false); 2411 } 2412 2413 hdev->process_kill_trial_cnt = 0; 2414 rc = device_kill_open_processes(hdev, 0, true); 2415 if (rc) { 2416 dev_crit(hdev->dev, "Failed to kill all control device open processes\n"); 2417 device_disable_open_processes(hdev, true); 2418 } 2419 2420 hl_cb_pool_fini(hdev); 2421 2422 /* Reset the H/W. It will be in idle state after this returns */ 2423 rc = hdev->asic_funcs->hw_fini(hdev, true, false); 2424 if (rc) 2425 dev_err(hdev->dev, "hw_fini failed in device fini while removing device %d\n", rc); 2426 2427 hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE; 2428 2429 /* Release kernel context */ 2430 if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1)) 2431 dev_err(hdev->dev, "kernel ctx is still alive\n"); 2432 2433 hl_debugfs_remove_device(hdev); 2434 2435 hl_dec_fini(hdev); 2436 2437 hl_vm_fini(hdev); 2438 2439 hl_mmu_fini(hdev); 2440 2441 vfree(hdev->captured_err_info.page_fault_info.user_mappings); 2442 2443 hl_eq_fini(hdev, &hdev->event_queue); 2444 2445 kfree(hdev->shadow_cs_queue); 2446 2447 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 2448 hl_cq_fini(hdev, &hdev->completion_queue[i]); 2449 kfree(hdev->completion_queue); 2450 kfree(hdev->user_interrupt); 2451 2452 hl_hw_queues_destroy(hdev); 2453 2454 /* Call ASIC S/W finalize function */ 2455 hdev->asic_funcs->sw_fini(hdev); 2456 2457 device_early_fini(hdev); 2458 2459 /* Hide devices and sysfs nodes from user */ 2460 device_cdev_sysfs_del(hdev); 2461 2462 pr_info("removed device successfully\n"); 2463 } 2464 2465 /* 2466 * MMIO register access helper functions. 
2467 */ 2468 2469 /* 2470 * hl_rreg - Read an MMIO register 2471 * 2472 * @hdev: pointer to habanalabs device structure 2473 * @reg: MMIO register offset (in bytes) 2474 * 2475 * Returns the value of the MMIO register we are asked to read 2476 * 2477 */ 2478 inline u32 hl_rreg(struct hl_device *hdev, u32 reg) 2479 { 2480 u32 val = readl(hdev->rmmio + reg); 2481 2482 if (unlikely(trace_habanalabs_rreg32_enabled())) 2483 trace_habanalabs_rreg32(hdev->dev, reg, val); 2484 2485 return val; 2486 } 2487 2488 /* 2489 * hl_wreg - Write to an MMIO register 2490 * 2491 * @hdev: pointer to habanalabs device structure 2492 * @reg: MMIO register offset (in bytes) 2493 * @val: 32-bit value 2494 * 2495 * Writes the 32-bit value into the MMIO register 2496 * 2497 */ 2498 inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val) 2499 { 2500 if (unlikely(trace_habanalabs_wreg32_enabled())) 2501 trace_habanalabs_wreg32(hdev->dev, reg, val); 2502 2503 writel(val, hdev->rmmio + reg); 2504 } 2505 2506 void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines, 2507 u8 flags) 2508 { 2509 struct razwi_info *razwi_info = &hdev->captured_err_info.razwi_info; 2510 2511 if (num_of_engines > HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR) { 2512 dev_err(hdev->dev, 2513 "Number of possible razwi initiators (%u) exceeded limit (%u)\n", 2514 num_of_engines, HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR); 2515 return; 2516 } 2517 2518 /* In case it's the first razwi since the device was opened, capture its parameters */ 2519 if (atomic_cmpxchg(&hdev->captured_err_info.razwi_info.razwi_detected, 0, 1)) 2520 return; 2521 2522 razwi_info->razwi.timestamp = ktime_to_ns(ktime_get()); 2523 razwi_info->razwi.addr = addr; 2524 razwi_info->razwi.num_of_possible_engines = num_of_engines; 2525 memcpy(&razwi_info->razwi.engine_id[0], &engine_id[0], 2526 num_of_engines * sizeof(u16)); 2527 razwi_info->razwi.flags = flags; 2528 2529 razwi_info->razwi_info_available = true; 2530 } 2531 2532 void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines, 2533 u8 flags, u64 *event_mask) 2534 { 2535 hl_capture_razwi(hdev, addr, engine_id, num_of_engines, flags); 2536 2537 if (event_mask) 2538 *event_mask |= HL_NOTIFIER_EVENT_RAZWI; 2539 } 2540 2541 static void hl_capture_user_mappings(struct hl_device *hdev, bool is_pmmu) 2542 { 2543 struct page_fault_info *pgf_info = &hdev->captured_err_info.page_fault_info; 2544 struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; 2545 struct hl_vm_hash_node *hnode; 2546 struct hl_userptr *userptr; 2547 enum vm_type *vm_type; 2548 struct hl_ctx *ctx; 2549 u32 map_idx = 0; 2550 int i; 2551 2552 /* Reset previous session count*/ 2553 pgf_info->num_of_user_mappings = 0; 2554 2555 ctx = hl_get_compute_ctx(hdev); 2556 if (!ctx) { 2557 dev_err(hdev->dev, "Can't get user context for user mappings\n"); 2558 return; 2559 } 2560 2561 mutex_lock(&ctx->mem_hash_lock); 2562 hash_for_each(ctx->mem_hash, i, hnode, node) { 2563 vm_type = hnode->ptr; 2564 if (((*vm_type == VM_TYPE_USERPTR) && is_pmmu) || 2565 ((*vm_type == VM_TYPE_PHYS_PACK) && !is_pmmu)) 2566 pgf_info->num_of_user_mappings++; 2567 2568 } 2569 2570 if (!pgf_info->num_of_user_mappings) 2571 goto finish; 2572 2573 /* In case we already allocated in previous session, need to release it before 2574 * allocating new buffer. 
2575 */ 2576 vfree(pgf_info->user_mappings); 2577 pgf_info->user_mappings = 2578 vzalloc(pgf_info->num_of_user_mappings * sizeof(struct hl_user_mapping)); 2579 if (!pgf_info->user_mappings) { 2580 pgf_info->num_of_user_mappings = 0; 2581 goto finish; 2582 } 2583 2584 hash_for_each(ctx->mem_hash, i, hnode, node) { 2585 vm_type = hnode->ptr; 2586 if ((*vm_type == VM_TYPE_USERPTR) && (is_pmmu)) { 2587 userptr = hnode->ptr; 2588 pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr; 2589 pgf_info->user_mappings[map_idx].size = userptr->size; 2590 map_idx++; 2591 } else if ((*vm_type == VM_TYPE_PHYS_PACK) && (!is_pmmu)) { 2592 phys_pg_pack = hnode->ptr; 2593 pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr; 2594 pgf_info->user_mappings[map_idx].size = phys_pg_pack->total_size; 2595 map_idx++; 2596 } 2597 } 2598 finish: 2599 mutex_unlock(&ctx->mem_hash_lock); 2600 hl_ctx_put(ctx); 2601 } 2602 2603 void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu) 2604 { 2605 struct page_fault_info *pgf_info = &hdev->captured_err_info.page_fault_info; 2606 2607 /* Capture only the first page fault */ 2608 if (atomic_cmpxchg(&pgf_info->page_fault_detected, 0, 1)) 2609 return; 2610 2611 pgf_info->page_fault.timestamp = ktime_to_ns(ktime_get()); 2612 pgf_info->page_fault.addr = addr; 2613 pgf_info->page_fault.engine_id = eng_id; 2614 hl_capture_user_mappings(hdev, is_pmmu); 2615 2616 pgf_info->page_fault_info_available = true; 2617 } 2618 2619 void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu, 2620 u64 *event_mask) 2621 { 2622 hl_capture_page_fault(hdev, addr, eng_id, is_pmmu); 2623 2624 if (event_mask) 2625 *event_mask |= HL_NOTIFIER_EVENT_PAGE_FAULT; 2626 } 2627 2628 static void hl_capture_hw_err(struct hl_device *hdev, u16 event_id) 2629 { 2630 struct hw_err_info *info = &hdev->captured_err_info.hw_err; 2631 2632 /* Capture only the first HW err */ 2633 if (atomic_cmpxchg(&info->event_detected, 0, 1)) 2634 return; 2635 2636 info->event.timestamp = ktime_to_ns(ktime_get()); 2637 info->event.event_id = event_id; 2638 2639 info->event_info_available = true; 2640 } 2641 2642 void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_mask) 2643 { 2644 hl_capture_hw_err(hdev, event_id); 2645 2646 if (event_mask) 2647 *event_mask |= HL_NOTIFIER_EVENT_CRITICL_HW_ERR; 2648 } 2649 2650 static void hl_capture_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *fw_info) 2651 { 2652 struct fw_err_info *info = &hdev->captured_err_info.fw_err; 2653 2654 /* Capture only the first FW error */ 2655 if (atomic_cmpxchg(&info->event_detected, 0, 1)) 2656 return; 2657 2658 info->event.timestamp = ktime_to_ns(ktime_get()); 2659 info->event.err_type = fw_info->err_type; 2660 if (fw_info->err_type == HL_INFO_FW_REPORTED_ERR) 2661 info->event.event_id = fw_info->event_id; 2662 2663 info->event_info_available = true; 2664 } 2665 2666 void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info) 2667 { 2668 hl_capture_fw_err(hdev, info); 2669 2670 if (info->event_mask) 2671 *info->event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR; 2672 } 2673
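/*
 * Usage sketch (hypothetical, not part of the driver): the hl_handle_*()
 * capture helpers above are meant to be called from the ASIC event handlers,
 * which accumulate notification bits in a local event mask and then deliver
 * them once, typically together with a reset request. The handler name and the
 * decision to reset below are assumptions made for illustration only.
 *
 *	static void example_handle_mmu_page_fault(struct hl_device *hdev,
 *						   u64 fault_addr, u16 eng_id)
 *	{
 *		u64 event_mask = 0;
 *
 *		// Record the first page fault and set HL_NOTIFIER_EVENT_PAGE_FAULT
 *		// in the local mask (is_pmmu = true for a PMMU fault)
 *		hl_handle_page_fault(hdev, fault_addr, eng_id, true, &event_mask);
 *
 *		// Ask for a (conditional) hard reset; hl_device_cond_reset() also
 *		// delivers the accumulated event_mask to all user processes
 *		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
 *		hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask);
 *	}
 */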