1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Copyright 2016-2021 HabanaLabs, Ltd. 5 * All Rights Reserved. 6 */ 7 8 #include <uapi/drm/habanalabs_accel.h> 9 #include "habanalabs.h" 10 11 #include <linux/uaccess.h> 12 #include <linux/slab.h> 13 14 #define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \ 15 HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \ 16 HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND | \ 17 HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES) 18 19 20 #define MAX_TS_ITER_NUM 10 21 22 /** 23 * enum hl_cs_wait_status - cs wait status 24 * @CS_WAIT_STATUS_BUSY: cs was not completed yet 25 * @CS_WAIT_STATUS_COMPLETED: cs completed 26 * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone 27 */ 28 enum hl_cs_wait_status { 29 CS_WAIT_STATUS_BUSY, 30 CS_WAIT_STATUS_COMPLETED, 31 CS_WAIT_STATUS_GONE 32 }; 33 34 static void job_wq_completion(struct work_struct *work); 35 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq, 36 enum hl_cs_wait_status *status, s64 *timestamp); 37 static void cs_do_release(struct kref *ref); 38 39 static void hl_push_cs_outcome(struct hl_device *hdev, 40 struct hl_cs_outcome_store *outcome_store, 41 u64 seq, ktime_t ts, int error) 42 { 43 struct hl_cs_outcome *node; 44 unsigned long flags; 45 46 /* 47 * CS outcome store supports the following operations: 48 * push outcome - store a recent CS outcome in the store 49 * pop outcome - retrieve a SPECIFIC (by seq) CS outcome from the store 50 * It uses 2 lists: used list and free list. 51 * It has a pre-allocated amount of nodes, each node stores 52 * a single CS outcome. 53 * Initially, all the nodes are in the free list. 54 * On push outcome, a node (any) is taken from the free list, its 55 * information is filled in, and the node is moved to the used list. 56 * It is possible, that there are no nodes left in the free list. 57 * In this case, we will lose some information about old outcomes. We 58 * will pop the OLDEST node from the used list, and make it free. 59 * On pop, the node is searched for in the used list (using a search 60 * index). 61 * If found, the node is then removed from the used list, and moved 62 * back to the free list. The outcome data that the node contained is 63 * returned back to the user. 64 */ 65 66 spin_lock_irqsave(&outcome_store->db_lock, flags); 67 68 if (list_empty(&outcome_store->free_list)) { 69 node = list_last_entry(&outcome_store->used_list, 70 struct hl_cs_outcome, list_link); 71 hash_del(&node->map_link); 72 dev_dbg(hdev->dev, "CS %llu outcome was lost\n", node->seq); 73 } else { 74 node = list_last_entry(&outcome_store->free_list, 75 struct hl_cs_outcome, list_link); 76 } 77 78 list_del_init(&node->list_link); 79 80 node->seq = seq; 81 node->ts = ts; 82 node->error = error; 83 84 list_add(&node->list_link, &outcome_store->used_list); 85 hash_add(outcome_store->outcome_map, &node->map_link, node->seq); 86 87 spin_unlock_irqrestore(&outcome_store->db_lock, flags); 88 } 89 90 static bool hl_pop_cs_outcome(struct hl_cs_outcome_store *outcome_store, 91 u64 seq, ktime_t *ts, int *error) 92 { 93 struct hl_cs_outcome *node; 94 unsigned long flags; 95 96 spin_lock_irqsave(&outcome_store->db_lock, flags); 97 98 hash_for_each_possible(outcome_store->outcome_map, node, map_link, seq) 99 if (node->seq == seq) { 100 *ts = node->ts; 101 *error = node->error; 102 103 hash_del(&node->map_link); 104 list_del_init(&node->list_link); 105 list_add(&node->list_link, &outcome_store->free_list); 106 107 spin_unlock_irqrestore(&outcome_store->db_lock, flags); 108 109 return true; 110 } 111 112 spin_unlock_irqrestore(&outcome_store->db_lock, flags); 113 114 return false; 115 } 116 117 static void hl_sob_reset(struct kref *ref) 118 { 119 struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob, 120 kref); 121 struct hl_device *hdev = hw_sob->hdev; 122 123 dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id); 124 125 hdev->asic_funcs->reset_sob(hdev, hw_sob); 126 127 hw_sob->need_reset = false; 128 } 129 130 void hl_sob_reset_error(struct kref *ref) 131 { 132 struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob, 133 kref); 134 struct hl_device *hdev = hw_sob->hdev; 135 136 dev_crit(hdev->dev, 137 "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n", 138 hw_sob->q_idx, hw_sob->sob_id); 139 } 140 141 void hw_sob_put(struct hl_hw_sob *hw_sob) 142 { 143 if (hw_sob) 144 kref_put(&hw_sob->kref, hl_sob_reset); 145 } 146 147 static void hw_sob_put_err(struct hl_hw_sob *hw_sob) 148 { 149 if (hw_sob) 150 kref_put(&hw_sob->kref, hl_sob_reset_error); 151 } 152 153 void hw_sob_get(struct hl_hw_sob *hw_sob) 154 { 155 if (hw_sob) 156 kref_get(&hw_sob->kref); 157 } 158 159 /** 160 * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet 161 * @sob_base: sob base id 162 * @sob_mask: sob user mask, each bit represents a sob offset from sob base 163 * @mask: generated mask 164 * 165 * Return: 0 if given parameters are valid 166 */ 167 int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask) 168 { 169 int i; 170 171 if (sob_mask == 0) 172 return -EINVAL; 173 174 if (sob_mask == 0x1) { 175 *mask = ~(1 << (sob_base & 0x7)); 176 } else { 177 /* find msb in order to verify sob range is valid */ 178 for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--) 179 if (BIT(i) & sob_mask) 180 break; 181 182 if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1)) 183 return -EINVAL; 184 185 *mask = ~sob_mask; 186 } 187 188 return 0; 189 } 190 191 static void hl_fence_release(struct kref *kref) 192 { 193 struct hl_fence *fence = 194 container_of(kref, struct hl_fence, refcount); 195 struct hl_cs_compl *hl_cs_cmpl = 196 container_of(fence, struct hl_cs_compl, base_fence); 197 198 kfree(hl_cs_cmpl); 199 } 200 201 void hl_fence_put(struct hl_fence *fence) 202 { 203 if (IS_ERR_OR_NULL(fence)) 204 return; 205 kref_put(&fence->refcount, hl_fence_release); 206 } 207 208 void hl_fences_put(struct hl_fence **fence, int len) 209 { 210 int i; 211 212 for (i = 0; i < len; i++, fence++) 213 hl_fence_put(*fence); 214 } 215 216 void hl_fence_get(struct hl_fence *fence) 217 { 218 if (fence) 219 kref_get(&fence->refcount); 220 } 221 222 static void hl_fence_init(struct hl_fence *fence, u64 sequence) 223 { 224 kref_init(&fence->refcount); 225 fence->cs_sequence = sequence; 226 fence->error = 0; 227 fence->timestamp = ktime_set(0, 0); 228 fence->mcs_handling_done = false; 229 init_completion(&fence->completion); 230 } 231 232 void cs_get(struct hl_cs *cs) 233 { 234 kref_get(&cs->refcount); 235 } 236 237 static int cs_get_unless_zero(struct hl_cs *cs) 238 { 239 return kref_get_unless_zero(&cs->refcount); 240 } 241 242 static void cs_put(struct hl_cs *cs) 243 { 244 kref_put(&cs->refcount, cs_do_release); 245 } 246 247 static void cs_job_do_release(struct kref *ref) 248 { 249 struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount); 250 251 kfree(job); 252 } 253 254 static void hl_cs_job_put(struct hl_cs_job *job) 255 { 256 kref_put(&job->refcount, cs_job_do_release); 257 } 258 259 bool cs_needs_completion(struct hl_cs *cs) 260 { 261 /* In case this is a staged CS, only the last CS in sequence should 262 * get a completion, any non staged CS will always get a completion 263 */ 264 if (cs->staged_cs && !cs->staged_last) 265 return false; 266 267 return true; 268 } 269 270 bool cs_needs_timeout(struct hl_cs *cs) 271 { 272 /* In case this is a staged CS, only the first CS in sequence should 273 * get a timeout, any non staged CS will always get a timeout 274 */ 275 if (cs->staged_cs && !cs->staged_first) 276 return false; 277 278 return true; 279 } 280 281 static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job) 282 { 283 /* 284 * Patched CB is created for external queues jobs, and for H/W queues 285 * jobs if the user CB was allocated by driver and MMU is disabled. 286 */ 287 return (job->queue_type == QUEUE_TYPE_EXT || 288 (job->queue_type == QUEUE_TYPE_HW && 289 job->is_kernel_allocated_cb && 290 !hdev->mmu_enable)); 291 } 292 293 /* 294 * cs_parser - parse the user command submission 295 * 296 * @hpriv : pointer to the private data of the fd 297 * @job : pointer to the job that holds the command submission info 298 * 299 * The function parses the command submission of the user. It calls the 300 * ASIC specific parser, which returns a list of memory blocks to send 301 * to the device as different command buffers 302 * 303 */ 304 static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job) 305 { 306 struct hl_device *hdev = hpriv->hdev; 307 struct hl_cs_parser parser; 308 int rc; 309 310 parser.ctx_id = job->cs->ctx->asid; 311 parser.cs_sequence = job->cs->sequence; 312 parser.job_id = job->id; 313 314 parser.hw_queue_id = job->hw_queue_id; 315 parser.job_userptr_list = &job->userptr_list; 316 parser.patched_cb = NULL; 317 parser.user_cb = job->user_cb; 318 parser.user_cb_size = job->user_cb_size; 319 parser.queue_type = job->queue_type; 320 parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb; 321 job->patched_cb = NULL; 322 parser.completion = cs_needs_completion(job->cs); 323 324 rc = hdev->asic_funcs->cs_parser(hdev, &parser); 325 326 if (is_cb_patched(hdev, job)) { 327 if (!rc) { 328 job->patched_cb = parser.patched_cb; 329 job->job_cb_size = parser.patched_cb_size; 330 job->contains_dma_pkt = parser.contains_dma_pkt; 331 atomic_inc(&job->patched_cb->cs_cnt); 332 } 333 334 /* 335 * Whether the parsing worked or not, we don't need the 336 * original CB anymore because it was already parsed and 337 * won't be accessed again for this CS 338 */ 339 atomic_dec(&job->user_cb->cs_cnt); 340 hl_cb_put(job->user_cb); 341 job->user_cb = NULL; 342 } else if (!rc) { 343 job->job_cb_size = job->user_cb_size; 344 } 345 346 return rc; 347 } 348 349 static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job) 350 { 351 struct hl_cs *cs = job->cs; 352 353 if (is_cb_patched(hdev, job)) { 354 hl_userptr_delete_list(hdev, &job->userptr_list); 355 356 /* 357 * We might arrive here from rollback and patched CB wasn't 358 * created, so we need to check it's not NULL 359 */ 360 if (job->patched_cb) { 361 atomic_dec(&job->patched_cb->cs_cnt); 362 hl_cb_put(job->patched_cb); 363 } 364 } 365 366 /* For H/W queue jobs, if a user CB was allocated by driver and MMU is 367 * enabled, the user CB isn't released in cs_parser() and thus should be 368 * released here. This is also true for INT queues jobs which were 369 * allocated by driver. 370 */ 371 if ((job->is_kernel_allocated_cb && 372 ((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) || 373 job->queue_type == QUEUE_TYPE_INT))) { 374 atomic_dec(&job->user_cb->cs_cnt); 375 hl_cb_put(job->user_cb); 376 } 377 378 /* 379 * This is the only place where there can be multiple threads 380 * modifying the list at the same time 381 */ 382 spin_lock(&cs->job_lock); 383 list_del(&job->cs_node); 384 spin_unlock(&cs->job_lock); 385 386 hl_debugfs_remove_job(hdev, job); 387 388 /* We decrement reference only for a CS that gets completion 389 * because the reference was incremented only for this kind of CS 390 * right before it was scheduled. 391 * 392 * In staged submission, only the last CS marked as 'staged_last' 393 * gets completion, hence its release function will be called from here. 394 * As for all the rest CS's in the staged submission which do not get 395 * completion, their CS reference will be decremented by the 396 * 'staged_last' CS during the CS release flow. 397 * All relevant PQ CI counters will be incremented during the CS release 398 * flow by calling 'hl_hw_queue_update_ci'. 399 */ 400 if (cs_needs_completion(cs) && 401 (job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW)) 402 cs_put(cs); 403 404 hl_cs_job_put(job); 405 } 406 407 /* 408 * hl_staged_cs_find_first - locate the first CS in this staged submission 409 * 410 * @hdev: pointer to device structure 411 * @cs_seq: staged submission sequence number 412 * 413 * @note: This function must be called under 'hdev->cs_mirror_lock' 414 * 415 * Find and return a CS pointer with the given sequence 416 */ 417 struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq) 418 { 419 struct hl_cs *cs; 420 421 list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node) 422 if (cs->staged_cs && cs->staged_first && 423 cs->sequence == cs_seq) 424 return cs; 425 426 return NULL; 427 } 428 429 /* 430 * is_staged_cs_last_exists - returns true if the last CS in sequence exists 431 * 432 * @hdev: pointer to device structure 433 * @cs: staged submission member 434 * 435 */ 436 bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs) 437 { 438 struct hl_cs *last_entry; 439 440 last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs, 441 staged_cs_node); 442 443 if (last_entry->staged_last) 444 return true; 445 446 return false; 447 } 448 449 /* 450 * staged_cs_get - get CS reference if this CS is a part of a staged CS 451 * 452 * @hdev: pointer to device structure 453 * @cs: current CS 454 * @cs_seq: staged submission sequence number 455 * 456 * Increment CS reference for every CS in this staged submission except for 457 * the CS which get completion. 458 */ 459 static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs) 460 { 461 /* Only the last CS in this staged submission will get a completion. 462 * We must increment the reference for all other CS's in this 463 * staged submission. 464 * Once we get a completion we will release the whole staged submission. 465 */ 466 if (!cs->staged_last) 467 cs_get(cs); 468 } 469 470 /* 471 * staged_cs_put - put a CS in case it is part of staged submission 472 * 473 * @hdev: pointer to device structure 474 * @cs: CS to put 475 * 476 * This function decrements a CS reference (for a non completion CS) 477 */ 478 static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs) 479 { 480 /* We release all CS's in a staged submission except the last 481 * CS which we have never incremented its reference. 482 */ 483 if (!cs_needs_completion(cs)) 484 cs_put(cs); 485 } 486 487 static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) 488 { 489 struct hl_cs *next = NULL, *iter, *first_cs; 490 491 if (!cs_needs_timeout(cs)) 492 return; 493 494 spin_lock(&hdev->cs_mirror_lock); 495 496 /* We need to handle tdr only once for the complete staged submission. 497 * Hence, we choose the CS that reaches this function first which is 498 * the CS marked as 'staged_last'. 499 * In case single staged cs was submitted which has both first and last 500 * indications, then "cs_find_first" below will return NULL, since we 501 * removed the cs node from the list before getting here, 502 * in such cases just continue with the cs to cancel it's TDR work. 503 */ 504 if (cs->staged_cs && cs->staged_last) { 505 first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence); 506 if (first_cs) 507 cs = first_cs; 508 } 509 510 spin_unlock(&hdev->cs_mirror_lock); 511 512 /* Don't cancel TDR in case this CS was timedout because we might be 513 * running from the TDR context 514 */ 515 if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT) 516 return; 517 518 if (cs->tdr_active) 519 cancel_delayed_work_sync(&cs->work_tdr); 520 521 spin_lock(&hdev->cs_mirror_lock); 522 523 /* queue TDR for next CS */ 524 list_for_each_entry(iter, &hdev->cs_mirror_list, mirror_node) 525 if (cs_needs_timeout(iter)) { 526 next = iter; 527 break; 528 } 529 530 if (next && !next->tdr_active) { 531 next->tdr_active = true; 532 schedule_delayed_work(&next->work_tdr, next->timeout_jiffies); 533 } 534 535 spin_unlock(&hdev->cs_mirror_lock); 536 } 537 538 /* 539 * force_complete_multi_cs - complete all contexts that wait on multi-CS 540 * 541 * @hdev: pointer to habanalabs device structure 542 */ 543 static void force_complete_multi_cs(struct hl_device *hdev) 544 { 545 int i; 546 547 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { 548 struct multi_cs_completion *mcs_compl; 549 550 mcs_compl = &hdev->multi_cs_completion[i]; 551 552 spin_lock(&mcs_compl->lock); 553 554 if (!mcs_compl->used) { 555 spin_unlock(&mcs_compl->lock); 556 continue; 557 } 558 559 /* when calling force complete no context should be waiting on 560 * multi-cS. 561 * We are calling the function as a protection for such case 562 * to free any pending context and print error message 563 */ 564 dev_err(hdev->dev, 565 "multi-CS completion context %d still waiting when calling force completion\n", 566 i); 567 complete_all(&mcs_compl->completion); 568 spin_unlock(&mcs_compl->lock); 569 } 570 } 571 572 /* 573 * complete_multi_cs - complete all waiting entities on multi-CS 574 * 575 * @hdev: pointer to habanalabs device structure 576 * @cs: CS structure 577 * The function signals a waiting entity that has an overlapping stream masters 578 * with the completed CS. 579 * For example: 580 * - a completed CS worked on stream master QID 4, multi CS completion 581 * is actively waiting on stream master QIDs 3, 5. don't send signal as no 582 * common stream master QID 583 * - a completed CS worked on stream master QID 4, multi CS completion 584 * is actively waiting on stream master QIDs 3, 4. send signal as stream 585 * master QID 4 is common 586 */ 587 static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs) 588 { 589 struct hl_fence *fence = cs->fence; 590 int i; 591 592 /* in case of multi CS check for completion only for the first CS */ 593 if (cs->staged_cs && !cs->staged_first) 594 return; 595 596 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { 597 struct multi_cs_completion *mcs_compl; 598 599 mcs_compl = &hdev->multi_cs_completion[i]; 600 if (!mcs_compl->used) 601 continue; 602 603 spin_lock(&mcs_compl->lock); 604 605 /* 606 * complete if: 607 * 1. still waiting for completion 608 * 2. the completed CS has at least one overlapping stream 609 * master with the stream masters in the completion 610 */ 611 if (mcs_compl->used && 612 (fence->stream_master_qid_map & 613 mcs_compl->stream_master_qid_map)) { 614 /* extract the timestamp only of first completed CS */ 615 if (!mcs_compl->timestamp) 616 mcs_compl->timestamp = ktime_to_ns(fence->timestamp); 617 618 complete_all(&mcs_compl->completion); 619 620 /* 621 * Setting mcs_handling_done inside the lock ensures 622 * at least one fence have mcs_handling_done set to 623 * true before wait for mcs finish. This ensures at 624 * least one CS will be set as completed when polling 625 * mcs fences. 626 */ 627 fence->mcs_handling_done = true; 628 } 629 630 spin_unlock(&mcs_compl->lock); 631 } 632 /* In case CS completed without mcs completion initialized */ 633 fence->mcs_handling_done = true; 634 } 635 636 static inline void cs_release_sob_reset_handler(struct hl_device *hdev, 637 struct hl_cs *cs, 638 struct hl_cs_compl *hl_cs_cmpl) 639 { 640 /* Skip this handler if the cs wasn't submitted, to avoid putting 641 * the hw_sob twice, since this case already handled at this point, 642 * also skip if the hw_sob pointer wasn't set. 643 */ 644 if (!hl_cs_cmpl->hw_sob || !cs->submitted) 645 return; 646 647 spin_lock(&hl_cs_cmpl->lock); 648 649 /* 650 * we get refcount upon reservation of signals or signal/wait cs for the 651 * hw_sob object, and need to put it when the first staged cs 652 * (which cotains the encaps signals) or cs signal/wait is completed. 653 */ 654 if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) || 655 (hl_cs_cmpl->type == CS_TYPE_WAIT) || 656 (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) || 657 (!!hl_cs_cmpl->encaps_signals)) { 658 dev_dbg(hdev->dev, 659 "CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n", 660 hl_cs_cmpl->cs_seq, 661 hl_cs_cmpl->type, 662 hl_cs_cmpl->hw_sob->sob_id, 663 hl_cs_cmpl->sob_val); 664 665 hw_sob_put(hl_cs_cmpl->hw_sob); 666 667 if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) 668 hdev->asic_funcs->reset_sob_group(hdev, 669 hl_cs_cmpl->sob_group); 670 } 671 672 spin_unlock(&hl_cs_cmpl->lock); 673 } 674 675 static void cs_do_release(struct kref *ref) 676 { 677 struct hl_cs *cs = container_of(ref, struct hl_cs, refcount); 678 struct hl_device *hdev = cs->ctx->hdev; 679 struct hl_cs_job *job, *tmp; 680 struct hl_cs_compl *hl_cs_cmpl = 681 container_of(cs->fence, struct hl_cs_compl, base_fence); 682 683 cs->completed = true; 684 685 /* 686 * Although if we reached here it means that all external jobs have 687 * finished, because each one of them took refcnt to CS, we still 688 * need to go over the internal jobs and complete them. Otherwise, we 689 * will have leaked memory and what's worse, the CS object (and 690 * potentially the CTX object) could be released, while the JOB 691 * still holds a pointer to them (but no reference). 692 */ 693 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) 694 hl_complete_job(hdev, job); 695 696 if (!cs->submitted) { 697 /* 698 * In case the wait for signal CS was submitted, the fence put 699 * occurs in init_signal_wait_cs() or collective_wait_init_cs() 700 * right before hanging on the PQ. 701 */ 702 if (cs->type == CS_TYPE_WAIT || 703 cs->type == CS_TYPE_COLLECTIVE_WAIT) 704 hl_fence_put(cs->signal_fence); 705 706 goto out; 707 } 708 709 /* Need to update CI for all queue jobs that does not get completion */ 710 hl_hw_queue_update_ci(cs); 711 712 /* remove CS from CS mirror list */ 713 spin_lock(&hdev->cs_mirror_lock); 714 list_del_init(&cs->mirror_node); 715 spin_unlock(&hdev->cs_mirror_lock); 716 717 cs_handle_tdr(hdev, cs); 718 719 if (cs->staged_cs) { 720 /* the completion CS decrements reference for the entire 721 * staged submission 722 */ 723 if (cs->staged_last) { 724 struct hl_cs *staged_cs, *tmp_cs; 725 726 list_for_each_entry_safe(staged_cs, tmp_cs, 727 &cs->staged_cs_node, staged_cs_node) 728 staged_cs_put(hdev, staged_cs); 729 } 730 731 /* A staged CS will be a member in the list only after it 732 * was submitted. We used 'cs_mirror_lock' when inserting 733 * it to list so we will use it again when removing it 734 */ 735 if (cs->submitted) { 736 spin_lock(&hdev->cs_mirror_lock); 737 list_del(&cs->staged_cs_node); 738 spin_unlock(&hdev->cs_mirror_lock); 739 } 740 741 /* decrement refcount to handle when first staged cs 742 * with encaps signals is completed. 743 */ 744 if (hl_cs_cmpl->encaps_signals) 745 kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount, 746 hl_encaps_release_handle_and_put_ctx); 747 } 748 749 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) && cs->encaps_signals) 750 kref_put(&cs->encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx); 751 752 out: 753 /* Must be called before hl_ctx_put because inside we use ctx to get 754 * the device 755 */ 756 hl_debugfs_remove_cs(cs); 757 758 hdev->shadow_cs_queue[cs->sequence & (hdev->asic_prop.max_pending_cs - 1)] = NULL; 759 760 /* We need to mark an error for not submitted because in that case 761 * the hl fence release flow is different. Mainly, we don't need 762 * to handle hw_sob for signal/wait 763 */ 764 if (cs->timedout) 765 cs->fence->error = -ETIMEDOUT; 766 else if (cs->aborted) 767 cs->fence->error = -EIO; 768 else if (!cs->submitted) 769 cs->fence->error = -EBUSY; 770 771 if (unlikely(cs->skip_reset_on_timeout)) { 772 dev_err(hdev->dev, 773 "Command submission %llu completed after %llu (s)\n", 774 cs->sequence, 775 div_u64(jiffies - cs->submission_time_jiffies, HZ)); 776 } 777 778 if (cs->timestamp) { 779 cs->fence->timestamp = ktime_get(); 780 hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence, 781 cs->fence->timestamp, cs->fence->error); 782 } 783 784 hl_ctx_put(cs->ctx); 785 786 complete_all(&cs->fence->completion); 787 complete_multi_cs(hdev, cs); 788 789 cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl); 790 791 hl_fence_put(cs->fence); 792 793 kfree(cs->jobs_in_queue_cnt); 794 kfree(cs); 795 } 796 797 static void cs_timedout(struct work_struct *work) 798 { 799 struct hl_device *hdev; 800 u64 event_mask = 0x0; 801 int rc; 802 struct hl_cs *cs = container_of(work, struct hl_cs, 803 work_tdr.work); 804 bool skip_reset_on_timeout = cs->skip_reset_on_timeout, device_reset = false; 805 806 rc = cs_get_unless_zero(cs); 807 if (!rc) 808 return; 809 810 if ((!cs->submitted) || (cs->completed)) { 811 cs_put(cs); 812 return; 813 } 814 815 hdev = cs->ctx->hdev; 816 817 if (likely(!skip_reset_on_timeout)) { 818 if (hdev->reset_on_lockup) 819 device_reset = true; 820 else 821 hdev->reset_info.needs_reset = true; 822 823 /* Mark the CS is timed out so we won't try to cancel its TDR */ 824 cs->timedout = true; 825 } 826 827 /* Save only the first CS timeout parameters */ 828 rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0); 829 if (rc) { 830 hdev->captured_err_info.cs_timeout.timestamp = ktime_get(); 831 hdev->captured_err_info.cs_timeout.seq = cs->sequence; 832 event_mask |= HL_NOTIFIER_EVENT_CS_TIMEOUT; 833 } 834 835 switch (cs->type) { 836 case CS_TYPE_SIGNAL: 837 dev_err(hdev->dev, 838 "Signal command submission %llu has not finished in time!\n", 839 cs->sequence); 840 break; 841 842 case CS_TYPE_WAIT: 843 dev_err(hdev->dev, 844 "Wait command submission %llu has not finished in time!\n", 845 cs->sequence); 846 break; 847 848 case CS_TYPE_COLLECTIVE_WAIT: 849 dev_err(hdev->dev, 850 "Collective Wait command submission %llu has not finished in time!\n", 851 cs->sequence); 852 break; 853 854 default: 855 dev_err(hdev->dev, 856 "Command submission %llu has not finished in time!\n", 857 cs->sequence); 858 break; 859 } 860 861 rc = hl_state_dump(hdev); 862 if (rc) 863 dev_err(hdev->dev, "Error during system state dump %d\n", rc); 864 865 cs_put(cs); 866 867 if (device_reset) { 868 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 869 hl_device_cond_reset(hdev, HL_DRV_RESET_TDR, event_mask); 870 } else if (event_mask) { 871 hl_notifier_event_send_all(hdev, event_mask); 872 } 873 } 874 875 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, 876 enum hl_cs_type cs_type, u64 user_sequence, 877 struct hl_cs **cs_new, u32 flags, u32 timeout) 878 { 879 struct hl_cs_counters_atomic *cntr; 880 struct hl_fence *other = NULL; 881 struct hl_cs_compl *cs_cmpl; 882 struct hl_cs *cs; 883 int rc; 884 885 cntr = &hdev->aggregated_cs_counters; 886 887 cs = kzalloc(sizeof(*cs), GFP_ATOMIC); 888 if (!cs) 889 cs = kzalloc(sizeof(*cs), GFP_KERNEL); 890 891 if (!cs) { 892 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 893 atomic64_inc(&cntr->out_of_mem_drop_cnt); 894 return -ENOMEM; 895 } 896 897 /* increment refcnt for context */ 898 hl_ctx_get(ctx); 899 900 cs->ctx = ctx; 901 cs->submitted = false; 902 cs->completed = false; 903 cs->type = cs_type; 904 cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP); 905 cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS); 906 cs->timeout_jiffies = timeout; 907 cs->skip_reset_on_timeout = 908 hdev->reset_info.skip_reset_on_timeout || 909 !!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT); 910 cs->submission_time_jiffies = jiffies; 911 INIT_LIST_HEAD(&cs->job_list); 912 INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout); 913 kref_init(&cs->refcount); 914 spin_lock_init(&cs->job_lock); 915 916 cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_ATOMIC); 917 if (!cs_cmpl) 918 cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_KERNEL); 919 920 if (!cs_cmpl) { 921 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 922 atomic64_inc(&cntr->out_of_mem_drop_cnt); 923 rc = -ENOMEM; 924 goto free_cs; 925 } 926 927 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, 928 sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); 929 if (!cs->jobs_in_queue_cnt) 930 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, 931 sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL); 932 933 if (!cs->jobs_in_queue_cnt) { 934 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 935 atomic64_inc(&cntr->out_of_mem_drop_cnt); 936 rc = -ENOMEM; 937 goto free_cs_cmpl; 938 } 939 940 cs_cmpl->hdev = hdev; 941 cs_cmpl->type = cs->type; 942 spin_lock_init(&cs_cmpl->lock); 943 cs->fence = &cs_cmpl->base_fence; 944 945 spin_lock(&ctx->cs_lock); 946 947 cs_cmpl->cs_seq = ctx->cs_sequence; 948 other = ctx->cs_pending[cs_cmpl->cs_seq & 949 (hdev->asic_prop.max_pending_cs - 1)]; 950 951 if (other && !completion_done(&other->completion)) { 952 /* If the following statement is true, it means we have reached 953 * a point in which only part of the staged submission was 954 * submitted and we don't have enough room in the 'cs_pending' 955 * array for the rest of the submission. 956 * This causes a deadlock because this CS will never be 957 * completed as it depends on future CS's for completion. 958 */ 959 if (other->cs_sequence == user_sequence) 960 dev_crit_ratelimited(hdev->dev, 961 "Staged CS %llu deadlock due to lack of resources", 962 user_sequence); 963 964 dev_dbg_ratelimited(hdev->dev, 965 "Rejecting CS because of too many in-flights CS\n"); 966 atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt); 967 atomic64_inc(&cntr->max_cs_in_flight_drop_cnt); 968 rc = -EAGAIN; 969 goto free_fence; 970 } 971 972 /* init hl_fence */ 973 hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq); 974 975 cs->sequence = cs_cmpl->cs_seq; 976 977 ctx->cs_pending[cs_cmpl->cs_seq & 978 (hdev->asic_prop.max_pending_cs - 1)] = 979 &cs_cmpl->base_fence; 980 ctx->cs_sequence++; 981 982 hl_fence_get(&cs_cmpl->base_fence); 983 984 hl_fence_put(other); 985 986 spin_unlock(&ctx->cs_lock); 987 988 *cs_new = cs; 989 990 return 0; 991 992 free_fence: 993 spin_unlock(&ctx->cs_lock); 994 kfree(cs->jobs_in_queue_cnt); 995 free_cs_cmpl: 996 kfree(cs_cmpl); 997 free_cs: 998 kfree(cs); 999 hl_ctx_put(ctx); 1000 return rc; 1001 } 1002 1003 static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs) 1004 { 1005 struct hl_cs_job *job, *tmp; 1006 1007 staged_cs_put(hdev, cs); 1008 1009 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) 1010 hl_complete_job(hdev, job); 1011 } 1012 1013 /* 1014 * release_reserved_encaps_signals() - release reserved encapsulated signals. 1015 * @hdev: pointer to habanalabs device structure 1016 * 1017 * Release reserved encapsulated signals which weren't un-reserved, or for which a CS with 1018 * encapsulated signals wasn't submitted and thus weren't released as part of CS roll-back. 1019 * For these signals need also to put the refcount of the H/W SOB which was taken at the 1020 * reservation. 1021 */ 1022 static void release_reserved_encaps_signals(struct hl_device *hdev) 1023 { 1024 struct hl_ctx *ctx = hl_get_compute_ctx(hdev); 1025 struct hl_cs_encaps_sig_handle *handle; 1026 struct hl_encaps_signals_mgr *mgr; 1027 u32 id; 1028 1029 if (!ctx) 1030 return; 1031 1032 mgr = &ctx->sig_mgr; 1033 1034 idr_for_each_entry(&mgr->handles, handle, id) 1035 if (handle->cs_seq == ULLONG_MAX) 1036 kref_put(&handle->refcount, hl_encaps_release_handle_and_put_sob_ctx); 1037 1038 hl_ctx_put(ctx); 1039 } 1040 1041 void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush) 1042 { 1043 int i; 1044 struct hl_cs *cs, *tmp; 1045 1046 if (!skip_wq_flush) { 1047 flush_workqueue(hdev->ts_free_obj_wq); 1048 1049 /* flush all completions before iterating over the CS mirror list in 1050 * order to avoid a race with the release functions 1051 */ 1052 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 1053 flush_workqueue(hdev->cq_wq[i]); 1054 1055 flush_workqueue(hdev->cs_cmplt_wq); 1056 } 1057 1058 /* Make sure we don't have leftovers in the CS mirror list */ 1059 list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) { 1060 cs_get(cs); 1061 cs->aborted = true; 1062 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n", 1063 cs->ctx->asid, cs->sequence); 1064 cs_rollback(hdev, cs); 1065 cs_put(cs); 1066 } 1067 1068 force_complete_multi_cs(hdev); 1069 1070 release_reserved_encaps_signals(hdev); 1071 } 1072 1073 static void 1074 wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt) 1075 { 1076 struct hl_user_pending_interrupt *pend, *temp; 1077 unsigned long flags; 1078 1079 spin_lock_irqsave(&interrupt->wait_list_lock, flags); 1080 list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) { 1081 if (pend->ts_reg_info.buf) { 1082 list_del(&pend->wait_list_node); 1083 hl_mmap_mem_buf_put(pend->ts_reg_info.buf); 1084 hl_cb_put(pend->ts_reg_info.cq_cb); 1085 } else { 1086 pend->fence.error = -EIO; 1087 complete_all(&pend->fence.completion); 1088 } 1089 } 1090 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); 1091 } 1092 1093 void hl_release_pending_user_interrupts(struct hl_device *hdev) 1094 { 1095 struct asic_fixed_properties *prop = &hdev->asic_prop; 1096 struct hl_user_interrupt *interrupt; 1097 int i; 1098 1099 if (!prop->user_interrupt_count) 1100 return; 1101 1102 /* We iterate through the user interrupt requests and waking up all 1103 * user threads waiting for interrupt completion. We iterate the 1104 * list under a lock, this is why all user threads, once awake, 1105 * will wait on the same lock and will release the waiting object upon 1106 * unlock. 1107 */ 1108 1109 for (i = 0 ; i < prop->user_interrupt_count ; i++) { 1110 interrupt = &hdev->user_interrupt[i]; 1111 wake_pending_user_interrupt_threads(interrupt); 1112 } 1113 1114 interrupt = &hdev->common_user_cq_interrupt; 1115 wake_pending_user_interrupt_threads(interrupt); 1116 1117 interrupt = &hdev->common_decoder_interrupt; 1118 wake_pending_user_interrupt_threads(interrupt); 1119 } 1120 1121 static void force_complete_cs(struct hl_device *hdev) 1122 { 1123 struct hl_cs *cs; 1124 1125 spin_lock(&hdev->cs_mirror_lock); 1126 1127 list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) { 1128 cs->fence->error = -EIO; 1129 complete_all(&cs->fence->completion); 1130 } 1131 1132 spin_unlock(&hdev->cs_mirror_lock); 1133 } 1134 1135 void hl_abort_waitings_for_completion(struct hl_device *hdev) 1136 { 1137 force_complete_cs(hdev); 1138 force_complete_multi_cs(hdev); 1139 hl_release_pending_user_interrupts(hdev); 1140 } 1141 1142 static void job_wq_completion(struct work_struct *work) 1143 { 1144 struct hl_cs_job *job = container_of(work, struct hl_cs_job, 1145 finish_work); 1146 struct hl_cs *cs = job->cs; 1147 struct hl_device *hdev = cs->ctx->hdev; 1148 1149 /* job is no longer needed */ 1150 hl_complete_job(hdev, job); 1151 } 1152 1153 static void cs_completion(struct work_struct *work) 1154 { 1155 struct hl_cs *cs = container_of(work, struct hl_cs, finish_work); 1156 struct hl_device *hdev = cs->ctx->hdev; 1157 struct hl_cs_job *job, *tmp; 1158 1159 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) 1160 hl_complete_job(hdev, job); 1161 } 1162 1163 static int validate_queue_index(struct hl_device *hdev, 1164 struct hl_cs_chunk *chunk, 1165 enum hl_queue_type *queue_type, 1166 bool *is_kernel_allocated_cb) 1167 { 1168 struct asic_fixed_properties *asic = &hdev->asic_prop; 1169 struct hw_queue_properties *hw_queue_prop; 1170 1171 /* This must be checked here to prevent out-of-bounds access to 1172 * hw_queues_props array 1173 */ 1174 if (chunk->queue_index >= asic->max_queues) { 1175 dev_err(hdev->dev, "Queue index %d is invalid\n", 1176 chunk->queue_index); 1177 return -EINVAL; 1178 } 1179 1180 hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; 1181 1182 if (hw_queue_prop->type == QUEUE_TYPE_NA) { 1183 dev_err(hdev->dev, "Queue index %d is not applicable\n", 1184 chunk->queue_index); 1185 return -EINVAL; 1186 } 1187 1188 if (hw_queue_prop->binned) { 1189 dev_err(hdev->dev, "Queue index %d is binned out\n", 1190 chunk->queue_index); 1191 return -EINVAL; 1192 } 1193 1194 if (hw_queue_prop->driver_only) { 1195 dev_err(hdev->dev, 1196 "Queue index %d is restricted for the kernel driver\n", 1197 chunk->queue_index); 1198 return -EINVAL; 1199 } 1200 1201 /* When hw queue type isn't QUEUE_TYPE_HW, 1202 * USER_ALLOC_CB flag shall be referred as "don't care". 1203 */ 1204 if (hw_queue_prop->type == QUEUE_TYPE_HW) { 1205 if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) { 1206 if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) { 1207 dev_err(hdev->dev, 1208 "Queue index %d doesn't support user CB\n", 1209 chunk->queue_index); 1210 return -EINVAL; 1211 } 1212 1213 *is_kernel_allocated_cb = false; 1214 } else { 1215 if (!(hw_queue_prop->cb_alloc_flags & 1216 CB_ALLOC_KERNEL)) { 1217 dev_err(hdev->dev, 1218 "Queue index %d doesn't support kernel CB\n", 1219 chunk->queue_index); 1220 return -EINVAL; 1221 } 1222 1223 *is_kernel_allocated_cb = true; 1224 } 1225 } else { 1226 *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags 1227 & CB_ALLOC_KERNEL); 1228 } 1229 1230 *queue_type = hw_queue_prop->type; 1231 return 0; 1232 } 1233 1234 static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev, 1235 struct hl_mem_mgr *mmg, 1236 struct hl_cs_chunk *chunk) 1237 { 1238 struct hl_cb *cb; 1239 1240 cb = hl_cb_get(mmg, chunk->cb_handle); 1241 if (!cb) { 1242 dev_err(hdev->dev, "CB handle 0x%llx invalid\n", chunk->cb_handle); 1243 return NULL; 1244 } 1245 1246 if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) { 1247 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size); 1248 goto release_cb; 1249 } 1250 1251 atomic_inc(&cb->cs_cnt); 1252 1253 return cb; 1254 1255 release_cb: 1256 hl_cb_put(cb); 1257 return NULL; 1258 } 1259 1260 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, 1261 enum hl_queue_type queue_type, bool is_kernel_allocated_cb) 1262 { 1263 struct hl_cs_job *job; 1264 1265 job = kzalloc(sizeof(*job), GFP_ATOMIC); 1266 if (!job) 1267 job = kzalloc(sizeof(*job), GFP_KERNEL); 1268 1269 if (!job) 1270 return NULL; 1271 1272 kref_init(&job->refcount); 1273 job->queue_type = queue_type; 1274 job->is_kernel_allocated_cb = is_kernel_allocated_cb; 1275 1276 if (is_cb_patched(hdev, job)) 1277 INIT_LIST_HEAD(&job->userptr_list); 1278 1279 if (job->queue_type == QUEUE_TYPE_EXT) 1280 INIT_WORK(&job->finish_work, job_wq_completion); 1281 1282 return job; 1283 } 1284 1285 static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags) 1286 { 1287 if (cs_type_flags & HL_CS_FLAGS_SIGNAL) 1288 return CS_TYPE_SIGNAL; 1289 else if (cs_type_flags & HL_CS_FLAGS_WAIT) 1290 return CS_TYPE_WAIT; 1291 else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT) 1292 return CS_TYPE_COLLECTIVE_WAIT; 1293 else if (cs_type_flags & HL_CS_FLAGS_RESERVE_SIGNALS_ONLY) 1294 return CS_RESERVE_SIGNALS; 1295 else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY) 1296 return CS_UNRESERVE_SIGNALS; 1297 else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND) 1298 return CS_TYPE_ENGINE_CORE; 1299 else if (cs_type_flags & HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES) 1300 return CS_TYPE_FLUSH_PCI_HBW_WRITES; 1301 else 1302 return CS_TYPE_DEFAULT; 1303 } 1304 1305 static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args) 1306 { 1307 struct hl_device *hdev = hpriv->hdev; 1308 struct hl_ctx *ctx = hpriv->ctx; 1309 u32 cs_type_flags, num_chunks; 1310 enum hl_device_status status; 1311 enum hl_cs_type cs_type; 1312 bool is_sync_stream; 1313 int i; 1314 1315 for (i = 0 ; i < sizeof(args->in.pad) ; i++) 1316 if (args->in.pad[i]) { 1317 dev_dbg(hdev->dev, "Padding bytes must be 0\n"); 1318 return -EINVAL; 1319 } 1320 1321 if (!hl_device_operational(hdev, &status)) { 1322 return -EBUSY; 1323 } 1324 1325 if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) && 1326 !hdev->supports_staged_submission) { 1327 dev_err(hdev->dev, "staged submission not supported"); 1328 return -EPERM; 1329 } 1330 1331 cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK; 1332 1333 if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) { 1334 dev_err(hdev->dev, 1335 "CS type flags are mutually exclusive, context %d\n", 1336 ctx->asid); 1337 return -EINVAL; 1338 } 1339 1340 cs_type = hl_cs_get_cs_type(cs_type_flags); 1341 num_chunks = args->in.num_chunks_execute; 1342 1343 is_sync_stream = (cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT || 1344 cs_type == CS_TYPE_COLLECTIVE_WAIT); 1345 1346 if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) { 1347 dev_err(hdev->dev, "Sync stream CS is not supported\n"); 1348 return -EINVAL; 1349 } 1350 1351 if (cs_type == CS_TYPE_DEFAULT) { 1352 if (!num_chunks) { 1353 dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid); 1354 return -EINVAL; 1355 } 1356 } else if (is_sync_stream && num_chunks != 1) { 1357 dev_err(hdev->dev, 1358 "Sync stream CS mandates one chunk only, context %d\n", 1359 ctx->asid); 1360 return -EINVAL; 1361 } 1362 1363 return 0; 1364 } 1365 1366 static int hl_cs_copy_chunk_array(struct hl_device *hdev, 1367 struct hl_cs_chunk **cs_chunk_array, 1368 void __user *chunks, u32 num_chunks, 1369 struct hl_ctx *ctx) 1370 { 1371 u32 size_to_copy; 1372 1373 if (num_chunks > HL_MAX_JOBS_PER_CS) { 1374 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 1375 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); 1376 dev_err(hdev->dev, 1377 "Number of chunks can NOT be larger than %d\n", 1378 HL_MAX_JOBS_PER_CS); 1379 return -EINVAL; 1380 } 1381 1382 *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array), 1383 GFP_ATOMIC); 1384 if (!*cs_chunk_array) 1385 *cs_chunk_array = kmalloc_array(num_chunks, 1386 sizeof(**cs_chunk_array), GFP_KERNEL); 1387 if (!*cs_chunk_array) { 1388 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1389 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); 1390 return -ENOMEM; 1391 } 1392 1393 size_to_copy = num_chunks * sizeof(struct hl_cs_chunk); 1394 if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) { 1395 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 1396 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); 1397 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n"); 1398 kfree(*cs_chunk_array); 1399 return -EFAULT; 1400 } 1401 1402 return 0; 1403 } 1404 1405 static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs, 1406 u64 sequence, u32 flags, 1407 u32 encaps_signal_handle) 1408 { 1409 if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION)) 1410 return 0; 1411 1412 cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST); 1413 cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST); 1414 1415 if (cs->staged_first) { 1416 /* Staged CS sequence is the first CS sequence */ 1417 INIT_LIST_HEAD(&cs->staged_cs_node); 1418 cs->staged_sequence = cs->sequence; 1419 1420 if (cs->encaps_signals) 1421 cs->encaps_sig_hdl_id = encaps_signal_handle; 1422 } else { 1423 /* User sequence will be validated in 'hl_hw_queue_schedule_cs' 1424 * under the cs_mirror_lock 1425 */ 1426 cs->staged_sequence = sequence; 1427 } 1428 1429 /* Increment CS reference if needed */ 1430 staged_cs_get(hdev, cs); 1431 1432 cs->staged_cs = true; 1433 1434 return 0; 1435 } 1436 1437 static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid) 1438 { 1439 int i; 1440 1441 for (i = 0; i < hdev->stream_master_qid_arr_size; i++) 1442 if (qid == hdev->stream_master_qid_arr[i]) 1443 return BIT(i); 1444 1445 return 0; 1446 } 1447 1448 static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, 1449 u32 num_chunks, u64 *cs_seq, u32 flags, 1450 u32 encaps_signals_handle, u32 timeout, 1451 u16 *signal_initial_sob_count) 1452 { 1453 bool staged_mid, int_queues_only = true, using_hw_queues = false; 1454 struct hl_device *hdev = hpriv->hdev; 1455 struct hl_cs_chunk *cs_chunk_array; 1456 struct hl_cs_counters_atomic *cntr; 1457 struct hl_ctx *ctx = hpriv->ctx; 1458 struct hl_cs_job *job; 1459 struct hl_cs *cs; 1460 struct hl_cb *cb; 1461 u64 user_sequence; 1462 u8 stream_master_qid_map = 0; 1463 int rc, i; 1464 1465 cntr = &hdev->aggregated_cs_counters; 1466 user_sequence = *cs_seq; 1467 *cs_seq = ULLONG_MAX; 1468 1469 rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks, 1470 hpriv->ctx); 1471 if (rc) 1472 goto out; 1473 1474 if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) && 1475 !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST)) 1476 staged_mid = true; 1477 else 1478 staged_mid = false; 1479 1480 rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, 1481 staged_mid ? user_sequence : ULLONG_MAX, &cs, flags, 1482 timeout); 1483 if (rc) 1484 goto free_cs_chunk_array; 1485 1486 *cs_seq = cs->sequence; 1487 1488 hl_debugfs_add_cs(cs); 1489 1490 rc = cs_staged_submission(hdev, cs, user_sequence, flags, 1491 encaps_signals_handle); 1492 if (rc) 1493 goto free_cs_object; 1494 1495 /* If this is a staged submission we must return the staged sequence 1496 * rather than the internal CS sequence 1497 */ 1498 if (cs->staged_cs) 1499 *cs_seq = cs->staged_sequence; 1500 1501 /* Validate ALL the CS chunks before submitting the CS */ 1502 for (i = 0 ; i < num_chunks ; i++) { 1503 struct hl_cs_chunk *chunk = &cs_chunk_array[i]; 1504 enum hl_queue_type queue_type; 1505 bool is_kernel_allocated_cb; 1506 1507 rc = validate_queue_index(hdev, chunk, &queue_type, 1508 &is_kernel_allocated_cb); 1509 if (rc) { 1510 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 1511 atomic64_inc(&cntr->validation_drop_cnt); 1512 goto free_cs_object; 1513 } 1514 1515 if (is_kernel_allocated_cb) { 1516 cb = get_cb_from_cs_chunk(hdev, &hpriv->mem_mgr, chunk); 1517 if (!cb) { 1518 atomic64_inc( 1519 &ctx->cs_counters.validation_drop_cnt); 1520 atomic64_inc(&cntr->validation_drop_cnt); 1521 rc = -EINVAL; 1522 goto free_cs_object; 1523 } 1524 } else { 1525 cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle; 1526 } 1527 1528 if (queue_type == QUEUE_TYPE_EXT || 1529 queue_type == QUEUE_TYPE_HW) { 1530 int_queues_only = false; 1531 1532 /* 1533 * store which stream are being used for external/HW 1534 * queues of this CS 1535 */ 1536 if (hdev->supports_wait_for_multi_cs) 1537 stream_master_qid_map |= 1538 get_stream_master_qid_mask(hdev, 1539 chunk->queue_index); 1540 } 1541 1542 if (queue_type == QUEUE_TYPE_HW) 1543 using_hw_queues = true; 1544 1545 job = hl_cs_allocate_job(hdev, queue_type, 1546 is_kernel_allocated_cb); 1547 if (!job) { 1548 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1549 atomic64_inc(&cntr->out_of_mem_drop_cnt); 1550 dev_err(hdev->dev, "Failed to allocate a new job\n"); 1551 rc = -ENOMEM; 1552 if (is_kernel_allocated_cb) 1553 goto release_cb; 1554 1555 goto free_cs_object; 1556 } 1557 1558 job->id = i + 1; 1559 job->cs = cs; 1560 job->user_cb = cb; 1561 job->user_cb_size = chunk->cb_size; 1562 job->hw_queue_id = chunk->queue_index; 1563 1564 cs->jobs_in_queue_cnt[job->hw_queue_id]++; 1565 cs->jobs_cnt++; 1566 1567 list_add_tail(&job->cs_node, &cs->job_list); 1568 1569 /* 1570 * Increment CS reference. When CS reference is 0, CS is 1571 * done and can be signaled to user and free all its resources 1572 * Only increment for JOB on external or H/W queues, because 1573 * only for those JOBs we get completion 1574 */ 1575 if (cs_needs_completion(cs) && 1576 (job->queue_type == QUEUE_TYPE_EXT || 1577 job->queue_type == QUEUE_TYPE_HW)) 1578 cs_get(cs); 1579 1580 hl_debugfs_add_job(hdev, job); 1581 1582 rc = cs_parser(hpriv, job); 1583 if (rc) { 1584 atomic64_inc(&ctx->cs_counters.parsing_drop_cnt); 1585 atomic64_inc(&cntr->parsing_drop_cnt); 1586 dev_err(hdev->dev, 1587 "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n", 1588 cs->ctx->asid, cs->sequence, job->id, rc); 1589 goto free_cs_object; 1590 } 1591 } 1592 1593 /* We allow a CS with any queue type combination as long as it does 1594 * not get a completion 1595 */ 1596 if (int_queues_only && cs_needs_completion(cs)) { 1597 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 1598 atomic64_inc(&cntr->validation_drop_cnt); 1599 dev_err(hdev->dev, 1600 "Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n", 1601 cs->ctx->asid, cs->sequence); 1602 rc = -EINVAL; 1603 goto free_cs_object; 1604 } 1605 1606 if (using_hw_queues) 1607 INIT_WORK(&cs->finish_work, cs_completion); 1608 1609 /* 1610 * store the (external/HW queues) streams used by the CS in the 1611 * fence object for multi-CS completion 1612 */ 1613 if (hdev->supports_wait_for_multi_cs) 1614 cs->fence->stream_master_qid_map = stream_master_qid_map; 1615 1616 rc = hl_hw_queue_schedule_cs(cs); 1617 if (rc) { 1618 if (rc != -EAGAIN) 1619 dev_err(hdev->dev, 1620 "Failed to submit CS %d.%llu to H/W queues, error %d\n", 1621 cs->ctx->asid, cs->sequence, rc); 1622 goto free_cs_object; 1623 } 1624 1625 *signal_initial_sob_count = cs->initial_sob_count; 1626 1627 rc = HL_CS_STATUS_SUCCESS; 1628 goto put_cs; 1629 1630 release_cb: 1631 atomic_dec(&cb->cs_cnt); 1632 hl_cb_put(cb); 1633 free_cs_object: 1634 cs_rollback(hdev, cs); 1635 *cs_seq = ULLONG_MAX; 1636 /* The path below is both for good and erroneous exits */ 1637 put_cs: 1638 /* We finished with the CS in this function, so put the ref */ 1639 cs_put(cs); 1640 free_cs_chunk_array: 1641 kfree(cs_chunk_array); 1642 out: 1643 return rc; 1644 } 1645 1646 static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args, 1647 u64 *cs_seq) 1648 { 1649 struct hl_device *hdev = hpriv->hdev; 1650 struct hl_ctx *ctx = hpriv->ctx; 1651 bool need_soft_reset = false; 1652 int rc = 0, do_ctx_switch = 0; 1653 void __user *chunks; 1654 u32 num_chunks, tmp; 1655 u16 sob_count; 1656 int ret; 1657 1658 if (hdev->supports_ctx_switch) 1659 do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); 1660 1661 if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) { 1662 mutex_lock(&hpriv->restore_phase_mutex); 1663 1664 if (do_ctx_switch) { 1665 rc = hdev->asic_funcs->context_switch(hdev, ctx->asid); 1666 if (rc) { 1667 dev_err_ratelimited(hdev->dev, 1668 "Failed to switch to context %d, rejecting CS! %d\n", 1669 ctx->asid, rc); 1670 /* 1671 * If we timedout, or if the device is not IDLE 1672 * while we want to do context-switch (-EBUSY), 1673 * we need to soft-reset because QMAN is 1674 * probably stuck. However, we can't call to 1675 * reset here directly because of deadlock, so 1676 * need to do it at the very end of this 1677 * function 1678 */ 1679 if ((rc == -ETIMEDOUT) || (rc == -EBUSY)) 1680 need_soft_reset = true; 1681 mutex_unlock(&hpriv->restore_phase_mutex); 1682 goto out; 1683 } 1684 } 1685 1686 hdev->asic_funcs->restore_phase_topology(hdev); 1687 1688 chunks = (void __user *) (uintptr_t) args->in.chunks_restore; 1689 num_chunks = args->in.num_chunks_restore; 1690 1691 if (!num_chunks) { 1692 dev_dbg(hdev->dev, 1693 "Need to run restore phase but restore CS is empty\n"); 1694 rc = 0; 1695 } else { 1696 rc = cs_ioctl_default(hpriv, chunks, num_chunks, 1697 cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count); 1698 } 1699 1700 mutex_unlock(&hpriv->restore_phase_mutex); 1701 1702 if (rc) { 1703 dev_err(hdev->dev, 1704 "Failed to submit restore CS for context %d (%d)\n", 1705 ctx->asid, rc); 1706 goto out; 1707 } 1708 1709 /* Need to wait for restore completion before execution phase */ 1710 if (num_chunks) { 1711 enum hl_cs_wait_status status; 1712 wait_again: 1713 ret = _hl_cs_wait_ioctl(hdev, ctx, 1714 jiffies_to_usecs(hdev->timeout_jiffies), 1715 *cs_seq, &status, NULL); 1716 if (ret) { 1717 if (ret == -ERESTARTSYS) { 1718 usleep_range(100, 200); 1719 goto wait_again; 1720 } 1721 1722 dev_err(hdev->dev, 1723 "Restore CS for context %d failed to complete %d\n", 1724 ctx->asid, ret); 1725 rc = -ENOEXEC; 1726 goto out; 1727 } 1728 } 1729 1730 if (hdev->supports_ctx_switch) 1731 ctx->thread_ctx_switch_wait_token = 1; 1732 1733 } else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) { 1734 rc = hl_poll_timeout_memory(hdev, 1735 &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), 1736 100, jiffies_to_usecs(hdev->timeout_jiffies), false); 1737 1738 if (rc == -ETIMEDOUT) { 1739 dev_err(hdev->dev, 1740 "context switch phase timeout (%d)\n", tmp); 1741 goto out; 1742 } 1743 } 1744 1745 out: 1746 if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset)) 1747 hl_device_reset(hdev, 0); 1748 1749 return rc; 1750 } 1751 1752 /* 1753 * hl_cs_signal_sob_wraparound_handler: handle SOB value wrapaound case. 1754 * if the SOB value reaches the max value move to the other SOB reserved 1755 * to the queue. 1756 * @hdev: pointer to device structure 1757 * @q_idx: stream queue index 1758 * @hw_sob: the H/W SOB used in this signal CS. 1759 * @count: signals count 1760 * @encaps_sig: tells whether it's reservation for encaps signals or not. 1761 * 1762 * Note that this function must be called while hw_queues_lock is taken. 1763 */ 1764 int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx, 1765 struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig) 1766 1767 { 1768 struct hl_sync_stream_properties *prop; 1769 struct hl_hw_sob *sob = *hw_sob, *other_sob; 1770 u8 other_sob_offset; 1771 1772 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; 1773 1774 hw_sob_get(sob); 1775 1776 /* check for wraparound */ 1777 if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) { 1778 /* 1779 * Decrement as we reached the max value. 1780 * The release function won't be called here as we've 1781 * just incremented the refcount right before calling this 1782 * function. 1783 */ 1784 hw_sob_put_err(sob); 1785 1786 /* 1787 * check the other sob value, if it still in use then fail 1788 * otherwise make the switch 1789 */ 1790 other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS; 1791 other_sob = &prop->hw_sob[other_sob_offset]; 1792 1793 if (kref_read(&other_sob->kref) != 1) { 1794 dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n", 1795 q_idx); 1796 return -EINVAL; 1797 } 1798 1799 /* 1800 * next_sob_val always points to the next available signal 1801 * in the sob, so in encaps signals it will be the next one 1802 * after reserving the required amount. 1803 */ 1804 if (encaps_sig) 1805 prop->next_sob_val = count + 1; 1806 else 1807 prop->next_sob_val = count; 1808 1809 /* only two SOBs are currently in use */ 1810 prop->curr_sob_offset = other_sob_offset; 1811 *hw_sob = other_sob; 1812 1813 /* 1814 * check if other_sob needs reset, then do it before using it 1815 * for the reservation or the next signal cs. 1816 * we do it here, and for both encaps and regular signal cs 1817 * cases in order to avoid possible races of two kref_put 1818 * of the sob which can occur at the same time if we move the 1819 * sob reset(kref_put) to cs_do_release function. 1820 * in addition, if we have combination of cs signal and 1821 * encaps, and at the point we need to reset the sob there was 1822 * no more reservations and only signal cs keep coming, 1823 * in such case we need signal_cs to put the refcount and 1824 * reset the sob. 1825 */ 1826 if (other_sob->need_reset) 1827 hw_sob_put(other_sob); 1828 1829 if (encaps_sig) { 1830 /* set reset indication for the sob */ 1831 sob->need_reset = true; 1832 hw_sob_get(other_sob); 1833 } 1834 1835 dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n", 1836 prop->curr_sob_offset, q_idx); 1837 } else { 1838 prop->next_sob_val += count; 1839 } 1840 1841 return 0; 1842 } 1843 1844 static int cs_ioctl_extract_signal_seq(struct hl_device *hdev, 1845 struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx, 1846 bool encaps_signals) 1847 { 1848 u64 *signal_seq_arr = NULL; 1849 u32 size_to_copy, signal_seq_arr_len; 1850 int rc = 0; 1851 1852 if (encaps_signals) { 1853 *signal_seq = chunk->encaps_signal_seq; 1854 return 0; 1855 } 1856 1857 signal_seq_arr_len = chunk->num_signal_seq_arr; 1858 1859 /* currently only one signal seq is supported */ 1860 if (signal_seq_arr_len != 1) { 1861 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 1862 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); 1863 dev_err(hdev->dev, 1864 "Wait for signal CS supports only one signal CS seq\n"); 1865 return -EINVAL; 1866 } 1867 1868 signal_seq_arr = kmalloc_array(signal_seq_arr_len, 1869 sizeof(*signal_seq_arr), 1870 GFP_ATOMIC); 1871 if (!signal_seq_arr) 1872 signal_seq_arr = kmalloc_array(signal_seq_arr_len, 1873 sizeof(*signal_seq_arr), 1874 GFP_KERNEL); 1875 if (!signal_seq_arr) { 1876 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1877 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); 1878 return -ENOMEM; 1879 } 1880 1881 size_to_copy = signal_seq_arr_len * sizeof(*signal_seq_arr); 1882 if (copy_from_user(signal_seq_arr, 1883 u64_to_user_ptr(chunk->signal_seq_arr), 1884 size_to_copy)) { 1885 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 1886 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); 1887 dev_err(hdev->dev, 1888 "Failed to copy signal seq array from user\n"); 1889 rc = -EFAULT; 1890 goto out; 1891 } 1892 1893 /* currently it is guaranteed to have only one signal seq */ 1894 *signal_seq = signal_seq_arr[0]; 1895 1896 out: 1897 kfree(signal_seq_arr); 1898 1899 return rc; 1900 } 1901 1902 static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev, 1903 struct hl_ctx *ctx, struct hl_cs *cs, 1904 enum hl_queue_type q_type, u32 q_idx, u32 encaps_signal_offset) 1905 { 1906 struct hl_cs_counters_atomic *cntr; 1907 struct hl_cs_job *job; 1908 struct hl_cb *cb; 1909 u32 cb_size; 1910 1911 cntr = &hdev->aggregated_cs_counters; 1912 1913 job = hl_cs_allocate_job(hdev, q_type, true); 1914 if (!job) { 1915 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1916 atomic64_inc(&cntr->out_of_mem_drop_cnt); 1917 dev_err(hdev->dev, "Failed to allocate a new job\n"); 1918 return -ENOMEM; 1919 } 1920 1921 if (cs->type == CS_TYPE_WAIT) 1922 cb_size = hdev->asic_funcs->get_wait_cb_size(hdev); 1923 else 1924 cb_size = hdev->asic_funcs->get_signal_cb_size(hdev); 1925 1926 cb = hl_cb_kernel_create(hdev, cb_size, 1927 q_type == QUEUE_TYPE_HW && hdev->mmu_enable); 1928 if (!cb) { 1929 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1930 atomic64_inc(&cntr->out_of_mem_drop_cnt); 1931 kfree(job); 1932 return -EFAULT; 1933 } 1934 1935 job->id = 0; 1936 job->cs = cs; 1937 job->user_cb = cb; 1938 atomic_inc(&job->user_cb->cs_cnt); 1939 job->user_cb_size = cb_size; 1940 job->hw_queue_id = q_idx; 1941 1942 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) 1943 && cs->encaps_signals) 1944 job->encaps_sig_wait_offset = encaps_signal_offset; 1945 /* 1946 * No need in parsing, user CB is the patched CB. 1947 * We call hl_cb_destroy() out of two reasons - we don't need the CB in 1948 * the CB idr anymore and to decrement its refcount as it was 1949 * incremented inside hl_cb_kernel_create(). 1950 */ 1951 job->patched_cb = job->user_cb; 1952 job->job_cb_size = job->user_cb_size; 1953 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 1954 1955 /* increment refcount as for external queues we get completion */ 1956 cs_get(cs); 1957 1958 cs->jobs_in_queue_cnt[job->hw_queue_id]++; 1959 cs->jobs_cnt++; 1960 1961 list_add_tail(&job->cs_node, &cs->job_list); 1962 1963 hl_debugfs_add_job(hdev, job); 1964 1965 return 0; 1966 } 1967 1968 static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv, 1969 u32 q_idx, u32 count, 1970 u32 *handle_id, u32 *sob_addr, 1971 u32 *signals_count) 1972 { 1973 struct hw_queue_properties *hw_queue_prop; 1974 struct hl_sync_stream_properties *prop; 1975 struct hl_device *hdev = hpriv->hdev; 1976 struct hl_cs_encaps_sig_handle *handle; 1977 struct hl_encaps_signals_mgr *mgr; 1978 struct hl_hw_sob *hw_sob; 1979 int hdl_id; 1980 int rc = 0; 1981 1982 if (count >= HL_MAX_SOB_VAL) { 1983 dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n", 1984 count); 1985 rc = -EINVAL; 1986 goto out; 1987 } 1988 1989 if (q_idx >= hdev->asic_prop.max_queues) { 1990 dev_err(hdev->dev, "Queue index %d is invalid\n", 1991 q_idx); 1992 rc = -EINVAL; 1993 goto out; 1994 } 1995 1996 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; 1997 1998 if (!hw_queue_prop->supports_sync_stream) { 1999 dev_err(hdev->dev, 2000 "Queue index %d does not support sync stream operations\n", 2001 q_idx); 2002 rc = -EINVAL; 2003 goto out; 2004 } 2005 2006 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; 2007 2008 handle = kzalloc(sizeof(*handle), GFP_KERNEL); 2009 if (!handle) { 2010 rc = -ENOMEM; 2011 goto out; 2012 } 2013 2014 handle->count = count; 2015 2016 hl_ctx_get(hpriv->ctx); 2017 handle->ctx = hpriv->ctx; 2018 mgr = &hpriv->ctx->sig_mgr; 2019 2020 spin_lock(&mgr->lock); 2021 hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC); 2022 spin_unlock(&mgr->lock); 2023 2024 if (hdl_id < 0) { 2025 dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n"); 2026 rc = -EINVAL; 2027 goto put_ctx; 2028 } 2029 2030 handle->id = hdl_id; 2031 handle->q_idx = q_idx; 2032 handle->hdev = hdev; 2033 kref_init(&handle->refcount); 2034 2035 hdev->asic_funcs->hw_queues_lock(hdev); 2036 2037 hw_sob = &prop->hw_sob[prop->curr_sob_offset]; 2038 2039 /* 2040 * Increment the SOB value by count by user request 2041 * to reserve those signals 2042 * check if the signals amount to reserve is not exceeding the max sob 2043 * value, if yes then switch sob. 2044 */ 2045 rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, count, 2046 true); 2047 if (rc) { 2048 dev_err(hdev->dev, "Failed to switch SOB\n"); 2049 hdev->asic_funcs->hw_queues_unlock(hdev); 2050 rc = -EINVAL; 2051 goto remove_idr; 2052 } 2053 /* set the hw_sob to the handle after calling the sob wraparound handler 2054 * since sob could have changed. 2055 */ 2056 handle->hw_sob = hw_sob; 2057 2058 /* store the current sob value for unreserve validity check, and 2059 * signal offset support 2060 */ 2061 handle->pre_sob_val = prop->next_sob_val - handle->count; 2062 2063 handle->cs_seq = ULLONG_MAX; 2064 2065 *signals_count = prop->next_sob_val; 2066 hdev->asic_funcs->hw_queues_unlock(hdev); 2067 2068 *sob_addr = handle->hw_sob->sob_addr; 2069 *handle_id = hdl_id; 2070 2071 dev_dbg(hdev->dev, 2072 "Signals reserved, sob_id: %d, sob addr: 0x%x, last sob_val: %u, q_idx: %d, hdl_id: %d\n", 2073 hw_sob->sob_id, handle->hw_sob->sob_addr, 2074 prop->next_sob_val - 1, q_idx, hdl_id); 2075 goto out; 2076 2077 remove_idr: 2078 spin_lock(&mgr->lock); 2079 idr_remove(&mgr->handles, hdl_id); 2080 spin_unlock(&mgr->lock); 2081 2082 put_ctx: 2083 hl_ctx_put(handle->ctx); 2084 kfree(handle); 2085 2086 out: 2087 return rc; 2088 } 2089 2090 static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id) 2091 { 2092 struct hl_cs_encaps_sig_handle *encaps_sig_hdl; 2093 struct hl_sync_stream_properties *prop; 2094 struct hl_device *hdev = hpriv->hdev; 2095 struct hl_encaps_signals_mgr *mgr; 2096 struct hl_hw_sob *hw_sob; 2097 u32 q_idx, sob_addr; 2098 int rc = 0; 2099 2100 mgr = &hpriv->ctx->sig_mgr; 2101 2102 spin_lock(&mgr->lock); 2103 encaps_sig_hdl = idr_find(&mgr->handles, handle_id); 2104 if (encaps_sig_hdl) { 2105 dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n", 2106 handle_id, encaps_sig_hdl->hw_sob->sob_addr, 2107 encaps_sig_hdl->count); 2108 2109 hdev->asic_funcs->hw_queues_lock(hdev); 2110 2111 q_idx = encaps_sig_hdl->q_idx; 2112 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; 2113 hw_sob = &prop->hw_sob[prop->curr_sob_offset]; 2114 sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id); 2115 2116 /* Check if sob_val got out of sync due to other 2117 * signal submission requests which were handled 2118 * between the reserve-unreserve calls or SOB switch 2119 * upon reaching SOB max value. 2120 */ 2121 if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count 2122 != prop->next_sob_val || 2123 sob_addr != encaps_sig_hdl->hw_sob->sob_addr) { 2124 dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n", 2125 encaps_sig_hdl->pre_sob_val, 2126 (prop->next_sob_val - encaps_sig_hdl->count)); 2127 2128 hdev->asic_funcs->hw_queues_unlock(hdev); 2129 rc = -EINVAL; 2130 goto out; 2131 } 2132 2133 /* 2134 * Decrement the SOB value by count by user request 2135 * to unreserve those signals 2136 */ 2137 prop->next_sob_val -= encaps_sig_hdl->count; 2138 2139 hdev->asic_funcs->hw_queues_unlock(hdev); 2140 2141 hw_sob_put(hw_sob); 2142 2143 /* Release the id and free allocated memory of the handle */ 2144 idr_remove(&mgr->handles, handle_id); 2145 hl_ctx_put(encaps_sig_hdl->ctx); 2146 kfree(encaps_sig_hdl); 2147 } else { 2148 rc = -EINVAL; 2149 dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n"); 2150 } 2151 out: 2152 spin_unlock(&mgr->lock); 2153 2154 return rc; 2155 } 2156 2157 static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, 2158 void __user *chunks, u32 num_chunks, 2159 u64 *cs_seq, u32 flags, u32 timeout, 2160 u32 *signal_sob_addr_offset, u16 *signal_initial_sob_count) 2161 { 2162 struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL; 2163 bool handle_found = false, is_wait_cs = false, 2164 wait_cs_submitted = false, 2165 cs_encaps_signals = false; 2166 struct hl_cs_chunk *cs_chunk_array, *chunk; 2167 bool staged_cs_with_encaps_signals = false; 2168 struct hw_queue_properties *hw_queue_prop; 2169 struct hl_device *hdev = hpriv->hdev; 2170 struct hl_cs_compl *sig_waitcs_cmpl; 2171 u32 q_idx, collective_engine_id = 0; 2172 struct hl_cs_counters_atomic *cntr; 2173 struct hl_fence *sig_fence = NULL; 2174 struct hl_ctx *ctx = hpriv->ctx; 2175 enum hl_queue_type q_type; 2176 struct hl_cs *cs; 2177 u64 signal_seq; 2178 int rc; 2179 2180 cntr = &hdev->aggregated_cs_counters; 2181 *cs_seq = ULLONG_MAX; 2182 2183 rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks, 2184 ctx); 2185 if (rc) 2186 goto out; 2187 2188 /* currently it is guaranteed to have only one chunk */ 2189 chunk = &cs_chunk_array[0]; 2190 2191 if (chunk->queue_index >= hdev->asic_prop.max_queues) { 2192 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2193 atomic64_inc(&cntr->validation_drop_cnt); 2194 dev_err(hdev->dev, "Queue index %d is invalid\n", 2195 chunk->queue_index); 2196 rc = -EINVAL; 2197 goto free_cs_chunk_array; 2198 } 2199 2200 q_idx = chunk->queue_index; 2201 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; 2202 q_type = hw_queue_prop->type; 2203 2204 if (!hw_queue_prop->supports_sync_stream) { 2205 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2206 atomic64_inc(&cntr->validation_drop_cnt); 2207 dev_err(hdev->dev, 2208 "Queue index %d does not support sync stream operations\n", 2209 q_idx); 2210 rc = -EINVAL; 2211 goto free_cs_chunk_array; 2212 } 2213 2214 if (cs_type == CS_TYPE_COLLECTIVE_WAIT) { 2215 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) { 2216 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2217 atomic64_inc(&cntr->validation_drop_cnt); 2218 dev_err(hdev->dev, 2219 "Queue index %d is invalid\n", q_idx); 2220 rc = -EINVAL; 2221 goto free_cs_chunk_array; 2222 } 2223 2224 if (!hdev->nic_ports_mask) { 2225 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2226 atomic64_inc(&cntr->validation_drop_cnt); 2227 dev_err(hdev->dev, 2228 "Collective operations not supported when NIC ports are disabled"); 2229 rc = -EINVAL; 2230 goto free_cs_chunk_array; 2231 } 2232 2233 collective_engine_id = chunk->collective_engine_id; 2234 } 2235 2236 is_wait_cs = !!(cs_type == CS_TYPE_WAIT || 2237 cs_type == CS_TYPE_COLLECTIVE_WAIT); 2238 2239 cs_encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS); 2240 2241 if (is_wait_cs) { 2242 rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, 2243 ctx, cs_encaps_signals); 2244 if (rc) 2245 goto free_cs_chunk_array; 2246 2247 if (cs_encaps_signals) { 2248 /* check if cs sequence has encapsulated 2249 * signals handle 2250 */ 2251 struct idr *idp; 2252 u32 id; 2253 2254 spin_lock(&ctx->sig_mgr.lock); 2255 idp = &ctx->sig_mgr.handles; 2256 idr_for_each_entry(idp, encaps_sig_hdl, id) { 2257 if (encaps_sig_hdl->cs_seq == signal_seq) { 2258 /* get refcount to protect removing this handle from idr, 2259 * needed when multiple wait cs are used with offset 2260 * to wait on reserved encaps signals. 2261 * Since kref_put of this handle is executed outside the 2262 * current lock, it is possible that the handle refcount 2263 * is 0 but it yet to be removed from the list. In this 2264 * case need to consider the handle as not valid. 2265 */ 2266 if (kref_get_unless_zero(&encaps_sig_hdl->refcount)) 2267 handle_found = true; 2268 break; 2269 } 2270 } 2271 spin_unlock(&ctx->sig_mgr.lock); 2272 2273 if (!handle_found) { 2274 /* treat as signal CS already finished */ 2275 dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n", 2276 signal_seq); 2277 rc = 0; 2278 goto free_cs_chunk_array; 2279 } 2280 2281 /* validate also the signal offset value */ 2282 if (chunk->encaps_signal_offset > 2283 encaps_sig_hdl->count) { 2284 dev_err(hdev->dev, "offset(%u) value exceed max reserved signals count(%u)!\n", 2285 chunk->encaps_signal_offset, 2286 encaps_sig_hdl->count); 2287 rc = -EINVAL; 2288 goto free_cs_chunk_array; 2289 } 2290 } 2291 2292 sig_fence = hl_ctx_get_fence(ctx, signal_seq); 2293 if (IS_ERR(sig_fence)) { 2294 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2295 atomic64_inc(&cntr->validation_drop_cnt); 2296 dev_err(hdev->dev, 2297 "Failed to get signal CS with seq 0x%llx\n", 2298 signal_seq); 2299 rc = PTR_ERR(sig_fence); 2300 goto free_cs_chunk_array; 2301 } 2302 2303 if (!sig_fence) { 2304 /* signal CS already finished */ 2305 rc = 0; 2306 goto free_cs_chunk_array; 2307 } 2308 2309 sig_waitcs_cmpl = 2310 container_of(sig_fence, struct hl_cs_compl, base_fence); 2311 2312 staged_cs_with_encaps_signals = !! 2313 (sig_waitcs_cmpl->type == CS_TYPE_DEFAULT && 2314 (flags & HL_CS_FLAGS_ENCAP_SIGNALS)); 2315 2316 if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL && 2317 !staged_cs_with_encaps_signals) { 2318 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2319 atomic64_inc(&cntr->validation_drop_cnt); 2320 dev_err(hdev->dev, 2321 "CS seq 0x%llx is not of a signal/encaps-signal CS\n", 2322 signal_seq); 2323 hl_fence_put(sig_fence); 2324 rc = -EINVAL; 2325 goto free_cs_chunk_array; 2326 } 2327 2328 if (completion_done(&sig_fence->completion)) { 2329 /* signal CS already finished */ 2330 hl_fence_put(sig_fence); 2331 rc = 0; 2332 goto free_cs_chunk_array; 2333 } 2334 } 2335 2336 rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout); 2337 if (rc) { 2338 if (is_wait_cs) 2339 hl_fence_put(sig_fence); 2340 2341 goto free_cs_chunk_array; 2342 } 2343 2344 /* 2345 * Save the signal CS fence for later initialization right before 2346 * hanging the wait CS on the queue. 2347 * for encaps signals case, we save the cs sequence and handle pointer 2348 * for later initialization. 2349 */ 2350 if (is_wait_cs) { 2351 cs->signal_fence = sig_fence; 2352 /* store the handle pointer, so we don't have to 2353 * look for it again, later on the flow 2354 * when we need to set SOB info in hw_queue. 2355 */ 2356 if (cs->encaps_signals) 2357 cs->encaps_sig_hdl = encaps_sig_hdl; 2358 } 2359 2360 hl_debugfs_add_cs(cs); 2361 2362 *cs_seq = cs->sequence; 2363 2364 if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL) 2365 rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type, 2366 q_idx, chunk->encaps_signal_offset); 2367 else if (cs_type == CS_TYPE_COLLECTIVE_WAIT) 2368 rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx, 2369 cs, q_idx, collective_engine_id, 2370 chunk->encaps_signal_offset); 2371 else { 2372 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2373 atomic64_inc(&cntr->validation_drop_cnt); 2374 rc = -EINVAL; 2375 } 2376 2377 if (rc) 2378 goto free_cs_object; 2379 2380 if (q_type == QUEUE_TYPE_HW) 2381 INIT_WORK(&cs->finish_work, cs_completion); 2382 2383 rc = hl_hw_queue_schedule_cs(cs); 2384 if (rc) { 2385 /* In case wait cs failed here, it means the signal cs 2386 * already completed. we want to free all it's related objects 2387 * but we don't want to fail the ioctl. 2388 */ 2389 if (is_wait_cs) 2390 rc = 0; 2391 else if (rc != -EAGAIN) 2392 dev_err(hdev->dev, 2393 "Failed to submit CS %d.%llu to H/W queues, error %d\n", 2394 ctx->asid, cs->sequence, rc); 2395 goto free_cs_object; 2396 } 2397 2398 *signal_sob_addr_offset = cs->sob_addr_offset; 2399 *signal_initial_sob_count = cs->initial_sob_count; 2400 2401 rc = HL_CS_STATUS_SUCCESS; 2402 if (is_wait_cs) 2403 wait_cs_submitted = true; 2404 goto put_cs; 2405 2406 free_cs_object: 2407 cs_rollback(hdev, cs); 2408 *cs_seq = ULLONG_MAX; 2409 /* The path below is both for good and erroneous exits */ 2410 put_cs: 2411 /* We finished with the CS in this function, so put the ref */ 2412 cs_put(cs); 2413 free_cs_chunk_array: 2414 if (!wait_cs_submitted && cs_encaps_signals && handle_found && is_wait_cs) 2415 kref_put(&encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx); 2416 kfree(cs_chunk_array); 2417 out: 2418 return rc; 2419 } 2420 2421 static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores, 2422 u32 num_engine_cores, u32 core_command) 2423 { 2424 int rc; 2425 struct hl_device *hdev = hpriv->hdev; 2426 void __user *engine_cores_arr; 2427 u32 *cores; 2428 2429 if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) { 2430 dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores); 2431 return -EINVAL; 2432 } 2433 2434 if (core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) { 2435 dev_err(hdev->dev, "Engine core command is invalid\n"); 2436 return -EINVAL; 2437 } 2438 2439 engine_cores_arr = (void __user *) (uintptr_t) engine_cores; 2440 cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL); 2441 if (!cores) 2442 return -ENOMEM; 2443 2444 if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) { 2445 dev_err(hdev->dev, "Failed to copy core-ids array from user\n"); 2446 kfree(cores); 2447 return -EFAULT; 2448 } 2449 2450 rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command); 2451 kfree(cores); 2452 2453 return rc; 2454 } 2455 2456 static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv) 2457 { 2458 struct hl_device *hdev = hpriv->hdev; 2459 struct asic_fixed_properties *prop = &hdev->asic_prop; 2460 2461 if (!prop->hbw_flush_reg) { 2462 dev_dbg(hdev->dev, "HBW flush is not supported\n"); 2463 return -EOPNOTSUPP; 2464 } 2465 2466 RREG32(prop->hbw_flush_reg); 2467 2468 return 0; 2469 } 2470 2471 int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) 2472 { 2473 union hl_cs_args *args = data; 2474 enum hl_cs_type cs_type = 0; 2475 u64 cs_seq = ULONG_MAX; 2476 void __user *chunks; 2477 u32 num_chunks, flags, timeout, 2478 signals_count = 0, sob_addr = 0, handle_id = 0; 2479 u16 sob_initial_count = 0; 2480 int rc; 2481 2482 rc = hl_cs_sanity_checks(hpriv, args); 2483 if (rc) 2484 goto out; 2485 2486 rc = hl_cs_ctx_switch(hpriv, args, &cs_seq); 2487 if (rc) 2488 goto out; 2489 2490 cs_type = hl_cs_get_cs_type(args->in.cs_flags & 2491 ~HL_CS_FLAGS_FORCE_RESTORE); 2492 chunks = (void __user *) (uintptr_t) args->in.chunks_execute; 2493 num_chunks = args->in.num_chunks_execute; 2494 flags = args->in.cs_flags; 2495 2496 /* In case this is a staged CS, user should supply the CS sequence */ 2497 if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) && 2498 !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST)) 2499 cs_seq = args->in.seq; 2500 2501 timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT 2502 ? msecs_to_jiffies(args->in.timeout * 1000) 2503 : hpriv->hdev->timeout_jiffies; 2504 2505 switch (cs_type) { 2506 case CS_TYPE_SIGNAL: 2507 case CS_TYPE_WAIT: 2508 case CS_TYPE_COLLECTIVE_WAIT: 2509 rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks, 2510 &cs_seq, args->in.cs_flags, timeout, 2511 &sob_addr, &sob_initial_count); 2512 break; 2513 case CS_RESERVE_SIGNALS: 2514 rc = cs_ioctl_reserve_signals(hpriv, 2515 args->in.encaps_signals_q_idx, 2516 args->in.encaps_signals_count, 2517 &handle_id, &sob_addr, &signals_count); 2518 break; 2519 case CS_UNRESERVE_SIGNALS: 2520 rc = cs_ioctl_unreserve_signals(hpriv, 2521 args->in.encaps_sig_handle_id); 2522 break; 2523 case CS_TYPE_ENGINE_CORE: 2524 rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores, 2525 args->in.num_engine_cores, args->in.core_command); 2526 break; 2527 case CS_TYPE_FLUSH_PCI_HBW_WRITES: 2528 rc = cs_ioctl_flush_pci_hbw_writes(hpriv); 2529 break; 2530 default: 2531 rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq, 2532 args->in.cs_flags, 2533 args->in.encaps_sig_handle_id, 2534 timeout, &sob_initial_count); 2535 break; 2536 } 2537 out: 2538 if (rc != -EAGAIN) { 2539 memset(args, 0, sizeof(*args)); 2540 2541 switch (cs_type) { 2542 case CS_RESERVE_SIGNALS: 2543 args->out.handle_id = handle_id; 2544 args->out.sob_base_addr_offset = sob_addr; 2545 args->out.count = signals_count; 2546 break; 2547 case CS_TYPE_SIGNAL: 2548 args->out.sob_base_addr_offset = sob_addr; 2549 args->out.sob_count_before_submission = sob_initial_count; 2550 args->out.seq = cs_seq; 2551 break; 2552 case CS_TYPE_DEFAULT: 2553 args->out.sob_count_before_submission = sob_initial_count; 2554 args->out.seq = cs_seq; 2555 break; 2556 default: 2557 args->out.seq = cs_seq; 2558 break; 2559 } 2560 2561 args->out.status = rc; 2562 } 2563 2564 return rc; 2565 } 2566 2567 static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence, 2568 enum hl_cs_wait_status *status, u64 timeout_us, s64 *timestamp) 2569 { 2570 struct hl_device *hdev = ctx->hdev; 2571 ktime_t timestamp_kt; 2572 long completion_rc; 2573 int rc = 0, error; 2574 2575 if (IS_ERR(fence)) { 2576 rc = PTR_ERR(fence); 2577 if (rc == -EINVAL) 2578 dev_notice_ratelimited(hdev->dev, 2579 "Can't wait on CS %llu because current CS is at seq %llu\n", 2580 seq, ctx->cs_sequence); 2581 return rc; 2582 } 2583 2584 if (!fence) { 2585 if (!hl_pop_cs_outcome(&ctx->outcome_store, seq, ×tamp_kt, &error)) { 2586 dev_dbg(hdev->dev, 2587 "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n", 2588 seq, ctx->cs_sequence); 2589 *status = CS_WAIT_STATUS_GONE; 2590 return 0; 2591 } 2592 2593 completion_rc = 1; 2594 goto report_results; 2595 } 2596 2597 if (!timeout_us) { 2598 completion_rc = completion_done(&fence->completion); 2599 } else { 2600 unsigned long timeout; 2601 2602 timeout = (timeout_us == MAX_SCHEDULE_TIMEOUT) ? 2603 timeout_us : usecs_to_jiffies(timeout_us); 2604 completion_rc = 2605 wait_for_completion_interruptible_timeout( 2606 &fence->completion, timeout); 2607 } 2608 2609 error = fence->error; 2610 timestamp_kt = fence->timestamp; 2611 2612 report_results: 2613 if (completion_rc > 0) { 2614 *status = CS_WAIT_STATUS_COMPLETED; 2615 if (timestamp) 2616 *timestamp = ktime_to_ns(timestamp_kt); 2617 } else { 2618 *status = CS_WAIT_STATUS_BUSY; 2619 } 2620 2621 if (completion_rc == -ERESTARTSYS) 2622 rc = completion_rc; 2623 else if (error == -ETIMEDOUT || error == -EIO) 2624 rc = error; 2625 2626 return rc; 2627 } 2628 2629 /* 2630 * hl_cs_poll_fences - iterate CS fences to check for CS completion 2631 * 2632 * @mcs_data: multi-CS internal data 2633 * @mcs_compl: multi-CS completion structure 2634 * 2635 * @return 0 on success, otherwise non 0 error code 2636 * 2637 * The function iterates on all CS sequence in the list and set bit in 2638 * completion_bitmap for each completed CS. 2639 * While iterating, the function sets the stream map of each fence in the fence 2640 * array in the completion QID stream map to be used by CSs to perform 2641 * completion to the multi-CS context. 2642 * This function shall be called after taking context ref 2643 */ 2644 static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_completion *mcs_compl) 2645 { 2646 struct hl_fence **fence_ptr = mcs_data->fence_arr; 2647 struct hl_device *hdev = mcs_data->ctx->hdev; 2648 int i, rc, arr_len = mcs_data->arr_len; 2649 u64 *seq_arr = mcs_data->seq_arr; 2650 ktime_t max_ktime, first_cs_time; 2651 enum hl_cs_wait_status status; 2652 2653 memset(fence_ptr, 0, arr_len * sizeof(struct hl_fence *)); 2654 2655 /* get all fences under the same lock */ 2656 rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len); 2657 if (rc) 2658 return rc; 2659 2660 /* 2661 * re-initialize the completion here to handle 2 possible cases: 2662 * 1. CS will complete the multi-CS prior clearing the completion. in which 2663 * case the fence iteration is guaranteed to catch the CS completion. 2664 * 2. the completion will occur after re-init of the completion. 2665 * in which case we will wake up immediately in wait_for_completion. 2666 */ 2667 reinit_completion(&mcs_compl->completion); 2668 2669 /* 2670 * set to maximum time to verify timestamp is valid: if at the end 2671 * this value is maintained- no timestamp was updated 2672 */ 2673 max_ktime = ktime_set(KTIME_SEC_MAX, 0); 2674 first_cs_time = max_ktime; 2675 2676 for (i = 0; i < arr_len; i++, fence_ptr++) { 2677 struct hl_fence *fence = *fence_ptr; 2678 2679 /* 2680 * In order to prevent case where we wait until timeout even though a CS associated 2681 * with the multi-CS actually completed we do things in the below order: 2682 * 1. for each fence set it's QID map in the multi-CS completion QID map. This way 2683 * any CS can, potentially, complete the multi CS for the specific QID (note 2684 * that once completion is initialized, calling complete* and then wait on the 2685 * completion will cause it to return at once) 2686 * 2. only after allowing multi-CS completion for the specific QID we check whether 2687 * the specific CS already completed (and thus the wait for completion part will 2688 * be skipped). if the CS not completed it is guaranteed that completing CS will 2689 * wake up the completion. 2690 */ 2691 if (fence) 2692 mcs_compl->stream_master_qid_map |= fence->stream_master_qid_map; 2693 2694 /* 2695 * function won't sleep as it is called with timeout 0 (i.e. 2696 * poll the fence) 2697 */ 2698 rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence, &status, 0, NULL); 2699 if (rc) { 2700 dev_err(hdev->dev, 2701 "wait_for_fence error :%d for CS seq %llu\n", 2702 rc, seq_arr[i]); 2703 break; 2704 } 2705 2706 switch (status) { 2707 case CS_WAIT_STATUS_BUSY: 2708 /* CS did not finished, QID to wait on already stored */ 2709 break; 2710 case CS_WAIT_STATUS_COMPLETED: 2711 /* 2712 * Using mcs_handling_done to avoid possibility of mcs_data 2713 * returns to user indicating CS completed before it finished 2714 * all of its mcs handling, to avoid race the next time the 2715 * user waits for mcs. 2716 * note: when reaching this case fence is definitely not NULL 2717 * but NULL check was added to overcome static analysis 2718 */ 2719 if (fence && !fence->mcs_handling_done) { 2720 /* 2721 * in case multi CS is completed but MCS handling not done 2722 * we "complete" the multi CS to prevent it from waiting 2723 * until time-out and the "multi-CS handling done" will have 2724 * another chance at the next iteration 2725 */ 2726 complete_all(&mcs_compl->completion); 2727 break; 2728 } 2729 2730 mcs_data->completion_bitmap |= BIT(i); 2731 /* 2732 * For all completed CSs we take the earliest timestamp. 2733 * For this we have to validate that the timestamp is 2734 * earliest of all timestamps so far. 2735 */ 2736 if (fence && mcs_data->update_ts && 2737 (ktime_compare(fence->timestamp, first_cs_time) < 0)) 2738 first_cs_time = fence->timestamp; 2739 break; 2740 case CS_WAIT_STATUS_GONE: 2741 mcs_data->update_ts = false; 2742 mcs_data->gone_cs = true; 2743 /* 2744 * It is possible to get an old sequence numbers from user 2745 * which related to already completed CSs and their fences 2746 * already gone. In this case, CS set as completed but 2747 * no need to consider its QID for mcs completion. 2748 */ 2749 mcs_data->completion_bitmap |= BIT(i); 2750 break; 2751 default: 2752 dev_err(hdev->dev, "Invalid fence status\n"); 2753 rc = -EINVAL; 2754 break; 2755 } 2756 2757 } 2758 2759 hl_fences_put(mcs_data->fence_arr, arr_len); 2760 2761 if (mcs_data->update_ts && 2762 (ktime_compare(first_cs_time, max_ktime) != 0)) 2763 mcs_data->timestamp = ktime_to_ns(first_cs_time); 2764 2765 return rc; 2766 } 2767 2768 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq, 2769 enum hl_cs_wait_status *status, s64 *timestamp) 2770 { 2771 struct hl_fence *fence; 2772 int rc = 0; 2773 2774 if (timestamp) 2775 *timestamp = 0; 2776 2777 hl_ctx_get(ctx); 2778 2779 fence = hl_ctx_get_fence(ctx, seq); 2780 2781 rc = hl_wait_for_fence(ctx, seq, fence, status, timeout_us, timestamp); 2782 hl_fence_put(fence); 2783 hl_ctx_put(ctx); 2784 2785 return rc; 2786 } 2787 2788 static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs) 2789 { 2790 if (usecs <= U32_MAX) 2791 return usecs_to_jiffies(usecs); 2792 2793 /* 2794 * If the value in nanoseconds is larger than 64 bit, use the largest 2795 * 64 bit value. 2796 */ 2797 if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC))) 2798 return nsecs_to_jiffies(U64_MAX); 2799 2800 return nsecs_to_jiffies(usecs * NSEC_PER_USEC); 2801 } 2802 2803 /* 2804 * hl_wait_multi_cs_completion_init - init completion structure 2805 * 2806 * @hdev: pointer to habanalabs device structure 2807 * @stream_master_bitmap: stream master QIDs map, set bit indicates stream 2808 * master QID to wait on 2809 * 2810 * @return valid completion struct pointer on success, otherwise error pointer 2811 * 2812 * up to MULTI_CS_MAX_USER_CTX calls can be done concurrently to the driver. 2813 * the function gets the first available completion (by marking it "used") 2814 * and initialize its values. 2815 */ 2816 static struct multi_cs_completion *hl_wait_multi_cs_completion_init(struct hl_device *hdev) 2817 { 2818 struct multi_cs_completion *mcs_compl; 2819 int i; 2820 2821 /* find free multi_cs completion structure */ 2822 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { 2823 mcs_compl = &hdev->multi_cs_completion[i]; 2824 spin_lock(&mcs_compl->lock); 2825 if (!mcs_compl->used) { 2826 mcs_compl->used = 1; 2827 mcs_compl->timestamp = 0; 2828 /* 2829 * init QID map to 0 to avoid completion by CSs. the actual QID map 2830 * to multi-CS CSs will be set incrementally at a later stage 2831 */ 2832 mcs_compl->stream_master_qid_map = 0; 2833 spin_unlock(&mcs_compl->lock); 2834 break; 2835 } 2836 spin_unlock(&mcs_compl->lock); 2837 } 2838 2839 if (i == MULTI_CS_MAX_USER_CTX) { 2840 dev_err(hdev->dev, "no available multi-CS completion structure\n"); 2841 return ERR_PTR(-ENOMEM); 2842 } 2843 return mcs_compl; 2844 } 2845 2846 /* 2847 * hl_wait_multi_cs_completion_fini - return completion structure and set as 2848 * unused 2849 * 2850 * @mcs_compl: pointer to the completion structure 2851 */ 2852 static void hl_wait_multi_cs_completion_fini( 2853 struct multi_cs_completion *mcs_compl) 2854 { 2855 /* 2856 * free completion structure, do it under lock to be in-sync with the 2857 * thread that signals completion 2858 */ 2859 spin_lock(&mcs_compl->lock); 2860 mcs_compl->used = 0; 2861 spin_unlock(&mcs_compl->lock); 2862 } 2863 2864 /* 2865 * hl_wait_multi_cs_completion - wait for first CS to complete 2866 * 2867 * @mcs_data: multi-CS internal data 2868 * 2869 * @return 0 on success, otherwise non 0 error code 2870 */ 2871 static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data, 2872 struct multi_cs_completion *mcs_compl) 2873 { 2874 long completion_rc; 2875 2876 completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion, 2877 mcs_data->timeout_jiffies); 2878 2879 /* update timestamp */ 2880 if (completion_rc > 0) 2881 mcs_data->timestamp = mcs_compl->timestamp; 2882 2883 if (completion_rc == -ERESTARTSYS) 2884 return completion_rc; 2885 2886 mcs_data->wait_status = completion_rc; 2887 2888 return 0; 2889 } 2890 2891 /* 2892 * hl_multi_cs_completion_init - init array of multi-CS completion structures 2893 * 2894 * @hdev: pointer to habanalabs device structure 2895 */ 2896 void hl_multi_cs_completion_init(struct hl_device *hdev) 2897 { 2898 struct multi_cs_completion *mcs_cmpl; 2899 int i; 2900 2901 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { 2902 mcs_cmpl = &hdev->multi_cs_completion[i]; 2903 mcs_cmpl->used = 0; 2904 spin_lock_init(&mcs_cmpl->lock); 2905 init_completion(&mcs_cmpl->completion); 2906 } 2907 } 2908 2909 /* 2910 * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl 2911 * 2912 * @hpriv: pointer to the private data of the fd 2913 * @data: pointer to multi-CS wait ioctl in/out args 2914 * 2915 */ 2916 static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) 2917 { 2918 struct multi_cs_completion *mcs_compl; 2919 struct hl_device *hdev = hpriv->hdev; 2920 struct multi_cs_data mcs_data = {}; 2921 union hl_wait_cs_args *args = data; 2922 struct hl_ctx *ctx = hpriv->ctx; 2923 struct hl_fence **fence_arr; 2924 void __user *seq_arr; 2925 u32 size_to_copy; 2926 u64 *cs_seq_arr; 2927 u8 seq_arr_len; 2928 int rc, i; 2929 2930 for (i = 0 ; i < sizeof(args->in.pad) ; i++) 2931 if (args->in.pad[i]) { 2932 dev_dbg(hdev->dev, "Padding bytes must be 0\n"); 2933 return -EINVAL; 2934 } 2935 2936 if (!hdev->supports_wait_for_multi_cs) { 2937 dev_err(hdev->dev, "Wait for multi CS is not supported\n"); 2938 return -EPERM; 2939 } 2940 2941 seq_arr_len = args->in.seq_arr_len; 2942 2943 if (seq_arr_len > HL_WAIT_MULTI_CS_LIST_MAX_LEN) { 2944 dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n", 2945 HL_WAIT_MULTI_CS_LIST_MAX_LEN, seq_arr_len); 2946 return -EINVAL; 2947 } 2948 2949 /* allocate memory for sequence array */ 2950 cs_seq_arr = 2951 kmalloc_array(seq_arr_len, sizeof(*cs_seq_arr), GFP_KERNEL); 2952 if (!cs_seq_arr) 2953 return -ENOMEM; 2954 2955 /* copy CS sequence array from user */ 2956 seq_arr = (void __user *) (uintptr_t) args->in.seq; 2957 size_to_copy = seq_arr_len * sizeof(*cs_seq_arr); 2958 if (copy_from_user(cs_seq_arr, seq_arr, size_to_copy)) { 2959 dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n"); 2960 rc = -EFAULT; 2961 goto free_seq_arr; 2962 } 2963 2964 /* allocate array for the fences */ 2965 fence_arr = kmalloc_array(seq_arr_len, sizeof(struct hl_fence *), GFP_KERNEL); 2966 if (!fence_arr) { 2967 rc = -ENOMEM; 2968 goto free_seq_arr; 2969 } 2970 2971 /* initialize the multi-CS internal data */ 2972 mcs_data.ctx = ctx; 2973 mcs_data.seq_arr = cs_seq_arr; 2974 mcs_data.fence_arr = fence_arr; 2975 mcs_data.arr_len = seq_arr_len; 2976 2977 hl_ctx_get(ctx); 2978 2979 /* wait (with timeout) for the first CS to be completed */ 2980 mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us); 2981 mcs_compl = hl_wait_multi_cs_completion_init(hdev); 2982 if (IS_ERR(mcs_compl)) { 2983 rc = PTR_ERR(mcs_compl); 2984 goto put_ctx; 2985 } 2986 2987 /* poll all CS fences, extract timestamp */ 2988 mcs_data.update_ts = true; 2989 rc = hl_cs_poll_fences(&mcs_data, mcs_compl); 2990 /* 2991 * skip wait for CS completion when one of the below is true: 2992 * - an error on the poll function 2993 * - one or more CS in the list completed 2994 * - the user called ioctl with timeout 0 2995 */ 2996 if (rc || mcs_data.completion_bitmap || !args->in.timeout_us) 2997 goto completion_fini; 2998 2999 while (true) { 3000 rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl); 3001 if (rc || (mcs_data.wait_status == 0)) 3002 break; 3003 3004 /* 3005 * poll fences once again to update the CS map. 3006 * no timestamp should be updated this time. 3007 */ 3008 mcs_data.update_ts = false; 3009 rc = hl_cs_poll_fences(&mcs_data, mcs_compl); 3010 3011 if (rc || mcs_data.completion_bitmap) 3012 break; 3013 3014 /* 3015 * if hl_wait_multi_cs_completion returned before timeout (i.e. 3016 * it got a completion) it either got completed by CS in the multi CS list 3017 * (in which case the indication will be non empty completion_bitmap) or it 3018 * got completed by CS submitted to one of the shared stream master but 3019 * not in the multi CS list (in which case we should wait again but modify 3020 * the timeout and set timestamp as zero to let a CS related to the current 3021 * multi-CS set a new, relevant, timestamp) 3022 */ 3023 mcs_data.timeout_jiffies = mcs_data.wait_status; 3024 mcs_compl->timestamp = 0; 3025 } 3026 3027 completion_fini: 3028 hl_wait_multi_cs_completion_fini(mcs_compl); 3029 3030 put_ctx: 3031 hl_ctx_put(ctx); 3032 kfree(fence_arr); 3033 3034 free_seq_arr: 3035 kfree(cs_seq_arr); 3036 3037 if (rc == -ERESTARTSYS) { 3038 dev_err_ratelimited(hdev->dev, 3039 "user process got signal while waiting for Multi-CS\n"); 3040 rc = -EINTR; 3041 } 3042 3043 if (rc) 3044 return rc; 3045 3046 /* update output args */ 3047 memset(args, 0, sizeof(*args)); 3048 3049 if (mcs_data.completion_bitmap) { 3050 args->out.status = HL_WAIT_CS_STATUS_COMPLETED; 3051 args->out.cs_completion_map = mcs_data.completion_bitmap; 3052 3053 /* if timestamp not 0- it's valid */ 3054 if (mcs_data.timestamp) { 3055 args->out.timestamp_nsec = mcs_data.timestamp; 3056 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; 3057 } 3058 3059 /* update if some CS was gone */ 3060 if (!mcs_data.timestamp) 3061 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; 3062 } else { 3063 args->out.status = HL_WAIT_CS_STATUS_BUSY; 3064 } 3065 3066 return 0; 3067 } 3068 3069 static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) 3070 { 3071 struct hl_device *hdev = hpriv->hdev; 3072 union hl_wait_cs_args *args = data; 3073 enum hl_cs_wait_status status; 3074 u64 seq = args->in.seq; 3075 s64 timestamp; 3076 int rc; 3077 3078 rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq, &status, ×tamp); 3079 3080 if (rc == -ERESTARTSYS) { 3081 dev_err_ratelimited(hdev->dev, 3082 "user process got signal while waiting for CS handle %llu\n", 3083 seq); 3084 return -EINTR; 3085 } 3086 3087 memset(args, 0, sizeof(*args)); 3088 3089 if (rc) { 3090 if (rc == -ETIMEDOUT) { 3091 dev_err_ratelimited(hdev->dev, 3092 "CS %llu has timed-out while user process is waiting for it\n", 3093 seq); 3094 args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT; 3095 } else if (rc == -EIO) { 3096 dev_err_ratelimited(hdev->dev, 3097 "CS %llu has been aborted while user process is waiting for it\n", 3098 seq); 3099 args->out.status = HL_WAIT_CS_STATUS_ABORTED; 3100 } 3101 return rc; 3102 } 3103 3104 if (timestamp) { 3105 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; 3106 args->out.timestamp_nsec = timestamp; 3107 } 3108 3109 switch (status) { 3110 case CS_WAIT_STATUS_GONE: 3111 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; 3112 fallthrough; 3113 case CS_WAIT_STATUS_COMPLETED: 3114 args->out.status = HL_WAIT_CS_STATUS_COMPLETED; 3115 break; 3116 case CS_WAIT_STATUS_BUSY: 3117 default: 3118 args->out.status = HL_WAIT_CS_STATUS_BUSY; 3119 break; 3120 } 3121 3122 return 0; 3123 } 3124 3125 static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf, 3126 struct hl_cb *cq_cb, 3127 u64 ts_offset, u64 cq_offset, u64 target_value, 3128 spinlock_t *wait_list_lock, 3129 struct hl_user_pending_interrupt **pend) 3130 { 3131 struct hl_ts_buff *ts_buff = buf->private; 3132 struct hl_user_pending_interrupt *requested_offset_record = 3133 (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + 3134 ts_offset; 3135 struct hl_user_pending_interrupt *cb_last = 3136 (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + 3137 (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt)); 3138 unsigned long flags, iter_counter = 0; 3139 u64 current_cq_counter; 3140 3141 /* Validate ts_offset not exceeding last max */ 3142 if (requested_offset_record >= cb_last) { 3143 dev_err(buf->mmg->dev, "Ts offset exceeds max CB offset(0x%llx)\n", 3144 (u64)(uintptr_t)cb_last); 3145 return -EINVAL; 3146 } 3147 3148 start_over: 3149 spin_lock_irqsave(wait_list_lock, flags); 3150 3151 /* Unregister only if we didn't reach the target value 3152 * since in this case there will be no handling in irq context 3153 * and then it's safe to delete the node out of the interrupt list 3154 * then re-use it on other interrupt 3155 */ 3156 if (requested_offset_record->ts_reg_info.in_use) { 3157 current_cq_counter = *requested_offset_record->cq_kernel_addr; 3158 if (current_cq_counter < requested_offset_record->cq_target_value) { 3159 list_del(&requested_offset_record->wait_list_node); 3160 spin_unlock_irqrestore(wait_list_lock, flags); 3161 3162 hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf); 3163 hl_cb_put(requested_offset_record->ts_reg_info.cq_cb); 3164 3165 dev_dbg(buf->mmg->dev, 3166 "ts node removed from interrupt list now can re-use\n"); 3167 } else { 3168 dev_dbg(buf->mmg->dev, 3169 "ts node in middle of irq handling\n"); 3170 3171 /* irq handling in the middle give it time to finish */ 3172 spin_unlock_irqrestore(wait_list_lock, flags); 3173 usleep_range(1, 10); 3174 if (++iter_counter == MAX_TS_ITER_NUM) { 3175 dev_err(buf->mmg->dev, 3176 "handling registration interrupt took too long!!\n"); 3177 return -EINVAL; 3178 } 3179 3180 goto start_over; 3181 } 3182 } else { 3183 /* Fill up the new registration node info */ 3184 requested_offset_record->ts_reg_info.buf = buf; 3185 requested_offset_record->ts_reg_info.cq_cb = cq_cb; 3186 requested_offset_record->ts_reg_info.timestamp_kernel_addr = 3187 (u64 *) ts_buff->user_buff_address + ts_offset; 3188 requested_offset_record->cq_kernel_addr = 3189 (u64 *) cq_cb->kernel_address + cq_offset; 3190 requested_offset_record->cq_target_value = target_value; 3191 3192 spin_unlock_irqrestore(wait_list_lock, flags); 3193 } 3194 3195 *pend = requested_offset_record; 3196 3197 dev_dbg(buf->mmg->dev, "Found available node in TS kernel CB %p\n", 3198 requested_offset_record); 3199 return 0; 3200 } 3201 3202 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, 3203 struct hl_mem_mgr *cb_mmg, struct hl_mem_mgr *mmg, 3204 u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset, 3205 u64 target_value, struct hl_user_interrupt *interrupt, 3206 bool register_ts_record, u64 ts_handle, u64 ts_offset, 3207 u32 *status, u64 *timestamp) 3208 { 3209 struct hl_user_pending_interrupt *pend; 3210 struct hl_mmap_mem_buf *buf; 3211 struct hl_cb *cq_cb; 3212 unsigned long timeout, flags; 3213 long completion_rc; 3214 int rc = 0; 3215 3216 timeout = hl_usecs64_to_jiffies(timeout_us); 3217 3218 hl_ctx_get(ctx); 3219 3220 cq_cb = hl_cb_get(cb_mmg, cq_counters_handle); 3221 if (!cq_cb) { 3222 rc = -EINVAL; 3223 goto put_ctx; 3224 } 3225 3226 /* Validate the cq offset */ 3227 if (((u64 *) cq_cb->kernel_address + cq_counters_offset) >= 3228 ((u64 *) cq_cb->kernel_address + (cq_cb->size / sizeof(u64)))) { 3229 rc = -EINVAL; 3230 goto put_cq_cb; 3231 } 3232 3233 if (register_ts_record) { 3234 dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n", 3235 interrupt->interrupt_id, ts_offset, cq_counters_offset); 3236 buf = hl_mmap_mem_buf_get(mmg, ts_handle); 3237 if (!buf) { 3238 rc = -EINVAL; 3239 goto put_cq_cb; 3240 } 3241 3242 /* get ts buffer record */ 3243 rc = ts_buff_get_kernel_ts_record(buf, cq_cb, ts_offset, 3244 cq_counters_offset, target_value, 3245 &interrupt->wait_list_lock, &pend); 3246 if (rc) 3247 goto put_ts_buff; 3248 } else { 3249 pend = kzalloc(sizeof(*pend), GFP_KERNEL); 3250 if (!pend) { 3251 rc = -ENOMEM; 3252 goto put_cq_cb; 3253 } 3254 hl_fence_init(&pend->fence, ULONG_MAX); 3255 pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset; 3256 pend->cq_target_value = target_value; 3257 } 3258 3259 spin_lock_irqsave(&interrupt->wait_list_lock, flags); 3260 3261 /* We check for completion value as interrupt could have been received 3262 * before we added the node to the wait list 3263 */ 3264 if (*pend->cq_kernel_addr >= target_value) { 3265 if (register_ts_record) 3266 pend->ts_reg_info.in_use = 0; 3267 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); 3268 3269 *status = HL_WAIT_CS_STATUS_COMPLETED; 3270 3271 if (register_ts_record) { 3272 *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns(); 3273 goto put_ts_buff; 3274 } else { 3275 pend->fence.timestamp = ktime_get(); 3276 goto set_timestamp; 3277 } 3278 } else if (!timeout_us) { 3279 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); 3280 *status = HL_WAIT_CS_STATUS_BUSY; 3281 pend->fence.timestamp = ktime_get(); 3282 goto set_timestamp; 3283 } 3284 3285 /* Add pending user interrupt to relevant list for the interrupt 3286 * handler to monitor. 3287 * Note that we cannot have sorted list by target value, 3288 * in order to shorten the list pass loop, since 3289 * same list could have nodes for different cq counter handle. 3290 * Note: 3291 * Mark ts buff offset as in use here in the spinlock protection area 3292 * to avoid getting in the re-use section in ts_buff_get_kernel_ts_record 3293 * before adding the node to the list. this scenario might happen when 3294 * multiple threads are racing on same offset and one thread could 3295 * set the ts buff in ts_buff_get_kernel_ts_record then the other thread 3296 * takes over and get to ts_buff_get_kernel_ts_record and then we will try 3297 * to re-use the same ts buff offset, and will try to delete a non existing 3298 * node from the list. 3299 */ 3300 if (register_ts_record) 3301 pend->ts_reg_info.in_use = 1; 3302 3303 list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); 3304 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); 3305 3306 if (register_ts_record) { 3307 rc = *status = HL_WAIT_CS_STATUS_COMPLETED; 3308 goto ts_registration_exit; 3309 } 3310 3311 /* Wait for interrupt handler to signal completion */ 3312 completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, 3313 timeout); 3314 if (completion_rc > 0) { 3315 *status = HL_WAIT_CS_STATUS_COMPLETED; 3316 } else { 3317 if (completion_rc == -ERESTARTSYS) { 3318 dev_err_ratelimited(hdev->dev, 3319 "user process got signal while waiting for interrupt ID %d\n", 3320 interrupt->interrupt_id); 3321 rc = -EINTR; 3322 *status = HL_WAIT_CS_STATUS_ABORTED; 3323 } else { 3324 if (pend->fence.error == -EIO) { 3325 dev_err_ratelimited(hdev->dev, 3326 "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n", 3327 pend->fence.error); 3328 rc = -EIO; 3329 *status = HL_WAIT_CS_STATUS_ABORTED; 3330 } else { 3331 /* The wait has timed-out. We don't know anything beyond that 3332 * because the workload wasn't submitted through the driver. 3333 * Therefore, from driver's perspective, the workload is still 3334 * executing. 3335 */ 3336 rc = 0; 3337 *status = HL_WAIT_CS_STATUS_BUSY; 3338 } 3339 } 3340 } 3341 3342 /* 3343 * We keep removing the node from list here, and not at the irq handler 3344 * for completion timeout case. and if it's a registration 3345 * for ts record, the node will be deleted in the irq handler after 3346 * we reach the target value. 3347 */ 3348 spin_lock_irqsave(&interrupt->wait_list_lock, flags); 3349 list_del(&pend->wait_list_node); 3350 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); 3351 3352 set_timestamp: 3353 *timestamp = ktime_to_ns(pend->fence.timestamp); 3354 kfree(pend); 3355 hl_cb_put(cq_cb); 3356 ts_registration_exit: 3357 hl_ctx_put(ctx); 3358 3359 return rc; 3360 3361 put_ts_buff: 3362 hl_mmap_mem_buf_put(buf); 3363 put_cq_cb: 3364 hl_cb_put(cq_cb); 3365 put_ctx: 3366 hl_ctx_put(ctx); 3367 3368 return rc; 3369 } 3370 3371 static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx, 3372 u64 timeout_us, u64 user_address, 3373 u64 target_value, struct hl_user_interrupt *interrupt, 3374 u32 *status, 3375 u64 *timestamp) 3376 { 3377 struct hl_user_pending_interrupt *pend; 3378 unsigned long timeout, flags; 3379 u64 completion_value; 3380 long completion_rc; 3381 int rc = 0; 3382 3383 timeout = hl_usecs64_to_jiffies(timeout_us); 3384 3385 hl_ctx_get(ctx); 3386 3387 pend = kzalloc(sizeof(*pend), GFP_KERNEL); 3388 if (!pend) { 3389 hl_ctx_put(ctx); 3390 return -ENOMEM; 3391 } 3392 3393 hl_fence_init(&pend->fence, ULONG_MAX); 3394 3395 /* Add pending user interrupt to relevant list for the interrupt 3396 * handler to monitor 3397 */ 3398 spin_lock_irqsave(&interrupt->wait_list_lock, flags); 3399 list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); 3400 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); 3401 3402 /* We check for completion value as interrupt could have been received 3403 * before we added the node to the wait list 3404 */ 3405 if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) { 3406 dev_err(hdev->dev, "Failed to copy completion value from user\n"); 3407 rc = -EFAULT; 3408 goto remove_pending_user_interrupt; 3409 } 3410 3411 if (completion_value >= target_value) { 3412 *status = HL_WAIT_CS_STATUS_COMPLETED; 3413 /* There was no interrupt, we assume the completion is now. */ 3414 pend->fence.timestamp = ktime_get(); 3415 } else { 3416 *status = HL_WAIT_CS_STATUS_BUSY; 3417 } 3418 3419 if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED)) 3420 goto remove_pending_user_interrupt; 3421 3422 wait_again: 3423 /* Wait for interrupt handler to signal completion */ 3424 completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, 3425 timeout); 3426 3427 /* If timeout did not expire we need to perform the comparison. 3428 * If comparison fails, keep waiting until timeout expires 3429 */ 3430 if (completion_rc > 0) { 3431 spin_lock_irqsave(&interrupt->wait_list_lock, flags); 3432 /* reinit_completion must be called before we check for user 3433 * completion value, otherwise, if interrupt is received after 3434 * the comparison and before the next wait_for_completion, 3435 * we will reach timeout and fail 3436 */ 3437 reinit_completion(&pend->fence.completion); 3438 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); 3439 3440 if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) { 3441 dev_err(hdev->dev, "Failed to copy completion value from user\n"); 3442 rc = -EFAULT; 3443 3444 goto remove_pending_user_interrupt; 3445 } 3446 3447 if (completion_value >= target_value) { 3448 *status = HL_WAIT_CS_STATUS_COMPLETED; 3449 } else if (pend->fence.error) { 3450 dev_err_ratelimited(hdev->dev, 3451 "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n", 3452 pend->fence.error); 3453 /* set the command completion status as ABORTED */ 3454 *status = HL_WAIT_CS_STATUS_ABORTED; 3455 } else { 3456 timeout = completion_rc; 3457 goto wait_again; 3458 } 3459 } else if (completion_rc == -ERESTARTSYS) { 3460 dev_err_ratelimited(hdev->dev, 3461 "user process got signal while waiting for interrupt ID %d\n", 3462 interrupt->interrupt_id); 3463 rc = -EINTR; 3464 } else { 3465 /* The wait has timed-out. We don't know anything beyond that 3466 * because the workload wasn't submitted through the driver. 3467 * Therefore, from driver's perspective, the workload is still 3468 * executing. 3469 */ 3470 rc = 0; 3471 *status = HL_WAIT_CS_STATUS_BUSY; 3472 } 3473 3474 remove_pending_user_interrupt: 3475 spin_lock_irqsave(&interrupt->wait_list_lock, flags); 3476 list_del(&pend->wait_list_node); 3477 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); 3478 3479 *timestamp = ktime_to_ns(pend->fence.timestamp); 3480 3481 kfree(pend); 3482 hl_ctx_put(ctx); 3483 3484 return rc; 3485 } 3486 3487 static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) 3488 { 3489 u16 interrupt_id, first_interrupt, last_interrupt; 3490 struct hl_device *hdev = hpriv->hdev; 3491 struct asic_fixed_properties *prop; 3492 struct hl_user_interrupt *interrupt; 3493 union hl_wait_cs_args *args = data; 3494 u32 status = HL_WAIT_CS_STATUS_BUSY; 3495 u64 timestamp = 0; 3496 int rc, int_idx; 3497 3498 prop = &hdev->asic_prop; 3499 3500 if (!(prop->user_interrupt_count + prop->user_dec_intr_count)) { 3501 dev_err(hdev->dev, "no user interrupts allowed"); 3502 return -EPERM; 3503 } 3504 3505 interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags); 3506 3507 first_interrupt = prop->first_available_user_interrupt; 3508 last_interrupt = prop->first_available_user_interrupt + prop->user_interrupt_count - 1; 3509 3510 if (interrupt_id < prop->user_dec_intr_count) { 3511 3512 /* Check if the requested core is enabled */ 3513 if (!(prop->decoder_enabled_mask & BIT(interrupt_id))) { 3514 dev_err(hdev->dev, "interrupt on a disabled core(%u) not allowed", 3515 interrupt_id); 3516 return -EINVAL; 3517 } 3518 3519 interrupt = &hdev->user_interrupt[interrupt_id]; 3520 3521 } else if (interrupt_id >= first_interrupt && interrupt_id <= last_interrupt) { 3522 3523 int_idx = interrupt_id - first_interrupt + prop->user_dec_intr_count; 3524 interrupt = &hdev->user_interrupt[int_idx]; 3525 3526 } else if (interrupt_id == HL_COMMON_USER_CQ_INTERRUPT_ID) { 3527 interrupt = &hdev->common_user_cq_interrupt; 3528 } else if (interrupt_id == HL_COMMON_DEC_INTERRUPT_ID) { 3529 interrupt = &hdev->common_decoder_interrupt; 3530 } else { 3531 dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id); 3532 return -EINVAL; 3533 } 3534 3535 if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) 3536 rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->mem_mgr, &hpriv->mem_mgr, 3537 args->in.interrupt_timeout_us, args->in.cq_counters_handle, 3538 args->in.cq_counters_offset, 3539 args->in.target, interrupt, 3540 !!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT), 3541 args->in.timestamp_handle, args->in.timestamp_offset, 3542 &status, ×tamp); 3543 else 3544 rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx, 3545 args->in.interrupt_timeout_us, args->in.addr, 3546 args->in.target, interrupt, &status, 3547 ×tamp); 3548 if (rc) 3549 return rc; 3550 3551 memset(args, 0, sizeof(*args)); 3552 args->out.status = status; 3553 3554 if (timestamp) { 3555 args->out.timestamp_nsec = timestamp; 3556 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; 3557 } 3558 3559 return 0; 3560 } 3561 3562 int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data) 3563 { 3564 struct hl_device *hdev = hpriv->hdev; 3565 union hl_wait_cs_args *args = data; 3566 u32 flags = args->in.flags; 3567 int rc; 3568 3569 /* If the device is not operational, or if an error has happened and user should release the 3570 * device, there is no point in waiting for any command submission or user interrupt. 3571 */ 3572 if (!hl_device_operational(hpriv->hdev, NULL) || hdev->reset_info.watchdog_active) 3573 return -EBUSY; 3574 3575 if (flags & HL_WAIT_CS_FLAGS_INTERRUPT) 3576 rc = hl_interrupt_wait_ioctl(hpriv, data); 3577 else if (flags & HL_WAIT_CS_FLAGS_MULTI_CS) 3578 rc = hl_multi_cs_wait_ioctl(hpriv, data); 3579 else 3580 rc = hl_cs_wait_ioctl(hpriv, data); 3581 3582 return rc; 3583 } 3584