1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Copyright 2016-2021 HabanaLabs, Ltd. 5 * All Rights Reserved. 6 */ 7 8 #include <uapi/drm/habanalabs_accel.h> 9 #include "habanalabs.h" 10 11 #include <linux/uaccess.h> 12 #include <linux/slab.h> 13 14 #define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \ 15 HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \ 16 HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND | \ 17 HL_CS_FLAGS_ENGINES_COMMAND | HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES) 18 19 20 #define MAX_TS_ITER_NUM 100 21 22 /** 23 * enum hl_cs_wait_status - cs wait status 24 * @CS_WAIT_STATUS_BUSY: cs was not completed yet 25 * @CS_WAIT_STATUS_COMPLETED: cs completed 26 * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone 27 */ 28 enum hl_cs_wait_status { 29 CS_WAIT_STATUS_BUSY, 30 CS_WAIT_STATUS_COMPLETED, 31 CS_WAIT_STATUS_GONE 32 }; 33 34 static void job_wq_completion(struct work_struct *work); 35 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq, 36 enum hl_cs_wait_status *status, s64 *timestamp); 37 static void cs_do_release(struct kref *ref); 38 39 static void hl_push_cs_outcome(struct hl_device *hdev, 40 struct hl_cs_outcome_store *outcome_store, 41 u64 seq, ktime_t ts, int error) 42 { 43 struct hl_cs_outcome *node; 44 unsigned long flags; 45 46 /* 47 * CS outcome store supports the following operations: 48 * push outcome - store a recent CS outcome in the store 49 * pop outcome - retrieve a SPECIFIC (by seq) CS outcome from the store 50 * It uses 2 lists: used list and free list. 51 * It has a pre-allocated amount of nodes, each node stores 52 * a single CS outcome. 53 * Initially, all the nodes are in the free list. 54 * On push outcome, a node (any) is taken from the free list, its 55 * information is filled in, and the node is moved to the used list. 56 * It is possible, that there are no nodes left in the free list. 57 * In this case, we will lose some information about old outcomes. We 58 * will pop the OLDEST node from the used list, and make it free. 59 * On pop, the node is searched for in the used list (using a search 60 * index). 61 * If found, the node is then removed from the used list, and moved 62 * back to the free list. The outcome data that the node contained is 63 * returned back to the user. 64 */ 65 66 spin_lock_irqsave(&outcome_store->db_lock, flags); 67 68 if (list_empty(&outcome_store->free_list)) { 69 node = list_last_entry(&outcome_store->used_list, 70 struct hl_cs_outcome, list_link); 71 hash_del(&node->map_link); 72 dev_dbg(hdev->dev, "CS %llu outcome was lost\n", node->seq); 73 } else { 74 node = list_last_entry(&outcome_store->free_list, 75 struct hl_cs_outcome, list_link); 76 } 77 78 list_del_init(&node->list_link); 79 80 node->seq = seq; 81 node->ts = ts; 82 node->error = error; 83 84 list_add(&node->list_link, &outcome_store->used_list); 85 hash_add(outcome_store->outcome_map, &node->map_link, node->seq); 86 87 spin_unlock_irqrestore(&outcome_store->db_lock, flags); 88 } 89 90 static bool hl_pop_cs_outcome(struct hl_cs_outcome_store *outcome_store, 91 u64 seq, ktime_t *ts, int *error) 92 { 93 struct hl_cs_outcome *node; 94 unsigned long flags; 95 96 spin_lock_irqsave(&outcome_store->db_lock, flags); 97 98 hash_for_each_possible(outcome_store->outcome_map, node, map_link, seq) 99 if (node->seq == seq) { 100 *ts = node->ts; 101 *error = node->error; 102 103 hash_del(&node->map_link); 104 list_del_init(&node->list_link); 105 list_add(&node->list_link, &outcome_store->free_list); 106 107 spin_unlock_irqrestore(&outcome_store->db_lock, flags); 108 109 return true; 110 } 111 112 spin_unlock_irqrestore(&outcome_store->db_lock, flags); 113 114 return false; 115 } 116 117 static void hl_sob_reset(struct kref *ref) 118 { 119 struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob, 120 kref); 121 struct hl_device *hdev = hw_sob->hdev; 122 123 dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id); 124 125 hdev->asic_funcs->reset_sob(hdev, hw_sob); 126 127 hw_sob->need_reset = false; 128 } 129 130 void hl_sob_reset_error(struct kref *ref) 131 { 132 struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob, 133 kref); 134 struct hl_device *hdev = hw_sob->hdev; 135 136 dev_crit(hdev->dev, 137 "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n", 138 hw_sob->q_idx, hw_sob->sob_id); 139 } 140 141 void hw_sob_put(struct hl_hw_sob *hw_sob) 142 { 143 if (hw_sob) 144 kref_put(&hw_sob->kref, hl_sob_reset); 145 } 146 147 static void hw_sob_put_err(struct hl_hw_sob *hw_sob) 148 { 149 if (hw_sob) 150 kref_put(&hw_sob->kref, hl_sob_reset_error); 151 } 152 153 void hw_sob_get(struct hl_hw_sob *hw_sob) 154 { 155 if (hw_sob) 156 kref_get(&hw_sob->kref); 157 } 158 159 /** 160 * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet 161 * @sob_base: sob base id 162 * @sob_mask: sob user mask, each bit represents a sob offset from sob base 163 * @mask: generated mask 164 * 165 * Return: 0 if given parameters are valid 166 */ 167 int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask) 168 { 169 int i; 170 171 if (sob_mask == 0) 172 return -EINVAL; 173 174 if (sob_mask == 0x1) { 175 *mask = ~(1 << (sob_base & 0x7)); 176 } else { 177 /* find msb in order to verify sob range is valid */ 178 for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--) 179 if (BIT(i) & sob_mask) 180 break; 181 182 if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1)) 183 return -EINVAL; 184 185 *mask = ~sob_mask; 186 } 187 188 return 0; 189 } 190 191 static void hl_fence_release(struct kref *kref) 192 { 193 struct hl_fence *fence = 194 container_of(kref, struct hl_fence, refcount); 195 struct hl_cs_compl *hl_cs_cmpl = 196 container_of(fence, struct hl_cs_compl, base_fence); 197 198 kfree(hl_cs_cmpl); 199 } 200 201 void hl_fence_put(struct hl_fence *fence) 202 { 203 if (IS_ERR_OR_NULL(fence)) 204 return; 205 kref_put(&fence->refcount, hl_fence_release); 206 } 207 208 void hl_fences_put(struct hl_fence **fence, int len) 209 { 210 int i; 211 212 for (i = 0; i < len; i++, fence++) 213 hl_fence_put(*fence); 214 } 215 216 void hl_fence_get(struct hl_fence *fence) 217 { 218 if (fence) 219 kref_get(&fence->refcount); 220 } 221 222 static void hl_fence_init(struct hl_fence *fence, u64 sequence) 223 { 224 kref_init(&fence->refcount); 225 fence->cs_sequence = sequence; 226 fence->error = 0; 227 fence->timestamp = ktime_set(0, 0); 228 fence->mcs_handling_done = false; 229 init_completion(&fence->completion); 230 } 231 232 void cs_get(struct hl_cs *cs) 233 { 234 kref_get(&cs->refcount); 235 } 236 237 static int cs_get_unless_zero(struct hl_cs *cs) 238 { 239 return kref_get_unless_zero(&cs->refcount); 240 } 241 242 static void cs_put(struct hl_cs *cs) 243 { 244 kref_put(&cs->refcount, cs_do_release); 245 } 246 247 static void cs_job_do_release(struct kref *ref) 248 { 249 struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount); 250 251 kfree(job); 252 } 253 254 static void hl_cs_job_put(struct hl_cs_job *job) 255 { 256 kref_put(&job->refcount, cs_job_do_release); 257 } 258 259 bool cs_needs_completion(struct hl_cs *cs) 260 { 261 /* In case this is a staged CS, only the last CS in sequence should 262 * get a completion, any non staged CS will always get a completion 263 */ 264 if (cs->staged_cs && !cs->staged_last) 265 return false; 266 267 return true; 268 } 269 270 bool cs_needs_timeout(struct hl_cs *cs) 271 { 272 /* In case this is a staged CS, only the first CS in sequence should 273 * get a timeout, any non staged CS will always get a timeout 274 */ 275 if (cs->staged_cs && !cs->staged_first) 276 return false; 277 278 return true; 279 } 280 281 static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job) 282 { 283 /* 284 * Patched CB is created for external queues jobs, and for H/W queues 285 * jobs if the user CB was allocated by driver and MMU is disabled. 286 */ 287 return (job->queue_type == QUEUE_TYPE_EXT || 288 (job->queue_type == QUEUE_TYPE_HW && 289 job->is_kernel_allocated_cb && 290 !hdev->mmu_enable)); 291 } 292 293 /* 294 * cs_parser - parse the user command submission 295 * 296 * @hpriv : pointer to the private data of the fd 297 * @job : pointer to the job that holds the command submission info 298 * 299 * The function parses the command submission of the user. It calls the 300 * ASIC specific parser, which returns a list of memory blocks to send 301 * to the device as different command buffers 302 * 303 */ 304 static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job) 305 { 306 struct hl_device *hdev = hpriv->hdev; 307 struct hl_cs_parser parser; 308 int rc; 309 310 parser.ctx_id = job->cs->ctx->asid; 311 parser.cs_sequence = job->cs->sequence; 312 parser.job_id = job->id; 313 314 parser.hw_queue_id = job->hw_queue_id; 315 parser.job_userptr_list = &job->userptr_list; 316 parser.patched_cb = NULL; 317 parser.user_cb = job->user_cb; 318 parser.user_cb_size = job->user_cb_size; 319 parser.queue_type = job->queue_type; 320 parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb; 321 job->patched_cb = NULL; 322 parser.completion = cs_needs_completion(job->cs); 323 324 rc = hdev->asic_funcs->cs_parser(hdev, &parser); 325 326 if (is_cb_patched(hdev, job)) { 327 if (!rc) { 328 job->patched_cb = parser.patched_cb; 329 job->job_cb_size = parser.patched_cb_size; 330 job->contains_dma_pkt = parser.contains_dma_pkt; 331 atomic_inc(&job->patched_cb->cs_cnt); 332 } 333 334 /* 335 * Whether the parsing worked or not, we don't need the 336 * original CB anymore because it was already parsed and 337 * won't be accessed again for this CS 338 */ 339 atomic_dec(&job->user_cb->cs_cnt); 340 hl_cb_put(job->user_cb); 341 job->user_cb = NULL; 342 } else if (!rc) { 343 job->job_cb_size = job->user_cb_size; 344 } 345 346 return rc; 347 } 348 349 static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job) 350 { 351 struct hl_cs *cs = job->cs; 352 353 if (is_cb_patched(hdev, job)) { 354 hl_userptr_delete_list(hdev, &job->userptr_list); 355 356 /* 357 * We might arrive here from rollback and patched CB wasn't 358 * created, so we need to check it's not NULL 359 */ 360 if (job->patched_cb) { 361 atomic_dec(&job->patched_cb->cs_cnt); 362 hl_cb_put(job->patched_cb); 363 } 364 } 365 366 /* For H/W queue jobs, if a user CB was allocated by driver and MMU is 367 * enabled, the user CB isn't released in cs_parser() and thus should be 368 * released here. This is also true for INT queues jobs which were 369 * allocated by driver. 370 */ 371 if ((job->is_kernel_allocated_cb && 372 ((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) || 373 job->queue_type == QUEUE_TYPE_INT))) { 374 atomic_dec(&job->user_cb->cs_cnt); 375 hl_cb_put(job->user_cb); 376 } 377 378 /* 379 * This is the only place where there can be multiple threads 380 * modifying the list at the same time 381 */ 382 spin_lock(&cs->job_lock); 383 list_del(&job->cs_node); 384 spin_unlock(&cs->job_lock); 385 386 hl_debugfs_remove_job(hdev, job); 387 388 /* We decrement reference only for a CS that gets completion 389 * because the reference was incremented only for this kind of CS 390 * right before it was scheduled. 391 * 392 * In staged submission, only the last CS marked as 'staged_last' 393 * gets completion, hence its release function will be called from here. 394 * As for all the rest CS's in the staged submission which do not get 395 * completion, their CS reference will be decremented by the 396 * 'staged_last' CS during the CS release flow. 397 * All relevant PQ CI counters will be incremented during the CS release 398 * flow by calling 'hl_hw_queue_update_ci'. 399 */ 400 if (cs_needs_completion(cs) && 401 (job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW)) { 402 403 /* In CS based completions, the timestamp is already available, 404 * so no need to extract it from job 405 */ 406 if (hdev->asic_prop.completion_mode == HL_COMPLETION_MODE_JOB) 407 cs->completion_timestamp = job->timestamp; 408 409 cs_put(cs); 410 } 411 412 hl_cs_job_put(job); 413 } 414 415 /* 416 * hl_staged_cs_find_first - locate the first CS in this staged submission 417 * 418 * @hdev: pointer to device structure 419 * @cs_seq: staged submission sequence number 420 * 421 * @note: This function must be called under 'hdev->cs_mirror_lock' 422 * 423 * Find and return a CS pointer with the given sequence 424 */ 425 struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq) 426 { 427 struct hl_cs *cs; 428 429 list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node) 430 if (cs->staged_cs && cs->staged_first && 431 cs->sequence == cs_seq) 432 return cs; 433 434 return NULL; 435 } 436 437 /* 438 * is_staged_cs_last_exists - returns true if the last CS in sequence exists 439 * 440 * @hdev: pointer to device structure 441 * @cs: staged submission member 442 * 443 */ 444 bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs) 445 { 446 struct hl_cs *last_entry; 447 448 last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs, 449 staged_cs_node); 450 451 if (last_entry->staged_last) 452 return true; 453 454 return false; 455 } 456 457 /* 458 * staged_cs_get - get CS reference if this CS is a part of a staged CS 459 * 460 * @hdev: pointer to device structure 461 * @cs: current CS 462 * @cs_seq: staged submission sequence number 463 * 464 * Increment CS reference for every CS in this staged submission except for 465 * the CS which get completion. 466 */ 467 static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs) 468 { 469 /* Only the last CS in this staged submission will get a completion. 470 * We must increment the reference for all other CS's in this 471 * staged submission. 472 * Once we get a completion we will release the whole staged submission. 473 */ 474 if (!cs->staged_last) 475 cs_get(cs); 476 } 477 478 /* 479 * staged_cs_put - put a CS in case it is part of staged submission 480 * 481 * @hdev: pointer to device structure 482 * @cs: CS to put 483 * 484 * This function decrements a CS reference (for a non completion CS) 485 */ 486 static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs) 487 { 488 /* We release all CS's in a staged submission except the last 489 * CS which we have never incremented its reference. 490 */ 491 if (!cs_needs_completion(cs)) 492 cs_put(cs); 493 } 494 495 static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) 496 { 497 struct hl_cs *next = NULL, *iter, *first_cs; 498 499 if (!cs_needs_timeout(cs)) 500 return; 501 502 spin_lock(&hdev->cs_mirror_lock); 503 504 /* We need to handle tdr only once for the complete staged submission. 505 * Hence, we choose the CS that reaches this function first which is 506 * the CS marked as 'staged_last'. 507 * In case single staged cs was submitted which has both first and last 508 * indications, then "cs_find_first" below will return NULL, since we 509 * removed the cs node from the list before getting here, 510 * in such cases just continue with the cs to cancel it's TDR work. 511 */ 512 if (cs->staged_cs && cs->staged_last) { 513 first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence); 514 if (first_cs) 515 cs = first_cs; 516 } 517 518 spin_unlock(&hdev->cs_mirror_lock); 519 520 /* Don't cancel TDR in case this CS was timedout because we might be 521 * running from the TDR context 522 */ 523 if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT) 524 return; 525 526 if (cs->tdr_active) 527 cancel_delayed_work_sync(&cs->work_tdr); 528 529 spin_lock(&hdev->cs_mirror_lock); 530 531 /* queue TDR for next CS */ 532 list_for_each_entry(iter, &hdev->cs_mirror_list, mirror_node) 533 if (cs_needs_timeout(iter)) { 534 next = iter; 535 break; 536 } 537 538 if (next && !next->tdr_active) { 539 next->tdr_active = true; 540 schedule_delayed_work(&next->work_tdr, next->timeout_jiffies); 541 } 542 543 spin_unlock(&hdev->cs_mirror_lock); 544 } 545 546 /* 547 * force_complete_multi_cs - complete all contexts that wait on multi-CS 548 * 549 * @hdev: pointer to habanalabs device structure 550 */ 551 static void force_complete_multi_cs(struct hl_device *hdev) 552 { 553 int i; 554 555 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { 556 struct multi_cs_completion *mcs_compl; 557 558 mcs_compl = &hdev->multi_cs_completion[i]; 559 560 spin_lock(&mcs_compl->lock); 561 562 if (!mcs_compl->used) { 563 spin_unlock(&mcs_compl->lock); 564 continue; 565 } 566 567 /* when calling force complete no context should be waiting on 568 * multi-cS. 569 * We are calling the function as a protection for such case 570 * to free any pending context and print error message 571 */ 572 dev_err(hdev->dev, 573 "multi-CS completion context %d still waiting when calling force completion\n", 574 i); 575 complete_all(&mcs_compl->completion); 576 spin_unlock(&mcs_compl->lock); 577 } 578 } 579 580 /* 581 * complete_multi_cs - complete all waiting entities on multi-CS 582 * 583 * @hdev: pointer to habanalabs device structure 584 * @cs: CS structure 585 * The function signals a waiting entity that has an overlapping stream masters 586 * with the completed CS. 587 * For example: 588 * - a completed CS worked on stream master QID 4, multi CS completion 589 * is actively waiting on stream master QIDs 3, 5. don't send signal as no 590 * common stream master QID 591 * - a completed CS worked on stream master QID 4, multi CS completion 592 * is actively waiting on stream master QIDs 3, 4. send signal as stream 593 * master QID 4 is common 594 */ 595 static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs) 596 { 597 struct hl_fence *fence = cs->fence; 598 int i; 599 600 /* in case of multi CS check for completion only for the first CS */ 601 if (cs->staged_cs && !cs->staged_first) 602 return; 603 604 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { 605 struct multi_cs_completion *mcs_compl; 606 607 mcs_compl = &hdev->multi_cs_completion[i]; 608 if (!mcs_compl->used) 609 continue; 610 611 spin_lock(&mcs_compl->lock); 612 613 /* 614 * complete if: 615 * 1. still waiting for completion 616 * 2. the completed CS has at least one overlapping stream 617 * master with the stream masters in the completion 618 */ 619 if (mcs_compl->used && 620 (fence->stream_master_qid_map & 621 mcs_compl->stream_master_qid_map)) { 622 /* extract the timestamp only of first completed CS */ 623 if (!mcs_compl->timestamp) 624 mcs_compl->timestamp = ktime_to_ns(fence->timestamp); 625 626 complete_all(&mcs_compl->completion); 627 628 /* 629 * Setting mcs_handling_done inside the lock ensures 630 * at least one fence have mcs_handling_done set to 631 * true before wait for mcs finish. This ensures at 632 * least one CS will be set as completed when polling 633 * mcs fences. 634 */ 635 fence->mcs_handling_done = true; 636 } 637 638 spin_unlock(&mcs_compl->lock); 639 } 640 /* In case CS completed without mcs completion initialized */ 641 fence->mcs_handling_done = true; 642 } 643 644 static inline void cs_release_sob_reset_handler(struct hl_device *hdev, 645 struct hl_cs *cs, 646 struct hl_cs_compl *hl_cs_cmpl) 647 { 648 /* Skip this handler if the cs wasn't submitted, to avoid putting 649 * the hw_sob twice, since this case already handled at this point, 650 * also skip if the hw_sob pointer wasn't set. 651 */ 652 if (!hl_cs_cmpl->hw_sob || !cs->submitted) 653 return; 654 655 spin_lock(&hl_cs_cmpl->lock); 656 657 /* 658 * we get refcount upon reservation of signals or signal/wait cs for the 659 * hw_sob object, and need to put it when the first staged cs 660 * (which contains the encaps signals) or cs signal/wait is completed. 661 */ 662 if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) || 663 (hl_cs_cmpl->type == CS_TYPE_WAIT) || 664 (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) || 665 (!!hl_cs_cmpl->encaps_signals)) { 666 dev_dbg(hdev->dev, 667 "CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n", 668 hl_cs_cmpl->cs_seq, 669 hl_cs_cmpl->type, 670 hl_cs_cmpl->hw_sob->sob_id, 671 hl_cs_cmpl->sob_val); 672 673 hw_sob_put(hl_cs_cmpl->hw_sob); 674 675 if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) 676 hdev->asic_funcs->reset_sob_group(hdev, 677 hl_cs_cmpl->sob_group); 678 } 679 680 spin_unlock(&hl_cs_cmpl->lock); 681 } 682 683 static void cs_do_release(struct kref *ref) 684 { 685 struct hl_cs *cs = container_of(ref, struct hl_cs, refcount); 686 struct hl_device *hdev = cs->ctx->hdev; 687 struct hl_cs_job *job, *tmp; 688 struct hl_cs_compl *hl_cs_cmpl = 689 container_of(cs->fence, struct hl_cs_compl, base_fence); 690 691 cs->completed = true; 692 693 /* 694 * Although if we reached here it means that all external jobs have 695 * finished, because each one of them took refcnt to CS, we still 696 * need to go over the internal jobs and complete them. Otherwise, we 697 * will have leaked memory and what's worse, the CS object (and 698 * potentially the CTX object) could be released, while the JOB 699 * still holds a pointer to them (but no reference). 700 */ 701 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) 702 hl_complete_job(hdev, job); 703 704 if (!cs->submitted) { 705 /* 706 * In case the wait for signal CS was submitted, the fence put 707 * occurs in init_signal_wait_cs() or collective_wait_init_cs() 708 * right before hanging on the PQ. 709 */ 710 if (cs->type == CS_TYPE_WAIT || 711 cs->type == CS_TYPE_COLLECTIVE_WAIT) 712 hl_fence_put(cs->signal_fence); 713 714 goto out; 715 } 716 717 /* Need to update CI for all queue jobs that does not get completion */ 718 hl_hw_queue_update_ci(cs); 719 720 /* remove CS from CS mirror list */ 721 spin_lock(&hdev->cs_mirror_lock); 722 list_del_init(&cs->mirror_node); 723 spin_unlock(&hdev->cs_mirror_lock); 724 725 cs_handle_tdr(hdev, cs); 726 727 if (cs->staged_cs) { 728 /* the completion CS decrements reference for the entire 729 * staged submission 730 */ 731 if (cs->staged_last) { 732 struct hl_cs *staged_cs, *tmp_cs; 733 734 list_for_each_entry_safe(staged_cs, tmp_cs, 735 &cs->staged_cs_node, staged_cs_node) 736 staged_cs_put(hdev, staged_cs); 737 } 738 739 /* A staged CS will be a member in the list only after it 740 * was submitted. We used 'cs_mirror_lock' when inserting 741 * it to list so we will use it again when removing it 742 */ 743 if (cs->submitted) { 744 spin_lock(&hdev->cs_mirror_lock); 745 list_del(&cs->staged_cs_node); 746 spin_unlock(&hdev->cs_mirror_lock); 747 } 748 749 /* decrement refcount to handle when first staged cs 750 * with encaps signals is completed. 751 */ 752 if (hl_cs_cmpl->encaps_signals) 753 kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount, 754 hl_encaps_release_handle_and_put_ctx); 755 } 756 757 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) && cs->encaps_signals) 758 kref_put(&cs->encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx); 759 760 out: 761 /* Must be called before hl_ctx_put because inside we use ctx to get 762 * the device 763 */ 764 hl_debugfs_remove_cs(cs); 765 766 hdev->shadow_cs_queue[cs->sequence & (hdev->asic_prop.max_pending_cs - 1)] = NULL; 767 768 /* We need to mark an error for not submitted because in that case 769 * the hl fence release flow is different. Mainly, we don't need 770 * to handle hw_sob for signal/wait 771 */ 772 if (cs->timedout) 773 cs->fence->error = -ETIMEDOUT; 774 else if (cs->aborted) 775 cs->fence->error = -EIO; 776 else if (!cs->submitted) 777 cs->fence->error = -EBUSY; 778 779 if (unlikely(cs->skip_reset_on_timeout)) { 780 dev_err(hdev->dev, 781 "Command submission %llu completed after %llu (s)\n", 782 cs->sequence, 783 div_u64(jiffies - cs->submission_time_jiffies, HZ)); 784 } 785 786 if (cs->timestamp) { 787 cs->fence->timestamp = cs->completion_timestamp; 788 hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence, 789 cs->fence->timestamp, cs->fence->error); 790 } 791 792 hl_ctx_put(cs->ctx); 793 794 complete_all(&cs->fence->completion); 795 complete_multi_cs(hdev, cs); 796 797 cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl); 798 799 hl_fence_put(cs->fence); 800 801 kfree(cs->jobs_in_queue_cnt); 802 kfree(cs); 803 } 804 805 static void cs_timedout(struct work_struct *work) 806 { 807 struct hl_cs *cs = container_of(work, struct hl_cs, work_tdr.work); 808 bool skip_reset_on_timeout, device_reset = false; 809 struct hl_device *hdev; 810 u64 event_mask = 0x0; 811 uint timeout_sec; 812 int rc; 813 814 skip_reset_on_timeout = cs->skip_reset_on_timeout; 815 816 rc = cs_get_unless_zero(cs); 817 if (!rc) 818 return; 819 820 if ((!cs->submitted) || (cs->completed)) { 821 cs_put(cs); 822 return; 823 } 824 825 hdev = cs->ctx->hdev; 826 827 if (likely(!skip_reset_on_timeout)) { 828 if (hdev->reset_on_lockup) 829 device_reset = true; 830 else 831 hdev->reset_info.needs_reset = true; 832 833 /* Mark the CS is timed out so we won't try to cancel its TDR */ 834 cs->timedout = true; 835 } 836 837 /* Save only the first CS timeout parameters */ 838 rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0); 839 if (rc) { 840 hdev->captured_err_info.cs_timeout.timestamp = ktime_get(); 841 hdev->captured_err_info.cs_timeout.seq = cs->sequence; 842 event_mask |= HL_NOTIFIER_EVENT_CS_TIMEOUT; 843 } 844 845 timeout_sec = jiffies_to_msecs(hdev->timeout_jiffies) / 1000; 846 847 switch (cs->type) { 848 case CS_TYPE_SIGNAL: 849 dev_err(hdev->dev, 850 "Signal command submission %llu has not finished in %u seconds!\n", 851 cs->sequence, timeout_sec); 852 break; 853 854 case CS_TYPE_WAIT: 855 dev_err(hdev->dev, 856 "Wait command submission %llu has not finished in %u seconds!\n", 857 cs->sequence, timeout_sec); 858 break; 859 860 case CS_TYPE_COLLECTIVE_WAIT: 861 dev_err(hdev->dev, 862 "Collective Wait command submission %llu has not finished in %u seconds!\n", 863 cs->sequence, timeout_sec); 864 break; 865 866 default: 867 dev_err(hdev->dev, 868 "Command submission %llu has not finished in %u seconds!\n", 869 cs->sequence, timeout_sec); 870 break; 871 } 872 873 rc = hl_state_dump(hdev); 874 if (rc) 875 dev_err(hdev->dev, "Error during system state dump %d\n", rc); 876 877 cs_put(cs); 878 879 if (device_reset) { 880 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 881 hl_device_cond_reset(hdev, HL_DRV_RESET_TDR, event_mask); 882 } else if (event_mask) { 883 hl_notifier_event_send_all(hdev, event_mask); 884 } 885 } 886 887 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, 888 enum hl_cs_type cs_type, u64 user_sequence, 889 struct hl_cs **cs_new, u32 flags, u32 timeout) 890 { 891 struct hl_cs_counters_atomic *cntr; 892 struct hl_fence *other = NULL; 893 struct hl_cs_compl *cs_cmpl; 894 struct hl_cs *cs; 895 int rc; 896 897 cntr = &hdev->aggregated_cs_counters; 898 899 cs = kzalloc(sizeof(*cs), GFP_ATOMIC); 900 if (!cs) 901 cs = kzalloc(sizeof(*cs), GFP_KERNEL); 902 903 if (!cs) { 904 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 905 atomic64_inc(&cntr->out_of_mem_drop_cnt); 906 return -ENOMEM; 907 } 908 909 /* increment refcnt for context */ 910 hl_ctx_get(ctx); 911 912 cs->ctx = ctx; 913 cs->submitted = false; 914 cs->completed = false; 915 cs->type = cs_type; 916 cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP); 917 cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS); 918 cs->timeout_jiffies = timeout; 919 cs->skip_reset_on_timeout = 920 hdev->reset_info.skip_reset_on_timeout || 921 !!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT); 922 cs->submission_time_jiffies = jiffies; 923 INIT_LIST_HEAD(&cs->job_list); 924 INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout); 925 kref_init(&cs->refcount); 926 spin_lock_init(&cs->job_lock); 927 928 cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_ATOMIC); 929 if (!cs_cmpl) 930 cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_KERNEL); 931 932 if (!cs_cmpl) { 933 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 934 atomic64_inc(&cntr->out_of_mem_drop_cnt); 935 rc = -ENOMEM; 936 goto free_cs; 937 } 938 939 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, 940 sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); 941 if (!cs->jobs_in_queue_cnt) 942 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, 943 sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL); 944 945 if (!cs->jobs_in_queue_cnt) { 946 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 947 atomic64_inc(&cntr->out_of_mem_drop_cnt); 948 rc = -ENOMEM; 949 goto free_cs_cmpl; 950 } 951 952 cs_cmpl->hdev = hdev; 953 cs_cmpl->type = cs->type; 954 spin_lock_init(&cs_cmpl->lock); 955 cs->fence = &cs_cmpl->base_fence; 956 957 spin_lock(&ctx->cs_lock); 958 959 cs_cmpl->cs_seq = ctx->cs_sequence; 960 other = ctx->cs_pending[cs_cmpl->cs_seq & 961 (hdev->asic_prop.max_pending_cs - 1)]; 962 963 if (other && !completion_done(&other->completion)) { 964 /* If the following statement is true, it means we have reached 965 * a point in which only part of the staged submission was 966 * submitted and we don't have enough room in the 'cs_pending' 967 * array for the rest of the submission. 968 * This causes a deadlock because this CS will never be 969 * completed as it depends on future CS's for completion. 970 */ 971 if (other->cs_sequence == user_sequence) 972 dev_crit_ratelimited(hdev->dev, 973 "Staged CS %llu deadlock due to lack of resources", 974 user_sequence); 975 976 dev_dbg_ratelimited(hdev->dev, 977 "Rejecting CS because of too many in-flights CS\n"); 978 atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt); 979 atomic64_inc(&cntr->max_cs_in_flight_drop_cnt); 980 rc = -EAGAIN; 981 goto free_fence; 982 } 983 984 /* init hl_fence */ 985 hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq); 986 987 cs->sequence = cs_cmpl->cs_seq; 988 989 ctx->cs_pending[cs_cmpl->cs_seq & 990 (hdev->asic_prop.max_pending_cs - 1)] = 991 &cs_cmpl->base_fence; 992 ctx->cs_sequence++; 993 994 hl_fence_get(&cs_cmpl->base_fence); 995 996 hl_fence_put(other); 997 998 spin_unlock(&ctx->cs_lock); 999 1000 *cs_new = cs; 1001 1002 return 0; 1003 1004 free_fence: 1005 spin_unlock(&ctx->cs_lock); 1006 kfree(cs->jobs_in_queue_cnt); 1007 free_cs_cmpl: 1008 kfree(cs_cmpl); 1009 free_cs: 1010 kfree(cs); 1011 hl_ctx_put(ctx); 1012 return rc; 1013 } 1014 1015 static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs) 1016 { 1017 struct hl_cs_job *job, *tmp; 1018 1019 staged_cs_put(hdev, cs); 1020 1021 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) 1022 hl_complete_job(hdev, job); 1023 } 1024 1025 /* 1026 * release_reserved_encaps_signals() - release reserved encapsulated signals. 1027 * @hdev: pointer to habanalabs device structure 1028 * 1029 * Release reserved encapsulated signals which weren't un-reserved, or for which a CS with 1030 * encapsulated signals wasn't submitted and thus weren't released as part of CS roll-back. 1031 * For these signals need also to put the refcount of the H/W SOB which was taken at the 1032 * reservation. 1033 */ 1034 static void release_reserved_encaps_signals(struct hl_device *hdev) 1035 { 1036 struct hl_ctx *ctx = hl_get_compute_ctx(hdev); 1037 struct hl_cs_encaps_sig_handle *handle; 1038 struct hl_encaps_signals_mgr *mgr; 1039 u32 id; 1040 1041 if (!ctx) 1042 return; 1043 1044 mgr = &ctx->sig_mgr; 1045 1046 idr_for_each_entry(&mgr->handles, handle, id) 1047 if (handle->cs_seq == ULLONG_MAX) 1048 kref_put(&handle->refcount, hl_encaps_release_handle_and_put_sob_ctx); 1049 1050 hl_ctx_put(ctx); 1051 } 1052 1053 void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush) 1054 { 1055 int i; 1056 struct hl_cs *cs, *tmp; 1057 1058 if (!skip_wq_flush) { 1059 flush_workqueue(hdev->ts_free_obj_wq); 1060 1061 /* flush all completions before iterating over the CS mirror list in 1062 * order to avoid a race with the release functions 1063 */ 1064 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 1065 flush_workqueue(hdev->cq_wq[i]); 1066 1067 flush_workqueue(hdev->cs_cmplt_wq); 1068 } 1069 1070 /* Make sure we don't have leftovers in the CS mirror list */ 1071 list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) { 1072 cs_get(cs); 1073 cs->aborted = true; 1074 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n", 1075 cs->ctx->asid, cs->sequence); 1076 cs_rollback(hdev, cs); 1077 cs_put(cs); 1078 } 1079 1080 force_complete_multi_cs(hdev); 1081 1082 release_reserved_encaps_signals(hdev); 1083 } 1084 1085 static void 1086 wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt) 1087 { 1088 struct hl_user_pending_interrupt *pend, *temp; 1089 1090 spin_lock(&interrupt->wait_list_lock); 1091 list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) { 1092 if (pend->ts_reg_info.buf) { 1093 list_del(&pend->wait_list_node); 1094 hl_mmap_mem_buf_put(pend->ts_reg_info.buf); 1095 hl_cb_put(pend->ts_reg_info.cq_cb); 1096 } else { 1097 pend->fence.error = -EIO; 1098 complete_all(&pend->fence.completion); 1099 } 1100 } 1101 spin_unlock(&interrupt->wait_list_lock); 1102 } 1103 1104 void hl_release_pending_user_interrupts(struct hl_device *hdev) 1105 { 1106 struct asic_fixed_properties *prop = &hdev->asic_prop; 1107 struct hl_user_interrupt *interrupt; 1108 int i; 1109 1110 if (!prop->user_interrupt_count) 1111 return; 1112 1113 /* We iterate through the user interrupt requests and waking up all 1114 * user threads waiting for interrupt completion. We iterate the 1115 * list under a lock, this is why all user threads, once awake, 1116 * will wait on the same lock and will release the waiting object upon 1117 * unlock. 1118 */ 1119 1120 for (i = 0 ; i < prop->user_interrupt_count ; i++) { 1121 interrupt = &hdev->user_interrupt[i]; 1122 wake_pending_user_interrupt_threads(interrupt); 1123 } 1124 1125 interrupt = &hdev->common_user_cq_interrupt; 1126 wake_pending_user_interrupt_threads(interrupt); 1127 1128 interrupt = &hdev->common_decoder_interrupt; 1129 wake_pending_user_interrupt_threads(interrupt); 1130 } 1131 1132 static void force_complete_cs(struct hl_device *hdev) 1133 { 1134 struct hl_cs *cs; 1135 1136 spin_lock(&hdev->cs_mirror_lock); 1137 1138 list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) { 1139 cs->fence->error = -EIO; 1140 complete_all(&cs->fence->completion); 1141 } 1142 1143 spin_unlock(&hdev->cs_mirror_lock); 1144 } 1145 1146 void hl_abort_waiting_for_cs_completions(struct hl_device *hdev) 1147 { 1148 force_complete_cs(hdev); 1149 force_complete_multi_cs(hdev); 1150 } 1151 1152 static void job_wq_completion(struct work_struct *work) 1153 { 1154 struct hl_cs_job *job = container_of(work, struct hl_cs_job, 1155 finish_work); 1156 struct hl_cs *cs = job->cs; 1157 struct hl_device *hdev = cs->ctx->hdev; 1158 1159 /* job is no longer needed */ 1160 hl_complete_job(hdev, job); 1161 } 1162 1163 static void cs_completion(struct work_struct *work) 1164 { 1165 struct hl_cs *cs = container_of(work, struct hl_cs, finish_work); 1166 struct hl_device *hdev = cs->ctx->hdev; 1167 struct hl_cs_job *job, *tmp; 1168 1169 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) 1170 hl_complete_job(hdev, job); 1171 } 1172 1173 u32 hl_get_active_cs_num(struct hl_device *hdev) 1174 { 1175 u32 active_cs_num = 0; 1176 struct hl_cs *cs; 1177 1178 spin_lock(&hdev->cs_mirror_lock); 1179 1180 list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) 1181 if (!cs->completed) 1182 active_cs_num++; 1183 1184 spin_unlock(&hdev->cs_mirror_lock); 1185 1186 return active_cs_num; 1187 } 1188 1189 static int validate_queue_index(struct hl_device *hdev, 1190 struct hl_cs_chunk *chunk, 1191 enum hl_queue_type *queue_type, 1192 bool *is_kernel_allocated_cb) 1193 { 1194 struct asic_fixed_properties *asic = &hdev->asic_prop; 1195 struct hw_queue_properties *hw_queue_prop; 1196 1197 /* This must be checked here to prevent out-of-bounds access to 1198 * hw_queues_props array 1199 */ 1200 if (chunk->queue_index >= asic->max_queues) { 1201 dev_err(hdev->dev, "Queue index %d is invalid\n", 1202 chunk->queue_index); 1203 return -EINVAL; 1204 } 1205 1206 hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; 1207 1208 if (hw_queue_prop->type == QUEUE_TYPE_NA) { 1209 dev_err(hdev->dev, "Queue index %d is not applicable\n", 1210 chunk->queue_index); 1211 return -EINVAL; 1212 } 1213 1214 if (hw_queue_prop->binned) { 1215 dev_err(hdev->dev, "Queue index %d is binned out\n", 1216 chunk->queue_index); 1217 return -EINVAL; 1218 } 1219 1220 if (hw_queue_prop->driver_only) { 1221 dev_err(hdev->dev, 1222 "Queue index %d is restricted for the kernel driver\n", 1223 chunk->queue_index); 1224 return -EINVAL; 1225 } 1226 1227 /* When hw queue type isn't QUEUE_TYPE_HW, 1228 * USER_ALLOC_CB flag shall be referred as "don't care". 1229 */ 1230 if (hw_queue_prop->type == QUEUE_TYPE_HW) { 1231 if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) { 1232 if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) { 1233 dev_err(hdev->dev, 1234 "Queue index %d doesn't support user CB\n", 1235 chunk->queue_index); 1236 return -EINVAL; 1237 } 1238 1239 *is_kernel_allocated_cb = false; 1240 } else { 1241 if (!(hw_queue_prop->cb_alloc_flags & 1242 CB_ALLOC_KERNEL)) { 1243 dev_err(hdev->dev, 1244 "Queue index %d doesn't support kernel CB\n", 1245 chunk->queue_index); 1246 return -EINVAL; 1247 } 1248 1249 *is_kernel_allocated_cb = true; 1250 } 1251 } else { 1252 *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags 1253 & CB_ALLOC_KERNEL); 1254 } 1255 1256 *queue_type = hw_queue_prop->type; 1257 return 0; 1258 } 1259 1260 static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev, 1261 struct hl_mem_mgr *mmg, 1262 struct hl_cs_chunk *chunk) 1263 { 1264 struct hl_cb *cb; 1265 1266 cb = hl_cb_get(mmg, chunk->cb_handle); 1267 if (!cb) { 1268 dev_err(hdev->dev, "CB handle 0x%llx invalid\n", chunk->cb_handle); 1269 return NULL; 1270 } 1271 1272 if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) { 1273 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size); 1274 goto release_cb; 1275 } 1276 1277 atomic_inc(&cb->cs_cnt); 1278 1279 return cb; 1280 1281 release_cb: 1282 hl_cb_put(cb); 1283 return NULL; 1284 } 1285 1286 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, 1287 enum hl_queue_type queue_type, bool is_kernel_allocated_cb) 1288 { 1289 struct hl_cs_job *job; 1290 1291 job = kzalloc(sizeof(*job), GFP_ATOMIC); 1292 if (!job) 1293 job = kzalloc(sizeof(*job), GFP_KERNEL); 1294 1295 if (!job) 1296 return NULL; 1297 1298 kref_init(&job->refcount); 1299 job->queue_type = queue_type; 1300 job->is_kernel_allocated_cb = is_kernel_allocated_cb; 1301 1302 if (is_cb_patched(hdev, job)) 1303 INIT_LIST_HEAD(&job->userptr_list); 1304 1305 if (job->queue_type == QUEUE_TYPE_EXT) 1306 INIT_WORK(&job->finish_work, job_wq_completion); 1307 1308 return job; 1309 } 1310 1311 static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags) 1312 { 1313 if (cs_type_flags & HL_CS_FLAGS_SIGNAL) 1314 return CS_TYPE_SIGNAL; 1315 else if (cs_type_flags & HL_CS_FLAGS_WAIT) 1316 return CS_TYPE_WAIT; 1317 else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT) 1318 return CS_TYPE_COLLECTIVE_WAIT; 1319 else if (cs_type_flags & HL_CS_FLAGS_RESERVE_SIGNALS_ONLY) 1320 return CS_RESERVE_SIGNALS; 1321 else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY) 1322 return CS_UNRESERVE_SIGNALS; 1323 else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND) 1324 return CS_TYPE_ENGINE_CORE; 1325 else if (cs_type_flags & HL_CS_FLAGS_ENGINES_COMMAND) 1326 return CS_TYPE_ENGINES; 1327 else if (cs_type_flags & HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES) 1328 return CS_TYPE_FLUSH_PCI_HBW_WRITES; 1329 else 1330 return CS_TYPE_DEFAULT; 1331 } 1332 1333 static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args) 1334 { 1335 struct hl_device *hdev = hpriv->hdev; 1336 struct hl_ctx *ctx = hpriv->ctx; 1337 u32 cs_type_flags, num_chunks; 1338 enum hl_device_status status; 1339 enum hl_cs_type cs_type; 1340 bool is_sync_stream; 1341 int i; 1342 1343 for (i = 0 ; i < sizeof(args->in.pad) ; i++) 1344 if (args->in.pad[i]) { 1345 dev_dbg(hdev->dev, "Padding bytes must be 0\n"); 1346 return -EINVAL; 1347 } 1348 1349 if (!hl_device_operational(hdev, &status)) { 1350 return -EBUSY; 1351 } 1352 1353 if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) && 1354 !hdev->supports_staged_submission) { 1355 dev_err(hdev->dev, "staged submission not supported"); 1356 return -EPERM; 1357 } 1358 1359 cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK; 1360 1361 if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) { 1362 dev_err(hdev->dev, 1363 "CS type flags are mutually exclusive, context %d\n", 1364 ctx->asid); 1365 return -EINVAL; 1366 } 1367 1368 cs_type = hl_cs_get_cs_type(cs_type_flags); 1369 num_chunks = args->in.num_chunks_execute; 1370 1371 is_sync_stream = (cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT || 1372 cs_type == CS_TYPE_COLLECTIVE_WAIT); 1373 1374 if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) { 1375 dev_err(hdev->dev, "Sync stream CS is not supported\n"); 1376 return -EINVAL; 1377 } 1378 1379 if (cs_type == CS_TYPE_DEFAULT) { 1380 if (!num_chunks) { 1381 dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid); 1382 return -EINVAL; 1383 } 1384 } else if (is_sync_stream && num_chunks != 1) { 1385 dev_err(hdev->dev, 1386 "Sync stream CS mandates one chunk only, context %d\n", 1387 ctx->asid); 1388 return -EINVAL; 1389 } 1390 1391 return 0; 1392 } 1393 1394 static int hl_cs_copy_chunk_array(struct hl_device *hdev, 1395 struct hl_cs_chunk **cs_chunk_array, 1396 void __user *chunks, u32 num_chunks, 1397 struct hl_ctx *ctx) 1398 { 1399 u32 size_to_copy; 1400 1401 if (num_chunks > HL_MAX_JOBS_PER_CS) { 1402 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 1403 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); 1404 dev_err(hdev->dev, 1405 "Number of chunks can NOT be larger than %d\n", 1406 HL_MAX_JOBS_PER_CS); 1407 return -EINVAL; 1408 } 1409 1410 *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array), 1411 GFP_ATOMIC); 1412 if (!*cs_chunk_array) 1413 *cs_chunk_array = kmalloc_array(num_chunks, 1414 sizeof(**cs_chunk_array), GFP_KERNEL); 1415 if (!*cs_chunk_array) { 1416 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1417 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); 1418 return -ENOMEM; 1419 } 1420 1421 size_to_copy = num_chunks * sizeof(struct hl_cs_chunk); 1422 if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) { 1423 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 1424 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); 1425 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n"); 1426 kfree(*cs_chunk_array); 1427 return -EFAULT; 1428 } 1429 1430 return 0; 1431 } 1432 1433 static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs, 1434 u64 sequence, u32 flags, 1435 u32 encaps_signal_handle) 1436 { 1437 if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION)) 1438 return 0; 1439 1440 cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST); 1441 cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST); 1442 1443 if (cs->staged_first) { 1444 /* Staged CS sequence is the first CS sequence */ 1445 INIT_LIST_HEAD(&cs->staged_cs_node); 1446 cs->staged_sequence = cs->sequence; 1447 1448 if (cs->encaps_signals) 1449 cs->encaps_sig_hdl_id = encaps_signal_handle; 1450 } else { 1451 /* User sequence will be validated in 'hl_hw_queue_schedule_cs' 1452 * under the cs_mirror_lock 1453 */ 1454 cs->staged_sequence = sequence; 1455 } 1456 1457 /* Increment CS reference if needed */ 1458 staged_cs_get(hdev, cs); 1459 1460 cs->staged_cs = true; 1461 1462 return 0; 1463 } 1464 1465 static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid) 1466 { 1467 int i; 1468 1469 for (i = 0; i < hdev->stream_master_qid_arr_size; i++) 1470 if (qid == hdev->stream_master_qid_arr[i]) 1471 return BIT(i); 1472 1473 return 0; 1474 } 1475 1476 static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, 1477 u32 num_chunks, u64 *cs_seq, u32 flags, 1478 u32 encaps_signals_handle, u32 timeout, 1479 u16 *signal_initial_sob_count) 1480 { 1481 bool staged_mid, int_queues_only = true, using_hw_queues = false; 1482 struct hl_device *hdev = hpriv->hdev; 1483 struct hl_cs_chunk *cs_chunk_array; 1484 struct hl_cs_counters_atomic *cntr; 1485 struct hl_ctx *ctx = hpriv->ctx; 1486 struct hl_cs_job *job; 1487 struct hl_cs *cs; 1488 struct hl_cb *cb; 1489 u64 user_sequence; 1490 u8 stream_master_qid_map = 0; 1491 int rc, i; 1492 1493 cntr = &hdev->aggregated_cs_counters; 1494 user_sequence = *cs_seq; 1495 *cs_seq = ULLONG_MAX; 1496 1497 rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks, 1498 hpriv->ctx); 1499 if (rc) 1500 goto out; 1501 1502 if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) && 1503 !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST)) 1504 staged_mid = true; 1505 else 1506 staged_mid = false; 1507 1508 rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, 1509 staged_mid ? user_sequence : ULLONG_MAX, &cs, flags, 1510 timeout); 1511 if (rc) 1512 goto free_cs_chunk_array; 1513 1514 *cs_seq = cs->sequence; 1515 1516 hl_debugfs_add_cs(cs); 1517 1518 rc = cs_staged_submission(hdev, cs, user_sequence, flags, 1519 encaps_signals_handle); 1520 if (rc) 1521 goto free_cs_object; 1522 1523 /* If this is a staged submission we must return the staged sequence 1524 * rather than the internal CS sequence 1525 */ 1526 if (cs->staged_cs) 1527 *cs_seq = cs->staged_sequence; 1528 1529 /* Validate ALL the CS chunks before submitting the CS */ 1530 for (i = 0 ; i < num_chunks ; i++) { 1531 struct hl_cs_chunk *chunk = &cs_chunk_array[i]; 1532 enum hl_queue_type queue_type; 1533 bool is_kernel_allocated_cb; 1534 1535 rc = validate_queue_index(hdev, chunk, &queue_type, 1536 &is_kernel_allocated_cb); 1537 if (rc) { 1538 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 1539 atomic64_inc(&cntr->validation_drop_cnt); 1540 goto free_cs_object; 1541 } 1542 1543 if (is_kernel_allocated_cb) { 1544 cb = get_cb_from_cs_chunk(hdev, &hpriv->mem_mgr, chunk); 1545 if (!cb) { 1546 atomic64_inc( 1547 &ctx->cs_counters.validation_drop_cnt); 1548 atomic64_inc(&cntr->validation_drop_cnt); 1549 rc = -EINVAL; 1550 goto free_cs_object; 1551 } 1552 } else { 1553 cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle; 1554 } 1555 1556 if (queue_type == QUEUE_TYPE_EXT || 1557 queue_type == QUEUE_TYPE_HW) { 1558 int_queues_only = false; 1559 1560 /* 1561 * store which stream are being used for external/HW 1562 * queues of this CS 1563 */ 1564 if (hdev->supports_wait_for_multi_cs) 1565 stream_master_qid_map |= 1566 get_stream_master_qid_mask(hdev, 1567 chunk->queue_index); 1568 } 1569 1570 if (queue_type == QUEUE_TYPE_HW) 1571 using_hw_queues = true; 1572 1573 job = hl_cs_allocate_job(hdev, queue_type, 1574 is_kernel_allocated_cb); 1575 if (!job) { 1576 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1577 atomic64_inc(&cntr->out_of_mem_drop_cnt); 1578 dev_err(hdev->dev, "Failed to allocate a new job\n"); 1579 rc = -ENOMEM; 1580 if (is_kernel_allocated_cb) 1581 goto release_cb; 1582 1583 goto free_cs_object; 1584 } 1585 1586 job->id = i + 1; 1587 job->cs = cs; 1588 job->user_cb = cb; 1589 job->user_cb_size = chunk->cb_size; 1590 job->hw_queue_id = chunk->queue_index; 1591 1592 cs->jobs_in_queue_cnt[job->hw_queue_id]++; 1593 cs->jobs_cnt++; 1594 1595 list_add_tail(&job->cs_node, &cs->job_list); 1596 1597 /* 1598 * Increment CS reference. When CS reference is 0, CS is 1599 * done and can be signaled to user and free all its resources 1600 * Only increment for JOB on external or H/W queues, because 1601 * only for those JOBs we get completion 1602 */ 1603 if (cs_needs_completion(cs) && 1604 (job->queue_type == QUEUE_TYPE_EXT || 1605 job->queue_type == QUEUE_TYPE_HW)) 1606 cs_get(cs); 1607 1608 hl_debugfs_add_job(hdev, job); 1609 1610 rc = cs_parser(hpriv, job); 1611 if (rc) { 1612 atomic64_inc(&ctx->cs_counters.parsing_drop_cnt); 1613 atomic64_inc(&cntr->parsing_drop_cnt); 1614 dev_err(hdev->dev, 1615 "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n", 1616 cs->ctx->asid, cs->sequence, job->id, rc); 1617 goto free_cs_object; 1618 } 1619 } 1620 1621 /* We allow a CS with any queue type combination as long as it does 1622 * not get a completion 1623 */ 1624 if (int_queues_only && cs_needs_completion(cs)) { 1625 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 1626 atomic64_inc(&cntr->validation_drop_cnt); 1627 dev_err(hdev->dev, 1628 "Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n", 1629 cs->ctx->asid, cs->sequence); 1630 rc = -EINVAL; 1631 goto free_cs_object; 1632 } 1633 1634 if (using_hw_queues) 1635 INIT_WORK(&cs->finish_work, cs_completion); 1636 1637 /* 1638 * store the (external/HW queues) streams used by the CS in the 1639 * fence object for multi-CS completion 1640 */ 1641 if (hdev->supports_wait_for_multi_cs) 1642 cs->fence->stream_master_qid_map = stream_master_qid_map; 1643 1644 rc = hl_hw_queue_schedule_cs(cs); 1645 if (rc) { 1646 if (rc != -EAGAIN) 1647 dev_err(hdev->dev, 1648 "Failed to submit CS %d.%llu to H/W queues, error %d\n", 1649 cs->ctx->asid, cs->sequence, rc); 1650 goto free_cs_object; 1651 } 1652 1653 *signal_initial_sob_count = cs->initial_sob_count; 1654 1655 rc = HL_CS_STATUS_SUCCESS; 1656 goto put_cs; 1657 1658 release_cb: 1659 atomic_dec(&cb->cs_cnt); 1660 hl_cb_put(cb); 1661 free_cs_object: 1662 cs_rollback(hdev, cs); 1663 *cs_seq = ULLONG_MAX; 1664 /* The path below is both for good and erroneous exits */ 1665 put_cs: 1666 /* We finished with the CS in this function, so put the ref */ 1667 cs_put(cs); 1668 free_cs_chunk_array: 1669 kfree(cs_chunk_array); 1670 out: 1671 return rc; 1672 } 1673 1674 static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args, 1675 u64 *cs_seq) 1676 { 1677 struct hl_device *hdev = hpriv->hdev; 1678 struct hl_ctx *ctx = hpriv->ctx; 1679 bool need_soft_reset = false; 1680 int rc = 0, do_ctx_switch = 0; 1681 void __user *chunks; 1682 u32 num_chunks, tmp; 1683 u16 sob_count; 1684 int ret; 1685 1686 if (hdev->supports_ctx_switch) 1687 do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); 1688 1689 if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) { 1690 mutex_lock(&hpriv->restore_phase_mutex); 1691 1692 if (do_ctx_switch) { 1693 rc = hdev->asic_funcs->context_switch(hdev, ctx->asid); 1694 if (rc) { 1695 dev_err_ratelimited(hdev->dev, 1696 "Failed to switch to context %d, rejecting CS! %d\n", 1697 ctx->asid, rc); 1698 /* 1699 * If we timedout, or if the device is not IDLE 1700 * while we want to do context-switch (-EBUSY), 1701 * we need to soft-reset because QMAN is 1702 * probably stuck. However, we can't call to 1703 * reset here directly because of deadlock, so 1704 * need to do it at the very end of this 1705 * function 1706 */ 1707 if ((rc == -ETIMEDOUT) || (rc == -EBUSY)) 1708 need_soft_reset = true; 1709 mutex_unlock(&hpriv->restore_phase_mutex); 1710 goto out; 1711 } 1712 } 1713 1714 hdev->asic_funcs->restore_phase_topology(hdev); 1715 1716 chunks = (void __user *) (uintptr_t) args->in.chunks_restore; 1717 num_chunks = args->in.num_chunks_restore; 1718 1719 if (!num_chunks) { 1720 dev_dbg(hdev->dev, 1721 "Need to run restore phase but restore CS is empty\n"); 1722 rc = 0; 1723 } else { 1724 rc = cs_ioctl_default(hpriv, chunks, num_chunks, 1725 cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count); 1726 } 1727 1728 mutex_unlock(&hpriv->restore_phase_mutex); 1729 1730 if (rc) { 1731 dev_err(hdev->dev, 1732 "Failed to submit restore CS for context %d (%d)\n", 1733 ctx->asid, rc); 1734 goto out; 1735 } 1736 1737 /* Need to wait for restore completion before execution phase */ 1738 if (num_chunks) { 1739 enum hl_cs_wait_status status; 1740 wait_again: 1741 ret = _hl_cs_wait_ioctl(hdev, ctx, 1742 jiffies_to_usecs(hdev->timeout_jiffies), 1743 *cs_seq, &status, NULL); 1744 if (ret) { 1745 if (ret == -ERESTARTSYS) { 1746 usleep_range(100, 200); 1747 goto wait_again; 1748 } 1749 1750 dev_err(hdev->dev, 1751 "Restore CS for context %d failed to complete %d\n", 1752 ctx->asid, ret); 1753 rc = -ENOEXEC; 1754 goto out; 1755 } 1756 } 1757 1758 if (hdev->supports_ctx_switch) 1759 ctx->thread_ctx_switch_wait_token = 1; 1760 1761 } else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) { 1762 rc = hl_poll_timeout_memory(hdev, 1763 &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), 1764 100, jiffies_to_usecs(hdev->timeout_jiffies), false); 1765 1766 if (rc == -ETIMEDOUT) { 1767 dev_err(hdev->dev, 1768 "context switch phase timeout (%d)\n", tmp); 1769 goto out; 1770 } 1771 } 1772 1773 out: 1774 if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset)) 1775 hl_device_reset(hdev, 0); 1776 1777 return rc; 1778 } 1779 1780 /* 1781 * hl_cs_signal_sob_wraparound_handler: handle SOB value wrapaound case. 1782 * if the SOB value reaches the max value move to the other SOB reserved 1783 * to the queue. 1784 * @hdev: pointer to device structure 1785 * @q_idx: stream queue index 1786 * @hw_sob: the H/W SOB used in this signal CS. 1787 * @count: signals count 1788 * @encaps_sig: tells whether it's reservation for encaps signals or not. 1789 * 1790 * Note that this function must be called while hw_queues_lock is taken. 1791 */ 1792 int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx, 1793 struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig) 1794 1795 { 1796 struct hl_sync_stream_properties *prop; 1797 struct hl_hw_sob *sob = *hw_sob, *other_sob; 1798 u8 other_sob_offset; 1799 1800 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; 1801 1802 hw_sob_get(sob); 1803 1804 /* check for wraparound */ 1805 if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) { 1806 /* 1807 * Decrement as we reached the max value. 1808 * The release function won't be called here as we've 1809 * just incremented the refcount right before calling this 1810 * function. 1811 */ 1812 hw_sob_put_err(sob); 1813 1814 /* 1815 * check the other sob value, if it still in use then fail 1816 * otherwise make the switch 1817 */ 1818 other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS; 1819 other_sob = &prop->hw_sob[other_sob_offset]; 1820 1821 if (kref_read(&other_sob->kref) != 1) { 1822 dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n", 1823 q_idx); 1824 return -EINVAL; 1825 } 1826 1827 /* 1828 * next_sob_val always points to the next available signal 1829 * in the sob, so in encaps signals it will be the next one 1830 * after reserving the required amount. 1831 */ 1832 if (encaps_sig) 1833 prop->next_sob_val = count + 1; 1834 else 1835 prop->next_sob_val = count; 1836 1837 /* only two SOBs are currently in use */ 1838 prop->curr_sob_offset = other_sob_offset; 1839 *hw_sob = other_sob; 1840 1841 /* 1842 * check if other_sob needs reset, then do it before using it 1843 * for the reservation or the next signal cs. 1844 * we do it here, and for both encaps and regular signal cs 1845 * cases in order to avoid possible races of two kref_put 1846 * of the sob which can occur at the same time if we move the 1847 * sob reset(kref_put) to cs_do_release function. 1848 * in addition, if we have combination of cs signal and 1849 * encaps, and at the point we need to reset the sob there was 1850 * no more reservations and only signal cs keep coming, 1851 * in such case we need signal_cs to put the refcount and 1852 * reset the sob. 1853 */ 1854 if (other_sob->need_reset) 1855 hw_sob_put(other_sob); 1856 1857 if (encaps_sig) { 1858 /* set reset indication for the sob */ 1859 sob->need_reset = true; 1860 hw_sob_get(other_sob); 1861 } 1862 1863 dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n", 1864 prop->curr_sob_offset, q_idx); 1865 } else { 1866 prop->next_sob_val += count; 1867 } 1868 1869 return 0; 1870 } 1871 1872 static int cs_ioctl_extract_signal_seq(struct hl_device *hdev, 1873 struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx, 1874 bool encaps_signals) 1875 { 1876 u64 *signal_seq_arr = NULL; 1877 u32 size_to_copy, signal_seq_arr_len; 1878 int rc = 0; 1879 1880 if (encaps_signals) { 1881 *signal_seq = chunk->encaps_signal_seq; 1882 return 0; 1883 } 1884 1885 signal_seq_arr_len = chunk->num_signal_seq_arr; 1886 1887 /* currently only one signal seq is supported */ 1888 if (signal_seq_arr_len != 1) { 1889 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 1890 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); 1891 dev_err(hdev->dev, 1892 "Wait for signal CS supports only one signal CS seq\n"); 1893 return -EINVAL; 1894 } 1895 1896 signal_seq_arr = kmalloc_array(signal_seq_arr_len, 1897 sizeof(*signal_seq_arr), 1898 GFP_ATOMIC); 1899 if (!signal_seq_arr) 1900 signal_seq_arr = kmalloc_array(signal_seq_arr_len, 1901 sizeof(*signal_seq_arr), 1902 GFP_KERNEL); 1903 if (!signal_seq_arr) { 1904 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1905 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); 1906 return -ENOMEM; 1907 } 1908 1909 size_to_copy = signal_seq_arr_len * sizeof(*signal_seq_arr); 1910 if (copy_from_user(signal_seq_arr, 1911 u64_to_user_ptr(chunk->signal_seq_arr), 1912 size_to_copy)) { 1913 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 1914 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); 1915 dev_err(hdev->dev, 1916 "Failed to copy signal seq array from user\n"); 1917 rc = -EFAULT; 1918 goto out; 1919 } 1920 1921 /* currently it is guaranteed to have only one signal seq */ 1922 *signal_seq = signal_seq_arr[0]; 1923 1924 out: 1925 kfree(signal_seq_arr); 1926 1927 return rc; 1928 } 1929 1930 static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev, 1931 struct hl_ctx *ctx, struct hl_cs *cs, 1932 enum hl_queue_type q_type, u32 q_idx, u32 encaps_signal_offset) 1933 { 1934 struct hl_cs_counters_atomic *cntr; 1935 struct hl_cs_job *job; 1936 struct hl_cb *cb; 1937 u32 cb_size; 1938 1939 cntr = &hdev->aggregated_cs_counters; 1940 1941 job = hl_cs_allocate_job(hdev, q_type, true); 1942 if (!job) { 1943 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1944 atomic64_inc(&cntr->out_of_mem_drop_cnt); 1945 dev_err(hdev->dev, "Failed to allocate a new job\n"); 1946 return -ENOMEM; 1947 } 1948 1949 if (cs->type == CS_TYPE_WAIT) 1950 cb_size = hdev->asic_funcs->get_wait_cb_size(hdev); 1951 else 1952 cb_size = hdev->asic_funcs->get_signal_cb_size(hdev); 1953 1954 cb = hl_cb_kernel_create(hdev, cb_size, 1955 q_type == QUEUE_TYPE_HW && hdev->mmu_enable); 1956 if (!cb) { 1957 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1958 atomic64_inc(&cntr->out_of_mem_drop_cnt); 1959 kfree(job); 1960 return -EFAULT; 1961 } 1962 1963 job->id = 0; 1964 job->cs = cs; 1965 job->user_cb = cb; 1966 atomic_inc(&job->user_cb->cs_cnt); 1967 job->user_cb_size = cb_size; 1968 job->hw_queue_id = q_idx; 1969 1970 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) 1971 && cs->encaps_signals) 1972 job->encaps_sig_wait_offset = encaps_signal_offset; 1973 /* 1974 * No need in parsing, user CB is the patched CB. 1975 * We call hl_cb_destroy() out of two reasons - we don't need the CB in 1976 * the CB idr anymore and to decrement its refcount as it was 1977 * incremented inside hl_cb_kernel_create(). 1978 */ 1979 job->patched_cb = job->user_cb; 1980 job->job_cb_size = job->user_cb_size; 1981 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 1982 1983 /* increment refcount as for external queues we get completion */ 1984 cs_get(cs); 1985 1986 cs->jobs_in_queue_cnt[job->hw_queue_id]++; 1987 cs->jobs_cnt++; 1988 1989 list_add_tail(&job->cs_node, &cs->job_list); 1990 1991 hl_debugfs_add_job(hdev, job); 1992 1993 return 0; 1994 } 1995 1996 static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv, 1997 u32 q_idx, u32 count, 1998 u32 *handle_id, u32 *sob_addr, 1999 u32 *signals_count) 2000 { 2001 struct hw_queue_properties *hw_queue_prop; 2002 struct hl_sync_stream_properties *prop; 2003 struct hl_device *hdev = hpriv->hdev; 2004 struct hl_cs_encaps_sig_handle *handle; 2005 struct hl_encaps_signals_mgr *mgr; 2006 struct hl_hw_sob *hw_sob; 2007 int hdl_id; 2008 int rc = 0; 2009 2010 if (count >= HL_MAX_SOB_VAL) { 2011 dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n", 2012 count); 2013 rc = -EINVAL; 2014 goto out; 2015 } 2016 2017 if (q_idx >= hdev->asic_prop.max_queues) { 2018 dev_err(hdev->dev, "Queue index %d is invalid\n", 2019 q_idx); 2020 rc = -EINVAL; 2021 goto out; 2022 } 2023 2024 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; 2025 2026 if (!hw_queue_prop->supports_sync_stream) { 2027 dev_err(hdev->dev, 2028 "Queue index %d does not support sync stream operations\n", 2029 q_idx); 2030 rc = -EINVAL; 2031 goto out; 2032 } 2033 2034 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; 2035 2036 handle = kzalloc(sizeof(*handle), GFP_KERNEL); 2037 if (!handle) { 2038 rc = -ENOMEM; 2039 goto out; 2040 } 2041 2042 handle->count = count; 2043 2044 hl_ctx_get(hpriv->ctx); 2045 handle->ctx = hpriv->ctx; 2046 mgr = &hpriv->ctx->sig_mgr; 2047 2048 spin_lock(&mgr->lock); 2049 hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC); 2050 spin_unlock(&mgr->lock); 2051 2052 if (hdl_id < 0) { 2053 dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n"); 2054 rc = -EINVAL; 2055 goto put_ctx; 2056 } 2057 2058 handle->id = hdl_id; 2059 handle->q_idx = q_idx; 2060 handle->hdev = hdev; 2061 kref_init(&handle->refcount); 2062 2063 hdev->asic_funcs->hw_queues_lock(hdev); 2064 2065 hw_sob = &prop->hw_sob[prop->curr_sob_offset]; 2066 2067 /* 2068 * Increment the SOB value by count by user request 2069 * to reserve those signals 2070 * check if the signals amount to reserve is not exceeding the max sob 2071 * value, if yes then switch sob. 2072 */ 2073 rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, count, 2074 true); 2075 if (rc) { 2076 dev_err(hdev->dev, "Failed to switch SOB\n"); 2077 hdev->asic_funcs->hw_queues_unlock(hdev); 2078 rc = -EINVAL; 2079 goto remove_idr; 2080 } 2081 /* set the hw_sob to the handle after calling the sob wraparound handler 2082 * since sob could have changed. 2083 */ 2084 handle->hw_sob = hw_sob; 2085 2086 /* store the current sob value for unreserve validity check, and 2087 * signal offset support 2088 */ 2089 handle->pre_sob_val = prop->next_sob_val - handle->count; 2090 2091 handle->cs_seq = ULLONG_MAX; 2092 2093 *signals_count = prop->next_sob_val; 2094 hdev->asic_funcs->hw_queues_unlock(hdev); 2095 2096 *sob_addr = handle->hw_sob->sob_addr; 2097 *handle_id = hdl_id; 2098 2099 dev_dbg(hdev->dev, 2100 "Signals reserved, sob_id: %d, sob addr: 0x%x, last sob_val: %u, q_idx: %d, hdl_id: %d\n", 2101 hw_sob->sob_id, handle->hw_sob->sob_addr, 2102 prop->next_sob_val - 1, q_idx, hdl_id); 2103 goto out; 2104 2105 remove_idr: 2106 spin_lock(&mgr->lock); 2107 idr_remove(&mgr->handles, hdl_id); 2108 spin_unlock(&mgr->lock); 2109 2110 put_ctx: 2111 hl_ctx_put(handle->ctx); 2112 kfree(handle); 2113 2114 out: 2115 return rc; 2116 } 2117 2118 static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id) 2119 { 2120 struct hl_cs_encaps_sig_handle *encaps_sig_hdl; 2121 struct hl_sync_stream_properties *prop; 2122 struct hl_device *hdev = hpriv->hdev; 2123 struct hl_encaps_signals_mgr *mgr; 2124 struct hl_hw_sob *hw_sob; 2125 u32 q_idx, sob_addr; 2126 int rc = 0; 2127 2128 mgr = &hpriv->ctx->sig_mgr; 2129 2130 spin_lock(&mgr->lock); 2131 encaps_sig_hdl = idr_find(&mgr->handles, handle_id); 2132 if (encaps_sig_hdl) { 2133 dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n", 2134 handle_id, encaps_sig_hdl->hw_sob->sob_addr, 2135 encaps_sig_hdl->count); 2136 2137 hdev->asic_funcs->hw_queues_lock(hdev); 2138 2139 q_idx = encaps_sig_hdl->q_idx; 2140 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; 2141 hw_sob = &prop->hw_sob[prop->curr_sob_offset]; 2142 sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id); 2143 2144 /* Check if sob_val got out of sync due to other 2145 * signal submission requests which were handled 2146 * between the reserve-unreserve calls or SOB switch 2147 * upon reaching SOB max value. 2148 */ 2149 if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count 2150 != prop->next_sob_val || 2151 sob_addr != encaps_sig_hdl->hw_sob->sob_addr) { 2152 dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n", 2153 encaps_sig_hdl->pre_sob_val, 2154 (prop->next_sob_val - encaps_sig_hdl->count)); 2155 2156 hdev->asic_funcs->hw_queues_unlock(hdev); 2157 rc = -EINVAL; 2158 goto out_unlock; 2159 } 2160 2161 /* 2162 * Decrement the SOB value by count by user request 2163 * to unreserve those signals 2164 */ 2165 prop->next_sob_val -= encaps_sig_hdl->count; 2166 2167 hdev->asic_funcs->hw_queues_unlock(hdev); 2168 2169 hw_sob_put(hw_sob); 2170 2171 /* Release the id and free allocated memory of the handle */ 2172 idr_remove(&mgr->handles, handle_id); 2173 2174 /* unlock before calling ctx_put, where we might sleep */ 2175 spin_unlock(&mgr->lock); 2176 hl_ctx_put(encaps_sig_hdl->ctx); 2177 kfree(encaps_sig_hdl); 2178 goto out; 2179 } else { 2180 rc = -EINVAL; 2181 dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n"); 2182 } 2183 2184 out_unlock: 2185 spin_unlock(&mgr->lock); 2186 2187 out: 2188 return rc; 2189 } 2190 2191 static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, 2192 void __user *chunks, u32 num_chunks, 2193 u64 *cs_seq, u32 flags, u32 timeout, 2194 u32 *signal_sob_addr_offset, u16 *signal_initial_sob_count) 2195 { 2196 struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL; 2197 bool handle_found = false, is_wait_cs = false, 2198 wait_cs_submitted = false, 2199 cs_encaps_signals = false; 2200 struct hl_cs_chunk *cs_chunk_array, *chunk; 2201 bool staged_cs_with_encaps_signals = false; 2202 struct hw_queue_properties *hw_queue_prop; 2203 struct hl_device *hdev = hpriv->hdev; 2204 struct hl_cs_compl *sig_waitcs_cmpl; 2205 u32 q_idx, collective_engine_id = 0; 2206 struct hl_cs_counters_atomic *cntr; 2207 struct hl_fence *sig_fence = NULL; 2208 struct hl_ctx *ctx = hpriv->ctx; 2209 enum hl_queue_type q_type; 2210 struct hl_cs *cs; 2211 u64 signal_seq; 2212 int rc; 2213 2214 cntr = &hdev->aggregated_cs_counters; 2215 *cs_seq = ULLONG_MAX; 2216 2217 rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks, 2218 ctx); 2219 if (rc) 2220 goto out; 2221 2222 /* currently it is guaranteed to have only one chunk */ 2223 chunk = &cs_chunk_array[0]; 2224 2225 if (chunk->queue_index >= hdev->asic_prop.max_queues) { 2226 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2227 atomic64_inc(&cntr->validation_drop_cnt); 2228 dev_err(hdev->dev, "Queue index %d is invalid\n", 2229 chunk->queue_index); 2230 rc = -EINVAL; 2231 goto free_cs_chunk_array; 2232 } 2233 2234 q_idx = chunk->queue_index; 2235 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; 2236 q_type = hw_queue_prop->type; 2237 2238 if (!hw_queue_prop->supports_sync_stream) { 2239 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2240 atomic64_inc(&cntr->validation_drop_cnt); 2241 dev_err(hdev->dev, 2242 "Queue index %d does not support sync stream operations\n", 2243 q_idx); 2244 rc = -EINVAL; 2245 goto free_cs_chunk_array; 2246 } 2247 2248 if (cs_type == CS_TYPE_COLLECTIVE_WAIT) { 2249 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) { 2250 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2251 atomic64_inc(&cntr->validation_drop_cnt); 2252 dev_err(hdev->dev, 2253 "Queue index %d is invalid\n", q_idx); 2254 rc = -EINVAL; 2255 goto free_cs_chunk_array; 2256 } 2257 2258 if (!hdev->nic_ports_mask) { 2259 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2260 atomic64_inc(&cntr->validation_drop_cnt); 2261 dev_err(hdev->dev, 2262 "Collective operations not supported when NIC ports are disabled"); 2263 rc = -EINVAL; 2264 goto free_cs_chunk_array; 2265 } 2266 2267 collective_engine_id = chunk->collective_engine_id; 2268 } 2269 2270 is_wait_cs = !!(cs_type == CS_TYPE_WAIT || 2271 cs_type == CS_TYPE_COLLECTIVE_WAIT); 2272 2273 cs_encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS); 2274 2275 if (is_wait_cs) { 2276 rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, 2277 ctx, cs_encaps_signals); 2278 if (rc) 2279 goto free_cs_chunk_array; 2280 2281 if (cs_encaps_signals) { 2282 /* check if cs sequence has encapsulated 2283 * signals handle 2284 */ 2285 struct idr *idp; 2286 u32 id; 2287 2288 spin_lock(&ctx->sig_mgr.lock); 2289 idp = &ctx->sig_mgr.handles; 2290 idr_for_each_entry(idp, encaps_sig_hdl, id) { 2291 if (encaps_sig_hdl->cs_seq == signal_seq) { 2292 /* get refcount to protect removing this handle from idr, 2293 * needed when multiple wait cs are used with offset 2294 * to wait on reserved encaps signals. 2295 * Since kref_put of this handle is executed outside the 2296 * current lock, it is possible that the handle refcount 2297 * is 0 but it yet to be removed from the list. In this 2298 * case need to consider the handle as not valid. 2299 */ 2300 if (kref_get_unless_zero(&encaps_sig_hdl->refcount)) 2301 handle_found = true; 2302 break; 2303 } 2304 } 2305 spin_unlock(&ctx->sig_mgr.lock); 2306 2307 if (!handle_found) { 2308 /* treat as signal CS already finished */ 2309 dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n", 2310 signal_seq); 2311 rc = 0; 2312 goto free_cs_chunk_array; 2313 } 2314 2315 /* validate also the signal offset value */ 2316 if (chunk->encaps_signal_offset > 2317 encaps_sig_hdl->count) { 2318 dev_err(hdev->dev, "offset(%u) value exceed max reserved signals count(%u)!\n", 2319 chunk->encaps_signal_offset, 2320 encaps_sig_hdl->count); 2321 rc = -EINVAL; 2322 goto free_cs_chunk_array; 2323 } 2324 } 2325 2326 sig_fence = hl_ctx_get_fence(ctx, signal_seq); 2327 if (IS_ERR(sig_fence)) { 2328 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2329 atomic64_inc(&cntr->validation_drop_cnt); 2330 dev_err(hdev->dev, 2331 "Failed to get signal CS with seq 0x%llx\n", 2332 signal_seq); 2333 rc = PTR_ERR(sig_fence); 2334 goto free_cs_chunk_array; 2335 } 2336 2337 if (!sig_fence) { 2338 /* signal CS already finished */ 2339 rc = 0; 2340 goto free_cs_chunk_array; 2341 } 2342 2343 sig_waitcs_cmpl = 2344 container_of(sig_fence, struct hl_cs_compl, base_fence); 2345 2346 staged_cs_with_encaps_signals = !! 2347 (sig_waitcs_cmpl->type == CS_TYPE_DEFAULT && 2348 (flags & HL_CS_FLAGS_ENCAP_SIGNALS)); 2349 2350 if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL && 2351 !staged_cs_with_encaps_signals) { 2352 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2353 atomic64_inc(&cntr->validation_drop_cnt); 2354 dev_err(hdev->dev, 2355 "CS seq 0x%llx is not of a signal/encaps-signal CS\n", 2356 signal_seq); 2357 hl_fence_put(sig_fence); 2358 rc = -EINVAL; 2359 goto free_cs_chunk_array; 2360 } 2361 2362 if (completion_done(&sig_fence->completion)) { 2363 /* signal CS already finished */ 2364 hl_fence_put(sig_fence); 2365 rc = 0; 2366 goto free_cs_chunk_array; 2367 } 2368 } 2369 2370 rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout); 2371 if (rc) { 2372 if (is_wait_cs) 2373 hl_fence_put(sig_fence); 2374 2375 goto free_cs_chunk_array; 2376 } 2377 2378 /* 2379 * Save the signal CS fence for later initialization right before 2380 * hanging the wait CS on the queue. 2381 * for encaps signals case, we save the cs sequence and handle pointer 2382 * for later initialization. 2383 */ 2384 if (is_wait_cs) { 2385 cs->signal_fence = sig_fence; 2386 /* store the handle pointer, so we don't have to 2387 * look for it again, later on the flow 2388 * when we need to set SOB info in hw_queue. 2389 */ 2390 if (cs->encaps_signals) 2391 cs->encaps_sig_hdl = encaps_sig_hdl; 2392 } 2393 2394 hl_debugfs_add_cs(cs); 2395 2396 *cs_seq = cs->sequence; 2397 2398 if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL) 2399 rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type, 2400 q_idx, chunk->encaps_signal_offset); 2401 else if (cs_type == CS_TYPE_COLLECTIVE_WAIT) 2402 rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx, 2403 cs, q_idx, collective_engine_id, 2404 chunk->encaps_signal_offset); 2405 else { 2406 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2407 atomic64_inc(&cntr->validation_drop_cnt); 2408 rc = -EINVAL; 2409 } 2410 2411 if (rc) 2412 goto free_cs_object; 2413 2414 if (q_type == QUEUE_TYPE_HW) 2415 INIT_WORK(&cs->finish_work, cs_completion); 2416 2417 rc = hl_hw_queue_schedule_cs(cs); 2418 if (rc) { 2419 /* In case wait cs failed here, it means the signal cs 2420 * already completed. we want to free all it's related objects 2421 * but we don't want to fail the ioctl. 2422 */ 2423 if (is_wait_cs) 2424 rc = 0; 2425 else if (rc != -EAGAIN) 2426 dev_err(hdev->dev, 2427 "Failed to submit CS %d.%llu to H/W queues, error %d\n", 2428 ctx->asid, cs->sequence, rc); 2429 goto free_cs_object; 2430 } 2431 2432 *signal_sob_addr_offset = cs->sob_addr_offset; 2433 *signal_initial_sob_count = cs->initial_sob_count; 2434 2435 rc = HL_CS_STATUS_SUCCESS; 2436 if (is_wait_cs) 2437 wait_cs_submitted = true; 2438 goto put_cs; 2439 2440 free_cs_object: 2441 cs_rollback(hdev, cs); 2442 *cs_seq = ULLONG_MAX; 2443 /* The path below is both for good and erroneous exits */ 2444 put_cs: 2445 /* We finished with the CS in this function, so put the ref */ 2446 cs_put(cs); 2447 free_cs_chunk_array: 2448 if (!wait_cs_submitted && cs_encaps_signals && handle_found && is_wait_cs) 2449 kref_put(&encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx); 2450 kfree(cs_chunk_array); 2451 out: 2452 return rc; 2453 } 2454 2455 static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores, 2456 u32 num_engine_cores, u32 core_command) 2457 { 2458 struct hl_device *hdev = hpriv->hdev; 2459 void __user *engine_cores_arr; 2460 u32 *cores; 2461 int rc; 2462 2463 if (!hdev->asic_prop.supports_engine_modes) 2464 return -EPERM; 2465 2466 if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) { 2467 dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores); 2468 return -EINVAL; 2469 } 2470 2471 if (core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) { 2472 dev_err(hdev->dev, "Engine core command is invalid\n"); 2473 return -EINVAL; 2474 } 2475 2476 engine_cores_arr = (void __user *) (uintptr_t) engine_cores; 2477 cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL); 2478 if (!cores) 2479 return -ENOMEM; 2480 2481 if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) { 2482 dev_err(hdev->dev, "Failed to copy core-ids array from user\n"); 2483 kfree(cores); 2484 return -EFAULT; 2485 } 2486 2487 rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command); 2488 kfree(cores); 2489 2490 return rc; 2491 } 2492 2493 static int cs_ioctl_engines(struct hl_fpriv *hpriv, u64 engines_arr_user_addr, 2494 u32 num_engines, enum hl_engine_command command) 2495 { 2496 struct hl_device *hdev = hpriv->hdev; 2497 u32 *engines, max_num_of_engines; 2498 void __user *engines_arr; 2499 int rc; 2500 2501 if (!hdev->asic_prop.supports_engine_modes) 2502 return -EPERM; 2503 2504 if (command >= HL_ENGINE_COMMAND_MAX) { 2505 dev_err(hdev->dev, "Engine command is invalid\n"); 2506 return -EINVAL; 2507 } 2508 2509 max_num_of_engines = hdev->asic_prop.max_num_of_engines; 2510 if (command == HL_ENGINE_CORE_RUN || command == HL_ENGINE_CORE_HALT) 2511 max_num_of_engines = hdev->asic_prop.num_engine_cores; 2512 2513 if (!num_engines || num_engines > max_num_of_engines) { 2514 dev_err(hdev->dev, "Number of engines %d is invalid\n", num_engines); 2515 return -EINVAL; 2516 } 2517 2518 engines_arr = (void __user *) (uintptr_t) engines_arr_user_addr; 2519 engines = kmalloc_array(num_engines, sizeof(u32), GFP_KERNEL); 2520 if (!engines) 2521 return -ENOMEM; 2522 2523 if (copy_from_user(engines, engines_arr, num_engines * sizeof(u32))) { 2524 dev_err(hdev->dev, "Failed to copy engine-ids array from user\n"); 2525 kfree(engines); 2526 return -EFAULT; 2527 } 2528 2529 rc = hdev->asic_funcs->set_engines(hdev, engines, num_engines, command); 2530 kfree(engines); 2531 2532 return rc; 2533 } 2534 2535 static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv) 2536 { 2537 struct hl_device *hdev = hpriv->hdev; 2538 struct asic_fixed_properties *prop = &hdev->asic_prop; 2539 2540 if (!prop->hbw_flush_reg) { 2541 dev_dbg(hdev->dev, "HBW flush is not supported\n"); 2542 return -EOPNOTSUPP; 2543 } 2544 2545 RREG32(prop->hbw_flush_reg); 2546 2547 return 0; 2548 } 2549 2550 int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) 2551 { 2552 union hl_cs_args *args = data; 2553 enum hl_cs_type cs_type = 0; 2554 u64 cs_seq = ULONG_MAX; 2555 void __user *chunks; 2556 u32 num_chunks, flags, timeout, 2557 signals_count = 0, sob_addr = 0, handle_id = 0; 2558 u16 sob_initial_count = 0; 2559 int rc; 2560 2561 rc = hl_cs_sanity_checks(hpriv, args); 2562 if (rc) 2563 goto out; 2564 2565 rc = hl_cs_ctx_switch(hpriv, args, &cs_seq); 2566 if (rc) 2567 goto out; 2568 2569 cs_type = hl_cs_get_cs_type(args->in.cs_flags & 2570 ~HL_CS_FLAGS_FORCE_RESTORE); 2571 chunks = (void __user *) (uintptr_t) args->in.chunks_execute; 2572 num_chunks = args->in.num_chunks_execute; 2573 flags = args->in.cs_flags; 2574 2575 /* In case this is a staged CS, user should supply the CS sequence */ 2576 if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) && 2577 !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST)) 2578 cs_seq = args->in.seq; 2579 2580 timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT 2581 ? msecs_to_jiffies(args->in.timeout * 1000) 2582 : hpriv->hdev->timeout_jiffies; 2583 2584 switch (cs_type) { 2585 case CS_TYPE_SIGNAL: 2586 case CS_TYPE_WAIT: 2587 case CS_TYPE_COLLECTIVE_WAIT: 2588 rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks, 2589 &cs_seq, args->in.cs_flags, timeout, 2590 &sob_addr, &sob_initial_count); 2591 break; 2592 case CS_RESERVE_SIGNALS: 2593 rc = cs_ioctl_reserve_signals(hpriv, 2594 args->in.encaps_signals_q_idx, 2595 args->in.encaps_signals_count, 2596 &handle_id, &sob_addr, &signals_count); 2597 break; 2598 case CS_UNRESERVE_SIGNALS: 2599 rc = cs_ioctl_unreserve_signals(hpriv, 2600 args->in.encaps_sig_handle_id); 2601 break; 2602 case CS_TYPE_ENGINE_CORE: 2603 rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores, 2604 args->in.num_engine_cores, args->in.core_command); 2605 break; 2606 case CS_TYPE_ENGINES: 2607 rc = cs_ioctl_engines(hpriv, args->in.engines, 2608 args->in.num_engines, args->in.engine_command); 2609 break; 2610 case CS_TYPE_FLUSH_PCI_HBW_WRITES: 2611 rc = cs_ioctl_flush_pci_hbw_writes(hpriv); 2612 break; 2613 default: 2614 rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq, 2615 args->in.cs_flags, 2616 args->in.encaps_sig_handle_id, 2617 timeout, &sob_initial_count); 2618 break; 2619 } 2620 out: 2621 if (rc != -EAGAIN) { 2622 memset(args, 0, sizeof(*args)); 2623 2624 switch (cs_type) { 2625 case CS_RESERVE_SIGNALS: 2626 args->out.handle_id = handle_id; 2627 args->out.sob_base_addr_offset = sob_addr; 2628 args->out.count = signals_count; 2629 break; 2630 case CS_TYPE_SIGNAL: 2631 args->out.sob_base_addr_offset = sob_addr; 2632 args->out.sob_count_before_submission = sob_initial_count; 2633 args->out.seq = cs_seq; 2634 break; 2635 case CS_TYPE_DEFAULT: 2636 args->out.sob_count_before_submission = sob_initial_count; 2637 args->out.seq = cs_seq; 2638 break; 2639 default: 2640 args->out.seq = cs_seq; 2641 break; 2642 } 2643 2644 args->out.status = rc; 2645 } 2646 2647 return rc; 2648 } 2649 2650 static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence, 2651 enum hl_cs_wait_status *status, u64 timeout_us, s64 *timestamp) 2652 { 2653 struct hl_device *hdev = ctx->hdev; 2654 ktime_t timestamp_kt; 2655 long completion_rc; 2656 int rc = 0, error; 2657 2658 if (IS_ERR(fence)) { 2659 rc = PTR_ERR(fence); 2660 if (rc == -EINVAL) 2661 dev_notice_ratelimited(hdev->dev, 2662 "Can't wait on CS %llu because current CS is at seq %llu\n", 2663 seq, ctx->cs_sequence); 2664 return rc; 2665 } 2666 2667 if (!fence) { 2668 if (!hl_pop_cs_outcome(&ctx->outcome_store, seq, ×tamp_kt, &error)) { 2669 dev_dbg(hdev->dev, 2670 "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n", 2671 seq, ctx->cs_sequence); 2672 *status = CS_WAIT_STATUS_GONE; 2673 return 0; 2674 } 2675 2676 completion_rc = 1; 2677 goto report_results; 2678 } 2679 2680 if (!timeout_us) { 2681 completion_rc = completion_done(&fence->completion); 2682 } else { 2683 unsigned long timeout; 2684 2685 timeout = (timeout_us == MAX_SCHEDULE_TIMEOUT) ? 2686 timeout_us : usecs_to_jiffies(timeout_us); 2687 completion_rc = 2688 wait_for_completion_interruptible_timeout( 2689 &fence->completion, timeout); 2690 } 2691 2692 error = fence->error; 2693 timestamp_kt = fence->timestamp; 2694 2695 report_results: 2696 if (completion_rc > 0) { 2697 *status = CS_WAIT_STATUS_COMPLETED; 2698 if (timestamp) 2699 *timestamp = ktime_to_ns(timestamp_kt); 2700 } else { 2701 *status = CS_WAIT_STATUS_BUSY; 2702 } 2703 2704 if (completion_rc == -ERESTARTSYS) 2705 rc = completion_rc; 2706 else if (error == -ETIMEDOUT || error == -EIO) 2707 rc = error; 2708 2709 return rc; 2710 } 2711 2712 /* 2713 * hl_cs_poll_fences - iterate CS fences to check for CS completion 2714 * 2715 * @mcs_data: multi-CS internal data 2716 * @mcs_compl: multi-CS completion structure 2717 * 2718 * @return 0 on success, otherwise non 0 error code 2719 * 2720 * The function iterates on all CS sequence in the list and set bit in 2721 * completion_bitmap for each completed CS. 2722 * While iterating, the function sets the stream map of each fence in the fence 2723 * array in the completion QID stream map to be used by CSs to perform 2724 * completion to the multi-CS context. 2725 * This function shall be called after taking context ref 2726 */ 2727 static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_completion *mcs_compl) 2728 { 2729 struct hl_fence **fence_ptr = mcs_data->fence_arr; 2730 struct hl_device *hdev = mcs_data->ctx->hdev; 2731 int i, rc, arr_len = mcs_data->arr_len; 2732 u64 *seq_arr = mcs_data->seq_arr; 2733 ktime_t max_ktime, first_cs_time; 2734 enum hl_cs_wait_status status; 2735 2736 memset(fence_ptr, 0, arr_len * sizeof(struct hl_fence *)); 2737 2738 /* get all fences under the same lock */ 2739 rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len); 2740 if (rc) 2741 return rc; 2742 2743 /* 2744 * re-initialize the completion here to handle 2 possible cases: 2745 * 1. CS will complete the multi-CS prior clearing the completion. in which 2746 * case the fence iteration is guaranteed to catch the CS completion. 2747 * 2. the completion will occur after re-init of the completion. 2748 * in which case we will wake up immediately in wait_for_completion. 2749 */ 2750 reinit_completion(&mcs_compl->completion); 2751 2752 /* 2753 * set to maximum time to verify timestamp is valid: if at the end 2754 * this value is maintained- no timestamp was updated 2755 */ 2756 max_ktime = ktime_set(KTIME_SEC_MAX, 0); 2757 first_cs_time = max_ktime; 2758 2759 for (i = 0; i < arr_len; i++, fence_ptr++) { 2760 struct hl_fence *fence = *fence_ptr; 2761 2762 /* 2763 * In order to prevent case where we wait until timeout even though a CS associated 2764 * with the multi-CS actually completed we do things in the below order: 2765 * 1. for each fence set it's QID map in the multi-CS completion QID map. This way 2766 * any CS can, potentially, complete the multi CS for the specific QID (note 2767 * that once completion is initialized, calling complete* and then wait on the 2768 * completion will cause it to return at once) 2769 * 2. only after allowing multi-CS completion for the specific QID we check whether 2770 * the specific CS already completed (and thus the wait for completion part will 2771 * be skipped). if the CS not completed it is guaranteed that completing CS will 2772 * wake up the completion. 2773 */ 2774 if (fence) 2775 mcs_compl->stream_master_qid_map |= fence->stream_master_qid_map; 2776 2777 /* 2778 * function won't sleep as it is called with timeout 0 (i.e. 2779 * poll the fence) 2780 */ 2781 rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence, &status, 0, NULL); 2782 if (rc) { 2783 dev_err(hdev->dev, 2784 "wait_for_fence error :%d for CS seq %llu\n", 2785 rc, seq_arr[i]); 2786 break; 2787 } 2788 2789 switch (status) { 2790 case CS_WAIT_STATUS_BUSY: 2791 /* CS did not finished, QID to wait on already stored */ 2792 break; 2793 case CS_WAIT_STATUS_COMPLETED: 2794 /* 2795 * Using mcs_handling_done to avoid possibility of mcs_data 2796 * returns to user indicating CS completed before it finished 2797 * all of its mcs handling, to avoid race the next time the 2798 * user waits for mcs. 2799 * note: when reaching this case fence is definitely not NULL 2800 * but NULL check was added to overcome static analysis 2801 */ 2802 if (fence && !fence->mcs_handling_done) { 2803 /* 2804 * in case multi CS is completed but MCS handling not done 2805 * we "complete" the multi CS to prevent it from waiting 2806 * until time-out and the "multi-CS handling done" will have 2807 * another chance at the next iteration 2808 */ 2809 complete_all(&mcs_compl->completion); 2810 break; 2811 } 2812 2813 mcs_data->completion_bitmap |= BIT(i); 2814 /* 2815 * For all completed CSs we take the earliest timestamp. 2816 * For this we have to validate that the timestamp is 2817 * earliest of all timestamps so far. 2818 */ 2819 if (fence && mcs_data->update_ts && 2820 (ktime_compare(fence->timestamp, first_cs_time) < 0)) 2821 first_cs_time = fence->timestamp; 2822 break; 2823 case CS_WAIT_STATUS_GONE: 2824 mcs_data->update_ts = false; 2825 mcs_data->gone_cs = true; 2826 /* 2827 * It is possible to get an old sequence numbers from user 2828 * which related to already completed CSs and their fences 2829 * already gone. In this case, CS set as completed but 2830 * no need to consider its QID for mcs completion. 2831 */ 2832 mcs_data->completion_bitmap |= BIT(i); 2833 break; 2834 default: 2835 dev_err(hdev->dev, "Invalid fence status\n"); 2836 rc = -EINVAL; 2837 break; 2838 } 2839 2840 } 2841 2842 hl_fences_put(mcs_data->fence_arr, arr_len); 2843 2844 if (mcs_data->update_ts && 2845 (ktime_compare(first_cs_time, max_ktime) != 0)) 2846 mcs_data->timestamp = ktime_to_ns(first_cs_time); 2847 2848 return rc; 2849 } 2850 2851 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq, 2852 enum hl_cs_wait_status *status, s64 *timestamp) 2853 { 2854 struct hl_fence *fence; 2855 int rc = 0; 2856 2857 if (timestamp) 2858 *timestamp = 0; 2859 2860 hl_ctx_get(ctx); 2861 2862 fence = hl_ctx_get_fence(ctx, seq); 2863 2864 rc = hl_wait_for_fence(ctx, seq, fence, status, timeout_us, timestamp); 2865 hl_fence_put(fence); 2866 hl_ctx_put(ctx); 2867 2868 return rc; 2869 } 2870 2871 static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs) 2872 { 2873 if (usecs <= U32_MAX) 2874 return usecs_to_jiffies(usecs); 2875 2876 /* 2877 * If the value in nanoseconds is larger than 64 bit, use the largest 2878 * 64 bit value. 2879 */ 2880 if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC))) 2881 return nsecs_to_jiffies(U64_MAX); 2882 2883 return nsecs_to_jiffies(usecs * NSEC_PER_USEC); 2884 } 2885 2886 /* 2887 * hl_wait_multi_cs_completion_init - init completion structure 2888 * 2889 * @hdev: pointer to habanalabs device structure 2890 * @stream_master_bitmap: stream master QIDs map, set bit indicates stream 2891 * master QID to wait on 2892 * 2893 * @return valid completion struct pointer on success, otherwise error pointer 2894 * 2895 * up to MULTI_CS_MAX_USER_CTX calls can be done concurrently to the driver. 2896 * the function gets the first available completion (by marking it "used") 2897 * and initialize its values. 2898 */ 2899 static struct multi_cs_completion *hl_wait_multi_cs_completion_init(struct hl_device *hdev) 2900 { 2901 struct multi_cs_completion *mcs_compl; 2902 int i; 2903 2904 /* find free multi_cs completion structure */ 2905 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { 2906 mcs_compl = &hdev->multi_cs_completion[i]; 2907 spin_lock(&mcs_compl->lock); 2908 if (!mcs_compl->used) { 2909 mcs_compl->used = 1; 2910 mcs_compl->timestamp = 0; 2911 /* 2912 * init QID map to 0 to avoid completion by CSs. the actual QID map 2913 * to multi-CS CSs will be set incrementally at a later stage 2914 */ 2915 mcs_compl->stream_master_qid_map = 0; 2916 spin_unlock(&mcs_compl->lock); 2917 break; 2918 } 2919 spin_unlock(&mcs_compl->lock); 2920 } 2921 2922 if (i == MULTI_CS_MAX_USER_CTX) { 2923 dev_err(hdev->dev, "no available multi-CS completion structure\n"); 2924 return ERR_PTR(-ENOMEM); 2925 } 2926 return mcs_compl; 2927 } 2928 2929 /* 2930 * hl_wait_multi_cs_completion_fini - return completion structure and set as 2931 * unused 2932 * 2933 * @mcs_compl: pointer to the completion structure 2934 */ 2935 static void hl_wait_multi_cs_completion_fini( 2936 struct multi_cs_completion *mcs_compl) 2937 { 2938 /* 2939 * free completion structure, do it under lock to be in-sync with the 2940 * thread that signals completion 2941 */ 2942 spin_lock(&mcs_compl->lock); 2943 mcs_compl->used = 0; 2944 spin_unlock(&mcs_compl->lock); 2945 } 2946 2947 /* 2948 * hl_wait_multi_cs_completion - wait for first CS to complete 2949 * 2950 * @mcs_data: multi-CS internal data 2951 * 2952 * @return 0 on success, otherwise non 0 error code 2953 */ 2954 static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data, 2955 struct multi_cs_completion *mcs_compl) 2956 { 2957 long completion_rc; 2958 2959 completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion, 2960 mcs_data->timeout_jiffies); 2961 2962 /* update timestamp */ 2963 if (completion_rc > 0) 2964 mcs_data->timestamp = mcs_compl->timestamp; 2965 2966 if (completion_rc == -ERESTARTSYS) 2967 return completion_rc; 2968 2969 mcs_data->wait_status = completion_rc; 2970 2971 return 0; 2972 } 2973 2974 /* 2975 * hl_multi_cs_completion_init - init array of multi-CS completion structures 2976 * 2977 * @hdev: pointer to habanalabs device structure 2978 */ 2979 void hl_multi_cs_completion_init(struct hl_device *hdev) 2980 { 2981 struct multi_cs_completion *mcs_cmpl; 2982 int i; 2983 2984 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { 2985 mcs_cmpl = &hdev->multi_cs_completion[i]; 2986 mcs_cmpl->used = 0; 2987 spin_lock_init(&mcs_cmpl->lock); 2988 init_completion(&mcs_cmpl->completion); 2989 } 2990 } 2991 2992 /* 2993 * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl 2994 * 2995 * @hpriv: pointer to the private data of the fd 2996 * @data: pointer to multi-CS wait ioctl in/out args 2997 * 2998 */ 2999 static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) 3000 { 3001 struct multi_cs_completion *mcs_compl; 3002 struct hl_device *hdev = hpriv->hdev; 3003 struct multi_cs_data mcs_data = {}; 3004 union hl_wait_cs_args *args = data; 3005 struct hl_ctx *ctx = hpriv->ctx; 3006 struct hl_fence **fence_arr; 3007 void __user *seq_arr; 3008 u32 size_to_copy; 3009 u64 *cs_seq_arr; 3010 u8 seq_arr_len; 3011 int rc, i; 3012 3013 for (i = 0 ; i < sizeof(args->in.pad) ; i++) 3014 if (args->in.pad[i]) { 3015 dev_dbg(hdev->dev, "Padding bytes must be 0\n"); 3016 return -EINVAL; 3017 } 3018 3019 if (!hdev->supports_wait_for_multi_cs) { 3020 dev_err(hdev->dev, "Wait for multi CS is not supported\n"); 3021 return -EPERM; 3022 } 3023 3024 seq_arr_len = args->in.seq_arr_len; 3025 3026 if (seq_arr_len > HL_WAIT_MULTI_CS_LIST_MAX_LEN) { 3027 dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n", 3028 HL_WAIT_MULTI_CS_LIST_MAX_LEN, seq_arr_len); 3029 return -EINVAL; 3030 } 3031 3032 /* allocate memory for sequence array */ 3033 cs_seq_arr = 3034 kmalloc_array(seq_arr_len, sizeof(*cs_seq_arr), GFP_KERNEL); 3035 if (!cs_seq_arr) 3036 return -ENOMEM; 3037 3038 /* copy CS sequence array from user */ 3039 seq_arr = (void __user *) (uintptr_t) args->in.seq; 3040 size_to_copy = seq_arr_len * sizeof(*cs_seq_arr); 3041 if (copy_from_user(cs_seq_arr, seq_arr, size_to_copy)) { 3042 dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n"); 3043 rc = -EFAULT; 3044 goto free_seq_arr; 3045 } 3046 3047 /* allocate array for the fences */ 3048 fence_arr = kmalloc_array(seq_arr_len, sizeof(struct hl_fence *), GFP_KERNEL); 3049 if (!fence_arr) { 3050 rc = -ENOMEM; 3051 goto free_seq_arr; 3052 } 3053 3054 /* initialize the multi-CS internal data */ 3055 mcs_data.ctx = ctx; 3056 mcs_data.seq_arr = cs_seq_arr; 3057 mcs_data.fence_arr = fence_arr; 3058 mcs_data.arr_len = seq_arr_len; 3059 3060 hl_ctx_get(ctx); 3061 3062 /* wait (with timeout) for the first CS to be completed */ 3063 mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us); 3064 mcs_compl = hl_wait_multi_cs_completion_init(hdev); 3065 if (IS_ERR(mcs_compl)) { 3066 rc = PTR_ERR(mcs_compl); 3067 goto put_ctx; 3068 } 3069 3070 /* poll all CS fences, extract timestamp */ 3071 mcs_data.update_ts = true; 3072 rc = hl_cs_poll_fences(&mcs_data, mcs_compl); 3073 /* 3074 * skip wait for CS completion when one of the below is true: 3075 * - an error on the poll function 3076 * - one or more CS in the list completed 3077 * - the user called ioctl with timeout 0 3078 */ 3079 if (rc || mcs_data.completion_bitmap || !args->in.timeout_us) 3080 goto completion_fini; 3081 3082 while (true) { 3083 rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl); 3084 if (rc || (mcs_data.wait_status == 0)) 3085 break; 3086 3087 /* 3088 * poll fences once again to update the CS map. 3089 * no timestamp should be updated this time. 3090 */ 3091 mcs_data.update_ts = false; 3092 rc = hl_cs_poll_fences(&mcs_data, mcs_compl); 3093 3094 if (rc || mcs_data.completion_bitmap) 3095 break; 3096 3097 /* 3098 * if hl_wait_multi_cs_completion returned before timeout (i.e. 3099 * it got a completion) it either got completed by CS in the multi CS list 3100 * (in which case the indication will be non empty completion_bitmap) or it 3101 * got completed by CS submitted to one of the shared stream master but 3102 * not in the multi CS list (in which case we should wait again but modify 3103 * the timeout and set timestamp as zero to let a CS related to the current 3104 * multi-CS set a new, relevant, timestamp) 3105 */ 3106 mcs_data.timeout_jiffies = mcs_data.wait_status; 3107 mcs_compl->timestamp = 0; 3108 } 3109 3110 completion_fini: 3111 hl_wait_multi_cs_completion_fini(mcs_compl); 3112 3113 put_ctx: 3114 hl_ctx_put(ctx); 3115 kfree(fence_arr); 3116 3117 free_seq_arr: 3118 kfree(cs_seq_arr); 3119 3120 if (rc == -ERESTARTSYS) { 3121 dev_err_ratelimited(hdev->dev, 3122 "user process got signal while waiting for Multi-CS\n"); 3123 rc = -EINTR; 3124 } 3125 3126 if (rc) 3127 return rc; 3128 3129 /* update output args */ 3130 memset(args, 0, sizeof(*args)); 3131 3132 if (mcs_data.completion_bitmap) { 3133 args->out.status = HL_WAIT_CS_STATUS_COMPLETED; 3134 args->out.cs_completion_map = mcs_data.completion_bitmap; 3135 3136 /* if timestamp not 0- it's valid */ 3137 if (mcs_data.timestamp) { 3138 args->out.timestamp_nsec = mcs_data.timestamp; 3139 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; 3140 } 3141 3142 /* update if some CS was gone */ 3143 if (!mcs_data.timestamp) 3144 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; 3145 } else { 3146 args->out.status = HL_WAIT_CS_STATUS_BUSY; 3147 } 3148 3149 return 0; 3150 } 3151 3152 static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) 3153 { 3154 struct hl_device *hdev = hpriv->hdev; 3155 union hl_wait_cs_args *args = data; 3156 enum hl_cs_wait_status status; 3157 u64 seq = args->in.seq; 3158 s64 timestamp; 3159 int rc; 3160 3161 rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq, &status, ×tamp); 3162 3163 if (rc == -ERESTARTSYS) { 3164 dev_err_ratelimited(hdev->dev, 3165 "user process got signal while waiting for CS handle %llu\n", 3166 seq); 3167 return -EINTR; 3168 } 3169 3170 memset(args, 0, sizeof(*args)); 3171 3172 if (rc) { 3173 if (rc == -ETIMEDOUT) { 3174 dev_err_ratelimited(hdev->dev, 3175 "CS %llu has timed-out while user process is waiting for it\n", 3176 seq); 3177 args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT; 3178 } else if (rc == -EIO) { 3179 dev_err_ratelimited(hdev->dev, 3180 "CS %llu has been aborted while user process is waiting for it\n", 3181 seq); 3182 args->out.status = HL_WAIT_CS_STATUS_ABORTED; 3183 } 3184 return rc; 3185 } 3186 3187 if (timestamp) { 3188 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; 3189 args->out.timestamp_nsec = timestamp; 3190 } 3191 3192 switch (status) { 3193 case CS_WAIT_STATUS_GONE: 3194 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; 3195 fallthrough; 3196 case CS_WAIT_STATUS_COMPLETED: 3197 args->out.status = HL_WAIT_CS_STATUS_COMPLETED; 3198 break; 3199 case CS_WAIT_STATUS_BUSY: 3200 default: 3201 args->out.status = HL_WAIT_CS_STATUS_BUSY; 3202 break; 3203 } 3204 3205 return 0; 3206 } 3207 3208 static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf, 3209 struct hl_cb *cq_cb, 3210 u64 ts_offset, u64 cq_offset, u64 target_value, 3211 spinlock_t *wait_list_lock, 3212 struct hl_user_pending_interrupt **pend) 3213 { 3214 struct hl_ts_buff *ts_buff = buf->private; 3215 struct hl_user_pending_interrupt *requested_offset_record = 3216 (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + 3217 ts_offset; 3218 struct hl_user_pending_interrupt *cb_last = 3219 (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + 3220 (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt)); 3221 unsigned long iter_counter = 0; 3222 u64 current_cq_counter; 3223 ktime_t timestamp; 3224 3225 /* Validate ts_offset not exceeding last max */ 3226 if (requested_offset_record >= cb_last) { 3227 dev_err(buf->mmg->dev, "Ts offset exceeds max CB offset(0x%llx)\n", 3228 (u64)(uintptr_t)cb_last); 3229 return -EINVAL; 3230 } 3231 3232 timestamp = ktime_get(); 3233 3234 start_over: 3235 spin_lock(wait_list_lock); 3236 3237 /* Unregister only if we didn't reach the target value 3238 * since in this case there will be no handling in irq context 3239 * and then it's safe to delete the node out of the interrupt list 3240 * then re-use it on other interrupt 3241 */ 3242 if (requested_offset_record->ts_reg_info.in_use) { 3243 current_cq_counter = *requested_offset_record->cq_kernel_addr; 3244 if (current_cq_counter < requested_offset_record->cq_target_value) { 3245 list_del(&requested_offset_record->wait_list_node); 3246 spin_unlock(wait_list_lock); 3247 3248 hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf); 3249 hl_cb_put(requested_offset_record->ts_reg_info.cq_cb); 3250 3251 dev_dbg(buf->mmg->dev, 3252 "ts node removed from interrupt list now can re-use\n"); 3253 } else { 3254 dev_dbg(buf->mmg->dev, 3255 "ts node in middle of irq handling\n"); 3256 3257 /* irq thread handling in the middle give it time to finish */ 3258 spin_unlock(wait_list_lock); 3259 usleep_range(100, 1000); 3260 if (++iter_counter == MAX_TS_ITER_NUM) { 3261 dev_err(buf->mmg->dev, 3262 "Timestamp offset processing reached timeout of %lld ms\n", 3263 ktime_ms_delta(ktime_get(), timestamp)); 3264 return -EAGAIN; 3265 } 3266 3267 goto start_over; 3268 } 3269 } else { 3270 /* Fill up the new registration node info */ 3271 requested_offset_record->ts_reg_info.buf = buf; 3272 requested_offset_record->ts_reg_info.cq_cb = cq_cb; 3273 requested_offset_record->ts_reg_info.timestamp_kernel_addr = 3274 (u64 *) ts_buff->user_buff_address + ts_offset; 3275 requested_offset_record->cq_kernel_addr = 3276 (u64 *) cq_cb->kernel_address + cq_offset; 3277 requested_offset_record->cq_target_value = target_value; 3278 3279 spin_unlock(wait_list_lock); 3280 } 3281 3282 *pend = requested_offset_record; 3283 3284 dev_dbg(buf->mmg->dev, "Found available node in TS kernel CB %p\n", 3285 requested_offset_record); 3286 return 0; 3287 } 3288 3289 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, 3290 struct hl_mem_mgr *cb_mmg, struct hl_mem_mgr *mmg, 3291 u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset, 3292 u64 target_value, struct hl_user_interrupt *interrupt, 3293 bool register_ts_record, u64 ts_handle, u64 ts_offset, 3294 u32 *status, u64 *timestamp) 3295 { 3296 struct hl_user_pending_interrupt *pend; 3297 struct hl_mmap_mem_buf *buf; 3298 struct hl_cb *cq_cb; 3299 unsigned long timeout; 3300 long completion_rc; 3301 int rc = 0; 3302 3303 timeout = hl_usecs64_to_jiffies(timeout_us); 3304 3305 hl_ctx_get(ctx); 3306 3307 cq_cb = hl_cb_get(cb_mmg, cq_counters_handle); 3308 if (!cq_cb) { 3309 rc = -EINVAL; 3310 goto put_ctx; 3311 } 3312 3313 /* Validate the cq offset */ 3314 if (((u64 *) cq_cb->kernel_address + cq_counters_offset) >= 3315 ((u64 *) cq_cb->kernel_address + (cq_cb->size / sizeof(u64)))) { 3316 rc = -EINVAL; 3317 goto put_cq_cb; 3318 } 3319 3320 if (register_ts_record) { 3321 dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n", 3322 interrupt->interrupt_id, ts_offset, cq_counters_offset); 3323 buf = hl_mmap_mem_buf_get(mmg, ts_handle); 3324 if (!buf) { 3325 rc = -EINVAL; 3326 goto put_cq_cb; 3327 } 3328 3329 /* get ts buffer record */ 3330 rc = ts_buff_get_kernel_ts_record(buf, cq_cb, ts_offset, 3331 cq_counters_offset, target_value, 3332 &interrupt->wait_list_lock, &pend); 3333 if (rc) 3334 goto put_ts_buff; 3335 } else { 3336 pend = kzalloc(sizeof(*pend), GFP_KERNEL); 3337 if (!pend) { 3338 rc = -ENOMEM; 3339 goto put_cq_cb; 3340 } 3341 hl_fence_init(&pend->fence, ULONG_MAX); 3342 pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset; 3343 pend->cq_target_value = target_value; 3344 } 3345 3346 spin_lock(&interrupt->wait_list_lock); 3347 3348 /* We check for completion value as interrupt could have been received 3349 * before we added the node to the wait list 3350 */ 3351 if (*pend->cq_kernel_addr >= target_value) { 3352 if (register_ts_record) 3353 pend->ts_reg_info.in_use = 0; 3354 spin_unlock(&interrupt->wait_list_lock); 3355 3356 *status = HL_WAIT_CS_STATUS_COMPLETED; 3357 3358 if (register_ts_record) { 3359 *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns(); 3360 goto put_ts_buff; 3361 } else { 3362 pend->fence.timestamp = ktime_get(); 3363 goto set_timestamp; 3364 } 3365 } else if (!timeout_us) { 3366 spin_unlock(&interrupt->wait_list_lock); 3367 *status = HL_WAIT_CS_STATUS_BUSY; 3368 pend->fence.timestamp = ktime_get(); 3369 goto set_timestamp; 3370 } 3371 3372 /* Add pending user interrupt to relevant list for the interrupt 3373 * handler to monitor. 3374 * Note that we cannot have sorted list by target value, 3375 * in order to shorten the list pass loop, since 3376 * same list could have nodes for different cq counter handle. 3377 * Note: 3378 * Mark ts buff offset as in use here in the spinlock protection area 3379 * to avoid getting in the re-use section in ts_buff_get_kernel_ts_record 3380 * before adding the node to the list. this scenario might happen when 3381 * multiple threads are racing on same offset and one thread could 3382 * set the ts buff in ts_buff_get_kernel_ts_record then the other thread 3383 * takes over and get to ts_buff_get_kernel_ts_record and then we will try 3384 * to re-use the same ts buff offset, and will try to delete a non existing 3385 * node from the list. 3386 */ 3387 if (register_ts_record) 3388 pend->ts_reg_info.in_use = 1; 3389 3390 list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); 3391 spin_unlock(&interrupt->wait_list_lock); 3392 3393 if (register_ts_record) { 3394 rc = *status = HL_WAIT_CS_STATUS_COMPLETED; 3395 goto ts_registration_exit; 3396 } 3397 3398 /* Wait for interrupt handler to signal completion */ 3399 completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, 3400 timeout); 3401 if (completion_rc > 0) { 3402 *status = HL_WAIT_CS_STATUS_COMPLETED; 3403 } else { 3404 if (completion_rc == -ERESTARTSYS) { 3405 dev_err_ratelimited(hdev->dev, 3406 "user process got signal while waiting for interrupt ID %d\n", 3407 interrupt->interrupt_id); 3408 rc = -EINTR; 3409 *status = HL_WAIT_CS_STATUS_ABORTED; 3410 } else { 3411 if (pend->fence.error == -EIO) { 3412 dev_err_ratelimited(hdev->dev, 3413 "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n", 3414 pend->fence.error); 3415 rc = -EIO; 3416 *status = HL_WAIT_CS_STATUS_ABORTED; 3417 } else { 3418 /* The wait has timed-out. We don't know anything beyond that 3419 * because the workload wasn't submitted through the driver. 3420 * Therefore, from driver's perspective, the workload is still 3421 * executing. 3422 */ 3423 rc = 0; 3424 *status = HL_WAIT_CS_STATUS_BUSY; 3425 } 3426 } 3427 } 3428 3429 /* 3430 * We keep removing the node from list here, and not at the irq handler 3431 * for completion timeout case. and if it's a registration 3432 * for ts record, the node will be deleted in the irq handler after 3433 * we reach the target value. 3434 */ 3435 spin_lock(&interrupt->wait_list_lock); 3436 list_del(&pend->wait_list_node); 3437 spin_unlock(&interrupt->wait_list_lock); 3438 3439 set_timestamp: 3440 *timestamp = ktime_to_ns(pend->fence.timestamp); 3441 kfree(pend); 3442 hl_cb_put(cq_cb); 3443 ts_registration_exit: 3444 hl_ctx_put(ctx); 3445 3446 return rc; 3447 3448 put_ts_buff: 3449 hl_mmap_mem_buf_put(buf); 3450 put_cq_cb: 3451 hl_cb_put(cq_cb); 3452 put_ctx: 3453 hl_ctx_put(ctx); 3454 3455 return rc; 3456 } 3457 3458 static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx, 3459 u64 timeout_us, u64 user_address, 3460 u64 target_value, struct hl_user_interrupt *interrupt, 3461 u32 *status, 3462 u64 *timestamp) 3463 { 3464 struct hl_user_pending_interrupt *pend; 3465 unsigned long timeout; 3466 u64 completion_value; 3467 long completion_rc; 3468 int rc = 0; 3469 3470 timeout = hl_usecs64_to_jiffies(timeout_us); 3471 3472 hl_ctx_get(ctx); 3473 3474 pend = kzalloc(sizeof(*pend), GFP_KERNEL); 3475 if (!pend) { 3476 hl_ctx_put(ctx); 3477 return -ENOMEM; 3478 } 3479 3480 hl_fence_init(&pend->fence, ULONG_MAX); 3481 3482 /* Add pending user interrupt to relevant list for the interrupt 3483 * handler to monitor 3484 */ 3485 spin_lock(&interrupt->wait_list_lock); 3486 list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); 3487 spin_unlock(&interrupt->wait_list_lock); 3488 3489 /* We check for completion value as interrupt could have been received 3490 * before we added the node to the wait list 3491 */ 3492 if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) { 3493 dev_err(hdev->dev, "Failed to copy completion value from user\n"); 3494 rc = -EFAULT; 3495 goto remove_pending_user_interrupt; 3496 } 3497 3498 if (completion_value >= target_value) { 3499 *status = HL_WAIT_CS_STATUS_COMPLETED; 3500 /* There was no interrupt, we assume the completion is now. */ 3501 pend->fence.timestamp = ktime_get(); 3502 } else { 3503 *status = HL_WAIT_CS_STATUS_BUSY; 3504 } 3505 3506 if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED)) 3507 goto remove_pending_user_interrupt; 3508 3509 wait_again: 3510 /* Wait for interrupt handler to signal completion */ 3511 completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, 3512 timeout); 3513 3514 /* If timeout did not expire we need to perform the comparison. 3515 * If comparison fails, keep waiting until timeout expires 3516 */ 3517 if (completion_rc > 0) { 3518 spin_lock(&interrupt->wait_list_lock); 3519 /* reinit_completion must be called before we check for user 3520 * completion value, otherwise, if interrupt is received after 3521 * the comparison and before the next wait_for_completion, 3522 * we will reach timeout and fail 3523 */ 3524 reinit_completion(&pend->fence.completion); 3525 spin_unlock(&interrupt->wait_list_lock); 3526 3527 if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) { 3528 dev_err(hdev->dev, "Failed to copy completion value from user\n"); 3529 rc = -EFAULT; 3530 3531 goto remove_pending_user_interrupt; 3532 } 3533 3534 if (completion_value >= target_value) { 3535 *status = HL_WAIT_CS_STATUS_COMPLETED; 3536 } else if (pend->fence.error) { 3537 dev_err_ratelimited(hdev->dev, 3538 "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n", 3539 pend->fence.error); 3540 /* set the command completion status as ABORTED */ 3541 *status = HL_WAIT_CS_STATUS_ABORTED; 3542 } else { 3543 timeout = completion_rc; 3544 goto wait_again; 3545 } 3546 } else if (completion_rc == -ERESTARTSYS) { 3547 dev_err_ratelimited(hdev->dev, 3548 "user process got signal while waiting for interrupt ID %d\n", 3549 interrupt->interrupt_id); 3550 rc = -EINTR; 3551 } else { 3552 /* The wait has timed-out. We don't know anything beyond that 3553 * because the workload wasn't submitted through the driver. 3554 * Therefore, from driver's perspective, the workload is still 3555 * executing. 3556 */ 3557 rc = 0; 3558 *status = HL_WAIT_CS_STATUS_BUSY; 3559 } 3560 3561 remove_pending_user_interrupt: 3562 spin_lock(&interrupt->wait_list_lock); 3563 list_del(&pend->wait_list_node); 3564 spin_unlock(&interrupt->wait_list_lock); 3565 3566 *timestamp = ktime_to_ns(pend->fence.timestamp); 3567 3568 kfree(pend); 3569 hl_ctx_put(ctx); 3570 3571 return rc; 3572 } 3573 3574 static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) 3575 { 3576 u16 interrupt_id, first_interrupt, last_interrupt; 3577 struct hl_device *hdev = hpriv->hdev; 3578 struct asic_fixed_properties *prop; 3579 struct hl_user_interrupt *interrupt; 3580 union hl_wait_cs_args *args = data; 3581 u32 status = HL_WAIT_CS_STATUS_BUSY; 3582 u64 timestamp = 0; 3583 int rc, int_idx; 3584 3585 prop = &hdev->asic_prop; 3586 3587 if (!(prop->user_interrupt_count + prop->user_dec_intr_count)) { 3588 dev_err(hdev->dev, "no user interrupts allowed"); 3589 return -EPERM; 3590 } 3591 3592 interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags); 3593 3594 first_interrupt = prop->first_available_user_interrupt; 3595 last_interrupt = prop->first_available_user_interrupt + prop->user_interrupt_count - 1; 3596 3597 if (interrupt_id < prop->user_dec_intr_count) { 3598 3599 /* Check if the requested core is enabled */ 3600 if (!(prop->decoder_enabled_mask & BIT(interrupt_id))) { 3601 dev_err(hdev->dev, "interrupt on a disabled core(%u) not allowed", 3602 interrupt_id); 3603 return -EINVAL; 3604 } 3605 3606 interrupt = &hdev->user_interrupt[interrupt_id]; 3607 3608 } else if (interrupt_id >= first_interrupt && interrupt_id <= last_interrupt) { 3609 3610 int_idx = interrupt_id - first_interrupt + prop->user_dec_intr_count; 3611 interrupt = &hdev->user_interrupt[int_idx]; 3612 3613 } else if (interrupt_id == HL_COMMON_USER_CQ_INTERRUPT_ID) { 3614 interrupt = &hdev->common_user_cq_interrupt; 3615 } else if (interrupt_id == HL_COMMON_DEC_INTERRUPT_ID) { 3616 interrupt = &hdev->common_decoder_interrupt; 3617 } else { 3618 dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id); 3619 return -EINVAL; 3620 } 3621 3622 if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) 3623 rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->mem_mgr, &hpriv->mem_mgr, 3624 args->in.interrupt_timeout_us, args->in.cq_counters_handle, 3625 args->in.cq_counters_offset, 3626 args->in.target, interrupt, 3627 !!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT), 3628 args->in.timestamp_handle, args->in.timestamp_offset, 3629 &status, ×tamp); 3630 else 3631 rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx, 3632 args->in.interrupt_timeout_us, args->in.addr, 3633 args->in.target, interrupt, &status, 3634 ×tamp); 3635 if (rc) 3636 return rc; 3637 3638 memset(args, 0, sizeof(*args)); 3639 args->out.status = status; 3640 3641 if (timestamp) { 3642 args->out.timestamp_nsec = timestamp; 3643 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; 3644 } 3645 3646 return 0; 3647 } 3648 3649 int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data) 3650 { 3651 struct hl_device *hdev = hpriv->hdev; 3652 union hl_wait_cs_args *args = data; 3653 u32 flags = args->in.flags; 3654 int rc; 3655 3656 /* If the device is not operational, or if an error has happened and user should release the 3657 * device, there is no point in waiting for any command submission or user interrupt. 3658 */ 3659 if (!hl_device_operational(hpriv->hdev, NULL) || hdev->reset_info.watchdog_active) 3660 return -EBUSY; 3661 3662 if (flags & HL_WAIT_CS_FLAGS_INTERRUPT) 3663 rc = hl_interrupt_wait_ioctl(hpriv, data); 3664 else if (flags & HL_WAIT_CS_FLAGS_MULTI_CS) 3665 rc = hl_multi_cs_wait_ioctl(hpriv, data); 3666 else 3667 rc = hl_cs_wait_ioctl(hpriv, data); 3668 3669 return rc; 3670 } 3671