Lines Matching +full:cs +full:- +full:0

1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright 2016-2021 HabanaLabs, Ltd.
23 * enum hl_cs_wait_status - cs wait status
24 * @CS_WAIT_STATUS_BUSY: cs was not completed yet
25 * @CS_WAIT_STATUS_COMPLETED: cs completed
26 * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
47 * CS outcome store supports the following operations: in hl_push_cs_outcome()
48 * push outcome - store a recent CS outcome in the store in hl_push_cs_outcome()
49 * pop outcome - retrieve a SPECIFIC (by seq) CS outcome from the store in hl_push_cs_outcome()
51 * It has a pre-allocated amount of nodes, each node stores in hl_push_cs_outcome()
52 * a single CS outcome. in hl_push_cs_outcome()
66 spin_lock_irqsave(&outcome_store->db_lock, flags); in hl_push_cs_outcome()
68 if (list_empty(&outcome_store->free_list)) { in hl_push_cs_outcome()
69 node = list_last_entry(&outcome_store->used_list, in hl_push_cs_outcome()
71 hash_del(&node->map_link); in hl_push_cs_outcome()
72 dev_dbg(hdev->dev, "CS %llu outcome was lost\n", node->seq); in hl_push_cs_outcome()
74 node = list_last_entry(&outcome_store->free_list, in hl_push_cs_outcome()
78 list_del_init(&node->list_link); in hl_push_cs_outcome()
80 node->seq = seq; in hl_push_cs_outcome()
81 node->ts = ts; in hl_push_cs_outcome()
82 node->error = error; in hl_push_cs_outcome()
84 list_add(&node->list_link, &outcome_store->used_list); in hl_push_cs_outcome()
85 hash_add(outcome_store->outcome_map, &node->map_link, node->seq); in hl_push_cs_outcome()
87 spin_unlock_irqrestore(&outcome_store->db_lock, flags); in hl_push_cs_outcome()
96 spin_lock_irqsave(&outcome_store->db_lock, flags); in hl_pop_cs_outcome()
98 hash_for_each_possible(outcome_store->outcome_map, node, map_link, seq) in hl_pop_cs_outcome()
99 if (node->seq == seq) { in hl_pop_cs_outcome()
100 *ts = node->ts; in hl_pop_cs_outcome()
101 *error = node->error; in hl_pop_cs_outcome()
103 hash_del(&node->map_link); in hl_pop_cs_outcome()
104 list_del_init(&node->list_link); in hl_pop_cs_outcome()
105 list_add(&node->list_link, &outcome_store->free_list); in hl_pop_cs_outcome()
107 spin_unlock_irqrestore(&outcome_store->db_lock, flags); in hl_pop_cs_outcome()
112 spin_unlock_irqrestore(&outcome_store->db_lock, flags); in hl_pop_cs_outcome()
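/*
 * Illustrative pairing of the two helpers above (editorial sketch, not part
 * of the file; the wrapper name is hypothetical): the completion path
 * records the outcome of a timestamped CS keyed by its sequence number, and
 * a later status query calls hl_pop_cs_outcome() with the same sequence to
 * read the stored timestamp and error back. If the pre-allocated nodes ran
 * out in between, the oldest outcome was recycled and the lookup misses.
 */
static void example_record_cs_outcome(struct hl_device *hdev, struct hl_cs *cs)
{
	/* same call that cs_do_release() makes further down in this file */
	hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence,
			   cs->fence->timestamp, cs->fence->error);
}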
121 struct hl_device *hdev = hw_sob->hdev; in hl_sob_reset()
123 dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id); in hl_sob_reset()
125 hdev->asic_funcs->reset_sob(hdev, hw_sob); in hl_sob_reset()
127 hw_sob->need_reset = false; in hl_sob_reset()
134 struct hl_device *hdev = hw_sob->hdev; in hl_sob_reset_error()
136 dev_crit(hdev->dev, in hl_sob_reset_error()
138 hw_sob->q_idx, hw_sob->sob_id); in hl_sob_reset_error()
144 kref_put(&hw_sob->kref, hl_sob_reset); in hw_sob_put()
150 kref_put(&hw_sob->kref, hl_sob_reset_error); in hw_sob_put_err()
156 kref_get(&hw_sob->kref); in hw_sob_get()
160 * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
165 * Return: 0 if given parameters are valid
171 if (sob_mask == 0) in hl_gen_sob_mask()
172 return -EINVAL; in hl_gen_sob_mask()
174 if (sob_mask == 0x1) { in hl_gen_sob_mask()
175 *mask = ~(1 << (sob_base & 0x7)); in hl_gen_sob_mask()
178 for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--) in hl_gen_sob_mask()
182 if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1)) in hl_gen_sob_mask()
183 return -EINVAL; in hl_gen_sob_mask()
188 return 0; in hl_gen_sob_mask()
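/*
 * Worked example for hl_gen_sob_mask() (editorial note): with sob_base = 10
 * and sob_mask = 0x1, the SOB's offset inside its monitored group is
 * 10 & 0x7 = 2, so the function stores ~(1 << 2), i.e. every mask bit set
 * except bit 2. A multi-bit sob_mask is rejected with -EINVAL when the
 * offset of its most significant bit, added to sob_base's offset, reaches
 * past the HL_MAX_SOBS_PER_MONITOR SOBs a single monitor can watch.
 */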
205 kref_put(&fence->refcount, hl_fence_release); in hl_fence_put()
212 for (i = 0; i < len; i++, fence++) in hl_fences_put()
219 kref_get(&fence->refcount); in hl_fence_get()
224 kref_init(&fence->refcount); in hl_fence_init()
225 fence->cs_sequence = sequence; in hl_fence_init()
226 fence->error = 0; in hl_fence_init()
227 fence->timestamp = ktime_set(0, 0); in hl_fence_init()
228 fence->mcs_handling_done = false; in hl_fence_init()
229 init_completion(&fence->completion); in hl_fence_init()
232 void cs_get(struct hl_cs *cs) in cs_get() argument
234 kref_get(&cs->refcount); in cs_get()
237 static int cs_get_unless_zero(struct hl_cs *cs) in cs_get_unless_zero() argument
239 return kref_get_unless_zero(&cs->refcount); in cs_get_unless_zero()
242 static void cs_put(struct hl_cs *cs) in cs_put() argument
244 kref_put(&cs->refcount, cs_do_release); in cs_put()
256 kref_put(&job->refcount, cs_job_do_release); in hl_cs_job_put()
259 bool cs_needs_completion(struct hl_cs *cs) in cs_needs_completion() argument
261 /* In case this is a staged CS, only the last CS in sequence should in cs_needs_completion()
262 * get a completion, any non staged CS will always get a completion in cs_needs_completion()
264 if (cs->staged_cs && !cs->staged_last) in cs_needs_completion()
270 bool cs_needs_timeout(struct hl_cs *cs) in cs_needs_timeout() argument
272 /* In case this is a staged CS, only the first CS in sequence should in cs_needs_timeout()
273 * get a timeout, any non staged CS will always get a timeout in cs_needs_timeout()
275 if (cs->staged_cs && !cs->staged_first) in cs_needs_timeout()
284 return (job->queue_type == QUEUE_TYPE_EXT); in is_cb_patched()
288 * cs_parser - parse the user command submission
300 struct hl_device *hdev = hpriv->hdev; in cs_parser()
304 parser.ctx_id = job->cs->ctx->asid; in cs_parser()
305 parser.cs_sequence = job->cs->sequence; in cs_parser()
306 parser.job_id = job->id; in cs_parser()
308 parser.hw_queue_id = job->hw_queue_id; in cs_parser()
309 parser.job_userptr_list = &job->userptr_list; in cs_parser()
311 parser.user_cb = job->user_cb; in cs_parser()
312 parser.user_cb_size = job->user_cb_size; in cs_parser()
313 parser.queue_type = job->queue_type; in cs_parser()
314 parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb; in cs_parser()
315 job->patched_cb = NULL; in cs_parser()
316 parser.completion = cs_needs_completion(job->cs); in cs_parser()
318 rc = hdev->asic_funcs->cs_parser(hdev, &parser); in cs_parser()
322 job->patched_cb = parser.patched_cb; in cs_parser()
323 job->job_cb_size = parser.patched_cb_size; in cs_parser()
324 job->contains_dma_pkt = parser.contains_dma_pkt; in cs_parser()
325 atomic_inc(&job->patched_cb->cs_cnt); in cs_parser()
331 * won't be accessed again for this CS in cs_parser()
333 atomic_dec(&job->user_cb->cs_cnt); in cs_parser()
334 hl_cb_put(job->user_cb); in cs_parser()
335 job->user_cb = NULL; in cs_parser()
337 job->job_cb_size = job->user_cb_size; in cs_parser()
345 struct hl_cs *cs = job->cs; in hl_complete_job() local
348 hl_userptr_delete_list(hdev, &job->userptr_list); in hl_complete_job()
354 if (job->patched_cb) { in hl_complete_job()
355 atomic_dec(&job->patched_cb->cs_cnt); in hl_complete_job()
356 hl_cb_put(job->patched_cb); in hl_complete_job()
365 if (job->is_kernel_allocated_cb && in hl_complete_job()
366 (job->queue_type == QUEUE_TYPE_HW || job->queue_type == QUEUE_TYPE_INT)) { in hl_complete_job()
367 atomic_dec(&job->user_cb->cs_cnt); in hl_complete_job()
368 hl_cb_put(job->user_cb); in hl_complete_job()
375 spin_lock(&cs->job_lock); in hl_complete_job()
376 list_del(&job->cs_node); in hl_complete_job()
377 spin_unlock(&cs->job_lock); in hl_complete_job()
381 /* We decrement reference only for a CS that gets completion in hl_complete_job()
382 * because the reference was incremented only for this kind of CS in hl_complete_job()
385 * In staged submission, only the last CS marked as 'staged_last' in hl_complete_job()
387 * As for all the rest CS's in the staged submission which do not get in hl_complete_job()
388 * completion, their CS reference will be decremented by the in hl_complete_job()
389 * 'staged_last' CS during the CS release flow. in hl_complete_job()
390 * All relevant PQ CI counters will be incremented during the CS release in hl_complete_job()
393 if (cs_needs_completion(cs) && in hl_complete_job()
394 (job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW)) { in hl_complete_job()
396 /* In CS based completions, the timestamp is already available, in hl_complete_job()
399 if (hdev->asic_prop.completion_mode == HL_COMPLETION_MODE_JOB) in hl_complete_job()
400 cs->completion_timestamp = job->timestamp; in hl_complete_job()
402 cs_put(cs); in hl_complete_job()
409 * hl_staged_cs_find_first - locate the first CS in this staged submission
414 * @note: This function must be called under 'hdev->cs_mirror_lock'
416 * Find and return a CS pointer with the given sequence
420 struct hl_cs *cs; in hl_staged_cs_find_first() local
422 list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node) in hl_staged_cs_find_first()
423 if (cs->staged_cs && cs->staged_first && in hl_staged_cs_find_first()
424 cs->sequence == cs_seq) in hl_staged_cs_find_first()
425 return cs; in hl_staged_cs_find_first()
431 * is_staged_cs_last_exists - returns true if the last CS in sequence exists
434 * @cs: staged submission member
437 bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs) in is_staged_cs_last_exists() argument
441 last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs, in is_staged_cs_last_exists()
444 if (last_entry->staged_last) in is_staged_cs_last_exists()
451 * staged_cs_get - get CS reference if this CS is a part of a staged CS
454 * @cs: current CS
457 * Increment CS reference for every CS in this staged submission except for
458 * the CS which get completion.
460 static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs) in staged_cs_get() argument
462 /* Only the last CS in this staged submission will get a completion. in staged_cs_get()
463 * We must increment the reference for all other CS's in this in staged_cs_get()
467 if (!cs->staged_last) in staged_cs_get()
468 cs_get(cs); in staged_cs_get()
472 * staged_cs_put - put a CS in case it is part of staged submission
475 * @cs: CS to put
477 * This function decrements a CS reference (for a non completion CS)
479 static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs) in staged_cs_put() argument
481 /* We release all CS's in a staged submission except the last in staged_cs_put()
482 * CS which we have never incremented its reference. in staged_cs_put()
484 if (!cs_needs_completion(cs)) in staged_cs_put()
485 cs_put(cs); in staged_cs_put()
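/*
 * Illustrative lifetime of a three-part staged submission (editorial
 * sketch), for CS A (staged_first), B, and C (staged_last):
 *
 *	A: staged_cs_get() takes an extra reference; cs_needs_timeout() is
 *	   true, so only A arms a TDR work item
 *	B: staged_cs_get() takes an extra reference
 *	C: no extra reference; cs_needs_completion() is true, so C is the
 *	   one that signals completion
 *
 * When C is released, cs_do_release() walks the staged list and calls
 * staged_cs_put() on its members, dropping the references taken above so
 * that A and B can be released as well.
 */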
488 static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) in cs_handle_tdr() argument
492 if (!cs_needs_timeout(cs)) in cs_handle_tdr()
495 spin_lock(&hdev->cs_mirror_lock); in cs_handle_tdr()
498 * Hence, we choose the CS that reaches this function first which is in cs_handle_tdr()
499 * the CS marked as 'staged_last'. in cs_handle_tdr()
500 * In case a single staged cs was submitted which has both first and last in cs_handle_tdr()
502 * removed the cs node from the list before getting here, in cs_handle_tdr()
503 * in such cases just continue with the cs to cancel its TDR work. in cs_handle_tdr()
505 if (cs->staged_cs && cs->staged_last) { in cs_handle_tdr()
506 first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence); in cs_handle_tdr()
508 cs = first_cs; in cs_handle_tdr()
511 spin_unlock(&hdev->cs_mirror_lock); in cs_handle_tdr()
513 /* Don't cancel TDR in case this CS was timedout because we might be in cs_handle_tdr()
516 if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT) in cs_handle_tdr()
519 if (cs->tdr_active) in cs_handle_tdr()
520 cancel_delayed_work_sync(&cs->work_tdr); in cs_handle_tdr()
522 spin_lock(&hdev->cs_mirror_lock); in cs_handle_tdr()
524 /* queue TDR for next CS */ in cs_handle_tdr()
525 list_for_each_entry(iter, &hdev->cs_mirror_list, mirror_node) in cs_handle_tdr()
531 if (next && !next->tdr_active) { in cs_handle_tdr()
532 next->tdr_active = true; in cs_handle_tdr()
533 schedule_delayed_work(&next->work_tdr, next->timeout_jiffies); in cs_handle_tdr()
536 spin_unlock(&hdev->cs_mirror_lock); in cs_handle_tdr()
540 * force_complete_multi_cs - complete all contexts that wait on multi-CS
548 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { in force_complete_multi_cs()
551 mcs_compl = &hdev->multi_cs_completion[i]; in force_complete_multi_cs()
553 spin_lock(&mcs_compl->lock); in force_complete_multi_cs()
555 if (!mcs_compl->used) { in force_complete_multi_cs()
556 spin_unlock(&mcs_compl->lock); in force_complete_multi_cs()
561 * multi-CS. in force_complete_multi_cs()
565 dev_err(hdev->dev, in force_complete_multi_cs()
566 "multi-CS completion context %d still waiting when calling force completion\n", in force_complete_multi_cs()
568 complete_all(&mcs_compl->completion); in force_complete_multi_cs()
569 spin_unlock(&mcs_compl->lock); in force_complete_multi_cs()
574 * complete_multi_cs - complete all waiting entities on multi-CS
577 * @cs: CS structure
579 * with the completed CS.
581 * - a completed CS worked on stream master QID 4, multi CS completion
584 * - a completed CS worked on stream master QID 4, multi CS completion
588 static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs) in complete_multi_cs() argument
590 struct hl_fence *fence = cs->fence; in complete_multi_cs()
593 /* in case of multi CS check for completion only for the first CS */ in complete_multi_cs()
594 if (cs->staged_cs && !cs->staged_first) in complete_multi_cs()
597 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { in complete_multi_cs()
600 mcs_compl = &hdev->multi_cs_completion[i]; in complete_multi_cs()
601 if (!mcs_compl->used) in complete_multi_cs()
604 spin_lock(&mcs_compl->lock); in complete_multi_cs()
609 * 2. the completed CS has at least one overlapping stream in complete_multi_cs()
612 if (mcs_compl->used && in complete_multi_cs()
613 (fence->stream_master_qid_map & in complete_multi_cs()
614 mcs_compl->stream_master_qid_map)) { in complete_multi_cs()
615 /* extract the timestamp only of first completed CS */ in complete_multi_cs()
616 if (!mcs_compl->timestamp) in complete_multi_cs()
617 mcs_compl->timestamp = ktime_to_ns(fence->timestamp); in complete_multi_cs()
619 complete_all(&mcs_compl->completion); in complete_multi_cs()
625 * least one CS will be set as completed when polling in complete_multi_cs()
628 fence->mcs_handling_done = true; in complete_multi_cs()
631 spin_unlock(&mcs_compl->lock); in complete_multi_cs()
633 /* In case CS completed without mcs completion initialized */ in complete_multi_cs()
634 fence->mcs_handling_done = true; in complete_multi_cs()
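/*
 * Worked example for the overlap test above (editorial sketch): a waiter
 * that registered a multi-CS completion on stream master QIDs 4 and 5 has
 * the bits assigned to those QIDs set in mcs_compl->stream_master_qid_map.
 * A completed CS that ran a job on stream master QID 4 carries that QID's
 * bit in fence->stream_master_qid_map, so the bitwise AND is non-zero and
 * complete_all() wakes the waiter; a CS that only used other stream
 * masters shares no bits and leaves this completion context untouched.
 */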
638 struct hl_cs *cs, in cs_release_sob_reset_handler() argument
641 /* Skip this handler if the cs wasn't submitted, to avoid putting in cs_release_sob_reset_handler()
645 if (!hl_cs_cmpl->hw_sob || !cs->submitted) in cs_release_sob_reset_handler()
648 spin_lock(&hl_cs_cmpl->lock); in cs_release_sob_reset_handler()
651 * we get refcount upon reservation of signals or signal/wait cs for the in cs_release_sob_reset_handler()
652 * hw_sob object, and need to put it when the first staged cs in cs_release_sob_reset_handler()
653 * (which contains the encaps signals) or cs signal/wait is completed. in cs_release_sob_reset_handler()
655 if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) || in cs_release_sob_reset_handler()
656 (hl_cs_cmpl->type == CS_TYPE_WAIT) || in cs_release_sob_reset_handler()
657 (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) || in cs_release_sob_reset_handler()
658 (!!hl_cs_cmpl->encaps_signals)) { in cs_release_sob_reset_handler()
659 dev_dbg(hdev->dev, in cs_release_sob_reset_handler()
660 "CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n", in cs_release_sob_reset_handler()
661 hl_cs_cmpl->cs_seq, in cs_release_sob_reset_handler()
662 hl_cs_cmpl->type, in cs_release_sob_reset_handler()
663 hl_cs_cmpl->hw_sob->sob_id, in cs_release_sob_reset_handler()
664 hl_cs_cmpl->sob_val); in cs_release_sob_reset_handler()
666 hw_sob_put(hl_cs_cmpl->hw_sob); in cs_release_sob_reset_handler()
668 if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) in cs_release_sob_reset_handler()
669 hdev->asic_funcs->reset_sob_group(hdev, in cs_release_sob_reset_handler()
670 hl_cs_cmpl->sob_group); in cs_release_sob_reset_handler()
673 spin_unlock(&hl_cs_cmpl->lock); in cs_release_sob_reset_handler()
678 struct hl_cs *cs = container_of(ref, struct hl_cs, refcount); in cs_do_release() local
679 struct hl_device *hdev = cs->ctx->hdev; in cs_do_release()
682 container_of(cs->fence, struct hl_cs_compl, base_fence); in cs_do_release()
684 cs->completed = true; in cs_do_release()
688 * finished, because each one of them took refcnt to CS, we still in cs_do_release()
690 * will have leaked memory and what's worse, the CS object (and in cs_do_release()
694 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) in cs_do_release()
697 if (!cs->submitted) { in cs_do_release()
699 * In case the wait for signal CS was submitted, the fence put in cs_do_release()
703 if (cs->type == CS_TYPE_WAIT || in cs_do_release()
704 cs->type == CS_TYPE_COLLECTIVE_WAIT) in cs_do_release()
705 hl_fence_put(cs->signal_fence); in cs_do_release()
711 hl_hw_queue_update_ci(cs); in cs_do_release()
713 /* remove CS from CS mirror list */ in cs_do_release()
714 spin_lock(&hdev->cs_mirror_lock); in cs_do_release()
715 list_del_init(&cs->mirror_node); in cs_do_release()
716 spin_unlock(&hdev->cs_mirror_lock); in cs_do_release()
718 cs_handle_tdr(hdev, cs); in cs_do_release()
720 if (cs->staged_cs) { in cs_do_release()
721 /* the completion CS decrements reference for the entire in cs_do_release()
724 if (cs->staged_last) { in cs_do_release()
728 &cs->staged_cs_node, staged_cs_node) in cs_do_release()
732 /* A staged CS will be a member in the list only after it in cs_do_release()
736 if (cs->submitted) { in cs_do_release()
737 spin_lock(&hdev->cs_mirror_lock); in cs_do_release()
738 list_del(&cs->staged_cs_node); in cs_do_release()
739 spin_unlock(&hdev->cs_mirror_lock); in cs_do_release()
742 /* decrement refcount to handle when first staged cs in cs_do_release()
745 if (hl_cs_cmpl->encaps_signals) in cs_do_release()
746 kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount, in cs_do_release()
750 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) && cs->encaps_signals) in cs_do_release()
751 kref_put(&cs->encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx); in cs_do_release()
757 hl_debugfs_remove_cs(cs); in cs_do_release()
759 hdev->shadow_cs_queue[cs->sequence & (hdev->asic_prop.max_pending_cs - 1)] = NULL; in cs_do_release()
765 if (cs->timedout) in cs_do_release()
766 cs->fence->error = -ETIMEDOUT; in cs_do_release()
767 else if (cs->aborted) in cs_do_release()
768 cs->fence->error = -EIO; in cs_do_release()
769 else if (!cs->submitted) in cs_do_release()
770 cs->fence->error = -EBUSY; in cs_do_release()
772 if (unlikely(cs->skip_reset_on_timeout)) { in cs_do_release()
773 dev_err(hdev->dev, in cs_do_release()
775 cs->sequence, in cs_do_release()
776 div_u64(jiffies - cs->submission_time_jiffies, HZ)); in cs_do_release()
779 if (cs->timestamp) { in cs_do_release()
780 cs->fence->timestamp = cs->completion_timestamp; in cs_do_release()
781 hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence, in cs_do_release()
782 cs->fence->timestamp, cs->fence->error); in cs_do_release()
785 hl_ctx_put(cs->ctx); in cs_do_release()
787 complete_all(&cs->fence->completion); in cs_do_release()
788 complete_multi_cs(hdev, cs); in cs_do_release()
790 cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl); in cs_do_release()
792 hl_fence_put(cs->fence); in cs_do_release()
794 kfree(cs->jobs_in_queue_cnt); in cs_do_release()
795 kfree(cs); in cs_do_release()
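/*
 * Editorial summary of the release flow above (descriptive only): the CS is
 * marked completed and all of its jobs are completed; for a submitted CS
 * the queue CI counters are updated, the CS is unlinked from the mirror
 * list, the TDR is handed over to the next pending CS, staged-submission
 * and encapsulated-signal references are unwound and the shadow queue slot
 * is cleared. The fence error is then set (-ETIMEDOUT / -EIO / -EBUSY for
 * timed out / aborted / never submitted), the outcome is pushed to the
 * context's outcome store when the CS was timestamped, the fence and any
 * multi-CS waiters are completed, the signal SOB is released if needed,
 * and the CS memory is freed.
 */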
800 struct hl_cs *cs = container_of(work, struct hl_cs, work_tdr.work); in cs_timedout() local
803 u64 event_mask = 0x0; in cs_timedout()
807 skip_reset_on_timeout = cs->skip_reset_on_timeout; in cs_timedout()
809 rc = cs_get_unless_zero(cs); in cs_timedout()
813 if ((!cs->submitted) || (cs->completed)) { in cs_timedout()
814 cs_put(cs); in cs_timedout()
818 hdev = cs->ctx->hdev; in cs_timedout()
821 if (hdev->reset_on_lockup) in cs_timedout()
824 hdev->reset_info.needs_reset = true; in cs_timedout()
826 /* Mark the CS as timed out so we won't try to cancel its TDR */ in cs_timedout()
827 cs->timedout = true; in cs_timedout()
830 /* Save only the first CS timeout parameters */ in cs_timedout()
831 rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0); in cs_timedout()
833 hdev->captured_err_info.cs_timeout.timestamp = ktime_get(); in cs_timedout()
834 hdev->captured_err_info.cs_timeout.seq = cs->sequence; in cs_timedout()
838 timeout_sec = jiffies_to_msecs(hdev->timeout_jiffies) / 1000; in cs_timedout()
840 switch (cs->type) { in cs_timedout()
842 dev_err(hdev->dev, in cs_timedout()
844 cs->sequence, timeout_sec); in cs_timedout()
848 dev_err(hdev->dev, in cs_timedout()
850 cs->sequence, timeout_sec); in cs_timedout()
854 dev_err(hdev->dev, in cs_timedout()
856 cs->sequence, timeout_sec); in cs_timedout()
860 dev_err(hdev->dev, in cs_timedout()
862 cs->sequence, timeout_sec); in cs_timedout()
868 dev_err(hdev->dev, "Error during system state dump %d\n", rc); in cs_timedout()
870 cs_put(cs); in cs_timedout()
887 struct hl_cs *cs; in allocate_cs() local
890 cntr = &hdev->aggregated_cs_counters; in allocate_cs()
892 cs = kzalloc(sizeof(*cs), GFP_ATOMIC); in allocate_cs()
893 if (!cs) in allocate_cs()
894 cs = kzalloc(sizeof(*cs), GFP_KERNEL); in allocate_cs()
896 if (!cs) { in allocate_cs()
897 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in allocate_cs()
898 atomic64_inc(&cntr->out_of_mem_drop_cnt); in allocate_cs()
899 return -ENOMEM; in allocate_cs()
905 cs->ctx = ctx; in allocate_cs()
906 cs->submitted = false; in allocate_cs()
907 cs->completed = false; in allocate_cs()
908 cs->type = cs_type; in allocate_cs()
909 cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP); in allocate_cs()
910 cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS); in allocate_cs()
911 cs->timeout_jiffies = timeout; in allocate_cs()
912 cs->skip_reset_on_timeout = in allocate_cs()
913 hdev->reset_info.skip_reset_on_timeout || in allocate_cs()
915 cs->submission_time_jiffies = jiffies; in allocate_cs()
916 INIT_LIST_HEAD(&cs->job_list); in allocate_cs()
917 INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout); in allocate_cs()
918 kref_init(&cs->refcount); in allocate_cs()
919 spin_lock_init(&cs->job_lock); in allocate_cs()
926 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in allocate_cs()
927 atomic64_inc(&cntr->out_of_mem_drop_cnt); in allocate_cs()
928 rc = -ENOMEM; in allocate_cs()
932 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, in allocate_cs()
933 sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); in allocate_cs()
934 if (!cs->jobs_in_queue_cnt) in allocate_cs()
935 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, in allocate_cs()
936 sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL); in allocate_cs()
938 if (!cs->jobs_in_queue_cnt) { in allocate_cs()
939 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in allocate_cs()
940 atomic64_inc(&cntr->out_of_mem_drop_cnt); in allocate_cs()
941 rc = -ENOMEM; in allocate_cs()
945 cs_cmpl->hdev = hdev; in allocate_cs()
946 cs_cmpl->type = cs->type; in allocate_cs()
947 spin_lock_init(&cs_cmpl->lock); in allocate_cs()
948 cs->fence = &cs_cmpl->base_fence; in allocate_cs()
950 spin_lock(&ctx->cs_lock); in allocate_cs()
952 cs_cmpl->cs_seq = ctx->cs_sequence; in allocate_cs()
953 other = ctx->cs_pending[cs_cmpl->cs_seq & in allocate_cs()
954 (hdev->asic_prop.max_pending_cs - 1)]; in allocate_cs()
956 if (other && !completion_done(&other->completion)) { in allocate_cs()
961 * This causes a deadlock because this CS will never be in allocate_cs()
962 * completed as it depends on future CS's for completion. in allocate_cs()
964 if (other->cs_sequence == user_sequence) in allocate_cs()
965 dev_crit_ratelimited(hdev->dev, in allocate_cs()
966 "Staged CS %llu deadlock due to lack of resources", in allocate_cs()
969 dev_dbg_ratelimited(hdev->dev, in allocate_cs()
970 "Rejecting CS because of too many in-flights CS\n"); in allocate_cs()
971 atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt); in allocate_cs()
972 atomic64_inc(&cntr->max_cs_in_flight_drop_cnt); in allocate_cs()
973 rc = -EAGAIN; in allocate_cs()
978 hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq); in allocate_cs()
980 cs->sequence = cs_cmpl->cs_seq; in allocate_cs()
982 ctx->cs_pending[cs_cmpl->cs_seq & in allocate_cs()
983 (hdev->asic_prop.max_pending_cs - 1)] = in allocate_cs()
984 &cs_cmpl->base_fence; in allocate_cs()
985 ctx->cs_sequence++; in allocate_cs()
987 hl_fence_get(&cs_cmpl->base_fence); in allocate_cs()
991 spin_unlock(&ctx->cs_lock); in allocate_cs()
993 *cs_new = cs; in allocate_cs()
995 return 0; in allocate_cs()
998 spin_unlock(&ctx->cs_lock); in allocate_cs()
999 kfree(cs->jobs_in_queue_cnt); in allocate_cs()
1003 kfree(cs); in allocate_cs()
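/*
 * Editorial note on the in-flight limit enforced above: ctx->cs_pending is
 * a ring of max_pending_cs fence pointers indexed by the CS sequence masked
 * with (max_pending_cs - 1). A new sequence may only take a slot whose
 * previous fence has completed; e.g. assuming max_pending_cs = 64 purely
 * for illustration, sequence 70 maps to slot 6 and is rejected with -EAGAIN
 * until the fence of sequence 6 completes. When the blocking fence is the
 * caller's own staged sequence, the code reports a deadlock instead, since
 * that CS cannot complete while its series is still being built.
 */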
1008 static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs) in cs_rollback() argument
1012 staged_cs_put(hdev, cs); in cs_rollback()
1014 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) in cs_rollback()
1019 * release_reserved_encaps_signals() - release reserved encapsulated signals.
1022 * Release reserved encapsulated signals which weren't un-reserved, or for which a CS with
1023 * encapsulated signals wasn't submitted and thus weren't released as part of CS roll-back.
1037 mgr = &ctx->sig_mgr; in release_reserved_encaps_signals()
1039 idr_for_each_entry(&mgr->handles, handle, id) in release_reserved_encaps_signals()
1040 if (handle->cs_seq == ULLONG_MAX) in release_reserved_encaps_signals()
1041 kref_put(&handle->refcount, hl_encaps_release_handle_and_put_sob_ctx); in release_reserved_encaps_signals()
1049 struct hl_cs *cs, *tmp; in hl_cs_rollback_all() local
1052 flush_workqueue(hdev->ts_free_obj_wq); in hl_cs_rollback_all()
1054 /* flush all completions before iterating over the CS mirror list in in hl_cs_rollback_all()
1057 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) in hl_cs_rollback_all()
1058 flush_workqueue(hdev->cq_wq[i]); in hl_cs_rollback_all()
1060 flush_workqueue(hdev->cs_cmplt_wq); in hl_cs_rollback_all()
1063 /* Make sure we don't have leftovers in the CS mirror list */ in hl_cs_rollback_all()
1064 list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) { in hl_cs_rollback_all()
1065 cs_get(cs); in hl_cs_rollback_all()
1066 cs->aborted = true; in hl_cs_rollback_all()
1067 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n", in hl_cs_rollback_all()
1068 cs->ctx->asid, cs->sequence); in hl_cs_rollback_all()
1069 cs_rollback(hdev, cs); in hl_cs_rollback_all()
1070 cs_put(cs); in hl_cs_rollback_all()
1083 spin_lock(&interrupt->wait_list_lock); in wake_pending_user_interrupt_threads()
1084 list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) { in wake_pending_user_interrupt_threads()
1085 if (pend->ts_reg_info.buf) { in wake_pending_user_interrupt_threads()
1086 list_del(&pend->wait_list_node); in wake_pending_user_interrupt_threads()
1087 hl_mmap_mem_buf_put(pend->ts_reg_info.buf); in wake_pending_user_interrupt_threads()
1088 hl_cb_put(pend->ts_reg_info.cq_cb); in wake_pending_user_interrupt_threads()
1090 pend->fence.error = -EIO; in wake_pending_user_interrupt_threads()
1091 complete_all(&pend->fence.completion); in wake_pending_user_interrupt_threads()
1094 spin_unlock(&interrupt->wait_list_lock); in wake_pending_user_interrupt_threads()
1099 struct asic_fixed_properties *prop = &hdev->asic_prop; in hl_release_pending_user_interrupts()
1103 if (!prop->user_interrupt_count) in hl_release_pending_user_interrupts()
1113 for (i = 0 ; i < prop->user_interrupt_count ; i++) { in hl_release_pending_user_interrupts()
1114 interrupt = &hdev->user_interrupt[i]; in hl_release_pending_user_interrupts()
1118 interrupt = &hdev->common_user_cq_interrupt; in hl_release_pending_user_interrupts()
1121 interrupt = &hdev->common_decoder_interrupt; in hl_release_pending_user_interrupts()
1127 struct hl_cs *cs; in force_complete_cs() local
1129 spin_lock(&hdev->cs_mirror_lock); in force_complete_cs()
1131 list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) { in force_complete_cs()
1132 cs->fence->error = -EIO; in force_complete_cs()
1133 complete_all(&cs->fence->completion); in force_complete_cs()
1136 spin_unlock(&hdev->cs_mirror_lock); in force_complete_cs()
1149 struct hl_cs *cs = job->cs; in job_wq_completion() local
1150 struct hl_device *hdev = cs->ctx->hdev; in job_wq_completion()
1158 struct hl_cs *cs = container_of(work, struct hl_cs, finish_work); in cs_completion() local
1159 struct hl_device *hdev = cs->ctx->hdev; in cs_completion()
1162 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) in cs_completion()
1168 u32 active_cs_num = 0; in hl_get_active_cs_num()
1169 struct hl_cs *cs; in hl_get_active_cs_num() local
1171 spin_lock(&hdev->cs_mirror_lock); in hl_get_active_cs_num()
1173 list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) in hl_get_active_cs_num()
1174 if (!cs->completed) in hl_get_active_cs_num()
1177 spin_unlock(&hdev->cs_mirror_lock); in hl_get_active_cs_num()
1187 struct asic_fixed_properties *asic = &hdev->asic_prop; in validate_queue_index()
1190 /* This must be checked here to prevent out-of-bounds access to in validate_queue_index()
1193 if (chunk->queue_index >= asic->max_queues) { in validate_queue_index()
1194 dev_err(hdev->dev, "Queue index %d is invalid\n", in validate_queue_index()
1195 chunk->queue_index); in validate_queue_index()
1196 return -EINVAL; in validate_queue_index()
1199 hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; in validate_queue_index()
1201 if (hw_queue_prop->type == QUEUE_TYPE_NA) { in validate_queue_index()
1202 dev_err(hdev->dev, "Queue index %d is not applicable\n", in validate_queue_index()
1203 chunk->queue_index); in validate_queue_index()
1204 return -EINVAL; in validate_queue_index()
1207 if (hw_queue_prop->binned) { in validate_queue_index()
1208 dev_err(hdev->dev, "Queue index %d is binned out\n", in validate_queue_index()
1209 chunk->queue_index); in validate_queue_index()
1210 return -EINVAL; in validate_queue_index()
1213 if (hw_queue_prop->driver_only) { in validate_queue_index()
1214 dev_err(hdev->dev, in validate_queue_index()
1216 chunk->queue_index); in validate_queue_index()
1217 return -EINVAL; in validate_queue_index()
1223 if (hw_queue_prop->type == QUEUE_TYPE_HW) { in validate_queue_index()
1224 if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) { in validate_queue_index()
1225 if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) { in validate_queue_index()
1226 dev_err(hdev->dev, in validate_queue_index()
1228 chunk->queue_index); in validate_queue_index()
1229 return -EINVAL; in validate_queue_index()
1234 if (!(hw_queue_prop->cb_alloc_flags & in validate_queue_index()
1236 dev_err(hdev->dev, in validate_queue_index()
1238 chunk->queue_index); in validate_queue_index()
1239 return -EINVAL; in validate_queue_index()
1245 *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags in validate_queue_index()
1249 *queue_type = hw_queue_prop->type; in validate_queue_index()
1250 return 0; in validate_queue_index()
1259 cb = hl_cb_get(mmg, chunk->cb_handle); in get_cb_from_cs_chunk()
1261 dev_err(hdev->dev, "CB handle 0x%llx invalid\n", chunk->cb_handle); in get_cb_from_cs_chunk()
1265 if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) { in get_cb_from_cs_chunk()
1266 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size); in get_cb_from_cs_chunk()
1270 atomic_inc(&cb->cs_cnt); in get_cb_from_cs_chunk()
1291 kref_init(&job->refcount); in hl_cs_allocate_job()
1292 job->queue_type = queue_type; in hl_cs_allocate_job()
1293 job->is_kernel_allocated_cb = is_kernel_allocated_cb; in hl_cs_allocate_job()
1296 INIT_LIST_HEAD(&job->userptr_list); in hl_cs_allocate_job()
1298 if (job->queue_type == QUEUE_TYPE_EXT) in hl_cs_allocate_job()
1299 INIT_WORK(&job->finish_work, job_wq_completion); in hl_cs_allocate_job()
1328 struct hl_device *hdev = hpriv->hdev; in hl_cs_sanity_checks()
1329 struct hl_ctx *ctx = hpriv->ctx; in hl_cs_sanity_checks()
1336 for (i = 0 ; i < sizeof(args->in.pad) ; i++) in hl_cs_sanity_checks()
1337 if (args->in.pad[i]) { in hl_cs_sanity_checks()
1338 dev_dbg(hdev->dev, "Padding bytes must be 0\n"); in hl_cs_sanity_checks()
1339 return -EINVAL; in hl_cs_sanity_checks()
1343 return -EBUSY; in hl_cs_sanity_checks()
1346 if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) && in hl_cs_sanity_checks()
1347 !hdev->supports_staged_submission) { in hl_cs_sanity_checks()
1348 dev_err(hdev->dev, "staged submission not supported"); in hl_cs_sanity_checks()
1349 return -EPERM; in hl_cs_sanity_checks()
1352 cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK; in hl_cs_sanity_checks()
1355 dev_err(hdev->dev, in hl_cs_sanity_checks()
1356 "CS type flags are mutually exclusive, context %d\n", in hl_cs_sanity_checks()
1357 ctx->asid); in hl_cs_sanity_checks()
1358 return -EINVAL; in hl_cs_sanity_checks()
1362 num_chunks = args->in.num_chunks_execute; in hl_cs_sanity_checks()
1367 if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) { in hl_cs_sanity_checks()
1368 dev_err(hdev->dev, "Sync stream CS is not supported\n"); in hl_cs_sanity_checks()
1369 return -EINVAL; in hl_cs_sanity_checks()
1374 dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid); in hl_cs_sanity_checks()
1375 return -EINVAL; in hl_cs_sanity_checks()
1378 dev_err(hdev->dev, in hl_cs_sanity_checks()
1379 "Sync stream CS mandates one chunk only, context %d\n", in hl_cs_sanity_checks()
1380 ctx->asid); in hl_cs_sanity_checks()
1381 return -EINVAL; in hl_cs_sanity_checks()
1384 return 0; in hl_cs_sanity_checks()
1395 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in hl_cs_copy_chunk_array()
1396 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); in hl_cs_copy_chunk_array()
1397 dev_err(hdev->dev, in hl_cs_copy_chunk_array()
1400 return -EINVAL; in hl_cs_copy_chunk_array()
1409 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in hl_cs_copy_chunk_array()
1410 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); in hl_cs_copy_chunk_array()
1411 return -ENOMEM; in hl_cs_copy_chunk_array()
1416 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in hl_cs_copy_chunk_array()
1417 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); in hl_cs_copy_chunk_array()
1418 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n"); in hl_cs_copy_chunk_array()
1420 return -EFAULT; in hl_cs_copy_chunk_array()
1423 return 0; in hl_cs_copy_chunk_array()
1426 static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs, in cs_staged_submission() argument
1431 return 0; in cs_staged_submission()
1433 cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST); in cs_staged_submission()
1434 cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST); in cs_staged_submission()
1436 if (cs->staged_first) { in cs_staged_submission()
1437 /* Staged CS sequence is the first CS sequence */ in cs_staged_submission()
1438 INIT_LIST_HEAD(&cs->staged_cs_node); in cs_staged_submission()
1439 cs->staged_sequence = cs->sequence; in cs_staged_submission()
1441 if (cs->encaps_signals) in cs_staged_submission()
1442 cs->encaps_sig_hdl_id = encaps_signal_handle; in cs_staged_submission()
1447 cs->staged_sequence = sequence; in cs_staged_submission()
1450 /* Increment CS reference if needed */ in cs_staged_submission()
1451 staged_cs_get(hdev, cs); in cs_staged_submission()
1453 cs->staged_cs = true; in cs_staged_submission()
1455 return 0; in cs_staged_submission()
1462 for (i = 0; i < hdev->stream_master_qid_arr_size; i++) in get_stream_master_qid_mask()
1463 if (qid == hdev->stream_master_qid_arr[i]) in get_stream_master_qid_mask()
1466 return 0; in get_stream_master_qid_mask()
1475 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_default()
1478 struct hl_ctx *ctx = hpriv->ctx; in cs_ioctl_default()
1480 struct hl_cs *cs; in cs_ioctl_default() local
1483 u8 stream_master_qid_map = 0; in cs_ioctl_default()
1486 cntr = &hdev->aggregated_cs_counters; in cs_ioctl_default()
1491 hpriv->ctx); in cs_ioctl_default()
1501 rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, in cs_ioctl_default()
1502 staged_mid ? user_sequence : ULLONG_MAX, &cs, flags, in cs_ioctl_default()
1507 *cs_seq = cs->sequence; in cs_ioctl_default()
1509 hl_debugfs_add_cs(cs); in cs_ioctl_default()
1511 rc = cs_staged_submission(hdev, cs, user_sequence, flags, in cs_ioctl_default()
1517 * rather than the internal CS sequence in cs_ioctl_default()
1519 if (cs->staged_cs) in cs_ioctl_default()
1520 *cs_seq = cs->staged_sequence; in cs_ioctl_default()
1522 /* Validate ALL the CS chunks before submitting the CS */ in cs_ioctl_default()
1523 for (i = 0 ; i < num_chunks ; i++) { in cs_ioctl_default()
1531 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_default()
1532 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_default()
1537 cb = get_cb_from_cs_chunk(hdev, &hpriv->mem_mgr, chunk); in cs_ioctl_default()
1540 &ctx->cs_counters.validation_drop_cnt); in cs_ioctl_default()
1541 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_default()
1542 rc = -EINVAL; in cs_ioctl_default()
1546 cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle; in cs_ioctl_default()
1555 * queues of this CS in cs_ioctl_default()
1557 if (hdev->supports_wait_for_multi_cs) in cs_ioctl_default()
1560 chunk->queue_index); in cs_ioctl_default()
1569 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in cs_ioctl_default()
1570 atomic64_inc(&cntr->out_of_mem_drop_cnt); in cs_ioctl_default()
1571 dev_err(hdev->dev, "Failed to allocate a new job\n"); in cs_ioctl_default()
1572 rc = -ENOMEM; in cs_ioctl_default()
1579 job->id = i + 1; in cs_ioctl_default()
1580 job->cs = cs; in cs_ioctl_default()
1581 job->user_cb = cb; in cs_ioctl_default()
1582 job->user_cb_size = chunk->cb_size; in cs_ioctl_default()
1583 job->hw_queue_id = chunk->queue_index; in cs_ioctl_default()
1585 cs->jobs_in_queue_cnt[job->hw_queue_id]++; in cs_ioctl_default()
1586 cs->jobs_cnt++; in cs_ioctl_default()
1588 list_add_tail(&job->cs_node, &cs->job_list); in cs_ioctl_default()
1591 * Increment CS reference. When CS reference is 0, CS is in cs_ioctl_default()
1596 if (cs_needs_completion(cs) && in cs_ioctl_default()
1597 (job->queue_type == QUEUE_TYPE_EXT || in cs_ioctl_default()
1598 job->queue_type == QUEUE_TYPE_HW)) in cs_ioctl_default()
1599 cs_get(cs); in cs_ioctl_default()
1605 atomic64_inc(&ctx->cs_counters.parsing_drop_cnt); in cs_ioctl_default()
1606 atomic64_inc(&cntr->parsing_drop_cnt); in cs_ioctl_default()
1607 dev_err(hdev->dev, in cs_ioctl_default()
1608 "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n", in cs_ioctl_default()
1609 cs->ctx->asid, cs->sequence, job->id, rc); in cs_ioctl_default()
1614 /* We allow a CS with any queue type combination as long as it does in cs_ioctl_default()
1617 if (int_queues_only && cs_needs_completion(cs)) { in cs_ioctl_default()
1618 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_default()
1619 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_default()
1620 dev_err(hdev->dev, in cs_ioctl_default()
1621 "Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n", in cs_ioctl_default()
1622 cs->ctx->asid, cs->sequence); in cs_ioctl_default()
1623 rc = -EINVAL; in cs_ioctl_default()
1628 INIT_WORK(&cs->finish_work, cs_completion); in cs_ioctl_default()
1631 * store the (external/HW queues) streams used by the CS in the in cs_ioctl_default()
1632 * fence object for multi-CS completion in cs_ioctl_default()
1634 if (hdev->supports_wait_for_multi_cs) in cs_ioctl_default()
1635 cs->fence->stream_master_qid_map = stream_master_qid_map; in cs_ioctl_default()
1637 rc = hl_hw_queue_schedule_cs(cs); in cs_ioctl_default()
1639 if (rc != -EAGAIN) in cs_ioctl_default()
1640 dev_err(hdev->dev, in cs_ioctl_default()
1641 "Failed to submit CS %d.%llu to H/W queues, error %d\n", in cs_ioctl_default()
1642 cs->ctx->asid, cs->sequence, rc); in cs_ioctl_default()
1646 *signal_initial_sob_count = cs->initial_sob_count; in cs_ioctl_default()
1652 atomic_dec(&cb->cs_cnt); in cs_ioctl_default()
1655 cs_rollback(hdev, cs); in cs_ioctl_default()
1659 /* We finished with the CS in this function, so put the ref */ in cs_ioctl_default()
1660 cs_put(cs); in cs_ioctl_default()
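/*
 * Editorial summary of the default submission path above (descriptive
 * only): the user chunk array is copied in, the CS and its completion
 * fence are allocated, the staged-submission flags are applied, and each
 * chunk has its queue index validated, its CB resolved, and a job
 * allocated while the stream-master QID map is accumulated for multi-CS
 * waits. External and HW-queue jobs of a completion-carrying CS take an
 * extra CS reference before parsing, and a CS built only from
 * internal-queue jobs that still requests a completion is rejected. The
 * fence then stores the QID map and the CS goes to
 * hl_hw_queue_schedule_cs(); on failure the CB reference is dropped and
 * cs_rollback() undoes the submission.
 */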
1670 struct hl_device *hdev = hpriv->hdev; in hl_cs_ctx_switch()
1671 struct hl_ctx *ctx = hpriv->ctx; in hl_cs_ctx_switch()
1673 int rc = 0, do_ctx_switch = 0; in hl_cs_ctx_switch()
1679 if (hdev->supports_ctx_switch) in hl_cs_ctx_switch()
1680 do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); in hl_cs_ctx_switch()
1682 if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) { in hl_cs_ctx_switch()
1683 mutex_lock(&hpriv->restore_phase_mutex); in hl_cs_ctx_switch()
1686 rc = hdev->asic_funcs->context_switch(hdev, ctx->asid); in hl_cs_ctx_switch()
1688 dev_err_ratelimited(hdev->dev, in hl_cs_ctx_switch()
1689 "Failed to switch to context %d, rejecting CS! %d\n", in hl_cs_ctx_switch()
1690 ctx->asid, rc); in hl_cs_ctx_switch()
1693 * while we want to do context-switch (-EBUSY), in hl_cs_ctx_switch()
1694 * we need to soft-reset because QMAN is in hl_cs_ctx_switch()
1700 if ((rc == -ETIMEDOUT) || (rc == -EBUSY)) in hl_cs_ctx_switch()
1702 mutex_unlock(&hpriv->restore_phase_mutex); in hl_cs_ctx_switch()
1707 hdev->asic_funcs->restore_phase_topology(hdev); in hl_cs_ctx_switch()
1709 chunks = (void __user *) (uintptr_t) args->in.chunks_restore; in hl_cs_ctx_switch()
1710 num_chunks = args->in.num_chunks_restore; in hl_cs_ctx_switch()
1713 dev_dbg(hdev->dev, in hl_cs_ctx_switch()
1714 "Need to run restore phase but restore CS is empty\n"); in hl_cs_ctx_switch()
1715 rc = 0; in hl_cs_ctx_switch()
1718 cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count); in hl_cs_ctx_switch()
1721 mutex_unlock(&hpriv->restore_phase_mutex); in hl_cs_ctx_switch()
1724 dev_err(hdev->dev, in hl_cs_ctx_switch()
1725 "Failed to submit restore CS for context %d (%d)\n", in hl_cs_ctx_switch()
1726 ctx->asid, rc); in hl_cs_ctx_switch()
1735 jiffies_to_usecs(hdev->timeout_jiffies), in hl_cs_ctx_switch()
1738 if (ret == -ERESTARTSYS) { in hl_cs_ctx_switch()
1743 dev_err(hdev->dev, in hl_cs_ctx_switch()
1744 "Restore CS for context %d failed to complete %d\n", in hl_cs_ctx_switch()
1745 ctx->asid, ret); in hl_cs_ctx_switch()
1746 rc = -ENOEXEC; in hl_cs_ctx_switch()
1751 if (hdev->supports_ctx_switch) in hl_cs_ctx_switch()
1752 ctx->thread_ctx_switch_wait_token = 1; in hl_cs_ctx_switch()
1754 } else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) { in hl_cs_ctx_switch()
1756 &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), in hl_cs_ctx_switch()
1757 100, jiffies_to_usecs(hdev->timeout_jiffies), false); in hl_cs_ctx_switch()
1759 if (rc == -ETIMEDOUT) { in hl_cs_ctx_switch()
1760 dev_err(hdev->dev, in hl_cs_ctx_switch()
1767 if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset)) in hl_cs_ctx_switch()
1768 hl_device_reset(hdev, 0); in hl_cs_ctx_switch()
1779 * @hw_sob: the H/W SOB used in this signal CS.
1793 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; in hl_cs_signal_sob_wraparound_handler()
1798 if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) { in hl_cs_signal_sob_wraparound_handler()
1811 other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS; in hl_cs_signal_sob_wraparound_handler()
1812 other_sob = &prop->hw_sob[other_sob_offset]; in hl_cs_signal_sob_wraparound_handler()
1814 if (kref_read(&other_sob->kref) != 1) { in hl_cs_signal_sob_wraparound_handler()
1815 dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n", in hl_cs_signal_sob_wraparound_handler()
1817 return -EINVAL; in hl_cs_signal_sob_wraparound_handler()
1826 prop->next_sob_val = count + 1; in hl_cs_signal_sob_wraparound_handler()
1828 prop->next_sob_val = count; in hl_cs_signal_sob_wraparound_handler()
1831 prop->curr_sob_offset = other_sob_offset; in hl_cs_signal_sob_wraparound_handler()
1836 * for the reservation or the next signal cs. in hl_cs_signal_sob_wraparound_handler()
1837 * we do it here, and for both encaps and regular signal cs in hl_cs_signal_sob_wraparound_handler()
1841 * in addition, if we have combination of cs signal and in hl_cs_signal_sob_wraparound_handler()
1843 * no more reservations and only signal cs keep coming, in hl_cs_signal_sob_wraparound_handler()
1847 if (other_sob->need_reset) in hl_cs_signal_sob_wraparound_handler()
1852 sob->need_reset = true; in hl_cs_signal_sob_wraparound_handler()
1856 dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n", in hl_cs_signal_sob_wraparound_handler()
1857 prop->curr_sob_offset, q_idx); in hl_cs_signal_sob_wraparound_handler()
1859 prop->next_sob_val += count; in hl_cs_signal_sob_wraparound_handler()
1862 return 0; in hl_cs_signal_sob_wraparound_handler()
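/*
 * Worked example for the wraparound handler above (editorial sketch): as
 * long as next_sob_val + count stays below HL_MAX_SOB_VAL, the stream
 * keeps signaling on its current SOB and next_sob_val advances by count.
 * Once the value would overflow, the queue switches to its other reserved
 * SOB ((curr_sob_offset + 1) % HL_RSVD_SOBS); the switch is refused with
 * -EINVAL while that SOB is still referenced (kref != 1), otherwise the
 * count restarts from this submission, any pending reset of the new SOB is
 * handled, and curr_sob_offset moves to the new slot.
 */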
1871 int rc = 0; in cs_ioctl_extract_signal_seq()
1874 *signal_seq = chunk->encaps_signal_seq; in cs_ioctl_extract_signal_seq()
1875 return 0; in cs_ioctl_extract_signal_seq()
1878 signal_seq_arr_len = chunk->num_signal_seq_arr; in cs_ioctl_extract_signal_seq()
1882 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_extract_signal_seq()
1883 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); in cs_ioctl_extract_signal_seq()
1884 dev_err(hdev->dev, in cs_ioctl_extract_signal_seq()
1885 "Wait for signal CS supports only one signal CS seq\n"); in cs_ioctl_extract_signal_seq()
1886 return -EINVAL; in cs_ioctl_extract_signal_seq()
1897 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in cs_ioctl_extract_signal_seq()
1898 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); in cs_ioctl_extract_signal_seq()
1899 return -ENOMEM; in cs_ioctl_extract_signal_seq()
1904 u64_to_user_ptr(chunk->signal_seq_arr), in cs_ioctl_extract_signal_seq()
1906 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_extract_signal_seq()
1907 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); in cs_ioctl_extract_signal_seq()
1908 dev_err(hdev->dev, in cs_ioctl_extract_signal_seq()
1910 rc = -EFAULT; in cs_ioctl_extract_signal_seq()
1915 *signal_seq = signal_seq_arr[0]; in cs_ioctl_extract_signal_seq()
1924 struct hl_ctx *ctx, struct hl_cs *cs, in cs_ioctl_signal_wait_create_jobs() argument
1932 cntr = &hdev->aggregated_cs_counters; in cs_ioctl_signal_wait_create_jobs()
1936 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in cs_ioctl_signal_wait_create_jobs()
1937 atomic64_inc(&cntr->out_of_mem_drop_cnt); in cs_ioctl_signal_wait_create_jobs()
1938 dev_err(hdev->dev, "Failed to allocate a new job\n"); in cs_ioctl_signal_wait_create_jobs()
1939 return -ENOMEM; in cs_ioctl_signal_wait_create_jobs()
1942 if (cs->type == CS_TYPE_WAIT) in cs_ioctl_signal_wait_create_jobs()
1943 cb_size = hdev->asic_funcs->get_wait_cb_size(hdev); in cs_ioctl_signal_wait_create_jobs()
1945 cb_size = hdev->asic_funcs->get_signal_cb_size(hdev); in cs_ioctl_signal_wait_create_jobs()
1949 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in cs_ioctl_signal_wait_create_jobs()
1950 atomic64_inc(&cntr->out_of_mem_drop_cnt); in cs_ioctl_signal_wait_create_jobs()
1952 return -EFAULT; in cs_ioctl_signal_wait_create_jobs()
1955 job->id = 0; in cs_ioctl_signal_wait_create_jobs()
1956 job->cs = cs; in cs_ioctl_signal_wait_create_jobs()
1957 job->user_cb = cb; in cs_ioctl_signal_wait_create_jobs()
1958 atomic_inc(&job->user_cb->cs_cnt); in cs_ioctl_signal_wait_create_jobs()
1959 job->user_cb_size = cb_size; in cs_ioctl_signal_wait_create_jobs()
1960 job->hw_queue_id = q_idx; in cs_ioctl_signal_wait_create_jobs()
1962 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) in cs_ioctl_signal_wait_create_jobs()
1963 && cs->encaps_signals) in cs_ioctl_signal_wait_create_jobs()
1964 job->encaps_sig_wait_offset = encaps_signal_offset; in cs_ioctl_signal_wait_create_jobs()
1967 * We call hl_cb_destroy() out of two reasons - we don't need the CB in in cs_ioctl_signal_wait_create_jobs()
1971 job->patched_cb = job->user_cb; in cs_ioctl_signal_wait_create_jobs()
1972 job->job_cb_size = job->user_cb_size; in cs_ioctl_signal_wait_create_jobs()
1973 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); in cs_ioctl_signal_wait_create_jobs()
1976 cs_get(cs); in cs_ioctl_signal_wait_create_jobs()
1978 cs->jobs_in_queue_cnt[job->hw_queue_id]++; in cs_ioctl_signal_wait_create_jobs()
1979 cs->jobs_cnt++; in cs_ioctl_signal_wait_create_jobs()
1981 list_add_tail(&job->cs_node, &cs->job_list); in cs_ioctl_signal_wait_create_jobs()
1985 return 0; in cs_ioctl_signal_wait_create_jobs()
1995 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_reserve_signals()
2000 int rc = 0; in cs_ioctl_reserve_signals()
2003 dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n", in cs_ioctl_reserve_signals()
2005 rc = -EINVAL; in cs_ioctl_reserve_signals()
2009 if (q_idx >= hdev->asic_prop.max_queues) { in cs_ioctl_reserve_signals()
2010 dev_err(hdev->dev, "Queue index %d is invalid\n", in cs_ioctl_reserve_signals()
2012 rc = -EINVAL; in cs_ioctl_reserve_signals()
2016 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; in cs_ioctl_reserve_signals()
2018 if (!hw_queue_prop->supports_sync_stream) { in cs_ioctl_reserve_signals()
2019 dev_err(hdev->dev, in cs_ioctl_reserve_signals()
2022 rc = -EINVAL; in cs_ioctl_reserve_signals()
2026 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; in cs_ioctl_reserve_signals()
2030 rc = -ENOMEM; in cs_ioctl_reserve_signals()
2034 handle->count = count; in cs_ioctl_reserve_signals()
2036 hl_ctx_get(hpriv->ctx); in cs_ioctl_reserve_signals()
2037 handle->ctx = hpriv->ctx; in cs_ioctl_reserve_signals()
2038 mgr = &hpriv->ctx->sig_mgr; in cs_ioctl_reserve_signals()
2040 spin_lock(&mgr->lock); in cs_ioctl_reserve_signals()
2041 hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC); in cs_ioctl_reserve_signals()
2042 spin_unlock(&mgr->lock); in cs_ioctl_reserve_signals()
2044 if (hdl_id < 0) { in cs_ioctl_reserve_signals()
2045 dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n"); in cs_ioctl_reserve_signals()
2046 rc = -EINVAL; in cs_ioctl_reserve_signals()
2050 handle->id = hdl_id; in cs_ioctl_reserve_signals()
2051 handle->q_idx = q_idx; in cs_ioctl_reserve_signals()
2052 handle->hdev = hdev; in cs_ioctl_reserve_signals()
2053 kref_init(&handle->refcount); in cs_ioctl_reserve_signals()
2055 hdev->asic_funcs->hw_queues_lock(hdev); in cs_ioctl_reserve_signals()
2057 hw_sob = &prop->hw_sob[prop->curr_sob_offset]; in cs_ioctl_reserve_signals()
2068 dev_err(hdev->dev, "Failed to switch SOB\n"); in cs_ioctl_reserve_signals()
2069 hdev->asic_funcs->hw_queues_unlock(hdev); in cs_ioctl_reserve_signals()
2070 rc = -EINVAL; in cs_ioctl_reserve_signals()
2076 handle->hw_sob = hw_sob; in cs_ioctl_reserve_signals()
2081 handle->pre_sob_val = prop->next_sob_val - handle->count; in cs_ioctl_reserve_signals()
2083 handle->cs_seq = ULLONG_MAX; in cs_ioctl_reserve_signals()
2085 *signals_count = prop->next_sob_val; in cs_ioctl_reserve_signals()
2086 hdev->asic_funcs->hw_queues_unlock(hdev); in cs_ioctl_reserve_signals()
2088 *sob_addr = handle->hw_sob->sob_addr; in cs_ioctl_reserve_signals()
2091 dev_dbg(hdev->dev, in cs_ioctl_reserve_signals()
2092 "Signals reserved, sob_id: %d, sob addr: 0x%x, last sob_val: %u, q_idx: %d, hdl_id: %d\n", in cs_ioctl_reserve_signals()
2093 hw_sob->sob_id, handle->hw_sob->sob_addr, in cs_ioctl_reserve_signals()
2094 prop->next_sob_val - 1, q_idx, hdl_id); in cs_ioctl_reserve_signals()
2098 spin_lock(&mgr->lock); in cs_ioctl_reserve_signals()
2099 idr_remove(&mgr->handles, hdl_id); in cs_ioctl_reserve_signals()
2100 spin_unlock(&mgr->lock); in cs_ioctl_reserve_signals()
2103 hl_ctx_put(handle->ctx); in cs_ioctl_reserve_signals()
2114 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_unreserve_signals()
2118 int rc = 0; in cs_ioctl_unreserve_signals()
2120 mgr = &hpriv->ctx->sig_mgr; in cs_ioctl_unreserve_signals()
2122 spin_lock(&mgr->lock); in cs_ioctl_unreserve_signals()
2123 encaps_sig_hdl = idr_find(&mgr->handles, handle_id); in cs_ioctl_unreserve_signals()
2125 dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n", in cs_ioctl_unreserve_signals()
2126 handle_id, encaps_sig_hdl->hw_sob->sob_addr, in cs_ioctl_unreserve_signals()
2127 encaps_sig_hdl->count); in cs_ioctl_unreserve_signals()
2129 hdev->asic_funcs->hw_queues_lock(hdev); in cs_ioctl_unreserve_signals()
2131 q_idx = encaps_sig_hdl->q_idx; in cs_ioctl_unreserve_signals()
2132 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; in cs_ioctl_unreserve_signals()
2133 hw_sob = &prop->hw_sob[prop->curr_sob_offset]; in cs_ioctl_unreserve_signals()
2134 sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id); in cs_ioctl_unreserve_signals()
2138 * between the reserve-unreserve calls or SOB switch in cs_ioctl_unreserve_signals()
2141 if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count in cs_ioctl_unreserve_signals()
2142 != prop->next_sob_val || in cs_ioctl_unreserve_signals()
2143 sob_addr != encaps_sig_hdl->hw_sob->sob_addr) { in cs_ioctl_unreserve_signals()
2144 …dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %… in cs_ioctl_unreserve_signals()
2145 encaps_sig_hdl->pre_sob_val, in cs_ioctl_unreserve_signals()
2146 (prop->next_sob_val - encaps_sig_hdl->count)); in cs_ioctl_unreserve_signals()
2148 hdev->asic_funcs->hw_queues_unlock(hdev); in cs_ioctl_unreserve_signals()
2149 rc = -EINVAL; in cs_ioctl_unreserve_signals()
2157 prop->next_sob_val -= encaps_sig_hdl->count; in cs_ioctl_unreserve_signals()
2159 hdev->asic_funcs->hw_queues_unlock(hdev); in cs_ioctl_unreserve_signals()
2164 idr_remove(&mgr->handles, handle_id); in cs_ioctl_unreserve_signals()
2167 spin_unlock(&mgr->lock); in cs_ioctl_unreserve_signals()
2168 hl_ctx_put(encaps_sig_hdl->ctx); in cs_ioctl_unreserve_signals()
2172 rc = -EINVAL; in cs_ioctl_unreserve_signals()
2173 dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n"); in cs_ioctl_unreserve_signals()
2177 spin_unlock(&mgr->lock); in cs_ioctl_unreserve_signals()
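/*
 * Editorial note on the reserve/unreserve pairing above:
 * cs_ioctl_reserve_signals() advances next_sob_val of the queue's current
 * SOB by the reserved count, remembers the pre-reservation value in
 * handle->pre_sob_val, and hands back the handle id together with the SOB
 * address. cs_ioctl_unreserve_signals() may roll the reservation back only
 * while nothing else touched the stream, i.e. pre_sob_val + count still
 * equals next_sob_val and the SOB was not switched; otherwise it reports
 * that the SOB value ran out of sync and fails with -EINVAL.
 */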
2195 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_signal_wait()
2197 u32 q_idx, collective_engine_id = 0; in cs_ioctl_signal_wait()
2200 struct hl_ctx *ctx = hpriv->ctx; in cs_ioctl_signal_wait()
2202 struct hl_cs *cs; in cs_ioctl_signal_wait() local
2206 cntr = &hdev->aggregated_cs_counters; in cs_ioctl_signal_wait()
2215 chunk = &cs_chunk_array[0]; in cs_ioctl_signal_wait()
2217 if (chunk->queue_index >= hdev->asic_prop.max_queues) { in cs_ioctl_signal_wait()
2218 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2219 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2220 dev_err(hdev->dev, "Queue index %d is invalid\n", in cs_ioctl_signal_wait()
2221 chunk->queue_index); in cs_ioctl_signal_wait()
2222 rc = -EINVAL; in cs_ioctl_signal_wait()
2226 q_idx = chunk->queue_index; in cs_ioctl_signal_wait()
2227 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; in cs_ioctl_signal_wait()
2228 q_type = hw_queue_prop->type; in cs_ioctl_signal_wait()
2230 if (!hw_queue_prop->supports_sync_stream) { in cs_ioctl_signal_wait()
2231 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2232 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2233 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2236 rc = -EINVAL; in cs_ioctl_signal_wait()
2241 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) { in cs_ioctl_signal_wait()
2242 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2243 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2244 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2246 rc = -EINVAL; in cs_ioctl_signal_wait()
2250 if (!hdev->nic_ports_mask) { in cs_ioctl_signal_wait()
2251 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2252 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2253 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2255 rc = -EINVAL; in cs_ioctl_signal_wait()
2259 collective_engine_id = chunk->collective_engine_id; in cs_ioctl_signal_wait()
2274 /* check if cs sequence has encapsulated in cs_ioctl_signal_wait()
2280 spin_lock(&ctx->sig_mgr.lock); in cs_ioctl_signal_wait()
2281 idp = &ctx->sig_mgr.handles; in cs_ioctl_signal_wait()
2283 if (encaps_sig_hdl->cs_seq == signal_seq) { in cs_ioctl_signal_wait()
2285 * needed when multiple wait cs are used with offset in cs_ioctl_signal_wait()
2289 * is 0 but it has yet to be removed from the list. In this in cs_ioctl_signal_wait()
2292 if (kref_get_unless_zero(&encaps_sig_hdl->refcount)) in cs_ioctl_signal_wait()
2297 spin_unlock(&ctx->sig_mgr.lock); in cs_ioctl_signal_wait()
2300 /* treat as signal CS already finished */ in cs_ioctl_signal_wait()
2301 dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n", in cs_ioctl_signal_wait()
2303 rc = 0; in cs_ioctl_signal_wait()
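
The comment above points at a race: a handle can still be found in the IDR while its reference count is already dropping to zero, so the lookup takes a reference with kref_get_unless_zero() rather than kref_get(). A generic sketch of that lookup idiom (demo_handle and demo_lookup_get are illustrative names):

#include <linux/idr.h>
#include <linux/kref.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct demo_handle {
	struct kref refcount;
	u64 cs_seq;
};

/* Return a referenced handle matching @seq, or NULL if none is still alive. */
static struct demo_handle *demo_lookup_get(struct idr *idp, spinlock_t *lock, u64 seq)
{
	struct demo_handle *h, *found = NULL;
	int id;

	spin_lock(lock);
	idr_for_each_entry(idp, h, id) {
		if (h->cs_seq == seq && kref_get_unless_zero(&h->refcount)) {
			found = h;
			break;
		}
	}
	spin_unlock(lock);

	return found;	/* caller must kref_put() when done */
}
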
2308 if (chunk->encaps_signal_offset > in cs_ioctl_signal_wait()
2309 encaps_sig_hdl->count) { in cs_ioctl_signal_wait()
2310 dev_err(hdev->dev, "offset(%u) value exceeds max reserved signals count(%u)!\n", in cs_ioctl_signal_wait()
2311 chunk->encaps_signal_offset, in cs_ioctl_signal_wait()
2312 encaps_sig_hdl->count); in cs_ioctl_signal_wait()
2313 rc = -EINVAL; in cs_ioctl_signal_wait()
2320 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2321 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2322 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2323 "Failed to get signal CS with seq 0x%llx\n", in cs_ioctl_signal_wait()
2330 /* signal CS already finished */ in cs_ioctl_signal_wait()
2331 rc = 0; in cs_ioctl_signal_wait()
2339 (sig_waitcs_cmpl->type == CS_TYPE_DEFAULT && in cs_ioctl_signal_wait()
2342 if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL && in cs_ioctl_signal_wait()
2344 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2345 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2346 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2347 "CS seq 0x%llx is not of a signal/encaps-signal CS\n", in cs_ioctl_signal_wait()
2350 rc = -EINVAL; in cs_ioctl_signal_wait()
2354 if (completion_done(&sig_fence->completion)) { in cs_ioctl_signal_wait()
2355 /* signal CS already finished */ in cs_ioctl_signal_wait()
2357 rc = 0; in cs_ioctl_signal_wait()
2362 rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout); in cs_ioctl_signal_wait()
2371 * Save the signal CS fence for later initialization right before in cs_ioctl_signal_wait()
2372 * hanging the wait CS on the queue. in cs_ioctl_signal_wait()
2373 * for encaps signals case, we save the cs sequence and handle pointer in cs_ioctl_signal_wait()
2377 cs->signal_fence = sig_fence; in cs_ioctl_signal_wait()
2382 if (cs->encaps_signals) in cs_ioctl_signal_wait()
2383 cs->encaps_sig_hdl = encaps_sig_hdl; in cs_ioctl_signal_wait()
2386 hl_debugfs_add_cs(cs); in cs_ioctl_signal_wait()
2388 *cs_seq = cs->sequence; in cs_ioctl_signal_wait()
2391 rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type, in cs_ioctl_signal_wait()
2392 q_idx, chunk->encaps_signal_offset); in cs_ioctl_signal_wait()
2394 rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx, in cs_ioctl_signal_wait()
2395 cs, q_idx, collective_engine_id, in cs_ioctl_signal_wait()
2396 chunk->encaps_signal_offset); in cs_ioctl_signal_wait()
2398 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2399 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2400 rc = -EINVAL; in cs_ioctl_signal_wait()
2407 INIT_WORK(&cs->finish_work, cs_completion); in cs_ioctl_signal_wait()
2409 rc = hl_hw_queue_schedule_cs(cs); in cs_ioctl_signal_wait()
2411 /* In case wait cs failed here, it means the signal cs in cs_ioctl_signal_wait()
2416 rc = 0; in cs_ioctl_signal_wait()
2417 else if (rc != -EAGAIN) in cs_ioctl_signal_wait()
2418 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2419 "Failed to submit CS %d.%llu to H/W queues, error %d\n", in cs_ioctl_signal_wait()
2420 ctx->asid, cs->sequence, rc); in cs_ioctl_signal_wait()
2424 *signal_sob_addr_offset = cs->sob_addr_offset; in cs_ioctl_signal_wait()
2425 *signal_initial_sob_count = cs->initial_sob_count; in cs_ioctl_signal_wait()
2433 cs_rollback(hdev, cs); in cs_ioctl_signal_wait()
2437 /* We finished with the CS in this function, so put the ref */ in cs_ioctl_signal_wait()
2438 cs_put(cs); in cs_ioctl_signal_wait()
2441 kref_put(&encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx); in cs_ioctl_signal_wait()
2450 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_engine_cores()
2455 if (!hdev->asic_prop.supports_engine_modes) in cs_ioctl_engine_cores()
2456 return -EPERM; in cs_ioctl_engine_cores()
2458 if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) { in cs_ioctl_engine_cores()
2459 dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores); in cs_ioctl_engine_cores()
2460 return -EINVAL; in cs_ioctl_engine_cores()
2464 dev_err(hdev->dev, "Engine core command is invalid\n"); in cs_ioctl_engine_cores()
2465 return -EINVAL; in cs_ioctl_engine_cores()
2471 return -ENOMEM; in cs_ioctl_engine_cores()
2474 dev_err(hdev->dev, "Failed to copy core-ids array from user\n"); in cs_ioctl_engine_cores()
2476 return -EFAULT; in cs_ioctl_engine_cores()
2479 rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command); in cs_ioctl_engine_cores()
2488 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_engines()
2493 if (!hdev->asic_prop.supports_engine_modes) in cs_ioctl_engines()
2494 return -EPERM; in cs_ioctl_engines()
2497 dev_err(hdev->dev, "Engine command is invalid\n"); in cs_ioctl_engines()
2498 return -EINVAL; in cs_ioctl_engines()
2501 max_num_of_engines = hdev->asic_prop.max_num_of_engines; in cs_ioctl_engines()
2503 max_num_of_engines = hdev->asic_prop.num_engine_cores; in cs_ioctl_engines()
2506 dev_err(hdev->dev, "Number of engines %d is invalid\n", num_engines); in cs_ioctl_engines()
2507 return -EINVAL; in cs_ioctl_engines()
2513 return -ENOMEM; in cs_ioctl_engines()
2516 dev_err(hdev->dev, "Failed to copy engine-ids array from user\n"); in cs_ioctl_engines()
2518 return -EFAULT; in cs_ioctl_engines()
2521 rc = hdev->asic_funcs->set_engines(hdev, engines, num_engines, command); in cs_ioctl_engines()
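
cs_ioctl_engine_cores() and cs_ioctl_engines() share one shape: validate the element count against an ASIC property, copy the user array into kernel memory, and pass it to an asic_funcs callback. A stand-alone sketch of that copy-in step (demo_copy_engine_ids and the @apply callback are illustrative; the driver's own allocation helper may differ):

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

static int demo_copy_engine_ids(u64 user_ptr, u32 num, u32 max,
				int (*apply)(u32 *ids, u32 num))
{
	u32 *ids;
	int rc;

	if (!num || num > max)
		return -EINVAL;		/* mirrors the count checks above */

	ids = kmalloc_array(num, sizeof(*ids), GFP_KERNEL);
	if (!ids)
		return -ENOMEM;

	if (copy_from_user(ids, u64_to_user_ptr(user_ptr), num * sizeof(*ids))) {
		kfree(ids);
		return -EFAULT;
	}

	rc = apply(ids, num);		/* stand-in for the set_engines() callback */
	kfree(ids);
	return rc;
}
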
2529 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_flush_pci_hbw_writes()
2530 struct asic_fixed_properties *prop = &hdev->asic_prop; in cs_ioctl_flush_pci_hbw_writes()
2532 if (!prop->hbw_flush_reg) { in cs_ioctl_flush_pci_hbw_writes()
2533 dev_dbg(hdev->dev, "HBW flush is not supported\n"); in cs_ioctl_flush_pci_hbw_writes()
2534 return -EOPNOTSUPP; in cs_ioctl_flush_pci_hbw_writes()
2537 RREG32(prop->hbw_flush_reg); in cs_ioctl_flush_pci_hbw_writes()
2539 return 0; in cs_ioctl_flush_pci_hbw_writes()
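
cs_ioctl_flush_pci_hbw_writes() leans on PCIe ordering: a read from the device cannot complete before earlier posted writes reach it, so reading back any device register (here via RREG32() on hbw_flush_reg) flushes outstanding high-bandwidth writes. A generic equivalent with plain MMIO accessors (the register pointer is illustrative):

#include <linux/io.h>

/* Force completion of earlier posted MMIO writes by issuing a read to the device. */
static void demo_flush_posted_writes(void __iomem *flush_reg)
{
	/* The returned value is irrelevant; the read transaction does the flushing. */
	(void)readl(flush_reg);
}
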
2545 enum hl_cs_type cs_type = 0; in hl_cs_ioctl()
2549 signals_count = 0, sob_addr = 0, handle_id = 0; in hl_cs_ioctl()
2550 u16 sob_initial_count = 0; in hl_cs_ioctl()
2561 cs_type = hl_cs_get_cs_type(args->in.cs_flags & in hl_cs_ioctl()
2563 chunks = (void __user *) (uintptr_t) args->in.chunks_execute; in hl_cs_ioctl()
2564 num_chunks = args->in.num_chunks_execute; in hl_cs_ioctl()
2565 flags = args->in.cs_flags; in hl_cs_ioctl()
2567 /* In case this is a staged CS, user should supply the CS sequence */ in hl_cs_ioctl()
2570 cs_seq = args->in.seq; in hl_cs_ioctl()
2573 ? msecs_to_jiffies(args->in.timeout * 1000) in hl_cs_ioctl()
2574 : hpriv->hdev->timeout_jiffies; in hl_cs_ioctl()
2581 &cs_seq, args->in.cs_flags, timeout, in hl_cs_ioctl()
2586 args->in.encaps_signals_q_idx, in hl_cs_ioctl()
2587 args->in.encaps_signals_count, in hl_cs_ioctl()
2592 args->in.encaps_sig_handle_id); in hl_cs_ioctl()
2595 rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores, in hl_cs_ioctl()
2596 args->in.num_engine_cores, args->in.core_command); in hl_cs_ioctl()
2599 rc = cs_ioctl_engines(hpriv, args->in.engines, in hl_cs_ioctl()
2600 args->in.num_engines, args->in.engine_command); in hl_cs_ioctl()
2607 args->in.cs_flags, in hl_cs_ioctl()
2608 args->in.encaps_sig_handle_id, in hl_cs_ioctl()
2613 if (rc != -EAGAIN) { in hl_cs_ioctl()
2614 memset(args, 0, sizeof(*args)); in hl_cs_ioctl()
2618 args->out.handle_id = handle_id; in hl_cs_ioctl()
2619 args->out.sob_base_addr_offset = sob_addr; in hl_cs_ioctl()
2620 args->out.count = signals_count; in hl_cs_ioctl()
2623 args->out.sob_base_addr_offset = sob_addr; in hl_cs_ioctl()
2624 args->out.sob_count_before_submission = sob_initial_count; in hl_cs_ioctl()
2625 args->out.seq = cs_seq; in hl_cs_ioctl()
2628 args->out.sob_count_before_submission = sob_initial_count; in hl_cs_ioctl()
2629 args->out.seq = cs_seq; in hl_cs_ioctl()
2632 args->out.seq = cs_seq; in hl_cs_ioctl()
2636 args->out.status = rc; in hl_cs_ioctl()
2645 struct hl_device *hdev = ctx->hdev; in hl_wait_for_fence()
2648 int rc = 0, error; in hl_wait_for_fence()
2652 if (rc == -EINVAL) in hl_wait_for_fence()
2653 dev_notice_ratelimited(hdev->dev, in hl_wait_for_fence()
2654 "Can't wait on CS %llu because current CS is at seq %llu\n", in hl_wait_for_fence()
2655 seq, ctx->cs_sequence); in hl_wait_for_fence()
2660 if (!hl_pop_cs_outcome(&ctx->outcome_store, seq, &timestamp_kt, &error)) { in hl_wait_for_fence()
2661 dev_dbg(hdev->dev, in hl_wait_for_fence()
2662 "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n", in hl_wait_for_fence()
2663 seq, ctx->cs_sequence); in hl_wait_for_fence()
2665 return 0; in hl_wait_for_fence()
2673 completion_rc = completion_done(&fence->completion); in hl_wait_for_fence()
2681 &fence->completion, timeout); in hl_wait_for_fence()
2684 error = fence->error; in hl_wait_for_fence()
2685 timestamp_kt = fence->timestamp; in hl_wait_for_fence()
2688 if (completion_rc > 0) { in hl_wait_for_fence()
2696 if (completion_rc == -ERESTARTSYS) in hl_wait_for_fence()
2698 else if (error == -ETIMEDOUT || error == -EIO) in hl_wait_for_fence()
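
hl_wait_for_fence() maps the three outcomes of wait_for_completion_interruptible_timeout() onto a wait status: a positive return means the fence signalled, 0 means the timeout expired, and -ERESTARTSYS means a signal interrupted the wait; the fence's own error field then refines completed-with-error cases. A compact sketch of that mapping (demo_wait_status is an illustrative enum, not the driver's):

#include <linux/completion.h>
#include <linux/errno.h>

enum demo_wait_status { DEMO_BUSY, DEMO_COMPLETED, DEMO_INTERRUPTED };

static int demo_wait(struct completion *done, unsigned long timeout_jiffies,
		     enum demo_wait_status *status)
{
	long rc = wait_for_completion_interruptible_timeout(done, timeout_jiffies);

	if (rc > 0) {
		*status = DEMO_COMPLETED;	/* completed before the timeout */
		return 0;
	}
	if (rc == -ERESTARTSYS) {
		*status = DEMO_INTERRUPTED;	/* signal pending; caller may retry */
		return -ERESTARTSYS;
	}
	*status = DEMO_BUSY;			/* rc == 0: timed out, still busy */
	return 0;
}
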
2705 * hl_cs_poll_fences - iterate CS fences to check for CS completion
2707 * @mcs_data: multi-CS internal data
2708 * @mcs_compl: multi-CS completion structure
2710 * @return 0 on success, otherwise a non-zero error code
2712 * The function iterates over all CS sequences in the list and sets a bit in
2713 * completion_bitmap for each completed CS.
2716 * completion to the multi-CS context.
2721 struct hl_fence **fence_ptr = mcs_data->fence_arr; in hl_cs_poll_fences()
2722 struct hl_device *hdev = mcs_data->ctx->hdev; in hl_cs_poll_fences()
2723 int i, rc, arr_len = mcs_data->arr_len; in hl_cs_poll_fences()
2724 u64 *seq_arr = mcs_data->seq_arr; in hl_cs_poll_fences()
2728 memset(fence_ptr, 0, arr_len * sizeof(struct hl_fence *)); in hl_cs_poll_fences()
2731 rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len); in hl_cs_poll_fences()
2736 * re-initialize the completion here to handle 2 possible cases: in hl_cs_poll_fences()
2737 * 1. CS will complete the multi-CS prior clearing the completion. in which in hl_cs_poll_fences()
2738 * case the fence iteration is guaranteed to catch the CS completion. in hl_cs_poll_fences()
2739 * 2. the completion will occur after re-init of the completion. in hl_cs_poll_fences()
2742 reinit_completion(&mcs_compl->completion); in hl_cs_poll_fences()
2746 * this value is maintained, i.e. no timestamp was updated in hl_cs_poll_fences()
2748 max_ktime = ktime_set(KTIME_SEC_MAX, 0); in hl_cs_poll_fences()
2751 for (i = 0; i < arr_len; i++, fence_ptr++) { in hl_cs_poll_fences()
2755 * In order to prevent a case where we wait until timeout even though a CS associated in hl_cs_poll_fences()
2756 * with the multi-CS actually completed, we do things in the below order: in hl_cs_poll_fences()
2757 * 1. for each fence set it's QID map in the multi-CS completion QID map. This way in hl_cs_poll_fences()
2758 * any CS can, potentially, complete the multi CS for the specific QID (note in hl_cs_poll_fences()
2761 * 2. only after allowing multi-CS completion for the specific QID we check whether in hl_cs_poll_fences()
2762 * the specific CS already completed (and thus the wait for completion part will in hl_cs_poll_fences()
2763 * be skipped). If the CS has not completed, it is guaranteed that the completing CS will in hl_cs_poll_fences()
2767 mcs_compl->stream_master_qid_map |= fence->stream_master_qid_map; in hl_cs_poll_fences()
2770 * function won't sleep as it is called with timeout 0 (i.e. in hl_cs_poll_fences()
2773 rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence, &status, 0, NULL); in hl_cs_poll_fences()
2775 dev_err(hdev->dev, in hl_cs_poll_fences()
2776 "wait_for_fence error :%d for CS seq %llu\n", in hl_cs_poll_fences()
2783 /* CS did not finish, QID to wait on already stored */ in hl_cs_poll_fences()
2788 * returns to user indicating CS completed before it finished in hl_cs_poll_fences()
2794 if (fence && !fence->mcs_handling_done) { in hl_cs_poll_fences()
2796 * in case multi CS is completed but MCS handling not done in hl_cs_poll_fences()
2797 * we "complete" the multi CS to prevent it from waiting in hl_cs_poll_fences()
2798 * until time-out and the "multi-CS handling done" will have in hl_cs_poll_fences()
2801 complete_all(&mcs_compl->completion); in hl_cs_poll_fences()
2805 mcs_data->completion_bitmap |= BIT(i); in hl_cs_poll_fences()
2811 if (fence && mcs_data->update_ts && in hl_cs_poll_fences()
2812 (ktime_compare(fence->timestamp, first_cs_time) < 0)) in hl_cs_poll_fences()
2813 first_cs_time = fence->timestamp; in hl_cs_poll_fences()
2816 mcs_data->update_ts = false; in hl_cs_poll_fences()
2817 mcs_data->gone_cs = true; in hl_cs_poll_fences()
2821 * already gone. In this case, the CS is set as completed but in hl_cs_poll_fences()
2824 mcs_data->completion_bitmap |= BIT(i); in hl_cs_poll_fences()
2827 dev_err(hdev->dev, "Invalid fence status\n"); in hl_cs_poll_fences()
2828 rc = -EINVAL; in hl_cs_poll_fences()
2834 hl_fences_put(mcs_data->fence_arr, arr_len); in hl_cs_poll_fences()
2836 if (mcs_data->update_ts && in hl_cs_poll_fences()
2837 (ktime_compare(first_cs_time, max_ktime) != 0)) in hl_cs_poll_fences()
2838 mcs_data->timestamp = ktime_to_ns(first_cs_time); in hl_cs_poll_fences()
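
hl_cs_poll_fences() keeps the earliest completion time among the polled fences: first_cs_time starts at the largest representable ktime and is lowered whenever a completed fence carries an older timestamp, so a value still equal to the maximum means no timestamp was collected. The same min-tracking idiom in isolation (demo_earliest_ns assumes every entry holds a valid completion time):

#include <linux/ktime.h>

/* Return the earliest of @n completion times in ns, or 0 when @n is 0. */
static s64 demo_earliest_ns(const ktime_t *ts, int n)
{
	ktime_t max_kt = ktime_set(KTIME_SEC_MAX, 0);
	ktime_t first = max_kt;
	int i;

	for (i = 0; i < n; i++)
		if (ktime_compare(ts[i], first) < 0)
			first = ts[i];

	return ktime_compare(first, max_kt) != 0 ? ktime_to_ns(first) : 0;
}
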
2847 int rc = 0; in _hl_cs_wait_ioctl()
2850 *timestamp = 0; in _hl_cs_wait_ioctl()
2879 * hl_wait_multi_cs_completion_init - init completion structure
2897 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { in hl_wait_multi_cs_completion_init()
2898 mcs_compl = &hdev->multi_cs_completion[i]; in hl_wait_multi_cs_completion_init()
2899 spin_lock(&mcs_compl->lock); in hl_wait_multi_cs_completion_init()
2900 if (!mcs_compl->used) { in hl_wait_multi_cs_completion_init()
2901 mcs_compl->used = 1; in hl_wait_multi_cs_completion_init()
2902 mcs_compl->timestamp = 0; in hl_wait_multi_cs_completion_init()
2904 * init QID map to 0 to avoid completion by CSs. The actual QID map in hl_wait_multi_cs_completion_init()
2905 * of the CSs belonging to this multi-CS will be set incrementally at a later stage in hl_wait_multi_cs_completion_init()
2907 mcs_compl->stream_master_qid_map = 0; in hl_wait_multi_cs_completion_init()
2908 spin_unlock(&mcs_compl->lock); in hl_wait_multi_cs_completion_init()
2911 spin_unlock(&mcs_compl->lock); in hl_wait_multi_cs_completion_init()
2915 dev_err(hdev->dev, "no available multi-CS completion structure\n"); in hl_wait_multi_cs_completion_init()
2916 return ERR_PTR(-ENOMEM); in hl_wait_multi_cs_completion_init()
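
hl_wait_multi_cs_completion_init() claims the first unused entry of a fixed pool of completion structures, flipping its used flag under the per-entry lock, and fails with -ENOMEM when all MULTI_CS_MAX_USER_CTX entries are taken. The claim-a-slot pattern on its own (demo_slot and DEMO_MAX_SLOTS are illustrative; the pool is assumed to be initialized elsewhere):

#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/types.h>

#define DEMO_MAX_SLOTS 8

struct demo_slot {
	spinlock_t lock;
	bool used;
};

static struct demo_slot *demo_claim_slot(struct demo_slot *pool)
{
	int i;

	for (i = 0; i < DEMO_MAX_SLOTS; i++) {
		struct demo_slot *s = &pool[i];

		spin_lock(&s->lock);
		if (!s->used) {
			s->used = true;	/* slot is ours until released under the lock */
			spin_unlock(&s->lock);
			return s;
		}
		spin_unlock(&s->lock);
	}

	return ERR_PTR(-ENOMEM);	/* pool exhausted */
}
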
2922 * hl_wait_multi_cs_completion_fini - return completion structure and set as
2931 * free completion structure, do it under lock to be in sync with the in hl_wait_multi_cs_completion_fini()
2934 spin_lock(&mcs_compl->lock); in hl_wait_multi_cs_completion_fini()
2935 mcs_compl->used = 0; in hl_wait_multi_cs_completion_fini()
2936 spin_unlock(&mcs_compl->lock); in hl_wait_multi_cs_completion_fini()
2940 * hl_wait_multi_cs_completion - wait for first CS to complete
2942 * @mcs_data: multi-CS internal data
2944 * @return 0 on success, otherwise a non-zero error code
2951 completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion, in hl_wait_multi_cs_completion()
2952 mcs_data->timeout_jiffies); in hl_wait_multi_cs_completion()
2955 if (completion_rc > 0) in hl_wait_multi_cs_completion()
2956 mcs_data->timestamp = mcs_compl->timestamp; in hl_wait_multi_cs_completion()
2958 if (completion_rc == -ERESTARTSYS) in hl_wait_multi_cs_completion()
2961 mcs_data->wait_status = completion_rc; in hl_wait_multi_cs_completion()
2963 return 0; in hl_wait_multi_cs_completion()
2967 * hl_multi_cs_completion_init - init array of multi-CS completion structures
2976 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { in hl_multi_cs_completion_init()
2977 mcs_cmpl = &hdev->multi_cs_completion[i]; in hl_multi_cs_completion_init()
2978 mcs_cmpl->used = 0; in hl_multi_cs_completion_init()
2979 spin_lock_init(&mcs_cmpl->lock); in hl_multi_cs_completion_init()
2980 init_completion(&mcs_cmpl->completion); in hl_multi_cs_completion_init()
2985 * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl
2988 * @data: pointer to multi-CS wait ioctl in/out args
2994 struct hl_device *hdev = hpriv->hdev; in hl_multi_cs_wait_ioctl()
2997 struct hl_ctx *ctx = hpriv->ctx; in hl_multi_cs_wait_ioctl()
3005 for (i = 0 ; i < sizeof(args->in.pad) ; i++) in hl_multi_cs_wait_ioctl()
3006 if (args->in.pad[i]) { in hl_multi_cs_wait_ioctl()
3007 dev_dbg(hdev->dev, "Padding bytes must be 0\n"); in hl_multi_cs_wait_ioctl()
3008 return -EINVAL; in hl_multi_cs_wait_ioctl()
3011 if (!hdev->supports_wait_for_multi_cs) { in hl_multi_cs_wait_ioctl()
3012 dev_err(hdev->dev, "Wait for multi CS is not supported\n"); in hl_multi_cs_wait_ioctl()
3013 return -EPERM; in hl_multi_cs_wait_ioctl()
3016 seq_arr_len = args->in.seq_arr_len; in hl_multi_cs_wait_ioctl()
3019 dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n", in hl_multi_cs_wait_ioctl()
3021 return -EINVAL; in hl_multi_cs_wait_ioctl()
3028 return -ENOMEM; in hl_multi_cs_wait_ioctl()
3030 /* copy CS sequence array from user */ in hl_multi_cs_wait_ioctl()
3031 seq_arr = (void __user *) (uintptr_t) args->in.seq; in hl_multi_cs_wait_ioctl()
3034 dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n"); in hl_multi_cs_wait_ioctl()
3035 rc = -EFAULT; in hl_multi_cs_wait_ioctl()
3042 rc = -ENOMEM; in hl_multi_cs_wait_ioctl()
3046 /* initialize the multi-CS internal data */ in hl_multi_cs_wait_ioctl()
3054 /* wait (with timeout) for the first CS to be completed */ in hl_multi_cs_wait_ioctl()
3055 mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us); in hl_multi_cs_wait_ioctl()
3062 /* poll all CS fences, extract timestamp */ in hl_multi_cs_wait_ioctl()
3066 * skip wait for CS completion when one of the below is true: in hl_multi_cs_wait_ioctl()
3067 * - an error on the poll function in hl_multi_cs_wait_ioctl()
3068 * - one or more CS in the list completed in hl_multi_cs_wait_ioctl()
3069 * - the user called ioctl with timeout 0 in hl_multi_cs_wait_ioctl()
3071 if (rc || mcs_data.completion_bitmap || !args->in.timeout_us) in hl_multi_cs_wait_ioctl()
3076 if (rc || (mcs_data.wait_status == 0)) in hl_multi_cs_wait_ioctl()
3080 * poll fences once again to update the CS map. in hl_multi_cs_wait_ioctl()
3091 * it got a completion) it either got completed by a CS in the multi CS list in hl_multi_cs_wait_ioctl()
3093 * got completed by a CS submitted to one of the shared stream masters but in hl_multi_cs_wait_ioctl()
3094 * not in the multi CS list (in which case we should wait again but modify in hl_multi_cs_wait_ioctl()
3095 * the timeout and set timestamp as zero to let a CS related to the current in hl_multi_cs_wait_ioctl()
3096 * multi-CS set a new, relevant, timestamp) in hl_multi_cs_wait_ioctl()
3099 mcs_compl->timestamp = 0; in hl_multi_cs_wait_ioctl()
3112 if (rc == -ERESTARTSYS) { in hl_multi_cs_wait_ioctl()
3113 dev_err_ratelimited(hdev->dev, in hl_multi_cs_wait_ioctl()
3114 "user process got signal while waiting for Multi-CS\n"); in hl_multi_cs_wait_ioctl()
3115 rc = -EINTR; in hl_multi_cs_wait_ioctl()
3122 memset(args, 0, sizeof(*args)); in hl_multi_cs_wait_ioctl()
3125 args->out.status = HL_WAIT_CS_STATUS_COMPLETED; in hl_multi_cs_wait_ioctl()
3126 args->out.cs_completion_map = mcs_data.completion_bitmap; in hl_multi_cs_wait_ioctl()
3128 /* if timestamp is not 0, it's valid */ in hl_multi_cs_wait_ioctl()
3130 args->out.timestamp_nsec = mcs_data.timestamp; in hl_multi_cs_wait_ioctl()
3131 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; in hl_multi_cs_wait_ioctl()
3134 /* update if some CS was gone */ in hl_multi_cs_wait_ioctl()
3136 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; in hl_multi_cs_wait_ioctl()
3138 args->out.status = HL_WAIT_CS_STATUS_BUSY; in hl_multi_cs_wait_ioctl()
3141 return 0; in hl_multi_cs_wait_ioctl()
3146 struct hl_device *hdev = hpriv->hdev; in hl_cs_wait_ioctl()
3149 u64 seq = args->in.seq; in hl_cs_wait_ioctl()
3153 rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq, &status, &timestamp); in hl_cs_wait_ioctl()
3155 if (rc == -ERESTARTSYS) { in hl_cs_wait_ioctl()
3156 dev_err_ratelimited(hdev->dev, in hl_cs_wait_ioctl()
3157 "user process got signal while waiting for CS handle %llu\n", in hl_cs_wait_ioctl()
3159 return -EINTR; in hl_cs_wait_ioctl()
3162 memset(args, 0, sizeof(*args)); in hl_cs_wait_ioctl()
3165 if (rc == -ETIMEDOUT) { in hl_cs_wait_ioctl()
3166 dev_err_ratelimited(hdev->dev, in hl_cs_wait_ioctl()
3167 "CS %llu has timed-out while user process is waiting for it\n", in hl_cs_wait_ioctl()
3169 args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT; in hl_cs_wait_ioctl()
3170 } else if (rc == -EIO) { in hl_cs_wait_ioctl()
3171 dev_err_ratelimited(hdev->dev, in hl_cs_wait_ioctl()
3172 "CS %llu has been aborted while user process is waiting for it\n", in hl_cs_wait_ioctl()
3174 args->out.status = HL_WAIT_CS_STATUS_ABORTED; in hl_cs_wait_ioctl()
3180 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; in hl_cs_wait_ioctl()
3181 args->out.timestamp_nsec = timestamp; in hl_cs_wait_ioctl()
3186 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; in hl_cs_wait_ioctl()
3189 args->out.status = HL_WAIT_CS_STATUS_COMPLETED; in hl_cs_wait_ioctl()
3193 args->out.status = HL_WAIT_CS_STATUS_BUSY; in hl_cs_wait_ioctl()
3197 return 0; in hl_cs_wait_ioctl()
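
The out fields filled by hl_cs_wait_ioctl() (status, flags, timestamp_nsec) are what a user-space waiter consumes. A hedged sketch of such a caller; the header path, the HL_IOCTL_WAIT_CS request macro and the hl_wait_cs_args union layout are assumptions inferred from the field names visible above, so treat them as illustrative rather than the exact uapi:

/* Hypothetical user-space usage; verify names against the installed uapi header. */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>	/* assumed location of hl_wait_cs_args */

static int demo_wait_cs(int fd, unsigned long long seq, unsigned long long timeout_us)
{
	union hl_wait_cs_args args;

	memset(&args, 0, sizeof(args));
	args.in.seq = seq;
	args.in.timeout_us = timeout_us;

	if (ioctl(fd, HL_IOCTL_WAIT_CS, &args))		/* assumed request macro */
		return -1;

	if (args.out.flags & HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD)
		printf("CS %llu completed at %llu ns\n", seq,
		       (unsigned long long)args.out.timestamp_nsec);

	return args.out.status;	/* e.g. HL_WAIT_CS_STATUS_COMPLETED or _BUSY */
}
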
3206 struct hl_ts_buff *ts_buff = buf->private; in ts_buff_get_kernel_ts_record()
3208 (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + in ts_buff_get_kernel_ts_record()
3211 (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + in ts_buff_get_kernel_ts_record()
3212 (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt)); in ts_buff_get_kernel_ts_record()
3213 unsigned long iter_counter = 0; in ts_buff_get_kernel_ts_record()
3219 dev_err(buf->mmg->dev, "Ts offset exceeds max CB offset(0x%llx)\n", in ts_buff_get_kernel_ts_record()
3221 return -EINVAL; in ts_buff_get_kernel_ts_record()
3232 * then re-use it on other interrupt in ts_buff_get_kernel_ts_record()
3234 if (requested_offset_record->ts_reg_info.in_use) { in ts_buff_get_kernel_ts_record()
3235 current_cq_counter = *requested_offset_record->cq_kernel_addr; in ts_buff_get_kernel_ts_record()
3236 if (current_cq_counter < requested_offset_record->cq_target_value) { in ts_buff_get_kernel_ts_record()
3237 list_del(&requested_offset_record->wait_list_node); in ts_buff_get_kernel_ts_record()
3240 hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf); in ts_buff_get_kernel_ts_record()
3241 hl_cb_put(requested_offset_record->ts_reg_info.cq_cb); in ts_buff_get_kernel_ts_record()
3243 dev_dbg(buf->mmg->dev, in ts_buff_get_kernel_ts_record()
3244 "ts node removed from interrupt list now can re-use\n"); in ts_buff_get_kernel_ts_record()
3246 dev_dbg(buf->mmg->dev, in ts_buff_get_kernel_ts_record()
3253 dev_err(buf->mmg->dev, in ts_buff_get_kernel_ts_record()
3256 return -EAGAIN; in ts_buff_get_kernel_ts_record()
3263 requested_offset_record->ts_reg_info.buf = buf; in ts_buff_get_kernel_ts_record()
3264 requested_offset_record->ts_reg_info.cq_cb = cq_cb; in ts_buff_get_kernel_ts_record()
3265 requested_offset_record->ts_reg_info.timestamp_kernel_addr = in ts_buff_get_kernel_ts_record()
3266 (u64 *) ts_buff->user_buff_address + ts_offset; in ts_buff_get_kernel_ts_record()
3267 requested_offset_record->cq_kernel_addr = in ts_buff_get_kernel_ts_record()
3268 (u64 *) cq_cb->kernel_address + cq_offset; in ts_buff_get_kernel_ts_record()
3269 requested_offset_record->cq_target_value = target_value; in ts_buff_get_kernel_ts_record()
3276 dev_dbg(buf->mmg->dev, "Found available node in TS kernel CB %p\n", in ts_buff_get_kernel_ts_record()
3278 return 0; in ts_buff_get_kernel_ts_record()
3293 int rc = 0; in _hl_interrupt_wait_ioctl()
3301 rc = -EINVAL; in _hl_interrupt_wait_ioctl()
3306 if (((u64 *) cq_cb->kernel_address + cq_counters_offset) >= in _hl_interrupt_wait_ioctl()
3307 ((u64 *) cq_cb->kernel_address + (cq_cb->size / sizeof(u64)))) { in _hl_interrupt_wait_ioctl()
3308 rc = -EINVAL; in _hl_interrupt_wait_ioctl()
3313 dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n", in _hl_interrupt_wait_ioctl()
3314 interrupt->interrupt_id, ts_offset, cq_counters_offset); in _hl_interrupt_wait_ioctl()
3317 rc = -EINVAL; in _hl_interrupt_wait_ioctl()
3324 &interrupt->wait_list_lock, &pend); in _hl_interrupt_wait_ioctl()
3330 rc = -ENOMEM; in _hl_interrupt_wait_ioctl()
3333 hl_fence_init(&pend->fence, ULONG_MAX); in _hl_interrupt_wait_ioctl()
3334 pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset; in _hl_interrupt_wait_ioctl()
3335 pend->cq_target_value = target_value; in _hl_interrupt_wait_ioctl()
3338 spin_lock(&interrupt->wait_list_lock); in _hl_interrupt_wait_ioctl()
3343 if (*pend->cq_kernel_addr >= target_value) { in _hl_interrupt_wait_ioctl()
3345 pend->ts_reg_info.in_use = 0; in _hl_interrupt_wait_ioctl()
3346 spin_unlock(&interrupt->wait_list_lock); in _hl_interrupt_wait_ioctl()
3351 *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns(); in _hl_interrupt_wait_ioctl()
3354 pend->fence.timestamp = ktime_get(); in _hl_interrupt_wait_ioctl()
3358 spin_unlock(&interrupt->wait_list_lock); in _hl_interrupt_wait_ioctl()
3360 pend->fence.timestamp = ktime_get(); in _hl_interrupt_wait_ioctl()
3371 * to avoid getting in the re-use section in ts_buff_get_kernel_ts_record in _hl_interrupt_wait_ioctl()
3376 * to re-use the same ts buff offset, and will try to delete a non-existing in _hl_interrupt_wait_ioctl()
3380 pend->ts_reg_info.in_use = 1; in _hl_interrupt_wait_ioctl()
3382 list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); in _hl_interrupt_wait_ioctl()
3383 spin_unlock(&interrupt->wait_list_lock); in _hl_interrupt_wait_ioctl()
3391 completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, in _hl_interrupt_wait_ioctl()
3393 if (completion_rc > 0) { in _hl_interrupt_wait_ioctl()
3396 if (completion_rc == -ERESTARTSYS) { in _hl_interrupt_wait_ioctl()
3397 dev_err_ratelimited(hdev->dev, in _hl_interrupt_wait_ioctl()
3399 interrupt->interrupt_id); in _hl_interrupt_wait_ioctl()
3400 rc = -EINTR; in _hl_interrupt_wait_ioctl()
3403 if (pend->fence.error == -EIO) { in _hl_interrupt_wait_ioctl()
3404 dev_err_ratelimited(hdev->dev, in _hl_interrupt_wait_ioctl()
3406 pend->fence.error); in _hl_interrupt_wait_ioctl()
3407 rc = -EIO; in _hl_interrupt_wait_ioctl()
3410 /* The wait has timed out. We don't know anything beyond that in _hl_interrupt_wait_ioctl()
3415 rc = 0; in _hl_interrupt_wait_ioctl()
3427 spin_lock(&interrupt->wait_list_lock); in _hl_interrupt_wait_ioctl()
3428 list_del(&pend->wait_list_node); in _hl_interrupt_wait_ioctl()
3429 spin_unlock(&interrupt->wait_list_lock); in _hl_interrupt_wait_ioctl()
3432 *timestamp = ktime_to_ns(pend->fence.timestamp); in _hl_interrupt_wait_ioctl()
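
In its wait path _hl_interrupt_wait_ioctl() queues a pending node on the interrupt's wait list under the wait-list lock, sleeps on the node's completion with a timeout, and removes the node under the same lock once the wait ends. The add/wait/remove skeleton stripped of driver specifics (demo_pend is an illustrative type):

#include <linux/completion.h>
#include <linux/list.h>
#include <linux/spinlock.h>

struct demo_pend {
	struct list_head node;
	struct completion done;
};

static long demo_wait_on_interrupt(struct list_head *wait_head, spinlock_t *lock,
				   struct demo_pend *pend, unsigned long timeout)
{
	long rc;

	init_completion(&pend->done);

	spin_lock(lock);
	list_add_tail(&pend->node, wait_head);	/* now visible to the interrupt handler */
	spin_unlock(lock);

	/* >0: completed, 0: timed out, -ERESTARTSYS: interrupted by a signal */
	rc = wait_for_completion_interruptible_timeout(&pend->done, timeout);

	spin_lock(lock);
	list_del(&pend->node);
	spin_unlock(lock);

	return rc;
}
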
3460 int rc = 0; in _hl_interrupt_wait_ioctl_user_addr()
3469 return -ENOMEM; in _hl_interrupt_wait_ioctl_user_addr()
3472 hl_fence_init(&pend->fence, ULONG_MAX); in _hl_interrupt_wait_ioctl_user_addr()
3477 spin_lock(&interrupt->wait_list_lock); in _hl_interrupt_wait_ioctl_user_addr()
3478 list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); in _hl_interrupt_wait_ioctl_user_addr()
3479 spin_unlock(&interrupt->wait_list_lock); in _hl_interrupt_wait_ioctl_user_addr()
3485 dev_err(hdev->dev, "Failed to copy completion value from user\n"); in _hl_interrupt_wait_ioctl_user_addr()
3486 rc = -EFAULT; in _hl_interrupt_wait_ioctl_user_addr()
3493 pend->fence.timestamp = ktime_get(); in _hl_interrupt_wait_ioctl_user_addr()
3503 completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, in _hl_interrupt_wait_ioctl_user_addr()
3509 if (completion_rc > 0) { in _hl_interrupt_wait_ioctl_user_addr()
3510 spin_lock(&interrupt->wait_list_lock); in _hl_interrupt_wait_ioctl_user_addr()
3516 reinit_completion(&pend->fence.completion); in _hl_interrupt_wait_ioctl_user_addr()
3517 spin_unlock(&interrupt->wait_list_lock); in _hl_interrupt_wait_ioctl_user_addr()
3520 dev_err(hdev->dev, "Failed to copy completion value from user\n"); in _hl_interrupt_wait_ioctl_user_addr()
3521 rc = -EFAULT; in _hl_interrupt_wait_ioctl_user_addr()
3528 } else if (pend->fence.error) { in _hl_interrupt_wait_ioctl_user_addr()
3529 dev_err_ratelimited(hdev->dev, in _hl_interrupt_wait_ioctl_user_addr()
3531 pend->fence.error); in _hl_interrupt_wait_ioctl_user_addr()
3538 } else if (completion_rc == -ERESTARTSYS) { in _hl_interrupt_wait_ioctl_user_addr()
3539 dev_err_ratelimited(hdev->dev, in _hl_interrupt_wait_ioctl_user_addr()
3541 interrupt->interrupt_id); in _hl_interrupt_wait_ioctl_user_addr()
3542 rc = -EINTR; in _hl_interrupt_wait_ioctl_user_addr()
3544 /* The wait has timed out. We don't know anything beyond that in _hl_interrupt_wait_ioctl_user_addr()
3549 rc = 0; in _hl_interrupt_wait_ioctl_user_addr()
3554 spin_lock(&interrupt->wait_list_lock); in _hl_interrupt_wait_ioctl_user_addr()
3555 list_del(&pend->wait_list_node); in _hl_interrupt_wait_ioctl_user_addr()
3556 spin_unlock(&interrupt->wait_list_lock); in _hl_interrupt_wait_ioctl_user_addr()
3558 *timestamp = ktime_to_ns(pend->fence.timestamp); in _hl_interrupt_wait_ioctl_user_addr()
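
The user-address variant re-reads the user's completion value after each wake-up; if it is still below the target, the completion is re-armed with reinit_completion() and the wait resumes with the remaining time. A simplified sketch of that re-check loop (the driver additionally re-arms under the interrupt wait-list lock so a concurrent wake-up cannot be lost; that detail is omitted here):

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/uaccess.h>

/* @event must already be armed and queued on the interrupt's wait list. */
static int demo_wait_user_value(struct completion *event, u64 __user *uaddr,
				u64 target, unsigned long timeout)
{
	u64 val;
	long rc;

	for (;;) {
		rc = wait_for_completion_interruptible_timeout(event, timeout);
		if (rc == -ERESTARTSYS)
			return -EINTR;

		if (copy_from_user(&val, uaddr, sizeof(val)))
			return -EFAULT;
		if (val >= target)
			return 0;			/* target reached */
		if (rc == 0)
			return -ETIMEDOUT;		/* caller reports BUSY */

		reinit_completion(event);		/* spurious wake-up: wait again */
		timeout = rc;				/* with the time that is left */
	}
}
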
3569 struct hl_device *hdev = hpriv->hdev; in hl_interrupt_wait_ioctl()
3574 u64 timestamp = 0; in hl_interrupt_wait_ioctl()
3577 prop = &hdev->asic_prop; in hl_interrupt_wait_ioctl()
3579 if (!(prop->user_interrupt_count + prop->user_dec_intr_count)) { in hl_interrupt_wait_ioctl()
3580 dev_err(hdev->dev, "no user interrupts allowed"); in hl_interrupt_wait_ioctl()
3581 return -EPERM; in hl_interrupt_wait_ioctl()
3584 interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags); in hl_interrupt_wait_ioctl()
3586 first_interrupt = prop->first_available_user_interrupt; in hl_interrupt_wait_ioctl()
3587 last_interrupt = prop->first_available_user_interrupt + prop->user_interrupt_count - 1; in hl_interrupt_wait_ioctl()
3589 if (interrupt_id < prop->user_dec_intr_count) { in hl_interrupt_wait_ioctl()
3592 if (!(prop->decoder_enabled_mask & BIT(interrupt_id))) { in hl_interrupt_wait_ioctl()
3593 dev_err(hdev->dev, "interrupt on a disabled core(%u) not allowed", in hl_interrupt_wait_ioctl()
3595 return -EINVAL; in hl_interrupt_wait_ioctl()
3598 interrupt = &hdev->user_interrupt[interrupt_id]; in hl_interrupt_wait_ioctl()
3602 int_idx = interrupt_id - first_interrupt + prop->user_dec_intr_count; in hl_interrupt_wait_ioctl()
3603 interrupt = &hdev->user_interrupt[int_idx]; in hl_interrupt_wait_ioctl()
3606 interrupt = &hdev->common_user_cq_interrupt; in hl_interrupt_wait_ioctl()
3608 interrupt = &hdev->common_decoder_interrupt; in hl_interrupt_wait_ioctl()
3610 dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id); in hl_interrupt_wait_ioctl()
3611 return -EINVAL; in hl_interrupt_wait_ioctl()
3614 if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) in hl_interrupt_wait_ioctl()
3615 rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->mem_mgr, &hpriv->mem_mgr, in hl_interrupt_wait_ioctl()
3616 args->in.interrupt_timeout_us, args->in.cq_counters_handle, in hl_interrupt_wait_ioctl()
3617 args->in.cq_counters_offset, in hl_interrupt_wait_ioctl()
3618 args->in.target, interrupt, in hl_interrupt_wait_ioctl()
3619 !!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT), in hl_interrupt_wait_ioctl()
3620 args->in.timestamp_handle, args->in.timestamp_offset, in hl_interrupt_wait_ioctl()
3623 rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx, in hl_interrupt_wait_ioctl()
3624 args->in.interrupt_timeout_us, args->in.addr, in hl_interrupt_wait_ioctl()
3625 args->in.target, interrupt, &status, in hl_interrupt_wait_ioctl()
3630 memset(args, 0, sizeof(*args)); in hl_interrupt_wait_ioctl()
3631 args->out.status = status; in hl_interrupt_wait_ioctl()
3634 args->out.timestamp_nsec = timestamp; in hl_interrupt_wait_ioctl()
3635 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; in hl_interrupt_wait_ioctl()
3638 return 0; in hl_interrupt_wait_ioctl()
3643 struct hl_device *hdev = hpriv->hdev; in hl_wait_ioctl()
3645 u32 flags = args->in.flags; in hl_wait_ioctl()
3651 if (!hl_device_operational(hpriv->hdev, NULL) || hdev->reset_info.watchdog_active) in hl_wait_ioctl()
3652 return -EBUSY; in hl_wait_ioctl()