1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright 2016-2021 HabanaLabs, Ltd.
23 * enum hl_cs_wait_status - cs wait status
24 * @CS_WAIT_STATUS_BUSY: cs was not completed yet
25 * @CS_WAIT_STATUS_COMPLETED: cs completed
26 * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
65 * CS outcome store supports the following operations: in hl_push_cs_outcome()
66 * push outcome - store a recent CS outcome in the store in hl_push_cs_outcome()
67 * pop outcome - retrieve a SPECIFIC (by seq) CS outcome from the store in hl_push_cs_outcome()
69 * It has a pre-allocated amount of nodes, each node stores in hl_push_cs_outcome()
70 * a single CS outcome. in hl_push_cs_outcome()
84 spin_lock_irqsave(&outcome_store->db_lock, flags); in hl_push_cs_outcome()
86 if (list_empty(&outcome_store->free_list)) { in hl_push_cs_outcome()
87 node = list_last_entry(&outcome_store->used_list, in hl_push_cs_outcome()
89 hash_del(&node->map_link); in hl_push_cs_outcome()
90 dev_dbg(hdev->dev, "CS %llu outcome was lost\n", node->seq); in hl_push_cs_outcome()
92 node = list_last_entry(&outcome_store->free_list, in hl_push_cs_outcome()
96 list_del_init(&node->list_link); in hl_push_cs_outcome()
98 node->seq = seq; in hl_push_cs_outcome()
99 node->ts = ts; in hl_push_cs_outcome()
100 node->error = error; in hl_push_cs_outcome()
102 list_add(&node->list_link, &outcome_store->used_list); in hl_push_cs_outcome()
103 hash_add(outcome_store->outcome_map, &node->map_link, node->seq); in hl_push_cs_outcome()
105 spin_unlock_irqrestore(&outcome_store->db_lock, flags); in hl_push_cs_outcome()
114 spin_lock_irqsave(&outcome_store->db_lock, flags); in hl_pop_cs_outcome()
116 hash_for_each_possible(outcome_store->outcome_map, node, map_link, seq) in hl_pop_cs_outcome()
117 if (node->seq == seq) { in hl_pop_cs_outcome()
118 *ts = node->ts; in hl_pop_cs_outcome()
119 *error = node->error; in hl_pop_cs_outcome()
121 hash_del(&node->map_link); in hl_pop_cs_outcome()
122 list_del_init(&node->list_link); in hl_pop_cs_outcome()
123 list_add(&node->list_link, &outcome_store->free_list); in hl_pop_cs_outcome()
125 spin_unlock_irqrestore(&outcome_store->db_lock, flags); in hl_pop_cs_outcome()
130 spin_unlock_irqrestore(&outcome_store->db_lock, flags); in hl_pop_cs_outcome()
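The push/pop pair above implements a small fixed-capacity cache for recent CS outcomes: hl_push_cs_outcome() takes a node from the free list (evicting the oldest used node when none is free, which is when an outcome "was lost"), fills in seq/ts/error, and links the node into both the used list and a hash keyed by seq; hl_pop_cs_outcome() looks a node up by seq, copies its data out, and returns the node to the free pool. Below is a minimal userspace sketch of the same pattern; the names, the pool size, and the array-plus-stamp replacement for the kernel hashtable, lists, and db_lock are assumptions of the sketch, not the driver's implementation.

/* Minimal userspace model of the CS outcome store: fixed pool of nodes,
 * lookup by seq, and eviction of the oldest entry when the pool is full.
 * Hypothetical names; no locking, unlike the driver which holds db_lock. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define OUTCOME_NODES 4

struct outcome_node {
        uint64_t seq;
        int64_t ts;
        int error;
        bool used;              /* stands in for membership in used_list */
};

static struct outcome_node store[OUTCOME_NODES];
static uint64_t lru_stamp, stamp[OUTCOME_NODES];

static void push_outcome(uint64_t seq, int64_t ts, int error)
{
        int victim = 0;

        /* prefer a free node; otherwise evict the least recently pushed one */
        for (int i = 0; i < OUTCOME_NODES; i++) {
                if (!store[i].used) {
                        victim = i;
                        goto fill;
                }
                if (stamp[i] < stamp[victim])
                        victim = i;
        }
        printf("outcome %llu was lost\n",
               (unsigned long long)store[victim].seq);
fill:
        store[victim] = (struct outcome_node){ .seq = seq, .ts = ts,
                                               .error = error, .used = true };
        stamp[victim] = ++lru_stamp;
}

static bool pop_outcome(uint64_t seq, int64_t *ts, int *error)
{
        for (int i = 0; i < OUTCOME_NODES; i++) {
                if (store[i].used && store[i].seq == seq) {
                        *ts = store[i].ts;
                        *error = store[i].error;
                        store[i].used = false;   /* back to the free pool */
                        return true;
                }
        }
        return false;                            /* outcome already evicted */
}

int main(void)
{
        int64_t ts;
        int err;

        for (uint64_t s = 1; s <= 6; s++)
                push_outcome(s, 1000 + (int64_t)s, 0);
        printf("seq 6 found: %d\n", pop_outcome(6, &ts, &err));
        printf("seq 1 found: %d (evicted)\n", pop_outcome(1, &ts, &err));
        return 0;
}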
139 struct hl_device *hdev = hw_sob->hdev; in hl_sob_reset()
141 dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id); in hl_sob_reset()
143 hdev->asic_funcs->reset_sob(hdev, hw_sob); in hl_sob_reset()
145 hw_sob->need_reset = false; in hl_sob_reset()
152 struct hl_device *hdev = hw_sob->hdev; in hl_sob_reset_error()
154 dev_crit(hdev->dev, in hl_sob_reset_error()
156 hw_sob->q_idx, hw_sob->sob_id); in hl_sob_reset_error()
162 kref_put(&hw_sob->kref, hl_sob_reset); in hw_sob_put()
168 kref_put(&hw_sob->kref, hl_sob_reset_error); in hw_sob_put_err()
174 kref_get(&hw_sob->kref); in hw_sob_get()
178 * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
190 return -EINVAL; in hl_gen_sob_mask()
193 *mask = ~(1 << (sob_base & 0x7)); in hl_gen_sob_mask()
196 for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--) in hl_gen_sob_mask()
200 if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1)) in hl_gen_sob_mask()
201 return -EINVAL; in hl_gen_sob_mask()
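hl_gen_sob_mask() builds the byte-wide mask a monitor-arm packet uses to select sync objects within an 8-SOB window: for a single SOB only the bit of sob_base inside its group of 8 is cleared, and for multiple SOBs the highest requested bit is located and rejected if it falls outside the window. A userspace sketch of that visible logic follows; the multi-SOB result (*mask = ~sob_mask) and the constant values are assumptions here, since the listing only shows fragments of the function.

/* Sketch only: HL_MAX_SOBS_PER_MONITOR and the multi-SOB branch are assumed. */
#include <stdint.h>
#include <stdio.h>

#define BITS_PER_BYTE            8
#define HL_MAX_SOBS_PER_MONITOR  8

static int gen_sob_mask(uint16_t sob_base, uint8_t sob_mask, uint8_t *mask)
{
        int i;

        if (!sob_mask)
                return -1;

        if (sob_mask == 0x1) {
                /* single SOB: clear only the bit of sob_base inside its group */
                *mask = (uint8_t)~(1u << (sob_base & 0x7));
                return 0;
        }

        /* multiple SOBs: find the highest requested bit ... */
        for (i = BITS_PER_BYTE - 1; i >= 0; i--)
                if (sob_mask & (1u << i))
                        break;

        /* ... and reject it if it falls outside the monitor window */
        if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1))
                return -1;

        *mask = (uint8_t)~sob_mask;
        return 0;
}

int main(void)
{
        uint8_t m;

        if (!gen_sob_mask(41, 0x1, &m))          /* 41 & 0x7 == 1 */
                printf("mask for single SOB 41: 0x%02x\n", m);  /* 0xfd */
        return 0;
}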
223 kref_put(&fence->refcount, hl_fence_release); in hl_fence_put()
237 kref_get(&fence->refcount); in hl_fence_get()
242 kref_init(&fence->refcount); in hl_fence_init()
243 fence->cs_sequence = sequence; in hl_fence_init()
244 fence->error = 0; in hl_fence_init()
245 fence->timestamp = ktime_set(0, 0); in hl_fence_init()
246 fence->mcs_handling_done = false; in hl_fence_init()
247 init_completion(&fence->completion); in hl_fence_init()
250 void cs_get(struct hl_cs *cs) in cs_get() argument
252 kref_get(&cs->refcount); in cs_get()
255 static int cs_get_unless_zero(struct hl_cs *cs) in cs_get_unless_zero() argument
257 return kref_get_unless_zero(&cs->refcount); in cs_get_unless_zero()
260 static void cs_put(struct hl_cs *cs) in cs_put() argument
262 kref_put(&cs->refcount, cs_do_release); in cs_put()
274 kref_put(&job->refcount, cs_job_do_release); in hl_cs_job_put()
277 bool cs_needs_completion(struct hl_cs *cs) in cs_needs_completion() argument
279 /* In case this is a staged CS, only the last CS in sequence should in cs_needs_completion()
280 * get a completion; any non-staged CS will always get a completion in cs_needs_completion()
282 if (cs->staged_cs && !cs->staged_last) in cs_needs_completion()
288 bool cs_needs_timeout(struct hl_cs *cs) in cs_needs_timeout() argument
290 /* In case this is a staged CS, only the first CS in sequence should in cs_needs_timeout()
291 * get a timeout; any non-staged CS will always get a timeout in cs_needs_timeout()
293 if (cs->staged_cs && !cs->staged_first) in cs_needs_timeout()
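cs_needs_completion() and cs_needs_timeout() encode one rule for staged submissions: only the last CS in the sequence gets a completion, only the first gets a TDR timeout, and a non-staged CS always gets both. A tiny sketch of that decision table, using a hypothetical struct in place of struct hl_cs:

#include <stdbool.h>
#include <stdio.h>

struct cs_flags {
        bool staged_cs;     /* part of a staged submission */
        bool staged_first;  /* first CS in the staged sequence */
        bool staged_last;   /* last CS in the staged sequence */
};

static bool needs_completion(const struct cs_flags *cs)
{
        /* only the last staged CS completes; non-staged always completes */
        return !(cs->staged_cs && !cs->staged_last);
}

static bool needs_timeout(const struct cs_flags *cs)
{
        /* only the first staged CS is armed with a TDR timeout */
        return !(cs->staged_cs && !cs->staged_first);
}

int main(void)
{
        struct cs_flags mid = { .staged_cs = true }; /* neither first nor last */

        printf("middle staged CS: completion=%d timeout=%d\n",
               needs_completion(&mid), needs_timeout(&mid));
        return 0;
}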
302 return (job->queue_type == QUEUE_TYPE_EXT); in is_cb_patched()
306 * cs_parser - parse the user command submission
318 struct hl_device *hdev = hpriv->hdev; in cs_parser()
322 parser.ctx_id = job->cs->ctx->asid; in cs_parser()
323 parser.cs_sequence = job->cs->sequence; in cs_parser()
324 parser.job_id = job->id; in cs_parser()
326 parser.hw_queue_id = job->hw_queue_id; in cs_parser()
327 parser.job_userptr_list = &job->userptr_list; in cs_parser()
329 parser.user_cb = job->user_cb; in cs_parser()
330 parser.user_cb_size = job->user_cb_size; in cs_parser()
331 parser.queue_type = job->queue_type; in cs_parser()
332 parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb; in cs_parser()
333 job->patched_cb = NULL; in cs_parser()
334 parser.completion = cs_needs_completion(job->cs); in cs_parser()
336 rc = hdev->asic_funcs->cs_parser(hdev, &parser); in cs_parser()
340 job->patched_cb = parser.patched_cb; in cs_parser()
341 job->job_cb_size = parser.patched_cb_size; in cs_parser()
342 job->contains_dma_pkt = parser.contains_dma_pkt; in cs_parser()
343 atomic_inc(&job->patched_cb->cs_cnt); in cs_parser()
349 * won't be accessed again for this CS in cs_parser()
351 atomic_dec(&job->user_cb->cs_cnt); in cs_parser()
352 hl_cb_put(job->user_cb); in cs_parser()
353 job->user_cb = NULL; in cs_parser()
355 job->job_cb_size = job->user_cb_size; in cs_parser()
363 struct hl_cs *cs = job->cs; in hl_complete_job() local
366 hl_userptr_delete_list(hdev, &job->userptr_list); in hl_complete_job()
372 if (job->patched_cb) { in hl_complete_job()
373 atomic_dec(&job->patched_cb->cs_cnt); in hl_complete_job()
374 hl_cb_put(job->patched_cb); in hl_complete_job()
383 if (job->is_kernel_allocated_cb && in hl_complete_job()
384 (job->queue_type == QUEUE_TYPE_HW || job->queue_type == QUEUE_TYPE_INT)) { in hl_complete_job()
385 atomic_dec(&job->user_cb->cs_cnt); in hl_complete_job()
386 hl_cb_put(job->user_cb); in hl_complete_job()
393 spin_lock(&cs->job_lock); in hl_complete_job()
394 list_del(&job->cs_node); in hl_complete_job()
395 spin_unlock(&cs->job_lock); in hl_complete_job()
399 /* We decrement reference only for a CS that gets completion in hl_complete_job()
400 * because the reference was incremented only for this kind of CS in hl_complete_job()
403 * In staged submission, only the last CS marked as 'staged_last' in hl_complete_job()
405 * As for all the other CS's in the staged submission which do not get in hl_complete_job()
406 * completion, their CS reference will be decremented by the in hl_complete_job()
407 * 'staged_last' CS during the CS release flow. in hl_complete_job()
408 * All relevant PQ CI counters will be incremented during the CS release in hl_complete_job()
411 if (cs_needs_completion(cs) && in hl_complete_job()
412 (job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW)) { in hl_complete_job()
414 /* In CS based completions, the timestamp is already available, in hl_complete_job()
417 if (hdev->asic_prop.completion_mode == HL_COMPLETION_MODE_JOB) in hl_complete_job()
418 cs->completion_timestamp = job->timestamp; in hl_complete_job()
420 cs_put(cs); in hl_complete_job()
427 * hl_staged_cs_find_first - locate the first CS in this staged submission
432 * @note: This function must be called under 'hdev->cs_mirror_lock'
434 * Find and return a CS pointer with the given sequence
438 struct hl_cs *cs; in hl_staged_cs_find_first() local
440 list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node) in hl_staged_cs_find_first()
441 if (cs->staged_cs && cs->staged_first && in hl_staged_cs_find_first()
442 cs->sequence == cs_seq) in hl_staged_cs_find_first()
443 return cs; in hl_staged_cs_find_first()
449 * is_staged_cs_last_exists - returns true if the last CS in sequence exists
452 * @cs: staged submission member
455 bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs) in is_staged_cs_last_exists() argument
459 last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs, in is_staged_cs_last_exists()
462 if (last_entry->staged_last) in is_staged_cs_last_exists()
469 * staged_cs_get - get CS reference if this CS is a part of a staged CS
472 * @cs: current CS
475 * Increment CS reference for every CS in this staged submission except for
476 * the CS which get completion.
478 static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs) in staged_cs_get() argument
480 /* Only the last CS in this staged submission will get a completion. in staged_cs_get()
481 * We must increment the reference for all other CS's in this in staged_cs_get()
485 if (!cs->staged_last) in staged_cs_get()
486 cs_get(cs); in staged_cs_get()
490 * staged_cs_put - put a CS in case it is part of staged submission
493 * @cs: CS to put
495 * This function decrements a CS reference (for a non completion CS)
497 static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs) in staged_cs_put() argument
499 /* We release all CS's in a staged submission except the last in staged_cs_put()
500 * CS which we have never incremented its reference. in staged_cs_put()
502 if (!cs_needs_completion(cs)) in staged_cs_put()
503 cs_put(cs); in staged_cs_put()
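staged_cs_get()/staged_cs_put() pair that rule with reference counting: every staged CS except the one that gets the completion takes an extra reference on itself, and that reference is dropped later from the release flow driven by the 'staged_last' CS. A minimal model of the pairing, with a plain integer standing in for the kref:

#include <stdbool.h>
#include <stdio.h>

struct mini_cs {
        int refcount;       /* stands in for kref */
        bool staged_cs;
        bool staged_last;
};

static bool needs_completion(const struct mini_cs *cs)
{
        return !(cs->staged_cs && !cs->staged_last);
}

/* taken when the CS is queued as part of a staged submission */
static void staged_get(struct mini_cs *cs)
{
        if (!cs->staged_last)
                cs->refcount++;          /* everyone but the completing CS */
}

/* dropped when the 'staged_last' CS releases the whole submission */
static void staged_put(struct mini_cs *cs)
{
        if (!needs_completion(cs))
                cs->refcount--;
}

int main(void)
{
        struct mini_cs first = { .refcount = 1, .staged_cs = true };

        staged_get(&first);              /* held until staged_last completes */
        printf("refcount while staged submission is in flight: %d\n",
               first.refcount);
        staged_put(&first);
        printf("refcount after release: %d\n", first.refcount);
        return 0;
}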
506 static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) in cs_handle_tdr() argument
510 if (!cs_needs_timeout(cs)) in cs_handle_tdr()
513 spin_lock(&hdev->cs_mirror_lock); in cs_handle_tdr()
516 * Hence, we choose the CS that reaches this function first which is in cs_handle_tdr()
517 * the CS marked as 'staged_last'. in cs_handle_tdr()
518 * In case a single staged CS was submitted which has both first and last in cs_handle_tdr()
520 * removed the cs node from the list before getting here, in cs_handle_tdr()
521 * in such cases just continue with the CS to cancel its TDR work. in cs_handle_tdr()
523 if (cs->staged_cs && cs->staged_last) { in cs_handle_tdr()
524 first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence); in cs_handle_tdr()
526 cs = first_cs; in cs_handle_tdr()
529 spin_unlock(&hdev->cs_mirror_lock); in cs_handle_tdr()
531 /* Don't cancel TDR in case this CS was timedout because we might be in cs_handle_tdr()
534 if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT) in cs_handle_tdr()
537 if (cs->tdr_active) in cs_handle_tdr()
538 cancel_delayed_work_sync(&cs->work_tdr); in cs_handle_tdr()
540 spin_lock(&hdev->cs_mirror_lock); in cs_handle_tdr()
542 /* queue TDR for next CS */ in cs_handle_tdr()
543 list_for_each_entry(iter, &hdev->cs_mirror_list, mirror_node) in cs_handle_tdr()
549 if (next && !next->tdr_active) { in cs_handle_tdr()
550 next->tdr_active = true; in cs_handle_tdr()
551 schedule_delayed_work(&next->work_tdr, next->timeout_jiffies); in cs_handle_tdr()
554 spin_unlock(&hdev->cs_mirror_lock); in cs_handle_tdr()
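cs_handle_tdr() keeps exactly one timeout watchdog armed per stream of submissions: when the CS that owns the TDR completes, its delayed work is cancelled and the TDR is re-armed on the next CS in the mirror list that needs a timeout. A compact sketch of just the selection step, with the delayed-work and locking machinery left out and the list replaced by an array (assumptions of the sketch):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct tdr_cs {
        unsigned long long seq;
        bool needs_timeout;   /* first staged CS or any non-staged CS */
        bool tdr_active;      /* this CS currently owns the armed TDR */
};

/* pick the next CS that should own the TDR after 'done' completed */
static struct tdr_cs *tdr_handoff(struct tdr_cs *list, size_t n,
                                  struct tdr_cs *done)
{
        done->tdr_active = false;         /* models cancel_delayed_work_sync() */

        for (size_t i = 0; i < n; i++) {
                struct tdr_cs *next = &list[i];

                if (next == done || !next->needs_timeout)
                        continue;
                if (!next->tdr_active) {
                        next->tdr_active = true;  /* schedule_delayed_work() */
                        return next;
                }
        }
        return NULL;
}

int main(void)
{
        struct tdr_cs mirror[] = {
                { .seq = 10, .needs_timeout = true, .tdr_active = true },
                { .seq = 11, .needs_timeout = false },   /* staged, not first */
                { .seq = 12, .needs_timeout = true },
        };
        struct tdr_cs *next = tdr_handoff(mirror, 3, &mirror[0]);

        printf("TDR moved to seq %llu\n", next ? next->seq : 0);
        return 0;
}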
558 * force_complete_multi_cs - complete all contexts that wait on multi-CS
569 mcs_compl = &hdev->multi_cs_completion[i]; in force_complete_multi_cs()
571 spin_lock(&mcs_compl->lock); in force_complete_multi_cs()
573 if (!mcs_compl->used) { in force_complete_multi_cs()
574 spin_unlock(&mcs_compl->lock); in force_complete_multi_cs()
579 * multi-CS. in force_complete_multi_cs()
583 dev_err(hdev->dev, in force_complete_multi_cs()
584 "multi-CS completion context %d still waiting when calling force completion\n", in force_complete_multi_cs()
586 complete_all(&mcs_compl->completion); in force_complete_multi_cs()
587 spin_unlock(&mcs_compl->lock); in force_complete_multi_cs()
592 * complete_multi_cs - complete all waiting entities on multi-CS
595 * @cs: CS structure
597 * with the completed CS.
599 * - a completed CS worked on stream master QID 4, multi CS completion
602 * - a completed CS worked on stream master QID 4, multi CS completion
606 static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs) in complete_multi_cs() argument
608 struct hl_fence *fence = cs->fence; in complete_multi_cs()
611 /* in case of multi CS check for completion only for the first CS */ in complete_multi_cs()
612 if (cs->staged_cs && !cs->staged_first) in complete_multi_cs()
618 mcs_compl = &hdev->multi_cs_completion[i]; in complete_multi_cs()
619 if (!mcs_compl->used) in complete_multi_cs()
622 spin_lock(&mcs_compl->lock); in complete_multi_cs()
626 * 1. still waiting for completion in complete_multi_cs()
627 * 2. the completed CS has at least one overlapping stream in complete_multi_cs()
630 if (mcs_compl->used && in complete_multi_cs()
631 (fence->stream_master_qid_map & in complete_multi_cs()
632 mcs_compl->stream_master_qid_map)) { in complete_multi_cs()
633 /* extract the timestamp only of first completed CS */ in complete_multi_cs()
634 if (!mcs_compl->timestamp) in complete_multi_cs()
635 mcs_compl->timestamp = ktime_to_ns(fence->timestamp); in complete_multi_cs()
637 complete_all(&mcs_compl->completion); in complete_multi_cs()
643 * least one CS will be set as completed when polling in complete_multi_cs()
646 fence->mcs_handling_done = true; in complete_multi_cs()
649 spin_unlock(&mcs_compl->lock); in complete_multi_cs()
651 /* In case CS completed without mcs completion initialized */ in complete_multi_cs()
652 fence->mcs_handling_done = true; in complete_multi_cs()
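complete_multi_cs() wakes a multi-CS waiter only when the completed CS touched at least one of the streams the waiter registered for; both sides carry a bitmap of stream master queue IDs and a bitwise AND decides the overlap, with the first completed CS also donating its timestamp. A small sketch of that test with hypothetical types:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct mcs_waiter {
        bool used;                    /* completion context is in use */
        uint32_t stream_master_qid_map;
        int64_t timestamp;            /* taken from the first completion only */
        bool completed;
};

static void complete_multi_cs_sketch(struct mcs_waiter *waiters, int n,
                                     uint32_t cs_qid_map, int64_t ts)
{
        for (int i = 0; i < n; i++) {
                struct mcs_waiter *w = &waiters[i];

                /* wake only waiters with at least one overlapping stream */
                if (w->used && (w->stream_master_qid_map & cs_qid_map)) {
                        if (!w->timestamp)
                                w->timestamp = ts;   /* first completed CS */
                        w->completed = true;         /* complete_all() */
                }
        }
}

int main(void)
{
        struct mcs_waiter w[2] = {
                { .used = true, .stream_master_qid_map = 1u << 4 },
                { .used = true, .stream_master_qid_map = 1u << 7 },
        };

        complete_multi_cs_sketch(w, 2, 1u << 4, 12345); /* CS ran on QID 4 */
        printf("waiter0 woken: %d, waiter1 woken: %d\n",
               w[0].completed, w[1].completed);
        return 0;
}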
656 struct hl_cs *cs, in cs_release_sob_reset_handler() argument
659 /* Skip this handler if the cs wasn't submitted, to avoid putting in cs_release_sob_reset_handler()
663 if (!hl_cs_cmpl->hw_sob || !cs->submitted) in cs_release_sob_reset_handler()
666 spin_lock(&hl_cs_cmpl->lock); in cs_release_sob_reset_handler()
669 * we get refcount upon reservation of signals or signal/wait cs for the in cs_release_sob_reset_handler()
670 * hw_sob object, and need to put it when the first staged cs in cs_release_sob_reset_handler()
671 * (which contains the encaps signals) or cs signal/wait is completed. in cs_release_sob_reset_handler()
673 if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) || in cs_release_sob_reset_handler()
674 (hl_cs_cmpl->type == CS_TYPE_WAIT) || in cs_release_sob_reset_handler()
675 (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) || in cs_release_sob_reset_handler()
676 (!!hl_cs_cmpl->encaps_signals)) { in cs_release_sob_reset_handler()
677 dev_dbg(hdev->dev, in cs_release_sob_reset_handler()
678 "CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n", in cs_release_sob_reset_handler()
679 hl_cs_cmpl->cs_seq, in cs_release_sob_reset_handler()
680 hl_cs_cmpl->type, in cs_release_sob_reset_handler()
681 hl_cs_cmpl->hw_sob->sob_id, in cs_release_sob_reset_handler()
682 hl_cs_cmpl->sob_val); in cs_release_sob_reset_handler()
684 hw_sob_put(hl_cs_cmpl->hw_sob); in cs_release_sob_reset_handler()
686 if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) in cs_release_sob_reset_handler()
687 hdev->asic_funcs->reset_sob_group(hdev, in cs_release_sob_reset_handler()
688 hl_cs_cmpl->sob_group); in cs_release_sob_reset_handler()
691 spin_unlock(&hl_cs_cmpl->lock); in cs_release_sob_reset_handler()
696 struct hl_cs *cs = container_of(ref, struct hl_cs, refcount); in cs_do_release() local
697 struct hl_device *hdev = cs->ctx->hdev; in cs_do_release()
700 container_of(cs->fence, struct hl_cs_compl, base_fence); in cs_do_release()
702 cs->completed = true; in cs_do_release()
706 * finished, because each one of them took refcnt to CS, we still in cs_do_release()
708 * will have leaked memory and what's worse, the CS object (and in cs_do_release()
712 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) in cs_do_release()
715 if (!cs->submitted) { in cs_do_release()
717 * In case the wait for signal CS was submitted, the fence put in cs_do_release()
721 if (cs->type == CS_TYPE_WAIT || in cs_do_release()
722 cs->type == CS_TYPE_COLLECTIVE_WAIT) in cs_do_release()
723 hl_fence_put(cs->signal_fence); in cs_do_release()
729 hl_hw_queue_update_ci(cs); in cs_do_release()
731 /* remove CS from CS mirror list */ in cs_do_release()
732 spin_lock(&hdev->cs_mirror_lock); in cs_do_release()
733 list_del_init(&cs->mirror_node); in cs_do_release()
734 spin_unlock(&hdev->cs_mirror_lock); in cs_do_release()
736 cs_handle_tdr(hdev, cs); in cs_do_release()
738 if (cs->staged_cs) { in cs_do_release()
739 /* the completion CS decrements reference for the entire in cs_do_release()
742 if (cs->staged_last) { in cs_do_release()
746 &cs->staged_cs_node, staged_cs_node) in cs_do_release()
750 /* A staged CS will be a member in the list only after it in cs_do_release()
754 if (cs->submitted) { in cs_do_release()
755 spin_lock(&hdev->cs_mirror_lock); in cs_do_release()
756 list_del(&cs->staged_cs_node); in cs_do_release()
757 spin_unlock(&hdev->cs_mirror_lock); in cs_do_release()
760 /* decrement refcount to handle when first staged cs in cs_do_release()
763 if (hl_cs_cmpl->encaps_signals) in cs_do_release()
764 kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount, in cs_do_release()
768 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) && cs->encaps_signals) in cs_do_release()
769 kref_put(&cs->encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx); in cs_do_release()
775 hl_debugfs_remove_cs(cs); in cs_do_release()
777 hdev->shadow_cs_queue[cs->sequence & (hdev->asic_prop.max_pending_cs - 1)] = NULL; in cs_do_release()
783 if (cs->timedout) in cs_do_release()
784 cs->fence->error = -ETIMEDOUT; in cs_do_release()
785 else if (cs->aborted) in cs_do_release()
786 cs->fence->error = -EIO; in cs_do_release()
787 else if (!cs->submitted) in cs_do_release()
788 cs->fence->error = -EBUSY; in cs_do_release()
790 if (unlikely(cs->skip_reset_on_timeout)) { in cs_do_release()
791 dev_err(hdev->dev, in cs_do_release()
793 cs->sequence, in cs_do_release()
794 div_u64(jiffies - cs->submission_time_jiffies, HZ)); in cs_do_release()
797 if (cs->timestamp) { in cs_do_release()
798 cs->fence->timestamp = cs->completion_timestamp; in cs_do_release()
799 hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence, in cs_do_release()
800 cs->fence->timestamp, cs->fence->error); in cs_do_release()
803 hl_ctx_put(cs->ctx); in cs_do_release()
805 complete_all(&cs->fence->completion); in cs_do_release()
806 complete_multi_cs(hdev, cs); in cs_do_release()
808 cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl); in cs_do_release()
810 hl_fence_put(cs->fence); in cs_do_release()
812 kfree(cs->jobs_in_queue_cnt); in cs_do_release()
813 kfree(cs); in cs_do_release()
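Before waking waiters, cs_do_release() derives the fence error from the CS state: a timed-out CS reports -ETIMEDOUT, an aborted one -EIO, a CS that was never submitted -EBUSY, and a normally completed CS keeps error 0. A short sketch of that mapping:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct cs_state {
        bool timedout;
        bool aborted;
        bool submitted;
};

static int fence_error(const struct cs_state *cs)
{
        if (cs->timedout)
                return -ETIMEDOUT;
        if (cs->aborted)
                return -EIO;
        if (!cs->submitted)
                return -EBUSY;
        return 0;                /* completed normally */
}

int main(void)
{
        struct cs_state aborted = { .aborted = true, .submitted = true };

        printf("fence error for an aborted CS: %d\n", fence_error(&aborted));
        return 0;
}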
818 struct hl_cs *cs = container_of(work, struct hl_cs, work_tdr.work); in cs_timedout() local
825 skip_reset_on_timeout = cs->skip_reset_on_timeout; in cs_timedout()
827 rc = cs_get_unless_zero(cs); in cs_timedout()
831 if ((!cs->submitted) || (cs->completed)) { in cs_timedout()
832 cs_put(cs); in cs_timedout()
836 hdev = cs->ctx->hdev; in cs_timedout()
839 if (hdev->reset_on_lockup) in cs_timedout()
842 hdev->reset_info.needs_reset = true; in cs_timedout()
844 /* Mark the CS is timed out so we won't try to cancel its TDR */ in cs_timedout()
845 cs->timedout = true; in cs_timedout()
848 /* Save only the first CS timeout parameters */ in cs_timedout()
849 rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0); in cs_timedout()
851 hdev->captured_err_info.cs_timeout.timestamp = ktime_get(); in cs_timedout()
852 hdev->captured_err_info.cs_timeout.seq = cs->sequence; in cs_timedout()
856 timeout_sec = jiffies_to_msecs(hdev->timeout_jiffies) / 1000; in cs_timedout()
858 switch (cs->type) { in cs_timedout()
860 dev_err(hdev->dev, in cs_timedout()
862 cs->sequence, timeout_sec); in cs_timedout()
866 dev_err(hdev->dev, in cs_timedout()
868 cs->sequence, timeout_sec); in cs_timedout()
872 dev_err(hdev->dev, in cs_timedout()
874 cs->sequence, timeout_sec); in cs_timedout()
878 dev_err(hdev->dev, in cs_timedout()
880 cs->sequence, timeout_sec); in cs_timedout()
886 dev_err(hdev->dev, "Error during system state dump %d\n", rc); in cs_timedout()
888 cs_put(cs); in cs_timedout()
905 struct hl_cs *cs; in allocate_cs() local
908 cntr = &hdev->aggregated_cs_counters; in allocate_cs()
910 cs = kzalloc(sizeof(*cs), GFP_ATOMIC); in allocate_cs()
911 if (!cs) in allocate_cs()
912 cs = kzalloc(sizeof(*cs), GFP_KERNEL); in allocate_cs()
914 if (!cs) { in allocate_cs()
915 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in allocate_cs()
916 atomic64_inc(&cntr->out_of_mem_drop_cnt); in allocate_cs()
917 return -ENOMEM; in allocate_cs()
923 cs->ctx = ctx; in allocate_cs()
924 cs->submitted = false; in allocate_cs()
925 cs->completed = false; in allocate_cs()
926 cs->type = cs_type; in allocate_cs()
927 cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP); in allocate_cs()
928 cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS); in allocate_cs()
929 cs->timeout_jiffies = timeout; in allocate_cs()
930 cs->skip_reset_on_timeout = in allocate_cs()
931 hdev->reset_info.skip_reset_on_timeout || in allocate_cs()
933 cs->submission_time_jiffies = jiffies; in allocate_cs()
934 INIT_LIST_HEAD(&cs->job_list); in allocate_cs()
935 INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout); in allocate_cs()
936 kref_init(&cs->refcount); in allocate_cs()
937 spin_lock_init(&cs->job_lock); in allocate_cs()
944 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in allocate_cs()
945 atomic64_inc(&cntr->out_of_mem_drop_cnt); in allocate_cs()
946 rc = -ENOMEM; in allocate_cs()
950 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, in allocate_cs()
951 sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); in allocate_cs()
952 if (!cs->jobs_in_queue_cnt) in allocate_cs()
953 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, in allocate_cs()
954 sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL); in allocate_cs()
956 if (!cs->jobs_in_queue_cnt) { in allocate_cs()
957 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in allocate_cs()
958 atomic64_inc(&cntr->out_of_mem_drop_cnt); in allocate_cs()
959 rc = -ENOMEM; in allocate_cs()
963 cs_cmpl->hdev = hdev; in allocate_cs()
964 cs_cmpl->type = cs->type; in allocate_cs()
965 spin_lock_init(&cs_cmpl->lock); in allocate_cs()
966 cs->fence = &cs_cmpl->base_fence; in allocate_cs()
968 spin_lock(&ctx->cs_lock); in allocate_cs()
970 cs_cmpl->cs_seq = ctx->cs_sequence; in allocate_cs()
971 other = ctx->cs_pending[cs_cmpl->cs_seq & in allocate_cs()
972 (hdev->asic_prop.max_pending_cs - 1)]; in allocate_cs()
974 if (other && !completion_done(&other->completion)) { in allocate_cs()
979 * This causes a deadlock because this CS will never be in allocate_cs()
980 * completed as it depends on future CS's for completion. in allocate_cs()
982 if (other->cs_sequence == user_sequence) in allocate_cs()
983 dev_crit_ratelimited(hdev->dev, in allocate_cs()
984 "Staged CS %llu deadlock due to lack of resources", in allocate_cs()
987 dev_dbg_ratelimited(hdev->dev, in allocate_cs()
988 "Rejecting CS because of too many in-flights CS\n"); in allocate_cs()
989 atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt); in allocate_cs()
990 atomic64_inc(&cntr->max_cs_in_flight_drop_cnt); in allocate_cs()
991 rc = -EAGAIN; in allocate_cs()
996 hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq); in allocate_cs()
998 cs->sequence = cs_cmpl->cs_seq; in allocate_cs()
1000 ctx->cs_pending[cs_cmpl->cs_seq & in allocate_cs()
1001 (hdev->asic_prop.max_pending_cs - 1)] = in allocate_cs()
1002 &cs_cmpl->base_fence; in allocate_cs()
1003 ctx->cs_sequence++; in allocate_cs()
1005 hl_fence_get(&cs_cmpl->base_fence); in allocate_cs()
1009 spin_unlock(&ctx->cs_lock); in allocate_cs()
1011 *cs_new = cs; in allocate_cs()
1016 spin_unlock(&ctx->cs_lock); in allocate_cs()
1017 kfree(cs->jobs_in_queue_cnt); in allocate_cs()
1021 kfree(cs); in allocate_cs()
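allocate_cs() bounds the number of in-flight submissions with a power-of-two ring of pending fences indexed by sequence & (max_pending_cs - 1): if the slot for the new sequence still holds an unfinished fence, the submission is rejected with -EAGAIN (or reported as a staged-CS deadlock when the caller waits on a sequence that can never complete). A userspace sketch of the slot check, with an assumed window size and no locking:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_PENDING_CS 4          /* must be a power of two */

struct pending_fence {
        bool in_use;
        bool completed;
        uint64_t seq;
};

static struct pending_fence cs_pending[MAX_PENDING_CS];
static uint64_t cs_sequence;

/* returns 0 and the assigned sequence, or -1 when the window is full */
static int reserve_cs_slot(uint64_t *seq_out)
{
        struct pending_fence *other =
                &cs_pending[cs_sequence & (MAX_PENDING_CS - 1)];

        if (other->in_use && !other->completed)
                return -1;        /* too many in-flight CSs: -EAGAIN */

        *other = (struct pending_fence){ .in_use = true, .seq = cs_sequence };
        *seq_out = cs_sequence++;
        return 0;
}

int main(void)
{
        uint64_t seq;

        for (int i = 0; i < MAX_PENDING_CS; i++)
                reserve_cs_slot(&seq);
        printf("5th reservation with none completed: %d\n",
               reserve_cs_slot(&seq));        /* -1, window is full */
        return 0;
}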
1026 static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs) in cs_rollback() argument
1030 staged_cs_put(hdev, cs); in cs_rollback()
1032 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) in cs_rollback()
1037 * release_reserved_encaps_signals() - release reserved encapsulated signals.
1040 * Release reserved encapsulated signals which weren't un-reserved, or for which a CS with
1041 * encapsulated signals wasn't submitted and thus weren't released as part of CS roll-back.
1055 mgr = &ctx->sig_mgr; in release_reserved_encaps_signals()
1057 idr_for_each_entry(&mgr->handles, handle, id) in release_reserved_encaps_signals()
1058 if (handle->cs_seq == ULLONG_MAX) in release_reserved_encaps_signals()
1059 kref_put(&handle->refcount, hl_encaps_release_handle_and_put_sob_ctx); in release_reserved_encaps_signals()
1067 struct hl_cs *cs, *tmp; in hl_cs_rollback_all() local
1070 flush_workqueue(hdev->ts_free_obj_wq); in hl_cs_rollback_all()
1072 /* flush all completions before iterating over the CS mirror list in in hl_cs_rollback_all()
1075 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) in hl_cs_rollback_all()
1076 flush_workqueue(hdev->cq_wq[i]); in hl_cs_rollback_all()
1078 flush_workqueue(hdev->cs_cmplt_wq); in hl_cs_rollback_all()
1081 /* Make sure we don't have leftovers in the CS mirror list */ in hl_cs_rollback_all()
1082 list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) { in hl_cs_rollback_all()
1083 cs_get(cs); in hl_cs_rollback_all()
1084 cs->aborted = true; in hl_cs_rollback_all()
1085 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n", in hl_cs_rollback_all()
1086 cs->ctx->asid, cs->sequence); in hl_cs_rollback_all()
1087 cs_rollback(hdev, cs); in hl_cs_rollback_all()
1088 cs_put(cs); in hl_cs_rollback_all()
1102 spin_lock_irqsave(&interrupt->wait_list_lock, flags); in wake_pending_user_interrupt_threads()
1103 list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, list_node) { in wake_pending_user_interrupt_threads()
1104 pend->fence.error = -EIO; in wake_pending_user_interrupt_threads()
1105 complete_all(&pend->fence.completion); in wake_pending_user_interrupt_threads()
1107 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); in wake_pending_user_interrupt_threads()
1109 spin_lock_irqsave(&interrupt->ts_list_lock, flags); in wake_pending_user_interrupt_threads()
1110 list_for_each_entry_safe(pend, temp, &interrupt->ts_list_head, list_node) { in wake_pending_user_interrupt_threads()
1111 list_del(&pend->list_node); in wake_pending_user_interrupt_threads()
1112 hl_mmap_mem_buf_put(pend->ts_reg_info.buf); in wake_pending_user_interrupt_threads()
1113 hl_cb_put(pend->ts_reg_info.cq_cb); in wake_pending_user_interrupt_threads()
1115 spin_unlock_irqrestore(&interrupt->ts_list_lock, flags); in wake_pending_user_interrupt_threads()
1120 struct asic_fixed_properties *prop = &hdev->asic_prop; in hl_release_pending_user_interrupts()
1124 if (!prop->user_interrupt_count) in hl_release_pending_user_interrupts()
1134 for (i = 0 ; i < prop->user_interrupt_count ; i++) { in hl_release_pending_user_interrupts()
1135 interrupt = &hdev->user_interrupt[i]; in hl_release_pending_user_interrupts()
1139 interrupt = &hdev->common_user_cq_interrupt; in hl_release_pending_user_interrupts()
1142 interrupt = &hdev->common_decoder_interrupt; in hl_release_pending_user_interrupts()
1148 struct hl_cs *cs; in force_complete_cs() local
1150 spin_lock(&hdev->cs_mirror_lock); in force_complete_cs()
1152 list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) { in force_complete_cs()
1153 cs->fence->error = -EIO; in force_complete_cs()
1154 complete_all(&cs->fence->completion); in force_complete_cs()
1157 spin_unlock(&hdev->cs_mirror_lock); in force_complete_cs()
1170 struct hl_cs *cs = job->cs; in job_wq_completion() local
1171 struct hl_device *hdev = cs->ctx->hdev; in job_wq_completion()
1179 struct hl_cs *cs = container_of(work, struct hl_cs, finish_work); in cs_completion() local
1180 struct hl_device *hdev = cs->ctx->hdev; in cs_completion()
1183 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) in cs_completion()
1190 struct hl_cs *cs; in hl_get_active_cs_num() local
1192 spin_lock(&hdev->cs_mirror_lock); in hl_get_active_cs_num()
1194 list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) in hl_get_active_cs_num()
1195 if (!cs->completed) in hl_get_active_cs_num()
1198 spin_unlock(&hdev->cs_mirror_lock); in hl_get_active_cs_num()
1208 struct asic_fixed_properties *asic = &hdev->asic_prop; in validate_queue_index()
1211 /* This must be checked here to prevent out-of-bounds access to in validate_queue_index()
1214 if (chunk->queue_index >= asic->max_queues) { in validate_queue_index()
1215 dev_err(hdev->dev, "Queue index %d is invalid\n", in validate_queue_index()
1216 chunk->queue_index); in validate_queue_index()
1217 return -EINVAL; in validate_queue_index()
1220 hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; in validate_queue_index()
1222 if (hw_queue_prop->type == QUEUE_TYPE_NA) { in validate_queue_index()
1223 dev_err(hdev->dev, "Queue index %d is not applicable\n", in validate_queue_index()
1224 chunk->queue_index); in validate_queue_index()
1225 return -EINVAL; in validate_queue_index()
1228 if (hw_queue_prop->binned) { in validate_queue_index()
1229 dev_err(hdev->dev, "Queue index %d is binned out\n", in validate_queue_index()
1230 chunk->queue_index); in validate_queue_index()
1231 return -EINVAL; in validate_queue_index()
1234 if (hw_queue_prop->driver_only) { in validate_queue_index()
1235 dev_err(hdev->dev, in validate_queue_index()
1237 chunk->queue_index); in validate_queue_index()
1238 return -EINVAL; in validate_queue_index()
1244 if (hw_queue_prop->type == QUEUE_TYPE_HW) { in validate_queue_index()
1245 if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) { in validate_queue_index()
1246 if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) { in validate_queue_index()
1247 dev_err(hdev->dev, in validate_queue_index()
1249 chunk->queue_index); in validate_queue_index()
1250 return -EINVAL; in validate_queue_index()
1255 if (!(hw_queue_prop->cb_alloc_flags & in validate_queue_index()
1257 dev_err(hdev->dev, in validate_queue_index()
1259 chunk->queue_index); in validate_queue_index()
1260 return -EINVAL; in validate_queue_index()
1266 *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags in validate_queue_index()
1270 *queue_type = hw_queue_prop->type; in validate_queue_index()
1280 cb = hl_cb_get(mmg, chunk->cb_handle); in get_cb_from_cs_chunk()
1282 dev_err(hdev->dev, "CB handle 0x%llx invalid\n", chunk->cb_handle); in get_cb_from_cs_chunk()
1286 if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) { in get_cb_from_cs_chunk()
1287 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size); in get_cb_from_cs_chunk()
1291 atomic_inc(&cb->cs_cnt); in get_cb_from_cs_chunk()
1312 kref_init(&job->refcount); in hl_cs_allocate_job()
1313 job->queue_type = queue_type; in hl_cs_allocate_job()
1314 job->is_kernel_allocated_cb = is_kernel_allocated_cb; in hl_cs_allocate_job()
1317 INIT_LIST_HEAD(&job->userptr_list); in hl_cs_allocate_job()
1319 if (job->queue_type == QUEUE_TYPE_EXT) in hl_cs_allocate_job()
1320 INIT_WORK(&job->finish_work, job_wq_completion); in hl_cs_allocate_job()
1349 struct hl_device *hdev = hpriv->hdev; in hl_cs_sanity_checks()
1350 struct hl_ctx *ctx = hpriv->ctx; in hl_cs_sanity_checks()
1357 for (i = 0 ; i < sizeof(args->in.pad) ; i++) in hl_cs_sanity_checks()
1358 if (args->in.pad[i]) { in hl_cs_sanity_checks()
1359 dev_dbg(hdev->dev, "Padding bytes must be 0\n"); in hl_cs_sanity_checks()
1360 return -EINVAL; in hl_cs_sanity_checks()
1364 return -EBUSY; in hl_cs_sanity_checks()
1366 if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) && in hl_cs_sanity_checks()
1367 !hdev->supports_staged_submission) { in hl_cs_sanity_checks()
1368 dev_err(hdev->dev, "staged submission not supported"); in hl_cs_sanity_checks()
1369 return -EPERM; in hl_cs_sanity_checks()
1372 cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK; in hl_cs_sanity_checks()
1375 dev_err(hdev->dev, in hl_cs_sanity_checks()
1376 "CS type flags are mutually exclusive, context %d\n", in hl_cs_sanity_checks()
1377 ctx->asid); in hl_cs_sanity_checks()
1378 return -EINVAL; in hl_cs_sanity_checks()
1382 num_chunks = args->in.num_chunks_execute; in hl_cs_sanity_checks()
1387 if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) { in hl_cs_sanity_checks()
1388 dev_err(hdev->dev, "Sync stream CS is not supported\n"); in hl_cs_sanity_checks()
1389 return -EINVAL; in hl_cs_sanity_checks()
1394 dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid); in hl_cs_sanity_checks()
1395 return -EINVAL; in hl_cs_sanity_checks()
1397 } else if (is_sync_stream && num_chunks != 1) { in hl_cs_sanity_checks()
1398 dev_err(hdev->dev, in hl_cs_sanity_checks()
1399 "Sync stream CS mandates one chunk only, context %d\n", in hl_cs_sanity_checks()
1400 ctx->asid); in hl_cs_sanity_checks()
1401 return -EINVAL; in hl_cs_sanity_checks()
1415 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in hl_cs_copy_chunk_array()
1416 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); in hl_cs_copy_chunk_array()
1417 dev_err(hdev->dev, in hl_cs_copy_chunk_array()
1420 return -EINVAL; in hl_cs_copy_chunk_array()
1429 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in hl_cs_copy_chunk_array()
1430 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); in hl_cs_copy_chunk_array()
1431 return -ENOMEM; in hl_cs_copy_chunk_array()
1436 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in hl_cs_copy_chunk_array()
1437 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); in hl_cs_copy_chunk_array()
1438 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n"); in hl_cs_copy_chunk_array()
1440 return -EFAULT; in hl_cs_copy_chunk_array()
1446 static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs, in cs_staged_submission() argument
1453 cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST); in cs_staged_submission()
1454 cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST); in cs_staged_submission()
1456 if (cs->staged_first) { in cs_staged_submission()
1457 /* Staged CS sequence is the first CS sequence */ in cs_staged_submission()
1458 INIT_LIST_HEAD(&cs->staged_cs_node); in cs_staged_submission()
1459 cs->staged_sequence = cs->sequence; in cs_staged_submission()
1461 if (cs->encaps_signals) in cs_staged_submission()
1462 cs->encaps_sig_hdl_id = encaps_signal_handle; in cs_staged_submission()
1467 cs->staged_sequence = sequence; in cs_staged_submission()
1470 /* Increment CS reference if needed */ in cs_staged_submission()
1471 staged_cs_get(hdev, cs); in cs_staged_submission()
1473 cs->staged_cs = true; in cs_staged_submission()
1482 for (i = 0; i < hdev->stream_master_qid_arr_size; i++) in get_stream_master_qid_mask()
1483 if (qid == hdev->stream_master_qid_arr[i]) in get_stream_master_qid_mask()
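get_stream_master_qid_mask() maps a hardware queue ID to one bit of the per-CS bitmap later consumed by complete_multi_cs(): the queue's position in the device's stream-master array is the bit index. A sketch with a small, purely hypothetical array of queue IDs:

#include <stdint.h>
#include <stdio.h>

static const uint32_t stream_master_qid_arr[] = { 4, 5, 6, 7, 12, 13 };
#define QID_ARR_SIZE \
        (sizeof(stream_master_qid_arr) / sizeof(stream_master_qid_arr[0]))

static uint32_t stream_master_qid_mask(uint32_t qid)
{
        for (uint32_t i = 0; i < QID_ARR_SIZE; i++)
                if (qid == stream_master_qid_arr[i])
                        return 1u << i;      /* bit index == array position */
        return 0;                            /* not a stream master queue */
}

int main(void)
{
        printf("mask for QID 12: 0x%x\n", stream_master_qid_mask(12)); /* 0x10 */
        return 0;
}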
1495 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_default()
1498 struct hl_ctx *ctx = hpriv->ctx; in cs_ioctl_default()
1500 struct hl_cs *cs; in cs_ioctl_default() local
1506 cntr = &hdev->aggregated_cs_counters; in cs_ioctl_default()
1511 hpriv->ctx); in cs_ioctl_default()
1521 rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, in cs_ioctl_default()
1522 staged_mid ? user_sequence : ULLONG_MAX, &cs, flags, in cs_ioctl_default()
1527 *cs_seq = cs->sequence; in cs_ioctl_default()
1529 hl_debugfs_add_cs(cs); in cs_ioctl_default()
1531 rc = cs_staged_submission(hdev, cs, user_sequence, flags, in cs_ioctl_default()
1537 * rather than the internal CS sequence in cs_ioctl_default()
1539 if (cs->staged_cs) in cs_ioctl_default()
1540 *cs_seq = cs->staged_sequence; in cs_ioctl_default()
1542 /* Validate ALL the CS chunks before submitting the CS */ in cs_ioctl_default()
1551 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_default()
1552 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_default()
1557 cb = get_cb_from_cs_chunk(hdev, &hpriv->mem_mgr, chunk); in cs_ioctl_default()
1560 &ctx->cs_counters.validation_drop_cnt); in cs_ioctl_default()
1561 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_default()
1562 rc = -EINVAL; in cs_ioctl_default()
1566 cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle; in cs_ioctl_default()
1575 * queues of this CS in cs_ioctl_default()
1577 if (hdev->supports_wait_for_multi_cs) in cs_ioctl_default()
1580 chunk->queue_index); in cs_ioctl_default()
1589 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in cs_ioctl_default()
1590 atomic64_inc(&cntr->out_of_mem_drop_cnt); in cs_ioctl_default()
1591 dev_err(hdev->dev, "Failed to allocate a new job\n"); in cs_ioctl_default()
1592 rc = -ENOMEM; in cs_ioctl_default()
1599 job->id = i + 1; in cs_ioctl_default()
1600 job->cs = cs; in cs_ioctl_default()
1601 job->user_cb = cb; in cs_ioctl_default()
1602 job->user_cb_size = chunk->cb_size; in cs_ioctl_default()
1603 job->hw_queue_id = chunk->queue_index; in cs_ioctl_default()
1605 cs->jobs_in_queue_cnt[job->hw_queue_id]++; in cs_ioctl_default()
1606 cs->jobs_cnt++; in cs_ioctl_default()
1608 list_add_tail(&job->cs_node, &cs->job_list); in cs_ioctl_default()
1611 * Increment CS reference. When CS reference is 0, CS is in cs_ioctl_default()
1616 if (cs_needs_completion(cs) && in cs_ioctl_default()
1617 (job->queue_type == QUEUE_TYPE_EXT || in cs_ioctl_default()
1618 job->queue_type == QUEUE_TYPE_HW)) in cs_ioctl_default()
1619 cs_get(cs); in cs_ioctl_default()
1625 atomic64_inc(&ctx->cs_counters.parsing_drop_cnt); in cs_ioctl_default()
1626 atomic64_inc(&cntr->parsing_drop_cnt); in cs_ioctl_default()
1627 dev_err(hdev->dev, in cs_ioctl_default()
1628 "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n", in cs_ioctl_default()
1629 cs->ctx->asid, cs->sequence, job->id, rc); in cs_ioctl_default()
1634 /* We allow a CS with any queue type combination as long as it does in cs_ioctl_default()
1637 if (int_queues_only && cs_needs_completion(cs)) { in cs_ioctl_default()
1638 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_default()
1639 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_default()
1640 dev_err(hdev->dev, in cs_ioctl_default()
1641 "Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n", in cs_ioctl_default()
1642 cs->ctx->asid, cs->sequence); in cs_ioctl_default()
1643 rc = -EINVAL; in cs_ioctl_default()
1648 INIT_WORK(&cs->finish_work, cs_completion); in cs_ioctl_default()
1651 * store the (external/HW queues) streams used by the CS in the in cs_ioctl_default()
1652 * fence object for multi-CS completion in cs_ioctl_default()
1654 if (hdev->supports_wait_for_multi_cs) in cs_ioctl_default()
1655 cs->fence->stream_master_qid_map = stream_master_qid_map; in cs_ioctl_default()
1657 rc = hl_hw_queue_schedule_cs(cs); in cs_ioctl_default()
1659 if (rc != -EAGAIN) in cs_ioctl_default()
1660 dev_err(hdev->dev, in cs_ioctl_default()
1661 "Failed to submit CS %d.%llu to H/W queues, error %d\n", in cs_ioctl_default()
1662 cs->ctx->asid, cs->sequence, rc); in cs_ioctl_default()
1666 *signal_initial_sob_count = cs->initial_sob_count; in cs_ioctl_default()
1672 atomic_dec(&cb->cs_cnt); in cs_ioctl_default()
1675 cs_rollback(hdev, cs); in cs_ioctl_default()
1679 /* We finished with the CS in this function, so put the ref */ in cs_ioctl_default()
1680 cs_put(cs); in cs_ioctl_default()
1690 struct hl_device *hdev = hpriv->hdev; in hl_cs_ctx_switch()
1691 struct hl_ctx *ctx = hpriv->ctx; in hl_cs_ctx_switch()
1699 if (hdev->supports_ctx_switch) in hl_cs_ctx_switch()
1700 do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); in hl_cs_ctx_switch()
1702 if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) { in hl_cs_ctx_switch()
1703 mutex_lock(&hpriv->restore_phase_mutex); in hl_cs_ctx_switch()
1706 rc = hdev->asic_funcs->context_switch(hdev, ctx->asid); in hl_cs_ctx_switch()
1708 dev_err_ratelimited(hdev->dev, in hl_cs_ctx_switch()
1709 "Failed to switch to context %d, rejecting CS! %d\n", in hl_cs_ctx_switch()
1710 ctx->asid, rc); in hl_cs_ctx_switch()
1713 * while we want to do context-switch (-EBUSY), in hl_cs_ctx_switch()
1714 * we need to soft-reset because QMAN is in hl_cs_ctx_switch()
1720 if ((rc == -ETIMEDOUT) || (rc == -EBUSY)) in hl_cs_ctx_switch()
1722 mutex_unlock(&hpriv->restore_phase_mutex); in hl_cs_ctx_switch()
1727 hdev->asic_funcs->restore_phase_topology(hdev); in hl_cs_ctx_switch()
1729 chunks = (void __user *) (uintptr_t) args->in.chunks_restore; in hl_cs_ctx_switch()
1730 num_chunks = args->in.num_chunks_restore; in hl_cs_ctx_switch()
1733 dev_dbg(hdev->dev, in hl_cs_ctx_switch()
1734 "Need to run restore phase but restore CS is empty\n"); in hl_cs_ctx_switch()
1738 cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count); in hl_cs_ctx_switch()
1741 mutex_unlock(&hpriv->restore_phase_mutex); in hl_cs_ctx_switch()
1744 dev_err(hdev->dev, in hl_cs_ctx_switch()
1745 "Failed to submit restore CS for context %d (%d)\n", in hl_cs_ctx_switch()
1746 ctx->asid, rc); in hl_cs_ctx_switch()
1755 jiffies_to_usecs(hdev->timeout_jiffies), in hl_cs_ctx_switch()
1758 dev_err(hdev->dev, in hl_cs_ctx_switch()
1759 "Restore CS for context %d failed to complete %d\n", in hl_cs_ctx_switch()
1760 ctx->asid, ret); in hl_cs_ctx_switch()
1761 rc = -ENOEXEC; in hl_cs_ctx_switch()
1766 if (hdev->supports_ctx_switch) in hl_cs_ctx_switch()
1767 ctx->thread_ctx_switch_wait_token = 1; in hl_cs_ctx_switch()
1769 } else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) { in hl_cs_ctx_switch()
1771 &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), in hl_cs_ctx_switch()
1772 100, jiffies_to_usecs(hdev->timeout_jiffies), false); in hl_cs_ctx_switch()
1774 if (rc == -ETIMEDOUT) { in hl_cs_ctx_switch()
1775 dev_err(hdev->dev, in hl_cs_ctx_switch()
1782 if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset)) in hl_cs_ctx_switch()
1794 * @hw_sob: the H/W SOB used in this signal CS.
1808 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; in hl_cs_signal_sob_wraparound_handler()
1813 if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) { in hl_cs_signal_sob_wraparound_handler()
1826 other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS; in hl_cs_signal_sob_wraparound_handler()
1827 other_sob = &prop->hw_sob[other_sob_offset]; in hl_cs_signal_sob_wraparound_handler()
1829 if (kref_read(&other_sob->kref) != 1) { in hl_cs_signal_sob_wraparound_handler()
1830 dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n", in hl_cs_signal_sob_wraparound_handler()
1832 return -EINVAL; in hl_cs_signal_sob_wraparound_handler()
1841 prop->next_sob_val = count + 1; in hl_cs_signal_sob_wraparound_handler()
1843 prop->next_sob_val = count; in hl_cs_signal_sob_wraparound_handler()
1846 prop->curr_sob_offset = other_sob_offset; in hl_cs_signal_sob_wraparound_handler()
1851 * for the reservation or the next signal cs. in hl_cs_signal_sob_wraparound_handler()
1852 * we do it here, and for both encaps and regular signal cs in hl_cs_signal_sob_wraparound_handler()
1856 * in addition, if we have combination of cs signal and in hl_cs_signal_sob_wraparound_handler()
1858 * no more reservations and only signal cs keep coming, in hl_cs_signal_sob_wraparound_handler()
1862 if (other_sob->need_reset) in hl_cs_signal_sob_wraparound_handler()
1867 sob->need_reset = true; in hl_cs_signal_sob_wraparound_handler()
1871 dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n", in hl_cs_signal_sob_wraparound_handler()
1872 prop->curr_sob_offset, q_idx); in hl_cs_signal_sob_wraparound_handler()
1874 prop->next_sob_val += count; in hl_cs_signal_sob_wraparound_handler()
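The wraparound handler above keeps a sync object (SOB) counter from overflowing: when next_sob_val + count would reach HL_MAX_SOB_VAL, the stream switches to the other reserved SOB (which must be idle, i.e. hold only its initial reference), restarts the counter (from count + 1 for encapsulated signals), and marks the old SOB for reset; otherwise the counter simply advances. A simplified model of the arithmetic, with the kref, locking, and reset plumbing reduced to flags and an assumed HL_MAX_SOB_VAL:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HL_MAX_SOB_VAL  (1u << 15)   /* assumption for the sketch */
#define HL_RSVD_SOBS    2

struct mini_sob { int refs; bool need_reset; };

struct stream {
        struct mini_sob sob[HL_RSVD_SOBS];
        unsigned int curr_sob;
        uint32_t next_sob_val;
};

/* returns 0 on success, -1 when the other SOB is still busy */
static int signal_wraparound(struct stream *s, uint32_t count, bool encaps)
{
        if (s->next_sob_val + count >= HL_MAX_SOB_VAL) {
                unsigned int other = (s->curr_sob + 1) % HL_RSVD_SOBS;

                if (s->sob[other].refs != 1)
                        return -1;             /* cannot switch SOBs yet */

                /* encaps signals start counting from 1 on the fresh SOB */
                s->next_sob_val = encaps ? count + 1 : count;
                s->sob[s->curr_sob].need_reset = true;
                s->curr_sob = other;
        } else {
                s->next_sob_val += count;
        }
        return 0;
}

int main(void)
{
        struct stream st = { .sob = { { .refs = 1 }, { .refs = 1 } },
                             .next_sob_val = HL_MAX_SOB_VAL - 10 };

        signal_wraparound(&st, 64, false);
        printf("switched to SOB %u, next_sob_val=%u\n",
               st.curr_sob, st.next_sob_val);
        return 0;
}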
1889 *signal_seq = chunk->encaps_signal_seq; in cs_ioctl_extract_signal_seq()
1893 signal_seq_arr_len = chunk->num_signal_seq_arr; in cs_ioctl_extract_signal_seq()
1896 if (signal_seq_arr_len != 1) { in cs_ioctl_extract_signal_seq()
1897 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_extract_signal_seq()
1898 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); in cs_ioctl_extract_signal_seq()
1899 dev_err(hdev->dev, in cs_ioctl_extract_signal_seq()
1900 "Wait for signal CS supports only one signal CS seq\n"); in cs_ioctl_extract_signal_seq()
1901 return -EINVAL; in cs_ioctl_extract_signal_seq()
1912 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in cs_ioctl_extract_signal_seq()
1913 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); in cs_ioctl_extract_signal_seq()
1914 return -ENOMEM; in cs_ioctl_extract_signal_seq()
1919 u64_to_user_ptr(chunk->signal_seq_arr), in cs_ioctl_extract_signal_seq()
1921 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_extract_signal_seq()
1922 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); in cs_ioctl_extract_signal_seq()
1923 dev_err(hdev->dev, in cs_ioctl_extract_signal_seq()
1925 rc = -EFAULT; in cs_ioctl_extract_signal_seq()
1939 struct hl_ctx *ctx, struct hl_cs *cs, in cs_ioctl_signal_wait_create_jobs() argument
1947 cntr = &hdev->aggregated_cs_counters; in cs_ioctl_signal_wait_create_jobs()
1951 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in cs_ioctl_signal_wait_create_jobs()
1952 atomic64_inc(&cntr->out_of_mem_drop_cnt); in cs_ioctl_signal_wait_create_jobs()
1953 dev_err(hdev->dev, "Failed to allocate a new job\n"); in cs_ioctl_signal_wait_create_jobs()
1954 return -ENOMEM; in cs_ioctl_signal_wait_create_jobs()
1957 if (cs->type == CS_TYPE_WAIT) in cs_ioctl_signal_wait_create_jobs()
1958 cb_size = hdev->asic_funcs->get_wait_cb_size(hdev); in cs_ioctl_signal_wait_create_jobs()
1960 cb_size = hdev->asic_funcs->get_signal_cb_size(hdev); in cs_ioctl_signal_wait_create_jobs()
1964 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in cs_ioctl_signal_wait_create_jobs()
1965 atomic64_inc(&cntr->out_of_mem_drop_cnt); in cs_ioctl_signal_wait_create_jobs()
1967 return -EFAULT; in cs_ioctl_signal_wait_create_jobs()
1970 job->id = 0; in cs_ioctl_signal_wait_create_jobs()
1971 job->cs = cs; in cs_ioctl_signal_wait_create_jobs()
1972 job->user_cb = cb; in cs_ioctl_signal_wait_create_jobs()
1973 atomic_inc(&job->user_cb->cs_cnt); in cs_ioctl_signal_wait_create_jobs()
1974 job->user_cb_size = cb_size; in cs_ioctl_signal_wait_create_jobs()
1975 job->hw_queue_id = q_idx; in cs_ioctl_signal_wait_create_jobs()
1977 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) in cs_ioctl_signal_wait_create_jobs()
1978 && cs->encaps_signals) in cs_ioctl_signal_wait_create_jobs()
1979 job->encaps_sig_wait_offset = encaps_signal_offset; in cs_ioctl_signal_wait_create_jobs()
1982 * We call hl_cb_destroy() for two reasons - we don't need the CB in in cs_ioctl_signal_wait_create_jobs()
1986 job->patched_cb = job->user_cb; in cs_ioctl_signal_wait_create_jobs()
1987 job->job_cb_size = job->user_cb_size; in cs_ioctl_signal_wait_create_jobs()
1988 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); in cs_ioctl_signal_wait_create_jobs()
1991 cs_get(cs); in cs_ioctl_signal_wait_create_jobs()
1993 cs->jobs_in_queue_cnt[job->hw_queue_id]++; in cs_ioctl_signal_wait_create_jobs()
1994 cs->jobs_cnt++; in cs_ioctl_signal_wait_create_jobs()
1996 list_add_tail(&job->cs_node, &cs->job_list); in cs_ioctl_signal_wait_create_jobs()
2010 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_reserve_signals()
2018 dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n", in cs_ioctl_reserve_signals()
2020 rc = -EINVAL; in cs_ioctl_reserve_signals()
2024 if (q_idx >= hdev->asic_prop.max_queues) { in cs_ioctl_reserve_signals()
2025 dev_err(hdev->dev, "Queue index %d is invalid\n", in cs_ioctl_reserve_signals()
2027 rc = -EINVAL; in cs_ioctl_reserve_signals()
2031 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; in cs_ioctl_reserve_signals()
2033 if (!hw_queue_prop->supports_sync_stream) { in cs_ioctl_reserve_signals()
2034 dev_err(hdev->dev, in cs_ioctl_reserve_signals()
2037 rc = -EINVAL; in cs_ioctl_reserve_signals()
2041 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; in cs_ioctl_reserve_signals()
2045 rc = -ENOMEM; in cs_ioctl_reserve_signals()
2049 handle->count = count; in cs_ioctl_reserve_signals()
2051 hl_ctx_get(hpriv->ctx); in cs_ioctl_reserve_signals()
2052 handle->ctx = hpriv->ctx; in cs_ioctl_reserve_signals()
2053 mgr = &hpriv->ctx->sig_mgr; in cs_ioctl_reserve_signals()
2055 spin_lock(&mgr->lock); in cs_ioctl_reserve_signals()
2056 hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC); in cs_ioctl_reserve_signals()
2057 spin_unlock(&mgr->lock); in cs_ioctl_reserve_signals()
2060 dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n"); in cs_ioctl_reserve_signals()
2061 rc = -EINVAL; in cs_ioctl_reserve_signals()
2065 handle->id = hdl_id; in cs_ioctl_reserve_signals()
2066 handle->q_idx = q_idx; in cs_ioctl_reserve_signals()
2067 handle->hdev = hdev; in cs_ioctl_reserve_signals()
2068 kref_init(&handle->refcount); in cs_ioctl_reserve_signals()
2070 hdev->asic_funcs->hw_queues_lock(hdev); in cs_ioctl_reserve_signals()
2072 hw_sob = &prop->hw_sob[prop->curr_sob_offset]; in cs_ioctl_reserve_signals()
2083 dev_err(hdev->dev, "Failed to switch SOB\n"); in cs_ioctl_reserve_signals()
2084 hdev->asic_funcs->hw_queues_unlock(hdev); in cs_ioctl_reserve_signals()
2085 rc = -EINVAL; in cs_ioctl_reserve_signals()
2091 handle->hw_sob = hw_sob; in cs_ioctl_reserve_signals()
2096 handle->pre_sob_val = prop->next_sob_val - handle->count; in cs_ioctl_reserve_signals()
2098 handle->cs_seq = ULLONG_MAX; in cs_ioctl_reserve_signals()
2100 *signals_count = prop->next_sob_val; in cs_ioctl_reserve_signals()
2101 hdev->asic_funcs->hw_queues_unlock(hdev); in cs_ioctl_reserve_signals()
2103 *sob_addr = handle->hw_sob->sob_addr; in cs_ioctl_reserve_signals()
2106 dev_dbg(hdev->dev, in cs_ioctl_reserve_signals()
2108 hw_sob->sob_id, handle->hw_sob->sob_addr, in cs_ioctl_reserve_signals()
2109 prop->next_sob_val - 1, q_idx, hdl_id); in cs_ioctl_reserve_signals()
2113 spin_lock(&mgr->lock); in cs_ioctl_reserve_signals()
2114 idr_remove(&mgr->handles, hdl_id); in cs_ioctl_reserve_signals()
2115 spin_unlock(&mgr->lock); in cs_ioctl_reserve_signals()
2118 hl_ctx_put(handle->ctx); in cs_ioctl_reserve_signals()
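cs_ioctl_reserve_signals() hands userspace a handle describing a block of signal values on the stream's current SOB: it records the queue index, the count, and pre_sob_val (the counter value before this reservation), marks cs_seq as unused (ULLONG_MAX) until a CS consumes the handle, and advances next_sob_val through the wraparound handler. A condensed model of just that bookkeeping; the IDR allocation, locking, and refcounting are omitted and the names are assumptions:

#include <stdint.h>
#include <stdio.h>

struct reservation {
        uint32_t q_idx;
        uint32_t count;
        uint32_t pre_sob_val;    /* SOB value before this reservation */
        uint64_t cs_seq;         /* stays at max until a CS uses the handle */
};

struct sync_stream { uint32_t next_sob_val; };

static void reserve_signals(struct sync_stream *prop, uint32_t q_idx,
                            uint32_t count, struct reservation *handle)
{
        /* wraparound handling (see the sketch above) is skipped here */
        prop->next_sob_val += count;

        handle->q_idx = q_idx;
        handle->count = count;
        handle->pre_sob_val = prop->next_sob_val - count;
        handle->cs_seq = UINT64_MAX;
}

int main(void)
{
        struct sync_stream prop = { .next_sob_val = 100 };
        struct reservation h;

        reserve_signals(&prop, 3, 16, &h);
        printf("reserved %u signals: pre_sob_val=%u, next_sob_val=%u\n",
               h.count, h.pre_sob_val, prop.next_sob_val);
        return 0;
}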
2129 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_unreserve_signals()
2135 mgr = &hpriv->ctx->sig_mgr; in cs_ioctl_unreserve_signals()
2137 spin_lock(&mgr->lock); in cs_ioctl_unreserve_signals()
2138 encaps_sig_hdl = idr_find(&mgr->handles, handle_id); in cs_ioctl_unreserve_signals()
2140 dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n", in cs_ioctl_unreserve_signals()
2141 handle_id, encaps_sig_hdl->hw_sob->sob_addr, in cs_ioctl_unreserve_signals()
2142 encaps_sig_hdl->count); in cs_ioctl_unreserve_signals()
2144 hdev->asic_funcs->hw_queues_lock(hdev); in cs_ioctl_unreserve_signals()
2146 q_idx = encaps_sig_hdl->q_idx; in cs_ioctl_unreserve_signals()
2147 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; in cs_ioctl_unreserve_signals()
2148 hw_sob = &prop->hw_sob[prop->curr_sob_offset]; in cs_ioctl_unreserve_signals()
2149 sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id); in cs_ioctl_unreserve_signals()
2153 * between the reserve-unreserve calls or SOB switch in cs_ioctl_unreserve_signals()
2156 if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count in cs_ioctl_unreserve_signals()
2157 != prop->next_sob_val || in cs_ioctl_unreserve_signals()
2158 sob_addr != encaps_sig_hdl->hw_sob->sob_addr) { in cs_ioctl_unreserve_signals()
2159 …dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %… in cs_ioctl_unreserve_signals()
2160 encaps_sig_hdl->pre_sob_val, in cs_ioctl_unreserve_signals()
2161 (prop->next_sob_val - encaps_sig_hdl->count)); in cs_ioctl_unreserve_signals()
2163 hdev->asic_funcs->hw_queues_unlock(hdev); in cs_ioctl_unreserve_signals()
2164 rc = -EINVAL; in cs_ioctl_unreserve_signals()
2172 prop->next_sob_val -= encaps_sig_hdl->count; in cs_ioctl_unreserve_signals()
2174 hdev->asic_funcs->hw_queues_unlock(hdev); in cs_ioctl_unreserve_signals()
2179 idr_remove(&mgr->handles, handle_id); in cs_ioctl_unreserve_signals()
2182 spin_unlock(&mgr->lock); in cs_ioctl_unreserve_signals()
2183 hl_ctx_put(encaps_sig_hdl->ctx); in cs_ioctl_unreserve_signals()
2187 rc = -EINVAL; in cs_ioctl_unreserve_signals()
2188 dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n"); in cs_ioctl_unreserve_signals()
2192 spin_unlock(&mgr->lock); in cs_ioctl_unreserve_signals()
2210 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_signal_wait()
2215 struct hl_ctx *ctx = hpriv->ctx; in cs_ioctl_signal_wait()
2217 struct hl_cs *cs; in cs_ioctl_signal_wait() local
2221 cntr = &hdev->aggregated_cs_counters; in cs_ioctl_signal_wait()
2232 if (chunk->queue_index >= hdev->asic_prop.max_queues) { in cs_ioctl_signal_wait()
2233 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2234 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2235 dev_err(hdev->dev, "Queue index %d is invalid\n", in cs_ioctl_signal_wait()
2236 chunk->queue_index); in cs_ioctl_signal_wait()
2237 rc = -EINVAL; in cs_ioctl_signal_wait()
2241 q_idx = chunk->queue_index; in cs_ioctl_signal_wait()
2242 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; in cs_ioctl_signal_wait()
2243 q_type = hw_queue_prop->type; in cs_ioctl_signal_wait()
2245 if (!hw_queue_prop->supports_sync_stream) { in cs_ioctl_signal_wait()
2246 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2247 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2248 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2251 rc = -EINVAL; in cs_ioctl_signal_wait()
2256 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) { in cs_ioctl_signal_wait()
2257 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2258 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2259 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2261 rc = -EINVAL; in cs_ioctl_signal_wait()
2265 if (!hdev->nic_ports_mask) { in cs_ioctl_signal_wait()
2266 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2267 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2268 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2270 rc = -EINVAL; in cs_ioctl_signal_wait()
2274 collective_engine_id = chunk->collective_engine_id; in cs_ioctl_signal_wait()
2289 /* check if cs sequence has encapsulated in cs_ioctl_signal_wait()
2295 spin_lock(&ctx->sig_mgr.lock); in cs_ioctl_signal_wait()
2296 idp = &ctx->sig_mgr.handles; in cs_ioctl_signal_wait()
2298 if (encaps_sig_hdl->cs_seq == signal_seq) { in cs_ioctl_signal_wait()
2300 * needed when multiple wait cs are used with offset in cs_ioctl_signal_wait()
2307 if (kref_get_unless_zero(&encaps_sig_hdl->refcount)) in cs_ioctl_signal_wait()
2312 spin_unlock(&ctx->sig_mgr.lock); in cs_ioctl_signal_wait()
2315 /* treat as signal CS already finished */ in cs_ioctl_signal_wait()
2316 dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n", in cs_ioctl_signal_wait()
2323 if (chunk->encaps_signal_offset > in cs_ioctl_signal_wait()
2324 encaps_sig_hdl->count) { in cs_ioctl_signal_wait()
2325 dev_err(hdev->dev, "offset(%u) value exceed max reserved signals count(%u)!\n", in cs_ioctl_signal_wait()
2326 chunk->encaps_signal_offset, in cs_ioctl_signal_wait()
2327 encaps_sig_hdl->count); in cs_ioctl_signal_wait()
2328 rc = -EINVAL; in cs_ioctl_signal_wait()
2335 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2336 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2337 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2338 "Failed to get signal CS with seq 0x%llx\n", in cs_ioctl_signal_wait()
2345 /* signal CS already finished */ in cs_ioctl_signal_wait()
2354 (sig_waitcs_cmpl->type == CS_TYPE_DEFAULT && in cs_ioctl_signal_wait()
2357 if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL && in cs_ioctl_signal_wait()
2359 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2360 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2361 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2362 "CS seq 0x%llx is not of a signal/encaps-signal CS\n", in cs_ioctl_signal_wait()
2365 rc = -EINVAL; in cs_ioctl_signal_wait()
2369 if (completion_done(&sig_fence->completion)) { in cs_ioctl_signal_wait()
2370 /* signal CS already finished */ in cs_ioctl_signal_wait()
2377 rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout); in cs_ioctl_signal_wait()
2386 * Save the signal CS fence for later initialization right before in cs_ioctl_signal_wait()
2387 * hanging the wait CS on the queue. in cs_ioctl_signal_wait()
2388 * For the encaps signals case, we save the cs sequence and handle pointer in cs_ioctl_signal_wait()
2392 cs->signal_fence = sig_fence; in cs_ioctl_signal_wait()
2397 if (cs->encaps_signals) in cs_ioctl_signal_wait()
2398 cs->encaps_sig_hdl = encaps_sig_hdl; in cs_ioctl_signal_wait()
2401 hl_debugfs_add_cs(cs); in cs_ioctl_signal_wait()
2403 *cs_seq = cs->sequence; in cs_ioctl_signal_wait()
2406 rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type, in cs_ioctl_signal_wait()
2407 q_idx, chunk->encaps_signal_offset); in cs_ioctl_signal_wait()
2409 rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx, in cs_ioctl_signal_wait()
2410 cs, q_idx, collective_engine_id, in cs_ioctl_signal_wait()
2411 chunk->encaps_signal_offset); in cs_ioctl_signal_wait()
2413 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2414 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2415 rc = -EINVAL; in cs_ioctl_signal_wait()
2422 INIT_WORK(&cs->finish_work, cs_completion); in cs_ioctl_signal_wait()
2424 rc = hl_hw_queue_schedule_cs(cs); in cs_ioctl_signal_wait()
2426 /* In case the wait cs failed here, it means the signal cs in cs_ioctl_signal_wait()
2432 else if (rc != -EAGAIN) in cs_ioctl_signal_wait()
2433 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2434 "Failed to submit CS %d.%llu to H/W queues, error %d\n", in cs_ioctl_signal_wait()
2435 ctx->asid, cs->sequence, rc); in cs_ioctl_signal_wait()
2439 *signal_sob_addr_offset = cs->sob_addr_offset; in cs_ioctl_signal_wait()
2440 *signal_initial_sob_count = cs->initial_sob_count; in cs_ioctl_signal_wait()
2448 cs_rollback(hdev, cs); in cs_ioctl_signal_wait()
2452 /* We finished with the CS in this function, so put the ref */ in cs_ioctl_signal_wait()
2453 cs_put(cs); in cs_ioctl_signal_wait()
2456 kref_put(&encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx); in cs_ioctl_signal_wait()
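/*
 * Illustrative sketch, not driver code: the encapsulated-signals lookup above
 * walks an IDR under the signal-manager lock and only takes a reference with
 * kref_get_unless_zero(), so a handle whose refcount already dropped to zero
 * (i.e. one that is mid-release) is treated as "not found" instead of being
 * revived. The demo_* structure and function names below are hypothetical.
 */
struct demo_handle {
	struct kref refcount;
	u64 cs_seq;
};

static struct demo_handle *demo_lookup_handle(struct idr *idp, spinlock_t *lock, u64 seq)
{
	struct demo_handle *h, *found = NULL;
	int id;

	spin_lock(lock);
	idr_for_each_entry(idp, h, id) {
		if (h->cs_seq == seq && kref_get_unless_zero(&h->refcount)) {
			found = h;
			break;
		}
	}
	spin_unlock(lock);

	return found;	/* caller must kref_put() the reference when done */
}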
2465 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_engine_cores()
2470 if (!hdev->asic_prop.supports_engine_modes) in cs_ioctl_engine_cores()
2471 return -EPERM; in cs_ioctl_engine_cores()
2473 if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) { in cs_ioctl_engine_cores()
2474 dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores); in cs_ioctl_engine_cores()
2475 return -EINVAL; in cs_ioctl_engine_cores()
2479 dev_err(hdev->dev, "Engine core command is invalid\n"); in cs_ioctl_engine_cores()
2480 return -EINVAL; in cs_ioctl_engine_cores()
2486 return -ENOMEM; in cs_ioctl_engine_cores()
2489 dev_err(hdev->dev, "Failed to copy core-ids array from user\n"); in cs_ioctl_engine_cores()
2491 return -EFAULT; in cs_ioctl_engine_cores()
2494 rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command); in cs_ioctl_engine_cores()
2503 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_engines()
2508 if (!hdev->asic_prop.supports_engine_modes) in cs_ioctl_engines()
2509 return -EPERM; in cs_ioctl_engines()
2512 dev_err(hdev->dev, "Engine command is invalid\n"); in cs_ioctl_engines()
2513 return -EINVAL; in cs_ioctl_engines()
2516 max_num_of_engines = hdev->asic_prop.max_num_of_engines; in cs_ioctl_engines()
2518 max_num_of_engines = hdev->asic_prop.num_engine_cores; in cs_ioctl_engines()
2521 dev_err(hdev->dev, "Number of engines %d is invalid\n", num_engines); in cs_ioctl_engines()
2522 return -EINVAL; in cs_ioctl_engines()
2528 return -ENOMEM; in cs_ioctl_engines()
2531 dev_err(hdev->dev, "Failed to copy engine-ids array from user\n"); in cs_ioctl_engines()
2533 return -EFAULT; in cs_ioctl_engines()
2536 rc = hdev->asic_funcs->set_engines(hdev, engines, num_engines, command); in cs_ioctl_engines()
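/*
 * Generic sketch of the pattern shared by the two ioctls above (hypothetical
 * demo_* names, not the driver's helpers): validate the user-supplied count
 * against a driver limit, copy the ID array in from user space, hand it to a
 * backend callback and free it.
 */
static int demo_copy_and_apply_ids(void __user *uptr, u32 count, u32 max,
				   int (*apply)(u32 *ids, u32 count))
{
	u32 *ids;
	int rc;

	if (!count || count > max)
		return -EINVAL;

	ids = kmalloc_array(count, sizeof(*ids), GFP_KERNEL);
	if (!ids)
		return -ENOMEM;

	if (copy_from_user(ids, uptr, count * sizeof(*ids))) {
		kfree(ids);
		return -EFAULT;
	}

	rc = apply(ids, count);

	kfree(ids);
	return rc;
}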
2544 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_flush_pci_hbw_writes()
2545 struct asic_fixed_properties *prop = &hdev->asic_prop; in cs_ioctl_flush_pci_hbw_writes()
2547 if (!prop->hbw_flush_reg) { in cs_ioctl_flush_pci_hbw_writes()
2548 dev_dbg(hdev->dev, "HBW flush is not supported\n"); in cs_ioctl_flush_pci_hbw_writes()
2549 return -EOPNOTSUPP; in cs_ioctl_flush_pci_hbw_writes()
2552 RREG32(prop->hbw_flush_reg); in cs_ioctl_flush_pci_hbw_writes()
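/*
 * Illustrative sketch only: the RREG32(prop->hbw_flush_reg) above relies on
 * the standard PCIe trick of flushing posted writes by reading a register on
 * the same device. The demo helper and its parameters are hypothetical.
 */
static void demo_flush_posted_writes(void __iomem *regs, unsigned long flush_reg_offset)
{
	/*
	 * PCIe memory writes are posted: they may still be in flight when the
	 * CPU moves on. A read from the device cannot complete until the
	 * earlier writes have landed, so discarding the read value is enough
	 * to guarantee the writes reached the device.
	 */
	(void)readl(regs + flush_reg_offset);
}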
2559 struct hl_fpriv *hpriv = file_priv->driver_priv; in hl_cs_ioctl()
2577 cs_type = hl_cs_get_cs_type(args->in.cs_flags & in hl_cs_ioctl()
2579 chunks = (void __user *) (uintptr_t) args->in.chunks_execute; in hl_cs_ioctl()
2580 num_chunks = args->in.num_chunks_execute; in hl_cs_ioctl()
2581 flags = args->in.cs_flags; in hl_cs_ioctl()
2583 /* In case this is a staged CS, the user should supply the CS sequence */ in hl_cs_ioctl()
2586 cs_seq = args->in.seq; in hl_cs_ioctl()
2589 ? msecs_to_jiffies(args->in.timeout * 1000) in hl_cs_ioctl()
2590 : hpriv->hdev->timeout_jiffies; in hl_cs_ioctl()
2597 &cs_seq, args->in.cs_flags, timeout, in hl_cs_ioctl()
2602 args->in.encaps_signals_q_idx, in hl_cs_ioctl()
2603 args->in.encaps_signals_count, in hl_cs_ioctl()
2608 args->in.encaps_sig_handle_id); in hl_cs_ioctl()
2611 rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores, in hl_cs_ioctl()
2612 args->in.num_engine_cores, args->in.core_command); in hl_cs_ioctl()
2615 rc = cs_ioctl_engines(hpriv, args->in.engines, in hl_cs_ioctl()
2616 args->in.num_engines, args->in.engine_command); in hl_cs_ioctl()
2623 args->in.cs_flags, in hl_cs_ioctl()
2624 args->in.encaps_sig_handle_id, in hl_cs_ioctl()
2629 if (rc != -EAGAIN) { in hl_cs_ioctl()
2634 args->out.handle_id = handle_id; in hl_cs_ioctl()
2635 args->out.sob_base_addr_offset = sob_addr; in hl_cs_ioctl()
2636 args->out.count = signals_count; in hl_cs_ioctl()
2639 args->out.sob_base_addr_offset = sob_addr; in hl_cs_ioctl()
2640 args->out.sob_count_before_submission = sob_initial_count; in hl_cs_ioctl()
2641 args->out.seq = cs_seq; in hl_cs_ioctl()
2644 args->out.sob_count_before_submission = sob_initial_count; in hl_cs_ioctl()
2645 args->out.seq = cs_seq; in hl_cs_ioctl()
2648 args->out.seq = cs_seq; in hl_cs_ioctl()
2652 args->out.status = rc; in hl_cs_ioctl()
2661 struct hl_device *hdev = ctx->hdev; in hl_wait_for_fence()
2668 if (rc == -EINVAL) in hl_wait_for_fence()
2669 dev_notice_ratelimited(hdev->dev, in hl_wait_for_fence()
2670 "Can't wait on CS %llu because current CS is at seq %llu\n", in hl_wait_for_fence()
2671 seq, ctx->cs_sequence); in hl_wait_for_fence()
2676 if (!hl_pop_cs_outcome(&ctx->outcome_store, seq, &timestamp_kt, &error)) { in hl_wait_for_fence()
2677 dev_dbg(hdev->dev, in hl_wait_for_fence()
2678 "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n", in hl_wait_for_fence()
2679 seq, ctx->cs_sequence); in hl_wait_for_fence()
2684 completion_rc = 1; in hl_wait_for_fence()
2689 completion_rc = completion_done(&fence->completion); in hl_wait_for_fence()
2697 &fence->completion, timeout); in hl_wait_for_fence()
2700 error = fence->error; in hl_wait_for_fence()
2701 timestamp_kt = fence->timestamp; in hl_wait_for_fence()
2712 if (completion_rc == -ERESTARTSYS) in hl_wait_for_fence()
2714 else if (error == -ETIMEDOUT || error == -EIO) in hl_wait_for_fence()
2721 * hl_cs_poll_fences - iterate CS fences to check for CS completion
2723 * @mcs_data: multi-CS internal data
2724 * @mcs_compl: multi-CS completion structure
2728 * The function iterates over all CS sequences in the list and sets a bit in in hl_cs_poll_fences()
2729 * completion_bitmap for each completed CS.
2732 * completion to the multi-CS context.
2737 struct hl_fence **fence_ptr = mcs_data->fence_arr; in hl_cs_poll_fences()
2738 struct hl_device *hdev = mcs_data->ctx->hdev; in hl_cs_poll_fences()
2739 int i, rc, arr_len = mcs_data->arr_len; in hl_cs_poll_fences()
2740 u64 *seq_arr = mcs_data->seq_arr; in hl_cs_poll_fences()
2747 rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len); in hl_cs_poll_fences()
2752 * re-initialize the completion here to handle 2 possible cases: in hl_cs_poll_fences()
2753 * 1. CS will complete the multi-CS prior clearing the completion. in which in hl_cs_poll_fences()
2754 * case the fence iteration is guaranteed to catch the CS completion. in hl_cs_poll_fences()
2755 * 2. the completion will occur after re-init of the completion. in hl_cs_poll_fences()
2758 reinit_completion(&mcs_compl->completion); in hl_cs_poll_fences()
2762 * this value is maintained - no timestamp was updated in hl_cs_poll_fences()
2771 * In order to prevent case where we wait until timeout even though a CS associated in hl_cs_poll_fences()
2772 * with the multi-CS actually completed we do things in the below order: in hl_cs_poll_fences()
2773 * 1. for each fence set its QID map in the multi-CS completion QID map. This way in hl_cs_poll_fences()
2774 * any CS can, potentially, complete the multi CS for the specific QID (note in hl_cs_poll_fences()
2777 * 2. only after allowing multi-CS completion for the specific QID do we check whether in hl_cs_poll_fences()
2778 * the specific CS already completed (and thus the wait for completion part will in hl_cs_poll_fences()
2779 * be skipped). If the CS has not completed, it is guaranteed that the completing CS will in hl_cs_poll_fences()
2783 mcs_compl->stream_master_qid_map |= fence->stream_master_qid_map; in hl_cs_poll_fences()
2789 rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence, &status, 0, NULL); in hl_cs_poll_fences()
2791 dev_err(hdev->dev, in hl_cs_poll_fences()
2792 "wait_for_fence error: %d for CS seq %llu\n", in hl_cs_poll_fences()
2799 /* CS did not finish, QID to wait on already stored */ in hl_cs_poll_fences()
2804 * returns to user indicating CS completed before it finished in hl_cs_poll_fences()
2810 if (fence && !fence->mcs_handling_done) { in hl_cs_poll_fences()
2812 * in case the multi-CS is completed but the MCS handling is not done in hl_cs_poll_fences()
2813 * we "complete" the multi-CS to prevent it from waiting in hl_cs_poll_fences()
2814 * until time-out, and the "multi-CS handling done" will have in hl_cs_poll_fences()
2817 complete_all(&mcs_compl->completion); in hl_cs_poll_fences()
2821 mcs_data->completion_bitmap |= BIT(i); in hl_cs_poll_fences()
2827 if (fence && mcs_data->update_ts && in hl_cs_poll_fences()
2828 (ktime_compare(fence->timestamp, first_cs_time) < 0)) in hl_cs_poll_fences()
2829 first_cs_time = fence->timestamp; in hl_cs_poll_fences()
2832 mcs_data->update_ts = false; in hl_cs_poll_fences()
2833 mcs_data->gone_cs = true; in hl_cs_poll_fences()
2837 * already gone. In this case, the CS is set as completed but in hl_cs_poll_fences()
2840 mcs_data->completion_bitmap |= BIT(i); in hl_cs_poll_fences()
2843 dev_err(hdev->dev, "Invalid fence status\n"); in hl_cs_poll_fences()
2844 rc = -EINVAL; in hl_cs_poll_fences()
2850 hl_fences_put(mcs_data->fence_arr, arr_len); in hl_cs_poll_fences()
2852 if (mcs_data->update_ts && in hl_cs_poll_fences()
2854 mcs_data->timestamp = ktime_to_ns(first_cs_time); in hl_cs_poll_fences()
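/*
 * Sketch of the ordering described in the comments above (hypothetical
 * demo_* names; the locking and memory barriers the driver uses are omitted
 * for brevity): the waiter publishes its interest in a queue ID first, and
 * only then checks whether the work already completed. A completer running
 * in between sees the published QID and signals the completion, so at worst
 * the waiter is woken twice, but a completion can never be missed.
 */
struct demo_mcs_waiter {
	struct completion comp;
	unsigned long qid_map;		/* QIDs this waiter cares about */
};

/* completer side: wake the waiter only if it registered this QID */
static void demo_mcs_complete(struct demo_mcs_waiter *w, int qid)
{
	if (test_bit(qid, &w->qid_map))
		complete_all(&w->comp);
}

/* waiter side: step 1 publish interest, step 2 check for an earlier completion */
static bool demo_mcs_arm_and_check(struct demo_mcs_waiter *w, int qid,
				   bool (*is_done)(void *arg), void *arg)
{
	set_bit(qid, &w->qid_map);	/* 1. any later completer will now wake us */
	return is_done(arg);		/* 2. only then look for an earlier completion */
}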
2895 * hl_wait_multi_cs_completion_init - init completion structure
2914 mcs_compl = &hdev->multi_cs_completion[i]; in hl_wait_multi_cs_completion_init()
2915 spin_lock(&mcs_compl->lock); in hl_wait_multi_cs_completion_init()
2916 if (!mcs_compl->used) { in hl_wait_multi_cs_completion_init()
2917 mcs_compl->used = 1; in hl_wait_multi_cs_completion_init()
2918 mcs_compl->timestamp = 0; in hl_wait_multi_cs_completion_init()
2921 * to multi-CS CSs will be set incrementally at a later stage in hl_wait_multi_cs_completion_init()
2923 mcs_compl->stream_master_qid_map = 0; in hl_wait_multi_cs_completion_init()
2924 spin_unlock(&mcs_compl->lock); in hl_wait_multi_cs_completion_init()
2927 spin_unlock(&mcs_compl->lock); in hl_wait_multi_cs_completion_init()
2931 dev_err(hdev->dev, "no available multi-CS completion structure\n"); in hl_wait_multi_cs_completion_init()
2932 return ERR_PTR(-ENOMEM); in hl_wait_multi_cs_completion_init()
2938 * hl_wait_multi_cs_completion_fini - return completion structure and set as
2947 * free completion structure, do it under lock to be in sync with the in hl_wait_multi_cs_completion_fini()
2950 spin_lock(&mcs_compl->lock); in hl_wait_multi_cs_completion_fini()
2951 mcs_compl->used = 0; in hl_wait_multi_cs_completion_fini()
2952 spin_unlock(&mcs_compl->lock); in hl_wait_multi_cs_completion_fini()
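/*
 * Sketch of the completion-slot pool used by the init/fini pair above
 * (hypothetical demo_* names): each slot carries its own lock and a 'used'
 * flag, so claiming a free slot and returning it later only ever takes that
 * one slot's lock rather than a global one.
 */
struct demo_slot {
	spinlock_t lock;
	bool used;
};

static struct demo_slot *demo_claim_slot(struct demo_slot *pool, int nr_slots)
{
	int i;

	for (i = 0; i < nr_slots; i++) {
		struct demo_slot *s = &pool[i];

		spin_lock(&s->lock);
		if (!s->used) {
			s->used = true;
			spin_unlock(&s->lock);
			return s;
		}
		spin_unlock(&s->lock);
	}

	return NULL;	/* pool exhausted, mirrors the ERR_PTR(-ENOMEM) case above */
}

static void demo_return_slot(struct demo_slot *s)
{
	spin_lock(&s->lock);
	s->used = false;
	spin_unlock(&s->lock);
}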
2956 * hl_wait_multi_cs_completion - wait for first CS to complete
2958 * @mcs_data: multi-CS internal data
2967 completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion, in hl_wait_multi_cs_completion()
2968 mcs_data->timeout_jiffies); in hl_wait_multi_cs_completion()
2972 mcs_data->timestamp = mcs_compl->timestamp; in hl_wait_multi_cs_completion()
2974 if (completion_rc == -ERESTARTSYS) in hl_wait_multi_cs_completion()
2977 mcs_data->wait_status = completion_rc; in hl_wait_multi_cs_completion()
2983 * hl_multi_cs_completion_init - init array of multi-CS completion structures
2993 mcs_cmpl = &hdev->multi_cs_completion[i]; in hl_multi_cs_completion_init()
2994 mcs_cmpl->used = 0; in hl_multi_cs_completion_init()
2995 spin_lock_init(&mcs_cmpl->lock); in hl_multi_cs_completion_init()
2996 init_completion(&mcs_cmpl->completion); in hl_multi_cs_completion_init()
3001 * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl
3004 * @data: pointer to multi-CS wait ioctl in/out args
3010 struct hl_device *hdev = hpriv->hdev; in hl_multi_cs_wait_ioctl()
3013 struct hl_ctx *ctx = hpriv->ctx; in hl_multi_cs_wait_ioctl()
3021 for (i = 0 ; i < sizeof(args->in.pad) ; i++) in hl_multi_cs_wait_ioctl()
3022 if (args->in.pad[i]) { in hl_multi_cs_wait_ioctl()
3023 dev_dbg(hdev->dev, "Padding bytes must be 0\n"); in hl_multi_cs_wait_ioctl()
3024 return -EINVAL; in hl_multi_cs_wait_ioctl()
3027 if (!hdev->supports_wait_for_multi_cs) { in hl_multi_cs_wait_ioctl()
3028 dev_err(hdev->dev, "Wait for multi CS is not supported\n"); in hl_multi_cs_wait_ioctl()
3029 return -EPERM; in hl_multi_cs_wait_ioctl()
3032 seq_arr_len = args->in.seq_arr_len; in hl_multi_cs_wait_ioctl()
3035 dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n", in hl_multi_cs_wait_ioctl()
3037 return -EINVAL; in hl_multi_cs_wait_ioctl()
3044 return -ENOMEM; in hl_multi_cs_wait_ioctl()
3046 /* copy CS sequence array from user */ in hl_multi_cs_wait_ioctl()
3047 seq_arr = (void __user *) (uintptr_t) args->in.seq; in hl_multi_cs_wait_ioctl()
3050 dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n"); in hl_multi_cs_wait_ioctl()
3051 rc = -EFAULT; in hl_multi_cs_wait_ioctl()
3058 rc = -ENOMEM; in hl_multi_cs_wait_ioctl()
3062 /* initialize the multi-CS internal data */ in hl_multi_cs_wait_ioctl()
3070 /* wait (with timeout) for the first CS to be completed */ in hl_multi_cs_wait_ioctl()
3071 mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us); in hl_multi_cs_wait_ioctl()
3078 /* poll all CS fences, extract timestamp */ in hl_multi_cs_wait_ioctl()
3082 * skip wait for CS completion when one of the below is true: in hl_multi_cs_wait_ioctl()
3083 * - an error on the poll function in hl_multi_cs_wait_ioctl()
3084 * - one or more CSs in the list completed in hl_multi_cs_wait_ioctl()
3085 * - the user called the ioctl with timeout 0 in hl_multi_cs_wait_ioctl()
3087 if (rc || mcs_data.completion_bitmap || !args->in.timeout_us) in hl_multi_cs_wait_ioctl()
3096 * poll fences once again to update the CS map. in hl_multi_cs_wait_ioctl()
3107 * it got a completion) it either got completed by a CS in the multi-CS list in hl_multi_cs_wait_ioctl()
3109 * got completed by a CS submitted to one of the shared stream masters but in hl_multi_cs_wait_ioctl()
3110 * not in the multi-CS list (in which case we should wait again but modify in hl_multi_cs_wait_ioctl()
3111 * the timeout and set the timestamp to zero to let a CS related to the current in hl_multi_cs_wait_ioctl()
3112 * multi-CS set a new, relevant timestamp) in hl_multi_cs_wait_ioctl()
3115 mcs_compl->timestamp = 0; in hl_multi_cs_wait_ioctl()
3128 if (rc == -ERESTARTSYS) { in hl_multi_cs_wait_ioctl()
3129 dev_err_ratelimited(hdev->dev, in hl_multi_cs_wait_ioctl()
3130 "user process got signal while waiting for Multi-CS\n"); in hl_multi_cs_wait_ioctl()
3131 rc = -EINTR; in hl_multi_cs_wait_ioctl()
3141 args->out.status = HL_WAIT_CS_STATUS_COMPLETED; in hl_multi_cs_wait_ioctl()
3142 args->out.cs_completion_map = mcs_data.completion_bitmap; in hl_multi_cs_wait_ioctl()
3144 /* if the timestamp is not 0, it's valid */ in hl_multi_cs_wait_ioctl()
3146 args->out.timestamp_nsec = mcs_data.timestamp; in hl_multi_cs_wait_ioctl()
3147 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; in hl_multi_cs_wait_ioctl()
3150 /* update if some CS was gone */ in hl_multi_cs_wait_ioctl()
3152 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; in hl_multi_cs_wait_ioctl()
3154 args->out.status = HL_WAIT_CS_STATUS_BUSY; in hl_multi_cs_wait_ioctl()
3162 struct hl_device *hdev = hpriv->hdev; in hl_cs_wait_ioctl()
3165 u64 seq = args->in.seq; in hl_cs_wait_ioctl()
3169 rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq, &status, &timestamp); in hl_cs_wait_ioctl()
3171 if (rc == -ERESTARTSYS) { in hl_cs_wait_ioctl()
3172 dev_err_ratelimited(hdev->dev, in hl_cs_wait_ioctl()
3173 "user process got signal while waiting for CS handle %llu\n", in hl_cs_wait_ioctl()
3175 return -EINTR; in hl_cs_wait_ioctl()
3181 if (rc == -ETIMEDOUT) { in hl_cs_wait_ioctl()
3182 dev_err_ratelimited(hdev->dev, in hl_cs_wait_ioctl()
3183 "CS %llu has timed out while user process is waiting for it\n", in hl_cs_wait_ioctl()
3185 args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT; in hl_cs_wait_ioctl()
3186 } else if (rc == -EIO) { in hl_cs_wait_ioctl()
3187 dev_err_ratelimited(hdev->dev, in hl_cs_wait_ioctl()
3188 "CS %llu has been aborted while user process is waiting for it\n", in hl_cs_wait_ioctl()
3190 args->out.status = HL_WAIT_CS_STATUS_ABORTED; in hl_cs_wait_ioctl()
3196 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; in hl_cs_wait_ioctl()
3197 args->out.timestamp_nsec = timestamp; in hl_cs_wait_ioctl()
3202 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; in hl_cs_wait_ioctl()
3205 args->out.status = HL_WAIT_CS_STATUS_COMPLETED; in hl_cs_wait_ioctl()
3209 args->out.status = HL_WAIT_CS_STATUS_BUSY; in hl_cs_wait_ioctl()
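/*
 * Hypothetical user-space sketch of driving the wait path above. The uAPI
 * header name, the HL_IOCTL_WAIT_CS request and the device node used to get
 * the fd are assumptions on my part; only the in.seq / in.timeout_us inputs
 * and the out.status / out.flags / out.timestamp_nsec outputs mirror what
 * the handler above consumes and fills.
 */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/habanalabs_accel.h>	/* uAPI header; exact install path is an assumption */

static int demo_wait_on_cs(int fd, unsigned long long seq, unsigned long long timeout_us)
{
	union hl_wait_cs_args args;

	memset(&args, 0, sizeof(args));
	args.in.seq = seq;
	args.in.timeout_us = timeout_us;

	if (ioctl(fd, HL_IOCTL_WAIT_CS, &args))
		return -1;

	switch (args.out.status) {
	case HL_WAIT_CS_STATUS_COMPLETED:
		if (args.out.flags & HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD)
			printf("CS %llu completed at %llu ns\n", seq,
			       (unsigned long long)args.out.timestamp_nsec);
		return 0;
	case HL_WAIT_CS_STATUS_BUSY:
		return 1;	/* still running, caller may retry */
	default:
		return -1;	/* timed out, aborted or gone */
	}
}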
3219 record->ts_reg_info.cq_cb = cq_cb; in set_record_cq_info()
3220 record->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_offset; in set_record_cq_info()
3221 record->cq_target_value = target_value; in set_record_cq_info()
3230 *req_event_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + in validate_and_get_ts_record()
3232 ts_cb_last = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + in validate_and_get_ts_record()
3233 (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt)); in validate_and_get_ts_record()
3239 return -EINVAL; in validate_and_get_ts_record()
3248 struct hl_user_interrupt *interrupt = record->ts_reg_info.interrupt; in unregister_timestamp_node()
3253 spin_lock_irqsave(&interrupt->ts_list_lock, flags); in unregister_timestamp_node()
3255 if (record->ts_reg_info.in_use) { in unregister_timestamp_node()
3256 record->ts_reg_info.in_use = false; in unregister_timestamp_node()
3257 list_del(&record->list_node); in unregister_timestamp_node()
3262 spin_unlock_irqrestore(&interrupt->ts_list_lock, flags); in unregister_timestamp_node()
3266 hl_mmap_mem_buf_put(record->ts_reg_info.buf); in unregister_timestamp_node()
3267 hl_cb_put(record->ts_reg_info.cq_cb); in unregister_timestamp_node()
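/*
 * Sketch (hypothetical demo_* names and callbacks) of the same general
 * teardown order used above: the record is unlinked under the interrupt's
 * list lock, while the buffer and CB reference drops happen only after the
 * lock is released, because the final put may free the backing objects.
 */
struct demo_ts_node {
	struct list_head link;
	bool in_use;
	void *buf;
	void *cb;
};

static void demo_unregister_ts_node(struct demo_ts_node *node, spinlock_t *lock,
				    void (*put_buf)(void *), void (*put_cb)(void *))
{
	unsigned long flags;
	bool was_in_use;

	spin_lock_irqsave(lock, flags);
	was_in_use = node->in_use;
	if (was_in_use) {
		node->in_use = false;
		list_del(&node->link);
	}
	spin_unlock_irqrestore(lock, flags);

	if (was_in_use) {
		put_buf(node->buf);	/* may sleep/free: must run outside the spinlock */
		put_cb(node->cb);
	}
}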
3276 struct hl_ts_buff *ts_buff = data->buf->private; in ts_get_and_handle_kernel_record()
3280 rc = validate_and_get_ts_record(data->buf->mmg->dev, ts_buff, data->ts_offset, in ts_get_and_handle_kernel_record()
3285 /* In case the node is already registered, we need to unregister it first and then re-use it */ in ts_get_and_handle_kernel_record()
3286 if (req_offset_record->ts_reg_info.in_use) { in ts_get_and_handle_kernel_record()
3292 if (data->interrupt->interrupt_id != in ts_get_and_handle_kernel_record()
3293 req_offset_record->ts_reg_info.interrupt->interrupt_id) { in ts_get_and_handle_kernel_record()
3296 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, *flags); in ts_get_and_handle_kernel_record()
3302 spin_lock_irqsave(&data->interrupt->ts_list_lock, *flags); in ts_get_and_handle_kernel_record()
3306 req_offset_record->ts_reg_info.in_use = true; in ts_get_and_handle_kernel_record()
3307 req_offset_record->ts_reg_info.buf = data->buf; in ts_get_and_handle_kernel_record()
3308 req_offset_record->ts_reg_info.timestamp_kernel_addr = in ts_get_and_handle_kernel_record()
3309 (u64 *) ts_buff->user_buff_address + data->ts_offset; in ts_get_and_handle_kernel_record()
3310 req_offset_record->ts_reg_info.interrupt = data->interrupt; in ts_get_and_handle_kernel_record()
3311 set_record_cq_info(req_offset_record, data->cq_cb, data->cq_offset, in ts_get_and_handle_kernel_record()
3312 data->target_value); in ts_get_and_handle_kernel_record()
3329 data->cq_cb = hl_cb_get(data->mmg, data->cq_handle); in _hl_interrupt_ts_reg_ioctl()
3330 if (!data->cq_cb) { in _hl_interrupt_ts_reg_ioctl()
3331 rc = -EINVAL; in _hl_interrupt_ts_reg_ioctl()
3336 if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >= in _hl_interrupt_ts_reg_ioctl()
3337 ((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) { in _hl_interrupt_ts_reg_ioctl()
3338 rc = -EINVAL; in _hl_interrupt_ts_reg_ioctl()
3342 data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle); in _hl_interrupt_ts_reg_ioctl()
3343 if (!data->buf) { in _hl_interrupt_ts_reg_ioctl()
3344 rc = -EINVAL; in _hl_interrupt_ts_reg_ioctl()
3348 spin_lock_irqsave(&data->interrupt->ts_list_lock, flags); in _hl_interrupt_ts_reg_ioctl()
3353 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags); in _hl_interrupt_ts_reg_ioctl()
3360 if (*pend->cq_kernel_addr >= data->target_value) { in _hl_interrupt_ts_reg_ioctl()
3361 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags); in _hl_interrupt_ts_reg_ioctl()
3363 pend->ts_reg_info.in_use = false; in _hl_interrupt_ts_reg_ioctl()
3365 *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns(); in _hl_interrupt_ts_reg_ioctl()
3370 list_add_tail(&pend->list_node, &data->interrupt->ts_list_head); in _hl_interrupt_ts_reg_ioctl()
3371 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags); in _hl_interrupt_ts_reg_ioctl()
3380 hl_mmap_mem_buf_put(data->buf); in _hl_interrupt_ts_reg_ioctl()
3382 hl_cb_put(data->cq_cb); in _hl_interrupt_ts_reg_ioctl()
3398 timeout = hl_usecs64_to_jiffies(data->intr_timeout_us); in _hl_interrupt_wait_ioctl()
3402 data->cq_cb = hl_cb_get(data->mmg, data->cq_handle); in _hl_interrupt_wait_ioctl()
3403 if (!data->cq_cb) { in _hl_interrupt_wait_ioctl()
3404 rc = -EINVAL; in _hl_interrupt_wait_ioctl()
3409 if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >= in _hl_interrupt_wait_ioctl()
3410 ((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) { in _hl_interrupt_wait_ioctl()
3411 rc = -EINVAL; in _hl_interrupt_wait_ioctl()
3417 rc = -ENOMEM; in _hl_interrupt_wait_ioctl()
3421 hl_fence_init(&pend->fence, ULONG_MAX); in _hl_interrupt_wait_ioctl()
3422 pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset; in _hl_interrupt_wait_ioctl()
3423 pend->cq_target_value = data->target_value; in _hl_interrupt_wait_ioctl()
3424 spin_lock_irqsave(&data->interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl()
3430 if (*pend->cq_kernel_addr >= data->target_value || (!data->intr_timeout_us)) { in _hl_interrupt_wait_ioctl()
3431 spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl()
3433 if (*pend->cq_kernel_addr >= data->target_value) in _hl_interrupt_wait_ioctl()
3438 pend->fence.timestamp = ktime_get(); in _hl_interrupt_wait_ioctl()
3448 list_add_tail(&pend->list_node, &data->interrupt->wait_list_head); in _hl_interrupt_wait_ioctl()
3449 spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl()
3452 completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, in _hl_interrupt_wait_ioctl()
3455 if (pend->fence.error == -EIO) { in _hl_interrupt_wait_ioctl()
3456 dev_err_ratelimited(hdev->dev, in _hl_interrupt_wait_ioctl()
3458 pend->fence.error); in _hl_interrupt_wait_ioctl()
3459 rc = -EIO; in _hl_interrupt_wait_ioctl()
3465 if (completion_rc == -ERESTARTSYS) { in _hl_interrupt_wait_ioctl()
3466 dev_err_ratelimited(hdev->dev, in _hl_interrupt_wait_ioctl()
3468 data->interrupt->interrupt_id); in _hl_interrupt_wait_ioctl()
3469 rc = -EINTR; in _hl_interrupt_wait_ioctl()
3472 /* The wait has timed out. We don't know anything beyond that in _hl_interrupt_wait_ioctl()
3488 spin_lock_irqsave(&data->interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl()
3489 list_del(&pend->list_node); in _hl_interrupt_wait_ioctl()
3490 spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl()
3493 *timestamp = ktime_to_ns(pend->fence.timestamp); in _hl_interrupt_wait_ioctl()
3495 hl_cb_put(data->cq_cb); in _hl_interrupt_wait_ioctl()
3501 hl_cb_put(data->cq_cb); in _hl_interrupt_wait_ioctl()
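/*
 * Illustrative sketch of the fast path above (hypothetical demo_* names):
 * the CQ counter is compared against the target under the wait-list lock
 * before the waiter is queued, so a target that was already reached returns
 * immediately instead of sleeping until the timeout.
 */
struct demo_cq_wait {
	struct list_head node;
	struct completion comp;
};

static long demo_wait_on_cq_counter(u64 *counter, u64 target, spinlock_t *lock,
				    struct list_head *waiters, struct demo_cq_wait *w,
				    long timeout_jiffies)
{
	unsigned long flags;
	long rc;

	spin_lock_irqsave(lock, flags);
	if (*counter >= target) {			/* already satisfied: no sleep */
		spin_unlock_irqrestore(lock, flags);
		return 1;
	}
	list_add_tail(&w->node, waiters);		/* the IRQ handler completes w->comp */
	spin_unlock_irqrestore(lock, flags);

	rc = wait_for_completion_interruptible_timeout(&w->comp, timeout_jiffies);

	spin_lock_irqsave(lock, flags);
	list_del(&w->node);				/* always dequeue, even on timeout */
	spin_unlock_irqrestore(lock, flags);

	return rc;	/* >0 completed, 0 timed out, -ERESTARTSYS interrupted */
}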
3527 return -ENOMEM; in _hl_interrupt_wait_ioctl_user_addr()
3530 hl_fence_init(&pend->fence, ULONG_MAX); in _hl_interrupt_wait_ioctl_user_addr()
3535 spin_lock_irqsave(&interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl_user_addr()
3536 list_add_tail(&pend->list_node, &interrupt->wait_list_head); in _hl_interrupt_wait_ioctl_user_addr()
3537 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl_user_addr()
3543 dev_err(hdev->dev, "Failed to copy completion value from user\n"); in _hl_interrupt_wait_ioctl_user_addr()
3544 rc = -EFAULT; in _hl_interrupt_wait_ioctl_user_addr()
3551 pend->fence.timestamp = ktime_get(); in _hl_interrupt_wait_ioctl_user_addr()
3561 completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, in _hl_interrupt_wait_ioctl_user_addr()
3568 spin_lock_irqsave(&interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl_user_addr()
3574 reinit_completion(&pend->fence.completion); in _hl_interrupt_wait_ioctl_user_addr()
3575 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl_user_addr()
3578 dev_err(hdev->dev, "Failed to copy completion value from user\n"); in _hl_interrupt_wait_ioctl_user_addr()
3579 rc = -EFAULT; in _hl_interrupt_wait_ioctl_user_addr()
3586 } else if (pend->fence.error) { in _hl_interrupt_wait_ioctl_user_addr()
3587 dev_err_ratelimited(hdev->dev, in _hl_interrupt_wait_ioctl_user_addr()
3589 pend->fence.error); in _hl_interrupt_wait_ioctl_user_addr()
3596 } else if (completion_rc == -ERESTARTSYS) { in _hl_interrupt_wait_ioctl_user_addr()
3597 dev_err_ratelimited(hdev->dev, in _hl_interrupt_wait_ioctl_user_addr()
3599 interrupt->interrupt_id); in _hl_interrupt_wait_ioctl_user_addr()
3600 rc = -EINTR; in _hl_interrupt_wait_ioctl_user_addr()
3602 /* The wait has timed out. We don't know anything beyond that in _hl_interrupt_wait_ioctl_user_addr()
3612 spin_lock_irqsave(&interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl_user_addr()
3613 list_del(&pend->list_node); in _hl_interrupt_wait_ioctl_user_addr()
3614 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl_user_addr()
3616 *timestamp = ktime_to_ns(pend->fence.timestamp); in _hl_interrupt_wait_ioctl_user_addr()
3627 struct hl_device *hdev = hpriv->hdev; in hl_interrupt_wait_ioctl()
3635 prop = &hdev->asic_prop; in hl_interrupt_wait_ioctl()
3637 if (!(prop->user_interrupt_count + prop->user_dec_intr_count)) { in hl_interrupt_wait_ioctl()
3638 dev_err(hdev->dev, "no user interrupts allowed"); in hl_interrupt_wait_ioctl()
3639 return -EPERM; in hl_interrupt_wait_ioctl()
3642 interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags); in hl_interrupt_wait_ioctl()
3644 first_interrupt = prop->first_available_user_interrupt; in hl_interrupt_wait_ioctl()
3645 last_interrupt = prop->first_available_user_interrupt + prop->user_interrupt_count - 1; in hl_interrupt_wait_ioctl()
3647 if (interrupt_id < prop->user_dec_intr_count) { in hl_interrupt_wait_ioctl()
3650 if (!(prop->decoder_enabled_mask & BIT(interrupt_id))) { in hl_interrupt_wait_ioctl()
3651 dev_err(hdev->dev, "interrupt on a disabled core(%u) not allowed", in hl_interrupt_wait_ioctl()
3653 return -EINVAL; in hl_interrupt_wait_ioctl()
3656 interrupt = &hdev->user_interrupt[interrupt_id]; in hl_interrupt_wait_ioctl()
3660 int_idx = interrupt_id - first_interrupt + prop->user_dec_intr_count; in hl_interrupt_wait_ioctl()
3661 interrupt = &hdev->user_interrupt[int_idx]; in hl_interrupt_wait_ioctl()
3664 interrupt = &hdev->common_user_cq_interrupt; in hl_interrupt_wait_ioctl()
3666 interrupt = &hdev->common_decoder_interrupt; in hl_interrupt_wait_ioctl()
3668 dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id); in hl_interrupt_wait_ioctl()
3669 return -EINVAL; in hl_interrupt_wait_ioctl()
3672 if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) { in hl_interrupt_wait_ioctl()
3676 wait_intr_data.mmg = &hpriv->mem_mgr; in hl_interrupt_wait_ioctl()
3677 wait_intr_data.cq_handle = args->in.cq_counters_handle; in hl_interrupt_wait_ioctl()
3678 wait_intr_data.cq_offset = args->in.cq_counters_offset; in hl_interrupt_wait_ioctl()
3679 wait_intr_data.ts_handle = args->in.timestamp_handle; in hl_interrupt_wait_ioctl()
3680 wait_intr_data.ts_offset = args->in.timestamp_offset; in hl_interrupt_wait_ioctl()
3681 wait_intr_data.target_value = args->in.target; in hl_interrupt_wait_ioctl()
3682 wait_intr_data.intr_timeout_us = args->in.interrupt_timeout_us; in hl_interrupt_wait_ioctl()
3684 if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT) { in hl_interrupt_wait_ioctl()
3687 * issues while handling the flow of re-use of the same offset. in hl_interrupt_wait_ioctl()
3689 * re-use flow might request to move the ts node to another interrupt list, in hl_interrupt_wait_ioctl()
3692 mutex_lock(&hpriv->ctx->ts_reg_lock); in hl_interrupt_wait_ioctl()
3694 rc = _hl_interrupt_ts_reg_ioctl(hdev, hpriv->ctx, &wait_intr_data, in hl_interrupt_wait_ioctl()
3697 mutex_unlock(&hpriv->ctx->ts_reg_lock); in hl_interrupt_wait_ioctl()
3699 rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data, in hl_interrupt_wait_ioctl()
3702 rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx, in hl_interrupt_wait_ioctl()
3703 args->in.interrupt_timeout_us, args->in.addr, in hl_interrupt_wait_ioctl()
3704 args->in.target, interrupt, &status, in hl_interrupt_wait_ioctl()
3712 args->out.status = status; in hl_interrupt_wait_ioctl()
3715 args->out.timestamp_nsec = timestamp; in hl_interrupt_wait_ioctl()
3716 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; in hl_interrupt_wait_ioctl()
3724 struct hl_fpriv *hpriv = file_priv->driver_priv; in hl_wait_ioctl()
3725 struct hl_device *hdev = hpriv->hdev; in hl_wait_ioctl()
3727 u32 flags = args->in.flags; in hl_wait_ioctl()
3733 if (!hl_device_operational(hpriv->hdev, NULL) || hdev->reset_info.watchdog_active) in hl_wait_ioctl()
3734 return -EBUSY; in hl_wait_ioctl()