Lines Matching +full:cs +full:- +full:0
1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright 2016-2019 HabanaLabs, Ltd.
13 * hl_queue_add_ptr - add to pi or ci and checks if it wraps around
23 ptr &= ((HL_QUEUE_LENGTH << 1) - 1); in hl_hw_queue_add_ptr()
28 return atomic_read(ci) & ((queue_len << 1) - 1); in queue_ci_get()
33 int delta = (q->pi - queue_ci_get(&q->ci, queue_len)); in queue_free_slots()
35 if (delta >= 0) in queue_free_slots()
36 return (queue_len - delta); in queue_free_slots()
38 return (abs(delta) - queue_len); in queue_free_slots()
41 void hl_hw_queue_update_ci(struct hl_cs *cs) in hl_hw_queue_update_ci() argument
43 struct hl_device *hdev = cs->ctx->hdev; in hl_hw_queue_update_ci()
47 if (hdev->disabled) in hl_hw_queue_update_ci()
50 q = &hdev->kernel_queues[0]; in hl_hw_queue_update_ci()
53 if (!hdev->asic_prop.max_queues || q->queue_type == QUEUE_TYPE_HW) in hl_hw_queue_update_ci()
58 * 1. All queues of a non completion CS will never get a completion. in hl_hw_queue_update_ci()
61 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) { in hl_hw_queue_update_ci()
62 if (!cs_needs_completion(cs) || q->queue_type == QUEUE_TYPE_INT) in hl_hw_queue_update_ci()
63 atomic_add(cs->jobs_in_queue_cnt[i], &q->ci); in hl_hw_queue_update_ci()
68 * hl_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
90 bd = q->kernel_address; in hl_hw_queue_submit_bd()
91 bd += hl_pi_2_offset(q->pi); in hl_hw_queue_submit_bd()
92 bd->ctl = cpu_to_le32(ctl); in hl_hw_queue_submit_bd()
93 bd->len = cpu_to_le32(len); in hl_hw_queue_submit_bd()
94 bd->ptr = cpu_to_le64(ptr); in hl_hw_queue_submit_bd()
96 if (q->dram_bd) in hl_hw_queue_submit_bd()
97 for (i = 0 ; i < 2 ; i++) { in hl_hw_queue_submit_bd()
98 addr = q->pq_dram_address + in hl_hw_queue_submit_bd()
99 ((hl_pi_2_offset(q->pi) * sizeof(struct hl_bd)) + (i * sizeof(u64))); in hl_hw_queue_submit_bd()
100 hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM, addr, in hl_hw_queue_submit_bd()
104 q->pi = hl_queue_inc_ptr(q->pi); in hl_hw_queue_submit_bd()
106 hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi); in hl_hw_queue_submit_bd()
110 * ext_queue_sanity_checks - perform some sanity checks on external queue
120 * - Make sure we have enough space in the h/w queue
121 * - Make sure we have enough space in the completion queue
122 * - Reserve space in the completion queue (needs to be reversed if there
132 &hdev->completion_queue[q->cq_id].free_slots_cnt; in ext_queue_sanity_checks()
139 dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n", in ext_queue_sanity_checks()
140 q->hw_queue_id, num_of_entries); in ext_queue_sanity_checks()
141 return -EAGAIN; in ext_queue_sanity_checks()
147 * Add -1 to counter (decrement) unless counter was already 0 in ext_queue_sanity_checks()
150 * atomic_add_unless will return 0 if counter was already 0 in ext_queue_sanity_checks()
152 if (atomic_add_negative(num_of_entries * -1, free_slots)) { in ext_queue_sanity_checks()
153 dev_dbg(hdev->dev, "No space for %d on CQ %d\n", in ext_queue_sanity_checks()
154 num_of_entries, q->hw_queue_id); in ext_queue_sanity_checks()
156 return -EAGAIN; in ext_queue_sanity_checks()
160 return 0; in ext_queue_sanity_checks()
164 * int_queue_sanity_checks - perform some sanity checks on internal queue
173 * - Make sure we have enough space in the h/w queue
182 if (num_of_entries > q->int_queue_len) { in int_queue_sanity_checks()
183 dev_err(hdev->dev, in int_queue_sanity_checks()
185 q->hw_queue_id, num_of_entries); in int_queue_sanity_checks()
186 return -ENOMEM; in int_queue_sanity_checks()
190 free_slots_cnt = queue_free_slots(q, q->int_queue_len); in int_queue_sanity_checks()
193 dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n", in int_queue_sanity_checks()
194 q->hw_queue_id, num_of_entries); in int_queue_sanity_checks()
195 return -EAGAIN; in int_queue_sanity_checks()
198 return 0; in int_queue_sanity_checks()
202 * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue
208 * more than once per CS for the same queue
220 dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n", in hw_queue_sanity_checks()
221 q->hw_queue_id, num_of_entries); in hw_queue_sanity_checks()
222 return -EAGAIN; in hw_queue_sanity_checks()
225 return 0; in hw_queue_sanity_checks()
229 * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
242 struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id]; in hl_hw_queue_send_cb_no_cmpl()
243 int rc = 0; in hl_hw_queue_send_cb_no_cmpl()
245 hdev->asic_funcs->hw_queues_lock(hdev); in hl_hw_queue_send_cb_no_cmpl()
247 if (hdev->disabled) { in hl_hw_queue_send_cb_no_cmpl()
248 rc = -EPERM; in hl_hw_queue_send_cb_no_cmpl()
257 if (q->queue_type != QUEUE_TYPE_HW) { in hl_hw_queue_send_cb_no_cmpl()
263 hl_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr); in hl_hw_queue_send_cb_no_cmpl()
266 hdev->asic_funcs->hw_queues_unlock(hdev); in hl_hw_queue_send_cb_no_cmpl()
272 * ext_queue_schedule_job - submit a JOB to an external queue
281 struct hl_device *hdev = job->cs->ctx->hdev; in ext_queue_schedule_job()
282 struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; in ext_queue_schedule_job()
295 ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK); in ext_queue_schedule_job()
297 cb = job->patched_cb; in ext_queue_schedule_job()
298 len = job->job_cb_size; in ext_queue_schedule_job()
299 ptr = cb->bus_address; in ext_queue_schedule_job()
301 /* Skip completion flow in case this is a non completion CS */ in ext_queue_schedule_job()
302 if (!cs_needs_completion(job->cs)) in ext_queue_schedule_job()
306 ((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT) in ext_queue_schedule_job()
318 cq = &hdev->completion_queue[q->cq_id]; in ext_queue_schedule_job()
319 cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry); in ext_queue_schedule_job()
321 hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len, in ext_queue_schedule_job()
322 job->user_cb_size, in ext_queue_schedule_job()
325 q->msi_vec, in ext_queue_schedule_job()
326 job->contains_dma_pkt); in ext_queue_schedule_job()
328 q->shadow_queue[hl_pi_2_offset(q->pi)] = job; in ext_queue_schedule_job()
330 cq->pi = hl_cq_inc_ptr(cq->pi); in ext_queue_schedule_job()
337 * int_queue_schedule_job - submit a JOB to an internal queue
346 struct hl_device *hdev = job->cs->ctx->hdev; in int_queue_schedule_job()
347 struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; in int_queue_schedule_job()
351 bd.ctl = 0; in int_queue_schedule_job()
352 bd.len = cpu_to_le32(job->job_cb_size); in int_queue_schedule_job()
354 if (job->is_kernel_allocated_cb) in int_queue_schedule_job()
358 bd.ptr = cpu_to_le64(job->user_cb->bus_address); in int_queue_schedule_job()
360 bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb); in int_queue_schedule_job()
362 pi = q->kernel_address + (q->pi & (q->int_queue_len - 1)) * sizeof(bd); in int_queue_schedule_job()
364 q->pi++; in int_queue_schedule_job()
365 q->pi &= ((q->int_queue_len << 1) - 1); in int_queue_schedule_job()
367 hdev->asic_funcs->pqe_write(hdev, pi, &bd); in int_queue_schedule_job()
369 hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi); in int_queue_schedule_job()
373 * hw_queue_schedule_job - submit a JOB to a H/W queue
382 struct hl_device *hdev = job->cs->ctx->hdev; in hw_queue_schedule_job()
383 struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id]; in hw_queue_schedule_job()
393 offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1); in hw_queue_schedule_job()
395 ((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK); in hw_queue_schedule_job()
397 len = job->job_cb_size; in hw_queue_schedule_job()
405 if (job->patched_cb) in hw_queue_schedule_job()
406 ptr = job->patched_cb->bus_address; in hw_queue_schedule_job()
407 else if (job->is_kernel_allocated_cb) in hw_queue_schedule_job()
408 ptr = job->user_cb->bus_address; in hw_queue_schedule_job()
410 ptr = (u64) (uintptr_t) job->user_cb; in hw_queue_schedule_job()
421 int rc = 0; in init_signal_cs()
423 q_idx = job->hw_queue_id; in init_signal_cs()
424 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; in init_signal_cs()
425 hw_sob = &prop->hw_sob[prop->curr_sob_offset]; in init_signal_cs()
427 cs_cmpl->hw_sob = hw_sob; in init_signal_cs()
428 cs_cmpl->sob_val = prop->next_sob_val; in init_signal_cs()
430 dev_dbg(hdev->dev, in init_signal_cs()
432 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx, in init_signal_cs()
433 cs_cmpl->cs_seq); in init_signal_cs()
438 hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb, in init_signal_cs()
439 cs_cmpl->hw_sob->sob_id, 0, true); in init_signal_cs()
444 job->cs->sob_addr_offset = hw_sob->sob_addr; in init_signal_cs()
445 job->cs->initial_sob_count = prop->next_sob_val - 1; in init_signal_cs()
451 struct hl_cs *cs, struct hl_cs_job *job, in hl_hw_queue_encaps_sig_set_sob_info() argument
454 struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl; in hl_hw_queue_encaps_sig_set_sob_info()
455 u32 offset = 0; in hl_hw_queue_encaps_sig_set_sob_info()
457 cs_cmpl->hw_sob = handle->hw_sob; in hl_hw_queue_encaps_sig_set_sob_info()
465 * if user set wait offset to 0, then treat it as legacy wait cs, in hl_hw_queue_encaps_sig_set_sob_info()
468 if (job->encaps_sig_wait_offset) in hl_hw_queue_encaps_sig_set_sob_info()
469 offset = job->encaps_sig_wait_offset - 1; in hl_hw_queue_encaps_sig_set_sob_info()
471 cs_cmpl->sob_val = handle->pre_sob_val + offset; in hl_hw_queue_encaps_sig_set_sob_info()
474 static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs, in init_wait_cs() argument
482 q_idx = job->hw_queue_id; in init_wait_cs()
483 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; in init_wait_cs()
485 signal_cs_cmpl = container_of(cs->signal_fence, in init_wait_cs()
489 if (cs->encaps_signals) { in init_wait_cs()
494 hl_hw_queue_encaps_sig_set_sob_info(hdev, cs, job, cs_cmpl); in init_wait_cs()
496 …dev_dbg(hdev->dev, "Wait for encaps signals handle, qidx(%u), CS sequence(%llu), sob val: 0x%x, of… in init_wait_cs()
497 cs->encaps_sig_hdl->q_idx, in init_wait_cs()
498 cs->encaps_sig_hdl->cs_seq, in init_wait_cs()
499 cs_cmpl->sob_val, in init_wait_cs()
500 job->encaps_sig_wait_offset); in init_wait_cs()
502 /* Copy the SOB id and value of the signal CS */ in init_wait_cs()
503 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob; in init_wait_cs()
504 cs_cmpl->sob_val = signal_cs_cmpl->sob_val; in init_wait_cs()
507 /* check again if the signal cs already completed. in init_wait_cs()
508 * if yes then don't send any wait cs since the hw_sob in init_wait_cs()
511 * while wait cs is not submitted. in init_wait_cs()
519 spin_lock(&signal_cs_cmpl->lock); in init_wait_cs()
521 if (completion_done(&cs->signal_fence->completion)) { in init_wait_cs()
522 spin_unlock(&signal_cs_cmpl->lock); in init_wait_cs()
523 return -EINVAL; in init_wait_cs()
526 kref_get(&cs_cmpl->hw_sob->kref); in init_wait_cs()
528 spin_unlock(&signal_cs_cmpl->lock); in init_wait_cs()
530 dev_dbg(hdev->dev, in init_wait_cs()
531 "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d, seq: %llu\n", in init_wait_cs()
532 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, in init_wait_cs()
533 prop->base_mon_id, q_idx, cs->sequence); in init_wait_cs()
535 wait_prop.data = (void *) job->patched_cb; in init_wait_cs()
536 wait_prop.sob_base = cs_cmpl->hw_sob->sob_id; in init_wait_cs()
537 wait_prop.sob_mask = 0x1; in init_wait_cs()
538 wait_prop.sob_val = cs_cmpl->sob_val; in init_wait_cs()
539 wait_prop.mon_id = prop->base_mon_id; in init_wait_cs()
541 wait_prop.size = 0; in init_wait_cs()
543 hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop); in init_wait_cs()
546 hl_fence_put(cs->signal_fence); in init_wait_cs()
547 cs->signal_fence = NULL; in init_wait_cs()
549 return 0; in init_wait_cs()
553 * init_signal_wait_cs - initialize a signal/wait CS
554 * @cs: pointer to the signal/wait CS
558 static int init_signal_wait_cs(struct hl_cs *cs) in init_signal_wait_cs() argument
560 struct hl_ctx *ctx = cs->ctx; in init_signal_wait_cs()
561 struct hl_device *hdev = ctx->hdev; in init_signal_wait_cs()
564 container_of(cs->fence, struct hl_cs_compl, base_fence); in init_signal_wait_cs()
565 int rc = 0; in init_signal_wait_cs()
567 /* There is only one job in a signal/wait CS */ in init_signal_wait_cs()
568 job = list_first_entry(&cs->job_list, struct hl_cs_job, in init_signal_wait_cs()
571 if (cs->type & CS_TYPE_SIGNAL) in init_signal_wait_cs()
573 else if (cs->type & CS_TYPE_WAIT) in init_signal_wait_cs()
574 rc = init_wait_cs(hdev, cs, job, cs_cmpl); in init_signal_wait_cs()
580 (struct hl_device *hdev, struct hl_cs *cs) in encaps_sig_first_staged_cs_handler() argument
583 container_of(cs->fence, in encaps_sig_first_staged_cs_handler()
587 int rc = 0; in encaps_sig_first_staged_cs_handler()
589 mgr = &cs->ctx->sig_mgr; in encaps_sig_first_staged_cs_handler()
591 spin_lock(&mgr->lock); in encaps_sig_first_staged_cs_handler()
592 encaps_sig_hdl = idr_find(&mgr->handles, cs->encaps_sig_hdl_id); in encaps_sig_first_staged_cs_handler()
595 * Set handler CS sequence, in encaps_sig_first_staged_cs_handler()
596 * the CS which contains the encapsulated signals. in encaps_sig_first_staged_cs_handler()
598 encaps_sig_hdl->cs_seq = cs->sequence; in encaps_sig_first_staged_cs_handler()
603 cs_cmpl->encaps_signals = true; in encaps_sig_first_staged_cs_handler()
604 cs_cmpl->encaps_sig_hdl = encaps_sig_hdl; in encaps_sig_first_staged_cs_handler()
610 cs_cmpl->hw_sob = encaps_sig_hdl->hw_sob; in encaps_sig_first_staged_cs_handler()
611 cs_cmpl->sob_val = encaps_sig_hdl->pre_sob_val + in encaps_sig_first_staged_cs_handler()
612 encaps_sig_hdl->count; in encaps_sig_first_staged_cs_handler()
614 …dev_dbg(hdev->dev, "CS seq (%llu) added to encaps signal handler id (%u), count(%u), qidx(%u), sob… in encaps_sig_first_staged_cs_handler()
615 cs->sequence, encaps_sig_hdl->id, in encaps_sig_first_staged_cs_handler()
616 encaps_sig_hdl->count, in encaps_sig_first_staged_cs_handler()
617 encaps_sig_hdl->q_idx, in encaps_sig_first_staged_cs_handler()
618 cs_cmpl->hw_sob->sob_id, in encaps_sig_first_staged_cs_handler()
619 cs_cmpl->sob_val); in encaps_sig_first_staged_cs_handler()
622 dev_err(hdev->dev, "encaps handle id(%u) wasn't found!\n", in encaps_sig_first_staged_cs_handler()
623 cs->encaps_sig_hdl_id); in encaps_sig_first_staged_cs_handler()
624 rc = -EINVAL; in encaps_sig_first_staged_cs_handler()
627 spin_unlock(&mgr->lock); in encaps_sig_first_staged_cs_handler()
633 * hl_hw_queue_schedule_cs - schedule a command submission
634 * @cs: pointer to the CS
636 int hl_hw_queue_schedule_cs(struct hl_cs *cs) in hl_hw_queue_schedule_cs() argument
640 struct hl_ctx *ctx = cs->ctx; in hl_hw_queue_schedule_cs()
641 struct hl_device *hdev = ctx->hdev; in hl_hw_queue_schedule_cs()
644 int rc = 0, i, cq_cnt; in hl_hw_queue_schedule_cs()
648 cntr = &hdev->aggregated_cs_counters; in hl_hw_queue_schedule_cs()
650 hdev->asic_funcs->hw_queues_lock(hdev); in hl_hw_queue_schedule_cs()
653 atomic64_inc(&cntr->device_in_reset_drop_cnt); in hl_hw_queue_schedule_cs()
654 atomic64_inc(&ctx->cs_counters.device_in_reset_drop_cnt); in hl_hw_queue_schedule_cs()
655 dev_err(hdev->dev, in hl_hw_queue_schedule_cs()
656 "device is %s, CS rejected!\n", hdev->status[status]); in hl_hw_queue_schedule_cs()
657 rc = -EPERM; in hl_hw_queue_schedule_cs()
661 max_queues = hdev->asic_prop.max_queues; in hl_hw_queue_schedule_cs()
663 q = &hdev->kernel_queues[0]; in hl_hw_queue_schedule_cs()
664 for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) { in hl_hw_queue_schedule_cs()
665 if (cs->jobs_in_queue_cnt[i]) { in hl_hw_queue_schedule_cs()
666 switch (q->queue_type) { in hl_hw_queue_schedule_cs()
669 cs->jobs_in_queue_cnt[i], in hl_hw_queue_schedule_cs()
670 cs_needs_completion(cs) ? in hl_hw_queue_schedule_cs()
675 cs->jobs_in_queue_cnt[i]); in hl_hw_queue_schedule_cs()
679 cs->jobs_in_queue_cnt[i]); in hl_hw_queue_schedule_cs()
682 dev_err(hdev->dev, "Queue type %d is invalid\n", in hl_hw_queue_schedule_cs()
683 q->queue_type); in hl_hw_queue_schedule_cs()
684 rc = -EINVAL; in hl_hw_queue_schedule_cs()
690 &ctx->cs_counters.queue_full_drop_cnt); in hl_hw_queue_schedule_cs()
691 atomic64_inc(&cntr->queue_full_drop_cnt); in hl_hw_queue_schedule_cs()
695 if (q->queue_type == QUEUE_TYPE_EXT) in hl_hw_queue_schedule_cs()
700 if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) { in hl_hw_queue_schedule_cs()
701 rc = init_signal_wait_cs(cs); in hl_hw_queue_schedule_cs()
704 } else if (cs->type == CS_TYPE_COLLECTIVE_WAIT) { in hl_hw_queue_schedule_cs()
705 rc = hdev->asic_funcs->collective_wait_init_cs(cs); in hl_hw_queue_schedule_cs()
710 rc = hdev->asic_funcs->pre_schedule_cs(cs); in hl_hw_queue_schedule_cs()
712 dev_err(hdev->dev, in hl_hw_queue_schedule_cs()
713 "Failed in pre-submission operations of CS %d.%llu\n", in hl_hw_queue_schedule_cs()
714 ctx->asid, cs->sequence); in hl_hw_queue_schedule_cs()
718 hdev->shadow_cs_queue[cs->sequence & in hl_hw_queue_schedule_cs()
719 (hdev->asic_prop.max_pending_cs - 1)] = cs; in hl_hw_queue_schedule_cs()
721 if (cs->encaps_signals && cs->staged_first) { in hl_hw_queue_schedule_cs()
722 rc = encaps_sig_first_staged_cs_handler(hdev, cs); in hl_hw_queue_schedule_cs()
727 spin_lock(&hdev->cs_mirror_lock); in hl_hw_queue_schedule_cs()
729 /* Verify staged CS exists and add to the staged list */ in hl_hw_queue_schedule_cs()
730 if (cs->staged_cs && !cs->staged_first) { in hl_hw_queue_schedule_cs()
733 staged_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence); in hl_hw_queue_schedule_cs()
735 dev_err(hdev->dev, in hl_hw_queue_schedule_cs()
737 cs->staged_sequence); in hl_hw_queue_schedule_cs()
738 rc = -EINVAL; in hl_hw_queue_schedule_cs()
743 dev_err(hdev->dev, in hl_hw_queue_schedule_cs()
745 cs->staged_sequence); in hl_hw_queue_schedule_cs()
746 rc = -EINVAL; in hl_hw_queue_schedule_cs()
750 list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node); in hl_hw_queue_schedule_cs()
752 /* update stream map of the first CS */ in hl_hw_queue_schedule_cs()
753 if (hdev->supports_wait_for_multi_cs) in hl_hw_queue_schedule_cs()
754 staged_cs->fence->stream_master_qid_map |= in hl_hw_queue_schedule_cs()
755 cs->fence->stream_master_qid_map; in hl_hw_queue_schedule_cs()
758 list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list); in hl_hw_queue_schedule_cs()
760 /* Queue TDR if the CS is the first entry and if timeout is wanted */ in hl_hw_queue_schedule_cs()
761 first_entry = list_first_entry(&hdev->cs_mirror_list, in hl_hw_queue_schedule_cs()
762 struct hl_cs, mirror_node) == cs; in hl_hw_queue_schedule_cs()
763 if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) && in hl_hw_queue_schedule_cs()
764 first_entry && cs_needs_timeout(cs)) { in hl_hw_queue_schedule_cs()
765 cs->tdr_active = true; in hl_hw_queue_schedule_cs()
766 schedule_delayed_work(&cs->work_tdr, cs->timeout_jiffies); in hl_hw_queue_schedule_cs()
770 spin_unlock(&hdev->cs_mirror_lock); in hl_hw_queue_schedule_cs()
772 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) in hl_hw_queue_schedule_cs()
773 switch (job->queue_type) { in hl_hw_queue_schedule_cs()
787 cs->submitted = true; in hl_hw_queue_schedule_cs()
792 spin_unlock(&hdev->cs_mirror_lock); in hl_hw_queue_schedule_cs()
794 q = &hdev->kernel_queues[0]; in hl_hw_queue_schedule_cs()
795 for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) { in hl_hw_queue_schedule_cs()
796 if ((q->queue_type == QUEUE_TYPE_EXT) && in hl_hw_queue_schedule_cs()
797 (cs->jobs_in_queue_cnt[i])) { in hl_hw_queue_schedule_cs()
799 &hdev->completion_queue[i].free_slots_cnt; in hl_hw_queue_schedule_cs()
800 atomic_add(cs->jobs_in_queue_cnt[i], free_slots); in hl_hw_queue_schedule_cs()
801 cq_cnt--; in hl_hw_queue_schedule_cs()
806 hdev->asic_funcs->hw_queues_unlock(hdev); in hl_hw_queue_schedule_cs()
812 * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue
819 struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id]; in hl_hw_queue_inc_ci_kernel()
821 atomic_inc(&q->ci); in hl_hw_queue_inc_ci_kernel()
831 p = hl_cpu_accessible_dma_pool_alloc(hdev, HL_QUEUE_SIZE_IN_BYTES, &q->bus_address); in ext_and_cpu_queue_init()
833 p = hl_asic_dma_alloc_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, &q->bus_address, in ext_and_cpu_queue_init()
836 return -ENOMEM; in ext_and_cpu_queue_init()
838 q->kernel_address = p; in ext_and_cpu_queue_init()
840 q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, sizeof(struct hl_cs_job *), GFP_KERNEL); in ext_and_cpu_queue_init()
841 if (!q->shadow_queue) { in ext_and_cpu_queue_init()
842 dev_err(hdev->dev, in ext_and_cpu_queue_init()
844 q->hw_queue_id); in ext_and_cpu_queue_init()
845 rc = -ENOMEM; in ext_and_cpu_queue_init()
850 atomic_set(&q->ci, 0); in ext_and_cpu_queue_init()
851 q->pi = 0; in ext_and_cpu_queue_init()
853 return 0; in ext_and_cpu_queue_init()
857 hl_cpu_accessible_dma_pool_free(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address); in ext_and_cpu_queue_init()
859 hl_asic_dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address, in ext_and_cpu_queue_init()
860 q->bus_address); in ext_and_cpu_queue_init()
869 p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id, in int_queue_init()
870 &q->bus_address, &q->int_queue_len); in int_queue_init()
872 dev_err(hdev->dev, in int_queue_init()
874 q->hw_queue_id); in int_queue_init()
875 return -EFAULT; in int_queue_init()
878 q->kernel_address = p; in int_queue_init()
879 q->pi = 0; in int_queue_init()
880 atomic_set(&q->ci, 0); in int_queue_init()
882 return 0; in int_queue_init()
899 p = hl_asic_dma_alloc_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, &q->bus_address, in hw_queue_init()
902 return -ENOMEM; in hw_queue_init()
904 q->kernel_address = p; in hw_queue_init()
907 atomic_set(&q->ci, 0); in hw_queue_init()
908 q->pi = 0; in hw_queue_init()
910 return 0; in hw_queue_init()
916 struct asic_fixed_properties *prop = &hdev->asic_prop; in sync_stream_queue_init()
920 sync_stream_prop = &hdev->kernel_queues[q_idx].sync_stream_prop; in sync_stream_queue_init()
927 if (hdev->kernel_queues[q_idx].collective_mode == in sync_stream_queue_init()
929 reserved_mon_idx = hdev->collective_mon_idx; in sync_stream_queue_init()
932 sync_stream_prop->collective_mstr_mon_id[0] = in sync_stream_queue_init()
933 prop->collective_first_mon + reserved_mon_idx; in sync_stream_queue_init()
936 sync_stream_prop->collective_mstr_mon_id[1] = in sync_stream_queue_init()
937 prop->collective_first_mon + reserved_mon_idx + 1; in sync_stream_queue_init()
939 hdev->collective_mon_idx += HL_COLLECTIVE_RSVD_MSTR_MONS; in sync_stream_queue_init()
940 } else if (hdev->kernel_queues[q_idx].collective_mode == in sync_stream_queue_init()
942 reserved_mon_idx = hdev->collective_mon_idx++; in sync_stream_queue_init()
945 sync_stream_prop->collective_slave_mon_id = in sync_stream_queue_init()
946 prop->collective_first_mon + reserved_mon_idx; in sync_stream_queue_init()
949 if (!hdev->kernel_queues[q_idx].supports_sync_stream) in sync_stream_queue_init()
952 queue_idx = hdev->sync_stream_queue_idx++; in sync_stream_queue_init()
954 sync_stream_prop->base_sob_id = prop->sync_stream_first_sob + in sync_stream_queue_init()
956 sync_stream_prop->base_mon_id = prop->sync_stream_first_mon + in sync_stream_queue_init()
958 sync_stream_prop->next_sob_val = 1; in sync_stream_queue_init()
959 sync_stream_prop->curr_sob_offset = 0; in sync_stream_queue_init()
961 for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) { in sync_stream_queue_init()
962 hw_sob = &sync_stream_prop->hw_sob[sob]; in sync_stream_queue_init()
963 hw_sob->hdev = hdev; in sync_stream_queue_init()
964 hw_sob->sob_id = sync_stream_prop->base_sob_id + sob; in sync_stream_queue_init()
965 hw_sob->sob_addr = in sync_stream_queue_init()
966 hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id); in sync_stream_queue_init()
967 hw_sob->q_idx = q_idx; in sync_stream_queue_init()
968 kref_init(&hw_sob->kref); in sync_stream_queue_init()
975 &hdev->kernel_queues[q_idx].sync_stream_prop; in sync_stream_queue_reset()
978 * In case we got here due to a stuck CS, the refcnt might be bigger in sync_stream_queue_reset()
981 kref_init(&prop->hw_sob[prop->curr_sob_offset].kref); in sync_stream_queue_reset()
982 prop->curr_sob_offset = 0; in sync_stream_queue_reset()
983 prop->next_sob_val = 1; in sync_stream_queue_reset()
987 * queue_init - main initialization function for H/W queue object
993 * Allocate dma-able memory for the queue and initialize fields
994 * Returns 0 on success
1001 q->hw_queue_id = hw_queue_id; in queue_init()
1003 switch (q->queue_type) { in queue_init()
1017 q->valid = 0; in queue_init()
1018 return 0; in queue_init()
1020 dev_crit(hdev->dev, "wrong queue type %d during init\n", in queue_init()
1021 q->queue_type); in queue_init()
1022 rc = -EINVAL; in queue_init()
1026 sync_stream_queue_init(hdev, q->hw_queue_id); in queue_init()
1031 q->valid = 1; in queue_init()
1033 return 0; in queue_init()
1037 * hw_queue_fini - destroy queue
1046 if (!q->valid) in queue_fini()
1067 if (q->queue_type == QUEUE_TYPE_INT) in queue_fini()
1070 kfree(q->shadow_queue); in queue_fini()
1072 if (q->queue_type == QUEUE_TYPE_CPU) in queue_fini()
1073 hl_cpu_accessible_dma_pool_free(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address); in queue_fini()
1075 hl_asic_dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, q->kernel_address, in queue_fini()
1076 q->bus_address); in queue_fini()
1081 struct asic_fixed_properties *asic = &hdev->asic_prop; in hl_hw_queues_create()
1085 hdev->kernel_queues = kcalloc(asic->max_queues, in hl_hw_queues_create()
1086 sizeof(*hdev->kernel_queues), GFP_KERNEL); in hl_hw_queues_create()
1088 if (!hdev->kernel_queues) { in hl_hw_queues_create()
1089 dev_err(hdev->dev, "Not enough memory for H/W queues\n"); in hl_hw_queues_create()
1090 return -ENOMEM; in hl_hw_queues_create()
1094 for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues; in hl_hw_queues_create()
1095 i < asic->max_queues ; i++, q_ready_cnt++, q++) { in hl_hw_queues_create()
1097 q->queue_type = asic->hw_queues_props[i].type; in hl_hw_queues_create()
1098 q->supports_sync_stream = in hl_hw_queues_create()
1099 asic->hw_queues_props[i].supports_sync_stream; in hl_hw_queues_create()
1100 q->collective_mode = asic->hw_queues_props[i].collective_mode; in hl_hw_queues_create()
1101 q->dram_bd = asic->hw_queues_props[i].dram_bd; in hl_hw_queues_create()
1105 dev_err(hdev->dev, in hl_hw_queues_create()
1111 if (q->dram_bd) in hl_hw_queues_create()
1112 q->pq_dram_address = asic->hw_queues_props[i].q_dram_bd_address; in hl_hw_queues_create()
1115 return 0; in hl_hw_queues_create()
1118 for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++) in hl_hw_queues_create()
1121 kfree(hdev->kernel_queues); in hl_hw_queues_create()
1129 u32 max_queues = hdev->asic_prop.max_queues; in hl_hw_queues_destroy()
1132 for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) in hl_hw_queues_destroy()
1135 kfree(hdev->kernel_queues); in hl_hw_queues_destroy()
1141 u32 max_queues = hdev->asic_prop.max_queues; in hl_hw_queue_reset()
1144 for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) { in hl_hw_queue_reset()
1145 if ((!q->valid) || in hl_hw_queue_reset()
1146 ((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU))) in hl_hw_queue_reset()
1148 q->pi = 0; in hl_hw_queue_reset()
1149 atomic_set(&q->ci, 0); in hl_hw_queue_reset()
1151 if (q->supports_sync_stream) in hl_hw_queue_reset()
1152 sync_stream_queue_reset(hdev, q->hw_queue_id); in hl_hw_queue_reset()