diff options
Diffstat (limited to 'drivers/misc/habanalabs/common/hw_queue.c')
-rw-r--r-- | drivers/misc/habanalabs/common/hw_queue.c | 198 |
1 files changed, 155 insertions, 43 deletions
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c index bcabfdbf1e01..76b7de8f1406 100644 --- a/drivers/misc/habanalabs/common/hw_queue.c +++ b/drivers/misc/habanalabs/common/hw_queue.c @@ -65,7 +65,7 @@ void hl_hw_queue_update_ci(struct hl_cs *cs) } /* - * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a + * hl_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a * H/W queue. * @hdev: pointer to habanalabs device structure * @q: pointer to habanalabs queue structure @@ -80,8 +80,8 @@ void hl_hw_queue_update_ci(struct hl_cs *cs) * This function must be called when the scheduler mutex is taken * */ -static void ext_and_hw_queue_submit_bd(struct hl_device *hdev, - struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr) +void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q, + u32 ctl, u32 len, u64 ptr) { struct hl_bd *bd; @@ -222,8 +222,8 @@ static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q, * @cb_size: size of CB * @cb_ptr: pointer to CB location * - * This function sends a single CB, that must NOT generate a completion entry - * + * This function sends a single CB, that must NOT generate a completion entry. + * Sending CPU messages can be done instead via 'hl_hw_queue_submit_bd()' */ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, u32 cb_size, u64 cb_ptr) @@ -231,16 +231,7 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id]; int rc = 0; - /* - * The CPU queue is a synchronous queue with an effective depth of - * a single entry (although it is allocated with room for multiple - * entries). Therefore, there is a different lock, called - * send_cpu_message_lock, that serializes accesses to the CPU queue. - * As a result, we don't need to lock the access to the entire H/W - * queues module when submitting a JOB to the CPU queue - */ - if (q->queue_type != QUEUE_TYPE_CPU) - hdev->asic_funcs->hw_queues_lock(hdev); + hdev->asic_funcs->hw_queues_lock(hdev); if (hdev->disabled) { rc = -EPERM; @@ -258,11 +249,10 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id, goto out; } - ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr); + hl_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr); out: - if (q->queue_type != QUEUE_TYPE_CPU) - hdev->asic_funcs->hw_queues_unlock(hdev); + hdev->asic_funcs->hw_queues_unlock(hdev); return rc; } @@ -328,7 +318,7 @@ static void ext_queue_schedule_job(struct hl_cs_job *job) cq->pi = hl_cq_inc_ptr(cq->pi); submit_bd: - ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr); + hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr); } /* @@ -407,7 +397,7 @@ static void hw_queue_schedule_job(struct hl_cs_job *job) else ptr = (u64) (uintptr_t) job->user_cb; - ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr); + hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr); } static int init_signal_cs(struct hl_device *hdev, @@ -426,8 +416,9 @@ static int init_signal_cs(struct hl_device *hdev, cs_cmpl->sob_val = prop->next_sob_val; dev_dbg(hdev->dev, - "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n", - cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx); + "generate signal CB, sob_id: %d, sob val: %u, q_idx: %d, seq: %llu\n", + cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx, + cs_cmpl->cs_seq); /* we set an EB since we must make sure all oeprations are done * when sending the signal @@ -435,17 +426,37 @@ static int init_signal_cs(struct hl_device *hdev, hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb, cs_cmpl->hw_sob->sob_id, 0, true); - rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1); + rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1, + false); return rc; } -static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs, +void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev, + struct hl_cs *cs, struct hl_cs_job *job, + struct hl_cs_compl *cs_cmpl) +{ + struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl; + + cs_cmpl->hw_sob = handle->hw_sob; + + /* Note that encaps_sig_wait_offset was validated earlier in the flow + * for offset value which exceeds the max reserved signal count. + * always decrement 1 of the offset since when the user + * set offset 1 for example he mean to wait only for the first + * signal only, which will be pre_sob_val, and if he set offset 2 + * then the value required is (pre_sob_val + 1) and so on... + */ + cs_cmpl->sob_val = handle->pre_sob_val + + (job->encaps_sig_wait_offset - 1); +} + +static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs, struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl) { - struct hl_cs_compl *signal_cs_cmpl; - struct hl_sync_stream_properties *prop; struct hl_gen_wait_properties wait_prop; + struct hl_sync_stream_properties *prop; + struct hl_cs_compl *signal_cs_cmpl; u32 q_idx; q_idx = job->hw_queue_id; @@ -455,14 +466,51 @@ static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs, struct hl_cs_compl, base_fence); - /* copy the SOB id and value of the signal CS */ - cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob; - cs_cmpl->sob_val = signal_cs_cmpl->sob_val; + if (cs->encaps_signals) { + /* use the encaps signal handle stored earlier in the flow + * and set the SOB information from the encaps + * signals handle + */ + hl_hw_queue_encaps_sig_set_sob_info(hdev, cs, job, cs_cmpl); + + dev_dbg(hdev->dev, "Wait for encaps signals handle, qidx(%u), CS sequence(%llu), sob val: 0x%x, offset: %u\n", + cs->encaps_sig_hdl->q_idx, + cs->encaps_sig_hdl->cs_seq, + cs_cmpl->sob_val, + job->encaps_sig_wait_offset); + } else { + /* Copy the SOB id and value of the signal CS */ + cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob; + cs_cmpl->sob_val = signal_cs_cmpl->sob_val; + } + + /* check again if the signal cs already completed. + * if yes then don't send any wait cs since the hw_sob + * could be in reset already. if signal is not completed + * then get refcount to hw_sob to prevent resetting the sob + * while wait cs is not submitted. + * note that this check is protected by two locks, + * hw queue lock and completion object lock, + * and the same completion object lock also protects + * the hw_sob reset handler function. + * The hw_queue lock prevent out of sync of hw_sob + * refcount value, changed by signal/wait flows. + */ + spin_lock(&signal_cs_cmpl->lock); + + if (completion_done(&cs->signal_fence->completion)) { + spin_unlock(&signal_cs_cmpl->lock); + return -EINVAL; + } + + kref_get(&cs_cmpl->hw_sob->kref); + + spin_unlock(&signal_cs_cmpl->lock); dev_dbg(hdev->dev, - "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n", + "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d, seq: %llu\n", cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, - prop->base_mon_id, q_idx); + prop->base_mon_id, q_idx, cs->sequence); wait_prop.data = (void *) job->patched_cb; wait_prop.sob_base = cs_cmpl->hw_sob->sob_id; @@ -471,17 +519,14 @@ static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs, wait_prop.mon_id = prop->base_mon_id; wait_prop.q_idx = q_idx; wait_prop.size = 0; + hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop); - kref_get(&cs_cmpl->hw_sob->kref); - /* - * Must put the signal fence after the SOB refcnt increment so - * the SOB refcnt won't turn 0 and reset the SOB before the - * wait CS was submitted. - */ mb(); hl_fence_put(cs->signal_fence); cs->signal_fence = NULL; + + return 0; } /* @@ -506,7 +551,60 @@ static int init_signal_wait_cs(struct hl_cs *cs) if (cs->type & CS_TYPE_SIGNAL) rc = init_signal_cs(hdev, job, cs_cmpl); else if (cs->type & CS_TYPE_WAIT) - init_wait_cs(hdev, cs, job, cs_cmpl); + rc = init_wait_cs(hdev, cs, job, cs_cmpl); + + return rc; +} + +static int encaps_sig_first_staged_cs_handler + (struct hl_device *hdev, struct hl_cs *cs) +{ + struct hl_cs_compl *cs_cmpl = + container_of(cs->fence, + struct hl_cs_compl, base_fence); + struct hl_cs_encaps_sig_handle *encaps_sig_hdl; + struct hl_encaps_signals_mgr *mgr; + int rc = 0; + + mgr = &hdev->compute_ctx->sig_mgr; + + spin_lock(&mgr->lock); + encaps_sig_hdl = idr_find(&mgr->handles, cs->encaps_sig_hdl_id); + if (encaps_sig_hdl) { + /* + * Set handler CS sequence, + * the CS which contains the encapsulated signals. + */ + encaps_sig_hdl->cs_seq = cs->sequence; + /* store the handle and set encaps signal indication, + * to be used later in cs_do_release to put the last + * reference to encaps signals handlers. + */ + cs_cmpl->encaps_signals = true; + cs_cmpl->encaps_sig_hdl = encaps_sig_hdl; + + /* set hw_sob pointer in completion object + * since it's used in cs_do_release flow to put + * refcount to sob + */ + cs_cmpl->hw_sob = encaps_sig_hdl->hw_sob; + cs_cmpl->sob_val = encaps_sig_hdl->pre_sob_val + + encaps_sig_hdl->count; + + dev_dbg(hdev->dev, "CS seq (%llu) added to encaps signal handler id (%u), count(%u), qidx(%u), sob(%u), val(%u)\n", + cs->sequence, encaps_sig_hdl->id, + encaps_sig_hdl->count, + encaps_sig_hdl->q_idx, + cs_cmpl->hw_sob->sob_id, + cs_cmpl->sob_val); + + } else { + dev_err(hdev->dev, "encaps handle id(%u) wasn't found!\n", + cs->encaps_sig_hdl_id); + rc = -EINVAL; + } + + spin_unlock(&mgr->lock); return rc; } @@ -581,14 +679,21 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) { rc = init_signal_wait_cs(cs); - if (rc) { - dev_err(hdev->dev, "Failed to submit signal cs\n"); + if (rc) goto unroll_cq_resv; - } - } else if (cs->type == CS_TYPE_COLLECTIVE_WAIT) - hdev->asic_funcs->collective_wait_init_cs(cs); + } else if (cs->type == CS_TYPE_COLLECTIVE_WAIT) { + rc = hdev->asic_funcs->collective_wait_init_cs(cs); + if (rc) + goto unroll_cq_resv; + } + if (cs->encaps_signals && cs->staged_first) { + rc = encaps_sig_first_staged_cs_handler(hdev, cs); + if (rc) + goto unroll_cq_resv; + } + spin_lock(&hdev->cs_mirror_lock); /* Verify staged CS exists and add to the staged list */ @@ -613,6 +718,11 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) } list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node); + + /* update stream map of the first CS */ + if (hdev->supports_wait_for_multi_cs) + staged_cs->fence->stream_master_qid_map |= + cs->fence->stream_master_qid_map; } list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list); @@ -834,6 +944,8 @@ static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx) hw_sob = &sync_stream_prop->hw_sob[sob]; hw_sob->hdev = hdev; hw_sob->sob_id = sync_stream_prop->base_sob_id + sob; + hw_sob->sob_addr = + hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id); hw_sob->q_idx = q_idx; kref_init(&hw_sob->kref); } |