summaryrefslogtreecommitdiff
path: root/drivers/misc/habanalabs/common/command_submission.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/common/command_submission.c')
-rw-r--r--drivers/misc/habanalabs/common/command_submission.c389
1 files changed, 287 insertions, 102 deletions
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 4c8000fd246c..0a4ef13d9ac4 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*/
@@ -533,8 +533,8 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
mcs_compl->stream_master_qid_map)) {
/* extract the timestamp only of first completed CS */
if (!mcs_compl->timestamp)
- mcs_compl->timestamp =
- ktime_to_ns(fence->timestamp);
+ mcs_compl->timestamp = ktime_to_ns(fence->timestamp);
+
complete_all(&mcs_compl->completion);
/*
@@ -733,6 +733,14 @@ static void cs_timedout(struct work_struct *work)
hdev = cs->ctx->hdev;
+ /* Save only the first CS timeout parameters */
+ rc = atomic_cmpxchg(&hdev->last_error.cs_write_disable, 0, 1);
+ if (!rc) {
+ hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime;
+ hdev->last_error.cs_timeout_timestamp = ktime_get();
+ hdev->last_error.cs_timeout_seq = cs->sequence;
+ }
+
switch (cs->type) {
case CS_TYPE_SIGNAL:
dev_err(hdev->dev,
@@ -767,9 +775,9 @@ static void cs_timedout(struct work_struct *work)
if (likely(!skip_reset_on_timeout)) {
if (hdev->reset_on_lockup)
- hl_device_reset(hdev, HL_RESET_TDR);
+ hl_device_reset(hdev, HL_DRV_RESET_TDR);
else
- hdev->needs_reset = true;
+ hdev->reset_info.needs_reset = true;
}
}
@@ -806,7 +814,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
cs->timeout_jiffies = timeout;
cs->skip_reset_on_timeout =
- hdev->skip_reset_on_timeout ||
+ hdev->reset_info.skip_reset_on_timeout ||
!!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT);
cs->submission_time_jiffies = jiffies;
INIT_LIST_HEAD(&cs->job_list);
@@ -1131,9 +1139,6 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
enum hl_cs_type cs_type;
if (!hl_device_operational(hdev, &status)) {
- dev_warn_ratelimited(hdev->dev,
- "Device is %s. Can't submit new CS\n",
- hdev->status[status]);
return -EBUSY;
}
@@ -1262,7 +1267,8 @@ static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid)
static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
u32 num_chunks, u64 *cs_seq, u32 flags,
- u32 encaps_signals_handle, u32 timeout)
+ u32 encaps_signals_handle, u32 timeout,
+ u16 *signal_initial_sob_count)
{
bool staged_mid, int_queues_only = true;
struct hl_device *hdev = hpriv->hdev;
@@ -1429,6 +1435,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
goto free_cs_object;
}
+ *signal_initial_sob_count = cs->initial_sob_count;
+
rc = HL_CS_STATUS_SUCCESS;
goto put_cs;
@@ -1457,6 +1465,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
int rc = 0, do_ctx_switch;
void __user *chunks;
u32 num_chunks, tmp;
+ u16 sob_count;
int ret;
do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
@@ -1497,7 +1506,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
rc = 0;
} else {
rc = cs_ioctl_default(hpriv, chunks, num_chunks,
- cs_seq, 0, 0, hdev->timeout_jiffies);
+ cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count);
}
mutex_unlock(&hpriv->restore_phase_mutex);
@@ -1813,6 +1822,9 @@ static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
}
handle->count = count;
+
+ hl_ctx_get(hdev, hpriv->ctx);
+ handle->ctx = hpriv->ctx;
mgr = &hpriv->ctx->sig_mgr;
spin_lock(&mgr->lock);
@@ -1822,7 +1834,7 @@ static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
if (hdl_id < 0) {
dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n");
rc = -EINVAL;
- goto out;
+ goto put_ctx;
}
handle->id = hdl_id;
@@ -1875,7 +1887,10 @@ remove_idr:
idr_remove(&mgr->handles, hdl_id);
spin_unlock(&mgr->lock);
+put_ctx:
+ hl_ctx_put(handle->ctx);
kfree(handle);
+
out:
return rc;
}
@@ -1935,6 +1950,7 @@ static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id)
/* Release the id and free allocated memory of the handle */
idr_remove(&mgr->handles, handle_id);
+ hl_ctx_put(encaps_sig_hdl->ctx);
kfree(encaps_sig_hdl);
} else {
rc = -EINVAL;
@@ -1948,7 +1964,8 @@ out:
static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
void __user *chunks, u32 num_chunks,
- u64 *cs_seq, u32 flags, u32 timeout)
+ u64 *cs_seq, u32 flags, u32 timeout,
+ u32 *signal_sob_addr_offset, u16 *signal_initial_sob_count)
{
struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL;
bool handle_found = false, is_wait_cs = false,
@@ -2180,6 +2197,9 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
goto free_cs_object;
}
+ *signal_sob_addr_offset = cs->sob_addr_offset;
+ *signal_initial_sob_count = cs->initial_sob_count;
+
rc = HL_CS_STATUS_SUCCESS;
if (is_wait_cs)
wait_cs_submitted = true;
@@ -2210,6 +2230,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
void __user *chunks;
u32 num_chunks, flags, timeout,
signals_count = 0, sob_addr = 0, handle_id = 0;
+ u16 sob_initial_count = 0;
int rc;
rc = hl_cs_sanity_checks(hpriv, args);
@@ -2240,7 +2261,8 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
case CS_TYPE_WAIT:
case CS_TYPE_COLLECTIVE_WAIT:
rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
- &cs_seq, args->in.cs_flags, timeout);
+ &cs_seq, args->in.cs_flags, timeout,
+ &sob_addr, &sob_initial_count);
break;
case CS_RESERVE_SIGNALS:
rc = cs_ioctl_reserve_signals(hpriv,
@@ -2256,20 +2278,33 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
args->in.cs_flags,
args->in.encaps_sig_handle_id,
- timeout);
+ timeout, &sob_initial_count);
break;
}
out:
if (rc != -EAGAIN) {
memset(args, 0, sizeof(*args));
- if (cs_type == CS_RESERVE_SIGNALS) {
+ switch (cs_type) {
+ case CS_RESERVE_SIGNALS:
args->out.handle_id = handle_id;
args->out.sob_base_addr_offset = sob_addr;
args->out.count = signals_count;
- } else {
+ break;
+ case CS_TYPE_SIGNAL:
+ args->out.sob_base_addr_offset = sob_addr;
+ args->out.sob_count_before_submission = sob_initial_count;
+ args->out.seq = cs_seq;
+ break;
+ case CS_TYPE_DEFAULT:
+ args->out.sob_count_before_submission = sob_initial_count;
args->out.seq = cs_seq;
+ break;
+ default:
+ args->out.seq = cs_seq;
+ break;
}
+
args->out.status = rc;
}
@@ -2334,16 +2369,18 @@ static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence
* hl_cs_poll_fences - iterate CS fences to check for CS completion
*
* @mcs_data: multi-CS internal data
+ * @mcs_compl: multi-CS completion structure
*
* @return 0 on success, otherwise non 0 error code
*
* The function iterates on all CS sequence in the list and set bit in
* completion_bitmap for each completed CS.
- * while iterating, the function can extracts the stream map to be later
- * used by the waiting function.
- * this function shall be called after taking context ref
+ * While iterating, the function sets the stream map of each fence in the fence
+ * array in the completion QID stream map to be used by CSs to perform
+ * completion to the multi-CS context.
+ * This function shall be called after taking context ref
*/
-static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
+static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_completion *mcs_compl)
{
struct hl_fence **fence_ptr = mcs_data->fence_arr;
struct hl_device *hdev = mcs_data->ctx->hdev;
@@ -2360,6 +2397,15 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
return rc;
/*
+ * re-initialize the completion here to handle 2 possible cases:
+ * 1. CS will complete the multi-CS prior clearing the completion. in which
+ * case the fence iteration is guaranteed to catch the CS completion.
+ * 2. the completion will occur after re-init of the completion.
+ * in which case we will wake up immediately in wait_for_completion.
+ */
+ reinit_completion(&mcs_compl->completion);
+
+ /*
* set to maximum time to verify timestamp is valid: if at the end
* this value is maintained- no timestamp was updated
*/
@@ -2370,6 +2416,21 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
struct hl_fence *fence = *fence_ptr;
/*
+ * In order to prevent case where we wait until timeout even though a CS associated
+ * with the multi-CS actually completed we do things in the below order:
+ * 1. for each fence set it's QID map in the multi-CS completion QID map. This way
+ * any CS can, potentially, complete the multi CS for the specific QID (note
+ * that once completion is initialized, calling complete* and then wait on the
+ * completion will cause it to return at once)
+ * 2. only after allowing multi-CS completion for the specific QID we check whether
+ * the specific CS already completed (and thus the wait for completion part will
+ * be skipped). if the CS not completed it is guaranteed that completing CS will
+ * wake up the completion.
+ */
+ if (fence)
+ mcs_compl->stream_master_qid_map |= fence->stream_master_qid_map;
+
+ /*
* function won't sleep as it is called with timeout 0 (i.e.
* poll the fence)
*/
@@ -2384,9 +2445,7 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
switch (status) {
case CS_WAIT_STATUS_BUSY:
- /* CS did not finished, keep waiting on its QID*/
- mcs_data->stream_master_qid_map |=
- fence->stream_master_qid_map;
+ /* CS did not finished, QID to wait on already stored */
break;
case CS_WAIT_STATUS_COMPLETED:
/*
@@ -2394,9 +2453,19 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
* returns to user indicating CS completed before it finished
* all of its mcs handling, to avoid race the next time the
* user waits for mcs.
+ * note: when reaching this case fence is definitely not NULL
+ * but NULL check was added to overcome static analysis
*/
- if (!fence->mcs_handling_done)
+ if (fence && !fence->mcs_handling_done) {
+ /*
+ * in case multi CS is completed but MCS handling not done
+ * we "complete" the multi CS to prevent it from waiting
+ * until time-out and the "multi-CS handling done" will have
+ * another chance at the next iteration
+ */
+ complete_all(&mcs_compl->completion);
break;
+ }
mcs_data->completion_bitmap |= BIT(i);
/*
@@ -2456,6 +2525,21 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
return rc;
}
+static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
+{
+ if (usecs <= U32_MAX)
+ return usecs_to_jiffies(usecs);
+
+ /*
+ * If the value in nanoseconds is larger than 64 bit, use the largest
+ * 64 bit value.
+ */
+ if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC)))
+ return nsecs_to_jiffies(U64_MAX);
+
+ return nsecs_to_jiffies(usecs * NSEC_PER_USEC);
+}
+
/*
* hl_wait_multi_cs_completion_init - init completion structure
*
@@ -2469,9 +2553,7 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
* the function gets the first available completion (by marking it "used")
* and initialize its values.
*/
-static struct multi_cs_completion *hl_wait_multi_cs_completion_init(
- struct hl_device *hdev,
- u8 stream_master_bitmap)
+static struct multi_cs_completion *hl_wait_multi_cs_completion_init(struct hl_device *hdev)
{
struct multi_cs_completion *mcs_compl;
int i;
@@ -2483,8 +2565,11 @@ static struct multi_cs_completion *hl_wait_multi_cs_completion_init(
if (!mcs_compl->used) {
mcs_compl->used = 1;
mcs_compl->timestamp = 0;
- mcs_compl->stream_master_qid_map = stream_master_bitmap;
- reinit_completion(&mcs_compl->completion);
+ /*
+ * init QID map to 0 to avoid completion by CSs. the actual QID map
+ * to multi-CS CSs will be set incrementally at a later stage
+ */
+ mcs_compl->stream_master_qid_map = 0;
spin_unlock(&mcs_compl->lock);
break;
}
@@ -2492,8 +2577,7 @@ static struct multi_cs_completion *hl_wait_multi_cs_completion_init(
}
if (i == MULTI_CS_MAX_USER_CTX) {
- dev_err(hdev->dev,
- "no available multi-CS completion structure\n");
+ dev_err(hdev->dev, "no available multi-CS completion structure\n");
return ERR_PTR(-ENOMEM);
}
return mcs_compl;
@@ -2524,27 +2608,18 @@ static void hl_wait_multi_cs_completion_fini(
*
* @return 0 on success, otherwise non 0 error code
*/
-static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data)
+static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data,
+ struct multi_cs_completion *mcs_compl)
{
- struct hl_device *hdev = mcs_data->ctx->hdev;
- struct multi_cs_completion *mcs_compl;
long completion_rc;
- mcs_compl = hl_wait_multi_cs_completion_init(hdev,
- mcs_data->stream_master_qid_map);
- if (IS_ERR(mcs_compl))
- return PTR_ERR(mcs_compl);
-
- completion_rc = wait_for_completion_interruptible_timeout(
- &mcs_compl->completion,
- usecs_to_jiffies(mcs_data->timeout_us));
+ completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion,
+ mcs_data->timeout_jiffies);
/* update timestamp */
if (completion_rc > 0)
mcs_data->timestamp = mcs_compl->timestamp;
- hl_wait_multi_cs_completion_fini(mcs_compl);
-
mcs_data->wait_status = completion_rc;
return 0;
@@ -2577,6 +2652,7 @@ void hl_multi_cs_completion_init(struct hl_device *hdev)
*/
static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
+ struct multi_cs_completion *mcs_compl;
struct hl_device *hdev = hpriv->hdev;
struct multi_cs_data mcs_data = {0};
union hl_wait_cs_args *args = data;
@@ -2631,9 +2707,17 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
hl_ctx_get(hdev, ctx);
+ /* wait (with timeout) for the first CS to be completed */
+ mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us);
+ mcs_compl = hl_wait_multi_cs_completion_init(hdev);
+ if (IS_ERR(mcs_compl)) {
+ rc = PTR_ERR(mcs_compl);
+ goto put_ctx;
+ }
+
/* poll all CS fences, extract timestamp */
mcs_data.update_ts = true;
- rc = hl_cs_poll_fences(&mcs_data);
+ rc = hl_cs_poll_fences(&mcs_data, mcs_compl);
/*
* skip wait for CS completion when one of the below is true:
* - an error on the poll function
@@ -2641,34 +2725,39 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
* - the user called ioctl with timeout 0
*/
if (rc || mcs_data.completion_bitmap || !args->in.timeout_us)
- goto put_ctx;
+ goto completion_fini;
- /* wait (with timeout) for the first CS to be completed */
- mcs_data.timeout_us = args->in.timeout_us;
- rc = hl_wait_multi_cs_completion(&mcs_data);
- if (rc)
- goto put_ctx;
+ while (true) {
+ rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl);
+ if (rc || (mcs_data.wait_status == 0))
+ break;
- if (mcs_data.wait_status > 0) {
/*
* poll fences once again to update the CS map.
* no timestamp should be updated this time.
*/
mcs_data.update_ts = false;
- rc = hl_cs_poll_fences(&mcs_data);
+ rc = hl_cs_poll_fences(&mcs_data, mcs_compl);
+
+ if (mcs_data.completion_bitmap)
+ break;
/*
* if hl_wait_multi_cs_completion returned before timeout (i.e.
- * it got a completion) we expect to see at least one CS
- * completed after the poll function.
+ * it got a completion) it either got completed by CS in the multi CS list
+ * (in which case the indication will be non empty completion_bitmap) or it
+ * got completed by CS submitted to one of the shared stream master but
+ * not in the multi CS list (in which case we should wait again but modify
+ * the timeout and set timestamp as zero to let a CS related to the current
+ * multi-CS set a new, relevant, timestamp)
*/
- if (!mcs_data.completion_bitmap) {
- dev_warn_ratelimited(hdev->dev,
- "Multi-CS got completion on wait but no CS completed\n");
- rc = -EFAULT;
- }
+ mcs_data.timeout_jiffies = mcs_data.wait_status;
+ mcs_compl->timestamp = 0;
}
+completion_fini:
+ hl_wait_multi_cs_completion_fini(mcs_compl);
+
put_ctx:
hl_ctx_put(ctx);
kfree(fence_arr);
@@ -2699,7 +2788,7 @@ free_seq_arr:
}
/* update if some CS was gone */
- if (mcs_data.timestamp)
+ if (!mcs_data.timestamp)
args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
} else {
args->out.status = HL_WAIT_CS_STATUS_BUSY;
@@ -2766,37 +2855,129 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
}
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
- u32 timeout_us, u64 user_address,
- u64 target_value, u16 interrupt_offset,
- enum hl_cs_wait_status *status,
+ struct hl_cb_mgr *cb_mgr, u64 timeout_us,
+ u64 cq_counters_handle, u64 cq_counters_offset,
+ u64 target_value, struct hl_user_interrupt *interrupt,
+ u32 *status,
u64 *timestamp)
{
struct hl_user_pending_interrupt *pend;
- struct hl_user_interrupt *interrupt;
unsigned long timeout, flags;
- u64 completion_value;
long completion_rc;
+ struct hl_cb *cb;
int rc = 0;
+ u32 handle;
- if (timeout_us == U32_MAX)
- timeout = timeout_us;
- else
- timeout = usecs_to_jiffies(timeout_us);
+ timeout = hl_usecs64_to_jiffies(timeout_us);
hl_ctx_get(hdev, ctx);
- pend = kmalloc(sizeof(*pend), GFP_KERNEL);
+ cq_counters_handle >>= PAGE_SHIFT;
+ handle = (u32) cq_counters_handle;
+
+ cb = hl_cb_get(hdev, cb_mgr, handle);
+ if (!cb) {
+ hl_ctx_put(ctx);
+ return -EINVAL;
+ }
+
+ pend = kzalloc(sizeof(*pend), GFP_KERNEL);
if (!pend) {
+ hl_cb_put(cb);
hl_ctx_put(ctx);
return -ENOMEM;
}
hl_fence_init(&pend->fence, ULONG_MAX);
- if (interrupt_offset == HL_COMMON_USER_INTERRUPT_ID)
- interrupt = &hdev->common_user_interrupt;
- else
- interrupt = &hdev->user_interrupt[interrupt_offset];
+ pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset;
+ pend->cq_target_value = target_value;
+
+ /* We check for completion value as interrupt could have been received
+ * before we added the node to the wait list
+ */
+ if (*pend->cq_kernel_addr >= target_value) {
+ *status = HL_WAIT_CS_STATUS_COMPLETED;
+ /* There was no interrupt, we assume the completion is now. */
+ pend->fence.timestamp = ktime_get();
+ }
+
+ if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED))
+ goto set_timestamp;
+
+ /* Add pending user interrupt to relevant list for the interrupt
+ * handler to monitor
+ */
+ spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+ list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
+ spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+
+ /* Wait for interrupt handler to signal completion */
+ completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
+ timeout);
+ if (completion_rc > 0) {
+ *status = HL_WAIT_CS_STATUS_COMPLETED;
+ } else {
+ if (completion_rc == -ERESTARTSYS) {
+ dev_err_ratelimited(hdev->dev,
+ "user process got signal while waiting for interrupt ID %d\n",
+ interrupt->interrupt_id);
+ rc = -EINTR;
+ *status = HL_WAIT_CS_STATUS_ABORTED;
+ } else {
+ if (pend->fence.error == -EIO) {
+ dev_err_ratelimited(hdev->dev,
+ "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
+ pend->fence.error);
+ rc = -EIO;
+ *status = HL_WAIT_CS_STATUS_ABORTED;
+ } else {
+ dev_err_ratelimited(hdev->dev, "Waiting for interrupt ID %d timedout\n",
+ interrupt->interrupt_id);
+ rc = -ETIMEDOUT;
+ }
+ *status = HL_WAIT_CS_STATUS_BUSY;
+ }
+ }
+
+ spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+ list_del(&pend->wait_list_node);
+ spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+
+set_timestamp:
+ *timestamp = ktime_to_ns(pend->fence.timestamp);
+
+ kfree(pend);
+ hl_cb_put(cb);
+ hl_ctx_put(ctx);
+
+ return rc;
+}
+
+static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx,
+ u64 timeout_us, u64 user_address,
+ u64 target_value, struct hl_user_interrupt *interrupt,
+
+ u32 *status,
+ u64 *timestamp)
+{
+ struct hl_user_pending_interrupt *pend;
+ unsigned long timeout, flags;
+ u64 completion_value;
+ long completion_rc;
+ int rc = 0;
+
+ timeout = hl_usecs64_to_jiffies(timeout_us);
+
+ hl_ctx_get(hdev, ctx);
+
+ pend = kzalloc(sizeof(*pend), GFP_KERNEL);
+ if (!pend) {
+ hl_ctx_put(ctx);
+ return -ENOMEM;
+ }
+
+ hl_fence_init(&pend->fence, ULONG_MAX);
/* Add pending user interrupt to relevant list for the interrupt
* handler to monitor
@@ -2815,13 +2996,14 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
}
if (completion_value >= target_value) {
- *status = CS_WAIT_STATUS_COMPLETED;
+ *status = HL_WAIT_CS_STATUS_COMPLETED;
/* There was no interrupt, we assume the completion is now. */
pend->fence.timestamp = ktime_get();
- } else
- *status = CS_WAIT_STATUS_BUSY;
+ } else {
+ *status = HL_WAIT_CS_STATUS_BUSY;
+ }
- if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
+ if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED))
goto remove_pending_user_interrupt;
wait_again:
@@ -2850,7 +3032,13 @@ wait_again:
}
if (completion_value >= target_value) {
- *status = CS_WAIT_STATUS_COMPLETED;
+ *status = HL_WAIT_CS_STATUS_COMPLETED;
+ } else if (pend->fence.error) {
+ dev_err_ratelimited(hdev->dev,
+ "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n",
+ pend->fence.error);
+ /* set the command completion status as ABORTED */
+ *status = HL_WAIT_CS_STATUS_ABORTED;
} else {
timeout = completion_rc;
goto wait_again;
@@ -2861,7 +3049,7 @@ wait_again:
interrupt->interrupt_id);
rc = -EINTR;
} else {
- *status = CS_WAIT_STATUS_BUSY;
+ *status = HL_WAIT_CS_STATUS_BUSY;
}
remove_pending_user_interrupt:
@@ -2879,11 +3067,12 @@ remove_pending_user_interrupt:
static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
- u16 interrupt_id, interrupt_offset, first_interrupt, last_interrupt;
+ u16 interrupt_id, first_interrupt, last_interrupt;
struct hl_device *hdev = hpriv->hdev;
struct asic_fixed_properties *prop;
+ struct hl_user_interrupt *interrupt;
union hl_wait_cs_args *args = data;
- enum hl_cs_wait_status status;
+ u32 status = HL_WAIT_CS_STATUS_BUSY;
u64 timestamp;
int rc;
@@ -2894,8 +3083,7 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
return -EPERM;
}
- interrupt_id =
- FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);
+ interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);
first_interrupt = prop->first_available_user_msix_interrupt;
last_interrupt = prop->first_available_user_msix_interrupt +
@@ -2908,15 +3096,21 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
}
if (interrupt_id == HL_COMMON_USER_INTERRUPT_ID)
- interrupt_offset = HL_COMMON_USER_INTERRUPT_ID;
+ interrupt = &hdev->common_user_interrupt;
else
- interrupt_offset = interrupt_id - first_interrupt;
+ interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt];
- rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx,
+ if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
+ rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr,
+ args->in.interrupt_timeout_us, args->in.cq_counters_handle,
+ args->in.cq_counters_offset,
+ args->in.target, interrupt, &status,
+ &timestamp);
+ else
+ rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
args->in.interrupt_timeout_us, args->in.addr,
- args->in.target, interrupt_offset, &status,
+ args->in.target, interrupt, &status,
&timestamp);
-
if (rc) {
if (rc != -EINTR)
dev_err_ratelimited(hdev->dev,
@@ -2926,22 +3120,13 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
}
memset(args, 0, sizeof(*args));
+ args->out.status = status;
if (timestamp) {
args->out.timestamp_nsec = timestamp;
args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
}
- switch (status) {
- case CS_WAIT_STATUS_COMPLETED:
- args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
- break;
- case CS_WAIT_STATUS_BUSY:
- default:
- args->out.status = HL_WAIT_CS_STATUS_BUSY;
- break;
- }
-
return 0;
}
@@ -2955,7 +3140,7 @@ int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
* user interrupt
*/
if (!hl_device_operational(hpriv->hdev, NULL))
- return -EPERM;
+ return -EBUSY;
if (flags & HL_WAIT_CS_FLAGS_INTERRUPT)
rc = hl_interrupt_wait_ioctl(hpriv, data);