Diffstat (limited to 'drivers/accel/habanalabs/common/command_submission.c')
-rw-r--r-- | drivers/accel/habanalabs/common/command_submission.c | 130 |
1 file changed, 100 insertions, 30 deletions
diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index 8270db0a72a2..af9d2e22c6e7 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -14,10 +14,10 @@
 #define HL_CS_FLAGS_TYPE_MASK	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
 			HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \
 			HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND | \
-			HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
+			HL_CS_FLAGS_ENGINES_COMMAND | HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
 
-#define MAX_TS_ITER_NUM 10
+#define MAX_TS_ITER_NUM 100
 
 /**
  * enum hl_cs_wait_status - cs wait status
@@ -657,7 +657,7 @@ static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
 	/*
 	 * we get refcount upon reservation of signals or signal/wait cs for the
 	 * hw_sob object, and need to put it when the first staged cs
-	 * (which cotains the encaps signals) or cs signal/wait is completed.
+	 * (which contains the encaps signals) or cs signal/wait is completed.
 	 */
 	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
 			(hl_cs_cmpl->type == CS_TYPE_WAIT) ||
@@ -1082,9 +1082,8 @@ static void
 wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
 {
 	struct hl_user_pending_interrupt *pend, *temp;
-	unsigned long flags;
 
-	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+	spin_lock(&interrupt->wait_list_lock);
 	list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
 		if (pend->ts_reg_info.buf) {
 			list_del(&pend->wait_list_node);
@@ -1095,7 +1094,7 @@ wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
 			complete_all(&pend->fence.completion);
 		}
 	}
-	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+	spin_unlock(&interrupt->wait_list_lock);
 }
 
 void hl_release_pending_user_interrupts(struct hl_device *hdev)
@@ -1168,6 +1167,22 @@ static void cs_completion(struct work_struct *work)
 		hl_complete_job(hdev, job);
 }
 
+u32 hl_get_active_cs_num(struct hl_device *hdev)
+{
+	u32 active_cs_num = 0;
+	struct hl_cs *cs;
+
+	spin_lock(&hdev->cs_mirror_lock);
+
+	list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node)
+		if (!cs->completed)
+			active_cs_num++;
+
+	spin_unlock(&hdev->cs_mirror_lock);
+
+	return active_cs_num;
+}
+
 static int validate_queue_index(struct hl_device *hdev,
 				struct hl_cs_chunk *chunk,
 				enum hl_queue_type *queue_type,
@@ -1304,6 +1319,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
 		return CS_UNRESERVE_SIGNALS;
 	else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND)
 		return CS_TYPE_ENGINE_CORE;
+	else if (cs_type_flags & HL_CS_FLAGS_ENGINES_COMMAND)
+		return CS_TYPE_ENGINES;
 	else if (cs_type_flags & HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
 		return CS_TYPE_FLUSH_PCI_HBW_WRITES;
 	else
@@ -2429,10 +2446,13 @@ out:
 static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
 						u32 num_engine_cores, u32 core_command)
 {
-	int rc;
 	struct hl_device *hdev = hpriv->hdev;
 	void __user *engine_cores_arr;
 	u32 *cores;
+	int rc;
+
+	if (!hdev->asic_prop.supports_engine_modes)
+		return -EPERM;
 
 	if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) {
 		dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores);
@@ -2461,6 +2481,48 @@ static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
 	return rc;
 }
 
+static int cs_ioctl_engines(struct hl_fpriv *hpriv, u64 engines_arr_user_addr,
+						u32 num_engines, enum hl_engine_command command)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	u32 *engines, max_num_of_engines;
+	void __user *engines_arr;
+	int rc;
+
+	if (!hdev->asic_prop.supports_engine_modes)
+		return -EPERM;
+
+	if (command >= HL_ENGINE_COMMAND_MAX) {
+		dev_err(hdev->dev, "Engine command is invalid\n");
+		return -EINVAL;
+	}
+
+	max_num_of_engines = hdev->asic_prop.max_num_of_engines;
+	if (command == HL_ENGINE_CORE_RUN || command == HL_ENGINE_CORE_HALT)
+		max_num_of_engines = hdev->asic_prop.num_engine_cores;
+
+	if (!num_engines || num_engines > max_num_of_engines) {
+		dev_err(hdev->dev, "Number of engines %d is invalid\n", num_engines);
+		return -EINVAL;
+	}
+
+	engines_arr = (void __user *) (uintptr_t) engines_arr_user_addr;
+	engines = kmalloc_array(num_engines, sizeof(u32), GFP_KERNEL);
+	if (!engines)
+		return -ENOMEM;
+
+	if (copy_from_user(engines, engines_arr, num_engines * sizeof(u32))) {
+		dev_err(hdev->dev, "Failed to copy engine-ids array from user\n");
+		kfree(engines);
+		return -EFAULT;
+	}
+
+	rc = hdev->asic_funcs->set_engines(hdev, engines, num_engines, command);
+	kfree(engines);
+
+	return rc;
+}
+
 static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv)
 {
 	struct hl_device *hdev = hpriv->hdev;
@@ -2532,6 +2594,10 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 		rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores, args->in.num_engine_cores,
 						args->in.core_command);
 		break;
+	case CS_TYPE_ENGINES:
+		rc = cs_ioctl_engines(hpriv, args->in.engines,
+				args->in.num_engines, args->in.engine_command);
+		break;
 	case CS_TYPE_FLUSH_PCI_HBW_WRITES:
 		rc = cs_ioctl_flush_pci_hbw_writes(hpriv);
 		break;
@@ -3143,8 +3209,9 @@ static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
 	struct hl_user_pending_interrupt *cb_last =
 			(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
 			(ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
-	unsigned long flags, iter_counter = 0;
+	unsigned long iter_counter = 0;
 	u64 current_cq_counter;
+	ktime_t timestamp;
 
 	/* Validate ts_offset not exceeding last max */
 	if (requested_offset_record >= cb_last) {
@@ -3153,8 +3220,10 @@ static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
 		return -EINVAL;
 	}
 
+	timestamp = ktime_get();
+
 start_over:
-	spin_lock_irqsave(wait_list_lock, flags);
+	spin_lock(wait_list_lock);
 
 	/* Unregister only if we didn't reach the target value
 	 * since in this case there will be no handling in irq context
@@ -3165,7 +3234,7 @@ start_over:
 		current_cq_counter = *requested_offset_record->cq_kernel_addr;
 		if (current_cq_counter < requested_offset_record->cq_target_value) {
 			list_del(&requested_offset_record->wait_list_node);
-			spin_unlock_irqrestore(wait_list_lock, flags);
+			spin_unlock(wait_list_lock);
 
 			hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf);
 			hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);
@@ -3176,13 +3245,14 @@ start_over:
 			dev_dbg(buf->mmg->dev,
 				"ts node in middle of irq handling\n");
 
-			/* irq handling in the middle give it time to finish */
-			spin_unlock_irqrestore(wait_list_lock, flags);
-			usleep_range(1, 10);
+			/* irq thread handling in the middle give it time to finish */
+			spin_unlock(wait_list_lock);
+			usleep_range(100, 1000);
 			if (++iter_counter == MAX_TS_ITER_NUM) {
 				dev_err(buf->mmg->dev,
-					"handling registration interrupt took too long!!\n");
-				return -EINVAL;
+					"Timestamp offset processing reached timeout of %lld ms\n",
+					ktime_ms_delta(ktime_get(), timestamp));
+				return -EAGAIN;
 			}
 
 			goto start_over;
@@ -3197,7 +3267,7 @@ start_over:
 				(u64 *) cq_cb->kernel_address + cq_offset;
 		requested_offset_record->cq_target_value = target_value;
 
-		spin_unlock_irqrestore(wait_list_lock, flags);
+		spin_unlock(wait_list_lock);
 	}
 
 	*pend = requested_offset_record;
@@ -3217,7 +3287,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	struct hl_user_pending_interrupt *pend;
 	struct hl_mmap_mem_buf *buf;
 	struct hl_cb *cq_cb;
-	unsigned long timeout, flags;
+	unsigned long timeout;
 	long completion_rc;
 	int rc = 0;
 
@@ -3264,7 +3334,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 		pend->cq_target_value = target_value;
 	}
 
-	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+	spin_lock(&interrupt->wait_list_lock);
 
 	/* We check for completion value as interrupt could have been received
 	 * before we added the node to the wait list
@@ -3272,7 +3342,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	if (*pend->cq_kernel_addr >= target_value) {
 		if (register_ts_record)
 			pend->ts_reg_info.in_use = 0;
-		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+		spin_unlock(&interrupt->wait_list_lock);
 
 		*status = HL_WAIT_CS_STATUS_COMPLETED;
 
@@ -3284,7 +3354,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 			goto set_timestamp;
 		}
 	} else if (!timeout_us) {
-		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+		spin_unlock(&interrupt->wait_list_lock);
 		*status = HL_WAIT_CS_STATUS_BUSY;
 		pend->fence.timestamp = ktime_get();
 		goto set_timestamp;
@@ -3309,7 +3379,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 		pend->ts_reg_info.in_use = 1;
 
 	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
-	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+	spin_unlock(&interrupt->wait_list_lock);
 
 	if (register_ts_record) {
 		rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
@@ -3353,9 +3423,9 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	 * for ts record, the node will be deleted in the irq handler after
 	 * we reach the target value.
 	 */
-	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+	spin_lock(&interrupt->wait_list_lock);
 	list_del(&pend->wait_list_node);
-	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+	spin_unlock(&interrupt->wait_list_lock);
 
 set_timestamp:
 	*timestamp = ktime_to_ns(pend->fence.timestamp);
@@ -3383,7 +3453,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
 			u64 *timestamp)
 {
 	struct hl_user_pending_interrupt *pend;
-	unsigned long timeout, flags;
+	unsigned long timeout;
 	u64 completion_value;
 	long completion_rc;
 	int rc = 0;
@@ -3403,9 +3473,9 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
 	/* Add pending user interrupt to relevant list for the interrupt
 	 * handler to monitor
 	 */
-	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+	spin_lock(&interrupt->wait_list_lock);
 	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
-	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+	spin_unlock(&interrupt->wait_list_lock);
 
 	/* We check for completion value as interrupt could have been received
 	 * before we added the node to the wait list
@@ -3436,14 +3506,14 @@ wait_again:
 	 * If comparison fails, keep waiting until timeout expires
 	 */
 	if (completion_rc > 0) {
-		spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+		spin_lock(&interrupt->wait_list_lock);
 		/* reinit_completion must be called before we check for user
 		 * completion value, otherwise, if interrupt is received after
 		 * the comparison and before the next wait_for_completion,
 		 * we will reach timeout and fail
 		 */
 		reinit_completion(&pend->fence.completion);
-		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+		spin_unlock(&interrupt->wait_list_lock);
 
 		if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
 			dev_err(hdev->dev, "Failed to copy completion value from user\n");
@@ -3480,9 +3550,9 @@ wait_again:
 	}
 
 remove_pending_user_interrupt:
-	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+	spin_lock(&interrupt->wait_list_lock);
 	list_del(&pend->wait_list_node);
-	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
+	spin_unlock(&interrupt->wait_list_lock);
 
 	*timestamp = ktime_to_ns(pend->fence.timestamp);
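For reference, the new CS_TYPE_ENGINES path added by this diff is reached from user space through the existing HL_IOCTL_CS interface by setting HL_CS_FLAGS_ENGINES_COMMAND in cs_flags, mirroring the older engine-cores command. The following is a minimal, hypothetical user-space sketch, not part of the patch: the device node path, the engine-core IDs, and the uapi header location (which moved during the accel migration) are assumptions, and error handling is trimmed.

/*
 * Hypothetical sketch: halt two engine cores via HL_CS_FLAGS_ENGINES_COMMAND.
 * The uapi names (union hl_cs_args, HL_IOCTL_CS, HL_ENGINE_CORE_HALT) come
 * from the habanalabs uapi header; its include path and the device node vary
 * across kernel versions, so both are assumed here.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <drm/habanalabs_accel.h>	/* assumed uapi header location */

int main(void)
{
	uint32_t engine_ids[] = { 0, 1 };	/* placeholder ASIC-specific core IDs */
	union hl_cs_args args;
	int fd, rc;

	fd = open("/dev/accel/accel0", O_RDWR);	/* assumed device node */
	if (fd < 0)
		return 1;

	memset(&args, 0, sizeof(args));
	args.in.engines = (uint64_t)(uintptr_t)engine_ids;
	args.in.num_engines = 2;	/* capped at num_engine_cores for core run/halt */
	args.in.engine_command = HL_ENGINE_CORE_HALT;	/* must be < HL_ENGINE_COMMAND_MAX */
	args.in.cs_flags = HL_CS_FLAGS_ENGINES_COMMAND;

	/* hl_cs_get_cs_type() maps the flag to CS_TYPE_ENGINES, and hl_cs_ioctl()
	 * dispatches to cs_ioctl_engines(); -EPERM indicates the ASIC does not
	 * set supports_engine_modes.
	 */
	rc = ioctl(fd, HL_IOCTL_CS, &args);
	if (rc)
		perror("HL_IOCTL_CS");

	close(fd);
	return rc ? 1 : 0;
}

The kernel side copies the engine-ID array with copy_from_user() and forwards it to the ASIC-specific set_engines() callback, so the sketch above only needs to pass a pointer-sized handle plus a count, the same convention cs_ioctl_engine_cores() already uses.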