From eff6f4a0e70b7bcf4674f471a768860a74e638a6 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 16 Feb 2019 00:39:21 +0200 Subject: habanalabs: add command submission module This patch adds the main flow for the user to submit work to the device. Each work is described by a command submission object (CS). The CS contains 3 arrays of command buffers: One for execution, and two for context-switch (store and restore). For each CB, the user specifies on which queue to put that CB. In case of an internal queue, the entry doesn't contain a pointer to the CB but the address in the on-chip memory that the CB resides at. The driver parses some of the CBs to enforce security restrictions. The user receives a sequence number that represents the CS object. The user can then query the driver regarding the status of the CS, using that sequence number. In case the CS doesn't finish before the timeout expires, the driver will perform a soft-reset of the device. Reviewed-by: Mike Rapoport Signed-off-by: Oded Gabbay Signed-off-by: Greg Kroah-Hartman --- drivers/misc/habanalabs/device.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'drivers/misc/habanalabs/device.c') diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 2aa8a68cdf76..cc5f068df597 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -30,6 +30,8 @@ static void hpriv_release(struct kref *ref) put_pid(hpriv->taskpid); + mutex_destroy(&hpriv->restore_phase_mutex); + kfree(hpriv); /* Now the FD is really closed */ @@ -208,6 +210,8 @@ static int device_early_init(struct hl_device *hdev) mutex_init(&hdev->fd_open_cnt_lock); mutex_init(&hdev->send_cpu_message_lock); + INIT_LIST_HEAD(&hdev->hw_queues_mirror_list); + spin_lock_init(&hdev->hw_queues_mirror_lock); atomic_set(&hdev->in_reset, 0); atomic_set(&hdev->fd_open_cnt, 0); @@ -593,6 +597,9 @@ again: */ hdev->asic_funcs->halt_engines(hdev, hard_reset); + /* Go over all the queues, release all CS and their jobs */ + hl_cs_rollback_all(hdev); + if (hard_reset) { /* Release kernel context */ if (hl_ctx_put(hdev->kernel_ctx) != 1) { @@ -616,6 +623,12 @@ again: for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) hl_cq_reset(hdev, &hdev->completion_queue[i]); + /* Make sure the setup phase for the user context will run again */ + if (hdev->user_ctx) { + atomic_set(&hdev->user_ctx->thread_restore_token, 1); + hdev->user_ctx->thread_restore_wait_token = 0; + } + /* Finished tear-down, starting to re-initialize */ if (hard_reset) { @@ -952,6 +965,9 @@ void hl_device_fini(struct hl_device *hdev) */ hdev->asic_funcs->halt_engines(hdev, true); + /* Go over all the queues, release all CS and their jobs */ + hl_cs_rollback_all(hdev); + hl_cb_pool_fini(hdev); /* Release kernel context */ -- cgit v1.2.3