From ab5f5c3089a2c9b863ad0b67e89f168ec7e8f7e5 Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Tue, 12 Jan 2021 18:37:19 +0200
Subject: habanalabs: wait for interrupt support

In order to support command submissions from user space, the driver
needs to add support for user interrupt completions. The driver will
allow multiple user threads to wait for an interrupt and, once the
interrupt is received, compare the value at a given user address with a
supplied target value.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 include/uapi/misc/habanalabs.h | 42 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 5a86b521a450..05c7cf4e727e 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -682,14 +682,46 @@ union hl_cs_args {
 	struct hl_cs_out out;
 };
 
+#define HL_WAIT_CS_FLAGS_INTERRUPT	0x2
+#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000
+
 struct hl_wait_cs_in {
-	/* Command submission sequence number */
-	__u64 seq;
-	/* Absolute timeout to wait in microseconds */
-	__u64 timeout_us;
+	union {
+		struct {
+			/* Command submission sequence number */
+			__u64 seq;
+			/* Absolute timeout to wait for command submission
+			 * in microseconds
+			 */
+			__u64 timeout_us;
+		};
+
+		struct {
+			/* User address for completion comparison.
+			 * Upon interrupt, the driver will compare the value
+			 * pointed to by this address with the supplied target
+			 * value. In order not to perform any comparison, set
+			 * the address to all 1s.
+			 * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set
+			 */
+			__u64 addr;
+			/* Target value for completion comparison */
+			__u32 target;
+			/* Absolute timeout to wait for interrupt
+			 * in microseconds
+			 */
+			__u32 interrupt_timeout_us;
+		};
+	};
+
 	/* Context ID - Currently not in use */
 	__u32 ctx_id;
-	__u32 pad;
+	/* HL_WAIT_CS_FLAGS_*
+	 * If HL_WAIT_CS_FLAGS_INTERRUPT is set, this field should include
+	 * interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK. In order
+	 * not to specify an interrupt id, set mask to all 1s.
+	 */
+	__u32 flags;
 };
 
 #define HL_WAIT_CS_STATUS_COMPLETED	0
-- 
cgit v1.2.3


From 586f2caf0ef952ca5e0f38a00b7ba8d945345cf7 Mon Sep 17 00:00:00 2001
From: Sagiv Ozeri <sozeri@habana.ai>
Date: Tue, 23 Feb 2021 18:00:05 +0200
Subject: habanalabs: return current power via INFO IOCTL

Add driver implementation for reading the current power from the device
CPU F/W.

Signed-off-by: Sagiv Ozeri <sozeri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c      | 23 +++++++++++++++++++++++
 drivers/misc/habanalabs/common/habanalabs.h       |  1 +
 drivers/misc/habanalabs/common/habanalabs_ioctl.c | 22 ++++++++++++++++++++++
 drivers/misc/habanalabs/include/common/cpucp_if.h |  5 +++++
 include/uapi/misc/habanalabs.h                    |  9 +++++++++
 5 files changed, 60 insertions(+)

(limited to 'include')

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 6f3692bf5eff..2a58edaf984a 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -565,6 +565,29 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
 	return rc;
 }
 
+int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power)
+{
+	struct cpucp_packet pkt;
+	u64 result;
+	int rc;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+			HL_CPUCP_INFO_TIMEOUT_USEC, &result);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to read power, error %d\n", rc);
+		return rc;
+	}
+
+	*power = result;
+
+	return rc;
+}
+
 static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
 {
 	/* Some of the status codes below are deprecated in newer f/w
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index fdb2a8c91f60..392a4a569049 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2361,6 +2361,7 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev,
 			u64 *total_energy);
 int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
 		u16 *pll_freq_arr);
+int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power);
 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 			u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
 			u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 848c2e588301..9fc429b82a92 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -446,6 +446,25 @@ static int pll_frequency_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 		min((size_t) max_size, sizeof(freq_info))) ? -EFAULT : 0;
 }
 
+static int power_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	u32 max_size = args->return_size;
+	struct hl_power_info power_info = {0};
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+	int rc;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	rc = hl_fw_cpucp_power_get(hdev, &power_info.power);
+	if (rc)
+		return rc;
+
+	return copy_to_user(out, &power_info,
+		min((size_t) max_size, sizeof(power_info))) ? -EFAULT : 0;
+}
+
 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 				struct device *dev)
 {
@@ -526,6 +545,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_PLL_FREQUENCY:
 		return pll_frequency_info(hpriv, args);
 
+	case HL_INFO_POWER:
+		return power_info(hpriv, args);
+
 	default:
 		dev_err(dev, "Invalid request %d\n", args->op);
 		rc = -ENOTTY;
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index bf4e7900d8c8..6ba480a316ce 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -296,6 +296,9 @@ enum pq_init_status {
  *       The result is composed of 4 outputs, each is 16-bit
  *       frequency in MHz.
  *
+ * CPUCP_PACKET_POWER_GET
+ *       Fetch the present power consumption of the device (Current * Voltage).
+ *
  */
 
 enum cpucp_packet_id {
@@ -329,6 +332,8 @@ enum cpucp_packet_id {
 	CPUCP_PACKET_PCIE_REPLAY_CNT_GET,	/* internal */
 	CPUCP_PACKET_TOTAL_ENERGY_GET,		/* internal */
 	CPUCP_PACKET_PLL_INFO_GET,		/* internal */
+	CPUCP_PACKET_NIC_STATUS,		/* internal */
+	CPUCP_PACKET_POWER_GET,			/* internal */
 };
 
 #define CPUCP_PACKET_FENCE_VAL	0xFE8CE7A5
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 05c7cf4e727e..92fd000ce0d3 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -297,6 +297,7 @@ enum hl_device_status {
 #define HL_INFO_SYNC_MANAGER		14
 #define HL_INFO_TOTAL_ENERGY		15
 #define HL_INFO_PLL_FREQUENCY		16
+#define HL_INFO_POWER			17
 
 #define HL_INFO_VERSION_MAX_LEN	128
 #define HL_INFO_CARD_NAME_MAX_LEN	16
@@ -410,6 +411,14 @@ struct hl_pll_frequency_info {
 	__u16 output[HL_PLL_NUM_OUTPUTS];
 };
 
+/**
+ * struct hl_power_info - power information
+ * @power: power consumption
+ */
+struct hl_power_info {
+	__u64 power;
+};
+
 /**
  * struct hl_info_sync_manager - sync manager information
  * @first_available_sync_object: first available sob
-- 
cgit v1.2.3


From cf39395034c6b927d49abe554fb4e93730d00543 Mon Sep 17 00:00:00 2001
From: Alon Mizrahi <amizrahi@habana.ai>
Date: Mon, 22 Feb 2021 15:53:24 +0200
Subject: habanalabs: add custom timeout flag per cs

There is a need to allow the user to send command submissions with a
custom timeout, as some CSs take longer than the max timeout that is
used by default.

Signed-off-by: Alon Mizrahi <amizrahi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../misc/habanalabs/common/command_submission.c    | 35 ++++++++++++----------
 drivers/misc/habanalabs/common/habanalabs.h        |  2 ++
 drivers/misc/habanalabs/common/hw_queue.c          |  2 +-
 include/uapi/misc/habanalabs.h                     | 15 ++++++++--
 4 files changed, 36 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 21a60b7c2091..ff8791a651fd 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -467,8 +467,7 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
 
 	if (next_entry_found && !next->tdr_active) {
 		next->tdr_active = true;
-		schedule_delayed_work(&next->work_tdr,
-					hdev->timeout_jiffies);
+		schedule_delayed_work(&next->work_tdr, next->timeout_jiffies);
 	}
 
 	spin_unlock(&hdev->cs_mirror_lock);
@@ -622,7 +621,7 @@ static void cs_timedout(struct work_struct *work)
 
 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 			enum hl_cs_type cs_type, u64 user_sequence,
-			struct hl_cs **cs_new)
+			struct hl_cs **cs_new, u32 flags, u32 timeout)
 {
 	struct hl_cs_counters_atomic *cntr;
 	struct hl_fence *other = NULL;
@@ -649,6 +648,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 	cs->submitted = false;
 	cs->completed = false;
 	cs->type = cs_type;
+	cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
+	cs->timeout_jiffies = timeout;
 	INIT_LIST_HEAD(&cs->job_list);
 	INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
 	kref_init(&cs->refcount);
@@ -1092,7 +1093,8 @@ static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
 }
 
 static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
-				u32 num_chunks, u64 *cs_seq, u32 flags)
+				u32 num_chunks, u64 *cs_seq, u32 flags,
+				u32 timeout)
 {
 	bool staged_mid, int_queues_only = true;
 	struct hl_device *hdev = hpriv->hdev;
@@ -1121,11 +1123,11 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 		staged_mid = false;
 
 	rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
-			staged_mid ? user_sequence : ULLONG_MAX, &cs);
+			staged_mid ? user_sequence : ULLONG_MAX, &cs, flags,
+			timeout);
 	if (rc)
 		goto free_cs_chunk_array;
 
-	cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
 	*cs_seq = cs->sequence;
 
 	hl_debugfs_add_cs(cs);
@@ -1323,7 +1325,8 @@ static int hl_submit_pending_cb(struct hl_fpriv *hpriv)
 		list_move_tail(&pending_cb->cb_node, &local_cb_list);
 	spin_unlock(&ctx->pending_cb_lock);
 
-	rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs);
+	rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs, 0,
+				hdev->timeout_jiffies);
 	if (rc)
 		goto add_list_elements;
 
@@ -1424,7 +1427,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
 			rc = 0;
 		} else {
 			rc = cs_ioctl_default(hpriv, chunks, num_chunks,
-								cs_seq, 0);
+					cs_seq, 0, hdev->timeout_jiffies);
 		}
 
 		mutex_unlock(&hpriv->restore_phase_mutex);
@@ -1594,7 +1597,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
 
 static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 				void __user *chunks, u32 num_chunks,
-				u64 *cs_seq, bool timestamp)
+				u64 *cs_seq, u32 flags, u32 timeout)
 {
 	struct hl_cs_chunk *cs_chunk_array, *chunk;
 	struct hw_queue_properties *hw_queue_prop;
@@ -1700,7 +1703,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 		}
 	}
 
-	rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs);
+	rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
 	if (rc) {
 		if (cs_type == CS_TYPE_WAIT ||
 			cs_type == CS_TYPE_COLLECTIVE_WAIT)
@@ -1708,8 +1711,6 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 		goto free_cs_chunk_array;
 	}
 
-	cs->timestamp = !!timestamp;
-
 	/*
 	 * Save the signal CS fence for later initialization right before
 	 * hanging the wait CS on the queue.
@@ -1767,7 +1768,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 	enum hl_cs_type cs_type;
 	u64 cs_seq = ULONG_MAX;
 	void __user *chunks;
-	u32 num_chunks, flags;
+	u32 num_chunks, flags, timeout;
 	int rc;
 
 	rc = hl_cs_sanity_checks(hpriv, args);
@@ -1793,16 +1794,20 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 			!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
 		cs_seq = args->in.seq;
 
+	timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT
+			? msecs_to_jiffies(args->in.timeout * 1000)
+			: hpriv->hdev->timeout_jiffies;
+
 	switch (cs_type) {
 	case CS_TYPE_SIGNAL:
 	case CS_TYPE_WAIT:
 	case CS_TYPE_COLLECTIVE_WAIT:
 		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
-			&cs_seq, args->in.cs_flags & HL_CS_FLAGS_TIMESTAMP);
+					&cs_seq, args->in.cs_flags, timeout);
 		break;
 	default:
 		rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
-							args->in.cs_flags);
+						args->in.cs_flags, timeout);
 		break;
 	}
 
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index ddb65639f518..54d7735991c7 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1245,6 +1245,7 @@ struct hl_userptr {
  * @sequence: the sequence number of this CS.
  * @staged_sequence: the sequence of the staged submission this CS is part of,
  *                   relevant only if staged_cs is set.
+ * @timeout_jiffies: cs timeout in jiffies.
  * @type: CS_TYPE_*.
  * @submitted: true if CS was submitted to H/W.
  * @completed: true if CS was completed by device.
@@ -1273,6 +1274,7 @@ struct hl_cs {
 	struct list_head	debugfs_list;
 	u64			sequence;
 	u64			staged_sequence;
+	u64			timeout_jiffies;
 	enum hl_cs_type		type;
 	u8			submitted;
 	u8			completed;
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 4acc25dccad3..173438461835 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -629,7 +629,7 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 	if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
 				first_entry && cs_needs_timeout(cs)) {
 		cs->tdr_active = true;
-		schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies);
+		schedule_delayed_work(&cs->work_tdr, cs->timeout_jiffies);
 
 	}
 
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 92fd000ce0d3..90798eaac728 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -630,6 +630,7 @@ struct hl_cs_chunk {
 #define HL_CS_FLAGS_STAGED_SUBMISSION		0x40
 #define HL_CS_FLAGS_STAGED_SUBMISSION_FIRST	0x80
 #define HL_CS_FLAGS_STAGED_SUBMISSION_LAST	0x100
+#define HL_CS_FLAGS_CUSTOM_TIMEOUT		0x200
 
 #define HL_CS_STATUS_SUCCESS		0
 
@@ -665,8 +666,18 @@ struct hl_cs_in {
 	 */
 	__u32 num_chunks_execute;
 
-	/* Number of chunks in restore phase array - Currently not in use */
-	__u32 num_chunks_store;
+	union {
+		/* Number of chunks in restore phase array -
+		 * Currently not in use
+		 */
+		__u32 num_chunks_store;
+
+		/* timeout in seconds - valid only if HL_CS_FLAGS_CUSTOM_TIMEOUT
+		 * is set. this parameter is ignored in case of future multiple
+		 * users support.
+		 */
+		__u32 timeout;
+	};
 
 	/* HL_CS_FLAGS_* */
 	__u32 cs_flags;
-- 
cgit v1.2.3


From 131d1ba1304a9a2040648cb03309d9444e7fe45f Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Fri, 2 Apr 2021 22:24:38 +0300
Subject: habanalabs: remove the store jobs array from CS IOCTL

The store part was never implemented in the code and has never been
used by the userspace applications.

We currently use the related parameters for a different purpose with
a defined union. However, there is no point in that and it is better
to just remove the union and the store parameters.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 include/uapi/misc/habanalabs.h | 35 ++++++++++-------------------------
 1 file changed, 10 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 90798eaac728..d3e017b5f0db 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -644,17 +644,10 @@ struct hl_cs_in {
 	/* holds address of array of hl_cs_chunk for execution phase */
 	__u64 chunks_execute;
 
-	union {
-		/* this holds address of array of hl_cs_chunk for store phase -
-		 * Currently not in use
-		 */
-		__u64 chunks_store;
-
-		/* Sequence number of a staged submission CS
-		 * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set
-		 */
-		__u64 seq;
-	};
+	/* Sequence number of a staged submission CS
+	 * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set
+	 */
+	__u64 seq;
 
 	/* Number of chunks in restore phase array. Maximum number is
 	 * HL_MAX_JOBS_PER_CS
@@ -666,18 +659,10 @@ struct hl_cs_in {
 	 */
 	__u32 num_chunks_execute;
 
-	union {
-		/* Number of chunks in restore phase array -
-		 * Currently not in use
-		 */
-		__u32 num_chunks_store;
-
-		/* timeout in seconds - valid only if HL_CS_FLAGS_CUSTOM_TIMEOUT
-		 * is set. this parameter is ignored in case of future multiple
-		 * users support.
-		 */
-		__u32 timeout;
-	};
+	/* timeout in seconds - valid only if HL_CS_FLAGS_CUSTOM_TIMEOUT
+	 * is set
+	 */
+	__u32 timeout;
 
 	/* HL_CS_FLAGS_* */
 	__u32 cs_flags;
@@ -1051,8 +1036,8 @@ struct hl_debug_args {
  * Each JOB will be enqueued on a specific queue, according to the user's input.
  * There can be more then one JOB per queue.
  *
- * The CS IOCTL will receive three sets of JOBS. One set is for "restore" phase,
- * a second set is for "execution" phase and a third set is for "store" phase.
+ * The CS IOCTL will receive two sets of JOBS. One set is for "restore" phase
+ * and a second set is for "execution" phase.
  * The JOBS on the "restore" phase are enqueued only after context-switch
  * (or if its the first CS for this context). The user can also order the
  * driver to run the "restore" phase explicitly
-- 
cgit v1.2.3