summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
author Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2019-04-19 12:53:42 +0300
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2019-04-19 12:53:42 +0300
commit cef62a615d22369d817757d1e4fe64fdf42a401e (patch)
tree d5a2fdab8c0372a209b63c8b378bc129011be713 /include
parent d358b1733fc33d9f0261ce07c3d328787652245d (diff)
parent 9f201aba56b92c3daa4b76efae056ddbb80d91e6 (diff)
download linux-cef62a615d22369d817757d1e4fe64fdf42a401e.tar.xz
Merge tag 'misc-habanalabs-next-2019-04-19' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next
Oded writes:

This tag contains many changes for kernel 5.2.

The major changes are:
- Add a new IOCTL for debug, profiling and trace operations on the device. This will allow the user to perform profiling and debugging of the deep learning topologies that are executing on the ASIC.
- Add a shadow table for the ASIC's MMU page tables to avoid doing page table walks on the device's DRAM during map/unmap operations.
- Refactor ASIC-dependent code into common code for all ASICs.

In addition, there are many small fixes and changes. The notable ones are:
- Allow accessing the DRAM using virtual addresses through the debugfs interface. Until now, only physical addresses were valid, but that is useless for debugging when working with the MMU.
- Allow the user to modify the TPC clock relaxation value to better control TPC power consumption during topology execution.
- Allow the user to inquire about the device's status (operational/malfunction/in-reset) in the INFO IOCTL.
- Improve the device's removal function, to prevent a crash in case of forced removal by the OS.
- Prevent PTE read/write during hard-reset. This will improve the stability of the device during hard-reset.
* tag 'misc-habanalabs-next-2019-04-19' of git://people.freedesktop.org/~gabbayo/linux: (31 commits)
  habanalabs: prevent device PTE read/write during hard-reset
  habanalabs: improve IOCTLs behavior when disabled or reset
  habanalabs: all FD must be closed before removing device
  habanalabs: split mmu/no-mmu code paths in memory ioctl
  habanalabs: ASIC_AUTO_DETECT enum value is redundant
  habanalabs: refactoring in goya.c
  uapi/habanalabs: fix some comments in uapi file
  habanalabs: add goya implementation for debug configuration
  habanalabs: add new IOCTL for debug, tracing and profiling
  habanalabs: remove extra semicolon
  habanalabs: prevent CPU soft lockup on Palladium
  habanalabs: remove trailing blank line from EOF
  habanalabs: improve error messages
  habanalabs: add device status option to INFO IOCTL
  habanalabs: allow user to modify TPC clock relaxation value
  habanalabs: set new golden value to tpc clock relaxation
  habanalabs: never fail hard reset of device
  habanalabs: keep track of the device's dma mask
  habanalabs: add MMU shadow mapping
  habanalabs: Allow accessing DRAM virtual addresses via debugfs
  ...
Diffstat (limited to 'include')
-rw-r--r-- include/uapi/misc/habanalabs.h 158
1 files changed, 146 insertions, 12 deletions
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 7fd6f633534c..613d431da783 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -20,8 +20,8 @@
/*
* Queue Numbering
*
- * The external queues (DMA channels + CPU) MUST be before the internal queues
- * and each group (DMA channels + CPU and internal) must be contiguous inside
+ * The external queues (PCI DMA channels) MUST be before the internal queues
+ * and each group (PCI DMA channels and internal) must be contiguous inside
* itself but there can be a gap between the two groups (although not
* recommended)
*/
@@ -33,7 +33,7 @@ enum goya_queue_id {
GOYA_QUEUE_ID_DMA_3,
GOYA_QUEUE_ID_DMA_4,
GOYA_QUEUE_ID_CPU_PQ,
- GOYA_QUEUE_ID_MME,
+ GOYA_QUEUE_ID_MME, /* Internal queues start here */
GOYA_QUEUE_ID_TPC0,
GOYA_QUEUE_ID_TPC1,
GOYA_QUEUE_ID_TPC2,
@@ -45,11 +45,18 @@ enum goya_queue_id {
GOYA_QUEUE_ID_SIZE
};
+enum hl_device_status {
+ HL_DEVICE_STATUS_OPERATIONAL,
+ HL_DEVICE_STATUS_IN_RESET,
+ HL_DEVICE_STATUS_MALFUNCTION
+};
+
/* Opcode for management ioctl */
#define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1
#define HL_INFO_DRAM_USAGE 2
#define HL_INFO_HW_IDLE 3
+#define HL_INFO_DEVICE_STATUS 4
#define HL_INFO_VERSION_MAX_LEN 128
@@ -82,6 +89,11 @@ struct hl_info_hw_idle {
__u32 pad;
};
+struct hl_info_device_status {
+ __u32 status;
+ __u32 pad;
+};
+
struct hl_info_args {
/* Location of relevant struct in userspace */
__u64 return_pointer;
@@ -181,7 +193,10 @@ struct hl_cs_in {
};
struct hl_cs_out {
- /* this holds the sequence number of the CS to pass to wait ioctl */
+ /*
+ * seq holds the sequence number of the CS to pass to wait ioctl. All
+ * values are valid except for 0 and ULLONG_MAX
+ */
__u64 seq;
/* HL_CS_STATUS_* */
__u32 status;
@@ -320,6 +335,107 @@ union hl_mem_args {
struct hl_mem_out out;
};
+#define HL_DEBUG_MAX_AUX_VALUES 10
+
+struct hl_debug_params_etr {
+ /* Address in memory to allocate buffer */
+ __u64 buffer_address;
+
+ /* Size of buffer to allocate */
+ __u64 buffer_size;
+
+ /* Sink operation mode: SW fifo, HW fifo, Circular buffer */
+ __u32 sink_mode;
+ __u32 pad;
+};
+
+struct hl_debug_params_etf {
+ /* Address in memory to allocate buffer */
+ __u64 buffer_address;
+
+ /* Size of buffer to allocate */
+ __u64 buffer_size;
+
+ /* Sink operation mode: SW fifo, HW fifo, Circular buffer */
+ __u32 sink_mode;
+ __u32 pad;
+};
+
+struct hl_debug_params_stm {
+ /* Two bit masks for HW event and Stimulus Port */
+ __u64 he_mask;
+ __u64 sp_mask;
+
+ /* Trace source ID */
+ __u32 id;
+
+ /* Frequency for the timestamp register */
+ __u32 frequency;
+};
+
+struct hl_debug_params_bmon {
+ /* Transaction address filter */
+ __u64 addr_range0;
+ __u64 addr_range1;
+
+ /* Capture window configuration */
+ __u32 bw_win;
+ __u32 win_capture;
+
+ /* Trace source ID */
+ __u32 id;
+ __u32 pad;
+};
+
+struct hl_debug_params_spmu {
+ /* Event types selection */
+ __u64 event_types[HL_DEBUG_MAX_AUX_VALUES];
+
+ /* Number of event types selection */
+ __u32 event_types_num;
+ __u32 pad;
+};
+
+/* Opcode for ETR component */
+#define HL_DEBUG_OP_ETR 0
+/* Opcode for ETF component */
+#define HL_DEBUG_OP_ETF 1
+/* Opcode for STM component */
+#define HL_DEBUG_OP_STM 2
+/* Opcode for FUNNEL component */
+#define HL_DEBUG_OP_FUNNEL 3
+/* Opcode for BMON component */
+#define HL_DEBUG_OP_BMON 4
+/* Opcode for SPMU component */
+#define HL_DEBUG_OP_SPMU 5
+/* Opcode for timestamp */
+#define HL_DEBUG_OP_TIMESTAMP 6
+
+struct hl_debug_args {
+ /*
+ * Pointer to user input structure.
+ * This field is relevant to specific opcodes.
+ */
+ __u64 input_ptr;
+ /* Pointer to user output structure */
+ __u64 output_ptr;
+ /* Size of user input structure */
+ __u32 input_size;
+ /* Size of user output structure */
+ __u32 output_size;
+ /* HL_DEBUG_OP_* */
+ __u32 op;
+ /*
+ * Register index in the component, taken from the debug_regs_index enum
+ * in the various ASIC header files
+ */
+ __u32 reg_idx;
+ /* Enable/disable */
+ __u32 enable;
+ /* Context ID - Currently not in use */
+ __u32 ctx_id;
+};
+
/*
* Various information operations such as:
* - H/W IP information
@@ -361,6 +477,12 @@ union hl_mem_args {
* Each JOB will be enqueued on a specific queue, according to the user's input.
* There can be more then one JOB per queue.
*
+ * The CS IOCTL will receive three sets of JOBS. One set is for "restore" phase,
+ * a second set is for "execution" phase and a third set is for "store" phase.
+ * The JOBS on the "restore" phase are enqueued only after context-switch
+ * (or if it's the first CS for this context). The user can also order the
+ * driver to run the "restore" phase explicitly
+ *
* There are two types of queues - external and internal. External queues
* are DMA queues which transfer data from/to the Host. All other queues are
* internal. The driver will get completion notifications from the device only
@@ -377,19 +499,18 @@ union hl_mem_args {
* relevant queues. Therefore, the user mustn't assume the CS has been completed
* or has even started to execute.
*
- * Upon successful enqueue, the IOCTL returns an opaque handle which the user
+ * Upon successful enqueue, the IOCTL returns a sequence number which the user
* can use with the "Wait for CS" IOCTL to check whether the handle's CS
* external JOBS have been completed. Note that if the CS has internal JOBS
* which can execute AFTER the external JOBS have finished, the driver might
* report that the CS has finished executing BEFORE the internal JOBS have
* actually finish executing.
*
- * The CS IOCTL will receive three sets of JOBS. One set is for "restore" phase,
- * a second set is for "execution" phase and a third set is for "store" phase.
- * The JOBS on the "restore" phase are enqueued only after context-switch
- * (or if its the first CS for this context). The user can also order the
- * driver to run the "restore" phase explicitly
- *
+ * Even though the sequence number increments per CS, the user can NOT
+ * automatically assume that if CS with sequence number N finished, then CS
+ * with sequence number N-1 also finished. The user can make this assumption if
+ * and only if CS N and CS N-1 are exactly the same (same CBs for the same
+ * queues).
*/
#define HL_IOCTL_CS \
_IOWR('H', 0x03, union hl_cs_args)
@@ -444,7 +565,20 @@ union hl_mem_args {
#define HL_IOCTL_MEMORY \
_IOWR('H', 0x05, union hl_mem_args)
+/*
+ * Debug
+ * - Enable/disable the ETR/ETF/FUNNEL/STM/BMON/SPMU debug traces
+ *
+ * This IOCTL allows the user to get debug traces from the chip.
+ *
+ * The user needs to provide the register index and essential data such as
+ * buffer address and size.
+ *
+ */
+#define HL_IOCTL_DEBUG \
+ _IOWR('H', 0x06, struct hl_debug_args)
+
#define HL_COMMAND_START 0x01
-#define HL_COMMAND_END 0x06
+#define HL_COMMAND_END 0x07
#endif /* HABANALABS_H_ */