diff options
Diffstat (limited to 'include/uapi/misc/habanalabs.h')
-rw-r--r-- | include/uapi/misc/habanalabs.h | 126 |
1 files changed, 97 insertions, 29 deletions
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 9705b8adb60c..8c15a7d336a0 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -18,8 +18,18 @@ #define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START 0x8000 /* 32KB */ #define GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START 0x80 /* 128 bytes */ -#define GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT 48 -#define GAUDI_FIRST_AVAILABLE_W_S_MONITOR 24 +/* + * 128 SOBs reserved for collective wait + * 16 SOBs reserved for sync stream + */ +#define GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT 144 + +/* + * 64 monitors reserved for collective wait + * 8 monitors reserved for sync stream + */ +#define GAUDI_FIRST_AVAILABLE_W_S_MONITOR 72 + /* * Goya queue Numbering * @@ -76,10 +86,10 @@ enum gaudi_queue_id { GAUDI_QUEUE_ID_DMA_4_1 = 18, /* internal */ GAUDI_QUEUE_ID_DMA_4_2 = 19, /* internal */ GAUDI_QUEUE_ID_DMA_4_3 = 20, /* internal */ - GAUDI_QUEUE_ID_DMA_5_0 = 21, /* external */ - GAUDI_QUEUE_ID_DMA_5_1 = 22, /* external */ - GAUDI_QUEUE_ID_DMA_5_2 = 23, /* external */ - GAUDI_QUEUE_ID_DMA_5_3 = 24, /* external */ + GAUDI_QUEUE_ID_DMA_5_0 = 21, /* internal */ + GAUDI_QUEUE_ID_DMA_5_1 = 22, /* internal */ + GAUDI_QUEUE_ID_DMA_5_2 = 23, /* internal */ + GAUDI_QUEUE_ID_DMA_5_3 = 24, /* internal */ GAUDI_QUEUE_ID_DMA_6_0 = 25, /* internal */ GAUDI_QUEUE_ID_DMA_6_1 = 26, /* internal */ GAUDI_QUEUE_ID_DMA_6_2 = 27, /* internal */ @@ -232,7 +242,8 @@ enum gaudi_engine_id { enum hl_device_status { HL_DEVICE_STATUS_OPERATIONAL, HL_DEVICE_STATUS_IN_RESET, - HL_DEVICE_STATUS_MALFUNCTION + HL_DEVICE_STATUS_MALFUNCTION, + HL_DEVICE_STATUS_NEEDS_RESET }; /* Opcode for management ioctl @@ -284,6 +295,7 @@ enum hl_device_status { #define HL_INFO_CLK_THROTTLE_REASON 13 #define HL_INFO_SYNC_MANAGER 14 #define HL_INFO_TOTAL_ENERGY 15 +#define HL_INFO_PLL_FREQUENCY 16 #define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 @@ -385,6 +397,12 @@ struct hl_info_energy { __u64 total_energy_consumption; }; +#define HL_PLL_NUM_OUTPUTS 4 + +struct hl_pll_frequency_info { + __u16 output[HL_PLL_NUM_OUTPUTS]; +}; + /** * struct hl_info_sync_manager - sync manager information * @first_available_sync_object: first available sob @@ -397,23 +415,28 @@ struct hl_info_sync_manager { /** * struct hl_info_cs_counters - command submission counters - * @out_of_mem_drop_cnt: dropped due to memory allocation issue - * @parsing_drop_cnt: dropped due to error in packet parsing - * @queue_full_drop_cnt: dropped due to queue full - * @device_in_reset_drop_cnt: dropped due to device in reset - * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight + * @total_out_of_mem_drop_cnt: total dropped due to memory allocation issue + * @ctx_out_of_mem_drop_cnt: context dropped due to memory allocation issue + * @total_parsing_drop_cnt: total dropped due to error in packet parsing + * @ctx_parsing_drop_cnt: context dropped due to error in packet parsing + * @total_queue_full_drop_cnt: total dropped due to queue full + * @ctx_queue_full_drop_cnt: context dropped due to queue full + * @total_device_in_reset_drop_cnt: total dropped due to device in reset + * @ctx_device_in_reset_drop_cnt: context dropped due to device in reset + * @total_max_cs_in_flight_drop_cnt: total dropped due to maximum CS in-flight + * @ctx_max_cs_in_flight_drop_cnt: context dropped due to maximum CS in-flight */ -struct hl_cs_counters { - __u64 out_of_mem_drop_cnt; - __u64 parsing_drop_cnt; - __u64 queue_full_drop_cnt; - __u64 device_in_reset_drop_cnt; - __u64 max_cs_in_flight_drop_cnt; -}; - struct hl_info_cs_counters { - struct hl_cs_counters cs_counters; - struct hl_cs_counters ctx_cs_counters; + __u64 total_out_of_mem_drop_cnt; + __u64 ctx_out_of_mem_drop_cnt; + __u64 total_parsing_drop_cnt; + __u64 ctx_parsing_drop_cnt; + __u64 total_queue_full_drop_cnt; + __u64 ctx_queue_full_drop_cnt; + __u64 total_device_in_reset_drop_cnt; + __u64 ctx_device_in_reset_drop_cnt; + __u64 total_max_cs_in_flight_drop_cnt; + __u64 ctx_max_cs_in_flight_drop_cnt; }; enum gaudi_dcores { @@ -449,6 +472,8 @@ struct hl_info_args { * resolution. */ __u32 period_ms; + /* PLL frequency retrieval */ + __u32 pll_index; }; __u32 pad; @@ -458,6 +483,8 @@ struct hl_info_args { #define HL_CB_OP_CREATE 0 /* Opcode to destroy previously created command buffer */ #define HL_CB_OP_DESTROY 1 +/* Opcode to retrieve information about a command buffer */ +#define HL_CB_OP_INFO 2 /* 2MB minus 32 bytes for 2xMSG_PROT */ #define HL_MAX_CB_SIZE (0x200000 - 32) @@ -481,8 +508,17 @@ struct hl_cb_in { }; struct hl_cb_out { - /* Handle of CB */ - __u64 cb_handle; + union { + /* Handle of CB */ + __u64 cb_handle; + + /* Information about CB */ + struct { + /* Usage count of CB */ + __u32 usage_cnt; + __u32 pad; + }; + }; }; union hl_cb_args { @@ -490,6 +526,22 @@ union hl_cb_args { struct hl_cb_out out; }; +/* HL_CS_CHUNK_FLAGS_ values + * + * HL_CS_CHUNK_FLAGS_USER_ALLOC_CB: + * Indicates if the CB was allocated and mapped by userspace. + * User allocated CB is a command buffer allocated by the user, via malloc + * (or similar). After allocating the CB, the user invokes “memory ioctl” + * to map the user memory into a device virtual address. The user provides + * this address via the cb_handle field. The interface provides the + * ability to create a large CBs, Which aren’t limited to + * “HL_MAX_CB_SIZE”. Therefore, it increases the PCI-DMA queues + * throughput. This CB allocation method also reduces the use of Linux + * DMA-able memory pool. Which are limited and used by other Linux + * sub-systems. + */ +#define HL_CS_CHUNK_FLAGS_USER_ALLOC_CB 0x1 + /* * This structure size must always be fixed to 64-bytes for backward * compatibility @@ -507,7 +559,8 @@ struct hl_cs_chunk { */ __u64 cb_handle; - /* Relevant only when HL_CS_FLAGS_WAIT is set. + /* Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set. * This holds address of array of u64 values that contain * signal CS sequence numbers. The wait described by this job * will listen on all those signals (wait event per signal) @@ -525,7 +578,8 @@ struct hl_cs_chunk { */ __u32 cb_size; - /* Relevant only when HL_CS_FLAGS_WAIT is set. + /* Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set. * Number of entries in signal_seq_arr */ __u32 num_signal_seq_arr; @@ -534,14 +588,22 @@ struct hl_cs_chunk { /* HL_CS_CHUNK_FLAGS_* */ __u32 cs_chunk_flags; + /* Relevant only when HL_CS_FLAGS_COLLECTIVE_WAIT is set. + * This holds the collective engine ID. The wait described by this job + * will sync with this engine and with all NICs before completion. + */ + __u32 collective_engine_id; + /* Align structure to 64 bytes */ - __u32 pad[11]; + __u32 pad[10]; }; -/* SIGNAL and WAIT flags are mutually exclusive */ +/* SIGNAL and WAIT/COLLECTIVE_WAIT flags are mutually exclusive */ #define HL_CS_FLAGS_FORCE_RESTORE 0x1 #define HL_CS_FLAGS_SIGNAL 0x2 #define HL_CS_FLAGS_WAIT 0x4 +#define HL_CS_FLAGS_COLLECTIVE_WAIT 0x8 +#define HL_CS_FLAGS_TIMESTAMP 0x20 #define HL_CS_STATUS_SUCCESS 0 @@ -612,10 +674,16 @@ struct hl_wait_cs_in { #define HL_WAIT_CS_STATUS_ABORTED 3 #define HL_WAIT_CS_STATUS_INTERRUPTED 4 +#define HL_WAIT_CS_STATUS_FLAG_GONE 0x1 +#define HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD 0x2 + struct hl_wait_cs_out { /* HL_WAIT_CS_STATUS_* */ __u32 status; - __u32 pad; + /* HL_WAIT_CS_STATUS_FLAG* */ + __u32 flags; + /* valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set */ + __s64 timestamp_nsec; }; union hl_wait_cs_args { |