From 71386e11f262f5fc66000c0d83b4fd8cbbaf9d38 Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Sun, 3 Jul 2022 17:40:57 +0300 Subject: habanalabs: removed seq_file parameter from is_idle asic functions Change is_idle functions so it would be more usable outside debugfs. Do this by replacing seq_file parameter with regular string. Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/debugfs.c | 47 +++++++++++++- drivers/misc/habanalabs/common/habanalabs.h | 17 ++++- drivers/misc/habanalabs/gaudi/gaudi.c | 47 +++++++------- drivers/misc/habanalabs/gaudi2/gaudi2.c | 97 +++++++++++++++-------------- drivers/misc/habanalabs/goya/goya.c | 32 +++++----- 5 files changed, 151 insertions(+), 89 deletions(-) diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 64439f33a19b..90c91c1b2c10 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -17,6 +17,7 @@ #define MMU_ASID_BUF_SIZE 10 #define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE) #define I2C_MAX_TRANSACTION_LEN 8 +#define ENGINES_DATA_MAX_SIZE SZ_16K static struct dentry *hl_debug_root; @@ -586,11 +587,37 @@ err: return -EINVAL; } +void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...) +{ + va_list args; + int str_size; + + va_start(args, fmt); + /* Calculate formatted string length. 
Assuming each string is null terminated, hence + * increment result by 1 + */ + str_size = vsnprintf(NULL, 0, fmt, args) + 1; + va_end(args); + + if ((e->actual_size + str_size) < e->allocated_buf_size) { + va_start(args, fmt); + vsnprintf(e->buf + e->actual_size, str_size, fmt, args); + va_end(args); + } + + /* Need to update the size even when not updating destination buffer to get the exact size + * of all input strings + */ + e->actual_size += str_size; + +} + static int engines_show(struct seq_file *s, void *data) { struct hl_debugfs_entry *entry = s->private; struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; + struct engines_data eng_data; if (hdev->reset_info.in_reset) { dev_warn_ratelimited(hdev->dev, @@ -598,7 +625,25 @@ static int engines_show(struct seq_file *s, void *data) return 0; } - hdev->asic_funcs->is_device_idle(hdev, NULL, 0, s); + eng_data.actual_size = 0; + eng_data.allocated_buf_size = ENGINES_DATA_MAX_SIZE; + eng_data.buf = vmalloc(eng_data.allocated_buf_size); + if (!eng_data.buf) + return -ENOMEM; + + hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data); + + if (eng_data.actual_size > eng_data.allocated_buf_size) { + dev_err(hdev->dev, + "Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n", + eng_data.actual_size, eng_data.allocated_buf_size); + vfree(eng_data.buf); + return -ENOMEM; + } + + seq_write(s, eng_data.buf, eng_data.actual_size); + + vfree(eng_data.buf); return 0; } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index d59bba9e55c9..440e154dbe31 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1371,6 +1371,18 @@ struct fw_load_mgr { struct hl_cs; +/** + * struct engines_data - asic engines data + * @buf: buffer for engines data in ascii + * @actual_size: actual size of data that was written by the driver to the allocated buffer + * 
@allocated_buf_size: total size of allocated buffer + */ +struct engines_data { + char *buf; + int actual_size; + u32 allocated_buf_size; +}; + /** * struct hl_asic_funcs - ASIC specific functions that are can be called from * common code. @@ -1570,8 +1582,8 @@ struct hl_asic_funcs { int (*mmu_prefetch_cache_range)(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size); int (*send_heartbeat)(struct hl_device *hdev); int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data); - bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr, - u8 mask_len, struct seq_file *s); + bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, + struct engines_data *e); int (*non_hard_reset_late_init)(struct hl_device *hdev); void (*hw_queues_lock)(struct hl_device *hdev); void (*hw_queues_unlock)(struct hl_device *hdev); @@ -3743,6 +3755,7 @@ struct hl_mmap_mem_buf * hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg, struct hl_mmap_mem_buf_behavior *behavior, gfp_t gfp, void *args); +__printf(2, 3) void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...); #ifdef CONFIG_DEBUG_FS diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index cb2988e2c7a8..a3eca13c3fa9 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -8066,8 +8066,8 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev) return 0; } -static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, - u8 mask_len, struct seq_file *s) +static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, + struct engines_data *e) { struct gaudi_device *gaudi = hdev->asic_specific; const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n"; @@ -8079,8 +8079,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u64 offset; int i, dma_id, port; - if (s) - seq_puts(s, + if (e) + hl_engine_data_sprintf(e, "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS 
DMA_CORE_STS0\n" "--- ------- ------------ ---------- -------------\n"); @@ -8097,14 +8097,14 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask); - if (s) - seq_printf(s, fmt, dma_id, + if (e) + hl_engine_data_sprintf(e, fmt, dma_id, is_eng_idle ? "Y" : "N", qm_glbl_sts0, qm_cgm_sts, dma_core_sts0); } - if (s) - seq_puts(s, + if (e) + hl_engine_data_sprintf(e, "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n" "--- ------- ------------ ---------- ----------\n"); @@ -8119,14 +8119,14 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask); - if (s) - seq_printf(s, fmt, i, + if (e) + hl_engine_data_sprintf(e, fmt, i, is_eng_idle ? "Y" : "N", qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); } - if (s) - seq_puts(s, + if (e) + hl_engine_data_sprintf(e, "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n" "--- ------- ------------ ---------- -----------\n"); @@ -8147,20 +8147,21 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask); - if (s) { + if (e) { if (!is_slave) - seq_printf(s, fmt, i, + hl_engine_data_sprintf(e, fmt, i, is_eng_idle ? "Y" : "N", qm_glbl_sts0, qm_cgm_sts, mme_arch_sts); else - seq_printf(s, mme_slave_fmt, i, + hl_engine_data_sprintf(e, mme_slave_fmt, i, is_eng_idle ? 
"Y" : "N", "-", "-", mme_arch_sts); } } - if (s) - seq_puts(s, "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" + if (e) + hl_engine_data_sprintf(e, + "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" "--- ------- ------------ ----------\n"); for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) { @@ -8174,8 +8175,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); - if (s) - seq_printf(s, nic_fmt, port, + if (e) + hl_engine_data_sprintf(e, nic_fmt, port, is_eng_idle ? "Y" : "N", qm_glbl_sts0, qm_cgm_sts); } @@ -8189,15 +8190,15 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); - if (s) - seq_printf(s, nic_fmt, port, + if (e) + hl_engine_data_sprintf(e, nic_fmt, port, is_eng_idle ? "Y" : "N", qm_glbl_sts0, qm_cgm_sts); } } - if (s) - seq_puts(s, "\n"); + if (e) + hl_engine_data_sprintf(e, "\n"); return is_idle; } diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 98336a1a84b0..1140cf7db4a3 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -1663,7 +1663,7 @@ struct gaudi2_cache_invld_params { }; struct gaudi2_tpc_idle_data { - struct seq_file *s; + struct engines_data *e; unsigned long *mask; bool *is_idle; const char *tpc_fmt; @@ -6172,14 +6172,15 @@ static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int ins if (idle_data->mask && !is_eng_idle) set_bit(engine_idx, idle_data->mask); - if (idle_data->s) - seq_printf(idle_data->s, idle_data->tpc_fmt, dcore, inst, + if (idle_data->e) + hl_engine_data_sprintf(idle_data->e, + idle_data->tpc_fmt, dcore, inst, is_eng_idle ? 
"Y" : "N", qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); } -static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, - u8 mask_len, struct seq_file *s) +static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, + struct engines_data *e) { u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_idle_ind_mask, mme_arch_sts, dec_swreg15, dec_enabled_bit; @@ -6197,7 +6198,7 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, struct gaudi2_tpc_idle_data tpc_idle_data = { .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n", - .s = s, + .e = e, .mask = mask, .is_idle = &is_idle, }; @@ -6209,8 +6210,8 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, int engine_idx, i, j; /* EDMA, Two engines per Dcore */ - if (s) - seq_puts(s, + if (e) + hl_engine_data_sprintf(e, "\nCORE EDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n" "---- ---- ------- ------------ ----------------------\n"); @@ -6239,19 +6240,19 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(engine_idx, mask); - if (s) - seq_printf(s, edma_fmt, i, j, - is_eng_idle ? "Y" : "N", - qm_glbl_sts0, - dma_core_idle_ind_mask); + if (e) + hl_engine_data_sprintf(e, edma_fmt, i, j, + is_eng_idle ? "Y" : "N", + qm_glbl_sts0, + dma_core_idle_ind_mask); } } /* PDMA, Two engines in Full chip */ - if (s) - seq_puts(s, - "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n" - "---- ------- ------------ ----------------------\n"); + if (e) + hl_engine_data_sprintf(e, + "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n" + "---- ------- ------------ ----------------------\n"); for (i = 0 ; i < NUM_OF_PDMA ; i++) { engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i; @@ -6269,16 +6270,16 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(engine_idx, mask); - if (s) - seq_printf(s, pdma_fmt, i, is_eng_idle ? 
"Y" : "N", qm_glbl_sts0, - dma_core_idle_ind_mask); + if (e) + hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N", + qm_glbl_sts0, dma_core_idle_ind_mask); } /* NIC, twelve macros in Full chip */ - if (s && hdev->nic_ports_mask) - seq_puts(s, - "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" - "--- ------- ------------ ----------\n"); + if (e && hdev->nic_ports_mask) + hl_engine_data_sprintf(e, + "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" + "--- ------- ------------ ----------\n"); for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { if (!(i & 1)) @@ -6302,15 +6303,15 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(engine_idx, mask); - if (s) - seq_printf(s, nic_fmt, i, is_eng_idle ? "Y" : "N", qm_glbl_sts0, - qm_cgm_sts); + if (e) + hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N", + qm_glbl_sts0, qm_cgm_sts); } - if (s) - seq_puts(s, - "\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n" - "--- ---- ------- ------------ ---------------\n"); + if (e) + hl_engine_data_sprintf(e, + "\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n" + "--- ---- ------- ------------ ---------------\n"); /* MME, one per Dcore */ for (i = 0 ; i < NUM_OF_DCORES ; i++) { engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET; @@ -6327,8 +6328,8 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, is_eng_idle &= IS_MME_IDLE(mme_arch_sts); is_idle &= is_eng_idle; - if (s) - seq_printf(s, mme_fmt, i, "N", + if (e) + hl_engine_data_sprintf(e, mme_fmt, i, "N", is_eng_idle ? 
"Y" : "N", qm_glbl_sts0, mme_arch_sts); @@ -6340,16 +6341,16 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, /* * TPC */ - if (s && prop->tpc_enabled_mask) - seq_puts(s, + if (e && prop->tpc_enabled_mask) + hl_engine_data_sprintf(e, "\nCORE TPC is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_IDLE_IND_MASK\n" "---- --- -------- ------------ ---------- ----------------------\n"); gaudi2_iterate_tpcs(hdev, &tpc_iter); /* Decoders, two each Dcore and two shared PCIe decoders */ - if (s && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK))) - seq_puts(s, + if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK))) + hl_engine_data_sprintf(e, "\nCORE DEC is_idle VSI_CMD_SWREG15\n" "---- --- ------- ---------------\n"); @@ -6370,13 +6371,14 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(engine_idx, mask); - if (s) - seq_printf(s, dec_fmt, i, j, is_eng_idle ? "Y" : "N", dec_swreg15); + if (e) + hl_engine_data_sprintf(e, dec_fmt, i, j, + is_eng_idle ? "Y" : "N", dec_swreg15); } } - if (s && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK)) - seq_puts(s, + if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK)) + hl_engine_data_sprintf(e, "\nPCIe DEC is_idle VSI_CMD_SWREG15\n" "-------- ------- ---------------\n"); @@ -6395,12 +6397,13 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(engine_idx, mask); - if (s) - seq_printf(s, pcie_dec_fmt, i, is_eng_idle ? "Y" : "N", dec_swreg15); + if (e) + hl_engine_data_sprintf(e, pcie_dec_fmt, i, + is_eng_idle ? 
"Y" : "N", dec_swreg15); } - if (s) - seq_puts(s, + if (e) + hl_engine_data_sprintf(e, "\nCORE ROT is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" "---- ---- ------- ------------ ---------- -------------\n"); @@ -6419,8 +6422,8 @@ static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(engine_idx, mask); - if (s) - seq_printf(s, rot_fmt, i, 0, is_eng_idle ? "Y" : "N", + if (e) + hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0, qm_cgm_sts, "-"); } diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index db4487c33582..7b9f7f8b51f4 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -5137,8 +5137,8 @@ int goya_cpucp_info_get(struct hl_device *hdev) return 0; } -static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, - u8 mask_len, struct seq_file *s) +static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, + struct engines_data *e) { const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n"; const char *dma_fmt = "%-5d%-9s%#-14x%#x\n"; @@ -5149,9 +5149,9 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u64 offset; int i; - if (s) - seq_puts(s, "\nDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0\n" - "--- ------- ------------ -------------\n"); + if (e) + hl_engine_data_sprintf(e, "\nDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0\n" + "--- ------- ------------ -------------\n"); offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0; @@ -5164,13 +5164,13 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(GOYA_ENGINE_ID_DMA_0 + i, mask); - if (s) - seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N", + if (e) + hl_engine_data_sprintf(e, dma_fmt, i, is_eng_idle ? 
"Y" : "N", qm_glbl_sts0, dma_core_sts0); } - if (s) - seq_puts(s, + if (e) + hl_engine_data_sprintf(e, "\nTPC is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 CFG_STATUS\n" "--- ------- ------------ -------------- ----------\n"); @@ -5187,13 +5187,13 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(GOYA_ENGINE_ID_TPC_0 + i, mask); - if (s) - seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N", + if (e) + hl_engine_data_sprintf(e, fmt, i, is_eng_idle ? "Y" : "N", qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts); } - if (s) - seq_puts(s, + if (e) + hl_engine_data_sprintf(e, "\nMME is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 ARCH_STATUS\n" "--- ------- ------------ -------------- -----------\n"); @@ -5207,10 +5207,10 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, if (mask && !is_eng_idle) set_bit(GOYA_ENGINE_ID_MME_0, mask); - if (s) { - seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0, + if (e) { + hl_engine_data_sprintf(e, fmt, 0, is_eng_idle ? 
"Y" : "N", qm_glbl_sts0, cmdq_glbl_sts0, mme_arch_sts); - seq_puts(s, "\n"); + hl_engine_data_sprintf(e, "\n"); } return is_idle; -- cgit v1.2.3 From 2d4c09e3f93a8496bcb03270a104640ef1ecd39c Mon Sep 17 00:00:00 2001 From: Yang Li Date: Thu, 14 Jul 2022 09:29:08 +0800 Subject: habanalabs: Simplify bool conversion Fix the following coccicheck warning: ./drivers/misc/habanalabs/gaudi2/gaudi2.c:9727:48-53: WARNING: conversion to bool not needed here Signed-off-by: Yang Li Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 1140cf7db4a3..fd917e837075 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -9727,7 +9727,7 @@ static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id) { - bool is_pmmu = (mmu_id == HW_CAP_PMMU ? true : false); + bool is_pmmu = (mmu_id == HW_CAP_PMMU); struct gaudi2_device *gaudi2 = hdev->asic_specific; u32 mmu_base; -- cgit v1.2.3 From e4507995da974e2758621982941f9ff2ea18134b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 14 Jul 2022 11:31:19 +0100 Subject: habanalabs: Fix spelling mistake "Scrubing" -> "Scrubbing" There is a spelling mistake in a dev_dbg message. Fix it. Signed-off-by: Colin Ian King Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index a3eca13c3fa9..a7923960fce1 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -4723,7 +4723,7 @@ static int gaudi_scrub_device_mem(struct hl_device *hdev) addr = prop->sram_user_base_address; size = hdev->pldm ? 
0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET; - dev_dbg(hdev->dev, "Scrubing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n", + dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n", addr, addr + size, val); rc = gaudi_memset_device_memory(hdev, addr, size, val); if (rc) { -- cgit v1.2.3 From bc9b271e6c92b5f3bfe25f73b11e8e878f386075 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Wed, 13 Jul 2022 15:08:09 +0300 Subject: habanalabs: rename non_hard_reset to compute_reset In order to be more explicit we should use the term compute_reset for describing the reset in which only the compute engines gets reset. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 2 +- drivers/misc/habanalabs/common/habanalabs.h | 4 ++-- drivers/misc/habanalabs/gaudi/gaudi.c | 4 ++-- drivers/misc/habanalabs/gaudi2/gaudi2.c | 4 ++-- drivers/misc/habanalabs/goya/goya.c | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index b30aeb1c657f..90e346727a7c 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1556,7 +1556,7 @@ kill_processes: if (!hdev->asic_prop.fw_security_enabled) hl_fw_set_max_power(hdev); } else { - rc = hdev->asic_funcs->non_hard_reset_late_init(hdev); + rc = hdev->asic_funcs->compute_reset_late_init(hdev); if (rc) { if (reset_upon_device_release) dev_err(hdev->dev, diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 440e154dbe31..6d7b66cd50f1 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1446,7 +1446,7 @@ struct engines_data { * @send_heartbeat: send is-alive packet to CPU-CP and verify response. * @debug_coresight: perform certain actions on Coresight for debugging. 
* @is_device_idle: return true if device is idle, false otherwise. - * @non_hard_reset_late_init: perform certain actions needed after a reset which is not hard-reset + * @compute_reset_late_init: perform certain actions needed after a compute reset * @hw_queues_lock: acquire H/W queues lock. * @hw_queues_unlock: release H/W queues lock. * @kdma_lock: acquire H/W queues lock. Relevant from GRECO ASIC @@ -1584,7 +1584,7 @@ struct hl_asic_funcs { int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data); bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, struct engines_data *e); - int (*non_hard_reset_late_init)(struct hl_device *hdev); + int (*compute_reset_late_init)(struct hl_device *hdev); void (*hw_queues_lock)(struct hl_device *hdev); void (*hw_queues_unlock)(struct hl_device *hdev); void (*kdma_lock)(struct hl_device *hdev, int dcore_id); diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index a7923960fce1..20f62730be02 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7427,7 +7427,7 @@ static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type, event_type, desc); } -static int gaudi_non_hard_reset_late_init(struct hl_device *hdev) +static int gaudi_compute_reset_late_init(struct hl_device *hdev) { /* GAUDI doesn't support any reset except hard-reset */ return -EPERM; @@ -9193,7 +9193,7 @@ static const struct hl_asic_funcs gaudi_funcs = { .send_heartbeat = gaudi_send_heartbeat, .debug_coresight = gaudi_debug_coresight, .is_device_idle = gaudi_is_device_idle, - .non_hard_reset_late_init = gaudi_non_hard_reset_late_init, + .compute_reset_late_init = gaudi_compute_reset_late_init, .hw_queues_lock = gaudi_hw_queues_lock, .hw_queues_unlock = gaudi_hw_queues_unlock, .kdma_lock = NULL, diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index fd917e837075..ab6ad06cec03 
100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -6129,7 +6129,7 @@ done: return ret_val; } -static int gaudi2_non_hard_reset_late_init(struct hl_device *hdev) +static int gaudi2_compute_reset_late_init(struct hl_device *hdev) { struct gaudi2_device *gaudi2 = hdev->asic_specific; size_t irq_arr_size; @@ -9930,7 +9930,7 @@ static const struct hl_asic_funcs gaudi2_funcs = { .send_heartbeat = gaudi2_send_heartbeat, .debug_coresight = gaudi2_debug_coresight, .is_device_idle = gaudi2_is_device_idle, - .non_hard_reset_late_init = gaudi2_non_hard_reset_late_init, + .compute_reset_late_init = gaudi2_compute_reset_late_init, .hw_queues_lock = gaudi2_hw_queues_lock, .hw_queues_unlock = gaudi2_hw_queues_unlock, .kdma_lock = gaudi2_kdma_lock, diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 7b9f7f8b51f4..d4459c290ea8 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -4559,7 +4559,7 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, return rc; } -static int goya_non_hard_reset_late_init(struct hl_device *hdev) +static int goya_compute_reset_late_init(struct hl_device *hdev) { /* * Unmask all IRQs since some could have been received @@ -5478,7 +5478,7 @@ static const struct hl_asic_funcs goya_funcs = { .send_heartbeat = goya_send_heartbeat, .debug_coresight = goya_debug_coresight, .is_device_idle = goya_is_device_idle, - .non_hard_reset_late_init = goya_non_hard_reset_late_init, + .compute_reset_late_init = goya_compute_reset_late_init, .hw_queues_lock = goya_hw_queues_lock, .hw_queues_unlock = goya_hw_queues_unlock, .kdma_lock = NULL, -- cgit v1.2.3 From 28742772a0bb798f67a774e91172e06a18cd9855 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Tue, 12 Jul 2022 18:19:11 +0300 Subject: habanalabs/gaudi2: enable all MMU SPI/SEI interrupts Currently only part of the MMU SPI/SEI interrupts are enabled, although there is no 
real reason to not enable all. The only exception is "burst_fifo_full", which is expected for PMMU because it has a 2-entry FIFO, and thus it is not enabled for it. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index ab6ad06cec03..465d9c319c3c 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -117,6 +117,12 @@ #define MMU_RANGE_INV_ASID_EN_SHIFT 1 #define MMU_RANGE_INV_ASID_SHIFT 2 +/* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in PMMU because it has + * a 2 entries FIFO, and hence it is not enabled for it. + */ +#define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0) +#define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0) + #define GAUDI2_MAX_STRING_LEN 64 #define GAUDI2_VDEC_MSIX_ENTRIES (GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \ @@ -4956,8 +4962,7 @@ static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base) return 0; } -static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, - u32 stlb_base) +static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base) { u32 status, timeout_usec; int rc; @@ -4985,7 +4990,6 @@ static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, return rc; WREG32(mmu_base + MMU_BYPASS_OFFSET, 0); - WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, 0xF); rc = hl_poll_timeout( hdev, @@ -5042,6 +5046,8 @@ static int gaudi2_pci_mmu_init(struct hl_device *hdev) DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK); } + WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK); + rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); if (rc) return rc; @@ -5092,6 
+5098,8 @@ static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id, RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1, STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK); + WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK); + rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); if (rc) return rc; -- cgit v1.2.3 From 913bd4179b82adfeece29243711ccaf4330772b6 Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Wed, 13 Jul 2022 13:47:23 +0300 Subject: habanalabs: add return code field to module iterator Up until now the module iterator called void callback functions, so a caller activating a callback that may fail suffered from 2 issues: 1. The need to "plant" the return code in the private data. This is a drawback since the iterator itself should not be aware of the private data of the caller. 2. Due to 1, even upon a failure the iterator would keep iterating instead of breaking upon error. To overcome this, an optional rc field is added to the iterator context. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 7 +++++-- drivers/misc/habanalabs/gaudi2/gaudi2.c | 26 ++++++++++++++++-------- drivers/misc/habanalabs/gaudi2/gaudi2_security.c | 19 ++++++++--------- 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 6d7b66cd50f1..8c2c94fb1322 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -208,6 +208,7 @@ enum hl_protection_levels { * struct iterate_module_ctx - HW module iterator * @fn: function to apply to each HW module instance * @data: optional internal data to the function iterator + * @rc: return code for optional use of iterator/iterator-caller */ struct iterate_module_ctx { /* @@ -217,10 +218,12 @@ struct iterate_module_ctx { * @inst: HW module instance within the block * @offset: current HW 
module instance offset from the 1-st HW module instance * in the 1-st block - * @data: function specific data + * @ctx: the iterator context. */ - void (*fn)(struct hl_device *hdev, int block, int inst, u32 offset, void *data); + void (*fn)(struct hl_device *hdev, int block, int inst, u32 offset, + struct iterate_module_ctx *ctx); void *data; + int rc; }; struct hl_block_glbl_sec { diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 465d9c319c3c..3531a339e742 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -1712,6 +1712,9 @@ void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx) int dcore, inst, tpc_seq; u32 offset; + /* init the return code */ + ctx->rc = 0; + for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) { for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) { tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst; @@ -1721,7 +1724,12 @@ void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx) offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst); - ctx->fn(hdev, dcore, inst, offset, ctx->data); + ctx->fn(hdev, dcore, inst, offset, ctx); + if (ctx->rc) { + dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n", + dcore, inst); + return; + } } } @@ -1730,7 +1738,9 @@ void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx) /* special check for PCI TPC (DCORE0_TPC6) */ offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1); - ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx->data); + ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx); + if (ctx->rc) + dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n"); } static bool gaudi2_host_phys_addr_valid(u64 addr) @@ -4507,10 +4517,10 @@ struct gaudi2_tpc_init_cfg_data { }; static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst, - u32 offset, void *data) + u32 offset, struct iterate_module_ctx *ctx) { struct gaudi2_device 
*gaudi2 = hdev->asic_specific; - struct gaudi2_tpc_init_cfg_data *cfg_data = data; + struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data; u32 queue_id_base; u8 seq; @@ -6155,9 +6165,9 @@ static int gaudi2_compute_reset_late_init(struct hl_device *hdev) } static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset, - void *data) + struct iterate_module_ctx *ctx) { - struct gaudi2_tpc_idle_data *idle_data = (struct gaudi2_tpc_idle_data *)data; + struct gaudi2_tpc_idle_data *idle_data = ctx->data; u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts; bool is_eng_idle; int engine_idx; @@ -6736,9 +6746,9 @@ static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid) } static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset, - void *data) + struct iterate_module_ctx *ctx) { - struct gaudi2_tpc_mmu_data *mmu_data = (struct gaudi2_tpc_mmu_data *)data; + struct gaudi2_tpc_mmu_data *mmu_data = ctx->data; WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0); WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid); diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2_security.c b/drivers/misc/habanalabs/gaudi2/gaudi2_security.c index 89a06ff5ba34..c4165db06db2 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2_security.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2_security.c @@ -2583,9 +2583,9 @@ struct gaudi2_tpc_pb_data { }; static void gaudi2_config_tpcs_glbl_sec(struct hl_device *hdev, int dcore, int inst, u32 offset, - void *data) + struct iterate_module_ctx *ctx) { - struct gaudi2_tpc_pb_data *pb_data = (struct gaudi2_tpc_pb_data *)data; + struct gaudi2_tpc_pb_data *pb_data = ctx->data; hl_config_glbl_sec(hdev, gaudi2_pb_dcr0_tpc0, pb_data->glbl_sec, offset, pb_data->block_array_size); @@ -2660,15 +2660,14 @@ static int gaudi2_init_pb_tpc(struct hl_device *hdev) struct gaudi2_tpc_arc_pb_data { u32 unsecured_regs_arr_size; u32 arc_regs_arr_size; - int rc; }; static void 
gaudi2_config_tpcs_pb_ranges(struct hl_device *hdev, int dcore, int inst, u32 offset, - void *data) + struct iterate_module_ctx *ctx) { - struct gaudi2_tpc_arc_pb_data *pb_data = (struct gaudi2_tpc_arc_pb_data *)data; + struct gaudi2_tpc_arc_pb_data *pb_data = ctx->data; - pb_data->rc |= hl_init_pb_ranges(hdev, HL_PB_SHARED, HL_PB_NA, 1, + ctx->rc = hl_init_pb_ranges(hdev, HL_PB_SHARED, HL_PB_NA, 1, offset, gaudi2_pb_dcr0_tpc0_arc, pb_data->arc_regs_arr_size, gaudi2_pb_dcr0_tpc0_arc_unsecured_regs, @@ -2683,12 +2682,12 @@ static int gaudi2_init_pb_tpc_arc(struct hl_device *hdev) tpc_arc_pb_data.arc_regs_arr_size = ARRAY_SIZE(gaudi2_pb_dcr0_tpc0_arc); tpc_arc_pb_data.unsecured_regs_arr_size = ARRAY_SIZE(gaudi2_pb_dcr0_tpc0_arc_unsecured_regs); - tpc_arc_pb_data.rc = 0; + tpc_iter.fn = &gaudi2_config_tpcs_pb_ranges; tpc_iter.data = &tpc_arc_pb_data; gaudi2_iterate_tpcs(hdev, &tpc_iter); - return tpc_arc_pb_data.rc; + return tpc_iter.rc; } static int gaudi2_init_pb_sm_objs(struct hl_device *hdev) @@ -3547,9 +3546,9 @@ struct gaudi2_ack_pb_tpc_data { }; static void gaudi2_ack_pb_tpc_config(struct hl_device *hdev, int dcore, int inst, u32 offset, - void *data) + struct iterate_module_ctx *ctx) { - struct gaudi2_ack_pb_tpc_data *pb_data = (struct gaudi2_ack_pb_tpc_data *)data; + struct gaudi2_ack_pb_tpc_data *pb_data = ctx->data; hl_ack_pb_single_dcore(hdev, offset, HL_PB_SINGLE_INSTANCE, HL_PB_NA, gaudi2_pb_dcr0_tpc0, pb_data->tpc_regs_array_size); -- cgit v1.2.3 From cd6b0cea89862a5b3411246a2410881a988d5b0f Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 6 Apr 2022 12:07:19 +0300 Subject: habanalabs/gaudi: increase default cs timeout to 10 minutes In order to improve scalability and reduce host overhead, it is better to increase the default TDR timeout of Gaudi1 from 30 seconds to 10 minutes. This will allow the DL Framework (e.g. PyTorch, TensorFlow) to remove the host sync they are using now and improve overall performance on scaleout training. 
Note that one can always set the timeout to a custom value via a kernel module parameter given during driver load. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs_drv.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index f733ead605e7..d59d8cdf33e6 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -27,7 +27,10 @@ static struct class *hl_class; static DEFINE_IDR(hl_devs_idr); static DEFINE_MUTEX(hl_devs_idr_lock); -static int timeout_locked = 30; +#define HL_DEFAULT_TIMEOUT_LOCKED 30 /* 30 seconds */ +#define GAUDI_DEFAULT_TIMEOUT_LOCKED 600 /* 10 minutes */ + +static int timeout_locked = HL_DEFAULT_TIMEOUT_LOCKED; static int reset_on_lockup = 1; static int memory_scrub; static ulong boot_error_status_mask = ULONG_MAX; @@ -314,12 +317,22 @@ static void copy_kernel_module_params_to_device(struct hl_device *hdev) hdev->boot_error_status_mask = boot_error_status_mask; } -static void fixup_device_params_per_asic(struct hl_device *hdev) +static void fixup_device_params_per_asic(struct hl_device *hdev, int timeout) { switch (hdev->asic_type) { - case ASIC_GOYA: case ASIC_GAUDI: case ASIC_GAUDI_SEC: + /* If user didn't request a different timeout than the default one, we have + * a different default timeout for Gaudi + */ + if (timeout == HL_DEFAULT_TIMEOUT_LOCKED) + hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED * + MSEC_PER_SEC); + + hdev->reset_upon_device_release = 0; + break; + + case ASIC_GOYA: hdev->reset_upon_device_release = 0; break; @@ -339,7 +352,7 @@ static int fixup_device_params(struct hl_device *hdev) hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; if (tmp_timeout) - hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * 1000); + hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * 
MSEC_PER_SEC); else hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; @@ -360,7 +373,7 @@ static int fixup_device_params(struct hl_device *hdev) if (!hdev->cpu_queues_enable) hdev->heartbeat = 0; - fixup_device_params_per_asic(hdev); + fixup_device_params_per_asic(hdev, tmp_timeout); return 0; } -- cgit v1.2.3 From ae937492ecc7e561ace4f01c6c0c14e868744d58 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Mon, 18 Jul 2022 21:02:34 +0300 Subject: habanalabs/gaudi2: remove old interrupt mappings Interrupt enumeration has changed some time ago but the old mapping was accidentally left in the driver. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2P.h | 3 -- .../include/gaudi2/gaudi2_async_virt_events.h | 57 ---------------------- 2 files changed, 60 deletions(-) delete mode 100644 drivers/misc/habanalabs/include/gaudi2/gaudi2_async_virt_events.h diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2P.h b/drivers/misc/habanalabs/gaudi2/gaudi2P.h index e4bc4009f05b..5110574a650e 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2P.h +++ b/drivers/misc/habanalabs/gaudi2/gaudi2P.h @@ -15,7 +15,6 @@ #include "../include/gaudi2/gaudi2_packets.h" #include "../include/gaudi2/gaudi2_fw_if.h" #include "../include/gaudi2/gaudi2_async_events.h" -#include "../include/gaudi2/gaudi2_async_virt_events.h" #define GAUDI2_LINUX_FW_FILE "habanalabs/gaudi2/gaudi2-fit.itb" #define GAUDI2_BOOT_FIT_FILE "habanalabs/gaudi2/gaudi2-boot-fit.itb" @@ -511,8 +510,6 @@ struct dup_block_ctx { * @hbm_cfg: HBM subsystem settings * @hw_queues_lock_mutex: used by simulator instead of hw_queues_lock. * @kdma_lock_mutex: used by simulator instead of kdma_lock.
- * @use_deprecated_event_mappings: use old event mappings which are about to be - * deprecated */ struct gaudi2_device { int (*cpucp_info_get)(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_virt_events.h b/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_virt_events.h deleted file mode 100644 index 6d6ed7838a64..000000000000 --- a/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_virt_events.h +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright 2022 HabanaLabs, Ltd. - * All Rights Reserved. - * - */ - -#ifndef __GAUDI2_ASYNC_VIRT_EVENTS_H_ -#define __GAUDI2_ASYNC_VIRT_EVENTS_H_ - -enum gaudi2_async_virt_event_id { - GAUDI2_EVENT_NIC3_QM1_OLD = 1206, - GAUDI2_EVENT_NIC4_QM0_OLD = 1207, - GAUDI2_EVENT_NIC4_QM1_OLD = 1208, - GAUDI2_EVENT_NIC5_QM0_OLD = 1209, - GAUDI2_EVENT_NIC5_QM1_OLD = 1210, - GAUDI2_EVENT_NIC6_QM0_OLD = 1211, - GAUDI2_EVENT_NIC6_QM1_OLD = 1212, - GAUDI2_EVENT_NIC7_QM0_OLD = 1213, - GAUDI2_EVENT_NIC7_QM1_OLD = 1214, - GAUDI2_EVENT_NIC8_QM0_OLD = 1215, - GAUDI2_EVENT_NIC8_QM1_OLD = 1216, - GAUDI2_EVENT_NIC9_QM0_OLD = 1217, - GAUDI2_EVENT_NIC9_QM1_OLD = 1218, - GAUDI2_EVENT_NIC10_QM0_OLD = 1219, - GAUDI2_EVENT_NIC10_QM1_OLD = 1220, - GAUDI2_EVENT_NIC11_QM0_OLD = 1221, - GAUDI2_EVENT_NIC11_QM1_OLD = 1222, - GAUDI2_EVENT_CPU_PKT_SANITY_FAILED_OLD = 1223, - GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0_OLD = 1224, - GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG1_OLD = 1225, - GAUDI2_EVENT_CPU1_STATUS_NIC1_ENG0_OLD = 1226, - GAUDI2_EVENT_CPU1_STATUS_NIC1_ENG1_OLD = 1227, - GAUDI2_EVENT_CPU2_STATUS_NIC2_ENG0_OLD = 1228, - GAUDI2_EVENT_CPU2_STATUS_NIC2_ENG1_OLD = 1229, - GAUDI2_EVENT_CPU3_STATUS_NIC3_ENG0_OLD = 1230, - GAUDI2_EVENT_CPU3_STATUS_NIC3_ENG1_OLD = 1231, - GAUDI2_EVENT_CPU4_STATUS_NIC4_ENG0_OLD = 1232, - GAUDI2_EVENT_CPU4_STATUS_NIC4_ENG1_OLD = 1233, - GAUDI2_EVENT_CPU5_STATUS_NIC5_ENG0_OLD = 1234, - GAUDI2_EVENT_CPU5_STATUS_NIC5_ENG1_OLD = 1235, - GAUDI2_EVENT_CPU6_STATUS_NIC6_ENG0_OLD 
= 1236, - GAUDI2_EVENT_CPU6_STATUS_NIC6_ENG1_OLD = 1237, - GAUDI2_EVENT_CPU7_STATUS_NIC7_ENG0_OLD = 1238, - GAUDI2_EVENT_CPU7_STATUS_NIC7_ENG1_OLD = 1239, - GAUDI2_EVENT_CPU8_STATUS_NIC8_ENG0_OLD = 1240, - GAUDI2_EVENT_CPU8_STATUS_NIC8_ENG1_OLD = 1241, - GAUDI2_EVENT_CPU9_STATUS_NIC9_ENG0_OLD = 1242, - GAUDI2_EVENT_CPU9_STATUS_NIC9_ENG1_OLD = 1243, - GAUDI2_EVENT_CPU10_STATUS_NIC10_ENG0_OLD = 1244, - GAUDI2_EVENT_CPU10_STATUS_NIC10_ENG1_OLD = 1245, - GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG0_OLD = 1246, - GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1_OLD = 1247, - GAUDI2_EVENT_ARC_DCCM_FULL_OLD = 1248, -}; - -#endif /* __GAUDI2_ASYNC_VIRT_EVENTS_H_ */ -- cgit v1.2.3 From f25a72b8b9f4885ea30263a02d1083e283c9e718 Mon Sep 17 00:00:00 2001 From: Bharat Jauhari Date: Wed, 1 Jun 2022 15:18:47 +0300 Subject: habanalabs: fix spelling mistakes Cosmetic commit, no logical changes. It just fixes the spelling mistakes. Signed-off-by: Bharat Jauhari Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 8 ++++---- drivers/misc/habanalabs/common/habanalabs.h | 27 +++++++++++++-------------- drivers/misc/habanalabs/common/memory_mgr.c | 10 +++++----- include/uapi/misc/habanalabs.h | 10 +++++----- 4 files changed, 27 insertions(+), 28 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 608ca67527a5..58c1eff16df6 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -1514,7 +1514,7 @@ int hl_fw_read_preboot_status(struct hl_device *hdev) hdev->asic_funcs->init_firmware_preload_params(hdev); /* - * In order to determine boot method (static VS dymanic) we need to + * In order to determine boot method (static VS dynamic) we need to * read the boot caps register */ rc = hl_fw_read_preboot_caps(hdev); @@ -1781,7 +1781,7 @@ int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev, * * @return the CRC32 result * - * NOTE: 
kernel's CRC32 differ's from standard CRC32 calculation. + * NOTE: kernel's CRC32 differs from standard CRC32 calculation. * in order to be aligned we need to flip the bits of both the input * initial CRC and kernel's CRC32 result. * in addition both sides use initial CRC of 0, @@ -1798,7 +1798,7 @@ static u32 hl_fw_compat_crc32(u8 *data, size_t size) * * @hdev: pointer to the habanalabs device structure * @addr: device address of memory transfer - * @size: memory transter size + * @size: memory transfer size * @region: PCI memory region * * @return 0 on success, otherwise non-zero error code @@ -2547,7 +2547,7 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, /* * when testing FW load (without Linux) on PLDM we don't want to * wait until boot fit is active as it may take several hours. - * instead, we load the bootfit and let it do all initializations in + * instead, we load the bootfit and let it do all initialization in * the background. */ if (hdev->pldm && !(hdev->fw_components & FW_TYPE_LINUX)) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 8c2c94fb1322..350bc5b9f174 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -94,7 +94,7 @@ struct hl_fpriv; #define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ /** - * enum hl_mmu_page_table_locaion - mmu page table location + * enum hl_mmu_page_table_location - mmu page table location * @MMU_DR_PGT: page-table is located on device DRAM. * @MMU_HR_PGT: page-table is located on host memory. * @MMU_NUM_PGT_LOCATIONS: number of page-table locations currently supported. @@ -800,7 +800,7 @@ struct hl_fence { * @lock: spinlock to protect fence. * @hdev: habanalabs device structure. * @hw_sob: the H/W SOB used in this signal/wait CS. - * @encaps_sig_hdl: encaps signals hanlder. + * @encaps_sig_hdl: encaps signals handler. * @cs_seq: command submission sequence number. 
* @type: type of the CS - signal/wait. * @sob_val: the SOB value that is used in this signal/wait CS. @@ -908,7 +908,7 @@ struct hl_mmap_mem_buf { * @size: holds the CB's size. * @cs_cnt: holds number of CS that this CB participates in. * @is_pool: true if CB was acquired from the pool, false otherwise. - * @is_internal: internaly allocated + * @is_internal: internally allocated * @is_mmu_mapped: true if the CB is mapped to the device's MMU. */ struct hl_cb { @@ -1116,7 +1116,7 @@ struct timestamp_reg_info { * @fence: hl fence object for interrupt completion * @cq_target_value: CQ target value * @cq_kernel_addr: CQ kernel address, to be used in the cq interrupt - * handler for taget value comparison + * handler for target value comparison */ struct hl_user_pending_interrupt { struct timestamp_reg_info ts_reg_info; @@ -1742,10 +1742,10 @@ struct hl_cs_outcome { /** * struct hl_cs_outcome_store - represents a limited store of completed CS outcomes - * @outcome_map: index of completed CS searcheable by sequence number + * @outcome_map: index of completed CS searchable by sequence number * @used_list: list of outcome objects currently in use * @free_list: list of outcome objects currently not in use - * @nodes_pool: a static pool of preallocated outcome objects + * @nodes_pool: a static pool of pre-allocated outcome objects * @db_lock: any operation on the store must take this lock */ struct hl_cs_outcome_store { @@ -1769,7 +1769,7 @@ struct hl_cs_outcome_store { * @refcount: reference counter for the context. Context is released only when * this hits 0l. It is incremented on CS and CS_WAIT. * @cs_pending: array of hl fence objects representing pending CS. - * @outcome_store: storage data structure used to remember ouitcomes of completed + * @outcome_store: storage data structure used to remember outcomes of completed * command submissions for a long time after CS id wraparound. * @va_range: holds available virtual addresses for host and dram mappings. 
* @mem_hash_lock: protects the mem_hash. @@ -1838,7 +1838,6 @@ struct hl_ctx_mgr { }; - /* * COMMAND SUBMISSIONS */ @@ -1904,7 +1903,7 @@ struct hl_userptr { * @tdr_active: true if TDR was activated for this CS (to prevent * double TDR activation). * @aborted: true if CS was aborted due to some device error. - * @timestamp: true if a timestmap must be captured upon completion. + * @timestamp: true if a timestamp must be captured upon completion. * @staged_last: true if this is the last staged CS and needs completion. * @staged_first: true if this is the first staged CS and we need to receive * timeout for this CS. @@ -2229,7 +2228,7 @@ struct hl_info_list { /** * struct hl_debugfs_entry - debugfs dentry wrapper. - * @info_ent: dentry realted ops. + * @info_ent: dentry related ops. * @dev_entry: ASIC specific debugfs manager. */ struct hl_debugfs_entry { @@ -2934,7 +2933,7 @@ struct razwi_info { * struct undefined_opcode_info - info about last undefined opcode error * @timestamp: timestamp of the undefined opcode error * @cb_addr_streams: CB addresses (per stream) that are currently exists in the PQ - * entiers. In case all streams array entries are + * entries. In case all streams array entries are * filled with values, it means the execution was in Lower-CP. * @cq_addr: the address of the current handled command buffer * @cq_size: the size of the current handled command buffer @@ -2975,7 +2974,7 @@ struct last_error_session_info { /** * struct hl_reset_info - holds current device reset information. * @lock: lock to protect critical reset flows. - * @compute_reset_cnt: number of compte resets since the driver was loaded. + * @compute_reset_cnt: number of compute resets since the driver was loaded. * @hard_reset_cnt: number of hard resets since the driver was loaded. * @hard_reset_schedule_flags: hard reset is scheduled to after current compute reset, * here we hold the hard reset flags. 
@@ -2986,7 +2985,7 @@ struct last_error_session_info { * @hard_reset_pending: is there a hard reset work pending. * @curr_reset_cause: saves an enumerated reset cause when a hard reset is * triggered, and cleared after it is shared with preboot. - * @prev_reset_trigger: saves the previous trigger which caused a reset, overidden + * @prev_reset_trigger: saves the previous trigger which caused a reset, overridden * with a new value on next reset * @reset_trigger_repeated: set if device reset is triggered more than once with * same cause. @@ -3064,7 +3063,7 @@ struct hl_reset_info { * @hl_chip_info: ASIC's sensors information. * @device_status_description: device status description. * @hl_debugfs: device's debugfs manager. - * @cb_pool: list of preallocated CBs. + * @cb_pool: list of pre allocated CBs. * @cb_pool_lock: protects the CB pool. * @internal_cb_pool_virt_addr: internal command buffer pool virtual address. * @internal_cb_pool_dma_addr: internal command buffer pool dma address. diff --git a/drivers/misc/habanalabs/common/memory_mgr.c b/drivers/misc/habanalabs/common/memory_mgr.c index 56df962d2f3c..1936d653699e 100644 --- a/drivers/misc/habanalabs/common/memory_mgr.c +++ b/drivers/misc/habanalabs/common/memory_mgr.c @@ -11,7 +11,7 @@ * hl_mmap_mem_buf_get - increase the buffer refcount and return a pointer to * the buffer descriptor. * - * @mmg: parent unifed memory manager + * @mmg: parent unified memory manager * @handle: requested buffer handle * * Find the buffer in the store and return a pointer to its descriptor. @@ -104,7 +104,7 @@ int hl_mmap_mem_buf_put(struct hl_mmap_mem_buf *buf) * hl_mmap_mem_buf_put_handle - decrease the reference to the buffer with the * given handle. * - * @mmg: parent unifed memory manager + * @mmg: parent unified memory manager * @handle: requested buffer handle * * Decrease the reference to the buffer, and release it if it was the last one. 
@@ -137,7 +137,7 @@ int hl_mmap_mem_buf_put_handle(struct hl_mem_mgr *mmg, u64 handle) /** * hl_mmap_mem_buf_alloc - allocate a new mappable buffer * - * @mmg: parent unifed memory manager + * @mmg: parent unified memory manager * @behavior: behavior object describing this buffer polymorphic behavior * @gfp: gfp flags to use for the memory allocations * @args: additional args passed to behavior->alloc @@ -222,7 +222,7 @@ static const struct vm_operations_struct hl_mmap_mem_buf_vm_ops = { /** * hl_mem_mgr_mmap - map the given buffer to the user * - * @mmg: unifed memory manager + * @mmg: unified memory manager * @vma: the vma object for which mmap was closed. * @args: additional args passed to behavior->mmap * @@ -322,7 +322,7 @@ void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg) /** * hl_mem_mgr_fini - release unified memory manager * - * @mmg: parent unifed memory manager + * @mmg: parent unified memory manager * * Release the unified memory manager. Shall be called from an interrupt context. */ diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 5d06d5c74dd1..be06b1307c44 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -876,13 +876,13 @@ struct hl_info_hw_idle { __u32 is_idle; /* * Bitmask of busy engines. - * Bits definition is according to `enum _enging_id'. + * Bits definition is according to `enum _engine_id'. */ __u32 busy_engines_mask; /* * Extended Bitmask of busy engines. - * Bits definition is according to `enum _enging_id'. + * Bits definition is according to `enum _engine_id'. */ __u64 busy_engines_mask_ext[HL_BUSY_ENGINES_MASK_EXT_SIZE]; }; @@ -1078,12 +1078,12 @@ struct hl_info_razwi_event { * struct hl_info_undefined_opcode_event - info about last undefined opcode error * @timestamp: timestamp of the undefined opcode error * @cb_addr_streams: CB addresses (per stream) that are currently exists in the PQ - * entiers. In case all streams array entries are + * entries. 
In case all streams array entries are * filled with values, it means the execution was in Lower-CP. * @cq_addr: the address of the current handled command buffer * @cq_size: the size of the current handled command buffer * @cb_addr_streams_len: num of streams - actual len of cb_addr_streams array. - * should be equal to 1 incase of undefined opcode + * should be equal to 1 in case of undefined opcode * in Upper-CP (specific stream) and equal to 4 incase * of undefined opcode in Lower-CP. * @engine_id: engine-id that the error occurred on @@ -1412,7 +1412,7 @@ struct hl_cs_out { /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ struct { - /* This is the resereved signal handle id */ + /* This is the reserved signal handle id */ __u32 handle_id; /* This is the signals count */ -- cgit v1.2.3 From 0c819c9a04413facd3b167ec1e6f5928e909fcb2 Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Mon, 18 Jul 2022 22:02:13 +0300 Subject: habanalabs: wrap macro arg with parentheses The macro argument is cast-ed to u32 in some of the places. Because this arg can be some arithmetic computation (e.g. address + offset) the cast should be on the whole expression. 
Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 350bc5b9f174..d3efec600458 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2506,7 +2506,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); break; \ (val) = __elbi_read; \ } else {\ - (val) = RREG32((u32)addr); \ + (val) = RREG32((u32)(addr)); \ } \ if (cond) \ break; \ @@ -2517,7 +2517,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); break; \ (val) = __elbi_read; \ } else {\ - (val) = RREG32((u32)addr); \ + (val) = RREG32((u32)(addr)); \ } \ break; \ } \ -- cgit v1.2.3 From 5f92c1e2961912e2a7a10d8e7b998b7cd9dd1d0e Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 19 Jul 2022 12:16:01 +0300 Subject: habanalabs: remove all kdma locks We don't use KDMA concurrently in the driver. The only use is through debugfs and we don't protect concurrent access through it. Reported-by: Dan Carpenter Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 4 ---- drivers/misc/habanalabs/gaudi/gaudi.c | 2 -- drivers/misc/habanalabs/gaudi2/gaudi2.c | 23 ----------------------- drivers/misc/habanalabs/gaudi2/gaudi2P.h | 3 --- drivers/misc/habanalabs/goya/goya.c | 2 -- 5 files changed, 34 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index d3efec600458..8d9e96c6092a 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1452,8 +1452,6 @@ struct engines_data { * @compute_reset_late_init: perform certain actions needed after a compute reset * @hw_queues_lock: acquire H/W queues lock. * @hw_queues_unlock: release H/W queues lock. 
- * @kdma_lock: acquire H/W queues lock. Relevant from GRECO ASIC - * @kdma_unlock: release H/W queues lock. Relevant from GRECO ASIC * @get_pci_id: retrieve PCI ID. * @get_eeprom_data: retrieve EEPROM data from F/W. * @get_monitor_dump: retrieve monitor registers dump from F/W. @@ -1590,8 +1588,6 @@ struct hl_asic_funcs { int (*compute_reset_late_init)(struct hl_device *hdev); void (*hw_queues_lock)(struct hl_device *hdev); void (*hw_queues_unlock)(struct hl_device *hdev); - void (*kdma_lock)(struct hl_device *hdev, int dcore_id); - void (*kdma_unlock)(struct hl_device *hdev, int dcore_id); u32 (*get_pci_id)(struct hl_device *hdev); int (*get_eeprom_data)(struct hl_device *hdev, void *data, size_t max_size); int (*get_monitor_dump)(struct hl_device *hdev, void *data); diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 20f62730be02..4d11efed3e64 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -9196,8 +9196,6 @@ static const struct hl_asic_funcs gaudi_funcs = { .compute_reset_late_init = gaudi_compute_reset_late_init, .hw_queues_lock = gaudi_hw_queues_lock, .hw_queues_unlock = gaudi_hw_queues_unlock, - .kdma_lock = NULL, - .kdma_unlock = NULL, .get_pci_id = gaudi_get_pci_id, .get_eeprom_data = gaudi_get_eeprom_data, .get_monitor_dump = gaudi_get_monitor_dump, diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 3531a339e742..2c43ed403509 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -3010,7 +3010,6 @@ static int gaudi2_sw_init(struct hl_device *hdev) } spin_lock_init(&gaudi2->hw_queues_lock); - spin_lock_init(&gaudi2->kdma_lock); gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE, &gaudi2->scratchpad_bus_address, @@ -6464,22 +6463,6 @@ static void gaudi2_hw_queues_unlock(struct hl_device *hdev) spin_unlock(&gaudi2->hw_queues_lock); } -static 
void gaudi2_kdma_lock(struct hl_device *hdev, int dcore_id) - __acquires(&gaudi2->kdma_lock) -{ - struct gaudi2_device *gaudi2 = hdev->asic_specific; - - spin_lock(&gaudi2->kdma_lock); -} - -static void gaudi2_kdma_unlock(struct hl_device *hdev, int dcore_id) - __releases(&gaudi2->kdma_lock) -{ - struct gaudi2_device *gaudi2 = hdev->asic_specific; - - spin_unlock(&gaudi2->kdma_lock); -} - static u32 gaudi2_get_pci_id(struct hl_device *hdev) { return hdev->pdev->device; @@ -9122,8 +9105,6 @@ static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, v goto unreserve_va; } - hdev->asic_funcs->kdma_lock(hdev, 0); - /* Enable MMU on KDMA */ gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid); @@ -9151,8 +9132,6 @@ static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, v gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID); - hdev->asic_funcs->kdma_unlock(hdev, 0); - mutex_lock(&ctx->mmu_lock); hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M); hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR, @@ -9951,8 +9930,6 @@ static const struct hl_asic_funcs gaudi2_funcs = { .compute_reset_late_init = gaudi2_compute_reset_late_init, .hw_queues_lock = gaudi2_hw_queues_lock, .hw_queues_unlock = gaudi2_hw_queues_unlock, - .kdma_lock = gaudi2_kdma_lock, - .kdma_unlock = gaudi2_kdma_unlock, .get_pci_id = gaudi2_get_pci_id, .get_eeprom_data = gaudi2_get_eeprom_data, .get_monitor_dump = gaudi2_get_monitor_dump, diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2P.h b/drivers/misc/habanalabs/gaudi2/gaudi2P.h index 5110574a650e..347ea1dd78e2 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2P.h +++ b/drivers/misc/habanalabs/gaudi2/gaudi2P.h @@ -457,7 +457,6 @@ struct dup_block_ctx { * the user can map. * @lfsr_rand_seeds: array of MME ACC random seeds to set. * @hw_queues_lock: protects the H/W queues from concurrent access. - * @kdma_lock: protects the KDMA engine from concurrent access. 
* @scratchpad_kernel_address: general purpose PAGE_SIZE contiguous memory, * this memory region should be write-only. * currently used for HBW QMAN writes which is @@ -509,7 +508,6 @@ struct dup_block_ctx { * @flush_db_fifo: flag to force flush DB FIFO after a write. * @hbm_cfg: HBM subsystem settings * @hw_queues_lock_mutex: used by simulator instead of hw_queues_lock. - * @kdma_lock_mutex: used by simulator instead of kdma_lock. */ struct gaudi2_device { int (*cpucp_info_get)(struct hl_device *hdev); @@ -518,7 +516,6 @@ struct gaudi2_device { int lfsr_rand_seeds[MME_NUM_OF_LFSR_SEEDS]; spinlock_t hw_queues_lock; - spinlock_t kdma_lock; void *scratchpad_kernel_address; dma_addr_t scratchpad_bus_address; diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index d4459c290ea8..91429d6ea037 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -5481,8 +5481,6 @@ static const struct hl_asic_funcs goya_funcs = { .compute_reset_late_init = goya_compute_reset_late_init, .hw_queues_lock = goya_hw_queues_lock, .hw_queues_unlock = goya_hw_queues_unlock, - .kdma_lock = NULL, - .kdma_unlock = NULL, .get_pci_id = goya_get_pci_id, .get_eeprom_data = goya_get_eeprom_data, .get_monitor_dump = goya_get_monitor_dump, -- cgit v1.2.3 From f018c54e3de6619c46e33ab1c613761e9fba21d0 Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Sat, 9 Jul 2022 12:34:17 +0300 Subject: habanalabs: add uapi to retrieve engines status Currently, to get engines status, user needed to read debugfs file with root permissions. This new uapi allows user space apps to retrieve status, so for example, in case of failure, status can be retrieved immediately by the application itself which runs without root permissions.
Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/debugfs.c | 3 +- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 40 +++++++++++++++++++++++ include/uapi/misc/habanalabs.h | 9 +++++ 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 90c91c1b2c10..c297248748d3 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -17,7 +17,6 @@ #define MMU_ASID_BUF_SIZE 10 #define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE) #define I2C_MAX_TRANSACTION_LEN 8 -#define ENGINES_DATA_MAX_SIZE SZ_16K static struct dentry *hl_debug_root; @@ -626,7 +625,7 @@ static int engines_show(struct seq_file *s, void *data) } eng_data.actual_size = 0; - eng_data.allocated_buf_size = ENGINES_DATA_MAX_SIZE; + eng_data.allocated_buf_size = HL_ENGINES_DATA_MAX_SIZE; eng_data.buf = vmalloc(eng_data.allocated_buf_size); if (!eng_data.buf) return -ENOMEM; diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 6a30bd98ab5e..ec55c66fedd6 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -14,6 +14,7 @@ #include #include #include +#include static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = { [HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr), @@ -697,6 +698,42 @@ static int eventfd_unregister(struct hl_fpriv *hpriv, struct hl_info_args *args) return 0; } +static int engine_status_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + u32 status_buf_size = args->return_size; + struct hl_device *hdev = hpriv->hdev; + struct engines_data eng_data; + int rc; + + if ((status_buf_size < SZ_1K) || (status_buf_size > HL_ENGINES_DATA_MAX_SIZE) || (!out)) + return 
-EINVAL; + + eng_data.actual_size = 0; + eng_data.allocated_buf_size = status_buf_size; + eng_data.buf = vmalloc(status_buf_size); + if (!eng_data.buf) + return -ENOMEM; + + hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data); + + if (eng_data.actual_size > eng_data.allocated_buf_size) { + dev_err(hdev->dev, + "Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n", + eng_data.actual_size, status_buf_size); + vfree(eng_data.buf); + return -ENOMEM; + } + + args->user_buffer_actual_size = eng_data.actual_size; + rc = copy_to_user(out, eng_data.buf, min_t(size_t, status_buf_size, eng_data.actual_size)) ? + -EFAULT : 0; + + vfree(eng_data.buf); + + return rc; +} + static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, struct device *dev) { @@ -812,6 +849,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_UNREGISTER_EVENTFD: return eventfd_unregister(hpriv, args); + case HL_INFO_ENGINE_STATUS: + return engine_status_info(hpriv, args); + default: dev_err(dev, "Invalid request %d\n", args->op); rc = -EINVAL; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index be06b1307c44..83ca6f40f4ba 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -787,10 +787,14 @@ enum hl_server_type { #define HL_INFO_UNREGISTER_EVENTFD 29 #define HL_INFO_GET_EVENTS 30 #define HL_INFO_UNDEFINED_OPCODE_EVENT 31 +#define HL_INFO_ENGINE_STATUS 32 #define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 +/* Maximum buffer size for retrieving engines status */ +#define HL_ENGINES_DATA_MAX_SIZE SZ_1M + /** * struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC * @sram_base_address: The first SRAM physical base address that is free to be @@ -1130,6 +1134,10 @@ enum gaudi_dcores { * resolution. Currently not in use. * @pll_index: Index as defined in hl__pll_index enumeration. * @eventfd: event file descriptor for event notifications. 
+ * @user_buffer_actual_size: Actual data size which was copied to user allocated buffer by the + * driver. It is possible for the user to allocate buffer larger than + * needed, hence updating this variable so user will know the exact amount + * of bytes copied by the kernel to the buffer. * @pad: Padding to 64 bit. */ struct hl_info_args { @@ -1143,6 +1151,7 @@ struct hl_info_args { __u32 period_ms; __u32 pll_index; __u32 eventfd; + __u32 user_buffer_actual_size; }; __u32 pad; -- cgit v1.2.3 From 21fc79336b9587fcc251e77246b68b6e20340146 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Wed, 20 Jul 2022 20:02:20 +0300 Subject: habanalabs/gaudi2: mark PCIE access error as fatal F/W events are enabled in a late phase of the device init, so an event for a PCIE access error during the init, can be received after the init is already done and considered as successful. A resulting device reset, which does the same H/W init, can end similarly with this event right after the reset is done and considered as successful, and a loop of this sequence can continue. To avoid it mark the PCIE access error as a fatal event, so after 2 consecutive events no more resets will be done. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 2c43ed403509..68ab407fa6ba 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -8532,6 +8532,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR: gaudi2_print_pcie_addr_dec_info(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; break; case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... 
GAUDI2_EVENT_HMMU12_SECURITY_ERROR: -- cgit v1.2.3 From d6501ecfb6233197b5b7dbd6aa5256636f3931f3 Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Sun, 24 Jul 2022 08:45:34 +0300 Subject: habanalabs/gaudi: fix print format for div_sel Print format was for int (%d) while variable is u32. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 4d11efed3e64..866dc4b891d6 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -939,9 +939,7 @@ static int gaudi_fetch_psoc_frequency(struct hl_device *hdev) else freq = pll_clk / (div_fctr + 1); } else { - dev_warn(hdev->dev, - "Received invalid div select value: %d", - div_sel); + dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel); freq = 0; } } -- cgit v1.2.3 From 68c82ba9a96f47dac9963c9f22b8c6bf4af00e02 Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Sun, 24 Jul 2022 10:23:05 +0300 Subject: habanalabs/gaudi: read div_sel value from firmware Even when running with unsecured f/w, we should read the PLL div_sel value from the f/w as this register is always privileged. 
Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 866dc4b891d6..7f52935dc603 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -899,12 +899,13 @@ static int gaudi_early_fini(struct hl_device *hdev) */ static int gaudi_fetch_psoc_frequency(struct hl_device *hdev) { - struct asic_fixed_properties *prop = &hdev->asic_prop; u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel; + struct asic_fixed_properties *prop = &hdev->asic_prop; u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq; int rc; - if (hdev->asic_prop.fw_security_enabled) { + if ((hdev->fw_components & FW_TYPE_LINUX) && + (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) { struct gaudi_device *gaudi = hdev->asic_specific; if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) -- cgit v1.2.3 From 0c876b47a54a5ae1331a99da9cc115f8f5c90990 Mon Sep 17 00:00:00 2001 From: Tal Cohen Date: Sun, 24 Jul 2022 17:40:23 +0300 Subject: habanalabs: fix command submission sanity check When a CS is submitted, the ioctl handler checks the CS flags and performs a sanity check, according to its value. As new CS flags are added, the sanity check needs to be updated according to the new flags. 
Signed-off-by: Tal Cohen Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 90a4574cbe2d..304e4f3b0e7e 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -12,7 +12,9 @@ #include #define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \ - HL_CS_FLAGS_COLLECTIVE_WAIT) + HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \ + HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY) + #define MAX_TS_ITER_NUM 10 @@ -1253,6 +1255,7 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args) u32 cs_type_flags, num_chunks; enum hl_device_status status; enum hl_cs_type cs_type; + bool is_sync_stream; if (!hl_device_operational(hdev, &status)) { return -EBUSY; @@ -1276,9 +1279,10 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args) cs_type = hl_cs_get_cs_type(cs_type_flags); num_chunks = args->in.num_chunks_execute; - if (unlikely((cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT || - cs_type == CS_TYPE_COLLECTIVE_WAIT) && - !hdev->supports_sync_stream)) { + is_sync_stream = (cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT || + cs_type == CS_TYPE_COLLECTIVE_WAIT); + + if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) { dev_err(hdev->dev, "Sync stream CS is not supported\n"); return -EINVAL; } @@ -1288,7 +1292,7 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args) dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid); return -EINVAL; } - } else if (num_chunks != 1) { + } else if (is_sync_stream && num_chunks != 1) { dev_err(hdev->dev, "Sync stream CS mandates one chunk only, context %d\n", ctx->asid); -- cgit v1.2.3 From 
7fa6c0fe8b2154f84162b8aacbe581df722a6f0c Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Wed, 27 Jul 2022 09:04:13 +0300 Subject: habanalabs: avoid returning a valid handle if map_block() fails map_block() sets the block id handle even if get_hw_block_id() fails, and in this case it uses block id 0 which might be a valid id. Modify it to set the handle only if get_hw_block_id() succeeds. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/memory.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 61bc1bfe984a..0698c3c363bd 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -1418,18 +1418,23 @@ vm_type_err: return rc; } -static int map_block(struct hl_device *hdev, u64 address, u64 *handle, - u32 *size) +static int map_block(struct hl_device *hdev, u64 address, u64 *handle, u32 *size) { - u32 block_id = 0; + u32 block_id; int rc; + *handle = 0; + if (size) + *size = 0; + rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id); + if (rc) + return rc; *handle = block_id | HL_MMAP_TYPE_BLOCK; *handle <<= PAGE_SHIFT; - return rc; + return 0; } static void hw_block_vm_close(struct vm_area_struct *vma) -- cgit v1.2.3 From 5f46217221dfdda94244d88ac6c1354293fc681b Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Fri, 29 Jul 2022 16:30:48 +0300 Subject: habanalabs: fix vma fields assignments order in hl_hw_block_mmap() In hl_hw_block_mmap(), the vma's 'vm_private_data' and 'vm_ops' fields are assigned before filling the content of the private data. In between there is a call to the ASIC hw_block_mmap() function, and if it fails, the vma close function will be called with a bad private data value. Fix the order of assignments to avoid this issue. 
In hl_hw_block_mmap() the vma's 'vm_private_data' and 'vm_ops' are assigned before setting the content of the private data. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/memory.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 0698c3c363bd..a027fa88889b 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -1492,23 +1492,22 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) if (!lnode) return -ENOMEM; - vma->vm_ops = &hw_block_vm_ops; - vma->vm_private_data = lnode; - - hl_ctx_get(ctx); - rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size); if (rc) { - hl_ctx_put(ctx); kfree(lnode); return rc; } + hl_ctx_get(ctx); + lnode->ctx = ctx; lnode->vaddr = vma->vm_start; lnode->size = block_size; lnode->id = block_id; + vma->vm_private_data = lnode; + vma->vm_ops = &hw_block_vm_ops; + mutex_lock(&ctx->hw_block_list_lock); list_add_tail(&lnode->node, &ctx->hw_block_mem_list); mutex_unlock(&ctx->hw_block_list_lock); -- cgit v1.2.3 From f0d4944c20819edf4de2c5c17963491d23e213da Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Tue, 19 Jul 2022 09:01:53 +0300 Subject: habanalabs: add a missing lock for in_reset indication Add a missing lock in hl_device_resume() when it assigns a value to the 'in_reset' indication.
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 90e346727a7c..6a98aae90f49 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1091,7 +1091,9 @@ int hl_device_resume(struct hl_device *hdev) /* 'in_reset' was set to true during suspend, now we must clear it in order * for hard reset to be performed */ + spin_lock(&hdev->reset_info.lock); hdev->reset_info.in_reset = 0; + spin_unlock(&hdev->reset_info.lock); rc = hl_device_reset(hdev, HL_DRV_RESET_HARD); if (rc) { -- cgit v1.2.3 From 7ca9022bd776d5a1c694ec0973e3d2e8671013c2 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 31 Jul 2022 08:27:36 +0300 Subject: habanalabs/uapi: move defines to better place inside file Cosmetic change to move the eventfd events defines to a better location in the file, closer to other INFO IOCTL defines. 
Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 83ca6f40f4ba..0da8894ab94a 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -707,6 +707,21 @@ enum hl_server_type { HL_SERVER_GAUDI2_HLS2 = 5 }; +/* + * Notifier event values - for the notification mechanism and the HL_INFO_GET_EVENTS command + * + * HL_NOTIFIER_EVENT_TPC_ASSERT - Indicates TPC assert event + * HL_NOTIFIER_EVENT_UNDEFINED_OPCODE - Indicates undefined operation code + * HL_NOTIFIER_EVENT_DEVICE_RESET - Indicates device requires a reset + * HL_NOTIFIER_EVENT_CS_TIMEOUT - Indicates CS timeout error + * HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE - Indicates device is unavailable + */ +#define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0) +#define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1) +#define HL_NOTIFIER_EVENT_DEVICE_RESET (1ULL << 2) +#define HL_NOTIFIER_EVENT_CS_TIMEOUT (1ULL << 3) +#define HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE (1ULL << 4) + /* Opcode for management ioctl * * HW_IP_INFO - Receive information about different IP blocks in the @@ -1883,21 +1898,6 @@ struct hl_debug_args { __u32 ctx_id; }; -/* - * Notifier event values - for the notification mechanism and the HL_INFO_GET_EVENTS command - * - * HL_NOTIFIER_EVENT_TPC_ASSERT - Indicates TPC assert event - * HL_NOTIFIER_EVENT_UNDEFINED_OPCODE - Indicates undefined operation code - * HL_NOTIFIER_EVENT_DEVICE_RESET - Indicates device requires a reset - * HL_NOTIFIER_EVENT_CS_TIMEOUT - Indicates CS timeout error - * HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE - Indicates device is unavailable - */ -#define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0) -#define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1) -#define HL_NOTIFIER_EVENT_DEVICE_RESET (1ULL << 2) -#define HL_NOTIFIER_EVENT_CS_TIMEOUT (1ULL << 3) -#define 
HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE (1ULL << 4) - /* * Various information operations such as: * - H/W IP information -- cgit v1.2.3 From ab6c08f0d597408ce7ab4a0f92088cf7cefd2915 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 31 Jul 2022 09:10:24 +0300 Subject: habanalabs: move common function out of debugfs.c A common function that is called from multiple places can't be located in debugfs.c because that file is only compiled if debugfs is enabled in the kernel config file. This can lead to undefined symbol compilation error. Reported-by: kernel test robot Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/debugfs.c | 25 ------------------------- drivers/misc/habanalabs/common/device.c | 24 ++++++++++++++++++++++++ 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index c297248748d3..69fd3ed7680a 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -586,31 +586,6 @@ err: return -EINVAL; } -void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...) -{ - va_list args; - int str_size; - - va_start(args, fmt); - /* Calculate formatted string length.
Assuming each string is null terminated, hence - * increment result by 1 - */ - str_size = vsnprintf(NULL, 0, fmt, args) + 1; - va_end(args); - - if ((e->actual_size + str_size) < e->allocated_buf_size) { - va_start(args, fmt); - vsnprintf(e->buf + e->actual_size, str_size, fmt, args); - va_end(args); - } - - /* Need to update the size even when not updating destination buffer to get the exact size - * of all input strings - */ - e->actual_size += str_size; - -} - static int engines_show(struct seq_file *s, void *data) { struct hl_debugfs_entry *entry = s->private; diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 6a98aae90f49..ab2497b6d164 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -267,6 +267,30 @@ int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type, return 0; } +void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...) +{ + va_list args; + int str_size; + + va_start(args, fmt); + /* Calculate formatted string length. Assuming each string is null terminated, hence + * increment result by 1 + */ + str_size = vsnprintf(NULL, 0, fmt, args) + 1; + va_end(args); + + if ((e->actual_size + str_size) < e->allocated_buf_size) { + va_start(args, fmt); + vsnprintf(e->buf + e->actual_size, str_size, fmt, args); + va_end(args); + } + + /* Need to update the size even when not updating destination buffer to get the exact size + * of all input strings + */ + e->actual_size += str_size; +} + enum hl_device_status hl_device_status(struct hl_device *hdev) { enum hl_device_status status; -- cgit v1.2.3 From 6419b5232efacb59b227c7088d1c00b98bdb82de Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Wed, 20 Jul 2022 13:53:37 +0300 Subject: habanalabs/gaudi2: change device f/w security check On Gaudi2 the f/w always configures the PCIe iATU and allows access to scratchpad registers. 
Therefore, we can know if the f/w is secured by reading a status bit from the f/w registers. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 2 ++ drivers/misc/habanalabs/gaudi2/gaudi2.c | 21 ++++++--------------- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 58c1eff16df6..cbcb9442bdca 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -1476,6 +1476,8 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev) */ prop->hard_reset_done_by_fw = !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN); + prop->fw_security_enabled = !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_SECURITY_EN); + dev_dbg(hdev->dev, "Firmware preboot boot device status0 %#x\n", cpu_boot_dev_sts0); diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 68ab407fa6ba..9ccde0258eca 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -2493,7 +2493,6 @@ static int gaudi2_early_init(struct hl_device *hdev) struct asic_fixed_properties *prop = &hdev->asic_prop; struct pci_dev *pdev = hdev->pdev; resource_size_t pci_bar_size; - u32 fw_boot_status; int rc; rc = gaudi2_set_fixed_properties(hdev); @@ -2521,22 +2520,14 @@ static int gaudi2_early_init(struct hl_device *hdev) prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID); hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID); - /* If FW security is enabled at this point it means no access to ELBI */ - if (hdev->asic_prop.fw_security_enabled) { - hdev->asic_prop.iatu_done_by_fw = true; - goto pci_init; - } - - rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0, &fw_boot_status); - if (rc) - goto free_queue_props; - - /* Check whether FW is configuring iATU */ - if 
((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) && - (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN)) + /* + * Only in pldm driver config iATU + */ + if (hdev->pldm) + hdev->asic_prop.iatu_done_by_fw = false; + else hdev->asic_prop.iatu_done_by_fw = true; -pci_init: rc = hl_pci_init(hdev); if (rc) goto free_queue_props; -- cgit v1.2.3 From 07056f58e43319902cd1072c00df2846b31e14b8 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 7 Aug 2022 16:36:30 +0300 Subject: habanalabs: remove left-over code from bring-up There is some left-over code from the gaudi2 bring-up that wasn't removed so far. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 20 ---------------- .../misc/habanalabs/include/common/hl_boot_if.h | 28 ++++++++++++++++------ 2 files changed, 21 insertions(+), 27 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index cbcb9442bdca..12d0f18c1f6c 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -15,14 +15,6 @@ #define FW_FILE_MAX_SIZE 0x1400000 /* maximum size of 20MB */ -struct fw_binning_conf { - u64 tpc_binning; - u32 dec_binning; - u32 hbm_binning; - u32 edma_binning; - u32 mme_redundancy; -}; - static char *extract_fw_ver_from_str(const char *fw_str) { char *str, *fw_ver, *whitespace; @@ -2424,18 +2416,6 @@ static int hl_fw_dynamic_send_msg(struct hl_device *hdev, msg.reset_cause = *(__u8 *) data; break; - case HL_COMMS_BINNING_CONF_TYPE: - { - struct fw_binning_conf *binning_conf = (struct fw_binning_conf *) data; - - msg.tpc_binning_conf = cpu_to_le64(binning_conf->tpc_binning); - msg.dec_binning_conf = cpu_to_le32(binning_conf->dec_binning); - msg.hbm_binning_conf = cpu_to_le32(binning_conf->hbm_binning); - msg.edma_binning_conf = cpu_to_le32(binning_conf->edma_binning); - msg.mme_redundancy_conf = cpu_to_le32(binning_conf->mme_redundancy); - break; - } - default: dev_err(hdev->dev, "Send 
COMMS message - invalid message type %u\n", diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index a3594119bc51..f2f6488de625 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -465,6 +465,26 @@ enum comms_msg_type { HL_COMMS_BINNING_CONF_TYPE = 3, }; +/* + * Binning information shared between LKD and FW + * @tpc_mask - TPC binning information + * @dec_mask - Decoder binning information + * @hbm_mask - HBM binning information + * @edma_mask - EDMA binning information + * @mme_mask_l - MME binning information lower 32 + * @mme_mask_h - MME binning information upper 32 + * @reserved - reserved field for 64 bit alignment + */ +struct lkd_fw_binning_info { + __le64 tpc_mask; + __le32 dec_mask; + __le32 hbm_mask; + __le32 edma_mask; + __le32 mme_mask_l; + __le32 mme_mask_h; + __le32 reserved; +}; + /* TODO: remove this struct after the code is updated to use message */ /* this is the comms descriptor header - meta data */ struct comms_desc_header { @@ -525,13 +545,7 @@ struct lkd_fw_comms_msg { struct { __u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */ }; - struct { - __le64 tpc_binning_conf; - __le32 dec_binning_conf; - __le32 hbm_binning_conf; - __le32 edma_binning_conf; - __le32 mme_redundancy_conf; /* use MME_REDUNDANT_COLUMN */ - }; + struct lkd_fw_binning_info binning_info; }; }; -- cgit v1.2.3 From 194e515c79462f1ad09ebcc9e01a3acb84a98d82 Mon Sep 17 00:00:00 2001 From: Tal Cohen Date: Thu, 7 Jul 2022 18:42:47 +0300 Subject: habanalabs/gaudi2: new API to control engine cores running mode The current flow of halting the engine cores is implemented by command buffers built by the user space and sent towards the Driver. This current flow is broken since the user space does not know when the cores actually halt as sending a workload is async op. 
Therefore the application can not free the memory that is mapped to the engine cores. This new API allows the user space to control the running mode. The API call is sync (returns after the cores are set to the requested mode). Signed-off-by: Tal Cohen Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../misc/habanalabs/common/command_submission.c | 43 ++++++++++++- drivers/misc/habanalabs/common/habanalabs.h | 8 ++- drivers/misc/habanalabs/gaudi2/gaudi2.c | 72 ++++++++++++++++++++-- .../include/gaudi2/asic_reg/gaudi2_regs.h | 1 + include/uapi/misc/habanalabs.h | 38 ++++++++++-- 5 files changed, 151 insertions(+), 11 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 304e4f3b0e7e..cf4118515678 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -13,7 +13,7 @@ #define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \ HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \ - HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY) + HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND) #define MAX_TS_ITER_NUM 10 @@ -1244,6 +1244,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags) return CS_RESERVE_SIGNALS; else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY) return CS_UNRESERVE_SIGNALS; + else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND) + return CS_TYPE_ENGINE_CORE; else return CS_TYPE_DEFAULT; } @@ -2355,6 +2357,41 @@ out: return rc; } +static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores, + u32 num_engine_cores, u32 core_command) +{ + int rc; + struct hl_device *hdev = hpriv->hdev; + void __user *engine_cores_arr; + u32 *cores; + + if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) { + dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores); + return -EINVAL; + } + + if 
(core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) { + dev_err(hdev->dev, "Engine core command is invalid\n"); + return -EINVAL; + } + + engine_cores_arr = (void __user *) (uintptr_t) engine_cores; + cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL); + if (!cores) + return -ENOMEM; + + if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) { + dev_err(hdev->dev, "Failed to copy core-ids array from user\n"); + kfree(cores); + return -EFAULT; + } + + rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command); + kfree(cores); + + return rc; +} + int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) { union hl_cs_args *args = data; @@ -2407,6 +2444,10 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) rc = cs_ioctl_unreserve_signals(hpriv, args->in.encaps_sig_handle_id); break; + case CS_TYPE_ENGINE_CORE: + rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores, + args->in.num_engine_cores, args->in.core_command); + break; default: rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq, args->in.cs_flags, diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 8d9e96c6092a..ae3f5832fe58 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -345,7 +345,8 @@ enum hl_cs_type { CS_TYPE_WAIT, CS_TYPE_COLLECTIVE_WAIT, CS_RESERVE_SIGNALS, - CS_UNRESERVE_SIGNALS + CS_UNRESERVE_SIGNALS, + CS_TYPE_ENGINE_CORE }; /* @@ -617,6 +618,7 @@ struct hl_hints_range { * which the property supports_user_set_page_size is true * (i.e. the DRAM supports multiple page sizes), otherwise * it will shall be equal to dram_page_size. 
+ * @num_engine_cores: number of engine cpu cores * @collective_first_sob: first sync object available for collective use * @collective_first_mon: first monitor available for collective use * @sync_stream_first_sob: first sync object available for sync stream use @@ -737,6 +739,7 @@ struct asic_fixed_properties { u32 faulty_dram_cluster_map; u32 xbar_edge_enabled_mask; u32 device_mem_alloc_default_page_size; + u32 num_engine_cores; u16 collective_first_sob; u16 collective_first_mon; u16 sync_stream_first_sob; @@ -1511,6 +1514,7 @@ struct engines_data { * @check_if_razwi_happened: check if there was a razwi due to RR violation. * @access_dev_mem: access device memory * @set_dram_bar_base: set the base of the DRAM BAR + * @set_engine_cores: set a config command to enigne cores */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -1645,6 +1649,8 @@ struct hl_asic_funcs { int (*access_dev_mem)(struct hl_device *hdev, enum pci_region region_type, u64 addr, u64 *val, enum debugfs_access_type acc_type); u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr); + int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids, + u32 num_cores, u32 core_command); }; diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 9ccde0258eca..676419961f86 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -1989,6 +1989,7 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev) prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END; } + prop->num_engine_cores = CPU_ID_MAX; prop->cfg_size = CFG_SIZE; prop->max_asid = MAX_ASID; prop->num_of_events = GAUDI2_EVENT_SIZE; @@ -3751,14 +3752,16 @@ static void gaudi2_stop_dec(struct hl_device *hdev) gaudi2_stop_pcie_dec(hdev); } -static void gaudi2_halt_arc(struct hl_device *hdev, u32 cpu_id) +static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode) { u32 reg_base, reg_val; reg_base = 
gaudi2_arc_blocks_bases[cpu_id]; + if (run_mode == HL_ENGINE_CORE_RUN) + reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1); + else + reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1); - /* Halt ARC */ - reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1); WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val); } @@ -3768,10 +3771,37 @@ static void gaudi2_halt_arcs(struct hl_device *hdev) for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) { if (gaudi2_is_arc_enabled(hdev, arc_id)) - gaudi2_halt_arc(hdev, arc_id); + gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT); } } +static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode) +{ + int rc; + u32 reg_base, val, ack_mask, timeout_usec = 100000; + + if (hdev->pldm) + timeout_usec *= 100; + + reg_base = gaudi2_arc_blocks_bases[cpu_id]; + if (run_mode == HL_ENGINE_CORE_RUN) + ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK; + else + ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK; + + rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET, + val, ((val & ack_mask) == ack_mask), + 1000, timeout_usec); + + if (!rc) { + /* Clear */ + val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0); + WREG32(reg_base + ARC_HALT_REQ_OFFSET, val); + } + + return rc; +} + static void gaudi2_reset_arcs(struct hl_device *hdev) { struct gaudi2_device *gaudi2 = hdev->asic_specific; @@ -3796,8 +3826,39 @@ static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev) queue_id = GAUDI2_QUEUE_ID_NIC_0_0; - for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) + for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { + if (!(hdev->nic_ports_mask & BIT(i))) + continue; + gaudi2_qman_manual_flush_common(hdev, queue_id); + } +} + +static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids, + u32 num_cores, u32 core_command) +{ + int i, rc; + + + for 
(i = 0 ; i < num_cores ; i++) { + if (gaudi2_is_arc_enabled(hdev, core_ids[i])) + gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command); + } + + for (i = 0 ; i < num_cores ; i++) { + if (gaudi2_is_arc_enabled(hdev, core_ids[i])) { + rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command); + + if (rc) { + dev_err(hdev->dev, "failed to %s arc: %d\n", + (core_command == HL_ENGINE_CORE_HALT) ? + "HALT" : "RUN", core_ids[i]); + return -1; + } + } + } + + return 0; } static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) @@ -9968,6 +10029,7 @@ static const struct hl_asic_funcs gaudi2_funcs = { .mmu_get_real_page_size = gaudi2_mmu_get_real_page_size, .access_dev_mem = hl_access_dev_mem, .set_dram_bar_base = gaudi2_set_hbm_bar_base, + .set_engine_cores = gaudi2_set_engine_cores, }; void gaudi2_set_asic_funcs(struct hl_device *hdev) diff --git a/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h b/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h index d0e2c68a639f..bfda4223bdc8 100644 --- a/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h +++ b/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h @@ -239,6 +239,7 @@ #define SFT_IF_RTR_OFFSET (mmSFT0_HBW_RTR_IF1_RTR_H3_BASE - mmSFT0_HBW_RTR_IF0_RTR_H3_BASE) #define ARC_HALT_REQ_OFFSET (mmARC_FARM_ARC0_AUX_RUN_HALT_REQ - mmARC_FARM_ARC0_AUX_BASE) +#define ARC_HALT_ACK_OFFSET (mmARC_FARM_ARC0_AUX_RUN_HALT_ACK - mmARC_FARM_ARC0_AUX_BASE) #define ARC_REGION_CFG_OFFSET(region) \ (mmARC_FARM_ARC0_AUX_ARC_REGION_CFG_0 + (region * 4) - mmARC_FARM_ARC0_AUX_BASE) diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 0da8894ab94a..f51c6ae4f94d 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -1361,17 +1361,47 @@ struct hl_cs_chunk { #define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY 0x1000 #define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY 0x2000 +/* + * The engine cores CS is merged into the 
existing CS ioctls. + * Use it to control the engine cores mode. + */ +#define HL_CS_FLAGS_ENGINE_CORE_COMMAND 0x4000 + #define HL_CS_STATUS_SUCCESS 0 #define HL_MAX_JOBS_PER_CS 512 +/* HL_ENGINE_CORE_ values + * + * HL_ENGINE_CORE_HALT: engine core halt + * HL_ENGINE_CORE_RUN: engine core run + */ +#define HL_ENGINE_CORE_HALT (1 << 0) +#define HL_ENGINE_CORE_RUN (1 << 1) + struct hl_cs_in { - /* this holds address of array of hl_cs_chunk for restore phase */ - __u64 chunks_restore; + union { + struct { + /* this holds address of array of hl_cs_chunk for restore phase */ + __u64 chunks_restore; - /* holds address of array of hl_cs_chunk for execution phase */ - __u64 chunks_execute; + /* holds address of array of hl_cs_chunk for execution phase */ + __u64 chunks_execute; + }; + + /* Valid only when HL_CS_FLAGS_ENGINE_CORE_COMMAND is set */ + struct { + /* this holds address of array of uint32 for engine_cores */ + __u64 engine_cores; + + /* number of engine cores in engine_cores array */ + __u32 num_engine_cores; + + /* the core command to be sent towards engine cores */ + __u32 core_command; + }; + }; union { /* -- cgit v1.2.3 From 07ecaa0d85decb73a2907a4b419cfa7739517d5e Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Mon, 27 Jun 2022 22:06:51 +0300 Subject: habanalabs: unify hwmon resources clean up Since the hwmon fini code is common to all ASICs, unify it into a common function. 
Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 1 + drivers/misc/habanalabs/common/hwmon.c | 21 +++++++++++++++++++++ drivers/misc/habanalabs/gaudi/gaudi.c | 18 +----------------- drivers/misc/habanalabs/gaudi2/gaudi2.c | 18 +----------------- drivers/misc/habanalabs/goya/goya.c | 17 +---------------- 5 files changed, 25 insertions(+), 50 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index ae3f5832fe58..f495a4b82f73 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -3529,6 +3529,7 @@ void hl_sysfs_fini(struct hl_device *hdev); int hl_hwmon_init(struct hl_device *hdev); void hl_hwmon_fini(struct hl_device *hdev); +void hl_hwmon_release_resources(struct hl_device *hdev); int hl_cb_create(struct hl_device *hdev, struct hl_mem_mgr *mmg, struct hl_ctx *ctx, u32 cb_size, bool internal_cb, diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c index 57f5d2c48330..8c262aeb425e 100644 --- a/drivers/misc/habanalabs/common/hwmon.c +++ b/drivers/misc/habanalabs/common/hwmon.c @@ -910,3 +910,24 @@ void hl_hwmon_fini(struct hl_device *hdev) hwmon_device_unregister(hdev->hwmon_dev); } + +void hl_hwmon_release_resources(struct hl_device *hdev) +{ + const struct hwmon_channel_info **channel_info_arr; + int i = 0; + + if (!hdev->hl_chip_info->info) + return; + + channel_info_arr = hdev->hl_chip_info->info; + + while (channel_info_arr[i]) { + kfree(channel_info_arr[i]->config); + kfree(channel_info_arr[i]); + i++; + } + + kfree(channel_info_arr); + + hdev->hl_chip_info->info = NULL; +} diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 7f52935dc603..96020693ac29 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -1682,23 +1682,7 @@ 
disable_pci_access: static void gaudi_late_fini(struct hl_device *hdev) { - const struct hwmon_channel_info **channel_info_arr; - int i = 0; - - if (!hdev->hl_chip_info->info) - return; - - channel_info_arr = hdev->hl_chip_info->info; - - while (channel_info_arr[i]) { - kfree(channel_info_arr[i]->config); - kfree(channel_info_arr[i]); - i++; - } - - kfree(channel_info_arr); - - hdev->hl_chip_info->info = NULL; + hl_hwmon_release_resources(hdev); } static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 676419961f86..fa806e5b6680 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -2711,23 +2711,7 @@ disable_pci_access: static void gaudi2_late_fini(struct hl_device *hdev) { - const struct hwmon_channel_info **channel_info_arr; - int i = 0; - - if (!hdev->hl_chip_info->info) - return; - - channel_info_arr = hdev->hl_chip_info->info; - - while (channel_info_arr[i]) { - kfree(channel_info_arr[i]->config); - kfree(channel_info_arr[i]); - i++; - } - - kfree(channel_info_arr); - - hdev->hl_chip_info->info = NULL; + hl_hwmon_release_resources(hdev); } static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 91429d6ea037..87465a28af0d 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -916,26 +916,11 @@ int goya_late_init(struct hl_device *hdev) */ void goya_late_fini(struct hl_device *hdev) { - const struct hwmon_channel_info **channel_info_arr; struct goya_device *goya = hdev->asic_specific; - int i = 0; cancel_delayed_work_sync(&goya->goya_work->work_freq); - if (!hdev->hl_chip_info->info) - return; - - channel_info_arr = hdev->hl_chip_info->info; - - while (channel_info_arr[i]) { - kfree(channel_info_arr[i]->config); - kfree(channel_info_arr[i]); 
- i++; - } - - kfree(channel_info_arr); - - hdev->hl_chip_info->info = NULL; + hl_hwmon_release_resources(hdev); } static void goya_set_pci_memory_regions(struct hl_device *hdev) -- cgit v1.2.3 From 65d3c635137e24625740801dc21d885f66193299 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Wed, 3 Aug 2022 16:36:02 +0300 Subject: habanalabs: fix H/W block handling for partial unmappings Several munmap() calls can be done on a mapped H/W block that has a larger size than a page size. Releasing the object should be done only when the entire mapped range is unmapped. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/debugfs.c | 14 ++++++++------ drivers/misc/habanalabs/common/habanalabs.h | 6 ++++-- drivers/misc/habanalabs/common/memory.c | 10 +++++++++- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 69fd3ed7680a..48d3ec8b5c82 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -291,14 +291,16 @@ static int vm_show(struct seq_file *s, void *data) if (ctx->asid != HL_KERNEL_ASID_ID && !list_empty(&ctx->hw_block_mem_list)) { seq_puts(s, "\nhw_block mappings:\n\n"); - seq_puts(s, " virtual address size HW block id\n"); - seq_puts(s, "-------------------------------------------\n"); + seq_puts(s, + " virtual address block size mapped size HW block id\n"); + seq_puts(s, + "---------------------------------------------------------------\n"); mutex_lock(&ctx->hw_block_list_lock); - list_for_each_entry(lnode, &ctx->hw_block_mem_list, - node) { + list_for_each_entry(lnode, &ctx->hw_block_mem_list, node) { seq_printf(s, - " 0x%-14lx %-6u %-9u\n", - lnode->vaddr, lnode->size, lnode->id); + " 0x%-14lx %-6u %-6u %-9u\n", + lnode->vaddr, lnode->block_size, lnode->mapped_size, + lnode->id); } mutex_unlock(&ctx->hw_block_list_lock); } diff --git 
a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index f495a4b82f73..237a887b3a43 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2063,14 +2063,16 @@ struct hl_vm_hash_node { * @node: node to hang on the list in context object. * @ctx: the context this node belongs to. * @vaddr: virtual address of the HW block. - * @size: size of the block. + * @block_size: size of the block. + * @mapped_size: size of the block which is mapped. May change if partial un-mappings are done. * @id: HW block id (handle). */ struct hl_vm_hw_block_list_node { struct list_head node; struct hl_ctx *ctx; unsigned long vaddr; - u32 size; + u32 block_size; + u32 mapped_size; u32 id; }; diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index a027fa88889b..5bc704da889d 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -1442,6 +1442,13 @@ static void hw_block_vm_close(struct vm_area_struct *vma) struct hl_vm_hw_block_list_node *lnode = (struct hl_vm_hw_block_list_node *) vma->vm_private_data; struct hl_ctx *ctx = lnode->ctx; + long new_mmap_size; + + new_mmap_size = lnode->mapped_size - (vma->vm_end - vma->vm_start); + if (new_mmap_size > 0) { + lnode->mapped_size = new_mmap_size; + return; + } mutex_lock(&ctx->hw_block_list_lock); list_del(&lnode->node); @@ -1502,7 +1509,8 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) lnode->ctx = ctx; lnode->vaddr = vma->vm_start; - lnode->size = block_size; + lnode->block_size = block_size; + lnode->mapped_size = lnode->block_size; lnode->id = block_id; vma->vm_private_data = lnode; -- cgit v1.2.3 From 107a5bcc0b34c0f4f6c8b771321bf3a4e095335d Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Wed, 3 Aug 2022 11:59:19 +0300 Subject: habanalabs: remove secured PCI IDs Secured PCI ID will not be supported in new asics because the 
security status can always be read from the f/w. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs_drv.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index d59d8cdf33e6..8026793d9083 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -58,14 +58,12 @@ MODULE_PARM_DESC(boot_error_status_mask, #define PCI_IDS_GAUDI_SEC 0x1010 #define PCI_IDS_GAUDI2 0x1020 -#define PCI_IDS_GAUDI2_SEC 0x1030 static const struct pci_device_id ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), }, { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), }, { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), }, { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2), }, - { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2_SEC), }, { 0, } }; MODULE_DEVICE_TABLE(pci, ids); @@ -95,9 +93,6 @@ static enum hl_asic_type get_asic_type(u16 device) case PCI_IDS_GAUDI2: asic_type = ASIC_GAUDI2; break; - case PCI_IDS_GAUDI2_SEC: - asic_type = ASIC_GAUDI2_SEC; - break; default: asic_type = ASIC_INVALID; break; @@ -110,7 +105,6 @@ static bool is_asic_secured(enum hl_asic_type asic_type) { switch (asic_type) { case ASIC_GAUDI_SEC: - case ASIC_GAUDI2_SEC: return true; default: return false; -- cgit v1.2.3 From 6457271f64a2b6674b11aebb2888427eb4558c22 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Wed, 3 Aug 2022 17:16:56 +0300 Subject: habanalabs: expose device security status through sysfs In order for the user to know if he is running on a secured device or not, a sysfs node is added. 
Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- Documentation/ABI/testing/sysfs-driver-habanalabs | 6 ++++++ drivers/misc/habanalabs/common/sysfs.c | 10 ++++++++++ 2 files changed, 16 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-driver-habanalabs b/Documentation/ABI/testing/sysfs-driver-habanalabs index 96646fb2e7a1..6963640a2615 100644 --- a/Documentation/ABI/testing/sysfs-driver-habanalabs +++ b/Documentation/ABI/testing/sysfs-driver-habanalabs @@ -176,6 +176,12 @@ KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Version of the device's preboot F/W code +What: /sys/class/habanalabs/hl/security_enabled +Date: Oct 2022 +KernelVersion: 6.1 +Contact: obitton@habana.ai +Description: Displays the device's security status + What: /sys/class/habanalabs/hl/soft_reset Date: Jan 2019 KernelVersion: 5.1 diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c index 6c5271f01160..36e9814139d1 100644 --- a/drivers/misc/habanalabs/common/sysfs.c +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -375,6 +375,14 @@ out: return max_size; } +static ssize_t security_enabled_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", hdev->asic_prop.fw_security_enabled); +} + static DEVICE_ATTR_RO(armcp_kernel_ver); static DEVICE_ATTR_RO(armcp_ver); static DEVICE_ATTR_RO(cpld_ver); @@ -393,6 +401,7 @@ static DEVICE_ATTR_RO(status); static DEVICE_ATTR_RO(thermal_ver); static DEVICE_ATTR_RO(uboot_ver); static DEVICE_ATTR_RO(fw_os_ver); +static DEVICE_ATTR_RO(security_enabled); static struct bin_attribute bin_attr_eeprom = { .attr = {.name = "eeprom", .mode = (0444)}, @@ -417,6 +426,7 @@ static struct attribute *hl_dev_attrs[] = { &dev_attr_thermal_ver.attr, &dev_attr_uboot_ver.attr, &dev_attr_fw_os_ver.attr, + &dev_attr_security_enabled.attr, NULL, }; -- cgit v1.2.3 From 
38a4358009456bfd7e4893c4f98ee401efce26eb Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Wed, 3 Aug 2022 17:25:33 +0300 Subject: habanalabs: expose device security status using info ioctl In order for the user to know if he is running on a secured device or not, we add it also to the hw_ip info ioctl. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 1 + include/uapi/misc/habanalabs.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index ec55c66fedd6..c7bd000750c8 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -104,6 +104,7 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) hw_ip.edma_enabled_mask = prop->edma_enabled_mask; hw_ip.server_type = prop->server_type; + hw_ip.security_enabled = prop->fw_security_enabled; return copy_to_user(out, &hw_ip, min((size_t) size, sizeof(hw_ip))) ? -EFAULT : 0; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index f51c6ae4f94d..3005cc04d4b1 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -840,6 +840,7 @@ enum hl_server_type { * @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant * for Goya/Gaudi only. * @dram_enabled: Whether the DRAM is enabled. + * @security_enabled: Whether security is enabled on device. * @mme_master_slave_mode: Indicate whether the MME is working in master/slave * configuration. Relevant for Greco and later. * @cpucp_version: The CPUCP f/w version. 
@@ -871,7 +872,7 @@ struct hl_info_hw_ip_info { __u32 psoc_pci_pll_div_factor; __u8 tpc_enabled_mask; __u8 dram_enabled; - __u8 reserved; + __u8 security_enabled; __u8 mme_master_slave_mode; __u8 cpucp_version[HL_INFO_VERSION_MAX_LEN]; __u8 card_name[HL_INFO_CARD_NAME_MAX_LEN]; -- cgit v1.2.3 From 75bc3986fc768912715d4898d5f4aac3fadbc155 Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Mon, 8 Aug 2022 14:03:44 +0300 Subject: habanalabs: fix bug when setting va block size The size of a block is always 'block->end - block->start + 1', so add the missing '+ 1' when the size is recomputed after merging va blocks. Signed-off-by: Dafna Hirschfeld Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/memory.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 5bc704da889d..e3b40dbf154c 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -457,7 +457,7 @@ static void merge_va_blocks_locked(struct hl_device *hdev, prev = list_prev_entry(va_block, node); if (&prev->node != va_list && prev->end + 1 == va_block->start) { prev->end = va_block->end; - prev->size = prev->end - prev->start; + prev->size = prev->end - prev->start + 1; list_del(&va_block->node); kfree(va_block); va_block = prev; @@ -466,7 +466,7 @@ static void merge_va_blocks_locked(struct hl_device *hdev, next = list_next_entry(va_block, node); if (&next->node != va_list && va_block->end + 1 == next->start) { next->start = va_block->start; - next->size = next->end - next->start; + next->size = next->end - next->start + 1; list_del(&va_block->node); kfree(va_block); } -- cgit v1.2.3 From 78da23cb103336be27a5fa5f3d16ff7b08f7b4b9 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Wed, 10 Aug 2022 15:39:20 +0300 Subject: habanalabs: fix missing info in sysfs documentation The kernel version field wasn't updated when a few entries were upstreamed. 
Signed-off-by: Oded Gabbay --- Documentation/ABI/testing/sysfs-driver-habanalabs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-habanalabs b/Documentation/ABI/testing/sysfs-driver-habanalabs index 6963640a2615..13b5b2ec3be7 100644 --- a/Documentation/ABI/testing/sysfs-driver-habanalabs +++ b/Documentation/ABI/testing/sysfs-driver-habanalabs @@ -16,7 +16,7 @@ Description: Version of the application running on the device's CPU What: /sys/class/habanalabs/hl/clk_max_freq_mhz Date: Jun 2019 -KernelVersion: not yet upstreamed +KernelVersion: 5.7 Contact: ogabbay@kernel.org Description: Allows the user to set the maximum clock frequency, in MHz. The device clock might be set to lower value than the maximum. @@ -26,7 +26,7 @@ Description: Allows the user to set the maximum clock frequency, in MHz. What: /sys/class/habanalabs/hl/clk_cur_freq_mhz Date: Jun 2019 -KernelVersion: not yet upstreamed +KernelVersion: 5.7 Contact: ogabbay@kernel.org Description: Displays the current frequency, in MHz, of the device clock. This property is valid only for the Gaudi ASIC family @@ -236,6 +236,6 @@ Description: Version of the u-boot running on the device's CPU What: /sys/class/habanalabs/hl/vrm_ver Date: Jan 2022 -KernelVersion: not yet upstreamed +KernelVersion: 5.17 Contact: ogabbay@kernel.org Description: Version of the Device's Voltage Regulator Monitor F/W code. N/A to GOYA and GAUDI -- cgit v1.2.3 From 273190d4204ef44b14b97e571ed7b4e42504189f Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Thu, 4 Aug 2022 17:32:30 +0300 Subject: habanalabs: add cdev index data member Instead of recalculating the cdev index, store it in a dedicated data member. This data member is intended to be passed to other drivers using the auxiliary bus infra and hence this new data member is necessary in case that the calculation is changed in the future. 
Signed-off-by: Omer Shpigelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 10 ++++++---- drivers/misc/habanalabs/common/habanalabs.h | 4 +++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index ab2497b6d164..b662d40f18e8 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1730,7 +1730,9 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) char *name; bool add_cdev_sysfs_on_err = false; - name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2); + hdev->cdev_idx = hdev->id / 2; + + name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx); if (!name) { rc = -ENOMEM; goto out_disabled; @@ -1745,7 +1747,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) if (rc) goto out_disabled; - name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2); + name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx); if (!name) { rc = -ENOMEM; goto free_dev; @@ -2023,10 +2025,10 @@ out_disabled: if (hdev->pdev) dev_err(&hdev->pdev->dev, "Failed to initialize hl%d. Device is NOT usable !\n", - hdev->id / 2); + hdev->cdev_idx); else pr_err("Failed to initialize hl%d. Device is NOT usable !\n", - hdev->id / 2); + hdev->cdev_idx); return rc; } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 237a887b3a43..a3c516c31b54 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -3129,7 +3129,8 @@ struct hl_reset_info { * @edma_binning: contains mask of edma engines that is received from the f/w which * indicates which edma engines are binned-out * @id: device minor. - * @id_control: minor of the control device + * @id_control: minor of the control device. + * @cdev_idx: char device index. Used for setting its name. 
* @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit * addresses. * @is_in_dram_scrub: true if dram scrub operation is on going. @@ -3289,6 +3290,7 @@ struct hl_device { u32 edma_binning; u16 id; u16 id_control; + u16 cdev_idx; u16 cpu_pci_msb_addr; u8 is_in_dram_scrub; u8 disabled; -- cgit v1.2.3 From 6173572f29a4f9f27b9655666e55cee20b0b8cf5 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 15 Aug 2022 13:59:14 +0300 Subject: habanalabs: select FW_LOADER in Kconfig The driver is loading firmware to the device and we use the firmware loading functions from the FW_LOADER module. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/habanalabs/Kconfig b/drivers/misc/habanalabs/Kconfig index 861c81006c6d..bd01d0d940c0 100644 --- a/drivers/misc/habanalabs/Kconfig +++ b/drivers/misc/habanalabs/Kconfig @@ -10,6 +10,7 @@ config HABANA_AI select HWMON select DMA_SHARED_BUFFER select CRC32 + select FW_LOADER help Enables PCIe card driver for Habana's AI Processors (AIP) that are designed to accelerate Deep Learning inference and training workloads. -- cgit v1.2.3 From 46e49f434fcaafe3c62232aaa0358f03b462141d Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Mon, 15 Aug 2022 11:40:55 +0300 Subject: habanalabs: if map page fails don't try to unmap it The original code tried to unmap a page that was not mapped as part of the map page error path. 
Signed-off-by: Dafna Hirschfeld Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/mmu/mmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c index 60740de47b34..4153aec55594 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu.c +++ b/drivers/misc/habanalabs/common/mmu/mmu.c @@ -403,6 +403,8 @@ int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr, dev_err(hdev->dev, "Map failed for va 0x%llx to pa 0x%llx\n", curr_va, curr_pa); + /* last mapping failed so don't try to unmap it - reduce off by page_size */ + off -= page_size; goto unmap; } } -- cgit v1.2.3 From fb855768d33fe7ec1c0e5b9ed21a72478a81f77e Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 15 Aug 2022 20:13:30 +0300 Subject: habanalabs: fix calculation of DRAM base address in PCIe BAR The calculation of the device DRAM base address before setting the relevant PCIe BAR to point at it, has an assumption that this BAR is used to access only the DRAM, and thus the covered DRAM size is a power of 2. In future ASICs it is not necessarily true, so need to update the calculation to support also a non-power-of-2 size. 
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index b662d40f18e8..0b3097802b00 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -42,7 +42,11 @@ static uint64_t hl_set_dram_bar(struct hl_device *hdev, u64 addr) struct asic_fixed_properties *prop = &hdev->asic_prop; u64 bar_base_addr; - bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull); + if (is_power_of_2(prop->dram_pci_bar_size)) + bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull); + else + bar_base_addr = DIV_ROUND_DOWN_ULL(addr, prop->dram_pci_bar_size) * + prop->dram_pci_bar_size; return hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr); } -- cgit v1.2.3 From 7b5d13c9cae72b9baac88009401d5518cd86bb0d Mon Sep 17 00:00:00 2001 From: Rajarama Manjukody Bhat Date: Fri, 12 Aug 2022 09:28:20 +0300 Subject: habanalabs/gaudi2: assigning PQFs for ARC f/w in PDMA Assigning 3 PQFs in PDMA1 and 2 PQFs in PDMA0 for ARC firmware usage. Signed-off-by: Rajarama Manjukody Bhat Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 13 +++++++++---- drivers/misc/habanalabs/gaudi2/gaudi2_masks.h | 8 +++++++- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index fa806e5b6680..c907e0fbf182 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -4175,11 +4175,15 @@ static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base, WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0); /* Enable the QMAN channel. - * PDMA1 QMAN configuration is different, as we do not allow user to - * access CP2/3, it is reserved for the ARC usage. 
+ * PDMA QMAN configuration is different, as we do not allow user to + * access some of the CPs. + * PDMA0: CP2/3 are reserved for the ARC usage. + * PDMA1: CP1/2/3 are reserved for the ARC usage. */ if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0]) WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE); + else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0]) + WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE); else WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE); } @@ -5580,10 +5584,11 @@ static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id) u64 hw_test_cap_bit = 0; switch (hw_queue_id) { - case GAUDI2_QUEUE_ID_PDMA_0_0 ... GAUDI2_QUEUE_ID_PDMA_1_1: + case GAUDI2_QUEUE_ID_PDMA_0_0: + case GAUDI2_QUEUE_ID_PDMA_0_1: + case GAUDI2_QUEUE_ID_PDMA_1_0: hw_cap_mask = HW_CAP_PDMA_MASK; break; - case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3: hw_test_cap_bit = HW_CAP_EDMA_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2); diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h b/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h index eed16d642a5a..0239d118abc5 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h +++ b/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h @@ -51,12 +51,18 @@ (0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \ (0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT)) -#define PDMA1_QMAN_ENABLE \ +#define PDMA0_QMAN_ENABLE \ ((0x3 << PDMA0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \ (0x1F << PDMA0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \ (0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \ (0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT)) +#define PDMA1_QMAN_ENABLE \ + ((0x1 << PDMA0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \ + (0x1F << PDMA0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \ + (0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \ + (0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT)) + /* QM_IDLE_MASK is valid for all engines QM idle check */ #define QM_IDLE_MASK (DCORE0_EDMA0_QM_GLBL_STS0_PQF_IDLE_MASK | \ 
DCORE0_EDMA0_QM_GLBL_STS0_CQF_IDLE_MASK | \ -- cgit v1.2.3 From 191a4443c39b278fbb0898590530bef122b26b7e Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Wed, 8 Jun 2022 09:58:59 +0300 Subject: habanalabs: define trace events This patch adds trace events for habanalabs driver to gain all the benefits such an infrastructure can supply. The following events were added: - MMU map/unmap: to be able to track driver's memory allocations - DMA alloc/free: to track our DMA allocations. The above trace points in conjunction will help us map the device memory usage as well as track memory violations. Signed-off-by: Ohad Sharabi Acked-by: Oded Gabbay Reviewed-by: Steven Rostedt (Google) Signed-off-by: Oded Gabbay --- MAINTAINERS | 1 + drivers/misc/habanalabs/common/habanalabs_drv.c | 3 + include/trace/events/habanalabs.h | 90 +++++++++++++++++++++++++ 3 files changed, 94 insertions(+) create mode 100644 include/trace/events/habanalabs.h diff --git a/MAINTAINERS b/MAINTAINERS index da6d4fd517b0..20ead1fd0e19 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8886,6 +8886,7 @@ T: git https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git F: Documentation/ABI/testing/debugfs-driver-habanalabs F: Documentation/ABI/testing/sysfs-driver-habanalabs F: drivers/misc/habanalabs/ +F: include/trace/events/habanalabs.h F: include/uapi/misc/habanalabs.h HACKRF MEDIA DRIVER diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 8026793d9083..e12148428731 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -14,6 +14,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include <trace/events/habanalabs.h> + #define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team" #define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators" diff --git a/include/trace/events/habanalabs.h b/include/trace/events/habanalabs.h new file mode 100644 index 000000000000..09ca516e1624 --- /dev/null
+++ b/include/trace/events/habanalabs.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2016-2021 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM habanalabs + +#if !defined(_TRACE_HABANALABS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_HABANALABS_H + +#include <linux/tracepoint.h> + +DECLARE_EVENT_CLASS(habanalabs_mmu_template, + TP_PROTO(struct device *dev, u64 virt_addr, u64 phys_addr, u32 page_size, bool flush_pte), + + TP_ARGS(dev, virt_addr, phys_addr, page_size, flush_pte), + + TP_STRUCT__entry( + __string(dname, dev_name(dev)) + __field(u64, virt_addr) + __field(u64, phys_addr) + __field(u32, page_size) + __field(u8, flush_pte) + ), + + TP_fast_assign( + __assign_str(dname, dev_name(dev)); + __entry->virt_addr = virt_addr; + __entry->phys_addr = phys_addr; + __entry->page_size = page_size; + __entry->flush_pte = flush_pte; + ), + + TP_printk("%s: vaddr: %#llx, paddr: %#llx, psize: %#x, flush: %s", + __get_str(dname), + __entry->virt_addr, + __entry->phys_addr, + __entry->page_size, + __entry->flush_pte ?
"true" : "false") +); + +DEFINE_EVENT(habanalabs_mmu_template, habanalabs_mmu_map, + TP_PROTO(struct device *dev, u64 virt_addr, u64 phys_addr, u32 page_size, bool flush_pte), + TP_ARGS(dev, virt_addr, phys_addr, page_size, flush_pte)); + +DEFINE_EVENT(habanalabs_mmu_template, habanalabs_mmu_unmap, + TP_PROTO(struct device *dev, u64 virt_addr, u64 phys_addr, u32 page_size, bool flush_pte), + TP_ARGS(dev, virt_addr, phys_addr, page_size, flush_pte)); + +DECLARE_EVENT_CLASS(habanalabs_dma_alloc_template, + TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size), + + TP_ARGS(dev, cpu_addr, dma_addr, size), + + TP_STRUCT__entry( + __string(dname, dev_name(dev)) + __field(u64, cpu_addr) + __field(u64, dma_addr) + __field(u32, size) + ), + + TP_fast_assign( + __assign_str(dname, dev_name(dev)); + __entry->cpu_addr = cpu_addr; + __entry->dma_addr = dma_addr; + __entry->size = size; + ), + + TP_printk("%s: cpu_addr: %#llx, dma_addr: %#llx, size: %#x", + __get_str(dname), + __entry->cpu_addr, + __entry->dma_addr, + __entry->size) +); + +DEFINE_EVENT(habanalabs_dma_alloc_template, habanalabs_dma_alloc, + TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size), + TP_ARGS(dev, cpu_addr, dma_addr, size)); + +DEFINE_EVENT(habanalabs_dma_alloc_template, habanalabs_dma_free, + TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size), + TP_ARGS(dev, cpu_addr, dma_addr, size)); + +#endif /* if !defined(_TRACE_HABANALABS_H) || defined(TRACE_HEADER_MULTI_READ) */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 4eb87df3d04aa725e752fe2df0df3e83f204d247 Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Wed, 8 Jun 2022 10:27:59 +0300 Subject: habanalabs: trace MMU map/unmap page This patch utilize the defined tracepoint to trace the MMU's pages map/unmap operations. 
Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Reviewed-by: Steven Rostedt (Google) Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/mmu/mmu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c index 4153aec55594..264f3b9edc88 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu.c +++ b/drivers/misc/habanalabs/common/mmu/mmu.c @@ -9,6 +9,8 @@ #include "../habanalabs.h" +#include <trace/events/habanalabs.h> + /** * hl_mmu_get_funcs() - get MMU functions structure * @hdev: habanalabs device structure. @@ -259,6 +261,9 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flu if (flush_pte) mmu_funcs->flush(ctx); + if (trace_habanalabs_mmu_unmap_enabled() && !rc) + trace_habanalabs_mmu_unmap(hdev->dev, virt_addr, 0, page_size, flush_pte); + return rc; } @@ -344,6 +349,8 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_s if (flush_pte) mmu_funcs->flush(ctx); + trace_habanalabs_mmu_map(hdev->dev, virt_addr, phys_addr, page_size, flush_pte); + return 0; err: -- cgit v1.2.3 From 0263256791094180ab8749b224ef7bfe0bfd67bb Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Sun, 12 Jun 2022 15:00:29 +0300 Subject: habanalabs: trace DMA allocations This patch adds tracepoints in the code for DMA allocation. The main purpose is to be able to cross data with the map operations and determine whether a memory violation occurred, for example freeing a DMA allocation before unmapping it from device memory. To achieve this the DMA alloc/free code flows were refactored so that a single DMA tracepoint will catch many flows. To get a better understanding of what happened in the DMA allocations the real allocating function is added to the trace as well.
Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Reviewed-by: Steven Rostedt (Google) Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 49 ++++++++++++++++++----------- drivers/misc/habanalabs/common/habanalabs.h | 40 +++++++++++++++++------ include/trace/events/habanalabs.h | 19 ++++++----- 3 files changed, 73 insertions(+), 35 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 0b3097802b00..230b7eeef962 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -13,6 +13,8 @@ #include #include +#include + #define HL_RESET_DELAY_USEC 10000 /* 10ms */ enum dma_alloc_type { @@ -101,9 +103,10 @@ static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val } static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, - gfp_t flag, enum dma_alloc_type alloc_type) + gfp_t flag, enum dma_alloc_type alloc_type, + const char *caller) { - void *ptr; + void *ptr = NULL; switch (alloc_type) { case DMA_ALLOC_COHERENT: @@ -117,11 +120,16 @@ static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t break; } + if (trace_habanalabs_dma_alloc_enabled() && !ZERO_OR_NULL_PTR(ptr)) + trace_habanalabs_dma_alloc(hdev->dev, (u64) (uintptr_t) ptr, *dma_handle, size, + caller); + return ptr; } static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *cpu_addr, - dma_addr_t dma_handle, enum dma_alloc_type alloc_type) + dma_addr_t dma_handle, enum dma_alloc_type alloc_type, + const char *caller) { switch (alloc_type) { case DMA_ALLOC_COHERENT: @@ -134,39 +142,44 @@ static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *c hdev->asic_funcs->asic_dma_pool_free(hdev, cpu_addr, dma_handle); break; } + + trace_habanalabs_dma_free(hdev->dev, (u64) (uintptr_t) cpu_addr, dma_handle, size, caller); } -void *hl_asic_dma_alloc_coherent(struct hl_device 
*hdev, size_t size, dma_addr_t *dma_handle, - gfp_t flag) +void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, + gfp_t flag, const char *caller) { - return hl_dma_alloc_common(hdev, size, dma_handle, flag, DMA_ALLOC_COHERENT); + return hl_dma_alloc_common(hdev, size, dma_handle, flag, DMA_ALLOC_COHERENT, caller); } -void hl_asic_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr, - dma_addr_t dma_handle) +void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr, + dma_addr_t dma_handle, const char *caller) { - hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT); + hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT, caller); } -void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle) +void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle, const char *caller) { - return hl_dma_alloc_common(hdev, size, dma_handle, 0, DMA_ALLOC_CPU_ACCESSIBLE); + return hl_dma_alloc_common(hdev, size, dma_handle, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller); } -void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr) +void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr, + const char *caller) { - hl_asic_dma_free_common(hdev, size, vaddr, 0, DMA_ALLOC_CPU_ACCESSIBLE); + hl_asic_dma_free_common(hdev, size, vaddr, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller); } -void *hl_asic_dma_pool_zalloc(struct hl_device *hdev, size_t size, gfp_t mem_flags, - dma_addr_t *dma_handle) +void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags, + dma_addr_t *dma_handle, const char *caller) { - return hl_dma_alloc_common(hdev, size, dma_handle, mem_flags, DMA_ALLOC_POOL); + return hl_dma_alloc_common(hdev, size, dma_handle, mem_flags, DMA_ALLOC_POOL, caller); } -void 
hl_asic_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr) +void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr, + const char *caller) { - hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL); + hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL, caller); } int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir) diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index a3c516c31b54..43b9427d9c97 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -143,6 +143,25 @@ enum hl_mmu_enablement { #define HL_MAX_DCORES 8 +/* DMA alloc/free wrappers */ +#define hl_asic_dma_alloc_coherent(hdev, size, dma_handle, flags) \ + hl_asic_dma_alloc_coherent_caller(hdev, size, dma_handle, flags, __func__) + +#define hl_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle) \ + hl_cpu_accessible_dma_pool_alloc_caller(hdev, size, dma_handle, __func__) + +#define hl_asic_dma_pool_zalloc(hdev, size, mem_flags, dma_handle) \ + hl_asic_dma_pool_zalloc_caller(hdev, size, mem_flags, dma_handle, __func__) + +#define hl_asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle) \ + hl_asic_dma_free_coherent_caller(hdev, size, cpu_addr, dma_handle, __func__) + +#define hl_cpu_accessible_dma_pool_free(hdev, size, vaddr) \ + hl_cpu_accessible_dma_pool_free_caller(hdev, size, vaddr, __func__) + +#define hl_asic_dma_pool_free(hdev, vaddr, dma_addr) \ + hl_asic_dma_pool_free_caller(hdev, vaddr, dma_addr, __func__) + /* * Reset Flags * @@ -3446,15 +3465,18 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size, } uint64_t hl_set_dram_bar_default(struct hl_device *hdev, u64 addr); -void *hl_asic_dma_alloc_coherent(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, - gfp_t flag); -void hl_asic_dma_free_coherent(struct hl_device *hdev, size_t size, 
void *cpu_addr, - dma_addr_t dma_handle); -void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle); -void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr); -void *hl_asic_dma_pool_zalloc(struct hl_device *hdev, size_t size, gfp_t mem_flags, - dma_addr_t *dma_handle); -void hl_asic_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr); +void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, + gfp_t flag, const char *caller); +void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr, + dma_addr_t dma_handle, const char *caller); +void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size, + dma_addr_t *dma_handle, const char *caller); +void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr, + const char *caller); +void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags, + dma_addr_t *dma_handle, const char *caller); +void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr, + const char *caller); int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); diff --git a/include/trace/events/habanalabs.h b/include/trace/events/habanalabs.h index 09ca516e1624..f05c5fa668a2 100644 --- a/include/trace/events/habanalabs.h +++ b/include/trace/events/habanalabs.h @@ -51,15 +51,16 @@ DEFINE_EVENT(habanalabs_mmu_template, habanalabs_mmu_unmap, TP_ARGS(dev, virt_addr, phys_addr, page_size, flush_pte)); DECLARE_EVENT_CLASS(habanalabs_dma_alloc_template, - TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size), + TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size, const char *caller), - TP_ARGS(dev, cpu_addr, 
dma_addr, size), + TP_ARGS(dev, cpu_addr, dma_addr, size, caller), TP_STRUCT__entry( __string(dname, dev_name(dev)) __field(u64, cpu_addr) __field(u64, dma_addr) __field(u32, size) + __field(const char *, caller) ), TP_fast_assign( @@ -67,22 +68,24 @@ DECLARE_EVENT_CLASS(habanalabs_dma_alloc_template, __entry->cpu_addr = cpu_addr; __entry->dma_addr = dma_addr; __entry->size = size; + __entry->caller = caller; ), - TP_printk("%s: cpu_addr: %#llx, dma_addr: %#llx, size: %#x", + TP_printk("%s: cpu_addr: %#llx, dma_addr: %#llx, size: %#x, caller: %s", __get_str(dname), __entry->cpu_addr, __entry->dma_addr, - __entry->size) + __entry->size, + __entry->caller) ); DEFINE_EVENT(habanalabs_dma_alloc_template, habanalabs_dma_alloc, - TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size), - TP_ARGS(dev, cpu_addr, dma_addr, size)); + TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size, const char *caller), + TP_ARGS(dev, cpu_addr, dma_addr, size, caller)); DEFINE_EVENT(habanalabs_dma_alloc_template, habanalabs_dma_free, - TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size), - TP_ARGS(dev, cpu_addr, dma_addr, size)); + TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size, const char *caller), + TP_ARGS(dev, cpu_addr, dma_addr, size, caller)); #endif /* if !defined(_TRACE_HABANALABS_H) || defined(TRACE_HEADER_MULTI_READ) */ -- cgit v1.2.3 From 262042af1397099f88386830152770bcfd0de122 Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Mon, 23 May 2022 08:59:19 +0300 Subject: habanalabs: set command buffer host VA dynamically Set the addresses for userspace command buffer dynamically instead of hard-coded. There is no reason for it to be hard-coded. 
Signed-off-by: Dafna Hirschfeld Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_buffer.c | 18 ++++++++++++++---- drivers/misc/habanalabs/common/habanalabs.h | 10 +++------- drivers/misc/habanalabs/common/memory.c | 2 +- drivers/misc/habanalabs/gaudi2/gaudi2.c | 3 --- drivers/misc/habanalabs/gaudi2/gaudi2P.h | 3 --- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index b027f66f8bd4..c3e2568542a1 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -12,6 +12,8 @@ #include #include +#define CB_VA_POOL_SIZE (4UL * SZ_1G) + static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb) { struct hl_device *hdev = ctx->hdev; @@ -25,7 +27,7 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb) if (!hdev->supports_cb_mapping) { dev_err_ratelimited(hdev->dev, - "Cannot map CB because no VA range is allocated for CB mapping\n"); + "Mapping a CB to the device's MMU is not supported\n"); return -EINVAL; } @@ -566,16 +568,23 @@ int hl_cb_va_pool_init(struct hl_ctx *ctx) return -ENOMEM; } - rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr, - prop->cb_va_end_addr - prop->cb_va_start_addr, -1); + ctx->cb_va_pool_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, + CB_VA_POOL_SIZE, HL_MMU_VA_ALIGNMENT_NOT_NEEDED); + if (!ctx->cb_va_pool_base) { + rc = -ENOMEM; + goto err_pool_destroy; + } + rc = gen_pool_add(ctx->cb_va_pool, ctx->cb_va_pool_base, CB_VA_POOL_SIZE, -1); if (rc) { dev_err(hdev->dev, "Failed to add memory to VA gen pool for CB mapping\n"); - goto err_pool_destroy; + goto err_unreserve_va_block; } return 0; +err_unreserve_va_block: + hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE); err_pool_destroy: gen_pool_destroy(ctx->cb_va_pool); @@ -590,4 +599,5 @@ void hl_cb_va_pool_fini(struct hl_ctx *ctx) 
return; gen_pool_destroy(ctx->cb_va_pool); + hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE); } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 43b9427d9c97..d7fd4f57abf3 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -567,10 +567,6 @@ struct hl_hints_range { * @tpc_binning_mask: which TPCs are binned. 0 means usable and 1 means binned. * @dram_enabled_mask: which DRAMs are enabled. * @dram_binning_mask: which DRAMs are binned. 0 means usable, 1 means binned. - * @cb_va_start_addr: virtual start address of command buffers which are mapped - * to the device's MMU. - * @cb_va_end_addr: virtual end address of command buffers which are mapped to - * the device's MMU. * @dram_hints_align_mask: dram va hint addresses alignment mask which is used * for hints validity check. * @cfg_base_address: config space base address. @@ -713,8 +709,6 @@ struct asic_fixed_properties { u64 tpc_binning_mask; u64 dram_enabled_mask; u64 dram_binning_mask; - u64 cb_va_start_addr; - u64 cb_va_end_addr; u64 dram_hints_align_mask; u64 cfg_base_address; u64 mmu_cache_mng_addr; @@ -1803,6 +1797,7 @@ struct hl_cs_outcome_store { * @cb_va_pool: device VA pool for command buffers which are mapped to the * device's MMU. * @sig_mgr: encaps signals handle manager. + * @cb_va_pool_base: the base address for the device VA pool * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed * to user so user could inquire about CS. It is used as * index to cs_pending array. 
@@ -1838,6 +1833,7 @@ struct hl_ctx { struct hl_cs_counters_atomic cs_counters; struct gen_pool *cb_va_pool; struct hl_encaps_signals_mgr sig_mgr; + u64 cb_va_pool_base; u64 cs_sequence; u64 *dram_default_hops; spinlock_t cs_lock; @@ -3600,7 +3596,7 @@ void hl_hw_block_mem_init(struct hl_ctx *ctx); void hl_hw_block_mem_fini(struct hl_ctx *ctx); u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, - enum hl_va_range_type type, u32 size, u32 alignment); + enum hl_va_range_type type, u64 size, u32 alignment); int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, u64 start_addr, u64 size); int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index e3b40dbf154c..0a653fff08d4 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -755,7 +755,7 @@ out: * - Return the start address of the virtual block. */ u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, - enum hl_va_range_type type, u32 size, u32 alignment) + enum hl_va_range_type type, u64 size, u32 alignment) { return get_va_block(hdev, ctx->va_range[type], size, 0, max(alignment, ctx->va_range[type]->page_size), diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index c907e0fbf182..ff0f9e9db1b5 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -2022,9 +2022,6 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev) prop->server_type = HL_SERVER_TYPE_UNKNOWN; - prop->cb_va_start_addr = VA_HOST_SPACE_USER_MAPPED_CB_START; - prop->cb_va_end_addr = VA_HOST_SPACE_USER_MAPPED_CB_END; - prop->max_dec = NUMBER_OF_DEC; prop->clk_pll_index = HL_GAUDI2_MME_PLL; diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2P.h b/drivers/misc/habanalabs/gaudi2/gaudi2P.h index 347ea1dd78e2..9094a702678d 100644 --- 
a/drivers/misc/habanalabs/gaudi2/gaudi2P.h +++ b/drivers/misc/habanalabs/gaudi2/gaudi2P.h @@ -139,9 +139,6 @@ #define VA_HOST_SPACE_HPAGE_START 0xFFF0800000000000ull #define VA_HOST_SPACE_HPAGE_END 0xFFF1000000000000ull /* 140TB */ -#define VA_HOST_SPACE_USER_MAPPED_CB_START 0xFFF1000000000000ull -#define VA_HOST_SPACE_USER_MAPPED_CB_END 0xFFF1000100000000ull /* 4GB */ - /* 140TB */ #define VA_HOST_SPACE_PAGE_SIZE (VA_HOST_SPACE_PAGE_END - VA_HOST_SPACE_PAGE_START) -- cgit v1.2.3 From c38f72370b615d48c7eb44389b229105f07a70e2 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Mon, 22 Aug 2022 10:59:34 +0300 Subject: habanalabs: perform context switch flow only if needed Except Goya, none of our ASICs require context switch flow, hence we enable this flow only where it is needed. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 10 ++++++---- drivers/misc/habanalabs/common/habanalabs.h | 2 ++ drivers/misc/habanalabs/goya/goya.c | 1 + 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index cf4118515678..746b688d34cf 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -1590,13 +1590,14 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args, struct hl_device *hdev = hpriv->hdev; struct hl_ctx *ctx = hpriv->ctx; bool need_soft_reset = false; - int rc = 0, do_ctx_switch; + int rc = 0, do_ctx_switch = 0; void __user *chunks; u32 num_chunks, tmp; u16 sob_count; int ret; - do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); + if (hdev->supports_ctx_switch) + do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) { mutex_lock(&hpriv->restore_phase_mutex); @@ -1667,9 +1668,10 @@ 
wait_again: } } - ctx->thread_ctx_switch_wait_token = 1; + if (hdev->supports_ctx_switch) + ctx->thread_ctx_switch_wait_token = 1; - } else if (!ctx->thread_ctx_switch_wait_token) { + } else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) { rc = hl_poll_timeout_memory(hdev, &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), 100, jiffies_to_usecs(hdev->timeout_jiffies), false); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index d7fd4f57abf3..33c6476b60a9 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -3199,6 +3199,7 @@ struct hl_reset_info { * Used only for testing. * @heartbeat: Controls if we want to enable the heartbeat mechanism vs. the f/w, which verifies * that the f/w is always alive. Used only for testing. + * @supports_ctx_switch: true if a ctx switch is required upon first submission. */ struct hl_device { struct pci_dev *pdev; @@ -3335,6 +3336,7 @@ struct hl_device { u8 compute_ctx_in_release; u8 supports_mmu_prefetch; u8 reset_upon_device_release; + u8 supports_ctx_switch; /* Parameters for bring-up */ u64 nic_ports_mask; diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 87465a28af0d..d8fb91d257b9 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -1025,6 +1025,7 @@ static int goya_sw_init(struct hl_device *hdev) hdev->asic_prop.supports_compute_reset = true; hdev->asic_prop.allow_inference_soft_reset = true; hdev->supports_wait_for_multi_cs = false; + hdev->supports_ctx_switch = true; hdev->asic_funcs->set_pci_memory_regions(hdev); -- cgit v1.2.3 From d155df4f628a5312a485235aa8cc5ba78e11ea65 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Tue, 23 Aug 2022 16:23:56 +0300 Subject: habanalabs: ignore EEPROM errors during boot EEPROM errors reported by firmware are basically warnings and should not fail the boot process. 
Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 9 +++++++++ drivers/misc/habanalabs/include/common/hl_boot_if.h | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 12d0f18c1f6c..4ede4bb03e8e 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -573,6 +573,15 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val, dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val); /* All warnings should go here in order not to reach the unknown error validation */ + if (err_val & CPU_BOOT_ERR0_EEPROM_FAIL) { + dev_warn(hdev->dev, + "Device boot warning - EEPROM failure detected, default settings applied\n"); + /* This is a warning so we don't want it to disable the + * device + */ + err_val &= ~CPU_BOOT_ERR0_EEPROM_FAIL; + } + if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) { dev_warn(hdev->dev, "Device boot warning - Skipped DRAM initialization\n"); diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index f2f6488de625..2e45be5de4fe 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -34,6 +34,7 @@ enum cpu_boot_err { CPU_BOOT_ERR_BINNING_FAIL = 19, CPU_BOOT_ERR_TPM_FAIL = 20, CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21, + CPU_BOOT_ERR_EEPROM_FAIL = 22, CPU_BOOT_ERR_ENABLED = 31, CPU_BOOT_ERR_SCND_EN = 63, CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */ @@ -115,6 +116,9 @@ enum cpu_boot_err { * CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for tmperature * sensor. * + * CPU_BOOT_ERR_EEPROM_FAIL Failed reading EEPROM data. Defaults + * are used. + * * CPU_BOOT_ERR0_ENABLED Error registers enabled. 
* This is a main indication that the * running FW populates the error @@ -139,6 +143,7 @@ enum cpu_boot_err { #define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL) #define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL) #define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL) +#define CPU_BOOT_ERR0_EEPROM_FAIL (1 << CPU_BOOT_ERR_EEPROM_FAIL) #define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED) #define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED) -- cgit v1.2.3 From 988262ef2fb9b43719ce40af1efe9bfbc62b461c Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Tue, 23 Aug 2022 17:41:52 +0300 Subject: habanalabs/gaudi2: log critical events with no rate limit When we have a storm of errors of HBM ECC SERR we can reach a situation where the driver starts the hard reset flow without logging the error that caused the hard reset, due to log rate limiting. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index ff0f9e9db1b5..6bebd5eb0294 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -8200,10 +8200,17 @@ static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type, return true; } - dev_err_ratelimited(hdev->dev, - "System Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Critical(%u). Error cause: %s\n", - hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel, - sei_data->hdr.is_critical, hbm_mc_sei_cause[cause_idx]); + if (sei_data->hdr.is_critical) + dev_err(hdev->dev, + "System Critical Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u).
Error cause: %s\n", + hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel, + hbm_mc_sei_cause[cause_idx]); + + else + dev_err_ratelimited(hdev->dev, + "System Non-Critical Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n", + hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel, + hbm_mc_sei_cause[cause_idx]); /* Print error-specific info */ switch (cause_idx) { -- cgit v1.2.3 From 0855bf8b17374fef702844664af70454fa6951ef Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Sun, 21 Aug 2022 13:50:51 +0300 Subject: habanalabs/gaudi2: dump detailed information upon RAZWI In order to improve debuggability, we add all available information when a RAZWI event occur. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 206 ++++++++++++++++++++++++-------- 1 file changed, 155 insertions(+), 51 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 6bebd5eb0294..4696da7a57c1 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -1531,17 +1531,57 @@ static const u32 rtr_coordinates_to_rtr_id[NUM_OF_RTR_PER_DCORE * NUM_OF_DCORES] RTR_ID_X_Y(17, 11) }; +enum rtr_id { + DCORE0_RTR0, + DCORE0_RTR1, + DCORE0_RTR2, + DCORE0_RTR3, + DCORE0_RTR4, + DCORE0_RTR5, + DCORE0_RTR6, + DCORE0_RTR7, + DCORE1_RTR0, + DCORE1_RTR1, + DCORE1_RTR2, + DCORE1_RTR3, + DCORE1_RTR4, + DCORE1_RTR5, + DCORE1_RTR6, + DCORE1_RTR7, + DCORE2_RTR0, + DCORE2_RTR1, + DCORE2_RTR2, + DCORE2_RTR3, + DCORE2_RTR4, + DCORE2_RTR5, + DCORE2_RTR6, + DCORE2_RTR7, + DCORE3_RTR0, + DCORE3_RTR1, + DCORE3_RTR2, + DCORE3_RTR3, + DCORE3_RTR4, + DCORE3_RTR5, + DCORE3_RTR6, + DCORE3_RTR7, +}; + static const u32 gaudi2_tpc_initiator_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = { - 1, 1, 2, 2, 3, 3, 14, 14, 13, 13, 12, 12, 19, 19, 18, 18, 17, - 17, 28, 28, 29, 29, 30, 30, 0 + DCORE0_RTR1, 
DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3, + DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4, + DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, + DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, + DCORE0_RTR0 }; static const u32 gaudi2_dec_initiator_rtr_id[NUMBER_OF_DEC] = { - 0, 0, 15, 15, 16, 16, 31, 31, 0, 0 + DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0, + DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0 }; static const u32 gaudi2_nic_initiator_rtr_id[NIC_NUMBER_OF_MACROS] = { - 15, 15, 15, 15, 15, 16, 16, 16, 16, 31, 31, 31 + DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, + DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7 }; struct sft_info { @@ -1554,11 +1594,11 @@ static const struct sft_info gaudi2_edma_initiator_sft_id[NUM_OF_EDMA_PER_DCORE }; static const u32 gaudi2_pdma_initiator_rtr_id[NUM_OF_PDMA] = { - 0, 0 + DCORE0_RTR0, DCORE0_RTR0 }; static const u32 gaudi2_rot_initiator_rtr_id[NUM_OF_ROT] = { - 16, 31 + DCORE2_RTR0, DCORE3_RTR7 }; struct mme_initiators_rtr_id { @@ -7062,10 +7102,6 @@ static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev, razwi_lo = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_lo_reg); razwi_xy = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_id_reg); } - - dev_err_ratelimited(hdev->dev, - "%s-RAZWI SHARED RR HBW WR error, captured address HI 0x%x LO 0x%x, Initiator coordinates 0x%x\n", - name, razwi_hi, razwi_lo, razwi_xy); } else { if (read_razwi_regs) { razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI); @@ -7076,11 +7112,11 @@ static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev, razwi_lo = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_lo_reg); razwi_xy = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_id_reg); } - - dev_err_ratelimited(hdev->dev, - "%s-RAZWI SHARED RR HBW AR error, captured 
address HI 0x%x LO 0x%x, Initiator coordinates 0x%x\n", - name, razwi_hi, razwi_lo, razwi_xy); } + + dev_err_ratelimited(hdev->dev, + "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n", + name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy); } static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev, @@ -7338,7 +7374,79 @@ static void gaudi2_check_if_razwi_happened(struct hl_device *hdev) gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL); } -static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, +static const char *gaudi2_get_initiators_name(u32 rtr_id) +{ + switch (rtr_id) { + case DCORE0_RTR0: + return "DEC0/1/8/9, TPC24, PDMA0/1, PMMU, PCIE_IF, EDMA0/2, HMMU0/2/4/6, CPU"; + case DCORE0_RTR1: + return "TPC0/1"; + case DCORE0_RTR2: + return "TPC2/3"; + case DCORE0_RTR3: + return "TPC4/5"; + case DCORE0_RTR4: + return "MME0_SBTE0/1"; + case DCORE0_RTR5: + return "MME0_WAP0/SBTE2"; + case DCORE0_RTR6: + return "MME0_CTRL_WR/SBTE3"; + case DCORE0_RTR7: + return "MME0_WAP1/CTRL_RD/SBTE4"; + case DCORE1_RTR0: + return "MME1_WAP1/CTRL_RD/SBTE4"; + case DCORE1_RTR1: + return "MME1_CTRL_WR/SBTE3"; + case DCORE1_RTR2: + return "MME1_WAP0/SBTE2"; + case DCORE1_RTR3: + return "MME1_SBTE0/1"; + case DCORE1_RTR4: + return "TPC10/11"; + case DCORE1_RTR5: + return "TPC8/9"; + case DCORE1_RTR6: + return "TPC6/7"; + case DCORE1_RTR7: + return "DEC2/3, NIC0/1/2/3/4, ARC_FARM, KDMA, EDMA1/3, HMMU1/3/5/7"; + case DCORE2_RTR0: + return "DEC4/5, NIC5/6/7/8, EDMA4/6, HMMU8/10/12/14, ROT0"; + case DCORE2_RTR1: + return "TPC16/17"; + case DCORE2_RTR2: + return "TPC14/15"; + case DCORE2_RTR3: + return "TPC12/13"; + case DCORE2_RTR4: + return "MME2_SBTE0/1"; + case DCORE2_RTR5: + return "MME2_WAP0/SBTE2"; + case DCORE2_RTR6: + return "MME2_CTRL_WR/SBTE3"; + case DCORE2_RTR7: + return "MME2_WAP1/CTRL_RD/SBTE4"; + case DCORE3_RTR0: + return "MME3_WAP1/CTRL_RD/SBTE4"; + case 
DCORE3_RTR1: + return "MME3_CTRL_WR/SBTE3"; + case DCORE3_RTR2: + return "MME3_WAP0/SBTE2"; + case DCORE3_RTR3: + return "MME3_SBTE0/1"; + case DCORE3_RTR4: + return "TPC18/19"; + case DCORE3_RTR5: + return "TPC20/21"; + case DCORE3_RTR6: + return "TPC22/23"; + case DCORE3_RTR7: + return "DEC6/7, NIC9/10/11, EDMA5/7, HMMU9/11/13/15, ROT1, PSOC"; + default: + return "N/A"; + } +} + +static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id, u64 rtr_ctrl_base_addr, bool is_write) { u32 razwi_hi, razwi_lo; @@ -7347,50 +7455,47 @@ static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI); razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO); - dev_err_ratelimited(hdev->dev, - "RAZWI PSOC unmapped HBW WR error, ctr_base 0x%llx, captured address HI 0x%x, LO 0x%x\n", - rtr_ctrl_base_addr, razwi_hi, razwi_lo); - /* Clear set indication */ WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1); } else { razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI); - razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO); - dev_err_ratelimited(hdev->dev, - "RAZWI PSOC unmapped HBW AR error, ctr_base 0x%llx, captured address HI 0x%x, LO 0x%x\n", - rtr_ctrl_base_addr, razwi_hi, razwi_lo); - /* Clear set indication */ WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1); } + + dev_err_ratelimited(hdev->dev, + "RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n", + is_write ? 
"WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo); + + dev_err_ratelimited(hdev->dev, + "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id)); } -static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, - u64 rtr_ctrl_base_addr, bool is_write) +static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id, + u64 rtr_ctrl_base_addr, bool is_write) { u32 razwi_addr; if (is_write) { razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR); - dev_err_ratelimited(hdev->dev, - "RAZWI PSOC unmapped LBW WR error, ctr_base 0x%llx, captured address 0x%x\n", - rtr_ctrl_base_addr, razwi_addr); - /* Clear set indication */ WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1); } else { razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR); - dev_err_ratelimited(hdev->dev, - "RAZWI PSOC unmapped LBW AR error, ctr_base 0x%llx, captured address 0x%x\n", - rtr_ctrl_base_addr, razwi_addr); - /* Clear set indication */ WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1); } + + dev_err_ratelimited(hdev->dev, + "RAZWI PSOC unmapped LBW %s error, rtr id %u, address %#x\n", + is_write ? 
"WR" : "RD", rtr_id, razwi_addr); + + dev_err_ratelimited(hdev->dev, + "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id)); } /* PSOC RAZWI interrupt occurs only when trying to access a bad address */ @@ -7408,21 +7513,16 @@ static void gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev) } razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO); - - xy = (razwi_mask_info & PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK) - >> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_SHIFT; + xy = FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info); dev_err_ratelimited(hdev->dev, - "PSOC RAZWI interrupt: Mask %d, WAS_AR %d, WAS_AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n", - (razwi_mask_info & PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK) - >> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_SHIFT, - (razwi_mask_info & PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK) - >> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_SHIFT, - (razwi_mask_info & PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK) - >> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_SHIFT, xy, - (razwi_mask_info & - PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK) - >> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_SHIFT); + "PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n", + FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info), + FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info), + FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info), + xy, + FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info)); + if (xy == 0) { dev_err_ratelimited(hdev->dev, "PSOC RAZWI interrupt: received event from 0 rtr coordinates\n"); @@ -7452,16 +7552,20 @@ static void gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev) lbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET); if (hbw_aw_set) - gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_ctrl_base_addr, true); + gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id, + 
rtr_ctrl_base_addr, true); if (hbw_ar_set) - gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_ctrl_base_addr, false); + gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id, + rtr_ctrl_base_addr, false); if (lbw_aw_set) - gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_ctrl_base_addr, true); + gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id, + rtr_ctrl_base_addr, true); if (lbw_ar_set) - gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_ctrl_base_addr, false); + gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id, + rtr_ctrl_base_addr, false); clear: /* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */ -- cgit v1.2.3 From 4745b2f0d0d4b291ec69619c815f53fd8a968d9a Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Wed, 17 Aug 2022 17:43:43 +0300 Subject: habanalabs: send device active message to f/w As part of the RAS that is done by the f/w, we should send a message to the f/w when a user either acquires or releases the device. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 2 ++ drivers/misc/habanalabs/common/firmware_if.c | 15 +++++++++++++++ drivers/misc/habanalabs/common/habanalabs.h | 3 +++ drivers/misc/habanalabs/common/habanalabs_drv.c | 2 ++ drivers/misc/habanalabs/gaudi/gaudi.c | 6 ++++++ drivers/misc/habanalabs/gaudi2/gaudi2.c | 12 ++++++++++++ drivers/misc/habanalabs/gaudi2/gaudi2P.h | 1 + drivers/misc/habanalabs/goya/goya.c | 6 ++++++ drivers/misc/habanalabs/include/common/cpucp_if.h | 11 +++++++++++ 9 files changed, 58 insertions(+) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 230b7eeef962..d6df0bd55e9f 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -470,6 +470,8 @@ static int hl_device_release(struct inode *inode, struct file *filp) hdev->last_open_session_duration_jif = jiffies - hdev->last_successful_open_jif; + 
hdev->asic_funcs->send_device_activity(hdev, false); + return 0; } diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 4ede4bb03e8e..cd2eb7e73be5 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -454,6 +454,21 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, size); } +int hl_fw_send_device_activity(struct hl_device *hdev, bool open) +{ + struct cpucp_packet pkt; + int rc; + + memset(&pkt, 0, sizeof(pkt)); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_ACTIVE_STATUS_SET << CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.value = cpu_to_le64(open); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); + if (rc) + dev_err(hdev->dev, "failed to send device activity msg(%u)\n", open); + + return rc; +} + int hl_fw_send_heartbeat(struct hl_device *hdev) { struct cpucp_packet hb_pkt; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 33c6476b60a9..c1bd82d4a83c 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1528,6 +1528,7 @@ struct engines_data { * @access_dev_mem: access device memory * @set_dram_bar_base: set the base of the DRAM BAR * @set_engine_cores: set a config command to enigne cores + * @send_device_activity: indication to FW about device availability */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -1664,6 +1665,7 @@ struct hl_asic_funcs { u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr); int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids, u32 num_cores, u32 core_command); + int (*send_device_activity)(struct hl_device *hdev, bool open); }; @@ -3715,6 +3717,7 @@ int hl_fw_dram_replaced_row_get(struct hl_device *hdev, struct cpucp_hbm_row_info *info); int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num); int 
hl_fw_cpucp_engine_core_asid_set(struct hl_device *hdev, u32 asid); +int hl_fw_send_device_activity(struct hl_device *hdev, bool open); int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], bool is_wc[3]); int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data); diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index e12148428731..849e54fe78a6 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -204,6 +204,8 @@ int hl_device_open(struct inode *inode, struct file *filp) goto out_err; } + rc = hdev->asic_funcs->send_device_activity(hdev, true); + list_add(&hpriv->dev_node, &hdev->fpriv_list); mutex_unlock(&hdev->fpriv_list_lock); diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 96020693ac29..87dbdbb220da 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -9132,6 +9132,11 @@ static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs; } +static int gaudi_send_device_activity(struct hl_device *hdev, bool open) +{ + return 0; +} + static const struct hl_asic_funcs gaudi_funcs = { .early_init = gaudi_early_init, .early_fini = gaudi_early_fini, @@ -9224,6 +9229,7 @@ static const struct hl_asic_funcs gaudi_funcs = { .mmu_get_real_page_size = hl_mmu_get_real_page_size, .access_dev_mem = hl_access_dev_mem, .set_dram_bar_base = gaudi_set_hbm_bar_base, + .send_device_activity = gaudi_send_device_activity, }; /** diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 4696da7a57c1..330869cb4c0b 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -10031,6 +10031,17 @@ static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data) return -EOPNOTSUPP; } +int 
gaudi2_send_device_activity(struct hl_device *hdev, bool open) +{ + struct gaudi2_device *gaudi2 = hdev->asic_specific; + + if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q) || hdev->fw_major_version < 37) + return 0; + + /* TODO: add check for FW version using minor ver once it's known */ + return hl_fw_send_device_activity(hdev, open); +} + static const struct hl_asic_funcs gaudi2_funcs = { .early_init = gaudi2_early_init, .early_fini = gaudi2_early_fini, @@ -10127,6 +10138,7 @@ static const struct hl_asic_funcs gaudi2_funcs = { .access_dev_mem = hl_access_dev_mem, .set_dram_bar_base = gaudi2_set_hbm_bar_base, .set_engine_cores = gaudi2_set_engine_cores, + .send_device_activity = gaudi2_send_device_activity, }; void gaudi2_set_asic_funcs(struct hl_device *hdev) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2P.h b/drivers/misc/habanalabs/gaudi2/gaudi2P.h index 9094a702678d..a99c348bbf39 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2P.h +++ b/drivers/misc/habanalabs/gaudi2/gaudi2P.h @@ -553,5 +553,6 @@ void gaudi2_pb_print_security_errors(struct hl_device *hdev, u32 block_addr, u32 u32 offended_addr); int gaudi2_init_security(struct hl_device *hdev); void gaudi2_ack_protection_bits_errors(struct hl_device *hdev); +int gaudi2_send_device_activity(struct hl_device *hdev, bool open); #endif /* GAUDI2P_H_ */ diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index d8fb91d257b9..5ef9e3ca97a6 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -5420,6 +5420,11 @@ static int goya_scrub_device_dram(struct hl_device *hdev, u64 val) return -EOPNOTSUPP; } +static int goya_send_device_activity(struct hl_device *hdev, bool open) +{ + return 0; +} + static const struct hl_asic_funcs goya_funcs = { .early_init = goya_early_init, .early_fini = goya_early_fini, @@ -5512,6 +5517,7 @@ static const struct hl_asic_funcs goya_funcs = { .mmu_get_real_page_size = hl_mmu_get_real_page_size, .access_dev_mem 
= hl_access_dev_mem, .set_dram_bar_base = goya_set_ddr_bar_base, + .send_device_activity = goya_send_device_activity, }; /* diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index abf40e1c4965..b837bb1f4cd3 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -636,6 +636,10 @@ enum pq_init_status { * passes the max size it allows the CpuCP to write to the structure, to prevent * data corruption in case of mismatched driver/FW versions. * Relevant only to Gaudi. + * + * CPUCP_PACKET_ACTIVE_STATUS_SET - + * LKD sends FW indication whether device is free or in use, this indication is reported + * also to the BMC. */ enum cpucp_packet_id { @@ -691,6 +695,13 @@ enum cpucp_packet_id { CPUCP_PACKET_RESERVED4, /* not used */ CPUCP_PACKET_RESERVED5, /* not used */ CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */ + CPUCP_PACKET_RESERVED6, /* not used */ + CPUCP_PACKET_RESERVED7, /* not used */ + CPUCP_PACKET_RESERVED8, /* not used */ + CPUCP_PACKET_RESERVED9, /* not used */ + CPUCP_PACKET_RESERVED10, /* not used */ + CPUCP_PACKET_ACTIVE_STATUS_SET, /* internal */ + CPUCP_PACKET_ID_MAX /* must be last */ }; #define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 -- cgit v1.2.3 From f5ec364c9ecd1113492e15ceaafd4447f5836528 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Sun, 28 Aug 2022 12:46:27 +0300 Subject: habanalabs: send device activity in a proper context 'Device activity open packet' should be sent outside of mutex as there is no real necessity for a lock. In addition 'device activity close packet' should be sent upon an actual release of the device. 
Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 4 ++-- drivers/misc/habanalabs/common/habanalabs_drv.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index d6df0bd55e9f..5f6407ed3b04 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -363,6 +363,8 @@ static void hpriv_release(struct kref *ref) hdev = hpriv->hdev; + hdev->asic_funcs->send_device_activity(hdev, false); + put_pid(hpriv->taskpid); hl_debugfs_remove_file(hpriv); @@ -470,8 +472,6 @@ static int hl_device_release(struct inode *inode, struct file *filp) hdev->last_open_session_duration_jif = jiffies - hdev->last_successful_open_jif; - hdev->asic_funcs->send_device_activity(hdev, false); - return 0; } diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 849e54fe78a6..fd9c8680f954 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -204,11 +204,11 @@ int hl_device_open(struct inode *inode, struct file *filp) goto out_err; } - rc = hdev->asic_funcs->send_device_activity(hdev, true); - list_add(&hpriv->dev_node, &hdev->fpriv_list); mutex_unlock(&hdev->fpriv_list_lock); + hdev->asic_funcs->send_device_activity(hdev, true); + hl_debugfs_add_file(hpriv); atomic_set(&hdev->last_error.cs_timeout.write_enable, 1); -- cgit v1.2.3 From 62adba0a55a7dc522f179b8ff8e0b3b7250c440f Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Tue, 23 Aug 2022 14:32:42 +0300 Subject: habanalabs: fix possible hole in device va cb_map_mem() uses gen_pool_alloc() to get virtual address for mapping a CB. The mapping is done in chunks of page size, so if the CB size is larger, it is possible that the allocated virtual addresses won't be consecutive. 
User retrieves this device VA which returns the virtual address in the first va_block. If there is a "hole" in the virtual addresses, user can configure a HW block with a bad device VA. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_buffer.c | 99 ++++--------------------- drivers/misc/habanalabs/common/habanalabs.h | 7 +- 2 files changed, 20 insertions(+), 86 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index c3e2568542a1..d16de18863ba 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -18,11 +18,7 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb) { struct hl_device *hdev = ctx->hdev; struct asic_fixed_properties *prop = &hdev->asic_prop; - struct hl_vm_va_block *va_block, *tmp; - dma_addr_t bus_addr; - u64 virt_addr; u32 page_size = prop->pmmu.page_size; - s32 offset; int rc; if (!hdev->supports_cb_mapping) { @@ -37,106 +33,45 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb) return -EINVAL; } - INIT_LIST_HEAD(&cb->va_block_list); - - for (bus_addr = cb->bus_address; - bus_addr < cb->bus_address + cb->size; - bus_addr += page_size) { - - virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, page_size); - if (!virt_addr) { - dev_err(hdev->dev, - "Failed to allocate device virtual address for CB\n"); - rc = -ENOMEM; - goto err_va_pool_free; - } + if (cb->is_mmu_mapped) + return 0; - va_block = kzalloc(sizeof(*va_block), GFP_KERNEL); - if (!va_block) { - rc = -ENOMEM; - gen_pool_free(ctx->cb_va_pool, virt_addr, page_size); - goto err_va_pool_free; - } + cb->roundup_size = roundup(cb->size, page_size); - va_block->start = virt_addr; - va_block->end = virt_addr + page_size - 1; - va_block->size = page_size; - list_add_tail(&va_block->node, &cb->va_block_list); + cb->virtual_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, 
cb->roundup_size); + if (!cb->virtual_addr) { + dev_err(hdev->dev, "Failed to allocate device virtual address for CB\n"); + return -ENOMEM; } mutex_lock(&ctx->mmu_lock); - - bus_addr = cb->bus_address; - offset = 0; - list_for_each_entry(va_block, &cb->va_block_list, node) { - rc = hl_mmu_map_page(ctx, va_block->start, bus_addr, - va_block->size, list_is_last(&va_block->node, - &cb->va_block_list)); - if (rc) { - dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", - va_block->start); - goto err_va_umap; - } - - bus_addr += va_block->size; - offset += va_block->size; + rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size); + if (rc) { + dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr); + goto err_va_umap; } - rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV); - mutex_unlock(&ctx->mmu_lock); cb->is_mmu_mapped = true; - return rc; err_va_umap: - list_for_each_entry(va_block, &cb->va_block_list, node) { - if (offset <= 0) - break; - hl_mmu_unmap_page(ctx, va_block->start, va_block->size, - offset <= va_block->size); - offset -= va_block->size; - } - - rc = hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); - mutex_unlock(&ctx->mmu_lock); - -err_va_pool_free: - list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) { - gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size); - list_del(&va_block->node); - kfree(va_block); - } - + gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size); return rc; } static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb) { struct hl_device *hdev = ctx->hdev; - struct hl_vm_va_block *va_block, *tmp; mutex_lock(&ctx->mmu_lock); - - list_for_each_entry(va_block, &cb->va_block_list, node) - if (hl_mmu_unmap_page(ctx, va_block->start, va_block->size, - list_is_last(&va_block->node, - &cb->va_block_list))) - dev_warn_ratelimited(hdev->dev, - "Failed to unmap CB's va 0x%llx\n", - va_block->start); - + 
hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size); hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); - mutex_unlock(&ctx->mmu_lock); - list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) { - gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size); - list_del(&va_block->node); - kfree(va_block); - } + gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size); } static void cb_fini(struct hl_device *hdev, struct hl_cb *cb) @@ -378,7 +313,6 @@ int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle) static int hl_cb_info(struct hl_mem_mgr *mmg, u64 handle, u32 flags, u32 *usage_cnt, u64 *device_va) { - struct hl_vm_va_block *va_block; struct hl_cb *cb; int rc = 0; @@ -390,9 +324,8 @@ static int hl_cb_info(struct hl_mem_mgr *mmg, } if (flags & HL_CB_FLAGS_GET_DEVICE_VA) { - va_block = list_first_entry(&cb->va_block_list, struct hl_vm_va_block, node); - if (va_block) { - *device_va = va_block->start; + if (cb->is_mmu_mapped) { + *device_va = cb->virtual_addr; } else { dev_err(mmg->dev, "CB is not mapped to the device's MMU\n"); rc = -EINVAL; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index c1bd82d4a83c..b7e01651d429 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -917,11 +917,11 @@ struct hl_mmap_mem_buf { * @buf: back pointer to the parent mappable memory buffer * @debugfs_list: node in debugfs list of command buffers. * @pool_list: node in pool list of command buffers. - * @va_block_list: list of virtual addresses blocks of the CB if it is mapped to - * the device's MMU. * @kernel_address: Holds the CB's kernel virtual address. + * @virtual_addr: Holds the CB's virtual address. * @bus_address: Holds the CB's DMA address. * @size: holds the CB's size. + * @roundup_size: holds the cb size after roundup to page size. * @cs_cnt: holds number of CS that this CB participates in. 
* @is_pool: true if CB was acquired from the pool, false otherwise. * @is_internal: internally allocated @@ -933,10 +933,11 @@ struct hl_cb { struct hl_mmap_mem_buf *buf; struct list_head debugfs_list; struct list_head pool_list; - struct list_head va_block_list; void *kernel_address; + u64 virtual_addr; dma_addr_t bus_address; u32 size; + u32 roundup_size; atomic_t cs_cnt; u8 is_pool; u8 is_internal; -- cgit v1.2.3 From aee3fd74fe579b6de5d8661dac559df91ab36b12 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Tue, 30 Aug 2022 15:07:51 +0300 Subject: habanalabs/gaudi: rename mme cfg error response print Current description is misleading hence we rename it to a more suitable error description. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 330869cb4c0b..a0b15b2f2ea4 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -616,7 +616,7 @@ static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = "qman_axi_err", "wap sei (wbc axi err)", "arc sei", - "mme_cfg_unalign_addr", + "cfg access error", "qm_sw_err", "sbte_dbg_intr_0", "sbte_dbg_intr_1", -- cgit v1.2.3 From a0fc8688c003172455f99b6b7e185b167ed964a0 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Thu, 1 Sep 2022 14:12:56 +0300 Subject: habanalabs/gaudi2: read F/W security indication after hard reset F/W security status might change after every reset. Add the reading of the preboot status to the hard reset sequence, which among others reads this security indication. As this preboot status reading includes the waiting for the preboot to be ready, it can be removed from the CPU init which is done in a later stage. 
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 7 +++++++ drivers/misc/habanalabs/common/firmware_if.c | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 5f6407ed3b04..cc392d062f0d 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -1563,6 +1563,13 @@ kill_processes: */ hdev->disabled = false; + /* F/W security enabled indication might be updated after hard-reset */ + if (hard_reset) { + rc = hl_fw_read_preboot_status(hdev); + if (rc) + goto out_err; + } + rc = hdev->asic_funcs->hw_init(hdev); if (rc) { dev_err(hdev->dev, "failed to initialize the H/W after reset\n"); diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index cd2eb7e73be5..8bfb459a8282 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -2509,13 +2509,6 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, */ dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs; - /* if no preboot loaded indication- wait for preboot */ - if (!(hdev->fw_loader.fw_comp_loaded & FW_TYPE_PREBOOT_CPU)) { - rc = hl_fw_wait_preboot_ready(hdev); - if (rc) - return -EIO; - } - rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_RST_STATE, 0, true, fw_loader->cpu_timeout); -- cgit v1.2.3 From 0626fa1a4d311b55b5f20a90380915f1bc135607 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Tue, 23 Aug 2022 16:58:38 +0300 Subject: habanalabs: add support for new cpucp return codes Firmware now responds with more detailed cpucp return codes. Driver can now distinguish between error and debug return codes.
Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 34 +++++++++++++++++++++-- drivers/misc/habanalabs/common/habanalabs.h | 2 ++ drivers/misc/habanalabs/gaudi2/gaudi2.c | 2 ++ drivers/misc/habanalabs/include/common/cpucp_if.h | 17 +++++++++++- 4 files changed, 51 insertions(+), 4 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 8bfb459a8282..c2375917fc02 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -252,7 +252,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, struct cpucp_packet *pkt; dma_addr_t pkt_dma_addr; struct hl_bd *sent_bd; - u32 tmp, expected_ack_val, pi; + u32 tmp, expected_ack_val, pi, opcode; int rc; pkt = hl_cpu_accessible_dma_pool_alloc(hdev, len, &pkt_dma_addr); @@ -319,8 +319,35 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT; if (rc) { - dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n", - rc, (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT); + opcode = (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT; + + if (!prop->supports_advanced_cpucp_rc) { + dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n", rc, opcode); + goto scrub_descriptor; + } + + switch (rc) { + case cpucp_packet_invalid: + dev_err(hdev->dev, + "CPU packet %d is not supported by F/W\n", opcode); + break; + case cpucp_packet_fault: + dev_err(hdev->dev, + "F/W failed processing CPU packet %d\n", opcode); + break; + case cpucp_packet_invalid_pkt: + dev_dbg(hdev->dev, + "CPU packet %d is not supported by F/W\n", opcode); + break; + case cpucp_packet_invalid_params: + dev_err(hdev->dev, + "F/W reports invalid parameters for CPU packet %d\n", opcode); + break; + + default: + dev_err(hdev->dev, + "Unknown F/W 
ERROR %d for CPU packet %d\n", rc, opcode); + } /* propagate the return code from the f/w to the callers who want to check it */ if (result) @@ -332,6 +359,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, *result = le64_to_cpu(pkt->result); } +scrub_descriptor: /* Scrub previous buffer descriptor 'ctl' field which contains the * previous PI value written during packet submission. * We must do this or else F/W can read an old value upon queue wraparound. diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index b7e01651d429..959e3616cc2f 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -678,6 +678,7 @@ struct hl_hints_range { * @set_max_power_on_device_init: true if need to set max power in F/W on device init. * @supports_user_set_page_size: true if user can set the allocation page size. * @dma_mask: the dma mask to be set for this device + * @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported. 
*/ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -785,6 +786,7 @@ struct asic_fixed_properties { u8 set_max_power_on_device_init; u8 supports_user_set_page_size; u8 dma_mask; + u8 supports_advanced_cpucp_rc; }; /** diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index a0b15b2f2ea4..db18e066509c 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -2721,6 +2721,8 @@ static int gaudi2_late_init(struct hl_device *hdev) struct gaudi2_device *gaudi2 = hdev->asic_specific; int rc; + hdev->asic_prop.supports_advanced_cpucp_rc = true; + rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, gaudi2->virt_msix_db_dma_addr); if (rc) { diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index b837bb1f4cd3..9593d1a26945 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -824,10 +824,25 @@ enum cpucp_led_index { CPUCP_LED2_INDEX }; +/* + * enum cpucp_packet_rc - Error return code + * @cpucp_packet_success -> in case of success. + * @cpucp_packet_invalid -> this is to support Goya and Gaudi platform. + * @cpucp_packet_fault -> in case of processing error like failing to + * get device binding or semaphore etc. + * @cpucp_packet_invalid_pkt -> when cpucp packet is un-supported. This is + * supported Greco onwards. + * @cpucp_packet_invalid_params -> when checking parameter like length of buffer + * or attribute value etc. Supported Greco onwards. + * @cpucp_packet_rc_max -> It indicates size of enum so should be at last. 
+ */ enum cpucp_packet_rc { cpucp_packet_success, cpucp_packet_invalid, - cpucp_packet_fault + cpucp_packet_fault, + cpucp_packet_invalid_pkt, + cpucp_packet_invalid_params, + cpucp_packet_rc_max }; /* -- cgit v1.2.3 From 76925f55c9ba46faaf4054a8192dc5814bc2e0ab Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Thu, 1 Sep 2022 16:37:08 +0300 Subject: habanalabs: fix resetting the DRAM BAR Current code does not take into account the new DRAM region base and so the calculated address is wrong and can lead to a crash. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/device.c | 41 ++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index cc392d062f0d..c6a00bb259fb 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -28,8 +28,9 @@ enum dma_alloc_type { /* * hl_set_dram_bar- sets the bar to allow later access to address * - * @hdev: pointer to habanalabs device structure + * @hdev: pointer to habanalabs device structure. * @addr: the address the caller wants to access. + * @region: the PCI region. * * @return: the old BAR base address on success, U64_MAX for failure. * The caller should set it back to the old address after use. @@ -39,10 +40,10 @@ enum dma_alloc_type { * This function can be called also if the bar doesn't need to be set, * in that case it just won't change the base. 
*/ -static uint64_t hl_set_dram_bar(struct hl_device *hdev, u64 addr) +static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_region *region) { struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 bar_base_addr; + u64 bar_base_addr, old_base; if (is_power_of_2(prop->dram_pci_bar_size)) bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull); @@ -50,51 +51,53 @@ static uint64_t hl_set_dram_bar(struct hl_device *hdev, u64 addr) bar_base_addr = DIV_ROUND_DOWN_ULL(addr, prop->dram_pci_bar_size) * prop->dram_pci_bar_size; - return hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr); -} + old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr); + /* in case of success we need to update the new BAR base */ + if (old_base != U64_MAX) + region->region_base = bar_base_addr; + + return old_base; +} static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val, enum debugfs_access_type acc_type, enum pci_region region_type) { struct pci_mem_region *region = &hdev->pci_mem_region[region_type]; + void __iomem *acc_addr; u64 old_base = 0, rc; if (region_type == PCI_REGION_DRAM) { - old_base = hl_set_dram_bar(hdev, addr); + old_base = hl_set_dram_bar(hdev, addr, region); if (old_base == U64_MAX) return -EIO; } + acc_addr = hdev->pcie_bar[region->bar_id] + addr - region->region_base + + region->offset_in_bar; switch (acc_type) { case DEBUGFS_READ8: - *val = readb(hdev->pcie_bar[region->bar_id] + - addr - region->region_base + region->offset_in_bar); + *val = readb(acc_addr); break; case DEBUGFS_WRITE8: - writeb(*val, hdev->pcie_bar[region->bar_id] + - addr - region->region_base + region->offset_in_bar); + writeb(*val, acc_addr); break; case DEBUGFS_READ32: - *val = readl(hdev->pcie_bar[region->bar_id] + - addr - region->region_base + region->offset_in_bar); + *val = readl(acc_addr); break; case DEBUGFS_WRITE32: - writel(*val, hdev->pcie_bar[region->bar_id] + - addr - region->region_base + region->offset_in_bar); + 
writel(*val, acc_addr); break; case DEBUGFS_READ64: - *val = readq(hdev->pcie_bar[region->bar_id] + - addr - region->region_base + region->offset_in_bar); + *val = readq(acc_addr); break; case DEBUGFS_WRITE64: - writeq(*val, hdev->pcie_bar[region->bar_id] + - addr - region->region_base + region->offset_in_bar); + writeq(*val, acc_addr); break; } if (region_type == PCI_REGION_DRAM) { - rc = hl_set_dram_bar(hdev, old_base); + rc = hl_set_dram_bar(hdev, old_base, region); if (rc == U64_MAX) return -EIO; } -- cgit v1.2.3 From c833ac1a5f34a21e9e9f8605b2f3f9f8dcaab6a0 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 4 Sep 2022 10:39:27 +0300 Subject: habanalabs/gaudi2: free event irq if init fails In case initialization fails after event irq was requested, we need to release that irq. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index db18e066509c..60694b8ed6fe 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -3581,7 +3581,7 @@ static int gaudi2_enable_msix(struct hl_device *hdev) rc = gaudi2_dec_enable_msix(hdev); if (rc) { dev_err(hdev->dev, "Failed to enable decoder IRQ"); - goto free_completion_irq; + goto free_event_irq; } for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0; @@ -3612,6 +3612,10 @@ free_user_irq: gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1); +free_event_irq: + irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); + free_irq(irq, cq); + free_completion_irq: irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); free_irq(irq, cq); -- cgit v1.2.3 From 6f0818c9fc9b81d8a303a8d3fb1826d71777f7ed Mon Sep 17 00:00:00 2001 From: Tal Cohen Date: Thu, 18 Aug 2022 12:54:23 +0300 Subject: habanalabs: new notifier events for device state Add new notifier 
events that inform several device states. General H/W error is raised when a device general H/W error occurs. User engine error is raised when a device engine informs of an error. Signed-off-by: Tal Cohen Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 39 +++++++++++++++++++++--- drivers/misc/habanalabs/gaudi2/gaudi2.c | 54 ++++++++++++++++++++++++++++++++- include/uapi/misc/habanalabs.h | 4 +++ 3 files changed, 91 insertions(+), 6 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 87dbdbb220da..2b328cb62096 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7685,6 +7685,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR: gaudi_print_irq_info(hdev, event_type, true); gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; goto reset_device; @@ -7694,6 +7695,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_PLL0 ... 
GAUDI_EVENT_PLL17: gaudi_print_irq_info(hdev, event_type, false); fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; goto reset_device; case GAUDI_EVENT_HBM0_SPI_0: @@ -7705,6 +7707,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr gaudi_hbm_event_to_dev(event_type), &eq_entry->hbm_ecc_data); fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; goto reset_device; case GAUDI_EVENT_HBM0_SPI_1: @@ -7716,6 +7719,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr gaudi_hbm_event_to_dev(event_type), &eq_entry->hbm_ecc_data); hl_fw_unmask_irq(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI_EVENT_TPC0_DEC: @@ -7730,6 +7734,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr reset_required = gaudi_tpc_read_interrupts(hdev, tpc_dec_event_to_tpc_id(event_type), "AXI_SLV_DEC_Error"); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; if (reset_required) { dev_err(hdev->dev, "reset required due to %s\n", gaudi_irq_map_table[event_type].name); @@ -7738,6 +7743,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr goto reset_device; } else { hl_fw_unmask_irq(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; } break; @@ -7753,6 +7759,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr reset_required = gaudi_tpc_read_interrupts(hdev, tpc_krn_event_to_tpc_id(event_type), "KRN_ERR"); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; if (reset_required) { dev_err(hdev->dev, "reset required due to %s\n", gaudi_irq_map_table[event_type].name); @@ -7761,6 +7768,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr goto reset_device; } else { hl_fw_unmask_irq(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; } break; @@ -7789,9 
+7797,25 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr gaudi_print_irq_info(hdev, event_type, true); gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); hl_fw_unmask_irq(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI_EVENT_PCIE_DEC: + case GAUDI_EVENT_CPU_AXI_SPLITTER: + case GAUDI_EVENT_PSOC_AXI_DEC: + case GAUDI_EVENT_PSOC_PRSTN_FALL: + gaudi_print_irq_info(hdev, event_type, true); + hl_fw_unmask_irq(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; + break; + + case GAUDI_EVENT_MMU_PAGE_FAULT: + case GAUDI_EVENT_MMU_WR_PERM: + gaudi_print_irq_info(hdev, event_type, true); + hl_fw_unmask_irq(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; + break; + case GAUDI_EVENT_MME0_WBC_RSP: case GAUDI_EVENT_MME0_SBAB0_RSP: case GAUDI_EVENT_MME1_WBC_RSP: @@ -7800,11 +7824,6 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_MME2_SBAB0_RSP: case GAUDI_EVENT_MME3_WBC_RSP: case GAUDI_EVENT_MME3_SBAB0_RSP: - case GAUDI_EVENT_CPU_AXI_SPLITTER: - case GAUDI_EVENT_PSOC_AXI_DEC: - case GAUDI_EVENT_PSOC_PRSTN_FALL: - case GAUDI_EVENT_MMU_PAGE_FAULT: - case GAUDI_EVENT_MMU_WR_PERM: case GAUDI_EVENT_RAZWI_OR_ADC: case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: case GAUDI_EVENT_DMA0_QM ... 
GAUDI_EVENT_DMA7_QM: @@ -7824,10 +7843,12 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr gaudi_print_irq_info(hdev, event_type, true); gaudi_handle_qman_err(hdev, event_type, &event_mask); hl_fw_unmask_irq(hdev, event_type); + event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET); break; case GAUDI_EVENT_RAZWI_OR_ADC_SW: gaudi_print_irq_info(hdev, event_type, true); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; goto reset_device; case GAUDI_EVENT_TPC0_BMON_SPMU: @@ -7841,11 +7862,13 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7: gaudi_print_irq_info(hdev, event_type, false); hl_fw_unmask_irq(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4: gaudi_print_nic_axi_irq_info(hdev, event_type, &data); hl_fw_unmask_irq(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3: @@ -7853,6 +7876,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr gaudi_print_sm_sei_info(hdev, event_type, &eq_entry->sm_sei_data); rc = hl_state_dump(hdev); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; if (rc) dev_err(hdev->dev, "Error during system state dump %d\n", rc); @@ -7863,6 +7887,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr break; case GAUDI_EVENT_FIX_POWER_ENV_S ... 
GAUDI_EVENT_FIX_THERMAL_ENV_E: + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; gaudi_print_clk_change_info(hdev, event_type); hl_fw_unmask_irq(hdev, event_type); break; @@ -7872,20 +7897,24 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr dev_err(hdev->dev, "Received high temp H/W interrupt %d (cause %d)\n", event_type, cause); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI_EVENT_DEV_RESET_REQ: gaudi_print_irq_info(hdev, event_type, false); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; goto reset_device; case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC: gaudi_print_irq_info(hdev, event_type, false); gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; goto reset_device; case GAUDI_EVENT_FW_ALIVE_S: gaudi_print_irq_info(hdev, event_type, false); gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; goto reset_device; default: diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 60694b8ed6fe..f749f7377ea6 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -8530,6 +8530,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent struct gaudi2_device *gaudi2 = hdev->asic_specific; bool reset_required = false, skip_reset = false; int index, sbte_index; + u64 event_mask = 0; u16 event_type; ctl = le32_to_cpu(eq_entry->hdr.ctl); @@ -8551,6 +8552,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent fallthrough; case GAUDI2_EVENT_ROTATOR0_SERR ... 
GAUDI2_EVENT_ROTATOR1_DERR: reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); break; @@ -8560,21 +8562,25 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent fallthrough; case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1: gaudi2_handle_qman_err(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0: reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; gaudi2_handle_arc_farm_sei_err(hdev); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_CPU_AXI_ERR_RSP: gaudi2_handle_cpu_sei_err(hdev); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; gaudi2_handle_qm_sei_err(hdev, event_type, &eq_entry->razwi_info); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE: @@ -8582,6 +8588,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE; gaudi2_handle_rot_err(hdev, index, &eq_entry->razwi_with_intr_cause); gaudi2_handle_qm_sei_err(hdev, event_type, NULL); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP: @@ -8589,11 +8596,13 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent gaudi2_tpc_ack_interrupts(hdev, index, "AXI_ERR_RSP", &eq_entry->razwi_with_intr_cause); gaudi2_handle_qm_sei_err(hdev, event_type, NULL); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... 
GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE: index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE; gaudi2_handle_dec_err(hdev, index, "AXI_ERR_RESPONSE", &eq_entry->razwi_info); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_TPC0_KERNEL_ERR: @@ -8624,6 +8633,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) / (GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR); gaudi2_tpc_ack_interrupts(hdev, index, "KRN_ERR", &eq_entry->razwi_with_intr_cause); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_DEC0_SPI: @@ -8639,6 +8649,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent index = (event_type - GAUDI2_EVENT_DEC0_SPI) / (GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI); gaudi2_handle_dec_err(hdev, index, "SPI", &eq_entry->razwi_info); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE: @@ -8651,6 +8662,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent gaudi2_handle_mme_err(hdev, index, "CTRL_AXI_ERROR_RESPONSE", &eq_entry->razwi_info); gaudi2_handle_qm_sei_err(hdev, event_type, NULL); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_MME0_QMAN_SW_ERROR: @@ -8661,6 +8673,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent (GAUDI2_EVENT_MME1_QMAN_SW_ERROR - GAUDI2_EVENT_MME0_QMAN_SW_ERROR); gaudi2_handle_mme_err(hdev, index, "QMAN_SW_ERROR", &eq_entry->razwi_info); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID: @@ -8671,50 +8684,58 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent (GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID); gaudi2_handle_mme_wap_err(hdev, index, &eq_entry->razwi_info); + event_mask |= 
HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP: case GAUDI2_EVENT_KDMA0_CORE: gaudi2_handle_kdma_core_event(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE: gaudi2_handle_dma_core_event(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR: gaudi2_print_pcie_addr_dec_info(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP: - case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR: case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0: gaudi2_handle_mmu_spi_sei_err(hdev, event_type); reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL: gaudi2_handle_hif_fatal(hdev, event_type, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_PMMU_FATAL_0: gaudi2_handle_pif_fatal(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT: gaudi2_ack_psoc_razwi_event_handler(hdev); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... 
GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE: + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) { reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; reset_required = true; @@ -8723,25 +8744,31 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5: gaudi2_handle_hbm_cattrip(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI: gaudi2_handle_hbm_mc_spi(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE: gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN: gaudi2_handle_psoc_drain(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_CPU_AXI_ECC: reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_CPU_L2_RAM_ECC: reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP: case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP: @@ -8755,17 +8782,24 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP); gaudi2_handle_mme_sbte_err(hdev, index, sbte_index, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_VM0_ALARM_A ... 
GAUDI2_EVENT_VM3_ALARM_B: reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_PSOC_AXI_ERR_RSP: + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; + break; case GAUDI2_EVENT_PSOC_PRSTN_FALL: + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_PCIE_APB_TIMEOUT: reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_PCIE_FATAL_ERR: + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_TPC0_BMON_SPMU: case GAUDI2_EVENT_TPC1_BMON_SPMU: @@ -8817,6 +8851,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_DEC8_BMON_SPMU: case GAUDI2_EVENT_DEC9_BMON_SPMU: case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU: + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S: @@ -8824,43 +8859,53 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S: case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: gaudi2_print_clk_change_info(hdev, event_type); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC: gaudi2_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_PCIE_FLR_REQUESTED: + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; /* Do nothing- FW will handle it */ break; case GAUDI2_EVENT_PCIE_P2P_MSIX: gaudi2_handle_pcie_p2p_msix(hdev); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE: index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE; skip_reset = !gaudi2_handle_sm_err(hdev, index); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... 
GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR: + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE: dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n", le64_to_cpu(eq_entry->data[0])); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT: dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n", le64_to_cpu(eq_entry->data[0])); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED: gaudi2_print_cpu_pkt_failure_info(hdev, &eq_entry->pkt_sync_err); + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; break; case GAUDI2_EVENT_ARC_DCCM_FULL: hl_arc_event_handle(hdev, &eq_entry->arc_data); + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; break; default: @@ -8876,15 +8921,22 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent if (!gaudi2_irq_map_table[event_type].msg) hl_fw_unmask_irq(hdev, event_type); + if (event_mask) + hl_notifier_event_send_all(hdev, event_mask); + return; reset_device: if (hdev->hard_reset_on_fw_events) { hl_device_reset(hdev, reset_flags); + event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; } else { if (!gaudi2_irq_map_table[event_type].msg) hl_fw_unmask_irq(hdev, event_type); } + + if (event_mask) + hl_notifier_event_send_all(hdev, event_mask); } static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val) diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 3005cc04d4b1..a4bab0fd8223 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -715,12 +715,16 @@ enum hl_server_type { * HL_NOTIFIER_EVENT_DEVICE_RESET - Indicates device requires a reset * HL_NOTIFIER_EVENT_CS_TIMEOUT - Indicates CS timeout error * HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE - Indicates device is unavailable + * HL_NOTIFIER_EVENT_USER_ENGINE_ERR - Indicates device engine in error state + * 
HL_NOTIFIER_EVENT_GENERAL_HW_ERR - Indicates device HW error */ #define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0) #define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1) #define HL_NOTIFIER_EVENT_DEVICE_RESET (1ULL << 2) #define HL_NOTIFIER_EVENT_CS_TIMEOUT (1ULL << 3) #define HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE (1ULL << 4) +#define HL_NOTIFIER_EVENT_USER_ENGINE_ERR (1ULL << 5) +#define HL_NOTIFIER_EVENT_GENERAL_HW_ERR (1ULL << 6) /* Opcode for management ioctl * -- cgit v1.2.3 From 82736b063fde67ea2a9b16ef5acf3d5db03e2deb Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 5 Sep 2022 17:14:45 +0300 Subject: habanalabs: MMU invalidation h/w is per device The code used the mmu mutex to protect access to the context's page tables and invalidation of the MMU cache. Because pgt are per context, the mmu mutex was a member of the context object. The problem is that the device has a single MMU invalidation h/w (per MMU). Therefore, the mmu mutex should not be a property of the context but a property of the device. 
Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_buffer.c | 10 +++++----- drivers/misc/habanalabs/common/habanalabs.h | 10 +++++++--- drivers/misc/habanalabs/common/memory.c | 14 +++++++------- drivers/misc/habanalabs/common/mmu/mmu.c | 22 +++++++++++----------- drivers/misc/habanalabs/gaudi/gaudi.c | 8 ++++---- drivers/misc/habanalabs/gaudi2/gaudi2.c | 16 ++++++++-------- 6 files changed, 42 insertions(+), 38 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index d16de18863ba..2b332991ac6a 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -44,20 +44,20 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb) return -ENOMEM; } - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size); if (rc) { dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr); goto err_va_umap; } rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); cb->is_mmu_mapped = true; return rc; err_va_umap: - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size); return rc; } @@ -66,10 +66,10 @@ static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb) { struct hl_device *hdev = ctx->hdev; - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size); hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size); } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 959e3616cc2f..9c2123ddc548 100644 --- 
a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1793,8 +1793,6 @@ struct hl_cs_outcome_store { * command submissions for a long time after CS id wraparound. * @va_range: holds available virtual addresses for host and dram mappings. * @mem_hash_lock: protects the mem_hash. - * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the - * MMU hash or walking the PGT requires talking this lock. * @hw_block_list_lock: protects the HW block memory list. * @debugfs_list: node in debugfs list of contexts. * @hw_block_mem_list: list of HW block virtual mapped addresses. @@ -1831,7 +1829,6 @@ struct hl_ctx { struct hl_cs_outcome_store outcome_store; struct hl_va_range *va_range[HL_VA_RANGE_TYPE_MAX]; struct mutex mem_hash_lock; - struct mutex mmu_lock; struct mutex hw_block_list_lock; struct list_head debugfs_list; struct list_head hw_block_mem_list; @@ -3079,6 +3076,12 @@ struct hl_reset_info { * @asid_mutex: protects asid_bitmap. * @send_cpu_message_lock: enforces only one message in Host <-> CPU-CP queue. * @debug_lock: protects critical section of setting debug mode for device + * @mmu_lock: protects the MMU page tables and invalidation h/w. Although the + * page tables are per context, the invalidation h/w is per MMU. + * Therefore, we can't allow multiple contexts (we only have two, + * user and kernel) to access the invalidation h/w at the same time. + * In addition, any change to the PGT, modifying the MMU hash or + * walking the PGT requires talking this lock. * @asic_prop: ASIC specific immutable properties. * @asic_funcs: ASIC specific functions. * @asic_specific: ASIC specific information to use only from ASIC files. 
@@ -3244,6 +3247,7 @@ struct hl_device { struct mutex asid_mutex; struct mutex send_cpu_message_lock; struct mutex debug_lock; + struct mutex mmu_lock; struct asic_fixed_properties asic_prop; const struct hl_asic_funcs *asic_funcs; void *asic_specific; diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 0a653fff08d4..096fa3c1ae95 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -1210,18 +1210,18 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device goto va_block_err; } - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack); if (rc) { dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); goto map_err; } rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV, ctx->asid, ret_vaddr, phys_pg_pack->total_size); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); if (rc) goto map_err; @@ -1362,7 +1362,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, else vaddr &= ~(((u64) phys_pg_pack->page_size) - 1); - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack); @@ -1375,7 +1375,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, rc = hl_mmu_invalidate_cache_range(hdev, true, *vm_type, ctx->asid, vaddr, phys_pg_pack->total_size); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); /* * If the context is closing we don't need to check for the MMU cache @@ -2771,13 +2771,13 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) unmap_device_va(ctx, &args, true); } - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); /* invalidate the cache once after the unmapping loop */ hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); hl_mmu_invalidate_cache(hdev, true, 
MMU_OP_PHYS_PACK); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); INIT_LIST_HEAD(&free_list); diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c index 264f3b9edc88..cf8946266615 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu.c +++ b/drivers/misc/habanalabs/common/mmu/mmu.c @@ -47,6 +47,8 @@ int hl_mmu_init(struct hl_device *hdev) if (!hdev->mmu_enable) return 0; + mutex_init(&hdev->mmu_lock); + if (hdev->mmu_func[MMU_DR_PGT].init != NULL) { rc = hdev->mmu_func[MMU_DR_PGT].init(hdev); if (rc) @@ -88,6 +90,8 @@ void hl_mmu_fini(struct hl_device *hdev) if (hdev->mmu_func[MMU_HR_PGT].fini != NULL) hdev->mmu_func[MMU_HR_PGT].fini(hdev); + + mutex_destroy(&hdev->mmu_lock); } /** @@ -106,8 +110,6 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx) if (!hdev->mmu_enable) return 0; - mutex_init(&ctx->mmu_lock); - if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) { rc = hdev->mmu_func[MMU_DR_PGT].ctx_init(ctx); if (rc) @@ -151,8 +153,6 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx) if (hdev->mmu_func[MMU_HR_PGT].ctx_fini != NULL) hdev->mmu_func[MMU_HR_PGT].ctx_fini(ctx); - - mutex_destroy(&ctx->mmu_lock); } /* @@ -609,9 +609,9 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, pgt_residency = mmu_prop->host_resident ? 
MMU_HR_PGT : MMU_DR_PGT; mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr); - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); rc = mmu_funcs->get_tlb_info(ctx, virt_addr, hops); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); if (rc) return rc; @@ -701,16 +701,16 @@ static void hl_mmu_prefetch_work_function(struct work_struct *work) { struct hl_prefetch_work *pfw = container_of(work, struct hl_prefetch_work, pf_work); struct hl_ctx *ctx = pfw->ctx; + struct hl_device *hdev = ctx->hdev; - if (!hl_device_operational(ctx->hdev, NULL)) + if (!hl_device_operational(hdev, NULL)) goto put_ctx; - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); - ctx->hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid, - pfw->va, pfw->size); + hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid, pfw->va, pfw->size); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); put_ctx: /* diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 2b328cb62096..48ff3b103b9f 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -8405,13 +8405,13 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev, goto destroy_internal_cb_pool; } - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr, HOST_SPACE_INTERNAL_CB_SZ); hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); if (rc) goto unreserve_internal_cb_pool; @@ -8438,13 +8438,13 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev, if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) return; - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 
HOST_SPACE_INTERNAL_CB_SZ); hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); gen_pool_destroy(hdev->internal_cb_pool); diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index f749f7377ea6..5761ca5d50ae 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -9302,12 +9302,12 @@ static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, v } /* Create mapping on asic side */ - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M); hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV, ctx->asid, reserved_va_base, SZ_2M); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); if (rc) { dev_err(hdev->dev, "Failed to create mapping on asic mmu\n"); goto unreserve_va; @@ -9340,11 +9340,11 @@ static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, v gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID); - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M); hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR, ctx->asid, reserved_va_base, SZ_2M); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); unreserve_va: hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M); free_data_buffer: @@ -9397,11 +9397,11 @@ static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *c goto destroy_internal_cb_pool; } - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr, HOST_SPACE_INTERNAL_CB_SZ); hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); if (rc) goto unreserve_internal_cb_pool; @@ -9426,11 
+9426,11 @@ static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx * if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) return; - mutex_lock(&ctx->mmu_lock); + mutex_lock(&hdev->mmu_lock); hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); - mutex_unlock(&ctx->mmu_lock); + mutex_unlock(&hdev->mmu_lock); gen_pool_destroy(hdev->internal_cb_pool); -- cgit v1.2.3 From cecde184ca32ae862c5494a7875d03592c893ab9 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 5 Sep 2022 16:24:21 +0300 Subject: habanalabs/gaudi2: print RAZWI info upon PCIe access error Add the dump of the RAZWI information when a PCIe access is blocked by RR. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 52 ++++++++++++++++++++++++--- drivers/misc/habanalabs/gaudi2/gaudi2_masks.h | 13 +++++++ 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 5761ca5d50ae..c040e01adafe 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -7963,14 +7963,58 @@ static void gaudi2_handle_dma_core_event(struct hl_device *hdev, u64 intr_cause_ gaudi2_dma_core_interrupts_cause[i]); } +static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev) +{ + u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr; + + razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED; + if (RREG32(razwi_happened_addr)) { + gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true, + NULL); + WREG32(razwi_happened_addr, 0x1); + } + + razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED; + if (RREG32(razwi_happened_addr)) { 
+ gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true, + NULL); + WREG32(razwi_happened_addr, 0x1); + } + + razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED; + if (RREG32(razwi_happened_addr)) { + gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true, + NULL); + WREG32(razwi_happened_addr, 0x1); + } + + razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED; + if (RREG32(razwi_happened_addr)) { + gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true, + NULL); + WREG32(razwi_happened_addr, 0x1); + } +} + static void gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u64 intr_cause_data) { int i; - for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE; i++) - if (intr_cause_data & BIT_ULL(i)) - dev_err_ratelimited(hdev->dev, "PCIE ADDR DEC Error: %s\n", - gaudi2_pcie_addr_dec_error_cause[i]); + for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) { + if (!(intr_cause_data & BIT_ULL(i))) + continue; + + dev_err_ratelimited(hdev->dev, "PCIE ADDR DEC Error: %s\n", + gaudi2_pcie_addr_dec_error_cause[i]); + + switch (intr_cause_data & BIT_ULL(i)) { + case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK: + break; + case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK: + gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev); + break; + } + } } static void gaudi2_handle_pif_fatal(struct hl_device *hdev, u64 intr_cause_data) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h b/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h index 0239d118abc5..e9ac87828221 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h +++ b/drivers/misc/habanalabs/gaudi2/gaudi2_masks.h @@ -144,4 +144,17 @@ #define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_SHIFT 15 #define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK 0x8000 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_SHIFT 0 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK 0x1 +#define 
PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_SHIFT 1 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK 0x2 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_SHIFT 2 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK 0x4 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK_SHIFT 3 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK_MASK 0x8 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK_SHIFT 4 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK_MASK 0x10 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK_SHIFT 5 +#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK_MASK 0x20 + #endif /* GAUDI2_MASKS_H_ */ -- cgit v1.2.3 From f0b6d3cc29b709089f7a0de6a3d64a73ed0d67bd Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Tue, 6 Sep 2022 16:37:14 +0300 Subject: habanalabs/gaudi2: increase hard-reset sleep time to 2 sec The access to the device registers is blocked during hard reset, until preboot runs and allows the access to specific registers, including the PSOC BTM_FSM register which is used to know when the reset is done. Between the reset request and until this register is polled there is a small delay of 500 msec which is not enough for F/W to process the reset and for preboot to run, so the register might be accessed while it is blocked. To avoid it, increase the delay to 2 sec. 
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index c040e01adafe..6ed9b3ce16dd 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -21,7 +21,7 @@ #define GAUDI2_DMA_POOL_BLK_SIZE SZ_256 /* 256 bytes */ -#define GAUDI2_RESET_TIMEOUT_MSEC 500 /* 500ms */ +#define GAUDI2_RESET_TIMEOUT_MSEC 2000 /* 2000ms */ #define GAUDI2_RESET_POLL_TIMEOUT_USEC 50000 /* 50ms */ #define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC 25000 /* 25s */ #define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC 25000 /* 25s */ -- cgit v1.2.3 From 04d53cd2a6fb9936c938b624d99320cf2f842758 Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Mon, 29 Aug 2022 16:56:28 +0300 Subject: habanalabs/gaudi2: get f/w reset status register dynamically Get the firmware reset status address from the dynamic registers we read from the firmware instead of using a define. 
Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 5 ++++- drivers/misc/habanalabs/include/common/hl_boot_if.h | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index 6ed9b3ce16dd..b95eab4c237c 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -5439,7 +5439,10 @@ static void gaudi2_execute_soft_reset(struct hl_device *hdev, u32 reset_sleep_ms if (!driver_performs_reset) { /* set SP to indicate reset request sent to FW */ - WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA); + if (dyn_regs->cpu_rst_status) + WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA); + else + WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA); WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq), gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id); diff --git a/drivers/misc/habanalabs/include/common/hl_boot_if.h b/drivers/misc/habanalabs/include/common/hl_boot_if.h index 2e45be5de4fe..e0ea51cc7475 100644 --- a/drivers/misc/habanalabs/include/common/hl_boot_if.h +++ b/drivers/misc/habanalabs/include/common/hl_boot_if.h @@ -431,7 +431,9 @@ struct cpu_dyn_regs { __le32 gic_host_ints_irq; __le32 gic_host_soft_rst_irq; __le32 gic_rot_qm_irq_ctrl; - __le32 reserved1[22]; /* reserve for future use */ + __le32 cpu_rst_status; + __le32 eng_arc_irq_ctrl; + __le32 reserved1[20]; /* reserve for future use */ }; /* TODO: remove the desc magic after the code is updated to use message */ -- cgit v1.2.3 From 97a78e3d8e176de80323b7a01cd6b26f0b6dcdc1 Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Wed, 7 Sep 2022 16:15:39 +0300 Subject: habanalabs: rename error info structure As a preparation for adding more errors to it, change to more suitable name. 
Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../misc/habanalabs/common/command_submission.c | 6 ++--- drivers/misc/habanalabs/common/habanalabs.h | 12 ++++----- drivers/misc/habanalabs/common/habanalabs_drv.c | 6 ++--- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 30 ++++++++++----------- drivers/misc/habanalabs/gaudi/gaudi.c | 31 +++++++++++----------- 5 files changed, 43 insertions(+), 42 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 746b688d34cf..fbe5003191bf 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -826,10 +826,10 @@ static void cs_timedout(struct work_struct *work) } /* Save only the first CS timeout parameters */ - rc = atomic_cmpxchg(&hdev->last_error.cs_timeout.write_enable, 1, 0); + rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0); if (rc) { - hdev->last_error.cs_timeout.timestamp = ktime_get(); - hdev->last_error.cs_timeout.seq = cs->sequence; + hdev->captured_err_info.cs_timeout.timestamp = ktime_get(); + hdev->captured_err_info.cs_timeout.seq = cs->sequence; event_mask = device_reset ? (HL_NOTIFIER_EVENT_CS_TIMEOUT | HL_NOTIFIER_EVENT_DEVICE_RESET) : HL_NOTIFIER_EVENT_CS_TIMEOUT; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 9c2123ddc548..44050d463e23 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -2981,12 +2981,12 @@ struct undefined_opcode_info { }; /** - * struct last_error_session_info - info about last session errors occurred. - * @cs_timeout: CS timeout error last information. - * @razwi: razwi last information. + * struct hl_error_info - holds information collected during an error. + * @cs_timeout: CS timeout error information. + * @razwi: razwi information. 
* @undef_opcode: undefined opcode information */ -struct last_error_session_info { +struct hl_error_info { struct cs_timeout_info cs_timeout; struct razwi_info razwi; struct undefined_opcode_info undef_opcode; @@ -3111,7 +3111,7 @@ struct hl_reset_info { * @state_dump_specs: constants and dictionaries needed to dump system state. * @multi_cs_completion: array of multi-CS completion. * @clk_throttling: holds information about current/previous clock throttling events - * @last_error: holds information about last session in which CS timeout or razwi error occurred. + * @captured_err_info: holds information about errors. * @reset_info: holds current device reset information. * @stream_master_qid_arr: pointer to array with QIDs of master streams. * @fw_major_version: major version of current loaded preboot. @@ -3286,7 +3286,7 @@ struct hl_device { struct multi_cs_completion multi_cs_completion[ MULTI_CS_MAX_USER_CTX]; struct hl_clk_throttle clk_throttling; - struct last_error_session_info last_error; + struct hl_error_info captured_err_info; struct hl_reset_info reset_info; diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index fd9c8680f954..5250bfb6790e 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -211,9 +211,9 @@ int hl_device_open(struct inode *inode, struct file *filp) hl_debugfs_add_file(hpriv); - atomic_set(&hdev->last_error.cs_timeout.write_enable, 1); - atomic_set(&hdev->last_error.razwi.write_enable, 1); - hdev->last_error.undef_opcode.write_enable = true; + atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1); + atomic_set(&hdev->captured_err_info.razwi.write_enable, 1); + hdev->captured_err_info.undef_opcode.write_enable = true; hdev->open_counter++; hdev->last_successful_open_jif = jiffies; diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 
c7bd000750c8..ab0be082f3a6 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -593,8 +593,8 @@ static int cs_timeout_info(struct hl_fpriv *hpriv, struct hl_info_args *args) if ((!max_size) || (!out)) return -EINVAL; - info.seq = hdev->last_error.cs_timeout.seq; - info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout.timestamp); + info.seq = hdev->captured_err_info.cs_timeout.seq; + info.timestamp = ktime_to_ns(hdev->captured_err_info.cs_timeout.timestamp); return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; } @@ -609,12 +609,12 @@ static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args) if ((!max_size) || (!out)) return -EINVAL; - info.timestamp = ktime_to_ns(hdev->last_error.razwi.timestamp); - info.addr = hdev->last_error.razwi.addr; - info.engine_id_1 = hdev->last_error.razwi.engine_id_1; - info.engine_id_2 = hdev->last_error.razwi.engine_id_2; - info.no_engine_id = hdev->last_error.razwi.non_engine_initiator; - info.error_type = hdev->last_error.razwi.type; + info.timestamp = ktime_to_ns(hdev->captured_err_info.razwi.timestamp); + info.addr = hdev->captured_err_info.razwi.addr; + info.engine_id_1 = hdev->captured_err_info.razwi.engine_id_1; + info.engine_id_2 = hdev->captured_err_info.razwi.engine_id_2; + info.no_engine_id = hdev->captured_err_info.razwi.non_engine_initiator; + info.error_type = hdev->captured_err_info.razwi.type; return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? 
-EFAULT : 0; } @@ -629,13 +629,13 @@ static int undefined_opcode_info(struct hl_fpriv *hpriv, struct hl_info_args *ar if ((!max_size) || (!out)) return -EINVAL; - info.timestamp = ktime_to_ns(hdev->last_error.undef_opcode.timestamp); - info.engine_id = hdev->last_error.undef_opcode.engine_id; - info.cq_addr = hdev->last_error.undef_opcode.cq_addr; - info.cq_size = hdev->last_error.undef_opcode.cq_size; - info.stream_id = hdev->last_error.undef_opcode.stream_id; - info.cb_addr_streams_len = hdev->last_error.undef_opcode.cb_addr_streams_len; - memcpy(info.cb_addr_streams, hdev->last_error.undef_opcode.cb_addr_streams, + info.timestamp = ktime_to_ns(hdev->captured_err_info.undef_opcode.timestamp); + info.engine_id = hdev->captured_err_info.undef_opcode.engine_id; + info.cq_addr = hdev->captured_err_info.undef_opcode.cq_addr; + info.cq_size = hdev->captured_err_info.undef_opcode.cq_size; + info.stream_id = hdev->captured_err_info.undef_opcode.stream_id; + info.cb_addr_streams_len = hdev->captured_err_info.undef_opcode.cb_addr_streams_len; + memcpy(info.cb_addr_streams, hdev->captured_err_info.undef_opcode.cb_addr_streams, sizeof(info.cb_addr_streams)); return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? 
-EFAULT : 0; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 48ff3b103b9f..f81a141b4741 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -6894,9 +6894,9 @@ static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 strea stream, cq_ptr, size); if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { - hdev->last_error.undef_opcode.cq_addr = cq_ptr; - hdev->last_error.undef_opcode.cq_size = size; - hdev->last_error.undef_opcode.stream_id = stream; + hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr; + hdev->captured_err_info.undef_opcode.cq_size = size; + hdev->captured_err_info.undef_opcode.stream_id = stream; } } @@ -6962,7 +6962,7 @@ static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, } if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { - struct undefined_opcode_info *undef_opcode = &hdev->last_error.undef_opcode; + struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode; u32 arr_idx = undef_opcode->cb_addr_streams_len; if (arr_idx == 0) { @@ -7046,11 +7046,11 @@ static void gaudi_handle_qman_err_generic(struct hl_device *hdev, } /* check for undefined opcode */ if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK && - hdev->last_error.undef_opcode.write_enable) { - memset(&hdev->last_error.undef_opcode, 0, - sizeof(hdev->last_error.undef_opcode)); + hdev->captured_err_info.undef_opcode.write_enable) { + memset(&hdev->captured_err_info.undef_opcode, 0, + sizeof(hdev->captured_err_info.undef_opcode)); - hdev->last_error.undef_opcode.write_enable = false; + hdev->captured_err_info.undef_opcode.write_enable = false; *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; } @@ -7332,18 +7332,19 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type); /* In case it's the first razwi, save its parameters*/ - rc 
= atomic_cmpxchg(&hdev->last_error.razwi.write_enable, 1, 0); + rc = atomic_cmpxchg(&hdev->captured_err_info.razwi.write_enable, 1, 0); if (rc) { - hdev->last_error.razwi.timestamp = ktime_get(); - hdev->last_error.razwi.addr = razwi_addr; - hdev->last_error.razwi.engine_id_1 = engine_id_1; - hdev->last_error.razwi.engine_id_2 = engine_id_2; + hdev->captured_err_info.razwi.timestamp = ktime_get(); + hdev->captured_err_info.razwi.addr = razwi_addr; + hdev->captured_err_info.razwi.engine_id_1 = engine_id_1; + hdev->captured_err_info.razwi.engine_id_2 = engine_id_2; /* * If first engine id holds non valid value the razwi initiator * does not have engine id */ - hdev->last_error.razwi.non_engine_initiator = (engine_id_1 == U16_MAX); - hdev->last_error.razwi.type = razwi_type; + hdev->captured_err_info.razwi.non_engine_initiator = + (engine_id_1 == U16_MAX); + hdev->captured_err_info.razwi.type = razwi_type; } } -- cgit v1.2.3 From ff13b900b0b2b28486b714f615b1f919973275c2 Mon Sep 17 00:00:00 2001 From: Tal Cohen Date: Wed, 17 Aug 2022 12:46:07 +0300 Subject: habanalabs/gaudi: change TPC Assert to use TPC DEC instead of QMAN err This change is done while there is a problem to use QMAN error for TPC assert async. The problem involves security limitation that exists to generate the assert via QMAN error. Signed-off-by: Tal Cohen Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index f81a141b4741..e80ebace49c8 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7216,12 +7216,6 @@ static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e switch (event_type) { case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: - /* In TPC QM event, notify on TPC assertion. 
While there isn't - * a specific event for assertion yet, the FW generates QM event. - * The SW upper layer will inspect an internal mapped area to indicate - * if the event is a tpc assertion or tpc QM. - */ - *event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT; index = event_type - GAUDI_EVENT_TPC0_QM; qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS; qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET; @@ -7731,6 +7725,12 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr case GAUDI_EVENT_TPC5_DEC: case GAUDI_EVENT_TPC6_DEC: case GAUDI_EVENT_TPC7_DEC: + /* In TPC DEC event, notify on TPC assertion. While there isn't + * a specific event for assertion yet, the FW generates TPC DEC event. + * The SW upper layer will inspect an internal mapped area to indicate + * if the event is a TPC Assertion or a "real" TPC DEC. + */ + event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT; gaudi_print_irq_info(hdev, event_type, true); reset_required = gaudi_tpc_read_interrupts(hdev, tpc_dec_event_to_tpc_id(event_type), -- cgit v1.2.3 From 43657dadfeffbec63b9ed358e6f82e9c64ff334c Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Thu, 8 Sep 2022 18:24:41 +0300 Subject: habanalabs/gaudi2: add handling to pmmu events in eqe handler In order to get the error cause and the captured address in case of page fault, added pmmu events to eqe handler. Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index b95eab4c237c..b8b8b2dc2095 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -8756,6 +8756,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... 
GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP: + case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR: case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0: gaudi2_handle_mmu_spi_sei_err(hdev, event_type); reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; -- cgit v1.2.3 From 0c88760f8f5e13e32f624a1da71144b240b05125 Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Mon, 1 Aug 2022 15:23:44 +0300 Subject: habanalabs/gaudi2: add secured attestation info uapi User will provide a nonce via the ioctl, and will retrieve secured attestation data of the boot, generated using given nonce. Signed-off-by: Dani Liberman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 46 ++++++++++++++ drivers/misc/habanalabs/common/habanalabs.h | 3 + drivers/misc/habanalabs/common/habanalabs_ioctl.c | 52 +++++++++++++++ drivers/misc/habanalabs/include/common/cpucp_if.h | 77 ++++++++++++++++++++++- include/uapi/misc/habanalabs.h | 43 +++++++++++++ 5 files changed, 219 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index c2375917fc02..26a7529083e1 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -2988,3 +2988,49 @@ void hl_fw_set_max_power(struct hl_device *hdev) if (rc) dev_err(hdev->dev, "Failed to set max power, error %d\n", rc); } + +static int hl_fw_get_sec_attest_data(struct hl_device *hdev, u32 packet_id, void *data, u32 size, + u32 nonce, u32 timeout) +{ + struct cpucp_packet pkt = {}; + dma_addr_t req_dma_addr; + void *req_cpu_addr; + int rc; + + req_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, size, &req_dma_addr); + if (!req_cpu_addr) { + dev_err(hdev->dev, + "Failed to allocate DMA memory for CPU-CP packet %u\n", packet_id); + return -ENOMEM; + } + + memset(data, 0, size); + + pkt.ctl = cpu_to_le32(packet_id << CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.addr = cpu_to_le64(req_dma_addr); + 
pkt.data_max_size = cpu_to_le32(size); + pkt.nonce = cpu_to_le32(nonce); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + timeout, NULL); + if (rc) { + dev_err(hdev->dev, + "Failed to handle CPU-CP pkt %u, error %d\n", packet_id, rc); + goto out; + } + + memcpy(data, req_cpu_addr, size); + +out: + hl_cpu_accessible_dma_pool_free(hdev, size, req_cpu_addr); + + return rc; +} + +int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info, + u32 nonce) +{ + return hl_fw_get_sec_attest_data(hdev, CPUCP_PACKET_SEC_ATTEST_GET, sec_attest_info, + sizeof(struct cpucp_sec_attest_info), nonce, + HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC); +} diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 44050d463e23..58c95b13be69 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -66,6 +66,7 @@ struct hl_fpriv; #define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10s */ #define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */ #define HL_CPUCP_MON_DUMP_TIMEOUT_USEC 10000000 /* 10s */ +#define HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC 10000000 /* 10s */ #define HL_FW_STATUS_POLL_INTERVAL_USEC 10000 /* 10ms */ #define HL_FW_COMMS_STATUS_PLDM_POLL_INTERVAL_USEC 1000000 /* 1s */ @@ -3748,6 +3749,8 @@ int hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long *va void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long value); long hl_fw_get_max_power(struct hl_device *hdev); void hl_fw_set_max_power(struct hl_device *hdev); +int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info, + u32 nonce); int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value); int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value); int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value); 
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index ab0be082f3a6..43afe40966e5 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -662,6 +662,55 @@ static int dev_mem_alloc_page_sizes_info(struct hl_fpriv *hpriv, struct hl_info_ return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; } +static int sec_attest_info(struct hl_fpriv *hpriv, struct hl_info_args *args) +{ + void __user *out = (void __user *) (uintptr_t) args->return_pointer; + struct cpucp_sec_attest_info *sec_attest_info; + struct hl_info_sec_attest *info; + u32 max_size = args->return_size; + int rc; + + if ((!max_size) || (!out)) + return -EINVAL; + + sec_attest_info = kmalloc(sizeof(*sec_attest_info), GFP_KERNEL); + if (!sec_attest_info) + return -ENOMEM; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + rc = -ENOMEM; + goto free_sec_attest_info; + } + + rc = hl_fw_get_sec_attest_info(hpriv->hdev, sec_attest_info, args->sec_attest_nonce); + if (rc) + goto free_info; + + info->nonce = le32_to_cpu(sec_attest_info->nonce); + info->pcr_quote_len = le16_to_cpu(sec_attest_info->pcr_quote_len); + info->pub_data_len = le16_to_cpu(sec_attest_info->pub_data_len); + info->certificate_len = le16_to_cpu(sec_attest_info->certificate_len); + info->pcr_num_reg = sec_attest_info->pcr_num_reg; + info->pcr_reg_len = sec_attest_info->pcr_reg_len; + info->quote_sig_len = sec_attest_info->quote_sig_len; + memcpy(&info->pcr_data, &sec_attest_info->pcr_data, sizeof(info->pcr_data)); + memcpy(&info->pcr_quote, &sec_attest_info->pcr_quote, sizeof(info->pcr_quote)); + memcpy(&info->public_data, &sec_attest_info->public_data, sizeof(info->public_data)); + memcpy(&info->certificate, &sec_attest_info->certificate, sizeof(info->certificate)); + memcpy(&info->quote_sig, &sec_attest_info->quote_sig, sizeof(info->quote_sig)); + + rc = copy_to_user(out,
info, + min_t(size_t, max_size, sizeof(*info))) ? -EFAULT : 0; + +free_info: + kfree(info); +free_sec_attest_info: + kfree(sec_attest_info); + + return rc; +} + static int eventfd_register(struct hl_fpriv *hpriv, struct hl_info_args *args) { int rc; @@ -844,6 +893,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_DRAM_PENDING_ROWS: return dram_pending_rows_info(hpriv, args); + case HL_INFO_SECURED_ATTESTATION: + return sec_attest_info(hpriv, args); + case HL_INFO_REGISTER_EVENTFD: return eventfd_register(hpriv, args); diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index 9593d1a26945..baa5aa43b6f4 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -629,6 +629,12 @@ enum pq_init_status { * CPUCP_PACKET_ENGINE_CORE_ASID_SET - * Packet to perform engine core ASID configuration * + * CPUCP_PACKET_SEC_ATTEST_GET - + * Get the attestation data that is collected during various stages of the + * boot sequence. the attestation data is also hashed with some unique + * number (nonce) provided by the host to prevent replay attacks. + * public key and certificate also provided as part of the FW response. + * * CPUCP_PACKET_MONITOR_DUMP_GET - * Get monitors registers dump from the CpuCP kernel.
* The CPU will put the registers dump in the a buffer allocated by the driver @@ -691,15 +697,15 @@ enum cpucp_packet_id { CPUCP_PACKET_RESERVED, /* not used */ CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */ CPUCP_PACKET_RESERVED2, /* not used */ + CPUCP_PACKET_SEC_ATTEST_GET, /* internal */ CPUCP_PACKET_RESERVED3, /* not used */ CPUCP_PACKET_RESERVED4, /* not used */ - CPUCP_PACKET_RESERVED5, /* not used */ CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */ + CPUCP_PACKET_RESERVED5, /* not used */ CPUCP_PACKET_RESERVED6, /* not used */ CPUCP_PACKET_RESERVED7, /* not used */ CPUCP_PACKET_RESERVED8, /* not used */ CPUCP_PACKET_RESERVED9, /* not used */ - CPUCP_PACKET_RESERVED10, /* not used */ CPUCP_PACKET_ACTIVE_STATUS_SET, /* internal */ CPUCP_PACKET_ID_MAX /* must be last */ }; @@ -794,6 +800,9 @@ struct cpucp_packet { * result cannot be used to hold general purpose data. */ __le32 status_mask; + + /* random, used once number, for security packets */ + __le32 nonce; }; /* For NIC requests */ @@ -1219,6 +1228,70 @@ enum cpu_reset_status { CPU_RST_STATUS_SOFT_RST_DONE = 1, }; +#define SEC_PCR_DATA_BUF_SZ 256 +#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */ +#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */ + +/* + * struct cpucp_sec_attest_info - attestation report of the boot + * @pcr_data: raw values of the PCR registers + * @pcr_num_reg: number of PCR registers in the pcr_data array + * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes) + * @nonce: number only used once. random number provided by host. this also + * passed to the quote command as a qualifying data. 
+ * @pcr_quote_len: length of the attestation quote data (bytes) + * @pcr_quote: attestation report data structure + * @quote_sig_len: length of the attestation report signature (bytes) + * @quote_sig: signature structure of the attestation report + * @pub_data_len: length of the public data (bytes) + * @public_data: public key for the signed attestation + * (outPublic + name + qualifiedName) + * @certificate_len: length of the certificate (bytes) + * @certificate: certificate for the attestation signing key + */ +struct cpucp_sec_attest_info { + __u8 pcr_data[SEC_PCR_DATA_BUF_SZ]; + __u8 pcr_num_reg; + __u8 pcr_reg_len; + __le16 pad0; + __le32 nonce; + __le16 pcr_quote_len; + __u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ]; + __u8 quote_sig_len; + __u8 quote_sig[SEC_SIGNATURE_BUF_SZ]; + __le16 pub_data_len; + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; + __le16 certificate_len; + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; +}; + +/* + * struct cpucp_dev_info_signed - device information signed by a secured device + * @info: device information structure as defined above + * @nonce: number only used once. random number provided by host. this number is + * hashed and signed along with the device information. + * @info_sig_len: length of the attestation signature (bytes) + * @info_sig: signature of the info + nonce data. + * @pub_data_len: length of the public data (bytes) + * @public_data: public key info signed info data + * (outPublic + name + qualifiedName) + * @certificate_len: length of the certificate (bytes) + * @certificate: certificate for the signing key + */ +struct cpucp_dev_info_signed { + struct cpucp_info info; /* assumed to be 64bit aligned */ + __le32 nonce; + __le32 pad0; + __u8 info_sig_len; + __u8 info_sig[SEC_SIGNATURE_BUF_SZ]; + __le16 pub_data_len; + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; + __le16 certificate_len; + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; +}; + /* * struct dcore_monitor_regs_data - DCORE monitor regs data. 
* the structure follows sync manager block layout. relevant only to Gaudi. diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index a4bab0fd8223..e00ebe05097d 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -773,6 +773,7 @@ enum hl_server_type { * Razwi initiator. * Razwi cause, was it a page fault or MMU access error. * HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES - Retrieve valid page sizes for device memory allocation + * HL_INFO_SECURED_ATTESTATION - Retrieve attestation report of the boot. * HL_INFO_REGISTER_EVENTFD - Register eventfd for event notifications. * HL_INFO_UNREGISTER_EVENTFD - Unregister eventfd * HL_INFO_GET_EVENTS - Retrieve the last occurred events @@ -802,6 +803,7 @@ enum hl_server_type { #define HL_INFO_CS_TIMEOUT_EVENT 24 #define HL_INFO_RAZWI_EVENT 25 #define HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES 26 +#define HL_INFO_SECURED_ATTESTATION 27 #define HL_INFO_REGISTER_EVENTFD 28 #define HL_INFO_UNREGISTER_EVENTFD 29 #define HL_INFO_GET_EVENTS 30 @@ -1133,6 +1135,45 @@ struct hl_info_dev_memalloc_page_sizes { __u64 page_order_bitmask; }; +#define SEC_PCR_DATA_BUF_SZ 256 +#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */ +#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */ + +/* + * struct hl_info_sec_attest - attestation report of the boot + * @nonce: number only used once. random number provided by host. this also passed to the quote + * command as a qualifying data. 
+ * @pcr_quote_len: length of the attestation quote data (bytes) + * @pub_data_len: length of the public data (bytes) + * @certificate_len: length of the certificate (bytes) + * @pcr_num_reg: number of PCR registers in the pcr_data array + * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes) + * @quote_sig_len: length of the attestation report signature (bytes) + * @pcr_data: raw values of the PCR registers + * @pcr_quote: attestation report data structure + * @quote_sig: signature structure of the attestation report + * @public_data: public key for the signed attestation + * (outPublic + name + qualifiedName) + * @certificate: certificate for the attestation signing key + */ +struct hl_info_sec_attest { + __u32 nonce; + __u16 pcr_quote_len; + __u16 pub_data_len; + __u16 certificate_len; + __u8 pcr_num_reg; + __u8 pcr_reg_len; + __u8 quote_sig_len; + __u8 pcr_data[SEC_PCR_DATA_BUF_SZ]; + __u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ]; + __u8 quote_sig[SEC_SIGNATURE_BUF_SZ]; + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; + __u8 pad0[2]; +}; + enum gaudi_dcores { HL_GAUDI_WS_DCORE, HL_GAUDI_WN_DCORE, @@ -1158,6 +1199,7 @@ enum gaudi_dcores { * driver. It is possible for the user to allocate buffer larger than * needed, hence updating this variable so user will know the exact amount * of bytes copied by the kernel to the buffer. + * @sec_attest_nonce: Nonce number used for attestation report. * @pad: Padding to 64 bit. */ struct hl_info_args { @@ -1172,6 +1214,7 @@ struct hl_info_args { __u32 pll_index; __u32 eventfd; __u32 user_buffer_actual_size; + __u32 sec_attest_nonce; }; __u32 pad; -- cgit v1.2.3 From 006fd8cb659bb02789dc1ec48836fff5f348ba8d Mon Sep 17 00:00:00 2001 From: Li zeming Date: Mon, 19 Sep 2022 10:20:54 +0800 Subject: habanalabs/gaudi2: Remove unnecessary (void*) conversions The void pointer object can be directly assigned to different structure objects, it does not need to be cast. 
Signed-off-by: Li zeming Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c index b8b8b2dc2095..75c4bef7841c 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c @@ -9592,7 +9592,7 @@ static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb) { - struct hl_cb *cb = (struct hl_cb *) data; + struct hl_cb *cb = data; struct packet_msg_short *pkt; u32 value, ctl, pkt_size = sizeof(*pkt); @@ -9685,7 +9685,7 @@ static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt) static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop) { - struct hl_cb *cb = (struct hl_cb *) prop->data; + struct hl_cb *cb = prop->data; void *buf = (void *) (uintptr_t) (cb->kernel_address); u64 monitor_base, fence_addr = 0; @@ -9737,7 +9737,7 @@ static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_propert static void gaudi2_reset_sob(struct hl_device *hdev, void *data) { - struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data; + struct hl_hw_sob *hw_sob = data; dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id); -- cgit v1.2.3 From 4f3ce5e0d0f85d6be0a2bc3a2aa75ba3b649c7c6 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 19 Sep 2022 11:30:03 +0300 Subject: habanalabs: failure to open device due to reset is debug level If the user wants to open the device, and the device is currently in reset, the user will get an error from the open(). We don't need to display an error in the dmesg for that as it is not a real error and we can spam the kernel log with this message. 
Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 5250bfb6790e..112632afe7d5 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -161,7 +161,7 @@ int hl_device_open(struct inode *inode, struct file *filp) mutex_lock(&hdev->fpriv_list_lock); if (!hl_device_operational(hdev, &status)) { - dev_err_ratelimited(hdev->dev, + dev_dbg_ratelimited(hdev->dev, "Can't open %s because it is %s\n", dev_name(hdev->dev), hdev->status[status]); @@ -271,7 +271,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp) mutex_lock(&hdev->fpriv_ctrl_list_lock); if (!hl_device_operational(hdev, NULL)) { - dev_err_ratelimited(hdev->dev_ctrl, + dev_dbg_ratelimited(hdev->dev_ctrl, "Can't open %s because it is disabled or in reset\n", dev_name(hdev->dev_ctrl)); rc = -EPERM; -- cgit v1.2.3 From bb677d527e14184d89012ce332128f3767fa9925 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Thu, 15 Sep 2022 11:10:56 +0300 Subject: habanalabs/gaudi2: allow user to flush PCIE by read In order for the user to flush PCIE he needs to read some register from PCIE block. The chosen register is SPECIAL_GLBL_SPARE_0 and hence needs to be unsecured. 
Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi2/gaudi2_security.c | 7 +- .../include/gaudi2/asic_reg/gaudi2_regs.h | 1 + .../gaudi2/asic_reg/pcie_wrap_special_regs.h | 185 +++++++++++++++++++++ 3 files changed, 192 insertions(+), 1 deletion(-) create mode 100644 drivers/misc/habanalabs/include/gaudi2/asic_reg/pcie_wrap_special_regs.h diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2_security.c b/drivers/misc/habanalabs/gaudi2/gaudi2_security.c index c4165db06db2..c6906fb14229 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2_security.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2_security.c @@ -2559,6 +2559,10 @@ static const u32 gaudi2_pb_pcie[] = { mmPCIE_WRAP_BASE, }; +static const u32 gaudi2_pb_pcie_unsecured_regs[] = { + mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0, +}; + static const u32 gaudi2_pb_thermal_sensor0[] = { mmDCORE0_XFT_BASE, mmDCORE0_TSTDVS_BASE, @@ -3418,7 +3422,8 @@ static int gaudi2_init_protection_bits(struct hl_device *hdev) rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA, HL_PB_SINGLE_INSTANCE, HL_PB_NA, gaudi2_pb_pcie, ARRAY_SIZE(gaudi2_pb_pcie), - NULL, HL_PB_NA); + gaudi2_pb_pcie_unsecured_regs, + ARRAY_SIZE(gaudi2_pb_pcie_unsecured_regs)); /* Thermal Sensor. * Skip when security is enabled in F/W, because the blocks are protected by privileged RR. 
diff --git a/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h b/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h index bfda4223bdc8..6aa1b1412462 100644 --- a/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h +++ b/drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h @@ -132,6 +132,7 @@ #include "dcore0_mme_ctrl_lo_arch_tensor_a_regs.h" #include "dcore0_mme_ctrl_lo_arch_tensor_b_regs.h" #include "dcore0_mme_ctrl_lo_arch_tensor_cout_regs.h" +#include "pcie_wrap_special_regs.h" #include "pdma0_qm_masks.h" #include "pdma0_core_masks.h" diff --git a/drivers/misc/habanalabs/include/gaudi2/asic_reg/pcie_wrap_special_regs.h b/drivers/misc/habanalabs/include/gaudi2/asic_reg/pcie_wrap_special_regs.h new file mode 100644 index 000000000000..46558e7a7f63 --- /dev/null +++ b/drivers/misc/habanalabs/include/gaudi2/asic_reg/pcie_wrap_special_regs.h @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2016-2020 HabanaLabs, Ltd. + * All Rights Reserved. 
+ * + */ + +/************************************ + ** This is an auto-generated file ** + ** DO NOT EDIT BELOW ** + ************************************/ + +#ifndef ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_ +#define ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_ + +/* + ***************************************** + * PCIE_WRAP_SPECIAL + * (Prototype: SPECIAL_REGS) + ***************************************** + */ + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_0 0x4C01E80 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_1 0x4C01E84 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_2 0x4C01E88 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_3 0x4C01E8C + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_4 0x4C01E90 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_5 0x4C01E94 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_6 0x4C01E98 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_7 0x4C01E9C + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_8 0x4C01EA0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_9 0x4C01EA4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_10 0x4C01EA8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_11 0x4C01EAC + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_12 0x4C01EB0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_13 0x4C01EB4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_14 0x4C01EB8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_15 0x4C01EBC + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_16 0x4C01EC0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_17 0x4C01EC4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_18 0x4C01EC8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_19 0x4C01ECC + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_20 0x4C01ED0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_21 0x4C01ED4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_22 0x4C01ED8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_23 0x4C01EDC + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_24 0x4C01EE0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_25 0x4C01EE4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_26 0x4C01EE8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_27 0x4C01EEC + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_28 0x4C01EF0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_29 
0x4C01EF4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_30 0x4C01EF8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_31 0x4C01EFC + +#define mmPCIE_WRAP_SPECIAL_MEM_GW_DATA 0x4C01F00 + +#define mmPCIE_WRAP_SPECIAL_MEM_GW_REQ 0x4C01F04 + +#define mmPCIE_WRAP_SPECIAL_MEM_NUMOF 0x4C01F0C + +#define mmPCIE_WRAP_SPECIAL_MEM_ECC_SEL 0x4C01F10 + +#define mmPCIE_WRAP_SPECIAL_MEM_ECC_CTL 0x4C01F14 + +#define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_MASK 0x4C01F18 + +#define mmPCIE_WRAP_SPECIAL_MEM_ECC_GLBL_ERR_MASK 0x4C01F1C + +#define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_STS 0x4C01F20 + +#define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_ADDR 0x4C01F24 + +#define mmPCIE_WRAP_SPECIAL_MEM_RM 0x4C01F28 + +#define mmPCIE_WRAP_SPECIAL_GLBL_ERR_MASK 0x4C01F40 + +#define mmPCIE_WRAP_SPECIAL_GLBL_ERR_ADDR 0x4C01F44 + +#define mmPCIE_WRAP_SPECIAL_GLBL_ERR_CAUSE 0x4C01F48 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0 0x4C01F60 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_1 0x4C01F64 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_2 0x4C01F68 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_3 0x4C01F6C + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_0 0x4C01F80 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_1 0x4C01F84 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_2 0x4C01F88 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_3 0x4C01F8C + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_4 0x4C01F90 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_5 0x4C01F94 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_6 0x4C01F98 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_7 0x4C01F9C + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_8 0x4C01FA0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_9 0x4C01FA4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_10 0x4C01FA8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_11 0x4C01FAC + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_12 0x4C01FB0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_13 0x4C01FB4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_14 0x4C01FB8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_15 0x4C01FBC + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_16 0x4C01FC0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_17 0x4C01FC4 + 
+#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_18 0x4C01FC8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_19 0x4C01FCC + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_20 0x4C01FD0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_21 0x4C01FD4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_22 0x4C01FD8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_23 0x4C01FDC + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_24 0x4C01FE0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_25 0x4C01FE4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_26 0x4C01FE8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_27 0x4C01FEC + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_28 0x4C01FF0 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_29 0x4C01FF4 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_30 0x4C01FF8 + +#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_31 0x4C01FFC + +#endif /* ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_ */ -- cgit v1.2.3 From 8412bb69ed789464adadf7f0906971c7be29e204 Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Thu, 15 Sep 2022 09:19:03 +0300 Subject: habanalabs: build ASICs from new to old Newer ASICs code changes more often, has more chance to fail compilation. So, let's compile them first so errors in those files will fail compilation sooner. 
Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile index b35d7000c86b..a48a9e0969ed 100644 --- a/drivers/misc/habanalabs/Makefile +++ b/drivers/misc/habanalabs/Makefile @@ -8,13 +8,13 @@ obj-$(CONFIG_HABANA_AI) := habanalabs.o include $(src)/common/Makefile habanalabs-y += $(HL_COMMON_FILES) -include $(src)/goya/Makefile -habanalabs-y += $(HL_GOYA_FILES) +include $(src)/gaudi2/Makefile +habanalabs-y += $(HL_GAUDI2_FILES) include $(src)/gaudi/Makefile habanalabs-y += $(HL_GAUDI_FILES) -include $(src)/gaudi2/Makefile -habanalabs-y += $(HL_GAUDI2_FILES) +include $(src)/goya/Makefile +habanalabs-y += $(HL_GOYA_FILES) habanalabs-$(CONFIG_DEBUG_FS) += common/debugfs.o -- cgit v1.2.3 From 6b9b9e244fdd0d6c5ee21b7b9d74282d9e43733a Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Tue, 30 Aug 2022 13:01:03 +0300 Subject: habanalabs: remove some f/w descriptor validations To be forward-backward compatible with the firmware in the initial communication during preboot, we need to remove the validation of the header size. This will allow us to add more fields to the lkd_fw_comms_desc structure. Instead of validating the header size, we just print a warning when a mismatch in the descriptor is detected, and we calculate the CRC based on the descriptor size reported by the firmware instead of calculating it ourselves.
Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 43 +++++++++------------------- 1 file changed, 14 insertions(+), 29 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 26a7529083e1..2de6a9bd564d 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -1900,50 +1900,36 @@ static int hl_fw_dynamic_validate_descriptor(struct hl_device *hdev, u64 addr; int rc; - if (le32_to_cpu(fw_desc->header.magic) != HL_COMMS_DESC_MAGIC) { - dev_err(hdev->dev, "Invalid magic for dynamic FW descriptor (%x)\n", + if (le32_to_cpu(fw_desc->header.magic) != HL_COMMS_DESC_MAGIC) + dev_warn(hdev->dev, "Invalid magic for dynamic FW descriptor (%x)\n", fw_desc->header.magic); - return -EIO; - } - if (fw_desc->header.version != HL_COMMS_DESC_VER) { - dev_err(hdev->dev, "Invalid version for dynamic FW descriptor (%x)\n", + if (fw_desc->header.version != HL_COMMS_DESC_VER) + dev_warn(hdev->dev, "Invalid version for dynamic FW descriptor (%x)\n", fw_desc->header.version); - return -EIO; - } /* - * calc CRC32 of data without header. + * Calc CRC32 of data without header. use the size of the descriptor + * reported by firmware, without calculating it ourself, to allow adding + * more fields to the lkd_fw_comms_desc structure. * note that no alignment/stride address issues here as all structures - * are 64 bit padded + * are 64 bit padded. 
*/ - data_size = sizeof(struct lkd_fw_comms_desc) - - sizeof(struct comms_desc_header); data_ptr = (u8 *)fw_desc + sizeof(struct comms_desc_header); - - if (le16_to_cpu(fw_desc->header.size) != data_size) { - dev_err(hdev->dev, - "Invalid descriptor size 0x%x, expected size 0x%zx\n", - le16_to_cpu(fw_desc->header.size), data_size); - return -EIO; - } + data_size = le16_to_cpu(fw_desc->header.size); data_crc32 = hl_fw_compat_crc32(data_ptr, data_size); - if (data_crc32 != le32_to_cpu(fw_desc->header.crc32)) { - dev_err(hdev->dev, - "CRC32 mismatch for dynamic FW descriptor (%x:%x)\n", - data_crc32, fw_desc->header.crc32); + dev_err(hdev->dev, "CRC32 mismatch for dynamic FW descriptor (%x:%x)\n", + data_crc32, fw_desc->header.crc32); return -EIO; } /* find memory region to which to copy the image */ addr = le64_to_cpu(fw_desc->img_addr); region_id = hl_get_pci_memory_region(hdev, addr); - if ((region_id != PCI_REGION_SRAM) && - ((region_id != PCI_REGION_DRAM))) { - dev_err(hdev->dev, - "Invalid region to copy FW image address=%llx\n", addr); + if ((region_id != PCI_REGION_SRAM) && ((region_id != PCI_REGION_DRAM))) { + dev_err(hdev->dev, "Invalid region to copy FW image address=%llx\n", addr); return -EIO; } @@ -1960,8 +1946,7 @@ static int hl_fw_dynamic_validate_descriptor(struct hl_device *hdev, fw_loader->dynamic_loader.fw_image_size, region); if (rc) { - dev_err(hdev->dev, - "invalid mem transfer request for FW image\n"); + dev_err(hdev->dev, "invalid mem transfer request for FW image\n"); return rc; } -- cgit v1.2.3 From e403856468456aeaff68a5cb0a851d945c133ed9 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 19 Sep 2022 18:46:03 +0300 Subject: habanalabs/gaudi: use 8KB aligned address for TPC kernels I$ prefetch is enabled when sending a TPC kernel to initialize the TPC memory, and it has a restriction that the base address will be aligned to 8KB. 
Currently the base address is 128 bytes from the start address of the device SRAM, so prefetching will start 128 bytes before the actual kernel memory. Modify the kernel address to be 8KB aligned. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index e80ebace49c8..92560414e843 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -984,9 +984,10 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev, init_tpc_mem_pkt->ctl = cpu_to_le32(ctl); init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr); - dst_addr = (prop->sram_user_base_address & - GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> - GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; + + /* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */ + dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK, + round_up(prop->sram_user_base_address, SZ_8K)); init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr); job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); -- cgit v1.2.3 From 259cee1c2422bcff7ba6bb4e8179faadb52ebdee Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 20 Sep 2022 00:08:40 +0300 Subject: habanalabs: eliminate aggregate use warning When doing sizeof() and giving as argument a dereference of a pointer-to-a-pointer object, clang will issue a warning. 
Eliminate the warning by passing struct * Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 4 ++-- drivers/misc/habanalabs/common/device.c | 4 ++-- drivers/misc/habanalabs/common/hw_queue.c | 4 +--- drivers/misc/habanalabs/common/hwmon.c | 3 ++- drivers/misc/habanalabs/common/memory.c | 3 +-- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index fbe5003191bf..fa05770865c6 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -2571,7 +2571,7 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_com ktime_t max_ktime, first_cs_time; enum hl_cs_wait_status status; - memset(fence_ptr, 0, arr_len * sizeof(*fence_ptr)); + memset(fence_ptr, 0, arr_len * sizeof(struct hl_fence *)); /* get all fences under the same lock */ rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len); @@ -2873,7 +2873,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) } /* allocate array for the fences */ - fence_arr = kmalloc_array(seq_arr_len, sizeof(*fence_arr), GFP_KERNEL); + fence_arr = kmalloc_array(seq_arr_len, sizeof(struct hl_fence *), GFP_KERNEL); if (!fence_arr) { rc = -ENOMEM; goto free_seq_arr; diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index c6a00bb259fb..233d8b46c831 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -719,7 +719,7 @@ static int device_early_init(struct hl_device *hdev) if (hdev->asic_prop.completion_queues_count) { hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count, - sizeof(*hdev->cq_wq), + sizeof(struct workqueue_struct *), GFP_KERNEL); if (!hdev->cq_wq) { rc = -ENOMEM; @@ -1863,7 +1863,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) } 
hdev->shadow_cs_queue = kcalloc(hdev->asic_prop.max_pending_cs, - sizeof(*hdev->shadow_cs_queue), GFP_KERNEL); + sizeof(struct hl_cs *), GFP_KERNEL); if (!hdev->shadow_cs_queue) { rc = -ENOMEM; goto cq_fini; diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c index 3f15ab9d827f..d0087c0ec48c 100644 --- a/drivers/misc/habanalabs/common/hw_queue.c +++ b/drivers/misc/habanalabs/common/hw_queue.c @@ -826,9 +826,7 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, q->kernel_address = p; - q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, - sizeof(*q->shadow_queue), - GFP_KERNEL); + q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, sizeof(struct hl_cs_job *), GFP_KERNEL); if (!q->shadow_queue) { dev_err(hdev->dev, "Failed to allocate shadow queue for H/W queue %d\n", diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c index 8c262aeb425e..55eb0203817f 100644 --- a/drivers/misc/habanalabs/common/hwmon.c +++ b/drivers/misc/habanalabs/common/hwmon.c @@ -194,7 +194,8 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sen curr_arr[sensors_by_type_next_index[type]++] = flags; } - channels_info = kcalloc(num_active_sensor_types + 1, sizeof(*channels_info), GFP_KERNEL); + channels_info = kcalloc(num_active_sensor_types + 1, sizeof(struct hwmon_channel_info *), + GFP_KERNEL); if (!channels_info) { rc = -ENOMEM; goto channels_info_array_err; diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 096fa3c1ae95..ef28f3b37b93 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -2308,8 +2308,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size, return -EFAULT; } - userptr->pages = kvmalloc_array(npages, sizeof(*userptr->pages), - GFP_KERNEL); + userptr->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); if 
(!userptr->pages) return -ENOMEM; -- cgit v1.2.3