From 467cfe945656df044c8cf9121e5cdbe5b977b497 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Mon, 19 Feb 2024 13:43:55 +0200 Subject: accel/habanalabs/gaudi2: align embedded specs headers Align embedded headers to latest release. Reviewed-by: Tomer Tayar Signed-off-by: Ofir Bitton --- include/linux/habanalabs/cpucp_if.h | 10 ++++++++-- include/linux/habanalabs/hl_boot_if.h | 29 ++++++++++++++++++++++------- 2 files changed, 30 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index f316c8d0f3fc..1ac1d68193e3 100644 --- a/include/linux/habanalabs/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -42,6 +42,12 @@ enum eq_event_id { EQ_EVENT_PWR_BRK_ENTRY, EQ_EVENT_PWR_BRK_EXIT, EQ_EVENT_HEARTBEAT, + EQ_EVENT_CPLD_RESET_REASON, + EQ_EVENT_CPLD_SHUTDOWN, + EQ_EVENT_POWER_EVT_START, + EQ_EVENT_POWER_EVT_END, + EQ_EVENT_THERMAL_EVT_START, + EQ_EVENT_THERMAL_EVT_END, }; /* @@ -1165,7 +1171,7 @@ struct cpucp_security_info { struct cpucp_info { struct cpucp_sensor sensors[CPUCP_MAX_SENSORS]; __u8 kernel_version[VERSION_MAX_LEN]; - __le32 reserved; + __le32 reserved1; __le32 card_type; __le32 card_location; __le32 cpld_version; @@ -1187,7 +1193,7 @@ struct cpucp_info { __u8 substrate_version; __u8 eq_health_check_supported; struct cpucp_security_info sec_info; - __le32 fw_hbm_region_size; + __le32 reserved2; __u8 pll_map[PLL_MAP_LEN]; __le64 mme_binning_mask; __u8 fw_os_version[VERSION_MAX_LEN]; diff --git a/include/linux/habanalabs/hl_boot_if.h b/include/linux/habanalabs/hl_boot_if.h index 93366d5621fd..d2a9fc96424b 100644 --- a/include/linux/habanalabs/hl_boot_if.h +++ b/include/linux/habanalabs/hl_boot_if.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2018-2020 HabanaLabs, Ltd. + * Copyright 2018-2023 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -49,7 +49,6 @@ enum cpu_boot_err { #define CPU_BOOT_ERR_FATAL_MASK \ ((1 << CPU_BOOT_ERR_DRAM_INIT_FAIL) | \ (1 << CPU_BOOT_ERR_PLL_FAIL) | \ - (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) | \ (1 << CPU_BOOT_ERR_BINNING_FAIL) | \ (1 << CPU_BOOT_ERR_DRAM_SKIPPED) | \ (1 << CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL) | \ @@ -194,6 +193,8 @@ enum cpu_boot_dev_sts { CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN = 24, CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN = 25, CPU_BOOT_DEV_STS_MAP_HWMON_EN = 26, + CPU_BOOT_DEV_STS_NIC_MEM_CLEAR_EN = 27, + CPU_BOOT_DEV_STS_MMU_PGTBL_DRAM_EN = 28, CPU_BOOT_DEV_STS_ENABLED = 31, CPU_BOOT_DEV_STS_SCND_EN = 63, CPU_BOOT_DEV_STS_LAST = 64 /* we have 2 registers of 32 bits */ @@ -331,6 +332,17 @@ enum cpu_boot_dev_sts { * HWMON enum mapping to cpucp enums. * Initialized in: linux * + * CPU_BOOT_DEV_STS0_NIC_MEM_CLEAR_EN + * If set, means f/w supports nic hbm memory clear and + * tmr,txs hbm memory init. + * Initialized in: zephyr-mgmt + * + * CPU_BOOT_DEV_STS_MMU_PGTBL_DRAM_EN + * MMU page tables are located in DRAM. + * F/W initializes security settings for MMU + * page tables to reside in DRAM. + * Initialized in: zephyr-mgmt + * * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. * This is a main indication that the * running FW populates the device status @@ -367,6 +379,8 @@ enum cpu_boot_dev_sts { #define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN) #define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN (1 << CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN) #define CPU_BOOT_DEV_STS0_MAP_HWMON_EN (1 << CPU_BOOT_DEV_STS_MAP_HWMON_EN) +#define CPU_BOOT_DEV_STS0_NIC_MEM_CLEAR_EN (1 << CPU_BOOT_DEV_STS_NIC_MEM_CLEAR_EN) +#define CPU_BOOT_DEV_STS0_MMU_PGTBL_DRAM_EN (1 << CPU_BOOT_DEV_STS_MMU_PGTBL_DRAM_EN) #define CPU_BOOT_DEV_STS0_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED) #define CPU_BOOT_DEV_STS1_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED) @@ -450,11 +464,11 @@ struct cpu_dyn_regs { __le32 gic_dma_core_irq_ctrl; __le32 gic_host_halt_irq; __le32 gic_host_ints_irq; - __le32 gic_host_soft_rst_irq; + __le32 reserved0; __le32 gic_rot_qm_irq_ctrl; - __le32 cpu_rst_status; + __le32 reserved1; __le32 eng_arc_irq_ctrl; - __le32 reserved1[20]; /* reserve for future use */ + __le32 reserved2[20]; /* reserve for future use */ }; /* TODO: remove the desc magic after the code is updated to use message */ @@ -551,8 +565,9 @@ enum lkd_fw_ascii_msg_lvls { LKD_FW_ASCII_MSG_DBG = 3, }; -#define LKD_FW_ASCII_MSG_MAX_LEN 128 -#define LKD_FW_ASCII_MSG_MAX 4 /* consider ABI when changing */ +#define LKD_FW_ASCII_MSG_MAX_LEN 128 +#define LKD_FW_ASCII_MSG_MAX 4 /* consider ABI when changing */ +#define LKD_FW_ASCII_MSG_MIN_DESC_VERSION 3 struct lkd_fw_ascii_msg { __u8 valid; -- cgit v1.2.3 From c2a27584ff3b7f17da3ed2abfa3956f9d376e3da Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Mon, 11 Mar 2024 18:31:10 +0200 Subject: accel/habanalabs: separate nonce from max_size in cpucp_packet struct In struct cpucp_packet both nonce and data_max_size members are in an union overlapping each other. This is a problem as they both are used in attestation and info_signed packets. The solution here is to move the nonce member to a different union under the same structure. Signed-off-by: Dani Liberman Reviewed-by: Ofir Bitton Signed-off-by: Ofir Bitton --- include/linux/habanalabs/cpucp_if.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index 1ac1d68193e3..0913415243e8 100644 --- a/include/linux/habanalabs/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -859,9 +859,6 @@ struct cpucp_packet { * result cannot be used to hold general purpose data. */ __le32 status_mask; - - /* random, used once number, for security packets */ - __le32 nonce; }; union { @@ -870,6 +867,9 @@ struct cpucp_packet { /* For Generic packet sub index */ __le32 pkt_subidx; + + /* random, used once number, for security packets */ + __le32 nonce; }; }; -- cgit v1.2.3 From cebb64f9335b073cc2877fed16c613c56bed82bc Mon Sep 17 00:00:00 2001 From: Vitaly Margolin Date: Mon, 27 May 2024 17:25:37 +0300 Subject: accel/habanalabs: add cpld ts cpld_timestamp cpucp Add cpld_timestamp field to cpucp_info structure and return cpld timestamp as part of cpld version Signed-off-by: Vitaly Margolin Reviewed-by: Ofir Bitton Signed-off-by: Ofir Bitton --- drivers/accel/habanalabs/common/sysfs.c | 5 +++-- include/linux/habanalabs/cpucp_if.h | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c index b6c63f8a0c1b..e9f8ccc0bbf9 100644 --- a/drivers/accel/habanalabs/common/sysfs.c +++ b/drivers/accel/habanalabs/common/sysfs.c @@ -142,8 +142,9 @@ static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr, { struct hl_device *hdev = dev_get_drvdata(dev); - return sprintf(buf, "0x%08x\n", - le32_to_cpu(hdev->asic_prop.cpucp_info.cpld_version)); + return sprintf(buf, "0x%08x%08x\n", + le32_to_cpu(hdev->asic_prop.cpucp_info.cpld_timestamp), + le32_to_cpu(hdev->asic_prop.cpucp_info.cpld_version)); } static ssize_t cpucp_kernel_ver_show(struct device *dev, diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index 0913415243e8..1ed17887f1a8 100644 --- a/include/linux/habanalabs/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -1146,6 +1146,7 @@ struct cpucp_security_info { * (0 = fully functional, 1 = lower-half is not functional, * 2 = upper-half is not functional) * @sec_info: security information + * @cpld_timestamp: CPLD programmed F/W timestamp. * @pll_map: Bit map of supported PLLs for current ASIC version. * @mme_binning_mask: MME binning mask, * bits [0:6] <==> dcore0 mme fma @@ -1193,7 +1194,7 @@ struct cpucp_info { __u8 substrate_version; __u8 eq_health_check_supported; struct cpucp_security_info sec_info; - __le32 reserved2; + __le32 cpld_timestamp; __u8 pll_map[PLL_MAP_LEN]; __le64 mme_binning_mask; __u8 fw_os_version[VERSION_MAX_LEN]; -- cgit v1.2.3 From c4548eee537ed7671170eb370c6c8db12dcfd3e8 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Tue, 16 Apr 2024 19:01:42 +0300 Subject: accel/habanalabs: dump the EQ entries headers on EQ heartbeat failure Add a dump of the EQ entries headers upon a EQ heartbeat failure. Signed-off-by: Tomer Tayar Reviewed-by: Ofir Bitton Signed-off-by: Ofir Bitton --- drivers/accel/habanalabs/common/device.c | 2 ++ drivers/accel/habanalabs/common/habanalabs.h | 1 + drivers/accel/habanalabs/common/irq.c | 25 +++++++++++++++++++++++++ include/linux/habanalabs/cpucp_if.h | 3 +++ 4 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c index 3efc26dd9497..7bd7c2eb5dd2 100644 --- a/drivers/accel/habanalabs/common/device.c +++ b/drivers/accel/habanalabs/common/device.c @@ -1082,6 +1082,8 @@ static bool hl_device_eq_heartbeat_received(struct hl_device *hdev) atomic_read(&hdev->kernel_queues[cpu_q_id].ci), atomic_read(&hdev->kernel_queues[cpu_q_id].ci) & pq_pi_mask); + hl_eq_dump(hdev, &hdev->event_queue); + return false; } diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index f4ac5e9b1974..ce78b331e244 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -3754,6 +3754,7 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q); void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q); void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q); void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q); +void hl_eq_dump(struct hl_device *hdev, struct hl_eq *q); irqreturn_t hl_irq_handler_cq(int irq, void *arg); irqreturn_t hl_irq_handler_eq(int irq, void *arg); irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg); diff --git a/drivers/accel/habanalabs/common/irq.c b/drivers/accel/habanalabs/common/irq.c index 2caf2df4de08..7c9f2f6a2870 100644 --- a/drivers/accel/habanalabs/common/irq.c +++ b/drivers/accel/habanalabs/common/irq.c @@ -697,3 +697,28 @@ void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q) memset(q->kernel_address, 0, q->size); } + +void hl_eq_dump(struct hl_device *hdev, struct hl_eq *q) +{ + u32 eq_length, eqe_size, ctl, ready, mode, type, index; + struct hl_eq_header *hdr; + u8 *ptr; + int i; + + eq_length = HL_EQ_LENGTH; + eqe_size = q->size / HL_EQ_LENGTH; + + dev_info(hdev->dev, "Contents of EQ entries headers:\n"); + + for (i = 0, ptr = q->kernel_address ; i < eq_length ; ++i, ptr += eqe_size) { + hdr = (struct hl_eq_header *) ptr; + ctl = le32_to_cpu(hdr->ctl); + ready = FIELD_GET(EQ_CTL_READY_MASK, ctl); + mode = FIELD_GET(EQ_CTL_EVENT_MODE_MASK, ctl); + type = FIELD_GET(EQ_CTL_EVENT_TYPE_MASK, ctl); + index = FIELD_GET(EQ_CTL_INDEX_MASK, ctl); + + dev_info(hdev->dev, "%02u: %#010x [ready: %u, mode %u, type %04u, index %05u]\n", + i, ctl, ready, mode, type, index); + } +} diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index 1ed17887f1a8..7ed3fdd55dda 100644 --- a/include/linux/habanalabs/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -397,6 +397,9 @@ struct hl_eq_entry { #define EQ_CTL_READY_SHIFT 31 #define EQ_CTL_READY_MASK 0x80000000 +#define EQ_CTL_EVENT_MODE_SHIFT 28 +#define EQ_CTL_EVENT_MODE_MASK 0x70000000 + #define EQ_CTL_EVENT_TYPE_SHIFT 16 #define EQ_CTL_EVENT_TYPE_MASK 0x0FFF0000 -- cgit v1.2.3