From 0a068adde505a90ece23caaf19b77567e1d18298 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Tue, 21 Jul 2020 10:49:51 +0300 Subject: habanalabs: add information about PCIe controller Update firmware header with new API for getting pcie info such as tx/rx throughput and replay counter. These counters are needed by customers for monitoring and maintenance of multiple devices. Add new opcodes to the INFO ioctl to retrieve these counters. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/misc/habanalabs/goya') diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 33cd2ae653d2..954f2c022d33 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -4580,18 +4580,22 @@ static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type) { switch (event_type) { case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S: + hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER; dev_info_ratelimited(hdev->dev, "Clock throttling due to power consumption\n"); break; case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E: + hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER; dev_info_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n"); break; case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S: + hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL; dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n"); break; case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E: + hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL; dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n"); break; -- cgit v1.2.3 From d6b045c083842d86a749357210c3006a36e3b66f Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Thu, 6 Aug 2020 09:20:49 +0300 Subject: habanalabs: print the queue id in case of an error If there is a failure during the testing of a queue, to ease up 
debugging - print the queue id. Signed-off-by: Dotan Barak Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 9 ++++++--- drivers/misc/habanalabs/goya/goya.c | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'drivers/misc/habanalabs/goya') diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 45ba3a5f5b14..89d44cd82a27 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -3448,7 +3448,8 @@ static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id) &fence_dma_addr); if (!fence_ptr) { dev_err(hdev->dev, - "Failed to allocate memory for queue testing\n"); + "Failed to allocate memory for H/W queue %d testing\n", + hw_queue_id); return -ENOMEM; } @@ -3459,7 +3460,8 @@ static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id) GFP_KERNEL, &pkt_dma_addr); if (!fence_pkt) { dev_err(hdev->dev, - "Failed to allocate packet for queue testing\n"); + "Failed to allocate packet for H/W queue %d testing\n", + hw_queue_id); rc = -ENOMEM; goto free_fence_ptr; } @@ -3476,7 +3478,8 @@ static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id) pkt_dma_addr); if (rc) { dev_err(hdev->dev, - "Failed to send fence packet\n"); + "Failed to send fence packet to H/W queue %d\n", + hw_queue_id); goto free_pkt; } diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 954f2c022d33..ac4d44fa56e4 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -2946,7 +2946,8 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id) &fence_dma_addr); if (!fence_ptr) { dev_err(hdev->dev, - "Failed to allocate memory for queue testing\n"); + "Failed to allocate memory for H/W queue %d testing\n", + hw_queue_id); return -ENOMEM; } @@ -2957,7 +2958,8 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id) GFP_KERNEL, &pkt_dma_addr); 
if (!fence_pkt) { dev_err(hdev->dev, - "Failed to allocate packet for queue testing\n"); + "Failed to allocate packet for H/W queue %d testing\n", + hw_queue_id); rc = -ENOMEM; goto free_fence_ptr; } @@ -2974,7 +2976,8 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id) pkt_dma_addr); if (rc) { dev_err(hdev->dev, - "Failed to send fence packet\n"); + "Failed to send fence packet to H/W queue %d\n", + hw_queue_id); goto free_pkt; } -- cgit v1.2.3 From d90416c84d86ff78a2181f135d72d564430107b8 Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Wed, 12 Aug 2020 17:20:13 +0300 Subject: habanalabs: extend busy engines mask to 64 bits change busy engines bitmask to 64 bits in order to represent more engines, needed for future ASIC support. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 2 +- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 2 +- drivers/misc/habanalabs/gaudi/gaudi.c | 2 +- drivers/misc/habanalabs/goya/goya.c | 2 +- include/uapi/misc/habanalabs.h | 6 ++++++ 5 files changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers/misc/habanalabs/goya') diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 2fd268e4cf10..fbdf105c4bb2 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -754,7 +754,7 @@ struct hl_asic_funcs { void (*set_clock_gating)(struct hl_device *hdev); void (*disable_clock_gating)(struct hl_device *hdev); int (*debug_coresight)(struct hl_device *hdev, void *data); - bool (*is_device_idle)(struct hl_device *hdev, u32 *mask, + bool (*is_device_idle)(struct hl_device *hdev, u64 *mask, struct seq_file *s); int (*soft_reset_late_init)(struct hl_device *hdev); void (*hw_queues_lock)(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 
fe6c5534d378..a94800014243 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -132,7 +132,7 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args) return -EINVAL; hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, - &hw_idle.busy_engines_mask, NULL); + &hw_idle.busy_engines_mask_ext, NULL); return copy_to_user(out, &hw_idle, min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index a0932261e67c..ba964a316b0b 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -6083,7 +6083,7 @@ static int gaudi_armcp_info_get(struct hl_device *hdev) return 0; } -static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask, +static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask, struct seq_file *s) { struct gaudi_device *gaudi = hdev->asic_specific; diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index ac4d44fa56e4..5fb3565c80c5 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -5148,7 +5148,7 @@ static void goya_disable_clock_gating(struct hl_device *hdev) /* clock gating not supported in Goya */ } -static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask, +static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask, struct seq_file *s) { const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n"; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index ca6dc1fc250e..693081728ef3 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -319,6 +319,12 @@ struct hl_info_hw_idle { * Bits definition is according to `enum _enging_id'. */ __u32 busy_engines_mask; + + /* + * Extended Bitmask of busy engines. + * Bits definition is according to `enum _enging_id'. 
+ */ + __u64 busy_engines_mask_ext; }; struct hl_info_device_status { -- cgit v1.2.3 From 2f55342c5e4d3ea94c0b8237f3ad26963269f90f Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 15 Aug 2020 16:28:10 +0300 Subject: habanalabs: replace armcp with the generic cpucp ArmCP mandates that the device CPU is always an ARM processor, which might be wrong in the future. Most of this change is an internal renaming of variables, functions and defines but there are two entries in sysfs which have armcp in their names. Add identical cpucp entries but don't remove the armcp entries yet. Those will be deprecated next year. Document this in the sysfs documentation. Signed-off-by: Moti Haimovski Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- Documentation/ABI/testing/sysfs-driver-habanalabs | 18 +- drivers/misc/habanalabs/common/debugfs.c | 18 +- drivers/misc/habanalabs/common/device.c | 2 +- drivers/misc/habanalabs/common/firmware_if.c | 127 ++++--- drivers/misc/habanalabs/common/habanalabs.h | 20 +- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 12 +- drivers/misc/habanalabs/common/hwmon.c | 60 ++-- drivers/misc/habanalabs/common/irq.c | 2 +- drivers/misc/habanalabs/common/sysfs.c | 60 ++-- drivers/misc/habanalabs/gaudi/gaudi.c | 30 +- drivers/misc/habanalabs/gaudi/gaudiP.h | 4 +- drivers/misc/habanalabs/goya/goya.c | 34 +- drivers/misc/habanalabs/goya/goyaP.h | 2 +- drivers/misc/habanalabs/include/common/armcp_if.h | 418 ---------------------- drivers/misc/habanalabs/include/common/cpucp_if.h | 417 +++++++++++++++++++++ include/uapi/misc/habanalabs.h | 4 +- 16 files changed, 631 insertions(+), 597 deletions(-) delete mode 100644 drivers/misc/habanalabs/include/common/armcp_if.h create mode 100644 drivers/misc/habanalabs/include/common/cpucp_if.h (limited to 'drivers/misc/habanalabs/goya') diff --git a/Documentation/ABI/testing/sysfs-driver-habanalabs b/Documentation/ABI/testing/sysfs-driver-habanalabs index 1a14bf9b22ba..169ae4b2a180 100644 --- 
a/Documentation/ABI/testing/sysfs-driver-habanalabs +++ b/Documentation/ABI/testing/sysfs-driver-habanalabs @@ -2,13 +2,17 @@ What: /sys/class/habanalabs/hl/armcp_kernel_ver Date: Jan 2019 KernelVersion: 5.1 Contact: oded.gabbay@gmail.com -Description: Version of the Linux kernel running on the device's CPU +Description: Version of the Linux kernel running on the device's CPU. + Will be DEPRECATED in Linux kernel version 5.10, and be + replaced with cpucp_kernel_ver What: /sys/class/habanalabs/hl/armcp_ver Date: Jan 2019 KernelVersion: 5.1 Contact: oded.gabbay@gmail.com Description: Version of the application running on the device's CPU + Will be DEPRECATED in Linux kernel version 5.10, and be + replaced with cpucp_ver What: /sys/class/habanalabs/hl/clk_max_freq_mhz Date: Jun 2019 @@ -33,6 +37,18 @@ KernelVersion: 5.1 Contact: oded.gabbay@gmail.com Description: Version of the Device's CPLD F/W +What: /sys/class/habanalabs/hl/cpucp_kernel_ver +Date: Oct 2020 +KernelVersion: 5.10 +Contact: oded.gabbay@gmail.com +Description: Version of the Linux kernel running on the device's CPU + +What: /sys/class/habanalabs/hl/cpucp_ver +Date: Oct 2020 +KernelVersion: 5.10 +Contact: oded.gabbay@gmail.com +Description: Version of the application running on the device's CPU + What: /sys/class/habanalabs/hl/device_type Date: Jan 2019 KernelVersion: 5.1 diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 4b416f64f6ec..c27c0f94c97a 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -21,7 +21,7 @@ static struct dentry *hl_debug_root; static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, u8 i2c_reg, long *val) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; if (hl_device_disabled_or_in_reset(hdev)) @@ -29,8 +29,8 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = 
cpu_to_le32(ARMCP_PACKET_I2C_RD << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_RD << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.i2c_bus = i2c_bus; pkt.i2c_addr = i2c_addr; pkt.i2c_reg = i2c_reg; @@ -47,7 +47,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, u8 i2c_reg, u32 val) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; if (hl_device_disabled_or_in_reset(hdev)) @@ -55,8 +55,8 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_WR << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_WR << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.i2c_bus = i2c_bus; pkt.i2c_addr = i2c_addr; pkt.i2c_reg = i2c_reg; @@ -73,7 +73,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; if (hl_device_disabled_or_in_reset(hdev)) @@ -81,8 +81,8 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state) memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_LED_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_LED_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.led_index = cpu_to_le32(led); pkt.value = cpu_to_le64(state); diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index dcb7f9ca7a67..6e916cc22a4c 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -871,7 +871,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset, * so this message won't be sent */ if (hl_fw_send_pci_access_msg(hdev, - ARMCP_PACKET_DISABLE_PCI_ACCESS)) + CPUCP_PACKET_DISABLE_PCI_ACCESS)) dev_warn(hdev->dev, "Failed to disable PCI 
access by F/W\n"); } diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index eb66ff532c6a..f2a38e95359a 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -68,9 +68,9 @@ out: int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode) { - struct armcp_packet pkt = {}; + struct cpucp_packet pkt = {}; - pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(opcode << CPUCP_PKT_CTL_OPCODE_SHIFT); return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); @@ -79,7 +79,7 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode) int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, u16 len, u32 timeout, long *result) { - struct armcp_packet *pkt; + struct cpucp_packet *pkt; dma_addr_t pkt_dma_addr; u32 tmp; int rc = 0; @@ -111,7 +111,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, } rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp, - (tmp == ARMCP_PACKET_FENCE_VAL), 1000, + (tmp == CPUCP_PACKET_FENCE_VAL), 1000, timeout, true); hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); @@ -124,12 +124,12 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, tmp = le32_to_cpu(pkt->ctl); - rc = (tmp & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT; + rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT; if (rc) { dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n", rc, - (tmp & ARMCP_PKT_CTL_OPCODE_MASK) - >> ARMCP_PKT_CTL_OPCODE_SHIFT); + (tmp & CPUCP_PKT_CTL_OPCODE_MASK) + >> CPUCP_PKT_CTL_OPCODE_SHIFT); rc = -EIO; } else if (result) { *result = (long) le64_to_cpu(pkt->result); @@ -145,14 +145,14 @@ out: int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type) { - struct armcp_packet pkt; + struct cpucp_packet pkt; long result; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = 
cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.value = cpu_to_le64(event_type); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), @@ -167,12 +167,12 @@ int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type) int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, size_t irq_arr_size) { - struct armcp_unmask_irq_arr_packet *pkt; + struct cpucp_unmask_irq_arr_packet *pkt; size_t total_pkt_size; long result; int rc; - total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) + + total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) + irq_arr_size; /* data should be aligned to 8 bytes in order to ArmCP to copy it */ @@ -191,8 +191,8 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0])); memcpy(&pkt->irqs, irq_arr, irq_arr_size); - pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << + CPUCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, total_pkt_size, 0, &result); @@ -207,19 +207,19 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, int hl_fw_test_cpu_queue(struct hl_device *hdev) { - struct armcp_packet test_pkt = {}; + struct cpucp_packet test_pkt = {}; long result; int rc; - test_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST << - ARMCP_PKT_CTL_OPCODE_SHIFT); - test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); + test_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST << + CPUCP_PKT_CTL_OPCODE_SHIFT); + test_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt, sizeof(test_pkt), 0, &result); if (!rc) { - if (result != ARMCP_PACKET_FENCE_VAL) + if (result != CPUCP_PACKET_FENCE_VAL) dev_err(hdev->dev, 
"CPU queue test failed (0x%08lX)\n", result); } else { @@ -251,61 +251,61 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, int hl_fw_send_heartbeat(struct hl_device *hdev) { - struct armcp_packet hb_pkt = {}; + struct cpucp_packet hb_pkt = {}; long result; int rc; - hb_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST << - ARMCP_PKT_CTL_OPCODE_SHIFT); - hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); + hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST << + CPUCP_PKT_CTL_OPCODE_SHIFT); + hb_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, sizeof(hb_pkt), 0, &result); - if ((rc) || (result != ARMCP_PACKET_FENCE_VAL)) + if ((rc) || (result != CPUCP_PACKET_FENCE_VAL)) rc = -EIO; return rc; } -int hl_fw_armcp_info_get(struct hl_device *hdev) +int hl_fw_cpucp_info_get(struct hl_device *hdev) { struct asic_fixed_properties *prop = &hdev->asic_prop; - struct armcp_packet pkt = {}; - void *armcp_info_cpu_addr; - dma_addr_t armcp_info_dma_addr; + struct cpucp_packet pkt = {}; + void *cpucp_info_cpu_addr; + dma_addr_t cpucp_info_dma_addr; long result; int rc; - armcp_info_cpu_addr = + cpucp_info_cpu_addr = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, - sizeof(struct armcp_info), - &armcp_info_dma_addr); - if (!armcp_info_cpu_addr) { + sizeof(struct cpucp_info), + &cpucp_info_dma_addr); + if (!cpucp_info_cpu_addr) { dev_err(hdev->dev, "Failed to allocate DMA memory for ArmCP info packet\n"); return -ENOMEM; } - memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info)); + memset(cpucp_info_cpu_addr, 0, sizeof(struct cpucp_info)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); - pkt.addr = cpu_to_le64(armcp_info_dma_addr); - pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info)); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_INFO_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.addr = cpu_to_le64(cpucp_info_dma_addr); + pkt.data_max_size = cpu_to_le32(sizeof(struct 
cpucp_info)); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_ARMCP_INFO_TIMEOUT_USEC, &result); + HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { dev_err(hdev->dev, "Failed to handle ArmCP info pkt, error %d\n", rc); goto out; } - memcpy(&prop->armcp_info, armcp_info_cpu_addr, - sizeof(prop->armcp_info)); + memcpy(&prop->cpucp_info, cpucp_info_cpu_addr, + sizeof(prop->cpucp_info)); - rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors); + rc = hl_build_hwmon_channel_info(hdev, prop->cpucp_info.sensors); if (rc) { dev_err(hdev->dev, "Failed to build hwmon channel info, error %d\n", rc); @@ -315,14 +315,14 @@ int hl_fw_armcp_info_get(struct hl_device *hdev) out: hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, - sizeof(struct armcp_info), armcp_info_cpu_addr); + sizeof(struct cpucp_info), cpucp_info_cpu_addr); return rc; } int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) { - struct armcp_packet pkt = {}; + struct cpucp_packet pkt = {}; void *eeprom_info_cpu_addr; dma_addr_t eeprom_info_dma_addr; long result; @@ -339,13 +339,13 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) memset(eeprom_info_cpu_addr, 0, max_size); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_EEPROM_DATA_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.addr = cpu_to_le64(eeprom_info_dma_addr); pkt.data_max_size = cpu_to_le32(max_size); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_ARMCP_EEPROM_TIMEOUT_USEC, &result); + HL_CPUCP_EEPROM_TIMEOUT_USEC, &result); if (rc) { dev_err(hdev->dev, @@ -363,20 +363,20 @@ out: return rc; } -int hl_fw_armcp_pci_counters_get(struct hl_device *hdev, +int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, struct hl_info_pci_counters *counters) { - struct armcp_packet pkt = {}; + struct cpucp_packet pkt = {}; long result; int rc; - pkt.ctl = 
cpu_to_le32(ARMCP_PACKET_PCIE_THROUGHPUT_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); /* Fetch PCI rx counter */ - pkt.index = cpu_to_le32(armcp_pcie_throughput_rx); + pkt.index = cpu_to_le32(cpucp_pcie_throughput_rx); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_ARMCP_INFO_TIMEOUT_USEC, &result); + HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { dev_err(hdev->dev, "Failed to handle ArmCP PCI info pkt, error %d\n", rc); @@ -385,9 +385,9 @@ int hl_fw_armcp_pci_counters_get(struct hl_device *hdev, counters->rx_throughput = result; /* Fetch PCI tx counter */ - pkt.index = cpu_to_le32(armcp_pcie_throughput_tx); + pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_ARMCP_INFO_TIMEOUT_USEC, &result); + HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { dev_err(hdev->dev, "Failed to handle ArmCP PCI info pkt, error %d\n", rc); @@ -396,11 +396,11 @@ int hl_fw_armcp_pci_counters_get(struct hl_device *hdev, counters->tx_throughput = result; /* Fetch PCI replay counter */ - pkt.ctl = cpu_to_le32(ARMCP_PACKET_PCIE_REPLAY_CNT_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_ARMCP_INFO_TIMEOUT_USEC, &result); + HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { dev_err(hdev->dev, "Failed to handle ArmCP PCI info pkt, error %d\n", rc); @@ -411,21 +411,20 @@ int hl_fw_armcp_pci_counters_get(struct hl_device *hdev, return rc; } -int hl_fw_armcp_total_energy_get(struct hl_device *hdev, - u64 *total_energy) +int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy) { - struct armcp_packet pkt = {}; + struct cpucp_packet pkt = {}; long result; int rc; - pkt.ctl = cpu_to_le32(ARMCP_PACKET_TOTAL_ENERGY_GET << - 
ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_TOTAL_ENERGY_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_ARMCP_INFO_TIMEOUT_USEC, &result); + HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { dev_err(hdev->dev, - "Failed to handle ArmCP total energy pkt, error %d\n", + "Failed to handle CpuCP total energy pkt, error %d\n", rc); return rc; } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 6577a73e3227..6912f88a4b01 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -8,7 +8,7 @@ #ifndef HABANALABSP_H_ #define HABANALABSP_H_ -#include "../include/common/armcp_if.h" +#include "../include/common/cpucp_if.h" #include "../include/common/qman_if.h" #include @@ -34,8 +34,8 @@ #define HL_PLL_LOW_JOB_FREQ_USEC 5000000 /* 5 s */ -#define HL_ARMCP_INFO_TIMEOUT_USEC 10000000 /* 10s */ -#define HL_ARMCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */ +#define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10s */ +#define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */ #define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */ @@ -250,7 +250,7 @@ struct hl_mmu_properties { /** * struct asic_fixed_properties - ASIC specific immutable properties. * @hw_queues_props: H/W queues properties. - * @armcp_info: received various information from ArmCP regarding the H/W, e.g. + * @cpucp_info: received various information from CPU-CP regarding the H/W, e.g. * available sensors. * @uboot_ver: F/W U-boot version. * @preboot_ver: F/W Preboot version. 
@@ -301,7 +301,7 @@ struct hl_mmu_properties { */ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; - struct armcp_info armcp_info; + struct cpucp_info cpucp_info; char uboot_ver[VERSION_MAX_LEN]; char preboot_ver[VERSION_MAX_LEN]; struct hl_mmu_properties dmmu; @@ -1588,7 +1588,7 @@ struct hl_device { u64 clock_gating_mask; atomic_t in_reset; enum hl_pll_frequency curr_pll_profile; - enum armcp_card_types card_type; + enum cpucp_card_types card_type; int cs_active_cnt; u32 major; u32 high_pll; @@ -1776,7 +1776,7 @@ int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq); uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms); int hl_build_hwmon_channel_info(struct hl_device *hdev, - struct armcp_sensor *sensors_arr); + struct cpucp_sensor *sensors_arr); int hl_sysfs_init(struct hl_device *hdev); void hl_sysfs_fini(struct hl_device *hdev); @@ -1848,11 +1848,11 @@ void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr); int hl_fw_send_heartbeat(struct hl_device *hdev); -int hl_fw_armcp_info_get(struct hl_device *hdev); +int hl_fw_cpucp_info_get(struct hl_device *hdev); int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size); -int hl_fw_armcp_pci_counters_get(struct hl_device *hdev, +int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, struct hl_info_pci_counters *counters); -int hl_fw_armcp_total_energy_get(struct hl_device *hdev, +int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy); int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 18ee14b4b0e1..07317ea49129 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ 
b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -65,14 +65,14 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args) hw_ip.dram_enabled = 1; hw_ip.num_of_events = prop->num_of_events; - memcpy(hw_ip.armcp_version, prop->armcp_info.armcp_version, + memcpy(hw_ip.cpucp_version, prop->cpucp_info.cpucp_version, min(VERSION_MAX_LEN, HL_INFO_VERSION_MAX_LEN)); - memcpy(hw_ip.card_name, prop->armcp_info.card_name, + memcpy(hw_ip.card_name, prop->cpucp_info.card_name, min(CARD_NAME_MAX_LEN, HL_INFO_CARD_NAME_MAX_LEN)); - hw_ip.armcp_cpld_version = le32_to_cpu(prop->armcp_info.cpld_version); - hw_ip.module_id = le32_to_cpu(prop->armcp_info.card_location); + hw_ip.cpld_version = le32_to_cpu(prop->cpucp_info.cpld_version); + hw_ip.module_id = le32_to_cpu(prop->cpucp_info.card_location); hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr; hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf; @@ -288,7 +288,7 @@ static int pci_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args) if ((!max_size) || (!out)) return -EINVAL; - rc = hl_fw_armcp_pci_counters_get(hdev, &pci_counters); + rc = hl_fw_cpucp_pci_counters_get(hdev, &pci_counters); if (rc) return rc; @@ -369,7 +369,7 @@ static int total_energy_consumption_info(struct hl_fpriv *hpriv, if ((!max_size) || (!out)) return -EINVAL; - rc = hl_fw_armcp_total_energy_get(hdev, + rc = hl_fw_cpucp_total_energy_get(hdev, &total_energy.total_energy_consumption); if (rc) return rc; diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c index b997336fa75f..2ac29cb2fe61 100644 --- a/drivers/misc/habanalabs/common/hwmon.c +++ b/drivers/misc/habanalabs/common/hwmon.c @@ -13,7 +13,7 @@ #define HWMON_NR_SENSOR_TYPES (hwmon_pwm + 1) int hl_build_hwmon_channel_info(struct hl_device *hdev, - struct armcp_sensor *sensors_arr) + struct cpucp_sensor *sensors_arr) { u32 counts[HWMON_NR_SENSOR_TYPES] = {0}; u32 *sensors_by_type[HWMON_NR_SENSOR_TYPES] = {NULL}; @@ -24,7 +24,7 @@ int 
hl_build_hwmon_channel_info(struct hl_device *hdev, enum hwmon_sensor_types type; int rc, i, j; - for (i = 0 ; i < ARMCP_MAX_SENSORS ; i++) { + for (i = 0 ; i < CPUCP_MAX_SENSORS ; i++) { type = le32_to_cpu(sensors_arr[i].type); if ((type == 0) && (sensors_arr[i].flags == 0)) @@ -311,13 +311,13 @@ static const struct hwmon_ops hl_hwmon_ops = { int hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long *value) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEMPERATURE_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); @@ -337,13 +337,13 @@ int hl_get_temperature(struct hl_device *hdev, int hl_set_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long value) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEMPERATURE_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); pkt.value = __cpu_to_le64(value); @@ -362,13 +362,13 @@ int hl_set_temperature(struct hl_device *hdev, int hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long *value) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_VOLTAGE_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); @@ -388,13 +388,13 @@ int hl_get_voltage(struct hl_device *hdev, int hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr, long *value) { - struct armcp_packet pkt; + struct 
cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_CURRENT_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); @@ -414,13 +414,13 @@ int hl_get_current(struct hl_device *hdev, int hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr, long *value) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_FAN_SPEED_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_FAN_SPEED_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); @@ -440,13 +440,13 @@ int hl_get_fan_speed(struct hl_device *hdev, int hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long *value) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_PWM_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); @@ -466,13 +466,13 @@ int hl_get_pwm_info(struct hl_device *hdev, void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long value) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_PWM_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); pkt.value = cpu_to_le64(value); @@ -489,13 +489,13 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value) { - struct armcp_packet 
pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_VOLTAGE_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); pkt.value = __cpu_to_le64(value); @@ -514,13 +514,13 @@ int hl_set_voltage(struct hl_device *hdev, int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_CURRENT_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.sensor_index = __cpu_to_le16(sensor_index); pkt.type = __cpu_to_le16(attr); pkt.value = __cpu_to_le64(value); @@ -549,7 +549,7 @@ int hl_hwmon_init(struct hl_device *hdev) hdev->hl_chip_info->ops = &hl_hwmon_ops; hdev->hwmon_dev = hwmon_device_register_with_info(dev, - prop->armcp_info.card_name, hdev, + prop->cpucp_info.card_name, hdev, hdev->hl_chip_info, NULL); if (IS_ERR(hdev->hwmon_dev)) { rc = PTR_ERR(hdev->hwmon_dev); diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c index c8db717023f5..d20e40a53d70 100644 --- a/drivers/misc/habanalabs/common/irq.c +++ b/drivers/misc/habanalabs/common/irq.c @@ -11,7 +11,7 @@ /** * struct hl_eqe_work - This structure is used to schedule work of EQ - * entry and armcp_reset event + * entry and cpucp_reset event * * @eq_work: workqueue object to run when EQ entry is received * @hdev: pointer to device structure diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c index 5ae484cc84cd..3ceae87016b1 100644 --- a/drivers/misc/habanalabs/common/sysfs.c +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -11,18 +11,18 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) { - struct 
armcp_packet pkt; + struct cpucp_packet pkt; long result; int rc; memset(&pkt, 0, sizeof(pkt)); if (curr) - pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_CURR_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_CURR_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); else - pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.pll_index = cpu_to_le32(pll_index); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), @@ -40,13 +40,13 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.pll_index = cpu_to_le32(pll_index); pkt.value = cpu_to_le64(freq); @@ -61,14 +61,14 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) u64 hl_get_max_power(struct hl_device *hdev) { - struct armcp_packet pkt; + struct cpucp_packet pkt; long result; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_GET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET << + CPUCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, &result); @@ -83,13 +83,13 @@ u64 hl_get_max_power(struct hl_device *hdev) void hl_set_max_power(struct hl_device *hdev) { - struct armcp_packet pkt; + struct cpucp_packet pkt; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.value = cpu_to_le64(hdev->max_power); rc = 
hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), @@ -112,7 +112,7 @@ static ssize_t armcp_kernel_ver_show(struct device *dev, { struct hl_device *hdev = dev_get_drvdata(dev); - return sprintf(buf, "%s", hdev->asic_prop.armcp_info.kernel_version); + return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.kernel_version); } static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr, @@ -120,7 +120,7 @@ static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr, { struct hl_device *hdev = dev_get_drvdata(dev); - return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.armcp_version); + return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version); } static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr, @@ -129,7 +129,23 @@ static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr, struct hl_device *hdev = dev_get_drvdata(dev); return sprintf(buf, "0x%08x\n", - hdev->asic_prop.armcp_info.cpld_version); + hdev->asic_prop.cpucp_info.cpld_version); +} + +static ssize_t cpucp_kernel_ver_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.kernel_version); +} + +static ssize_t cpucp_ver_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version); } static ssize_t infineon_ver_show(struct device *dev, @@ -138,7 +154,7 @@ static ssize_t infineon_ver_show(struct device *dev, struct hl_device *hdev = dev_get_drvdata(dev); return sprintf(buf, "0x%04x\n", - hdev->asic_prop.armcp_info.infineon_version); + hdev->asic_prop.cpucp_info.infineon_version); } static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr, @@ -146,7 +162,7 @@ static ssize_t fuse_ver_show(struct device *dev, struct 
device_attribute *attr, { struct hl_device *hdev = dev_get_drvdata(dev); - return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.fuse_version); + return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.fuse_version); } static ssize_t thermal_ver_show(struct device *dev, @@ -154,7 +170,7 @@ static ssize_t thermal_ver_show(struct device *dev, { struct hl_device *hdev = dev_get_drvdata(dev); - return sprintf(buf, "%s", hdev->asic_prop.armcp_info.thermal_version); + return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.thermal_version); } static ssize_t preboot_btl_ver_show(struct device *dev, @@ -356,6 +372,8 @@ out: static DEVICE_ATTR_RO(armcp_kernel_ver); static DEVICE_ATTR_RO(armcp_ver); static DEVICE_ATTR_RO(cpld_ver); +static DEVICE_ATTR_RO(cpucp_kernel_ver); +static DEVICE_ATTR_RO(cpucp_ver); static DEVICE_ATTR_RO(device_type); static DEVICE_ATTR_RO(fuse_ver); static DEVICE_ATTR_WO(hard_reset); @@ -380,6 +398,8 @@ static struct attribute *hl_dev_attrs[] = { &dev_attr_armcp_kernel_ver.attr, &dev_attr_armcp_ver.attr, &dev_attr_cpld_ver.attr, + &dev_attr_cpucp_kernel_ver.attr, + &dev_attr_cpucp_ver.attr, &dev_attr_device_type.attr, &dev_attr_fuse_ver.attr, &dev_attr_hard_reset.attr, diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 4a4327d9cbbf..076a7697f85d 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -359,7 +359,7 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id); static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev); -static int gaudi_armcp_info_get(struct hl_device *hdev); +static int gaudi_cpucp_info_get(struct hl_device *hdev); static void gaudi_disable_clock_gating(struct hl_device *hdev); static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid); @@ -465,7 +465,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) 
prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; - strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME, + strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); prop->max_pending_cs = GAUDI_MAX_PENDING_CS; @@ -786,13 +786,13 @@ static int gaudi_late_init(struct hl_device *hdev) struct gaudi_device *gaudi = hdev->asic_specific; int rc; - rc = gaudi->armcp_info_get(hdev); + rc = gaudi->cpucp_info_get(hdev); if (rc) { - dev_err(hdev->dev, "Failed to get armcp info\n"); + dev_err(hdev->dev, "Failed to get cpucp info\n"); return rc; } - rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS); + rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS); if (rc) { dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); return rc; @@ -817,7 +817,7 @@ static int gaudi_late_init(struct hl_device *hdev) return 0; disable_pci_access: - hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); + hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS); return rc; } @@ -987,7 +987,7 @@ static int gaudi_sw_init(struct hl_device *hdev) } } - gaudi->armcp_info_get = gaudi_armcp_info_get; + gaudi->cpucp_info_get = gaudi_cpucp_info_get; gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ; @@ -3078,7 +3078,7 @@ static int gaudi_suspend(struct hl_device *hdev) { int rc; - rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); + rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS); if (rc) dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); @@ -6053,7 +6053,7 @@ static int gaudi_send_heartbeat(struct hl_device *hdev) return hl_fw_send_heartbeat(hdev); } -static int gaudi_armcp_info_get(struct hl_device *hdev) +static int gaudi_cpucp_info_get(struct hl_device *hdev) { struct gaudi_device *gaudi = hdev->asic_specific; struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -6062,19 +6062,19 @@ static int 
gaudi_armcp_info_get(struct hl_device *hdev) if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) return 0; - rc = hl_fw_armcp_info_get(hdev); + rc = hl_fw_cpucp_info_get(hdev); if (rc) return rc; - if (!strlen(prop->armcp_info.card_name)) - strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME, + if (!strlen(prop->cpucp_info.card_name)) + strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); - hdev->card_type = le32_to_cpu(hdev->asic_prop.armcp_info.card_type); + hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type); - if (hdev->card_type == armcp_card_type_pci) + if (hdev->card_type == cpucp_card_type_pci) prop->max_power_default = MAX_POWER_DEFAULT_PCI; - else if (hdev->card_type == armcp_card_type_pmc) + else if (hdev->card_type == cpucp_card_type_pmc) prop->max_power_default = MAX_POWER_DEFAULT_PMC; hdev->max_power = prop->max_power_default; diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h index 1368f6298c80..b86eb98b145c 100644 --- a/drivers/misc/habanalabs/gaudi/gaudiP.h +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -216,7 +216,7 @@ struct gaudi_internal_qman_info { /** * struct gaudi_device - ASIC specific manage structure. - * @armcp_info_get: get information on device from ArmCP + * @cpucp_info_get: get information on device from CPU-CP * @hw_queues_lock: protects the H/W queues from concurrent access. * @clk_gate_mutex: protects code areas that require clock gating to be disabled * temporarily @@ -239,7 +239,7 @@ struct gaudi_internal_qman_info { * 8-bit value so use u8. 
*/ struct gaudi_device { - int (*armcp_info_get)(struct hl_device *hdev); + int (*cpucp_info_get)(struct hl_device *hdev); /* TODO: remove hw_queues_lock after moving to scheduler code */ spinlock_t hw_queues_lock; diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 5fb3565c80c5..c41f2917863b 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -449,7 +449,7 @@ int goya_get_fixed_properties(struct hl_device *hdev) prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; - strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME, + strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); prop->max_pending_cs = GOYA_MAX_PENDING_CS; @@ -727,9 +727,9 @@ int goya_late_init(struct hl_device *hdev) if (rc) return rc; - rc = goya_armcp_info_get(hdev); + rc = goya_cpucp_info_get(hdev); if (rc) { - dev_err(hdev->dev, "Failed to get armcp info %d\n", rc); + dev_err(hdev->dev, "Failed to get cpucp info %d\n", rc); return rc; } @@ -739,7 +739,7 @@ int goya_late_init(struct hl_device *hdev) */ WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size)); - rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS); + rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS); if (rc) { dev_err(hdev->dev, "Failed to enable PCI access from CPU %d\n", rc); @@ -2648,7 +2648,7 @@ int goya_suspend(struct hl_device *hdev) { int rc; - rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); + rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS); if (rc) dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); @@ -4500,14 +4500,14 @@ static void goya_print_irq_info(struct hl_device *hdev, u16 event_type, static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, size_t irq_arr_size) { - struct armcp_unmask_irq_arr_packet *pkt; + struct cpucp_unmask_irq_arr_packet *pkt; size_t 
total_pkt_size; long result; int rc; int irq_num_entries, irq_arr_index; __le32 *goya_irq_arr; - total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) + + total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) + irq_arr_size; /* data should be aligned to 8 bytes in order to ArmCP to copy it */ @@ -4534,8 +4534,8 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, goya_irq_arr[irq_arr_index] = cpu_to_le32(irq_arr[irq_arr_index]); - pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << + CPUCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, total_pkt_size, 0, &result); @@ -4560,14 +4560,14 @@ static int goya_soft_reset_late_init(struct hl_device *hdev) static int goya_unmask_irq(struct hl_device *hdev, u16 event_type) { - struct armcp_packet pkt; + struct cpucp_packet pkt; long result; int rc; memset(&pkt, 0, sizeof(pkt)); - pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ << - ARMCP_PKT_CTL_OPCODE_SHIFT); + pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ << + CPUCP_PKT_CTL_OPCODE_SHIFT); pkt.value = cpu_to_le64(event_type); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), @@ -5103,7 +5103,7 @@ int goya_send_heartbeat(struct hl_device *hdev) return hl_fw_send_heartbeat(hdev); } -int goya_armcp_info_get(struct hl_device *hdev) +int goya_cpucp_info_get(struct hl_device *hdev) { struct goya_device *goya = hdev->asic_specific; struct asic_fixed_properties *prop = &hdev->asic_prop; @@ -5113,11 +5113,11 @@ int goya_armcp_info_get(struct hl_device *hdev) if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) return 0; - rc = hl_fw_armcp_info_get(hdev); + rc = hl_fw_cpucp_info_get(hdev); if (rc) return rc; - dram_size = le64_to_cpu(prop->armcp_info.dram_size); + dram_size = le64_to_cpu(prop->cpucp_info.dram_size); if (dram_size) { if ((!is_power_of_2(dram_size)) || 
(dram_size < DRAM_PHYS_DEFAULT_SIZE)) { @@ -5131,8 +5131,8 @@ int goya_armcp_info_get(struct hl_device *hdev) prop->dram_end_address = prop->dram_base_address + dram_size; } - if (!strlen(prop->armcp_info.card_name)) - strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME, + if (!strlen(prop->cpucp_info.card_name)) + strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN); return 0; diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index bb7474ee9784..09b4006d4dc3 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -207,7 +207,7 @@ void goya_set_max_power(struct hl_device *hdev, u64 value); void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq); void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_attr_grp); -int goya_armcp_info_get(struct hl_device *hdev); +int goya_cpucp_info_get(struct hl_device *hdev); int goya_debug_coresight(struct hl_device *hdev, void *data); void goya_halt_coresight(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/include/common/armcp_if.h b/drivers/misc/habanalabs/include/common/armcp_if.h deleted file mode 100644 index 4d78898524e9..000000000000 --- a/drivers/misc/habanalabs/include/common/armcp_if.h +++ /dev/null @@ -1,418 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * Copyright 2016-2020 HabanaLabs, Ltd. - * All Rights Reserved. 
- * - */ - -#ifndef ARMCP_IF_H -#define ARMCP_IF_H - -#include <linux/types.h> - -/* - * EVENT QUEUE - */ - -struct hl_eq_header { - __le32 reserved; - __le32 ctl; -}; - -struct hl_eq_ecc_data { - __le64 ecc_address; - __le64 ecc_syndrom; - __u8 memory_wrapper_idx; - __u8 pad[7]; -}; - -struct hl_eq_entry { - struct hl_eq_header hdr; - union { - struct hl_eq_ecc_data ecc_data; - __le64 data[7]; - }; -}; - -#define HL_EQ_ENTRY_SIZE sizeof(struct hl_eq_entry) - -#define EQ_CTL_READY_SHIFT 31 -#define EQ_CTL_READY_MASK 0x80000000 - -#define EQ_CTL_EVENT_TYPE_SHIFT 16 -#define EQ_CTL_EVENT_TYPE_MASK 0x03FF0000 - -enum pq_init_status { - PQ_INIT_STATUS_NA = 0, - PQ_INIT_STATUS_READY_FOR_CP, - PQ_INIT_STATUS_READY_FOR_HOST, - PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI -}; - -/* - * ArmCP Primary Queue Packets - * - * During normal operation, the host's kernel driver needs to send various - * messages to ArmCP, usually either to SET some value into a H/W periphery or - * to GET the current value of some H/W periphery. For example, SET the - * frequency of MME/TPC and GET the value of the thermal sensor. - * - * These messages can be initiated either by the User application or by the - * host's driver itself, e.g. power management code. In either case, the - * communication from the host's driver to ArmCP will *always* be in - * synchronous mode, meaning that the host will send a single message and poll - * until the message was acknowledged and the results are ready (if results are - * needed). - * - * This means that only a single message can be sent at a time and the host's - * driver must wait for its result before sending the next message. Having said - * that, because these are control messages which are sent in a relatively low - * frequency, this limitation seems acceptable. It's important to note that - * in case of multiple devices, messages to different devices *can* be sent - * at the same time. 
- * - * The message, inputs/outputs (if relevant) and fence object will be located - * on the device DDR at an address that will be determined by the host's driver. - * During device initialization phase, the host will pass to ArmCP that address. - * Most of the message types will contain inputs/outputs inside the message - * itself. The common part of each message will contain the opcode of the - * message (its type) and a field representing a fence object. - * - * When the host's driver wishes to send a message to ArmCP, it will write the - * message contents to the device DDR, clear the fence object and then write the - * value 484 to the mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR register to issue - * the 484 interrupt-id to the ARM core. - * - * Upon receiving the 484 interrupt-id, ArmCP will read the message from the - * DDR. In case the message is a SET operation, ArmCP will first perform the - * operation and then write to the fence object on the device DDR. In case the - * message is a GET operation, ArmCP will first fill the results section on the - * device DDR and then write to the fence object. If an error occurred, ArmCP - * will fill the rc field with the right error code. - * - * In the meantime, the host's driver will poll on the fence object. Once the - * host sees that the fence object is signaled, it will read the results from - * the device DDR (if relevant) and resume the code execution in the host's - * driver. - * - * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8 - * so the value being put by the host's driver matches the value read by ArmCP - * - * Non-QMAN packets should be limited to values 1 through (2^8 - 1) - * - * Detailed description: - * - * ARMCP_PACKET_DISABLE_PCI_ACCESS - - * After receiving this packet the embedded CPU must NOT issue PCI - * transactions (read/write) towards the Host CPU. This also include - * sending MSI-X interrupts. - * This packet is usually sent before the device is moved to D3Hot state. 
- * - * ARMCP_PACKET_ENABLE_PCI_ACCESS - - * After receiving this packet the embedded CPU is allowed to issue PCI - * transactions towards the Host CPU, including sending MSI-X interrupts. - * This packet is usually send after the device is moved to D0 state. - * - * ARMCP_PACKET_TEMPERATURE_GET - - * Fetch the current temperature / Max / Max Hyst / Critical / - * Critical Hyst of a specified thermal sensor. The packet's - * arguments specify the desired sensor and the field to get. - * - * ARMCP_PACKET_VOLTAGE_GET - - * Fetch the voltage / Max / Min of a specified sensor. The packet's - * arguments specify the sensor and type. - * - * ARMCP_PACKET_CURRENT_GET - - * Fetch the current / Max / Min of a specified sensor. The packet's - * arguments specify the sensor and type. - * - * ARMCP_PACKET_FAN_SPEED_GET - - * Fetch the speed / Max / Min of a specified fan. The packet's - * arguments specify the sensor and type. - * - * ARMCP_PACKET_PWM_GET - - * Fetch the pwm value / mode of a specified pwm. The packet's - * arguments specify the sensor and type. - * - * ARMCP_PACKET_PWM_SET - - * Set the pwm value / mode of a specified pwm. The packet's - * arguments specify the sensor, type and value. - * - * ARMCP_PACKET_FREQUENCY_SET - - * Set the frequency of a specified PLL. The packet's arguments specify - * the PLL and the desired frequency. The actual frequency in the device - * might differ from the requested frequency. - * - * ARMCP_PACKET_FREQUENCY_GET - - * Fetch the frequency of a specified PLL. The packet's arguments specify - * the PLL. - * - * ARMCP_PACKET_LED_SET - - * Set the state of a specified led. The packet's arguments - * specify the led and the desired state. - * - * ARMCP_PACKET_I2C_WR - - * Write 32-bit value to I2C device. The packet's arguments specify the - * I2C bus, address and value. - * - * ARMCP_PACKET_I2C_RD - - * Read 32-bit value from I2C device. The packet's arguments specify the - * I2C bus and address. 
- * - * ARMCP_PACKET_INFO_GET - - * Fetch information from the device as specified in the packet's - * structure. The host's driver passes the max size it allows the ArmCP to - * write to the structure, to prevent data corruption in case of - * mismatched driver/FW versions. - * - * ARMCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed - * - * ARMCP_PACKET_UNMASK_RAZWI_IRQ - - * Unmask the given IRQ. The IRQ number is specified in the value field. - * The packet is sent after receiving an interrupt and printing its - * relevant information. - * - * ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY - - * Unmask the given IRQs. The IRQs numbers are specified in an array right - * after the armcp_packet structure, where its first element is the array - * length. The packet is sent after a soft reset was done in order to - * handle any interrupts that were sent during the reset process. - * - * ARMCP_PACKET_TEST - - * Test packet for ArmCP connectivity. The CPU will put the fence value - * in the result field. - * - * ARMCP_PACKET_FREQUENCY_CURR_GET - - * Fetch the current frequency of a specified PLL. The packet's arguments - * specify the PLL. - * - * ARMCP_PACKET_MAX_POWER_GET - - * Fetch the maximal power of the device. - * - * ARMCP_PACKET_MAX_POWER_SET - - * Set the maximal power of the device. The packet's arguments specify - * the power. - * - * ARMCP_PACKET_EEPROM_DATA_GET - - * Get EEPROM data from the ArmCP kernel. The buffer is specified in the - * addr field. The CPU will put the returned data size in the result - * field. In addition, the host's driver passes the max size it allows the - * ArmCP to write to the structure, to prevent data corruption in case of - * mismatched driver/FW versions. - * - * ARMCP_PACKET_TEMPERATURE_SET - - * Set the value of the offset property of a specified thermal sensor. - * The packet's arguments specify the desired sensor and the field to - * set. 
- * - * ARMCP_PACKET_VOLTAGE_SET - - * Trigger the reset_history property of a specified voltage sensor. - * The packet's arguments specify the desired sensor and the field to - * set. - * - * ARMCP_PACKET_CURRENT_SET - - * Trigger the reset_history property of a specified current sensor. - * The packet's arguments specify the desired sensor and the field to - * set. - */ - -enum armcp_packet_id { - ARMCP_PACKET_DISABLE_PCI_ACCESS = 1, /* internal */ - ARMCP_PACKET_ENABLE_PCI_ACCESS, /* internal */ - ARMCP_PACKET_TEMPERATURE_GET, /* sysfs */ - ARMCP_PACKET_VOLTAGE_GET, /* sysfs */ - ARMCP_PACKET_CURRENT_GET, /* sysfs */ - ARMCP_PACKET_FAN_SPEED_GET, /* sysfs */ - ARMCP_PACKET_PWM_GET, /* sysfs */ - ARMCP_PACKET_PWM_SET, /* sysfs */ - ARMCP_PACKET_FREQUENCY_SET, /* sysfs */ - ARMCP_PACKET_FREQUENCY_GET, /* sysfs */ - ARMCP_PACKET_LED_SET, /* debugfs */ - ARMCP_PACKET_I2C_WR, /* debugfs */ - ARMCP_PACKET_I2C_RD, /* debugfs */ - ARMCP_PACKET_INFO_GET, /* IOCTL */ - ARMCP_PACKET_FLASH_PROGRAM_REMOVED, - ARMCP_PACKET_UNMASK_RAZWI_IRQ, /* internal */ - ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY, /* internal */ - ARMCP_PACKET_TEST, /* internal */ - ARMCP_PACKET_FREQUENCY_CURR_GET, /* sysfs */ - ARMCP_PACKET_MAX_POWER_GET, /* sysfs */ - ARMCP_PACKET_MAX_POWER_SET, /* sysfs */ - ARMCP_PACKET_EEPROM_DATA_GET, /* sysfs */ - ARMCP_RESERVED, - ARMCP_PACKET_TEMPERATURE_SET, /* sysfs */ - ARMCP_PACKET_VOLTAGE_SET, /* sysfs */ - ARMCP_PACKET_CURRENT_SET, /* sysfs */ - ARMCP_PACKET_PCIE_THROUGHPUT_GET, /* internal */ - ARMCP_PACKET_PCIE_REPLAY_CNT_GET, /* internal */ - ARMCP_PACKET_TOTAL_ENERGY_GET, /* internal */ -}; - -#define ARMCP_PACKET_FENCE_VAL 0xFE8CE7A5 - -#define ARMCP_PKT_CTL_RC_SHIFT 12 -#define ARMCP_PKT_CTL_RC_MASK 0x0000F000 - -#define ARMCP_PKT_CTL_OPCODE_SHIFT 16 -#define ARMCP_PKT_CTL_OPCODE_MASK 0x1FFF0000 - -struct armcp_packet { - union { - __le64 value; /* For SET packets */ - __le64 result; /* For GET packets */ - __le64 addr; /* For PQ */ - }; - - __le32 ctl; - - 
__le32 fence; /* Signal to host that message is completed */ - - union { - struct {/* For temperature/current/voltage/fan/pwm get/set */ - __le16 sensor_index; - __le16 type; - }; - - struct { /* For I2C read/write */ - __u8 i2c_bus; - __u8 i2c_addr; - __u8 i2c_reg; - __u8 pad; /* unused */ - }; - - /* For any general request */ - __le32 index; - - /* For frequency get/set */ - __le32 pll_index; - - /* For led set */ - __le32 led_index; - - /* For get Armcp info/EEPROM data */ - __le32 data_max_size; - }; - - __le32 reserved; -}; - -struct armcp_unmask_irq_arr_packet { - struct armcp_packet armcp_pkt; - __le32 length; - __le32 irqs[0]; -}; - -enum armcp_packet_rc { - armcp_packet_success, - armcp_packet_invalid, - armcp_packet_fault -}; - -/* - * armcp_temp_type should adhere to hwmon_temp_attributes - * defined in Linux kernel hwmon.h file - */ -enum armcp_temp_type { - armcp_temp_input, - armcp_temp_max = 6, - armcp_temp_max_hyst, - armcp_temp_crit, - armcp_temp_crit_hyst, - armcp_temp_offset = 19, - armcp_temp_highest = 22, - armcp_temp_reset_history = 23 -}; - -enum armcp_in_attributes { - armcp_in_input, - armcp_in_min, - armcp_in_max, - armcp_in_highest = 7, - armcp_in_reset_history -}; - -enum armcp_curr_attributes { - armcp_curr_input, - armcp_curr_min, - armcp_curr_max, - armcp_curr_highest = 7, - armcp_curr_reset_history -}; - -enum armcp_fan_attributes { - armcp_fan_input, - armcp_fan_min = 2, - armcp_fan_max -}; - -enum armcp_pwm_attributes { - armcp_pwm_input, - armcp_pwm_enable -}; - -enum armcp_pcie_throughput_attributes { - armcp_pcie_throughput_tx, - armcp_pcie_throughput_rx -}; - -/* Event Queue Packets */ - -struct eq_generic_event { - __le64 data[7]; -}; - -/* - * ArmCP info - */ - -#define CARD_NAME_MAX_LEN 16 -#define VERSION_MAX_LEN 128 -#define ARMCP_MAX_SENSORS 128 - -struct armcp_sensor { - __le32 type; - __le32 flags; -}; - -/** - * struct armcp_card_types - ASIC card type. - * @armcp_card_type_pci: PCI card. 
- * @armcp_card_type_pmc: PCI Mezzanine Card. - */ -enum armcp_card_types { - armcp_card_type_pci, - armcp_card_type_pmc -}; - -/** - * struct armcp_info - Info from ArmCP that is necessary to the host's driver - * @sensors: available sensors description. - * @kernel_version: ArmCP linux kernel version. - * @reserved: reserved field. - * @card_type: card configuration type. - * @card_location: in a server, each card has different connections topology - * depending on its location (relevant for PMC card type) - * @cpld_version: CPLD programmed F/W version. - * @infineon_version: Infineon main DC-DC version. - * @fuse_version: silicon production FUSE information. - * @thermal_version: thermald S/W version. - * @armcp_version: ArmCP S/W version. - * @dram_size: available DRAM size. - * @card_name: card name that will be displayed in HWMON subsystem on the host - */ -struct armcp_info { - struct armcp_sensor sensors[ARMCP_MAX_SENSORS]; - __u8 kernel_version[VERSION_MAX_LEN]; - __le32 reserved; - __le32 card_type; - __le32 card_location; - __le32 cpld_version; - __le32 infineon_version; - __u8 fuse_version[VERSION_MAX_LEN]; - __u8 thermal_version[VERSION_MAX_LEN]; - __u8 armcp_version[VERSION_MAX_LEN]; - __le64 dram_size; - char card_name[CARD_NAME_MAX_LEN]; -}; - -#endif /* ARMCP_IF_H */ diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h new file mode 100644 index 000000000000..1e8480e978e2 --- /dev/null +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -0,0 +1,417 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright 2020 HabanaLabs, Ltd. + * All Rights Reserved. 
+ * + */ + +#ifndef CPUCP_IF_H +#define CPUCP_IF_H + +#include <linux/types.h> + +/* + * EVENT QUEUE + */ + +struct hl_eq_header { + __le32 reserved; + __le32 ctl; +}; + +struct hl_eq_ecc_data { + __le64 ecc_address; + __le64 ecc_syndrom; + __u8 memory_wrapper_idx; + __u8 pad[7]; +}; + +struct hl_eq_entry { + struct hl_eq_header hdr; + union { + struct hl_eq_ecc_data ecc_data; + __le64 data[7]; + }; +}; + +#define HL_EQ_ENTRY_SIZE sizeof(struct hl_eq_entry) + +#define EQ_CTL_READY_SHIFT 31 +#define EQ_CTL_READY_MASK 0x80000000 + +#define EQ_CTL_EVENT_TYPE_SHIFT 16 +#define EQ_CTL_EVENT_TYPE_MASK 0x03FF0000 + +enum pq_init_status { + PQ_INIT_STATUS_NA = 0, + PQ_INIT_STATUS_READY_FOR_CP, + PQ_INIT_STATUS_READY_FOR_HOST, + PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI +}; + +/* + * CpuCP Primary Queue Packets + * + * During normal operation, the host's kernel driver needs to send various + * messages to CpuCP, usually either to SET some value into a H/W periphery or + * to GET the current value of some H/W periphery. For example, SET the + * frequency of MME/TPC and GET the value of the thermal sensor. + * + * These messages can be initiated either by the User application or by the + * host's driver itself, e.g. power management code. In either case, the + * communication from the host's driver to CpuCP will *always* be in + * synchronous mode, meaning that the host will send a single message and poll + * until the message was acknowledged and the results are ready (if results are + * needed). + * + * This means that only a single message can be sent at a time and the host's + * driver must wait for its result before sending the next message. Having said + * that, because these are control messages which are sent in a relatively low + * frequency, this limitation seems acceptable. It's important to note that + * in case of multiple devices, messages to different devices *can* be sent + * at the same time. 
+ * + * The message, inputs/outputs (if relevant) and fence object will be located + * on the device DDR at an address that will be determined by the host's driver. + * During device initialization phase, the host will pass to CpuCP that address. + * Most of the message types will contain inputs/outputs inside the message + * itself. The common part of each message will contain the opcode of the + * message (its type) and a field representing a fence object. + * + * When the host's driver wishes to send a message to CpuCP, it will write the + * message contents to the device DDR, clear the fence object and then write to + * the PSOC_ARC1_AUX_SW_INTR, to issue interrupt 121 to ARC Management CPU. + * + * Upon receiving the interrupt (#121), CpuCP will read the message from the + * DDR. In case the message is a SET operation, CpuCP will first perform the + * operation and then write to the fence object on the device DDR. In case the + * message is a GET operation, CpuCP will first fill the results section on the + * device DDR and then write to the fence object. If an error occurred, CpuCP + * will fill the rc field with the right error code. + * + * In the meantime, the host's driver will poll on the fence object. Once the + * host sees that the fence object is signaled, it will read the results from + * the device DDR (if relevant) and resume the code execution in the host's + * driver. + * + * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8 + * so the value being put by the host's driver matches the value read by CpuCP + * + * Non-QMAN packets should be limited to values 1 through (2^8 - 1) + * + * Detailed description: + * + * CPUCP_PACKET_DISABLE_PCI_ACCESS - + * After receiving this packet the embedded CPU must NOT issue PCI + * transactions (read/write) towards the Host CPU. This also includes + * sending MSI-X interrupts. + * This packet is usually sent before the device is moved to D3Hot state.
+ * + * CPUCP_PACKET_ENABLE_PCI_ACCESS - + * After receiving this packet the embedded CPU is allowed to issue PCI + * transactions towards the Host CPU, including sending MSI-X interrupts. + * This packet is usually sent after the device is moved to D0 state. + * + * CPUCP_PACKET_TEMPERATURE_GET - + * Fetch the current temperature / Max / Max Hyst / Critical / + * Critical Hyst of a specified thermal sensor. The packet's + * arguments specify the desired sensor and the field to get. + * + * CPUCP_PACKET_VOLTAGE_GET - + * Fetch the voltage / Max / Min of a specified sensor. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_CURRENT_GET - + * Fetch the current / Max / Min of a specified sensor. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_FAN_SPEED_GET - + * Fetch the speed / Max / Min of a specified fan. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_PWM_GET - + * Fetch the pwm value / mode of a specified pwm. The packet's + * arguments specify the sensor and type. + * + * CPUCP_PACKET_PWM_SET - + * Set the pwm value / mode of a specified pwm. The packet's + * arguments specify the sensor, type and value. + * + * CPUCP_PACKET_FREQUENCY_SET - + * Set the frequency of a specified PLL. The packet's arguments specify + * the PLL and the desired frequency. The actual frequency in the device + * might differ from the requested frequency. + * + * CPUCP_PACKET_FREQUENCY_GET - + * Fetch the frequency of a specified PLL. The packet's arguments specify + * the PLL. + * + * CPUCP_PACKET_LED_SET - + * Set the state of a specified led. The packet's arguments + * specify the led and the desired state. + * + * CPUCP_PACKET_I2C_WR - + * Write 32-bit value to I2C device. The packet's arguments specify the + * I2C bus, address and value. + * + * CPUCP_PACKET_I2C_RD - + * Read 32-bit value from I2C device. The packet's arguments specify the + * I2C bus and address.
+ * + * CPUCP_PACKET_INFO_GET - + * Fetch information from the device as specified in the packet's + * structure. The host's driver passes the max size it allows the CpuCP to + * write to the structure, to prevent data corruption in case of + * mismatched driver/FW versions. + * + * CPUCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed + * + * CPUCP_PACKET_UNMASK_RAZWI_IRQ - + * Unmask the given IRQ. The IRQ number is specified in the value field. + * The packet is sent after receiving an interrupt and printing its + * relevant information. + * + * CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY - + * Unmask the given IRQs. The IRQs numbers are specified in an array right + * after the cpucp_packet structure, where its first element is the array + * length. The packet is sent after a soft reset was done in order to + * handle any interrupts that were sent during the reset process. + * + * CPUCP_PACKET_TEST - + * Test packet for CpuCP connectivity. The CPU will put the fence value + * in the result field. + * + * CPUCP_PACKET_FREQUENCY_CURR_GET - + * Fetch the current frequency of a specified PLL. The packet's arguments + * specify the PLL. + * + * CPUCP_PACKET_MAX_POWER_GET - + * Fetch the maximal power of the device. + * + * CPUCP_PACKET_MAX_POWER_SET - + * Set the maximal power of the device. The packet's arguments specify + * the power. + * + * CPUCP_PACKET_EEPROM_DATA_GET - + * Get EEPROM data from the CpuCP kernel. The buffer is specified in the + * addr field. The CPU will put the returned data size in the result + * field. In addition, the host's driver passes the max size it allows the + * CpuCP to write to the structure, to prevent data corruption in case of + * mismatched driver/FW versions. + * + * CPUCP_PACKET_TEMPERATURE_SET - + * Set the value of the offset property of a specified thermal sensor. + * The packet's arguments specify the desired sensor and the field to + * set. 
+ * + * CPUCP_PACKET_VOLTAGE_SET - + * Trigger the reset_history property of a specified voltage sensor. + * The packet's arguments specify the desired sensor and the field to + * set. + * + * CPUCP_PACKET_CURRENT_SET - + * Trigger the reset_history property of a specified current sensor. + * The packet's arguments specify the desired sensor and the field to + * set. + */ + +enum cpucp_packet_id { + CPUCP_PACKET_DISABLE_PCI_ACCESS = 1, /* internal */ + CPUCP_PACKET_ENABLE_PCI_ACCESS, /* internal */ + CPUCP_PACKET_TEMPERATURE_GET, /* sysfs */ + CPUCP_PACKET_VOLTAGE_GET, /* sysfs */ + CPUCP_PACKET_CURRENT_GET, /* sysfs */ + CPUCP_PACKET_FAN_SPEED_GET, /* sysfs */ + CPUCP_PACKET_PWM_GET, /* sysfs */ + CPUCP_PACKET_PWM_SET, /* sysfs */ + CPUCP_PACKET_FREQUENCY_SET, /* sysfs */ + CPUCP_PACKET_FREQUENCY_GET, /* sysfs */ + CPUCP_PACKET_LED_SET, /* debugfs */ + CPUCP_PACKET_I2C_WR, /* debugfs */ + CPUCP_PACKET_I2C_RD, /* debugfs */ + CPUCP_PACKET_INFO_GET, /* IOCTL */ + CPUCP_PACKET_FLASH_PROGRAM_REMOVED, + CPUCP_PACKET_UNMASK_RAZWI_IRQ, /* internal */ + CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY, /* internal */ + CPUCP_PACKET_TEST, /* internal */ + CPUCP_PACKET_FREQUENCY_CURR_GET, /* sysfs */ + CPUCP_PACKET_MAX_POWER_GET, /* sysfs */ + CPUCP_PACKET_MAX_POWER_SET, /* sysfs */ + CPUCP_PACKET_EEPROM_DATA_GET, /* sysfs */ + CPUCP_RESERVED, + CPUCP_PACKET_TEMPERATURE_SET, /* sysfs */ + CPUCP_PACKET_VOLTAGE_SET, /* sysfs */ + CPUCP_PACKET_CURRENT_SET, /* sysfs */ + CPUCP_PACKET_PCIE_THROUGHPUT_GET, /* internal */ + CPUCP_PACKET_PCIE_REPLAY_CNT_GET, /* internal */ + CPUCP_PACKET_TOTAL_ENERGY_GET, /* internal */ +}; + +#define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 + +#define CPUCP_PKT_CTL_RC_SHIFT 12 +#define CPUCP_PKT_CTL_RC_MASK 0x0000F000 + +#define CPUCP_PKT_CTL_OPCODE_SHIFT 16 +#define CPUCP_PKT_CTL_OPCODE_MASK 0x1FFF0000 + +struct cpucp_packet { + union { + __le64 value; /* For SET packets */ + __le64 result; /* For GET packets */ + __le64 addr; /* For PQ */ + }; + + __le32 ctl; + + 
__le32 fence; /* Signal to host that message is completed */ + + union { + struct {/* For temperature/current/voltage/fan/pwm get/set */ + __le16 sensor_index; + __le16 type; + }; + + struct { /* For I2C read/write */ + __u8 i2c_bus; + __u8 i2c_addr; + __u8 i2c_reg; + __u8 pad; /* unused */ + }; + + /* For any general request */ + __le32 index; + + /* For frequency get/set */ + __le32 pll_index; + + /* For led set */ + __le32 led_index; + + /* For get CpuCP info/EEPROM data */ + __le32 data_max_size; + }; + + __le32 reserved; +}; + +struct cpucp_unmask_irq_arr_packet { + struct cpucp_packet cpucp_pkt; + __le32 length; + __le32 irqs[0]; +}; + +enum cpucp_packet_rc { + cpucp_packet_success, + cpucp_packet_invalid, + cpucp_packet_fault +}; + +/* + * cpucp_temp_type should adhere to hwmon_temp_attributes + * defined in Linux kernel hwmon.h file + */ +enum cpucp_temp_type { + cpucp_temp_input, + cpucp_temp_max = 6, + cpucp_temp_max_hyst, + cpucp_temp_crit, + cpucp_temp_crit_hyst, + cpucp_temp_offset = 19, + cpucp_temp_highest = 22, + cpucp_temp_reset_history = 23 +}; + +enum cpucp_in_attributes { + cpucp_in_input, + cpucp_in_min, + cpucp_in_max, + cpucp_in_highest = 7, + cpucp_in_reset_history +}; + +enum cpucp_curr_attributes { + cpucp_curr_input, + cpucp_curr_min, + cpucp_curr_max, + cpucp_curr_highest = 7, + cpucp_curr_reset_history +}; + +enum cpucp_fan_attributes { + cpucp_fan_input, + cpucp_fan_min = 2, + cpucp_fan_max +}; + +enum cpucp_pwm_attributes { + cpucp_pwm_input, + cpucp_pwm_enable +}; + +enum cpucp_pcie_throughput_attributes { + cpucp_pcie_throughput_tx, + cpucp_pcie_throughput_rx +}; + +/* Event Queue Packets */ + +struct eq_generic_event { + __le64 data[7]; +}; + +/* + * CpuCP info + */ + +#define CARD_NAME_MAX_LEN 16 +#define VERSION_MAX_LEN 128 +#define CPUCP_MAX_SENSORS 128 + +struct cpucp_sensor { + __le32 type; + __le32 flags; +}; + +/** + * enum cpucp_card_types - ASIC card type. + * @cpucp_card_type_pci: PCI card.
+ * @cpucp_card_type_pmc: PCI Mezzanine Card. + */ +enum cpucp_card_types { + cpucp_card_type_pci, + cpucp_card_type_pmc +}; + +/** + * struct cpucp_info - Info from CpuCP that is necessary to the host's driver + * @sensors: available sensors description. + * @kernel_version: CpuCP linux kernel version. + * @reserved: reserved field. + * @card_type: card configuration type. + * @card_location: in a server, each card has different connections topology + * depending on its location (relevant for PMC card type) + * @cpld_version: CPLD programmed F/W version. + * @infineon_version: Infineon main DC-DC version. + * @fuse_version: silicon production FUSE information. + * @thermal_version: thermald S/W version. + * @cpucp_version: CpuCP S/W version. + * @dram_size: available DRAM size. + * @card_name: card name that will be displayed in HWMON subsystem on the host + */ +struct cpucp_info { + struct cpucp_sensor sensors[CPUCP_MAX_SENSORS]; + __u8 kernel_version[VERSION_MAX_LEN]; + __le32 reserved; + __le32 card_type; + __le32 card_location; + __le32 cpld_version; + __le32 infineon_version; + __u8 fuse_version[VERSION_MAX_LEN]; + __u8 thermal_version[VERSION_MAX_LEN]; + __u8 cpucp_version[VERSION_MAX_LEN]; + __le64 dram_size; + char card_name[CARD_NAME_MAX_LEN]; +}; + +#endif /* CPUCP_IF_H */ diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 6803991726e8..a2dcad29340f 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -297,7 +297,7 @@ struct hl_info_hw_ip_info { __u32 device_id; /* PCI Device ID */ __u32 module_id; /* For mezzanine cards in servers (From OCP spec.) 
*/ __u32 reserved[2]; - __u32 armcp_cpld_version; + __u32 cpld_version; __u32 psoc_pci_pll_nr; __u32 psoc_pci_pll_nf; __u32 psoc_pci_pll_od; @@ -305,7 +305,7 @@ struct hl_info_hw_ip_info { __u8 tpc_enabled_mask; __u8 dram_enabled; __u8 pad[2]; - __u8 armcp_version[HL_INFO_VERSION_MAX_LEN]; + __u8 cpucp_version[HL_INFO_VERSION_MAX_LEN]; __u8 card_name[HL_INFO_CARD_NAME_MAX_LEN]; }; -- cgit v1.2.3 From f763946aefe67b3ea58696b75a930ba1ed886a83 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 29 Aug 2020 11:24:03 +0300 Subject: habanalabs: cast to u64 before shift > 31 bits When shifting a boolean variable by more than 31 bits and putting the result into a u64 variable, we need to cast the boolean into unsigned 64 bits to prevent possible overflow. Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 8 +++++--- drivers/misc/habanalabs/goya/goya.c | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'drivers/misc/habanalabs/goya') diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 076a7697f85d..084019788e11 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -6114,7 +6114,7 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask, is_idle &= is_eng_idle; if (mask) - *mask |= !is_eng_idle << + *mask |= ((u64) !is_eng_idle) << (GAUDI_ENGINE_ID_DMA_0 + dma_id); if (s) seq_printf(s, fmt, dma_id, @@ -6137,7 +6137,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask, is_idle &= is_eng_idle; if (mask) - *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_TPC_0 + i); + *mask |= ((u64) !is_eng_idle) << + (GAUDI_ENGINE_ID_TPC_0 + i); if (s) seq_printf(s, fmt, i, is_eng_idle ? 
"Y" : "N", @@ -6165,7 +6166,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask, is_idle &= is_eng_idle; if (mask) - *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_MME_0 + i); + *mask |= ((u64) !is_eng_idle) << + (GAUDI_ENGINE_ID_MME_0 + i); if (s) { if (!is_slave) seq_printf(s, fmt, i, diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index c41f2917863b..88847eb1b472 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -5173,7 +5173,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask, is_idle &= is_eng_idle; if (mask) - *mask |= !is_eng_idle << (GOYA_ENGINE_ID_DMA_0 + i); + *mask |= ((u64) !is_eng_idle) << + (GOYA_ENGINE_ID_DMA_0 + i); if (s) seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N", qm_glbl_sts0, dma_core_sts0); @@ -5196,7 +5197,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask, is_idle &= is_eng_idle; if (mask) - *mask |= !is_eng_idle << (GOYA_ENGINE_ID_TPC_0 + i); + *mask |= ((u64) !is_eng_idle) << + (GOYA_ENGINE_ID_TPC_0 + i); if (s) seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N", qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts); @@ -5216,7 +5218,7 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask, is_idle &= is_eng_idle; if (mask) - *mask |= !is_eng_idle << GOYA_ENGINE_ID_MME_0; + *mask |= ((u64) !is_eng_idle) << GOYA_ENGINE_ID_MME_0; if (s) { seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0, cmdq_glbl_sts0, mme_arch_sts); -- cgit v1.2.3 From 0db575350cb1a2fa724f0198fd40b2c91ace5cb7 Mon Sep 17 00:00:00 2001 From: Hillf Danton Date: Sun, 23 Aug 2020 07:32:42 +0800 Subject: habanalabs: make use of dma_mmap_coherent Add dma_mmap_coherent() for goya and gaudi to match their use of dma_alloc_coherent(), see the Link tag for why. 
Link: https://lore.kernel.org/lkml/20200609091727.GA23814@lst.de/ Cc: Christoph Hellwig Cc: Zhang Li Cc: Ding Z Nan Signed-off-by: Hillf Danton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_buffer.c | 9 ++------- drivers/misc/habanalabs/common/habanalabs.h | 2 +- drivers/misc/habanalabs/gaudi/gaudi.c | 7 +++---- drivers/misc/habanalabs/goya/goya.c | 7 +++---- 4 files changed, 9 insertions(+), 16 deletions(-) (limited to 'drivers/misc/habanalabs/goya') diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index 6563e4dfe7b6..ba63cee74050 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -300,7 +300,6 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) { struct hl_device *hdev = hpriv->hdev; struct hl_cb *cb; - phys_addr_t address; u32 handle, user_cb_size; int rc; @@ -360,12 +359,8 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) vma->vm_private_data = cb; - /* Calculate address for CB */ - address = virt_to_phys((void *) (uintptr_t) cb->kernel_address); - - rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address, - address, cb->size); - + rc = hdev->asic_funcs->cb_mmap(hdev, vma, (void *) cb->kernel_address, + cb->bus_address, cb->size); if (rc) { spin_lock(&cb->lock); cb->mmap = false; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index cdb7a672ed30..6ea8ae616cf4 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -710,7 +710,7 @@ struct hl_asic_funcs { int (*suspend)(struct hl_device *hdev); int (*resume)(struct hl_device *hdev); int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma, - u64 kaddress, phys_addr_t paddress, u32 size); + void *cpu_addr, dma_addr_t dma_addr, size_t size); void (*ring_doorbell)(struct hl_device 
*hdev, u32 hw_queue_id, u32 pi); void (*pqe_write)(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd); diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 084019788e11..bc7e7e3ba3a8 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -3091,17 +3091,16 @@ static int gaudi_resume(struct hl_device *hdev) } static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma, - u64 kaddress, phys_addr_t paddress, u32 size) + void *cpu_addr, dma_addr_t dma_addr, size_t size) { int rc; vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE; - rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT, - size, vma->vm_page_prot); + rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size); if (rc) - dev_err(hdev->dev, "remap_pfn_range error %d", rc); + dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); return rc; } diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 88847eb1b472..94b7958ba5fb 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -2661,17 +2661,16 @@ int goya_resume(struct hl_device *hdev) } static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma, - u64 kaddress, phys_addr_t paddress, u32 size) + void *cpu_addr, dma_addr_t dma_addr, size_t size) { int rc; vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE; - rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT, - size, vma->vm_page_prot); + rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size); if (rc) - dev_err(hdev->dev, "remap_pfn_range error %d", rc); + dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); return rc; } -- cgit v1.2.3 From 6138bbe911264198ba16659c333084ab4bfb0c73 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Fri, 4 Sep 2020 20:18:16 +0300 Subject: habanalabs: rename 
ArmCP to CPU-CP There were a couple of comments where the name ArmCP was still used. Rename it to CPU-CP. In addition, rename ArmCP or ARM in log messages to "device CPU". Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 17 +++++++++-------- drivers/misc/habanalabs/common/habanalabs.h | 22 +++++++++++----------- drivers/misc/habanalabs/gaudi/gaudi.c | 4 ++-- drivers/misc/habanalabs/goya/goya.c | 2 +- 4 files changed, 23 insertions(+), 22 deletions(-) (limited to 'drivers/misc/habanalabs/goya') diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index f2a38e95359a..4409962d30ae 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -175,7 +175,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) + irq_arr_size; - /* data should be aligned to 8 bytes in order to ArmCP to copy it */ + /* data should be aligned to 8 bytes in order to CPU-CP to copy it */ total_pkt_size = (total_pkt_size + 0x7) & ~0x7; /* total_pkt_size is casted to u16 later on */ @@ -283,7 +283,7 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev) &cpucp_info_dma_addr); if (!cpucp_info_cpu_addr) { dev_err(hdev->dev, - "Failed to allocate DMA memory for ArmCP info packet\n"); + "Failed to allocate DMA memory for CPU-CP info packet\n"); return -ENOMEM; } @@ -298,7 +298,7 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev) HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { dev_err(hdev->dev, - "Failed to handle ArmCP info pkt, error %d\n", rc); + "Failed to handle CPU-CP info pkt, error %d\n", rc); goto out; } @@ -333,7 +333,7 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) max_size, &eeprom_info_dma_addr); if (!eeprom_info_cpu_addr) { dev_err(hdev->dev, - "Failed to allocate DMA memory for ArmCP EEPROM packet\n"); + "Failed to allocate DMA memory for CPU-CP 
EEPROM packet\n"); return -ENOMEM; } @@ -349,7 +349,8 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size) if (rc) { dev_err(hdev->dev, - "Failed to handle ArmCP EEPROM packet, error %d\n", rc); + "Failed to handle CPU-CP EEPROM packet, error %d\n", + rc); goto out; } @@ -379,7 +380,7 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { dev_err(hdev->dev, - "Failed to handle ArmCP PCI info pkt, error %d\n", rc); + "Failed to handle CPU-CP PCI info pkt, error %d\n", rc); return rc; } counters->rx_throughput = result; @@ -390,7 +391,7 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { dev_err(hdev->dev, - "Failed to handle ArmCP PCI info pkt, error %d\n", rc); + "Failed to handle CPU-CP PCI info pkt, error %d\n", rc); return rc; } counters->tx_throughput = result; @@ -403,7 +404,7 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, HL_CPUCP_INFO_TIMEOUT_USEC, &result); if (rc) { dev_err(hdev->dev, - "Failed to handle ArmCP PCI info pkt, error %d\n", rc); + "Failed to handle CPU-CP PCI info pkt, error %d\n", rc); return rc; } counters->replay_cnt = (u32) result; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 6ea8ae616cf4..b56410d75065 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -466,7 +466,7 @@ struct hl_cs_job; #define HL_EQ_LENGTH 64 #define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE) -/* Host <-> ArmCP shared memory size */ +/* Host <-> CPU-CP shared memory size */ #define HL_CPU_ACCESSIBLE_MEM_SIZE SZ_2M /** @@ -648,7 +648,7 @@ enum div_select_defs { * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM. * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM. * @add_device_attr: add ASIC specific device attributes. 
- * @handle_eqe: handle event queue entry (IRQ) from ArmCP. + * @handle_eqe: handle event queue entry (IRQ) from CPU-CP. * @set_pll_profile: change PLL profile (manual/automatic). * @get_events_stat: retrieve event queue entries histogram. * @read_pte: read MMU page table entry from DRAM. @@ -657,7 +657,7 @@ enum div_select_defs { * (L1 only) or hard (L0 & L1) flush. * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with * ASID-VA-size mask. - * @send_heartbeat: send is-alive packet to ArmCP and verify response. + * @send_heartbeat: send is-alive packet to CPU-CP and verify response. * @set_clock_gating: enable/disable clock gating per engine according to * clock gating mask in hdev * @disable_clock_gating: disable clock gating completely @@ -1438,8 +1438,8 @@ struct hl_device_idle_busy_ts { * @dev: related kernel basic device structure. * @dev_ctrl: related kernel device structure for the control device * @work_freq: delayed work to lower device frequency if possible. - * @work_heartbeat: delayed work for ArmCP is-alive check. - * @asic_name: ASIC specific nmae. + * @work_heartbeat: delayed work for CPU-CP is-alive check. + * @asic_name: ASIC specific name. * @asic_type: ASIC specific type. * @completion_queue: array of hl_cq. * @cq_wq: work queues of completion queues for executing work in process @@ -1450,14 +1450,14 @@ struct hl_device_idle_busy_ts { * @hw_queues_mirror_list: CS mirror list for TDR. * @hw_queues_mirror_lock: protects hw_queues_mirror_list. * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs. - * @event_queue: event queue for IRQ from ArmCP. + * @event_queue: event queue for IRQ from CPU-CP. * @dma_pool: DMA pool for small allocations. - * @cpu_accessible_dma_mem: Host <-> ArmCP shared memory CPU address. - * @cpu_accessible_dma_address: Host <-> ArmCP shared memory DMA address. - * @cpu_accessible_dma_pool: Host <-> ArmCP shared memory pool. 
+ * @cpu_accessible_dma_mem: Host <-> CPU-CP shared memory CPU address. + * @cpu_accessible_dma_address: Host <-> CPU-CP shared memory DMA address. + * @cpu_accessible_dma_pool: Host <-> CPU-CP shared memory pool. * @asid_bitmap: holds used/available ASIDs. * @asid_mutex: protects asid_bitmap. - * @send_cpu_message_lock: enforces only one message in Host <-> ArmCP queue. + * @send_cpu_message_lock: enforces only one message in Host <-> CPU-CP queue. * @debug_lock: protects critical section of setting debug mode for device * @asic_prop: ASIC specific immutable properties. * @asic_funcs: ASIC specific functions. @@ -1511,7 +1511,7 @@ struct hl_device_idle_busy_ts { * @late_init_done: is late init stage was done during initialization. * @hwmon_initialized: is H/W monitor sensors was initialized. * @hard_reset_pending: is there a hard reset work pending. - * @heartbeat: is heartbeat sanity check towards ArmCP enabled. + * @heartbeat: is heartbeat sanity check towards CPU-CP enabled. * @reset_on_lockup: true if a reset should be done in case of stuck CS, false * otherwise. * @dram_supports_virtual_memory: is MMU enabled towards DRAM. diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index bc7e7e3ba3a8..7946179632fb 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -2854,7 +2854,7 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) if (err) { dev_err(hdev->dev, - "Failed to communicate with ARM CPU (ArmCP timeout)\n"); + "Failed to communicate with Device CPU (CPU-CP timeout)\n"); return -EIO; } @@ -5616,7 +5616,7 @@ static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, bool soft_reset_required = false; /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock - * gating, and thus cannot be done in ArmCP and should be done instead + * gating, and thus cannot be done in CPU-CP and should be done instead * by the driver. 
*/ diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 94b7958ba5fb..fb7e4f50c198 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -4509,7 +4509,7 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) + irq_arr_size; - /* data should be aligned to 8 bytes in order to ArmCP to copy it */ + /* data should be aligned to 8 bytes in order to CPU-CP to copy it */ total_pkt_size = (total_pkt_size + 0x7) & ~0x7; /* total_pkt_size is casted to u16 later on */ -- cgit v1.2.3 From b01a971f80be339fb072044ef201ac724db9b58c Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Fri, 4 Sep 2020 20:21:39 +0300 Subject: habanalabs: remove unused ASIC function pointer Old function pointer that was left when the call to this function pointer was removed. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 5 ----- drivers/misc/habanalabs/gaudi/gaudi.c | 1 - drivers/misc/habanalabs/goya/goya.c | 1 - 3 files changed, 7 deletions(-) (limited to 'drivers/misc/habanalabs/goya') diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index b56410d75065..ec765320159a 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -675,8 +675,6 @@ enum div_select_defs { * ASIC * @get_hw_state: retrieve the H/W state * @pci_bars_map: Map PCI BARs. - * @set_dram_bar_base: Set DRAM BAR to map specific device address. Returns - * old address the bar pointed to or U64_MAX for failure * @init_iatu: Initialize the iATU unit inside the PCI controller. * @rreg: Read a register. Needed for simulator support. * @wreg: Write a register. Needed for simulator support. 
@@ -779,7 +777,6 @@ struct hl_asic_funcs { u16 len, u32 timeout, long *result); enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev); int (*pci_bars_map)(struct hl_device *hdev); - u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr); int (*init_iatu)(struct hl_device *hdev); u32 (*rreg)(struct hl_device *hdev, u32 reg); void (*wreg)(struct hl_device *hdev, u32 reg, u32 val); @@ -1874,8 +1871,6 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], bool is_wc[3]); int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data); -int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar, - u64 addr); int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region, struct hl_inbound_pci_region *pci_region); int hl_pci_set_outbound_region(struct hl_device *hdev, diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 7946179632fb..483989500863 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -6642,7 +6642,6 @@ static const struct hl_asic_funcs gaudi_funcs = { .send_cpu_message = gaudi_send_cpu_message, .get_hw_state = gaudi_get_hw_state, .pci_bars_map = gaudi_pci_bars_map, - .set_dram_bar_base = gaudi_set_hbm_bar_base, .init_iatu = gaudi_init_iatu, .rreg = hl_rreg, .wreg = hl_wreg, diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index fb7e4f50c198..804c83eb12b7 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -5377,7 +5377,6 @@ static const struct hl_asic_funcs goya_funcs = { .send_cpu_message = goya_send_cpu_message, .get_hw_state = goya_get_hw_state, .pci_bars_map = goya_pci_bars_map, - .set_dram_bar_base = goya_set_ddr_bar_base, .init_iatu = goya_init_iatu, .rreg = hl_rreg, .wreg = hl_wreg, -- cgit v1.2.3 From 1fb2f3743754994fec412f63a12cbc4d72f49e38 Mon Sep 17 
00:00:00 2001 From: Oded Gabbay Date: Fri, 4 Sep 2020 21:39:14 +0300 Subject: habanalabs: check flag before reset because of f/w event For consistency with GAUDI code, add check of the relevant flag in the device structure before resetting the GOYA device in case of firmware event. Reviewed-by: Tomer Tayar Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/misc/habanalabs/goya') diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 804c83eb12b7..46a900fb3ef8 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -4644,7 +4644,8 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC: case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET: goya_print_irq_info(hdev, event_type, false); - hl_device_reset(hdev, true, false); + if (hdev->hard_reset_on_fw_events) + hl_device_reset(hdev, true, false); break; case GOYA_ASYNC_EVENT_ID_PCIE_DEC: -- cgit v1.2.3 From 7edf341b9ef5012408a2ddac7ba30bc94e1dffaf Mon Sep 17 00:00:00 2001 From: Moti Haimovski Date: Sun, 23 Aug 2020 13:23:13 +0300 Subject: habanalabs: add num_hops to hl_mmu_properties This commit adds the number of HOPs supported by the device to the device MMU properties. 
Signed-off-by: Moti Haimovski Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 6 ++++++ drivers/misc/habanalabs/gaudi/gaudi.c | 1 + drivers/misc/habanalabs/goya/goya.c | 2 ++ drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h | 2 ++ 4 files changed, 11 insertions(+) (limited to 'drivers/misc/habanalabs/goya') diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 15e746a5fe35..8ef61926ed3b 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -236,12 +236,15 @@ enum hl_device_hw_state { * @hop2_shift: shift of hop 2 mask. * @hop3_shift: shift of hop 3 mask. * @hop4_shift: shift of hop 4 mask. + * @hop5_shift: shift of hop 5 mask. * @hop0_mask: mask to get the PTE address in hop 0. * @hop1_mask: mask to get the PTE address in hop 1. * @hop2_mask: mask to get the PTE address in hop 2. * @hop3_mask: mask to get the PTE address in hop 3. * @hop4_mask: mask to get the PTE address in hop 4. + * @hop5_mask: mask to get the PTE address in hop 5. * @page_size: default page size used to allocate memory. + * @num_hops: The amount of hops supported by the translation table. 
*/ struct hl_mmu_properties { u64 start_addr; @@ -251,12 +254,15 @@ struct hl_mmu_properties { u64 hop2_shift; u64 hop3_shift; u64 hop4_shift; + u64 hop5_shift; u64 hop0_mask; u64 hop1_mask; u64 hop2_mask; u64 hop3_mask; u64 hop4_mask; + u64 hop5_mask; u32 page_size; + u32 num_hops; }; /** diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 483989500863..bf010ff31ced 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -441,6 +441,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) prop->pmmu.end_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1; prop->pmmu.page_size = PAGE_SIZE_4KB; + prop->pmmu.num_hops = MMU_ARCH_5_HOPS; /* PMMU and HPMMU are the same except of page size */ memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 46a900fb3ef8..6f831cc54d37 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -426,12 +426,14 @@ int goya_get_fixed_properties(struct hl_device *hdev) prop->dmmu.start_addr = VA_DDR_SPACE_START; prop->dmmu.end_addr = VA_DDR_SPACE_END; prop->dmmu.page_size = PAGE_SIZE_2MB; + prop->dmmu.num_hops = MMU_ARCH_5_HOPS; /* shifts and masks are the same in PMMU and DMMU */ memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu)); prop->pmmu.start_addr = VA_HOST_SPACE_START; prop->pmmu.end_addr = VA_HOST_SPACE_END; prop->pmmu.page_size = PAGE_SIZE_4KB; + prop->pmmu.num_hops = MMU_ARCH_5_HOPS; /* PMMU and HPMMU are the same except of page size */ memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h index 468bb045fbd1..dedf20e8f956 100644 --- a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h +++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h @@ -29,6 +29,8 
@@ #define HOP3_SHIFT 21 #define HOP4_SHIFT 12 +#define MMU_ARCH_5_HOPS 5 + #define HOP_PHYS_ADDR_MASK (~FLAGS_MASK) #define HL_PTE_SIZE sizeof(u64) -- cgit v1.2.3 From fa8641a14f2841e1712e554ebfa58f1ac7b7db1b Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 7 Sep 2020 17:36:41 +0300 Subject: habanalabs: Save context in a command buffer object Future changes require using a context while handling a command buffer, and thus need to save the context in the command buffer object. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_buffer.c | 22 +++++++++++++--------- drivers/misc/habanalabs/common/debugfs.c | 2 +- drivers/misc/habanalabs/common/habanalabs.h | 9 +++++---- drivers/misc/habanalabs/gaudi/gaudi.c | 10 ++++++---- drivers/misc/habanalabs/goya/goya.c | 10 ++++++---- 5 files changed, 31 insertions(+), 22 deletions(-) (limited to 'drivers/misc/habanalabs/goya') diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index ba63cee74050..0cb556fb4a8b 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -47,6 +47,8 @@ static void cb_release(struct kref *ref) hl_debugfs_remove_cb(cb); + hl_ctx_put(cb->ctx); + cb_do_release(hdev, cb); } @@ -107,11 +109,12 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, } int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, - u32 cb_size, u64 *handle, int ctx_id, bool internal_cb) + struct hl_ctx *ctx, u32 cb_size, bool internal_cb, + u64 *handle) { struct hl_cb *cb; bool alloc_new_cb = true; - int rc; + int rc, ctx_id = ctx->asid; /* * Can't use generic function to check this because of special case @@ -163,7 +166,8 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, } cb->hdev = hdev; - cb->ctx_id = ctx_id; + cb->ctx = ctx; + hl_ctx_get(hdev, cb->ctx); spin_lock(&mgr->cb_lock); rc = 
idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC); @@ -191,6 +195,7 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, return 0; release_cb: + hl_ctx_put(cb->ctx); cb_do_release(hdev, cb); out_err: *handle = 0; @@ -250,9 +255,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) args->in.cb_size, HL_MAX_CB_SIZE); rc = -EINVAL; } else { - rc = hl_cb_create(hdev, &hpriv->cb_mgr, - args->in.cb_size, &handle, - hpriv->ctx->asid, false); + rc = hl_cb_create(hdev, &hpriv->cb_mgr, hpriv->ctx, + args->in.cb_size, false, &handle); } memset(args, 0, sizeof(*args)); @@ -424,7 +428,7 @@ void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr) if (kref_put(&cb->refcount, cb_release) != 1) dev_err(hdev->dev, "CB %d for CTX ID %d is still alive\n", - id, cb->ctx_id); + id, cb->ctx->asid); } idr_destroy(&mgr->cb_handles); @@ -437,8 +441,8 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, struct hl_cb *cb; int rc; - rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle, - HL_KERNEL_ASID_ID, internal_cb); + rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, cb_size, + internal_cb, &cb_handle); if (rc) { dev_err(hdev->dev, "Failed to allocate CB for the kernel driver %d\n", rc); diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 5d4665d2a0fb..912ddfa360b1 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -111,7 +111,7 @@ static int command_buffers_show(struct seq_file *s, void *data) } seq_printf(s, " %03llu %d 0x%08x %d %d %d\n", - cb->id, cb->ctx_id, cb->size, + cb->id, cb->ctx->asid, cb->size, kref_read(&cb->refcount), cb->mmap, cb->cs_cnt); } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 8ef61926ed3b..88c68b664ef6 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -417,6 +417,7 
@@ struct hl_cb_mgr { * struct hl_cb - describes a Command Buffer. * @refcount: reference counter for usage of the CB. * @hdev: pointer to device this CB belongs to. + * @ctx: pointer to the CB owner's context. * @lock: spinlock to protect mmap/cs flows. * @debugfs_list: node in debugfs list of command buffers. * @pool_list: node in pool list of command buffers. @@ -426,7 +427,6 @@ struct hl_cb_mgr { * @mmap_size: Holds the CB's size that was mmaped. * @size: holds the CB's size. * @cs_cnt: holds number of CS that this CB participates in. - * @ctx_id: holds the ID of the owner's context. * @mmap: true if the CB is currently mmaped to user. * @is_pool: true if CB was acquired from the pool, false otherwise. * @is_internal: internaly allocated @@ -434,6 +434,7 @@ struct hl_cb_mgr { struct hl_cb { struct kref refcount; struct hl_device *hdev; + struct hl_ctx *ctx; spinlock_t lock; struct list_head debugfs_list; struct list_head pool_list; @@ -443,7 +444,6 @@ struct hl_cb { u32 mmap_size; u32 size; u32 cs_cnt; - u32 ctx_id; u8 mmap; u8 is_pool; u8 is_internal; @@ -1838,8 +1838,9 @@ void hl_sysfs_fini(struct hl_device *hdev); int hl_hwmon_init(struct hl_device *hdev); void hl_hwmon_fini(struct hl_device *hdev); -int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size, - u64 *handle, int ctx_id, bool internal_cb); +int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, + struct hl_ctx *ctx, u32 cb_size, bool internal_cb, + u64 *handle); int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle); int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma); struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr, diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index bf010ff31ced..b51cc6c1d541 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -4114,8 +4114,9 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev, 
parser->patched_cb_size = parser->user_cb_size + sizeof(struct packet_msg_prot) * 2; - rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size, - &patched_cb_handle, HL_KERNEL_ASID_ID, false); + rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, + parser->patched_cb_size, false, + &patched_cb_handle); if (rc) { dev_err(hdev->dev, @@ -4187,8 +4188,9 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, if (rc) goto free_userptr; - rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size, - &patched_cb_handle, HL_KERNEL_ASID_ID, false); + rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, + parser->patched_cb_size, false, + &patched_cb_handle); if (rc) { dev_err(hdev->dev, "Failed to allocate patched CB for DMA CS %d\n", rc); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 6f831cc54d37..6c81a4b148de 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -3810,8 +3810,9 @@ static int goya_parse_cb_mmu(struct hl_device *hdev, parser->patched_cb_size = parser->user_cb_size + sizeof(struct packet_msg_prot) * 2; - rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size, - &patched_cb_handle, HL_KERNEL_ASID_ID, false); + rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, + parser->patched_cb_size, false, + &patched_cb_handle); if (rc) { dev_err(hdev->dev, @@ -3883,8 +3884,9 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev, if (rc) goto free_userptr; - rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, parser->patched_cb_size, - &patched_cb_handle, HL_KERNEL_ASID_ID, false); + rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, + parser->patched_cb_size, false, + &patched_cb_handle); if (rc) { dev_err(hdev->dev, "Failed to allocate patched CB for DMA CS %d\n", rc); -- cgit v1.2.3 From ef6a0f6caa4a5dbfbb42b642e23fb06182798d30 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Thu, 9 Jul 2020 
16:17:48 +0300 Subject: habanalabs: Add an option to map CB to device MMU There are cases in which the device should access the host memory of a CB through the device MMU, and thus this memory should be mapped. The patch adds a flag to the CB IOCTL, in which a user can ask the driver to perform the mapping when creating a CB. The mapping is allowed only if a dedicated VA range was allocated for the specific ASIC. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_buffer.c | 196 +++++++++++++++++++++++- drivers/misc/habanalabs/common/context.c | 12 +- drivers/misc/habanalabs/common/habanalabs.h | 20 ++- drivers/misc/habanalabs/gaudi/gaudi.c | 4 +- drivers/misc/habanalabs/goya/goya.c | 4 +- include/uapi/misc/habanalabs.h | 12 +- 6 files changed, 237 insertions(+), 11 deletions(-) (limited to 'drivers/misc/habanalabs/goya') diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index 0cb556fb4a8b..901e213daf40 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -13,6 +13,131 @@ #include #include +static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_vm_va_block *va_block, *tmp; + dma_addr_t bus_addr; + u64 virt_addr; + u32 page_size = prop->pmmu.page_size; + s32 offset; + int rc; + + if (!hdev->supports_cb_mapping) { + dev_err_ratelimited(hdev->dev, + "Cannot map CB because no VA range is allocated for CB mapping\n"); + return -EINVAL; + } + + if (!hdev->mmu_enable) { + dev_err_ratelimited(hdev->dev, + "Cannot map CB because MMU is disabled\n"); + return -EINVAL; + } + + INIT_LIST_HEAD(&cb->va_block_list); + + for (bus_addr = cb->bus_address; + bus_addr < cb->bus_address + cb->size; + bus_addr += page_size) { + + virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, 
page_size); + if (!virt_addr) { + dev_err(hdev->dev, + "Failed to allocate device virtual address for CB\n"); + rc = -ENOMEM; + goto err_va_pool_free; + } + + va_block = kzalloc(sizeof(*va_block), GFP_KERNEL); + if (!va_block) { + rc = -ENOMEM; + gen_pool_free(ctx->cb_va_pool, virt_addr, page_size); + goto err_va_pool_free; + } + + va_block->start = virt_addr; + va_block->end = virt_addr + page_size; + va_block->size = page_size; + list_add_tail(&va_block->node, &cb->va_block_list); + } + + mutex_lock(&ctx->mmu_lock); + + bus_addr = cb->bus_address; + offset = 0; + list_for_each_entry(va_block, &cb->va_block_list, node) { + rc = hl_mmu_map(ctx, va_block->start, bus_addr, va_block->size, + list_is_last(&va_block->node, + &cb->va_block_list)); + if (rc) { + dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", + va_block->start); + goto err_va_umap; + } + + bus_addr += va_block->size; + offset += va_block->size; + } + + hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR); + + mutex_unlock(&ctx->mmu_lock); + + cb->is_mmu_mapped = true; + + return 0; + +err_va_umap: + list_for_each_entry(va_block, &cb->va_block_list, node) { + if (offset <= 0) + break; + hl_mmu_unmap(ctx, va_block->start, va_block->size, + offset <= va_block->size); + offset -= va_block->size; + } + + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR); + + mutex_unlock(&ctx->mmu_lock); + +err_va_pool_free: + list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) { + gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size); + list_del(&va_block->node); + kfree(va_block); + } + + return rc; +} + +static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb) +{ + struct hl_device *hdev = ctx->hdev; + struct hl_vm_va_block *va_block, *tmp; + + mutex_lock(&ctx->mmu_lock); + + list_for_each_entry(va_block, &cb->va_block_list, node) + if (hl_mmu_unmap(ctx, va_block->start, va_block->size, + list_is_last(&va_block->node, + &cb->va_block_list))) + 
dev_warn_ratelimited(hdev->dev, + "Failed to unmap CB's va 0x%llx\n", + va_block->start); + + hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR); + + mutex_unlock(&ctx->mmu_lock); + + list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) { + gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size); + list_del(&va_block->node); + kfree(va_block); + } +} + static void cb_fini(struct hl_device *hdev, struct hl_cb *cb) { if (cb->is_internal) @@ -47,6 +172,9 @@ static void cb_release(struct kref *ref) hl_debugfs_remove_cb(cb); + if (cb->is_mmu_mapped) + cb_unmap_mem(cb->ctx, cb); + hl_ctx_put(cb->ctx); cb_do_release(hdev, cb); @@ -110,7 +238,7 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, struct hl_ctx *ctx, u32 cb_size, bool internal_cb, - u64 *handle) + bool map_cb, u64 *handle) { struct hl_cb *cb; bool alloc_new_cb = true; @@ -169,13 +297,26 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, cb->ctx = ctx; hl_ctx_get(hdev, cb->ctx); + if (map_cb) { + if (ctx_id == HL_KERNEL_ASID_ID) { + dev_err(hdev->dev, + "CB mapping is not supported for kernel context\n"); + rc = -EINVAL; + goto release_cb; + } + + rc = cb_map_mem(ctx, cb); + if (rc) + goto release_cb; + } + spin_lock(&mgr->cb_lock); rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC); spin_unlock(&mgr->cb_lock); if (rc < 0) { dev_err(hdev->dev, "Failed to allocate IDR for a new CB\n"); - goto release_cb; + goto unmap_mem; } cb->id = (u64) rc; @@ -194,6 +335,9 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, return 0; +unmap_mem: + if (cb->is_mmu_mapped) + cb_unmap_mem(cb->ctx, cb); release_cb: hl_ctx_put(cb->ctx); cb_do_release(hdev, cb); @@ -256,7 +400,9 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) rc = -EINVAL; } else { rc = hl_cb_create(hdev, &hpriv->cb_mgr, hpriv->ctx, - args->in.cb_size, false, &handle); + args->in.cb_size, false, + 
!!(args->in.flags & HL_CB_FLAGS_MAP), + &handle); } memset(args, 0, sizeof(*args)); @@ -442,7 +588,7 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, int rc; rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, cb_size, - internal_cb, &cb_handle); + internal_cb, false, &cb_handle); if (rc) { dev_err(hdev->dev, "Failed to allocate CB for the kernel driver %d\n", rc); @@ -498,3 +644,45 @@ int hl_cb_pool_fini(struct hl_device *hdev) return 0; } + +int hl_cb_va_pool_init(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + int rc; + + if (!hdev->supports_cb_mapping) + return 0; + + ctx->cb_va_pool = gen_pool_create(__ffs(prop->pmmu.page_size), -1); + if (!ctx->cb_va_pool) { + dev_err(hdev->dev, + "Failed to create VA gen pool for CB mapping\n"); + return -ENOMEM; + } + + rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr, + prop->cb_va_end_addr - prop->cb_va_start_addr, -1); + if (rc) { + dev_err(hdev->dev, + "Failed to add memory to VA gen pool for CB mapping\n"); + goto err_pool_destroy; + } + + return 0; + +err_pool_destroy: + gen_pool_destroy(ctx->cb_va_pool); + + return rc; +} + +void hl_cb_va_pool_fini(struct hl_ctx *ctx) +{ + struct hl_device *hdev = ctx->hdev; + + if (!hdev->supports_cb_mapping) + return; + + gen_pool_destroy(ctx->cb_va_pool); +} diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c index b168a9fce817..df8171a2226c 100644 --- a/drivers/misc/habanalabs/common/context.c +++ b/drivers/misc/habanalabs/common/context.c @@ -37,6 +37,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx) if ((hdev->in_debug) && (hdev->compute_ctx == ctx)) hl_device_set_debug_mode(hdev, false); + hl_cb_va_pool_fini(ctx); hl_vm_ctx_fini(ctx); hl_asid_free(hdev, ctx->asid); } else { @@ -155,15 +156,24 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) goto err_asid_free; } + rc = 
hl_cb_va_pool_init(ctx); + if (rc) { + dev_err(hdev->dev, + "Failed to init VA pool for mapped CB\n"); + goto err_vm_ctx_fini; + } + rc = hdev->asic_funcs->ctx_init(ctx); if (rc) { dev_err(hdev->dev, "ctx_init failed\n"); - goto err_vm_ctx_fini; + goto err_cb_va_pool_fini; } } return 0; +err_cb_va_pool_fini: + hl_cb_va_pool_fini(ctx); err_vm_ctx_fini: hl_vm_ctx_fini(ctx); err_asid_free: diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 88c68b664ef6..eaa9bf3f82a3 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -291,6 +291,10 @@ struct hl_mmu_properties { * @pcie_aux_dbi_reg_addr: Address of the PCIE_AUX DBI register. * @mmu_pgt_addr: base physical address in DRAM of MMU page tables. * @mmu_dram_default_page_addr: DRAM default page physical address. + * @cb_va_start_addr: virtual start address of command buffers which are mapped + * to the device's MMU. + * @cb_va_end_addr: virtual end address of command buffers which are mapped to + * the device's MMU. * @mmu_pgt_size: MMU page tables total size. * @mmu_pte_size: PTE size in MMU page tables. * @mmu_hop_table_size: MMU hop table size. @@ -339,6 +343,8 @@ struct asic_fixed_properties { u64 pcie_aux_dbi_reg_addr; u64 mmu_pgt_addr; u64 mmu_dram_default_page_addr; + u64 cb_va_start_addr; + u64 cb_va_end_addr; u32 mmu_pgt_size; u32 mmu_pte_size; u32 mmu_hop_table_size; @@ -421,6 +427,8 @@ struct hl_cb_mgr { * @lock: spinlock to protect mmap/cs flows. * @debugfs_list: node in debugfs list of command buffers. * @pool_list: node in pool list of command buffers. + * @va_block_list: list of virtual addresses blocks of the CB if it is mapped to + * the device's MMU. * @id: the CB's ID. * @kernel_address: Holds the CB's kernel virtual address. * @bus_address: Holds the CB's DMA address. @@ -430,6 +438,7 @@ struct hl_cb_mgr { * @mmap: true if the CB is currently mmaped to user. 
* @is_pool: true if CB was acquired from the pool, false otherwise. * @is_internal: internaly allocated + * @is_mmu_mapped: true if the CB is mapped to the device's MMU. */ struct hl_cb { struct kref refcount; @@ -438,6 +447,7 @@ struct hl_cb { spinlock_t lock; struct list_head debugfs_list; struct list_head pool_list; + struct list_head va_block_list; u64 id; u64 kernel_address; dma_addr_t bus_address; @@ -447,6 +457,7 @@ struct hl_cb { u8 mmap; u8 is_pool; u8 is_internal; + u8 is_mmu_mapped; }; @@ -843,6 +854,8 @@ struct hl_va_range { * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the * MMU hash or walking the PGT requires talking this lock. * @debugfs_list: node in debugfs list of contexts. + * @cb_va_pool: device VA pool for command buffers which are mapped to the + * device's MMU. * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed * to user so user could inquire about CS. It is used as * index to cs_pending array. @@ -874,6 +887,7 @@ struct hl_ctx { struct mutex mmu_lock; struct list_head debugfs_list; struct hl_cs_counters cs_counters; + struct gen_pool *cb_va_pool; u64 cs_sequence; u64 *dram_default_hops; spinlock_t cs_lock; @@ -1574,6 +1588,7 @@ struct hl_mmu_funcs { * @sync_stream_queue_idx: helper index for sync stream queues initialization. * @supports_coresight: is CoreSight supported. * @supports_soft_reset: is soft reset supported. + * @supports_cb_mapping: is mapping a CB to the device's MMU supported. 
*/ struct hl_device { struct pci_dev *pdev; @@ -1673,6 +1688,7 @@ struct hl_device { u8 sync_stream_queue_idx; u8 supports_coresight; u8 supports_soft_reset; + u8 supports_cb_mapping; /* Parameters for bring-up */ u8 mmu_enable; @@ -1840,7 +1856,7 @@ void hl_hwmon_fini(struct hl_device *hdev); int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, struct hl_ctx *ctx, u32 cb_size, bool internal_cb, - u64 *handle); + bool map_cb, u64 *handle); int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle); int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma); struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr, @@ -1852,6 +1868,8 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size, bool internal_cb); int hl_cb_pool_init(struct hl_device *hdev); int hl_cb_pool_fini(struct hl_device *hdev); +int hl_cb_va_pool_init(struct hl_ctx *ctx); +void hl_cb_va_pool_fini(struct hl_ctx *ctx); void hl_cs_rollback_all(struct hl_device *hdev); struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index b51cc6c1d541..6f7f6ad7a358 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -4115,7 +4115,7 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev, sizeof(struct packet_msg_prot) * 2; rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, - parser->patched_cb_size, false, + parser->patched_cb_size, false, false, &patched_cb_handle); if (rc) { @@ -4189,7 +4189,7 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, goto free_userptr; rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, - parser->patched_cb_size, false, + parser->patched_cb_size, false, false, &patched_cb_handle); if (rc) { dev_err(hdev->dev, diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 6c81a4b148de..5cddd46a8fb8 100644 --- 
a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -3811,7 +3811,7 @@ static int goya_parse_cb_mmu(struct hl_device *hdev, sizeof(struct packet_msg_prot) * 2; rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, - parser->patched_cb_size, false, + parser->patched_cb_size, false, false, &patched_cb_handle); if (rc) { @@ -3885,7 +3885,7 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev, goto free_userptr; rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, - parser->patched_cb_size, false, + parser->patched_cb_size, false, false, &patched_cb_handle); if (rc) { dev_err(hdev->dev, diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index d449f8a31ce6..9705b8adb60c 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -462,6 +462,9 @@ struct hl_info_args { /* 2MB minus 32 bytes for 2xMSG_PROT */ #define HL_MAX_CB_SIZE (0x200000 - 32) +/* Indicates whether the command buffer should be mapped to the device's MMU */ +#define HL_CB_FLAGS_MAP 0x1 + struct hl_cb_in { /* Handle of CB or 0 if we want to create one */ __u64 cb_handle; @@ -473,7 +476,8 @@ struct hl_cb_in { __u32 cb_size; /* Context ID - Currently not in use */ __u32 ctx_id; - __u32 pad; + /* HL_CB_FLAGS_* */ + __u32 flags; }; struct hl_cb_out { @@ -856,6 +860,12 @@ struct hl_debug_args { * When creating a new CB, the IOCTL returns a handle of it, and the user-space * process needs to use that handle to mmap the buffer so it can access them. * + * In some instances, the device must access the command buffer through the + * device's MMU, and thus its memory should be mapped. In these cases, user can + * indicate the driver that such a mapping is required. + * The resulting device virtual address will be used internally by the driver, + * and won't be returned to user. 
+ * */ #define HL_IOCTL_CB \ _IOWR('H', 0x02, union hl_cb_args) -- cgit v1.2.3 From 57799ce9f85c23ecbbc9d356969ea54bd6b8a647 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 13 Sep 2020 15:51:28 +0300 Subject: habanalabs: add indication of security-enabled F/W Future F/W versions will have enhanced security measures and the driver won't be able to do certain configurations that it always did and those configurations will be done by the firmware. We use the firmware's preboot version to determine whether security measures are enabled or not. Because we need this very early in our code, the read of the preboot version is moved to the earliest possible place, right after the device's PCI initialization. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 55 ++++++++++++++++++++++++---- drivers/misc/habanalabs/common/habanalabs.h | 8 +++- drivers/misc/habanalabs/common/pci.c | 16 +++++++- drivers/misc/habanalabs/gaudi/gaudi.c | 28 +++++++++----- drivers/misc/habanalabs/goya/goya.c | 7 +++- 5 files changed, 94 insertions(+), 20 deletions(-) (limited to 'drivers/misc/habanalabs/goya') diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 4409962d30ae..cd41c7ceb0e7 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -474,8 +474,11 @@ static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg) "Device boot error - NIC F/W initialization failed\n"); } -static void hl_detect_cpu_boot_status(struct hl_device *hdev, u32 status) +static void detect_cpu_boot_status(struct hl_device *hdev, u32 status) { + /* Some of the status codes below are deprecated in newer f/w + * versions but we keep them here for backward compatibility + */ switch (status) { case CPU_BOOT_STATUS_NA: dev_err(hdev->dev, @@ -521,6 +524,48 @@ static void hl_detect_cpu_boot_status(struct hl_device *hdev, u32 status) } } +int hl_fw_read_preboot_ver(struct 
hl_device *hdev, u32 cpu_boot_status_reg, + u32 boot_err0_reg, u32 timeout) +{ + u32 status; + int rc; + + if (!hdev->cpu_enable) + return 0; + + /* Need to check two possible scenarios: + * + * CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT - for newer firmwares where + * the preboot is waiting for the boot fit + * + * All other status values - for older firmwares where the uboot was + * loaded from the FLASH + */ + rc = hl_poll_timeout( + hdev, + cpu_boot_status_reg, + status, + (status == CPU_BOOT_STATUS_IN_UBOOT) || + (status == CPU_BOOT_STATUS_DRAM_RDY) || + (status == CPU_BOOT_STATUS_NIC_FW_RDY) || + (status == CPU_BOOT_STATUS_READY_TO_BOOT) || + (status == CPU_BOOT_STATUS_SRAM_AVAIL) || + (status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT), + 10000, + timeout); + + if (rc) { + dev_err(hdev->dev, "Failed to read preboot version\n"); + detect_cpu_boot_status(hdev, status); + fw_read_errors(hdev, boot_err0_reg); + return -EIO; + } + + hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT); + + return 0; +} + int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, u32 boot_err0_reg, bool skip_bmc, @@ -586,15 +631,11 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, 10000, cpu_timeout); - /* Read U-Boot, preboot versions now in case we will later fail */ + /* Read U-Boot version now in case we will later fail */ hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT); - hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT); - /* Some of the status codes below are deprecated in newer f/w - * versions but we keep them here for backward compatibility - */ if (rc) { - hl_detect_cpu_boot_status(hdev, status); + detect_cpu_boot_status(hdev, status); rc = -EIO; goto out; } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index eaa9bf3f82a3..80d4d7385ffe 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ 
b/drivers/misc/habanalabs/common/habanalabs.h @@ -320,6 +320,8 @@ struct hl_mmu_properties { * @first_available_user_mon: first monitor available for the user * @tpc_enabled_mask: which TPCs are enabled. * @completion_queues_count: number of completion queues. + * @fw_security_disabled: true if security measures are disabled in firmware, + * false otherwise */ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -370,6 +372,7 @@ struct asic_fixed_properties { u16 first_available_user_mon[HL_MAX_DCORES]; u8 tpc_enabled_mask; u8 completion_queues_count; + u8 fw_security_disabled; }; /** @@ -1933,6 +1936,8 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, u32 msg_to_cpu_reg, u32 cpu_msg_status_reg, u32 boot_err0_reg, bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout); +int hl_fw_read_preboot_ver(struct hl_device *hdev, u32 cpu_boot_status_reg, + u32 boot_err0_reg, u32 timeout); int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], bool is_wc[3]); @@ -1941,7 +1946,8 @@ int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region, struct hl_inbound_pci_region *pci_region); int hl_pci_set_outbound_region(struct hl_device *hdev, struct hl_outbound_pci_region *pci_region); -int hl_pci_init(struct hl_device *hdev); +int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg, + u32 boot_err0_reg, u32 preboot_ver_timeout); void hl_pci_fini(struct hl_device *hdev); long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); diff --git a/drivers/misc/habanalabs/common/pci.c b/drivers/misc/habanalabs/common/pci.c index 923b2606e29f..4327e5704ebb 100644 --- a/drivers/misc/habanalabs/common/pci.c +++ b/drivers/misc/habanalabs/common/pci.c @@ -338,12 +338,17 @@ static int hl_pci_set_dma_mask(struct hl_device *hdev) /** * hl_pci_init() - PCI initialization code. * @hdev: Pointer to hl_device structure. 
+ * @cpu_boot_status_reg: status register of the device's CPU + * @boot_err0_reg: boot error register of the device's CPU + * @preboot_ver_timeout: how much to wait before bailing out on reading + * the preboot version * * Set DMA masks, initialize the PCI controller and map the PCI BARs. * * Return: 0 on success, non-zero for failure. */ -int hl_pci_init(struct hl_device *hdev) +int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg, + u32 boot_err0_reg, u32 preboot_ver_timeout) { struct pci_dev *pdev = hdev->pdev; int rc; @@ -375,6 +380,15 @@ int hl_pci_init(struct hl_device *hdev) if (rc) goto unmap_pci_bars; + /* Before continuing in the initialization, we need to read the preboot + * version to determine whether we run with a security-enabled firmware + * The check will be done in each ASIC's specific code + */ + rc = hl_fw_read_preboot_ver(hdev, cpu_boot_status_reg, boot_err0_reg, + preboot_ver_timeout); + if (rc) + goto unmap_pci_bars; + return 0; unmap_pci_bars: diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 86650be5898f..1b51e670bd4e 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -599,10 +599,15 @@ static int gaudi_early_init(struct hl_device *hdev) prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID); - rc = hl_pci_init(hdev); + rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS, + mmCPU_BOOT_ERR0, GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC); if (rc) goto free_queue_props; + /* GAUDI Firmware does not yet support security */ + prop->fw_security_disabled = true; + dev_info(hdev->dev, "firmware-level security is disabled\n"); + return 0; free_queue_props: @@ -2871,6 +2876,18 @@ static void gaudi_pre_hw_init(struct hl_device *hdev) /* Perform read from the device to make sure device is up */ RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG); + /* Set the access through PCI bars (Linux driver only) as + * secured + */ + WREG32(mmPCIE_WRAP_LBW_PROT_OVR, + 
(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK | + PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK)); + + /* Perform read to flush the waiting writes to ensure + * configuration was set in the device + */ + RREG32(mmPCIE_WRAP_LBW_PROT_OVR); + /* * Let's mark in the H/W that we have reached this point. We check * this value in the reset_before_init function to understand whether @@ -2879,15 +2896,6 @@ static void gaudi_pre_hw_init(struct hl_device *hdev) */ WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); - /* Set the access through PCI bars (Linux driver only) as secured */ - WREG32(mmPCIE_WRAP_LBW_PROT_OVR, (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK | - PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK)); - - /* Perform read to flush the waiting writes to ensure configuration - * was set in the device - */ - RREG32(mmPCIE_WRAP_LBW_PROT_OVR); - /* Configure the reset registers. Must be done as early as possible * in case we fail during H/W initialization */ diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 5cddd46a8fb8..5db52064ed9e 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -600,10 +600,15 @@ static int goya_early_init(struct hl_device *hdev) prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID); - rc = hl_pci_init(hdev); + rc = hl_pci_init(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS, + mmCPU_BOOT_ERR0, GOYA_BOOT_FIT_REQ_TIMEOUT_USEC); if (rc) goto free_queue_props; + /* Goya Firmware does not support security */ + prop->fw_security_disabled = true; + dev_info(hdev->dev, "firmware-level security is disabled\n"); + if (!hdev->pldm) { val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS); if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK) -- cgit v1.2.3