diff options
author | Oded Gabbay <ogabbay@kernel.org> | 2022-02-17 17:07:03 +0300 |
---|---|---|
committer | Oded Gabbay <ogabbay@kernel.org> | 2022-02-28 15:22:05 +0300 |
commit | 26ef1c000bc21a192618c9ec651dd36ba63ca00c (patch) | |
tree | c0ce547df3d061a61db7b2fec60c6363413a1a41 /drivers/misc | |
parent | f23f280277d5a701de99c6652623b6bf8801c534 (diff) | |
download | linux-26ef1c000bc21a192618c9ec651dd36ba63ca00c.tar.xz |
habanalabs/gaudi: handle axi errors from NIC engines
Various AXI errors can occur in the NIC engines and are reported to
the driver by the f/w. Add code to print the errors and ack them to
the f/w.
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Diffstat (limited to 'drivers/misc')
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudi.c | 48 |
1 files changed, 48 insertions, 0 deletions
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index a60c0e23d23c..95201d995743 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -7665,6 +7665,48 @@ static void gaudi_print_fw_alive_info(struct hl_device *hdev, fw_alive->thread_id, fw_alive->uptime_seconds); } +static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type, + void *data) +{ + char desc[64] = "", *type; + struct eq_nic_sei_event *eq_nic_sei = data; + u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0; + + switch (eq_nic_sei->axi_error_cause) { + case RXB: + type = "RXB"; + break; + case RXE: + type = "RXE"; + break; + case TXS: + type = "TXS"; + break; + case TXE: + type = "TXE"; + break; + case QPC_RESP: + type = "QPC_RESP"; + break; + case NON_AXI_ERR: + type = "NON_AXI_ERR"; + break; + case TMR: + type = "TMR"; + break; + default: + dev_err(hdev->dev, "unknown NIC AXI cause %d\n", + eq_nic_sei->axi_error_cause); + type = "N/A"; + break; + } + + snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type, + eq_nic_sei->id); + dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", + event_type, desc); +} + static int gaudi_non_hard_reset_late_init(struct hl_device *hdev) { /* GAUDI doesn't support any reset except hard-reset */ @@ -7898,6 +7940,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) { struct gaudi_device *gaudi = hdev->asic_specific; + u64 data = le64_to_cpu(eq_entry->data[0]); u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); u32 fw_fatal_err_flag = 0; u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) @@ -8095,6 +8138,11 @@ static void gaudi_handle_eqe(struct hl_device *hdev, hl_fw_unmask_irq(hdev, event_type); break; + case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4: + gaudi_print_nic_axi_irq_info(hdev, event_type, &data); + hl_fw_unmask_irq(hdev, event_type); + break; + case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3: gaudi_print_irq_info(hdev, event_type, false); gaudi_print_sm_sei_info(hdev, event_type, |