From bf23ffc8a9a777dfdeb04232e0946b803adbb6a9 Mon Sep 17 00:00:00 2001 From: Thinh Tran Date: Fri, 18 Aug 2023 11:14:40 -0500 Subject: bnx2x: new flag for track HW resource allocation While injecting PCIe errors to the upstream PCIe switch of a BCM57810 NIC, system hangs/crashes were observed. After several calls to bnx2x_tx_timout() complete, bnx2x_nic_unload() is called to free up HW resources and bnx2x_napi_disable() is called to release NAPI objects. Later, when the EEH driver calls bnx2x_io_slot_reset() to complete the recovery process, bnx2x attempts to disable NAPI again by calling bnx2x_napi_disable() and freeing resources which have already been freed, resulting in a hang or crash. Introduce a new flag to track the HW resource and NAPI allocation state, refactor duplicated code into a single function, check page pool allocation status before freeing, and reduces debug output when a TX timeout event occurs. Reviewed-by: Manish Chopra Tested-by: Abdul Haleem Tested-by: David Christensen Reviewed-by: Simon Horman Tested-by: Venkata Sai Duggi Signed-off-by: Thinh Tran Link: https://lore.kernel.org/r/20230818161443.708785-2-thinhtr@linux.vnet.ibm.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 2 ++ drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 21 ++++++++++------ drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 32 ++++++++++++++---------- drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 17 +++++++------ 4 files changed, 44 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 8bcde0a6e011..e2a4e1088b7f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1508,6 +1508,8 @@ struct bnx2x { bool cnic_loaded; struct cnic_eth_dev *(*cnic_probe)(struct net_device *); + bool nic_stopped; + /* Flag that indicates that we can start looking for FCoE L2 queue * completions in the default status block. */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 6ea5521074d3..e9c1e1bb5580 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2715,6 +2715,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) bnx2x_add_all_napi(bp); DP(NETIF_MSG_IFUP, "napi added\n"); bnx2x_napi_enable(bp); + bp->nic_stopped = false; if (IS_PF(bp)) { /* set pf load just before approaching the MCP */ @@ -2960,6 +2961,7 @@ load_error2: load_error1: bnx2x_napi_disable(bp); bnx2x_del_all_napi(bp); + bp->nic_stopped = true; /* clear pf_load status, as it was already set */ if (IS_PF(bp)) @@ -3095,14 +3097,17 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) if (!CHIP_IS_E1x(bp)) bnx2x_pf_disable(bp); - /* Disable HW interrupts, NAPI */ - bnx2x_netif_stop(bp, 1); - /* Delete all NAPI objects */ - bnx2x_del_all_napi(bp); - if (CNIC_LOADED(bp)) - bnx2x_del_all_napi_cnic(bp); - /* Release IRQs */ - bnx2x_free_irq(bp); + if (!bp->nic_stopped) { + /* Disable HW interrupts, NAPI */ + bnx2x_netif_stop(bp, 1); + /* Delete all NAPI objects */ + bnx2x_del_all_napi(bp); + if (CNIC_LOADED(bp)) + bnx2x_del_all_napi_cnic(bp); + /* Release IRQs */ + bnx2x_free_irq(bp); + bp->nic_stopped = true; + } /* Report UNLOAD_DONE to MCP */ bnx2x_send_unload_done(bp, false); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 1e7a6f1d4223..0d8e61c63c7c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -9474,15 +9474,18 @@ unload_error: } } - /* Disable HW interrupts, NAPI */ - bnx2x_netif_stop(bp, 1); - /* Delete all NAPI objects */ - bnx2x_del_all_napi(bp); - if (CNIC_LOADED(bp)) - bnx2x_del_all_napi_cnic(bp); + if (!bp->nic_stopped) { + /* Disable HW interrupts, NAPI */ + bnx2x_netif_stop(bp, 1); + /* Delete all NAPI objects */ + bnx2x_del_all_napi(bp); + if (CNIC_LOADED(bp)) + bnx2x_del_all_napi_cnic(bp); - /* Release IRQs */ - bnx2x_free_irq(bp); + /* Release IRQs */ + bnx2x_free_irq(bp); + bp->nic_stopped = true; + } /* Reset the chip, unless PCI function is offline. If we reach this * point following a PCI error handling, it means device is really @@ -14238,13 +14241,16 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev) } bnx2x_drain_tx_queues(bp); bnx2x_send_unload_req(bp, UNLOAD_RECOVERY); - bnx2x_netif_stop(bp, 1); - bnx2x_del_all_napi(bp); + if (!bp->nic_stopped) { + bnx2x_netif_stop(bp, 1); + bnx2x_del_all_napi(bp); - if (CNIC_LOADED(bp)) - bnx2x_del_all_napi_cnic(bp); + if (CNIC_LOADED(bp)) + bnx2x_del_all_napi_cnic(bp); - bnx2x_free_irq(bp); + bnx2x_free_irq(bp); + bp->nic_stopped = true; + } /* Report UNLOAD_DONE to MCP */ bnx2x_send_unload_done(bp, true); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index 0657a0f5170f..8946a931e87e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -529,13 +529,16 @@ void bnx2x_vfpf_close_vf(struct bnx2x *bp) bnx2x_vfpf_finalize(bp, &req->first_tlv); free_irq: - /* Disable HW interrupts, NAPI */ - bnx2x_netif_stop(bp, 0); - /* Delete all NAPI objects */ - bnx2x_del_all_napi(bp); - - /* Release IRQs */ - bnx2x_free_irq(bp); + if (!bp->nic_stopped) { + /* Disable HW interrupts, NAPI */ + bnx2x_netif_stop(bp, 0); + /* Delete all NAPI objects */ + bnx2x_del_all_napi(bp); + + /* Release IRQs */ + bnx2x_free_irq(bp); + bp->nic_stopped = true; + } } static void bnx2x_leading_vfq_init(struct bnx2x *bp, struct bnx2x_virtf *vf, -- cgit v1.2.3