diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2026-06-10 04:15:50 +0300 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2026-06-10 04:15:51 +0300 |
| commit | 61abe5db23f0799e602352c796de6fcf76d23b41 (patch) | |
| tree | aab135dbf5fccc712bd89df915408c28e2da7a55 | |
| parent | 41d3e102edc2a6de73f0a32de6ce9a11243c14fa (diff) | |
| parent | a96c84126dc38e8d09aa93d165b30dc50c628562 (diff) | |
| download | linux-61abe5db23f0799e602352c796de6fcf76d23b41.tar.xz | |
Merge branch 'net-add-retry-mechanism-to-ndo_set_rx_mode_async'
Stanislav Fomichev says:
====================
net: add retry mechanism to ndo_set_rx_mode_async
Original async ndo_set_rx_mode work left one place where we do netdev_WARN
in response to a ENOMEM. The intent was to see whether actual real
users can hit that (adding uc/mc under memory pressure seems like a
very unlikely thing to do). However, it was quickly triggered by
syzbot's failslab. Add a retry mechanism and downgrade netdev_WARN
to netdev_err. The retry logic is a typical exponential backoff:
1, 2, 4, 8 seconds, 15 in total, hopefully enough for a system to resolve
memory pressure.
====================
Link: https://patch.msgid.link/20260608154014.227538-1-sdf@fomichev.me
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
| -rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_main.c | 9 | ||||
| -rw-r--r-- | drivers/net/dummy.c | 7 | ||||
| -rw-r--r-- | drivers/net/ethernet/broadcom/bnxt/bnxt.c | 40 | ||||
| -rw-r--r-- | drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 | ||||
| -rw-r--r-- | drivers/net/ethernet/intel/iavf/iavf_main.c | 10 | ||||
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 | ||||
| -rw-r--r-- | drivers/net/ethernet/meta/fbnic/fbnic_netdev.c | 8 | ||||
| -rw-r--r-- | drivers/net/netdevsim/netdev.c | 7 | ||||
| -rw-r--r-- | drivers/net/netkit.c | 7 | ||||
| -rw-r--r-- | include/linux/netdevice.h | 16 | ||||
| -rw-r--r-- | net/core/dev.c | 4 | ||||
| -rw-r--r-- | net/core/dev.h | 2 | ||||
| -rw-r--r-- | net/core/dev_addr_lists.c | 53 |
13 files changed, 116 insertions, 57 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 3e1e1e861739..16a015b67206 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1297,18 +1297,19 @@ static int ipoib_hard_header(struct sk_buff *skb, return IPOIB_HARD_LEN; } -static void ipoib_set_rx_mode_async(struct net_device *dev, - struct netdev_hw_addr_list *uc, - struct netdev_hw_addr_list *mc) +static int ipoib_set_rx_mode_async(struct net_device *dev, + struct netdev_hw_addr_list *uc, + struct netdev_hw_addr_list *mc) { struct ipoib_dev_priv *priv = ipoib_priv(dev); if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { ipoib_dbg(priv, "IPOIB_FLAG_OPER_UP not set"); - return; + return 0; } queue_work(priv->wq, &priv->restart_task); + return 0; } static int ipoib_get_iflink(const struct net_device *dev) diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index f6732eab5923..36a4e85c6668 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -47,10 +47,11 @@ static int numdummies = 1; /* fake multicast ability */ -static void set_multicast_list(struct net_device *dev, - struct netdev_hw_addr_list *uc, - struct netdev_hw_addr_list *mc) +static int set_multicast_list(struct net_device *dev, + struct netdev_hw_addr_list *uc, + struct netdev_hw_addr_list *mc) { + return 0; } static void dummy_get_stats64(struct net_device *dev, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index dbaf9586ee22..1eb214e4b511 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11232,8 +11232,12 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) } rc = bnxt_cfg_rx_mode(bp, &bp->dev->uc, true); - if (rc) + if (rc == -EAGAIN) { + netif_rx_mode_schedule_retry(bp->dev); + rc = 0; + } else if (rc) { goto err_out; + } skip_rx_mask: rc = bnxt_hwrm_set_coal(bp); @@ -13679,9 +13683,9 @@ static bool bnxt_uc_list_updated(struct bnxt *bp, return false; } -static void bnxt_set_rx_mode(struct net_device *dev, - struct netdev_hw_addr_list *uc, - struct netdev_hw_addr_list *mc) +static int bnxt_set_rx_mode(struct net_device *dev, + struct netdev_hw_addr_list *uc, + struct netdev_hw_addr_list *mc) { struct bnxt *bp = netdev_priv(dev); struct bnxt_vnic_info *vnic; @@ -13690,7 +13694,7 @@ static void bnxt_set_rx_mode(struct net_device *dev, u32 mask; if (!test_bit(BNXT_STATE_OPEN, &bp->state)) - return; + return 0; vnic = &bp->vnic_info[BNXT_VNIC_DEFAULT]; mask = vnic->rx_mask; @@ -13716,8 +13720,10 @@ static void bnxt_set_rx_mode(struct net_device *dev, if (mask != vnic->rx_mask || uc_update || mc_update) { vnic->rx_mask = mask; - bnxt_cfg_rx_mode(bp, uc, uc_update); + return bnxt_cfg_rx_mode(bp, uc, uc_update); } + + return 0; } static int bnxt_cfg_rx_mode(struct bnxt *bp, struct netdev_hw_addr_list *uc, @@ -13756,11 +13762,10 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp, struct netdev_hw_addr_list *uc, rc = bnxt_hwrm_set_vnic_filter(bp, 0, i, vnic->uc_list + off); if (rc) { if (BNXT_VF(bp) && rc == -ENODEV) { - if (!test_and_set_bit(BNXT_STATE_L2_FILTER_RETRY, &bp->state)) - netdev_warn(bp->dev, "Cannot configure L2 filters while PF is unavailable, will retry\n"); - else - netdev_dbg(bp->dev, "PF still unavailable while configuring L2 filters.\n"); - rc = 0; + netdev_warn(bp->dev, "Cannot configure L2 filters while PF is unavailable, will retry\n"); + rc = -EAGAIN; + } else if (rc == -EAGAIN) { + netdev_warn(bp->dev, "FW busy while setting vnic filter, will retry\n"); } else { netdev_err(bp->dev, "HWRM vnic filter failure rc: %x\n", rc); } @@ -13768,8 +13773,6 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp, struct netdev_hw_addr_list *uc, return rc; } } - if (test_and_clear_bit(BNXT_STATE_L2_FILTER_RETRY, &bp->state)) - netdev_notice(bp->dev, "Retry of L2 filter configuration successful.\n"); skip_uc: if ((vnic->rx_mask & CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS) && @@ -14360,9 +14363,6 @@ static void bnxt_timer(struct timer_list *t) } } - if (test_bit(BNXT_STATE_L2_FILTER_RETRY, &bp->state)) - bnxt_queue_sp_work(bp, BNXT_RX_MASK_SP_EVENT); - if ((BNXT_CHIP_P5(bp)) && !bp->chip_rev && netif_carrier_ok(dev)) bnxt_queue_sp_work(bp, BNXT_RING_COAL_NOW_SP_EVENT); @@ -14725,7 +14725,6 @@ static void bnxt_ulp_restart(struct bnxt *bp) static void bnxt_sp_task(struct work_struct *work) { struct bnxt *bp = container_of(work, struct bnxt, sp_task); - struct net_device *dev = bp->dev; set_bit(BNXT_STATE_IN_SP_TASK, &bp->state); smp_mb__after_atomic(); @@ -14803,13 +14802,6 @@ static void bnxt_sp_task(struct work_struct *work) /* These functions below will clear BNXT_STATE_IN_SP_TASK. They * must be the last functions to be called before exiting. */ - if (test_and_clear_bit(BNXT_RX_MASK_SP_EVENT, &bp->sp_event)) { - bnxt_lock_sp(bp); - if (test_bit(BNXT_STATE_OPEN, &bp->state)) - bnxt_cfg_rx_mode(bp, &dev->uc, true); - bnxt_unlock_sp(bp); - } - if (test_and_clear_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event)) bnxt_reset(bp, false); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 1920a161841e..29ff5a584d16 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2467,7 +2467,6 @@ struct bnxt { #define BNXT_STATE_DRV_REGISTERED 7 #define BNXT_STATE_PCI_CHANNEL_IO_FROZEN 8 #define BNXT_STATE_NAPI_DISABLED 9 -#define BNXT_STATE_L2_FILTER_RETRY 10 #define BNXT_STATE_FW_ACTIVATE 11 #define BNXT_STATE_RECOVER 12 #define BNXT_STATE_FW_NON_FATAL_COND 13 @@ -2622,7 +2621,6 @@ struct bnxt { struct work_struct sp_task; unsigned long sp_event; -#define BNXT_RX_MASK_SP_EVENT 0 #define BNXT_RX_NTP_FLTR_SP_EVENT 1 #define BNXT_LINK_CHNG_SP_EVENT 2 #define BNXT_HWRM_EXEC_FWD_REQ_SP_EVENT 3 diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 8b53ffb75650..29b8403a066b 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1134,10 +1134,12 @@ bool iavf_promiscuous_mode_changed(struct iavf_adapter *adapter) * @netdev: network interface device structure * @uc: snapshot of uc address list * @mc: snapshot of mc address list + * + * Return: 0 on success. **/ -static void iavf_set_rx_mode(struct net_device *netdev, - struct netdev_hw_addr_list *uc, - struct netdev_hw_addr_list *mc) +static int iavf_set_rx_mode(struct net_device *netdev, + struct netdev_hw_addr_list *uc, + struct netdev_hw_addr_list *mc) { struct iavf_adapter *adapter = netdev_priv(netdev); @@ -1150,6 +1152,8 @@ static void iavf_set_rx_mode(struct net_device *netdev, if (iavf_promiscuous_mode_changed(adapter)) adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE; spin_unlock_bh(&adapter->current_netdev_promisc_flags_lock); + + return 0; } /** diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c5d26c6829a0..775f0c6e55c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4142,13 +4142,15 @@ static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv) queue_work(priv->wq, &priv->set_rx_mode_work); } -static void mlx5e_set_rx_mode(struct net_device *dev, - struct netdev_hw_addr_list *uc, - struct netdev_hw_addr_list *mc) +static int mlx5e_set_rx_mode(struct net_device *dev, + struct netdev_hw_addr_list *uc, + struct netdev_hw_addr_list *mc) { struct mlx5e_priv *priv = netdev_priv(dev); mlx5e_fs_set_rx_mode_work(priv->fs, dev, uc, mc); + + return 0; } static int mlx5e_set_mac(struct net_device *netdev, void *addr) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index f99ca551c1ce..dd77ab6052c8 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -240,9 +240,9 @@ void __fbnic_set_rx_mode(struct fbnic_dev *fbd, fbnic_write_tce_tcam(fbd); } -static void fbnic_set_rx_mode(struct net_device *netdev, - struct netdev_hw_addr_list *uc, - struct netdev_hw_addr_list *mc) +static int fbnic_set_rx_mode(struct net_device *netdev, + struct netdev_hw_addr_list *uc, + struct netdev_hw_addr_list *mc) { struct fbnic_net *fbn = netdev_priv(netdev); struct fbnic_dev *fbd = fbn->fbd; @@ -250,6 +250,8 @@ static void fbnic_set_rx_mode(struct net_device *netdev, /* No need to update the hardware if we are not running */ if (netif_running(netdev)) __fbnic_set_rx_mode(fbd, uc, mc); + + return 0; } static int fbnic_set_mac(struct net_device *netdev, void *p) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index a750768912b5..27e5f109f933 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -185,10 +185,11 @@ out_drop_cnt: return NETDEV_TX_OK; } -static void nsim_set_rx_mode(struct net_device *dev, - struct netdev_hw_addr_list *uc, - struct netdev_hw_addr_list *mc) +static int nsim_set_rx_mode(struct net_device *dev, + struct netdev_hw_addr_list *uc, + struct netdev_hw_addr_list *mc) { + return 0; } static int nsim_change_mtu(struct net_device *dev, int new_mtu) diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index 0ad6a806d7d5..a3931cd82132 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -186,11 +186,12 @@ static int netkit_get_iflink(const struct net_device *dev) return iflink; } -static void netkit_set_multicast(struct net_device *dev, - struct netdev_hw_addr_list *uc, - struct netdev_hw_addr_list *mc) +static int netkit_set_multicast(struct net_device *dev, + struct netdev_hw_addr_list *uc, + struct netdev_hw_addr_list *mc) { /* Nothing to do, we receive whatever gets pushed to us! */ + return 0; } static int netkit_set_macaddr(struct net_device *dev, void *sa) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9b876cd930d7..7f4f0837c09f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1122,13 +1122,14 @@ struct netdev_net_notifier { * Cannot sleep, called with netif_addr_lock_bh held. * Deprecated in favor of ndo_set_rx_mode_async. * - * void (*ndo_set_rx_mode_async)(struct net_device *dev, - * struct netdev_hw_addr_list *uc, - * struct netdev_hw_addr_list *mc); + * int (*ndo_set_rx_mode_async)(struct net_device *dev, + * struct netdev_hw_addr_list *uc, + * struct netdev_hw_addr_list *mc); * Async version of ndo_set_rx_mode which runs in process context * with rtnl_lock and netdev_lock_ops(dev) held. The uc/mc parameters * are snapshots of the address lists - iterate with - * netdev_hw_addr_list_for_each(ha, uc). + * netdev_hw_addr_list_for_each(ha, uc). Return 0 on success or a + * negative errno to request a retry via the core backoff. * * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); * This function is called when the Media Access Control address @@ -1455,7 +1456,7 @@ struct net_device_ops { void (*ndo_change_rx_flags)(struct net_device *dev, int flags); void (*ndo_set_rx_mode)(struct net_device *dev); - void (*ndo_set_rx_mode_async)( + int (*ndo_set_rx_mode_async)( struct net_device *dev, struct netdev_hw_addr_list *uc, struct netdev_hw_addr_list *mc); @@ -1932,6 +1933,8 @@ enum netdev_reg_state { * @rx_mode_node: List entry for rx_mode work processing * @rx_mode_tracker: Refcount tracker for rx_mode work * @rx_mode_addr_cache: Recycled snapshot entries for rx_mode work + * @rx_mode_retry_timer: Timer that re-queues rx_mode work after failure + * @rx_mode_retry_count: Number of consecutive retries already scheduled * @uc: unicast mac addresses * @mc: multicast mac addresses * @dev_addrs: list of device hw addresses @@ -2325,6 +2328,8 @@ struct net_device { struct list_head rx_mode_node; netdevice_tracker rx_mode_tracker; struct netdev_hw_addr_list rx_mode_addr_cache; + struct timer_list rx_mode_retry_timer; + unsigned int rx_mode_retry_count; #ifdef CONFIG_LOCKDEP unsigned char nested_level; #endif @@ -5151,6 +5156,7 @@ static inline void __dev_mc_unsync(struct net_device *dev, /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); +void netif_rx_mode_schedule_retry(struct net_device *dev); int netif_set_promiscuity(struct net_device *dev, int inc); int dev_set_promiscuity(struct net_device *dev, int inc); int netif_set_allmulti(struct net_device *dev, int inc, bool notify); diff --git a/net/core/dev.c b/net/core/dev.c index 1ecd5691992e..202e35acb15b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1775,6 +1775,7 @@ static void __dev_close_many(struct list_head *head) if (ops->ndo_stop) ops->ndo_stop(dev); + netif_rx_mode_cancel_retry(dev); netif_set_up(dev, false); netpoll_poll_enable(dev); } @@ -12094,8 +12095,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, #endif mutex_init(&dev->lock); - INIT_LIST_HEAD(&dev->rx_mode_node); - __hw_addr_init(&dev->rx_mode_addr_cache); + netif_rx_mode_init(dev); dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); diff --git a/net/core/dev.h b/net/core/dev.h index 9e9431440869..4121c50e7c88 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -166,8 +166,10 @@ int dev_change_carrier(struct net_device *dev, bool new_carrier); void __dev_set_rx_mode(struct net_device *dev); int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify); +void netif_rx_mode_init(struct net_device *dev); bool netif_rx_mode_clean(struct net_device *dev); void netif_rx_mode_sync(struct net_device *dev); +void netif_rx_mode_cancel_retry(struct net_device *dev); void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, unsigned int gchanges, u32 portid, diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 6b493af8dc8b..e17f64a65e17 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -1252,6 +1252,35 @@ static int netif_uc_promisc_update(struct net_device *dev) return 0; } +/* Total retry budget (4): 1+2+4+8 = 15 seconds */ +#define NETIF_RX_MODE_RETRY_MAX 4 + +void netif_rx_mode_schedule_retry(struct net_device *dev) +{ + unsigned long delay; + + netdev_assert_locked_ops_compat(dev); + + if (dev->rx_mode_retry_count >= NETIF_RX_MODE_RETRY_MAX) { + netdev_err(dev, "rx_mode retry limit reached, giving up\n"); + return; + } + + delay = HZ << dev->rx_mode_retry_count; + if (mod_timer(&dev->rx_mode_retry_timer, jiffies + delay)) + return; + if (!dev->rx_mode_retry_count) + netdev_info(dev, "rx_mode install failed, retrying with backoff\n"); + dev->rx_mode_retry_count++; +} +EXPORT_SYMBOL_GPL(netif_rx_mode_schedule_retry); + +void netif_rx_mode_cancel_retry(struct net_device *dev) +{ + timer_delete_sync(&dev->rx_mode_retry_timer); + dev->rx_mode_retry_count = 0; +} + static void netif_rx_mode_run(struct net_device *dev) { struct netdev_hw_addr_list uc_snap, mc_snap, uc_ref, mc_ref; @@ -1275,8 +1304,8 @@ static void netif_rx_mode_run(struct net_device *dev) err = netif_addr_lists_snapshot(dev, &uc_snap, &mc_snap, &uc_ref, &mc_ref); if (err) { - netdev_WARN(dev, "failed to sync uc/mc addresses\n"); netif_addr_unlock_bh(dev); + netif_rx_mode_schedule_retry(dev); return; } @@ -1292,12 +1321,17 @@ static void netif_rx_mode_run(struct net_device *dev) __dev_set_promiscuity(dev, promisc_inc, false); if (ops->ndo_set_rx_mode_async) { - ops->ndo_set_rx_mode_async(dev, &uc_snap, &mc_snap); + err = ops->ndo_set_rx_mode_async(dev, &uc_snap, &mc_snap); netif_addr_lock_bh(dev); netif_addr_lists_reconcile(dev, &uc_snap, &mc_snap, &uc_ref, &mc_ref); netif_addr_unlock_bh(dev); + + if (err) + netif_rx_mode_schedule_retry(dev); + else + dev->rx_mode_retry_count = 0; } else if (ops->ndo_set_rx_mode) { netif_addr_lock_bh(dev); ops->ndo_set_rx_mode(dev); @@ -1350,6 +1384,21 @@ static void netif_rx_mode_queue(struct net_device *dev) schedule_work(&rx_mode_work); } +static void netif_rx_mode_retry(struct timer_list *t) +{ + struct net_device *dev = + timer_container_of(dev, t, rx_mode_retry_timer); + + netif_rx_mode_queue(dev); +} + +void netif_rx_mode_init(struct net_device *dev) +{ + INIT_LIST_HEAD(&dev->rx_mode_node); + __hw_addr_init(&dev->rx_mode_addr_cache); + timer_setup(&dev->rx_mode_retry_timer, netif_rx_mode_retry, 0); +} + /** * __dev_set_rx_mode() - upload unicast and multicast address lists to device * and configure RX filtering. |
