From ebda2f0bbde540ff7da168d2837f8cfb14581e2e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 14 Jan 2025 19:53:09 -0800 Subject: net: add netdev_lock() / netdev_unlock() helpers Add helpers for locking the netdev instance, use it in drivers and the shaper code. This will make grepping for the lock usage much easier, as we extend the lock to cover more fields. Reviewed-by: Joe Damato Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/20250115035319.559603-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3e6336775baf..6d440db35d5f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2444,8 +2444,12 @@ struct net_device { u32 napi_defer_hard_irqs; /** - * @lock: protects @net_shaper_hierarchy, feel free to use for other - * netdev-scope protection. Ordering: take after rtnl_lock. + * @lock: netdev-scope lock, protects a small selection of fields. + * Should always be taken using netdev_lock() / netdev_unlock() helpers. + * Drivers are free to use it for other protection. + * + * Protects: @net_shaper_hierarchy. + * Ordering: take after rtnl_lock. 
*/ struct mutex lock; @@ -2671,6 +2675,21 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index, enum netdev_queue_type type, struct napi_struct *napi); +static inline void netdev_lock(struct net_device *dev) +{ + mutex_lock(&dev->lock); +} + +static inline void netdev_unlock(struct net_device *dev) +{ + mutex_unlock(&dev->lock); +} + +static inline void netdev_assert_locked(struct net_device *dev) +{ + lockdep_assert_held(&dev->lock); +} + static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) { napi->irq = irq; -- cgit v1.2.3 From 5fda3f35349b6b7f22f5f5095a3821261d515075 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 14 Jan 2025 19:53:10 -0800 Subject: net: make netdev_lock() protect netdev->reg_state Protect writes to netdev->reg_state with netdev_lock(). From now on holding netdev_lock() is sufficient to prevent the net_device from getting unregistered, so code which wants to hold just a single netdev around no longer needs to hold rtnl_lock. We do not protect the NETREG_UNREGISTERED -> NETREG_RELEASED transition. We'd need to move mutex_destroy(netdev->lock) to .release, but the real reason is that trying to stop the unregistration process mid-way would be unsafe / crazy. Taking references on such devices is not safe, either. So the intended semantics are to lock REGISTERED devices. Reviewed-by: Joe Damato Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250115035319.559603-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- net/core/dev.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6d440db35d5f..007bcfa383c9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2448,7 +2448,7 @@ struct net_device { * Should always be taken using netdev_lock() / netdev_unlock() helpers. 
* Drivers are free to use it for other protection. * - * Protects: @net_shaper_hierarchy. + * Protects: @reg_state, @net_shaper_hierarchy. * Ordering: take after rtnl_lock. */ struct mutex lock; diff --git a/net/core/dev.c b/net/core/dev.c index 47e6b0f73cfc..bbe6fb9e32cd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10695,7 +10695,9 @@ int register_netdevice(struct net_device *dev) ret = netdev_register_kobject(dev); + netdev_lock(dev); WRITE_ONCE(dev->reg_state, ret ? NETREG_UNREGISTERED : NETREG_REGISTERED); + netdev_unlock(dev); if (ret) goto err_uninit_notify; @@ -10969,7 +10971,9 @@ void netdev_run_todo(void) continue; } + netdev_lock(dev); WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERED); + netdev_unlock(dev); linkwatch_sync_dev(dev); } @@ -11575,7 +11579,9 @@ void unregister_netdevice_many_notify(struct list_head *head, list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain. */ unlist_netdevice(dev); + netdev_lock(dev); WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING); + netdev_unlock(dev); } flush_all_backlogs(); -- cgit v1.2.3 From 5112457f3d8e41f987908266068af88ef9f3ab78 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 14 Jan 2025 19:53:12 -0800 Subject: net: add netdev->up protected by netdev_lock() Some uAPI (netdev netlink) hide net_device's sub-objects while the interface is down to ensure uniform behavior across drivers. To remove the rtnl_lock dependency from those uAPIs we need a way to safely tell if the device is down or up. Add an indication of whether device is open or closed, protected by netdev->lock. The semantics are the same as IFF_UP, but taking netdev_lock around every write to ->flags would be a lot of code churn. We don't want to blanket the entire open / close path by netdev_lock, because it will prevent us from applying it to specific structures - core helpers won't be able to take that lock from any function called by the drivers on open/close paths. 
So the state of the flag is "pessimistic", as in it may report false negatives, but never false positives. Reviewed-by: Joe Damato Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250115035319.559603-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 14 +++++++++++++- net/core/dev.c | 4 ++-- net/core/dev.h | 12 ++++++++++++ 3 files changed, 27 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 007bcfa383c9..cac81b0a166f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2443,12 +2443,24 @@ struct net_device { unsigned long gro_flush_timeout; u32 napi_defer_hard_irqs; + /** + * @up: copy of IFF_UP from @flags, but safe to read with just @lock. + * May report false negatives while the device is being opened + * or closed (@lock does not protect .ndo_open, or .ndo_stop). + */ + bool up; + /** * @lock: netdev-scope lock, protects a small selection of fields. * Should always be taken using netdev_lock() / netdev_unlock() helpers. * Drivers are free to use it for other protection. * - * Protects: @reg_state, @net_shaper_hierarchy. + * Protects: + * @net_shaper_hierarchy, @reg_state + * + * Partially protects (writers must hold both @lock and rtnl_lock): + * @up + * * Ordering: take after rtnl_lock. 
*/ struct mutex lock; diff --git a/net/core/dev.c b/net/core/dev.c index 968603cfed09..65bf95593da7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1619,7 +1619,7 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) if (ret) clear_bit(__LINK_STATE_START, &dev->state); else { - dev->flags |= IFF_UP; + netif_set_up(dev, true); dev_set_rx_mode(dev); dev_activate(dev); add_device_randomness(dev->dev_addr, dev->addr_len); @@ -1698,7 +1698,7 @@ static void __dev_close_many(struct list_head *head) if (ops->ndo_stop) ops->ndo_stop(dev); - dev->flags &= ~IFF_UP; + netif_set_up(dev, false); netpoll_poll_enable(dev); } } diff --git a/net/core/dev.h b/net/core/dev.h index 25ae732c0775..ef37e2dd44f4 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -128,6 +128,18 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, void unregister_netdevice_many_notify(struct list_head *head, u32 portid, const struct nlmsghdr *nlh); +static inline void netif_set_up(struct net_device *dev, bool value) +{ + if (value) + dev->flags |= IFF_UP; + else + dev->flags &= ~IFF_UP; + + netdev_lock(dev); + dev->up = value; + netdev_unlock(dev); +} + static inline void netif_set_gso_max_size(struct net_device *dev, unsigned int size) { -- cgit v1.2.3 From 1b23cdbd2bbc4b40e21c12ae86c2781e347ff0f8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 14 Jan 2025 19:53:13 -0800 Subject: net: protect netdev->napi_list with netdev_lock() Hold netdev->lock when NAPIs are getting added or removed. This will allow safe access to NAPI instances of a net_device without rtnl_lock. Create a family of helpers which assume the lock is already taken. Switch iavf to them, as it makes extensive use of netdev->lock, already. 
Reviewed-by: Joe Damato Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250115035319.559603-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/iavf/iavf_main.c | 6 ++-- include/linux/netdevice.h | 54 +++++++++++++++++++++++++---- net/core/dev.c | 15 +++++--- 3 files changed, 60 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index ab908d620285..2db97c5d9f9e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1800,8 +1800,8 @@ static int iavf_alloc_q_vectors(struct iavf_adapter *adapter) q_vector->v_idx = q_idx; q_vector->reg_idx = q_idx; cpumask_copy(&q_vector->affinity_mask, cpu_possible_mask); - netif_napi_add(adapter->netdev, &q_vector->napi, - iavf_napi_poll); + netif_napi_add_locked(adapter->netdev, &q_vector->napi, + iavf_napi_poll); } return 0; @@ -1827,7 +1827,7 @@ static void iavf_free_q_vectors(struct iavf_adapter *adapter) for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { struct iavf_q_vector *q_vector = &adapter->q_vectors[q_idx]; - netif_napi_del(&q_vector->napi); + netif_napi_del_locked(&q_vector->napi); } kfree(adapter->q_vectors); adapter->q_vectors = NULL; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cac81b0a166f..3130a8c807dd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2456,7 +2456,7 @@ struct net_device { * Drivers are free to use it for other protection. 
* * Protects: - * @net_shaper_hierarchy, @reg_state + * @napi_list, @net_shaper_hierarchy, @reg_state * * Partially protects (writers must hold both @lock and rtnl_lock): * @up @@ -2712,8 +2712,19 @@ static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) */ #define NAPI_POLL_WEIGHT 64 -void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), int weight); +void netif_napi_add_weight_locked(struct net_device *dev, + struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), + int weight); + +static inline void +netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight) +{ + netdev_lock(dev); + netif_napi_add_weight_locked(dev, napi, poll, weight); + netdev_unlock(dev); +} /** * netif_napi_add() - initialize a NAPI context @@ -2731,6 +2742,13 @@ netif_napi_add(struct net_device *dev, struct napi_struct *napi, netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT); } +static inline void +netif_napi_add_locked(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int)) +{ + netif_napi_add_weight_locked(dev, napi, poll, NAPI_POLL_WEIGHT); +} + static inline void netif_napi_add_tx_weight(struct net_device *dev, struct napi_struct *napi, @@ -2741,6 +2759,15 @@ netif_napi_add_tx_weight(struct net_device *dev, netif_napi_add_weight(dev, napi, poll, weight); } +static inline void +netif_napi_add_config_locked(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int index) +{ + napi->index = index; + napi->config = &dev->napi_config[index]; + netif_napi_add_weight_locked(dev, napi, poll, NAPI_POLL_WEIGHT); +} + /** * netif_napi_add_config - initialize a NAPI context with persistent config * @dev: network device @@ -2752,9 +2779,9 @@ static inline void netif_napi_add_config(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct 
*, int), int index) { - napi->index = index; - napi->config = &dev->napi_config[index]; - netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT); + netdev_lock(dev); + netif_napi_add_config_locked(dev, napi, poll, index); + netdev_unlock(dev); } /** @@ -2774,6 +2801,8 @@ static inline void netif_napi_add_tx(struct net_device *dev, netif_napi_add_tx_weight(dev, napi, poll, NAPI_POLL_WEIGHT); } +void __netif_napi_del_locked(struct napi_struct *napi); + /** * __netif_napi_del - remove a NAPI context * @napi: NAPI context @@ -2782,7 +2811,18 @@ static inline void netif_napi_add_tx(struct net_device *dev, * containing @napi. Drivers might want to call this helper to combine * all the needed RCU grace periods into a single one. */ -void __netif_napi_del(struct napi_struct *napi); +static inline void __netif_napi_del(struct napi_struct *napi) +{ + netdev_lock(napi->dev); + __netif_napi_del_locked(napi); + netdev_unlock(napi->dev); +} + +static inline void netif_napi_del_locked(struct napi_struct *napi) +{ + __netif_napi_del_locked(napi); + synchronize_net(); +} /** * netif_napi_del - remove a NAPI context diff --git a/net/core/dev.c b/net/core/dev.c index 65bf95593da7..235707c0f631 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6910,9 +6910,12 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi) list_add_rcu(&napi->dev_list, higher); /* adds after higher */ } -void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), int weight) +void netif_napi_add_weight_locked(struct net_device *dev, + struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), + int weight) { + netdev_assert_locked(dev); if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state))) return; @@ -6953,7 +6956,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, dev->threaded = false; netif_napi_set_irq(napi, -1); } -EXPORT_SYMBOL(netif_napi_add_weight); 
+EXPORT_SYMBOL(netif_napi_add_weight_locked); void napi_disable(struct napi_struct *n) { @@ -7024,8 +7027,10 @@ static void flush_gro_hash(struct napi_struct *napi) } /* Must be called in process context */ -void __netif_napi_del(struct napi_struct *napi) +void __netif_napi_del_locked(struct napi_struct *napi) { + netdev_assert_locked(napi->dev); + if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state)) return; @@ -7045,7 +7050,7 @@ void __netif_napi_del(struct napi_struct *napi) napi->thread = NULL; } } -EXPORT_SYMBOL(__netif_napi_del); +EXPORT_SYMBOL(__netif_napi_del_locked); static int __napi_poll(struct napi_struct *n, bool *repoll) { -- cgit v1.2.3 From 413f0271f3966e0c73d4937963f19335af19e628 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 14 Jan 2025 19:53:14 -0800 Subject: net: protect NAPI enablement with netdev_lock() Wrap napi_enable() / napi_disable() with netdev_lock(). Provide the "already locked" flavor of the API. iavf needs the usual adjustment. A number of drivers call napi_enable() under a spin lock, so they have to be modified to take netdev_lock() first, then spin lock then call napi_enable_locked(). Protecting napi_enable() implies that napi->napi_id is protected by netdev_lock(). 
Acked-by: Francois Romieu # via-velocity Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250115035319.559603-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pcnet32.c | 11 ++++++-- drivers/net/ethernet/intel/iavf/iavf_main.c | 4 +-- drivers/net/ethernet/marvell/mvneta.c | 5 +++- drivers/net/ethernet/via/via-velocity.c | 6 +++-- include/linux/netdevice.h | 11 +++----- net/core/dev.c | 41 ++++++++++++++++++++++++----- 6 files changed, 56 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index 72db9f9e7bee..c6bd803f5b0c 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -462,7 +462,7 @@ static void pcnet32_netif_start(struct net_device *dev) val = lp->a->read_csr(ioaddr, CSR3); val &= 0x00ff; lp->a->write_csr(ioaddr, CSR3, val); - napi_enable(&lp->napi); + napi_enable_locked(&lp->napi); } /* @@ -889,6 +889,7 @@ static int pcnet32_set_ringparam(struct net_device *dev, if (netif_running(dev)) pcnet32_netif_stop(dev); + netdev_lock(dev); spin_lock_irqsave(&lp->lock, flags); lp->a->write_csr(ioaddr, CSR0, CSR0_STOP); /* stop the chip */ @@ -920,6 +921,7 @@ static int pcnet32_set_ringparam(struct net_device *dev, } spin_unlock_irqrestore(&lp->lock, flags); + netdev_unlock(dev); netif_info(lp, drv, dev, "Ring Param Settings: RX: %d, TX: %d\n", lp->rx_ring_size, lp->tx_ring_size); @@ -985,6 +987,7 @@ static int pcnet32_loopback_test(struct net_device *dev, uint64_t * data1) if (netif_running(dev)) pcnet32_netif_stop(dev); + netdev_lock(dev); spin_lock_irqsave(&lp->lock, flags); lp->a->write_csr(ioaddr, CSR0, CSR0_STOP); /* stop the chip */ @@ -1122,6 +1125,7 @@ clean_up: lp->a->write_bcr(ioaddr, 20, 4); /* return to 16bit mode */ } spin_unlock_irqrestore(&lp->lock, flags); + netdev_unlock(dev); return rc; } /* end pcnet32_loopback_test */ @@ -2101,6 +2105,7 @@ static int 
pcnet32_open(struct net_device *dev) return -EAGAIN; } + netdev_lock(dev); spin_lock_irqsave(&lp->lock, flags); /* Check for a valid station address */ if (!is_valid_ether_addr(dev->dev_addr)) { @@ -2266,7 +2271,7 @@ static int pcnet32_open(struct net_device *dev) goto err_free_ring; } - napi_enable(&lp->napi); + napi_enable_locked(&lp->napi); /* Re-initialize the PCNET32, and start it when done. */ lp->a->write_csr(ioaddr, 1, (lp->init_dma_addr & 0xffff)); @@ -2300,6 +2305,7 @@ static int pcnet32_open(struct net_device *dev) lp->a->read_csr(ioaddr, CSR0)); spin_unlock_irqrestore(&lp->lock, flags); + netdev_unlock(dev); return 0; /* Always succeed */ @@ -2315,6 +2321,7 @@ err_free_ring: err_free_irq: spin_unlock_irqrestore(&lp->lock, flags); + netdev_unlock(dev); free_irq(dev->irq, dev); return rc; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 2db97c5d9f9e..cbfaaa5b7d02 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1180,7 +1180,7 @@ static void iavf_napi_enable_all(struct iavf_adapter *adapter) q_vector = &adapter->q_vectors[q_idx]; napi = &q_vector->napi; - napi_enable(napi); + napi_enable_locked(napi); } } @@ -1196,7 +1196,7 @@ static void iavf_napi_disable_all(struct iavf_adapter *adapter) for (q_idx = 0; q_idx < q_vectors; q_idx++) { q_vector = &adapter->q_vectors[q_idx]; - napi_disable(&q_vector->napi); + napi_disable_locked(&q_vector->napi); } } diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 9e79a60baebc..aa049cee576d 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4392,6 +4392,7 @@ static int mvneta_cpu_online(unsigned int cpu, struct hlist_node *node) if (pp->neta_armada3700) return 0; + netdev_lock(port->napi.dev); spin_lock(&pp->lock); /* * Configuring the driver for a new CPU while the driver is @@ -4418,7 +4419,7 @@ static int 
mvneta_cpu_online(unsigned int cpu, struct hlist_node *node) /* Mask all ethernet port interrupts */ on_each_cpu(mvneta_percpu_mask_interrupt, pp, true); - napi_enable(&port->napi); + napi_enable_locked(&port->napi); /* * Enable per-CPU interrupts on the CPU that is @@ -4439,6 +4440,8 @@ static int mvneta_cpu_online(unsigned int cpu, struct hlist_node *node) MVNETA_CAUSE_LINK_CHANGE); netif_tx_start_all_queues(pp->dev); spin_unlock(&pp->lock); + netdev_unlock(port->napi.dev); + return 0; } diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index dd4a07c97eee..5aa93144a4f5 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -2320,7 +2320,8 @@ static int velocity_change_mtu(struct net_device *dev, int new_mtu) if (ret < 0) goto out_free_tmp_vptr_1; - napi_disable(&vptr->napi); + netdev_lock(dev); + napi_disable_locked(&vptr->napi); spin_lock_irqsave(&vptr->lock, flags); @@ -2342,12 +2343,13 @@ static int velocity_change_mtu(struct net_device *dev, int new_mtu) velocity_give_many_rx_descs(vptr); - napi_enable(&vptr->napi); + napi_enable_locked(&vptr->napi); mac_enable_int(vptr->mac_regs); netif_start_queue(dev); spin_unlock_irqrestore(&vptr->lock, flags); + netdev_unlock(dev); velocity_free_rings(tmp_vptr); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3130a8c807dd..3941e4d0073e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -382,7 +382,7 @@ struct napi_struct { struct sk_buff *skb; struct list_head rx_list; /* Pending GRO_NORMAL skbs */ int rx_count; /* length of rx_list */ - unsigned int napi_id; + unsigned int napi_id; /* protected by netdev_lock */ struct hrtimer timer; struct task_struct *thread; unsigned long gro_flush_timeout; @@ -570,16 +570,11 @@ static inline bool napi_complete(struct napi_struct *n) int dev_set_threaded(struct net_device *dev, bool threaded); -/** - * napi_disable - prevent NAPI from scheduling 
- * @n: NAPI context - * - * Stop NAPI from being scheduled on this context. - * Waits till any outstanding processing completes. - */ void napi_disable(struct napi_struct *n); +void napi_disable_locked(struct napi_struct *n); void napi_enable(struct napi_struct *n); +void napi_enable_locked(struct napi_struct *n); /** * napi_synchronize - wait until NAPI is not running diff --git a/net/core/dev.c b/net/core/dev.c index 235707c0f631..cfd88bc6ce5f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6958,11 +6958,13 @@ void netif_napi_add_weight_locked(struct net_device *dev, } EXPORT_SYMBOL(netif_napi_add_weight_locked); -void napi_disable(struct napi_struct *n) +void napi_disable_locked(struct napi_struct *n) { unsigned long val, new; might_sleep(); + netdev_assert_locked(n->dev); + set_bit(NAPI_STATE_DISABLE, &n->state); val = READ_ONCE(n->state); @@ -6985,16 +6987,25 @@ void napi_disable(struct napi_struct *n) clear_bit(NAPI_STATE_DISABLE, &n->state); } -EXPORT_SYMBOL(napi_disable); +EXPORT_SYMBOL(napi_disable_locked); /** - * napi_enable - enable NAPI scheduling - * @n: NAPI context + * napi_disable() - prevent NAPI from scheduling + * @n: NAPI context * - * Resume NAPI from being scheduled on this context. - * Must be paired with napi_disable. + * Stop NAPI from being scheduled on this context. + * Waits till any outstanding processing completes. + * Takes netdev_lock() for associated net_device. 
*/ -void napi_enable(struct napi_struct *n) +void napi_disable(struct napi_struct *n) +{ + netdev_lock(n->dev); + napi_disable_locked(n); + netdev_unlock(n->dev); +} +EXPORT_SYMBOL(napi_disable); + +void napi_enable_locked(struct napi_struct *n) { unsigned long new, val = READ_ONCE(n->state); @@ -7011,6 +7022,22 @@ void napi_enable(struct napi_struct *n) new |= NAPIF_STATE_THREADED; } while (!try_cmpxchg(&n->state, &val, new)); } +EXPORT_SYMBOL(napi_enable_locked); + +/** + * napi_enable() - enable NAPI scheduling + * @n: NAPI context + * + * Enable scheduling of a NAPI instance. + * Must be paired with napi_disable(). + * Takes netdev_lock() for associated net_device. + */ +void napi_enable(struct napi_struct *n) +{ + netdev_lock(n->dev); + napi_enable_locked(n); + netdev_unlock(n->dev); +} EXPORT_SYMBOL(napi_enable); static void flush_gro_hash(struct napi_struct *napi) -- cgit v1.2.3 From 1bb86cf8f44b1c1a320566558250b1f5121f6fd3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 14 Jan 2025 19:53:16 -0800 Subject: net: protect threaded status of NAPI with netdev_lock() Now that NAPI instances can't come and go without holding netdev->lock we can trivially switch from rtnl_lock() to netdev_lock() for setting netdev->threaded via sysfs. Note that since we do not lock netdev_lock around sysfs calls in the core we don't have to "trylock" like we do with rtnl_lock. 
Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250115035319.559603-9-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 13 +++++++++++-- net/core/dev.c | 2 ++ net/core/net-sysfs.c | 34 ++++++++++++++++++++++++++++++++-- 3 files changed, 45 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3941e4d0073e..20e773bbd181 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -384,7 +384,7 @@ struct napi_struct { int rx_count; /* length of rx_list */ unsigned int napi_id; /* protected by netdev_lock */ struct hrtimer timer; - struct task_struct *thread; + struct task_struct *thread; /* protected by netdev_lock */ unsigned long gro_flush_timeout; unsigned long irq_suspend_timeout; u32 defer_hard_irqs; @@ -2451,11 +2451,13 @@ struct net_device { * Drivers are free to use it for other protection. * * Protects: - * @napi_list, @net_shaper_hierarchy, @reg_state + * @napi_list, @net_shaper_hierarchy, @reg_state, @threaded * * Partially protects (writers must hold both @lock and rtnl_lock): * @up * + * Also protects some fields in struct napi_struct. + * * Ordering: take after rtnl_lock. 
*/ struct mutex lock; @@ -2697,6 +2699,13 @@ static inline void netdev_assert_locked(struct net_device *dev) lockdep_assert_held(&dev->lock); } +static inline void netdev_assert_locked_or_invisible(struct net_device *dev) +{ + if (dev->reg_state == NETREG_REGISTERED || + dev->reg_state == NETREG_UNREGISTERING) + netdev_assert_locked(dev); +} + static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) { napi->irq = irq; diff --git a/net/core/dev.c b/net/core/dev.c index 2ef50a3ee4a1..34db90f345d5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6785,6 +6785,8 @@ int dev_set_threaded(struct net_device *dev, bool threaded) struct napi_struct *napi; int err = 0; + netdev_assert_locked_or_invisible(dev); + if (dev->threaded == threaded) return 0; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 2d9afc6e2161..9365a7185a1d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -36,7 +36,7 @@ static const char fmt_uint[] = "%u\n"; static const char fmt_ulong[] = "%lu\n"; static const char fmt_u64[] = "%llu\n"; -/* Caller holds RTNL or RCU */ +/* Caller holds RTNL, netdev->lock or RCU */ static inline int dev_isalive(const struct net_device *dev) { return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED; @@ -108,6 +108,36 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, return ret; } +/* Same as netdev_store() but takes netdev_lock() instead of rtnl_lock() */ +static ssize_t +netdev_lock_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len, + int (*set)(struct net_device *, unsigned long)) +{ + struct net_device *netdev = to_net_dev(dev); + struct net *net = dev_net(netdev); + unsigned long new; + int ret; + + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + ret = kstrtoul(buf, 0, &new); + if (ret) + return ret; + + netdev_lock(netdev); + + if (dev_isalive(netdev)) { + ret = (*set)(netdev, new); + if (ret == 0) + ret = len; + } + 
netdev_unlock(netdev); + + return ret; +} + NETDEVICE_SHOW_RO(dev_id, fmt_hex); NETDEVICE_SHOW_RO(dev_port, fmt_dec); NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec); @@ -638,7 +668,7 @@ static ssize_t threaded_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - return netdev_store(dev, attr, buf, len, modify_napi_threaded); + return netdev_lock_store(dev, attr, buf, len, modify_napi_threaded); } static DEVICE_ATTR_RW(threaded); -- cgit v1.2.3 From 53ed30800d3fd36e1e9f7ba8014b150632f714b1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 14 Jan 2025 19:53:17 -0800 Subject: net: protect napi->irq with netdev_lock() Take netdev_lock() in netif_napi_set_irq(). All NAPI "control fields" are now protected by that lock (most of the other ones are set during napi add/del). The napi_hash_node is fully protected by the hash spin lock, but close enough for the kdoc... Reviewed-by: Joe Damato Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250115035319.559603-10-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 10 +++++++++- net/core/dev.c | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 20e773bbd181..a47ff20365f9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -388,6 +388,7 @@ struct napi_struct { unsigned long gro_flush_timeout; unsigned long irq_suspend_timeout; u32 defer_hard_irqs; + /* all fields past this point are write-protected by netdev_lock */ /* control-path-only fields follow */ struct list_head dev_list; struct hlist_node napi_hash_node; @@ -2706,11 +2707,18 @@ static inline void netdev_assert_locked_or_invisible(struct net_device *dev) netdev_assert_locked(dev); } -static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) +static inline void netif_napi_set_irq_locked(struct napi_struct *napi, int irq) { 
napi->irq = irq; } +static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) +{ + netdev_lock(napi->dev); + netif_napi_set_irq_locked(napi, irq); + netdev_unlock(napi->dev); +} + /* Default NAPI poll() weight * Device drivers are strongly advised to not use bigger value */ diff --git a/net/core/dev.c b/net/core/dev.c index 34db90f345d5..b6722ed9767a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6957,7 +6957,7 @@ void netif_napi_add_weight_locked(struct net_device *dev, */ if (dev->threaded && napi_kthread_create(napi)) dev->threaded = false; - netif_napi_set_irq(napi, -1); + netif_napi_set_irq_locked(napi, -1); } EXPORT_SYMBOL(netif_napi_add_weight_locked); -- cgit v1.2.3 From e7ed2ba757bf86a4f90ae9c4080235fc9c74d8a2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 14 Jan 2025 19:53:18 -0800 Subject: net: protect NAPI config fields with netdev_lock() Protect the following members of netdev and napi by netdev_lock: - defer_hard_irqs, - gro_flush_timeout, - irq_suspend_timeout. The first two are written via sysfs (which this patch switches to new lock), and netdev genl which holds both netdev and rtnl locks. irq_suspend_timeout is only written by netdev genl. 
Reviewed-by: Joe Damato Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250115035319.559603-11-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 7 ++++--- net/core/net-sysfs.c | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a47ff20365f9..8308d9c75918 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -384,11 +384,11 @@ struct napi_struct { int rx_count; /* length of rx_list */ unsigned int napi_id; /* protected by netdev_lock */ struct hrtimer timer; - struct task_struct *thread; /* protected by netdev_lock */ + /* all fields past this point are write-protected by netdev_lock */ + struct task_struct *thread; unsigned long gro_flush_timeout; unsigned long irq_suspend_timeout; u32 defer_hard_irqs; - /* all fields past this point are write-protected by netdev_lock */ /* control-path-only fields follow */ struct list_head dev_list; struct hlist_node napi_hash_node; @@ -2452,7 +2452,8 @@ struct net_device { * Drivers are free to use it for other protection. 
* * Protects: - * @napi_list, @net_shaper_hierarchy, @reg_state, @threaded + * @gro_flush_timeout, @napi_defer_hard_irqs, @napi_list, + * @net_shaper_hierarchy, @reg_state, @threaded * * Partially protects (writers must hold both @lock and rtnl_lock): * @up diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 9365a7185a1d..07cb99b114bd 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -450,7 +450,7 @@ static ssize_t gro_flush_timeout_store(struct device *dev, if (!capable(CAP_NET_ADMIN)) return -EPERM; - return netdev_store(dev, attr, buf, len, change_gro_flush_timeout); + return netdev_lock_store(dev, attr, buf, len, change_gro_flush_timeout); } NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong); @@ -470,7 +470,8 @@ static ssize_t napi_defer_hard_irqs_store(struct device *dev, if (!capable(CAP_NET_ADMIN)) return -EPERM; - return netdev_store(dev, attr, buf, len, change_napi_defer_hard_irqs); + return netdev_lock_store(dev, attr, buf, len, + change_napi_defer_hard_irqs); } NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_uint); -- cgit v1.2.3