From bae2da826196ff4ab439b57683dce883e274faef Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Mon, 24 Mar 2025 15:45:28 -0700
Subject: net: remove netif_set_real_num_rx_queues() helper for when SYSFS=n

Since commit a953be53ce40 ("net-sysfs: add support for device-specific rx
queue sysfs attributes"), i.e. for at least a decade now, it has been safe
to call net_rx_queue_update_kobjects() when SYSFS=n. That function does
its own ifdef-inery and will return 0.

Remove the unnecessary stub for netif_set_real_num_rx_queues().

Acked-by: Stanislav Fomichev
Link: https://patch.msgid.link/20250324224537.248800-3-kuba@kernel.org
Signed-off-by: Jakub Kicinski
---
 include/linux/netdevice.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f22cca7c03ad..55859c565f84 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4062,17 +4062,7 @@ static inline bool netif_is_multiqueue(const struct net_device *dev)
 }
 
 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq);
-
-#ifdef CONFIG_SYSFS
 int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq);
-#else
-static inline int netif_set_real_num_rx_queues(struct net_device *dev,
-					       unsigned int rxqs)
-{
-	dev->real_num_rx_queues = rxqs;
-	return 0;
-}
-#endif
 int netif_set_real_num_queues(struct net_device *dev,
 			      unsigned int txq, unsigned int rxq);
 
-- 
cgit v1.2.3


From 4b702f8b72c7b05daa1b763fdc0840aa78178c3a Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Mon, 24 Mar 2025 15:45:30 -0700
Subject: net: explain "protection types" for the instance lock

Try to define some terminology for which fields are protected by which
lock and how. Some fields are protected by both rtnl_lock and the
instance lock, which is hard to talk about without having a "key phrase"
to refer to a particular protection scheme.

"ops protected" fields are defined later in the series, one by one.

Add ASSERT_RTNL() to netdev_ops_assert_locked() for drivers which do not
use the instance lock to protect ops. Hopefully it's not too confusing
that netdev_lock_ops() does not exactly match the lock which
netdev_ops_assert_locked() will assert. The noun "ops" is in a different
place in the name, so I think it's acceptable...

Acked-by: Stanislav Fomichev
Link: https://patch.msgid.link/20250324224537.248800-5-kuba@kernel.org
Signed-off-by: Jakub Kicinski
---
 include/linux/netdevice.h | 28 ++++++++++++++++++++++------
 include/net/netdev_lock.h |  3 +++
 2 files changed, 25 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 55859c565f84..2b91fb96a411 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2496,19 +2496,35 @@ struct net_device {
 	 * Should always be taken using netdev_lock() / netdev_unlock() helpers.
 	 * Drivers are free to use it for other protection.
 	 *
-	 * Protects:
+	 * For the drivers that implement shaper or queue API, the scope
+	 * of this lock is expanded to cover most ndo/queue/ethtool/sysfs
+	 * operations. Drivers may opt-in to this behavior by setting
+	 * @request_ops_lock.
+	 *
+	 * @lock protection mixes with rtnl_lock in multiple ways, fields are
+	 * either:
+	 *
+	 *	- simply protected by the instance @lock;
+	 *
+	 *	- double protected - writers hold both locks, readers hold either;
+	 *
+	 *	- ops protected - protected by the lock held around the NDOs
+	 *	  and other callbacks, that is the instance lock on devices for
+	 *	  which netdev_need_ops_lock() returns true, otherwise by rtnl_lock;
+	 *
+	 *	- double ops protected - always protected by rtnl_lock but for
+	 *	  devices for which netdev_need_ops_lock() returns true - also
+	 *	  the instance lock.
+	 *
+	 * Simply protects:
 	 *	@gro_flush_timeout, @napi_defer_hard_irqs, @napi_list,
 	 *	@net_shaper_hierarchy, @reg_state, @threaded
 	 *
-	 * Partially protects (writers must hold both @lock and rtnl_lock):
+	 * Double protects:
 	 *	@up
 	 *
 	 * Also protects some fields in struct napi_struct.
 	 *
-	 * For the drivers that implement shaper or queue API, the scope
-	 * of this lock is expanded to cover most ndo/queue/ethtool/sysfs
-	 * operations.
-	 *
 	 * Ordering: take after rtnl_lock.
 	 */
 	struct mutex lock;
diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h
index 689ffdfae50d..efd302375ef2 100644
--- a/include/net/netdev_lock.h
+++ b/include/net/netdev_lock.h
@@ -5,6 +5,7 @@
 
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
 
 static inline bool netdev_trylock(struct net_device *dev)
 {
@@ -51,6 +52,8 @@ static inline void netdev_ops_assert_locked(const struct net_device *dev)
 {
 	if (netdev_need_ops_lock(dev))
 		lockdep_assert_held(&dev->lock);
+	else
+		ASSERT_RTNL();
 }
 
 static inline int netdev_lock_cmp_fn(const struct lockdep_map *a,
-- 
cgit v1.2.3


From 0a65dcf6249b75c841b4218426b0d246a805c7e0 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Mon, 24 Mar 2025 15:45:31 -0700
Subject: net: designate queue counts as "double ops protected" by instance lock

Drivers which opt into instance lock protection of ops should only call
set_real_num_*_queues() under the instance lock. This means that queue
counts are double protected (writes are under both rtnl_lock and the
instance lock, readers under either).

Some readers may still be under the rtnl_lock, however, so for now we
need double protection of writers.

OTOH queue API paths are only under the protection of the instance lock,
so we need to validate that the device actually protects its ops with the
instance lock, otherwise the input checks we do against the queue count
are racy.

Acked-by: Stanislav Fomichev
Link: https://patch.msgid.link/20250324224537.248800-6-kuba@kernel.org
Signed-off-by: Jakub Kicinski
---
 include/linux/netdevice.h  | 3 +++
 net/core/dev.c             | 2 ++
 net/core/net-sysfs.c       | 2 ++
 net/core/netdev-genl.c     | 7 +++++++
 net/core/netdev_rx_queue.c | 3 +++
 5 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2b91fb96a411..60ef367d8575 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2523,6 +2523,9 @@ struct net_device {
 	 * Double protects:
 	 *	@up
 	 *
+	 * Double ops protects:
+	 *	@real_num_rx_queues, @real_num_tx_queues
+	 *
 	 * Also protects some fields in struct napi_struct.
 	 *
 	 * Ordering: take after rtnl_lock.
diff --git a/net/core/dev.c b/net/core/dev.c
index 6295f00e97a7..2d9be3ecd5e6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3130,6 +3130,7 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 	if (dev->reg_state == NETREG_REGISTERED ||
 	    dev->reg_state == NETREG_UNREGISTERING) {
 		ASSERT_RTNL();
+		netdev_ops_assert_locked(dev);
 
 		rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
 						  txq);
@@ -3179,6 +3180,7 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
 
 	if (dev->reg_state == NETREG_REGISTERED) {
 		ASSERT_RTNL();
+		netdev_ops_assert_locked(dev);
 
 		rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
 						  rxq);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b6fbe629ccee..1ace0cd01adc 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -2148,8 +2148,10 @@ static void remove_queue_kobjects(struct net_device *dev)
 
 	net_rx_queue_update_kobjects(dev, real_rx, 0);
 	netdev_queue_update_kobjects(dev, real_tx, 0);
+	netdev_lock_ops(dev);
 	dev->real_num_rx_queues = 0;
 	dev->real_num_tx_queues = 0;
+	netdev_unlock_ops(dev);
 #ifdef CONFIG_SYSFS
 	kset_unregister(dev->queues_kset);
 #endif
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 9e4882a22407..fd1cfa9707dc 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -867,6 +867,13 @@ int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info)
 		goto err_unlock_sock;
 	}
 
+	if (!netdev_need_ops_lock(netdev)) {
+		err = -EOPNOTSUPP;
+		NL_SET_BAD_ATTR(info->extack,
+				info->attrs[NETDEV_A_DEV_IFINDEX]);
+		goto err_unlock;
+	}
+
 	if (dev_xdp_prog_count(netdev)) {
 		NL_SET_ERR_MSG(info->extack, "unable to bind dmabuf to device with XDP program attached");
 		err = -EEXIST;
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
index a5b234b33cd5..3af716f77a13 100644
--- a/net/core/netdev_rx_queue.c
+++ b/net/core/netdev_rx_queue.c
@@ -92,6 +92,9 @@ static int __net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx,
 	struct netdev_rx_queue *rxq;
 	int ret;
 
+	if (!netdev_need_ops_lock(dev))
+		return -EOPNOTSUPP;
+
 	if (ifq_idx >= dev->real_num_rx_queues)
 		return -EINVAL;
 	ifq_idx = array_index_nospec(ifq_idx, dev->real_num_rx_queues);
-- 
cgit v1.2.3


From 310ae9eb2617c62deedef8f121d7ca1ae774fa76 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski
Date: Mon, 24 Mar 2025 15:45:32 -0700
Subject: net: designate queue -> napi linking as "ops protected"

netdev netlink is the only reader of netdev_{,rx_}queue->napi, and it
already holds netdev->lock. Switch protection of the writes to the
"ops protected" scheme.

The expectation is now that accessing queue->napi will require
netdev->lock for "ops locked" drivers, and rtnl_lock for all other
drivers.

Current "ops locked" drivers don't require any changes. gve and netdevsim
use _locked() helpers right next to netif_queue_set_napi() so they must
be holding the instance lock. iavf doesn't call it. bnxt is a bit messy
but all paths seem locked.
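As a purely illustrative sketch (not part of this patch), an "ops locked"
driver would now link an RX queue to its NAPI roughly as below; the
example_* names are made up, only netif_queue_set_napi() and
netdev_assert_locked() are real helpers:

    #include <linux/netdevice.h>
    #include <net/netdev_lock.h>

    /* Hypothetical driver helper, called from a path (e.g. ndo_open) that
     * already runs under the instance lock on a request_ops_lock driver.
     * netdev genl reads queue->napi under the same netdev->lock, so no
     * rtnl_lock is needed for this write.
     */
    static void example_rxq_set_napi(struct net_device *dev,
                                     unsigned int idx,
                                     struct napi_struct *napi)
    {
            netdev_assert_locked(dev);   /* instance lock held by the caller */
            netif_queue_set_napi(dev, idx, NETDEV_QUEUE_TYPE_RX, napi);
    }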
Acked-by: Stanislav Fomichev
Link: https://patch.msgid.link/20250324224537.248800-7-kuba@kernel.org
Signed-off-by: Jakub Kicinski
---
 include/linux/netdevice.h     | 5 +++--
 include/net/netdev_lock.h     | 8 ++++++++
 include/net/netdev_rx_queue.h | 2 +-
 net/core/dev.c                | 3 +--
 4 files changed, 13 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 60ef367d8575..fa79145518d1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -710,7 +710,7 @@ struct netdev_queue {
  * slow- / control-path part
  */
 	/* NAPI instance for the queue
-	 * Readers and writers must hold RTNL
+	 * "ops protected", see comment about net_device::lock
 	 */
 	struct napi_struct	*napi;
 
@@ -2526,7 +2526,8 @@ struct net_device {
 	 * Double ops protects:
 	 *	@real_num_rx_queues, @real_num_tx_queues
 	 *
-	 * Also protects some fields in struct napi_struct.
+	 * Also protects some fields in:
+	 *	struct napi_struct, struct netdev_queue, struct netdev_rx_queue
 	 *
 	 * Ordering: take after rtnl_lock.
 	 */
diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h
index efd302375ef2..1c0c9a94cc22 100644
--- a/include/net/netdev_lock.h
+++ b/include/net/netdev_lock.h
@@ -56,6 +56,14 @@ static inline void netdev_ops_assert_locked(const struct net_device *dev)
 		ASSERT_RTNL();
 }
 
+static inline void
+netdev_ops_assert_locked_or_invisible(const struct net_device *dev)
+{
+	if (dev->reg_state == NETREG_REGISTERED ||
+	    dev->reg_state == NETREG_UNREGISTERING)
+		netdev_ops_assert_locked(dev);
+}
+
 static inline int netdev_lock_cmp_fn(const struct lockdep_map *a,
 				     const struct lockdep_map *b)
 {
diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
index af40842f229d..b2238b551dce 100644
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -24,7 +24,7 @@ struct netdev_rx_queue {
 	struct xsk_buff_pool		*pool;
 #endif
 	/* NAPI instance for the queue
-	 * Readers and writers must hold RTNL
+	 * "ops protected", see comment about net_device::lock
 	 */
 	struct napi_struct		*napi;
 	struct pp_memory_provider_params mp_params;
diff --git a/net/core/dev.c b/net/core/dev.c
index 2d9be3ecd5e6..ab74e1f005d2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6901,8 +6901,7 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
 	if (WARN_ON_ONCE(napi && !napi->dev))
 		return;
 
-	if (dev->reg_state >= NETREG_REGISTERED)
-		ASSERT_RTNL();
+	netdev_ops_assert_locked_or_invisible(dev);
 
 	switch (type) {
 	case NETDEV_QUEUE_TYPE_RX:
-- 
cgit v1.2.3
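To illustrate, a minimal sketch (not taken from the series above) of what
"double ops protected" means in practice for the queue counts on a driver
that sets request_ops_lock. The example_resize_rx() helper is a made-up
name; netif_set_real_num_rx_queues(), ASSERT_RTNL() and
netdev_ops_assert_locked() are the real interfaces touched by these patches:

    #include <linux/netdevice.h>
    #include <linux/rtnetlink.h>
    #include <net/netdev_lock.h>

    /* Writers of real_num_rx_queues hold both rtnl_lock and the instance
     * lock; readers may hold either one.  The asserts below mirror what
     * netif_set_real_num_rx_queues() itself now checks for a registered,
     * ops-locked device.
     */
    static int example_resize_rx(struct net_device *dev, unsigned int rxq)
    {
            ASSERT_RTNL();                  /* writer side: rtnl_lock ... */
            netdev_ops_assert_locked(dev);  /* ... plus the instance lock  */

            return netif_set_real_num_rx_queues(dev, rxq);
    }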