summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2026-06-09 20:13:08 +0300
committerJakub Kicinski <kuba@kernel.org>2026-06-09 20:13:08 +0300
commit3abbe30231441f1fbb3305e9854c56a34650af53 (patch)
tree28e37f85137ae408657f4edf4fc0cd81cfef2692 /include
parent9415471e01c1aaac43daa6af3a261dc0c6c3a47c (diff)
parentb5125db8d0205048a3b9dafec7f8e2b0a702101f (diff)
downloadlinux-3abbe30231441f1fbb3305e9854c56a34650af53.tar.xz
Merge branch 'net-ethtool-let-ops-locked-drivers-run-without-rtnl_lock'
Jakub Kicinski says: ==================== net: ethtool: let ops locked drivers run without rtnl_lock With the ethtool_get_link_ksettings() situation hopefully ironed out the previous series (commit 6a5d837f0ce2) let's return to the main part of the series. We have been slowly moving towards removing the rtnl_lock dependency in driver ops since the concept of "ops-locked" drivers have been introduced last year. Since last year will take the netdev instance lock before invoking any ndo or ethtool op of "ops-locked" drivers. We dipped our toes into rtnl_lock-less ops with the queue binding API. Queue stats, NAPI, and other netdev-netlink objects are also queried without holding rtnl_lock already. It's time to take the next logical step and lift the requirement from ethtool ops. The direct motivation for this patchset is that ethtool ops often involve communicating with device FW, and may take a long time to complete. Aggressive polling of device state on machines with 10+ NICs have been shown to significantly increase rtnl_lock pressure. There's a handful of areas which still need rtnl_lock (see below). I decided to convert everything to rtnl_lock-less by default, and add a set of flags which let the drivers request rtnl_lock to still be taken. I don't love this, but I'm worried that opt-in would be even more confusing. Known issues / exclusions: - qdiscs - qdisc configuration currently assumes rtnl_lock, this is mostly impacting set_channels callback. qdisc config is probably the easiest one of the exclusions to tackle, it's fairly self-contained. - features - even tho feature changes are (correctly) plumbed to the driver thru ndos they are part of ethtool uAPI. ethtool itself calls netdev_features_change() if it has spotted device feature change before vs after to the callback. Some drivers also call netdev_features_change() directly in response to various changes, e.g. setting priv flags. Since features have to propagate to upper and lower devices anything that touches features is quite hard to move from under rtnl_lock. - phylink - phylink and SFP depend on rtnl_lock today, I suspect that this is purely for historic reasons. I started poking at it and don't really see a need for a global lock. But accessing the netdev instance lock from the SFP entry points will require some attention from the phylink folks. - phydev - similar to phylink, looks quite doable. But no ops-locked driver currently has a phydev (fbnic only uses phylink) so phydev related paths retain a ASSERT_RTNL() for now. Tested on mlx5, bnxt and fbnic. ==================== Link: https://patch.msgid.link/20260605002912.3456868-1-kuba@kernel.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/ethtool.h36
-rw-r--r--include/linux/netdevice.h3
-rw-r--r--include/linux/phy_link_topology.h5
-rw-r--r--include/net/netdev_lock.h11
4 files changed, 53 insertions, 2 deletions
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index f51346a6a686..1b834e2a522e 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -930,6 +930,19 @@ struct kernel_ethtool_ts_info {
u32 rx_filters;
};
+/* Bits for ethtool_ops::op_needs_rtnl
+ * LINKSETTINGS cover a number of commands, but in most cases we want to keep
+ * these bits separate, per GET and SET. GET is much easier to "unlock".
+ */
+#define ETHTOOL_OP_NEEDS_RTNL_LINKSETTINGS BIT(0)
+#define ETHTOOL_OP_NEEDS_RTNL_SPFLAGS BIT(1)
+#define ETHTOOL_OP_NEEDS_RTNL_SRINGPARAM BIT(2)
+#define ETHTOOL_OP_NEEDS_RTNL_SCHANNELS BIT(3)
+#define ETHTOOL_OP_NEEDS_RTNL_SCOALESCE BIT(4)
+#define ETHTOOL_OP_NEEDS_RTNL_GPAUSEPARAM BIT(5)
+#define ETHTOOL_OP_NEEDS_RTNL_SPAUSEPARAM BIT(6)
+#define ETHTOOL_OP_NEEDS_RTNL_RSS BIT(7)
+
/**
* struct ethtool_ops - optional netdev operations
* @supported_input_xfrm: supported types of input xfrm from %RXH_XFRM_*.
@@ -956,6 +969,16 @@ struct kernel_ethtool_ts_info {
* @supported_coalesce_params: supported types of interrupt coalescing.
* @supported_ring_params: supported ring params.
* @supported_hwtstamp_qualifiers: bitfield of supported hwtstamp qualifier.
+ * @op_needs_rtnl: mask of %ETHTOOL_OP_NEEDS_RTNL_* bits.
+ * For use with ops-locked drivers (ignored otherwise). Selects which
+ * ethtool callbacks driver needs to still be executed under rtnl_lock
+ * (in addition to the netdev instance lock).
+ * The following commonly used core APIs currently require rtnl_lock
+ * (this list may not be exhaustive):
+ * - phylink helpers (note that phydev is currently unsupported!)
+ * - netdev_update_features()
+ * - netif_set_real_num_tx_queues()
+ *
* @get_drvinfo: Report driver/device information. Modern drivers no
* longer have to implement this callback. Most fields are
* correctly filled in by the core using system information, or
@@ -1154,8 +1177,14 @@ struct kernel_ethtool_ts_info {
* @get_mm_stats: Query the 802.3 MAC Merge layer statistics.
*
* All operations are optional (i.e. the function pointer may be set
- * to %NULL) and callers must take this into account. Callers must
- * hold the RTNL lock.
+ * to %NULL) and callers must take this into account.
+ *
+ * For traditional drivers callers hold ``rtnl_lock`` across the call.
+ * For "ops locked" drivers (see Documentation/networking/netdevices.rst)
+ * callers instead hold the netdev instance lock (``netdev_lock_ops``);
+ * ``rtnl_lock`` is additionally held only for callbacks for which
+ * the driver opts in via the matching ``ETHTOOL_OP_NEEDS_RTNL_*`` bit
+ * in @op_needs_rtnl.
*
* See the structures used by these operations for further documentation.
* Note that for all operations using a structure ending with a zero-
@@ -1178,6 +1207,7 @@ struct ethtool_ops {
u32 supported_coalesce_params;
u32 supported_ring_params;
u32 supported_hwtstamp_qualifiers;
+ u32 op_needs_rtnl;
void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *);
int (*get_regs_len)(struct net_device *);
void (*get_regs)(struct net_device *, struct ethtool_regs *, void *);
@@ -1351,6 +1381,7 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev,
* within RTNL.
* @rss_indir_user_size: Number of user provided entries for the default
* (context 0) indirection table.
+ * @phys_id_busy: Loop blinking the device LED is running.
* @wol_enabled: Wake-on-LAN is enabled
* @module_fw_flash_in_progress: Module firmware flashing is in progress.
*/
@@ -1358,6 +1389,7 @@ struct ethtool_netdev_state {
struct xarray rss_ctx;
struct mutex rss_lock;
u32 rss_indir_user_size;
+ unsigned phys_id_busy:1;
unsigned wol_enabled:1;
unsigned module_fw_flash_in_progress:1;
};
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 74507c006490..9b876cd930d7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2583,6 +2583,9 @@ struct net_device {
* Double protects:
* @up, @moving_ns, @nd_net, @xdp_features
*
+ * Ops protects:
+ * @cfg, @cfg_pending, @ethtool, @hwprov
+ *
* Double ops protects:
* @real_num_rx_queues, @real_num_tx_queues
*
diff --git a/include/linux/phy_link_topology.h b/include/linux/phy_link_topology.h
index 68a59e25821c..95575f68d5bc 100644
--- a/include/linux/phy_link_topology.h
+++ b/include/linux/phy_link_topology.h
@@ -36,6 +36,11 @@ struct phy_device_node {
struct phy_device *phy;
};
+static inline bool phy_link_topo_empty(struct net_device *dev)
+{
+ return !dev->link_topo;
+}
+
#if IS_ENABLED(CONFIG_PHYLIB)
int phy_link_topo_add_phy(struct net_device *dev,
struct phy_device *phy,
diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h
index d3daec4e93f3..9fb3e93857c3 100644
--- a/include/net/netdev_lock.h
+++ b/include/net/netdev_lock.h
@@ -102,6 +102,14 @@ static inline void netdev_unlock_ops_compat(struct net_device *dev)
rtnl_unlock();
}
+/* Matching "ops protected" category from netdevice.h */
+static inline int netdev_is_locked_ops_compat(const struct net_device *dev)
+{
+ if (netdev_need_ops_lock(dev))
+ return lockdep_is_held(&dev->lock);
+ return lockdep_rtnl_is_held();
+}
+
static inline int netdev_lock_cmp_fn(const struct lockdep_map *a,
const struct lockdep_map *b)
{
@@ -138,6 +146,9 @@ static inline int netdev_lock_cmp_fn(const struct lockdep_map *a,
#define netdev_lock_dereference(p, dev) \
rcu_dereference_protected(p, lockdep_is_held(&(dev)->lock))
+#define netdev_ops_lock_dereference(p, dev) \
+ rcu_dereference_protected(p, netdev_is_locked_ops_compat(dev))
+
int netdev_debug_event(struct notifier_block *nb, unsigned long event,
void *ptr);