From ec763c234d7f60c5bce0fa2611ba79f5be1af76b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 4 Oct 2024 15:10:28 -0700 Subject: Revert "rtnetlink: add guard for RTNL" This reverts commit 464eb03c4a7cfb32cb3324249193cf6bb5b35152. Once we have a per-netns RTNL, we won't use guard(rtnl). Also, there's no users for now. $ grep -rnI "guard(rtnl" || true $ Suggested-by: Eric Dumazet Link: https://lore.kernel.org/netdev/CANn89i+KoYzUH+VPLdGmLABYf5y4TW0hrM4UAeQQJ9AREty0iw@mail.gmail.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/linux/rtnetlink.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index a7da7dfc06a2..cdfc897f1e3c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -7,7 +7,6 @@ #include #include #include -#include #include extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); @@ -47,8 +46,6 @@ extern int rtnl_is_locked(void); extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); -DEFINE_LOCK_GUARD_0(rtnl, rtnl_lock(), rtnl_unlock()) - extern wait_queue_head_t netdev_unregistering_wq; extern atomic_t dev_unreg_count; extern struct rw_semaphore pernet_ops_rwsem; -- cgit v1.2.3 From 76aed95319da25d6884dff01d5f0149e4b542f96 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 4 Oct 2024 15:10:29 -0700 Subject: rtnetlink: Add per-netns RTNL. The goal is to break RTNL down into per-netns mutex. This patch adds per-netns mutex and its helper functions, rtnl_net_lock() and rtnl_net_unlock(). rtnl_net_lock() acquires the global RTNL and per-netns RTNL mutex, and rtnl_net_unlock() releases them. We will replace 800+ rtnl_lock() with rtnl_net_lock() and finally removes rtnl_lock() in rtnl_net_lock(). When we need to nest per-netns RTNL mutex, we will use __rtnl_net_lock(), and its locking order is defined by rtnl_net_lock_cmp_fn() as follows: 1. init_net is first 2. netns address ascending order Note that the conversion will be done under CONFIG_DEBUG_NET_SMALL_RTNL with LOCKDEP so that we can carefully add the extra mutex without slowing down RTNL operations during conversion. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/linux/rtnetlink.h | 21 ++++++++++++++++ include/net/net_namespace.h | 4 ++++ net/Kconfig.debug | 15 ++++++++++++ net/core/net_namespace.c | 6 +++++ net/core/rtnetlink.c | 58 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 104 insertions(+) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index cdfc897f1e3c..edd840a49989 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -92,6 +92,27 @@ static inline bool lockdep_rtnl_is_held(void) #define rcu_replace_pointer_rtnl(rp, p) \ rcu_replace_pointer(rp, p, lockdep_rtnl_is_held()) +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL +void __rtnl_net_lock(struct net *net); +void __rtnl_net_unlock(struct net *net); +void rtnl_net_lock(struct net *net); +void rtnl_net_unlock(struct net *net); +int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b); +#else +static inline void __rtnl_net_lock(struct net *net) {} +static inline void __rtnl_net_unlock(struct net *net) {} + +static inline void rtnl_net_lock(struct net *net) +{ + rtnl_lock(); +} + +static inline void rtnl_net_unlock(struct net *net) +{ + rtnl_unlock(); +} +#endif + static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) { return rtnl_dereference(dev->ingress_queue); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index e67b483cc8bb..873c0f9fdac6 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -188,6 +188,10 @@ struct net { #if IS_ENABLED(CONFIG_SMC) struct netns_smc smc; #endif +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL + /* Move to a better place when the config guard is removed. */ + struct mutex rtnl_mutex; +#endif } __randomize_layout; #include diff --git a/net/Kconfig.debug b/net/Kconfig.debug index 5e3fffe707dd..277fab8c4d77 100644 --- a/net/Kconfig.debug +++ b/net/Kconfig.debug @@ -24,3 +24,18 @@ config DEBUG_NET help Enable extra sanity checks in networking. This is mostly used by fuzzers, but is safe to select. + +config DEBUG_NET_SMALL_RTNL + bool "Add extra per-netns mutex inside RTNL" + depends on DEBUG_KERNEL && NET && LOCK_DEBUGGING_SUPPORT + select PROVE_LOCKING + default n + help + rtnl_lock() is being replaced with rtnl_net_lock() that + acquires the global RTNL and a small per-netns RTNL mutex. + + During the conversion, rtnl_net_lock() just adds an extra + mutex in every RTNL scope and slows down the operations. + + Once the conversion completes, rtnl_lock() will be removed + and rtnetlink will gain per-netns scalability. diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index e39479f1c9a4..105e3cd26763 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -334,6 +334,12 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_ idr_init(&net->netns_ids); spin_lock_init(&net->nsid_lock); mutex_init(&net->ipv4.ra_mutex); + +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL + mutex_init(&net->rtnl_mutex); + lock_set_cmp_fn(&net->rtnl_mutex, rtnl_net_lock_cmp_fn, NULL); +#endif + preinit_net_sysctl(net); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 682d8d3127db..46567fa54e42 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -179,6 +179,64 @@ bool lockdep_rtnl_is_held(void) EXPORT_SYMBOL(lockdep_rtnl_is_held); #endif /* #ifdef CONFIG_PROVE_LOCKING */ +#ifdef CONFIG_DEBUG_NET_SMALL_RTNL +void __rtnl_net_lock(struct net *net) +{ + ASSERT_RTNL(); + + mutex_lock(&net->rtnl_mutex); +} +EXPORT_SYMBOL(__rtnl_net_lock); + +void __rtnl_net_unlock(struct net *net) +{ + ASSERT_RTNL(); + + mutex_unlock(&net->rtnl_mutex); +} +EXPORT_SYMBOL(__rtnl_net_unlock); + +void rtnl_net_lock(struct net *net) +{ + rtnl_lock(); + __rtnl_net_lock(net); +} +EXPORT_SYMBOL(rtnl_net_lock); + +void rtnl_net_unlock(struct net *net) +{ + __rtnl_net_unlock(net); + rtnl_unlock(); +} +EXPORT_SYMBOL(rtnl_net_unlock); + +static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b) +{ + if (net_eq(net_a, net_b)) + return 0; + + /* always init_net first */ + if (net_eq(net_a, &init_net)) + return -1; + + if (net_eq(net_b, &init_net)) + return 1; + + /* otherwise lock in ascending order */ + return net_a < net_b ? -1 : 1; +} + +int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b) +{ + const struct net *net_a, *net_b; + + net_a = container_of(a, struct net, rtnl_mutex.dep_map); + net_b = container_of(b, struct net, rtnl_mutex.dep_map); + + return rtnl_net_cmp_locks(net_a, net_b); +} +#endif + static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; static inline int rtm_msgindex(int msgtype) -- cgit v1.2.3 From 844e5e7e656d3a7a904fd5607f8491d6fd01db8e Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 4 Oct 2024 15:10:30 -0700 Subject: rtnetlink: Add assertion helpers for per-netns RTNL. Once an RTNL scope is converted with rtnl_net_lock(), we will replace RTNL helper functions inside the scope with the following per-netns alternatives: ASSERT_RTNL() -> ASSERT_RTNL_NET(net) rcu_dereference_rtnl(p) -> rcu_dereference_rtnl_net(net, p) Note that the per-netns helpers are equivalent to the conventional helpers unless CONFIG_DEBUG_NET_SMALL_RTNL is enabled. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/linux/rtnetlink.h | 45 +++++++++++++++++++++++++++++++++++++++++---- net/core/rtnetlink.c | 12 ++++++++++++ 2 files changed, 53 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index edd840a49989..8468a4ce8510 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -51,6 +51,10 @@ extern atomic_t dev_unreg_count; extern struct rw_semaphore pernet_ops_rwsem; extern struct rw_semaphore net_rwsem; +#define ASSERT_RTNL() \ + WARN_ONCE(!rtnl_is_locked(), \ + "RTNL: assertion failed at %s (%d)\n", __FILE__, __LINE__) + #ifdef CONFIG_PROVE_LOCKING extern bool lockdep_rtnl_is_held(void); #else @@ -98,6 +102,22 @@ void __rtnl_net_unlock(struct net *net); void rtnl_net_lock(struct net *net); void rtnl_net_unlock(struct net *net); int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b); + +bool rtnl_net_is_locked(struct net *net); + +#define ASSERT_RTNL_NET(net) \ + WARN_ONCE(!rtnl_net_is_locked(net), \ + "RTNL_NET: assertion failed at %s (%d)\n", \ + __FILE__, __LINE__) + +bool lockdep_rtnl_net_is_held(struct net *net); + +#define rcu_dereference_rtnl_net(net, p) \ + rcu_dereference_check(p, lockdep_rtnl_net_is_held(net)) +#define rtnl_net_dereference(net, p) \ + rcu_dereference_protected(p, lockdep_rtnl_net_is_held(net)) +#define rcu_replace_pointer_rtnl_net(net, rp, p) \ + rcu_replace_pointer(rp, p, lockdep_rtnl_net_is_held(net)) #else static inline void __rtnl_net_lock(struct net *net) {} static inline void __rtnl_net_unlock(struct net *net) {} @@ -111,6 +131,27 @@ static inline void rtnl_net_unlock(struct net *net) { rtnl_unlock(); } + +static inline void ASSERT_RTNL_NET(struct net *net) +{ + ASSERT_RTNL(); +} + +static inline void *rcu_dereference_rtnl_net(struct net *net, void *p) +{ + return rcu_dereference_rtnl(p); +} + +static inline void *rtnl_net_dereference(struct net *net, void *p) +{ + return rtnl_dereference(p); +} + +static inline void *rcu_replace_pointer_rtnl_net(struct net *net, + void *rp, void *p) +{ + return rcu_replace_pointer_rtnl(rp, p); +} #endif static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) @@ -140,10 +181,6 @@ void rtnetlink_init(void); void __rtnl_unlock(void); void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); -#define ASSERT_RTNL() \ - WARN_ONCE(!rtnl_is_locked(), \ - "RTNL: assertion failed at %s (%d)\n", __FILE__, __LINE__) - extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 46567fa54e42..6d68247aea70 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -235,6 +235,18 @@ int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map * return rtnl_net_cmp_locks(net_a, net_b); } + +bool rtnl_net_is_locked(struct net *net) +{ + return rtnl_is_locked() && mutex_is_locked(&net->rtnl_mutex); +} +EXPORT_SYMBOL(rtnl_net_is_locked); + +bool lockdep_rtnl_net_is_held(struct net *net) +{ + return lockdep_rtnl_is_held() && lockdep_is_held(&net->rtnl_mutex); +} +EXPORT_SYMBOL(lockdep_rtnl_net_is_held); #endif static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; -- cgit v1.2.3