From 1c36d186a0c81f3b55b2722736163233b05f8756 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 28 Feb 2026 22:17:30 +0000 Subject: ipmr: Define net->ipv4.{ipmr_notifier_ops,ipmr_seq} under CONFIG_IP_MROUTE. net->ipv4.ipmr_notifier_ops and net->ipv4.ipmr_seq are used only in net/ipv4/ipmr.c. Let's move these definitions under CONFIG_IP_MROUTE. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228221800.1082070-13-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/netns/ipv4.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 8e971c7bf164..380ff34c0233 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -279,6 +279,8 @@ struct netns_ipv4 { struct list_head mr_tables; struct fib_rules_ops *mr_rules_ops; #endif + struct fib_notifier_ops *ipmr_notifier_ops; + unsigned int ipmr_seq; /* protected by rtnl_mutex */ #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH struct sysctl_fib_multipath_hash_seed sysctl_fib_multipath_hash_seed; @@ -290,9 +292,6 @@ struct netns_ipv4 { struct fib_notifier_ops *notifier_ops; unsigned int fib_seq; /* writes protected by rtnl_mutex */ - struct fib_notifier_ops *ipmr_notifier_ops; - unsigned int ipmr_seq; /* protected by rtnl_mutex */ - atomic_t rt_genid; siphash_key_t ip_id_key; struct hlist_head *inet_addr_lst; -- cgit v1.2.3 From 4480d5fa1f6ebe7dfc546e14371d63c8b915a82d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 28 Feb 2026 22:17:31 +0000 Subject: ipmr/ip6mr: Convert net->ipv[46].ipmr_seq to atomic_t. We will no longer hold RTNL for ipmr_mfc_add() and ipmr_mfc_delete(). MFC entry can be loosely connected with VIF by its index for mrt->vif_table[] (stored in mfc_parent), but the two tables are not synchronised. i.e. Even if VIF 1 is removed, MFC for VIF 1 is not automatically removed. 
The only field that the MFC/VIF interfaces share is net->ipv[46].ipmr_seq, which is protected by RTNL. Adding a new mutex for both just to protect a single field is overkill. Let's convert the field to atomic_t. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228221800.1082070-14-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/mroute_base.h | 8 ++++---- include/net/netns/ipv4.h | 2 +- include/net/netns/ipv6.h | 2 +- net/ipv4/ipmr.c | 4 ++-- net/ipv6/ip6mr.c | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 0075f6e5c3da..0baa6f994da9 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -76,7 +76,7 @@ static inline int mr_call_vif_notifiers(struct net *net, struct vif_device *vif, struct net_device *vif_dev, unsigned short vif_index, u32 tb_id, - unsigned int *ipmr_seq) + atomic_t *ipmr_seq) { struct vif_entry_notifier_info info = { .info = { @@ -89,7 +89,7 @@ static inline int mr_call_vif_notifiers(struct net *net, }; ASSERT_RTNL(); - (*ipmr_seq)++; + atomic_inc(ipmr_seq); return call_fib_notifiers(net, event_type, &info.info); } @@ -198,7 +198,7 @@ static inline int mr_call_mfc_notifiers(struct net *net, unsigned short family, enum fib_event_type event_type, struct mr_mfc *mfc, u32 tb_id, - unsigned int *ipmr_seq) + atomic_t *ipmr_seq) { struct mfc_entry_notifier_info info = { .info = { @@ -209,7 +209,7 @@ static inline int mr_call_mfc_notifiers(struct net *net, }; ASSERT_RTNL(); - (*ipmr_seq)++; + atomic_inc(ipmr_seq); return call_fib_notifiers(net, event_type, &info.info); } diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 380ff34c0233..94dca64fec41 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -280,7 +280,7 @@ struct netns_ipv4 { struct fib_rules_ops *mr_rules_ops; #endif struct fib_notifier_ops *ipmr_notifier_ops; - 
unsigned int ipmr_seq; /* protected by rtnl_mutex */ + atomic_t ipmr_seq; #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH struct sysctl_fib_multipath_hash_seed sysctl_fib_multipath_hash_seed; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 34bdb1308e8f..499e4288170f 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -118,7 +118,7 @@ struct netns_ipv6 { struct seg6_pernet_data *seg6_data; struct fib_notifier_ops *notifier_ops; struct fib_notifier_ops *ip6mr_notifier_ops; - unsigned int ipmr_seq; /* protected by rtnl_mutex */ + atomic_t ipmr_seq; struct { struct hlist_head head; spinlock_t lock; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 07f2d4f8dcbe..6ec73796d84d 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -3226,7 +3226,7 @@ static const struct net_protocol pim_protocol = { static unsigned int ipmr_seq_read(const struct net *net) { - return READ_ONCE(net->ipv4.ipmr_seq) + ipmr_rules_seq_read(net); + return atomic_read(&net->ipv4.ipmr_seq) + ipmr_rules_seq_read(net); } static int ipmr_dump(struct net *net, struct notifier_block *nb, @@ -3247,7 +3247,7 @@ static int __net_init ipmr_notifier_init(struct net *net) { struct fib_notifier_ops *ops; - net->ipv4.ipmr_seq = 0; + atomic_set(&net->ipv4.ipmr_seq, 0); ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net); if (IS_ERR(ops)) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index e047a4680ab0..85010ff21c98 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1280,7 +1280,7 @@ static int ip6mr_device_event(struct notifier_block *this, static unsigned int ip6mr_seq_read(const struct net *net) { - return READ_ONCE(net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net); + return atomic_read(&net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net); } static int ip6mr_dump(struct net *net, struct notifier_block *nb, @@ -1305,7 +1305,7 @@ static int __net_init ip6mr_notifier_init(struct net *net) { struct fib_notifier_ops *ops; - net->ipv6.ipmr_seq = 0; + 
atomic_set(&net->ipv6.ipmr_seq, 0); ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net); if (IS_ERR(ops)) -- cgit v1.2.3 From 3c1e53e55418d4ca4040e281501643a96e227974 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 28 Feb 2026 22:17:32 +0000 Subject: ipmr: Add dedicated mutex for mrt->{mfc_hash,mfc_cache_list}. We will no longer hold RTNL for ipmr_rtm_route() to modify the MFC hash table. Only __dev_get_by_index() in rtm_to_ipmr_mfcc() is RTNL-dependent; otherwise, we just need protection for mrt->mfc_hash and mrt->mfc_cache_list. Let's add a new mutex for ipmr_mfc_add(), ipmr_mfc_delete(), and mroute_clean_tables() (setsockopt(MRT_FLUSH or MRT_DONE)). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228221800.1082070-15-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/net/netns/ipv4.h | 1 + net/ipv4/ipmr.c | 28 ++++++++++++++++++++++------ 2 files changed, 23 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 94dca64fec41..4c249aeaf7f1 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -281,6 +281,7 @@ struct netns_ipv4 { #endif struct fib_notifier_ops *ipmr_notifier_ops; atomic_t ipmr_seq; + struct mutex mfc_mutex; #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH struct sysctl_fib_multipath_hash_seed sysctl_fib_multipath_hash_seed; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 6ec73796d84d..d4983d8a9b2a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1329,6 +1329,8 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags, /* Wipe the cache */ if (flags & (MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC)) { + mutex_lock(&net->ipv4.mfc_mutex); + list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC_STATIC)) || (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC))) @@ -1341,6 +1343,8 @@ static void 
mroute_clean_tables(struct mr_table *mrt, int flags, mroute_netlink_event(mrt, cache, RTM_DELROUTE); mr_cache_put(c); } + + mutex_unlock(&net->ipv4.mfc_mutex); } if (flags & MRT_FLUSH_MFC) { @@ -1498,12 +1502,17 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, } if (parent == 0) parent = mfc.mfcc_parent; + + mutex_lock(&net->ipv4.mfc_mutex); + if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) ret = ipmr_mfc_delete(mrt, &mfc, parent); else ret = ipmr_mfc_add(net, mrt, &mfc, sk == rtnl_dereference(mrt->mroute_sk), parent); + + mutex_unlock(&net->ipv4.mfc_mutex); break; case MRT_FLUSH: { LIST_HEAD(dev_kill_list); @@ -2913,21 +2922,26 @@ static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); - int ret, mrtsock, parent; - struct mr_table *tbl; + int ret, mrtsock = 0, parent; + struct mr_table *tbl = NULL; struct mfcctl mfcc; - mrtsock = 0; - tbl = NULL; ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack); if (ret < 0) return ret; parent = ret ? mfcc.mfcc_parent : -1; + + mutex_lock(&net->ipv4.mfc_mutex); + if (nlh->nlmsg_type == RTM_NEWROUTE) - return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent); + ret = ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent); else - return ipmr_mfc_delete(tbl, &mfcc, parent); + ret = ipmr_mfc_delete(tbl, &mfcc, parent); + + mutex_unlock(&net->ipv4.mfc_mutex); + + return ret; } static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) @@ -3269,6 +3283,8 @@ static int __net_init ipmr_net_init(struct net *net) LIST_HEAD(dev_kill_list); int err; + mutex_init(&net->ipv4.mfc_mutex); + err = ipmr_notifier_init(net); if (err) goto ipmr_notifier_fail; -- cgit v1.2.3 From bddafc06ca5ee1be4d10061f7954c6d6be5dc1d8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 28 Feb 2026 22:17:33 +0000 Subject: ipmr: Don't hold RTNL for ipmr_rtm_route(). 
ipmr_mfc_add() and ipmr_mfc_delete() are already protected by a dedicated mutex. rtm_to_ipmr_mfcc() calls __ipmr_get_table(), __dev_get_by_index(), and ipmr_find_vif(). Once __dev_get_by_index() is converted to dev_get_by_index_rcu(), we can move the other two functions under that same RCU section and drop RTNL for ipmr_rtm_route(). Let's do that conversion and drop ASSERT_RTNL() in mr_call_mfc_notifiers(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260228221800.1082070-16-kuniyu@google.com Signed-off-by: Jakub Kicinski --- include/linux/mroute_base.h | 1 - net/ipv4/ipmr.c | 34 +++++++++++++++++++++------------- 2 files changed, 21 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 0baa6f994da9..cf3374580f74 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -208,7 +208,6 @@ static inline int mr_call_mfc_notifiers(struct net *net, .tb_id = tb_id }; - ASSERT_RTNL(); atomic_inc(ipmr_seq); return call_fib_notifiers(net, event_type, &info.info); } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d4983d8a9b2a..8a08d09b4c30 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1211,7 +1211,6 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) struct net *net = read_pnet(&mrt->net); struct mfc_cache *c; - /* The entries are added/deleted only under RTNL */ rcu_read_lock(); c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, mfc->mfcc_mcastgrp.s_addr, parent); @@ -1238,7 +1237,6 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, if (mfc->mfcc_parent >= MAXVIFS) return -ENFILE; - /* The entries are added/deleted only under RTNL */ rcu_read_lock(); c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, mfc->mfcc_mcastgrp.s_addr, parent); @@ -2853,10 +2851,10 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, { struct net_device *dev = NULL; u32 
tblid = RT_TABLE_DEFAULT; + int ret, rem, iif = 0; struct mr_table *mrt; struct nlattr *attr; struct rtmsg *rtm; - int ret, rem; ret = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy, extack); @@ -2883,11 +2881,7 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr); break; case RTA_IIF: - dev = __dev_get_by_index(net, nla_get_u32(attr)); - if (!dev) { - ret = -ENODEV; - goto out; - } + iif = nla_get_u32(attr); break; case RTA_MULTIPATH: if (ipmr_nla_get_ttls(attr, mfcc) < 0) { @@ -2903,16 +2897,30 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, break; } } + + rcu_read_lock(); + mrt = __ipmr_get_table(net, tblid); if (!mrt) { ret = -ENOENT; - goto out; + goto unlock; } + + if (iif) { + dev = dev_get_by_index_rcu(net, iif); + if (!dev) { + ret = -ENODEV; + goto unlock; + } + + mfcc->mfcc_parent = ipmr_find_vif(mrt, dev); + } + *mrtret = mrt; *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0; - if (dev) - mfcc->mfcc_parent = ipmr_find_vif(mrt, dev); +unlock: + rcu_read_unlock(); out: return ret; } @@ -3343,9 +3351,9 @@ static const struct rtnl_msg_handler ipmr_rtnl_msg_handlers[] __initconst = { {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETLINK, .dumpit = ipmr_rtm_dumplink, .flags = RTNL_FLAG_DUMP_UNLOCKED}, {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_NEWROUTE, - .doit = ipmr_rtm_route}, + .doit = ipmr_rtm_route, .flags = RTNL_FLAG_DOIT_UNLOCKED}, {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_DELROUTE, - .doit = ipmr_rtm_route}, + .doit = ipmr_rtm_route, .flags = RTNL_FLAG_DOIT_UNLOCKED}, {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETROUTE, .doit = ipmr_rtm_getroute, .dumpit = ipmr_rtm_dumproute, .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, -- cgit v1.2.3