From 64587e936b6526539f5d2cf1bb667c52be937cd7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 12 Jun 2026 06:32:04 +0000 Subject: ipv4: fib: Flush all fib_info in fib_table_flush() during netns dismantle. Even when fib_table_flush() is called with flush_all true, it does not flush all fib_info due to this condition: (!(fi->fib_flags & RTNH_F_DEAD) && !fib_props[fa->fa_type].error) This creates an implicit ordering between default_device_exit_batch() and fib_net_exit_batch(). fib_table_flush(flush_all=true) must be called after all devices are NETDEV_UNREGISTERed, which is after nexthop_flush_dev() marks RTNH_F_DEAD. This would cause a memory leak if the order were reversed. fib_table_flush() does not skip non-dead error routes when flush_all is true: !flush_all && !(fi->fib_flags & RTNH_F_DEAD) && fib_props[fa->fa_type].error Let's merge the two conditions so that no non-dead fib_info is skipped during netns dismantle. Note that we could further apply !flush_all to the basic table id check and the rtmsg_fib() call in the loop. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Ido Schimmel Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260612063225.455191-2-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/fib_trie.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 1308213791f1..07068207b888 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2046,18 +2046,12 @@ int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all) hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { struct fib_info *fi = fa->fa_info; - if (!fi || tb->tb_id != fa->tb_id || - (!(fi->fib_flags & RTNH_F_DEAD) && - !fib_props[fa->fa_type].error)) { + if (!fi || tb->tb_id != fa->tb_id) { slen = fa->fa_slen; continue; } - /* When not flushing the entire table, skip error - * routes that are not marked for deletion. 
- */ - if (!flush_all && fib_props[fa->fa_type].error && - !(fi->fib_flags & RTNH_F_DEAD)) { + if (!flush_all && !(fi->fib_flags & RTNH_F_DEAD)) { slen = fa->fa_slen; continue; } -- cgit v1.2.3 From c993bd0102aac43deea38f72fc1e909030b1c6ed Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 12 Jun 2026 06:32:05 +0000 Subject: ipv4: fib: Call fib_proc_exit() and nl_fib_lookup_exit() at ->pre_exit(). We will call ip_fib_net_exit() from ->exit_rtnl(). Since the exit callbacks are called in the following order, 1. ->pre_exit() ~~~ synchronize_rcu() ~~~ 2. ->exit_rtnl() : ip_fib_net_exit() 3. ->exit() : fib_proc_exit() / nl_fib_lookup_exit() 4. ->exit_batch() : fib4_semantics_exit() the reverse order of fib_net_init() would get messed up. Let's move fib_proc_exit() and nl_fib_lookup_exit() to ->pre_exit(). This is fine because procfs/netlink access from userspace cannot occur at this point and synchronize_rcu() is not needed. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Ido Schimmel Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260612063225.455191-3-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/fib_frontend.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ceeb87b13b93..3b1bd53c7357 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1656,7 +1656,7 @@ out_semantics: goto out; } -static void __net_exit fib_net_exit(struct net *net) +static void __net_exit fib_net_pre_exit(struct net *net) { fib_proc_exit(net); nl_fib_lookup_exit(net); @@ -1680,7 +1680,7 @@ static void __net_exit fib_net_exit_batch(struct list_head *net_list) static struct pernet_operations fib_net_ops = { .init = fib_net_init, - .exit = fib_net_exit, + .pre_exit = fib_net_pre_exit, .exit_batch = fib_net_exit_batch, }; -- cgit v1.2.3 From 49374d87e839bdd88e6a5dcd866a4034713fb512 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 12 Jun 2026 06:32:06 +0000 Subject: 
ipv4: fib: Free net->ipv4.{fib_table_hash,notifier_ops} without RTNL. We will call ip_fib_net_exit() from ->exit_rtnl(). However, some paths will still access net->ipv4.fib_table_hash after ->exit_rtnl(). For example, fib_flush() is called from fib_disable_ip() for NETDEV_UNREGISTER. Let's move kfree(net->ipv4.fib_table_hash) and fib4_notifier_exit() from ip_fib_net_exit() to its caller. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Ido Schimmel Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260612063225.455191-4-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/fib_frontend.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3b1bd53c7357..c3e3b5633fd0 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1615,9 +1615,6 @@ static void ip_fib_net_exit(struct net *net) #ifdef CONFIG_IP_MULTIPLE_TABLES fib4_rules_exit(net); #endif - - kfree(net->ipv4.fib_table_hash); - fib4_notifier_exit(net); } static int __net_init fib_net_init(struct net *net) @@ -1653,6 +1650,9 @@ out_semantics: rtnl_net_lock(net); ip_fib_net_exit(net); rtnl_net_unlock(net); + + kfree(net->ipv4.fib_table_hash); + fib4_notifier_exit(net); goto out; } @@ -1674,8 +1674,11 @@ static void __net_exit fib_net_exit_batch(struct list_head *net_list) } rtnl_unlock(); - list_for_each_entry(net, net_list, exit_list) + list_for_each_entry(net, net_list, exit_list) { + kfree(net->ipv4.fib_table_hash); + fib4_notifier_exit(net); fib4_semantics_exit(net); + } } static struct pernet_operations fib_net_ops = { -- cgit v1.2.3 From 5a7fef12d931f27e09767f8b7a9bf0c1ee096abd Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 12 Jun 2026 06:32:07 +0000 Subject: ipv4: fib: Avoid calling fib_trie_table() in fib_new_table() for dying net. We will call ip_fib_net_exit() from ->exit_rtnl(). All fib_table will be destroyed before devices are unregistered. 
During device unregistration, inetdev_destroy() could call fib_del_ifaddr(), which calls fib_magic(RTM_DELROUTE). fib_magic() calls fib_new_table(), but we do not want to create a new table after ip_fib_net_exit() destroys all tables. As a prep, let's add check_net() before fib_trie_table() in fib_new_table(). fib_trie_table() is also called from fib_trie_unmerge(), but fib_get_table() fails first in fib_unmerge(), so the same problem does not occur there. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Ido Schimmel Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260612063225.455191-5-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/fib_frontend.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c3e3b5633fd0..d147471d1d8e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -88,7 +88,8 @@ struct fib_table *fib_new_table(struct net *net, u32 id) if (id == RT_TABLE_LOCAL && !net->ipv4.fib_has_custom_rules) alias = fib_new_table(net, RT_TABLE_MAIN); - tb = fib_trie_table(id, alias); + if (check_net(net)) + tb = fib_trie_table(id, alias); if (!tb) return NULL; -- cgit v1.2.3 From 759923cf03b062b5b8cdc770e2819a67ebe1cacd Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 12 Jun 2026 06:32:08 +0000 Subject: ipv4: fib: Convert fib_net_exit_batch() to ->exit_rtnl(). Currently, IPv4 routes are flushed in ->exit_batch() after all devices are unregistered. Unlike IPv6, IPv4 routes are not added from the fast path, so we can flush routes before default_device_exit_batch(). Let's call ip_fib_net_exit() from ->exit_rtnl() to save one RTNL locking dance. ip_fib_net_exit() must use hlist_del_rcu() for fib_table for the fast path on a dying dev. 
Signed-off-by: Kuniyuki Iwashima Reviewed-by: Ido Schimmel Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20260612063225.455191-6-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/fib_frontend.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d147471d1d8e..c7d1f31650d7 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1607,7 +1607,7 @@ static void ip_fib_net_exit(struct net *net) struct fib_table *tb; hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { - hlist_del(&tb->tb_hlist); + hlist_del_rcu(&tb->tb_hlist); fib_table_flush(net, tb, true); fib_free_table(tb); } @@ -1663,29 +1663,24 @@ static void __net_exit fib_net_pre_exit(struct net *net) nl_fib_lookup_exit(net); } -static void __net_exit fib_net_exit_batch(struct list_head *net_list) +static void __net_exit fib_net_exit_rtnl(struct net *net, + struct list_head *dev_kill_list) { - struct net *net; - - rtnl_lock(); - list_for_each_entry(net, net_list, exit_list) { - __rtnl_net_lock(net); - ip_fib_net_exit(net); - __rtnl_net_unlock(net); - } - rtnl_unlock(); + ip_fib_net_exit(net); +} - list_for_each_entry(net, net_list, exit_list) { - kfree(net->ipv4.fib_table_hash); - fib4_notifier_exit(net); - fib4_semantics_exit(net); - } +static void __net_exit fib_net_exit(struct net *net) +{ + kfree(net->ipv4.fib_table_hash); + fib4_notifier_exit(net); + fib4_semantics_exit(net); } static struct pernet_operations fib_net_ops = { .init = fib_net_init, .pre_exit = fib_net_pre_exit, - .exit_batch = fib_net_exit_batch, + .exit_rtnl = fib_net_exit_rtnl, + .exit = fib_net_exit, }; static const struct rtnl_msg_handler fib_rtnl_msg_handlers[] __initconst = { -- cgit v1.2.3