diff options
author | David S. Miller <davem@davemloft.net> | 2022-02-11 14:44:27 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2022-02-11 14:44:27 +0300 |
commit | c002496babfdc82c5b8fbe3f5a3b73201dca2c73 (patch) | |
tree | 5c85b956925de91067e6cef0af62465f4aa26443 | |
parent | 926eae604403acfa27ba5b072af458e87e634a50 (diff) | |
parent | 29e5375d7fcb5f88b438d74d537bbfd67ac75a64 (diff) | |
download | linux-c002496babfdc82c5b8fbe3f5a3b73201dca2c73.tar.xz |
Merge branch 'ipv6-loopback'
Eric Dumazet says:
====================
ipv6: remove addrconf reliance on loopback
Second patch in this series removes IPv6 requirement about the netns
loopback device being the last device being dismantled.
This was needed because rt6_uncached_list_flush_dev()
and ip6_dst_ifdown() had to switch dst dev to a known
device (loopback).
Instead of loopback, we can use the (hidden) blackhole_netdev
which is also always there.
This will allow future simplfications of netdev_run_to()
and other parts of the stack like default_device_exit_batch().
Last two patches are optimizations for both IP families.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/ip6_fib.h | 3 | ||||
-rw-r--r-- | net/ipv4/route.c | 12 | ||||
-rw-r--r-- | net/ipv6/addrconf.c | 78 | ||||
-rw-r--r-- | net/ipv6/route.c | 42 | ||||
-rw-r--r-- | net/ipv6/xfrm6_policy.c | 1 |
5 files changed, 64 insertions, 72 deletions
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 40ae8f1b18e5..32c83bb68879 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -367,9 +367,8 @@ struct rt6_statistics { __u32 fib_rt_cache; /* cached rt entries in exception table */ __u32 fib_discarded_routes; /* total number of routes delete */ - /* The following stats are not protected by any lock */ + /* The following stat is not protected by any lock */ atomic_t fib_rt_alloc; /* total number of routes alloced */ - atomic_t fib_rt_uncache; /* rt entries in uncached list */ }; #define RTN_TL_ROOT 0x0001 diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 634766e6c7cc..202d6b1fff43 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1485,6 +1485,7 @@ static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt) struct uncached_list { spinlock_t lock; struct list_head head; + struct list_head quarantine; }; static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list); @@ -1506,7 +1507,7 @@ void rt_del_uncached_list(struct rtable *rt) struct uncached_list *ul = rt->rt_uncached_list; spin_lock_bh(&ul->lock); - list_del(&rt->rt_uncached); + list_del_init(&rt->rt_uncached); spin_unlock_bh(&ul->lock); } } @@ -1521,20 +1522,24 @@ static void ipv4_dst_destroy(struct dst_entry *dst) void rt_flush_dev(struct net_device *dev) { - struct rtable *rt; + struct rtable *rt, *safe; int cpu; for_each_possible_cpu(cpu) { struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); + if (list_empty(&ul->head)) + continue; + spin_lock_bh(&ul->lock); - list_for_each_entry(rt, &ul->head, rt_uncached) { + list_for_each_entry_safe(rt, safe, &ul->head, rt_uncached) { if (rt->dst.dev != dev) continue; rt->dst.dev = blackhole_netdev; dev_replace_track(dev, blackhole_netdev, &rt->dst.dev_tracker, GFP_ATOMIC); + list_move(&rt->rt_uncached, &ul->quarantine); } spin_unlock_bh(&ul->lock); } @@ -3706,6 +3711,7 @@ int __init ip_rt_init(void) struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); INIT_LIST_HEAD(&ul->head); + INIT_LIST_HEAD(&ul->quarantine); spin_lock_init(&ul->lock); } #ifdef CONFIG_IP_ROUTE_CLASSID diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4f402bc38f05..02d31d4fcab3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -372,7 +372,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ASSERT_RTNL(); - if (dev->mtu < IPV6_MIN_MTU) + if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev) return ERR_PTR(-EINVAL); ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL); @@ -400,21 +400,22 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) /* We refer to the device */ dev_hold_track(dev, &ndev->dev_tracker, GFP_KERNEL); - if (snmp6_alloc_dev(ndev) < 0) { - netdev_dbg(dev, "%s: cannot allocate memory for statistics\n", - __func__); - neigh_parms_release(&nd_tbl, ndev->nd_parms); - dev_put_track(dev, &ndev->dev_tracker); - kfree(ndev); - return ERR_PTR(err); - } + if (dev != blackhole_netdev) { + if (snmp6_alloc_dev(ndev) < 0) { + netdev_dbg(dev, "%s: cannot allocate memory for statistics\n", + __func__); + neigh_parms_release(&nd_tbl, ndev->nd_parms); + dev_put_track(dev, &ndev->dev_tracker); + kfree(ndev); + return ERR_PTR(err); + } - if (snmp6_register_dev(ndev) < 0) { - netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n", - __func__, dev->name); - goto err_release; + if (snmp6_register_dev(ndev) < 0) { + netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n", + __func__, dev->name); + goto err_release; + } } - /* One reference from device. */ refcount_set(&ndev->refcnt, 1); @@ -445,25 +446,28 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ipv6_mc_init_dev(ndev); ndev->tstamp = jiffies; - err = addrconf_sysctl_register(ndev); - if (err) { - ipv6_mc_destroy_dev(ndev); - snmp6_unregister_dev(ndev); - goto err_release; + if (dev != blackhole_netdev) { + err = addrconf_sysctl_register(ndev); + if (err) { + ipv6_mc_destroy_dev(ndev); + snmp6_unregister_dev(ndev); + goto err_release; + } } /* protected by rtnl_lock */ rcu_assign_pointer(dev->ip6_ptr, ndev); - /* Join interface-local all-node multicast group */ - ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes); + if (dev != blackhole_netdev) { + /* Join interface-local all-node multicast group */ + ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes); - /* Join all-node multicast group */ - ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); - - /* Join all-router multicast group if forwarding is set */ - if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST)) - ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); + /* Join all-node multicast group */ + ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); + /* Join all-router multicast group if forwarding is set */ + if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST)) + ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); + } return ndev; err_release: @@ -7233,26 +7237,8 @@ int __init addrconf_init(void) goto out_nowq; } - /* The addrconf netdev notifier requires that loopback_dev - * has it's ipv6 private information allocated and setup - * before it can bring up and give link-local addresses - * to other devices which are up. - * - * Unfortunately, loopback_dev is not necessarily the first - * entry in the global dev_base list of net devices. In fact, - * it is likely to be the very last entry on that list. - * So this causes the notifier registry below to try and - * give link-local addresses to all devices besides loopback_dev - * first, then loopback_dev, which cases all the non-loopback_dev - * devices to fail to get a link-local address. - * - * So, as a temporary fix, allocate the ipv6 structure for - * loopback_dev first by hand. - * Longer term, all of the dependencies ipv6 has upon the loopback - * device and it being up should be removed. - */ rtnl_lock(); - idev = ipv6_add_dev(init_net.loopback_dev); + idev = ipv6_add_dev(blackhole_netdev); rtnl_unlock(); if (IS_ERR(idev)) { err = PTR_ERR(idev); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dd98a11fbdb6..46c7a1b36075 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -130,6 +130,7 @@ static struct fib6_info *rt6_get_route_info(struct net *net, struct uncached_list { spinlock_t lock; struct list_head head; + struct list_head quarantine; }; static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list); @@ -149,35 +150,34 @@ void rt6_uncached_list_del(struct rt6_info *rt) { if (!list_empty(&rt->rt6i_uncached)) { struct uncached_list *ul = rt->rt6i_uncached_list; - struct net *net = dev_net(rt->dst.dev); spin_lock_bh(&ul->lock); - list_del(&rt->rt6i_uncached); - atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache); + list_del_init(&rt->rt6i_uncached); spin_unlock_bh(&ul->lock); } } -static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) +static void rt6_uncached_list_flush_dev(struct net_device *dev) { - struct net_device *loopback_dev = net->loopback_dev; int cpu; - if (dev == loopback_dev) - return; - for_each_possible_cpu(cpu) { struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); - struct rt6_info *rt; + struct rt6_info *rt, *safe; + + if (list_empty(&ul->head)) + continue; spin_lock_bh(&ul->lock); - list_for_each_entry(rt, &ul->head, rt6i_uncached) { + list_for_each_entry_safe(rt, safe, &ul->head, rt6i_uncached) { struct inet6_dev *rt_idev = rt->rt6i_idev; struct net_device *rt_dev = rt->dst.dev; + bool handled = false; if (rt_idev->dev == dev) { - rt->rt6i_idev = in6_dev_get(loopback_dev); + rt->rt6i_idev = in6_dev_get(blackhole_netdev); in6_dev_put(rt_idev); + handled = true; } if (rt_dev == dev) { @@ -185,7 +185,11 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) dev_replace_track(rt_dev, blackhole_netdev, &rt->dst.dev_tracker, GFP_ATOMIC); + handled = true; } + if (handled) + list_move(&rt->rt6i_uncached, + &ul->quarantine); } spin_unlock_bh(&ul->lock); } @@ -373,13 +377,12 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; - struct net_device *loopback_dev = - dev_net(dev)->loopback_dev; - if (idev && idev->dev != loopback_dev) { - struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev); - if (loopback_idev) { - rt->rt6i_idev = loopback_idev; + if (idev && idev->dev != blackhole_netdev) { + struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev); + + if (blackhole_idev) { + rt->rt6i_idev = blackhole_idev; in6_dev_put(idev); } } @@ -2244,7 +2247,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, * if caller sets RT6_LOOKUP_F_DST_NOREF flag. */ rt6_uncached_list_add(rt); - atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache); rcu_read_unlock(); return rt; @@ -3287,7 +3289,6 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, * do proper release of the net_device */ rt6_uncached_list_add(rt); - atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache); dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0); @@ -4896,7 +4897,7 @@ void rt6_sync_down_dev(struct net_device *dev, unsigned long event) void rt6_disable_ip(struct net_device *dev, unsigned long event) { rt6_sync_down_dev(dev, event); - rt6_uncached_list_flush_dev(dev_net(dev), dev); + rt6_uncached_list_flush_dev(dev); neigh_ifdown(&nd_tbl, dev); } @@ -6736,6 +6737,7 @@ int __init ip6_route_init(void) struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); INIT_LIST_HEAD(&ul->head); + INIT_LIST_HEAD(&ul->quarantine); spin_lock_init(&ul->lock); } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index fad687ee6dd8..55bb2cbae13d 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -92,7 +92,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt6.rt6i_src = rt->rt6i_src; INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached); rt6_uncached_list_add(&xdst->u.rt6); - atomic_inc(&dev_net(dev)->ipv6.rt6_stats->fib_rt_uncache); return 0; } |