diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-07 00:45:08 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-07 00:45:08 +0300 |
commit | aae3dbb4776e7916b6cd442d00159bea27a695c1 (patch) | |
tree | d074c5d783a81e7e2e084b1eba77f57459da7e37 /net/ipv6/route.c | |
parent | ec3604c7a5aae8953545b0d05495357009a960e5 (diff) | |
parent | 66bed8465a808400eb14562510e26c8818082cb8 (diff) | |
download | linux-aae3dbb4776e7916b6cd442d00159bea27a695c1.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) Support ipv6 checksum offload in sunvnet driver, from Shannon
Nelson.
2) Move to RB-tree instead of custom AVL code in inetpeer, from Eric
Dumazet.
3) Allow generic XDP to work on virtual devices, from John Fastabend.
4) Add bpf device maps and XDP_REDIRECT, which can be used to build
arbitrary switching frameworks using XDP. From John Fastabend.
5) Remove UFO offloads from the tree, gave us little other than bugs.
6) Remove the IPSEC flow cache, from Florian Westphal.
7) Support ipv6 route offload in mlxsw driver.
8) Support VF representors in bnxt_en, from Sathya Perla.
9) Add support for forward error correction modes to ethtool, from
Vidya Sagar Ravipati.
10) Add time filter for packet scheduler action dumping, from Jamal Hadi
Salim.
11) Extend the zerocopy sendmsg() used by virtio and tap to regular
sockets via MSG_ZEROCOPY. From Willem de Bruijn.
12) Significantly rework value tracking in the BPF verifier, from Edward
Cree.
13) Add new jump instructions to eBPF, from Daniel Borkmann.
14) Rework rtnetlink plumbing so that operations can be run without
taking the RTNL semaphore. From Florian Westphal.
15) Support XDP in tap driver, from Jason Wang.
16) Add 32-bit eBPF JIT for ARM, from Shubham Bansal.
17) Add Huawei hinic ethernet driver.
18) Allow to report MD5 keys in TCP inet_diag dumps, from Ivan
Delalande.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1780 commits)
i40e: point wb_desc at the nvm_wb_desc during i40e_read_nvm_aq
i40e: avoid NVM acquire deadlock during NVM update
drivers: net: xgene: Remove return statement from void function
drivers: net: xgene: Configure tx/rx delay for ACPI
drivers: net: xgene: Read tx/rx delay for ACPI
rocker: fix kcalloc parameter order
rds: Fix non-atomic operation on shared flag variable
net: sched: don't use GFP_KERNEL under spin lock
vhost_net: correctly check tx avail during rx busy polling
net: mdio-mux: add mdio_mux parameter to mdio_mux_init()
rxrpc: Make service connection lookup always check for retry
net: stmmac: Delete dead code for MDIO registration
gianfar: Fix Tx flow control deactivation
cxgb4: Ignore MPS_TX_INT_CAUSE[Bubble] for T6
cxgb4: Fix pause frame count in t4_get_port_stats
cxgb4: fix memory leak
tun: rename generic_xdp to skb_xdp
tun: reserve extra headroom only when XDP is set
net: dsa: bcm_sf2: Configure IMP port TC2QOS mapping
net: dsa: bcm_sf2: Advertise number of egress queues
...
Diffstat (limited to 'net/ipv6/route.c')
-rw-r--r-- | net/ipv6/route.c | 138 |
1 files changed, 109 insertions, 29 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2d0e7798c793..26cc9f483b6d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -446,16 +446,6 @@ static bool rt6_check_expired(const struct rt6_info *rt) return false; } -/* Multipath route selection: - * Hash based function using packet header and flowlabel. - * Adapted from fib_info_hashfn() - */ -static int rt6_info_hash_nhsfn(unsigned int candidate_count, - const struct flowi6 *fl6) -{ - return get_hash_from_flowi6(fl6) % candidate_count; -} - static struct rt6_info *rt6_multipath_select(struct rt6_info *match, struct flowi6 *fl6, int oif, int strict) @@ -463,7 +453,13 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match, struct rt6_info *sibling, *next_sibling; int route_choosen; - route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6); + /* We might have already computed the hash for ICMPv6 errors. In such + * case it will always be non-zero. Otherwise now is the time to do it. + */ + if (!fl6->mp_hash) + fl6->mp_hash = rt6_multipath_hash(fl6, NULL); + + route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1); /* Don't change the route, if route_choosen == 0 * (siblings does not include ourself) */ @@ -959,10 +955,34 @@ int ip6_ins_rt(struct rt6_info *rt) return __ip6_ins_rt(rt, &info, &mxc, NULL); } +/* called with rcu_lock held */ +static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt) +{ + struct net_device *dev = rt->dst.dev; + + if (rt->rt6i_flags & RTF_LOCAL) { + /* for copies of local routes, dst->dev needs to be the + * device if it is a master device, the master device if + * device is enslaved, and the loopback as the default + */ + if (netif_is_l3_slave(dev) && + !rt6_need_strict(&rt->rt6i_dst.addr)) + dev = l3mdev_master_dev_rcu(dev); + else if (!netif_is_l3_master(dev)) + dev = dev_net(dev)->loopback_dev; + /* last case is netif_is_l3_master(dev) is true in which + * case we want dev returned to be dev + */ + } + + return dev; +} + static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, const struct in6_addr *daddr, const struct in6_addr *saddr) { + struct net_device *dev; struct rt6_info *rt; /* @@ -972,8 +992,10 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) ort = (struct rt6_info *)ort->dst.from; - rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0); - + rcu_read_lock(); + dev = ip6_rt_get_dev_rcu(ort); + rt = __ip6_dst_alloc(dev_net(dev), dev, 0); + rcu_read_unlock(); if (!rt) return NULL; @@ -1001,11 +1023,13 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) { + struct net_device *dev; struct rt6_info *pcpu_rt; - pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev), - rt->dst.dev, rt->dst.flags); - + rcu_read_lock(); + dev = ip6_rt_get_dev_rcu(rt); + pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags); + rcu_read_unlock(); if (!pcpu_rt) return NULL; ip6_rt_copy_init(pcpu_rt, rt); @@ -1187,6 +1211,54 @@ struct dst_entry *ip6_route_input_lookup(struct net *net, } EXPORT_SYMBOL_GPL(ip6_route_input_lookup); +static void ip6_multipath_l3_keys(const struct sk_buff *skb, + struct flow_keys *keys) +{ + const struct ipv6hdr *outer_iph = ipv6_hdr(skb); + const struct ipv6hdr *key_iph = outer_iph; + const struct ipv6hdr *inner_iph; + const struct icmp6hdr *icmph; + struct ipv6hdr _inner_iph; + + if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6)) + goto out; + + icmph = icmp6_hdr(skb); + if (icmph->icmp6_type != ICMPV6_DEST_UNREACH && + icmph->icmp6_type != ICMPV6_PKT_TOOBIG && + icmph->icmp6_type != ICMPV6_TIME_EXCEED && + icmph->icmp6_type != ICMPV6_PARAMPROB) + goto out; + + inner_iph = skb_header_pointer(skb, + skb_transport_offset(skb) + sizeof(*icmph), + sizeof(_inner_iph), &_inner_iph); + if (!inner_iph) + goto out; + + key_iph = inner_iph; +out: + memset(keys, 0, sizeof(*keys)); + keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + keys->addrs.v6addrs.src = key_iph->saddr; + keys->addrs.v6addrs.dst = key_iph->daddr; + keys->tags.flow_label = ip6_flowinfo(key_iph); + keys->basic.ip_proto = key_iph->nexthdr; +} + +/* if skb is set it will be used and fl6 can be NULL */ +u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb) +{ + struct flow_keys hash_keys; + + if (skb) { + ip6_multipath_l3_keys(skb, &hash_keys); + return flow_hash_from_keys(&hash_keys); + } + + return get_hash_from_flowi6(fl6); +} + void ip6_route_input(struct sk_buff *skb) { const struct ipv6hdr *iph = ipv6_hdr(skb); @@ -1205,6 +1277,8 @@ void ip6_route_input(struct sk_buff *skb) tun_info = skb_tunnel_info(skb); if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX)) fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id; + if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6)) + fl6.mp_hash = rt6_multipath_hash(&fl6, skb); skb_dst_drop(skb); skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags)); } @@ -2698,15 +2772,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { u32 tb_id; struct net *net = dev_net(idev->dev); - struct net_device *dev = net->loopback_dev; + struct net_device *dev = idev->dev; struct rt6_info *rt; - /* use L3 Master device as loopback for host routes if device - * is enslaved and address is not link local or multicast - */ - if (!rt6_need_strict(addr)) - dev = l3mdev_master_dev_rcu(idev->dev) ? : dev; - rt = ip6_dst_alloc(net, dev, DST_NOCOUNT); if (!rt) return ERR_PTR(-ENOMEM); @@ -3337,6 +3405,9 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, goto nla_put_failure; } + if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD) + *flags |= RTNH_F_OFFLOAD; + /* not needed for multipath encoding b/c it has a rtnexthop struct */ if (!skip_oif && rt->dst.dev && nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) @@ -3615,8 +3686,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct net_device *dev; int flags = 0; - dev = __dev_get_by_index(net, iif); + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, iif); if (!dev) { + rcu_read_unlock(); err = -ENODEV; goto errout; } @@ -3628,15 +3702,19 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (!fibmatch) dst = ip6_route_input_lookup(net, dev, &fl6, flags); + else + dst = ip6_route_lookup(net, &fl6, 0); + + rcu_read_unlock(); } else { fl6.flowi6_oif = oif; if (!fibmatch) dst = ip6_route_output(net, NULL, &fl6); + else + dst = ip6_route_lookup(net, &fl6, 0); } - if (fibmatch) - dst = ip6_route_lookup(net, &fl6, 0); rt = container_of(dst, struct rt6_info, dst); if (rt->dst.error) { @@ -3928,6 +4006,7 @@ static int __net_init ip6_route_net_init(struct net *net) ip6_template_metrics, true); #ifdef CONFIG_IPV6_MULTIPLE_TABLES + net->ipv6.fib6_has_custom_rules = false; net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, sizeof(*net->ipv6.ip6_prohibit_entry), GFP_KERNEL); @@ -4103,9 +4182,10 @@ int __init ip6_route_init(void) goto fib6_rules_init; ret = -ENOBUFS; - if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) || - __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) || - __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) + if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) || + __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) || + __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, + RTNL_FLAG_DOIT_UNLOCKED)) goto out_register_late_subsys; ret = register_netdevice_notifier(&ip6_route_dev_notifier); |