diff options
Diffstat (limited to 'drivers/net/vxlan.c')
-rw-r--r-- | drivers/net/vxlan.c | 350 |
1 files changed, 173 insertions, 177 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 2ba01ca02c9c..bb70dd5723b5 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -52,7 +52,7 @@ static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); -static int vxlan_net_id; +static unsigned int vxlan_net_id; static struct rtnl_link_ops vxlan_link_ops; static const u8 all_zeros_mac[ETH_ALEN + 2]; @@ -1754,21 +1754,16 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, } min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len - + VXLAN_HLEN + iphdr_len - + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); + + VXLAN_HLEN + iphdr_len; /* Need space for new headers (invalidates iph ptr) */ err = skb_cow_head(skb, min_headroom); if (unlikely(err)) - goto out_free; - - skb = vlan_hwaccel_push_inside(skb); - if (WARN_ON(!skb)) - return -ENOMEM; + return err; err = iptunnel_handle_offloads(skb, type); if (err) - goto out_free; + return err; vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = VXLAN_HF_VNI; @@ -1792,19 +1787,16 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, if (vxflags & VXLAN_F_GPE) { err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol); if (err < 0) - goto out_free; + return err; inner_protocol = skb->protocol; } skb_set_inner_protocol(skb, inner_protocol); return 0; - -out_free: - kfree_skb(skb); - return err; } -static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, +static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device *dev, + struct vxlan_sock *sock4, struct sk_buff *skb, int oif, u8 tos, __be32 daddr, __be32 *saddr, struct dst_cache *dst_cache, @@ -1814,6 +1806,9 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct rtable *rt = NULL; struct flowi4 fl4; + if (!sock4) + return ERR_PTR(-EIO); + if (tos && !info) use_cache = false; if (use_cache) { @@ -1831,16 +1826,27 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, fl4.saddr = *saddr; rt = ip_route_output_key(vxlan->net, &fl4); - if (!IS_ERR(rt)) { + if (likely(!IS_ERR(rt))) { + if (rt->dst.dev == dev) { + netdev_dbg(dev, "circular route to %pI4\n", &daddr); + ip_rt_put(rt); + return ERR_PTR(-ELOOP); + } + *saddr = fl4.saddr; if (use_cache) dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); + } else { + netdev_dbg(dev, "no route to %pI4\n", &daddr); + return ERR_PTR(-ENETUNREACH); } return rt; } #if IS_ENABLED(CONFIG_IPV6) static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, + struct net_device *dev, + struct vxlan_sock *sock6, struct sk_buff *skb, int oif, u8 tos, __be32 label, const struct in6_addr *daddr, @@ -1848,7 +1854,6 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, struct dst_cache *dst_cache, const struct ip_tunnel_info *info) { - struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock); bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct dst_entry *ndst; struct flowi6 fl6; @@ -1876,8 +1881,16 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, err = ipv6_stub->ipv6_dst_lookup(vxlan->net, sock6->sock->sk, &ndst, &fl6); - if (err < 0) - return ERR_PTR(err); + if (unlikely(err < 0)) { + netdev_dbg(dev, "no route to %pI6\n", daddr); + return ERR_PTR(-ENETUNREACH); + } + + if (unlikely(ndst->dev == dev)) { + netdev_dbg(dev, "circular route to %pI6\n", daddr); + dst_release(ndst); + return ERR_PTR(-ELOOP); + } *saddr = fl6.saddr; if (use_cache) @@ -1931,23 +1944,55 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, } } +static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, + struct vxlan_dev *vxlan, union vxlan_addr *daddr, + __be32 dst_port, __be32 vni, struct dst_entry *dst, + u32 rt_flags) +{ +#if IS_ENABLED(CONFIG_IPV6) + /* IPv6 rt-flags are checked against RTF_LOCAL, but the value of + * RTF_LOCAL is equal to RTCF_LOCAL. So to keep code simple + * we can use RTCF_LOCAL which works for ipv4 and ipv6 route entry. + */ + BUILD_BUG_ON(RTCF_LOCAL != RTF_LOCAL); +#endif + /* Bypass encapsulation if the destination is local */ + if (rt_flags & RTCF_LOCAL && + !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) { + struct vxlan_dev *dst_vxlan; + + dst_release(dst); + dst_vxlan = vxlan_find_vni(vxlan->net, vni, + daddr->sa.sa_family, dst_port, + vxlan->flags); + if (!dst_vxlan) { + dev->stats.tx_errors++; + kfree_skb(skb); + + return -ENOENT; + } + vxlan_encap_bypass(skb, vxlan, dst_vxlan); + return 1; + } + + return 0; +} + static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_rdst *rdst, bool did_rsc) { struct dst_cache *dst_cache; struct ip_tunnel_info *info; struct vxlan_dev *vxlan = netdev_priv(dev); - struct sock *sk; - struct rtable *rt = NULL; - const struct iphdr *old_iph; + const struct iphdr *old_iph = ip_hdr(skb); union vxlan_addr *dst; union vxlan_addr remote_ip, local_ip; union vxlan_addr *src; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; __be16 src_port = 0, dst_port; + struct dst_entry *ndst = NULL; __be32 vni, label; - __be16 df = 0; __u8 tos, ttl; int err; u32 flags = vxlan->flags; @@ -1957,19 +2002,40 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, info = skb_tunnel_info(skb); if (rdst) { + dst = &rdst->remote_ip; + if (vxlan_addr_any(dst)) { + if (did_rsc) { + /* short-circuited back to local bridge */ + vxlan_encap_bypass(skb, vxlan, vxlan); + return; + } + goto drop; + } + dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port; vni = rdst->remote_vni; - dst = &rdst->remote_ip; src = &vxlan->cfg.saddr; dst_cache = &rdst->dst_cache; + md->gbp = skb->mark; + ttl = vxlan->cfg.ttl; + if (!ttl && vxlan_addr_multicast(dst)) + ttl = 1; + + tos = vxlan->cfg.tos; + if (tos == 1) + tos = ip_tunnel_get_dsfield(old_iph, skb); + + if (dst->sa.sa_family == AF_INET) + udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX); + else + udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); + label = vxlan->cfg.label; } else { if (!info) { WARN_ONCE(1, "%s: Missing encapsulation instructions\n", dev->name); goto drop; } - dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port; - vni = tunnel_id_to_key32(info->key.tun_id); remote_ip.sa.sa_family = ip_tunnel_info_af(info); if (remote_ip.sa.sa_family == AF_INET) { remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst; @@ -1979,182 +2045,111 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, local_ip.sin6.sin6_addr = info->key.u.ipv6.src; } dst = &remote_ip; + dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port; + vni = tunnel_id_to_key32(info->key.tun_id); src = &local_ip; dst_cache = &info->dst_cache; - } - - if (vxlan_addr_any(dst)) { - if (did_rsc) { - /* short-circuited back to local bridge */ - vxlan_encap_bypass(skb, vxlan, vxlan); - return; - } - goto drop; - } - - old_iph = ip_hdr(skb); - - ttl = vxlan->cfg.ttl; - if (!ttl && vxlan_addr_multicast(dst)) - ttl = 1; - - tos = vxlan->cfg.tos; - if (tos == 1) - tos = ip_tunnel_get_dsfield(old_iph, skb); - - label = vxlan->cfg.label; - src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min, - vxlan->cfg.port_max, true); - - if (info) { + if (info->options_len) + md = ip_tunnel_info_opts(info); ttl = info->key.ttl; tos = info->key.tos; label = info->key.label; udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); - - if (info->options_len) - md = ip_tunnel_info_opts(info); - } else { - md->gbp = skb->mark; } + src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min, + vxlan->cfg.port_max, true); if (dst->sa.sa_family == AF_INET) { struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock); + struct rtable *rt; + __be16 df = 0; - if (!sock4) - goto drop; - sk = sock4->sock->sk; - - rt = vxlan_get_route(vxlan, skb, + rt = vxlan_get_route(vxlan, dev, sock4, skb, rdst ? rdst->remote_ifindex : 0, tos, dst->sin.sin_addr.s_addr, &src->sin.sin_addr.s_addr, dst_cache, info); if (IS_ERR(rt)) { - netdev_dbg(dev, "no route to %pI4\n", - &dst->sin.sin_addr.s_addr); - dev->stats.tx_carrier_errors++; + err = PTR_ERR(rt); goto tx_error; } - if (rt->dst.dev == dev) { - netdev_dbg(dev, "circular route to %pI4\n", - &dst->sin.sin_addr.s_addr); - dev->stats.collisions++; - goto rt_tx_error; - } - /* Bypass encapsulation if the destination is local */ - if (!info && rt->rt_flags & RTCF_LOCAL && - !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) { - struct vxlan_dev *dst_vxlan; - - ip_rt_put(rt); - dst_vxlan = vxlan_find_vni(vxlan->net, vni, - dst->sa.sa_family, dst_port, - vxlan->flags); - if (!dst_vxlan) - goto tx_error; - vxlan_encap_bypass(skb, vxlan, dst_vxlan); - return; - } - - if (!info) - udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX); - else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) + if (!info) { + err = encap_bypass_if_local(skb, dev, vxlan, dst, + dst_port, vni, &rt->dst, + rt->rt_flags); + if (err) + return; + } else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) { df = htons(IP_DF); + } + ndst = &rt->dst; tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); - err = vxlan_build_skb(skb, &rt->dst, sizeof(struct iphdr), + err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr), vni, md, flags, udp_sum); if (err < 0) - goto xmit_tx_error; + goto tx_error; - udp_tunnel_xmit_skb(rt, sk, skb, src->sin.sin_addr.s_addr, + udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, src->sin.sin_addr.s_addr, dst->sin.sin_addr.s_addr, tos, ttl, df, src_port, dst_port, xnet, !udp_sum); #if IS_ENABLED(CONFIG_IPV6) } else { struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock); - struct dst_entry *ndst; - u32 rt6i_flags; - if (!sock6) - goto drop; - sk = sock6->sock->sk; - - ndst = vxlan6_get_route(vxlan, skb, + ndst = vxlan6_get_route(vxlan, dev, sock6, skb, rdst ? rdst->remote_ifindex : 0, tos, label, &dst->sin6.sin6_addr, &src->sin6.sin6_addr, dst_cache, info); if (IS_ERR(ndst)) { - netdev_dbg(dev, "no route to %pI6\n", - &dst->sin6.sin6_addr); - dev->stats.tx_carrier_errors++; + err = PTR_ERR(ndst); + ndst = NULL; goto tx_error; } - if (ndst->dev == dev) { - netdev_dbg(dev, "circular route to %pI6\n", - &dst->sin6.sin6_addr); - dst_release(ndst); - dev->stats.collisions++; - goto tx_error; - } + if (!info) { + u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags; - /* Bypass encapsulation if the destination is local */ - rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags; - if (!info && rt6i_flags & RTF_LOCAL && - !(rt6i_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) { - struct vxlan_dev *dst_vxlan; - - dst_release(ndst); - dst_vxlan = vxlan_find_vni(vxlan->net, vni, - dst->sa.sa_family, dst_port, - vxlan->flags); - if (!dst_vxlan) - goto tx_error; - vxlan_encap_bypass(skb, vxlan, dst_vxlan); - return; + err = encap_bypass_if_local(skb, dev, vxlan, dst, + dst_port, vni, ndst, + rt6i_flags); + if (err) + return; } - if (!info) - udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); - tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? : ip6_dst_hoplimit(ndst); skb_scrub_packet(skb, xnet); err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr), vni, md, flags, udp_sum); - if (err < 0) { - dst_release(ndst); - dev->stats.tx_errors++; - return; - } - udp_tunnel6_xmit_skb(ndst, sk, skb, dev, + if (err < 0) + goto tx_error; + + udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev, &src->sin6.sin6_addr, &dst->sin6.sin6_addr, tos, ttl, label, src_port, dst_port, !udp_sum); #endif } - return; drop: dev->stats.tx_dropped++; - goto tx_free; + dev_kfree_skb(skb); + return; -xmit_tx_error: - /* skb is already freed. */ - skb = NULL; -rt_tx_error: - ip_rt_put(rt); tx_error: + if (err == -ELOOP) + dev->stats.collisions++; + else if (err == -ENETUNREACH) + dev->stats.tx_carrier_errors++; + dst_release(ndst); dev->stats.tx_errors++; -tx_free: - dev_kfree_skb(skb); + kfree_skb(skb); } /* Transmit local packets over Vxlan @@ -2394,43 +2389,31 @@ static void vxlan_set_multicast_list(struct net_device *dev) { } -static int __vxlan_change_mtu(struct net_device *dev, - struct net_device *lowerdev, - struct vxlan_rdst *dst, int new_mtu, bool strict) +static int vxlan_change_mtu(struct net_device *dev, int new_mtu) { - int max_mtu = IP_MAX_MTU; - - if (lowerdev) - max_mtu = lowerdev->mtu; + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_rdst *dst = &vxlan->default_dst; + struct net_device *lowerdev = __dev_get_by_index(vxlan->net, + dst->remote_ifindex); + bool use_ipv6 = false; if (dst->remote_ip.sa.sa_family == AF_INET6) - max_mtu -= VXLAN6_HEADROOM; - else - max_mtu -= VXLAN_HEADROOM; - - if (new_mtu < 68) - return -EINVAL; + use_ipv6 = true; - if (new_mtu > max_mtu) { - if (strict) + /* This check is different than dev->max_mtu, because it looks at + * the lowerdev->mtu, rather than the static dev->max_mtu + */ + if (lowerdev) { + int max_mtu = lowerdev->mtu - + (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); + if (new_mtu > max_mtu) return -EINVAL; - - new_mtu = max_mtu; } dev->mtu = new_mtu; return 0; } -static int vxlan_change_mtu(struct net_device *dev, int new_mtu) -{ - struct vxlan_dev *vxlan = netdev_priv(dev); - struct vxlan_rdst *dst = &vxlan->default_dst; - struct net_device *lowerdev = __dev_get_by_index(vxlan->net, - dst->remote_ifindex); - return __vxlan_change_mtu(dev, lowerdev, dst, new_mtu, true); -} - static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct vxlan_dev *vxlan = netdev_priv(dev); @@ -2445,9 +2428,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock); struct rtable *rt; - if (!sock4) - return -EINVAL; - rt = vxlan_get_route(vxlan, skb, 0, info->key.tos, + rt = vxlan_get_route(vxlan, dev, sock4, skb, 0, info->key.tos, info->key.u.ipv4.dst, &info->key.u.ipv4.src, NULL, info); if (IS_ERR(rt)) @@ -2455,9 +2436,10 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) ip_rt_put(rt); } else { #if IS_ENABLED(CONFIG_IPV6) + struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock); struct dst_entry *ndst; - ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos, + ndst = vxlan6_get_route(vxlan, dev, sock6, skb, 0, info->key.tos, info->key.label, &info->key.u.ipv6.dst, &info->key.u.ipv6.src, NULL, info); if (IS_ERR(ndst)) @@ -2545,10 +2527,8 @@ static void vxlan_setup(struct net_device *dev) dev->features |= NETIF_F_GSO_SOFTWARE; dev->vlan_features = dev->features; - dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; dev->hw_features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; netif_keep_dst(dev); dev->priv_flags |= IFF_NO_QUEUE; @@ -2821,6 +2801,10 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, vxlan_ether_setup(dev); } + /* MTU range: 68 - 65535 */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = ETH_MAX_MTU; + vxlan->net = src_net; dst->remote_vni = conf->vni; @@ -2864,7 +2848,8 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, #endif if (!conf->mtu) - dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); + dev->mtu = lowerdev->mtu - + (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); needed_headroom = lowerdev->hard_header_len; } else if (vxlan_addr_multicast(&dst->remote_ip)) { @@ -2873,9 +2858,20 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, } if (conf->mtu) { - err = __vxlan_change_mtu(dev, lowerdev, dst, conf->mtu, false); - if (err) - return err; + int max_mtu = ETH_MAX_MTU; + + if (lowerdev) + max_mtu = lowerdev->mtu; + + max_mtu -= (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM); + + if (conf->mtu < dev->min_mtu || conf->mtu > dev->max_mtu) + return -EINVAL; + + dev->mtu = conf->mtu; + + if (conf->mtu > max_mtu) + dev->mtu = max_mtu; } if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA) |