diff options
Diffstat (limited to 'drivers/net/vxlan.c')
-rw-r--r-- | drivers/net/vxlan.c | 153 |
1 files changed, 51 insertions, 102 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index beb377b2d4b7..be4649a49c5e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -42,6 +42,7 @@ #include <net/netns/generic.h> #include <net/vxlan.h> #include <net/protocol.h> +#include <net/udp_tunnel.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #include <net/addrconf.h> @@ -66,12 +67,6 @@ #define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */ -/* VXLAN protocol header */ -struct vxlanhdr { - __be32 vx_flags; - __be32 vx_vni; -}; - /* UDP port for VXLAN traffic. * The IANA assigned port is 4789, but the Linux default is 8472 * for compatibility with early adopters. @@ -274,13 +269,15 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb) return list_first_entry(&fdb->remotes, struct vxlan_rdst, list); } -/* Find VXLAN socket based on network namespace and UDP port */ -static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port) +/* Find VXLAN socket based on network namespace, address family and UDP port */ +static struct vxlan_sock *vxlan_find_sock(struct net *net, + sa_family_t family, __be16 port) { struct vxlan_sock *vs; hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) { - if (inet_sk(vs->sock->sk)->inet_sport == port) + if (inet_sk(vs->sock->sk)->inet_sport == port && + inet_sk(vs->sock->sk)->sk.sk_family == family) return vs; } return NULL; @@ -299,11 +296,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id) } /* Look up VNI in a per net namespace table */ -static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, __be16 port) +static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, + sa_family_t family, __be16 port) { struct vxlan_sock *vs; - vs = vxlan_find_sock(net, port); + vs = vxlan_find_sock(net, family, port); if (!vs) return NULL; @@ -620,6 +618,8 @@ static int vxlan_gro_complete(struct sk_buff *skb, int nhoff) int vxlan_len = sizeof(struct vxlanhdr) + sizeof(struct ethhdr); int err = -ENOSYS; + udp_tunnel_gro_complete(skb, nhoff); + eh = (struct ethhdr *)(skb->data + nhoff + sizeof(struct vxlanhdr)); type = eh->h_proto; @@ -1062,7 +1062,6 @@ void vxlan_sock_release(struct vxlan_sock *vs) spin_lock(&vn->sock_lock); hlist_del_rcu(&vs->hlist); - rcu_assign_sk_user_data(vs->sock->sk, NULL); vxlan_notify_del_rx_port(vs); spin_unlock(&vn->sock_lock); @@ -1158,8 +1157,6 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (!vs) goto drop; - skb_pop_rcv_encapsulation(skb); - vs->rcv(vs, skb, vxh->vx_vni); return 0; @@ -1338,7 +1335,6 @@ out: } #if IS_ENABLED(CONFIG_IPV6) - static struct sk_buff *vxlan_na_create(struct sk_buff *request, struct neighbour *n, bool isrouter) { @@ -1440,9 +1436,6 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb) if (!in6_dev) goto out; - if (!pskb_may_pull(skb, skb->len)) - goto out; - iphdr = ipv6_hdr(skb); saddr = &iphdr->saddr; daddr = &iphdr->daddr; @@ -1572,13 +1565,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; } -static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb, - bool udp_csum) -{ - int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; - return iptunnel_handle_offloads(skb, udp_csum, type); -} - #if IS_ENABLED(CONFIG_IPV6) static int vxlan6_xmit_skb(struct vxlan_sock *vs, struct dst_entry *dst, struct sk_buff *skb, @@ -1587,13 +1573,12 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, __be16 src_port, __be16 dst_port, __be32 vni, bool xnet) { - struct ipv6hdr *ip6h; struct vxlanhdr *vxh; - struct udphdr *uh; int min_headroom; int err; + bool udp_sum = !udp_get_no_check6_tx(vs->sock->sk); - skb = vxlan_handle_offloads(skb, !udp_get_no_check6_tx(vs->sock->sk)); + skb = udp_tunnel_handle_offloads(skb, udp_sum); if (IS_ERR(skb)) return -EINVAL; @@ -1621,38 +1606,10 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, vxh->vx_flags = htonl(VXLAN_FLAGS); vxh->vx_vni = vni; - __skb_push(skb, sizeof(*uh)); - skb_reset_transport_header(skb); - uh = udp_hdr(skb); - - uh->dest = dst_port; - uh->source = src_port; - - uh->len = htons(skb->len); + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | - IPSKB_REROUTED); - skb_dst_set(skb, dst); - - udp6_set_csum(udp_get_no_check6_tx(vs->sock->sk), skb, - saddr, daddr, skb->len); - - __skb_push(skb, sizeof(*ip6h)); - skb_reset_network_header(skb); - ip6h = ipv6_hdr(skb); - ip6h->version = 6; - ip6h->priority = prio; - ip6h->flow_lbl[0] = 0; - ip6h->flow_lbl[1] = 0; - ip6h->flow_lbl[2] = 0; - ip6h->payload_len = htons(skb->len); - ip6h->nexthdr = IPPROTO_UDP; - ip6h->hop_limit = ttl; - ip6h->daddr = *daddr; - ip6h->saddr = *saddr; - - ip6tunnel_xmit(skb, dev); + udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio, + ttl, src_port, dst_port); return 0; } #endif @@ -1663,11 +1620,11 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, __be16 src_port, __be16 dst_port, __be32 vni, bool xnet) { struct vxlanhdr *vxh; - struct udphdr *uh; int min_headroom; int err; + bool udp_sum = !vs->sock->sk->sk_no_check_tx; - skb = vxlan_handle_offloads(skb, !vs->sock->sk->sk_no_check_tx); + skb = udp_tunnel_handle_offloads(skb, udp_sum); if (IS_ERR(skb)) return -EINVAL; @@ -1693,20 +1650,10 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, vxh->vx_flags = htonl(VXLAN_FLAGS); vxh->vx_vni = vni; - __skb_push(skb, sizeof(*uh)); - skb_reset_transport_header(skb); - uh = udp_hdr(skb); - - uh->dest = dst_port; - uh->source = src_port; - - uh->len = htons(skb->len); - - udp_set_csum(vs->sock->sk->sk_no_check_tx, skb, - src, dst, skb->len); + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP, - tos, ttl, df, xnet); + return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos, + ttl, df, src_port, dst_port, xnet); } EXPORT_SYMBOL_GPL(vxlan_xmit_skb); @@ -1717,6 +1664,8 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, struct pcpu_sw_netstats *tx_stats, *rx_stats; union vxlan_addr loopback; union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip; + struct net_device *dev = skb->dev; + int len = skb->len; tx_stats = this_cpu_ptr(src_vxlan->dev->tstats); rx_stats = this_cpu_ptr(dst_vxlan->dev->tstats); @@ -1740,16 +1689,16 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, u64_stats_update_begin(&tx_stats->syncp); tx_stats->tx_packets++; - tx_stats->tx_bytes += skb->len; + tx_stats->tx_bytes += len; u64_stats_update_end(&tx_stats->syncp); if (netif_rx(skb) == NET_RX_SUCCESS) { u64_stats_update_begin(&rx_stats->syncp); rx_stats->rx_packets++; - rx_stats->rx_bytes += skb->len; + rx_stats->rx_bytes += len; u64_stats_update_end(&rx_stats->syncp); } else { - skb->dev->stats.rx_dropped++; + dev->stats.rx_dropped++; } } @@ -1821,7 +1770,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_dev *dst_vxlan; ip_rt_put(rt); - dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_port); + dst_vxlan = vxlan_find_vni(vxlan->net, vni, + dst->sa.sa_family, dst_port); if (!dst_vxlan) goto tx_error; vxlan_encap_bypass(skb, vxlan, dst_vxlan); @@ -1875,7 +1825,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_dev *dst_vxlan; dst_release(ndst); - dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_port); + dst_vxlan = vxlan_find_vni(vxlan->net, vni, + dst->sa.sa_family, dst_port); if (!dst_vxlan) goto tx_error; vxlan_encap_bypass(skb, vxlan, dst_vxlan); @@ -1927,7 +1878,8 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) return arp_reduce(dev, skb); #if IS_ENABLED(CONFIG_IPV6) else if (ntohs(eth->h_proto) == ETH_P_IPV6 && - skb->len >= sizeof(struct ipv6hdr) + sizeof(struct nd_msg) && + pskb_may_pull(skb, sizeof(struct ipv6hdr) + + sizeof(struct nd_msg)) && ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) { struct nd_msg *msg; @@ -1936,6 +1888,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) return neigh_reduce(dev, skb); } + eth = eth_hdr(skb); #endif } @@ -2033,13 +1986,15 @@ static int vxlan_init(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); struct vxlan_sock *vs; + bool ipv6 = vxlan->flags & VXLAN_F_IPV6; dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; spin_lock(&vn->sock_lock); - vs = vxlan_find_sock(vxlan->net, vxlan->dst_port); + vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET, + vxlan->dst_port); if (vs) { /* If we have a socket with same port already, reuse it */ atomic_inc(&vs->refcnt); @@ -2242,7 +2197,7 @@ static void vxlan_setup(struct net_device *dev) dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; dev->hw_features |= NETIF_F_GSO_SOFTWARE; dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + netif_keep_dst(dev); dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; INIT_LIST_HEAD(&vxlan->next); @@ -2335,8 +2290,7 @@ static const struct ethtool_ops vxlan_ethtool_ops = { static void vxlan_del_work(struct work_struct *work) { struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work); - - sk_release_kernel(vs->sock->sk); + udp_tunnel_sock_release(vs->sock); kfree_rcu(vs, rcu); } @@ -2352,9 +2306,9 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6, if (ipv6) { udp_conf.family = AF_INET6; udp_conf.use_udp6_tx_checksums = - !!(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); + !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); udp_conf.use_udp6_rx_checksums = - !!(flags & VXLAN_F_UDP_ZERO_CSUM6_RX); + !(flags & VXLAN_F_UDP_ZERO_CSUM6_RX); } else { udp_conf.family = AF_INET; udp_conf.local_ip.s_addr = INADDR_ANY; @@ -2369,9 +2323,6 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6, if (err < 0) return ERR_PTR(err); - /* Disable multicast loopback */ - inet_sk(sock->sk)->mc_loop = 0; - return sock; } @@ -2383,9 +2334,9 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_sock *vs; struct socket *sock; - struct sock *sk; unsigned int h; bool ipv6 = !!(flags & VXLAN_F_IPV6); + struct udp_tunnel_sock_cfg tunnel_cfg; vs = kzalloc(sizeof(*vs), GFP_KERNEL); if (!vs) @@ -2403,11 +2354,9 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, } vs->sock = sock; - sk = sock->sk; atomic_set(&vs->refcnt, 1); vs->rcv = rcv; vs->data = data; - rcu_assign_sk_user_data(vs->sock->sk, vs); /* Initialize the vxlan udp offloads structure */ vs->udp_offloads.port = port; @@ -2420,14 +2369,12 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, spin_unlock(&vn->sock_lock); /* Mark socket as an encapsulation socket. */ - udp_sk(sk)->encap_type = 1; - udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv; -#if IS_ENABLED(CONFIG_IPV6) - if (ipv6) - ipv6_stub->udpv6_encap_enable(); - else -#endif - udp_encap_enable(); + tunnel_cfg.sk_user_data = vs; + tunnel_cfg.encap_type = 1; + tunnel_cfg.encap_rcv = vxlan_udp_encap_recv; + tunnel_cfg.encap_destroy = NULL; + + setup_udp_tunnel_sock(net, sock, &tunnel_cfg); return vs; } @@ -2438,6 +2385,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, { struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_sock *vs; + bool ipv6 = flags & VXLAN_F_IPV6; vs = vxlan_socket_create(net, port, rcv, data, flags); if (!IS_ERR(vs)) @@ -2447,7 +2395,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, return vs; spin_lock(&vn->sock_lock); - vs = vxlan_find_sock(net, port); + vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port); if (vs) { if (vs->rcv == rcv) atomic_inc(&vs->refcnt); @@ -2606,7 +2554,8 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX])) vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_RX; - if (vxlan_find_vni(net, vni, vxlan->dst_port)) { + if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET, + vxlan->dst_port)) { pr_info("duplicate VNI %u\n", vni); return -EEXIST; } |