summaryrefslogtreecommitdiff
path: root/drivers/net/vxlan.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/vxlan.c')
-rw-r--r--drivers/net/vxlan.c153
1 files changed, 51 insertions, 102 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index beb377b2d4b7..be4649a49c5e 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -42,6 +42,7 @@
#include <net/netns/generic.h>
#include <net/vxlan.h>
#include <net/protocol.h>
+#include <net/udp_tunnel.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/addrconf.h>
@@ -66,12 +67,6 @@
#define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */
-/* VXLAN protocol header */
-struct vxlanhdr {
- __be32 vx_flags;
- __be32 vx_vni;
-};
-
/* UDP port for VXLAN traffic.
* The IANA assigned port is 4789, but the Linux default is 8472
* for compatibility with early adopters.
@@ -274,13 +269,15 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
}
-/* Find VXLAN socket based on network namespace and UDP port */
-static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port)
+/* Find VXLAN socket based on network namespace, address family and UDP port */
+static struct vxlan_sock *vxlan_find_sock(struct net *net,
+ sa_family_t family, __be16 port)
{
struct vxlan_sock *vs;
hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
- if (inet_sk(vs->sock->sk)->inet_sport == port)
+ if (inet_sk(vs->sock->sk)->inet_sport == port &&
+ inet_sk(vs->sock->sk)->sk.sk_family == family)
return vs;
}
return NULL;
@@ -299,11 +296,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
}
/* Look up VNI in a per net namespace table */
-static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, __be16 port)
+static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id,
+ sa_family_t family, __be16 port)
{
struct vxlan_sock *vs;
- vs = vxlan_find_sock(net, port);
+ vs = vxlan_find_sock(net, family, port);
if (!vs)
return NULL;
@@ -620,6 +618,8 @@ static int vxlan_gro_complete(struct sk_buff *skb, int nhoff)
int vxlan_len = sizeof(struct vxlanhdr) + sizeof(struct ethhdr);
int err = -ENOSYS;
+ udp_tunnel_gro_complete(skb, nhoff);
+
eh = (struct ethhdr *)(skb->data + nhoff + sizeof(struct vxlanhdr));
type = eh->h_proto;
@@ -1062,7 +1062,6 @@ void vxlan_sock_release(struct vxlan_sock *vs)
spin_lock(&vn->sock_lock);
hlist_del_rcu(&vs->hlist);
- rcu_assign_sk_user_data(vs->sock->sk, NULL);
vxlan_notify_del_rx_port(vs);
spin_unlock(&vn->sock_lock);
@@ -1158,8 +1157,6 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (!vs)
goto drop;
- skb_pop_rcv_encapsulation(skb);
-
vs->rcv(vs, skb, vxh->vx_vni);
return 0;
@@ -1338,7 +1335,6 @@ out:
}
#if IS_ENABLED(CONFIG_IPV6)
-
static struct sk_buff *vxlan_na_create(struct sk_buff *request,
struct neighbour *n, bool isrouter)
{
@@ -1440,9 +1436,6 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb)
if (!in6_dev)
goto out;
- if (!pskb_may_pull(skb, skb->len))
- goto out;
-
iphdr = ipv6_hdr(skb);
saddr = &iphdr->saddr;
daddr = &iphdr->daddr;
@@ -1572,13 +1565,6 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
return false;
}
-static inline struct sk_buff *vxlan_handle_offloads(struct sk_buff *skb,
- bool udp_csum)
-{
- int type = udp_csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
- return iptunnel_handle_offloads(skb, udp_csum, type);
-}
-
#if IS_ENABLED(CONFIG_IPV6)
static int vxlan6_xmit_skb(struct vxlan_sock *vs,
struct dst_entry *dst, struct sk_buff *skb,
@@ -1587,13 +1573,12 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
__be16 src_port, __be16 dst_port, __be32 vni,
bool xnet)
{
- struct ipv6hdr *ip6h;
struct vxlanhdr *vxh;
- struct udphdr *uh;
int min_headroom;
int err;
+ bool udp_sum = !udp_get_no_check6_tx(vs->sock->sk);
- skb = vxlan_handle_offloads(skb, !udp_get_no_check6_tx(vs->sock->sk));
+ skb = udp_tunnel_handle_offloads(skb, udp_sum);
if (IS_ERR(skb))
return -EINVAL;
@@ -1621,38 +1606,10 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
vxh->vx_flags = htonl(VXLAN_FLAGS);
vxh->vx_vni = vni;
- __skb_push(skb, sizeof(*uh));
- skb_reset_transport_header(skb);
- uh = udp_hdr(skb);
-
- uh->dest = dst_port;
- uh->source = src_port;
-
- uh->len = htons(skb->len);
+ skb_set_inner_protocol(skb, htons(ETH_P_TEB));
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
- IPSKB_REROUTED);
- skb_dst_set(skb, dst);
-
- udp6_set_csum(udp_get_no_check6_tx(vs->sock->sk), skb,
- saddr, daddr, skb->len);
-
- __skb_push(skb, sizeof(*ip6h));
- skb_reset_network_header(skb);
- ip6h = ipv6_hdr(skb);
- ip6h->version = 6;
- ip6h->priority = prio;
- ip6h->flow_lbl[0] = 0;
- ip6h->flow_lbl[1] = 0;
- ip6h->flow_lbl[2] = 0;
- ip6h->payload_len = htons(skb->len);
- ip6h->nexthdr = IPPROTO_UDP;
- ip6h->hop_limit = ttl;
- ip6h->daddr = *daddr;
- ip6h->saddr = *saddr;
-
- ip6tunnel_xmit(skb, dev);
+ udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio,
+ ttl, src_port, dst_port);
return 0;
}
#endif
@@ -1663,11 +1620,11 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
__be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
{
struct vxlanhdr *vxh;
- struct udphdr *uh;
int min_headroom;
int err;
+ bool udp_sum = !vs->sock->sk->sk_no_check_tx;
- skb = vxlan_handle_offloads(skb, !vs->sock->sk->sk_no_check_tx);
+ skb = udp_tunnel_handle_offloads(skb, udp_sum);
if (IS_ERR(skb))
return -EINVAL;
@@ -1693,20 +1650,10 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
vxh->vx_flags = htonl(VXLAN_FLAGS);
vxh->vx_vni = vni;
- __skb_push(skb, sizeof(*uh));
- skb_reset_transport_header(skb);
- uh = udp_hdr(skb);
-
- uh->dest = dst_port;
- uh->source = src_port;
-
- uh->len = htons(skb->len);
-
- udp_set_csum(vs->sock->sk->sk_no_check_tx, skb,
- src, dst, skb->len);
+ skb_set_inner_protocol(skb, htons(ETH_P_TEB));
- return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
- tos, ttl, df, xnet);
+ return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos,
+ ttl, df, src_port, dst_port, xnet);
}
EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
@@ -1717,6 +1664,8 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
struct pcpu_sw_netstats *tx_stats, *rx_stats;
union vxlan_addr loopback;
union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip;
+ struct net_device *dev = skb->dev;
+ int len = skb->len;
tx_stats = this_cpu_ptr(src_vxlan->dev->tstats);
rx_stats = this_cpu_ptr(dst_vxlan->dev->tstats);
@@ -1740,16 +1689,16 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
u64_stats_update_begin(&tx_stats->syncp);
tx_stats->tx_packets++;
- tx_stats->tx_bytes += skb->len;
+ tx_stats->tx_bytes += len;
u64_stats_update_end(&tx_stats->syncp);
if (netif_rx(skb) == NET_RX_SUCCESS) {
u64_stats_update_begin(&rx_stats->syncp);
rx_stats->rx_packets++;
- rx_stats->rx_bytes += skb->len;
+ rx_stats->rx_bytes += len;
u64_stats_update_end(&rx_stats->syncp);
} else {
- skb->dev->stats.rx_dropped++;
+ dev->stats.rx_dropped++;
}
}
@@ -1821,7 +1770,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct vxlan_dev *dst_vxlan;
ip_rt_put(rt);
- dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_port);
+ dst_vxlan = vxlan_find_vni(vxlan->net, vni,
+ dst->sa.sa_family, dst_port);
if (!dst_vxlan)
goto tx_error;
vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1875,7 +1825,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct vxlan_dev *dst_vxlan;
dst_release(ndst);
- dst_vxlan = vxlan_find_vni(vxlan->net, vni, dst_port);
+ dst_vxlan = vxlan_find_vni(vxlan->net, vni,
+ dst->sa.sa_family, dst_port);
if (!dst_vxlan)
goto tx_error;
vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1927,7 +1878,8 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
return arp_reduce(dev, skb);
#if IS_ENABLED(CONFIG_IPV6)
else if (ntohs(eth->h_proto) == ETH_P_IPV6 &&
- skb->len >= sizeof(struct ipv6hdr) + sizeof(struct nd_msg) &&
+ pskb_may_pull(skb, sizeof(struct ipv6hdr)
+ + sizeof(struct nd_msg)) &&
ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
struct nd_msg *msg;
@@ -1936,6 +1888,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
return neigh_reduce(dev, skb);
}
+ eth = eth_hdr(skb);
#endif
}
@@ -2033,13 +1986,15 @@ static int vxlan_init(struct net_device *dev)
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
struct vxlan_sock *vs;
+ bool ipv6 = vxlan->flags & VXLAN_F_IPV6;
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
spin_lock(&vn->sock_lock);
- vs = vxlan_find_sock(vxlan->net, vxlan->dst_port);
+ vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
+ vxlan->dst_port);
if (vs) {
/* If we have a socket with same port already, reuse it */
atomic_inc(&vs->refcnt);
@@ -2242,7 +2197,7 @@ static void vxlan_setup(struct net_device *dev)
dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
- dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ netif_keep_dst(dev);
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
INIT_LIST_HEAD(&vxlan->next);
@@ -2335,8 +2290,7 @@ static const struct ethtool_ops vxlan_ethtool_ops = {
static void vxlan_del_work(struct work_struct *work)
{
struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work);
-
- sk_release_kernel(vs->sock->sk);
+ udp_tunnel_sock_release(vs->sock);
kfree_rcu(vs, rcu);
}
@@ -2352,9 +2306,9 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
if (ipv6) {
udp_conf.family = AF_INET6;
udp_conf.use_udp6_tx_checksums =
- !!(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
+ !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
udp_conf.use_udp6_rx_checksums =
- !!(flags & VXLAN_F_UDP_ZERO_CSUM6_RX);
+ !(flags & VXLAN_F_UDP_ZERO_CSUM6_RX);
} else {
udp_conf.family = AF_INET;
udp_conf.local_ip.s_addr = INADDR_ANY;
@@ -2369,9 +2323,6 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
if (err < 0)
return ERR_PTR(err);
- /* Disable multicast loopback */
- inet_sk(sock->sk)->mc_loop = 0;
-
return sock;
}
@@ -2383,9 +2334,9 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_sock *vs;
struct socket *sock;
- struct sock *sk;
unsigned int h;
bool ipv6 = !!(flags & VXLAN_F_IPV6);
+ struct udp_tunnel_sock_cfg tunnel_cfg;
vs = kzalloc(sizeof(*vs), GFP_KERNEL);
if (!vs)
@@ -2403,11 +2354,9 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
}
vs->sock = sock;
- sk = sock->sk;
atomic_set(&vs->refcnt, 1);
vs->rcv = rcv;
vs->data = data;
- rcu_assign_sk_user_data(vs->sock->sk, vs);
/* Initialize the vxlan udp offloads structure */
vs->udp_offloads.port = port;
@@ -2420,14 +2369,12 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
spin_unlock(&vn->sock_lock);
/* Mark socket as an encapsulation socket. */
- udp_sk(sk)->encap_type = 1;
- udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
-#if IS_ENABLED(CONFIG_IPV6)
- if (ipv6)
- ipv6_stub->udpv6_encap_enable();
- else
-#endif
- udp_encap_enable();
+ tunnel_cfg.sk_user_data = vs;
+ tunnel_cfg.encap_type = 1;
+ tunnel_cfg.encap_rcv = vxlan_udp_encap_recv;
+ tunnel_cfg.encap_destroy = NULL;
+
+ setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
return vs;
}
@@ -2438,6 +2385,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_sock *vs;
+ bool ipv6 = flags & VXLAN_F_IPV6;
vs = vxlan_socket_create(net, port, rcv, data, flags);
if (!IS_ERR(vs))
@@ -2447,7 +2395,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
return vs;
spin_lock(&vn->sock_lock);
- vs = vxlan_find_sock(net, port);
+ vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port);
if (vs) {
if (vs->rcv == rcv)
atomic_inc(&vs->refcnt);
@@ -2606,7 +2554,8 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_RX;
- if (vxlan_find_vni(net, vni, vxlan->dst_port)) {
+ if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET,
+ vxlan->dst_port)) {
pr_info("duplicate VNI %u\n", vni);
return -EEXIST;
}