summary | refs | log | tree | commit | diff
path: root/drivers/net/vxlan.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/vxlan.c')
-rw-r--r-- drivers/net/vxlan.c 430
1 files changed, 226 insertions, 204 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index e7d16687538b..bb70dd5723b5 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -52,7 +52,7 @@ static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
-static int vxlan_net_id;
+static unsigned int vxlan_net_id;
static struct rtnl_link_ops vxlan_link_ops;
static const u8 all_zeros_mac[ETH_ALEN + 2];
@@ -583,7 +583,7 @@ static struct sk_buff **vxlan_gro_receive(struct sock *sk,
}
}
- pp = eth_gro_receive(head, skb);
+ pp = call_gro_receive(eth_gro_receive, head, skb);
flush = 0;
out:
@@ -611,6 +611,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
struct vxlan_rdst *rd = NULL;
struct vxlan_fdb *f;
int notify = 0;
+ int rc;
f = __vxlan_find_mac(vxlan, mac);
if (f) {
@@ -641,8 +642,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
if ((flags & NLM_F_APPEND) &&
(is_multicast_ether_addr(f->eth_addr) ||
is_zero_ether_addr(f->eth_addr))) {
- int rc = vxlan_fdb_append(f, ip, port, vni, ifindex,
- &rd);
+ rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
if (rc < 0)
return rc;
@@ -673,7 +673,11 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
INIT_LIST_HEAD(&f->remotes);
memcpy(f->eth_addr, mac, ETH_ALEN);
- vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
+ rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
+ if (rc < 0) {
+ kfree(f);
+ return rc;
+ }
++vxlan->addrcnt;
hlist_add_head_rcu(&f->hlist,
@@ -943,17 +947,22 @@ static bool vxlan_snoop(struct net_device *dev,
static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
{
struct vxlan_dev *vxlan;
+ struct vxlan_sock *sock4;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct vxlan_sock *sock6;
+#endif
unsigned short family = dev->default_dst.remote_ip.sa.sa_family;
+ sock4 = rtnl_dereference(dev->vn4_sock);
+
/* The vxlan_sock is only used by dev, leaving group has
* no effect on other vxlan devices.
*/
- if (family == AF_INET && dev->vn4_sock &&
- atomic_read(&dev->vn4_sock->refcnt) == 1)
+ if (family == AF_INET && sock4 && atomic_read(&sock4->refcnt) == 1)
return false;
#if IS_ENABLED(CONFIG_IPV6)
- if (family == AF_INET6 && dev->vn6_sock &&
- atomic_read(&dev->vn6_sock->refcnt) == 1)
+ sock6 = rtnl_dereference(dev->vn6_sock);
+ if (family == AF_INET6 && sock6 && atomic_read(&sock6->refcnt) == 1)
return false;
#endif
@@ -961,10 +970,12 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
if (!netif_running(vxlan->dev) || vxlan == dev)
continue;
- if (family == AF_INET && vxlan->vn4_sock != dev->vn4_sock)
+ if (family == AF_INET &&
+ rtnl_dereference(vxlan->vn4_sock) != sock4)
continue;
#if IS_ENABLED(CONFIG_IPV6)
- if (family == AF_INET6 && vxlan->vn6_sock != dev->vn6_sock)
+ if (family == AF_INET6 &&
+ rtnl_dereference(vxlan->vn6_sock) != sock6)
continue;
#endif
@@ -1005,22 +1016,25 @@ static bool __vxlan_sock_release_prep(struct vxlan_sock *vs)
static void vxlan_sock_release(struct vxlan_dev *vxlan)
{
- bool ipv4 = __vxlan_sock_release_prep(vxlan->vn4_sock);
+ struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
#if IS_ENABLED(CONFIG_IPV6)
- bool ipv6 = __vxlan_sock_release_prep(vxlan->vn6_sock);
+ struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);
+
+ rcu_assign_pointer(vxlan->vn6_sock, NULL);
#endif
+ rcu_assign_pointer(vxlan->vn4_sock, NULL);
synchronize_net();
- if (ipv4) {
- udp_tunnel_sock_release(vxlan->vn4_sock->sock);
- kfree(vxlan->vn4_sock);
+ if (__vxlan_sock_release_prep(sock4)) {
+ udp_tunnel_sock_release(sock4->sock);
+ kfree(sock4);
}
#if IS_ENABLED(CONFIG_IPV6)
- if (ipv6) {
- udp_tunnel_sock_release(vxlan->vn6_sock->sock);
- kfree(vxlan->vn6_sock);
+ if (__vxlan_sock_release_prep(sock6)) {
+ udp_tunnel_sock_release(sock6->sock);
+ kfree(sock6);
}
#endif
}
@@ -1036,18 +1050,21 @@ static int vxlan_igmp_join(struct vxlan_dev *vxlan)
int ret = -EINVAL;
if (ip->sa.sa_family == AF_INET) {
+ struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
struct ip_mreqn mreq = {
.imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
.imr_ifindex = ifindex,
};
- sk = vxlan->vn4_sock->sock->sk;
+ sk = sock4->sock->sk;
lock_sock(sk);
ret = ip_mc_join_group(sk, &mreq);
release_sock(sk);
#if IS_ENABLED(CONFIG_IPV6)
} else {
- sk = vxlan->vn6_sock->sock->sk;
+ struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);
+
+ sk = sock6->sock->sk;
lock_sock(sk);
ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex,
&ip->sin6.sin6_addr);
@@ -1067,18 +1084,21 @@ static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
int ret = -EINVAL;
if (ip->sa.sa_family == AF_INET) {
+ struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
struct ip_mreqn mreq = {
.imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
.imr_ifindex = ifindex,
};
- sk = vxlan->vn4_sock->sock->sk;
+ sk = sock4->sock->sk;
lock_sock(sk);
ret = ip_mc_leave_group(sk, &mreq);
release_sock(sk);
#if IS_ENABLED(CONFIG_IPV6)
} else {
- sk = vxlan->vn6_sock->sock->sk;
+ struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);
+
+ sk = sock6->sock->sk;
lock_sock(sk);
ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex,
&ip->sin6.sin6_addr);
@@ -1734,21 +1754,16 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
}
min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
- + VXLAN_HLEN + iphdr_len
- + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
+ + VXLAN_HLEN + iphdr_len;
/* Need space for new headers (invalidates iph ptr) */
err = skb_cow_head(skb, min_headroom);
if (unlikely(err))
- goto out_free;
-
- skb = vlan_hwaccel_push_inside(skb);
- if (WARN_ON(!skb))
- return -ENOMEM;
+ return err;
err = iptunnel_handle_offloads(skb, type);
if (err)
- goto out_free;
+ return err;
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = VXLAN_HF_VNI;
@@ -1772,19 +1787,16 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
if (vxflags & VXLAN_F_GPE) {
err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol);
if (err < 0)
- goto out_free;
+ return err;
inner_protocol = skb->protocol;
}
skb_set_inner_protocol(skb, inner_protocol);
return 0;
-
-out_free:
- kfree_skb(skb);
- return err;
}
-static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
+static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device *dev,
+ struct vxlan_sock *sock4,
struct sk_buff *skb, int oif, u8 tos,
__be32 daddr, __be32 *saddr,
struct dst_cache *dst_cache,
@@ -1794,6 +1806,9 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
struct rtable *rt = NULL;
struct flowi4 fl4;
+ if (!sock4)
+ return ERR_PTR(-EIO);
+
if (tos && !info)
use_cache = false;
if (use_cache) {
@@ -1811,16 +1826,27 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
fl4.saddr = *saddr;
rt = ip_route_output_key(vxlan->net, &fl4);
- if (!IS_ERR(rt)) {
+ if (likely(!IS_ERR(rt))) {
+ if (rt->dst.dev == dev) {
+ netdev_dbg(dev, "circular route to %pI4\n", &daddr);
+ ip_rt_put(rt);
+ return ERR_PTR(-ELOOP);
+ }
+
*saddr = fl4.saddr;
if (use_cache)
dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
+ } else {
+ netdev_dbg(dev, "no route to %pI4\n", &daddr);
+ return ERR_PTR(-ENETUNREACH);
}
return rt;
}
#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
+ struct net_device *dev,
+ struct vxlan_sock *sock6,
struct sk_buff *skb, int oif, u8 tos,
__be32 label,
const struct in6_addr *daddr,
@@ -1833,6 +1859,9 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
struct flowi6 fl6;
int err;
+ if (!sock6)
+ return ERR_PTR(-EIO);
+
if (tos && !info)
use_cache = false;
if (use_cache) {
@@ -1850,10 +1879,18 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
fl6.flowi6_proto = IPPROTO_UDP;
err = ipv6_stub->ipv6_dst_lookup(vxlan->net,
- vxlan->vn6_sock->sock->sk,
+ sock6->sock->sk,
&ndst, &fl6);
- if (err < 0)
- return ERR_PTR(err);
+ if (unlikely(err < 0)) {
+ netdev_dbg(dev, "no route to %pI6\n", daddr);
+ return ERR_PTR(-ENETUNREACH);
+ }
+
+ if (unlikely(ndst->dev == dev)) {
+ netdev_dbg(dev, "circular route to %pI6\n", daddr);
+ dst_release(ndst);
+ return ERR_PTR(-ELOOP);
+ }
*saddr = fl6.saddr;
if (use_cache)
@@ -1907,23 +1944,55 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
}
}
+/* Hand @skb directly to a local vxlan device instead of encapsulating it,
+ * when the route flags mark the destination as local.
+ *
+ * @rt_flags carries the flags of the route behind @dst (the callers pass
+ * rt->rt_flags for IPv4 and rt6i_flags for IPv6). If they describe a local,
+ * non-broadcast, non-multicast destination, the reference on @dst is
+ * dropped and the device matching @vni, the address family of @daddr and
+ * @dst_port is looked up in vxlan->net.
+ *
+ * @dst_port is a UDP port in network byte order; it is __be16 to match the
+ * variable the callers pass in.
+ *
+ * Returns:
+ *   0       - destination is not local; @skb and @dst are untouched and the
+ *             caller continues with normal encapsulation.
+ *   1       - @skb was passed to vxlan_encap_bypass().
+ *   -ENOENT - no matching local device; tx_errors was incremented and @skb
+ *             was freed.
+ * On any non-zero return the reference on @dst has already been released;
+ * the callers in this file return immediately in that case.
+ */
+static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
+				 struct vxlan_dev *vxlan, union vxlan_addr *daddr,
+				 __be16 dst_port, __be32 vni, struct dst_entry *dst,
+				 u32 rt_flags)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	/* IPv6 rt-flags are checked against RTF_LOCAL, but the value of
+	 * RTF_LOCAL is equal to RTCF_LOCAL. So to keep code simple
+	 * we can use RTCF_LOCAL which works for ipv4 and ipv6 route entry.
+	 */
+	BUILD_BUG_ON(RTCF_LOCAL != RTF_LOCAL);
+#endif
+	/* Bypass encapsulation if the destination is local */
+	if (rt_flags & RTCF_LOCAL &&
+	    !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
+		struct vxlan_dev *dst_vxlan;
+
+		/* The route is not needed on the bypass path. */
+		dst_release(dst);
+		dst_vxlan = vxlan_find_vni(vxlan->net, vni,
+					   daddr->sa.sa_family, dst_port,
+					   vxlan->flags);
+		if (!dst_vxlan) {
+			dev->stats.tx_errors++;
+			kfree_skb(skb);
+
+			return -ENOENT;
+		}
+		vxlan_encap_bypass(skb, vxlan, dst_vxlan);
+		return 1;
+	}
+
+	return 0;
+}
+
static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct vxlan_rdst *rdst, bool did_rsc)
{
struct dst_cache *dst_cache;
struct ip_tunnel_info *info;
struct vxlan_dev *vxlan = netdev_priv(dev);
- struct sock *sk;
- struct rtable *rt = NULL;
- const struct iphdr *old_iph;
+ const struct iphdr *old_iph = ip_hdr(skb);
union vxlan_addr *dst;
union vxlan_addr remote_ip, local_ip;
union vxlan_addr *src;
struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md;
__be16 src_port = 0, dst_port;
+ struct dst_entry *ndst = NULL;
__be32 vni, label;
- __be16 df = 0;
__u8 tos, ttl;
int err;
u32 flags = vxlan->flags;
@@ -1933,19 +2002,40 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
info = skb_tunnel_info(skb);
if (rdst) {
+ dst = &rdst->remote_ip;
+ if (vxlan_addr_any(dst)) {
+ if (did_rsc) {
+ /* short-circuited back to local bridge */
+ vxlan_encap_bypass(skb, vxlan, vxlan);
+ return;
+ }
+ goto drop;
+ }
+
dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
vni = rdst->remote_vni;
- dst = &rdst->remote_ip;
src = &vxlan->cfg.saddr;
dst_cache = &rdst->dst_cache;
+ md->gbp = skb->mark;
+ ttl = vxlan->cfg.ttl;
+ if (!ttl && vxlan_addr_multicast(dst))
+ ttl = 1;
+
+ tos = vxlan->cfg.tos;
+ if (tos == 1)
+ tos = ip_tunnel_get_dsfield(old_iph, skb);
+
+ if (dst->sa.sa_family == AF_INET)
+ udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
+ else
+ udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
+ label = vxlan->cfg.label;
} else {
if (!info) {
WARN_ONCE(1, "%s: Missing encapsulation instructions\n",
dev->name);
goto drop;
}
- dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
- vni = tunnel_id_to_key32(info->key.tun_id);
remote_ip.sa.sa_family = ip_tunnel_info_af(info);
if (remote_ip.sa.sa_family == AF_INET) {
remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
@@ -1955,179 +2045,111 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
local_ip.sin6.sin6_addr = info->key.u.ipv6.src;
}
dst = &remote_ip;
+ dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
+ vni = tunnel_id_to_key32(info->key.tun_id);
src = &local_ip;
dst_cache = &info->dst_cache;
- }
-
- if (vxlan_addr_any(dst)) {
- if (did_rsc) {
- /* short-circuited back to local bridge */
- vxlan_encap_bypass(skb, vxlan, vxlan);
- return;
- }
- goto drop;
- }
-
- old_iph = ip_hdr(skb);
-
- ttl = vxlan->cfg.ttl;
- if (!ttl && vxlan_addr_multicast(dst))
- ttl = 1;
-
- tos = vxlan->cfg.tos;
- if (tos == 1)
- tos = ip_tunnel_get_dsfield(old_iph, skb);
-
- label = vxlan->cfg.label;
- src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
- vxlan->cfg.port_max, true);
-
- if (info) {
+ if (info->options_len)
+ md = ip_tunnel_info_opts(info);
ttl = info->key.ttl;
tos = info->key.tos;
label = info->key.label;
udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
-
- if (info->options_len)
- md = ip_tunnel_info_opts(info);
- } else {
- md->gbp = skb->mark;
}
+ src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
+ vxlan->cfg.port_max, true);
if (dst->sa.sa_family == AF_INET) {
- if (!vxlan->vn4_sock)
- goto drop;
- sk = vxlan->vn4_sock->sock->sk;
+ struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
+ struct rtable *rt;
+ __be16 df = 0;
- rt = vxlan_get_route(vxlan, skb,
+ rt = vxlan_get_route(vxlan, dev, sock4, skb,
rdst ? rdst->remote_ifindex : 0, tos,
dst->sin.sin_addr.s_addr,
&src->sin.sin_addr.s_addr,
dst_cache, info);
if (IS_ERR(rt)) {
- netdev_dbg(dev, "no route to %pI4\n",
- &dst->sin.sin_addr.s_addr);
- dev->stats.tx_carrier_errors++;
+ err = PTR_ERR(rt);
goto tx_error;
}
- if (rt->dst.dev == dev) {
- netdev_dbg(dev, "circular route to %pI4\n",
- &dst->sin.sin_addr.s_addr);
- dev->stats.collisions++;
- goto rt_tx_error;
- }
-
/* Bypass encapsulation if the destination is local */
- if (!info && rt->rt_flags & RTCF_LOCAL &&
- !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
- struct vxlan_dev *dst_vxlan;
-
- ip_rt_put(rt);
- dst_vxlan = vxlan_find_vni(vxlan->net, vni,
- dst->sa.sa_family, dst_port,
- vxlan->flags);
- if (!dst_vxlan)
- goto tx_error;
- vxlan_encap_bypass(skb, vxlan, dst_vxlan);
- return;
- }
-
- if (!info)
- udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
- else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
+ if (!info) {
+ err = encap_bypass_if_local(skb, dev, vxlan, dst,
+ dst_port, vni, &rt->dst,
+ rt->rt_flags);
+ if (err)
+ return;
+ } else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) {
df = htons(IP_DF);
+ }
+ ndst = &rt->dst;
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
- err = vxlan_build_skb(skb, &rt->dst, sizeof(struct iphdr),
+ err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
vni, md, flags, udp_sum);
if (err < 0)
- goto xmit_tx_error;
+ goto tx_error;
- udp_tunnel_xmit_skb(rt, sk, skb, src->sin.sin_addr.s_addr,
+ udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, src->sin.sin_addr.s_addr,
dst->sin.sin_addr.s_addr, tos, ttl, df,
src_port, dst_port, xnet, !udp_sum);
#if IS_ENABLED(CONFIG_IPV6)
} else {
- struct dst_entry *ndst;
- u32 rt6i_flags;
+ struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
- if (!vxlan->vn6_sock)
- goto drop;
- sk = vxlan->vn6_sock->sock->sk;
-
- ndst = vxlan6_get_route(vxlan, skb,
+ ndst = vxlan6_get_route(vxlan, dev, sock6, skb,
rdst ? rdst->remote_ifindex : 0, tos,
label, &dst->sin6.sin6_addr,
&src->sin6.sin6_addr,
dst_cache, info);
if (IS_ERR(ndst)) {
- netdev_dbg(dev, "no route to %pI6\n",
- &dst->sin6.sin6_addr);
- dev->stats.tx_carrier_errors++;
+ err = PTR_ERR(ndst);
+ ndst = NULL;
goto tx_error;
}
- if (ndst->dev == dev) {
- netdev_dbg(dev, "circular route to %pI6\n",
- &dst->sin6.sin6_addr);
- dst_release(ndst);
- dev->stats.collisions++;
- goto tx_error;
- }
+ if (!info) {
+ u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
- /* Bypass encapsulation if the destination is local */
- rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
- if (!info && rt6i_flags & RTF_LOCAL &&
- !(rt6i_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
- struct vxlan_dev *dst_vxlan;
-
- dst_release(ndst);
- dst_vxlan = vxlan_find_vni(vxlan->net, vni,
- dst->sa.sa_family, dst_port,
- vxlan->flags);
- if (!dst_vxlan)
- goto tx_error;
- vxlan_encap_bypass(skb, vxlan, dst_vxlan);
- return;
+ err = encap_bypass_if_local(skb, dev, vxlan, dst,
+ dst_port, vni, ndst,
+ rt6i_flags);
+ if (err)
+ return;
}
- if (!info)
- udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
-
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip6_dst_hoplimit(ndst);
skb_scrub_packet(skb, xnet);
err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
vni, md, flags, udp_sum);
- if (err < 0) {
- dst_release(ndst);
- dev->stats.tx_errors++;
- return;
- }
- udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
+ if (err < 0)
+ goto tx_error;
+
+ udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev,
&src->sin6.sin6_addr,
&dst->sin6.sin6_addr, tos, ttl,
label, src_port, dst_port, !udp_sum);
#endif
}
-
return;
drop:
dev->stats.tx_dropped++;
- goto tx_free;
+ dev_kfree_skb(skb);
+ return;
-xmit_tx_error:
- /* skb is already freed. */
- skb = NULL;
-rt_tx_error:
- ip_rt_put(rt);
tx_error:
+ if (err == -ELOOP)
+ dev->stats.collisions++;
+ else if (err == -ENETUNREACH)
+ dev->stats.tx_carrier_errors++;
+ dst_release(ndst);
dev->stats.tx_errors++;
-tx_free:
- dev_kfree_skb(skb);
+ kfree_skb(skb);
}
/* Transmit local packets over Vxlan
@@ -2367,43 +2389,31 @@ static void vxlan_set_multicast_list(struct net_device *dev)
{
}
-static int __vxlan_change_mtu(struct net_device *dev,
- struct net_device *lowerdev,
- struct vxlan_rdst *dst, int new_mtu, bool strict)
+static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
{
- int max_mtu = IP_MAX_MTU;
-
- if (lowerdev)
- max_mtu = lowerdev->mtu;
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_rdst *dst = &vxlan->default_dst;
+ struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
+ dst->remote_ifindex);
+ bool use_ipv6 = false;
if (dst->remote_ip.sa.sa_family == AF_INET6)
- max_mtu -= VXLAN6_HEADROOM;
- else
- max_mtu -= VXLAN_HEADROOM;
-
- if (new_mtu < 68)
- return -EINVAL;
+ use_ipv6 = true;
- if (new_mtu > max_mtu) {
- if (strict)
+ /* This check is different than dev->max_mtu, because it looks at
+ * the lowerdev->mtu, rather than the static dev->max_mtu
+ */
+ if (lowerdev) {
+ int max_mtu = lowerdev->mtu -
+ (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
+ if (new_mtu > max_mtu)
return -EINVAL;
-
- new_mtu = max_mtu;
}
dev->mtu = new_mtu;
return 0;
}
-static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
-{
- struct vxlan_dev *vxlan = netdev_priv(dev);
- struct vxlan_rdst *dst = &vxlan->default_dst;
- struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
- dst->remote_ifindex);
- return __vxlan_change_mtu(dev, lowerdev, dst, new_mtu, true);
-}
-
static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
@@ -2415,11 +2425,10 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
dport = info->key.tp_dst ? : vxlan->cfg.dst_port;
if (ip_tunnel_info_af(info) == AF_INET) {
+ struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
struct rtable *rt;
- if (!vxlan->vn4_sock)
- return -EINVAL;
- rt = vxlan_get_route(vxlan, skb, 0, info->key.tos,
+ rt = vxlan_get_route(vxlan, dev, sock4, skb, 0, info->key.tos,
info->key.u.ipv4.dst,
&info->key.u.ipv4.src, NULL, info);
if (IS_ERR(rt))
@@ -2427,11 +2436,10 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
ip_rt_put(rt);
} else {
#if IS_ENABLED(CONFIG_IPV6)
+ struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
struct dst_entry *ndst;
- if (!vxlan->vn6_sock)
- return -EINVAL;
- ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos,
+ ndst = vxlan6_get_route(vxlan, dev, sock6, skb, 0, info->key.tos,
info->key.label, &info->key.u.ipv6.dst,
&info->key.u.ipv6.src, NULL, info);
if (IS_ERR(ndst))
@@ -2519,10 +2527,8 @@ static void vxlan_setup(struct net_device *dev)
dev->features |= NETIF_F_GSO_SOFTWARE;
dev->vlan_features = dev->features;
- dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
netif_keep_dst(dev);
dev->priv_flags |= IFF_NO_QUEUE;
@@ -2740,10 +2746,10 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
return PTR_ERR(vs);
#if IS_ENABLED(CONFIG_IPV6)
if (ipv6)
- vxlan->vn6_sock = vs;
+ rcu_assign_pointer(vxlan->vn6_sock, vs);
else
#endif
- vxlan->vn4_sock = vs;
+ rcu_assign_pointer(vxlan->vn4_sock, vs);
vxlan_vs_add_dev(vs, vxlan);
return 0;
}
@@ -2754,9 +2760,9 @@ static int vxlan_sock_add(struct vxlan_dev *vxlan)
bool metadata = vxlan->flags & VXLAN_F_COLLECT_METADATA;
int ret = 0;
- vxlan->vn4_sock = NULL;
+ RCU_INIT_POINTER(vxlan->vn4_sock, NULL);
#if IS_ENABLED(CONFIG_IPV6)
- vxlan->vn6_sock = NULL;
+ RCU_INIT_POINTER(vxlan->vn6_sock, NULL);
if (ipv6 || metadata)
ret = __vxlan_sock_add(vxlan, true);
#endif
@@ -2795,6 +2801,10 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
vxlan_ether_setup(dev);
}
+ /* MTU range: 68 - 65535 */
+ dev->min_mtu = ETH_MIN_MTU;
+ dev->max_mtu = ETH_MAX_MTU;
+
vxlan->net = src_net;
dst->remote_vni = conf->vni;
@@ -2838,7 +2848,8 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
#endif
if (!conf->mtu)
- dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
+ dev->mtu = lowerdev->mtu -
+ (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
needed_headroom = lowerdev->hard_header_len;
} else if (vxlan_addr_multicast(&dst->remote_ip)) {
@@ -2847,9 +2858,20 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
}
if (conf->mtu) {
- err = __vxlan_change_mtu(dev, lowerdev, dst, conf->mtu, false);
- if (err)
- return err;
+ int max_mtu = ETH_MAX_MTU;
+
+ if (lowerdev)
+ max_mtu = lowerdev->mtu;
+
+ max_mtu -= (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
+
+ if (conf->mtu < dev->min_mtu || conf->mtu > dev->max_mtu)
+ return -EINVAL;
+
+ dev->mtu = conf->mtu;
+
+ if (conf->mtu > max_mtu)
+ dev->mtu = max_mtu;
}
if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)