summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/Makefile2
-rw-r--r--drivers/net/vxlan/Makefile7
-rw-r--r--drivers/net/vxlan/vxlan_core.c (renamed from drivers/net/vxlan.c)434
-rw-r--r--drivers/net/vxlan/vxlan_multicast.c272
-rw-r--r--drivers/net/vxlan/vxlan_private.h162
-rw-r--r--drivers/net/vxlan/vxlan_vnifilter.c999
-rw-r--r--include/net/vxlan.h54
-rw-r--r--include/uapi/linux/if_link.h49
-rw-r--r--include/uapi/linux/rtnetlink.h9
-rw-r--r--security/selinux/nlmsgtab.c5
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_vnifiltering.sh579
11 files changed, 2307 insertions, 265 deletions
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 50b23e71065f..3f1192d3c52d 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -31,7 +31,7 @@ obj-$(CONFIG_TUN) += tun.o
obj-$(CONFIG_TAP) += tap.o
obj-$(CONFIG_VETH) += veth.o
obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
-obj-$(CONFIG_VXLAN) += vxlan.o
+obj-$(CONFIG_VXLAN) += vxlan/
obj-$(CONFIG_GENEVE) += geneve.o
obj-$(CONFIG_BAREUDP) += bareudp.o
obj-$(CONFIG_GTP) += gtp.o
diff --git a/drivers/net/vxlan/Makefile b/drivers/net/vxlan/Makefile
new file mode 100644
index 000000000000..d4c255499b72
--- /dev/null
+++ b/drivers/net/vxlan/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the vxlan driver
+#
+
+obj-$(CONFIG_VXLAN) += vxlan.o
+
+vxlan-objs := vxlan_core.o vxlan_multicast.o vxlan_vnifilter.o
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan/vxlan_core.c
index d0dc90d3dac2..4ab09dd5a32a 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -34,10 +34,10 @@
#include <net/ip6_checksum.h>
#endif
+#include "vxlan_private.h"
+
#define VXLAN_VERSION "0.1"
-#define PORT_HASH_BITS 8
-#define PORT_HASH_SIZE (1<<PORT_HASH_BITS)
#define FDB_AGE_DEFAULT 300 /* 5 min */
#define FDB_AGE_INTERVAL (10 * HZ) /* rescan interval */
@@ -53,41 +53,15 @@ static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
-static unsigned int vxlan_net_id;
-static struct rtnl_link_ops vxlan_link_ops;
+unsigned int vxlan_net_id;
-static const u8 all_zeros_mac[ETH_ALEN + 2];
+const u8 all_zeros_mac[ETH_ALEN + 2];
+static struct rtnl_link_ops vxlan_link_ops;
static int vxlan_sock_add(struct vxlan_dev *vxlan);
static void vxlan_vs_del_dev(struct vxlan_dev *vxlan);
-/* per-network namespace private data for this module */
-struct vxlan_net {
- struct list_head vxlan_list;
- struct hlist_head sock_list[PORT_HASH_SIZE];
- spinlock_t sock_lock;
- struct notifier_block nexthop_notifier_block;
-};
-
-/* Forwarding table entry */
-struct vxlan_fdb {
- struct hlist_node hlist; /* linked list of entries */
- struct rcu_head rcu;
- unsigned long updated; /* jiffies */
- unsigned long used;
- struct list_head remotes;
- u8 eth_addr[ETH_ALEN];
- u16 state; /* see ndm_state */
- __be32 vni;
- u16 flags; /* see ndm_flags and below */
- struct list_head nh_list;
- struct nexthop __rcu *nh;
- struct vxlan_dev __rcu *vdev;
-};
-
-#define NTF_VXLAN_ADDED_BY_USER 0x100
-
/* salt for hash table */
static u32 vxlan_salt __read_mostly;
@@ -98,17 +72,6 @@ static inline bool vxlan_collect_metadata(struct vxlan_sock *vs)
}
#if IS_ENABLED(CONFIG_IPV6)
-static inline
-bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
-{
- if (a->sa.sa_family != b->sa.sa_family)
- return false;
- if (a->sa.sa_family == AF_INET6)
- return ipv6_addr_equal(&a->sin6.sin6_addr, &b->sin6.sin6_addr);
- else
- return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
-}
-
static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
{
if (nla_len(nla) >= sizeof(struct in6_addr)) {
@@ -135,12 +98,6 @@ static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
#else /* !CONFIG_IPV6 */
-static inline
-bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
-{
- return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
-}
-
static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
{
if (nla_len(nla) >= sizeof(struct in6_addr)) {
@@ -161,37 +118,6 @@ static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
}
#endif
-/* Virtual Network hash table head */
-static inline struct hlist_head *vni_head(struct vxlan_sock *vs, __be32 vni)
-{
- return &vs->vni_list[hash_32((__force u32)vni, VNI_HASH_BITS)];
-}
-
-/* Socket hash table head */
-static inline struct hlist_head *vs_head(struct net *net, __be16 port)
-{
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
-
- return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
-}
-
-/* First remote destination for a forwarding entry.
- * Guaranteed to be non-NULL because remotes are never deleted.
- */
-static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb)
-{
- if (rcu_access_pointer(fdb->nh))
- return NULL;
- return list_entry_rcu(fdb->remotes.next, struct vxlan_rdst, list);
-}
-
-static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
-{
- if (rcu_access_pointer(fdb->nh))
- return NULL;
- return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
-}
-
/* Find VXLAN socket based on network namespace, address family, UDP port,
* enabled unshareable flags and socket device binding (see l3mdev with
* non-default VRF).
@@ -213,18 +139,29 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
return NULL;
}
-static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, int ifindex,
- __be32 vni)
+static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs,
+ int ifindex, __be32 vni,
+ struct vxlan_vni_node **vninode)
{
+ struct vxlan_vni_node *vnode;
struct vxlan_dev_node *node;
/* For flow based devices, map all packets to VNI 0 */
- if (vs->flags & VXLAN_F_COLLECT_METADATA)
+ if (vs->flags & VXLAN_F_COLLECT_METADATA &&
+ !(vs->flags & VXLAN_F_VNIFILTER))
vni = 0;
hlist_for_each_entry_rcu(node, vni_head(vs, vni), hlist) {
- if (node->vxlan->default_dst.remote_vni != vni)
+ if (!node->vxlan)
continue;
+ vnode = NULL;
+ if (node->vxlan->cfg.flags & VXLAN_F_VNIFILTER) {
+ vnode = vxlan_vnifilter_lookup(node->vxlan, vni);
+ if (!vnode)
+ continue;
+ } else if (node->vxlan->default_dst.remote_vni != vni) {
+ continue;
+ }
if (IS_ENABLED(CONFIG_IPV6)) {
const struct vxlan_config *cfg = &node->vxlan->cfg;
@@ -234,6 +171,8 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, int ifindex,
continue;
}
+ if (vninode)
+ *vninode = vnode;
return node->vxlan;
}
@@ -251,7 +190,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, int ifindex,
if (!vs)
return NULL;
- return vxlan_vs_find_vni(vs, ifindex, vni);
+ return vxlan_vs_find_vni(vs, ifindex, vni, NULL);
}
/* Fill in neighbour message in skbuff. */
@@ -493,7 +432,7 @@ static u32 eth_hash(const unsigned char *addr)
return hash_64(value, FDB_HASH_BITS);
}
-static u32 eth_vni_hash(const unsigned char *addr, __be32 vni)
+u32 eth_vni_hash(const unsigned char *addr, __be32 vni)
{
/* use 1 byte of OUI and 3 bytes of NIC */
u32 key = get_unaligned((u32 *)(addr + 2));
@@ -501,7 +440,7 @@ static u32 eth_vni_hash(const unsigned char *addr, __be32 vni)
return jhash_2words(key, vni, vxlan_salt) & (FDB_HASH_SIZE - 1);
}
-static u32 fdb_head_index(struct vxlan_dev *vxlan, const u8 *mac, __be32 vni)
+u32 fdb_head_index(struct vxlan_dev *vxlan, const u8 *mac, __be32 vni)
{
if (vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)
return eth_vni_hash(mac, vni);
@@ -920,12 +859,12 @@ err_inval:
return err;
}
-static int vxlan_fdb_create(struct vxlan_dev *vxlan,
- const u8 *mac, union vxlan_addr *ip,
- __u16 state, __be16 port, __be32 src_vni,
- __be32 vni, __u32 ifindex, __u16 ndm_flags,
- u32 nhid, struct vxlan_fdb **fdb,
- struct netlink_ext_ack *extack)
+int vxlan_fdb_create(struct vxlan_dev *vxlan,
+ const u8 *mac, union vxlan_addr *ip,
+ __u16 state, __be16 port, __be32 src_vni,
+ __be32 vni, __u32 ifindex, __u16 ndm_flags,
+ u32 nhid, struct vxlan_fdb **fdb,
+ struct netlink_ext_ack *extack)
{
struct vxlan_rdst *rd = NULL;
struct vxlan_fdb *f;
@@ -1150,13 +1089,13 @@ err_notify:
}
/* Add new entry to forwarding table -- assumes lock held */
-static int vxlan_fdb_update(struct vxlan_dev *vxlan,
- const u8 *mac, union vxlan_addr *ip,
- __u16 state, __u16 flags,
- __be16 port, __be32 src_vni, __be32 vni,
- __u32 ifindex, __u16 ndm_flags, u32 nhid,
- bool swdev_notify,
- struct netlink_ext_ack *extack)
+int vxlan_fdb_update(struct vxlan_dev *vxlan,
+ const u8 *mac, union vxlan_addr *ip,
+ __u16 state, __u16 flags,
+ __be16 port, __be32 src_vni, __be32 vni,
+ __u32 ifindex, __u16 ndm_flags, u32 nhid,
+ bool swdev_notify,
+ struct netlink_ext_ack *extack)
{
struct vxlan_fdb *f;
@@ -1307,10 +1246,10 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
return err;
}
-static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
- const unsigned char *addr, union vxlan_addr ip,
- __be16 port, __be32 src_vni, __be32 vni,
- u32 ifindex, bool swdev_notify)
+int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
+ const unsigned char *addr, union vxlan_addr ip,
+ __be16 port, __be32 src_vni, __be32 vni,
+ u32 ifindex, bool swdev_notify)
{
struct vxlan_rdst *rd = NULL;
struct vxlan_fdb *f;
@@ -1519,56 +1458,6 @@ static bool vxlan_snoop(struct net_device *dev,
return false;
}
-/* See if multicast group is already in use by other ID */
-static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
-{
- struct vxlan_dev *vxlan;
- struct vxlan_sock *sock4;
-#if IS_ENABLED(CONFIG_IPV6)
- struct vxlan_sock *sock6;
-#endif
- unsigned short family = dev->default_dst.remote_ip.sa.sa_family;
-
- sock4 = rtnl_dereference(dev->vn4_sock);
-
- /* The vxlan_sock is only used by dev, leaving group has
- * no effect on other vxlan devices.
- */
- if (family == AF_INET && sock4 && refcount_read(&sock4->refcnt) == 1)
- return false;
-#if IS_ENABLED(CONFIG_IPV6)
- sock6 = rtnl_dereference(dev->vn6_sock);
- if (family == AF_INET6 && sock6 && refcount_read(&sock6->refcnt) == 1)
- return false;
-#endif
-
- list_for_each_entry(vxlan, &vn->vxlan_list, next) {
- if (!netif_running(vxlan->dev) || vxlan == dev)
- continue;
-
- if (family == AF_INET &&
- rtnl_dereference(vxlan->vn4_sock) != sock4)
- continue;
-#if IS_ENABLED(CONFIG_IPV6)
- if (family == AF_INET6 &&
- rtnl_dereference(vxlan->vn6_sock) != sock6)
- continue;
-#endif
-
- if (!vxlan_addr_equal(&vxlan->default_dst.remote_ip,
- &dev->default_dst.remote_ip))
- continue;
-
- if (vxlan->default_dst.remote_ifindex !=
- dev->default_dst.remote_ifindex)
- continue;
-
- return true;
- }
-
- return false;
-}
-
static bool __vxlan_sock_release_prep(struct vxlan_sock *vs)
{
struct vxlan_net *vn;
@@ -1602,7 +1491,10 @@ static void vxlan_sock_release(struct vxlan_dev *vxlan)
RCU_INIT_POINTER(vxlan->vn4_sock, NULL);
synchronize_net();
- vxlan_vs_del_dev(vxlan);
+ if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
+ vxlan_vs_del_vnigrp(vxlan);
+ else
+ vxlan_vs_del_dev(vxlan);
if (__vxlan_sock_release_prep(sock4)) {
udp_tunnel_sock_release(sock4->sock);
@@ -1617,76 +1509,6 @@ static void vxlan_sock_release(struct vxlan_dev *vxlan)
#endif
}
-/* Update multicast group membership when first VNI on
- * multicast address is brought up
- */
-static int vxlan_igmp_join(struct vxlan_dev *vxlan)
-{
- struct sock *sk;
- union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
- int ifindex = vxlan->default_dst.remote_ifindex;
- int ret = -EINVAL;
-
- if (ip->sa.sa_family == AF_INET) {
- struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
- struct ip_mreqn mreq = {
- .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
- .imr_ifindex = ifindex,
- };
-
- sk = sock4->sock->sk;
- lock_sock(sk);
- ret = ip_mc_join_group(sk, &mreq);
- release_sock(sk);
-#if IS_ENABLED(CONFIG_IPV6)
- } else {
- struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);
-
- sk = sock6->sock->sk;
- lock_sock(sk);
- ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex,
- &ip->sin6.sin6_addr);
- release_sock(sk);
-#endif
- }
-
- return ret;
-}
-
-/* Inverse of vxlan_igmp_join when last VNI is brought down */
-static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
-{
- struct sock *sk;
- union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
- int ifindex = vxlan->default_dst.remote_ifindex;
- int ret = -EINVAL;
-
- if (ip->sa.sa_family == AF_INET) {
- struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
- struct ip_mreqn mreq = {
- .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
- .imr_ifindex = ifindex,
- };
-
- sk = sock4->sock->sk;
- lock_sock(sk);
- ret = ip_mc_leave_group(sk, &mreq);
- release_sock(sk);
-#if IS_ENABLED(CONFIG_IPV6)
- } else {
- struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);
-
- sk = sock6->sock->sk;
- lock_sock(sk);
- ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex,
- &ip->sin6.sin6_addr);
- release_sock(sk);
-#endif
- }
-
- return ret;
-}
-
static bool vxlan_remcsum(struct vxlanhdr *unparsed,
struct sk_buff *skb, u32 vxflags)
{
@@ -1828,6 +1650,7 @@ static bool vxlan_ecn_decapsulate(struct vxlan_sock *vs, void *oiph,
/* Callback from net/ipv4/udp.c to receive packets */
static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
{
+ struct vxlan_vni_node *vninode = NULL;
struct vxlan_dev *vxlan;
struct vxlan_sock *vs;
struct vxlanhdr unparsed;
@@ -1860,7 +1683,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
- vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni);
+ vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni, &vninode);
if (!vxlan)
goto drop;
@@ -1930,6 +1753,8 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
if (!vxlan_ecn_decapsulate(vs, oiph, skb)) {
++vxlan->dev->stats.rx_frame_errors;
++vxlan->dev->stats.rx_errors;
+ vxlan_vnifilter_count(vxlan, vni, vninode,
+ VXLAN_VNI_STATS_RX_ERRORS, 0);
goto drop;
}
@@ -1938,10 +1763,13 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
if (unlikely(!(vxlan->dev->flags & IFF_UP))) {
rcu_read_unlock();
atomic_long_inc(&vxlan->dev->rx_dropped);
+ vxlan_vnifilter_count(vxlan, vni, vninode,
+ VXLAN_VNI_STATS_RX_DROPS, 0);
goto drop;
}
dev_sw_netstats_rx_add(vxlan->dev, skb->len);
+ vxlan_vnifilter_count(vxlan, vni, vninode, VXLAN_VNI_STATS_RX, skb->len);
gro_cells_receive(&vxlan->gro_cells, skb);
rcu_read_unlock();
@@ -1975,7 +1803,7 @@ static int vxlan_err_lookup(struct sock *sk, struct sk_buff *skb)
return -ENOENT;
vni = vxlan_vni(hdr->vx_vni);
- vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni);
+ vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni, NULL);
if (!vxlan)
return -ENOENT;
@@ -2049,8 +1877,12 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
reply->ip_summed = CHECKSUM_UNNECESSARY;
reply->pkt_type = PACKET_HOST;
- if (netif_rx_ni(reply) == NET_RX_DROP)
+ if (netif_rx_ni(reply) == NET_RX_DROP) {
dev->stats.rx_dropped++;
+ vxlan_vnifilter_count(vxlan, vni, NULL,
+ VXLAN_VNI_STATS_RX_DROPS, 0);
+ }
+
} else if (vxlan->cfg.flags & VXLAN_F_L3MISS) {
union vxlan_addr ipa = {
.sin.sin_addr.s_addr = tip,
@@ -2204,9 +2036,11 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
if (reply == NULL)
goto out;
- if (netif_rx_ni(reply) == NET_RX_DROP)
+ if (netif_rx_ni(reply) == NET_RX_DROP) {
dev->stats.rx_dropped++;
-
+ vxlan_vnifilter_count(vxlan, vni, NULL,
+ VXLAN_VNI_STATS_RX_DROPS, 0);
+ }
} else if (vxlan->cfg.flags & VXLAN_F_L3MISS) {
union vxlan_addr ipa = {
.sin6.sin6_addr = msg->target,
@@ -2540,15 +2374,20 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
tx_stats->tx_packets++;
tx_stats->tx_bytes += len;
u64_stats_update_end(&tx_stats->syncp);
+ vxlan_vnifilter_count(src_vxlan, vni, NULL, VXLAN_VNI_STATS_TX, len);
if (__netif_rx(skb) == NET_RX_SUCCESS) {
u64_stats_update_begin(&rx_stats->syncp);
rx_stats->rx_packets++;
rx_stats->rx_bytes += len;
u64_stats_update_end(&rx_stats->syncp);
+ vxlan_vnifilter_count(dst_vxlan, vni, NULL, VXLAN_VNI_STATS_RX,
+ len);
} else {
drop:
dev->stats.rx_dropped++;
+ vxlan_vnifilter_count(dst_vxlan, vni, NULL,
+ VXLAN_VNI_STATS_RX_DROPS, 0);
}
rcu_read_unlock();
}
@@ -2578,6 +2417,8 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
vxlan->cfg.flags);
if (!dst_vxlan) {
dev->stats.tx_errors++;
+ vxlan_vnifilter_count(vxlan, vni, NULL,
+ VXLAN_VNI_STATS_TX_ERRORS, 0);
kfree_skb(skb);
return -ENOENT;
@@ -2601,15 +2442,19 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
union vxlan_addr remote_ip, local_ip;
struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md;
+ unsigned int pkt_len = skb->len;
__be16 src_port = 0, dst_port;
struct dst_entry *ndst = NULL;
- __be32 vni, label;
__u8 tos, ttl;
int ifindex;
int err;
u32 flags = vxlan->cfg.flags;
bool udp_sum = false;
bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev));
+ __be32 vni = 0;
+#if IS_ENABLED(CONFIG_IPV6)
+ __be32 label;
+#endif
info = skb_tunnel_info(skb);
@@ -2647,7 +2492,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
else
udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
+#if IS_ENABLED(CONFIG_IPV6)
label = vxlan->cfg.label;
+#endif
} else {
if (!info) {
WARN_ONCE(1, "%s: Missing encapsulation instructions\n",
@@ -2674,7 +2521,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
}
ttl = info->key.ttl;
tos = info->key.tos;
+#if IS_ENABLED(CONFIG_IPV6)
label = info->key.label;
+#endif
udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
}
src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
@@ -2821,12 +2670,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
label, src_port, dst_port, !udp_sum);
#endif
}
+ vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX, pkt_len);
out_unlock:
rcu_read_unlock();
return;
drop:
dev->stats.tx_dropped++;
+ vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0);
dev_kfree_skb(skb);
return;
@@ -2838,6 +2689,7 @@ tx_error:
dev->stats.tx_carrier_errors++;
dst_release(ndst);
dev->stats.tx_errors++;
+ vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_ERRORS, 0);
kfree_skb(skb);
}
@@ -2870,6 +2722,8 @@ static void vxlan_xmit_nh(struct sk_buff *skb, struct net_device *dev,
drop:
dev->stats.tx_dropped++;
+ vxlan_vnifilter_count(netdev_priv(dev), vni, NULL,
+ VXLAN_VNI_STATS_TX_DROPS, 0);
dev_kfree_skb(skb);
}
@@ -2944,6 +2798,8 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
vxlan_fdb_miss(vxlan, eth->h_dest);
dev->stats.tx_dropped++;
+ vxlan_vnifilter_count(vxlan, vni, NULL,
+ VXLAN_VNI_STATS_TX_DROPS, 0);
kfree_skb(skb);
return NETDEV_TX_OK;
}
@@ -3044,6 +2900,9 @@ static int vxlan_init(struct net_device *dev)
struct vxlan_dev *vxlan = netdev_priv(dev);
int err;
+ if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
+ vxlan_vnigroup_init(vxlan);
+
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
@@ -3073,6 +2932,9 @@ static void vxlan_uninit(struct net_device *dev)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
+ if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
+ vxlan_vnigroup_uninit(vxlan);
+
gro_cells_destroy(&vxlan->gro_cells);
vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni);
@@ -3090,14 +2952,10 @@ static int vxlan_open(struct net_device *dev)
if (ret < 0)
return ret;
- if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) {
- ret = vxlan_igmp_join(vxlan);
- if (ret == -EADDRINUSE)
- ret = 0;
- if (ret) {
- vxlan_sock_release(vxlan);
- return ret;
- }
+ ret = vxlan_multicast_join(vxlan);
+ if (ret) {
+ vxlan_sock_release(vxlan);
+ return ret;
}
if (vxlan->cfg.age_interval)
@@ -3134,12 +2992,9 @@ static void vxlan_flush(struct vxlan_dev *vxlan, bool do_all)
static int vxlan_stop(struct net_device *dev)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
- struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
int ret = 0;
- if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
- !vxlan_group_used(vn, vxlan))
- ret = vxlan_igmp_leave(vxlan);
+ vxlan_multicast_leave(vxlan);
del_timer_sync(&vxlan->age_timer);
@@ -3369,6 +3224,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG },
[IFLA_VXLAN_TTL_INHERIT] = { .type = NLA_FLAG },
[IFLA_VXLAN_DF] = { .type = NLA_U8 },
+ [IFLA_VXLAN_VNIFILTER] = { .type = NLA_U8 },
};
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -3554,6 +3410,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
{
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+ bool metadata = vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA;
struct vxlan_sock *vs = NULL;
struct vxlan_dev_node *node;
int l3mdev_index = 0;
@@ -3589,7 +3446,12 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
rcu_assign_pointer(vxlan->vn4_sock, vs);
node = &vxlan->hlist4;
}
- vxlan_vs_add_dev(vs, vxlan, node);
+
+ if (metadata && (vxlan->cfg.flags & VXLAN_F_VNIFILTER))
+ vxlan_vs_add_vnigrp(vxlan, vs, ipv6);
+ else
+ vxlan_vs_add_dev(vs, vxlan, node);
+
return 0;
}
@@ -3616,13 +3478,42 @@ static int vxlan_sock_add(struct vxlan_dev *vxlan)
return ret;
}
+int vxlan_vni_in_use(struct net *src_net, struct vxlan_dev *vxlan,
+ struct vxlan_config *conf, __be32 vni)
+{
+ struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
+ struct vxlan_dev *tmp;
+
+ list_for_each_entry(tmp, &vn->vxlan_list, next) {
+ if (tmp == vxlan)
+ continue;
+ if (tmp->cfg.flags & VXLAN_F_VNIFILTER) {
+ if (!vxlan_vnifilter_lookup(tmp, vni))
+ continue;
+ } else if (tmp->cfg.vni != vni) {
+ continue;
+ }
+ if (tmp->cfg.dst_port != conf->dst_port)
+ continue;
+ if ((tmp->cfg.flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)) !=
+ (conf->flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)))
+ continue;
+
+ if ((conf->flags & VXLAN_F_IPV6_LINKLOCAL) &&
+ tmp->cfg.remote_ifindex != conf->remote_ifindex)
+ continue;
+
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
struct net_device **lower,
struct vxlan_dev *old,
struct netlink_ext_ack *extack)
{
- struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
- struct vxlan_dev *tmp;
bool use_ipv6 = false;
if (conf->flags & VXLAN_F_GPE) {
@@ -3755,22 +3646,7 @@ static int vxlan_config_validate(struct net *src_net, struct vxlan_config *conf,
if (!conf->age_interval)
conf->age_interval = FDB_AGE_DEFAULT;
- list_for_each_entry(tmp, &vn->vxlan_list, next) {
- if (tmp == old)
- continue;
-
- if (tmp->cfg.vni != conf->vni)
- continue;
- if (tmp->cfg.dst_port != conf->dst_port)
- continue;
- if ((tmp->cfg.flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)) !=
- (conf->flags & (VXLAN_F_RCV_FLAGS | VXLAN_F_IPV6)))
- continue;
-
- if ((conf->flags & VXLAN_F_IPV6_LINKLOCAL) &&
- tmp->cfg.remote_ifindex != conf->remote_ifindex)
- continue;
-
+ if (vxlan_vni_in_use(src_net, old, conf, conf->vni)) {
NL_SET_ERR_MSG(extack,
"A VXLAN device with the specified VNI already exists");
return -EEXIST;
@@ -4226,6 +4102,21 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
if (data[IFLA_VXLAN_DF])
conf->df = nla_get_u8(data[IFLA_VXLAN_DF]);
+ if (data[IFLA_VXLAN_VNIFILTER]) {
+ err = vxlan_nl2flag(conf, data, IFLA_VXLAN_VNIFILTER,
+ VXLAN_F_VNIFILTER, changelink, false,
+ extack);
+ if (err)
+ return err;
+
+ if ((conf->flags & VXLAN_F_VNIFILTER) &&
+ !(conf->flags & VXLAN_F_COLLECT_METADATA)) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VXLAN_VNIFILTER],
+ "vxlan vnifilter only valid in collect metadata mode");
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -4301,6 +4192,19 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
dst->remote_ifindex,
true);
spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+
+ /* If vni filtering device, also update fdb entries of
+ * all vnis that were using default remote ip
+ */
+ if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) {
+ err = vxlan_vnilist_update_group(vxlan, &dst->remote_ip,
+ &conf.remote_ip, extack);
+ if (err) {
+ netdev_adjacent_change_abort(dst->remote_dev,
+ lowerdev, dev);
+ return err;
+ }
+ }
}
if (conf.age_interval != vxlan->cfg.age_interval)
@@ -4446,6 +4350,11 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
goto nla_put_failure;
+ if (vxlan->cfg.flags & VXLAN_F_VNIFILTER &&
+ nla_put_u8(skb, IFLA_VXLAN_VNIFILTER,
+ !!(vxlan->cfg.flags & VXLAN_F_VNIFILTER)))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -4805,6 +4714,8 @@ static int __init vxlan_init_module(void)
if (rc)
goto out4;
+ vxlan_vnifilter_init();
+
return 0;
out4:
unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
@@ -4819,6 +4730,7 @@ late_initcall(vxlan_init_module);
static void __exit vxlan_cleanup_module(void)
{
+ vxlan_vnifilter_uninit();
rtnl_link_unregister(&vxlan_link_ops);
unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
unregister_netdevice_notifier(&vxlan_notifier_block);
diff --git a/drivers/net/vxlan/vxlan_multicast.c b/drivers/net/vxlan/vxlan_multicast.c
new file mode 100644
index 000000000000..a7f2d67dc61b
--- /dev/null
+++ b/drivers/net/vxlan/vxlan_multicast.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Vxlan multicast group handling
+ *
+ */
+#include <linux/kernel.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <linux/igmp.h>
+#include <net/vxlan.h>
+
+#include "vxlan_private.h"
+
+/* Update multicast group membership when first VNI on
+ * multicast address is brought up
+ */
+int vxlan_igmp_join(struct vxlan_dev *vxlan, union vxlan_addr *rip,
+ int rifindex)
+{
+ union vxlan_addr *ip = (rip ? : &vxlan->default_dst.remote_ip);
+ int ifindex = (rifindex ? : vxlan->default_dst.remote_ifindex);
+ int ret = -EINVAL;
+ struct sock *sk;
+
+ if (ip->sa.sa_family == AF_INET) {
+ struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
+ struct ip_mreqn mreq = {
+ .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
+ .imr_ifindex = ifindex,
+ };
+
+ sk = sock4->sock->sk;
+ lock_sock(sk);
+ ret = ip_mc_join_group(sk, &mreq);
+ release_sock(sk);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);
+
+ sk = sock6->sock->sk;
+ lock_sock(sk);
+ ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex,
+ &ip->sin6.sin6_addr);
+ release_sock(sk);
+#endif
+ }
+
+ return ret;
+}
+
+int vxlan_igmp_leave(struct vxlan_dev *vxlan, union vxlan_addr *rip,
+ int rifindex)
+{
+ union vxlan_addr *ip = (rip ? : &vxlan->default_dst.remote_ip);
+ int ifindex = (rifindex ? : vxlan->default_dst.remote_ifindex);
+ int ret = -EINVAL;
+ struct sock *sk;
+
+ if (ip->sa.sa_family == AF_INET) {
+ struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
+ struct ip_mreqn mreq = {
+ .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
+ .imr_ifindex = ifindex,
+ };
+
+ sk = sock4->sock->sk;
+ lock_sock(sk);
+ ret = ip_mc_leave_group(sk, &mreq);
+ release_sock(sk);
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);
+
+ sk = sock6->sock->sk;
+ lock_sock(sk);
+ ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex,
+ &ip->sin6.sin6_addr);
+ release_sock(sk);
+#endif
+ }
+
+ return ret;
+}
+
+static bool vxlan_group_used_match(union vxlan_addr *ip, int ifindex,
+ union vxlan_addr *rip, int rifindex)
+{
+ if (!vxlan_addr_multicast(rip))
+ return false;
+
+ if (!vxlan_addr_equal(rip, ip))
+ return false;
+
+ if (rifindex != ifindex)
+ return false;
+
+ return true;
+}
+
+static bool vxlan_group_used_by_vnifilter(struct vxlan_dev *vxlan,
+ union vxlan_addr *ip, int ifindex)
+{
+ struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
+ struct vxlan_vni_node *v, *tmp;
+
+ if (vxlan_group_used_match(ip, ifindex,
+ &vxlan->default_dst.remote_ip,
+ vxlan->default_dst.remote_ifindex))
+ return true;
+
+ list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
+ if (!vxlan_addr_multicast(&v->remote_ip))
+ continue;
+
+ if (vxlan_group_used_match(ip, ifindex,
+ &v->remote_ip,
+ vxlan->default_dst.remote_ifindex))
+ return true;
+ }
+
+ return false;
+}
+
+/* See if multicast group is already in use by other ID */
+bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev,
+ __be32 vni, union vxlan_addr *rip, int rifindex)
+{
+ union vxlan_addr *ip = (rip ? : &dev->default_dst.remote_ip);
+ int ifindex = (rifindex ? : dev->default_dst.remote_ifindex);
+ struct vxlan_dev *vxlan;
+ struct vxlan_sock *sock4;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct vxlan_sock *sock6;
+#endif
+ unsigned short family = dev->default_dst.remote_ip.sa.sa_family;
+
+ sock4 = rtnl_dereference(dev->vn4_sock);
+
+ /* The vxlan_sock is only used by dev, leaving group has
+ * no effect on other vxlan devices.
+ */
+ if (family == AF_INET && sock4 && refcount_read(&sock4->refcnt) == 1)
+ return false;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ sock6 = rtnl_dereference(dev->vn6_sock);
+ if (family == AF_INET6 && sock6 && refcount_read(&sock6->refcnt) == 1)
+ return false;
+#endif
+
+ list_for_each_entry(vxlan, &vn->vxlan_list, next) {
+ if (!netif_running(vxlan->dev) || vxlan == dev)
+ continue;
+
+ if (family == AF_INET &&
+ rtnl_dereference(vxlan->vn4_sock) != sock4)
+ continue;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (family == AF_INET6 &&
+ rtnl_dereference(vxlan->vn6_sock) != sock6)
+ continue;
+#endif
+ if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) {
+ if (!vxlan_group_used_by_vnifilter(vxlan, ip, ifindex))
+ continue;
+ } else {
+ if (!vxlan_group_used_match(ip, ifindex,
+ &vxlan->default_dst.remote_ip,
+ vxlan->default_dst.remote_ifindex))
+ continue;
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
+static int vxlan_multicast_join_vnigrp(struct vxlan_dev *vxlan)
+{
+ struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
+ struct vxlan_vni_node *v, *tmp, *vgood = NULL;
+ int ret = 0;
+
+ list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
+ if (!vxlan_addr_multicast(&v->remote_ip))
+ continue;
+ /* skip if address is same as default address */
+ if (vxlan_addr_equal(&v->remote_ip,
+ &vxlan->default_dst.remote_ip))
+ continue;
+ ret = vxlan_igmp_join(vxlan, &v->remote_ip, 0);
+ if (ret == -EADDRINUSE)
+ ret = 0;
+ if (ret)
+ goto out;
+ vgood = v;
+ }
+out:
+ if (ret) {
+ list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
+ if (!vxlan_addr_multicast(&v->remote_ip))
+ continue;
+ if (vxlan_addr_equal(&v->remote_ip,
+ &vxlan->default_dst.remote_ip))
+ continue;
+ vxlan_igmp_leave(vxlan, &v->remote_ip, 0);
+ if (v == vgood)
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static int vxlan_multicast_leave_vnigrp(struct vxlan_dev *vxlan)
+{
+ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+ struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
+ struct vxlan_vni_node *v, *tmp;
+ int last_err = 0, ret;
+
+ list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
+ if (vxlan_addr_multicast(&v->remote_ip) &&
+ !vxlan_group_used(vn, vxlan, v->vni, &v->remote_ip,
+ 0)) {
+ ret = vxlan_igmp_leave(vxlan, &v->remote_ip, 0);
+ if (ret)
+ last_err = ret;
+ }
+ }
+
+ return last_err;
+}
+
+int vxlan_multicast_join(struct vxlan_dev *vxlan)
+{
+ int ret = 0;
+
+ if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip)) {
+ ret = vxlan_igmp_join(vxlan, &vxlan->default_dst.remote_ip,
+ vxlan->default_dst.remote_ifindex);
+ if (ret == -EADDRINUSE)
+ ret = 0;
+ if (ret)
+ return ret;
+ }
+
+ if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
+ return vxlan_multicast_join_vnigrp(vxlan);
+
+ return 0;
+}
+
+int vxlan_multicast_leave(struct vxlan_dev *vxlan)
+{
+ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+ int ret = 0;
+
+ if (vxlan_addr_multicast(&vxlan->default_dst.remote_ip) &&
+ !vxlan_group_used(vn, vxlan, 0, NULL, 0)) {
+ ret = vxlan_igmp_leave(vxlan, &vxlan->default_dst.remote_ip,
+ vxlan->default_dst.remote_ifindex);
+ if (ret)
+ return ret;
+ }
+
+ if (vxlan->cfg.flags & VXLAN_F_VNIFILTER)
+ return vxlan_multicast_leave_vnigrp(vxlan);
+
+ return 0;
+}
diff --git a/drivers/net/vxlan/vxlan_private.h b/drivers/net/vxlan/vxlan_private.h
new file mode 100644
index 000000000000..599c3b4fdd5e
--- /dev/null
+++ b/drivers/net/vxlan/vxlan_private.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Vxlan private header file
+ *
+ */
+
+#ifndef _VXLAN_PRIVATE_H
+#define _VXLAN_PRIVATE_H
+
+#include <linux/rhashtable.h>
+
+extern unsigned int vxlan_net_id;
+extern const u8 all_zeros_mac[ETH_ALEN + 2];
+extern const struct rhashtable_params vxlan_vni_rht_params;
+
+#define PORT_HASH_BITS 8
+#define PORT_HASH_SIZE (1 << PORT_HASH_BITS)
+
+/* per-network namespace private data for this module */
+struct vxlan_net {
+ struct list_head vxlan_list;
+ struct hlist_head sock_list[PORT_HASH_SIZE];
+ spinlock_t sock_lock;
+ struct notifier_block nexthop_notifier_block;
+};
+
+/* Forwarding table entry */
+struct vxlan_fdb {
+ struct hlist_node hlist; /* linked list of entries */
+ struct rcu_head rcu;
+ unsigned long updated; /* jiffies */
+ unsigned long used;
+ struct list_head remotes;
+ u8 eth_addr[ETH_ALEN];
+ u16 state; /* see ndm_state */
+ __be32 vni;
+ u16 flags; /* see ndm_flags and below */
+ struct list_head nh_list;
+ struct nexthop __rcu *nh;
+ struct vxlan_dev __rcu *vdev;
+};
+
+#define NTF_VXLAN_ADDED_BY_USER 0x100
+
+/* Virtual Network hash table head */
+static inline struct hlist_head *vni_head(struct vxlan_sock *vs, __be32 vni)
+{
+ return &vs->vni_list[hash_32((__force u32)vni, VNI_HASH_BITS)];
+}
+
+/* Socket hash table head */
+static inline struct hlist_head *vs_head(struct net *net, __be16 port)
+{
+ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+
+ return &vn->sock_list[hash_32(ntohs(port), PORT_HASH_BITS)];
+}
+
+/* First remote destination for a forwarding entry.
+ * Guaranteed to be non-NULL because remotes are never deleted.
+ */
+static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb)
+{
+ if (rcu_access_pointer(fdb->nh))
+ return NULL;
+ return list_entry_rcu(fdb->remotes.next, struct vxlan_rdst, list);
+}
+
+static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
+{
+ if (rcu_access_pointer(fdb->nh))
+ return NULL;
+ return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static inline
+bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
+{
+ if (a->sa.sa_family != b->sa.sa_family)
+ return false;
+ if (a->sa.sa_family == AF_INET6)
+ return ipv6_addr_equal(&a->sin6.sin6_addr, &b->sin6.sin6_addr);
+ else
+ return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
+}
+
+#else /* !CONFIG_IPV6 */
+
+static inline
+bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
+{
+ return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
+}
+
+#endif
+
+static inline struct vxlan_vni_node *
+vxlan_vnifilter_lookup(struct vxlan_dev *vxlan, __be32 vni)
+{
+ struct vxlan_vni_group *vg;
+
+ vg = rcu_dereference_rtnl(vxlan->vnigrp);
+ if (!vg)
+ return NULL;
+
+ return rhashtable_lookup_fast(&vg->vni_hash, &vni,
+ vxlan_vni_rht_params);
+}
+
+/* vxlan_core.c */
+int vxlan_fdb_create(struct vxlan_dev *vxlan,
+ const u8 *mac, union vxlan_addr *ip,
+ __u16 state, __be16 port, __be32 src_vni,
+ __be32 vni, __u32 ifindex, __u16 ndm_flags,
+ u32 nhid, struct vxlan_fdb **fdb,
+ struct netlink_ext_ack *extack);
+int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
+ const unsigned char *addr, union vxlan_addr ip,
+ __be16 port, __be32 src_vni, __be32 vni,
+ u32 ifindex, bool swdev_notify);
+u32 eth_vni_hash(const unsigned char *addr, __be32 vni);
+u32 fdb_head_index(struct vxlan_dev *vxlan, const u8 *mac, __be32 vni);
+int vxlan_fdb_update(struct vxlan_dev *vxlan,
+ const u8 *mac, union vxlan_addr *ip,
+ __u16 state, __u16 flags,
+ __be16 port, __be32 src_vni, __be32 vni,
+ __u32 ifindex, __u16 ndm_flags, u32 nhid,
+ bool swdev_notify, struct netlink_ext_ack *extack);
+int vxlan_vni_in_use(struct net *src_net, struct vxlan_dev *vxlan,
+ struct vxlan_config *conf, __be32 vni);
+
+/* vxlan_vnifilter.c */
+int vxlan_vnigroup_init(struct vxlan_dev *vxlan);
+void vxlan_vnigroup_uninit(struct vxlan_dev *vxlan);
+
+void vxlan_vnifilter_init(void);
+void vxlan_vnifilter_uninit(void);
+void vxlan_vnifilter_count(struct vxlan_dev *vxlan, __be32 vni,
+ struct vxlan_vni_node *vninode,
+ int type, unsigned int len);
+
+void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan,
+ struct vxlan_sock *vs,
+ bool ipv6);
+void vxlan_vs_del_vnigrp(struct vxlan_dev *vxlan);
+int vxlan_vnilist_update_group(struct vxlan_dev *vxlan,
+ union vxlan_addr *old_remote_ip,
+ union vxlan_addr *new_remote_ip,
+ struct netlink_ext_ack *extack);
+
+
+/* vxlan_multicast.c */
+int vxlan_multicast_join(struct vxlan_dev *vxlan);
+int vxlan_multicast_leave(struct vxlan_dev *vxlan);
+bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev,
+ __be32 vni, union vxlan_addr *rip, int rifindex);
+int vxlan_igmp_join(struct vxlan_dev *vxlan, union vxlan_addr *rip,
+ int rifindex);
+int vxlan_igmp_leave(struct vxlan_dev *vxlan, union vxlan_addr *rip,
+ int rifindex);
+#endif
diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c
new file mode 100644
index 000000000000..9f28d0b6a6b2
--- /dev/null
+++ b/drivers/net/vxlan/vxlan_vnifilter.c
@@ -0,0 +1,999 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Vxlan vni filter for collect metadata mode
+ *
+ * Authors: Roopa Prabhu <roopa@nvidia.com>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/etherdevice.h>
+#include <linux/rhashtable.h>
+#include <net/rtnetlink.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/vxlan.h>
+
+#include "vxlan_private.h"
+
+static inline int vxlan_vni_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct vxlan_vni_node *vnode = ptr;
+ __be32 vni = *(__be32 *)arg->key;
+
+ return vnode->vni != vni;
+}
+
+const struct rhashtable_params vxlan_vni_rht_params = {
+ .head_offset = offsetof(struct vxlan_vni_node, vnode),
+ .key_offset = offsetof(struct vxlan_vni_node, vni),
+ .key_len = sizeof(__be32),
+ .nelem_hint = 3,
+ .max_size = VXLAN_N_VID,
+ .obj_cmpfn = vxlan_vni_cmp,
+ .automatic_shrinking = true,
+};
+
+static void vxlan_vs_add_del_vninode(struct vxlan_dev *vxlan,
+ struct vxlan_vni_node *v,
+ bool del)
+{
+ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+ struct vxlan_dev_node *node;
+ struct vxlan_sock *vs;
+
+ spin_lock(&vn->sock_lock);
+ if (del) {
+ if (!hlist_unhashed(&v->hlist4.hlist))
+ hlist_del_init_rcu(&v->hlist4.hlist);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (!hlist_unhashed(&v->hlist6.hlist))
+ hlist_del_init_rcu(&v->hlist6.hlist);
+#endif
+ goto out;
+ }
+
+#if IS_ENABLED(CONFIG_IPV6)
+ vs = rtnl_dereference(vxlan->vn6_sock);
+ if (vs && v) {
+ node = &v->hlist6;
+ hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
+ }
+#endif
+ vs = rtnl_dereference(vxlan->vn4_sock);
+ if (vs && v) {
+ node = &v->hlist4;
+ hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
+ }
+out:
+ spin_unlock(&vn->sock_lock);
+}
+
+void vxlan_vs_add_vnigrp(struct vxlan_dev *vxlan,
+ struct vxlan_sock *vs,
+ bool ipv6)
+{
+ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+ struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
+ struct vxlan_vni_node *v, *tmp;
+ struct vxlan_dev_node *node;
+
+ if (!vg)
+ return;
+
+ spin_lock(&vn->sock_lock);
+ list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ipv6)
+ node = &v->hlist6;
+ else
+#endif
+ node = &v->hlist4;
+ node->vxlan = vxlan;
+ hlist_add_head_rcu(&node->hlist, vni_head(vs, v->vni));
+ }
+ spin_unlock(&vn->sock_lock);
+}
+
+void vxlan_vs_del_vnigrp(struct vxlan_dev *vxlan)
+{
+ struct vxlan_vni_group *vg = rtnl_dereference(vxlan->vnigrp);
+ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+ struct vxlan_vni_node *v, *tmp;
+
+ if (!vg)
+ return;
+
+ spin_lock(&vn->sock_lock);
+ list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
+ hlist_del_init_rcu(&v->hlist4.hlist);
+#if IS_ENABLED(CONFIG_IPV6)
+ hlist_del_init_rcu(&v->hlist6.hlist);
+#endif
+ }
+ spin_unlock(&vn->sock_lock);
+}
+
+static void vxlan_vnifilter_stats_get(const struct vxlan_vni_node *vninode,
+ struct vxlan_vni_stats *dest)
+{
+ int i;
+
+ memset(dest, 0, sizeof(*dest));
+ for_each_possible_cpu(i) {
+ struct vxlan_vni_stats_pcpu *pstats;
+ struct vxlan_vni_stats temp;
+ unsigned int start;
+
+ pstats = per_cpu_ptr(vninode->stats, i);
+ do {
+ start = u64_stats_fetch_begin_irq(&pstats->syncp);
+ memcpy(&temp, &pstats->stats, sizeof(temp));
+ } while (u64_stats_fetch_retry_irq(&pstats->syncp, start));
+
+ dest->rx_packets += temp.rx_packets;
+ dest->rx_bytes += temp.rx_bytes;
+ dest->rx_drops += temp.rx_drops;
+ dest->rx_errors += temp.rx_errors;
+ dest->tx_packets += temp.tx_packets;
+ dest->tx_bytes += temp.tx_bytes;
+ dest->tx_drops += temp.tx_drops;
+ dest->tx_errors += temp.tx_errors;
+ }
+}
+
+static void vxlan_vnifilter_stats_add(struct vxlan_vni_node *vninode,
+ int type, unsigned int len)
+{
+ struct vxlan_vni_stats_pcpu *pstats = this_cpu_ptr(vninode->stats);
+
+ u64_stats_update_begin(&pstats->syncp);
+ switch (type) {
+ case VXLAN_VNI_STATS_RX:
+ pstats->stats.rx_bytes += len;
+ pstats->stats.rx_packets++;
+ break;
+ case VXLAN_VNI_STATS_RX_DROPS:
+ pstats->stats.rx_drops++;
+ break;
+ case VXLAN_VNI_STATS_RX_ERRORS:
+ pstats->stats.rx_errors++;
+ break;
+ case VXLAN_VNI_STATS_TX:
+ pstats->stats.tx_bytes += len;
+ pstats->stats.tx_packets++;
+ break;
+ case VXLAN_VNI_STATS_TX_DROPS:
+ pstats->stats.tx_drops++;
+ break;
+ case VXLAN_VNI_STATS_TX_ERRORS:
+ pstats->stats.tx_errors++;
+ break;
+ }
+ u64_stats_update_end(&pstats->syncp);
+}
+
+void vxlan_vnifilter_count(struct vxlan_dev *vxlan, __be32 vni,
+ struct vxlan_vni_node *vninode,
+ int type, unsigned int len)
+{
+ struct vxlan_vni_node *vnode;
+
+ if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
+ return;
+
+ if (vninode) {
+ vnode = vninode;
+ } else {
+ vnode = vxlan_vnifilter_lookup(vxlan, vni);
+ if (!vnode)
+ return;
+ }
+
+ vxlan_vnifilter_stats_add(vnode, type, len);
+}
+
+static u32 vnirange(struct vxlan_vni_node *vbegin,
+ struct vxlan_vni_node *vend)
+{
+ return (be32_to_cpu(vend->vni) - be32_to_cpu(vbegin->vni));
+}
+
+static size_t vxlan_vnifilter_entry_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct tunnel_msg))
+ + nla_total_size(0) /* VXLAN_VNIFILTER_ENTRY */
+ + nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_START */
+ + nla_total_size(sizeof(u32)) /* VXLAN_VNIFILTER_ENTRY_END */
+ + nla_total_size(sizeof(struct in6_addr));/* VXLAN_VNIFILTER_ENTRY_GROUP{6} */
+}
+
+static int __vnifilter_entry_fill_stats(struct sk_buff *skb,
+ const struct vxlan_vni_node *vbegin)
+{
+ struct vxlan_vni_stats vstats;
+ struct nlattr *vstats_attr;
+
+ vstats_attr = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY_STATS);
+ if (!vstats_attr)
+ goto out_stats_err;
+
+ vxlan_vnifilter_stats_get(vbegin, &vstats);
+ if (nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_BYTES,
+ vstats.rx_bytes, VNIFILTER_ENTRY_STATS_PAD) ||
+ nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_PKTS,
+ vstats.rx_packets, VNIFILTER_ENTRY_STATS_PAD) ||
+ nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_DROPS,
+ vstats.rx_drops, VNIFILTER_ENTRY_STATS_PAD) ||
+ nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_RX_ERRORS,
+ vstats.rx_errors, VNIFILTER_ENTRY_STATS_PAD) ||
+ nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_BYTES,
+ vstats.tx_bytes, VNIFILTER_ENTRY_STATS_PAD) ||
+ nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_PKTS,
+ vstats.tx_packets, VNIFILTER_ENTRY_STATS_PAD) ||
+ nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_DROPS,
+ vstats.tx_drops, VNIFILTER_ENTRY_STATS_PAD) ||
+ nla_put_u64_64bit(skb, VNIFILTER_ENTRY_STATS_TX_ERRORS,
+ vstats.tx_errors, VNIFILTER_ENTRY_STATS_PAD))
+ goto out_stats_err;
+
+ nla_nest_end(skb, vstats_attr);
+
+ return 0;
+
+out_stats_err:
+ nla_nest_cancel(skb, vstats_attr);
+ return -EMSGSIZE;
+}
+
+static bool vxlan_fill_vni_filter_entry(struct sk_buff *skb,
+ struct vxlan_vni_node *vbegin,
+ struct vxlan_vni_node *vend,
+ bool fill_stats)
+{
+ struct nlattr *ventry;
+ u32 vs = be32_to_cpu(vbegin->vni);
+ u32 ve = 0;
+
+ if (vbegin != vend)
+ ve = be32_to_cpu(vend->vni);
+
+ ventry = nla_nest_start(skb, VXLAN_VNIFILTER_ENTRY);
+ if (!ventry)
+ return false;
+
+ if (nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_START, vs))
+ goto out_err;
+
+ if (ve && nla_put_u32(skb, VXLAN_VNIFILTER_ENTRY_END, ve))
+ goto out_err;
+
+ if (!vxlan_addr_any(&vbegin->remote_ip)) {
+ if (vbegin->remote_ip.sa.sa_family == AF_INET) {
+ if (nla_put_in_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP,
+ vbegin->remote_ip.sin.sin_addr.s_addr))
+ goto out_err;
+#if IS_ENABLED(CONFIG_IPV6)
+ } else {
+ if (nla_put_in6_addr(skb, VXLAN_VNIFILTER_ENTRY_GROUP6,
+ &vbegin->remote_ip.sin6.sin6_addr))
+ goto out_err;
+#endif
+ }
+ }
+
+ if (fill_stats && __vnifilter_entry_fill_stats(skb, vbegin))
+ goto out_err;
+
+ nla_nest_end(skb, ventry);
+
+ return true;
+
+out_err:
+ nla_nest_cancel(skb, ventry);
+
+ return false;
+}
+
+static void vxlan_vnifilter_notify(const struct vxlan_dev *vxlan,
+ struct vxlan_vni_node *vninode, int cmd)
+{
+ struct tunnel_msg *tmsg;
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+ struct net *net = dev_net(vxlan->dev);
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(vxlan_vnifilter_entry_nlmsg_size(), GFP_KERNEL);
+ if (!skb)
+ goto out_err;
+
+ err = -EMSGSIZE;
+ nlh = nlmsg_put(skb, 0, 0, cmd, sizeof(*tmsg), 0);
+ if (!nlh)
+ goto out_err;
+ tmsg = nlmsg_data(nlh);
+ memset(tmsg, 0, sizeof(*tmsg));
+ tmsg->family = AF_BRIDGE;
+ tmsg->ifindex = vxlan->dev->ifindex;
+
+ if (!vxlan_fill_vni_filter_entry(skb, vninode, vninode, false))
+ goto out_err;
+
+ nlmsg_end(skb, nlh);
+ rtnl_notify(skb, net, 0, RTNLGRP_TUNNEL, NULL, GFP_KERNEL);
+
+ return;
+
+out_err:
+ rtnl_set_sk_err(net, RTNLGRP_TUNNEL, err);
+
+ kfree_skb(skb);
+}
+
+static int vxlan_vnifilter_dump_dev(const struct net_device *dev,
+ struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct vxlan_vni_node *tmp, *v, *vbegin = NULL, *vend = NULL;
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct tunnel_msg *new_tmsg, *tmsg;
+ int idx = 0, s_idx = cb->args[1];
+ struct vxlan_vni_group *vg;
+ struct nlmsghdr *nlh;
+ bool dump_stats;
+ int err = 0;
+
+ if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
+ return -EINVAL;
+
+ /* RCU needed because of the vni locking rules (rcu || rtnl) */
+ vg = rcu_dereference(vxlan->vnigrp);
+ if (!vg || !vg->num_vnis)
+ return 0;
+
+ tmsg = nlmsg_data(cb->nlh);
+ dump_stats = !!(tmsg->flags & TUNNEL_MSG_FLAG_STATS);
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ RTM_NEWTUNNEL, sizeof(*new_tmsg), NLM_F_MULTI);
+ if (!nlh)
+ return -EMSGSIZE;
+ new_tmsg = nlmsg_data(nlh);
+ memset(new_tmsg, 0, sizeof(*new_tmsg));
+ new_tmsg->family = PF_BRIDGE;
+ new_tmsg->ifindex = dev->ifindex;
+
+ list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
+ if (idx < s_idx) {
+ idx++;
+ continue;
+ }
+ if (!vbegin) {
+ vbegin = v;
+ vend = v;
+ continue;
+ }
+ if (!dump_stats && vnirange(vend, v) == 1 &&
+ vxlan_addr_equal(&v->remote_ip, &vend->remote_ip)) {
+ goto update_end;
+ } else {
+ if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend,
+ dump_stats)) {
+ err = -EMSGSIZE;
+ break;
+ }
+ idx += vnirange(vbegin, vend) + 1;
+ vbegin = v;
+ }
+update_end:
+ vend = v;
+ }
+
+ if (!err && vbegin) {
+ if (!vxlan_fill_vni_filter_entry(skb, vbegin, vend, dump_stats))
+ err = -EMSGSIZE;
+ }
+
+ cb->args[1] = err ? idx : 0;
+
+ nlmsg_end(skb, nlh);
+
+ return err;
+}
+
+static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int idx = 0, err = 0, s_idx = cb->args[0];
+ struct net *net = sock_net(skb->sk);
+ struct tunnel_msg *tmsg;
+ struct net_device *dev;
+
+ tmsg = nlmsg_data(cb->nlh);
+
+ if (tmsg->flags & ~TUNNEL_MSG_VALID_USER_FLAGS) {
+ NL_SET_ERR_MSG(cb->extack, "Invalid tunnelmsg flags in ancillary header");
+ return -EINVAL;
+ }
+
+ rcu_read_lock();
+ if (tmsg->ifindex) {
+ dev = dev_get_by_index_rcu(net, tmsg->ifindex);
+ if (!dev) {
+ err = -ENODEV;
+ goto out_err;
+ }
+ err = vxlan_vnifilter_dump_dev(dev, skb, cb);
+ /* if the dump completed without an error we return 0 here */
+ if (err != -EMSGSIZE)
+ goto out_err;
+ } else {
+ for_each_netdev_rcu(net, dev) {
+ if (!netif_is_vxlan(dev))
+ continue;
+ if (idx < s_idx)
+ goto skip;
+ err = vxlan_vnifilter_dump_dev(dev, skb, cb);
+ if (err == -EMSGSIZE)
+ break;
+skip:
+ idx++;
+ }
+ }
+ cb->args[0] = idx;
+ rcu_read_unlock();
+
+ return skb->len;
+
+out_err:
+ rcu_read_unlock();
+
+ return err;
+}
+
+static const struct nla_policy vni_filter_entry_policy[VXLAN_VNIFILTER_ENTRY_MAX + 1] = {
+ [VXLAN_VNIFILTER_ENTRY_START] = { .type = NLA_U32 },
+ [VXLAN_VNIFILTER_ENTRY_END] = { .type = NLA_U32 },
+ [VXLAN_VNIFILTER_ENTRY_GROUP] = { .type = NLA_BINARY,
+ .len = sizeof_field(struct iphdr, daddr) },
+ [VXLAN_VNIFILTER_ENTRY_GROUP6] = { .type = NLA_BINARY,
+ .len = sizeof(struct in6_addr) },
+};
+
+static const struct nla_policy vni_filter_policy[VXLAN_VNIFILTER_MAX + 1] = {
+ [VXLAN_VNIFILTER_ENTRY] = { .type = NLA_NESTED },
+};
+
+static int vxlan_update_default_fdb_entry(struct vxlan_dev *vxlan, __be32 vni,
+ union vxlan_addr *old_remote_ip,
+ union vxlan_addr *remote_ip,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_rdst *dst = &vxlan->default_dst;
+ u32 hash_index;
+ int err = 0;
+
+ hash_index = fdb_head_index(vxlan, all_zeros_mac, vni);
+ spin_lock_bh(&vxlan->hash_lock[hash_index]);
+ if (remote_ip && !vxlan_addr_any(remote_ip)) {
+ err = vxlan_fdb_update(vxlan, all_zeros_mac,
+ remote_ip,
+ NUD_REACHABLE | NUD_PERMANENT,
+ NLM_F_APPEND | NLM_F_CREATE,
+ vxlan->cfg.dst_port,
+ vni,
+ vni,
+ dst->remote_ifindex,
+ NTF_SELF, 0, true, extack);
+ if (err) {
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+ return err;
+ }
+ }
+
+ if (old_remote_ip && !vxlan_addr_any(old_remote_ip)) {
+ __vxlan_fdb_delete(vxlan, all_zeros_mac,
+ *old_remote_ip,
+ vxlan->cfg.dst_port,
+ vni, vni,
+ dst->remote_ifindex,
+ true);
+ }
+ spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+
+ return err;
+}
+
+static int vxlan_vni_update_group(struct vxlan_dev *vxlan,
+ struct vxlan_vni_node *vninode,
+ union vxlan_addr *group,
+ bool create, bool *changed,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+ struct vxlan_rdst *dst = &vxlan->default_dst;
+ union vxlan_addr *newrip = NULL, *oldrip = NULL;
+ union vxlan_addr old_remote_ip;
+ int ret = 0;
+
+ memcpy(&old_remote_ip, &vninode->remote_ip, sizeof(old_remote_ip));
+
+ /* if per vni remote ip is not present use vxlan dev
+ * default dst remote ip for fdb entry
+ */
+ if (group && !vxlan_addr_any(group)) {
+ newrip = group;
+ } else {
+ if (!vxlan_addr_any(&dst->remote_ip))
+ newrip = &dst->remote_ip;
+ }
+
+ /* if old rip exists, and no newrip,
+ * explicitly delete old rip
+ */
+ if (!newrip && !vxlan_addr_any(&old_remote_ip))
+ oldrip = &old_remote_ip;
+
+ if (!newrip && !oldrip)
+ return 0;
+
+ if (!create && oldrip && newrip && vxlan_addr_equal(oldrip, newrip))
+ return 0;
+
+ ret = vxlan_update_default_fdb_entry(vxlan, vninode->vni,
+ oldrip, newrip,
+ extack);
+ if (ret)
+ goto out;
+
+ if (group)
+ memcpy(&vninode->remote_ip, group, sizeof(vninode->remote_ip));
+
+ if (vxlan->dev->flags & IFF_UP) {
+ if (vxlan_addr_multicast(&old_remote_ip) &&
+ !vxlan_group_used(vn, vxlan, vninode->vni,
+ &old_remote_ip,
+ vxlan->default_dst.remote_ifindex)) {
+ ret = vxlan_igmp_leave(vxlan, &old_remote_ip,
+ 0);
+ if (ret)
+ goto out;
+ }
+
+ if (vxlan_addr_multicast(&vninode->remote_ip)) {
+ ret = vxlan_igmp_join(vxlan, &vninode->remote_ip, 0);
+ if (ret == -EADDRINUSE)
+ ret = 0;
+ if (ret)
+ goto out;
+ }
+ }
+
+ *changed = true;
+
+ return 0;
+out:
+ return ret;
+}
+
+int vxlan_vnilist_update_group(struct vxlan_dev *vxlan,
+ union vxlan_addr *old_remote_ip,
+ union vxlan_addr *new_remote_ip,
+ struct netlink_ext_ack *extack)
+{
+ struct list_head *headp, *hpos;
+ struct vxlan_vni_group *vg;
+ struct vxlan_vni_node *vent;
+ int ret;
+
+ vg = rtnl_dereference(vxlan->vnigrp);
+
+ headp = &vg->vni_list;
+ list_for_each_prev(hpos, headp) {
+ vent = list_entry(hpos, struct vxlan_vni_node, vlist);
+ if (vxlan_addr_any(&vent->remote_ip)) {
+ ret = vxlan_update_default_fdb_entry(vxlan, vent->vni,
+ old_remote_ip,
+ new_remote_ip,
+ extack);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static void vxlan_vni_delete_group(struct vxlan_dev *vxlan,
+ struct vxlan_vni_node *vninode)
+{
+ struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
+ struct vxlan_rdst *dst = &vxlan->default_dst;
+
+ /* if per vni remote_ip not present, delete the
+ * default dst remote_ip previously added for this vni
+ */
+ if (!vxlan_addr_any(&vninode->remote_ip) ||
+ !vxlan_addr_any(&dst->remote_ip))
+ __vxlan_fdb_delete(vxlan, all_zeros_mac,
+ (vxlan_addr_any(&vninode->remote_ip) ?
+ dst->remote_ip : vninode->remote_ip),
+ vxlan->cfg.dst_port,
+ vninode->vni, vninode->vni,
+ dst->remote_ifindex,
+ true);
+
+ if (vxlan->dev->flags & IFF_UP) {
+ if (vxlan_addr_multicast(&vninode->remote_ip) &&
+ !vxlan_group_used(vn, vxlan, vninode->vni,
+ &vninode->remote_ip,
+ dst->remote_ifindex)) {
+ vxlan_igmp_leave(vxlan, &vninode->remote_ip, 0);
+ }
+ }
+}
+
+static int vxlan_vni_update(struct vxlan_dev *vxlan,
+ struct vxlan_vni_group *vg,
+ __be32 vni, union vxlan_addr *group,
+ bool *changed,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_vni_node *vninode;
+ int ret;
+
+ vninode = rhashtable_lookup_fast(&vg->vni_hash, &vni,
+ vxlan_vni_rht_params);
+ if (!vninode)
+ return 0;
+
+ ret = vxlan_vni_update_group(vxlan, vninode, group, false, changed,
+ extack);
+ if (ret)
+ return ret;
+
+ if (changed)
+ vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);
+
+ return 0;
+}
+
+static void __vxlan_vni_add_list(struct vxlan_vni_group *vg,
+ struct vxlan_vni_node *v)
+{
+ struct list_head *headp, *hpos;
+ struct vxlan_vni_node *vent;
+
+ headp = &vg->vni_list;
+ list_for_each_prev(hpos, headp) {
+ vent = list_entry(hpos, struct vxlan_vni_node, vlist);
+ if (be32_to_cpu(v->vni) < be32_to_cpu(vent->vni))
+ continue;
+ else
+ break;
+ }
+ list_add_rcu(&v->vlist, hpos);
+ vg->num_vnis++;
+}
+
+static void __vxlan_vni_del_list(struct vxlan_vni_group *vg,
+ struct vxlan_vni_node *v)
+{
+ list_del_rcu(&v->vlist);
+ vg->num_vnis--;
+}
+
+static struct vxlan_vni_node *vxlan_vni_alloc(struct vxlan_dev *vxlan,
+ __be32 vni)
+{
+ struct vxlan_vni_node *vninode;
+
+ vninode = kzalloc(sizeof(*vninode), GFP_ATOMIC);
+ if (!vninode)
+ return NULL;
+ vninode->stats = netdev_alloc_pcpu_stats(struct vxlan_vni_stats_pcpu);
+ if (!vninode->stats) {
+ kfree(vninode);
+ return NULL;
+ }
+ vninode->vni = vni;
+ vninode->hlist4.vxlan = vxlan;
+#if IS_ENABLED(CONFIG_IPV6)
+ vninode->hlist6.vxlan = vxlan;
+#endif
+
+ return vninode;
+}
+
+static int vxlan_vni_add(struct vxlan_dev *vxlan,
+ struct vxlan_vni_group *vg,
+ u32 vni, union vxlan_addr *group,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_vni_node *vninode;
+ __be32 v = cpu_to_be32(vni);
+ bool changed = false;
+ int err = 0;
+
+ if (vxlan_vnifilter_lookup(vxlan, v))
+ return vxlan_vni_update(vxlan, vg, v, group, &changed, extack);
+
+ err = vxlan_vni_in_use(vxlan->net, vxlan, &vxlan->cfg, v);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "VNI in use");
+ return err;
+ }
+
+ vninode = vxlan_vni_alloc(vxlan, v);
+ if (!vninode)
+ return -ENOMEM;
+
+ err = rhashtable_lookup_insert_fast(&vg->vni_hash,
+ &vninode->vnode,
+ vxlan_vni_rht_params);
+ if (err) {
+ kfree(vninode);
+ return err;
+ }
+
+ __vxlan_vni_add_list(vg, vninode);
+
+ if (vxlan->dev->flags & IFF_UP)
+ vxlan_vs_add_del_vninode(vxlan, vninode, false);
+
+ err = vxlan_vni_update_group(vxlan, vninode, group, true, &changed,
+ extack);
+
+ if (changed)
+ vxlan_vnifilter_notify(vxlan, vninode, RTM_NEWTUNNEL);
+
+ return err;
+}
+
+static void vxlan_vni_node_rcu_free(struct rcu_head *rcu)
+{
+ struct vxlan_vni_node *v;
+
+ v = container_of(rcu, struct vxlan_vni_node, rcu);
+ free_percpu(v->stats);
+ kfree(v);
+}
+
+static int vxlan_vni_del(struct vxlan_dev *vxlan,
+ struct vxlan_vni_group *vg,
+ u32 vni, struct netlink_ext_ack *extack)
+{
+ struct vxlan_vni_node *vninode;
+ __be32 v = cpu_to_be32(vni);
+ int err = 0;
+
+ vg = rtnl_dereference(vxlan->vnigrp);
+
+ vninode = rhashtable_lookup_fast(&vg->vni_hash, &v,
+ vxlan_vni_rht_params);
+ if (!vninode) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ vxlan_vni_delete_group(vxlan, vninode);
+
+ err = rhashtable_remove_fast(&vg->vni_hash,
+ &vninode->vnode,
+ vxlan_vni_rht_params);
+ if (err)
+ goto out;
+
+ __vxlan_vni_del_list(vg, vninode);
+
+ vxlan_vnifilter_notify(vxlan, vninode, RTM_DELTUNNEL);
+
+ if (vxlan->dev->flags & IFF_UP)
+ vxlan_vs_add_del_vninode(vxlan, vninode, true);
+
+ call_rcu(&vninode->rcu, vxlan_vni_node_rcu_free);
+
+ return 0;
+out:
+ return err;
+}
+
+static int vxlan_vni_add_del(struct vxlan_dev *vxlan, __u32 start_vni,
+ __u32 end_vni, union vxlan_addr *group,
+ int cmd, struct netlink_ext_ack *extack)
+{
+ struct vxlan_vni_group *vg;
+ int v, err = 0;
+
+ vg = rtnl_dereference(vxlan->vnigrp);
+
+ for (v = start_vni; v <= end_vni; v++) {
+ switch (cmd) {
+ case RTM_NEWTUNNEL:
+ err = vxlan_vni_add(vxlan, vg, v, group, extack);
+ break;
+ case RTM_DELTUNNEL:
+ err = vxlan_vni_del(vxlan, vg, v, extack);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+ if (err)
+ goto out;
+ }
+
+ return 0;
+out:
+ return err;
+}
+
+static int vxlan_process_vni_filter(struct vxlan_dev *vxlan,
+ struct nlattr *nlvnifilter,
+ int cmd, struct netlink_ext_ack *extack)
+{
+ struct nlattr *vattrs[VXLAN_VNIFILTER_ENTRY_MAX + 1];
+ u32 vni_start = 0, vni_end = 0;
+ union vxlan_addr group;
+ int err;
+
+ err = nla_parse_nested(vattrs,
+ VXLAN_VNIFILTER_ENTRY_MAX,
+ nlvnifilter, vni_filter_entry_policy,
+ extack);
+ if (err)
+ return err;
+
+ if (vattrs[VXLAN_VNIFILTER_ENTRY_START]) {
+ vni_start = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_START]);
+ vni_end = vni_start;
+ }
+
+ if (vattrs[VXLAN_VNIFILTER_ENTRY_END])
+ vni_end = nla_get_u32(vattrs[VXLAN_VNIFILTER_ENTRY_END]);
+
+ if (!vni_start && !vni_end) {
+ NL_SET_ERR_MSG_ATTR(extack, nlvnifilter,
+ "vni start nor end found in vni entry");
+ return -EINVAL;
+ }
+
+ if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]) {
+ group.sin.sin_addr.s_addr =
+ nla_get_in_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP]);
+ group.sa.sa_family = AF_INET;
+ } else if (vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]) {
+ group.sin6.sin6_addr =
+ nla_get_in6_addr(vattrs[VXLAN_VNIFILTER_ENTRY_GROUP6]);
+ group.sa.sa_family = AF_INET6;
+ } else {
+ memset(&group, 0, sizeof(group));
+ }
+
+ if (vxlan_addr_multicast(&group) && !vxlan->default_dst.remote_ifindex) {
+ NL_SET_ERR_MSG(extack,
+ "Local interface required for multicast remote group");
+
+ return -EINVAL;
+ }
+
+ err = vxlan_vni_add_del(vxlan, vni_start, vni_end, &group, cmd,
+ extack);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+void vxlan_vnigroup_uninit(struct vxlan_dev *vxlan)
+{
+ struct vxlan_vni_node *v, *tmp;
+ struct vxlan_vni_group *vg;
+
+ vg = rtnl_dereference(vxlan->vnigrp);
+ list_for_each_entry_safe(v, tmp, &vg->vni_list, vlist) {
+ rhashtable_remove_fast(&vg->vni_hash, &v->vnode,
+ vxlan_vni_rht_params);
+ hlist_del_init_rcu(&v->hlist4.hlist);
+#if IS_ENABLED(CONFIG_IPV6)
+ hlist_del_init_rcu(&v->hlist6.hlist);
+#endif
+ __vxlan_vni_del_list(vg, v);
+ vxlan_vnifilter_notify(vxlan, v, RTM_DELTUNNEL);
+ call_rcu(&v->rcu, vxlan_vni_node_rcu_free);
+ }
+ rhashtable_destroy(&vg->vni_hash);
+ kfree(vg);
+}
+
+int vxlan_vnigroup_init(struct vxlan_dev *vxlan)
+{
+ struct vxlan_vni_group *vg;
+ int ret;
+
+ vg = kzalloc(sizeof(*vg), GFP_KERNEL);
+ if (!vg)
+ return -ENOMEM;
+ ret = rhashtable_init(&vg->vni_hash, &vxlan_vni_rht_params);
+ if (ret) {
+ kfree(vg);
+ return ret;
+ }
+ INIT_LIST_HEAD(&vg->vni_list);
+ rcu_assign_pointer(vxlan->vnigrp, vg);
+
+ return 0;
+}
+
+static int vxlan_vnifilter_process(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct tunnel_msg *tmsg;
+ struct vxlan_dev *vxlan;
+ struct net_device *dev;
+ struct nlattr *attr;
+ int err, vnis = 0;
+ int rem;
+
+ /* this should validate the header and check for remaining bytes */
+ err = nlmsg_parse(nlh, sizeof(*tmsg), NULL, VXLAN_VNIFILTER_MAX,
+ vni_filter_policy, extack);
+ if (err < 0)
+ return err;
+
+ tmsg = nlmsg_data(nlh);
+ dev = __dev_get_by_index(net, tmsg->ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ if (!netif_is_vxlan(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "The device is not a vxlan device");
+ return -EINVAL;
+ }
+
+ vxlan = netdev_priv(dev);
+
+ if (!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))
+ return -EOPNOTSUPP;
+
+ nlmsg_for_each_attr(attr, nlh, sizeof(*tmsg), rem) {
+ switch (nla_type(attr)) {
+ case VXLAN_VNIFILTER_ENTRY:
+ err = vxlan_process_vni_filter(vxlan, attr,
+ nlh->nlmsg_type, extack);
+ break;
+ default:
+ continue;
+ }
+ vnis++;
+ if (err)
+ break;
+ }
+
+ if (!vnis) {
+ NL_SET_ERR_MSG_MOD(extack, "No vnis found to process");
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+void vxlan_vnifilter_init(void)
+{
+ rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_GETTUNNEL, NULL,
+ vxlan_vnifilter_dump, 0);
+ rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_NEWTUNNEL,
+ vxlan_vnifilter_process, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_BRIDGE, RTM_DELTUNNEL,
+ vxlan_vnifilter_process, NULL, 0);
+}
+
+void vxlan_vnifilter_uninit(void)
+{
+ rtnl_unregister(PF_BRIDGE, RTM_GETTUNNEL);
+ rtnl_unregister(PF_BRIDGE, RTM_NEWTUNNEL);
+ rtnl_unregister(PF_BRIDGE, RTM_DELTUNNEL);
+}
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 5a934bebe630..bca5b01af247 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -227,11 +227,56 @@ struct vxlan_config {
enum ifla_vxlan_df df;
};
+enum {
+ VXLAN_VNI_STATS_RX,
+ VXLAN_VNI_STATS_RX_DROPS,
+ VXLAN_VNI_STATS_RX_ERRORS,
+ VXLAN_VNI_STATS_TX,
+ VXLAN_VNI_STATS_TX_DROPS,
+ VXLAN_VNI_STATS_TX_ERRORS,
+};
+
+struct vxlan_vni_stats {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 rx_drops;
+ u64 rx_errors;
+ u64 tx_packets;
+ u64 tx_bytes;
+ u64 tx_drops;
+ u64 tx_errors;
+};
+
+struct vxlan_vni_stats_pcpu {
+ struct vxlan_vni_stats stats;
+ struct u64_stats_sync syncp;
+};
+
struct vxlan_dev_node {
struct hlist_node hlist;
struct vxlan_dev *vxlan;
};
+struct vxlan_vni_node {
+ struct rhash_head vnode;
+ struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */
+#if IS_ENABLED(CONFIG_IPV6)
+ struct vxlan_dev_node hlist6; /* vni hash table for IPv6 socket */
+#endif
+ struct list_head vlist;
+ __be32 vni;
+ union vxlan_addr remote_ip; /* default remote ip for this vni */
+ struct vxlan_vni_stats_pcpu __percpu *stats;
+
+ struct rcu_head rcu;
+};
+
+struct vxlan_vni_group {
+ struct rhashtable vni_hash;
+ struct list_head vni_list;
+ u32 num_vnis;
+};
+
/* Pseudo network device */
struct vxlan_dev {
struct vxlan_dev_node hlist4; /* vni hash table for IPv4 socket */
@@ -254,6 +299,8 @@ struct vxlan_dev {
struct vxlan_config cfg;
+ struct vxlan_vni_group __rcu *vnigrp;
+
struct hlist_head fdb_head[FDB_HASH_SIZE];
};
@@ -274,6 +321,7 @@ struct vxlan_dev {
#define VXLAN_F_GPE 0x4000
#define VXLAN_F_IPV6_LINKLOCAL 0x8000
#define VXLAN_F_TTL_INHERIT 0x10000
+#define VXLAN_F_VNIFILTER 0x20000
/* Flags that are used in the receive path. These flags must match in
* order for a socket to be shareable
@@ -283,7 +331,8 @@ struct vxlan_dev {
VXLAN_F_UDP_ZERO_CSUM6_RX | \
VXLAN_F_REMCSUM_RX | \
VXLAN_F_REMCSUM_NOPARTIAL | \
- VXLAN_F_COLLECT_METADATA)
+ VXLAN_F_COLLECT_METADATA | \
+ VXLAN_F_VNIFILTER)
/* Flags that can be set together with VXLAN_F_GPE. */
#define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \
@@ -292,7 +341,8 @@ struct vxlan_dev {
VXLAN_F_UDP_ZERO_CSUM_TX | \
VXLAN_F_UDP_ZERO_CSUM6_TX | \
VXLAN_F_UDP_ZERO_CSUM6_RX | \
- VXLAN_F_COLLECT_METADATA)
+ VXLAN_F_COLLECT_METADATA | \
+ VXLAN_F_VNIFILTER)
struct net_device *vxlan_dev_create(struct net *net, const char *name,
u8 name_assign_type, struct vxlan_config *conf);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index be09d2ad4b5d..e315e53125f4 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -713,7 +713,55 @@ enum ipvlan_mode {
#define IPVLAN_F_PRIVATE 0x01
#define IPVLAN_F_VEPA 0x02
+/* Tunnel RTM header */
+struct tunnel_msg {
+ __u8 family;
+ __u8 flags;
+ __u16 reserved2;
+ __u32 ifindex;
+};
+
/* VXLAN section */
+
+/* include statistics in the dump */
+#define TUNNEL_MSG_FLAG_STATS 0x01
+
+#define TUNNEL_MSG_VALID_USER_FLAGS TUNNEL_MSG_FLAG_STATS
+
+/* Embedded inside VXLAN_VNIFILTER_ENTRY_STATS */
+enum {
+ VNIFILTER_ENTRY_STATS_UNSPEC,
+ VNIFILTER_ENTRY_STATS_RX_BYTES,
+ VNIFILTER_ENTRY_STATS_RX_PKTS,
+ VNIFILTER_ENTRY_STATS_RX_DROPS,
+ VNIFILTER_ENTRY_STATS_RX_ERRORS,
+ VNIFILTER_ENTRY_STATS_TX_BYTES,
+ VNIFILTER_ENTRY_STATS_TX_PKTS,
+ VNIFILTER_ENTRY_STATS_TX_DROPS,
+ VNIFILTER_ENTRY_STATS_TX_ERRORS,
+ VNIFILTER_ENTRY_STATS_PAD,
+ __VNIFILTER_ENTRY_STATS_MAX
+};
+#define VNIFILTER_ENTRY_STATS_MAX (__VNIFILTER_ENTRY_STATS_MAX - 1)
+
+enum {
+ VXLAN_VNIFILTER_ENTRY_UNSPEC,
+ VXLAN_VNIFILTER_ENTRY_START,
+ VXLAN_VNIFILTER_ENTRY_END,
+ VXLAN_VNIFILTER_ENTRY_GROUP,
+ VXLAN_VNIFILTER_ENTRY_GROUP6,
+ VXLAN_VNIFILTER_ENTRY_STATS,
+ __VXLAN_VNIFILTER_ENTRY_MAX
+};
+#define VXLAN_VNIFILTER_ENTRY_MAX (__VXLAN_VNIFILTER_ENTRY_MAX - 1)
+
+enum {
+ VXLAN_VNIFILTER_UNSPEC,
+ VXLAN_VNIFILTER_ENTRY,
+ __VXLAN_VNIFILTER_MAX
+};
+#define VXLAN_VNIFILTER_MAX (__VXLAN_VNIFILTER_MAX - 1)
+
enum {
IFLA_VXLAN_UNSPEC,
IFLA_VXLAN_ID,
@@ -745,6 +793,7 @@ enum {
IFLA_VXLAN_GPE,
IFLA_VXLAN_TTL_INHERIT,
IFLA_VXLAN_DF,
+ IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 93d934cc4613..0970cb4b1b88 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -185,6 +185,13 @@ enum {
RTM_GETNEXTHOPBUCKET,
#define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET
+ RTM_NEWTUNNEL = 120,
+#define RTM_NEWTUNNEL RTM_NEWTUNNEL
+ RTM_DELTUNNEL,
+#define RTM_DELTUNNEL RTM_DELTUNNEL
+ RTM_GETTUNNEL,
+#define RTM_GETTUNNEL RTM_GETTUNNEL
+
__RTM_MAX,
#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1)
};
@@ -756,6 +763,8 @@ enum rtnetlink_groups {
#define RTNLGRP_BRVLAN RTNLGRP_BRVLAN
RTNLGRP_MCTP_IFADDR,
#define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR
+ RTNLGRP_TUNNEL,
+#define RTNLGRP_TUNNEL RTNLGRP_TUNNEL
__RTNLGRP_MAX
};
#define RTNLGRP_MAX (__RTNLGRP_MAX - 1)
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 94ea2a8b2bb7..6ad3ee02e023 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -91,6 +91,9 @@ static const struct nlmsg_perm nlmsg_route_perms[] =
{ RTM_NEWNEXTHOPBUCKET, NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
{ RTM_DELNEXTHOPBUCKET, NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
{ RTM_GETNEXTHOPBUCKET, NETLINK_ROUTE_SOCKET__NLMSG_READ },
+ { RTM_NEWTUNNEL, NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+ { RTM_DELTUNNEL, NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+ { RTM_GETTUNNEL, NETLINK_ROUTE_SOCKET__NLMSG_READ },
};
static const struct nlmsg_perm nlmsg_tcpdiag_perms[] =
@@ -176,7 +179,7 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
* structures at the top of this file with the new mappings
* before updating the BUILD_BUG_ON() macro!
*/
- BUILD_BUG_ON(RTM_MAX != (RTM_NEWNEXTHOPBUCKET + 3));
+ BUILD_BUG_ON(RTM_MAX != (RTM_NEWTUNNEL + 3));
err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms,
sizeof(nlmsg_route_perms));
break;
diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
new file mode 100755
index 000000000000..704997ffc244
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
@@ -0,0 +1,579 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking the VXLAN vni filtering api and
+# datapath.
+# It simulates two hypervisors running two VMs each using four network
+# six namespaces: two for the HVs, four for the VMs. Each VM is
+# connected to a separate bridge. The VM's use overlapping vlans and
+# hence the separate bridge domain. Each vxlan device is a collect
+# metadata device with vni filtering and hence has the ability to
+# terminate configured vni's only.
+
+# +--------------------------------+ +------------------------------------+
+# | vm-11 netns | | vm-21 netns |
+# | | | |
+# |+------------+ +-------------+ | |+-------------+ +----------------+ |
+# ||veth-11.10 | |veth-11.20 | | ||veth-21.10 | | veth-21.20 | |
+# ||10.0.10.11/24 |10.0.20.11/24| | ||10.0.10.21/24| | 10.0.20.21/24 | |
+# |+------|-----+ +|------------+ | |+-----------|-+ +---|------------+ |
+# | | | | | | | |
+# | | | | | +------------+ |
+# | +------------+ | | | veth-21 | |
+# | | veth-11 | | | | | |
+# | | | | | +-----|------+ |
+# | +-----|------+ | | | |
+# | | | | | |
+# +------------|-------------------+ +---------------|--------------------+
+# +------------|-----------------------------------------|-------------------+
+# | +-----|------+ +-----|------+ |
+# | |vethhv-11 | |vethhv-21 | |
+# | +----|-------+ +-----|------+ |
+# | +---|---+ +---|--+ |
+# | | br1 | | br2 | |
+# | +---|---+ +---|--+ |
+# | +---|----+ +---|--+ |
+# | | vxlan1| |vxlan2| |
+# | +--|-----+ +--|---+ |
+# | | | |
+# | | +---------------------+ | |
+# | | |veth0 | | |
+# | +---------|172.16.0.1/24 -----------+ |
+# | |2002:fee1::1/64 | |
+# | hv-1 netns +--------|------------+ |
+# +-----------------------------|--------------------------------------------+
+# |
+# +-----------------------------|--------------------------------------------+
+# | hv-2 netns +--------|-------------+ |
+# | | veth0 | |
+# | +------| 172.16.0.2/24 |---+ |
+# | | | 2002:fee1::2/64 | | |
+# | | | | | |
+# | | +----------------------+ | - |
+# | | | |
+# | +-|-------+ +--------|-+ |
+# | | vxlan1 | | vxlan2 | |
+# | +----|----+ +---|------+ |
+# | +--|--+ +-|---+ |
+# | | br1 | | br2 | |
+# | +--|--+ +--|--+ |
+# | +-----|-------+ +----|-------+ |
+# | | vethhv-12 | |vethhv-22 | |
+# | +------|------+ +-------|----+ |
+# +-----------------|----------------------------|---------------------------+
+# | |
+# +-----------------|-----------------+ +--------|---------------------------+
+# | +-------|---+ | | +--|---------+ |
+# | | veth-12 | | | |veth-22 | |
+# | +-|--------|+ | | +--|--------|+ |
+# | | | | | | | |
+# |+----------|--+ +---|-----------+ | |+-------|-----+ +|---------------+ |
+# ||veth-12.10 | |veth-12.20 | | ||veth-22.10 | |veth-22.20 | |
+# ||10.0.10.12/24| |10.0.20.12/24 | | ||10.0.10.22/24| |10.0.20.22/24 | |
+# |+-------------+ +---------------+ | |+-------------+ +----------------+ |
+# | | | |
+# | | | |
+# | vm-12 netns | |vm-22 netns |
+# +-----------------------------------+ +------------------------------------+
+#
+#
+# This test tests the new vxlan vnifiltering api
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# all tests in this script. Can be overridden with -t option
+TESTS="
+ vxlan_vnifilter_api
+ vxlan_vnifilter_datapath
+ vxlan_vnifilter_datapath_pervni
+ vxlan_vnifilter_datapath_mgroup
+ vxlan_vnifilter_datapath_mgroup_pervni
+ vxlan_vnifilter_metadata_and_traditional_mix
+"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf " TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf " TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+}
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf "COMMAND: $cmd\n"
+ stderr=
+ fi
+
+ out=$(eval $cmd $stderr)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+check_hv_connectivity() {
+ ip netns exec hv-1 ping -c 1 -W 1 $1 &>/dev/null
+ sleep 1
+ ip netns exec hv-1 ping -c 1 -W 1 $2 &>/dev/null
+
+ return $?
+}
+
+check_vm_connectivity() {
+ run_cmd "ip netns exec vm-11 ping -c 1 -W 1 10.0.10.12"
+ log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)"
+
+ run_cmd "ip netns exec vm-21 ping -c 1 -W 1 10.0.10.22"
+ log_test $? 0 "VM connectivity over $1 (ipv6 default rdst)"
+}
+
+cleanup() {
+ ip link del veth-hv-1 2>/dev/null || true
+ ip link del vethhv-11 vethhv-12 vethhv-21 vethhv-22 2>/dev/null || true
+
+ for ns in hv-1 hv-2 vm-11 vm-21 vm-12 vm-22 vm-31 vm-32; do
+ ip netns del $ns 2>/dev/null || true
+ done
+}
+
+trap cleanup EXIT
+
+setup-hv-networking() {
+ hv=$1
+ local1=$2
+ mask1=$3
+ local2=$4
+ mask2=$5
+
+ ip netns add hv-$hv
+ ip link set veth-hv-$hv netns hv-$hv
+ ip -netns hv-$hv link set veth-hv-$hv name veth0
+ ip -netns hv-$hv addr add $local1/$mask1 dev veth0
+ ip -netns hv-$hv addr add $local2/$mask2 dev veth0
+ ip -netns hv-$hv link set veth0 up
+}
+
+# Setups a "VM" simulated by a netns an a veth pair
+# example: setup-vm <hvid> <vmid> <brid> <VATTRS> <mcast_for_bum>
+# VATTRS = comma separated "<vlan>-<v[46]>-<localip>-<remoteip>-<VTYPE>-<vxlandstport>"
+# VTYPE = vxlan device type. "default = traditional device, metadata = metadata device
+# vnifilter = vnifiltering device,
+# vnifilterg = vnifiltering device with per vni group/remote"
+# example:
+# setup-vm 1 11 1 \
+# 10-v4-172.16.0.1-239.1.1.100-vnifilterg,20-v4-172.16.0.1-239.1.1.100-vnifilterg 1
+#
+setup-vm() {
+ hvid=$1
+ vmid=$2
+ brid=$3
+ vattrs=$4
+ mcast=$5
+ lastvxlandev=""
+
+ # create bridge
+ ip -netns hv-$hvid link add br$brid type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ mcast_snooping 0
+ ip -netns hv-$hvid link set br$brid up
+
+ # create vm namespace and interfaces and connect to hypervisor
+ # namespace
+ ip netns add vm-$vmid
+ hvvethif="vethhv-$vmid"
+ vmvethif="veth-$vmid"
+ ip link add $hvvethif type veth peer name $vmvethif
+ ip link set $hvvethif netns hv-$hvid
+ ip link set $vmvethif netns vm-$vmid
+ ip -netns hv-$hvid link set $hvvethif up
+ ip -netns vm-$vmid link set $vmvethif up
+ ip -netns hv-$hvid link set $hvvethif master br$brid
+
+ # configure VM vlan/vni filtering on hypervisor
+ for vmap in $(echo $vattrs | cut -d "," -f1- --output-delimiter=' ')
+ do
+ local vid=$(echo $vmap | awk -F'-' '{print ($1)}')
+ local family=$(echo $vmap | awk -F'-' '{print ($2)}')
+ local localip=$(echo $vmap | awk -F'-' '{print ($3)}')
+ local group=$(echo $vmap | awk -F'-' '{print ($4)}')
+ local vtype=$(echo $vmap | awk -F'-' '{print ($5)}')
+ local port=$(echo $vmap | awk -F'-' '{print ($6)}')
+
+ ip -netns vm-$vmid link add name $vmvethif.$vid link $vmvethif type vlan id $vid
+ ip -netns vm-$vmid addr add 10.0.$vid.$vmid/24 dev $vmvethif.$vid
+ ip -netns vm-$vmid link set $vmvethif.$vid up
+
+ tid=$vid
+ vxlandev="vxlan$brid"
+ vxlandevflags=""
+
+ if [[ -n $vtype && $vtype == "metadata" ]]; then
+ vxlandevflags="$vxlandevflags external"
+ elif [[ -n $vtype && $vtype == "vnifilter" || $vtype == "vnifilterg" ]]; then
+ vxlandevflags="$vxlandevflags external vnifilter"
+ tid=$((vid+brid))
+ else
+ vxlandevflags="$vxlandevflags id $tid"
+ vxlandev="vxlan$tid"
+ fi
+
+ if [[ -n $vtype && $vtype != "vnifilterg" ]]; then
+ if [[ -n "$group" && "$group" != "null" ]]; then
+ if [ $mcast -eq 1 ]; then
+ vxlandevflags="$vxlandevflags group $group"
+ else
+ vxlandevflags="$vxlandevflags remote $group"
+ fi
+ fi
+ fi
+
+ if [[ -n "$port" && "$port" != "default" ]]; then
+ vxlandevflags="$vxlandevflags dstport $port"
+ fi
+
+ # create vxlan device
+ if [ "$vxlandev" != "$lastvxlandev" ]; then
+ ip -netns hv-$hvid link add $vxlandev type vxlan local $localip $vxlandevflags dev veth0 2>/dev/null
+ ip -netns hv-$hvid link set $vxlandev master br$brid
+ ip -netns hv-$hvid link set $vxlandev up
+ lastvxlandev=$vxlandev
+ fi
+
+ # add vlan
+ bridge -netns hv-$hvid vlan add vid $vid dev $hvvethif
+ bridge -netns hv-$hvid vlan add vid $vid pvid dev $vxlandev
+
+ # Add bridge vni filter for tx
+ if [[ -n $vtype && $vtype == "metadata" || $vtype == "vnifilter" || $vtype == "vnifilterg" ]]; then
+ bridge -netns hv-$hvid link set dev $vxlandev vlan_tunnel on
+ bridge -netns hv-$hvid vlan add dev $vxlandev vid $vid tunnel_info id $tid
+ fi
+
+ if [[ -n $vtype && $vtype == "metadata" ]]; then
+ bridge -netns hv-$hvid fdb add 00:00:00:00:00:00 dev $vxlandev \
+ src_vni $tid vni $tid dst $group self
+ elif [[ -n $vtype && $vtype == "vnifilter" ]]; then
+ # Add per vni rx filter with 'bridge vni' api
+ bridge -netns hv-$hvid vni add dev $vxlandev vni $tid
+ elif [[ -n $vtype && $vtype == "vnifilterg" ]]; then
+ # Add per vni group config with 'bridge vni' api
+ if [ -n "$group" ]; then
+ if [ "$family" == "v4" ]; then
+ if [ $mcast -eq 1 ]; then
+ bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group $group
+ else
+ bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote $group
+ fi
+ else
+ if [ $mcast -eq 1 ]; then
+ bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group6 $group
+ else
+ bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote6 $group
+ fi
+ fi
+ fi
+ fi
+ done
+}
+
+setup_vnifilter_api()
+{
+ ip link add veth-host type veth peer name veth-testns
+ ip netns add testns
+ ip link set veth-testns netns testns
+}
+
+cleanup_vnifilter_api()
+{
+ ip link del veth-host 2>/dev/null || true
+ ip netns del testns 2>/dev/null || true
+}
+
+# tests vxlan filtering api
+vxlan_vnifilter_api()
+{
+ hv1addr1="172.16.0.1"
+ hv2addr1="172.16.0.2"
+ hv1addr2="2002:fee1::1"
+ hv2addr2="2002:fee1::2"
+ localip="172.16.0.1"
+ group="239.1.1.101"
+
+ cleanup_vnifilter_api &>/dev/null
+ setup_vnifilter_api
+
+ # Duplicate vni test
+ # create non-vnifiltering traditional vni device
+ run_cmd "ip -netns testns link add vxlan100 type vxlan id 100 local $localip dev veth-testns dstport 4789"
+ log_test $? 0 "Create traditional vxlan device"
+
+ # create vni filtering device
+ run_cmd "ip -netns testns link add vxlan-ext1 type vxlan vnifilter local $localip dev veth-testns dstport 4789"
+ log_test $? 1 "Cannot create vnifilter device without external flag"
+
+ run_cmd "ip -netns testns link add vxlan-ext1 type vxlan external vnifilter local $localip dev veth-testns dstport 4789"
+ log_test $? 0 "Creating external vxlan device with vnifilter flag"
+
+ run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 100"
+ log_test $? 0 "Cannot set in-use vni id on vnifiltering device"
+
+ run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 200"
+ log_test $? 0 "Set new vni id on vnifiltering device"
+
+ run_cmd "ip -netns testns link add vxlan-ext2 type vxlan external vnifilter local $localip dev veth-testns dstport 4789"
+ log_test $? 0 "Create second external vxlan device with vnifilter flag"
+
+ run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 200"
+ log_test $? 255 "Cannot set in-use vni id on vnifiltering device"
+
+ run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300"
+ log_test $? 0 "Set new vni id on vnifiltering device"
+
+ # check in bridge vni show
+ run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300"
+ log_test $? 0 "Update vni id on vnifiltering device"
+
+ run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 400"
+ log_test $? 0 "Add new vni id on vnifiltering device"
+
+ # add multicast group per vni
+ run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 200 group $group"
+ log_test $? 0 "Set multicast group on existing vni"
+
+ # add multicast group per vni
+ run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300 group $group"
+ log_test $? 0 "Set multicast group on existing vni"
+
+ # set vnifilter on an existing external vxlan device
+ run_cmd "ip -netns testns link set dev vxlan-ext1 type vxlan external vnifilter"
+ log_test $? 2 "Cannot set vnifilter flag on a device"
+
+ # change vxlan vnifilter flag
+ run_cmd "ip -netns testns link set dev vxlan-ext1 type vxlan external novnifilter"
+ log_test $? 2 "Cannot unset vnifilter flag on a device"
+}
+
+# Sanity test vnifilter datapath
+# vnifilter vnis inherit BUM group from
+# vxlan device
+vxlan_vnifilter_datapath()
+{
+ hv1addr1="172.16.0.1"
+ hv2addr1="172.16.0.2"
+ hv1addr2="2002:fee1::1"
+ hv2addr2="2002:fee1::2"
+
+ ip link add veth-hv-1 type veth peer name veth-hv-2
+ setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 $hv2addr1 $hv2addr2
+ setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 $hv1addr1 $hv1addr2
+
+ check_hv_connectivity hv2addr1 hv2addr2
+
+ setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0
+ setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0
+
+ setup-vm 2 12 1 10-v4-$hv2addr1-$hv1addr1-vnifilter,20-v4-$hv2addr1-$hv1addr1-vnifilter 0
+ setup-vm 2 22 2 10-v6-$hv2addr2-$hv1addr2-vnifilter,20-v6-$hv2addr2-$hv1addr2-vnifilter 0
+
+ check_vm_connectivity "vnifiltering vxlan"
+}
+
+# Sanity test vnifilter datapath
+# with vnifilter per vni configured BUM
+# group/remote
+vxlan_vnifilter_datapath_pervni()
+{
+ hv1addr1="172.16.0.1"
+ hv2addr1="172.16.0.2"
+ hv1addr2="2002:fee1::1"
+ hv2addr2="2002:fee1::2"
+
+ ip link add veth-hv-1 type veth peer name veth-hv-2
+ setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
+ setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
+
+ check_hv_connectivity hv2addr1 hv2addr2
+
+ setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilterg,20-v4-$hv1addr1-$hv2addr1-vnifilterg 0
+ setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilterg,20-v6-$hv1addr2-$hv2addr2-vnifilterg 0
+
+ setup-vm 2 12 1 10-v4-$hv2addr1-$hv1addr1-vnifilterg,20-v4-$hv2addr1-$hv1addr1-vnifilterg 0
+ setup-vm 2 22 2 10-v6-$hv2addr2-$hv1addr2-vnifilterg,20-v6-$hv2addr2-$hv1addr2-vnifilterg 0
+
+ check_vm_connectivity "vnifiltering vxlan pervni remote"
+}
+
+
+vxlan_vnifilter_datapath_mgroup()
+{
+ hv1addr1="172.16.0.1"
+ hv2addr1="172.16.0.2"
+ hv1addr2="2002:fee1::1"
+ hv2addr2="2002:fee1::2"
+ group="239.1.1.100"
+ group6="ff07::1"
+
+ ip link add veth-hv-1 type veth peer name veth-hv-2
+ setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
+ setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
+
+ check_hv_connectivity hv2addr1 hv2addr2
+
+ setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilter,20-v4-$hv1addr1-$group-vnifilter 1
+ setup-vm 1 21 2 "10-v6-$hv1addr2-$group6-vnifilter,20-v6-$hv1addr2-$group6-vnifilter" 1
+
+ setup-vm 2 12 1 10-v4-$hv2addr1-$group-vnifilter,20-v4-$hv2addr1-$group-vnifilter 1
+ setup-vm 2 22 2 10-v6-$hv2addr2-$group6-vnifilter,20-v6-$hv2addr2-$group6-vnifilter 1
+
+ check_vm_connectivity "vnifiltering vxlan mgroup"
+}
+
+vxlan_vnifilter_datapath_mgroup_pervni()
+{
+ hv1addr1="172.16.0.1"
+ hv2addr1="172.16.0.2"
+ hv1addr2="2002:fee1::1"
+ hv2addr2="2002:fee1::2"
+ group="239.1.1.100"
+ group6="ff07::1"
+
+ ip link add veth-hv-1 type veth peer name veth-hv-2
+ setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
+ setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
+
+ check_hv_connectivity hv2addr1 hv2addr2
+
+ setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilterg,20-v4-$hv1addr1-$group-vnifilterg 1
+ setup-vm 1 21 2 10-v6-$hv1addr2-$group6-vnifilterg,20-v6-$hv1addr2-$group6-vnifilterg 1
+
+ setup-vm 2 12 1 10-v4-$hv2addr1-$group-vnifilterg,20-v4-$hv2addr1-$group-vnifilterg 1
+ setup-vm 2 22 2 10-v6-$hv2addr2-$group6-vnifilterg,20-v6-$hv2addr2-$group6-vnifilterg 1
+
+ check_vm_connectivity "vnifiltering vxlan pervni mgroup"
+}
+
+vxlan_vnifilter_metadata_and_traditional_mix()
+{
+ hv1addr1="172.16.0.1"
+ hv2addr1="172.16.0.2"
+ hv1addr2="2002:fee1::1"
+ hv2addr2="2002:fee1::2"
+
+ ip link add veth-hv-1 type veth peer name veth-hv-2
+ setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
+ setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
+
+ check_hv_connectivity hv2addr1 hv2addr2
+
+ setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0
+ setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0
+ setup-vm 1 31 3 30-v4-$hv1addr1-$hv2addr1-default-4790,40-v6-$hv1addr2-$hv2addr2-default-4790,50-v4-$hv1addr1-$hv2addr1-metadata-4791 0
+
+
+ setup-vm 2 12 1 10-v4-$hv2addr1-$hv1addr1-vnifilter,20-v4-$hv2addr1-$hv1addr1-vnifilter 0
+ setup-vm 2 22 2 10-v6-$hv2addr2-$hv1addr2-vnifilter,20-v6-$hv2addr2-$hv1addr2-vnifilter 0
+ setup-vm 2 32 3 30-v4-$hv2addr1-$hv1addr1-default-4790,40-v6-$hv2addr2-$hv1addr2-default-4790,50-v4-$hv2addr1-$hv1addr1-metadata-4791 0
+
+ check_vm_connectivity "vnifiltering vxlan pervni remote mix"
+
+ # check VM connectivity over traditional/non-vxlan filtering vxlan devices
+ run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.30.32"
+ log_test $? 0 "VM connectivity over traditional vxlan (ipv4 default rdst)"
+
+ run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.40.32"
+ log_test $? 0 "VM connectivity over traditional vxlan (ipv6 default rdst)"
+
+ run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.50.32"
+ log_test $? 0 "VM connectivity over metadata nonfiltering vxlan (ipv4 default rdst)"
+}
+
+while getopts :t:pP46hv o
+do
+ case $o in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip link help vxlan 2>&1 | grep -q "vnifilter"
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 too old, missing vxlan dev vnifilter setting"
+ sync
+ exit $ksft_skip
+fi
+
+bridge vni help 2>&1 | grep -q "Usage: bridge vni"
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 bridge lacks vxlan vnifiltering support"
+ exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+for t in $TESTS
+do
+ case $t in
+ none) setup; exit 0;;
+ *) $t; cleanup;;
+ esac
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret