summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-12-04 19:04:20 +0300
committerDavid S. Miller <davem@davemloft.net>2017-12-04 19:04:20 +0300
commit90ffa72ff483fa3c6a967895c77589d1bdb6918d (patch)
tree91ab1f1ea34f92095448ee935eb50b56a690e4d3
parentc34bc2b5059c515aebc3dde4c34baba6db3fd43f (diff)
parent56ddd30280d5730ad349c425de7811d596b19476 (diff)
downloadlinux-90ffa72ff483fa3c6a967895c77589d1bdb6918d.tar.xz
Merge branch 'ipv6-gre-collect_md'
William Tu says: ==================== add ip6 gre and gretap collect_md mode Similar to gre, vxlan, geneve, ipip tunnels, allow ip6gretap tunnels to operate in collect metadata mode. The first patch adds the support to ip6_gre.c. The second patch enables unsetting the csum for ipv6 tunnel, when using bpf_skb_[gs]et_tunnel_key() helpers. Finally, the last patch adds the ip6 gre and gretap tunnel test cases to BPF sample code. The corresponding iproute2 patch: https://marc.info/?l=linux-netdev&m=151216943128087&w=2 ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/core/filter.c5
-rw-r--r--net/ipv6/ip6_gre.c105
-rw-r--r--net/ipv6/ip6_tunnel.c5
-rw-r--r--samples/bpf/tcbpf2_kern.c43
-rwxr-xr-xsamples/bpf/test_tunnel_bpf.sh65
5 files changed, 210 insertions, 13 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index 6a85e67fafce..8ec5a504eb28 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3026,10 +3026,11 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
IPV6_FLOWLABEL_MASK;
} else {
info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
- if (flags & BPF_F_ZERO_CSUM_TX)
- info->key.tun_flags &= ~TUNNEL_CSUM;
}
+ if (flags & BPF_F_ZERO_CSUM_TX)
+ info->key.tun_flags &= ~TUNNEL_CSUM;
+
return 0;
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 76379f01bcd2..1510ce9a4e4e 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -56,6 +56,7 @@
#include <net/ip6_tunnel.h>
#include <net/gre.h>
#include <net/erspan.h>
+#include <net/dst_metadata.h>
static bool log_ecn_error = true;
@@ -69,6 +70,7 @@ static unsigned int ip6gre_net_id __read_mostly;
struct ip6gre_net {
struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
+ struct ip6_tnl __rcu *collect_md_tun;
struct net_device *fb_tunnel_dev;
};
@@ -229,6 +231,10 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
if (cand)
return cand;
+ t = rcu_dereference(ign->collect_md_tun);
+ if (t && t->dev->flags & IFF_UP)
+ return t;
+
dev = ign->fb_tunnel_dev;
if (dev->flags & IFF_UP)
return netdev_priv(dev);
@@ -264,6 +270,9 @@ static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
{
struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ign->collect_md_tun, t);
+
rcu_assign_pointer(t->next, rtnl_dereference(*tp));
rcu_assign_pointer(*tp, t);
}
@@ -273,6 +282,9 @@ static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
struct ip6_tnl __rcu **tp;
struct ip6_tnl *iter;
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ign->collect_md_tun, NULL);
+
for (tp = ip6gre_bucket(ign, t);
(iter = rtnl_dereference(*tp)) != NULL;
tp = &iter->next) {
@@ -463,7 +475,22 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
&ipv6h->saddr, &ipv6h->daddr, tpi->key,
tpi->proto);
if (tunnel) {
- ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+ if (tunnel->parms.collect_md) {
+ struct metadata_dst *tun_dst;
+ __be64 tun_id;
+ __be16 flags;
+
+ flags = tpi->flags;
+ tun_id = key32_to_tunnel_id(tpi->key);
+
+ tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, 0);
+ if (!tun_dst)
+ return PACKET_REJECT;
+
+ ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
+ } else {
+ ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+ }
return PACKET_RCVD;
}
@@ -633,8 +660,38 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
/* Push GRE header. */
protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
- gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
- protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
+
+ if (tunnel->parms.collect_md) {
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+ __be16 flags;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info ||
+ !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET6))
+ return -EINVAL;
+
+ key = &tun_info->key;
+ memset(fl6, 0, sizeof(*fl6));
+ fl6->flowi6_proto = IPPROTO_GRE;
+ fl6->daddr = key->u.ipv6.dst;
+ fl6->flowlabel = key->label;
+ fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
+ dsfield = key->tos;
+ flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ tunnel->tun_hlen = gre_calc_hlen(flags);
+
+ gre_build_header(skb, tunnel->tun_hlen,
+ flags, protocol,
+ tunnel_id_to_key32(tun_info->key.tun_id), 0);
+
+ } else {
+ gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
+ protocol, tunnel->parms.o_key,
+ htonl(tunnel->o_seqno));
+ }
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
NEXTHDR_GRE);
@@ -645,13 +702,15 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
struct ip6_tnl *t = netdev_priv(dev);
int encap_limit = -1;
struct flowi6 fl6;
- __u8 dsfield;
+ __u8 dsfield = 0;
__u32 mtu;
int err;
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- prepare_ip6gre_xmit_ipv4(skb, dev, &fl6, &dsfield, &encap_limit);
+ if (!t->parms.collect_md)
+ prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
+ &dsfield, &encap_limit);
err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
if (err)
@@ -676,14 +735,15 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
int encap_limit = -1;
struct flowi6 fl6;
- __u8 dsfield;
+ __u8 dsfield = 0;
__u32 mtu;
int err;
if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
return -1;
- if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit))
+ if (!t->parms.collect_md &&
+ prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit))
return -1;
if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
@@ -731,7 +791,8 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
encap_limit = t->parms.encap_limit;
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ if (!t->parms.collect_md)
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
if (err)
@@ -1201,6 +1262,11 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
+ if (tunnel->parms.collect_md) {
+ dev->features |= NETIF_F_NETNS_LOCAL;
+ netif_keep_dst(dev);
+ }
+
return 0;
}
@@ -1215,6 +1281,9 @@ static int ip6gre_tunnel_init(struct net_device *dev)
tunnel = netdev_priv(dev);
+ if (tunnel->parms.collect_md)
+ return 0;
+
memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
@@ -1464,6 +1533,9 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
if (data[IFLA_GRE_ERSPAN_INDEX])
parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+
+ if (data[IFLA_GRE_COLLECT_METADATA])
+ parms->collect_md = true;
}
static int ip6gre_tap_init(struct net_device *dev)
@@ -1622,8 +1694,13 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
ip6gre_netlink_parms(data, &nt->parms);
- if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
- return -EEXIST;
+ if (nt->parms.collect_md) {
+ if (rtnl_dereference(ign->collect_md_tun))
+ return -EEXIST;
+ } else {
+ if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+ return -EEXIST;
+ }
if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
eth_hw_addr_random(dev);
@@ -1742,6 +1819,8 @@ static size_t ip6gre_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_GRE_ENCAP_DPORT */
nla_total_size(2) +
+ /* IFLA_GRE_COLLECT_METADATA */
+ nla_total_size(0) +
/* IFLA_GRE_FWMARK */
nla_total_size(4) +
/* IFLA_GRE_ERSPAN_INDEX */
@@ -1781,6 +1860,11 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
t->encap.flags))
goto nla_put_failure;
+ if (p->collect_md) {
+ if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
+ goto nla_put_failure;
+ }
+
return 0;
nla_put_failure:
@@ -1803,6 +1887,7 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
+ [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
[IFLA_GRE_FWMARK] = { .type = NLA_U32 },
[IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
};
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3d3092adf1d2..6a3b1a54a952 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -861,7 +861,7 @@ int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
struct metadata_dst *tun_dst,
bool log_ecn_err)
{
- return __ip6_tnl_rcv(t, skb, tpi, NULL, ip6ip6_dscp_ecn_decapsulate,
+ return __ip6_tnl_rcv(t, skb, tpi, tun_dst, ip6ip6_dscp_ecn_decapsulate,
log_ecn_err);
}
EXPORT_SYMBOL(ip6_tnl_rcv);
@@ -979,6 +979,9 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
int ret = 0;
struct net *net = t->net;
+ if (t->parms.collect_md)
+ return 1;
+
if ((p->flags & IP6_TNL_F_CAP_XMIT) ||
((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
(ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) {
diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c
index 370b749f5ee6..15a469220e19 100644
--- a/samples/bpf/tcbpf2_kern.c
+++ b/samples/bpf/tcbpf2_kern.c
@@ -81,6 +81,49 @@ int _gre_get_tunnel(struct __sk_buff *skb)
return TC_ACT_OK;
}
+SEC("ip6gretap_set_tunnel")
+int _ip6gretap_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ int ret;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv6[3] = _htonl(0x11); /* ::11 */
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+ key.tunnel_label = 0xabcde;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ip6gretap_get_tunnel")
+int _ip6gretap_get_tunnel(struct __sk_buff *skb)
+{
+ char fmt[] = "key %d remote ip6 ::%x label %x\n";
+ struct bpf_tunnel_key key;
+ int ret;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+
+ return TC_ACT_OK;
+}
+
SEC("erspan_set_tunnel")
int _erspan_set_tunnel(struct __sk_buff *skb)
{
diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh
index 312e1722a39f..226f45381b76 100755
--- a/samples/bpf/test_tunnel_bpf.sh
+++ b/samples/bpf/test_tunnel_bpf.sh
@@ -33,6 +33,30 @@ function add_gre_tunnel {
ip addr add dev $DEV 10.1.1.200/24
}
+function add_ip6gretap_tunnel {
+
+ # assign ipv6 address
+ ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
+ # in namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE flowlabel 0xbcdef key 2 \
+ local ::11 remote ::22
+
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # out of namespace
+ ip link add dev $DEV type $TYPE external
+ ip addr add dev $DEV 10.1.1.200/24
+ ip addr add dev $DEV fc80::200/24
+ ip link set dev $DEV up
+}
+
function add_erspan_tunnel {
# in namespace
ip netns exec at_ns0 \
@@ -113,6 +137,41 @@ function test_gre {
cleanup
}
+function test_ip6gre {
+ TYPE=ip6gre
+ DEV_NS=ip6gre00
+ DEV=ip6gre11
+ config_device
+ # reuse the ip6gretap function
+ add_ip6gretap_tunnel
+ attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+ # underlay
+ ping6 -c 4 ::11
+ # overlay: ipv4 over ipv6
+ ip netns exec at_ns0 ping -c 1 10.1.1.200
+ ping -c 1 10.1.1.100
+ # overlay: ipv6 over ipv6
+ ip netns exec at_ns0 ping6 -c 1 fc80::200
+ cleanup
+}
+
+function test_ip6gretap {
+ TYPE=ip6gretap
+ DEV_NS=ip6gretap00
+ DEV=ip6gretap11
+ config_device
+ add_ip6gretap_tunnel
+ attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+ # underlay
+ ping6 -c 4 ::11
+ # overlay: ipv4 over ipv6
+ ip netns exec at_ns0 ping -i .2 -c 1 10.1.1.200
+ ping -c 1 10.1.1.100
+ # overlay: ipv6 over ipv6
+ ip netns exec at_ns0 ping6 -c 1 fc80::200
+ cleanup
+}
+
function test_erspan {
TYPE=erspan
DEV_NS=erspan00
@@ -175,6 +234,8 @@ function cleanup {
ip link del veth1
ip link del ipip11
ip link del gretap11
+ ip link del ip6gre11
+ ip link del ip6gretap11
ip link del vxlan11
ip link del geneve11
ip link del erspan11
@@ -187,6 +248,10 @@ trap cleanup 0 2 3 6 9
cleanup
echo "Testing GRE tunnel..."
test_gre
+echo "Testing IP6GRE tunnel..."
+test_ip6gre
+echo "Testing IP6GRETAP tunnel..."
+test_ip6gretap
echo "Testing ERSPAN tunnel..."
test_erspan
echo "Testing VXLAN tunnel..."