summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-08-29 01:04:52 +0300
committerDavid S. Miller <davem@davemloft.net>2017-08-29 01:04:52 +0300
commitd5a915b978c41396933dcd840b6f09f358ee2383 (patch)
tree09ccc663e7a4b7a7d707dd8bfea0632cb0c3b791
parent03157937fe0b5ef0431509106c096e50612fa70d (diff)
parentef88f89c830f4832e58c454dc8bd1d759373adc4 (diff)
downloadlinux-d5a915b978c41396933dcd840b6f09f358ee2383.tar.xz
Merge branch 'gre-add-collect_md-mode-for-ERSPAN-tunnel'
William Tu says: ==================== gre: add collect_md mode for ERSPAN tunnel This patch series provide collect_md mode for ERSPAN tunnel. The fist patch refactors the existing gre_fb_xmit function by exacting the route cache portion into a new function called prepare_fb_xmit. The second patch introduces the collect_md mode for ERSPAN tunnel, by calling the prepare_fb_xmit function and adding ERSPAN specific logic. The final patch adds the test case using bpf_skb_{set,get}_tunnel_{key,opt}. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/ip_tunnels.h4
-rw-r--r--net/ipv4/ip_gre.c157
-rw-r--r--samples/bpf/tcbpf2_kern.c63
-rwxr-xr-xsamples/bpf/test_tunnel_bpf.sh29
4 files changed, 232 insertions, 21 deletions
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 625c29329372..992652856fe8 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -154,8 +154,10 @@ struct ip_tunnel {
#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800)
#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)
#define TUNNEL_NOCACHE __cpu_to_be16(0x2000)
+#define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000)
-#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
+#define TUNNEL_OPTIONS_PRESENT \
+ (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)
struct tnl_ptk_info {
__be16 flags;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index f70674799fdd..0162fb955b33 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -113,6 +113,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
+static void erspan_build_header(struct sk_buff *skb,
+ __be32 id, u32 index, bool truncate);
static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
@@ -287,7 +289,33 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
false, false) < 0)
goto drop;
- tunnel->index = ntohl(index);
+ if (tunnel->collect_md) {
+ struct ip_tunnel_info *info;
+ struct erspan_metadata *md;
+ __be64 tun_id;
+ __be16 flags;
+
+ tpi->flags |= TUNNEL_KEY;
+ flags = tpi->flags;
+ tun_id = key32_to_tunnel_id(tpi->key);
+
+ tun_dst = ip_tun_rx_dst(skb, flags,
+ tun_id, sizeof(*md));
+ if (!tun_dst)
+ return PACKET_REJECT;
+
+ md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
+ if (!md)
+ return PACKET_REJECT;
+
+ md->index = index;
+ info = &tun_dst->u.tun_info;
+ info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ info->options_len = sizeof(*md);
+ } else {
+ tunnel->index = ntohl(index);
+ }
+
skb_reset_mac_header(skb);
ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
return PACKET_RCVD;
@@ -432,39 +460,33 @@ static struct rtable *gre_get_rt(struct sk_buff *skb,
return ip_route_output_key(net, fl);
}
-static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
- __be16 proto)
+static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
+ struct net_device *dev,
+ struct flowi4 *fl,
+ int tunnel_hlen)
{
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct rtable *rt = NULL;
- struct flowi4 fl;
int min_headroom;
- int tunnel_hlen;
- __be16 df, flags;
bool use_cache;
int err;
tun_info = skb_tunnel_info(skb);
- if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
- ip_tunnel_info_af(tun_info) != AF_INET))
- goto err_free_skb;
-
key = &tun_info->key;
use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
+
if (use_cache)
- rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
+ rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
if (!rt) {
- rt = gre_get_rt(skb, dev, &fl, key);
+ rt = gre_get_rt(skb, dev, fl, key);
if (IS_ERR(rt))
- goto err_free_skb;
+ goto err_free_skb;
if (use_cache)
dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
- fl.saddr);
+ fl->saddr);
}
- tunnel_hlen = gre_calc_hlen(key->tun_flags);
-
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ tunnel_hlen + sizeof(struct iphdr);
if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
@@ -476,6 +498,37 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
if (unlikely(err))
goto err_free_rt;
}
+ return rt;
+
+err_free_rt:
+ ip_rt_put(rt);
+err_free_skb:
+ kfree_skb(skb);
+ dev->stats.tx_dropped++;
+ return NULL;
+}
+
+static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+ __be16 proto)
+{
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+ struct rtable *rt = NULL;
+ struct flowi4 fl;
+ int tunnel_hlen;
+ __be16 df, flags;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET))
+ goto err_free_skb;
+
+ key = &tun_info->key;
+ tunnel_hlen = gre_calc_hlen(key->tun_flags);
+
+ rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
+ if (!rt)
+ return;
/* Push Tunnel header. */
if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
@@ -498,6 +551,64 @@ err_free_skb:
dev->stats.tx_dropped++;
}
+static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+ __be16 proto)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+ struct erspan_metadata *md;
+ struct rtable *rt = NULL;
+ bool truncate = false;
+ struct flowi4 fl;
+ int tunnel_hlen;
+ __be16 df;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET))
+ goto err_free_skb;
+
+ key = &tun_info->key;
+
+ /* ERSPAN has fixed 8 byte GRE header */
+ tunnel_hlen = 8 + sizeof(struct erspanhdr);
+
+ rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
+ if (!rt)
+ return;
+
+ if (gre_handle_offloads(skb, false))
+ goto err_free_rt;
+
+ if (skb->len > dev->mtu) {
+ pskb_trim(skb, dev->mtu);
+ truncate = true;
+ }
+
+ md = ip_tunnel_info_opts(tun_info);
+ if (!md)
+ goto err_free_rt;
+
+ erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
+ ntohl(md->index), truncate);
+
+ gre_build_header(skb, 8, TUNNEL_SEQ,
+ htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
+
+ df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
+
+ iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
+ key->tos, key->ttl, df, false);
+ return;
+
+err_free_rt:
+ ip_rt_put(rt);
+err_free_skb:
+ kfree_skb(skb);
+ dev->stats.tx_dropped++;
+}
+
static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
struct ip_tunnel_info *info = skb_tunnel_info(skb);
@@ -611,6 +722,11 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
struct ip_tunnel *tunnel = netdev_priv(dev);
bool truncate = false;
+ if (tunnel->collect_md) {
+ erspan_fb_xmit(skb, dev, skb->protocol);
+ return NETDEV_TX_OK;
+ }
+
if (gre_handle_offloads(skb, false))
goto free_skb;
@@ -973,9 +1089,12 @@ static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
return ret;
/* ERSPAN should only have GRE sequence and key flag */
- flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
- flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
- if (flags != (GRE_SEQ | GRE_KEY))
+ if (data[IFLA_GRE_OFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+ if (data[IFLA_GRE_IFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+ if (!data[IFLA_GRE_COLLECT_METADATA] &&
+ flags != (GRE_SEQ | GRE_KEY))
return -EINVAL;
/* ERSPAN Session ID only has 10-bit. Since we reuse
diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c
index 270edcc149a1..370b749f5ee6 100644
--- a/samples/bpf/tcbpf2_kern.c
+++ b/samples/bpf/tcbpf2_kern.c
@@ -17,6 +17,7 @@
#include <uapi/linux/pkt_cls.h>
#include <net/ipv6.h>
#include "bpf_helpers.h"
+#include "bpf_endian.h"
#define _htonl __builtin_bswap32
#define ERROR(ret) do {\
@@ -38,6 +39,10 @@ struct vxlan_metadata {
u32 gbp;
};
+struct erspan_metadata {
+ __be32 index;
+};
+
SEC("gre_set_tunnel")
int _gre_set_tunnel(struct __sk_buff *skb)
{
@@ -76,6 +81,63 @@ int _gre_get_tunnel(struct __sk_buff *skb)
return TC_ACT_OK;
}
+SEC("erspan_set_tunnel")
+int _erspan_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ struct erspan_metadata md;
+ int ret;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ md.index = htonl(123);
+ ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("erspan_get_tunnel")
+int _erspan_get_tunnel(struct __sk_buff *skb)
+{
+ char fmt[] = "key %d remote ip 0x%x erspan index 0x%x\n";
+ struct bpf_tunnel_key key;
+ struct erspan_metadata md;
+ u32 index;
+ int ret;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ index = bpf_ntohl(md.index);
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv4, index);
+
+ return TC_ACT_OK;
+}
+
SEC("vxlan_set_tunnel")
int _vxlan_set_tunnel(struct __sk_buff *skb)
{
@@ -378,5 +440,4 @@ int _ip6ip6_get_tunnel(struct __sk_buff *skb)
return TC_ACT_OK;
}
-
char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh
index a70d2ea90313..410052d9fc37 100755
--- a/samples/bpf/test_tunnel_bpf.sh
+++ b/samples/bpf/test_tunnel_bpf.sh
@@ -32,6 +32,19 @@ function add_gre_tunnel {
ip addr add dev $DEV 10.1.1.200/24
}
+function add_erspan_tunnel {
+ # in namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 local 172.16.1.100 remote 172.16.1.200 erspan 123
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+ # out of namespace
+ ip link add dev $DEV type $TYPE external
+ ip link set dev $DEV up
+ ip addr add dev $DEV 10.1.1.200/24
+}
+
function add_vxlan_tunnel {
# Set static ARP entry here because iptables set-mark works
# on L3 packet, as a result not applying to ARP packets,
@@ -99,6 +112,18 @@ function test_gre {
cleanup
}
+function test_erspan {
+ TYPE=erspan
+ DEV_NS=erspan00
+ DEV=erspan11
+ config_device
+ add_erspan_tunnel
+ attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
+ ping -c 1 10.1.1.100
+ ip netns exec at_ns0 ping -c 1 10.1.1.200
+ cleanup
+}
+
function test_vxlan {
TYPE=vxlan
DEV_NS=vxlan00
@@ -151,14 +176,18 @@ function cleanup {
ip link del gretap11
ip link del vxlan11
ip link del geneve11
+ ip link del erspan11
pkill tcpdump
pkill cat
set -ex
}
+trap cleanup 0 2 3 6 9
cleanup
echo "Testing GRE tunnel..."
test_gre
+echo "Testing ERSPAN tunnel..."
+test_erspan
echo "Testing VXLAN tunnel..."
test_vxlan
echo "Testing GENEVE tunnel..."