diff options
author | Jason Wang <jasowang@redhat.com> | 2017-08-11 14:41:18 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-08-14 05:56:07 +0300 |
commit | 761876c857cb2ef8489fbee01907151da902af91 (patch) | |
tree | 430b242ed12977a98f3fa3282712275039adb4a7 | |
parent | 7c4974786f4794178f04e96318fc3b2f2850cbc6 (diff) | |
download | linux-761876c857cb2ef8489fbee01907151da902af91.tar.xz |
tap: XDP support
This patch tries to implement XDP for tun. The implementation was
split into two parts:
- fast path: small and no gso packet. We try to do XDP at page level
before build_skb(). For XDP_TX, since creating/destroying queues
were completely under control of userspace, it was implemented
through generic XDP helper after skb has been built. This could be
optimized in the future.
- slow path: big or gso packet. We try to do it after skb was created
through generic XDP helpers.
Test were done through pktgen with small packets.
xdp1 test shows ~41.1% improvement:
Before: ~1.7Mpps
After: ~2.3Mpps
xdp_redirect to ixgbe shows ~60% improvement:
Before: ~0.8Mpps
After: ~1.38Mpps
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/tun.c | 149 |
1 files changed, 142 insertions, 7 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 9736df40d2bf..5892284eb8d0 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -73,6 +73,8 @@ #include <linux/seq_file.h> #include <linux/uio.h> #include <linux/skb_array.h> +#include <linux/bpf.h> +#include <linux/bpf_trace.h> #include <linux/uaccess.h> @@ -105,7 +107,8 @@ do { \ } while (0) #endif -#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD) +#define TUN_HEADROOM 256 +#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD + TUN_HEADROOM) /* TUN device flags */ @@ -224,6 +227,7 @@ struct tun_struct { u32 flow_count; u32 rx_batched; struct tun_pcpu_stats __percpu *pcpu_stats; + struct bpf_prog __rcu *xdp_prog; }; #ifdef CONFIG_TUN_VNET_CROSS_LE @@ -590,6 +594,7 @@ static void tun_detach(struct tun_file *tfile, bool clean) static void tun_detach_all(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); + struct bpf_prog *xdp_prog = rtnl_dereference(tun->xdp_prog); struct tun_file *tfile, *tmp; int i, n = tun->numqueues; @@ -622,6 +627,9 @@ static void tun_detach_all(struct net_device *dev) } BUG_ON(tun->numdisabled != 0); + if (xdp_prog) + bpf_prog_put(xdp_prog); + if (tun->flags & IFF_PERSIST) module_put(THIS_MODULE); } @@ -1008,6 +1016,46 @@ tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->tx_dropped = tx_dropped; } +static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct tun_struct *tun = netdev_priv(dev); + struct bpf_prog *old_prog; + + old_prog = rtnl_dereference(tun->xdp_prog); + rcu_assign_pointer(tun->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + return 0; +} + +static u32 tun_xdp_query(struct net_device *dev) +{ + struct tun_struct *tun = netdev_priv(dev); + const struct bpf_prog *xdp_prog; + + xdp_prog = rtnl_dereference(tun->xdp_prog); + if (xdp_prog) + return xdp_prog->aux->id; + + return 0; +} + +static int tun_xdp(struct net_device *dev, struct netdev_xdp *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return tun_xdp_set(dev, xdp->prog, xdp->extack); + case XDP_QUERY_PROG: + xdp->prog_id = tun_xdp_query(dev); + xdp->prog_attached = !!xdp->prog_id; + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops tun_netdev_ops = { .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, @@ -1038,6 +1086,7 @@ static const struct net_device_ops tap_netdev_ops = { .ndo_features_check = passthru_features_check, .ndo_set_rx_headroom = tun_set_headroom, .ndo_get_stats64 = tun_net_get_stats64, + .ndo_xdp = tun_xdp, }; static void tun_flow_init(struct tun_struct *tun) @@ -1217,16 +1266,22 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile, return true; } -static struct sk_buff *tun_build_skb(struct tun_file *tfile, +static struct sk_buff *tun_build_skb(struct tun_struct *tun, + struct tun_file *tfile, struct iov_iter *from, - int len) + struct virtio_net_hdr *hdr, + int len, int *generic_xdp) { struct page_frag *alloc_frag = &tfile->alloc_frag; struct sk_buff *skb; + struct bpf_prog *xdp_prog; int buflen = SKB_DATA_ALIGN(len + TUN_RX_PAD) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + unsigned int delta = 0; char *buf; size_t copied; + bool xdp_xmit = false; + int err; if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL))) return ERR_PTR(-ENOMEM); @@ -1238,16 +1293,77 @@ static struct sk_buff *tun_build_skb(struct tun_file *tfile, if (copied != len) return ERR_PTR(-EFAULT); + if (hdr->gso_type) + *generic_xdp = 1; + else + *generic_xdp = 0; + + rcu_read_lock(); + xdp_prog = rcu_dereference(tun->xdp_prog); + if (xdp_prog && !*generic_xdp) { + struct xdp_buff xdp; + void *orig_data; + u32 act; + + xdp.data_hard_start = buf; + xdp.data = buf + TUN_RX_PAD; + xdp.data_end = xdp.data + len; + orig_data = xdp.data; + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + switch (act) { + case XDP_REDIRECT: + get_page(alloc_frag->page); + alloc_frag->offset += buflen; + err = xdp_do_redirect(tun->dev, &xdp, xdp_prog); + if (err) + goto err_redirect; + return NULL; + case XDP_TX: + xdp_xmit = true; + /* fall through */ + case XDP_PASS: + delta = orig_data - xdp.data; + break; + default: + bpf_warn_invalid_xdp_action(act); + /* fall through */ + case XDP_ABORTED: + trace_xdp_exception(tun->dev, xdp_prog, act); + /* fall through */ + case XDP_DROP: + goto err_xdp; + } + } + skb = build_skb(buf, buflen); - if (!skb) + if (!skb) { + rcu_read_unlock(); return ERR_PTR(-ENOMEM); + } - skb_reserve(skb, TUN_RX_PAD); - skb_put(skb, len); + skb_reserve(skb, TUN_RX_PAD - delta); + skb_put(skb, len + delta); get_page(alloc_frag->page); alloc_frag->offset += buflen; + if (xdp_xmit) { + skb->dev = tun->dev; + generic_xdp_tx(skb, xdp_prog); + rcu_read_lock(); + return NULL; + } + + rcu_read_unlock(); + return skb; + +err_redirect: + put_page(alloc_frag->page); +err_xdp: + rcu_read_unlock(); + this_cpu_inc(tun->pcpu_stats->rx_dropped); + return NULL; } /* Get packet from user space buffer */ @@ -1266,6 +1382,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, bool zerocopy = false; int err; u32 rxhash; + int generic_xdp = 1; if (!(tun->dev->flags & IFF_UP)) return -EIO; @@ -1324,11 +1441,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } if (tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) { - skb = tun_build_skb(tfile, from, len); + skb = tun_build_skb(tun, tfile, from, &gso, len, &generic_xdp); if (IS_ERR(skb)) { this_cpu_inc(tun->pcpu_stats->rx_dropped); return PTR_ERR(skb); } + if (!skb) + return total_len; } else { if (!zerocopy) { copylen = len; @@ -1402,6 +1521,22 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb_reset_network_header(skb); skb_probe_transport_header(skb, 0); + if (generic_xdp) { + struct bpf_prog *xdp_prog; + int ret; + + rcu_read_lock(); + xdp_prog = rcu_dereference(tun->xdp_prog); + if (xdp_prog) { + ret = do_xdp_generic(xdp_prog, skb); + if (ret != XDP_PASS) { + rcu_read_unlock(); + return total_len; + } + } + rcu_read_unlock(); + } + rxhash = __skb_get_hash_symmetric(skb); #ifndef CONFIG_4KSTACKS tun_rx_batched(tun, tfile, skb, more); |