summaryrefslogtreecommitdiff
path: root/drivers/net/tun.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r--drivers/net/tun.c280
1 files changed, 250 insertions, 30 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 3d4c24572ecd..3c9985f29950 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -73,6 +73,8 @@
#include <linux/seq_file.h>
#include <linux/uio.h>
#include <linux/skb_array.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
#include <linux/uaccess.h>
@@ -105,6 +107,9 @@ do { \
} while (0)
#endif
+#define TUN_HEADROOM 256
+#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
+
/* TUN device flags */
/* IFF_ATTACH_QUEUE is never stored in device flags,
@@ -199,7 +204,7 @@ struct tun_struct {
struct net_device *dev;
netdev_features_t set_features;
#define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
- NETIF_F_TSO6|NETIF_F_UFO)
+ NETIF_F_TSO6)
int align;
int vnet_hdr_sz;
@@ -221,6 +226,7 @@ struct tun_struct {
u32 flow_count;
u32 rx_batched;
struct tun_pcpu_stats __percpu *pcpu_stats;
+ struct bpf_prog __rcu *xdp_prog;
};
#ifdef CONFIG_TUN_VNET_CROSS_LE
@@ -585,6 +591,7 @@ static void tun_detach(struct tun_file *tfile, bool clean)
static void tun_detach_all(struct net_device *dev)
{
struct tun_struct *tun = netdev_priv(dev);
+ struct bpf_prog *xdp_prog = rtnl_dereference(tun->xdp_prog);
struct tun_file *tfile, *tmp;
int i, n = tun->numqueues;
@@ -617,6 +624,9 @@ static void tun_detach_all(struct net_device *dev)
}
BUG_ON(tun->numdisabled != 0);
+ if (xdp_prog)
+ bpf_prog_put(xdp_prog);
+
if (tun->flags & IFF_PERSIST)
module_put(THIS_MODULE);
}
@@ -892,7 +902,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
sk_filter(tfile->socket.sk, skb))
goto drop;
- if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+ if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
goto drop;
skb_tx_timestamp(skb);
@@ -1003,6 +1013,46 @@ tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
stats->tx_dropped = tx_dropped;
}
+static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
+{
+ struct tun_struct *tun = netdev_priv(dev);
+ struct bpf_prog *old_prog;
+
+ old_prog = rtnl_dereference(tun->xdp_prog);
+ rcu_assign_pointer(tun->xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ return 0;
+}
+
+static u32 tun_xdp_query(struct net_device *dev)
+{
+ struct tun_struct *tun = netdev_priv(dev);
+ const struct bpf_prog *xdp_prog;
+
+ xdp_prog = rtnl_dereference(tun->xdp_prog);
+ if (xdp_prog)
+ return xdp_prog->aux->id;
+
+ return 0;
+}
+
+static int tun_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+{
+ switch (xdp->command) {
+ case XDP_SETUP_PROG:
+ return tun_xdp_set(dev, xdp->prog, xdp->extack);
+ case XDP_QUERY_PROG:
+ xdp->prog_id = tun_xdp_query(dev);
+ xdp->prog_attached = !!xdp->prog_id;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static const struct net_device_ops tun_netdev_ops = {
.ndo_uninit = tun_net_uninit,
.ndo_open = tun_net_open,
@@ -1033,6 +1083,7 @@ static const struct net_device_ops tap_netdev_ops = {
.ndo_features_check = passthru_features_check,
.ndo_set_rx_headroom = tun_set_headroom,
.ndo_get_stats64 = tun_net_get_stats64,
+ .ndo_xdp = tun_xdp,
};
static void tun_flow_init(struct tun_struct *tun)
@@ -1190,6 +1241,138 @@ static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile,
}
}
+static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
+ int len, int noblock, bool zerocopy)
+{
+ if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
+ return false;
+
+ if (tfile->socket.sk->sk_sndbuf != INT_MAX)
+ return false;
+
+ if (!noblock)
+ return false;
+
+ if (zerocopy)
+ return false;
+
+ if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
+ return false;
+
+ return true;
+}
+
+static struct sk_buff *tun_build_skb(struct tun_struct *tun,
+ struct tun_file *tfile,
+ struct iov_iter *from,
+ struct virtio_net_hdr *hdr,
+ int len, int *skb_xdp)
+{
+ struct page_frag *alloc_frag = &current->task_frag;
+ struct sk_buff *skb;
+ struct bpf_prog *xdp_prog;
+ int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ unsigned int delta = 0;
+ char *buf;
+ size_t copied;
+ bool xdp_xmit = false;
+ int err, pad = TUN_RX_PAD;
+
+ rcu_read_lock();
+ xdp_prog = rcu_dereference(tun->xdp_prog);
+ if (xdp_prog)
+ pad += TUN_HEADROOM;
+ buflen += SKB_DATA_ALIGN(len + pad);
+ rcu_read_unlock();
+
+ if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
+ return ERR_PTR(-ENOMEM);
+
+ buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+ copied = copy_page_from_iter(alloc_frag->page,
+ alloc_frag->offset + pad,
+ len, from);
+ if (copied != len)
+ return ERR_PTR(-EFAULT);
+
+ /* There's a small window that XDP may be set after the check
+ * of xdp_prog above, this should be rare and for simplicity
+ * we do XDP on skb in case the headroom is not enough.
+ */
+ if (hdr->gso_type || !xdp_prog)
+ *skb_xdp = 1;
+ else
+ *skb_xdp = 0;
+
+ rcu_read_lock();
+ xdp_prog = rcu_dereference(tun->xdp_prog);
+ if (xdp_prog && !*skb_xdp) {
+ struct xdp_buff xdp;
+ void *orig_data;
+ u32 act;
+
+ xdp.data_hard_start = buf;
+ xdp.data = buf + pad;
+ xdp.data_end = xdp.data + len;
+ orig_data = xdp.data;
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+ switch (act) {
+ case XDP_REDIRECT:
+ get_page(alloc_frag->page);
+ alloc_frag->offset += buflen;
+ err = xdp_do_redirect(tun->dev, &xdp, xdp_prog);
+ if (err)
+ goto err_redirect;
+ return NULL;
+ case XDP_TX:
+ xdp_xmit = true;
+ /* fall through */
+ case XDP_PASS:
+ delta = orig_data - xdp.data;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ /* fall through */
+ case XDP_ABORTED:
+ trace_xdp_exception(tun->dev, xdp_prog, act);
+ /* fall through */
+ case XDP_DROP:
+ goto err_xdp;
+ }
+ }
+
+ skb = build_skb(buf, buflen);
+ if (!skb) {
+ rcu_read_unlock();
+ return ERR_PTR(-ENOMEM);
+ }
+
+ skb_reserve(skb, pad - delta);
+ skb_put(skb, len + delta);
+ get_page(alloc_frag->page);
+ alloc_frag->offset += buflen;
+
+ if (xdp_xmit) {
+ skb->dev = tun->dev;
+ generic_xdp_tx(skb, xdp_prog);
+ rcu_read_lock();
+ return NULL;
+ }
+
+ rcu_read_unlock();
+
+ return skb;
+
+err_redirect:
+ put_page(alloc_frag->page);
+err_xdp:
+ rcu_read_unlock();
+ this_cpu_inc(tun->pcpu_stats->rx_dropped);
+ return NULL;
+}
+
/* Get packet from user space buffer */
static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
void *msg_control, struct iov_iter *from,
@@ -1206,6 +1389,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
bool zerocopy = false;
int err;
u32 rxhash;
+ int skb_xdp = 1;
if (!(tun->dev->flags & IFF_UP))
return -EIO;
@@ -1263,30 +1447,44 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
zerocopy = true;
}
- if (!zerocopy) {
- copylen = len;
- if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
- linear = good_linear;
- else
- linear = tun16_to_cpu(tun, gso.hdr_len);
- }
-
- skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
- if (IS_ERR(skb)) {
- if (PTR_ERR(skb) != -EAGAIN)
+ if (tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
+ /* For the packet that is not easy to be processed
+ * (e.g gso or jumbo packet), we will do it at after
+ * skb was created with generic XDP routine.
+ */
+ skb = tun_build_skb(tun, tfile, from, &gso, len, &skb_xdp);
+ if (IS_ERR(skb)) {
this_cpu_inc(tun->pcpu_stats->rx_dropped);
- return PTR_ERR(skb);
- }
+ return PTR_ERR(skb);
+ }
+ if (!skb)
+ return total_len;
+ } else {
+ if (!zerocopy) {
+ copylen = len;
+ if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
+ linear = good_linear;
+ else
+ linear = tun16_to_cpu(tun, gso.hdr_len);
+ }
- if (zerocopy)
- err = zerocopy_sg_from_iter(skb, from);
- else
- err = skb_copy_datagram_from_iter(skb, 0, from, len);
+ skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
+ if (IS_ERR(skb)) {
+ if (PTR_ERR(skb) != -EAGAIN)
+ this_cpu_inc(tun->pcpu_stats->rx_dropped);
+ return PTR_ERR(skb);
+ }
- if (err) {
- this_cpu_inc(tun->pcpu_stats->rx_dropped);
- kfree_skb(skb);
- return -EFAULT;
+ if (zerocopy)
+ err = zerocopy_sg_from_iter(skb, from);
+ else
+ err = skb_copy_datagram_from_iter(skb, 0, from, len);
+
+ if (err) {
+ this_cpu_inc(tun->pcpu_stats->rx_dropped);
+ kfree_skb(skb);
+ return -EFAULT;
+ }
}
if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
@@ -1334,6 +1532,22 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
skb_reset_network_header(skb);
skb_probe_transport_header(skb, 0);
+ if (skb_xdp) {
+ struct bpf_prog *xdp_prog;
+ int ret;
+
+ rcu_read_lock();
+ xdp_prog = rcu_dereference(tun->xdp_prog);
+ if (xdp_prog) {
+ ret = do_xdp_generic(xdp_prog, skb);
+ if (ret != XDP_PASS) {
+ rcu_read_unlock();
+ return total_len;
+ }
+ }
+ rcu_read_unlock();
+ }
+
rxhash = __skb_get_hash_symmetric(skb);
#ifndef CONFIG_4KSTACKS
tun_rx_batched(tun, tfile, skb, more);
@@ -1879,6 +2093,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
err_detach:
tun_detach_all(dev);
+ /* register_netdevice() already called tun_free_netdev() */
+ goto err_free_dev;
+
err_free_flow:
tun_flow_uninit(tun);
security_tun_dev_free_security(tun->security);
@@ -1921,11 +2138,6 @@ static int set_offload(struct tun_struct *tun, unsigned long arg)
features |= NETIF_F_TSO6;
arg &= ~(TUN_F_TSO4|TUN_F_TSO6);
}
-
- if (arg & TUN_F_UFO) {
- features |= NETIF_F_UFO;
- arg &= ~TUN_F_UFO;
- }
}
/* This gives the user a way to test for new features in future by
@@ -2537,7 +2749,7 @@ static int tun_queue_resize(struct tun_struct *tun)
int n = tun->numqueues + tun->numdisabled;
int ret, i;
- arrays = kmalloc(sizeof *arrays * n, GFP_KERNEL);
+ arrays = kmalloc_array(n, sizeof(*arrays), GFP_KERNEL);
if (!arrays)
return -ENOMEM;
@@ -2598,8 +2810,16 @@ static int __init tun_init(void)
goto err_misc;
}
- register_netdevice_notifier(&tun_notifier_block);
+ ret = register_netdevice_notifier(&tun_notifier_block);
+ if (ret) {
+ pr_err("Can't register netdevice notifier\n");
+ goto err_notifier;
+ }
+
return 0;
+
+err_notifier:
+ misc_deregister(&tun_miscdev);
err_misc:
rtnl_link_unregister(&tun_link_ops);
err_linkops: