diff options
Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r-- | drivers/net/tun.c | 160 |
1 files changed, 144 insertions, 16 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 987aeefbc774..3a16d4fdaa05 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -22,7 +22,7 @@ * Add TUNSETLINK ioctl to set the link encapsulation * * Mark Smith <markzzzsmith@yahoo.com.au> - * Use random_ether_addr() for tap MAC address. + * Use eth_random_addr() for tap MAC address. * * Harald Roelle <harald.roelle@ifi.lmu.de> 2004/04/20 * Fixes in packet dropping, queue length setting and queue wakeup. @@ -100,6 +100,8 @@ do { \ } while (0) #endif +#define GOODCOPY_LEN 128 + #define FLT_EXACT_COUNT 8 struct tap_filter { unsigned int count; /* Number of addrs. Zero means disabled */ @@ -185,7 +187,6 @@ static void __tun_detach(struct tun_struct *tun) netif_tx_lock_bh(tun->dev); netif_carrier_off(tun->dev); tun->tfile = NULL; - tun->socket.file = NULL; netif_tx_unlock_bh(tun->dev); /* Drop read queue */ @@ -358,6 +359,8 @@ static void tun_free_netdev(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); + BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED, &tun->socket.flags)); + sk_release_kernel(tun->socket.sk); } @@ -414,6 +417,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* Orphan the skb - required as we might hang on to it * for indefinite time. */ + if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) + goto drop; skb_orphan(skb); /* Enqueue packet */ @@ -600,19 +605,100 @@ static struct sk_buff *tun_alloc_skb(struct tun_struct *tun, return skb; } +/* set skb frags from iovec, this can move to core network code for reuse */ +static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, + int offset, size_t count) +{ + int len = iov_length(from, count) - offset; + int copy = skb_headlen(skb); + int size, offset1 = 0; + int i = 0; + + /* Skip over from offset */ + while (count && (offset >= from->iov_len)) { + offset -= from->iov_len; + ++from; + --count; + } + + /* copy up to skb headlen */ + while (count && (copy > 0)) { + size = min_t(unsigned int, copy, from->iov_len - offset); + if (copy_from_user(skb->data + offset1, from->iov_base + offset, + size)) + return -EFAULT; + if (copy > size) { + ++from; + --count; + offset = 0; + } else + offset += size; + copy -= size; + offset1 += size; + } + + if (len == offset1) + return 0; + + while (count--) { + struct page *page[MAX_SKB_FRAGS]; + int num_pages; + unsigned long base; + unsigned long truesize; + + len = from->iov_len - offset; + if (!len) { + offset = 0; + ++from; + continue; + } + base = (unsigned long)from->iov_base + offset; + size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; + if (i + size > MAX_SKB_FRAGS) + return -EMSGSIZE; + num_pages = get_user_pages_fast(base, size, 0, &page[i]); + if (num_pages != size) { + for (i = 0; i < num_pages; i++) + put_page(page[i]); + return -EFAULT; + } + truesize = size * PAGE_SIZE; + skb->data_len += len; + skb->len += len; + skb->truesize += truesize; + atomic_add(truesize, &skb->sk->sk_wmem_alloc); + while (len) { + int off = base & ~PAGE_MASK; + int size = min_t(int, len, PAGE_SIZE - off); + __skb_fill_page_desc(skb, i, page[i], off, size); + skb_shinfo(skb)->nr_frags++; + /* increase sk_wmem_alloc */ + base += size; + len -= size; + i++; + } + offset = 0; + ++from; + } + return 0; +} + /* Get packet from user space buffer */ -static ssize_t tun_get_user(struct tun_struct *tun, - const struct iovec *iv, size_t count, - int noblock) +static ssize_t tun_get_user(struct tun_struct *tun, void *msg_control, + const struct iovec *iv, size_t total_len, + size_t count, int noblock) { struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; struct sk_buff *skb; - size_t len = count, align = NET_SKB_PAD; + size_t len = total_len, align = NET_SKB_PAD; struct virtio_net_hdr gso = { 0 }; int offset = 0; + int copylen; + bool zerocopy = false; + int err; if (!(tun->flags & TUN_NO_PI)) { - if ((len -= sizeof(pi)) > count) + if ((len -= sizeof(pi)) > total_len) return -EINVAL; if (memcpy_fromiovecend((void *)&pi, iv, 0, sizeof(pi))) @@ -621,7 +707,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, } if (tun->flags & TUN_VNET_HDR) { - if ((len -= tun->vnet_hdr_sz) > count) + if ((len -= tun->vnet_hdr_sz) > total_len) return -EINVAL; if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso))) @@ -643,14 +729,46 @@ static ssize_t tun_get_user(struct tun_struct *tun, return -EINVAL; } - skb = tun_alloc_skb(tun, align, len, gso.hdr_len, noblock); + if (msg_control) + zerocopy = true; + + if (zerocopy) { + /* Userspace may produce vectors with count greater than + * MAX_SKB_FRAGS, so we need to linearize parts of the skb + * to let the rest of data to be fit in the frags. + */ + if (count > MAX_SKB_FRAGS) { + copylen = iov_length(iv, count - MAX_SKB_FRAGS); + if (copylen < offset) + copylen = 0; + else + copylen -= offset; + } else + copylen = 0; + /* There are 256 bytes to be copied in skb, so there is enough + * room for skb expand head in case it is used. + * The rest of the buffer is mapped from userspace. + */ + if (copylen < gso.hdr_len) + copylen = gso.hdr_len; + if (!copylen) + copylen = GOODCOPY_LEN; + } else + copylen = len; + + skb = tun_alloc_skb(tun, align, copylen, gso.hdr_len, noblock); if (IS_ERR(skb)) { if (PTR_ERR(skb) != -EAGAIN) tun->dev->stats.rx_dropped++; return PTR_ERR(skb); } - if (skb_copy_datagram_from_iovec(skb, 0, iv, offset, len)) { + if (zerocopy) + err = zerocopy_sg_from_iovec(skb, iv, offset, count); + else + err = skb_copy_datagram_from_iovec(skb, 0, iv, offset, len); + + if (err) { tun->dev->stats.rx_dropped++; kfree_skb(skb); return -EFAULT; @@ -724,12 +842,18 @@ static ssize_t tun_get_user(struct tun_struct *tun, skb_shinfo(skb)->gso_segs = 0; } + /* copy skb_ubuf_info for callback when skb has no error */ + if (zerocopy) { + skb_shinfo(skb)->destructor_arg = msg_control; + skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; + } + netif_rx_ni(skb); tun->dev->stats.rx_packets++; tun->dev->stats.rx_bytes += len; - return count; + return total_len; } static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, @@ -744,7 +868,7 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, tun_debug(KERN_INFO, tun, "tun_chr_write %ld\n", count); - result = tun_get_user(tun, iv, iov_length(iv, count), + result = tun_get_user(tun, NULL, iv, iov_length(iv, count), count, file->f_flags & O_NONBLOCK); tun_put(tun); @@ -958,8 +1082,8 @@ static int tun_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len) { struct tun_struct *tun = container_of(sock, struct tun_struct, socket); - return tun_get_user(tun, m->msg_iov, total_len, - m->msg_flags & MSG_DONTWAIT); + return tun_get_user(tun, m->msg_control, m->msg_iov, total_len, + m->msg_iovlen, m->msg_flags & MSG_DONTWAIT); } static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, @@ -1115,6 +1239,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) tun->flags = flags; tun->txflt.count = 0; tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr); + set_bit(SOCK_EXTERNALLY_ALLOCATED, &tun->socket.flags); err = -ENOMEM; sk = sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL, &tun_proto); @@ -1128,6 +1253,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) sock_init_data(&tun->socket, sk); sk->sk_write_space = tun_sock_write_space; sk->sk_sndbuf = INT_MAX; + sock_set_flag(sk, SOCK_ZEROCOPY); tun_sk(sk)->tun = tun; @@ -1252,10 +1378,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, int vnet_hdr_sz; int ret; - if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) + if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) { if (copy_from_user(&ifr, argp, ifreq_len)) return -EFAULT; - + } else { + memset(&ifr, 0, sizeof(ifr)); + } if (cmd == TUNGETFEATURES) { /* Currently this just means: "what IFF flags are valid?". * This is needed because we never checked for invalid flags on |