diff options
Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r-- | drivers/net/tun.c | 116 |
1 files changed, 90 insertions, 26 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 782e38bfc1ee..44c4db8450f0 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -69,6 +69,7 @@ #include <net/netns/generic.h> #include <net/rtnetlink.h> #include <net/sock.h> +#include <linux/seq_file.h> #include <asm/uaccess.h> @@ -110,7 +111,7 @@ struct tap_filter { unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; }; -/* DEFAULT_MAX_NUM_RSS_QUEUES were choosed to let the rx/tx queues allocated for +/* DEFAULT_MAX_NUM_RSS_QUEUES were chosen to let the rx/tx queues allocated for * the netdevice to be fit in one page. So we can make sure the success of * memory allocation. TODO: increase the limit. */ #define MAX_TAP_QUEUES DEFAULT_MAX_NUM_RSS_QUEUES @@ -119,7 +120,7 @@ struct tap_filter { #define TUN_FLOW_EXPIRE (3 * HZ) /* A tun_file connects an open character device to a tuntap netdevice. It - * also contains all socket related strctures (except sock_fprog and tap_filter) + * also contains all socket related structures (except sock_fprog and tap_filter) * to serve as one transmit queue for tuntap device. The sock_fprog and * tap_filter were kept in tun_struct since they were used for filtering for the * netdevice not for a specific queue (at least I didn't see the requirement for @@ -152,6 +153,7 @@ struct tun_flow_entry { struct tun_struct *tun; u32 rxhash; + u32 rps_rxhash; int queue_index; unsigned long updated; }; @@ -220,6 +222,7 @@ static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun, rxhash, queue_index); e->updated = jiffies; e->rxhash = rxhash; + e->rps_rxhash = 0; e->queue_index = queue_index; e->tun = tun; hlist_add_head_rcu(&e->hash_link, head); @@ -232,6 +235,7 @@ static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e) { tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n", e->rxhash, e->queue_index); + sock_rps_reset_flow_hash(e->rps_rxhash); hlist_del_rcu(&e->hash_link); kfree_rcu(e, rcu); --tun->flow_count; @@ -325,6 +329,7 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash, /* TODO: keep queueing to old queue until it's empty? */ e->queue_index = queue_index; e->updated = jiffies; + sock_rps_record_flow_hash(e->rps_rxhash); } else { spin_lock_bh(&tun->lock); if (!tun_flow_find(head, rxhash) && @@ -341,14 +346,27 @@ unlock: rcu_read_unlock(); } +/** + * Save the hash received in the stack receive path and update the + * flow_hash table accordingly. + */ +static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash) +{ + if (unlikely(e->rps_rxhash != hash)) { + sock_rps_reset_flow_hash(e->rps_rxhash); + e->rps_rxhash = hash; + } +} + /* We try to identify a flow through its rxhash first. The reason that - * we do not check rxq no. is becuase some cards(e.g 82599), chooses + * we do not check rxq no. is because some cards(e.g 82599), chooses * the rxq based on the txq where the last packet of the flow comes. As * the userspace application move between processors, we may get a * different rxq no. here. If we could not get rxhash, then we would * hope the rxq no. may help here. */ -static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb) +static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv) { struct tun_struct *tun = netdev_priv(dev); struct tun_flow_entry *e; @@ -358,12 +376,13 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb) rcu_read_lock(); numqueues = ACCESS_ONCE(tun->numqueues); - txq = skb_get_rxhash(skb); + txq = skb_get_hash(skb); if (txq) { e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq); - if (e) + if (e) { + tun_flow_save_rps_rxhash(e, txq); txq = e->queue_index; - else + } else /* use multiply and shift instead of expensive divide */ txq = ((u64)txq * numqueues) >> 32; } else if (likely(skb_rx_queue_recorded(skb))) { @@ -531,7 +550,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte err = 0; - /* Re-attach the filter to presist device */ + /* Re-attach the filter to persist device */ if (!skip_filter && (tun->filter_attached == true)) { err = sk_attach_filter(&tun->fprog, tfile->socket.sk); if (!err) @@ -720,14 +739,32 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) struct tun_struct *tun = netdev_priv(dev); int txq = skb->queue_mapping; struct tun_file *tfile; + u32 numqueues = 0; rcu_read_lock(); tfile = rcu_dereference(tun->tfiles[txq]); + numqueues = ACCESS_ONCE(tun->numqueues); /* Drop packet if interface is not attached */ - if (txq >= tun->numqueues) + if (txq >= numqueues) goto drop; + if (numqueues == 1) { + /* Select queue was not called for the skbuff, so we extract the + * RPS hash and save it into the flow_table here. + */ + __u32 rxhash; + + rxhash = skb_get_hash(skb); + if (rxhash) { + struct tun_flow_entry *e; + e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)], + rxhash); + if (e) + tun_flow_save_rps_rxhash(e, rxhash); + } + } + tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len); BUG_ON(!tfile); @@ -745,8 +782,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* Limit the number of packets queued by dividing txq length with the * number of queues. */ - if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) - >= dev->tx_queue_len / tun->numqueues) + if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) * numqueues + >= dev->tx_queue_len) goto drop; if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) @@ -819,9 +856,9 @@ static void tun_poll_controller(struct net_device *dev) * Tun only receives frames when: * 1) the char device endpoint gets data from user space * 2) the tun socket gets a sendmsg call from user space - * Since both of those are syncronous operations, we are guaranteed + * Since both of those are synchronous operations, we are guaranteed * never to have pending data when we poll for it - * so theres nothing to do here but return. + * so there is nothing to do here but return. * We need this though so netpoll recognizes us as an interface that * supports polling, which enables bridge devices in virt setups to * still use netconsole @@ -1146,7 +1183,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb_reset_network_header(skb); skb_probe_transport_header(skb, 0); - rxhash = skb_get_rxhash(skb); + rxhash = skb_get_hash(skb); netif_rx_ni(skb); tun->dev->stats.rx_packets++; @@ -1184,7 +1221,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, { struct tun_pi pi = { 0, skb->protocol }; ssize_t total = 0; - int vlan_offset = 0; + int vlan_offset = 0, copied; if (!(tun->flags & TUN_NO_PI)) { if ((len -= sizeof(pi)) < 0) @@ -1248,6 +1285,8 @@ static ssize_t tun_put_user(struct tun_struct *tun, total += tun->vnet_hdr_sz; } + copied = total; + total += skb->len; if (!vlan_tx_tag_present(skb)) { len = min_t(int, skb->len, len); } else { @@ -1262,24 +1301,24 @@ static ssize_t tun_put_user(struct tun_struct *tun, vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto); len = min_t(int, skb->len + VLAN_HLEN, len); + total += VLAN_HLEN; copy = min_t(int, vlan_offset, len); - ret = skb_copy_datagram_const_iovec(skb, 0, iv, total, copy); + ret = skb_copy_datagram_const_iovec(skb, 0, iv, copied, copy); len -= copy; - total += copy; + copied += copy; if (ret || !len) goto done; copy = min_t(int, sizeof(veth), len); - ret = memcpy_toiovecend(iv, (void *)&veth, total, copy); + ret = memcpy_toiovecend(iv, (void *)&veth, copied, copy); len -= copy; - total += copy; + copied += copy; if (ret || !len) goto done; } - skb_copy_datagram_const_iovec(skb, vlan_offset, iv, total, len); - total += len; + skb_copy_datagram_const_iovec(skb, vlan_offset, iv, copied, len); done: tun->dev->stats.tx_packets++; @@ -1289,8 +1328,7 @@ done: } static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, - struct kiocb *iocb, const struct iovec *iv, - ssize_t len, int noblock) + const struct iovec *iv, ssize_t len, int noblock) { DECLARE_WAITQUEUE(wait, current); struct sk_buff *skb; @@ -1353,9 +1391,11 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, goto out; } - ret = tun_do_read(tun, tfile, iocb, iv, len, + ret = tun_do_read(tun, tfile, iv, len, file->f_flags & O_NONBLOCK); ret = min_t(ssize_t, ret, len); + if (ret > 0) + iocb->ki_pos = ret; out: tun_put(tun); return ret; @@ -1452,7 +1492,7 @@ static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, SOL_PACKET, TUN_TX_TIMESTAMP); goto out; } - ret = tun_do_read(tun, tfile, iocb, m->msg_iov, total_len, + ret = tun_do_read(tun, tfile, m->msg_iov, total_len, flags & MSG_DONTWAIT); if (ret > total_len) { m->msg_flags |= MSG_TRUNC; @@ -2189,6 +2229,27 @@ static int tun_chr_close(struct inode *inode, struct file *file) return 0; } +#ifdef CONFIG_PROC_FS +static int tun_chr_show_fdinfo(struct seq_file *m, struct file *f) +{ + struct tun_struct *tun; + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + + rtnl_lock(); + tun = tun_get(f); + if (tun) + tun_get_iff(current->nsproxy->net_ns, tun, &ifr); + rtnl_unlock(); + + if (tun) + tun_put(tun); + + return seq_printf(m, "iff:\t%s\n", ifr.ifr_name); +} +#endif + static const struct file_operations tun_fops = { .owner = THIS_MODULE, .llseek = no_llseek, @@ -2203,7 +2264,10 @@ static const struct file_operations tun_fops = { #endif .open = tun_chr_open, .release = tun_chr_close, - .fasync = tun_chr_fasync + .fasync = tun_chr_fasync, +#ifdef CONFIG_PROC_FS + .show_fdinfo = tun_chr_show_fdinfo, +#endif }; static struct miscdevice tun_miscdev = { |