From a8e04698732736f59fefe72c675791a006b76e1d Mon Sep 17 00:00:00 2001 From: Sainath Grandhi Date: Fri, 10 Feb 2017 16:03:46 -0800 Subject: tap: Refactoring macvtap.c macvtap module has code for tap/queue management and link management. This patch splits the code into macvtap_main.c for link management and tap.c for tap/queue management. Functionality in tap.c can be re-used for implementing tap on other virtual interfaces. Signed-off-by: Sainath Grandhi Signed-off-by: David S. Miller --- drivers/net/tap.c | 1186 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1186 insertions(+) create mode 100644 drivers/net/tap.c (limited to 'drivers/net/tap.c') diff --git a/drivers/net/tap.c b/drivers/net/tap.c new file mode 100644 index 000000000000..6f6228e4fd3f --- /dev/null +++ b/drivers/net/tap.c @@ -0,0 +1,1186 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* + * A macvtap queue is the central object of this driver, it connects + * an open character device to a macvlan interface. There can be + * multiple queues on one interface, which map back to queues + * implemented in hardware on the underlying device. + * + * macvtap_proto is used to allocate queues through the sock allocation + * mechanism. + * + */ +struct macvtap_queue { + struct sock sk; + struct socket sock; + struct socket_wq wq; + int vnet_hdr_sz; + struct macvlan_dev __rcu *vlan; + struct file *file; + unsigned int flags; + u16 queue_index; + bool enabled; + struct list_head next; + struct skb_array skb_array; +}; + +#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) + +#define MACVTAP_VNET_LE 0x80000000 +#define MACVTAP_VNET_BE 0x40000000 + +#ifdef CONFIG_TUN_VNET_CROSS_LE +static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) +{ + return q->flags & MACVTAP_VNET_BE ? 
false : + virtio_legacy_is_little_endian(); +} + +static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *sp) +{ + int s = !!(q->flags & MACVTAP_VNET_BE); + + if (put_user(s, sp)) + return -EFAULT; + + return 0; +} + +static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *sp) +{ + int s; + + if (get_user(s, sp)) + return -EFAULT; + + if (s) + q->flags |= MACVTAP_VNET_BE; + else + q->flags &= ~MACVTAP_VNET_BE; + + return 0; +} +#else +static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) +{ + return virtio_legacy_is_little_endian(); +} + +static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *argp) +{ + return -EINVAL; +} + +static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *argp) +{ + return -EINVAL; +} +#endif /* CONFIG_TUN_VNET_CROSS_LE */ + +static inline bool macvtap_is_little_endian(struct macvtap_queue *q) +{ + return q->flags & MACVTAP_VNET_LE || + macvtap_legacy_is_little_endian(q); +} + +static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val) +{ + return __virtio16_to_cpu(macvtap_is_little_endian(q), val); +} + +static inline __virtio16 cpu_to_macvtap16(struct macvtap_queue *q, u16 val) +{ + return __cpu_to_virtio16(macvtap_is_little_endian(q), val); +} + +static struct proto macvtap_proto = { + .name = "macvtap", + .owner = THIS_MODULE, + .obj_size = sizeof (struct macvtap_queue), +}; + +#define MACVTAP_NUM_DEVS (1U << MINORBITS) +static DEFINE_MUTEX(minor_lock); +DEFINE_IDR(minor_idr); + +#define GOODCOPY_LEN 128 + +static const struct proto_ops macvtap_socket_ops; + +#define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO) +#define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG | NETIF_F_FRAGLIST) + +static struct macvlan_dev *macvtap_get_vlan_rcu(const struct net_device *dev) +{ + return rcu_dereference(dev->rx_handler_data); +} + +/* + * RCU usage: + * The macvtap_queue and the macvlan_dev are loosely coupled, the + * pointers from one to the other can only be read 
while rcu_read_lock + * or rtnl is held. + * + * Both the file and the macvlan_dev hold a reference on the macvtap_queue + * through sock_hold(&q->sk). When the macvlan_dev goes away first, + * q->vlan becomes inaccessible. When the files gets closed, + * macvtap_get_queue() fails. + * + * There may still be references to the struct sock inside of the + * queue from outbound SKBs, but these never reference back to the + * file or the dev. The data structure is freed through __sk_free + * when both our references and any pending SKBs are gone. + */ + +static int macvtap_enable_queue(struct net_device *dev, struct file *file, + struct macvtap_queue *q) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + int err = -EINVAL; + + ASSERT_RTNL(); + + if (q->enabled) + goto out; + + err = 0; + rcu_assign_pointer(vlan->taps[vlan->numvtaps], q); + q->queue_index = vlan->numvtaps; + q->enabled = true; + + vlan->numvtaps++; +out: + return err; +} + +/* Requires RTNL */ +static int macvtap_set_queue(struct net_device *dev, struct file *file, + struct macvtap_queue *q) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + + if (vlan->numqueues == MAX_MACVTAP_QUEUES) + return -EBUSY; + + rcu_assign_pointer(q->vlan, vlan); + rcu_assign_pointer(vlan->taps[vlan->numvtaps], q); + sock_hold(&q->sk); + + q->file = file; + q->queue_index = vlan->numvtaps; + q->enabled = true; + file->private_data = q; + list_add_tail(&q->next, &vlan->queue_list); + + vlan->numvtaps++; + vlan->numqueues++; + + return 0; +} + +static int macvtap_disable_queue(struct macvtap_queue *q) +{ + struct macvlan_dev *vlan; + struct macvtap_queue *nq; + + ASSERT_RTNL(); + if (!q->enabled) + return -EINVAL; + + vlan = rtnl_dereference(q->vlan); + + if (vlan) { + int index = q->queue_index; + BUG_ON(index >= vlan->numvtaps); + nq = rtnl_dereference(vlan->taps[vlan->numvtaps - 1]); + nq->queue_index = index; + + rcu_assign_pointer(vlan->taps[index], nq); + RCU_INIT_POINTER(vlan->taps[vlan->numvtaps - 1], NULL); + 
q->enabled = false; + + vlan->numvtaps--; + } + + return 0; +} + +/* + * The file owning the queue got closed, give up both + * the reference that the files holds as well as the + * one from the macvlan_dev if that still exists. + * + * Using the spinlock makes sure that we don't get + * to the queue again after destroying it. + */ +static void macvtap_put_queue(struct macvtap_queue *q) +{ + struct macvlan_dev *vlan; + + rtnl_lock(); + vlan = rtnl_dereference(q->vlan); + + if (vlan) { + if (q->enabled) + BUG_ON(macvtap_disable_queue(q)); + + vlan->numqueues--; + RCU_INIT_POINTER(q->vlan, NULL); + sock_put(&q->sk); + list_del_init(&q->next); + } + + rtnl_unlock(); + + synchronize_rcu(); + sock_put(&q->sk); +} + +/* + * Select a queue based on the rxq of the device on which this packet + * arrived. If the incoming device is not mq, calculate a flow hash + * to select a queue. If all fails, find the first available queue. + * Cache vlan->numvtaps since it can become zero during the execution + * of this function. + */ +static struct macvtap_queue *macvtap_get_queue(struct net_device *dev, + struct sk_buff *skb) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + struct macvtap_queue *tap = NULL; + /* Access to taps array is protected by rcu, but access to numvtaps + * isn't. Below we use it to lookup a queue, but treat it as a hint + * and validate that the result isn't NULL - in case we are + * racing against queue removal. 
+ */ + int numvtaps = ACCESS_ONCE(vlan->numvtaps); + __u32 rxq; + + if (!numvtaps) + goto out; + + if (numvtaps == 1) + goto single; + + /* Check if we can use flow to select a queue */ + rxq = skb_get_hash(skb); + if (rxq) { + tap = rcu_dereference(vlan->taps[rxq % numvtaps]); + goto out; + } + + if (likely(skb_rx_queue_recorded(skb))) { + rxq = skb_get_rx_queue(skb); + + while (unlikely(rxq >= numvtaps)) + rxq -= numvtaps; + + tap = rcu_dereference(vlan->taps[rxq]); + goto out; + } + +single: + tap = rcu_dereference(vlan->taps[0]); +out: + return tap; +} + +/* + * The net_device is going away, give up the reference + * that it holds on all queues and safely set the pointer + * from the queues to NULL. + */ +void macvtap_del_queues(struct net_device *dev) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + struct macvtap_queue *q, *tmp; + + ASSERT_RTNL(); + list_for_each_entry_safe(q, tmp, &vlan->queue_list, next) { + list_del_init(&q->next); + RCU_INIT_POINTER(q->vlan, NULL); + if (q->enabled) + vlan->numvtaps--; + vlan->numqueues--; + sock_put(&q->sk); + } + BUG_ON(vlan->numvtaps); + BUG_ON(vlan->numqueues); + /* guarantee that any future macvtap_set_queue will fail */ + vlan->numvtaps = MAX_MACVTAP_QUEUES; +} + +rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) +{ + struct sk_buff *skb = *pskb; + struct net_device *dev = skb->dev; + struct macvlan_dev *vlan; + struct macvtap_queue *q; + netdev_features_t features = TAP_FEATURES; + + vlan = macvtap_get_vlan_rcu(dev); + if (!vlan) + return RX_HANDLER_PASS; + + q = macvtap_get_queue(dev, skb); + if (!q) + return RX_HANDLER_PASS; + + if (__skb_array_full(&q->skb_array)) + goto drop; + + skb_push(skb, ETH_HLEN); + + /* Apply the forward feature mask so that we perform segmentation + * according to users wishes. This only works if VNET_HDR is + * enabled. 
+ */ + if (q->flags & IFF_VNET_HDR) + features |= vlan->tap_features; + if (netif_needs_gso(skb, features)) { + struct sk_buff *segs = __skb_gso_segment(skb, features, false); + + if (IS_ERR(segs)) + goto drop; + + if (!segs) { + if (skb_array_produce(&q->skb_array, skb)) + goto drop; + goto wake_up; + } + + consume_skb(skb); + while (segs) { + struct sk_buff *nskb = segs->next; + + segs->next = NULL; + if (skb_array_produce(&q->skb_array, segs)) { + kfree_skb(segs); + kfree_skb_list(nskb); + break; + } + segs = nskb; + } + } else { + /* If we receive a partial checksum and the tap side + * doesn't support checksum offload, compute the checksum. + * Note: it doesn't matter which checksum feature to + * check, we either support them all or none. + */ + if (skb->ip_summed == CHECKSUM_PARTIAL && + !(features & NETIF_F_CSUM_MASK) && + skb_checksum_help(skb)) + goto drop; + if (skb_array_produce(&q->skb_array, skb)) + goto drop; + } + +wake_up: + wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND); + return RX_HANDLER_CONSUMED; + +drop: + /* Count errors/drops only here, thus don't care about args. */ + macvlan_count_rx(vlan, 0, 0, 0); + kfree_skb(skb); + return RX_HANDLER_CONSUMED; +} + +int macvtap_get_minor(struct macvlan_dev *vlan) +{ + int retval = -ENOMEM; + + mutex_lock(&minor_lock); + retval = idr_alloc(&minor_idr, vlan, 1, MACVTAP_NUM_DEVS, GFP_KERNEL); + if (retval >= 0) { + vlan->minor = retval; + } else if (retval == -ENOSPC) { + netdev_err(vlan->dev, "Too many macvtap devices\n"); + retval = -EINVAL; + } + mutex_unlock(&minor_lock); + return retval < 0 ? 
retval : 0; +} + +void macvtap_free_minor(struct macvlan_dev *vlan) +{ + mutex_lock(&minor_lock); + if (vlan->minor) { + idr_remove(&minor_idr, vlan->minor); + vlan->minor = 0; + } + mutex_unlock(&minor_lock); +} + +static struct net_device *dev_get_by_macvtap_minor(int minor) +{ + struct net_device *dev = NULL; + struct macvlan_dev *vlan; + + mutex_lock(&minor_lock); + vlan = idr_find(&minor_idr, minor); + if (vlan) { + dev = vlan->dev; + dev_hold(dev); + } + mutex_unlock(&minor_lock); + return dev; +} + +static void macvtap_sock_write_space(struct sock *sk) +{ + wait_queue_head_t *wqueue; + + if (!sock_writeable(sk) || + !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags)) + return; + + wqueue = sk_sleep(sk); + if (wqueue && waitqueue_active(wqueue)) + wake_up_interruptible_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND); +} + +static void macvtap_sock_destruct(struct sock *sk) +{ + struct macvtap_queue *q = container_of(sk, struct macvtap_queue, sk); + + skb_array_cleanup(&q->skb_array); +} + +static int macvtap_open(struct inode *inode, struct file *file) +{ + struct net *net = current->nsproxy->net_ns; + struct net_device *dev; + struct macvtap_queue *q; + int err = -ENODEV; + + rtnl_lock(); + dev = dev_get_by_macvtap_minor(iminor(inode)); + if (!dev) + goto err; + + err = -ENOMEM; + q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, + &macvtap_proto, 0); + if (!q) + goto err; + + RCU_INIT_POINTER(q->sock.wq, &q->wq); + init_waitqueue_head(&q->wq.wait); + q->sock.type = SOCK_RAW; + q->sock.state = SS_CONNECTED; + q->sock.file = file; + q->sock.ops = &macvtap_socket_ops; + sock_init_data(&q->sock, &q->sk); + q->sk.sk_write_space = macvtap_sock_write_space; + q->sk.sk_destruct = macvtap_sock_destruct; + q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP; + q->vnet_hdr_sz = sizeof(struct virtio_net_hdr); + + /* + * so far only KVM virtio_net uses macvtap, enable zero copy between + * guest kernel and host kernel when lower device supports 
zerocopy + * + * The macvlan supports zerocopy iff the lower device supports zero + * copy so we don't have to look at the lower device directly. + */ + if ((dev->features & NETIF_F_HIGHDMA) && (dev->features & NETIF_F_SG)) + sock_set_flag(&q->sk, SOCK_ZEROCOPY); + + err = -ENOMEM; + if (skb_array_init(&q->skb_array, dev->tx_queue_len, GFP_KERNEL)) + goto err_array; + + err = macvtap_set_queue(dev, file, q); + if (err) + goto err_queue; + + dev_put(dev); + + rtnl_unlock(); + return err; + +err_queue: + skb_array_cleanup(&q->skb_array); +err_array: + sock_put(&q->sk); +err: + if (dev) + dev_put(dev); + + rtnl_unlock(); + return err; +} + +static int macvtap_release(struct inode *inode, struct file *file) +{ + struct macvtap_queue *q = file->private_data; + macvtap_put_queue(q); + return 0; +} + +static unsigned int macvtap_poll(struct file *file, poll_table * wait) +{ + struct macvtap_queue *q = file->private_data; + unsigned int mask = POLLERR; + + if (!q) + goto out; + + mask = 0; + poll_wait(file, &q->wq.wait, wait); + + if (!skb_array_empty(&q->skb_array)) + mask |= POLLIN | POLLRDNORM; + + if (sock_writeable(&q->sk) || + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock.flags) && + sock_writeable(&q->sk))) + mask |= POLLOUT | POLLWRNORM; + +out: + return mask; +} + +static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad, + size_t len, size_t linear, + int noblock, int *err) +{ + struct sk_buff *skb; + + /* Under a page? Don't bother with paged skb. 
*/ + if (prepad + len < PAGE_SIZE || !linear) + linear = len; + + skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, + err, 0); + if (!skb) + return NULL; + + skb_reserve(skb, prepad); + skb_put(skb, linear); + skb->data_len = len - linear; + skb->len += len - linear; + + return skb; +} + +/* Neighbour code has some assumptions on HH_DATA_MOD alignment */ +#define MACVTAP_RESERVE HH_DATA_OFF(ETH_HLEN) + +/* Get packet from user space buffer */ +static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, + struct iov_iter *from, int noblock) +{ + int good_linear = SKB_MAX_HEAD(MACVTAP_RESERVE); + struct sk_buff *skb; + struct macvlan_dev *vlan; + unsigned long total_len = iov_iter_count(from); + unsigned long len = total_len; + int err; + struct virtio_net_hdr vnet_hdr = { 0 }; + int vnet_hdr_len = 0; + int copylen = 0; + int depth; + bool zerocopy = false; + size_t linear; + + if (q->flags & IFF_VNET_HDR) { + vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); + + err = -EINVAL; + if (len < vnet_hdr_len) + goto err; + len -= vnet_hdr_len; + + err = -EFAULT; + if (!copy_from_iter_full(&vnet_hdr, sizeof(vnet_hdr), from)) + goto err; + iov_iter_advance(from, vnet_hdr_len - sizeof(vnet_hdr)); + if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && + macvtap16_to_cpu(q, vnet_hdr.csum_start) + + macvtap16_to_cpu(q, vnet_hdr.csum_offset) + 2 > + macvtap16_to_cpu(q, vnet_hdr.hdr_len)) + vnet_hdr.hdr_len = cpu_to_macvtap16(q, + macvtap16_to_cpu(q, vnet_hdr.csum_start) + + macvtap16_to_cpu(q, vnet_hdr.csum_offset) + 2); + err = -EINVAL; + if (macvtap16_to_cpu(q, vnet_hdr.hdr_len) > len) + goto err; + } + + err = -EINVAL; + if (unlikely(len < ETH_HLEN)) + goto err; + + if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { + struct iov_iter i; + + copylen = vnet_hdr.hdr_len ? 
+ macvtap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN; + if (copylen > good_linear) + copylen = good_linear; + else if (copylen < ETH_HLEN) + copylen = ETH_HLEN; + linear = copylen; + i = *from; + iov_iter_advance(&i, copylen); + if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS) + zerocopy = true; + } + + if (!zerocopy) { + copylen = len; + linear = macvtap16_to_cpu(q, vnet_hdr.hdr_len); + if (linear > good_linear) + linear = good_linear; + else if (linear < ETH_HLEN) + linear = ETH_HLEN; + } + + skb = macvtap_alloc_skb(&q->sk, MACVTAP_RESERVE, copylen, + linear, noblock, &err); + if (!skb) + goto err; + + if (zerocopy) + err = zerocopy_sg_from_iter(skb, from); + else + err = skb_copy_datagram_from_iter(skb, 0, from, len); + + if (err) + goto err_kfree; + + skb_set_network_header(skb, ETH_HLEN); + skb_reset_mac_header(skb); + skb->protocol = eth_hdr(skb)->h_proto; + + if (vnet_hdr_len) { + err = virtio_net_hdr_to_skb(skb, &vnet_hdr, + macvtap_is_little_endian(q)); + if (err) + goto err_kfree; + } + + skb_probe_transport_header(skb, ETH_HLEN); + + /* Move network header to the right position for VLAN tagged packets */ + if ((skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD)) && + __vlan_get_protocol(skb, skb->protocol, &depth) != 0) + skb_set_network_header(skb, depth); + + rcu_read_lock(); + vlan = rcu_dereference(q->vlan); + /* copy skb_ubuf_info for callback when skb has no error */ + if (zerocopy) { + skb_shinfo(skb)->destructor_arg = m->msg_control; + skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; + skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; + } else if (m && m->msg_control) { + struct ubuf_info *uarg = m->msg_control; + uarg->callback(uarg, false); + } + + if (vlan) { + skb->dev = vlan->dev; + dev_queue_xmit(skb); + } else { + kfree_skb(skb); + } + rcu_read_unlock(); + + return total_len; + +err_kfree: + kfree_skb(skb); + +err: + rcu_read_lock(); + vlan = rcu_dereference(q->vlan); + if (vlan) + 
this_cpu_inc(vlan->pcpu_stats->tx_dropped); + rcu_read_unlock(); + + return err; +} + +static ssize_t macvtap_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct macvtap_queue *q = file->private_data; + + return macvtap_get_user(q, NULL, from, file->f_flags & O_NONBLOCK); +} + +/* Put packet to the user space buffer */ +static ssize_t macvtap_put_user(struct macvtap_queue *q, + const struct sk_buff *skb, + struct iov_iter *iter) +{ + int ret; + int vnet_hdr_len = 0; + int vlan_offset = 0; + int total; + + if (q->flags & IFF_VNET_HDR) { + struct virtio_net_hdr vnet_hdr; + vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); + if (iov_iter_count(iter) < vnet_hdr_len) + return -EINVAL; + + if (virtio_net_hdr_from_skb(skb, &vnet_hdr, + macvtap_is_little_endian(q), true)) + BUG(); + + if (copy_to_iter(&vnet_hdr, sizeof(vnet_hdr), iter) != + sizeof(vnet_hdr)) + return -EFAULT; + + iov_iter_advance(iter, vnet_hdr_len - sizeof(vnet_hdr)); + } + total = vnet_hdr_len; + total += skb->len; + + if (skb_vlan_tag_present(skb)) { + struct { + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + } veth; + veth.h_vlan_proto = skb->vlan_proto; + veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); + + vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto); + total += VLAN_HLEN; + + ret = skb_copy_datagram_iter(skb, 0, iter, vlan_offset); + if (ret || !iov_iter_count(iter)) + goto done; + + ret = copy_to_iter(&veth, sizeof(veth), iter); + if (ret != sizeof(veth) || !iov_iter_count(iter)) + goto done; + } + + ret = skb_copy_datagram_iter(skb, vlan_offset, iter, + skb->len - vlan_offset); + +done: + return ret ? 
ret : total; +} + +static ssize_t macvtap_do_read(struct macvtap_queue *q, + struct iov_iter *to, + int noblock) +{ + DEFINE_WAIT(wait); + struct sk_buff *skb; + ssize_t ret = 0; + + if (!iov_iter_count(to)) + return 0; + + while (1) { + if (!noblock) + prepare_to_wait(sk_sleep(&q->sk), &wait, + TASK_INTERRUPTIBLE); + + /* Read frames from the queue */ + skb = skb_array_consume(&q->skb_array); + if (skb) + break; + if (noblock) { + ret = -EAGAIN; + break; + } + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + /* Nothing to read, let's sleep */ + schedule(); + } + if (!noblock) + finish_wait(sk_sleep(&q->sk), &wait); + + if (skb) { + ret = macvtap_put_user(q, skb, to); + if (unlikely(ret < 0)) + kfree_skb(skb); + else + consume_skb(skb); + } + return ret; +} + +static ssize_t macvtap_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + struct macvtap_queue *q = file->private_data; + ssize_t len = iov_iter_count(to), ret; + + ret = macvtap_do_read(q, to, file->f_flags & O_NONBLOCK); + ret = min_t(ssize_t, ret, len); + if (ret > 0) + iocb->ki_pos = ret; + return ret; +} + +static struct macvlan_dev *macvtap_get_vlan(struct macvtap_queue *q) +{ + struct macvlan_dev *vlan; + + ASSERT_RTNL(); + vlan = rtnl_dereference(q->vlan); + if (vlan) + dev_hold(vlan->dev); + + return vlan; +} + +static void macvtap_put_vlan(struct macvlan_dev *vlan) +{ + dev_put(vlan->dev); +} + +static int macvtap_ioctl_set_queue(struct file *file, unsigned int flags) +{ + struct macvtap_queue *q = file->private_data; + struct macvlan_dev *vlan; + int ret; + + vlan = macvtap_get_vlan(q); + if (!vlan) + return -EINVAL; + + if (flags & IFF_ATTACH_QUEUE) + ret = macvtap_enable_queue(vlan->dev, file, q); + else if (flags & IFF_DETACH_QUEUE) + ret = macvtap_disable_queue(q); + else + ret = -EINVAL; + + macvtap_put_vlan(vlan); + return ret; +} + +static int set_offload(struct macvtap_queue *q, unsigned long arg) +{ + struct macvlan_dev *vlan; + 
netdev_features_t features; + netdev_features_t feature_mask = 0; + + vlan = rtnl_dereference(q->vlan); + if (!vlan) + return -ENOLINK; + + features = vlan->dev->features; + + if (arg & TUN_F_CSUM) { + feature_mask = NETIF_F_HW_CSUM; + + if (arg & (TUN_F_TSO4 | TUN_F_TSO6)) { + if (arg & TUN_F_TSO_ECN) + feature_mask |= NETIF_F_TSO_ECN; + if (arg & TUN_F_TSO4) + feature_mask |= NETIF_F_TSO; + if (arg & TUN_F_TSO6) + feature_mask |= NETIF_F_TSO6; + } + + if (arg & TUN_F_UFO) + feature_mask |= NETIF_F_UFO; + } + + /* tun/tap driver inverts the usage for TSO offloads, where + * setting the TSO bit means that the userspace wants to + * accept TSO frames and turning it off means that user space + * does not support TSO. + * For macvtap, we have to invert it to mean the same thing. + * When user space turns off TSO, we turn off GSO/LRO so that + * user-space will not receive TSO frames. + */ + if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_UFO)) + features |= RX_OFFLOADS; + else + features &= ~RX_OFFLOADS; + + /* tap_features are the same as features on tun/tap and + * reflect user expectations. 
+ */ + vlan->tap_features = feature_mask; + vlan->set_features = features; + netdev_update_features(vlan->dev); + + return 0; +} + +/* + * provide compatibility with generic tun/tap interface + */ +static long macvtap_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct macvtap_queue *q = file->private_data; + struct macvlan_dev *vlan; + void __user *argp = (void __user *)arg; + struct ifreq __user *ifr = argp; + unsigned int __user *up = argp; + unsigned short u; + int __user *sp = argp; + struct sockaddr sa; + int s; + int ret; + + switch (cmd) { + case TUNSETIFF: + /* ignore the name, just look at flags */ + if (get_user(u, &ifr->ifr_flags)) + return -EFAULT; + + ret = 0; + if ((u & ~MACVTAP_FEATURES) != (IFF_NO_PI | IFF_TAP)) + ret = -EINVAL; + else + q->flags = (q->flags & ~MACVTAP_FEATURES) | u; + + return ret; + + case TUNGETIFF: + rtnl_lock(); + vlan = macvtap_get_vlan(q); + if (!vlan) { + rtnl_unlock(); + return -ENOLINK; + } + + ret = 0; + u = q->flags; + if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) || + put_user(u, &ifr->ifr_flags)) + ret = -EFAULT; + macvtap_put_vlan(vlan); + rtnl_unlock(); + return ret; + + case TUNSETQUEUE: + if (get_user(u, &ifr->ifr_flags)) + return -EFAULT; + rtnl_lock(); + ret = macvtap_ioctl_set_queue(file, u); + rtnl_unlock(); + return ret; + + case TUNGETFEATURES: + if (put_user(IFF_TAP | IFF_NO_PI | MACVTAP_FEATURES, up)) + return -EFAULT; + return 0; + + case TUNSETSNDBUF: + if (get_user(s, sp)) + return -EFAULT; + + q->sk.sk_sndbuf = s; + return 0; + + case TUNGETVNETHDRSZ: + s = q->vnet_hdr_sz; + if (put_user(s, sp)) + return -EFAULT; + return 0; + + case TUNSETVNETHDRSZ: + if (get_user(s, sp)) + return -EFAULT; + if (s < (int)sizeof(struct virtio_net_hdr)) + return -EINVAL; + + q->vnet_hdr_sz = s; + return 0; + + case TUNGETVNETLE: + s = !!(q->flags & MACVTAP_VNET_LE); + if (put_user(s, sp)) + return -EFAULT; + return 0; + + case TUNSETVNETLE: + if (get_user(s, sp)) + return -EFAULT; + 
if (s) + q->flags |= MACVTAP_VNET_LE; + else + q->flags &= ~MACVTAP_VNET_LE; + return 0; + + case TUNGETVNETBE: + return macvtap_get_vnet_be(q, sp); + + case TUNSETVNETBE: + return macvtap_set_vnet_be(q, sp); + + case TUNSETOFFLOAD: + /* let the user check for future flags */ + if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | + TUN_F_TSO_ECN | TUN_F_UFO)) + return -EINVAL; + + rtnl_lock(); + ret = set_offload(q, arg); + rtnl_unlock(); + return ret; + + case SIOCGIFHWADDR: + rtnl_lock(); + vlan = macvtap_get_vlan(q); + if (!vlan) { + rtnl_unlock(); + return -ENOLINK; + } + ret = 0; + u = vlan->dev->type; + if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) || + copy_to_user(&ifr->ifr_hwaddr.sa_data, vlan->dev->dev_addr, ETH_ALEN) || + put_user(u, &ifr->ifr_hwaddr.sa_family)) + ret = -EFAULT; + macvtap_put_vlan(vlan); + rtnl_unlock(); + return ret; + + case SIOCSIFHWADDR: + if (copy_from_user(&sa, &ifr->ifr_hwaddr, sizeof(sa))) + return -EFAULT; + rtnl_lock(); + vlan = macvtap_get_vlan(q); + if (!vlan) { + rtnl_unlock(); + return -ENOLINK; + } + ret = dev_set_mac_address(vlan->dev, &sa); + macvtap_put_vlan(vlan); + rtnl_unlock(); + return ret; + + default: + return -EINVAL; + } +} + +#ifdef CONFIG_COMPAT +static long macvtap_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return macvtap_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); +} +#endif + +const struct file_operations macvtap_fops = { + .owner = THIS_MODULE, + .open = macvtap_open, + .release = macvtap_release, + .read_iter = macvtap_read_iter, + .write_iter = macvtap_write_iter, + .poll = macvtap_poll, + .llseek = no_llseek, + .unlocked_ioctl = macvtap_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = macvtap_compat_ioctl, +#endif +}; + +static int macvtap_sendmsg(struct socket *sock, struct msghdr *m, + size_t total_len) +{ + struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); + return macvtap_get_user(q, m, &m->msg_iter, m->msg_flags & 
MSG_DONTWAIT); +} + +static int macvtap_recvmsg(struct socket *sock, struct msghdr *m, + size_t total_len, int flags) +{ + struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); + int ret; + if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) + return -EINVAL; + ret = macvtap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT); + if (ret > total_len) { + m->msg_flags |= MSG_TRUNC; + ret = flags & MSG_TRUNC ? ret : total_len; + } + return ret; +} + +static int macvtap_peek_len(struct socket *sock) +{ + struct macvtap_queue *q = container_of(sock, struct macvtap_queue, + sock); + return skb_array_peek_len(&q->skb_array); +} + +/* Ops structure to mimic raw sockets with tun */ +static const struct proto_ops macvtap_socket_ops = { + .sendmsg = macvtap_sendmsg, + .recvmsg = macvtap_recvmsg, + .peek_len = macvtap_peek_len, +}; + +/* Get an underlying socket object from tun file. Returns error unless file is + * attached to a device. The returned object works like a packet socket, it + * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for + * holding a reference to the file for as long as the socket is in use. 
*/ +struct socket *macvtap_get_socket(struct file *file) +{ + struct macvtap_queue *q; + if (file->f_op != &macvtap_fops) + return ERR_PTR(-EINVAL); + q = file->private_data; + if (!q) + return ERR_PTR(-EBADFD); + return &q->sock; +} +EXPORT_SYMBOL_GPL(macvtap_get_socket); + +int macvtap_queue_resize(struct macvlan_dev *vlan) +{ + struct net_device *dev = vlan->dev; + struct macvtap_queue *q; + struct skb_array **arrays; + int n = vlan->numqueues; + int ret, i = 0; + + arrays = kmalloc(sizeof *arrays * n, GFP_KERNEL); + if (!arrays) + return -ENOMEM; + + list_for_each_entry(q, &vlan->queue_list, next) + arrays[i++] = &q->skb_array; + + ret = skb_array_resize_multiple(arrays, n, + dev->tx_queue_len, GFP_KERNEL); + + kfree(arrays); + return ret; +} -- cgit v1.2.3 From 635b8c8ecdd27142d7fdab0df334b2e9201481cf Mon Sep 17 00:00:00 2001 From: Sainath Grandhi Date: Fri, 10 Feb 2017 16:03:47 -0800 Subject: tap: Renaming tap related APIs, data structures, macros Renaming tap related APIs, data structures and macros in tap.c from macvtap_.* to tap_.* Signed-off-by: Sainath Grandhi Signed-off-by: David S. 
Miller --- drivers/net/macvtap_main.c | 18 +-- drivers/net/tap.c | 332 ++++++++++++++++++++++----------------------- drivers/vhost/net.c | 3 +- include/linux/if_macvlan.h | 17 +-- include/linux/if_macvtap.h | 10 -- include/linux/if_tap.h | 23 ++++ 6 files changed, 202 insertions(+), 201 deletions(-) delete mode 100644 include/linux/if_macvtap.h create mode 100644 include/linux/if_tap.h (limited to 'drivers/net/tap.c') diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 96ffa60c5a36..548f339a75bd 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -62,7 +62,7 @@ static int macvtap_newlink(struct net *src_net, */ vlan->tap_features = TUN_OFFLOADS; - err = netdev_rx_handler_register(dev, macvtap_handle_frame, vlan); + err = netdev_rx_handler_register(dev, tap_handle_frame, vlan); if (err) return err; @@ -82,7 +82,7 @@ static void macvtap_dellink(struct net_device *dev, struct list_head *head) { netdev_rx_handler_unregister(dev); - macvtap_del_queues(dev); + tap_del_queues(dev); macvlan_dellink(dev, head); } @@ -121,7 +121,7 @@ static int macvtap_device_event(struct notifier_block *unused, * been registered but before register_netdevice has * finished running. 
*/ - err = macvtap_get_minor(vlan); + err = tap_get_minor(vlan); if (err) return notifier_from_errno(err); @@ -129,7 +129,7 @@ static int macvtap_device_event(struct notifier_block *unused, classdev = device_create(&macvtap_class, &dev->dev, devt, dev, tap_name); if (IS_ERR(classdev)) { - macvtap_free_minor(vlan); + tap_free_minor(vlan); return notifier_from_errno(PTR_ERR(classdev)); } err = sysfs_create_link(&dev->dev.kobj, &classdev->kobj, @@ -144,10 +144,10 @@ static int macvtap_device_event(struct notifier_block *unused, sysfs_remove_link(&dev->dev.kobj, tap_name); devt = MKDEV(MAJOR(macvtap_major), vlan->minor); device_destroy(&macvtap_class, devt); - macvtap_free_minor(vlan); + tap_free_minor(vlan); break; case NETDEV_CHANGE_TX_QUEUE_LEN: - if (macvtap_queue_resize(vlan)) + if (tap_queue_resize(vlan)) return NOTIFY_BAD; break; } @@ -159,7 +159,7 @@ static struct notifier_block macvtap_notifier_block __read_mostly = { .notifier_call = macvtap_device_event, }; -extern struct file_operations macvtap_fops; +extern struct file_operations tap_fops; static int macvtap_init(void) { int err; @@ -169,7 +169,7 @@ static int macvtap_init(void) if (err) goto out1; - cdev_init(&macvtap_cdev, &macvtap_fops); + cdev_init(&macvtap_cdev, &tap_fops); err = cdev_add(&macvtap_cdev, macvtap_major, MACVTAP_NUM_DEVS); if (err) goto out2; diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 6f6228e4fd3f..15ca2d531d05 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -24,16 +24,16 @@ #include /* - * A macvtap queue is the central object of this driver, it connects + * A tap queue is the central object of this driver, it connects * an open character device to a macvlan interface. There can be * multiple queues on one interface, which map back to queues * implemented in hardware on the underlying device. * - * macvtap_proto is used to allocate queues through the sock allocation + * tap_proto is used to allocate queues through the sock allocation * mechanism. 
* */ -struct macvtap_queue { +struct tap_queue { struct sock sk; struct socket sock; struct socket_wq wq; @@ -47,21 +47,21 @@ struct macvtap_queue { struct skb_array skb_array; }; -#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) +#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) -#define MACVTAP_VNET_LE 0x80000000 -#define MACVTAP_VNET_BE 0x40000000 +#define TAP_VNET_LE 0x80000000 +#define TAP_VNET_BE 0x40000000 #ifdef CONFIG_TUN_VNET_CROSS_LE -static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) +static inline bool tap_legacy_is_little_endian(struct tap_queue *q) { - return q->flags & MACVTAP_VNET_BE ? false : + return q->flags & TAP_VNET_BE ? false : virtio_legacy_is_little_endian(); } -static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *sp) +static long tap_get_vnet_be(struct tap_queue *q, int __user *sp) { - int s = !!(q->flags & MACVTAP_VNET_BE); + int s = !!(q->flags & TAP_VNET_BE); if (put_user(s, sp)) return -EFAULT; @@ -69,7 +69,7 @@ static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *sp) return 0; } -static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *sp) +static long tap_set_vnet_be(struct tap_queue *q, int __user *sp) { int s; @@ -77,77 +77,77 @@ static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *sp) return -EFAULT; if (s) - q->flags |= MACVTAP_VNET_BE; + q->flags |= TAP_VNET_BE; else - q->flags &= ~MACVTAP_VNET_BE; + q->flags &= ~TAP_VNET_BE; return 0; } #else -static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q) +static inline bool tap_legacy_is_little_endian(struct tap_queue *q) { return virtio_legacy_is_little_endian(); } -static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *argp) +static long tap_get_vnet_be(struct tap_queue *q, int __user *argp) { return -EINVAL; } -static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *argp) +static long tap_set_vnet_be(struct tap_queue *q, int __user 
*argp) { return -EINVAL; } #endif /* CONFIG_TUN_VNET_CROSS_LE */ -static inline bool macvtap_is_little_endian(struct macvtap_queue *q) +static inline bool tap_is_little_endian(struct tap_queue *q) { - return q->flags & MACVTAP_VNET_LE || - macvtap_legacy_is_little_endian(q); + return q->flags & TAP_VNET_LE || + tap_legacy_is_little_endian(q); } -static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val) +static inline u16 tap16_to_cpu(struct tap_queue *q, __virtio16 val) { - return __virtio16_to_cpu(macvtap_is_little_endian(q), val); + return __virtio16_to_cpu(tap_is_little_endian(q), val); } -static inline __virtio16 cpu_to_macvtap16(struct macvtap_queue *q, u16 val) +static inline __virtio16 cpu_to_tap16(struct tap_queue *q, u16 val) { - return __cpu_to_virtio16(macvtap_is_little_endian(q), val); + return __cpu_to_virtio16(tap_is_little_endian(q), val); } -static struct proto macvtap_proto = { - .name = "macvtap", +static struct proto tap_proto = { + .name = "tap", .owner = THIS_MODULE, - .obj_size = sizeof (struct macvtap_queue), + .obj_size = sizeof(struct tap_queue), }; -#define MACVTAP_NUM_DEVS (1U << MINORBITS) +#define TAP_NUM_DEVS (1U << MINORBITS) static DEFINE_MUTEX(minor_lock); DEFINE_IDR(minor_idr); #define GOODCOPY_LEN 128 -static const struct proto_ops macvtap_socket_ops; +static const struct proto_ops tap_socket_ops; #define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO) #define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG | NETIF_F_FRAGLIST) -static struct macvlan_dev *macvtap_get_vlan_rcu(const struct net_device *dev) +static struct macvlan_dev *tap_get_vlan_rcu(const struct net_device *dev) { return rcu_dereference(dev->rx_handler_data); } /* * RCU usage: - * The macvtap_queue and the macvlan_dev are loosely coupled, the + * The tap_queue and the macvlan_dev are loosely coupled, the * pointers from one to the other can only be read while rcu_read_lock * or rtnl is held. 
* - * Both the file and the macvlan_dev hold a reference on the macvtap_queue + * Both the file and the macvlan_dev hold a reference on the tap_queue * through sock_hold(&q->sk). When the macvlan_dev goes away first, * q->vlan becomes inaccessible. When the files gets closed, - * macvtap_get_queue() fails. + * tap_get_queue() fails. * * There may still be references to the struct sock inside of the * queue from outbound SKBs, but these never reference back to the @@ -155,8 +155,8 @@ static struct macvlan_dev *macvtap_get_vlan_rcu(const struct net_device *dev) * when both our references and any pending SKBs are gone. */ -static int macvtap_enable_queue(struct net_device *dev, struct file *file, - struct macvtap_queue *q) +static int tap_enable_queue(struct net_device *dev, struct file *file, + struct tap_queue *q) { struct macvlan_dev *vlan = netdev_priv(dev); int err = -EINVAL; @@ -177,12 +177,12 @@ out: } /* Requires RTNL */ -static int macvtap_set_queue(struct net_device *dev, struct file *file, - struct macvtap_queue *q) +static int tap_set_queue(struct net_device *dev, struct file *file, + struct tap_queue *q) { struct macvlan_dev *vlan = netdev_priv(dev); - if (vlan->numqueues == MAX_MACVTAP_QUEUES) + if (vlan->numqueues == MAX_TAP_QUEUES) return -EBUSY; rcu_assign_pointer(q->vlan, vlan); @@ -201,10 +201,10 @@ static int macvtap_set_queue(struct net_device *dev, struct file *file, return 0; } -static int macvtap_disable_queue(struct macvtap_queue *q) +static int tap_disable_queue(struct tap_queue *q) { struct macvlan_dev *vlan; - struct macvtap_queue *nq; + struct tap_queue *nq; ASSERT_RTNL(); if (!q->enabled) @@ -236,7 +236,7 @@ static int macvtap_disable_queue(struct macvtap_queue *q) * Using the spinlock makes sure that we don't get * to the queue again after destroying it. 
*/ -static void macvtap_put_queue(struct macvtap_queue *q) +static void tap_put_queue(struct tap_queue *q) { struct macvlan_dev *vlan; @@ -245,7 +245,7 @@ static void macvtap_put_queue(struct macvtap_queue *q) if (vlan) { if (q->enabled) - BUG_ON(macvtap_disable_queue(q)); + BUG_ON(tap_disable_queue(q)); vlan->numqueues--; RCU_INIT_POINTER(q->vlan, NULL); @@ -266,11 +266,11 @@ static void macvtap_put_queue(struct macvtap_queue *q) * Cache vlan->numvtaps since it can become zero during the execution * of this function. */ -static struct macvtap_queue *macvtap_get_queue(struct net_device *dev, - struct sk_buff *skb) +static struct tap_queue *tap_get_queue(struct net_device *dev, + struct sk_buff *skb) { struct macvlan_dev *vlan = netdev_priv(dev); - struct macvtap_queue *tap = NULL; + struct tap_queue *tap = NULL; /* Access to taps array is protected by rcu, but access to numvtaps * isn't. Below we use it to lookup a queue, but treat it as a hint * and validate that the result isn't NULL - in case we are @@ -313,10 +313,10 @@ out: * that it holds on all queues and safely set the pointer * from the queues to NULL. 
*/ -void macvtap_del_queues(struct net_device *dev) +void tap_del_queues(struct net_device *dev) { struct macvlan_dev *vlan = netdev_priv(dev); - struct macvtap_queue *q, *tmp; + struct tap_queue *q, *tmp; ASSERT_RTNL(); list_for_each_entry_safe(q, tmp, &vlan->queue_list, next) { @@ -329,23 +329,23 @@ void macvtap_del_queues(struct net_device *dev) } BUG_ON(vlan->numvtaps); BUG_ON(vlan->numqueues); - /* guarantee that any future macvtap_set_queue will fail */ - vlan->numvtaps = MAX_MACVTAP_QUEUES; + /* guarantee that any future tap_set_queue will fail */ + vlan->numvtaps = MAX_TAP_QUEUES; } -rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) +rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct net_device *dev = skb->dev; struct macvlan_dev *vlan; - struct macvtap_queue *q; + struct tap_queue *q; netdev_features_t features = TAP_FEATURES; - vlan = macvtap_get_vlan_rcu(dev); + vlan = tap_get_vlan_rcu(dev); if (!vlan) return RX_HANDLER_PASS; - q = macvtap_get_queue(dev, skb); + q = tap_get_queue(dev, skb); if (!q) return RX_HANDLER_PASS; @@ -409,23 +409,23 @@ drop: return RX_HANDLER_CONSUMED; } -int macvtap_get_minor(struct macvlan_dev *vlan) +int tap_get_minor(struct macvlan_dev *vlan) { int retval = -ENOMEM; mutex_lock(&minor_lock); - retval = idr_alloc(&minor_idr, vlan, 1, MACVTAP_NUM_DEVS, GFP_KERNEL); + retval = idr_alloc(&minor_idr, vlan, 1, TAP_NUM_DEVS, GFP_KERNEL); if (retval >= 0) { vlan->minor = retval; } else if (retval == -ENOSPC) { - netdev_err(vlan->dev, "Too many macvtap devices\n"); + netdev_err(vlan->dev, "Too many tap devices\n"); retval = -EINVAL; } mutex_unlock(&minor_lock); return retval < 0 ? 
retval : 0; } -void macvtap_free_minor(struct macvlan_dev *vlan) +void tap_free_minor(struct macvlan_dev *vlan) { mutex_lock(&minor_lock); if (vlan->minor) { @@ -435,7 +435,7 @@ void macvtap_free_minor(struct macvlan_dev *vlan) mutex_unlock(&minor_lock); } -static struct net_device *dev_get_by_macvtap_minor(int minor) +static struct net_device *dev_get_by_tap_minor(int minor) { struct net_device *dev = NULL; struct macvlan_dev *vlan; @@ -450,7 +450,7 @@ static struct net_device *dev_get_by_macvtap_minor(int minor) return dev; } -static void macvtap_sock_write_space(struct sock *sk) +static void tap_sock_write_space(struct sock *sk) { wait_queue_head_t *wqueue; @@ -463,28 +463,28 @@ static void macvtap_sock_write_space(struct sock *sk) wake_up_interruptible_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND); } -static void macvtap_sock_destruct(struct sock *sk) +static void tap_sock_destruct(struct sock *sk) { - struct macvtap_queue *q = container_of(sk, struct macvtap_queue, sk); + struct tap_queue *q = container_of(sk, struct tap_queue, sk); skb_array_cleanup(&q->skb_array); } -static int macvtap_open(struct inode *inode, struct file *file) +static int tap_open(struct inode *inode, struct file *file) { struct net *net = current->nsproxy->net_ns; struct net_device *dev; - struct macvtap_queue *q; + struct tap_queue *q; int err = -ENODEV; rtnl_lock(); - dev = dev_get_by_macvtap_minor(iminor(inode)); + dev = dev_get_by_tap_minor(iminor(inode)); if (!dev) goto err; err = -ENOMEM; - q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, - &macvtap_proto, 0); + q = (struct tap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, + &tap_proto, 0); if (!q) goto err; @@ -493,15 +493,15 @@ static int macvtap_open(struct inode *inode, struct file *file) q->sock.type = SOCK_RAW; q->sock.state = SS_CONNECTED; q->sock.file = file; - q->sock.ops = &macvtap_socket_ops; + q->sock.ops = &tap_socket_ops; sock_init_data(&q->sock, &q->sk); - q->sk.sk_write_space = 
macvtap_sock_write_space; - q->sk.sk_destruct = macvtap_sock_destruct; + q->sk.sk_write_space = tap_sock_write_space; + q->sk.sk_destruct = tap_sock_destruct; q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP; q->vnet_hdr_sz = sizeof(struct virtio_net_hdr); /* - * so far only KVM virtio_net uses macvtap, enable zero copy between + * so far only KVM virtio_net uses tap, enable zero copy between * guest kernel and host kernel when lower device supports zerocopy * * The macvlan supports zerocopy iff the lower device supports zero @@ -514,7 +514,7 @@ static int macvtap_open(struct inode *inode, struct file *file) if (skb_array_init(&q->skb_array, dev->tx_queue_len, GFP_KERNEL)) goto err_array; - err = macvtap_set_queue(dev, file, q); + err = tap_set_queue(dev, file, q); if (err) goto err_queue; @@ -535,16 +535,16 @@ err: return err; } -static int macvtap_release(struct inode *inode, struct file *file) +static int tap_release(struct inode *inode, struct file *file) { - struct macvtap_queue *q = file->private_data; - macvtap_put_queue(q); + struct tap_queue *q = file->private_data; + tap_put_queue(q); return 0; } -static unsigned int macvtap_poll(struct file *file, poll_table * wait) +static unsigned int tap_poll(struct file *file, poll_table *wait) { - struct macvtap_queue *q = file->private_data; + struct tap_queue *q = file->private_data; unsigned int mask = POLLERR; if (!q) @@ -565,8 +565,8 @@ out: return mask; } -static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad, - size_t len, size_t linear, +static inline struct sk_buff *tap_alloc_skb(struct sock *sk, size_t prepad, + size_t len, size_t linear, int noblock, int *err) { struct sk_buff *skb; @@ -589,13 +589,13 @@ static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad, } /* Neighbour code has some assumptions on HH_DATA_MOD alignment */ -#define MACVTAP_RESERVE HH_DATA_OFF(ETH_HLEN) +#define TAP_RESERVE HH_DATA_OFF(ETH_HLEN) /* Get packet from user space buffer */ 
-static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, - struct iov_iter *from, int noblock) +static ssize_t tap_get_user(struct tap_queue *q, struct msghdr *m, + struct iov_iter *from, int noblock) { - int good_linear = SKB_MAX_HEAD(MACVTAP_RESERVE); + int good_linear = SKB_MAX_HEAD(TAP_RESERVE); struct sk_buff *skb; struct macvlan_dev *vlan; unsigned long total_len = iov_iter_count(from); @@ -621,14 +621,14 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, goto err; iov_iter_advance(from, vnet_hdr_len - sizeof(vnet_hdr)); if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - macvtap16_to_cpu(q, vnet_hdr.csum_start) + - macvtap16_to_cpu(q, vnet_hdr.csum_offset) + 2 > - macvtap16_to_cpu(q, vnet_hdr.hdr_len)) - vnet_hdr.hdr_len = cpu_to_macvtap16(q, - macvtap16_to_cpu(q, vnet_hdr.csum_start) + - macvtap16_to_cpu(q, vnet_hdr.csum_offset) + 2); + tap16_to_cpu(q, vnet_hdr.csum_start) + + tap16_to_cpu(q, vnet_hdr.csum_offset) + 2 > + tap16_to_cpu(q, vnet_hdr.hdr_len)) + vnet_hdr.hdr_len = cpu_to_tap16(q, + tap16_to_cpu(q, vnet_hdr.csum_start) + + tap16_to_cpu(q, vnet_hdr.csum_offset) + 2); err = -EINVAL; - if (macvtap16_to_cpu(q, vnet_hdr.hdr_len) > len) + if (tap16_to_cpu(q, vnet_hdr.hdr_len) > len) goto err; } @@ -640,7 +640,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, struct iov_iter i; copylen = vnet_hdr.hdr_len ? 
- macvtap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN; + tap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN; if (copylen > good_linear) copylen = good_linear; else if (copylen < ETH_HLEN) @@ -654,15 +654,15 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, if (!zerocopy) { copylen = len; - linear = macvtap16_to_cpu(q, vnet_hdr.hdr_len); + linear = tap16_to_cpu(q, vnet_hdr.hdr_len); if (linear > good_linear) linear = good_linear; else if (linear < ETH_HLEN) linear = ETH_HLEN; } - skb = macvtap_alloc_skb(&q->sk, MACVTAP_RESERVE, copylen, - linear, noblock, &err); + skb = tap_alloc_skb(&q->sk, TAP_RESERVE, copylen, + linear, noblock, &err); if (!skb) goto err; @@ -680,7 +680,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, if (vnet_hdr_len) { err = virtio_net_hdr_to_skb(skb, &vnet_hdr, - macvtap_is_little_endian(q)); + tap_is_little_endian(q)); if (err) goto err_kfree; } @@ -728,18 +728,18 @@ err: return err; } -static ssize_t macvtap_write_iter(struct kiocb *iocb, struct iov_iter *from) +static ssize_t tap_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - struct macvtap_queue *q = file->private_data; + struct tap_queue *q = file->private_data; - return macvtap_get_user(q, NULL, from, file->f_flags & O_NONBLOCK); + return tap_get_user(q, NULL, from, file->f_flags & O_NONBLOCK); } /* Put packet to the user space buffer */ -static ssize_t macvtap_put_user(struct macvtap_queue *q, - const struct sk_buff *skb, - struct iov_iter *iter) +static ssize_t tap_put_user(struct tap_queue *q, + const struct sk_buff *skb, + struct iov_iter *iter) { int ret; int vnet_hdr_len = 0; @@ -753,7 +753,7 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q, return -EINVAL; if (virtio_net_hdr_from_skb(skb, &vnet_hdr, - macvtap_is_little_endian(q), true)) + tap_is_little_endian(q), true)) BUG(); if (copy_to_iter(&vnet_hdr, sizeof(vnet_hdr), iter) != @@ -792,9 +792,9 @@ done: return ret ? 
ret : total; } -static ssize_t macvtap_do_read(struct macvtap_queue *q, - struct iov_iter *to, - int noblock) +static ssize_t tap_do_read(struct tap_queue *q, + struct iov_iter *to, + int noblock) { DEFINE_WAIT(wait); struct sk_buff *skb; @@ -827,7 +827,7 @@ static ssize_t macvtap_do_read(struct macvtap_queue *q, finish_wait(sk_sleep(&q->sk), &wait); if (skb) { - ret = macvtap_put_user(q, skb, to); + ret = tap_put_user(q, skb, to); if (unlikely(ret < 0)) kfree_skb(skb); else @@ -836,20 +836,20 @@ static ssize_t macvtap_do_read(struct macvtap_queue *q, return ret; } -static ssize_t macvtap_read_iter(struct kiocb *iocb, struct iov_iter *to) +static ssize_t tap_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; - struct macvtap_queue *q = file->private_data; + struct tap_queue *q = file->private_data; ssize_t len = iov_iter_count(to), ret; - ret = macvtap_do_read(q, to, file->f_flags & O_NONBLOCK); + ret = tap_do_read(q, to, file->f_flags & O_NONBLOCK); ret = min_t(ssize_t, ret, len); if (ret > 0) iocb->ki_pos = ret; return ret; } -static struct macvlan_dev *macvtap_get_vlan(struct macvtap_queue *q) +static struct macvlan_dev *tap_get_vlan(struct tap_queue *q) { struct macvlan_dev *vlan; @@ -861,33 +861,33 @@ static struct macvlan_dev *macvtap_get_vlan(struct macvtap_queue *q) return vlan; } -static void macvtap_put_vlan(struct macvlan_dev *vlan) +static void tap_put_vlan(struct macvlan_dev *vlan) { dev_put(vlan->dev); } -static int macvtap_ioctl_set_queue(struct file *file, unsigned int flags) +static int tap_ioctl_set_queue(struct file *file, unsigned int flags) { - struct macvtap_queue *q = file->private_data; + struct tap_queue *q = file->private_data; struct macvlan_dev *vlan; int ret; - vlan = macvtap_get_vlan(q); + vlan = tap_get_vlan(q); if (!vlan) return -EINVAL; if (flags & IFF_ATTACH_QUEUE) - ret = macvtap_enable_queue(vlan->dev, file, q); + ret = tap_enable_queue(vlan->dev, file, q); else if (flags & IFF_DETACH_QUEUE) - 
ret = macvtap_disable_queue(q); + ret = tap_disable_queue(q); else ret = -EINVAL; - macvtap_put_vlan(vlan); + tap_put_vlan(vlan); return ret; } -static int set_offload(struct macvtap_queue *q, unsigned long arg) +static int set_offload(struct tap_queue *q, unsigned long arg) { struct macvlan_dev *vlan; netdev_features_t features; @@ -919,7 +919,7 @@ static int set_offload(struct macvtap_queue *q, unsigned long arg) * setting the TSO bit means that the userspace wants to * accept TSO frames and turning it off means that user space * does not support TSO. - * For macvtap, we have to invert it to mean the same thing. + * For tap, we have to invert it to mean the same thing. * When user space turns off TSO, we turn off GSO/LRO so that * user-space will not receive TSO frames. */ @@ -941,10 +941,10 @@ static int set_offload(struct macvtap_queue *q, unsigned long arg) /* * provide compatibility with generic tun/tap interface */ -static long macvtap_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) +static long tap_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) { - struct macvtap_queue *q = file->private_data; + struct tap_queue *q = file->private_data; struct macvlan_dev *vlan; void __user *argp = (void __user *)arg; struct ifreq __user *ifr = argp; @@ -962,16 +962,16 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, return -EFAULT; ret = 0; - if ((u & ~MACVTAP_FEATURES) != (IFF_NO_PI | IFF_TAP)) + if ((u & ~TAP_IFFEATURES) != (IFF_NO_PI | IFF_TAP)) ret = -EINVAL; else - q->flags = (q->flags & ~MACVTAP_FEATURES) | u; + q->flags = (q->flags & ~TAP_IFFEATURES) | u; return ret; case TUNGETIFF: rtnl_lock(); - vlan = macvtap_get_vlan(q); + vlan = tap_get_vlan(q); if (!vlan) { rtnl_unlock(); return -ENOLINK; @@ -982,7 +982,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) || put_user(u, &ifr->ifr_flags)) ret = -EFAULT; - macvtap_put_vlan(vlan); + 
tap_put_vlan(vlan); rtnl_unlock(); return ret; @@ -990,12 +990,12 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, if (get_user(u, &ifr->ifr_flags)) return -EFAULT; rtnl_lock(); - ret = macvtap_ioctl_set_queue(file, u); + ret = tap_ioctl_set_queue(file, u); rtnl_unlock(); return ret; case TUNGETFEATURES: - if (put_user(IFF_TAP | IFF_NO_PI | MACVTAP_FEATURES, up)) + if (put_user(IFF_TAP | IFF_NO_PI | TAP_IFFEATURES, up)) return -EFAULT; return 0; @@ -1022,7 +1022,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, return 0; case TUNGETVNETLE: - s = !!(q->flags & MACVTAP_VNET_LE); + s = !!(q->flags & TAP_VNET_LE); if (put_user(s, sp)) return -EFAULT; return 0; @@ -1031,16 +1031,16 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, if (get_user(s, sp)) return -EFAULT; if (s) - q->flags |= MACVTAP_VNET_LE; + q->flags |= TAP_VNET_LE; else - q->flags &= ~MACVTAP_VNET_LE; + q->flags &= ~TAP_VNET_LE; return 0; case TUNGETVNETBE: - return macvtap_get_vnet_be(q, sp); + return tap_get_vnet_be(q, sp); case TUNSETVNETBE: - return macvtap_set_vnet_be(q, sp); + return tap_set_vnet_be(q, sp); case TUNSETOFFLOAD: /* let the user check for future flags */ @@ -1055,7 +1055,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, case SIOCGIFHWADDR: rtnl_lock(); - vlan = macvtap_get_vlan(q); + vlan = tap_get_vlan(q); if (!vlan) { rtnl_unlock(); return -ENOLINK; @@ -1066,7 +1066,7 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, copy_to_user(&ifr->ifr_hwaddr.sa_data, vlan->dev->dev_addr, ETH_ALEN) || put_user(u, &ifr->ifr_hwaddr.sa_family)) ret = -EFAULT; - macvtap_put_vlan(vlan); + tap_put_vlan(vlan); rtnl_unlock(); return ret; @@ -1074,13 +1074,13 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, if (copy_from_user(&sa, &ifr->ifr_hwaddr, sizeof(sa))) return -EFAULT; rtnl_lock(); - vlan = macvtap_get_vlan(q); + vlan = tap_get_vlan(q); if (!vlan) { rtnl_unlock(); return -ENOLINK; } ret = 
dev_set_mac_address(vlan->dev, &sa); - macvtap_put_vlan(vlan); + tap_put_vlan(vlan); rtnl_unlock(); return ret; @@ -1090,42 +1090,42 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd, } #ifdef CONFIG_COMPAT -static long macvtap_compat_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) +static long tap_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) { - return macvtap_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); + return tap_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); } #endif -const struct file_operations macvtap_fops = { +const struct file_operations tap_fops = { .owner = THIS_MODULE, - .open = macvtap_open, - .release = macvtap_release, - .read_iter = macvtap_read_iter, - .write_iter = macvtap_write_iter, - .poll = macvtap_poll, + .open = tap_open, + .release = tap_release, + .read_iter = tap_read_iter, + .write_iter = tap_write_iter, + .poll = tap_poll, .llseek = no_llseek, - .unlocked_ioctl = macvtap_ioctl, + .unlocked_ioctl = tap_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = macvtap_compat_ioctl, + .compat_ioctl = tap_compat_ioctl, #endif }; -static int macvtap_sendmsg(struct socket *sock, struct msghdr *m, - size_t total_len) +static int tap_sendmsg(struct socket *sock, struct msghdr *m, + size_t total_len) { - struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); - return macvtap_get_user(q, m, &m->msg_iter, m->msg_flags & MSG_DONTWAIT); + struct tap_queue *q = container_of(sock, struct tap_queue, sock); + return tap_get_user(q, m, &m->msg_iter, m->msg_flags & MSG_DONTWAIT); } -static int macvtap_recvmsg(struct socket *sock, struct msghdr *m, - size_t total_len, int flags) +static int tap_recvmsg(struct socket *sock, struct msghdr *m, + size_t total_len, int flags) { - struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); + struct tap_queue *q = container_of(sock, struct tap_queue, sock); int ret; if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) return -EINVAL; - ret 
= macvtap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT); + ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT); if (ret > total_len) { m->msg_flags |= MSG_TRUNC; ret = flags & MSG_TRUNC ? ret : total_len; @@ -1133,40 +1133,40 @@ static int macvtap_recvmsg(struct socket *sock, struct msghdr *m, return ret; } -static int macvtap_peek_len(struct socket *sock) +static int tap_peek_len(struct socket *sock) { - struct macvtap_queue *q = container_of(sock, struct macvtap_queue, + struct tap_queue *q = container_of(sock, struct tap_queue, sock); return skb_array_peek_len(&q->skb_array); } /* Ops structure to mimic raw sockets with tun */ -static const struct proto_ops macvtap_socket_ops = { - .sendmsg = macvtap_sendmsg, - .recvmsg = macvtap_recvmsg, - .peek_len = macvtap_peek_len, +static const struct proto_ops tap_socket_ops = { + .sendmsg = tap_sendmsg, + .recvmsg = tap_recvmsg, + .peek_len = tap_peek_len, }; /* Get an underlying socket object from tun file. Returns error unless file is * attached to a device. The returned object works like a packet socket, it * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for * holding a reference to the file for as long as the socket is in use. 
*/ -struct socket *macvtap_get_socket(struct file *file) +struct socket *tap_get_socket(struct file *file) { - struct macvtap_queue *q; - if (file->f_op != &macvtap_fops) + struct tap_queue *q; + if (file->f_op != &tap_fops) return ERR_PTR(-EINVAL); q = file->private_data; if (!q) return ERR_PTR(-EBADFD); return &q->sock; } -EXPORT_SYMBOL_GPL(macvtap_get_socket); +EXPORT_SYMBOL_GPL(tap_get_socket); -int macvtap_queue_resize(struct macvlan_dev *vlan) +int tap_queue_resize(struct macvlan_dev *vlan) { struct net_device *dev = vlan->dev; - struct macvtap_queue *q; + struct tap_queue *q; struct skb_array **arrays; int n = vlan->numqueues; int ret, i = 0; diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index c42e9c305134..2fe35354f20e 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -960,7 +961,7 @@ static struct socket *get_tap_socket(int fd) sock = tun_get_socket(file); if (!IS_ERR(sock)) return sock; - sock = macvtap_get_socket(file); + sock = tap_get_socket(file); if (IS_ERR(sock)) fput(file); return sock; diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index a4ccc3122f93..c9ec1343d187 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -9,19 +9,6 @@ #include #include -#if IS_ENABLED(CONFIG_MACVTAP) -struct socket *macvtap_get_socket(struct file *); -#else -#include -#include -struct file; -struct socket; -static inline struct socket *macvtap_get_socket(struct file *f) -{ - return ERR_PTR(-EINVAL); -} -#endif /* CONFIG_MACVTAP */ - struct macvlan_port; struct macvtap_queue; @@ -29,7 +16,7 @@ struct macvtap_queue; * Maximum times a macvtap device can be opened. This can be used to * configure the number of receive queue, e.g. for multiqueue virtio. 
*/ -#define MAX_MACVTAP_QUEUES 256 +#define MAX_TAP_QUEUES 256 #define MACVLAN_MC_FILTER_BITS 8 #define MACVLAN_MC_FILTER_SZ (1 << MACVLAN_MC_FILTER_BITS) @@ -49,7 +36,7 @@ struct macvlan_dev { enum macvlan_mode mode; u16 flags; /* This array tracks active taps. */ - struct macvtap_queue __rcu *taps[MAX_MACVTAP_QUEUES]; + struct tap_queue __rcu *taps[MAX_TAP_QUEUES]; /* This list tracks all taps (both enabled and disabled) */ struct list_head queue_list; int numvtaps; diff --git a/include/linux/if_macvtap.h b/include/linux/if_macvtap.h deleted file mode 100644 index c9bf84b75b27..000000000000 --- a/include/linux/if_macvtap.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _LINUX_IF_MACVTAP_H_ -#define _LINUX_IF_MACVTAP_H_ - -rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb); -void macvtap_del_queues(struct net_device *dev); -int macvtap_get_minor(struct macvlan_dev *vlan); -void macvtap_free_minor(struct macvlan_dev *vlan); -int macvtap_queue_resize(struct macvlan_dev *vlan); - -#endif /*_LINUX_IF_MACVTAP_H_*/ diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h new file mode 100644 index 000000000000..97d27b8ebd55 --- /dev/null +++ b/include/linux/if_tap.h @@ -0,0 +1,23 @@ +#ifndef _LINUX_IF_TAP_H_ +#define _LINUX_IF_TAP_H_ + +#if IS_ENABLED(CONFIG_MACVTAP) +struct socket *tap_get_socket(struct file *); +#else +#include +#include +struct file; +struct socket; +static inline struct socket *tap_get_socket(struct file *f) +{ + return ERR_PTR(-EINVAL); +} +#endif /* CONFIG_MACVTAP */ + +rx_handler_result_t tap_handle_frame(struct sk_buff **pskb); +void tap_del_queues(struct net_device *dev); +int tap_get_minor(struct macvlan_dev *vlan); +void tap_free_minor(struct macvlan_dev *vlan); +int tap_queue_resize(struct macvlan_dev *vlan); + +#endif /*_LINUX_IF_TAP_H_*/ -- cgit v1.2.3 From ebc05ba7e8600b52a2a0c87a43105143368aca2a Mon Sep 17 00:00:00 2001 From: Sainath Grandhi Date: Fri, 10 Feb 2017 16:03:48 -0800 Subject: tap: Tap character device 
creation/destroy API This patch provides tap device create/destroy APIs in tap.c. Signed-off-by: Sainath Grandhi Signed-off-by: David S. Miller --- drivers/net/macvtap_main.c | 30 +++++++--------------- drivers/net/tap.c | 62 ++++++++++++++++++++++++++++++++++++++-------- include/linux/if_tap.h | 3 +++ 3 files changed, 63 insertions(+), 32 deletions(-) (limited to 'drivers/net/tap.c') diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 548f339a75bd..215ab7abae89 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -28,7 +28,6 @@ * Variables for dealing with macvtaps device numbers. */ static dev_t macvtap_major; -#define MACVTAP_NUM_DEVS (1U << MINORBITS) static const void *macvtap_net_namespace(struct device *d) { @@ -159,57 +158,46 @@ static struct notifier_block macvtap_notifier_block __read_mostly = { .notifier_call = macvtap_device_event, }; -extern struct file_operations tap_fops; static int macvtap_init(void) { int err; - err = alloc_chrdev_region(&macvtap_major, 0, - MACVTAP_NUM_DEVS, "macvtap"); - if (err) - goto out1; + err = tap_create_cdev(&macvtap_cdev, &macvtap_major, "macvtap"); - cdev_init(&macvtap_cdev, &tap_fops); - err = cdev_add(&macvtap_cdev, macvtap_major, MACVTAP_NUM_DEVS); if (err) - goto out2; + goto out1; err = class_register(&macvtap_class); if (err) - goto out3; + goto out2; err = register_netdevice_notifier(&macvtap_notifier_block); if (err) - goto out4; + goto out3; err = macvlan_link_register(&macvtap_link_ops); if (err) - goto out5; + goto out4; return 0; -out5: - unregister_netdevice_notifier(&macvtap_notifier_block); out4: - class_unregister(&macvtap_class); + unregister_netdevice_notifier(&macvtap_notifier_block); out3: - cdev_del(&macvtap_cdev); + class_unregister(&macvtap_class); out2: - unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); + tap_destroy_cdev(macvtap_major, &macvtap_cdev); out1: return err; } module_init(macvtap_init); -extern struct idr minor_idr; static void 
macvtap_exit(void) { rtnl_link_unregister(&macvtap_link_ops); unregister_netdevice_notifier(&macvtap_notifier_block); class_unregister(&macvtap_class); - cdev_del(&macvtap_cdev); - unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); - idr_destroy(&minor_idr); + tap_destroy_cdev(macvtap_major, &macvtap_cdev); } module_exit(macvtap_exit); diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 15ca2d531d05..04ba9782c2f3 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -123,8 +123,12 @@ static struct proto tap_proto = { }; #define TAP_NUM_DEVS (1U << MINORBITS) -static DEFINE_MUTEX(minor_lock); -DEFINE_IDR(minor_idr); +struct major_info { + dev_t major; + struct idr minor_idr; + struct mutex minor_lock; + const char *device_name; +} macvtap_major; #define GOODCOPY_LEN 128 @@ -413,26 +417,26 @@ int tap_get_minor(struct macvlan_dev *vlan) { int retval = -ENOMEM; - mutex_lock(&minor_lock); - retval = idr_alloc(&minor_idr, vlan, 1, TAP_NUM_DEVS, GFP_KERNEL); + mutex_lock(&macvtap_major.minor_lock); + retval = idr_alloc(&macvtap_major.minor_idr, vlan, 1, TAP_NUM_DEVS, GFP_KERNEL); if (retval >= 0) { vlan->minor = retval; } else if (retval == -ENOSPC) { netdev_err(vlan->dev, "Too many tap devices\n"); retval = -EINVAL; } - mutex_unlock(&minor_lock); + mutex_unlock(&macvtap_major.minor_lock); return retval < 0 ? 
retval : 0; } void tap_free_minor(struct macvlan_dev *vlan) { - mutex_lock(&minor_lock); + mutex_lock(&macvtap_major.minor_lock); if (vlan->minor) { - idr_remove(&minor_idr, vlan->minor); + idr_remove(&macvtap_major.minor_idr, vlan->minor); vlan->minor = 0; } - mutex_unlock(&minor_lock); + mutex_unlock(&macvtap_major.minor_lock); } static struct net_device *dev_get_by_tap_minor(int minor) @@ -440,13 +444,13 @@ static struct net_device *dev_get_by_tap_minor(int minor) struct net_device *dev = NULL; struct macvlan_dev *vlan; - mutex_lock(&minor_lock); - vlan = idr_find(&minor_idr, minor); + mutex_lock(&macvtap_major.minor_lock); + vlan = idr_find(&macvtap_major.minor_idr, minor); if (vlan) { dev = vlan->dev; dev_hold(dev); } - mutex_unlock(&minor_lock); + mutex_unlock(&macvtap_major.minor_lock); return dev; } @@ -1184,3 +1188,39 @@ int tap_queue_resize(struct macvlan_dev *vlan) kfree(arrays); return ret; } + +int tap_create_cdev(struct cdev *tap_cdev, + dev_t *tap_major, const char *device_name) +{ + int err; + + err = alloc_chrdev_region(tap_major, 0, TAP_NUM_DEVS, device_name); + if (err) + goto out1; + + cdev_init(tap_cdev, &tap_fops); + err = cdev_add(tap_cdev, *tap_major, TAP_NUM_DEVS); + if (err) + goto out2; + + macvtap_major.major = MAJOR(*tap_major); + + idr_init(&macvtap_major.minor_idr); + mutex_init(&macvtap_major.minor_lock); + + macvtap_major.device_name = device_name; + + return 0; + +out2: + unregister_chrdev_region(*tap_major, TAP_NUM_DEVS); +out1: + return err; +} + +void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) +{ + cdev_del(tap_cdev); + unregister_chrdev_region(major, TAP_NUM_DEVS); + idr_destroy(&macvtap_major.minor_idr); +} diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 97d27b8ebd55..a2dfd9063a6c 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -19,5 +19,8 @@ void tap_del_queues(struct net_device *dev); int tap_get_minor(struct macvlan_dev *vlan); void tap_free_minor(struct macvlan_dev *vlan); 
int tap_queue_resize(struct macvlan_dev *vlan); +int tap_create_cdev(struct cdev *tap_cdev, + dev_t *tap_major, const char *device_name); +void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev); #endif /*_LINUX_IF_TAP_H_*/ -- cgit v1.2.3 From 6fe3faf86757eb7f078ff06b23b206f17dc4fb36 Mon Sep 17 00:00:00 2001 From: Sainath Grandhi Date: Fri, 10 Feb 2017 16:03:49 -0800 Subject: tap: Abstract type of virtual interface from tap implementation macvlan object is re-structured to hold tap related elements in a separate entity, tap_dev. Upon NETDEV_REGISTER device_event, tap_dev is registered with idr and fetched again on tap_open. A few of the tap functions are modified to accept tap_dev as an argument. tap_dev object includes callbacks to be used by underlying virtual interface to take care of tx and rx accounting. Signed-off-by: Sainath Grandhi Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 2 +- drivers/net/macvtap_main.c | 71 +++++++++--- drivers/net/tap.c | 264 ++++++++++++++++++++------------------------- include/linux/if_tap.h | 57 +++++++++- 4 files changed, 229 insertions(+), 165 deletions(-) (limited to 'drivers/net/tap.c') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index cbfc1be23a0e..9261722960a7 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1525,7 +1525,6 @@ static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = { int macvlan_link_register(struct rtnl_link_ops *ops) { /* common fields */ - ops->priv_size = sizeof(struct macvlan_dev); ops->validate = macvlan_validate; ops->maxtype = IFLA_MACVLAN_MAX; ops->policy = macvlan_policy; @@ -1548,6 +1547,7 @@ static struct rtnl_link_ops macvlan_link_ops = { .newlink = macvlan_newlink, .dellink = macvlan_dellink, .get_link_net = macvlan_get_link_net, + .priv_size = sizeof(struct macvlan_dev), }; static int macvlan_device_event(struct notifier_block *unused, diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 215ab7abae89..0238df62bf45
100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -24,6 +24,11 @@ #include #include +struct macvtap_dev { + struct macvlan_dev vlan; + struct tap_dev tap; +}; + /* * Variables for dealing with macvtaps device numbers. */ @@ -46,22 +51,55 @@ static struct cdev macvtap_cdev; #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ NETIF_F_TSO6 | NETIF_F_UFO) +static void macvtap_count_tx_dropped(struct tap_dev *tap) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = &vlantap->vlan; + + this_cpu_inc(vlan->pcpu_stats->tx_dropped); +} + +static void macvtap_count_rx_dropped(struct tap_dev *tap) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = &vlantap->vlan; + + macvlan_count_rx(vlan, 0, 0, 0); +} + +static void macvtap_update_features(struct tap_dev *tap, + netdev_features_t features) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = &vlantap->vlan; + + vlan->set_features = features; + netdev_update_features(vlan->dev); +} + static int macvtap_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct macvlan_dev *vlan = netdev_priv(dev); + struct macvtap_dev *vlantap = netdev_priv(dev); int err; - INIT_LIST_HEAD(&vlan->queue_list); + INIT_LIST_HEAD(&vlantap->tap.queue_list); /* Since macvlan supports all offloads by default, make * tap support all offloads also. 
*/ - vlan->tap_features = TUN_OFFLOADS; + vlantap->tap.tap_features = TUN_OFFLOADS; - err = netdev_rx_handler_register(dev, tap_handle_frame, vlan); + /* Register callbacks for rx/tx drops accounting and updating + * net_device features + */ + vlantap->tap.count_tx_dropped = macvtap_count_tx_dropped; + vlantap->tap.count_rx_dropped = macvtap_count_rx_dropped; + vlantap->tap.update_features = macvtap_update_features; + + err = netdev_rx_handler_register(dev, tap_handle_frame, &vlantap->tap); if (err) return err; @@ -74,14 +112,18 @@ static int macvtap_newlink(struct net *src_net, return err; } + vlantap->tap.dev = vlantap->vlan.dev; + return 0; } static void macvtap_dellink(struct net_device *dev, struct list_head *head) { + struct macvtap_dev *vlantap = netdev_priv(dev); + netdev_rx_handler_unregister(dev); - tap_del_queues(dev); + tap_del_queues(&vlantap->tap); macvlan_dellink(dev, head); } @@ -96,13 +138,14 @@ static struct rtnl_link_ops macvtap_link_ops __read_mostly = { .setup = macvtap_setup, .newlink = macvtap_newlink, .dellink = macvtap_dellink, + .priv_size = sizeof(struct macvtap_dev), }; static int macvtap_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct macvlan_dev *vlan; + struct macvtap_dev *vlantap; struct device *classdev; dev_t devt; int err; @@ -112,7 +155,7 @@ static int macvtap_device_event(struct notifier_block *unused, return NOTIFY_DONE; snprintf(tap_name, IFNAMSIZ, "tap%d", dev->ifindex); - vlan = netdev_priv(dev); + vlantap = netdev_priv(dev); switch (event) { case NETDEV_REGISTER: @@ -120,15 +163,15 @@ static int macvtap_device_event(struct notifier_block *unused, * been registered but before register_netdevice has * finished running. 
*/ - err = tap_get_minor(vlan); + err = tap_get_minor(&vlantap->tap); if (err) return notifier_from_errno(err); - devt = MKDEV(MAJOR(macvtap_major), vlan->minor); + devt = MKDEV(MAJOR(macvtap_major), vlantap->tap.minor); classdev = device_create(&macvtap_class, &dev->dev, devt, dev, tap_name); if (IS_ERR(classdev)) { - tap_free_minor(vlan); + tap_free_minor(&vlantap->tap); return notifier_from_errno(PTR_ERR(classdev)); } err = sysfs_create_link(&dev->dev.kobj, &classdev->kobj, @@ -138,15 +181,15 @@ static int macvtap_device_event(struct notifier_block *unused, break; case NETDEV_UNREGISTER: /* vlan->minor == 0 if NETDEV_REGISTER above failed */ - if (vlan->minor == 0) + if (vlantap->tap.minor == 0) break; sysfs_remove_link(&dev->dev.kobj, tap_name); - devt = MKDEV(MAJOR(macvtap_major), vlan->minor); + devt = MKDEV(MAJOR(macvtap_major), vlantap->tap.minor); device_destroy(&macvtap_class, devt); - tap_free_minor(vlan); + tap_free_minor(&vlantap->tap); break; case NETDEV_CHANGE_TX_QUEUE_LEN: - if (tap_queue_resize(vlan)) + if (tap_queue_resize(&vlantap->tap)) return NOTIFY_BAD; break; } diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 04ba9782c2f3..7d3e8b18f5e6 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -23,30 +23,6 @@ #include #include -/* - * A tap queue is the central object of this driver, it connects - * an open character device to a macvlan interface. There can be - * multiple queues on one interface, which map back to queues - * implemented in hardware on the underlying device. - * - * tap_proto is used to allocate queues through the sock allocation - * mechanism. 
- * - */ -struct tap_queue { - struct sock sk; - struct socket sock; - struct socket_wq wq; - int vnet_hdr_sz; - struct macvlan_dev __rcu *vlan; - struct file *file; - unsigned int flags; - u16 queue_index; - bool enabled; - struct list_head next; - struct skb_array skb_array; -}; - #define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) #define TAP_VNET_LE 0x80000000 @@ -137,7 +113,7 @@ static const struct proto_ops tap_socket_ops; #define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO) #define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG | NETIF_F_FRAGLIST) -static struct macvlan_dev *tap_get_vlan_rcu(const struct net_device *dev) +static struct tap_dev *tap_dev_get_rcu(const struct net_device *dev) { return rcu_dereference(dev->rx_handler_data); } @@ -159,10 +135,9 @@ static struct macvlan_dev *tap_get_vlan_rcu(const struct net_device *dev) * when both our references and any pending SKBs are gone. */ -static int tap_enable_queue(struct net_device *dev, struct file *file, +static int tap_enable_queue(struct tap_dev *tap, struct file *file, struct tap_queue *q) { - struct macvlan_dev *vlan = netdev_priv(dev); int err = -EINVAL; ASSERT_RTNL(); @@ -171,62 +146,60 @@ static int tap_enable_queue(struct net_device *dev, struct file *file, goto out; err = 0; - rcu_assign_pointer(vlan->taps[vlan->numvtaps], q); - q->queue_index = vlan->numvtaps; + rcu_assign_pointer(tap->taps[tap->numvtaps], q); + q->queue_index = tap->numvtaps; q->enabled = true; - vlan->numvtaps++; + tap->numvtaps++; out: return err; } /* Requires RTNL */ -static int tap_set_queue(struct net_device *dev, struct file *file, +static int tap_set_queue(struct tap_dev *tap, struct file *file, struct tap_queue *q) { - struct macvlan_dev *vlan = netdev_priv(dev); - - if (vlan->numqueues == MAX_TAP_QUEUES) + if (tap->numqueues == MAX_TAP_QUEUES) return -EBUSY; - rcu_assign_pointer(q->vlan, vlan); - rcu_assign_pointer(vlan->taps[vlan->numvtaps], q); + rcu_assign_pointer(q->tap, tap); + 
rcu_assign_pointer(tap->taps[tap->numvtaps], q); sock_hold(&q->sk); q->file = file; - q->queue_index = vlan->numvtaps; + q->queue_index = tap->numvtaps; q->enabled = true; file->private_data = q; - list_add_tail(&q->next, &vlan->queue_list); + list_add_tail(&q->next, &tap->queue_list); - vlan->numvtaps++; - vlan->numqueues++; + tap->numvtaps++; + tap->numqueues++; return 0; } static int tap_disable_queue(struct tap_queue *q) { - struct macvlan_dev *vlan; + struct tap_dev *tap; struct tap_queue *nq; ASSERT_RTNL(); if (!q->enabled) return -EINVAL; - vlan = rtnl_dereference(q->vlan); + tap = rtnl_dereference(q->tap); - if (vlan) { + if (tap) { int index = q->queue_index; - BUG_ON(index >= vlan->numvtaps); - nq = rtnl_dereference(vlan->taps[vlan->numvtaps - 1]); + BUG_ON(index >= tap->numvtaps); + nq = rtnl_dereference(tap->taps[tap->numvtaps - 1]); nq->queue_index = index; - rcu_assign_pointer(vlan->taps[index], nq); - RCU_INIT_POINTER(vlan->taps[vlan->numvtaps - 1], NULL); + rcu_assign_pointer(tap->taps[index], nq); + RCU_INIT_POINTER(tap->taps[tap->numvtaps - 1], NULL); q->enabled = false; - vlan->numvtaps--; + tap->numvtaps--; } return 0; @@ -242,17 +215,17 @@ static int tap_disable_queue(struct tap_queue *q) */ static void tap_put_queue(struct tap_queue *q) { - struct macvlan_dev *vlan; + struct tap_dev *tap; rtnl_lock(); - vlan = rtnl_dereference(q->vlan); + tap = rtnl_dereference(q->tap); - if (vlan) { + if (tap) { if (q->enabled) BUG_ON(tap_disable_queue(q)); - vlan->numqueues--; - RCU_INIT_POINTER(q->vlan, NULL); + tap->numqueues--; + RCU_INIT_POINTER(q->tap, NULL); sock_put(&q->sk); list_del_init(&q->next); } @@ -270,17 +243,16 @@ static void tap_put_queue(struct tap_queue *q) * Cache vlan->numvtaps since it can become zero during the execution * of this function. 
*/ -static struct tap_queue *tap_get_queue(struct net_device *dev, +static struct tap_queue *tap_get_queue(struct tap_dev *tap, struct sk_buff *skb) { - struct macvlan_dev *vlan = netdev_priv(dev); - struct tap_queue *tap = NULL; + struct tap_queue *queue = NULL; /* Access to taps array is protected by rcu, but access to numvtaps * isn't. Below we use it to lookup a queue, but treat it as a hint * and validate that the result isn't NULL - in case we are * racing against queue removal. */ - int numvtaps = ACCESS_ONCE(vlan->numvtaps); + int numvtaps = ACCESS_ONCE(tap->numvtaps); __u32 rxq; if (!numvtaps) @@ -292,7 +264,7 @@ static struct tap_queue *tap_get_queue(struct net_device *dev, /* Check if we can use flow to select a queue */ rxq = skb_get_hash(skb); if (rxq) { - tap = rcu_dereference(vlan->taps[rxq % numvtaps]); + queue = rcu_dereference(tap->taps[rxq % numvtaps]); goto out; } @@ -302,14 +274,14 @@ static struct tap_queue *tap_get_queue(struct net_device *dev, while (unlikely(rxq >= numvtaps)) rxq -= numvtaps; - tap = rcu_dereference(vlan->taps[rxq]); + queue = rcu_dereference(tap->taps[rxq]); goto out; } single: - tap = rcu_dereference(vlan->taps[0]); + queue = rcu_dereference(tap->taps[0]); out: - return tap; + return queue; } /* @@ -317,39 +289,38 @@ out: * that it holds on all queues and safely set the pointer * from the queues to NULL. 
*/ -void tap_del_queues(struct net_device *dev) +void tap_del_queues(struct tap_dev *tap) { - struct macvlan_dev *vlan = netdev_priv(dev); struct tap_queue *q, *tmp; ASSERT_RTNL(); - list_for_each_entry_safe(q, tmp, &vlan->queue_list, next) { + list_for_each_entry_safe(q, tmp, &tap->queue_list, next) { list_del_init(&q->next); - RCU_INIT_POINTER(q->vlan, NULL); + RCU_INIT_POINTER(q->tap, NULL); if (q->enabled) - vlan->numvtaps--; - vlan->numqueues--; + tap->numvtaps--; + tap->numqueues--; sock_put(&q->sk); } - BUG_ON(vlan->numvtaps); - BUG_ON(vlan->numqueues); + BUG_ON(tap->numvtaps); + BUG_ON(tap->numqueues); /* guarantee that any future tap_set_queue will fail */ - vlan->numvtaps = MAX_TAP_QUEUES; + tap->numvtaps = MAX_TAP_QUEUES; } rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct net_device *dev = skb->dev; - struct macvlan_dev *vlan; + struct tap_dev *tap; struct tap_queue *q; netdev_features_t features = TAP_FEATURES; - vlan = tap_get_vlan_rcu(dev); - if (!vlan) + tap = tap_dev_get_rcu(dev); + if (!tap) return RX_HANDLER_PASS; - q = tap_get_queue(dev, skb); + q = tap_get_queue(tap, skb); if (!q) return RX_HANDLER_PASS; @@ -363,7 +334,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) * enabled. */ if (q->flags & IFF_VNET_HDR) - features |= vlan->tap_features; + features |= tap->tap_features; if (netif_needs_gso(skb, features)) { struct sk_buff *segs = __skb_gso_segment(skb, features, false); @@ -408,50 +379,51 @@ wake_up: drop: /* Count errors/drops only here, thus don't care about args. 
*/ - macvlan_count_rx(vlan, 0, 0, 0); + if (tap->count_rx_dropped) + tap->count_rx_dropped(tap); kfree_skb(skb); return RX_HANDLER_CONSUMED; } -int tap_get_minor(struct macvlan_dev *vlan) +int tap_get_minor(struct tap_dev *tap) { int retval = -ENOMEM; mutex_lock(&macvtap_major.minor_lock); - retval = idr_alloc(&macvtap_major.minor_idr, vlan, 1, TAP_NUM_DEVS, GFP_KERNEL); + retval = idr_alloc(&macvtap_major.minor_idr, tap, 1, TAP_NUM_DEVS, GFP_KERNEL); if (retval >= 0) { - vlan->minor = retval; + tap->minor = retval; } else if (retval == -ENOSPC) { - netdev_err(vlan->dev, "Too many tap devices\n"); + netdev_err(tap->dev, "Too many tap devices\n"); retval = -EINVAL; } mutex_unlock(&macvtap_major.minor_lock); return retval < 0 ? retval : 0; } -void tap_free_minor(struct macvlan_dev *vlan) +void tap_free_minor(struct tap_dev *tap) { mutex_lock(&macvtap_major.minor_lock); - if (vlan->minor) { - idr_remove(&macvtap_major.minor_idr, vlan->minor); - vlan->minor = 0; + if (tap->minor) { + idr_remove(&macvtap_major.minor_idr, tap->minor); + tap->minor = 0; } mutex_unlock(&macvtap_major.minor_lock); } -static struct net_device *dev_get_by_tap_minor(int minor) +static struct tap_dev *dev_get_by_tap_minor(int minor) { struct net_device *dev = NULL; - struct macvlan_dev *vlan; + struct tap_dev *tap; mutex_lock(&macvtap_major.minor_lock); - vlan = idr_find(&macvtap_major.minor_idr, minor); - if (vlan) { - dev = vlan->dev; + tap = idr_find(&macvtap_major.minor_idr, minor); + if (tap) { + dev = tap->dev; dev_hold(dev); } mutex_unlock(&macvtap_major.minor_lock); - return dev; + return tap; } static void tap_sock_write_space(struct sock *sk) @@ -477,13 +449,13 @@ static void tap_sock_destruct(struct sock *sk) static int tap_open(struct inode *inode, struct file *file) { struct net *net = current->nsproxy->net_ns; - struct net_device *dev; + struct tap_dev *tap; struct tap_queue *q; int err = -ENODEV; rtnl_lock(); - dev = dev_get_by_tap_minor(iminor(inode)); - if (!dev) + tap = 
dev_get_by_tap_minor(iminor(inode)); + if (!tap) goto err; err = -ENOMEM; @@ -511,18 +483,18 @@ static int tap_open(struct inode *inode, struct file *file) * The macvlan supports zerocopy iff the lower device supports zero * copy so we don't have to look at the lower device directly. */ - if ((dev->features & NETIF_F_HIGHDMA) && (dev->features & NETIF_F_SG)) + if ((tap->dev->features & NETIF_F_HIGHDMA) && (tap->dev->features & NETIF_F_SG)) sock_set_flag(&q->sk, SOCK_ZEROCOPY); err = -ENOMEM; - if (skb_array_init(&q->skb_array, dev->tx_queue_len, GFP_KERNEL)) + if (skb_array_init(&q->skb_array, tap->dev->tx_queue_len, GFP_KERNEL)) goto err_array; - err = tap_set_queue(dev, file, q); + err = tap_set_queue(tap, file, q); if (err) goto err_queue; - dev_put(dev); + dev_put(tap->dev); rtnl_unlock(); return err; @@ -532,8 +504,8 @@ err_queue: err_array: sock_put(&q->sk); err: - if (dev) - dev_put(dev); + if (tap) + dev_put(tap->dev); rtnl_unlock(); return err; @@ -601,7 +573,7 @@ static ssize_t tap_get_user(struct tap_queue *q, struct msghdr *m, { int good_linear = SKB_MAX_HEAD(TAP_RESERVE); struct sk_buff *skb; - struct macvlan_dev *vlan; + struct tap_dev *tap; unsigned long total_len = iov_iter_count(from); unsigned long len = total_len; int err; @@ -698,7 +670,7 @@ static ssize_t tap_get_user(struct tap_queue *q, struct msghdr *m, skb_set_network_header(skb, depth); rcu_read_lock(); - vlan = rcu_dereference(q->vlan); + tap = rcu_dereference(q->tap); /* copy skb_ubuf_info for callback when skb has no error */ if (zerocopy) { skb_shinfo(skb)->destructor_arg = m->msg_control; @@ -709,8 +681,8 @@ static ssize_t tap_get_user(struct tap_queue *q, struct msghdr *m, uarg->callback(uarg, false); } - if (vlan) { - skb->dev = vlan->dev; + if (tap) { + skb->dev = tap->dev; dev_queue_xmit(skb); } else { kfree_skb(skb); @@ -724,9 +696,9 @@ err_kfree: err: rcu_read_lock(); - vlan = rcu_dereference(q->vlan); - if (vlan) - this_cpu_inc(vlan->pcpu_stats->tx_dropped); + tap = 
rcu_dereference(q->tap); + if (tap && tap->count_tx_dropped) + tap->count_tx_dropped(tap); rcu_read_unlock(); return err; @@ -853,55 +825,55 @@ static ssize_t tap_read_iter(struct kiocb *iocb, struct iov_iter *to) return ret; } -static struct macvlan_dev *tap_get_vlan(struct tap_queue *q) +static struct tap_dev *tap_get_tap_dev(struct tap_queue *q) { - struct macvlan_dev *vlan; + struct tap_dev *tap; ASSERT_RTNL(); - vlan = rtnl_dereference(q->vlan); - if (vlan) - dev_hold(vlan->dev); + tap = rtnl_dereference(q->tap); + if (tap) + dev_hold(tap->dev); - return vlan; + return tap; } -static void tap_put_vlan(struct macvlan_dev *vlan) +static void tap_put_tap_dev(struct tap_dev *tap) { - dev_put(vlan->dev); + dev_put(tap->dev); } static int tap_ioctl_set_queue(struct file *file, unsigned int flags) { struct tap_queue *q = file->private_data; - struct macvlan_dev *vlan; + struct tap_dev *tap; int ret; - vlan = tap_get_vlan(q); - if (!vlan) + tap = tap_get_tap_dev(q); + if (!tap) return -EINVAL; if (flags & IFF_ATTACH_QUEUE) - ret = tap_enable_queue(vlan->dev, file, q); + ret = tap_enable_queue(tap, file, q); else if (flags & IFF_DETACH_QUEUE) ret = tap_disable_queue(q); else ret = -EINVAL; - tap_put_vlan(vlan); + tap_put_tap_dev(tap); return ret; } static int set_offload(struct tap_queue *q, unsigned long arg) { - struct macvlan_dev *vlan; + struct tap_dev *tap; netdev_features_t features; netdev_features_t feature_mask = 0; - vlan = rtnl_dereference(q->vlan); - if (!vlan) + tap = rtnl_dereference(q->tap); + if (!tap) return -ENOLINK; - features = vlan->dev->features; + features = tap->dev->features; if (arg & TUN_F_CSUM) { feature_mask = NETIF_F_HW_CSUM; @@ -935,9 +907,9 @@ static int set_offload(struct tap_queue *q, unsigned long arg) /* tap_features are the same as features on tun/tap and * reflect user expectations. 
*/ - vlan->tap_features = feature_mask; - vlan->set_features = features; - netdev_update_features(vlan->dev); + tap->tap_features = feature_mask; + if (tap->update_features) + tap->update_features(tap, features); return 0; } @@ -949,7 +921,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct tap_queue *q = file->private_data; - struct macvlan_dev *vlan; + struct tap_dev *tap; void __user *argp = (void __user *)arg; struct ifreq __user *ifr = argp; unsigned int __user *up = argp; @@ -975,18 +947,18 @@ static long tap_ioctl(struct file *file, unsigned int cmd, case TUNGETIFF: rtnl_lock(); - vlan = tap_get_vlan(q); - if (!vlan) { + tap = tap_get_tap_dev(q); + if (!tap) { rtnl_unlock(); return -ENOLINK; } ret = 0; u = q->flags; - if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) || + if (copy_to_user(&ifr->ifr_name, tap->dev->name, IFNAMSIZ) || put_user(u, &ifr->ifr_flags)) ret = -EFAULT; - tap_put_vlan(vlan); + tap_put_tap_dev(tap); rtnl_unlock(); return ret; @@ -1059,18 +1031,18 @@ static long tap_ioctl(struct file *file, unsigned int cmd, case SIOCGIFHWADDR: rtnl_lock(); - vlan = tap_get_vlan(q); - if (!vlan) { + tap = tap_get_tap_dev(q); + if (!tap) { rtnl_unlock(); return -ENOLINK; } ret = 0; - u = vlan->dev->type; - if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) || - copy_to_user(&ifr->ifr_hwaddr.sa_data, vlan->dev->dev_addr, ETH_ALEN) || + u = tap->dev->type; + if (copy_to_user(&ifr->ifr_name, tap->dev->name, IFNAMSIZ) || + copy_to_user(&ifr->ifr_hwaddr.sa_data, tap->dev->dev_addr, ETH_ALEN) || put_user(u, &ifr->ifr_hwaddr.sa_family)) ret = -EFAULT; - tap_put_vlan(vlan); + tap_put_tap_dev(tap); rtnl_unlock(); return ret; @@ -1078,13 +1050,13 @@ static long tap_ioctl(struct file *file, unsigned int cmd, if (copy_from_user(&sa, &ifr->ifr_hwaddr, sizeof(sa))) return -EFAULT; rtnl_lock(); - vlan = tap_get_vlan(q); - if (!vlan) { + tap = tap_get_tap_dev(q); + if (!tap) { rtnl_unlock(); return -ENOLINK; } - 
ret = dev_set_mac_address(vlan->dev, &sa); - tap_put_vlan(vlan); + ret = dev_set_mac_address(tap->dev, &sa); + tap_put_tap_dev(tap); rtnl_unlock(); return ret; @@ -1167,19 +1139,19 @@ struct socket *tap_get_socket(struct file *file) } EXPORT_SYMBOL_GPL(tap_get_socket); -int tap_queue_resize(struct macvlan_dev *vlan) +int tap_queue_resize(struct tap_dev *tap) { - struct net_device *dev = vlan->dev; + struct net_device *dev = tap->dev; struct tap_queue *q; struct skb_array **arrays; - int n = vlan->numqueues; + int n = tap->numqueues; int ret, i = 0; arrays = kmalloc(sizeof *arrays * n, GFP_KERNEL); if (!arrays) return -ENOMEM; - list_for_each_entry(q, &vlan->queue_list, next) + list_for_each_entry(q, &tap->queue_list, next) arrays[i++] = &q->skb_array; ret = skb_array_resize_multiple(arrays, n, diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index a2dfd9063a6c..75031e5d0a65 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -14,11 +14,60 @@ static inline struct socket *tap_get_socket(struct file *f) } #endif /* CONFIG_MACVTAP */ +#include +#include + +#define MAX_TAP_QUEUES 256 + +struct tap_queue; + +struct tap_dev { + struct net_device *dev; + u16 flags; + /* This array tracks active taps. */ + struct tap_queue __rcu *taps[MAX_TAP_QUEUES]; + /* This list tracks all taps (both enabled and disabled) */ + struct list_head queue_list; + int numvtaps; + int numqueues; + netdev_features_t tap_features; + int minor; + + void (*update_features)(struct tap_dev *tap, netdev_features_t features); + void (*count_tx_dropped)(struct tap_dev *tap); + void (*count_rx_dropped)(struct tap_dev *tap); +}; + +/* + * A tap queue is the central object of tap module, it connects + * an open character device to virtual interface. There can be + * multiple queues on one interface, which map back to queues + * implemented in hardware on the underlying device. + * + * tap_proto is used to allocate queues through the sock allocation + * mechanism. 
+ * + */ + +struct tap_queue { + struct sock sk; + struct socket sock; + struct socket_wq wq; + int vnet_hdr_sz; + struct tap_dev __rcu *tap; + struct file *file; + unsigned int flags; + u16 queue_index; + bool enabled; + struct list_head next; + struct skb_array skb_array; +}; + rx_handler_result_t tap_handle_frame(struct sk_buff **pskb); -void tap_del_queues(struct net_device *dev); -int tap_get_minor(struct macvlan_dev *vlan); -void tap_free_minor(struct macvlan_dev *vlan); -int tap_queue_resize(struct macvlan_dev *vlan); +void tap_del_queues(struct tap_dev *tap); +int tap_get_minor(struct tap_dev *tap); +void tap_free_minor(struct tap_dev *tap); +int tap_queue_resize(struct tap_dev *tap); int tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major, const char *device_name); void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev); -- cgit v1.2.3 From d9f1f61c0801a73ff36d416a7ede54229b231e1d Mon Sep 17 00:00:00 2001 From: Sainath Grandhi Date: Fri, 10 Feb 2017 16:03:50 -0800 Subject: tap: Extending tap device create/destroy APIs Extending tap APIs get/free_minor and create/destroy_cdev to handle more than one type of virtual interface. Signed-off-by: Sainath Grandhi Signed-off-by: David S. Miller --- drivers/net/macvtap_main.c | 6 +-- drivers/net/tap.c | 118 +++++++++++++++++++++++++++++++++++++-------- include/linux/if_tap.h | 4 +- 3 files changed, 102 insertions(+), 26 deletions(-) (limited to 'drivers/net/tap.c') diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c index 0238df62bf45..a4bfc10b61dd 100644 --- a/drivers/net/macvtap_main.c +++ b/drivers/net/macvtap_main.c @@ -163,7 +163,7 @@ static int macvtap_device_event(struct notifier_block *unused, * been registered but before register_netdevice has * finished running. 
*/ - err = tap_get_minor(&vlantap->tap); + err = tap_get_minor(macvtap_major, &vlantap->tap); if (err) return notifier_from_errno(err); @@ -171,7 +171,7 @@ static int macvtap_device_event(struct notifier_block *unused, classdev = device_create(&macvtap_class, &dev->dev, devt, dev, tap_name); if (IS_ERR(classdev)) { - tap_free_minor(&vlantap->tap); + tap_free_minor(macvtap_major, &vlantap->tap); return notifier_from_errno(PTR_ERR(classdev)); } err = sysfs_create_link(&dev->dev.kobj, &classdev->kobj, @@ -186,7 +186,7 @@ static int macvtap_device_event(struct notifier_block *unused, sysfs_remove_link(&dev->dev.kobj, tap_name); devt = MKDEV(MAJOR(macvtap_major), vlantap->tap.minor); device_destroy(&macvtap_class, devt); - tap_free_minor(&vlantap->tap); + tap_free_minor(macvtap_major, &vlantap->tap); break; case NETDEV_CHANGE_TX_QUEUE_LEN: if (tap_queue_resize(&vlantap->tap)) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 7d3e8b18f5e6..71bbf0b6327d 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -99,12 +99,17 @@ static struct proto tap_proto = { }; #define TAP_NUM_DEVS (1U << MINORBITS) + +static LIST_HEAD(major_list); + struct major_info { + struct rcu_head rcu; dev_t major; struct idr minor_idr; struct mutex minor_lock; const char *device_name; -} macvtap_major; + struct list_head next; +}; #define GOODCOPY_LEN 128 @@ -385,44 +390,89 @@ drop: return RX_HANDLER_CONSUMED; } -int tap_get_minor(struct tap_dev *tap) +static struct major_info *tap_get_major(int major) +{ + struct major_info *tap_major; + + list_for_each_entry_rcu(tap_major, &major_list, next) { + if (tap_major->major == major) + return tap_major; + } + + return NULL; +} + +int tap_get_minor(dev_t major, struct tap_dev *tap) { int retval = -ENOMEM; + struct major_info *tap_major; + + rcu_read_lock(); + tap_major = tap_get_major(MAJOR(major)); + if (!tap_major) { + retval = -EINVAL; + goto unlock; + } - mutex_lock(&macvtap_major.minor_lock); - retval = idr_alloc(&macvtap_major.minor_idr, 
tap, 1, TAP_NUM_DEVS, GFP_KERNEL); + mutex_lock(&tap_major->minor_lock); + retval = idr_alloc(&tap_major->minor_idr, tap, 1, TAP_NUM_DEVS, GFP_KERNEL); if (retval >= 0) { tap->minor = retval; } else if (retval == -ENOSPC) { netdev_err(tap->dev, "Too many tap devices\n"); retval = -EINVAL; } - mutex_unlock(&macvtap_major.minor_lock); + mutex_unlock(&tap_major->minor_lock); + +unlock: + rcu_read_unlock(); return retval < 0 ? retval : 0; } -void tap_free_minor(struct tap_dev *tap) +void tap_free_minor(dev_t major, struct tap_dev *tap) { - mutex_lock(&macvtap_major.minor_lock); + struct major_info *tap_major; + + rcu_read_lock(); + tap_major = tap_get_major(MAJOR(major)); + if (!tap_major) { + goto unlock; + } + + mutex_lock(&tap_major->minor_lock); if (tap->minor) { - idr_remove(&macvtap_major.minor_idr, tap->minor); + idr_remove(&tap_major->minor_idr, tap->minor); tap->minor = 0; } - mutex_unlock(&macvtap_major.minor_lock); + mutex_unlock(&tap_major->minor_lock); + +unlock: + rcu_read_unlock(); } -static struct tap_dev *dev_get_by_tap_minor(int minor) +static struct tap_dev *dev_get_by_tap_file(int major, int minor) { struct net_device *dev = NULL; struct tap_dev *tap; + struct major_info *tap_major; - mutex_lock(&macvtap_major.minor_lock); - tap = idr_find(&macvtap_major.minor_idr, minor); + rcu_read_lock(); + tap_major = tap_get_major(major); + if (!tap_major) { + tap = NULL; + goto unlock; + } + + mutex_lock(&tap_major->minor_lock); + tap = idr_find(&tap_major->minor_idr, minor); if (tap) { dev = tap->dev; dev_hold(dev); } - mutex_unlock(&macvtap_major.minor_lock); + mutex_unlock(&tap_major->minor_lock); + +unlock: + rcu_read_unlock(); return tap; } @@ -454,7 +504,7 @@ static int tap_open(struct inode *inode, struct file *file) int err = -ENODEV; rtnl_lock(); - tap = dev_get_by_tap_minor(iminor(inode)); + tap = dev_get_by_tap_file(imajor(inode), iminor(inode)); if (!tap) goto err; @@ -1161,6 +1211,25 @@ int tap_queue_resize(struct tap_dev *tap) return ret; } 
+static int tap_list_add(dev_t major, const char *device_name) +{ + struct major_info *tap_major; + + tap_major = kzalloc(sizeof(*tap_major), GFP_ATOMIC); + if (!tap_major) + return -ENOMEM; + + tap_major->major = MAJOR(major); + + idr_init(&tap_major->minor_idr); + mutex_init(&tap_major->minor_lock); + + tap_major->device_name = device_name; + + list_add_tail_rcu(&tap_major->next, &major_list); + return 0; +} + int tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major, const char *device_name) { @@ -1175,15 +1244,14 @@ int tap_create_cdev(struct cdev *tap_cdev, if (err) goto out2; - macvtap_major.major = MAJOR(*tap_major); - - idr_init(&macvtap_major.minor_idr); - mutex_init(&macvtap_major.minor_lock); - - macvtap_major.device_name = device_name; + err = tap_list_add(*tap_major, device_name); + if (err) + goto out3; return 0; +out3: + cdev_del(tap_cdev); out2: unregister_chrdev_region(*tap_major, TAP_NUM_DEVS); out1: @@ -1192,7 +1260,15 @@ out1: void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) { + struct major_info *tap_major, *tmp; + cdev_del(tap_cdev); unregister_chrdev_region(major, TAP_NUM_DEVS); - idr_destroy(&macvtap_major.minor_idr); + list_for_each_entry_safe(tap_major, tmp, &major_list, next) { + if (tap_major->major == MAJOR(major)) { + idr_destroy(&tap_major->minor_idr); + list_del_rcu(&tap_major->next); + kfree_rcu(tap_major, rcu); + } + } } diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 75031e5d0a65..362e71c16efb 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -65,8 +65,8 @@ struct tap_queue { rx_handler_result_t tap_handle_frame(struct sk_buff **pskb); void tap_del_queues(struct tap_dev *tap); -int tap_get_minor(struct tap_dev *tap); -void tap_free_minor(struct tap_dev *tap); +int tap_get_minor(dev_t major, struct tap_dev *tap); +void tap_free_minor(dev_t major, struct tap_dev *tap); int tap_queue_resize(struct tap_dev *tap); int tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major, const char 
*device_name); -- cgit v1.2.3 From 9a393b5d5988ea4eaa3e0da138321abe0dc03a68 Mon Sep 17 00:00:00 2001 From: Sainath Grandhi Date: Fri, 10 Feb 2017 16:03:51 -0800 Subject: tap: tap as an independent module This patch makes tap a separate module for other types of virtual interfaces, for example, ipvlan to use. Signed-off-by: Sainath Grandhi Signed-off-by: David S. Miller --- drivers/net/Kconfig | 7 ++ drivers/net/Makefile | 3 +- drivers/net/macvtap.c | 249 +++++++++++++++++++++++++++++++++++++++++++++ drivers/net/macvtap_main.c | 249 --------------------------------------------- drivers/net/tap.c | 11 ++ drivers/vhost/Kconfig | 2 +- include/linux/if_tap.h | 4 +- 7 files changed, 271 insertions(+), 254 deletions(-) create mode 100644 drivers/net/macvtap.c delete mode 100644 drivers/net/macvtap_main.c (limited to 'drivers/net/tap.c') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index a993cbeb9e0c..5763503fe4e6 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -135,6 +135,7 @@ config MACVTAP tristate "MAC-VLAN based tap driver" depends on MACVLAN depends on INET + select TAP help This adds a specialized tap character device driver that is based on the MAC-VLAN network interface, called macvtap. A macvtap device @@ -287,6 +288,12 @@ config TUN If you don't know what to use this for, you don't need it. +config TAP + tristate + ---help--- + This option is selected by any driver implementing tap user space + interface for a virtual interface to re-use core tap functionality. 
+ config TUN_VNET_CROSS_LE bool "Support for cross-endian vnet headers on little-endian kernels" default n diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 19b03a9fe0f6..7dd86ca02d0d 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_PHYLIB) += phy/ obj-$(CONFIG_RIONET) += rionet.o obj-$(CONFIG_NET_TEAM) += team/ obj-$(CONFIG_TUN) += tun.o +obj-$(CONFIG_TAP) += tap.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o obj-$(CONFIG_VXLAN) += vxlan.o @@ -29,8 +30,6 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o -macvtap-objs := macvtap_main.o tap.o - # # Networking Drivers # diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c new file mode 100644 index 000000000000..a4bfc10b61dd --- /dev/null +++ b/drivers/net/macvtap.c @@ -0,0 +1,249 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +struct macvtap_dev { + struct macvlan_dev vlan; + struct tap_dev tap; +}; + +/* + * Variables for dealing with macvtaps device numbers. 
+ */ +static dev_t macvtap_major; + +static const void *macvtap_net_namespace(struct device *d) +{ + struct net_device *dev = to_net_dev(d->parent); + return dev_net(dev); +} + +static struct class macvtap_class = { + .name = "macvtap", + .owner = THIS_MODULE, + .ns_type = &net_ns_type_operations, + .namespace = macvtap_net_namespace, +}; +static struct cdev macvtap_cdev; + +#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ + NETIF_F_TSO6 | NETIF_F_UFO) + +static void macvtap_count_tx_dropped(struct tap_dev *tap) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = &vlantap->vlan; + + this_cpu_inc(vlan->pcpu_stats->tx_dropped); +} + +static void macvtap_count_rx_dropped(struct tap_dev *tap) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = &vlantap->vlan; + + macvlan_count_rx(vlan, 0, 0, 0); +} + +static void macvtap_update_features(struct tap_dev *tap, + netdev_features_t features) +{ + struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); + struct macvlan_dev *vlan = &vlantap->vlan; + + vlan->set_features = features; + netdev_update_features(vlan->dev); +} + +static int macvtap_newlink(struct net *src_net, + struct net_device *dev, + struct nlattr *tb[], + struct nlattr *data[]) +{ + struct macvtap_dev *vlantap = netdev_priv(dev); + int err; + + INIT_LIST_HEAD(&vlantap->tap.queue_list); + + /* Since macvlan supports all offloads by default, make + * tap support all offloads also. 
+ */ + vlantap->tap.tap_features = TUN_OFFLOADS; + + /* Register callbacks for rx/tx drops accounting and updating + * net_device features + */ + vlantap->tap.count_tx_dropped = macvtap_count_tx_dropped; + vlantap->tap.count_rx_dropped = macvtap_count_rx_dropped; + vlantap->tap.update_features = macvtap_update_features; + + err = netdev_rx_handler_register(dev, tap_handle_frame, &vlantap->tap); + if (err) + return err; + + /* Don't put anything that may fail after macvlan_common_newlink + * because we can't undo what it does. + */ + err = macvlan_common_newlink(src_net, dev, tb, data); + if (err) { + netdev_rx_handler_unregister(dev); + return err; + } + + vlantap->tap.dev = vlantap->vlan.dev; + + return 0; +} + +static void macvtap_dellink(struct net_device *dev, + struct list_head *head) +{ + struct macvtap_dev *vlantap = netdev_priv(dev); + + netdev_rx_handler_unregister(dev); + tap_del_queues(&vlantap->tap); + macvlan_dellink(dev, head); +} + +static void macvtap_setup(struct net_device *dev) +{ + macvlan_common_setup(dev); + dev->tx_queue_len = TUN_READQ_SIZE; +} + +static struct rtnl_link_ops macvtap_link_ops __read_mostly = { + .kind = "macvtap", + .setup = macvtap_setup, + .newlink = macvtap_newlink, + .dellink = macvtap_dellink, + .priv_size = sizeof(struct macvtap_dev), +}; + +static int macvtap_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct macvtap_dev *vlantap; + struct device *classdev; + dev_t devt; + int err; + char tap_name[IFNAMSIZ]; + + if (dev->rtnl_link_ops != &macvtap_link_ops) + return NOTIFY_DONE; + + snprintf(tap_name, IFNAMSIZ, "tap%d", dev->ifindex); + vlantap = netdev_priv(dev); + + switch (event) { + case NETDEV_REGISTER: + /* Create the device node here after the network device has + * been registered but before register_netdevice has + * finished running. 
+ */ + err = tap_get_minor(macvtap_major, &vlantap->tap); + if (err) + return notifier_from_errno(err); + + devt = MKDEV(MAJOR(macvtap_major), vlantap->tap.minor); + classdev = device_create(&macvtap_class, &dev->dev, devt, + dev, tap_name); + if (IS_ERR(classdev)) { + tap_free_minor(macvtap_major, &vlantap->tap); + return notifier_from_errno(PTR_ERR(classdev)); + } + err = sysfs_create_link(&dev->dev.kobj, &classdev->kobj, + tap_name); + if (err) + return notifier_from_errno(err); + break; + case NETDEV_UNREGISTER: + /* vlan->minor == 0 if NETDEV_REGISTER above failed */ + if (vlantap->tap.minor == 0) + break; + sysfs_remove_link(&dev->dev.kobj, tap_name); + devt = MKDEV(MAJOR(macvtap_major), vlantap->tap.minor); + device_destroy(&macvtap_class, devt); + tap_free_minor(macvtap_major, &vlantap->tap); + break; + case NETDEV_CHANGE_TX_QUEUE_LEN: + if (tap_queue_resize(&vlantap->tap)) + return NOTIFY_BAD; + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block macvtap_notifier_block __read_mostly = { + .notifier_call = macvtap_device_event, +}; + +static int macvtap_init(void) +{ + int err; + + err = tap_create_cdev(&macvtap_cdev, &macvtap_major, "macvtap"); + + if (err) + goto out1; + + err = class_register(&macvtap_class); + if (err) + goto out2; + + err = register_netdevice_notifier(&macvtap_notifier_block); + if (err) + goto out3; + + err = macvlan_link_register(&macvtap_link_ops); + if (err) + goto out4; + + return 0; + +out4: + unregister_netdevice_notifier(&macvtap_notifier_block); +out3: + class_unregister(&macvtap_class); +out2: + tap_destroy_cdev(macvtap_major, &macvtap_cdev); +out1: + return err; +} +module_init(macvtap_init); + +static void macvtap_exit(void) +{ + rtnl_link_unregister(&macvtap_link_ops); + unregister_netdevice_notifier(&macvtap_notifier_block); + class_unregister(&macvtap_class); + tap_destroy_cdev(macvtap_major, &macvtap_cdev); +} +module_exit(macvtap_exit); + +MODULE_ALIAS_RTNL_LINK("macvtap"); +MODULE_AUTHOR("Arnd 
Bergmann "); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/macvtap_main.c b/drivers/net/macvtap_main.c deleted file mode 100644 index a4bfc10b61dd..000000000000 --- a/drivers/net/macvtap_main.c +++ /dev/null @@ -1,249 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -struct macvtap_dev { - struct macvlan_dev vlan; - struct tap_dev tap; -}; - -/* - * Variables for dealing with macvtaps device numbers. - */ -static dev_t macvtap_major; - -static const void *macvtap_net_namespace(struct device *d) -{ - struct net_device *dev = to_net_dev(d->parent); - return dev_net(dev); -} - -static struct class macvtap_class = { - .name = "macvtap", - .owner = THIS_MODULE, - .ns_type = &net_ns_type_operations, - .namespace = macvtap_net_namespace, -}; -static struct cdev macvtap_cdev; - -#define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ - NETIF_F_TSO6 | NETIF_F_UFO) - -static void macvtap_count_tx_dropped(struct tap_dev *tap) -{ - struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); - struct macvlan_dev *vlan = &vlantap->vlan; - - this_cpu_inc(vlan->pcpu_stats->tx_dropped); -} - -static void macvtap_count_rx_dropped(struct tap_dev *tap) -{ - struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); - struct macvlan_dev *vlan = &vlantap->vlan; - - macvlan_count_rx(vlan, 0, 0, 0); -} - -static void macvtap_update_features(struct tap_dev *tap, - netdev_features_t features) -{ - struct macvtap_dev *vlantap = container_of(tap, struct macvtap_dev, tap); - struct macvlan_dev *vlan = &vlantap->vlan; - - vlan->set_features = features; - netdev_update_features(vlan->dev); -} - -static int macvtap_newlink(struct net *src_net, - struct net_device *dev, - struct nlattr *tb[], - struct nlattr *data[]) -{ - struct macvtap_dev *vlantap 
= netdev_priv(dev); - int err; - - INIT_LIST_HEAD(&vlantap->tap.queue_list); - - /* Since macvlan supports all offloads by default, make - * tap support all offloads also. - */ - vlantap->tap.tap_features = TUN_OFFLOADS; - - /* Register callbacks for rx/tx drops accounting and updating - * net_device features - */ - vlantap->tap.count_tx_dropped = macvtap_count_tx_dropped; - vlantap->tap.count_rx_dropped = macvtap_count_rx_dropped; - vlantap->tap.update_features = macvtap_update_features; - - err = netdev_rx_handler_register(dev, tap_handle_frame, &vlantap->tap); - if (err) - return err; - - /* Don't put anything that may fail after macvlan_common_newlink - * because we can't undo what it does. - */ - err = macvlan_common_newlink(src_net, dev, tb, data); - if (err) { - netdev_rx_handler_unregister(dev); - return err; - } - - vlantap->tap.dev = vlantap->vlan.dev; - - return 0; -} - -static void macvtap_dellink(struct net_device *dev, - struct list_head *head) -{ - struct macvtap_dev *vlantap = netdev_priv(dev); - - netdev_rx_handler_unregister(dev); - tap_del_queues(&vlantap->tap); - macvlan_dellink(dev, head); -} - -static void macvtap_setup(struct net_device *dev) -{ - macvlan_common_setup(dev); - dev->tx_queue_len = TUN_READQ_SIZE; -} - -static struct rtnl_link_ops macvtap_link_ops __read_mostly = { - .kind = "macvtap", - .setup = macvtap_setup, - .newlink = macvtap_newlink, - .dellink = macvtap_dellink, - .priv_size = sizeof(struct macvtap_dev), -}; - -static int macvtap_device_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct macvtap_dev *vlantap; - struct device *classdev; - dev_t devt; - int err; - char tap_name[IFNAMSIZ]; - - if (dev->rtnl_link_ops != &macvtap_link_ops) - return NOTIFY_DONE; - - snprintf(tap_name, IFNAMSIZ, "tap%d", dev->ifindex); - vlantap = netdev_priv(dev); - - switch (event) { - case NETDEV_REGISTER: - /* Create the device node here after the 
network device has - * been registered but before register_netdevice has - * finished running. - */ - err = tap_get_minor(macvtap_major, &vlantap->tap); - if (err) - return notifier_from_errno(err); - - devt = MKDEV(MAJOR(macvtap_major), vlantap->tap.minor); - classdev = device_create(&macvtap_class, &dev->dev, devt, - dev, tap_name); - if (IS_ERR(classdev)) { - tap_free_minor(macvtap_major, &vlantap->tap); - return notifier_from_errno(PTR_ERR(classdev)); - } - err = sysfs_create_link(&dev->dev.kobj, &classdev->kobj, - tap_name); - if (err) - return notifier_from_errno(err); - break; - case NETDEV_UNREGISTER: - /* vlan->minor == 0 if NETDEV_REGISTER above failed */ - if (vlantap->tap.minor == 0) - break; - sysfs_remove_link(&dev->dev.kobj, tap_name); - devt = MKDEV(MAJOR(macvtap_major), vlantap->tap.minor); - device_destroy(&macvtap_class, devt); - tap_free_minor(macvtap_major, &vlantap->tap); - break; - case NETDEV_CHANGE_TX_QUEUE_LEN: - if (tap_queue_resize(&vlantap->tap)) - return NOTIFY_BAD; - break; - } - - return NOTIFY_DONE; -} - -static struct notifier_block macvtap_notifier_block __read_mostly = { - .notifier_call = macvtap_device_event, -}; - -static int macvtap_init(void) -{ - int err; - - err = tap_create_cdev(&macvtap_cdev, &macvtap_major, "macvtap"); - - if (err) - goto out1; - - err = class_register(&macvtap_class); - if (err) - goto out2; - - err = register_netdevice_notifier(&macvtap_notifier_block); - if (err) - goto out3; - - err = macvlan_link_register(&macvtap_link_ops); - if (err) - goto out4; - - return 0; - -out4: - unregister_netdevice_notifier(&macvtap_notifier_block); -out3: - class_unregister(&macvtap_class); -out2: - tap_destroy_cdev(macvtap_major, &macvtap_cdev); -out1: - return err; -} -module_init(macvtap_init); - -static void macvtap_exit(void) -{ - rtnl_link_unregister(&macvtap_link_ops); - unregister_netdevice_notifier(&macvtap_notifier_block); - class_unregister(&macvtap_class); - tap_destroy_cdev(macvtap_major, &macvtap_cdev); 
-} -module_exit(macvtap_exit); - -MODULE_ALIAS_RTNL_LINK("macvtap"); -MODULE_AUTHOR("Arnd Bergmann "); -MODULE_LICENSE("GPL"); diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 71bbf0b6327d..35b55a2fa1a1 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -312,6 +312,7 @@ void tap_del_queues(struct tap_dev *tap) /* guarantee that any future tap_set_queue will fail */ tap->numvtaps = MAX_TAP_QUEUES; } +EXPORT_SYMBOL_GPL(tap_del_queues); rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) { @@ -389,6 +390,7 @@ drop: kfree_skb(skb); return RX_HANDLER_CONSUMED; } +EXPORT_SYMBOL_GPL(tap_handle_frame); static struct major_info *tap_get_major(int major) { @@ -428,6 +430,7 @@ unlock: rcu_read_unlock(); return retval < 0 ? retval : 0; } +EXPORT_SYMBOL_GPL(tap_get_minor); void tap_free_minor(dev_t major, struct tap_dev *tap) { @@ -449,6 +452,7 @@ void tap_free_minor(dev_t major, struct tap_dev *tap) unlock: rcu_read_unlock(); } +EXPORT_SYMBOL_GPL(tap_free_minor); static struct tap_dev *dev_get_by_tap_file(int major, int minor) { @@ -1210,6 +1214,7 @@ int tap_queue_resize(struct tap_dev *tap) kfree(arrays); return ret; } +EXPORT_SYMBOL_GPL(tap_queue_resize); static int tap_list_add(dev_t major, const char *device_name) { @@ -1257,6 +1262,7 @@ out2: out1: return err; } +EXPORT_SYMBOL_GPL(tap_create_cdev); void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) { @@ -1272,3 +1278,8 @@ void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) } } } +EXPORT_SYMBOL_GPL(tap_destroy_cdev); + +MODULE_AUTHOR("Arnd Bergmann "); +MODULE_AUTHOR("Sainath Grandhi "); +MODULE_LICENSE("GPL"); diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index 40764ecad9ce..cfdecea5078f 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -1,6 +1,6 @@ config VHOST_NET tristate "Host kernel accelerator for virtio net" - depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP) + depends on NET && EVENTFD && (TUN || !TUN) && (TAP || !TAP) select VHOST 
---help--- This kernel module can be loaded in host kernel to accelerate diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 362e71c16efb..3482c3c2037d 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -1,7 +1,7 @@ #ifndef _LINUX_IF_TAP_H_ #define _LINUX_IF_TAP_H_ -#if IS_ENABLED(CONFIG_MACVTAP) +#if IS_ENABLED(CONFIG_TAP) struct socket *tap_get_socket(struct file *); #else #include @@ -12,7 +12,7 @@ static inline struct socket *tap_get_socket(struct file *f) { return ERR_PTR(-EINVAL); } -#endif /* CONFIG_MACVTAP */ +#endif /* CONFIG_TAP */ #include #include -- cgit v1.2.3 From c3edc4010e9d102eb7b8f17d15c2ebc425fed63c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 08:35:14 +0100 Subject: sched/headers: Move task_struct::signal and task_struct::sighand types and accessors into <linux/sched/signal.h> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit task_struct::signal and task_struct::sighand are pointers, which would normally make it straightforward to not define those types in sched.h. That is not so, because the types are accompanied by a myriad of APIs (macros and inline functions) that dereference them. Split the types and the APIs out of sched.h and move them into a new header, <linux/sched/signal.h>. With this change sched.h does not know about 'struct signal' and 'struct sighand' anymore, trying to put accessors into sched.h as a test fails the following way: ./include/linux/sched.h: In function ‘test_signal_types’: ./include/linux/sched.h:2461:18: error: dereferencing pointer to incomplete type ‘struct signal_struct’ ^ This reduces the size and complexity of sched.h significantly. Update all headers and .c code that relied on getting the signal handling functionality from <linux/sched.h> to include <linux/sched/signal.h>.
The list of affected files in the preparatory patch was partly generated by grepping for the APIs, and partly by doing coverage build testing, both all[yes|mod|def|no]config builds on 64-bit and 32-bit x86, and an array of cross-architecture builds. Nevertheless some (trivial) build breakage is still expected related to rare Kconfig combinations and in-flight patches to various kernel code, but most of it should be handled by this patch. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/nwfpe/fpmodule.c | 2 +- arch/sh/kernel/cpu/sh4/fpu.c | 3 +- drivers/net/tap.c | 2 +- include/linux/sched.h | 499 +----------------------------------------- include/linux/sched/signal.h | 502 +++++++++++++++++++++++++++++++++++++++++++ kernel/cgroup/cgroup-v1.c | 1 + mm/vmalloc.c | 2 +- net/smc/af_smc.c | 2 + net/smc/smc_clc.c | 2 + net/smc/smc_close.c | 2 + net/smc/smc_rx.c | 2 + net/smc/smc_tx.c | 2 + 12 files changed, 520 insertions(+), 501 deletions(-) (limited to 'drivers/net/tap.c') diff --git a/arch/arm/nwfpe/fpmodule.c b/arch/arm/nwfpe/fpmodule.c index ec717c190e2c..1365e8650843 100644 --- a/arch/arm/nwfpe/fpmodule.c +++ b/arch/arm/nwfpe/fpmodule.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c index 69ab4d3c8d41..95fd2dcb83da 100644 --- a/arch/sh/kernel/cpu/sh4/fpu.c +++ b/arch/sh/kernel/cpu/sh4/fpu.c @@ -10,8 +10,7 @@ * * FIXME! These routines have not been tested for big endian case. 
*/ -#include -#include +#include #include #include #include diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 35b55a2fa1a1..4d4173d25dd0 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/sched.h b/include/linux/sched.h index 7934cd0acbc7..c1586104d4c0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -71,6 +71,9 @@ struct blk_plug; struct filename; struct nameidata; +struct signal_struct; +struct sighand_struct; + extern unsigned long total_forks; extern int nr_threads; DECLARE_PER_CPU(unsigned long, process_counts); @@ -361,13 +364,6 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} #endif -struct sighand_struct { - atomic_t count; - struct k_sigaction action[_NSIG]; - spinlock_t siglock; - wait_queue_head_t signalfd_wqh; -}; - struct pacct_struct { int ac_flag; long ac_exitcode; @@ -485,195 +481,6 @@ struct thread_group_cputimer { #include struct autogroup; -/* - * NOTE! "signal_struct" does not have its own - * locking, because a shared signal_struct always - * implies a shared sighand_struct, so locking - * sighand_struct is always a proper superset of - * the locking of signal_struct. - */ -struct signal_struct { - atomic_t sigcnt; - atomic_t live; - int nr_threads; - struct list_head thread_head; - - wait_queue_head_t wait_chldexit; /* for wait4() */ - - /* current thread group signal load-balancing target: */ - struct task_struct *curr_target; - - /* shared signal handling: */ - struct sigpending shared_pending; - - /* thread group exit support */ - int group_exit_code; - /* overloaded: - * - notify group_exit_task when ->count is equal to notify_count - * - everyone except group_exit_task is stopped during signal delivery - * of fatal signals, group_exit_task processes the signal. 
- */ - int notify_count; - struct task_struct *group_exit_task; - - /* thread group stop support, overloads group_exit_code too */ - int group_stop_count; - unsigned int flags; /* see SIGNAL_* flags below */ - - /* - * PR_SET_CHILD_SUBREAPER marks a process, like a service - * manager, to re-parent orphan (double-forking) child processes - * to this process instead of 'init'. The service manager is - * able to receive SIGCHLD signals and is able to investigate - * the process until it calls wait(). All children of this - * process will inherit a flag if they should look for a - * child_subreaper process at exit. - */ - unsigned int is_child_subreaper:1; - unsigned int has_child_subreaper:1; - -#ifdef CONFIG_POSIX_TIMERS - - /* POSIX.1b Interval Timers */ - int posix_timer_id; - struct list_head posix_timers; - - /* ITIMER_REAL timer for the process */ - struct hrtimer real_timer; - ktime_t it_real_incr; - - /* - * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use - * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these - * values are defined to 0 and 1 respectively - */ - struct cpu_itimer it[2]; - - /* - * Thread group totals for process CPU timers. - * See thread_group_cputimer(), et al, for details. - */ - struct thread_group_cputimer cputimer; - - /* Earliest-expiration cache. */ - struct task_cputime cputime_expires; - - struct list_head cpu_timers[3]; - -#endif - - struct pid *leader_pid; - -#ifdef CONFIG_NO_HZ_FULL - atomic_t tick_dep_mask; -#endif - - struct pid *tty_old_pgrp; - - /* boolean value for session group leader */ - int leader; - - struct tty_struct *tty; /* NULL if no tty */ - -#ifdef CONFIG_SCHED_AUTOGROUP - struct autogroup *autogroup; -#endif - /* - * Cumulative resource counters for dead threads in the group, - * and for reaped dead child processes forked by this group. - * Live threads maintain their own counters and add to these - * in __exit_signal, except for the group leader. 
- */ - seqlock_t stats_lock; - u64 utime, stime, cutime, cstime; - u64 gtime; - u64 cgtime; - struct prev_cputime prev_cputime; - unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; - unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; - unsigned long inblock, oublock, cinblock, coublock; - unsigned long maxrss, cmaxrss; - struct task_io_accounting ioac; - - /* - * Cumulative ns of schedule CPU time fo dead threads in the - * group, not including a zombie group leader, (This only differs - * from jiffies_to_ns(utime + stime) if sched_clock uses something - * other than jiffies.) - */ - unsigned long long sum_sched_runtime; - - /* - * We don't bother to synchronize most readers of this at all, - * because there is no reader checking a limit that actually needs - * to get both rlim_cur and rlim_max atomically, and either one - * alone is a single word that can safely be read normally. - * getrlimit/setrlimit use task_lock(current->group_leader) to - * protect this instead of the siglock, because they really - * have no need to disable irqs. - */ - struct rlimit rlim[RLIM_NLIMITS]; - -#ifdef CONFIG_BSD_PROCESS_ACCT - struct pacct_struct pacct; /* per-process accounting information */ -#endif -#ifdef CONFIG_TASKSTATS - struct taskstats *stats; -#endif -#ifdef CONFIG_AUDIT - unsigned audit_tty; - struct tty_audit_buf *tty_audit_buf; -#endif - - /* - * Thread is the potential origin of an oom condition; kill first on - * oom - */ - bool oom_flag_origin; - short oom_score_adj; /* OOM kill score adjustment */ - short oom_score_adj_min; /* OOM kill score adjustment min value. - * Only settable by CAP_SYS_RESOURCE. */ - struct mm_struct *oom_mm; /* recorded mm when the thread group got - * killed by the oom killer */ - - struct mutex cred_guard_mutex; /* guard against foreign influences on - * credential calculations - * (notably. ptrace) */ -}; - -/* - * Bits in flags field of signal_struct. 
- */ -#define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ -#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ -#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ -#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */ -/* - * Pending notifications to parent. - */ -#define SIGNAL_CLD_STOPPED 0x00000010 -#define SIGNAL_CLD_CONTINUED 0x00000020 -#define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) - -#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ - -#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ - SIGNAL_STOP_CONTINUED) - -static inline void signal_set_stop_flags(struct signal_struct *sig, - unsigned int flags) -{ - WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); - sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; -} - -/* If true, all threads except ->group_exit_task have pending SIGKILL */ -static inline int signal_group_exit(const struct signal_struct *sig) -{ - return (sig->flags & SIGNAL_GROUP_EXIT) || - (sig->group_exit_task != NULL); -} - /* * Some day this will be a full-fledged user tracking system.. 
*/ @@ -2126,190 +1933,8 @@ extern int sched_fork(unsigned long clone_flags, struct task_struct *p); extern void sched_dead(struct task_struct *p); extern void proc_caches_init(void); -extern void flush_signals(struct task_struct *); -extern void ignore_signals(struct task_struct *); -extern void flush_signal_handlers(struct task_struct *, int force_default); -extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); - -static inline int kernel_dequeue_signal(siginfo_t *info) -{ - struct task_struct *tsk = current; - siginfo_t __info; - int ret; - - spin_lock_irq(&tsk->sighand->siglock); - ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info); - spin_unlock_irq(&tsk->sighand->siglock); - - return ret; -} - -static inline void kernel_signal_stop(void) -{ - spin_lock_irq(&current->sighand->siglock); - if (current->jobctl & JOBCTL_STOP_DEQUEUED) - __set_current_state(TASK_STOPPED); - spin_unlock_irq(&current->sighand->siglock); - - schedule(); -} extern void release_task(struct task_struct * p); -extern int send_sig_info(int, struct siginfo *, struct task_struct *); -extern int force_sigsegv(int, struct task_struct *); -extern int force_sig_info(int, struct siginfo *, struct task_struct *); -extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); -extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); -extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *, - const struct cred *, u32); -extern int kill_pgrp(struct pid *pid, int sig, int priv); -extern int kill_pid(struct pid *pid, int sig, int priv); -extern int kill_proc_info(int, struct siginfo *, pid_t); -extern __must_check bool do_notify_parent(struct task_struct *, int); -extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); -extern void force_sig(int, struct task_struct *); -extern int send_sig(int, struct task_struct *, int); -extern int zap_other_threads(struct task_struct *p); -extern struct sigqueue
*sigqueue_alloc(void); -extern void sigqueue_free(struct sigqueue *); -extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); -extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); - -#ifdef TIF_RESTORE_SIGMASK -/* - * Legacy restore_sigmask accessors. These are inefficient on - * SMP architectures because they require atomic operations. - */ - -/** - * set_restore_sigmask() - make sure saved_sigmask processing gets done - * - * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code - * will run before returning to user mode, to process the flag. For - * all callers, TIF_SIGPENDING is already set or it's no harm to set - * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the - * arch code will notice on return to user mode, in case those bits - * are scarce. We set TIF_SIGPENDING here to ensure that the arch - * signal code always gets run when TIF_RESTORE_SIGMASK is set. - */ -static inline void set_restore_sigmask(void) -{ - set_thread_flag(TIF_RESTORE_SIGMASK); - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); -} -static inline void clear_restore_sigmask(void) -{ - clear_thread_flag(TIF_RESTORE_SIGMASK); -} -static inline bool test_restore_sigmask(void) -{ - return test_thread_flag(TIF_RESTORE_SIGMASK); -} -static inline bool test_and_clear_restore_sigmask(void) -{ - return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); -} - -#else /* TIF_RESTORE_SIGMASK */ - -/* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. 
*/ -static inline void set_restore_sigmask(void) -{ - current->restore_sigmask = true; - WARN_ON(!test_thread_flag(TIF_SIGPENDING)); -} -static inline void clear_restore_sigmask(void) -{ - current->restore_sigmask = false; -} -static inline bool test_restore_sigmask(void) -{ - return current->restore_sigmask; -} -static inline bool test_and_clear_restore_sigmask(void) -{ - if (!current->restore_sigmask) - return false; - current->restore_sigmask = false; - return true; -} -#endif - -static inline void restore_saved_sigmask(void) -{ - if (test_and_clear_restore_sigmask()) - __set_current_blocked(&current->saved_sigmask); -} - -static inline sigset_t *sigmask_to_save(void) -{ - sigset_t *res = &current->blocked; - if (unlikely(test_restore_sigmask())) - res = &current->saved_sigmask; - return res; -} - -static inline int kill_cad_pid(int sig, int priv) -{ - return kill_pid(cad_pid, sig, priv); -} - -/* These can be the second arg to send_sig_info/send_group_sig_info. */ -#define SEND_SIG_NOINFO ((struct siginfo *) 0) -#define SEND_SIG_PRIV ((struct siginfo *) 1) -#define SEND_SIG_FORCED ((struct siginfo *) 2) - -/* - * True if we are on the alternate signal stack. - */ -static inline int on_sig_stack(unsigned long sp) -{ - /* - * If the signal stack is SS_AUTODISARM then, by construction, we - * can't be on the signal stack unless user code deliberately set - * SS_AUTODISARM when we were already on it. - * - * This improves reliability: if user state gets corrupted such that - * the stack pointer points very close to the end of the signal stack, - * then this check will enable the signal to be handled anyway.
- */ - if (current->sas_ss_flags & SS_AUTODISARM) - return 0; - -#ifdef CONFIG_STACK_GROWSUP - return sp >= current->sas_ss_sp && - sp - current->sas_ss_sp < current->sas_ss_size; -#else - return sp > current->sas_ss_sp && - sp - current->sas_ss_sp <= current->sas_ss_size; -#endif -} - -static inline int sas_ss_flags(unsigned long sp) -{ - if (!current->sas_ss_size) - return SS_DISABLE; - - return on_sig_stack(sp) ? SS_ONSTACK : 0; -} - -static inline void sas_ss_reset(struct task_struct *p) -{ - p->sas_ss_sp = 0; - p->sas_ss_size = 0; - p->sas_ss_flags = SS_DISABLE; -} - -static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) -{ - if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp)) -#ifdef CONFIG_STACK_GROWSUP - return current->sas_ss_sp; -#else - return current->sas_ss_sp + current->sas_ss_size; -#endif - return sp; -} #ifdef CONFIG_HAVE_COPY_THREAD_TLS extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, @@ -2338,10 +1963,8 @@ static inline void exit_thread(struct task_struct *tsk) #endif extern void exit_files(struct task_struct *); -extern void __cleanup_sighand(struct sighand_struct *); extern void exit_itimers(struct signal_struct *); -extern void flush_itimer_signals(void); extern void do_group_exit(int); @@ -2376,81 +1999,6 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, } #endif -#define tasklist_empty() \ - list_empty(&init_task.tasks) - -#define next_task(p) \ - list_entry_rcu((p)->tasks.next, struct task_struct, tasks) - -#define for_each_process(p) \ - for (p = &init_task ; (p = next_task(p)) != &init_task ; ) - -extern bool current_is_single_threaded(void); - -/* - * Careful: do_each_thread/while_each_thread is a double loop so - * 'break' will not work as expected - use goto instead. 
- */ -#define do_each_thread(g, t) \ - for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do - -#define while_each_thread(g, t) \ - while ((t = next_thread(t)) != g) - -#define __for_each_thread(signal, t) \ - list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) - -#define for_each_thread(p, t) \ - __for_each_thread((p)->signal, t) - -/* Careful: this is a double loop, 'break' won't work as expected. */ -#define for_each_process_thread(p, t) \ - for_each_process(p) for_each_thread(p, t) - -typedef int (*proc_visitor)(struct task_struct *p, void *data); -void walk_process_tree(struct task_struct *top, proc_visitor, void *); - -static inline int get_nr_threads(struct task_struct *tsk) -{ - return tsk->signal->nr_threads; -} - -static inline bool thread_group_leader(struct task_struct *p) -{ - return p->exit_signal >= 0; -} - -/* Do to the insanities of de_thread it is possible for a process - * to have the pid of the thread group leader without actually being - * the thread group leader. For iteration through the pids in proc - * all we care about is that we have a task with the appropriate - * pid, we don't actually care if we have the right task. - */ -static inline bool has_group_leader_pid(struct task_struct *p) -{ - return task_pid(p) == p->signal->leader_pid; -} - -static inline -bool same_thread_group(struct task_struct *p1, struct task_struct *p2) -{ - return p1->signal == p2->signal; -} - -static inline struct task_struct *next_thread(const struct task_struct *p) -{ - return list_entry_rcu(p->thread_group.next, - struct task_struct, thread_group); -} - -static inline int thread_group_empty(struct task_struct *p) -{ - return list_empty(&p->thread_group); -} - -#define delay_group_leader(p) \ - (thread_group_leader(p) && !thread_group_empty(p)) - /* * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. 
Also @@ -2471,25 +2019,6 @@ static inline void task_unlock(struct task_struct *p) spin_unlock(&p->alloc_lock); } -extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, - unsigned long *flags); - -static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk, - unsigned long *flags) -{ - struct sighand_struct *ret; - - ret = __lock_task_sighand(tsk, flags); - (void)__cond_lock(&tsk->sighand->siglock, ret); - return ret; -} - -static inline void unlock_task_sighand(struct task_struct *tsk, - unsigned long *flags) -{ - spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); -} - #ifdef CONFIG_THREAD_INFO_IN_TASK static inline struct thread_info *task_thread_info(struct task_struct *task) @@ -2862,28 +2391,6 @@ static inline void mm_update_next_owner(struct mm_struct *mm) } #endif /* CONFIG_MEMCG */ -static inline unsigned long task_rlimit(const struct task_struct *tsk, - unsigned int limit) -{ - return READ_ONCE(tsk->signal->rlim[limit].rlim_cur); -} - -static inline unsigned long task_rlimit_max(const struct task_struct *tsk, - unsigned int limit) -{ - return READ_ONCE(tsk->signal->rlim[limit].rlim_max); -} - -static inline unsigned long rlimit(unsigned int limit) -{ - return task_rlimit(current, limit); -} - -static inline unsigned long rlimit_max(unsigned int limit) -{ - return task_rlimit_max(current, limit); -} - #define SCHED_CPUFREQ_RT (1U << 0) #define SCHED_CPUFREQ_DL (1U << 1) #define SCHED_CPUFREQ_IOWAIT (1U << 2) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index c6958a53fef3..53fe5450f431 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -8,4 +8,506 @@ #include #include +/* + * Types defining task->signal and task->sighand and APIs using them: + */ + +struct sighand_struct { + atomic_t count; + struct k_sigaction action[_NSIG]; + spinlock_t siglock; + wait_queue_head_t signalfd_wqh; +}; + +/* + * NOTE! 
"signal_struct" does not have its own + * locking, because a shared signal_struct always + * implies a shared sighand_struct, so locking + * sighand_struct is always a proper superset of + * the locking of signal_struct. + */ +struct signal_struct { + atomic_t sigcnt; + atomic_t live; + int nr_threads; + struct list_head thread_head; + + wait_queue_head_t wait_chldexit; /* for wait4() */ + + /* current thread group signal load-balancing target: */ + struct task_struct *curr_target; + + /* shared signal handling: */ + struct sigpending shared_pending; + + /* thread group exit support */ + int group_exit_code; + /* overloaded: + * - notify group_exit_task when ->count is equal to notify_count + * - everyone except group_exit_task is stopped during signal delivery + * of fatal signals, group_exit_task processes the signal. + */ + int notify_count; + struct task_struct *group_exit_task; + + /* thread group stop support, overloads group_exit_code too */ + int group_stop_count; + unsigned int flags; /* see SIGNAL_* flags below */ + + /* + * PR_SET_CHILD_SUBREAPER marks a process, like a service + * manager, to re-parent orphan (double-forking) child processes + * to this process instead of 'init'. The service manager is + * able to receive SIGCHLD signals and is able to investigate + * the process until it calls wait(). All children of this + * process will inherit a flag if they should look for a + * child_subreaper process at exit. 
+ */ + unsigned int is_child_subreaper:1; + unsigned int has_child_subreaper:1; + +#ifdef CONFIG_POSIX_TIMERS + + /* POSIX.1b Interval Timers */ + int posix_timer_id; + struct list_head posix_timers; + + /* ITIMER_REAL timer for the process */ + struct hrtimer real_timer; + ktime_t it_real_incr; + + /* + * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use + * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these + * values are defined to 0 and 1 respectively + */ + struct cpu_itimer it[2]; + + /* + * Thread group totals for process CPU timers. + * See thread_group_cputimer(), et al, for details. + */ + struct thread_group_cputimer cputimer; + + /* Earliest-expiration cache. */ + struct task_cputime cputime_expires; + + struct list_head cpu_timers[3]; + +#endif + + struct pid *leader_pid; + +#ifdef CONFIG_NO_HZ_FULL + atomic_t tick_dep_mask; +#endif + + struct pid *tty_old_pgrp; + + /* boolean value for session group leader */ + int leader; + + struct tty_struct *tty; /* NULL if no tty */ + +#ifdef CONFIG_SCHED_AUTOGROUP + struct autogroup *autogroup; +#endif + /* + * Cumulative resource counters for dead threads in the group, + * and for reaped dead child processes forked by this group. + * Live threads maintain their own counters and add to these + * in __exit_signal, except for the group leader. + */ + seqlock_t stats_lock; + u64 utime, stime, cutime, cstime; + u64 gtime; + u64 cgtime; + struct prev_cputime prev_cputime; + unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; + unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; + unsigned long inblock, oublock, cinblock, coublock; + unsigned long maxrss, cmaxrss; + struct task_io_accounting ioac; + + /* + * Cumulative ns of schedule CPU time fo dead threads in the + * group, not including a zombie group leader, (This only differs + * from jiffies_to_ns(utime + stime) if sched_clock uses something + * other than jiffies.) 
+ */ + unsigned long long sum_sched_runtime; + + /* + * We don't bother to synchronize most readers of this at all, + * because there is no reader checking a limit that actually needs + * to get both rlim_cur and rlim_max atomically, and either one + * alone is a single word that can safely be read normally. + * getrlimit/setrlimit use task_lock(current->group_leader) to + * protect this instead of the siglock, because they really + * have no need to disable irqs. + */ + struct rlimit rlim[RLIM_NLIMITS]; + +#ifdef CONFIG_BSD_PROCESS_ACCT + struct pacct_struct pacct; /* per-process accounting information */ +#endif +#ifdef CONFIG_TASKSTATS + struct taskstats *stats; +#endif +#ifdef CONFIG_AUDIT + unsigned audit_tty; + struct tty_audit_buf *tty_audit_buf; +#endif + + /* + * Thread is the potential origin of an oom condition; kill first on + * oom + */ + bool oom_flag_origin; + short oom_score_adj; /* OOM kill score adjustment */ + short oom_score_adj_min; /* OOM kill score adjustment min value. + * Only settable by CAP_SYS_RESOURCE. */ + struct mm_struct *oom_mm; /* recorded mm when the thread group got + * killed by the oom killer */ + + struct mutex cred_guard_mutex; /* guard against foreign influences on + * credential calculations + * (notably. ptrace) */ +}; + +/* + * Bits in flags field of signal_struct. + */ +#define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */ +#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */ +#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */ +#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */ +/* + * Pending notifications to parent. 
+ */ +#define SIGNAL_CLD_STOPPED 0x00000010 +#define SIGNAL_CLD_CONTINUED 0x00000020 +#define SIGNAL_CLD_MASK (SIGNAL_CLD_STOPPED|SIGNAL_CLD_CONTINUED) + +#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ + +#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ + SIGNAL_STOP_CONTINUED) + +static inline void signal_set_stop_flags(struct signal_struct *sig, + unsigned int flags) +{ + WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); + sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; +} + +/* If true, all threads except ->group_exit_task have pending SIGKILL */ +static inline int signal_group_exit(const struct signal_struct *sig) +{ + return (sig->flags & SIGNAL_GROUP_EXIT) || + (sig->group_exit_task != NULL); +} + +extern void flush_signals(struct task_struct *); +extern void ignore_signals(struct task_struct *); +extern void flush_signal_handlers(struct task_struct *, int force_default); +extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); + +static inline int kernel_dequeue_signal(siginfo_t *info) +{ + struct task_struct *tsk = current; + siginfo_t __info; + int ret; + + spin_lock_irq(&tsk->sighand->siglock); + ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info); + spin_unlock_irq(&tsk->sighand->siglock); + + return ret; +} + +static inline void kernel_signal_stop(void) +{ + spin_lock_irq(&current->sighand->siglock); + if (current->jobctl & JOBCTL_STOP_DEQUEUED) + __set_current_state(TASK_STOPPED); + spin_unlock_irq(&current->sighand->siglock); + + schedule(); +} +extern int send_sig_info(int, struct siginfo *, struct task_struct *); +extern int force_sigsegv(int, struct task_struct *); +extern int force_sig_info(int, struct siginfo *, struct task_struct *); +extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp); +extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid); +extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *, +
const struct cred *, u32); +extern int kill_pgrp(struct pid *pid, int sig, int priv); +extern int kill_pid(struct pid *pid, int sig, int priv); +extern int kill_proc_info(int, struct siginfo *, pid_t); +extern __must_check bool do_notify_parent(struct task_struct *, int); +extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); +extern void force_sig(int, struct task_struct *); +extern int send_sig(int, struct task_struct *, int); +extern int zap_other_threads(struct task_struct *p); +extern struct sigqueue *sigqueue_alloc(void); +extern void sigqueue_free(struct sigqueue *); +extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group); +extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *); + +#ifdef TIF_RESTORE_SIGMASK +/* + * Legacy restore_sigmask accessors. These are inefficient on + * SMP architectures because they require atomic operations. + */ + +/** + * set_restore_sigmask() - make sure saved_sigmask processing gets done + * + * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code + * will run before returning to user mode, to process the flag. For + * all callers, TIF_SIGPENDING is already set or it's no harm to set + * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the + * arch code will notice on return to user mode, in case those bits + * are scarce. We set TIF_SIGPENDING here to ensure that the arch + * signal code always gets run when TIF_RESTORE_SIGMASK is set. 
+ */ +static inline void set_restore_sigmask(void) +{ + set_thread_flag(TIF_RESTORE_SIGMASK); + WARN_ON(!test_thread_flag(TIF_SIGPENDING)); +} +static inline void clear_restore_sigmask(void) +{ + clear_thread_flag(TIF_RESTORE_SIGMASK); +} +static inline bool test_restore_sigmask(void) +{ + return test_thread_flag(TIF_RESTORE_SIGMASK); +} +static inline bool test_and_clear_restore_sigmask(void) +{ + return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK); +} + +#else /* TIF_RESTORE_SIGMASK */ + +/* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */ +static inline void set_restore_sigmask(void) +{ + current->restore_sigmask = true; + WARN_ON(!test_thread_flag(TIF_SIGPENDING)); +} +static inline void clear_restore_sigmask(void) +{ + current->restore_sigmask = false; +} +static inline bool test_restore_sigmask(void) +{ + return current->restore_sigmask; +} +static inline bool test_and_clear_restore_sigmask(void) +{ + if (!current->restore_sigmask) + return false; + current->restore_sigmask = false; + return true; +} +#endif + +static inline void restore_saved_sigmask(void) +{ + if (test_and_clear_restore_sigmask()) + __set_current_blocked(&current->saved_sigmask); +} + +static inline sigset_t *sigmask_to_save(void) +{ + sigset_t *res = &current->blocked; + if (unlikely(test_restore_sigmask())) + res = &current->saved_sigmask; + return res; +} + +static inline int kill_cad_pid(int sig, int priv) +{ + return kill_pid(cad_pid, sig, priv); +} + +/* These can be the second arg to send_sig_info/send_group_sig_info. */ +#define SEND_SIG_NOINFO ((struct siginfo *) 0) +#define SEND_SIG_PRIV ((struct siginfo *) 1) +#define SEND_SIG_FORCED ((struct siginfo *) 2) + +/* + * True if we are on the alternate signal stack. + */ +static inline int on_sig_stack(unsigned long sp) +{ + /* + * If the signal stack is SS_AUTODISARM then, by construction, we + * can't be on the signal stack unless user code deliberately set + * SS_AUTODISARM when we were already on it.
+ * + * This improves reliability: if user state gets corrupted such that + * the stack pointer points very close to the end of the signal stack, + * then this check will enable the signal to be handled anyway. + */ + if (current->sas_ss_flags & SS_AUTODISARM) + return 0; + +#ifdef CONFIG_STACK_GROWSUP + return sp >= current->sas_ss_sp && + sp - current->sas_ss_sp < current->sas_ss_size; +#else + return sp > current->sas_ss_sp && + sp - current->sas_ss_sp <= current->sas_ss_size; +#endif +} + +static inline int sas_ss_flags(unsigned long sp) +{ + if (!current->sas_ss_size) + return SS_DISABLE; + + return on_sig_stack(sp) ? SS_ONSTACK : 0; +} + +static inline void sas_ss_reset(struct task_struct *p) +{ + p->sas_ss_sp = 0; + p->sas_ss_size = 0; + p->sas_ss_flags = SS_DISABLE; +} + +static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) +{ + if (unlikely((ksig->ka.sa.sa_flags & SA_ONSTACK)) && ! sas_ss_flags(sp)) +#ifdef CONFIG_STACK_GROWSUP + return current->sas_ss_sp; +#else + return current->sas_ss_sp + current->sas_ss_size; +#endif + return sp; +} + +extern void __cleanup_sighand(struct sighand_struct *); +extern void flush_itimer_signals(void); + +#define tasklist_empty() \ + list_empty(&init_task.tasks) + +#define next_task(p) \ + list_entry_rcu((p)->tasks.next, struct task_struct, tasks) + +#define for_each_process(p) \ + for (p = &init_task ; (p = next_task(p)) != &init_task ; ) + +extern bool current_is_single_threaded(void); + +/* + * Careful: do_each_thread/while_each_thread is a double loop so + * 'break' will not work as expected - use goto instead. 
+ */ +#define do_each_thread(g, t) \ + for (g = t = &init_task ; (g = t = next_task(g)) != &init_task ; ) do + +#define while_each_thread(g, t) \ + while ((t = next_thread(t)) != g) + +#define __for_each_thread(signal, t) \ + list_for_each_entry_rcu(t, &(signal)->thread_head, thread_node) + +#define for_each_thread(p, t) \ + __for_each_thread((p)->signal, t) + +/* Careful: this is a double loop, 'break' won't work as expected. */ +#define for_each_process_thread(p, t) \ + for_each_process(p) for_each_thread(p, t) + +typedef int (*proc_visitor)(struct task_struct *p, void *data); +void walk_process_tree(struct task_struct *top, proc_visitor, void *); + +static inline int get_nr_threads(struct task_struct *tsk) +{ + return tsk->signal->nr_threads; +} + +static inline bool thread_group_leader(struct task_struct *p) +{ + return p->exit_signal >= 0; +} + +/* Do to the insanities of de_thread it is possible for a process + * to have the pid of the thread group leader without actually being + * the thread group leader. For iteration through the pids in proc + * all we care about is that we have a task with the appropriate + * pid, we don't actually care if we have the right task. 
+ */ +static inline bool has_group_leader_pid(struct task_struct *p) +{ + return task_pid(p) == p->signal->leader_pid; +} + +static inline +bool same_thread_group(struct task_struct *p1, struct task_struct *p2) +{ + return p1->signal == p2->signal; +} + +static inline struct task_struct *next_thread(const struct task_struct *p) +{ + return list_entry_rcu(p->thread_group.next, + struct task_struct, thread_group); +} + +static inline int thread_group_empty(struct task_struct *p) +{ + return list_empty(&p->thread_group); +} + +#define delay_group_leader(p) \ + (thread_group_leader(p) && !thread_group_empty(p)) + +extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, + unsigned long *flags); + +static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk, + unsigned long *flags) +{ + struct sighand_struct *ret; + + ret = __lock_task_sighand(tsk, flags); + (void)__cond_lock(&tsk->sighand->siglock, ret); + return ret; +} + +static inline void unlock_task_sighand(struct task_struct *tsk, + unsigned long *flags) +{ + spin_unlock_irqrestore(&tsk->sighand->siglock, *flags); +} + +static inline unsigned long task_rlimit(const struct task_struct *tsk, + unsigned int limit) +{ + return READ_ONCE(tsk->signal->rlim[limit].rlim_cur); +} + +static inline unsigned long task_rlimit_max(const struct task_struct *tsk, + unsigned int limit) +{ + return READ_ONCE(tsk->signal->rlim[limit].rlim_max); +} + +static inline unsigned long rlimit(unsigned int limit) +{ + return task_rlimit(current, limit); +} + +static inline unsigned long rlimit_max(unsigned int limit) +{ + return task_rlimit_max(current, limit); +} + #endif /* _LINUX_SCHED_SIGNAL_H */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index fc34bcf2329f..08d2cb605101 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 
be93949b4885..b4024d688f38 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 5d4208ad029e..85837ab90e89 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -27,6 +27,8 @@ #include #include #include +#include + #include #include #include diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index cc6b6f8651eb..e41f594a1e1d 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -11,6 +11,8 @@ #include #include +#include + #include #include diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 03dfcc6b7661..67a71d170bed 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -9,6 +9,8 @@ */ #include +#include + #include #include "smc.h" diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index 5d1878732f46..c4ef9a4ec569 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -11,6 +11,8 @@ #include #include +#include + #include #include "smc.h" diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 6e73b28915ea..69a0013dd25c 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -15,6 +15,8 @@ #include #include #include +#include + #include #include "smc.h" -- cgit v1.2.3