diff options
Diffstat (limited to 'drivers/net/tun.c')
-rw-r--r-- | drivers/net/tun.c | 309 |
1 files changed, 111 insertions, 198 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 28624cca91f8..cc6c50180663 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -83,6 +83,8 @@ #include <linux/uaccess.h> #include <linux/proc_fs.h> +#include "tun_vnet.h" + static void tun_default_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd); @@ -94,9 +96,6 @@ static void tun_default_link_ksettings(struct net_device *dev, * overload it to mean fasync when stored there. */ #define TUN_FASYNC IFF_ATTACH_QUEUE -/* High bits in flags field are unused. */ -#define TUN_VNET_LE 0x80000000 -#define TUN_VNET_BE 0x40000000 #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \ IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS) @@ -187,7 +186,8 @@ struct tun_struct { struct net_device *dev; netdev_features_t set_features; #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \ - NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4) + NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4 | \ + NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM) int align; int vnet_hdr_sz; @@ -298,70 +298,6 @@ static bool tun_napi_frags_enabled(const struct tun_file *tfile) return tfile->napi_frags_enabled; } -#ifdef CONFIG_TUN_VNET_CROSS_LE -static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) -{ - return tun->flags & TUN_VNET_BE ? false : - virtio_legacy_is_little_endian(); -} - -static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) -{ - int be = !!(tun->flags & TUN_VNET_BE); - - if (put_user(be, argp)) - return -EFAULT; - - return 0; -} - -static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) -{ - int be; - - if (get_user(be, argp)) - return -EFAULT; - - if (be) - tun->flags |= TUN_VNET_BE; - else - tun->flags &= ~TUN_VNET_BE; - - return 0; -} -#else -static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) -{ - return virtio_legacy_is_little_endian(); -} - -static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp) -{ - return -EINVAL; -} - -static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp) -{ - return -EINVAL; -} -#endif /* CONFIG_TUN_VNET_CROSS_LE */ - -static inline bool tun_is_little_endian(struct tun_struct *tun) -{ - return tun->flags & TUN_VNET_LE || - tun_legacy_is_little_endian(tun); -} - -static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val) -{ - return __virtio16_to_cpu(tun_is_little_endian(tun), val); -} - -static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val) -{ - return __cpu_to_virtio16(tun_is_little_endian(tun), val); -} - static inline u32 tun_hashfn(u32 rxhash) { return rxhash & TUN_MASK_FLOW_ENTRIES; @@ -442,7 +378,7 @@ static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index) static void tun_flow_cleanup(struct timer_list *t) { - struct tun_struct *tun = from_timer(tun, t, flow_gc_timer); + struct tun_struct *tun = timer_container_of(tun, t, flow_gc_timer); unsigned long delay = tun->ageing_time; unsigned long next_timer = jiffies + delay; unsigned long count = 0; @@ -574,18 +510,14 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, return ret; } -static inline bool tun_capable(struct tun_struct *tun) +static inline bool tun_not_capable(struct tun_struct *tun) { const struct cred *cred = current_cred(); struct net *net = dev_net(tun->dev); - if (ns_capable(net->user_ns, CAP_NET_ADMIN)) - return 1; - if (uid_valid(tun->owner) && uid_eq(cred->euid, tun->owner)) - return 1; - if (gid_valid(tun->group) && in_egroup_p(tun->group)) - return 1; - return 0; + return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) || + (gid_valid(tun->group) && !in_egroup_p(tun->group))) && + !ns_capable(net->user_ns, CAP_NET_ADMIN); } static void tun_set_real_num_queues(struct tun_struct *tun) @@ -994,6 +926,7 @@ static int tun_net_init(struct net_device *dev) dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; + dev->hw_enc_features = dev->hw_features; dev->features = dev->hw_features; dev->vlan_features = dev->features & ~(NETIF_F_HW_VLAN_CTAG_TX | @@ -1069,8 +1002,8 @@ static unsigned int run_ebpf_filter(struct tun_struct *tun, /* Net device start xmit */ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) { + enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; struct tun_struct *tun = netdev_priv(dev); - enum skb_drop_reason drop_reason; int txq = skb->queue_mapping; struct netdev_queue *queue; struct tun_file *tfile; @@ -1099,10 +1032,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) } if (tfile->socket.sk->sk_filter && - sk_filter(tfile->socket.sk, skb)) { - drop_reason = SKB_DROP_REASON_SOCKET_FILTER; + sk_filter_reason(tfile->socket.sk, skb, &drop_reason)) goto drop; - } len = run_ebpf_filter(tun, skb, len); if (len == 0) { @@ -1364,7 +1295,7 @@ static void tun_flow_init(struct tun_struct *tun) static void tun_flow_uninit(struct tun_struct *tun) { - del_timer_sync(&tun->flow_gc_timer); + timer_delete_sync(&tun->flow_gc_timer); tun_flow_flush(tun); } @@ -1604,7 +1535,8 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile, static struct sk_buff *__tun_build_skb(struct tun_file *tfile, struct page_frag *alloc_frag, char *buf, - int buflen, int len, int pad) + int buflen, int len, int pad, + int metasize) { struct sk_buff *skb = build_skb(buf, buflen); @@ -1613,6 +1545,8 @@ static struct sk_buff *__tun_build_skb(struct tun_file *tfile, skb_reserve(skb, pad); skb_put(skb, len); + if (metasize) + skb_metadata_set(skb, metasize); skb_set_owner_w(skb, tfile->socket.sk); get_page(alloc_frag->page); @@ -1672,6 +1606,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, char *buf; size_t copied; int pad = TUN_RX_PAD; + int metasize = 0; int err = 0; rcu_read_lock(); @@ -1699,7 +1634,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, if (hdr->gso_type || !xdp_prog) { *skb_xdp = 1; return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, - pad); + pad, metasize); } *skb_xdp = 0; @@ -1713,7 +1648,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, u32 act; xdp_init_buff(&xdp, buflen, &tfile->xdp_rxq); - xdp_prepare_buff(&xdp, buf, pad, len, false); + xdp_prepare_buff(&xdp, buf, pad, len, true); act = bpf_prog_run_xdp(xdp_prog, &xdp); if (act == XDP_REDIRECT || act == XDP_TX) { @@ -1734,12 +1669,18 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, pad = xdp.data - xdp.data_hard_start; len = xdp.data_end - xdp.data; + + /* It is known that the xdp_buff was prepared with metadata + * support, so the metasize will never be negative. + */ + metasize = xdp.data - xdp.data_meta; } bpf_net_ctx_clear(bpf_net_ctx); rcu_read_unlock(); local_bh_enable(); - return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad); + return __tun_build_skb(tfile, alloc_frag, buf, buflen, len, pad, + metasize); out: bpf_net_ctx_clear(bpf_net_ctx); @@ -1757,15 +1698,26 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, struct sk_buff *skb; size_t total_len = iov_iter_count(from); size_t len = total_len, align = tun->align, linear; - struct virtio_net_hdr gso = { 0 }; + struct virtio_net_hdr_v1_hash_tunnel hdr; + struct virtio_net_hdr *gso; int good_linear; int copylen; + int hdr_len = 0; bool zerocopy = false; int err; u32 rxhash = 0; int skb_xdp = 1; bool frags = tun_napi_frags_enabled(tfile); enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; + netdev_features_t features = 0; + + /* + * Keep it easy and always zero the whole buffer, even if the + * tunnel-related field will be touched only when the feature + * is enabled and the hdr size id compatible. + */ + memset(&hdr, 0, sizeof(hdr)); + gso = (struct virtio_net_hdr *)&hdr; if (!(tun->flags & IFF_NO_PI)) { if (len < sizeof(pi)) @@ -1779,26 +1731,18 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (tun->flags & IFF_VNET_HDR) { int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); - if (len < vnet_hdr_sz) - return -EINVAL; - len -= vnet_hdr_sz; - - if (!copy_from_iter_full(&gso, sizeof(gso), from)) - return -EFAULT; - - if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && - tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2 > tun16_to_cpu(tun, gso.hdr_len)) - gso.hdr_len = cpu_to_tun16(tun, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2); + features = tun_vnet_hdr_guest_features(vnet_hdr_sz); + hdr_len = __tun_vnet_hdr_get(vnet_hdr_sz, tun->flags, + features, from, gso); + if (hdr_len < 0) + return hdr_len; - if (tun16_to_cpu(tun, gso.hdr_len) > len) - return -EINVAL; - iov_iter_advance(from, vnet_hdr_sz - sizeof(gso)); + len -= vnet_hdr_sz; } if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) { align += NET_IP_ALIGN; - if (unlikely(len < ETH_HLEN || - (gso.hdr_len && tun16_to_cpu(tun, gso.hdr_len) < ETH_HLEN))) + if (unlikely(len < ETH_HLEN || (hdr_len && hdr_len < ETH_HLEN))) return -EINVAL; } @@ -1811,9 +1755,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, * enough room for skb expand head in case it is used. * The rest of the buffer is mapped from userspace. */ - copylen = gso.hdr_len ? tun16_to_cpu(tun, gso.hdr_len) : GOODCOPY_LEN; - if (copylen > good_linear) - copylen = good_linear; + copylen = min(hdr_len ? hdr_len : GOODCOPY_LEN, good_linear); linear = copylen; iov_iter_advance(&i, copylen); if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS) @@ -1825,7 +1767,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, * (e.g gso or jumbo packet), we will do it at after * skb was created with generic XDP routine. */ - skb = tun_build_skb(tun, tfile, from, &gso, len, &skb_xdp); + skb = tun_build_skb(tun, tfile, from, gso, len, &skb_xdp); err = PTR_ERR_OR_ZERO(skb); if (err) goto drop; @@ -1834,10 +1776,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } else { if (!zerocopy) { copylen = len; - if (tun16_to_cpu(tun, gso.hdr_len) > good_linear) - linear = good_linear; - else - linear = tun16_to_cpu(tun, gso.hdr_len); + linear = min(hdr_len, good_linear); } if (frags) { @@ -1872,7 +1811,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } } - if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) { + if (tun_vnet_hdr_tnl_to_skb(tun->flags, features, skb, &hdr)) { atomic_long_inc(&tun->rx_frame_errors); err = -EINVAL; goto free_skb; @@ -2067,18 +2006,15 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun, { int vnet_hdr_sz = 0; size_t size = xdp_frame->len; - size_t ret; + ssize_t ret; if (tun->flags & IFF_VNET_HDR) { struct virtio_net_hdr gso = { 0 }; vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); - if (unlikely(iov_iter_count(iter) < vnet_hdr_sz)) - return -EINVAL; - if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) != - sizeof(gso))) - return -EFAULT; - iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso)); + ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso); + if (ret) + return ret; } ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz; @@ -2101,6 +2037,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, int vlan_offset = 0; int vlan_hlen = 0; int vnet_hdr_sz = 0; + int ret; if (skb_vlan_tag_present(skb)) vlan_hlen = VLAN_HLEN; @@ -2125,33 +2062,23 @@ static ssize_t tun_put_user(struct tun_struct *tun, } if (vnet_hdr_sz) { - struct virtio_net_hdr gso; - - if (iov_iter_count(iter) < vnet_hdr_sz) - return -EINVAL; - - if (virtio_net_hdr_from_skb(skb, &gso, - tun_is_little_endian(tun), true, - vlan_hlen)) { - struct skb_shared_info *sinfo = skb_shinfo(skb); - - if (net_ratelimit()) { - netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", - sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size), - tun16_to_cpu(tun, gso.hdr_len)); - print_hex_dump(KERN_ERR, "tun: ", - DUMP_PREFIX_NONE, - 16, 1, skb->head, - min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true); - } - WARN_ON_ONCE(1); - return -EINVAL; - } + struct virtio_net_hdr_v1_hash_tunnel hdr; + struct virtio_net_hdr *gso; - if (copy_to_iter(&gso, sizeof(gso), iter) != sizeof(gso)) - return -EFAULT; + ret = tun_vnet_hdr_tnl_from_skb(tun->flags, tun->dev, skb, + &hdr); + if (ret) + return ret; - iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso)); + /* + * Drop the packet if the configured header size is too small + * WRT the enabled offloads. + */ + gso = (struct virtio_net_hdr *)&hdr; + ret = __tun_vnet_hdr_put(vnet_hdr_sz, tun->dev->features, + iter, gso); + if (ret) + return ret; } if (vlan_hlen) { @@ -2449,13 +2376,15 @@ static int tun_xdp_one(struct tun_struct *tun, struct tun_page *tpage) { unsigned int datasize = xdp->data_end - xdp->data; - struct tun_xdp_hdr *hdr = xdp->data_hard_start; - struct virtio_net_hdr *gso = &hdr->gso; + struct virtio_net_hdr *gso = xdp->data_hard_start; + struct virtio_net_hdr_v1_hash_tunnel *tnl_hdr; struct bpf_prog *xdp_prog; struct sk_buff *skb = NULL; struct sk_buff_head *queue; + netdev_features_t features; u32 rxhash = 0, act; - int buflen = hdr->buflen; + int buflen = xdp->frame_sz; + int metasize = 0; int ret = 0; bool skb_xdp = false; struct page *page; @@ -2471,7 +2400,6 @@ static int tun_xdp_one(struct tun_struct *tun, } xdp_init_buff(xdp, buflen, &tfile->xdp_rxq); - xdp_set_data_meta_invalid(xdp); act = bpf_prog_run_xdp(xdp_prog, xdp); ret = tun_xdp_act(tun, xdp_prog, xdp, act); @@ -2511,7 +2439,17 @@ build: skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data); - if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) { + /* The externally provided xdp_buff may have no metadata support, which + * is marked by xdp->data_meta being xdp->data + 1. This will lead to a + * metasize of -1 and is the reason why the condition checks for > 0. + */ + metasize = xdp->data - xdp->data_meta; + if (metasize > 0) + skb_metadata_set(skb, metasize); + + features = tun_vnet_hdr_guest_features(READ_ONCE(tun->vnet_hdr_sz)); + tnl_hdr = (struct virtio_net_hdr_v1_hash_tunnel *)gso; + if (tun_vnet_hdr_tnl_to_skb(tun->flags, features, skb, tnl_hdr)) { atomic_long_inc(&tun->rx_frame_errors); kfree_skb(skb); ret = -EINVAL; @@ -2782,7 +2720,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) !!(tun->flags & IFF_MULTI_QUEUE)) return -EINVAL; - if (!tun_capable(tun)) + if (tun_not_capable(tun)) return -EPERM; err = security_tun_dev_open(tun->security); if (err < 0) @@ -2897,6 +2835,8 @@ static void tun_get_iff(struct tun_struct *tun, struct ifreq *ifr) } +#define PLAIN_GSO (NETIF_F_GSO_UDP_L4 | NETIF_F_TSO | NETIF_F_TSO6) + /* This is like a cut-down ethtool ops, except done via tun fd so no * privs required. */ static int set_offload(struct tun_struct *tun, unsigned long arg) @@ -2926,6 +2866,18 @@ static int set_offload(struct tun_struct *tun, unsigned long arg) features |= NETIF_F_GSO_UDP_L4; arg &= ~(TUN_F_USO4 | TUN_F_USO6); } + + /* + * Tunnel offload is allowed only if some plain offload is + * available, too. + */ + if (features & PLAIN_GSO && arg & TUN_F_UDP_TUNNEL_GSO) { + features |= NETIF_F_GSO_UDP_TUNNEL; + if (arg & TUN_F_UDP_TUNNEL_GSO_CSUM) + features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; + arg &= ~(TUN_F_UDP_TUNNEL_GSO | + TUN_F_UDP_TUNNEL_GSO_CSUM); + } } /* This gives the user a way to test for new features in future by @@ -3095,8 +3047,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, kgid_t group; int ifindex; int sndbuf; - int vnet_hdr_sz; - int le; int ret; bool do_notify = false; @@ -3273,14 +3223,20 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, case SIOCGIFHWADDR: /* Get hw address */ - dev_get_mac_address(&ifr.ifr_hwaddr, net, tun->dev->name); + netif_get_mac_address(&ifr.ifr_hwaddr, net, tun->dev->name); if (copy_to_user(argp, &ifr, ifreq_len)) ret = -EFAULT; break; case SIOCSIFHWADDR: /* Set hw address */ - ret = dev_set_mac_address_user(tun->dev, &ifr.ifr_hwaddr, NULL); + if (tun->dev->addr_len > sizeof(ifr.ifr_hwaddr)) { + ret = -EINVAL; + break; + } + ret = dev_set_mac_address_user(tun->dev, + (struct sockaddr_storage *)&ifr.ifr_hwaddr, + NULL); break; case TUNGETSNDBUF: @@ -3303,50 +3259,6 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, tun_set_sndbuf(tun); break; - case TUNGETVNETHDRSZ: - vnet_hdr_sz = tun->vnet_hdr_sz; - if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz))) - ret = -EFAULT; - break; - - case TUNSETVNETHDRSZ: - if (copy_from_user(&vnet_hdr_sz, argp, sizeof(vnet_hdr_sz))) { - ret = -EFAULT; - break; - } - if (vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr)) { - ret = -EINVAL; - break; - } - - tun->vnet_hdr_sz = vnet_hdr_sz; - break; - - case TUNGETVNETLE: - le = !!(tun->flags & TUN_VNET_LE); - if (put_user(le, (int __user *)argp)) - ret = -EFAULT; - break; - - case TUNSETVNETLE: - if (get_user(le, (int __user *)argp)) { - ret = -EFAULT; - break; - } - if (le) - tun->flags |= TUN_VNET_LE; - else - tun->flags &= ~TUN_VNET_LE; - break; - - case TUNGETVNETBE: - ret = tun_get_vnet_be(tun, argp); - break; - - case TUNSETVNETBE: - ret = tun_set_vnet_be(tun, argp); - break; - case TUNATTACHFILTER: /* Can be set only for TAPs */ ret = -EINVAL; @@ -3402,7 +3314,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, break; default: - ret = -EINVAL; + ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags, cmd, argp); break; } @@ -3820,3 +3732,4 @@ MODULE_AUTHOR(DRV_COPYRIGHT); MODULE_LICENSE("GPL"); MODULE_ALIAS_MISCDEV(TUN_MINOR); MODULE_ALIAS("devname:net/tun"); +MODULE_IMPORT_NS("NETDEV_INTERNAL"); |