summaryrefslogtreecommitdiff
path: root/drivers/net/virtio_net.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-03-05 22:29:24 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2018-03-05 22:29:24 +0300
commit547046141f44dba075207fd343e3e032e129c9ac (patch)
tree3979961d838def5efa9f3835d19e05d60b3b4d88 /drivers/net/virtio_net.c
parent661e50bc853209e41a5c14a290ca4decc43cbfd1 (diff)
parenta7f0fb1bfb66ded5d556d6723d691b77a7146b6f (diff)
downloadlinux-547046141f44dba075207fd343e3e032e129c9ac.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: 1) Use an appropriate TSQ pacing shift in mac80211, from Toke Høiland-Jørgensen. 2) Just like ipv4's ip_route_me_harder(), we have to use skb_to_full_sk in ip6_route_me_harder, from Eric Dumazet. 3) Fix several shutdown races and similar other problems in l2tp, from James Chapman. 4) Handle missing XDP flush properly in tuntap, for real this time. From Jason Wang. 5) Out-of-bounds access in powerpc ebpf tailcalls, from Daniel Borkmann. 6) Fix phy_resume() locking, from Andrew Lunn. 7) IFLA_MTU values are ignored on newlink for some tunnel types, fix from Xin Long. 8) Revert F-RTO middle box workarounds, they only handle one dimension of the problem. From Yuchung Cheng. 9) Fix socket refcounting in RDS, from Ka-Cheong Poon. 10) Don't allow ppp unit registration to an unregistered channel, from Guillaume Nault. 11) Various hv_netvsc fixes from Stephen Hemminger. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (98 commits) hv_netvsc: propagate rx filters to VF hv_netvsc: filter multicast/broadcast hv_netvsc: defer queue selection to VF hv_netvsc: use napi_schedule_irqoff hv_netvsc: fix race in napi poll when rescheduling hv_netvsc: cancel subchannel setup before halting device hv_netvsc: fix error unwind handling if vmbus_open fails hv_netvsc: only wake transmit queue if link is up hv_netvsc: avoid retry on send during shutdown virtio-net: re enable XDP_REDIRECT for mergeable buffer ppp: prevent unregistered channels from connecting to PPP units tc-testing: skbmod: fix match value of ethertype mlxsw: spectrum_switchdev: Check success of FDB add operation net: make skb_gso_*_seglen functions private net: xfrm: use skb_gso_validate_network_len() to check gso sizes net: sched: tbf: handle GSO_BY_FRAGS case in enqueue net: rename skb_gso_validate_mtu -> skb_gso_validate_network_len rds: Incorrect reference counting in TCP socket creation net: ethtool: don't ignore return from driver get_fecparam method vrf: check forwarding on the original netdevice when generating ICMP dest unreachable ...
Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r--drivers/net/virtio_net.c62
1 files changed, 47 insertions, 15 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9bb9e562b893..23374603e4d9 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -504,6 +504,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
page_off += *len;
while (--*num_buf) {
+ int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
unsigned int buflen;
void *buf;
int off;
@@ -518,7 +519,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
/* guard against a misconfigured or uncooperative backend that
* is sending packet larger than the MTU.
*/
- if ((page_off + buflen) > PAGE_SIZE) {
+ if ((page_off + buflen + tailroom) > PAGE_SIZE) {
put_page(p);
goto err_buf;
}
@@ -690,6 +691,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
unsigned int truesize;
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
bool sent;
+ int err;
head_skb = NULL;
@@ -701,7 +703,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
void *data;
u32 act;
- /* This happens when rx buffer size is underestimated */
+ /* This happens when rx buffer size is underestimated
+ * or headroom is not enough because of the buffer
+ * was refilled before XDP is set. This should only
+ * happen for the first several packets, so we don't
+ * care much about its performance.
+ */
if (unlikely(num_buf > 1 ||
headroom < virtnet_get_headroom(vi))) {
/* linearize data for XDP */
@@ -736,9 +743,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
act = bpf_prog_run_xdp(xdp_prog, &xdp);
- if (act != XDP_PASS)
- ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
-
switch (act) {
case XDP_PASS:
/* recalculate offset to account for any header
@@ -770,6 +774,18 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
goto err_xdp;
rcu_read_unlock();
goto xdp_xmit;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(dev, &xdp, xdp_prog);
+ if (err) {
+ if (unlikely(xdp_page != page))
+ put_page(xdp_page);
+ goto err_xdp;
+ }
+ *xdp_xmit = true;
+ if (unlikely(xdp_page != page))
+ goto err_xdp;
+ rcu_read_unlock();
+ goto xdp_xmit;
default:
bpf_warn_invalid_xdp_action(act);
case XDP_ABORTED:
@@ -1013,13 +1029,18 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
}
static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
- struct ewma_pkt_len *avg_pkt_len)
+ struct ewma_pkt_len *avg_pkt_len,
+ unsigned int room)
{
const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
unsigned int len;
- len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
+ if (room)
+ return PAGE_SIZE - room;
+
+ len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
rq->min_buf_len, PAGE_SIZE - hdr_len);
+
return ALIGN(len, L1_CACHE_BYTES);
}
@@ -1028,21 +1049,27 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
{
struct page_frag *alloc_frag = &rq->alloc_frag;
unsigned int headroom = virtnet_get_headroom(vi);
+ unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+ unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
char *buf;
void *ctx;
int err;
unsigned int len, hole;
- len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
- if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
+ /* Extra tailroom is needed to satisfy XDP's assumption. This
+ * means rx frags coalescing won't work, but consider we've
+ * disabled GSO for XDP, it won't be a big issue.
+ */
+ len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
+ if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
return -ENOMEM;
buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
buf += headroom; /* advance address leaving hole at front of pkt */
get_page(alloc_frag->page);
- alloc_frag->offset += len + headroom;
+ alloc_frag->offset += len + room;
hole = alloc_frag->size - alloc_frag->offset;
- if (hole < len + headroom) {
+ if (hole < len + room) {
/* To avoid internal fragmentation, if there is very likely not
* enough space for another buffer, add the remaining space to
* the current buffer.
@@ -2185,8 +2212,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
}
/* Make sure NAPI is not using any XDP TX queues for RX. */
- for (i = 0; i < vi->max_queue_pairs; i++)
- napi_disable(&vi->rq[i].napi);
+ if (netif_running(dev))
+ for (i = 0; i < vi->max_queue_pairs; i++)
+ napi_disable(&vi->rq[i].napi);
netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
@@ -2205,7 +2233,8 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
}
if (old_prog)
bpf_prog_put(old_prog);
- virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
+ if (netif_running(dev))
+ virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
}
return 0;
@@ -2576,12 +2605,15 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
{
struct virtnet_info *vi = netdev_priv(queue->dev);
unsigned int queue_index = get_netdev_rx_queue_index(queue);
+ unsigned int headroom = virtnet_get_headroom(vi);
+ unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
struct ewma_pkt_len *avg;
BUG_ON(queue_index >= vi->max_queue_pairs);
avg = &vi->rq[queue_index].mrg_avg_pkt_len;
return sprintf(buf, "%u\n",
- get_mergeable_buf_len(&vi->rq[queue_index], avg));
+ get_mergeable_buf_len(&vi->rq[queue_index], avg,
+ SKB_DATA_ALIGN(headroom + tailroom)));
}
static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =