diff options
Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r-- | drivers/net/virtio_net.c | 267 |
1 files changed, 173 insertions, 94 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index b6c9a2af3732..765920905226 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -30,7 +30,7 @@ #include <linux/cpu.h> #include <linux/average.h> #include <linux/filter.h> -#include <linux/netdevice.h> +#include <linux/kernel.h> #include <linux/pci.h> #include <net/route.h> #include <net/xdp.h> @@ -53,6 +53,10 @@ module_param(napi_tx, bool, 0644); /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */ #define VIRTIO_XDP_HEADROOM 256 +/* Separating two types of XDP xmit */ +#define VIRTIO_XDP_TX BIT(0) +#define VIRTIO_XDP_REDIR BIT(1) + /* RX packet size EWMA. The average packet size is used to determine the packet * buffer size when refilling RX rings. As the entire RX ring may be refilled * at once, the weight is chosen so that the EWMA will be insensitive to short- @@ -78,25 +82,43 @@ struct virtnet_sq_stats { struct u64_stats_sync syncp; u64 packets; u64 bytes; + u64 xdp_tx; + u64 xdp_tx_drops; + u64 kicks; }; struct virtnet_rq_stats { struct u64_stats_sync syncp; u64 packets; u64 bytes; + u64 drops; + u64 xdp_packets; + u64 xdp_tx; + u64 xdp_redirects; + u64 xdp_drops; + u64 kicks; }; #define VIRTNET_SQ_STAT(m) offsetof(struct virtnet_sq_stats, m) #define VIRTNET_RQ_STAT(m) offsetof(struct virtnet_rq_stats, m) static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = { - { "packets", VIRTNET_SQ_STAT(packets) }, - { "bytes", VIRTNET_SQ_STAT(bytes) }, + { "packets", VIRTNET_SQ_STAT(packets) }, + { "bytes", VIRTNET_SQ_STAT(bytes) }, + { "xdp_tx", VIRTNET_SQ_STAT(xdp_tx) }, + { "xdp_tx_drops", VIRTNET_SQ_STAT(xdp_tx_drops) }, + { "kicks", VIRTNET_SQ_STAT(kicks) }, }; static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = { - { "packets", VIRTNET_RQ_STAT(packets) }, - { "bytes", VIRTNET_RQ_STAT(bytes) }, + { "packets", VIRTNET_RQ_STAT(packets) }, + { "bytes", VIRTNET_RQ_STAT(bytes) }, + { "drops", VIRTNET_RQ_STAT(drops) }, + { "xdp_packets", VIRTNET_RQ_STAT(xdp_packets) }, + { "xdp_tx", VIRTNET_RQ_STAT(xdp_tx) }, + { "xdp_redirects", VIRTNET_RQ_STAT(xdp_redirects) }, + { "xdp_drops", VIRTNET_RQ_STAT(xdp_drops) }, + { "kicks", VIRTNET_RQ_STAT(kicks) }, }; #define VIRTNET_SQ_STATS_LEN ARRAY_SIZE(virtnet_sq_stats_desc) @@ -443,22 +465,12 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, return 0; } -static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi, - struct xdp_frame *xdpf) +static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi) { - struct xdp_frame *xdpf_sent; - struct send_queue *sq; - unsigned int len; unsigned int qp; qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); - sq = &vi->sq[qp]; - - /* Free up any pending old buffers before queueing new ones. */ - while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) - xdp_return_frame(xdpf_sent); - - return __virtnet_xdp_xmit_one(vi, sq, xdpf); + return &vi->sq[qp]; } static int virtnet_xdp_xmit(struct net_device *dev, @@ -470,23 +482,28 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct bpf_prog *xdp_prog; struct send_queue *sq; unsigned int len; - unsigned int qp; int drops = 0; - int err; + int kicks = 0; + int ret, err; int i; - if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) - return -EINVAL; + sq = virtnet_xdp_sq(vi); - qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); - sq = &vi->sq[qp]; + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { + ret = -EINVAL; + drops = n; + goto out; + } /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this * indicate XDP resources have been successfully allocated. */ xdp_prog = rcu_dereference(rq->xdp_prog); - if (!xdp_prog) - return -ENXIO; + if (!xdp_prog) { + ret = -ENXIO; + drops = n; + goto out; + } /* Free up any pending old buffers before queueing new ones. */ while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) @@ -501,11 +518,20 @@ static int virtnet_xdp_xmit(struct net_device *dev, drops++; } } + ret = n - drops; - if (flags & XDP_XMIT_FLUSH) - virtqueue_kick(sq->vq); + if (flags & XDP_XMIT_FLUSH) { + if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) + kicks = 1; + } +out: + u64_stats_update_begin(&sq->stats.syncp); + sq->stats.xdp_tx += n; + sq->stats.xdp_tx_drops += drops; + sq->stats.kicks += kicks; + u64_stats_update_end(&sq->stats.syncp); - return n - drops; + return ret; } static unsigned int virtnet_get_headroom(struct virtnet_info *vi) @@ -582,7 +608,8 @@ static struct sk_buff *receive_small(struct net_device *dev, struct receive_queue *rq, void *buf, void *ctx, unsigned int len, - bool *xdp_xmit) + unsigned int *xdp_xmit, + struct virtnet_rq_stats *stats) { struct sk_buff *skb; struct bpf_prog *xdp_prog; @@ -597,6 +624,7 @@ static struct sk_buff *receive_small(struct net_device *dev, int err; len -= vi->hdr_len; + stats->bytes += len; rcu_read_lock(); xdp_prog = rcu_dereference(rq->xdp_prog); @@ -638,6 +666,7 @@ static struct sk_buff *receive_small(struct net_device *dev, xdp.rxq = &rq->xdp_rxq; orig_data = xdp.data; act = bpf_prog_run_xdp(xdp_prog, &xdp); + stats->xdp_packets++; switch (act) { case XDP_PASS: @@ -646,26 +675,29 @@ static struct sk_buff *receive_small(struct net_device *dev, len = xdp.data_end - xdp.data; break; case XDP_TX: + stats->xdp_tx++; xdpf = convert_to_xdp_frame(&xdp); if (unlikely(!xdpf)) goto err_xdp; - err = __virtnet_xdp_tx_xmit(vi, xdpf); - if (unlikely(err)) { + err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); + if (unlikely(err < 0)) { trace_xdp_exception(vi->dev, xdp_prog, act); goto err_xdp; } - *xdp_xmit = true; + *xdp_xmit |= VIRTIO_XDP_TX; rcu_read_unlock(); goto xdp_xmit; case XDP_REDIRECT: + stats->xdp_redirects++; err = xdp_do_redirect(dev, &xdp, xdp_prog); if (err) goto err_xdp; - *xdp_xmit = true; + *xdp_xmit |= VIRTIO_XDP_REDIR; rcu_read_unlock(); goto xdp_xmit; default: bpf_warn_invalid_xdp_action(act); + /* fall through */ case XDP_ABORTED: trace_xdp_exception(vi->dev, xdp_prog, act); case XDP_DROP: @@ -691,7 +723,8 @@ err: err_xdp: rcu_read_unlock(); - dev->stats.rx_dropped++; + stats->xdp_drops++; + stats->drops++; put_page(page); xdp_xmit: return NULL; @@ -701,18 +734,20 @@ static struct sk_buff *receive_big(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, void *buf, - unsigned int len) + unsigned int len, + struct virtnet_rq_stats *stats) { struct page *page = buf; struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE); + stats->bytes += len - vi->hdr_len; if (unlikely(!skb)) goto err; return skb; err: - dev->stats.rx_dropped++; + stats->drops++; give_pages(rq, page); return NULL; } @@ -723,7 +758,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, void *buf, void *ctx, unsigned int len, - bool *xdp_xmit) + unsigned int *xdp_xmit, + struct virtnet_rq_stats *stats) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf; u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); @@ -736,6 +772,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, int err; head_skb = NULL; + stats->bytes += len - vi->hdr_len; rcu_read_lock(); xdp_prog = rcu_dereference(rq->xdp_prog); @@ -784,6 +821,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, xdp.rxq = &rq->xdp_rxq; act = bpf_prog_run_xdp(xdp_prog, &xdp); + stats->xdp_packets++; switch (act) { case XDP_PASS: @@ -808,37 +846,41 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } break; case XDP_TX: + stats->xdp_tx++; xdpf = convert_to_xdp_frame(&xdp); if (unlikely(!xdpf)) goto err_xdp; - err = __virtnet_xdp_tx_xmit(vi, xdpf); - if (unlikely(err)) { + err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); + if (unlikely(err < 0)) { trace_xdp_exception(vi->dev, xdp_prog, act); if (unlikely(xdp_page != page)) put_page(xdp_page); goto err_xdp; } - *xdp_xmit = true; + *xdp_xmit |= VIRTIO_XDP_TX; if (unlikely(xdp_page != page)) put_page(page); rcu_read_unlock(); goto xdp_xmit; case XDP_REDIRECT: + stats->xdp_redirects++; err = xdp_do_redirect(dev, &xdp, xdp_prog); if (err) { if (unlikely(xdp_page != page)) put_page(xdp_page); goto err_xdp; } - *xdp_xmit = true; + *xdp_xmit |= VIRTIO_XDP_REDIR; if (unlikely(xdp_page != page)) put_page(page); rcu_read_unlock(); goto xdp_xmit; default: bpf_warn_invalid_xdp_action(act); + /* fall through */ case XDP_ABORTED: trace_xdp_exception(vi->dev, xdp_prog, act); + /* fall through */ case XDP_DROP: if (unlikely(xdp_page != page)) __free_pages(xdp_page, 0); @@ -873,6 +915,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, goto err_buf; } + stats->bytes += len; page = virt_to_head_page(buf); truesize = mergeable_ctx_to_truesize(ctx); @@ -918,6 +961,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, err_xdp: rcu_read_unlock(); + stats->xdp_drops++; err_skb: put_page(page); while (num_buf-- > 1) { @@ -928,23 +972,25 @@ err_skb: dev->stats.rx_length_errors++; break; } + stats->bytes += len; page = virt_to_head_page(buf); put_page(page); } err_buf: - dev->stats.rx_dropped++; + stats->drops++; dev_kfree_skb(head_skb); xdp_xmit: return NULL; } -static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq, - void *buf, unsigned int len, void **ctx, bool *xdp_xmit) +static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, + void *buf, unsigned int len, void **ctx, + unsigned int *xdp_xmit, + struct virtnet_rq_stats *stats) { struct net_device *dev = vi->dev; struct sk_buff *skb; struct virtio_net_hdr_mrg_rxbuf *hdr; - int ret; if (unlikely(len < vi->hdr_len + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); @@ -956,23 +1002,22 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq, } else { put_page(virt_to_head_page(buf)); } - return 0; + return; } if (vi->mergeable_rx_bufs) - skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit); + skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, + stats); else if (vi->big_packets) - skb = receive_big(dev, vi, rq, buf, len); + skb = receive_big(dev, vi, rq, buf, len, stats); else - skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit); + skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); if (unlikely(!skb)) - return 0; + return; hdr = skb_vnet_hdr(skb); - ret = skb->len; - if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -989,12 +1034,11 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq, ntohs(skb->protocol), skb->len, skb->pkt_type); napi_gro_receive(&rq->napi, skb); - return ret; + return; frame_err: dev->stats.rx_frame_errors++; dev_kfree_skb(skb); - return 0; } /* Unlike mergeable buffers, all buffers are allocated to the @@ -1161,7 +1205,12 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq, if (err) break; } while (rq->vq->num_free); - virtqueue_kick(rq->vq); + if (virtqueue_kick_prepare(rq->vq) && virtqueue_notify(rq->vq)) { + u64_stats_update_begin(&rq->stats.syncp); + rq->stats.kicks++; + u64_stats_update_end(&rq->stats.syncp); + } + return !oom; } @@ -1232,25 +1281,28 @@ static void refill_work(struct work_struct *work) } } -static int virtnet_receive(struct receive_queue *rq, int budget, bool *xdp_xmit) +static int virtnet_receive(struct receive_queue *rq, int budget, + unsigned int *xdp_xmit) { struct virtnet_info *vi = rq->vq->vdev->priv; - unsigned int len, received = 0, bytes = 0; + struct virtnet_rq_stats stats = {}; + unsigned int len; void *buf; + int i; if (!vi->big_packets || vi->mergeable_rx_bufs) { void *ctx; - while (received < budget && + while (stats.packets < budget && (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) { - bytes += receive_buf(vi, rq, buf, len, ctx, xdp_xmit); - received++; + receive_buf(vi, rq, buf, len, ctx, xdp_xmit, &stats); + stats.packets++; } } else { - while (received < budget && + while (stats.packets < budget && (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { - bytes += receive_buf(vi, rq, buf, len, NULL, xdp_xmit); - received++; + receive_buf(vi, rq, buf, len, NULL, xdp_xmit, &stats); + stats.packets++; } } @@ -1260,11 +1312,16 @@ static int virtnet_receive(struct receive_queue *rq, int budget, bool *xdp_xmit) } u64_stats_update_begin(&rq->stats.syncp); - rq->stats.bytes += bytes; - rq->stats.packets += received; + for (i = 0; i < VIRTNET_RQ_STATS_LEN; i++) { + size_t offset = virtnet_rq_stats_desc[i].offset; + u64 *item; + + item = (u64 *)((u8 *)&rq->stats + offset); + *item += *(u64 *)((u8 *)&stats + offset); + } u64_stats_update_end(&rq->stats.syncp); - return received; + return stats.packets; } static void free_old_xmit_skbs(struct send_queue *sq) @@ -1320,8 +1377,8 @@ static int virtnet_poll(struct napi_struct *napi, int budget) container_of(napi, struct receive_queue, napi); struct virtnet_info *vi = rq->vq->vdev->priv; struct send_queue *sq; - unsigned int received, qp; - bool xdp_xmit = false; + unsigned int received; + unsigned int xdp_xmit = 0; virtnet_poll_cleantx(rq); @@ -1331,12 +1388,16 @@ static int virtnet_poll(struct napi_struct *napi, int budget) if (received < budget) virtqueue_napi_complete(napi, rq->vq, received); - if (xdp_xmit) { - qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + - smp_processor_id(); - sq = &vi->sq[qp]; - virtqueue_kick(sq->vq); + if (xdp_xmit & VIRTIO_XDP_REDIR) xdp_do_flush_map(); + + if (xdp_xmit & VIRTIO_XDP_TX) { + sq = virtnet_xdp_sq(vi); + if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { + u64_stats_update_begin(&sq->stats.syncp); + sq->stats.kicks++; + u64_stats_update_end(&sq->stats.syncp); + } } return received; @@ -1498,8 +1559,13 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) } } - if (kick || netif_xmit_stopped(txq)) - virtqueue_kick(sq->vq); + if (kick || netif_xmit_stopped(txq)) { + if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { + u64_stats_update_begin(&sq->stats.syncp); + sq->stats.kicks++; + u64_stats_update_end(&sq->stats.syncp); + } + } return NETDEV_TX_OK; } @@ -1603,7 +1669,7 @@ static void virtnet_stats(struct net_device *dev, int i; for (i = 0; i < vi->max_queue_pairs; i++) { - u64 tpackets, tbytes, rpackets, rbytes; + u64 tpackets, tbytes, rpackets, rbytes, rdrops; struct receive_queue *rq = &vi->rq[i]; struct send_queue *sq = &vi->sq[i]; @@ -1617,17 +1683,18 @@ static void virtnet_stats(struct net_device *dev, start = u64_stats_fetch_begin_irq(&rq->stats.syncp); rpackets = rq->stats.packets; rbytes = rq->stats.bytes; + rdrops = rq->stats.drops; } while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start)); tot->rx_packets += rpackets; tot->tx_packets += tpackets; tot->rx_bytes += rbytes; tot->tx_bytes += tbytes; + tot->rx_dropped += rdrops; } tot->tx_dropped = dev->stats.tx_dropped; tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->rx_dropped = dev->stats.rx_dropped; tot->rx_length_errors = dev->stats.rx_length_errors; tot->rx_frame_errors = dev->stats.rx_frame_errors; } @@ -1811,8 +1878,8 @@ static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu) if (vi->affinity_hint_set) { for (i = 0; i < vi->max_queue_pairs; i++) { - virtqueue_set_affinity(vi->rq[i].vq, -1); - virtqueue_set_affinity(vi->sq[i].vq, -1); + virtqueue_set_affinity(vi->rq[i].vq, NULL); + virtqueue_set_affinity(vi->sq[i].vq, NULL); } vi->affinity_hint_set = false; @@ -1821,28 +1888,41 @@ static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu) static void virtnet_set_affinity(struct virtnet_info *vi) { - int i; - int cpu; + cpumask_var_t mask; + int stragglers; + int group_size; + int i, j, cpu; + int num_cpu; + int stride; - /* In multiqueue mode, when the number of cpu is equal to the number of - * queue pairs, we let the queue pairs to be private to one cpu by - * setting the affinity hint to eliminate the contention. - */ - if (vi->curr_queue_pairs == 1 || - vi->max_queue_pairs != num_online_cpus()) { + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { virtnet_clean_affinity(vi, -1); return; } - i = 0; - for_each_online_cpu(cpu) { - virtqueue_set_affinity(vi->rq[i].vq, cpu); - virtqueue_set_affinity(vi->sq[i].vq, cpu); - netif_set_xps_queue(vi->dev, cpumask_of(cpu), i); - i++; + num_cpu = num_online_cpus(); + stride = max_t(int, num_cpu / vi->curr_queue_pairs, 1); + stragglers = num_cpu >= vi->curr_queue_pairs ? + num_cpu % vi->curr_queue_pairs : + 0; + cpu = cpumask_next(-1, cpu_online_mask); + + for (i = 0; i < vi->curr_queue_pairs; i++) { + group_size = stride + (i < stragglers ? 1 : 0); + + for (j = 0; j < group_size; j++) { + cpumask_set_cpu(cpu, mask); + cpu = cpumask_next_wrap(cpu, cpu_online_mask, + nr_cpu_ids, false); + } + virtqueue_set_affinity(vi->rq[i].vq, mask); + virtqueue_set_affinity(vi->sq[i].vq, mask); + __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, false); + cpumask_clear(mask); } vi->affinity_hint_set = true; + free_cpumask_var(mask); } static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node) @@ -2335,7 +2415,6 @@ static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) return virtnet_xdp_set(dev, xdp->prog, xdp->extack); case XDP_QUERY_PROG: xdp->prog_id = virtnet_xdp_query(dev); - xdp->prog_attached = !!xdp->prog_id; return 0; default: return -EINVAL; |