Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r--	drivers/net/virtio_net.c | 190 +++++++++++++++++++++++++----------
1 file changed, 130 insertions(+), 60 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 0824e6999e49..7fda2ae4c40f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -195,6 +195,9 @@ struct virtnet_info {
 	/* # of XDP queue pairs currently used by the driver */
 	u16 xdp_queue_pairs;
 
+	/* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */
+	bool xdp_enabled;
+
 	/* I like... big packets and I cannot lie! */
 	bool big_packets;
 
@@ -376,21 +379,18 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 				   struct receive_queue *rq,
 				   struct page *page, unsigned int offset,
 				   unsigned int len, unsigned int truesize,
-				   bool hdr_valid, unsigned int metasize)
+				   bool hdr_valid, unsigned int metasize,
+				   unsigned int headroom)
 {
 	struct sk_buff *skb;
 	struct virtio_net_hdr_mrg_rxbuf *hdr;
 	unsigned int copy, hdr_len, hdr_padded_len;
-	char *p;
+	struct page *page_to_free = NULL;
+	int tailroom, shinfo_size;
+	char *p, *hdr_p, *buf;
 
 	p = page_address(page) + offset;
-
-	/* copy small packet so we can reuse these pages for small data */
-	skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
-	if (unlikely(!skb))
-		return NULL;
-
-	hdr = skb_vnet_hdr(skb);
+	hdr_p = p;
 
 	hdr_len = vi->hdr_len;
 	if (vi->mergeable_rx_bufs)
@@ -398,14 +398,44 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	else
 		hdr_padded_len = sizeof(struct padded_vnet_hdr);
 
-	/* hdr_valid means no XDP, so we can copy the vnet header */
-	if (hdr_valid)
-		memcpy(hdr, p, hdr_len);
+	/* If headroom is not 0, there is an offset between the beginning of the
+	 * data and the allocated space, otherwise the data and the allocated
+	 * space are aligned.
+	 */
+	if (headroom) {
+		/* Buffers with headroom use PAGE_SIZE as alloc size,
+		 * see add_recvbuf_mergeable() + get_mergeable_buf_len()
+		 */
+		truesize = PAGE_SIZE;
+		tailroom = truesize - len - offset;
+		buf = page_address(page);
+	} else {
+		tailroom = truesize - len;
+		buf = p;
+	}
 
 	len -= hdr_len;
 	offset += hdr_padded_len;
 	p += hdr_padded_len;
 
+	shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+	/* copy small packet so we can reuse these pages */
+	if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
+		skb = build_skb(buf, truesize);
+		if (unlikely(!skb))
+			return NULL;
+
+		skb_reserve(skb, p - buf);
+		skb_put(skb, len);
+		goto ok;
+	}
+
+	/* copy small packet so we can reuse these pages for small data */
+	skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
+	if (unlikely(!skb))
+		return NULL;
+
 	/* Copy all frame if it fits skb->head, otherwise
 	 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
 	 */
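A note on the hunk above: build_skb() turns the receive buffer itself into the skb head instead of copying, but it places the struct skb_shared_info footer at the end of that buffer, so the rewritten page_to_skb() first has to prove the tail has room for it. The standalone sketch below models just that arithmetic; the real check also requires !NET_IP_ALIGN and len > GOOD_COPY_LEN, and every name and size here is an illustrative stand-in, not kernel code:

/* Userspace model of the tailroom test added to page_to_skb(). */
#include <stdio.h>
#include <stdbool.h>

#define MODEL_PAGE_SIZE   4096
#define MODEL_SHINFO_SIZE  320	/* stand-in for SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */

/* true when the buffer can be handed to a build_skb()-style zero-copy
 * wrap: the bytes after the packet must fit the shared-info footer.
 */
static bool can_build_skb(unsigned int offset, unsigned int len,
			  unsigned int truesize, unsigned int headroom)
{
	int tailroom;

	if (headroom) {
		/* buffers with headroom were carved from a whole page */
		truesize = MODEL_PAGE_SIZE;
		tailroom = truesize - len - offset;
	} else {
		tailroom = truesize - len;
	}
	return tailroom >= MODEL_SHINFO_SIZE;
}

int main(void)
{
	/* packet near the end of the page: too little tail, must copy */
	printf("offset=3500 len=500  : %s\n",
	       can_build_skb(3500, 500, 0, 256) ? "build_skb" : "copy");
	/* aligned buffer with spare tail: zero-copy wrap is safe */
	printf("truesize=2048 len=1500: %s\n",
	       can_build_skb(0, 1500, 2048, 0) ? "build_skb" : "copy");
	return 0;
}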
@@ -415,11 +445,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 		copy = ETH_HLEN + metasize;
 	skb_put_data(skb, p, copy);
 
-	if (metasize) {
-		__skb_pull(skb, metasize);
-		skb_metadata_set(skb, metasize);
-	}
-
 	len -= copy;
 	offset += copy;
 
@@ -427,8 +452,8 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 		if (len)
 			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
 		else
-			put_page(page);
-		return skb;
+			page_to_free = page;
+		goto ok;
 	}
 
 	/*
@@ -455,6 +480,20 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	if (page)
 		give_pages(rq, page);
 
+ok:
+	/* hdr_valid means no XDP, so we can copy the vnet header */
+	if (hdr_valid) {
+		hdr = skb_vnet_hdr(skb);
+		memcpy(hdr, hdr_p, hdr_len);
+	}
+	if (page_to_free)
+		put_page(page_to_free);
+
+	if (metasize) {
+		__skb_pull(skb, metasize);
+		skb_metadata_set(skb, metasize);
+	}
+
 	return skb;
 }
 
@@ -485,12 +524,41 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
 	return 0;
 }
 
-static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi)
-{
-	unsigned int qp;
-
-	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
-	return &vi->sq[qp];
+/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
+ * the current cpu, so it does not need to be locked.
+ *
+ * Here we use a macro instead of inline functions because we have to deal with
+ * three issues at the same time: 1. the choice of sq. 2. judge and execute the
+ * lock/unlock of txq 3. make sparse happy. It is difficult for two inline
+ * functions to perfectly solve these three problems at the same time.
+ */
+#define virtnet_xdp_get_sq(vi) ({					\
+	struct netdev_queue *txq;					\
+	typeof(vi) v = (vi);						\
+	unsigned int qp;						\
+									\
+	if (v->curr_queue_pairs > nr_cpu_ids) {				\
+		qp = v->curr_queue_pairs - v->xdp_queue_pairs;		\
+		qp += smp_processor_id();				\
+		txq = netdev_get_tx_queue(v->dev, qp);			\
+		__netif_tx_acquire(txq);				\
+	} else {							\
+		qp = smp_processor_id() % v->curr_queue_pairs;		\
+		txq = netdev_get_tx_queue(v->dev, qp);			\
+		__netif_tx_lock(txq, raw_smp_processor_id());		\
+	}								\
+	v->sq + qp;							\
+})
+
+#define virtnet_xdp_put_sq(vi, q) {					\
+	struct netdev_queue *txq;					\
+	typeof(vi) v = (vi);						\
+									\
+	txq = netdev_get_tx_queue(v->dev, (q) - v->sq);			\
+	if (v->curr_queue_pairs > nr_cpu_ids)				\
+		__netif_tx_release(txq);				\
+	else								\
+		__netif_tx_unlock(txq);					\
 }
 
 static int virtnet_xdp_xmit(struct net_device *dev,
@@ -503,10 +571,10 @@ static int virtnet_xdp_xmit(struct net_device *dev,
 	unsigned int len;
 	int packets = 0;
 	int bytes = 0;
-	int drops = 0;
+	int nxmit = 0;
 	int kicks = 0;
-	int ret, err;
 	void *ptr;
+	int ret;
 	int i;
 
 	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
@@ -516,11 +584,10 @@ static int virtnet_xdp_xmit(struct net_device *dev,
 	if (!xdp_prog)
 		return -ENXIO;
 
-	sq = virtnet_xdp_sq(vi);
+	sq = virtnet_xdp_get_sq(vi);
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
 		ret = -EINVAL;
-		drops = n;
 		goto out;
 	}
 
@@ -543,13 +610,11 @@ static int virtnet_xdp_xmit(struct net_device *dev,
 	for (i = 0; i < n; i++) {
 		struct xdp_frame *xdpf = frames[i];
 
-		err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
-		if (err) {
-			xdp_return_frame_rx_napi(xdpf);
-			drops++;
-		}
+		if (__virtnet_xdp_xmit_one(vi, sq, xdpf))
+			break;
+		nxmit++;
 	}
-	ret = n - drops;
+	ret = nxmit;
 
 	if (flags & XDP_XMIT_FLUSH) {
 		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
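The virtnet_xdp_get_sq()/virtnet_xdp_put_sq() pair above encodes two regimes: with more queue pairs than CPUs, each CPU owns a dedicated XDP tx queue placed after the regular ones and __netif_tx_acquire() is only a static-analysis annotation; with fewer, queues are shared by CPU id modulo the queue count and must really be locked. A minimal userspace model of that selection policy (pick_xdp_queue() and all types here are illustrative, not the driver's):

#include <stdio.h>

struct model_vi {
	unsigned int curr_queue_pairs;	/* total tx queues in use */
	unsigned int xdp_queue_pairs;	/* queues reserved for XDP_TX */
};

static unsigned int pick_xdp_queue(const struct model_vi *vi,
				   unsigned int cpu, unsigned int nr_cpus,
				   int *needs_lock)
{
	if (vi->curr_queue_pairs > nr_cpus) {
		*needs_lock = 0;	/* dedicated per-CPU queue */
		return vi->curr_queue_pairs - vi->xdp_queue_pairs + cpu;
	}
	*needs_lock = 1;		/* shared queue, serialize */
	return cpu % vi->curr_queue_pairs;
}

int main(void)
{
	struct model_vi dedicated = { .curr_queue_pairs = 8, .xdp_queue_pairs = 4 };
	struct model_vi shared    = { .curr_queue_pairs = 2, .xdp_queue_pairs = 0 };
	unsigned int cpu;
	int lock;

	for (cpu = 0; cpu < 4; cpu++) {
		unsigned int q = pick_xdp_queue(&dedicated, cpu, 4, &lock);
		printf("dedicated: cpu %u -> sq %u (lock=%d)\n", cpu, q, lock);
	}
	for (cpu = 0; cpu < 4; cpu++) {
		unsigned int q = pick_xdp_queue(&shared, cpu, 4, &lock);
		printf("shared:    cpu %u -> sq %u (lock=%d)\n", cpu, q, lock);
	}
	return 0;
}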
@@ -560,16 +625,17 @@ out:
 	sq->stats.bytes += bytes;
 	sq->stats.packets += packets;
 	sq->stats.xdp_tx += n;
-	sq->stats.xdp_tx_drops += drops;
+	sq->stats.xdp_tx_drops += n - nxmit;
 	sq->stats.kicks += kicks;
 	u64_stats_update_end(&sq->stats.syncp);
 
+	virtnet_xdp_put_sq(vi, sq);
 	return ret;
 }
 
 static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
 {
-	return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
+	return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0;
 }
 
 /* We copy the packet for XDP in the following cases:
@@ -713,7 +779,9 @@ static struct sk_buff *receive_small(struct net_device *dev,
 			if (unlikely(!xdpf))
 				goto err_xdp;
 			err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
-			if (unlikely(err < 0)) {
+			if (unlikely(!err)) {
+				xdp_return_frame_rx_napi(xdpf);
+			} else if (unlikely(err < 0)) {
 				trace_xdp_exception(vi->dev, xdp_prog, act);
 				goto err_xdp;
 			}
@@ -776,7 +844,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
 {
 	struct page *page = buf;
 	struct sk_buff *skb =
-		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0);
+		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0, 0);
 
 	stats->bytes += len - vi->hdr_len;
 	if (unlikely(!skb))
@@ -890,7 +958,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 				put_page(page);
 				head_skb = page_to_skb(vi, rq, xdp_page, offset,
 						       len, PAGE_SIZE, false,
-						       metasize);
+						       metasize, headroom);
 				return head_skb;
 			}
 			break;
@@ -900,7 +968,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			if (unlikely(!xdpf))
 				goto err_xdp;
 			err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
-			if (unlikely(err < 0)) {
+			if (unlikely(!err)) {
+				xdp_return_frame_rx_napi(xdpf);
+			} else if (unlikely(err < 0)) {
 				trace_xdp_exception(vi->dev, xdp_prog, act);
 				if (unlikely(xdp_page != page))
 					put_page(xdp_page);
@@ -946,7 +1016,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	}
 
 	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
-			       metasize);
+			       metasize, headroom);
 	curr_skb = head_skb;
 
 	if (unlikely(!curr_skb))
@@ -1462,12 +1532,13 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
 		xdp_do_flush();
 
 	if (xdp_xmit & VIRTIO_XDP_TX) {
-		sq = virtnet_xdp_sq(vi);
+		sq = virtnet_xdp_get_sq(vi);
 		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
 			u64_stats_update_begin(&sq->stats.syncp);
 			sq->stats.kicks++;
 			u64_stats_update_end(&sq->stats.syncp);
 		}
+		virtnet_xdp_put_sq(vi, sq);
 	}
 
 	return received;
@@ -1985,7 +2056,7 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
 		}
 		virtqueue_set_affinity(vi->rq[i].vq, mask);
 		virtqueue_set_affinity(vi->sq[i].vq, mask);
-		__netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, false);
+		__netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS);
 		cpumask_clear(mask);
 	}
 
@@ -2108,25 +2179,21 @@ static int virtnet_set_channels(struct net_device *dev,
 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
-	char *p = (char *)data;
 	unsigned int i, j;
+	u8 *p = data;
 
 	switch (stringset) {
 	case ETH_SS_STATS:
 		for (i = 0; i < vi->curr_queue_pairs; i++) {
-			for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) {
-				snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_%s",
-					 i, virtnet_rq_stats_desc[j].desc);
-				p += ETH_GSTRING_LEN;
-			}
+			for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++)
+				ethtool_sprintf(&p, "rx_queue_%u_%s", i,
+						virtnet_rq_stats_desc[j].desc);
 		}
 
 		for (i = 0; i < vi->curr_queue_pairs; i++) {
-			for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) {
-				snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_%s",
-					 i, virtnet_sq_stats_desc[j].desc);
-				p += ETH_GSTRING_LEN;
-			}
+			for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++)
+				ethtool_sprintf(&p, "tx_queue_%u_%s", i,
+						virtnet_sq_stats_desc[j].desc);
 		}
 		break;
 	}
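Because the transmit loop now stops at the first frame that cannot be queued, virtnet_xdp_xmit() returns nxmit and books n - nxmit as xdp_tx_drops above; ownership of the unsent tail stays with the caller, which is also why the receive paths free the frame themselves when the return value is 0. A hedged userspace sketch of that contract (every model_* name is invented for illustration):

#include <stdio.h>

#define N_FRAMES 5

/* pretend the tx ring fills up after three frames */
static int model_xmit_one(int frame)
{
	return frame < 3 ? 0 : -1;
}

static void model_return_frame(int frame)
{
	printf("caller frees unsent frame %d\n", frame);
}

/* "return the number sent" convention: frames [nxmit, n) are untouched
 * and remain the caller's responsibility.
 */
static int model_xdp_xmit(int n)
{
	int nxmit = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (model_xmit_one(i))
			break;		/* stop at the first failure */
		nxmit++;
	}
	return nxmit;
}

int main(void)
{
	int sent = model_xdp_xmit(N_FRAMES);
	int i;

	printf("sent %d of %d, drops %d\n", sent, N_FRAMES, N_FRAMES - sent);
	for (i = sent; i < N_FRAMES; i++)
		model_return_frame(i);
	return 0;
}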
@@ -2422,10 +2489,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 
 	/* XDP requires extra queues for XDP_TX */
 	if (curr_qp + xdp_qp > vi->max_queue_pairs) {
-		NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available");
-		netdev_warn(dev, "request %i queues but max is %i\n",
+		netdev_warn(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
 			    curr_qp + xdp_qp, vi->max_queue_pairs);
-		return -ENOMEM;
+		xdp_qp = 0;
 	}
 
 	old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
@@ -2459,11 +2525,14 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 	vi->xdp_queue_pairs = xdp_qp;
 
 	if (prog) {
+		vi->xdp_enabled = true;
 		for (i = 0; i < vi->max_queue_pairs; i++) {
 			rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
 			if (i == 0 && !old_prog)
 				virtnet_clear_guest_offloads(vi);
 		}
+	} else {
+		vi->xdp_enabled = false;
 	}
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
@@ -2531,7 +2600,7 @@ static int virtnet_set_features(struct net_device *dev,
 	int err;
 
 	if ((dev->features ^ features) & NETIF_F_LRO) {
-		if (vi->xdp_queue_pairs)
+		if (vi->xdp_enabled)
 			return -EBUSY;
 
 		if (features & NETIF_F_LRO)
@@ -2977,7 +3046,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 		return -ENOMEM;
 
 	/* Set up network device as normal. */
-	dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
+	dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE |
+			   IFF_TX_SKB_NO_LINEAR;
 
 	dev->netdev_ops = &virtnet_netdev;
 	dev->features = NETIF_F_HIGHDMA;
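Taken together, the last hunks turn XDP attach from a hard failure into graceful degradation: when dedicated XDP_TX queues cannot be reserved, xdp_qp drops to 0 and the locked tx path from virtnet_xdp_get_sq() carries the traffic, while the new xdp_enabled flag keeps virtnet_get_headroom() and the LRO check correct even though xdp_queue_pairs is 0. A userspace sketch of that control flow (model_* names and the per-CPU reservation figure are illustrative assumptions, not the driver's exact code):

#include <stdio.h>

struct model_vi {
	unsigned int max_queue_pairs;
	unsigned int curr_queue_pairs;
	unsigned int xdp_queue_pairs;
	int xdp_enabled;
};

static void model_xdp_set(struct model_vi *vi, int prog, unsigned int nr_cpus)
{
	unsigned int curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
	/* assume the driver wants one dedicated XDP_TX queue per CPU */
	unsigned int xdp_qp = prog ? nr_cpus : 0;

	if (curr_qp + xdp_qp > vi->max_queue_pairs) {
		printf("too few queues: shared, locked tx mode\n");
		xdp_qp = 0;	/* degrade instead of returning -ENOMEM */
	}
	vi->xdp_queue_pairs = xdp_qp;
	/* xdp_queue_pairs can now be 0 with a program loaded, hence the
	 * separate flag consulted by virtnet_get_headroom() and friends
	 */
	vi->xdp_enabled = !!prog;
}

int main(void)
{
	struct model_vi vi = { .max_queue_pairs = 2, .curr_queue_pairs = 2 };

	model_xdp_set(&vi, 1, 4);
	printf("xdp_enabled=%d xdp_queue_pairs=%u\n",
	       vi.xdp_enabled, vi.xdp_queue_pairs);
	return 0;
}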