Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r--	drivers/net/virtio_net.c	208
1 file changed, 143 insertions(+), 65 deletions(-)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 82e520d2cb12..9b6a4a875c55 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -195,6 +195,9 @@ struct virtnet_info {
 	/* # of XDP queue pairs currently used by the driver */
 	u16 xdp_queue_pairs;
 
+	/* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */
+	bool xdp_enabled;
+
 	/* I like... big packets and I cannot lie! */
 	bool big_packets;
 
@@ -376,21 +379,18 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 				   struct receive_queue *rq,
 				   struct page *page, unsigned int offset,
 				   unsigned int len, unsigned int truesize,
-				   bool hdr_valid, unsigned int metasize)
+				   bool hdr_valid, unsigned int metasize,
+				   unsigned int headroom)
 {
 	struct sk_buff *skb;
 	struct virtio_net_hdr_mrg_rxbuf *hdr;
 	unsigned int copy, hdr_len, hdr_padded_len;
-	char *p;
+	struct page *page_to_free = NULL;
+	int tailroom, shinfo_size;
+	char *p, *hdr_p, *buf;
 
 	p = page_address(page) + offset;
-
-	/* copy small packet so we can reuse these pages for small data */
-	skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
-	if (unlikely(!skb))
-		return NULL;
-
-	hdr = skb_vnet_hdr(skb);
+	hdr_p = p;
 
 	hdr_len = vi->hdr_len;
 	if (vi->mergeable_rx_bufs)
@@ -398,24 +398,53 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	else
 		hdr_padded_len = sizeof(struct padded_vnet_hdr);
 
-	/* hdr_valid means no XDP, so we can copy the vnet header */
-	if (hdr_valid)
-		memcpy(hdr, p, hdr_len);
+	/* If headroom is not 0, there is an offset between the beginning of the
+	 * data and the allocated space, otherwise the data and the allocated
+	 * space are aligned.
+	 */
+	if (headroom) {
+		/* Buffers with headroom use PAGE_SIZE as alloc size,
+		 * see add_recvbuf_mergeable() + get_mergeable_buf_len()
+		 */
+		truesize = PAGE_SIZE;
+		tailroom = truesize - len - offset;
+		buf = page_address(page);
+	} else {
+		tailroom = truesize - len;
+		buf = p;
+	}
 
 	len -= hdr_len;
 	offset += hdr_padded_len;
 	p += hdr_padded_len;
 
-	copy = len;
-	if (copy > skb_tailroom(skb))
-		copy = skb_tailroom(skb);
-	skb_put_data(skb, p, copy);
+	shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
-	if (metasize) {
-		__skb_pull(skb, metasize);
-		skb_metadata_set(skb, metasize);
+	/* copy small packet so we can reuse these pages */
+	if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
+		skb = build_skb(buf, truesize);
+		if (unlikely(!skb))
+			return NULL;
+
+		skb_reserve(skb, p - buf);
+		skb_put(skb, len);
+		goto ok;
 	}
 
+	/* copy small packet so we can reuse these pages for small data */
+	skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* Copy all frame if it fits skb->head, otherwise
+	 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
+	 */
+	if (len <= skb_tailroom(skb))
+		copy = len;
+	else
+		copy = ETH_HLEN + metasize;
+	skb_put_data(skb, p, copy);
+
 	len -= copy;
 	offset += copy;
 
@@ -423,8 +452,8 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 		if (len)
 			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
 		else
-			put_page(page);
-		return skb;
+			page_to_free = page;
+		goto ok;
 	}
 
 	/*
@@ -451,6 +480,20 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	if (page)
 		give_pages(rq, page);
 
+ok:
+	/* hdr_valid means no XDP, so we can copy the vnet header */
+	if (hdr_valid) {
+		hdr = skb_vnet_hdr(skb);
+		memcpy(hdr, hdr_p, hdr_len);
+	}
+	if (page_to_free)
+		put_page(page_to_free);
+
+	if (metasize) {
+		__skb_pull(skb, metasize);
+		skb_metadata_set(skb, metasize);
+	}
+
 	return skb;
 }
 
@@ -481,12 +524,41 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
 	return 0;
 }
 
-static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi)
-{
-	unsigned int qp;
-
-	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
-	return &vi->sq[qp];
+/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
+ * the current cpu, so it does not need to be locked.
+ *
+ * Here we use marco instead of inline functions because we have to deal with
+ * three issues at the same time: 1. the choice of sq. 2. judge and execute the
+ * lock/unlock of txq 3. make sparse happy. It is difficult for two inline
+ * functions to perfectly solve these three problems at the same time.
+ */
+#define virtnet_xdp_get_sq(vi) ({                                       \
+	struct netdev_queue *txq;                                       \
+	typeof(vi) v = (vi);                                            \
+	unsigned int qp;                                                \
+									\
+	if (v->curr_queue_pairs > nr_cpu_ids) {                         \
+		qp = v->curr_queue_pairs - v->xdp_queue_pairs;          \
+		qp += smp_processor_id();                               \
+		txq = netdev_get_tx_queue(v->dev, qp);                  \
+		__netif_tx_acquire(txq);                                \
+	} else {                                                        \
+		qp = smp_processor_id() % v->curr_queue_pairs;          \
+		txq = netdev_get_tx_queue(v->dev, qp);                  \
+		__netif_tx_lock(txq, raw_smp_processor_id());           \
+	}                                                               \
+	v->sq + qp;                                                     \
+})
+
+#define virtnet_xdp_put_sq(vi, q) {                                     \
+	struct netdev_queue *txq;                                       \
+	typeof(vi) v = (vi);                                            \
+									\
+	txq = netdev_get_tx_queue(v->dev, (q) - v->sq);                 \
+	if (v->curr_queue_pairs > nr_cpu_ids)                           \
+		__netif_tx_release(txq);                                \
+	else                                                            \
+		__netif_tx_unlock(txq);                                 \
 }
 
 static int virtnet_xdp_xmit(struct net_device *dev,
@@ -499,10 +571,10 @@ static int virtnet_xdp_xmit(struct net_device *dev,
 	unsigned int len;
 	int packets = 0;
 	int bytes = 0;
-	int drops = 0;
+	int nxmit = 0;
 	int kicks = 0;
-	int ret, err;
 	void *ptr;
+	int ret;
 	int i;
 
 	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
@@ -512,11 +584,10 @@
 	if (!xdp_prog)
 		return -ENXIO;
 
-	sq = virtnet_xdp_sq(vi);
+	sq = virtnet_xdp_get_sq(vi);
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
 		ret = -EINVAL;
-		drops = n;
 		goto out;
 	}
 
@@ -539,13 +610,11 @@ static int virtnet_xdp_xmit(struct net_device *dev,
 	for (i = 0; i < n; i++) {
 		struct xdp_frame *xdpf = frames[i];
 
-		err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
-		if (err) {
-			xdp_return_frame_rx_napi(xdpf);
-			drops++;
-		}
+		if (__virtnet_xdp_xmit_one(vi, sq, xdpf))
+			break;
+		nxmit++;
 	}
-	ret = n - drops;
+	ret = nxmit;
 
 	if (flags & XDP_XMIT_FLUSH) {
 		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
@@ -556,16 +625,17 @@ out:
 	sq->stats.bytes += bytes;
 	sq->stats.packets += packets;
 	sq->stats.xdp_tx += n;
-	sq->stats.xdp_tx_drops += drops;
+	sq->stats.xdp_tx_drops += n - nxmit;
 	sq->stats.kicks += kicks;
 	u64_stats_update_end(&sq->stats.syncp);
 
+	virtnet_xdp_put_sq(vi, sq);
 	return ret;
 }
 
 static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
 {
-	return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
+	return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0;
 }
 
 /* We copy the packet for XDP in the following cases:
@@ -709,7 +779,9 @@ static struct sk_buff *receive_small(struct net_device *dev,
 			if (unlikely(!xdpf))
 				goto err_xdp;
 			err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
-			if (unlikely(err < 0)) {
+			if (unlikely(!err)) {
+				xdp_return_frame_rx_napi(xdpf);
+			} else if (unlikely(err < 0)) {
 				trace_xdp_exception(vi->dev, xdp_prog, act);
 				goto err_xdp;
 			}
@@ -772,7 +844,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
 {
 	struct page *page = buf;
 	struct sk_buff *skb =
-		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0);
+		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0, 0);
 
 	stats->bytes += len - vi->hdr_len;
 	if (unlikely(!skb))
@@ -886,7 +958,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 				put_page(page);
 				head_skb = page_to_skb(vi, rq, xdp_page, offset,
 						       len, PAGE_SIZE, false,
-						       metasize);
+						       metasize, headroom);
 				return head_skb;
 			}
 			break;
@@ -896,7 +968,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			if (unlikely(!xdpf))
 				goto err_xdp;
 			err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
-			if (unlikely(err < 0)) {
+			if (unlikely(!err)) {
+				xdp_return_frame_rx_napi(xdpf);
+			} else if (unlikely(err < 0)) {
 				trace_xdp_exception(vi->dev, xdp_prog, act);
 				if (unlikely(xdp_page != page))
 					put_page(xdp_page);
@@ -942,7 +1016,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	}
 
 	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
-			       metasize);
+			       metasize, headroom);
 	curr_skb = head_skb;
 
 	if (unlikely(!curr_skb))
@@ -1458,12 +1532,13 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
 		xdp_do_flush();
 
 	if (xdp_xmit & VIRTIO_XDP_TX) {
-		sq = virtnet_xdp_sq(vi);
+		sq = virtnet_xdp_get_sq(vi);
 		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
 			u64_stats_update_begin(&sq->stats.syncp);
 			sq->stats.kicks++;
 			u64_stats_update_end(&sq->stats.syncp);
 		}
+		virtnet_xdp_put_sq(vi, sq);
 	}
 
 	return received;
@@ -1981,7 +2056,7 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
 		}
 		virtqueue_set_affinity(vi->rq[i].vq, mask);
 		virtqueue_set_affinity(vi->sq[i].vq, mask);
-		__netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, false);
+		__netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS);
 		cpumask_clear(mask);
 	}
 
@@ -2104,25 +2179,21 @@ static int virtnet_set_channels(struct net_device *dev,
 static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
-	char *p = (char *)data;
 	unsigned int i, j;
+	u8 *p = data;
 
 	switch (stringset) {
 	case ETH_SS_STATS:
 		for (i = 0; i < vi->curr_queue_pairs; i++) {
-			for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) {
-				snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_%s",
-					 i, virtnet_rq_stats_desc[j].desc);
-				p += ETH_GSTRING_LEN;
-			}
+			for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++)
+				ethtool_sprintf(&p, "rx_queue_%u_%s", i,
+						virtnet_rq_stats_desc[j].desc);
 		}
 
 		for (i = 0; i < vi->curr_queue_pairs; i++) {
-			for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) {
-				snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_%s",
-					 i, virtnet_sq_stats_desc[j].desc);
-				p += ETH_GSTRING_LEN;
-			}
+			for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++)
+				ethtool_sprintf(&p, "tx_queue_%u_%s", i,
						virtnet_sq_stats_desc[j].desc);
 		}
 		break;
 	}
@@ -2418,10 +2489,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 
 	/* XDP requires extra queues for XDP_TX */
 	if (curr_qp + xdp_qp > vi->max_queue_pairs) {
-		NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available");
-		netdev_warn(dev, "request %i queues but max is %i\n",
+		netdev_warn(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
 			    curr_qp + xdp_qp, vi->max_queue_pairs);
-		return -ENOMEM;
+		xdp_qp = 0;
 	}
 
 	old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
@@ -2455,11 +2525,14 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 	vi->xdp_queue_pairs = xdp_qp;
 
 	if (prog) {
+		vi->xdp_enabled = true;
 		for (i = 0; i < vi->max_queue_pairs; i++) {
 			rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
 			if (i == 0 && !old_prog)
 				virtnet_clear_guest_offloads(vi);
 		}
+	} else {
+		vi->xdp_enabled = false;
 	}
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
@@ -2527,7 +2600,7 @@ static int virtnet_set_features(struct net_device *dev,
 	int err;
 
 	if ((dev->features ^ features) & NETIF_F_LRO) {
-		if (vi->xdp_queue_pairs)
+		if (vi->xdp_enabled)
 			return -EBUSY;
 
 		if (features & NETIF_F_LRO)
@@ -2797,9 +2870,13 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
 {
 	int i;
 
-	vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
-	if (!vi->ctrl)
-		goto err_ctrl;
+	if (vi->has_cvq) {
+		vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
+		if (!vi->ctrl)
+			goto err_ctrl;
+	} else {
+		vi->ctrl = NULL;
+	}
 	vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL);
 	if (!vi->sq)
 		goto err_sq;
@@ -2973,7 +3050,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 		return -ENOMEM;
 
 	/* Set up network device as normal. */
-	dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
+	dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE |
+			   IFF_TX_SKB_NO_LINEAR;
 	dev->netdev_ops = &virtnet_netdev;
 	dev->features = NETIF_F_HIGHDMA;
 
