diff options
Diffstat (limited to 'drivers/net/veth.c')
| -rw-r--r-- | drivers/net/veth.c | 64 |
1 files changed, 51 insertions, 13 deletions
diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 18148e068aa0..4ff0d4232914 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -306,12 +306,10 @@ static void __veth_xdp_flush(struct veth_rq *rq) static int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb) { - if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) { - dev_kfree_skb_any(skb); - return NET_RX_DROP; - } + if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) + return NETDEV_TX_BUSY; /* signal qdisc layer */ - return NET_RX_SUCCESS; + return NET_RX_SUCCESS; /* same as NETDEV_TX_OK */ } static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb, @@ -345,11 +343,11 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) { struct veth_priv *rcv_priv, *priv = netdev_priv(dev); struct veth_rq *rq = NULL; - int ret = NETDEV_TX_OK; + struct netdev_queue *txq; struct net_device *rcv; int length = skb->len; bool use_napi = false; - int rxq; + int ret, rxq; rcu_read_lock(); rcv = rcu_dereference(priv->peer); @@ -372,17 +370,43 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) } skb_tx_timestamp(skb); - if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) { + + ret = veth_forward_skb(rcv, skb, rq, use_napi); + switch (ret) { + case NET_RX_SUCCESS: /* same as NETDEV_TX_OK */ if (!use_napi) dev_sw_netstats_tx_add(dev, 1, length); else __veth_xdp_flush(rq); - } else { + break; + case NETDEV_TX_BUSY: + /* If a qdisc is attached to our virtual device, returning + * NETDEV_TX_BUSY is allowed. + */ + txq = netdev_get_tx_queue(dev, rxq); + + if (qdisc_txq_has_no_queue(txq)) { + dev_kfree_skb_any(skb); + goto drop; + } + /* Restore Eth hdr pulled by dev_forward_skb/eth_type_trans */ + __skb_push(skb, ETH_HLEN); + netif_tx_stop_queue(txq); + /* Makes sure NAPI peer consumer runs. Consumer is responsible + * for starting txq again, until then ndo_start_xmit (this + * function) will not be invoked by the netstack again. + */ + __veth_xdp_flush(rq); + break; + case NET_RX_DROP: /* same as NET_XMIT_DROP */ drop: atomic64_inc(&priv->dropped); ret = NET_XMIT_DROP; + break; + default: + net_crit_ratelimited("%s(%s): Invalid return code(%d)", + __func__, dev->name, ret); } - rcu_read_unlock(); return ret; @@ -932,17 +956,28 @@ static int veth_poll(struct napi_struct *napi, int budget) { struct veth_rq *rq = container_of(napi, struct veth_rq, xdp_napi); + struct veth_priv *priv = netdev_priv(rq->dev); + int queue_idx = rq->xdp_rxq.queue_index; + struct netdev_queue *peer_txq; struct veth_stats stats = {}; + struct net_device *peer_dev; struct veth_xdp_tx_bq bq; int done; bq.count = 0; + /* NAPI functions as RCU section */ + peer_dev = rcu_dereference_check(priv->peer, rcu_read_lock_bh_held()); + peer_txq = peer_dev ? netdev_get_tx_queue(peer_dev, queue_idx) : NULL; + xdp_set_return_frame_no_direct(); done = veth_xdp_rcv(rq, budget, &bq, &stats); if (stats.xdp_redirect > 0) xdp_do_flush(); + if (stats.xdp_tx > 0) + veth_xdp_flush(rq, &bq); + xdp_clear_return_frame_no_direct(); if (done < budget && napi_complete_done(napi, done)) { /* Write rx_notify_masked before reading ptr_ring */ @@ -955,9 +990,12 @@ static int veth_poll(struct napi_struct *napi, int budget) } } - if (stats.xdp_tx > 0) - veth_xdp_flush(rq, &bq); - xdp_clear_return_frame_no_direct(); + /* Release backpressure per NAPI poll */ + smp_rmb(); /* Paired with netif_tx_stop_queue set_bit */ + if (peer_txq && netif_tx_queue_stopped(peer_txq)) { + txq_trans_cond_update(peer_txq); + netif_tx_wake_queue(peer_txq); + } return done; } |
