diff options
author | Jesper Dangaard Brouer <brouer@redhat.com> | 2018-05-24 17:46:12 +0300 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2018-05-25 04:36:15 +0300 |
commit | 735fc4054b3a25034445c6713d259da0f96f8131 (patch) | |
tree | 355f7a0672e6239fa4227d562f7d5b65fac9c011 /drivers/net/ethernet/intel | |
parent | 389ab7f01af988c2a1ec5617eb0c7e220df1ef1c (diff) | |
download | linux-735fc4054b3a25034445c6713d259da0f96f8131.tar.xz |
xdp: change ndo_xdp_xmit API to support bulking
This patch change the API for ndo_xdp_xmit to support bulking
xdp_frames.
When kernel is compiled with CONFIG_RETPOLINE, XDP sees a huge slowdown.
Most of the slowdown is caused by DMA API indirect function calls, but
also the net_device->ndo_xdp_xmit() call.
Benchmarked patch with CONFIG_RETPOLINE, using xdp_redirect_map with
single flow/core test (CPU E5-1650 v4 @ 3.60GHz), showed
performance improved:
for driver ixgbe: 6,042,682 pps -> 6,853,768 pps = +811,086 pps
for driver i40e : 6,187,169 pps -> 6,724,519 pps = +537,350 pps
With frames avail as a bulk inside the driver ndo_xdp_xmit call,
further optimizations are possible, like bulk DMA-mapping for TX.
Testing without CONFIG_RETPOLINE show the same performance for
physical NIC drivers.
The virtual NIC driver tun sees a huge performance boost, as it can
avoid doing per frame producer locking, but instead amortize the
locking cost over the bulk.
V2: Fix compile errors reported by kbuild test robot <lkp@intel.com>
V4: Isolated ndo, driver changes and callers.
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'drivers/net/ethernet/intel')
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_txrx.c | 26 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_txrx.h | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 21 |
3 files changed, 35 insertions, 14 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 5efa68de935b..9b698c5acd05 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -3664,14 +3664,19 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev) * @dev: netdev * @xdp: XDP buffer * - * Returns Zero if sent, else an error code + * Returns number of frames successfully sent. Frames that fail are + * free'ed via XDP return API. + * + * For error cases, a negative errno code is returned and no-frames + * are transmitted (caller must handle freeing frames). **/ -int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) +int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames) { struct i40e_netdev_priv *np = netdev_priv(dev); unsigned int queue_index = smp_processor_id(); struct i40e_vsi *vsi = np->vsi; - int err; + int drops = 0; + int i; if (test_bit(__I40E_VSI_DOWN, vsi->state)) return -ENETDOWN; @@ -3679,11 +3684,18 @@ int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs) return -ENXIO; - err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]); - if (err != I40E_XDP_TX) - return -ENOSPC; + for (i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; + int err; - return 0; + err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]); + if (err != I40E_XDP_TX) { + xdp_return_frame_rx_napi(xdpf); + drops++; + } + } + + return n - drops; } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index fdd2c55f03a6..eb8804b3d7b6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -487,7 +487,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw); void i40e_detect_recover_hung(struct i40e_vsi *vsi); int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); bool __i40e_chk_linearize(struct sk_buff *skb); -int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf); +int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames); void i40e_xdp_flush(struct net_device *dev); /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 6652b201df5b..9645619f7729 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10017,11 +10017,13 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } -static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) +static int ixgbe_xdp_xmit(struct net_device *dev, int n, + struct xdp_frame **frames) { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_ring *ring; - int err; + int drops = 0; + int i; if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state))) return -ENETDOWN; @@ -10033,11 +10035,18 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) if (unlikely(!ring)) return -ENXIO; - err = ixgbe_xmit_xdp_ring(adapter, xdpf); - if (err != IXGBE_XDP_TX) - return -ENOSPC; + for (i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; + int err; - return 0; + err = ixgbe_xmit_xdp_ring(adapter, xdpf); + if (err != IXGBE_XDP_TX) { + xdp_return_frame_rx_napi(xdpf); + drops++; + } + } + + return n - drops; } static void ixgbe_xdp_flush(struct net_device *dev) |