diff options
author | Maciej Fijalkowski <maciej.fijalkowski@intel.com> | 2022-01-25 19:04:45 +0300 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2022-01-27 19:25:33 +0300 |
commit | 126cdfe1007acb3632d054a4a68a0174bd47f17d (patch) | |
tree | 7cc6323a5f94c5dca16bb322cf37ff939ad2cc33 /drivers/net/ethernet/intel/ice/ice_xsk.h | |
parent | 86e3f78c8d324b410fbe79dcdc702a366e5c5341 (diff) | |
download | linux-126cdfe1007acb3632d054a4a68a0174bd47f17d.tar.xz |
ice: xsk: Improve AF_XDP ZC Tx and use batching API
Apply the logic that was done for regular XDP from commit 9610bd988df9
("ice: optimize XDP_TX workloads") to the ZC side of the driver. On top
of that, introduce batching to Tx that is inspired by i40e's
implementation with adjustments to the cleaning logic - take into the
account NAPI budget in ice_clean_xdp_irq_zc().
Separating the stats structs onto separate cache lines seemed to improve
the performance.
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Alexander Lobakin <alexandr.lobakin@intel.com>
Link: https://lore.kernel.org/bpf/20220125160446.78976-8-maciej.fijalkowski@intel.com
Diffstat (limited to 'drivers/net/ethernet/intel/ice/ice_xsk.h')
-rw-r--r-- | drivers/net/ethernet/intel/ice/ice_xsk.h | 27 |
1 files changed, 19 insertions, 8 deletions
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index 4c7bd8e9dfc4..0cbb5793b5b8 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -6,19 +6,37 @@ #include "ice_txrx.h" #include "ice.h" +#define PKTS_PER_BATCH 8 + +#ifdef __clang__ +#define loop_unrolled_for _Pragma("clang loop unroll_count(8)") for +#elif __GNUC__ >= 4 +#define loop_unrolled_for _Pragma("GCC unroll 8") for +#else +#define loop_unrolled_for for +#endif + struct ice_vsi; #ifdef CONFIG_XDP_SOCKETS int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid); int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget); -bool ice_clean_tx_irq_zc(struct ice_tx_ring *xdp_ring, int budget); int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count); bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi); void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring); void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring); +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget); #else +static inline bool +ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring, + u32 __always_unused budget, + int __always_unused napi_budget) +{ + return false; +} + static inline int ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi, struct xsk_buff_pool __always_unused *pool, @@ -35,13 +53,6 @@ ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring, } static inline bool -ice_clean_tx_irq_zc(struct ice_tx_ring __always_unused *xdp_ring, - int __always_unused budget) -{ - return false; -} - -static inline bool ice_alloc_rx_bufs_zc(struct ice_rx_ring __always_unused *rx_ring, u16 __always_unused count) { |