summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/intel/i40e/i40e_xsk.h
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/intel/i40e/i40e_xsk.h')
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_xsk.h16
1 files changed, 16 insertions, 0 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
index 7adfd8539247..ea88f4597a07 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
@@ -4,6 +4,22 @@
#ifndef _I40E_XSK_H_
#define _I40E_XSK_H_
+/* This value should match the pragma in the loop_unrolled_for
+ * macro. Why 4? It is strictly empirical. It seems to be a good
+ * compromise between the advantage of having simultaneous outstanding
+ * reads to the DMA array that can hide each others latency and the
+ * disadvantage of having a larger code path.
+ */
+#define PKTS_PER_BATCH 4
+
+#ifdef __clang__
+#define loop_unrolled_for _Pragma("clang loop unroll_count(4)") for
+#elif __GNUC__ >= 8
+#define loop_unrolled_for _Pragma("GCC unroll 4") for
+#else
+#define loop_unrolled_for for
+#endif
+
struct i40e_vsi;
struct xsk_buff_pool;
struct zero_copy_allocator;