summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2025-02-11 04:54:45 +0300
committerJakub Kicinski <kuba@kernel.org>2025-02-11 04:54:46 +0300
commit7aba66642936068cfb4a45eb4bddf437de2776f8 (patch)
tree7fa21de685ce2a7c51b8cb5602306a09a4450024 /include
parent5b281fe7e396c519914863c5de5ce3a3f9cffd5b (diff)
parent23d9324a27a48858cfdd7f0342f52328e8595c6d (diff)
downloadlinux-7aba66642936068cfb4a45eb4bddf437de2776f8.tar.xz
Merge branch 'xsk-the-lost-bits-from-chapter-iii'
Alexander Lobakin says: ==================== xsk: the lost bits from Chapter III Before introducing libeth_xdp, we need to add a couple more generic helpers. Notably: * 01: add generic loop unrolling hint helpers; * 04: add helper to get both xdp_desc's DMA address and metadata pointer in one go, saving several cycles and hotpath object code size in drivers (especially when unrolling). Bonus: * 02, 03: convert two drivers which were using custom macros to generic unrolled_count() (trivial, no object code changes). ==================== Link: https://patch.msgid.link/20250206182630.3914318-1-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/unroll.h44
-rw-r--r--include/net/xdp_sock_drv.h43
-rw-r--r--include/net/xsk_buff_pool.h8
3 files changed, 92 insertions, 3 deletions
diff --git a/include/linux/unroll.h b/include/linux/unroll.h
index d42fd6366373..863fb69f6a7e 100644
--- a/include/linux/unroll.h
+++ b/include/linux/unroll.h
@@ -9,6 +9,50 @@
#include <linux/args.h>
+#ifdef CONFIG_CC_IS_CLANG
+#define __pick_unrolled(x, y) _Pragma(#x)
+#elif CONFIG_GCC_VERSION >= 80000
+#define __pick_unrolled(x, y) _Pragma(#y)
+#else
+#define __pick_unrolled(x, y) /* not supported */
+#endif
+
+/**
+ * unrolled - loop attributes to ask the compiler to unroll it
+ *
+ * Usage:
+ *
+ * #define BATCH 8
+ *
+ * unrolled_count(BATCH)
+ * for (u32 i = 0; i < BATCH; i++)
+ * // loop body without cross-iteration dependencies
+ *
+ * This is only a hint and the compiler is free to disable unrolling if it
+ * thinks the count is suboptimal and may hurt performance and/or hugely
+ * increase object code size.
+ * Not having any cross-iteration dependencies (i.e. when iter x + 1 depends
+ * on what iter x will do with variables) is not a strict requirement, but
+ * provides best performance and object code size.
+ * Available only on Clang and GCC 8.x onwards.
+ */
+
+/* Ask the compiler to pick an optimal unroll count, Clang only */
+#define unrolled \
+ __pick_unrolled(clang loop unroll(enable), /* nothing */)
+
+/* Unroll each @n iterations of the loop */
+#define unrolled_count(n) \
+ __pick_unrolled(clang loop unroll_count(n), GCC unroll n)
+
+/* Unroll the whole loop */
+#define unrolled_full \
+ __pick_unrolled(clang loop unroll(full), GCC unroll 65534)
+
+/* Never unroll the loop */
+#define unrolled_none \
+ __pick_unrolled(clang loop unroll(disable), GCC unroll 1)
+
#define UNROLL(N, MACRO, args...) CONCATENATE(__UNROLL_, N)(MACRO, args)
#define __UNROLL_0(MACRO, args...)
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 784cd34f5bba..15086dcf51d8 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -196,6 +196,23 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
return xp_raw_get_data(pool, addr);
}
+/**
+ * xsk_buff_raw_get_ctx - get &xdp_desc context
+ * @pool: XSk buff pool desc address belongs to
+ * @addr: desc address (from userspace)
+ *
+ * Wrapper for xp_raw_get_ctx() to be used in drivers, see its kdoc for
+ * details.
+ *
+ * Return: new &xdp_desc_ctx struct containing desc's DMA address and metadata
+ * pointer, if it is present and valid (initialized to %NULL otherwise).
+ */
+static inline struct xdp_desc_ctx
+xsk_buff_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr)
+{
+ return xp_raw_get_ctx(pool, addr);
+}
+
#define XDP_TXMD_FLAGS_VALID ( \
XDP_TXMD_FLAGS_TIMESTAMP | \
XDP_TXMD_FLAGS_CHECKSUM | \
@@ -207,20 +224,27 @@ xsk_buff_valid_tx_metadata(const struct xsk_tx_metadata *meta)
return !(meta->flags & ~XDP_TXMD_FLAGS_VALID);
}
-static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+static inline struct xsk_tx_metadata *
+__xsk_buff_get_metadata(const struct xsk_buff_pool *pool, void *data)
{
struct xsk_tx_metadata *meta;
if (!pool->tx_metadata_len)
return NULL;
- meta = xp_raw_get_data(pool, addr) - pool->tx_metadata_len;
+ meta = data - pool->tx_metadata_len;
if (unlikely(!xsk_buff_valid_tx_metadata(meta)))
return NULL; /* no way to signal the error to the user */
return meta;
}
+static inline struct xsk_tx_metadata *
+xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+{
+ return __xsk_buff_get_metadata(pool, xp_raw_get_data(pool, addr));
+}
+
static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
{
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
@@ -388,12 +412,25 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
return NULL;
}
+static inline struct xdp_desc_ctx
+xsk_buff_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr)
+{
+ return (struct xdp_desc_ctx){ };
+}
+
static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta)
{
return false;
}
-static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
+static inline struct xsk_tx_metadata *
+__xsk_buff_get_metadata(const struct xsk_buff_pool *pool, void *data)
+{
+ return NULL;
+}
+
+static inline struct xsk_tx_metadata *
+xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr)
{
return NULL;
}
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 50779406bc2d..1dcd4d71468a 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -141,6 +141,14 @@ u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max);
bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count);
void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr);
dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr);
+
+struct xdp_desc_ctx {
+ dma_addr_t dma;
+ struct xsk_tx_metadata *meta;
+};
+
+struct xdp_desc_ctx xp_raw_get_ctx(const struct xsk_buff_pool *pool, u64 addr);
+
static inline dma_addr_t xp_get_dma(struct xdp_buff_xsk *xskb)
{
return xskb->dma;