summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2025-02-11 04:54:45 +0300
committerJakub Kicinski <kuba@kernel.org>2025-02-11 04:54:46 +0300
commit7aba66642936068cfb4a45eb4bddf437de2776f8 (patch)
tree7fa21de685ce2a7c51b8cb5602306a09a4450024 /include/linux
parent5b281fe7e396c519914863c5de5ce3a3f9cffd5b (diff)
parent23d9324a27a48858cfdd7f0342f52328e8595c6d (diff)
downloadlinux-7aba66642936068cfb4a45eb4bddf437de2776f8.tar.xz
Merge branch 'xsk-the-lost-bits-from-chapter-iii'
Alexander Lobakin says: ==================== xsk: the lost bits from Chapter III Before introducing libeth_xdp, we need to add a couple more generic helpers. Notably: * 01: add generic loop unrolling hint helpers; * 04: add helper to get both xdp_desc's DMA address and metadata pointer in one go, saving several cycles and hotpath object code size in drivers (especially when unrolling). Bonus: * 02, 03: convert two drivers which were using custom macros to generic unrolled_count() (trivial, no object code changes). ==================== Link: https://patch.msgid.link/20250206182630.3914318-1-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/unroll.h44
1 files changed, 44 insertions, 0 deletions
diff --git a/include/linux/unroll.h b/include/linux/unroll.h
index d42fd6366373..863fb69f6a7e 100644
--- a/include/linux/unroll.h
+++ b/include/linux/unroll.h
@@ -9,6 +9,50 @@
#include <linux/args.h>
+#ifdef CONFIG_CC_IS_CLANG
+#define __pick_unrolled(x, y) _Pragma(#x)
+#elif CONFIG_GCC_VERSION >= 80000
+#define __pick_unrolled(x, y) _Pragma(#y)
+#else
+#define __pick_unrolled(x, y) /* not supported */
+#endif
+
+/**
+ * unrolled - loop attributes to ask the compiler to unroll it
+ *
+ * Usage:
+ *
+ * #define BATCH 8
+ *
+ * unrolled_count(BATCH)
+ * for (u32 i = 0; i < BATCH; i++)
+ * // loop body without cross-iteration dependencies
+ *
+ * This is only a hint and the compiler is free to disable unrolling if it
+ * thinks the count is suboptimal and may hurt performance and/or hugely
+ * increase object code size.
+ * Not having any cross-iteration dependencies (i.e. when iter x + 1 depends
+ * on what iter x will do with variables) is not a strict requirement, but
+ * provides best performance and object code size.
+ * Available only on Clang and GCC 8.x onwards.
+ */
+
+/* Ask the compiler to pick an optimal unroll count, Clang only */
+#define unrolled \
+ __pick_unrolled(clang loop unroll(enable), /* nothing */)
+
+/* Unroll each @n iterations of the loop */
+#define unrolled_count(n) \
+ __pick_unrolled(clang loop unroll_count(n), GCC unroll n)
+
+/* Unroll the whole loop */
+#define unrolled_full \
+ __pick_unrolled(clang loop unroll(full), GCC unroll 65534)
+
+/* Never unroll the loop */
+#define unrolled_none \
+ __pick_unrolled(clang loop unroll(disable), GCC unroll 1)
+
#define UNROLL(N, MACRO, args...) CONCATENATE(__UNROLL_, N)(MACRO, args)
#define __UNROLL_0(MACRO, args...)