summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Eric Dumazet <edumazet@google.com> 2026-03-07 16:36:01 +0300
committer Jakub Kicinski <kuba@kernel.org> 2026-03-10 05:31:41 +0300
commit f2db7b80b03f268ff65fe825a7c761a8f551aa48 (patch)
tree af5f6bef3f8fd3f59438f90b6a20b024d4615b84
parent e8eb33d650cd5e60b008f9d958262e489de6e7a9 (diff)
download linux-f2db7b80b03f268ff65fe825a7c761a8f551aa48.tar.xz
net/sched: refine indirect call mitigation in tc_wrapper.h
Some modern CPUs disable the X86_FEATURE_RETPOLINE feature, even if a direct call can still be beneficial. Even when IBRS is present, an indirect call is more expensive than a direct one: Direct Calls: Compilers can perform powerful optimizations like inlining, where the function body is directly inserted at the call site, eliminating call overhead entirely. Indirect Calls: Inlining is much harder, if not impossible, because the compiler doesn't know the target function at compile time. Techniques like Indirect Call Promotion can help by using profile-guided optimization to turn frequently taken indirect calls into conditional direct calls, but they still add complexity and potential overhead compared to a truly direct call. In this patch, I split tc_skip_wrapper in two different static keys, one for tc_act() (tc_skip_wrapper_act) and one for tc_classify() (tc_skip_wrapper_cls). Then I enable the tc_skip_wrapper_cls only if the count of builtin classifiers is above one. I enable tc_skip_wrapper_act only if the count of builtin actions is above one. In our production kernels, we only have CONFIG_NET_CLS_BPF=y and CONFIG_NET_ACT_BPF=y. Others are modules or are not compiled. Tested on AMD Turin CPUs, cls_bpf_classify() cost went from 1% down to 0.18%, and FDO will be able to inline it in tcf_classify() for further gains. Signed-off-by: Eric Dumazet <edumazet@google.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Reviewed-by: Pedro Tammela <pctammela@mojatatu.com> Reviewed-by: Victor Nogueira <victor@mojatatu.com> Link: https://patch.msgid.link/20260307133601.3863071-1-edumazet@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- include/net/tc_wrapper.h 47
-rw-r--r-- net/sched/sch_api.c 3
2 files changed, 44 insertions, 6 deletions
diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
index ffe58a02537c..4ebb053bb0dd 100644
--- a/include/net/tc_wrapper.h
+++ b/include/net/tc_wrapper.h
@@ -12,7 +12,8 @@
#define TC_INDIRECT_SCOPE
-extern struct static_key_false tc_skip_wrapper;
+extern struct static_key_false tc_skip_wrapper_act;
+extern struct static_key_false tc_skip_wrapper_cls;
/* TC Actions */
#ifdef CONFIG_NET_CLS_ACT
@@ -46,7 +47,7 @@ TC_INDIRECT_ACTION_DECLARE(tunnel_key_act);
static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
- if (static_branch_likely(&tc_skip_wrapper))
+ if (static_branch_likely(&tc_skip_wrapper_act))
goto skip;
#if IS_BUILTIN(CONFIG_NET_ACT_GACT)
@@ -153,7 +154,7 @@ TC_INDIRECT_FILTER_DECLARE(u32_classify);
static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- if (static_branch_likely(&tc_skip_wrapper))
+ if (static_branch_likely(&tc_skip_wrapper_cls))
goto skip;
#if IS_BUILTIN(CONFIG_NET_CLS_BPF)
@@ -202,8 +203,44 @@ skip:
static inline void tc_wrapper_init(void)
{
#ifdef CONFIG_X86
- if (!cpu_feature_enabled(X86_FEATURE_RETPOLINE))
- static_branch_enable(&tc_skip_wrapper);
+ int cnt_cls = IS_BUILTIN(CONFIG_NET_CLS_BPF) +
+ IS_BUILTIN(CONFIG_NET_CLS_U32) +
+ IS_BUILTIN(CONFIG_NET_CLS_FLOWER) +
+ IS_BUILTIN(CONFIG_NET_CLS_FW) +
+ IS_BUILTIN(CONFIG_NET_CLS_MATCHALL) +
+ IS_BUILTIN(CONFIG_NET_CLS_BASIC) +
+ IS_BUILTIN(CONFIG_NET_CLS_CGROUP) +
+ IS_BUILTIN(CONFIG_NET_CLS_FLOW) +
+ IS_BUILTIN(CONFIG_NET_CLS_ROUTE4);
+
+ int cnt_act = IS_BUILTIN(CONFIG_NET_ACT_GACT) +
+ IS_BUILTIN(CONFIG_NET_ACT_MIRRED) +
+ IS_BUILTIN(CONFIG_NET_ACT_PEDIT) +
+ IS_BUILTIN(CONFIG_NET_ACT_SKBEDIT) +
+ IS_BUILTIN(CONFIG_NET_ACT_SKBMOD) +
+ IS_BUILTIN(CONFIG_NET_ACT_POLICE) +
+ IS_BUILTIN(CONFIG_NET_ACT_BPF) +
+ IS_BUILTIN(CONFIG_NET_ACT_CONNMARK) +
+ IS_BUILTIN(CONFIG_NET_ACT_CSUM) +
+ IS_BUILTIN(CONFIG_NET_ACT_CT) +
+ IS_BUILTIN(CONFIG_NET_ACT_CTINFO) +
+ IS_BUILTIN(CONFIG_NET_ACT_GATE) +
+ IS_BUILTIN(CONFIG_NET_ACT_MPLS) +
+ IS_BUILTIN(CONFIG_NET_ACT_NAT) +
+ IS_BUILTIN(CONFIG_NET_ACT_TUNNEL_KEY) +
+ IS_BUILTIN(CONFIG_NET_ACT_VLAN) +
+ IS_BUILTIN(CONFIG_NET_ACT_IFE) +
+ IS_BUILTIN(CONFIG_NET_ACT_SIMP) +
+ IS_BUILTIN(CONFIG_NET_ACT_SAMPLE);
+
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE))
+ return;
+
+ if (cnt_cls > 1)
+ static_branch_enable(&tc_skip_wrapper_cls);
+
+ if (cnt_act > 1)
+ static_branch_enable(&tc_skip_wrapper_act);
#endif
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c0bab092ea80..ed869a5ffc73 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -2479,7 +2479,8 @@ static struct pernet_operations psched_net_ops = {
};
#if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)
-DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
+DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper_act);
+DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper_cls);
#endif
static const struct rtnl_msg_handler psched_rtnl_msg_handlers[] __initconst = {