diff options
| author | Eric Dumazet <edumazet@google.com> | 2026-03-07 16:36:01 +0300 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2026-03-10 05:31:41 +0300 |
| commit | f2db7b80b03f268ff65fe825a7c761a8f551aa48 (patch) | |
| tree | af5f6bef3f8fd3f59438f90b6a20b024d4615b84 /include | |
| parent | e8eb33d650cd5e60b008f9d958262e489de6e7a9 (diff) | |
| download | linux-f2db7b80b03f268ff65fe825a7c761a8f551aa48.tar.xz | |
net/sched: refine indirect call mitigation in tc_wrapper.h
Some modern cpus disable X86_FEATURE_RETPOLINE feature,
even if a direct call can still be beneficial.
Even when IBRS is present, an indirect call is more expensive
than a direct one:
Direct Calls:
Compilers can perform powerful optimizations like inlining,
where the function body is directly inserted at the call site,
eliminating call overhead entirely.
Indirect Calls:
Inlining is much harder, if not impossible, because the compiler
doesn't know the target function at compile time.
Techniques like Indirect Call Promotion can help by using
profile-guided optimization to turn frequently taken indirect calls
into conditional direct calls, but they still add complexity
and potential overhead compared to a truly direct call.
In this patch, I split tc_skip_wrapper in two different
static keys, one for tc_act() (tc_skip_wrapper_act)
and one for tc_classify() (tc_skip_wrapper_cls).
Then I enable the tc_skip_wrapper_cls only if the count
of builtin classifiers is above one.
I enable tc_skip_wrapper_act only it the count of builtin
actions is above one.
In our production kernels, we only have CONFIG_NET_CLS_BPF=y
and CONFIG_NET_ACT_BPF=y. Other are modules or are not compiled.
Tested on AMD Turin cpus, cls_bpf_classify() cost went
from 1% down to 0.18 %, and FDO will be able to inline
it in tcf_classify() for further gains.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Pedro Tammela <pctammela@mojatatu.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Link: https://patch.msgid.link/20260307133601.3863071-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/net/tc_wrapper.h | 47 |
1 files changed, 42 insertions, 5 deletions
diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h index ffe58a02537c..4ebb053bb0dd 100644 --- a/include/net/tc_wrapper.h +++ b/include/net/tc_wrapper.h @@ -12,7 +12,8 @@ #define TC_INDIRECT_SCOPE -extern struct static_key_false tc_skip_wrapper; +extern struct static_key_false tc_skip_wrapper_act; +extern struct static_key_false tc_skip_wrapper_cls; /* TC Actions */ #ifdef CONFIG_NET_CLS_ACT @@ -46,7 +47,7 @@ TC_INDIRECT_ACTION_DECLARE(tunnel_key_act); static inline int tc_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { - if (static_branch_likely(&tc_skip_wrapper)) + if (static_branch_likely(&tc_skip_wrapper_act)) goto skip; #if IS_BUILTIN(CONFIG_NET_ACT_GACT) @@ -153,7 +154,7 @@ TC_INDIRECT_FILTER_DECLARE(u32_classify); static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - if (static_branch_likely(&tc_skip_wrapper)) + if (static_branch_likely(&tc_skip_wrapper_cls)) goto skip; #if IS_BUILTIN(CONFIG_NET_CLS_BPF) @@ -202,8 +203,44 @@ skip: static inline void tc_wrapper_init(void) { #ifdef CONFIG_X86 - if (!cpu_feature_enabled(X86_FEATURE_RETPOLINE)) - static_branch_enable(&tc_skip_wrapper); + int cnt_cls = IS_BUILTIN(CONFIG_NET_CLS_BPF) + + IS_BUILTIN(CONFIG_NET_CLS_U32) + + IS_BUILTIN(CONFIG_NET_CLS_FLOWER) + + IS_BUILTIN(CONFIG_NET_CLS_FW) + + IS_BUILTIN(CONFIG_NET_CLS_MATCHALL) + + IS_BUILTIN(CONFIG_NET_CLS_BASIC) + + IS_BUILTIN(CONFIG_NET_CLS_CGROUP) + + IS_BUILTIN(CONFIG_NET_CLS_FLOW) + + IS_BUILTIN(CONFIG_NET_CLS_ROUTE4); + + int cnt_act = IS_BUILTIN(CONFIG_NET_ACT_GACT) + + IS_BUILTIN(CONFIG_NET_ACT_MIRRED) + + IS_BUILTIN(CONFIG_NET_ACT_PEDIT) + + IS_BUILTIN(CONFIG_NET_ACT_SKBEDIT) + + IS_BUILTIN(CONFIG_NET_ACT_SKBMOD) + + IS_BUILTIN(CONFIG_NET_ACT_POLICE) + + IS_BUILTIN(CONFIG_NET_ACT_BPF) + + IS_BUILTIN(CONFIG_NET_ACT_CONNMARK) + + IS_BUILTIN(CONFIG_NET_ACT_CSUM) + + IS_BUILTIN(CONFIG_NET_ACT_CT) + + IS_BUILTIN(CONFIG_NET_ACT_CTINFO) + + IS_BUILTIN(CONFIG_NET_ACT_GATE) + + IS_BUILTIN(CONFIG_NET_ACT_MPLS) + + IS_BUILTIN(CONFIG_NET_ACT_NAT) + + IS_BUILTIN(CONFIG_NET_ACT_TUNNEL_KEY) + + IS_BUILTIN(CONFIG_NET_ACT_VLAN) + + IS_BUILTIN(CONFIG_NET_ACT_IFE) + + IS_BUILTIN(CONFIG_NET_ACT_SIMP) + + IS_BUILTIN(CONFIG_NET_ACT_SAMPLE); + + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) + return; + + if (cnt_cls > 1) + static_branch_enable(&tc_skip_wrapper_cls); + + if (cnt_act > 1) + static_branch_enable(&tc_skip_wrapper_act); #endif } |
