diff options
| author | Alexei Starovoitov <ast@kernel.org> | 2026-03-16 21:26:42 +0300 |
|---|---|---|
| committer | Alexei Starovoitov <ast@kernel.org> | 2026-03-16 21:26:42 +0300 |
| commit | 6c8e1a9eee0fec802b542dadf768c30c2a183b3c (patch) | |
| tree | 7eea18c945bea1fd31c54818de9794ced6b767d5 | |
| parent | 202e42e4aa890172366354b233c42c73107a3f59 (diff) | |
| parent | 01d5d2f7d93de7270f0bf3bcba36f6f4d3d0bf9d (diff) | |
| download | linux-6c8e1a9eee0fec802b542dadf768c30c2a183b3c.tar.xz | |
Merge branch 'bpf-relax-8-frame-limitation-for-global-subprogs'
Emil Tsalapatis says:
====================
bpf: Relax 8 frame limitation for global subprogs
The BPF verifier currently limits the maximum runtime call stack to
8 frames. Larger BPF programs, such as sched_ext schedulers, routinely
fail verification because they exceed this limit, even though each of
their frames uses very little actual stack space.
Relax the verifier to permit call stacks > 8 frames deep when the
call stacks include global subprogs. The old 8 stack frame limit now
only applies to call stacks composed entirely of static function calls.
This works because global functions are each verified in isolation, so
the verifier does not need to cross-reference verification state across
the function call boundary, which has been the reason for limiting the
call stack size in the first place.
This patch does not change the verification-time limit of 8 stack
frames. Static functions, which are inlined for verification purposes,
still go at most 8 frames deep, to avoid changing the verifier's
internal data structures; those data structures only support holding
information on up to 8 stack frames.
This patch also does not adjust the actual maximum stack size of 512.
CHANGELOG
=========
v5 -> v6 (https://lore.kernel.org/bpf/20260311182831.91219-1-emil@etsalapatis.com/)
- Make bpf_subprog_call_depth_info internal to verifier.c (Alexei)
v4 -> v5 (https://lore.kernel.org/bpf/20260309204430.201219-1-emil@etsalapatis.com/)
- Move depth tracking state to verifier (Eduard) and free it after verification (Alexei)
- Fix selftest patch title and formatting errors (Yonghong)
v3 -> v4 (https://lore.kernel.org/bpf/20260303043106.406099-1-emil@etsalapatis.com/)
- Factor out temp call depth tracking info into its own struct (Eduard)
- Bring depth calculation loop in line with the other instances (Mykyta)
- Add comment on why selftest call stack is 16 bytes/frame (Eduard)
- Rename "cidx" to "caller" for clarity (Mykyta, Eduard)
v2 -> v3 (https://lore.kernel.org/bpf/20260210213606.475415-1-emil@etsalapatis.com/)
- Change logic to remove arbitrary limit on call depth (Eduard)
- Add additional selftests (Eduard)
v1 -> v2 (https://lore.kernel.org/bpf/20260202233716.835638-1-emil@etsalapatis.com)
- Adjust patch to only increase the runtime stack depth, leaving the
verification-time stack depth unchanged (Alexei)
Signed-off-by: Emil Tsalapatis <emil@etsalapatis.com>
====================
Link: https://patch.msgid.link/20260316161225.128011-1-emil@etsalapatis.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
| -rw-r--r-- | kernel/bpf/verifier.c | 76 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/prog_tests/test_global_funcs.c | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/test_global_func3.c | 18 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/test_global_func_deep_stack.c | 95 |
4 files changed, 160 insertions, 31 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e29f15419fcb..01c18f4268de 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6724,22 +6724,30 @@ static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth) return round_up(max_t(u32, stack_depth, 1), 32); } +/* temporary state used for call frame depth calculation */ +struct bpf_subprog_call_depth_info { + int ret_insn; /* caller instruction where we return to. */ + int caller; /* caller subprogram idx */ + int frame; /* # of consecutive static call stack frames on top of stack */ +}; + /* starting from main bpf function walk all instructions of the function * and recursively walk all callees that given function can call. * Ignore jump and exit insns. - * Since recursion is prevented by check_cfg() this algorithm - * only needs a local stack of MAX_CALL_FRAMES to remember callsites */ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx, + struct bpf_subprog_call_depth_info *dinfo, bool priv_stack_supported) { struct bpf_subprog_info *subprog = env->subprog_info; struct bpf_insn *insn = env->prog->insnsi; int depth = 0, frame = 0, i, subprog_end, subprog_depth; bool tail_call_reachable = false; - int ret_insn[MAX_CALL_FRAMES]; - int ret_prog[MAX_CALL_FRAMES]; - int j; + int total; + int tmp; + + /* no caller idx */ + dinfo[idx].caller = -1; i = subprog[idx].start; if (!priv_stack_supported) @@ -6791,8 +6799,12 @@ process_func: } else { depth += subprog_depth; if (depth > MAX_BPF_STACK) { + total = 0; + for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller) + total++; + verbose(env, "combined stack size of %d calls is %d. 
Too large\n", - frame + 1, depth); + total, depth); return -EACCES; } } @@ -6806,10 +6818,8 @@ continue_func: if (!is_bpf_throw_kfunc(insn + i)) continue; - if (subprog[idx].is_cb) - err = true; - for (int c = 0; c < frame && !err; c++) { - if (subprog[ret_prog[c]].is_cb) { + for (tmp = idx; tmp >= 0 && !err; tmp = dinfo[tmp].caller) { + if (subprog[tmp].is_cb) { err = true; break; } @@ -6825,8 +6835,6 @@ continue_func: if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i)) continue; /* remember insn and function to return to */ - ret_insn[frame] = i + 1; - ret_prog[frame] = idx; /* find the callee */ next_insn = i + insn[i].imm + 1; @@ -6846,7 +6854,16 @@ continue_func: return -EINVAL; } } + + /* store caller info for after we return from callee */ + dinfo[idx].frame = frame; + dinfo[idx].ret_insn = i + 1; + + /* push caller idx into callee's dinfo */ + dinfo[sidx].caller = idx; + i = next_insn; + idx = sidx; if (!priv_stack_supported) subprog[idx].priv_stack_mode = NO_PRIV_STACK; @@ -6854,7 +6871,7 @@ continue_func: if (subprog[idx].has_tail_call) tail_call_reachable = true; - frame++; + frame = subprog_is_global(env, idx) ? 0 : frame + 1; if (frame >= MAX_CALL_FRAMES) { verbose(env, "the call stack of %d frames is too deep !\n", frame); @@ -6868,12 +6885,12 @@ continue_func: * tail call counter throughout bpf2bpf calls combined with tailcalls */ if (tail_call_reachable) - for (j = 0; j < frame; j++) { - if (subprog[ret_prog[j]].is_exception_cb) { + for (tmp = idx; tmp >= 0; tmp = dinfo[tmp].caller) { + if (subprog[tmp].is_exception_cb) { verbose(env, "cannot tail call within exception cb\n"); return -EINVAL; } - subprog[ret_prog[j]].tail_call_reachable = true; + subprog[tmp].tail_call_reachable = true; } if (subprog[0].tail_call_reachable) env->prog->aux->tail_call_reachable = true; @@ -6881,23 +6898,33 @@ continue_func: /* end of for() loop means the last insn of the 'subprog' * was reached. 
Doesn't matter whether it was JA or EXIT */ - if (frame == 0) + if (frame == 0 && dinfo[idx].caller < 0) return 0; if (subprog[idx].priv_stack_mode != PRIV_STACK_ADAPTIVE) depth -= round_up_stack_depth(env, subprog[idx].stack_depth); - frame--; - i = ret_insn[frame]; - idx = ret_prog[frame]; + + /* pop caller idx from callee */ + idx = dinfo[idx].caller; + + /* retrieve caller state from its frame */ + frame = dinfo[idx].frame; + i = dinfo[idx].ret_insn; + goto continue_func; } static int check_max_stack_depth(struct bpf_verifier_env *env) { enum priv_stack_mode priv_stack_mode = PRIV_STACK_UNKNOWN; + struct bpf_subprog_call_depth_info *dinfo; struct bpf_subprog_info *si = env->subprog_info; bool priv_stack_supported; int ret; + dinfo = kvcalloc(env->subprog_cnt, sizeof(*dinfo), GFP_KERNEL_ACCOUNT); + if (!dinfo) + return -ENOMEM; + for (int i = 0; i < env->subprog_cnt; i++) { if (si[i].has_tail_call) { priv_stack_mode = NO_PRIV_STACK; @@ -6919,9 +6946,12 @@ static int check_max_stack_depth(struct bpf_verifier_env *env) for (int i = env->subprog_cnt - 1; i >= 0; i--) { if (!i || si[i].is_async_cb) { priv_stack_supported = !i && priv_stack_mode == PRIV_STACK_ADAPTIVE; - ret = check_max_stack_depth_subprog(env, i, priv_stack_supported); - if (ret < 0) + ret = check_max_stack_depth_subprog(env, i, dinfo, + priv_stack_supported); + if (ret < 0) { + kvfree(dinfo); return ret; + } } } @@ -6932,6 +6962,8 @@ static int check_max_stack_depth(struct bpf_verifier_env *env) } } + kvfree(dinfo); + return 0; } diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index e905cbaf6b3d..500446808908 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -18,6 +18,7 @@ #include "test_global_func15.skel.h" #include "test_global_func16.skel.h" #include "test_global_func17.skel.h" +#include 
"test_global_func_deep_stack.skel.h" #include "test_global_func_ctx_args.skel.h" #include "bpf/libbpf_internal.h" @@ -155,6 +156,7 @@ void test_test_global_funcs(void) RUN_TESTS(test_global_func15); RUN_TESTS(test_global_func16); RUN_TESTS(test_global_func17); + RUN_TESTS(test_global_func_deep_stack); RUN_TESTS(test_global_func_ctx_args); if (test__start_subtest("ctx_arg_rewrite")) diff --git a/tools/testing/selftests/bpf/progs/test_global_func3.c b/tools/testing/selftests/bpf/progs/test_global_func3.c index 142b682d3c2f..974fd8c19561 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func3.c +++ b/tools/testing/selftests/bpf/progs/test_global_func3.c @@ -5,56 +5,56 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -__attribute__ ((noinline)) +static __attribute__ ((noinline)) int f1(struct __sk_buff *skb) { return skb->len; } -__attribute__ ((noinline)) +static __attribute__ ((noinline)) int f2(int val, struct __sk_buff *skb) { return f1(skb) + val; } -__attribute__ ((noinline)) +static __attribute__ ((noinline)) int f3(int val, struct __sk_buff *skb, int var) { return f2(var, skb) + val; } -__attribute__ ((noinline)) +static __attribute__ ((noinline)) int f4(struct __sk_buff *skb) { return f3(1, skb, 2); } -__attribute__ ((noinline)) +static __attribute__ ((noinline)) int f5(struct __sk_buff *skb) { return f4(skb); } -__attribute__ ((noinline)) +static __attribute__ ((noinline)) int f6(struct __sk_buff *skb) { return f5(skb); } -__attribute__ ((noinline)) +static __attribute__ ((noinline)) int f7(struct __sk_buff *skb) { return f6(skb); } -__attribute__ ((noinline)) +static __attribute__ ((noinline)) int f8(struct __sk_buff *skb) { return f7(skb); } SEC("tc") -__failure __msg("the call stack of 8 frames") +__failure __msg("the call stack of 9 frames") int global_func3(struct __sk_buff *skb) { return f8(skb); diff --git a/tools/testing/selftests/bpf/progs/test_global_func_deep_stack.c b/tools/testing/selftests/bpf/progs/test_global_func_deep_stack.c 
new file mode 100644 index 000000000000..1b634b543b62 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func_deep_stack.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2026 Meta Platforms, Inc and affiliates. */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +/* + * Macro tricks to tersely define for long non-recursive call chains. Add + * computation to the functions prevent tail recursion from reducing the + * stack size to 0. + */ + +#define CAT(a, b) a ## b +#define XCAT(a, b) CAT(a, b) + +#define F_0 \ +__attribute__((noinline)) \ +int f0(unsigned long a) \ +{ \ + volatile long b = a + 16; \ + if (a == 0) \ + return 0; \ + return b; \ +} + +#define FN(n, prev) \ +__attribute__((noinline)) \ +int XCAT(f, n)(unsigned long a) \ +{ \ + volatile long b = XCAT(f, prev)(a - 1); \ + if (!b) \ + return 0; \ + return b + 1; \ +} + +/* Call chain 33 levels deep. */ +#define F_1 F_0 FN(1, 0) +#define F_2 F_1 FN(2, 1) +#define F_3 F_2 FN(3, 2) +#define F_4 F_3 FN(4, 3) +#define F_5 F_4 FN(5, 4) +#define F_6 F_5 FN(6, 5) +#define F_7 F_6 FN(7, 6) +#define F_8 F_7 FN(8, 7) +#define F_9 F_8 FN(9, 8) +#define F_10 F_9 FN(10, 9) +#define F_11 F_10 FN(11, 10) +#define F_12 F_11 FN(12, 11) +#define F_13 F_12 FN(13, 12) +#define F_14 F_13 FN(14, 13) +#define F_15 F_14 FN(15, 14) +#define F_16 F_15 FN(16, 15) +#define F_17 F_16 FN(17, 16) +#define F_18 F_17 FN(18, 17) +#define F_19 F_18 FN(19, 18) +#define F_20 F_19 FN(20, 19) +#define F_21 F_20 FN(21, 20) +#define F_22 F_21 FN(22, 21) +#define F_23 F_22 FN(23, 22) +#define F_24 F_23 FN(24, 23) +#define F_25 F_24 FN(25, 24) +#define F_26 F_25 FN(26, 25) +#define F_27 F_26 FN(27, 26) +#define F_28 F_27 FN(28, 27) +#define F_29 F_28 FN(29, 28) +#define F_30 F_29 FN(30, 29) +#define F_31 F_30 FN(31, 30) +#define F_32 F_31 FN(32, 31) + +#define CAT2(a, b) a ## b +#define XCAT2(a, b) CAT2(a, b) + +#define F(n) XCAT2(F_, n) + +F(32) + +/* Ensure that even 32 
levels deep, the function verifies. */ +SEC("syscall") +__success +int global_func_deep_stack_success(struct __sk_buff *skb) +{ + return f31(55); +} + +/* + * Check we actually honor stack limits (33 * 16 = 528 > 512 = MAX_STACK_DEPTH). + * The stack depth is 16 because the verifier calls round_up_stack_depth() on + * the size. + */ +SEC("syscall") +__failure __msg("combined stack size of 34 calls") +int global_func_deep_stack_fail(struct __sk_buff *skb) +{ + return f32(123); +} |
