summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alexei Starovoitov <ast@kernel.org> 2026-06-10 07:23:12 +0300
committer Alexei Starovoitov <ast@kernel.org> 2026-06-10 07:23:12 +0300
commit 1fed2e47fac582e824f77f68722a8a13820e58e2 (patch)
tree 703f21d578314c1ee115b1c9a44ee754bdbd90cf
parent 140fa23df957b51385aa847986d44ad7f59b0563 (diff)
parent 2e7c6cb4d8437a2fe7cd95aac7ca53d7eb05e9f4 (diff)
download linux-1fed2e47fac582e824f77f68722a8a13820e58e2.tar.xz
Merge branch 'fix-kptr-dtor-deadlock'
Kumar Kartikeya Dwivedi says:

====================
Fix kptr dtor deadlock

Referenced kptr destruction can run from tracing/NMI contexts through
bpf_obj_drop() and map value update/delete paths, reaching NMI-unsafe
special field teardown and deadlocks. Justin reported the issue and
iterated on fixes in [0]-[2], and also confirmed the bpf_obj_drop()
reproducer in [3].

This series rejects unsafe obj drops from non-iterator tracing programs,
limits map value recycle to NMI-safe field cancellation, and adds focused
selftests for the obj_drop(), NMI delete, and recycle teardown cases.

See patches for details.

[0]: https://lore.kernel.org/bpf/20260505150851.3090688-1-utilityemal77@gmail.com
[1]: https://lore.kernel.org/bpf/20260507175453.1140400-1-utilityemal77@gmail.com
[2]: https://lore.kernel.org/bpf/20260519011450.1144935-1-utilityemal77@gmail.com
[3]: https://lore.kernel.org/bpf/agyG3eQwgmoJwmj2@suesslenovo

Changelog:
----------
v2 -> v3
v2: https://lore.kernel.org/bpf/20260609093719.2858096-1-memxor@gmail.com

 * Replace bpf_obj_cancel_fields() to use bpf_map_free_internal_structs(). (Mykyta)
 * Fix CI failures.

v1 -> v2
v1: https://lore.kernel.org/bpf/20260608144841.1732406-1-memxor@gmail.com

 * Drop is_tracing_prog_type() fix due to compat breakage, revisit separately.
 * Rework bpf_obj_drop() fix to additionally reject non-iter tracing progs.
====================

Link: https://patch.msgid.link/20260609202548.3571690-1-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r-- include/linux/bpf.h | 30
-rw-r--r-- kernel/bpf/arraymap.c | 8
-rw-r--r-- kernel/bpf/hashtab.c | 32
-rw-r--r-- kernel/bpf/syscall.c | 5
-rw-r--r-- kernel/bpf/verifier.c | 17
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/htab_update.c | 4
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/linked_list.c | 33
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/map_kptr.c | 66
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c | 8
-rw-r--r-- tools/testing/selftests/bpf/prog_tests/task_kfunc.c | 42
-rw-r--r-- tools/testing/selftests/bpf/progs/htab_update.c | 4
-rw-r--r-- tools/testing/selftests/bpf/progs/linked_list.c | 71
-rw-r--r-- tools/testing/selftests/bpf/progs/map_kptr.c | 89
-rw-r--r-- tools/testing/selftests/bpf/progs/refcounted_kptr.c | 20
-rw-r--r-- tools/testing/selftests/bpf/progs/task_kfunc_failure.c | 40
-rw-r--r-- tools/testing/selftests/bpf/progs/task_kfunc_success.c | 13
16 files changed, 421 insertions, 61 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 62bba7a4876f..56f5da2b437f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -492,6 +492,35 @@ static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_f
return rec->field_mask & type;
}
+static inline bool btf_field_is_nmi_safe(enum btf_field_type type)
+{
+ switch (type) {
+ case BPF_SPIN_LOCK:
+ case BPF_RES_SPIN_LOCK:
+ case BPF_TIMER:
+ case BPF_WORKQUEUE:
+ case BPF_TASK_WORK:
+ case BPF_KPTR_UNREF:
+ case BPF_REFCOUNT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool btf_record_has_nmi_unsafe_fields(const struct btf_record *rec)
+{
+ int i;
+
+ if (IS_ERR_OR_NULL(rec))
+ return false;
+ for (i = 0; i < rec->cnt; i++) {
+ if (!btf_field_is_nmi_safe(rec->fields[i].type))
+ return true;
+ }
+ return false;
+}
+
static inline void bpf_obj_init(const struct btf_record *rec, void *obj)
{
int i;
@@ -2688,6 +2717,7 @@ bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *r
void bpf_obj_free_timer(const struct btf_record *rec, void *obj);
void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj);
void bpf_obj_free_task_work(const struct btf_record *rec, void *obj);
+void bpf_obj_cancel_fields(struct bpf_map *map, void *obj);
void bpf_obj_free_fields(const struct btf_record *rec, void *obj);
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu);
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index e6271a2bf6d6..248b4818178c 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -384,7 +384,7 @@ static long array_map_update_elem(struct bpf_map *map, void *key, void *value,
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
val = this_cpu_ptr(array->pptrs[index & array->index_mask]);
copy_map_value(map, val, value);
- bpf_obj_free_fields(array->map.record, val);
+ bpf_obj_cancel_fields(map, val);
} else {
val = array->value +
(u64)array->elem_size * (index & array->index_mask);
@@ -392,7 +392,7 @@ static long array_map_update_elem(struct bpf_map *map, void *key, void *value,
copy_map_value_locked(map, val, value, false);
else
copy_map_value(map, val, value);
- bpf_obj_free_fields(array->map.record, val);
+ bpf_obj_cancel_fields(map, val);
}
return 0;
}
@@ -432,14 +432,14 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
cpu = map_flags >> 32;
ptr = per_cpu_ptr(pptr, cpu);
copy_map_value(map, ptr, value);
- bpf_obj_free_fields(array->map.record, ptr);
+ bpf_obj_cancel_fields(map, ptr);
goto unlock;
}
for_each_possible_cpu(cpu) {
ptr = per_cpu_ptr(pptr, cpu);
val = (map_flags & BPF_F_ALL_CPUS) ? value : value + size * cpu;
copy_map_value(map, ptr, val);
- bpf_obj_free_fields(array->map.record, ptr);
+ bpf_obj_cancel_fields(map, ptr);
}
unlock:
rcu_read_unlock();
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index b4366cad3cfa..9f394e1aa2e8 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -243,6 +243,10 @@ static void htab_free_prealloced_fields(struct bpf_htab *htab)
if (IS_ERR_OR_NULL(htab->map.record))
return;
+ /*
+ * Preallocated maps do not have a bpf_mem_alloc destructor, so fully
+ * destroy every element, including the extra elements.
+ */
if (htab_has_extra_elems(htab))
num_entries += num_possible_cpus();
for (i = 0; i < num_entries; i++) {
@@ -833,8 +837,8 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map,
return insn - insn_buf;
}
-static void check_and_free_fields(struct bpf_htab *htab,
- struct htab_elem *elem)
+static void check_and_cancel_fields(struct bpf_htab *htab,
+ struct htab_elem *elem)
{
if (IS_ERR_OR_NULL(htab->map.record))
return;
@@ -844,11 +848,11 @@ static void check_and_free_fields(struct bpf_htab *htab,
int cpu;
for_each_possible_cpu(cpu)
- bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
+ bpf_obj_cancel_fields(&htab->map, per_cpu_ptr(pptr, cpu));
} else {
void *map_value = htab_elem_value(elem, htab->map.key_size);
- bpf_obj_free_fields(htab->map.record, map_value);
+ bpf_obj_cancel_fields(&htab->map, map_value);
}
}
@@ -883,7 +887,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
htab_unlock_bucket(b, flags);
if (l == tgt_l)
- check_and_free_fields(htab, l);
+ check_and_cancel_fields(htab, l);
return l == tgt_l;
}
@@ -948,7 +952,7 @@ find_first_elem:
static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
{
- check_and_free_fields(htab, l);
+ check_and_cancel_fields(htab, l);
if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr);
@@ -1001,7 +1005,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
if (htab_is_prealloc(htab)) {
bpf_map_dec_elem_count(&htab->map);
- check_and_free_fields(htab, l);
+ check_and_cancel_fields(htab, l);
pcpu_freelist_push(&htab->freelist, &l->fnode);
} else {
dec_elem_count(htab);
@@ -1018,7 +1022,7 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
/* copy true value_size bytes */
ptr = this_cpu_ptr(pptr);
copy_map_value(&htab->map, ptr, value);
- bpf_obj_free_fields(htab->map.record, ptr);
+ bpf_obj_cancel_fields(&htab->map, ptr);
} else {
u32 size = round_up(htab->map.value_size, 8);
void *val;
@@ -1028,7 +1032,7 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
cpu = map_flags >> 32;
ptr = per_cpu_ptr(pptr, cpu);
copy_map_value(&htab->map, ptr, value);
- bpf_obj_free_fields(htab->map.record, ptr);
+ bpf_obj_cancel_fields(&htab->map, ptr);
return;
}
@@ -1036,7 +1040,7 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
ptr = per_cpu_ptr(pptr, cpu);
val = (map_flags & BPF_F_ALL_CPUS) ? value : value + size * cpu;
copy_map_value(&htab->map, ptr, val);
- bpf_obj_free_fields(htab->map.record, ptr);
+ bpf_obj_cancel_fields(&htab->map, ptr);
}
}
}
@@ -1252,11 +1256,11 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
if (l_old) {
hlist_nulls_del_rcu(&l_old->hash_node);
- /* l_old has already been stashed in htab->extra_elems, free
- * its special fields before it is available for reuse.
+ /* l_old has already been stashed in htab->extra_elems, cancel
+ * its reusable special fields before it is available for reuse.
*/
if (htab_is_prealloc(htab))
- check_and_free_fields(htab, l_old);
+ check_and_cancel_fields(htab, l_old);
}
htab_unlock_bucket(b, flags);
if (l_old && !htab_is_prealloc(htab))
@@ -1269,7 +1273,7 @@ err:
static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem)
{
- check_and_free_fields(htab, elem);
+ check_and_cancel_fields(htab, elem);
bpf_map_dec_elem_count(&htab->map);
bpf_lru_push_free(&htab->lru, &elem->lru_node);
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d4188a992bd8..7ed949f70f82 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -808,6 +808,11 @@ void bpf_obj_free_task_work(const struct btf_record *rec, void *obj)
bpf_task_work_cancel_and_free(obj + rec->task_work_off);
}
+void bpf_obj_cancel_fields(struct bpf_map *map, void *obj)
+{
+ bpf_map_free_internal_structs(map, obj);
+}
+
void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
{
const struct btf_field *fields;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 954b85609f32..eb46a81a8c51 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -205,6 +205,7 @@ static int release_reference_nomark(struct bpf_verifier_state *state, int id);
static int release_reference(struct bpf_verifier_env *env, int id);
static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
+static bool is_tracing_prog_type(enum bpf_prog_type type);
static int ref_set_non_owning(struct bpf_verifier_env *env,
struct bpf_reg_state *reg);
static bool is_trusted_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg);
@@ -12881,6 +12882,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
bool sleepable, rcu_lock, rcu_unlock, preempt_disable, preempt_enable;
+ enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
struct bpf_reg_state *regs = cur_regs(env);
const char *func_name, *ptr_type_name;
const struct btf_type *t, *ptr_type;
@@ -12957,6 +12959,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (err < 0)
return err;
+ if ((is_bpf_obj_drop_kfunc(meta.func_id) ||
+ is_bpf_percpu_obj_drop_kfunc(meta.func_id)) && (is_tracing_prog_type(prog_type) ||
+ /* is_tracing_prog_type() for now doesn't cover non-iterator tracing progs. */
+ (prog_type == BPF_PROG_TYPE_TRACING && env->prog->expected_attach_type != BPF_TRACE_ITER
+ && !env->prog->sleepable))) {
+ struct btf_struct_meta *struct_meta;
+
+ struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
+ if (struct_meta && btf_record_has_nmi_unsafe_fields(struct_meta->record)) {
+ verbose(env, "%s cannot be used in tracing programs on types with NMI unsafe fields\n",
+ func_name);
+ return -EINVAL;
+ }
+ }
+
if (is_bpf_rbtree_add_kfunc(meta.func_id)) {
err = push_callback_call(env, insn, insn_idx, meta.subprogno,
set_rbtree_add_callback_state);
diff --git a/tools/testing/selftests/bpf/prog_tests/htab_update.c b/tools/testing/selftests/bpf/prog_tests/htab_update.c
index ea1a6766fbe9..0a28d4346924 100644
--- a/tools/testing/selftests/bpf/prog_tests/htab_update.c
+++ b/tools/testing/selftests/bpf/prog_tests/htab_update.c
@@ -23,7 +23,7 @@ static void test_reenter_update(void)
if (!ASSERT_OK_PTR(skel, "htab_update__open"))
return;
- bpf_program__set_autoload(skel->progs.bpf_obj_free_fields, true);
+ bpf_program__set_autoload(skel->progs.bpf_obj_cancel_fields, true);
err = htab_update__load(skel);
if (!ASSERT_TRUE(!err, "htab_update__load") || err)
goto out;
@@ -50,7 +50,7 @@ static void test_reenter_update(void)
/*
* Second update: replace existing element with same key and trigger
* the reentrancy of bpf_map_update_elem().
- * check_and_free_fields() calls bpf_obj_free_fields() on the old
+ * check_and_cancel_fields() calls bpf_obj_cancel_fields() on the old
* value, which is where fentry program runs and performs a nested
* bpf_map_update_elem(), triggering -EDEADLK.
*/
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index dbff099860ba..8defea0253ed 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -131,13 +131,14 @@ end:
linked_list_fail__destroy(skel);
}
-static void clear_fields(struct bpf_map *map)
+static void clear_fields(struct bpf_program *prog)
{
- char buf[24];
- int key = 0;
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ int ret;
- memset(buf, 0xff, sizeof(buf));
- ASSERT_OK(bpf_map__update_elem(map, &key, sizeof(key), buf, sizeof(buf), 0), "check_and_free_fields");
+ ret = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);
+ ASSERT_OK(ret, "clear_fields");
+ ASSERT_OK(opts.retval, "clear_fields retval");
}
enum {
@@ -170,31 +171,31 @@ static void test_linked_list_success(int mode, bool leave_in_map)
ASSERT_OK(ret, "map_list_push_pop");
ASSERT_OK(opts.retval, "map_list_push_pop retval");
if (!leave_in_map)
- clear_fields(skel->maps.array_map);
+ clear_fields(skel->progs.clear_map_list);
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop), &opts);
ASSERT_OK(ret, "inner_map_list_push_pop");
ASSERT_OK(opts.retval, "inner_map_list_push_pop retval");
if (!leave_in_map)
- clear_fields(skel->maps.inner_map);
+ clear_fields(skel->progs.clear_inner_map_list);
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop), &opts);
ASSERT_OK(ret, "global_list_push_pop");
ASSERT_OK(opts.retval, "global_list_push_pop retval");
if (!leave_in_map)
- clear_fields(skel->maps.bss_A);
+ clear_fields(skel->progs.clear_global_list);
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_nested), &opts);
ASSERT_OK(ret, "global_list_push_pop_nested");
ASSERT_OK(opts.retval, "global_list_push_pop_nested retval");
if (!leave_in_map)
- clear_fields(skel->maps.bss_A);
+ clear_fields(skel->progs.clear_global_nested_list);
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_array_push_pop), &opts);
ASSERT_OK(ret, "global_list_array_push_pop");
ASSERT_OK(opts.retval, "global_list_array_push_pop retval");
if (!leave_in_map)
- clear_fields(skel->maps.bss_A);
+ clear_fields(skel->progs.clear_global_array_list);
if (mode == PUSH_POP)
goto end;
@@ -204,19 +205,19 @@ ppm:
ASSERT_OK(ret, "map_list_push_pop_multiple");
ASSERT_OK(opts.retval, "map_list_push_pop_multiple retval");
if (!leave_in_map)
- clear_fields(skel->maps.array_map);
+ clear_fields(skel->progs.clear_map_list);
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop_multiple), &opts);
ASSERT_OK(ret, "inner_map_list_push_pop_multiple");
ASSERT_OK(opts.retval, "inner_map_list_push_pop_multiple retval");
if (!leave_in_map)
- clear_fields(skel->maps.inner_map);
+ clear_fields(skel->progs.clear_inner_map_list);
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_multiple), &opts);
ASSERT_OK(ret, "global_list_push_pop_multiple");
ASSERT_OK(opts.retval, "global_list_push_pop_multiple retval");
if (!leave_in_map)
- clear_fields(skel->maps.bss_A);
+ clear_fields(skel->progs.clear_global_list);
if (mode == PUSH_POP_MULT)
goto end;
@@ -226,19 +227,19 @@ lil:
ASSERT_OK(ret, "map_list_in_list");
ASSERT_OK(opts.retval, "map_list_in_list retval");
if (!leave_in_map)
- clear_fields(skel->maps.array_map);
+ clear_fields(skel->progs.clear_map_list);
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_in_list), &opts);
ASSERT_OK(ret, "inner_map_list_in_list");
ASSERT_OK(opts.retval, "inner_map_list_in_list retval");
if (!leave_in_map)
- clear_fields(skel->maps.inner_map);
+ clear_fields(skel->progs.clear_inner_map_list);
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_in_list), &opts);
ASSERT_OK(ret, "global_list_in_list");
ASSERT_OK(opts.retval, "global_list_in_list retval");
if (!leave_in_map)
- clear_fields(skel->maps.bss_A);
+ clear_fields(skel->progs.clear_global_list);
end:
linked_list__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
index 03b46f17cf53..17e707dddda8 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
@@ -51,7 +51,6 @@ static void test_map_kptr_success(bool test_run)
ret = bpf_map__update_elem(skel->maps.array_map,
&key, sizeof(key), buf, sizeof(buf), 0);
ASSERT_OK(ret, "array_map update");
- skel->data->ref--;
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
@@ -59,49 +58,42 @@ static void test_map_kptr_success(bool test_run)
ret = bpf_map__update_elem(skel->maps.pcpu_array_map,
&key, sizeof(key), pbuf, cpu * sizeof(buf), 0);
ASSERT_OK(ret, "pcpu_array_map update");
- skel->data->ref--;
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
ret = bpf_map__delete_elem(skel->maps.hash_map, &key, sizeof(key), 0);
ASSERT_OK(ret, "hash_map delete");
- skel->data->ref--;
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
ret = bpf_map__delete_elem(skel->maps.pcpu_hash_map, &key, sizeof(key), 0);
ASSERT_OK(ret, "pcpu_hash_map delete");
- skel->data->ref--;
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
ret = bpf_map__delete_elem(skel->maps.hash_malloc_map, &key, sizeof(key), 0);
ASSERT_OK(ret, "hash_malloc_map delete");
- skel->data->ref--;
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
ret = bpf_map__delete_elem(skel->maps.pcpu_hash_malloc_map, &key, sizeof(key), 0);
ASSERT_OK(ret, "pcpu_hash_malloc_map delete");
- skel->data->ref--;
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
ret = bpf_map__delete_elem(skel->maps.lru_hash_map, &key, sizeof(key), 0);
ASSERT_OK(ret, "lru_hash_map delete");
- skel->data->ref--;
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
ret = bpf_map__delete_elem(skel->maps.lru_pcpu_hash_map, &key, sizeof(key), 0);
ASSERT_OK(ret, "lru_pcpu_hash_map delete");
- skel->data->ref--;
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
@@ -151,12 +143,68 @@ static void wait_for_map_release(void)
map_kptr__destroy(skel);
}
+enum map_update_kptr_case {
+ MAP_UPDATE_KPTR_ARRAY,
+ MAP_UPDATE_KPTR_HASH,
+ MAP_UPDATE_KPTR_HASH_MALLOC,
+};
+
+static struct bpf_program *map_update_kptr_prog(struct map_kptr *skel,
+ enum map_update_kptr_case test)
+{
+ switch (test) {
+ case MAP_UPDATE_KPTR_ARRAY:
+ return skel->progs.test_array_map_update_kptr;
+ case MAP_UPDATE_KPTR_HASH:
+ return skel->progs.test_hash_map_update_kptr;
+ case MAP_UPDATE_KPTR_HASH_MALLOC:
+ return skel->progs.test_hash_malloc_map_update_kptr;
+ }
+
+ return NULL;
+}
+
+static void test_map_update_kptr(enum map_update_kptr_case test)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct map_kptr *skel;
+ struct bpf_program *prog;
+ int ret;
+
+ skel = map_kptr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load"))
+ return;
+
+ prog = map_update_kptr_prog(skel, test);
+ if (!ASSERT_OK_PTR(prog, "map_update_kptr_prog"))
+ goto out;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);
+ if (!ASSERT_OK(ret, "map_update_kptr"))
+ goto out;
+ if (!ASSERT_OK(opts.retval, "map_update_kptr retval"))
+ goto out;
+
+ ASSERT_EQ(skel->bss->num_of_refs, 3, "refs_after_update");
+
+out:
+ map_kptr__destroy(skel);
+ wait_for_map_release();
+}
+
void serial_test_map_kptr(void)
{
struct rcu_tasks_trace_gp *skel;
RUN_TESTS(map_kptr_fail);
+ if (test__start_subtest("update_array_map_kptr"))
+ test_map_update_kptr(MAP_UPDATE_KPTR_ARRAY);
+ if (test__start_subtest("update_hash_map_kptr"))
+ test_map_update_kptr(MAP_UPDATE_KPTR_HASH);
+ if (test__start_subtest("update_hash_malloc_map_kptr"))
+ test_map_update_kptr(MAP_UPDATE_KPTR_HASH_MALLOC);
+
skel = rcu_tasks_trace_gp__open_and_load();
if (!ASSERT_OK_PTR(skel, "rcu_tasks_trace_gp__open_and_load"))
return;
@@ -175,7 +223,7 @@ void serial_test_map_kptr(void)
ASSERT_OK(kern_sync_rcu(), "sync rcu");
wait_for_map_release();
- /* Observe refcount dropping to 1 on synchronous delete elem */
+ /* Observe refcount dropping to 1 on map release. */
test_map_kptr_success(true);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
index d2c0542716a8..1737eba34323 100644
--- a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
@@ -57,6 +57,7 @@ void test_percpu_hash_refcounted_kptr_refcount_leak(void)
.data_size_in = sizeof(pkt_v4),
.repeat = 1,
);
+ LIBBPF_OPTS(bpf_test_run_opts, syscall_opts);
cpu_nr = libbpf_num_possible_cpus();
if (!ASSERT_GT(cpu_nr, 0, "libbpf_num_possible_cpus"))
@@ -87,8 +88,11 @@ void test_percpu_hash_refcounted_kptr_refcount_leak(void)
if (!ASSERT_EQ(opts.retval, 2, "opts.retval"))
goto out;
- err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0);
- if (!ASSERT_OK(err, "bpf_map__update_elem"))
+ fd = bpf_program__fd(skel->progs.clear_percpu_hash_kptr);
+ err = bpf_prog_test_run_opts(fd, &syscall_opts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto out;
+ if (!ASSERT_EQ(syscall_opts.retval, 1, "syscall_opts.retval"))
goto out;
fd = bpf_program__fd(skel->progs.check_percpu_hash_refcount);
diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
index 83b90335967a..e6e95c1416e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
@@ -68,6 +68,36 @@ cleanup:
task_kfunc_success__destroy(skel);
}
+static void run_syscall_success_test(const char *prog_name)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct task_kfunc_success *skel;
+ struct bpf_program *prog;
+ int err;
+
+ skel = open_load_task_kfunc_skel();
+ if (!ASSERT_OK_PTR(skel, "open_load_skel"))
+ return;
+
+ if (!ASSERT_OK(skel->bss->err, "pre_run_err"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto cleanup;
+ if (!ASSERT_EQ(opts.retval, 0, "retval"))
+ goto cleanup;
+
+ ASSERT_OK(skel->bss->err, "post_run_err");
+
+cleanup:
+ task_kfunc_success__destroy(skel);
+}
+
static int run_vpid_test(void *prog_name)
{
struct task_kfunc_success *skel;
@@ -140,7 +170,6 @@ static const char * const success_tests[] = {
"test_task_acquire_release_argument",
"test_task_acquire_release_current",
"test_task_acquire_leave_in_map",
- "test_task_xchg_release",
"test_task_map_acquire_release",
"test_task_current_acquire_release",
"test_task_from_pid_arg",
@@ -151,6 +180,10 @@ static const char * const success_tests[] = {
"test_task_kfunc_flavor_relo_not_found",
};
+static const char * const syscall_success_tests[] = {
+ "test_task_xchg_release",
+};
+
static const char * const vpid_success_tests[] = {
"test_task_from_vpid_current",
"test_task_from_vpid_invalid",
@@ -167,6 +200,13 @@ void test_task_kfunc(void)
run_success_test(success_tests[i]);
}
+ for (i = 0; i < ARRAY_SIZE(syscall_success_tests); i++) {
+ if (!test__start_subtest(syscall_success_tests[i]))
+ continue;
+
+ run_syscall_success_test(syscall_success_tests[i]);
+ }
+
for (i = 0; i < ARRAY_SIZE(vpid_success_tests); i++) {
if (!test__start_subtest(vpid_success_tests[i]))
continue;
diff --git a/tools/testing/selftests/bpf/progs/htab_update.c b/tools/testing/selftests/bpf/progs/htab_update.c
index 195d3b2fba00..62c1b1325ec2 100644
--- a/tools/testing/selftests/bpf/progs/htab_update.c
+++ b/tools/testing/selftests/bpf/progs/htab_update.c
@@ -22,8 +22,8 @@ struct {
int pid = 0;
int update_err = 0;
-SEC("?fentry/bpf_obj_free_fields")
-int bpf_obj_free_fields(void *ctx)
+SEC("?fentry/bpf_obj_cancel_fields")
+int bpf_obj_cancel_fields(void *ctx)
{
__u32 key = 0;
struct val value = { .payload = 1 };
diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c
index 421f40835acd..fa97faa5358b 100644
--- a/tools/testing/selftests/bpf/progs/linked_list.c
+++ b/tools/testing/selftests/bpf/progs/linked_list.c
@@ -290,6 +290,77 @@ int test_list_in_list(struct bpf_spin_lock *lock, struct bpf_list_head *head)
return list_in_list(lock, head, true);
}
+#define MAX_LIST_CLEAR_NODES 256
+
+static __always_inline
+int clear_list(struct bpf_spin_lock *lock, struct bpf_list_head *head)
+{
+ struct bpf_list_node *n;
+ int i;
+
+ for (i = 0; i < MAX_LIST_CLEAR_NODES; i++) {
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ if (!n)
+ return 0;
+ bpf_obj_drop(container_of(n, struct foo, node2));
+ }
+ return 1;
+}
+
+SEC("syscall")
+int clear_map_list(void *ctx)
+{
+ struct map_value *v;
+
+ v = bpf_map_lookup_elem(&array_map, &(int){0});
+ if (!v)
+ return 1;
+ return clear_list(&v->lock, &v->head);
+}
+
+SEC("syscall")
+int clear_inner_map_list(void *ctx)
+{
+ struct map_value *v;
+ void *map;
+
+ map = bpf_map_lookup_elem(&map_of_maps, &(int){0});
+ if (!map)
+ return 1;
+ v = bpf_map_lookup_elem(map, &(int){0});
+ if (!v)
+ return 1;
+ return clear_list(&v->lock, &v->head);
+}
+
+SEC("syscall")
+int clear_global_list(void *ctx)
+{
+ return clear_list(&glock, &ghead);
+}
+
+SEC("syscall")
+int clear_global_nested_list(void *ctx)
+{
+ return clear_list(&ghead_nested.inner.lock, &ghead_nested.inner.head);
+}
+
+SEC("syscall")
+int clear_global_array_list(void *ctx)
+{
+ int ret;
+
+ ret = clear_list(&glock_c, &ghead_array[0]);
+ if (ret)
+ return ret;
+ ret = clear_list(&glock_c, &ghead_array[1]);
+ if (ret)
+ return ret;
+ return clear_list(&glock_c, &ghead_array_one[0]);
+}
+
SEC("tc")
int map_list_push_pop(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c
index e708ffbe1f61..3fbefc568e0a 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr.c
@@ -489,8 +489,7 @@ int test_map_kptr_ref3(struct __sk_buff *ctx)
int num_of_refs;
-SEC("syscall")
-int count_ref(void *ctx)
+static __always_inline int read_ref_count(void)
{
struct prog_test_ref_kfunc *p;
unsigned long arg = 0;
@@ -500,12 +499,96 @@ int count_ref(void *ctx)
return 1;
num_of_refs = p->cnt.refs.counter;
-
bpf_kfunc_call_test_release(p);
return 0;
}
SEC("syscall")
+int count_ref(void *ctx)
+{
+ return read_ref_count();
+}
+
+static __always_inline int stash_ref_ptr(struct map_value *v)
+{
+ struct prog_test_ref_kfunc *p, *old;
+ unsigned long arg = 0;
+
+ p = bpf_kfunc_call_test_acquire(&arg);
+ if (!p)
+ return 1;
+
+ old = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (old) {
+ bpf_kfunc_call_test_release(old);
+ old = bpf_kptr_xchg(&v->ref_ptr, NULL);
+ if (old)
+ bpf_kfunc_call_test_release(old);
+ return 2;
+ }
+ return 0;
+}
+
+static __always_inline int check_refs(int expected)
+{
+ int ret;
+
+ ret = read_ref_count();
+ if (ret)
+ return ret;
+ return num_of_refs == expected ? 0 : 3;
+}
+
+SEC("syscall")
+int test_array_map_update_kptr(void *ctx)
+{
+ struct map_value init = {}, *v;
+ int key = 0, ret;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 1;
+ ret = stash_ref_ptr(v);
+ if (ret)
+ return ret;
+ ret = check_refs(3);
+ if (ret)
+ return ret;
+ ret = bpf_map_update_elem(&array_map, &key, &init, BPF_EXIST);
+ if (ret)
+ return 4;
+ return check_refs(3);
+}
+
+#define DEFINE_HASH_UPDATE_KPTR_TEST(name, map) \
+SEC("syscall") \
+int name(void *ctx) \
+{ \
+ struct map_value init = {}, *v; \
+ int key = 0, ret; \
+ \
+ ret = bpf_map_update_elem(&map, &key, &init, BPF_NOEXIST); \
+ if (ret) \
+ return 1; \
+ v = bpf_map_lookup_elem(&map, &key); \
+ if (!v) \
+ return 2; \
+ ret = stash_ref_ptr(v); \
+ if (ret) \
+ return ret; \
+ ret = check_refs(3); \
+ if (ret) \
+ return ret; \
+ ret = bpf_map_update_elem(&map, &key, &init, BPF_EXIST); \
+ if (ret) \
+ return 4; \
+ return check_refs(3); \
+}
+
+DEFINE_HASH_UPDATE_KPTR_TEST(test_hash_map_update_kptr, hash_map)
+DEFINE_HASH_UPDATE_KPTR_TEST(test_hash_malloc_map_update_kptr, hash_malloc_map)
+
+SEC("syscall")
int test_ls_map_kptr_ref1(void *ctx)
{
struct task_struct *current;
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
index 13de169ad68f..61906f48025c 100644
--- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
@@ -1036,13 +1036,31 @@ int percpu_hash_refcount_leak(void *ctx)
struct map_value *v;
int key = 0;
- v = bpf_map_lookup_elem(&percpu_hash, &key);
+ v = bpf_map_lookup_percpu_elem(&percpu_hash, &key, 0);
if (!v)
return 0;
return __insert_in_list(&head, &lock, &v->node);
}
+SEC("syscall")
+int clear_percpu_hash_kptr(void *ctx)
+{
+ struct node_data *n;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_percpu_elem(&percpu_hash, &key, 0);
+ if (!v)
+ return 0;
+
+ n = bpf_kptr_xchg(&v->node, NULL);
+ if (!n)
+ return 0;
+ bpf_obj_drop(n);
+ return probe_read_refcount();
+}
+
SEC("tc")
int check_percpu_hash_refcount(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
index 8e947d445f8e..8942b5478129 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
@@ -5,6 +5,7 @@
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
+#include "../bpf_experimental.h"
#include "bpf_misc.h"
#include "task_kfunc_common.h"
@@ -234,6 +235,45 @@ int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_
}
SEC("tp_btf/task_newtask")
+__failure __msg("bpf_obj_drop cannot be used in tracing programs on types with NMI unsafe fields")
+int BPF_PROG(task_kfunc_obj_drop_with_kptr, struct task_struct *task, u64 clone_flags)
+{
+ struct __tasks_kfunc_map_value *local;
+
+ local = bpf_obj_new(typeof(*local));
+ if (!local)
+ return 0;
+
+ bpf_obj_drop(local);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("bpf_obj_drop cannot be used in tracing programs on types with NMI unsafe fields")
+int BPF_PROG(task_kfunc_obj_drop_nmi_with_kptr, struct task_struct *task,
+ u64 clone_flags)
+{
+ struct __tasks_kfunc_map_value *local;
+ struct task_struct *acquired, *old;
+
+ (void)clone_flags;
+
+ local = bpf_obj_new(typeof(*local));
+ if (!local)
+ return 0;
+
+ acquired = bpf_task_acquire(task);
+ if (acquired) {
+ old = bpf_kptr_xchg(&local->task, acquired);
+ if (old)
+ bpf_task_release(old);
+ }
+
+ bpf_obj_drop(local);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
__failure __msg("Possibly NULL pointer passed to trusted R1")
int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 clone_flags)
{
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c
index 5fb4fc19d26a..d63a79ee33dc 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c
@@ -140,17 +140,17 @@ int BPF_PROG(test_task_acquire_leave_in_map, struct task_struct *task, u64 clone
return 0;
}
-SEC("tp_btf/task_newtask")
-int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags)
+SEC("syscall")
+int test_task_xchg_release(const void *ctx)
{
- struct task_struct *kptr, *acquired;
+ struct task_struct *task, *kptr, *acquired;
struct __tasks_kfunc_map_value *v, *local;
int refcnt, refcnt_after_drop;
long status;
- if (!is_test_kfunc_task())
- return 0;
+ (void)ctx;
+ task = bpf_get_current_task_btf();
status = tasks_kfunc_map_insert(task);
if (status) {
err = 1;
@@ -191,7 +191,7 @@ int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags)
return 0;
}
- /* Stash a copy into local kptr and check if it is released recursively */
+ /* Stash a copy into local kptr and check if it is released recursively. */
acquired = bpf_task_acquire(kptr);
if (!acquired) {
err = 7;
@@ -220,7 +220,6 @@ int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags)
}
bpf_task_release(kptr);
-
return 0;
}