diff options
| author | Alexei Starovoitov <ast@kernel.org> | 2026-06-10 07:23:12 +0300 |
|---|---|---|
| committer | Alexei Starovoitov <ast@kernel.org> | 2026-06-10 07:23:12 +0300 |
| commit | 1fed2e47fac582e824f77f68722a8a13820e58e2 (patch) | |
| tree | 703f21d578314c1ee115b1c9a44ee754bdbd90cf | |
| parent | 140fa23df957b51385aa847986d44ad7f59b0563 (diff) | |
| parent | 2e7c6cb4d8437a2fe7cd95aac7ca53d7eb05e9f4 (diff) | |
| download | linux-1fed2e47fac582e824f77f68722a8a13820e58e2.tar.xz | |
Merge branch 'fix-kptr-dtor-deadlock'
Kumar Kartikeya Dwivedi says:
====================
Fix kptr dtor deadlock
Referenced kptr destruction can run from tracing/NMI contexts through
bpf_obj_drop() and map value update/delete paths, where it reaches NMI-unsafe
special field teardown and can deadlock. Justin reported the issue and
iterated on fixes in [0]-[2], and also confirmed the bpf_obj_drop()
reproducer in [3].
This series rejects unsafe obj drops from non-iterator tracing programs,
limits map value recycle to NMI-safe field cancellation, and adds
focused selftests for the obj_drop(), NMI delete, and recycle teardown
cases.
See patches for details.
[0]: https://lore.kernel.org/bpf/20260505150851.3090688-1-utilityemal77@gmail.com
[1]: https://lore.kernel.org/bpf/20260507175453.1140400-1-utilityemal77@gmail.com
[2]: https://lore.kernel.org/bpf/20260519011450.1144935-1-utilityemal77@gmail.com
[3]: https://lore.kernel.org/bpf/agyG3eQwgmoJwmj2@suesslenovo
Changelog:
----------
v2 -> v3
v2: https://lore.kernel.org/bpf/20260609093719.2858096-1-memxor@gmail.com
* Rework bpf_obj_cancel_fields() to use bpf_map_free_internal_structs(). (Mykyta)
* Fix CI failures.
v1 -> v2
v1: https://lore.kernel.org/bpf/20260608144841.1732406-1-memxor@gmail.com
* Drop is_tracing_prog_type() fix due to compat breakage, revisit separately.
* Rework bpf_obj_drop() fix to additionally reject non-iter tracing progs.
====================
Link: https://patch.msgid.link/20260609202548.3571690-1-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
| -rw-r--r-- | include/linux/bpf.h | 30 | ||||
| -rw-r--r-- | kernel/bpf/arraymap.c | 8 | ||||
| -rw-r--r-- | kernel/bpf/hashtab.c | 32 | ||||
| -rw-r--r-- | kernel/bpf/syscall.c | 5 | ||||
| -rw-r--r-- | kernel/bpf/verifier.c | 17 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/prog_tests/htab_update.c | 4 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/prog_tests/linked_list.c | 33 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/prog_tests/map_kptr.c | 66 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c | 8 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/prog_tests/task_kfunc.c | 42 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/htab_update.c | 4 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/linked_list.c | 71 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/map_kptr.c | 89 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/refcounted_kptr.c | 20 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/task_kfunc_failure.c | 40 | ||||
| -rw-r--r-- | tools/testing/selftests/bpf/progs/task_kfunc_success.c | 13 |
16 files changed, 421 insertions, 61 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 62bba7a4876f..56f5da2b437f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -492,6 +492,35 @@ static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_f return rec->field_mask & type; } +static inline bool btf_field_is_nmi_safe(enum btf_field_type type) +{ + switch (type) { + case BPF_SPIN_LOCK: + case BPF_RES_SPIN_LOCK: + case BPF_TIMER: + case BPF_WORKQUEUE: + case BPF_TASK_WORK: + case BPF_KPTR_UNREF: + case BPF_REFCOUNT: + return true; + default: + return false; + } +} + +static inline bool btf_record_has_nmi_unsafe_fields(const struct btf_record *rec) +{ + int i; + + if (IS_ERR_OR_NULL(rec)) + return false; + for (i = 0; i < rec->cnt; i++) { + if (!btf_field_is_nmi_safe(rec->fields[i].type)) + return true; + } + return false; +} + static inline void bpf_obj_init(const struct btf_record *rec, void *obj) { int i; @@ -2688,6 +2717,7 @@ bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *r void bpf_obj_free_timer(const struct btf_record *rec, void *obj); void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj); void bpf_obj_free_task_work(const struct btf_record *rec, void *obj); +void bpf_obj_cancel_fields(struct bpf_map *map, void *obj); void bpf_obj_free_fields(const struct btf_record *rec, void *obj); void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index e6271a2bf6d6..248b4818178c 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -384,7 +384,7 @@ static long array_map_update_elem(struct bpf_map *map, void *key, void *value, if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { val = this_cpu_ptr(array->pptrs[index & array->index_mask]); copy_map_value(map, val, value); - bpf_obj_free_fields(array->map.record, val); + bpf_obj_cancel_fields(map, val); } else { val = array->value + (u64)array->elem_size * (index & 
array->index_mask); @@ -392,7 +392,7 @@ static long array_map_update_elem(struct bpf_map *map, void *key, void *value, copy_map_value_locked(map, val, value, false); else copy_map_value(map, val, value); - bpf_obj_free_fields(array->map.record, val); + bpf_obj_cancel_fields(map, val); } return 0; } @@ -432,14 +432,14 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, cpu = map_flags >> 32; ptr = per_cpu_ptr(pptr, cpu); copy_map_value(map, ptr, value); - bpf_obj_free_fields(array->map.record, ptr); + bpf_obj_cancel_fields(map, ptr); goto unlock; } for_each_possible_cpu(cpu) { ptr = per_cpu_ptr(pptr, cpu); val = (map_flags & BPF_F_ALL_CPUS) ? value : value + size * cpu; copy_map_value(map, ptr, val); - bpf_obj_free_fields(array->map.record, ptr); + bpf_obj_cancel_fields(map, ptr); } unlock: rcu_read_unlock(); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index b4366cad3cfa..9f394e1aa2e8 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -243,6 +243,10 @@ static void htab_free_prealloced_fields(struct bpf_htab *htab) if (IS_ERR_OR_NULL(htab->map.record)) return; + /* + * Preallocated maps do not have a bpf_mem_alloc destructor, so fully + * destroy every element, including the extra elements. 
+ */ if (htab_has_extra_elems(htab)) num_entries += num_possible_cpus(); for (i = 0; i < num_entries; i++) { @@ -833,8 +837,8 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map, return insn - insn_buf; } -static void check_and_free_fields(struct bpf_htab *htab, - struct htab_elem *elem) +static void check_and_cancel_fields(struct bpf_htab *htab, + struct htab_elem *elem) { if (IS_ERR_OR_NULL(htab->map.record)) return; @@ -844,11 +848,11 @@ static void check_and_free_fields(struct bpf_htab *htab, int cpu; for_each_possible_cpu(cpu) - bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu)); + bpf_obj_cancel_fields(&htab->map, per_cpu_ptr(pptr, cpu)); } else { void *map_value = htab_elem_value(elem, htab->map.key_size); - bpf_obj_free_fields(htab->map.record, map_value); + bpf_obj_cancel_fields(&htab->map, map_value); } } @@ -883,7 +887,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) htab_unlock_bucket(b, flags); if (l == tgt_l) - check_and_free_fields(htab, l); + check_and_cancel_fields(htab, l); return l == tgt_l; } @@ -948,7 +952,7 @@ find_first_elem: static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) { - check_and_free_fields(htab, l); + check_and_cancel_fields(htab, l); if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr); @@ -1001,7 +1005,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) if (htab_is_prealloc(htab)) { bpf_map_dec_elem_count(&htab->map); - check_and_free_fields(htab, l); + check_and_cancel_fields(htab, l); pcpu_freelist_push(&htab->freelist, &l->fnode); } else { dec_elem_count(htab); @@ -1018,7 +1022,7 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, /* copy true value_size bytes */ ptr = this_cpu_ptr(pptr); copy_map_value(&htab->map, ptr, value); - bpf_obj_free_fields(htab->map.record, ptr); + bpf_obj_cancel_fields(&htab->map, ptr); } else { u32 size = round_up(htab->map.value_size, 
8); void *val; @@ -1028,7 +1032,7 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, cpu = map_flags >> 32; ptr = per_cpu_ptr(pptr, cpu); copy_map_value(&htab->map, ptr, value); - bpf_obj_free_fields(htab->map.record, ptr); + bpf_obj_cancel_fields(&htab->map, ptr); return; } @@ -1036,7 +1040,7 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, ptr = per_cpu_ptr(pptr, cpu); val = (map_flags & BPF_F_ALL_CPUS) ? value : value + size * cpu; copy_map_value(&htab->map, ptr, val); - bpf_obj_free_fields(htab->map.record, ptr); + bpf_obj_cancel_fields(&htab->map, ptr); } } } @@ -1252,11 +1256,11 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value, if (l_old) { hlist_nulls_del_rcu(&l_old->hash_node); - /* l_old has already been stashed in htab->extra_elems, free - * its special fields before it is available for reuse. + /* l_old has already been stashed in htab->extra_elems, cancel + * its reusable special fields before it is available for reuse. 
*/ if (htab_is_prealloc(htab)) - check_and_free_fields(htab, l_old); + check_and_cancel_fields(htab, l_old); } htab_unlock_bucket(b, flags); if (l_old && !htab_is_prealloc(htab)) @@ -1269,7 +1273,7 @@ err: static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem) { - check_and_free_fields(htab, elem); + check_and_cancel_fields(htab, elem); bpf_map_dec_elem_count(&htab->map); bpf_lru_push_free(&htab->lru, &elem->lru_node); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index d4188a992bd8..7ed949f70f82 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -808,6 +808,11 @@ void bpf_obj_free_task_work(const struct btf_record *rec, void *obj) bpf_task_work_cancel_and_free(obj + rec->task_work_off); } +void bpf_obj_cancel_fields(struct bpf_map *map, void *obj) +{ + bpf_map_free_internal_structs(map, obj); +} + void bpf_obj_free_fields(const struct btf_record *rec, void *obj) { const struct btf_field *fields; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 954b85609f32..eb46a81a8c51 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -205,6 +205,7 @@ static int release_reference_nomark(struct bpf_verifier_state *state, int id); static int release_reference(struct bpf_verifier_env *env, int id); static void invalidate_non_owning_refs(struct bpf_verifier_env *env); static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env); +static bool is_tracing_prog_type(enum bpf_prog_type type); static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg); static bool is_trusted_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg); @@ -12881,6 +12882,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { bool sleepable, rcu_lock, rcu_unlock, preempt_disable, preempt_enable; + enum bpf_prog_type prog_type = resolve_prog_type(env->prog); struct bpf_reg_state *regs = cur_regs(env); const char *func_name, 
*ptr_type_name; const struct btf_type *t, *ptr_type; @@ -12957,6 +12959,21 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (err < 0) return err; + if ((is_bpf_obj_drop_kfunc(meta.func_id) || + is_bpf_percpu_obj_drop_kfunc(meta.func_id)) && (is_tracing_prog_type(prog_type) || + /* is_tracing_prog_type() for now doesn't cover non-iterator tracing progs. */ + (prog_type == BPF_PROG_TYPE_TRACING && env->prog->expected_attach_type != BPF_TRACE_ITER + && !env->prog->sleepable))) { + struct btf_struct_meta *struct_meta; + + struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id); + if (struct_meta && btf_record_has_nmi_unsafe_fields(struct_meta->record)) { + verbose(env, "%s cannot be used in tracing programs on types with NMI unsafe fields\n", + func_name); + return -EINVAL; + } + } + if (is_bpf_rbtree_add_kfunc(meta.func_id)) { err = push_callback_call(env, insn, insn_idx, meta.subprogno, set_rbtree_add_callback_state); diff --git a/tools/testing/selftests/bpf/prog_tests/htab_update.c b/tools/testing/selftests/bpf/prog_tests/htab_update.c index ea1a6766fbe9..0a28d4346924 100644 --- a/tools/testing/selftests/bpf/prog_tests/htab_update.c +++ b/tools/testing/selftests/bpf/prog_tests/htab_update.c @@ -23,7 +23,7 @@ static void test_reenter_update(void) if (!ASSERT_OK_PTR(skel, "htab_update__open")) return; - bpf_program__set_autoload(skel->progs.bpf_obj_free_fields, true); + bpf_program__set_autoload(skel->progs.bpf_obj_cancel_fields, true); err = htab_update__load(skel); if (!ASSERT_TRUE(!err, "htab_update__load") || err) goto out; @@ -50,7 +50,7 @@ static void test_reenter_update(void) /* * Second update: replace existing element with same key and trigger * the reentrancy of bpf_map_update_elem(). 
- * check_and_free_fields() calls bpf_obj_free_fields() on the old + * check_and_cancel_fields() calls bpf_obj_cancel_fields() on the old * value, which is where fentry program runs and performs a nested * bpf_map_update_elem(), triggering -EDEADLK. */ diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index dbff099860ba..8defea0253ed 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -131,13 +131,14 @@ end: linked_list_fail__destroy(skel); } -static void clear_fields(struct bpf_map *map) +static void clear_fields(struct bpf_program *prog) { - char buf[24]; - int key = 0; + LIBBPF_OPTS(bpf_test_run_opts, opts); + int ret; - memset(buf, 0xff, sizeof(buf)); - ASSERT_OK(bpf_map__update_elem(map, &key, sizeof(key), buf, sizeof(buf), 0), "check_and_free_fields"); + ret = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts); + ASSERT_OK(ret, "clear_fields"); + ASSERT_OK(opts.retval, "clear_fields retval"); } enum { @@ -170,31 +171,31 @@ static void test_linked_list_success(int mode, bool leave_in_map) ASSERT_OK(ret, "map_list_push_pop"); ASSERT_OK(opts.retval, "map_list_push_pop retval"); if (!leave_in_map) - clear_fields(skel->maps.array_map); + clear_fields(skel->progs.clear_map_list); ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop), &opts); ASSERT_OK(ret, "inner_map_list_push_pop"); ASSERT_OK(opts.retval, "inner_map_list_push_pop retval"); if (!leave_in_map) - clear_fields(skel->maps.inner_map); + clear_fields(skel->progs.clear_inner_map_list); ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop), &opts); ASSERT_OK(ret, "global_list_push_pop"); ASSERT_OK(opts.retval, "global_list_push_pop retval"); if (!leave_in_map) - clear_fields(skel->maps.bss_A); + clear_fields(skel->progs.clear_global_list); ret = 
bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_nested), &opts); ASSERT_OK(ret, "global_list_push_pop_nested"); ASSERT_OK(opts.retval, "global_list_push_pop_nested retval"); if (!leave_in_map) - clear_fields(skel->maps.bss_A); + clear_fields(skel->progs.clear_global_nested_list); ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_array_push_pop), &opts); ASSERT_OK(ret, "global_list_array_push_pop"); ASSERT_OK(opts.retval, "global_list_array_push_pop retval"); if (!leave_in_map) - clear_fields(skel->maps.bss_A); + clear_fields(skel->progs.clear_global_array_list); if (mode == PUSH_POP) goto end; @@ -204,19 +205,19 @@ ppm: ASSERT_OK(ret, "map_list_push_pop_multiple"); ASSERT_OK(opts.retval, "map_list_push_pop_multiple retval"); if (!leave_in_map) - clear_fields(skel->maps.array_map); + clear_fields(skel->progs.clear_map_list); ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop_multiple), &opts); ASSERT_OK(ret, "inner_map_list_push_pop_multiple"); ASSERT_OK(opts.retval, "inner_map_list_push_pop_multiple retval"); if (!leave_in_map) - clear_fields(skel->maps.inner_map); + clear_fields(skel->progs.clear_inner_map_list); ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_multiple), &opts); ASSERT_OK(ret, "global_list_push_pop_multiple"); ASSERT_OK(opts.retval, "global_list_push_pop_multiple retval"); if (!leave_in_map) - clear_fields(skel->maps.bss_A); + clear_fields(skel->progs.clear_global_list); if (mode == PUSH_POP_MULT) goto end; @@ -226,19 +227,19 @@ lil: ASSERT_OK(ret, "map_list_in_list"); ASSERT_OK(opts.retval, "map_list_in_list retval"); if (!leave_in_map) - clear_fields(skel->maps.array_map); + clear_fields(skel->progs.clear_map_list); ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_in_list), &opts); ASSERT_OK(ret, "inner_map_list_in_list"); ASSERT_OK(opts.retval, "inner_map_list_in_list retval"); if (!leave_in_map) - 
clear_fields(skel->maps.inner_map); + clear_fields(skel->progs.clear_inner_map_list); ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_in_list), &opts); ASSERT_OK(ret, "global_list_in_list"); ASSERT_OK(opts.retval, "global_list_in_list retval"); if (!leave_in_map) - clear_fields(skel->maps.bss_A); + clear_fields(skel->progs.clear_global_list); end: linked_list__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c index 03b46f17cf53..17e707dddda8 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c @@ -51,7 +51,6 @@ static void test_map_kptr_success(bool test_run) ret = bpf_map__update_elem(skel->maps.array_map, &key, sizeof(key), buf, sizeof(buf), 0); ASSERT_OK(ret, "array_map update"); - skel->data->ref--; ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); @@ -59,49 +58,42 @@ static void test_map_kptr_success(bool test_run) ret = bpf_map__update_elem(skel->maps.pcpu_array_map, &key, sizeof(key), pbuf, cpu * sizeof(buf), 0); ASSERT_OK(ret, "pcpu_array_map update"); - skel->data->ref--; ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); ret = bpf_map__delete_elem(skel->maps.hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "hash_map delete"); - skel->data->ref--; ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); ret = bpf_map__delete_elem(skel->maps.pcpu_hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "pcpu_hash_map delete"); - skel->data->ref--; ret = 
bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); ret = bpf_map__delete_elem(skel->maps.hash_malloc_map, &key, sizeof(key), 0); ASSERT_OK(ret, "hash_malloc_map delete"); - skel->data->ref--; ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); ret = bpf_map__delete_elem(skel->maps.pcpu_hash_malloc_map, &key, sizeof(key), 0); ASSERT_OK(ret, "pcpu_hash_malloc_map delete"); - skel->data->ref--; ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); ret = bpf_map__delete_elem(skel->maps.lru_hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "lru_hash_map delete"); - skel->data->ref--; ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); ret = bpf_map__delete_elem(skel->maps.lru_pcpu_hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "lru_pcpu_hash_map delete"); - skel->data->ref--; ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); @@ -151,12 +143,68 @@ static void wait_for_map_release(void) map_kptr__destroy(skel); } +enum map_update_kptr_case { + MAP_UPDATE_KPTR_ARRAY, + MAP_UPDATE_KPTR_HASH, + MAP_UPDATE_KPTR_HASH_MALLOC, +}; + +static struct bpf_program *map_update_kptr_prog(struct map_kptr *skel, + enum map_update_kptr_case test) +{ + switch (test) { + case MAP_UPDATE_KPTR_ARRAY: + return skel->progs.test_array_map_update_kptr; + case MAP_UPDATE_KPTR_HASH: + return skel->progs.test_hash_map_update_kptr; + case 
MAP_UPDATE_KPTR_HASH_MALLOC: + return skel->progs.test_hash_malloc_map_update_kptr; + } + + return NULL; +} + +static void test_map_update_kptr(enum map_update_kptr_case test) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct map_kptr *skel; + struct bpf_program *prog; + int ret; + + skel = map_kptr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load")) + return; + + prog = map_update_kptr_prog(skel, test); + if (!ASSERT_OK_PTR(prog, "map_update_kptr_prog")) + goto out; + + ret = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts); + if (!ASSERT_OK(ret, "map_update_kptr")) + goto out; + if (!ASSERT_OK(opts.retval, "map_update_kptr retval")) + goto out; + + ASSERT_EQ(skel->bss->num_of_refs, 3, "refs_after_update"); + +out: + map_kptr__destroy(skel); + wait_for_map_release(); +} + void serial_test_map_kptr(void) { struct rcu_tasks_trace_gp *skel; RUN_TESTS(map_kptr_fail); + if (test__start_subtest("update_array_map_kptr")) + test_map_update_kptr(MAP_UPDATE_KPTR_ARRAY); + if (test__start_subtest("update_hash_map_kptr")) + test_map_update_kptr(MAP_UPDATE_KPTR_HASH); + if (test__start_subtest("update_hash_malloc_map_kptr")) + test_map_update_kptr(MAP_UPDATE_KPTR_HASH_MALLOC); + skel = rcu_tasks_trace_gp__open_and_load(); if (!ASSERT_OK_PTR(skel, "rcu_tasks_trace_gp__open_and_load")) return; @@ -175,7 +223,7 @@ void serial_test_map_kptr(void) ASSERT_OK(kern_sync_rcu(), "sync rcu"); wait_for_map_release(); - /* Observe refcount dropping to 1 on synchronous delete elem */ + /* Observe refcount dropping to 1 on map release. 
*/ test_map_kptr_success(true); } diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c index d2c0542716a8..1737eba34323 100644 --- a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c @@ -57,6 +57,7 @@ void test_percpu_hash_refcounted_kptr_refcount_leak(void) .data_size_in = sizeof(pkt_v4), .repeat = 1, ); + LIBBPF_OPTS(bpf_test_run_opts, syscall_opts); cpu_nr = libbpf_num_possible_cpus(); if (!ASSERT_GT(cpu_nr, 0, "libbpf_num_possible_cpus")) @@ -87,8 +88,11 @@ void test_percpu_hash_refcounted_kptr_refcount_leak(void) if (!ASSERT_EQ(opts.retval, 2, "opts.retval")) goto out; - err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0); - if (!ASSERT_OK(err, "bpf_map__update_elem")) + fd = bpf_program__fd(skel->progs.clear_percpu_hash_kptr); + err = bpf_prog_test_run_opts(fd, &syscall_opts); + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) + goto out; + if (!ASSERT_EQ(syscall_opts.retval, 1, "syscall_opts.retval")) goto out; fd = bpf_program__fd(skel->progs.check_percpu_hash_refcount); diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c index 83b90335967a..e6e95c1416e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c +++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c @@ -68,6 +68,36 @@ cleanup: task_kfunc_success__destroy(skel); } +static void run_syscall_success_test(const char *prog_name) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct task_kfunc_success *skel; + struct bpf_program *prog; + int err; + + skel = open_load_task_kfunc_skel(); + if (!ASSERT_OK_PTR(skel, "open_load_skel")) + return; + + if (!ASSERT_OK(skel->bss->err, "pre_run_err")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + err = 
bpf_prog_test_run_opts(bpf_program__fd(prog), &opts); + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) + goto cleanup; + if (!ASSERT_EQ(opts.retval, 0, "retval")) + goto cleanup; + + ASSERT_OK(skel->bss->err, "post_run_err"); + +cleanup: + task_kfunc_success__destroy(skel); +} + static int run_vpid_test(void *prog_name) { struct task_kfunc_success *skel; @@ -140,7 +170,6 @@ static const char * const success_tests[] = { "test_task_acquire_release_argument", "test_task_acquire_release_current", "test_task_acquire_leave_in_map", - "test_task_xchg_release", "test_task_map_acquire_release", "test_task_current_acquire_release", "test_task_from_pid_arg", @@ -151,6 +180,10 @@ static const char * const success_tests[] = { "test_task_kfunc_flavor_relo_not_found", }; +static const char * const syscall_success_tests[] = { + "test_task_xchg_release", +}; + static const char * const vpid_success_tests[] = { "test_task_from_vpid_current", "test_task_from_vpid_invalid", @@ -167,6 +200,13 @@ void test_task_kfunc(void) run_success_test(success_tests[i]); } + for (i = 0; i < ARRAY_SIZE(syscall_success_tests); i++) { + if (!test__start_subtest(syscall_success_tests[i])) + continue; + + run_syscall_success_test(syscall_success_tests[i]); + } + for (i = 0; i < ARRAY_SIZE(vpid_success_tests); i++) { if (!test__start_subtest(vpid_success_tests[i])) continue; diff --git a/tools/testing/selftests/bpf/progs/htab_update.c b/tools/testing/selftests/bpf/progs/htab_update.c index 195d3b2fba00..62c1b1325ec2 100644 --- a/tools/testing/selftests/bpf/progs/htab_update.c +++ b/tools/testing/selftests/bpf/progs/htab_update.c @@ -22,8 +22,8 @@ struct { int pid = 0; int update_err = 0; -SEC("?fentry/bpf_obj_free_fields") -int bpf_obj_free_fields(void *ctx) +SEC("?fentry/bpf_obj_cancel_fields") +int bpf_obj_cancel_fields(void *ctx) { __u32 key = 0; struct val value = { .payload = 1 }; diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c index 
421f40835acd..fa97faa5358b 100644 --- a/tools/testing/selftests/bpf/progs/linked_list.c +++ b/tools/testing/selftests/bpf/progs/linked_list.c @@ -290,6 +290,77 @@ int test_list_in_list(struct bpf_spin_lock *lock, struct bpf_list_head *head) return list_in_list(lock, head, true); } +#define MAX_LIST_CLEAR_NODES 256 + +static __always_inline +int clear_list(struct bpf_spin_lock *lock, struct bpf_list_head *head) +{ + struct bpf_list_node *n; + int i; + + for (i = 0; i < MAX_LIST_CLEAR_NODES; i++) { + bpf_spin_lock(lock); + n = bpf_list_pop_front(head); + bpf_spin_unlock(lock); + if (!n) + return 0; + bpf_obj_drop(container_of(n, struct foo, node2)); + } + return 1; +} + +SEC("syscall") +int clear_map_list(void *ctx) +{ + struct map_value *v; + + v = bpf_map_lookup_elem(&array_map, &(int){0}); + if (!v) + return 1; + return clear_list(&v->lock, &v->head); +} + +SEC("syscall") +int clear_inner_map_list(void *ctx) +{ + struct map_value *v; + void *map; + + map = bpf_map_lookup_elem(&map_of_maps, &(int){0}); + if (!map) + return 1; + v = bpf_map_lookup_elem(map, &(int){0}); + if (!v) + return 1; + return clear_list(&v->lock, &v->head); +} + +SEC("syscall") +int clear_global_list(void *ctx) +{ + return clear_list(&glock, &ghead); +} + +SEC("syscall") +int clear_global_nested_list(void *ctx) +{ + return clear_list(&ghead_nested.inner.lock, &ghead_nested.inner.head); +} + +SEC("syscall") +int clear_global_array_list(void *ctx) +{ + int ret; + + ret = clear_list(&glock_c, &ghead_array[0]); + if (ret) + return ret; + ret = clear_list(&glock_c, &ghead_array[1]); + if (ret) + return ret; + return clear_list(&glock_c, &ghead_array_one[0]); +} + SEC("tc") int map_list_push_pop(void *ctx) { diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c index e708ffbe1f61..3fbefc568e0a 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr.c +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -489,8 +489,7 @@ int 
test_map_kptr_ref3(struct __sk_buff *ctx) int num_of_refs; -SEC("syscall") -int count_ref(void *ctx) +static __always_inline int read_ref_count(void) { struct prog_test_ref_kfunc *p; unsigned long arg = 0; @@ -500,12 +499,96 @@ int count_ref(void *ctx) return 1; num_of_refs = p->cnt.refs.counter; - bpf_kfunc_call_test_release(p); return 0; } SEC("syscall") +int count_ref(void *ctx) +{ + return read_ref_count(); +} + +static __always_inline int stash_ref_ptr(struct map_value *v) +{ + struct prog_test_ref_kfunc *p, *old; + unsigned long arg = 0; + + p = bpf_kfunc_call_test_acquire(&arg); + if (!p) + return 1; + + old = bpf_kptr_xchg(&v->ref_ptr, p); + if (old) { + bpf_kfunc_call_test_release(old); + old = bpf_kptr_xchg(&v->ref_ptr, NULL); + if (old) + bpf_kfunc_call_test_release(old); + return 2; + } + return 0; +} + +static __always_inline int check_refs(int expected) +{ + int ret; + + ret = read_ref_count(); + if (ret) + return ret; + return num_of_refs == expected ? 0 : 3; +} + +SEC("syscall") +int test_array_map_update_kptr(void *ctx) +{ + struct map_value init = {}, *v; + int key = 0, ret; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 1; + ret = stash_ref_ptr(v); + if (ret) + return ret; + ret = check_refs(3); + if (ret) + return ret; + ret = bpf_map_update_elem(&array_map, &key, &init, BPF_EXIST); + if (ret) + return 4; + return check_refs(3); +} + +#define DEFINE_HASH_UPDATE_KPTR_TEST(name, map) \ +SEC("syscall") \ +int name(void *ctx) \ +{ \ + struct map_value init = {}, *v; \ + int key = 0, ret; \ + \ + ret = bpf_map_update_elem(&map, &key, &init, BPF_NOEXIST); \ + if (ret) \ + return 1; \ + v = bpf_map_lookup_elem(&map, &key); \ + if (!v) \ + return 2; \ + ret = stash_ref_ptr(v); \ + if (ret) \ + return ret; \ + ret = check_refs(3); \ + if (ret) \ + return ret; \ + ret = bpf_map_update_elem(&map, &key, &init, BPF_EXIST); \ + if (ret) \ + return 4; \ + return check_refs(3); \ +} + +DEFINE_HASH_UPDATE_KPTR_TEST(test_hash_map_update_kptr, 
hash_map) +DEFINE_HASH_UPDATE_KPTR_TEST(test_hash_malloc_map_update_kptr, hash_malloc_map) + +SEC("syscall") int test_ls_map_kptr_ref1(void *ctx) { struct task_struct *current; diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c index 13de169ad68f..61906f48025c 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c @@ -1036,13 +1036,31 @@ int percpu_hash_refcount_leak(void *ctx) struct map_value *v; int key = 0; - v = bpf_map_lookup_elem(&percpu_hash, &key); + v = bpf_map_lookup_percpu_elem(&percpu_hash, &key, 0); if (!v) return 0; return __insert_in_list(&head, &lock, &v->node); } +SEC("syscall") +int clear_percpu_hash_kptr(void *ctx) +{ + struct node_data *n; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_percpu_elem(&percpu_hash, &key, 0); + if (!v) + return 0; + + n = bpf_kptr_xchg(&v->node, NULL); + if (!n) + return 0; + bpf_obj_drop(n); + return probe_read_refcount(); +} + SEC("tc") int check_percpu_hash_refcount(void *ctx) { diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index 8e947d445f8e..8942b5478129 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -5,6 +5,7 @@ #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> +#include "../bpf_experimental.h" #include "bpf_misc.h" #include "task_kfunc_common.h" @@ -234,6 +235,45 @@ int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_ } SEC("tp_btf/task_newtask") +__failure __msg("bpf_obj_drop cannot be used in tracing programs on types with NMI unsafe fields") +int BPF_PROG(task_kfunc_obj_drop_with_kptr, struct task_struct *task, u64 clone_flags) +{ + struct __tasks_kfunc_map_value *local; + + local = bpf_obj_new(typeof(*local)); + if (!local) + return 0; + + bpf_obj_drop(local); + 
return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("bpf_obj_drop cannot be used in tracing programs on types with NMI unsafe fields") +int BPF_PROG(task_kfunc_obj_drop_nmi_with_kptr, struct task_struct *task, + u64 clone_flags) +{ + struct __tasks_kfunc_map_value *local; + struct task_struct *acquired, *old; + + (void)clone_flags; + + local = bpf_obj_new(typeof(*local)); + if (!local) + return 0; + + acquired = bpf_task_acquire(task); + if (acquired) { + old = bpf_kptr_xchg(&local->task, acquired); + if (old) + bpf_task_release(old); + } + + bpf_obj_drop(local); + return 0; +} + +SEC("tp_btf/task_newtask") __failure __msg("Possibly NULL pointer passed to trusted R1") int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 clone_flags) { diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c index 5fb4fc19d26a..d63a79ee33dc 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c @@ -140,17 +140,17 @@ int BPF_PROG(test_task_acquire_leave_in_map, struct task_struct *task, u64 clone return 0; } -SEC("tp_btf/task_newtask") -int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags) +SEC("syscall") +int test_task_xchg_release(const void *ctx) { - struct task_struct *kptr, *acquired; + struct task_struct *task, *kptr, *acquired; struct __tasks_kfunc_map_value *v, *local; int refcnt, refcnt_after_drop; long status; - if (!is_test_kfunc_task()) - return 0; + (void)ctx; + task = bpf_get_current_task_btf(); status = tasks_kfunc_map_insert(task); if (status) { err = 1; @@ -191,7 +191,7 @@ int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags) return 0; } - /* Stash a copy into local kptr and check if it is released recursively */ + /* Stash a copy into local kptr and check if it is released recursively. 
*/ acquired = bpf_task_acquire(kptr); if (!acquired) { err = 7; @@ -220,7 +220,6 @@ int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags) } bpf_task_release(kptr); - return 0; } |
