summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Kaitao Cheng <chengkaitao@kylinos.cn> 2026-05-21 06:23:00 +0300
committer: Alexei Starovoitov <ast@kernel.org> 2026-05-21 12:47:45 +0300
commit: cfa6afa4b931aed08288454943e5077f114fd7f3 (patch)
tree: 5c05fcc76b2c0ba35c7ea0b043066875635cbf2c
parent: cb339ac61d72f7fb7f57bfc0516b7b2b65bc1bad (diff)
download: linux-cfa6afa4b931aed08288454943e5077f114fd7f3.tar.xz
bpf: clear list node owner and unlink before drop
The issue only becomes exposed once bpf_list_del() is available: callers can pass an arbitrary bpf_list_head and bpf_list_node pair, including nodes that are not actually linked to the supplied head, or nodes that outlive their original head after refcount-based retention. This was not practically reachable for callers restricted to pop-style helpers alone; bpf_list_del() widens the API surface.

A failure mode appears when bpf_list_head_free() runs while a program still holds an independent refcount on a node (for example via bpf_refcount_acquire()). The list head value embedded in map memory can go away while the node object survives. If node->owner is left pointing at the old head address until drop completes, that pointer becomes stale. If a new bpf_list_head is later allocated at the same address and the stale node is passed to bpf_list_del(), the owner comparison can succeed even though the node is not really linked to the new head, and list_del_init() will follow bogus next/prev pointers with the risk of memory corruption.

When draining a bpf_list_head, mark each node owner with BPF_PTR_POISON under the map spinlock while moving it to a private drain list, then list_del_init() the node and clear owner to NULL before calling __bpf_obj_drop_impl(). Concurrent readers therefore never observe a node that appears linked to a head while its list_head is inconsistent, and surviving refcounted nodes never retain a stale non-NULL owner.

Signed-off-by: Kaitao Cheng <chengkaitao@kylinos.cn>
Link: https://lore.kernel.org/r/20260521032306.97118-3-kaitao.cheng@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r-- kernel/bpf/helpers.c | 27
1 files changed, 19 insertions, 8 deletions
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 094457c3e6d3..59855b434f0b 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2247,10 +2247,11 @@ EXPORT_SYMBOL_GPL(bpf_base_func_proto);
void bpf_list_head_free(const struct btf_field *field, void *list_head,
struct bpf_spin_lock *spin_lock)
{
- struct list_head *head = list_head, *orig_head = list_head;
+ struct list_head *head = list_head, drain, *pos, *n;
BUILD_BUG_ON(sizeof(struct list_head) > sizeof(struct bpf_list_head));
BUILD_BUG_ON(__alignof__(struct list_head) > __alignof__(struct bpf_list_head));
+ INIT_LIST_HEAD(&drain);
/* Do the actual list draining outside the lock to not hold the lock for
* too long, and also prevent deadlocks if tracing programs end up
@@ -2261,20 +2262,30 @@ void bpf_list_head_free(const struct btf_field *field, void *list_head,
__bpf_spin_lock_irqsave(spin_lock);
if (!head->next || list_empty(head))
goto unlock;
- head = head->next;
+ list_for_each_safe(pos, n, head) {
+ struct bpf_list_node_kern *node;
+
+ node = container_of(pos, struct bpf_list_node_kern, list_head);
+ WRITE_ONCE(node->owner, BPF_PTR_POISON);
+ list_move_tail(pos, &drain);
+ }
unlock:
- INIT_LIST_HEAD(orig_head);
+ INIT_LIST_HEAD(head);
__bpf_spin_unlock_irqrestore(spin_lock);
- while (head != orig_head) {
- void *obj = head;
+ while (!list_empty(&drain)) {
+ struct bpf_list_node_kern *node;
- obj -= field->graph_root.node_offset;
- head = head->next;
+ pos = drain.next;
+ node = container_of(pos, struct bpf_list_node_kern, list_head);
+ list_del_init(pos);
+ /* Ensure __bpf_list_add() sees the node as unlinked. */
+ smp_store_release(&node->owner, NULL);
/* The contained type can also have resources, including a
* bpf_list_head which needs to be freed.
*/
- __bpf_obj_drop_impl(obj, field->graph_root.value_rec, false);
+ __bpf_obj_drop_impl((char *)pos - field->graph_root.node_offset,
+ field->graph_root.value_rec, false);
}
}