From 531b87d865eb9e625c2e46ec8f06a65a6157ee45 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Sun, 26 Oct 2025 20:38:45 +0000 Subject: bpf: widen dynptr size/offset to 64 bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dynptr currently caps size and offset at 24 bits, which isn’t sufficient for file-backed use cases; even 32 bits can be limiting. Refactor dynptr helpers/kfuncs to use 64-bit size and offset, ensuring consistency across the APIs. This change does not affect internals of xdp, skb or other dynptrs, which continue to behave as before. Also it does not break binary compatibility. The widening enables large-file access support via dynptr, implemented in the next patches. Signed-off-by: Mykyta Yatsenko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20251026203853.135105-3-mykyta.yatsenko5@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e53cda0aabb6..907c69295293 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1387,19 +1387,19 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_SKB_META, }; -int bpf_dynptr_check_size(u32 size); -u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); -const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len); -void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); +int bpf_dynptr_check_size(u64 size); +u64 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); +const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u64 len); +void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u64 len); bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr); -int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset, - void *src, u32 len, u64 flags); -void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset, - void *buffer__opt, u32 buffer__szk); +int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset, + void *src, u64 len, u64 flags); +void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset, + void *buffer__opt, u64 buffer__szk); -static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len) +static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u64 offset, u64 len) { - u32 size = __bpf_dynptr_size(ptr); + u64 size = __bpf_dynptr_size(ptr); if (len > size || offset > size - len) return -E2BIG; -- cgit v1.2.3 From 76e4fed847124690f7344a43d01dbcd7b2925353 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Sun, 26 Oct 2025 20:38:46 +0000 Subject: lib: move freader into buildid.h Move struct freader and prototypes of the functions operating on it into the buildid.h. This allows reusing freader outside buildid, e.g. for file dynptr support added later. Signed-off-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20251026203853.135105-4-mykyta.yatsenko5@gmail.com Signed-off-by: Alexei Starovoitov --- MAINTAINERS | 1 + include/linux/buildid.h | 25 +++++++++++++++++++++++++ lib/buildid.c | 29 +++++------------------------ 3 files changed, 31 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 545a4776795e..7564692f2f3c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4648,6 +4648,7 @@ F: Documentation/userspace-api/ebpf/ F: arch/*/net/* F: include/linux/bpf* F: include/linux/btf* +F: include/linux/buildid.h F: include/linux/filter.h F: include/trace/events/xdp.h F: include/uapi/linux/bpf* diff --git a/include/linux/buildid.h b/include/linux/buildid.h index 014a88c41073..831c1b4b626c 100644 --- a/include/linux/buildid.h +++ b/include/linux/buildid.h @@ -18,4 +18,29 @@ void init_vmlinux_build_id(void); static inline void init_vmlinux_build_id(void) { } #endif +struct freader { + void *buf; + u32 buf_sz; + int err; + union { + struct { + struct file *file; + struct folio *folio; + void *addr; + loff_t folio_off; + bool may_fault; + }; + struct { + const char *data; + u64 data_sz; + }; + }; +}; + +void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz, + struct file *file, bool may_fault); +void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz); +const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz); +void freader_cleanup(struct freader *r); + #endif diff --git a/lib/buildid.c b/lib/buildid.c index c4b0f376fb34..df06e492810d 100644 --- a/lib/buildid.c +++ b/lib/buildid.c @@ -11,27 +11,8 @@ #define MAX_PHDR_CNT 256 -struct freader { - void *buf; - u32 buf_sz; - int err; - union { - struct { - struct file *file; - struct folio *folio; - void *addr; - loff_t folio_off; - bool may_fault; - }; - struct { - const char *data; - u64 data_sz; - }; - }; -}; - -static void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz, - struct file *file, bool may_fault) +void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz, + struct file *file, bool may_fault) { memset(r, 0, sizeof(*r)); r->buf = buf; @@ -40,7 +21,7 @@ static void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz, r->may_fault = may_fault; } -static void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz) +void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz) { memset(r, 0, sizeof(*r)); r->data = data; @@ -92,7 +73,7 @@ static int freader_get_folio(struct freader *r, loff_t file_off) return 0; } -static const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz) +const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz) { size_t folio_sz; @@ -147,7 +128,7 @@ static const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz) return r->addr + (file_off - r->folio_off); } -static void freader_cleanup(struct freader *r) +void freader_cleanup(struct freader *r) { if (!r->buf) return; /* non-file-backed mode */ -- cgit v1.2.3 From 8d8771dc03e48300e80b43744dd3c320ccaf746a Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Sun, 26 Oct 2025 20:38:49 +0000 Subject: bpf: add plumbing for file-backed dynptr Add the necessary verifier plumbing for the new file-backed dynptr type. Introduce two kfuncs for its lifecycle management: * bpf_dynptr_from_file() for initialization * bpf_dynptr_file_discard() for destruction Currently there is no mechanism for kfunc to release dynptr, this patch add one: * Dynptr release function sets meta->release_regno * Call unmark_stack_slots_dynptr() if meta->release_regno is set and dynptr ref_obj_id is set as well. Signed-off-by: Mykyta Yatsenko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20251026203853.135105-7-mykyta.yatsenko5@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 7 ++++++- kernel/bpf/helpers.c | 12 ++++++++++++ kernel/bpf/log.c | 2 ++ kernel/bpf/verifier.c | 31 +++++++++++++++++++++++++------ 4 files changed, 45 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 907c69295293..14f800773997 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -792,12 +792,15 @@ enum bpf_type_flag { /* DYNPTR points to skb_metadata_end()-skb_metadata_len() */ DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS), + /* DYNPTR points to file */ + DYNPTR_TYPE_FILE = BIT(20 + BPF_BASE_TYPE_BITS), + __BPF_TYPE_FLAG_MAX, __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; #define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \ - | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META) + | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META | DYNPTR_TYPE_FILE) /* Max number of base types. */ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -1385,6 +1388,8 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_XDP, /* Points to skb_metadata_end()-skb_metadata_len() */ BPF_DYNPTR_TYPE_SKB_META, + /* Underlying data is a file */ + BPF_DYNPTR_TYPE_FILE, }; int bpf_dynptr_check_size(u64 size); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index a2ce17ea5edb..bf65b7fb761f 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -4252,6 +4252,16 @@ __bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task, struct b return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_RESUME); } +__bpf_kfunc int bpf_dynptr_from_file(struct file *file, u32 flags, struct bpf_dynptr *ptr__uninit) +{ + return 0; +} + +__bpf_kfunc int bpf_dynptr_file_discard(struct bpf_dynptr *dynptr) +{ + return 0; +} + __bpf_kfunc_end_defs(); static void bpf_task_work_cancel_scheduled(struct irq_work *irq_work) @@ -4429,6 +4439,8 @@ BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU) BTF_ID_FLAGS(func, bpf_stream_vprintk, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_task_work_schedule_signal, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_task_work_schedule_resume, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_dynptr_from_file, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_dynptr_file_discard) BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index f50533169cc3..70221aafc35c 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -500,6 +500,8 @@ const char *dynptr_type_str(enum bpf_dynptr_type type) return "xdp"; case BPF_DYNPTR_TYPE_SKB_META: return "skb_meta"; + case BPF_DYNPTR_TYPE_FILE: + return "file"; case BPF_DYNPTR_TYPE_INVALID: return ""; default: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f60cfab95230..cd48ead852a0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -692,6 +692,8 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) return BPF_DYNPTR_TYPE_XDP; case DYNPTR_TYPE_SKB_META: return BPF_DYNPTR_TYPE_SKB_META; + case DYNPTR_TYPE_FILE: + return BPF_DYNPTR_TYPE_FILE; default: return BPF_DYNPTR_TYPE_INVALID; } @@ -710,6 +712,8 @@ static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type) return DYNPTR_TYPE_XDP; case BPF_DYNPTR_TYPE_SKB_META: return DYNPTR_TYPE_SKB_META; + case BPF_DYNPTR_TYPE_FILE: + return DYNPTR_TYPE_FILE; default: return 0; } @@ -717,7 +721,7 @@ static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type) static bool dynptr_type_refcounted(enum bpf_dynptr_type type) { - return type == BPF_DYNPTR_TYPE_RINGBUF; + return type == BPF_DYNPTR_TYPE_RINGBUF || type == BPF_DYNPTR_TYPE_FILE; } static void __mark_dynptr_reg(struct bpf_reg_state *reg, @@ -12291,6 +12295,8 @@ enum special_kfunc_type { KF_bpf_res_spin_unlock, KF_bpf_res_spin_lock_irqsave, KF_bpf_res_spin_unlock_irqrestore, + KF_bpf_dynptr_from_file, + KF_bpf_dynptr_file_discard, KF___bpf_trap, KF_bpf_task_work_schedule_signal, KF_bpf_task_work_schedule_resume, @@ -12363,6 +12369,8 @@ BTF_ID(func, bpf_res_spin_lock) BTF_ID(func, bpf_res_spin_unlock) BTF_ID(func, bpf_res_spin_lock_irqsave) BTF_ID(func, bpf_res_spin_unlock_irqrestore) +BTF_ID(func, bpf_dynptr_from_file) +BTF_ID(func, bpf_dynptr_file_discard) BTF_ID(func, __bpf_trap) BTF_ID(func, bpf_task_work_schedule_signal) BTF_ID(func, bpf_task_work_schedule_resume) @@ -13326,6 +13334,11 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ dynptr_arg_type |= DYNPTR_TYPE_XDP; } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb_meta]) { dynptr_arg_type |= DYNPTR_TYPE_SKB_META; + } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) { + dynptr_arg_type |= DYNPTR_TYPE_FILE; + } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_file_discard]) { + dynptr_arg_type |= DYNPTR_TYPE_FILE; + meta->release_regno = regno; } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] && (dynptr_arg_type & MEM_UNINIT)) { enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type; @@ -14006,12 +14019,18 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now. */ if (meta.release_regno) { - err = release_reference(env, regs[meta.release_regno].ref_obj_id); - if (err) { - verbose(env, "kfunc %s#%d reference has not been acquired before\n", - func_name, meta.func_id); - return err; + struct bpf_reg_state *reg = ®s[meta.release_regno]; + + if (meta.initialized_dynptr.ref_obj_id) { + err = unmark_stack_slots_dynptr(env, reg); + } else { + err = release_reference(env, reg->ref_obj_id); + if (err) + verbose(env, "kfunc %s#%d reference has not been acquired before\n", + func_name, meta.func_id); } + if (err) + return err; } if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] || -- cgit v1.2.3 From 2c52e8943a437af6093d8b0f0920f1764f0e5f64 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Sun, 26 Oct 2025 20:38:52 +0000 Subject: bpf: dispatch to sleepable file dynptr File dynptr reads may sleep when the requested folios are not in the page cache. To avoid sleeping in non-sleepable contexts while still supporting valid sleepable use, given that dynptrs are non-sleepable by default, enable sleeping only when bpf_dynptr_from_file() is invoked from a sleepable context. This change: * Introduces a sleepable constructor: bpf_dynptr_from_file_sleepable() * Override non-sleepable constructor with sleepable if it's always called in sleepable context Signed-off-by: Mykyta Yatsenko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20251026203853.135105-10-mykyta.yatsenko5@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 +++ kernel/bpf/helpers.c | 5 +++++ kernel/bpf/verifier.c | 10 +++++++--- 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 14f800773997..a47d67db3be5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -670,6 +670,9 @@ static inline bool bpf_map_has_internal_structs(struct bpf_map *map) void bpf_map_free_internal_structs(struct bpf_map *map, void *obj); +int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags, + struct bpf_dynptr *ptr__uninit); + extern const struct bpf_map_ops bpf_map_offload_ops; /* bpf_type_flag contains a set of flags that are applicable to the values of diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 99a7def0b978..930e132f440f 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -4336,6 +4336,11 @@ __bpf_kfunc int bpf_dynptr_from_file(struct file *file, u32 flags, struct bpf_dy return make_file_dynptr(file, flags, false, (struct bpf_dynptr_kern *)ptr__uninit); } +int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags, struct bpf_dynptr *ptr__uninit) +{ + return make_file_dynptr(file, flags, true, (struct bpf_dynptr_kern *)ptr__uninit); +} + __bpf_kfunc int bpf_dynptr_file_discard(struct bpf_dynptr *dynptr) { struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)dynptr; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 61589be91c65..542e23fb19c7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3124,7 +3124,8 @@ struct bpf_kfunc_btf_tab { u32 nr_descs; }; -static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc); +static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, + int insn_idx); static int kfunc_desc_cmp_by_id_off(const void *a, const void *b) { @@ -21869,7 +21870,7 @@ static int fixup_call_args(struct bpf_verifier_env *env) } /* replace a generic kfunc with a specialized version if necessary */ -static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc) +static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc *desc, int insn_idx) { struct bpf_prog *prog = env->prog; bool seen_direct_write; @@ -21904,6 +21905,9 @@ static int specialize_kfunc(struct bpf_verifier_env *env, struct bpf_kfunc_desc } else if (func_id == special_kfunc_list[KF_bpf_remove_dentry_xattr]) { if (bpf_lsm_has_d_inode_locked(prog)) addr = (unsigned long)bpf_remove_dentry_xattr_locked; + } else if (func_id == special_kfunc_list[KF_bpf_dynptr_from_file]) { + if (!env->insn_aux_data[insn_idx].non_sleepable) + addr = (unsigned long)bpf_dynptr_from_file_sleepable; } set_imm: @@ -21963,7 +21967,7 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -EFAULT; } - err = specialize_kfunc(env, desc); + err = specialize_kfunc(env, desc, insn_idx); if (err) return err; -- cgit v1.2.3