From e3362acd796789dc0562eb1a3937007b0beb0c5b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 16 Mar 2024 08:35:40 +0100 Subject: bpf: Remove arch_unprotect_bpf_trampoline() Last user of arch_unprotect_bpf_trampoline() was removed by commit 187e2af05abe ("bpf: struct_ops supports more than one page for trampolines.") Remove arch_unprotect_bpf_trampoline() Reported-by: Daniel Borkmann Fixes: 187e2af05abe ("bpf: struct_ops supports more than one page for trampolines.") Signed-off-by: Christophe Leroy Link: https://lore.kernel.org/r/42c635bb54d3af91db0f9b85d724c7c290069f67.1710574353.git.christophe.leroy@csgroup.eu Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/bpf.h') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4f20f62f9d63..d89bdefb42e2 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1117,7 +1117,6 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i void *arch_alloc_bpf_trampoline(unsigned int size); void arch_free_bpf_trampoline(void *image, unsigned int size); void arch_protect_bpf_trampoline(void *image, unsigned int size); -void arch_unprotect_bpf_trampoline(void *image, unsigned int size); int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, void *func_addr); -- cgit v1.2.3 From c733239f8f530872a1f80d8c45dcafbaff368737 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 16 Mar 2024 08:35:41 +0100 Subject: bpf: Check return from set_memory_rox() arch_protect_bpf_trampoline() and alloc_new_pack() call set_memory_rox() which can fail, leading to unprotected memory. Take into account return from set_memory_rox() function and add __must_check flag to arch_protect_bpf_trampoline(). Signed-off-by: Christophe Leroy Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/fe1c163c83767fde5cab31d209a4a6be3ddb3a73.1710574353.git.christophe.leroy@csgroup.eu Signed-off-by: Martin KaFai Lau --- arch/arm64/net/bpf_jit_comp.c | 3 ++- arch/x86/net/bpf_jit_comp.c | 3 ++- include/linux/bpf.h | 2 +- kernel/bpf/bpf_struct_ops.c | 8 ++++++-- kernel/bpf/core.c | 28 +++++++++++++++++++++------- kernel/bpf/trampoline.c | 8 +++++--- net/bpf/bpf_dummy_struct_ops.c | 4 +++- 7 files changed, 40 insertions(+), 16 deletions(-) (limited to 'include/linux/bpf.h') diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 132c8ffba109..bc16eb694657 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -2176,8 +2176,9 @@ void arch_free_bpf_trampoline(void *image, unsigned int size) bpf_prog_pack_free(image, size); } -void arch_protect_bpf_trampoline(void *image, unsigned int size) +int arch_protect_bpf_trampoline(void *image, unsigned int size) { + return 0; } int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 7a56d2d84512..4900b1ee019f 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -3004,8 +3004,9 @@ void arch_free_bpf_trampoline(void *image, unsigned int size) bpf_prog_pack_free(image, size); } -void arch_protect_bpf_trampoline(void *image, unsigned int size) +int arch_protect_bpf_trampoline(void *image, unsigned int size) { + return 0; } int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d89bdefb42e2..17843e66a1d3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1116,7 +1116,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i void *func_addr); void *arch_alloc_bpf_trampoline(unsigned int size); void arch_free_bpf_trampoline(void *image, unsigned int size); -void arch_protect_bpf_trampoline(void *image, unsigned int size); +int __must_check arch_protect_bpf_trampoline(void *image, unsigned int size); int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, void *func_addr); diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 3fcd35314ce5..86c7884abaf8 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -740,8 +740,12 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, if (err) goto reset_unlock; } - for (i = 0; i < st_map->image_pages_cnt; i++) - arch_protect_bpf_trampoline(st_map->image_pages[i], PAGE_SIZE); + for (i = 0; i < st_map->image_pages_cnt; i++) { + err = arch_protect_bpf_trampoline(st_map->image_pages[i], + PAGE_SIZE); + if (err) + goto reset_unlock; + } if (st_map->map.map_flags & BPF_F_LINK) { err = 0; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 63f100def31b..5aacb1d3c4cc 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -908,23 +908,30 @@ static LIST_HEAD(pack_list); static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_insns) { struct bpf_prog_pack *pack; + int err; pack = kzalloc(struct_size(pack, bitmap, BITS_TO_LONGS(BPF_PROG_CHUNK_COUNT)), GFP_KERNEL); if (!pack) return NULL; pack->ptr = bpf_jit_alloc_exec(BPF_PROG_PACK_SIZE); - if (!pack->ptr) { - kfree(pack); - return NULL; - } + if (!pack->ptr) + goto out; bpf_fill_ill_insns(pack->ptr, BPF_PROG_PACK_SIZE); bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE); - list_add_tail(&pack->list, &pack_list); set_vm_flush_reset_perms(pack->ptr); - set_memory_rox((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE); + err = set_memory_rox((unsigned long)pack->ptr, + BPF_PROG_PACK_SIZE / PAGE_SIZE); + if (err) + goto out; + list_add_tail(&pack->list, &pack_list); return pack; + +out: + bpf_jit_free_exec(pack->ptr); + kfree(pack); + return NULL; } void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns) @@ -939,9 +946,16 @@ void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns) size = round_up(size, PAGE_SIZE); ptr = bpf_jit_alloc_exec(size); if (ptr) { + int err; + bpf_fill_ill_insns(ptr, size); set_vm_flush_reset_perms(ptr); - set_memory_rox((unsigned long)ptr, size / PAGE_SIZE); + err = set_memory_rox((unsigned long)ptr, + size / PAGE_SIZE); + if (err) { + bpf_jit_free_exec(ptr); + ptr = NULL; + } } goto out; } diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 04fd1abd3661..cc50607f8d8c 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -456,7 +456,9 @@ again: if (err < 0) goto out_free; - arch_protect_bpf_trampoline(im->image, im->size); + err = arch_protect_bpf_trampoline(im->image, im->size); + if (err) + goto out_free; WARN_ON(tr->cur_image && total == 0); if (tr->cur_image) @@ -1072,10 +1074,10 @@ void __weak arch_free_bpf_trampoline(void *image, unsigned int size) bpf_jit_free_exec(image); } -void __weak arch_protect_bpf_trampoline(void *image, unsigned int size) +int __weak arch_protect_bpf_trampoline(void *image, unsigned int size) { WARN_ON_ONCE(size > PAGE_SIZE); - set_memory_rox((long)image, 1); + return set_memory_rox((long)image, 1); } int __weak arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index de33dc1b0daa..25b75844891a 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -133,7 +133,9 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, if (err < 0) goto out; - arch_protect_bpf_trampoline(image, PAGE_SIZE); + err = arch_protect_bpf_trampoline(image, PAGE_SIZE); + if (err) + goto out; prog_ret = dummy_ops_call_op(image, args); err = dummy_ops_copy_args(args); -- cgit v1.2.3 From d4dfc5700e867b22ab94f960f9a9972696a637d5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 19 Mar 2024 16:38:49 -0700 Subject: bpf: pass whole link instead of prog when triggering raw tracepoint Instead of passing prog as an argument to bpf_trace_runX() helpers, that are called from tracepoint triggering calls, store BPF link itself (struct bpf_raw_tp_link for raw tracepoints). This will allow to pass extra information like BPF cookie into raw tracepoint registration. Instead of replacing `struct bpf_prog *prog = __data;` with corresponding `struct bpf_raw_tp_link *link = __data;` assignment in `__bpf_trace_##call` I just passed `__data` through into underlying bpf_trace_runX() call. This works well because we implicitly cast `void *`, and it also avoids naming clashes with arguments coming from tracepoint's "proto" list. We could have run into the same problem with "prog", we just happened to not have a tracepoint that has "prog" input argument. We are less lucky with "link", as there are tracepoints using "link" argument name already. So instead of trying to avoid naming conflicts, let's just remove intermediate local variable. It doesn't hurt readibility, it's either way a bit of a maze of calls and macros, that requires careful reading. Acked-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Message-ID: <20240319233852.1977493-3-andrii@kernel.org> Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 5 +++++ include/linux/trace_events.h | 36 ++++++++++++++++++++---------------- include/trace/bpf_probe.h | 3 +-- kernel/bpf/syscall.c | 9 ++------- kernel/trace/bpf_trace.c | 18 ++++++++++-------- 5 files changed, 38 insertions(+), 33 deletions(-) (limited to 'include/linux/bpf.h') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 17843e66a1d3..2ea8ce59f582 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1607,6 +1607,11 @@ struct bpf_tracing_link { struct bpf_prog *tgt_prog; }; +struct bpf_raw_tp_link { + struct bpf_link link; + struct bpf_raw_event_map *btp; +}; + struct bpf_link_primer { struct bpf_link *link; struct file *file; diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index d68ff9b1247f..a7fc6fb6de3c 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -759,8 +759,11 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx); int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie); void perf_event_detach_bpf_prog(struct perf_event *event); int perf_event_query_prog_array(struct perf_event *event, void __user *info); -int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog); -int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog); + +struct bpf_raw_tp_link; +int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link); +int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link); + struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name); void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp); int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, @@ -788,11 +791,12 @@ perf_event_query_prog_array(struct perf_event *event, void __user *info) { return -EOPNOTSUPP; } -static inline int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *p) +struct bpf_raw_tp_link; +static inline int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link) { return -EOPNOTSUPP; } -static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *p) +static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link) { return -EOPNOTSUPP; } @@ -903,31 +907,31 @@ void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp); int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie); void perf_event_free_bpf_prog(struct perf_event *event); -void bpf_trace_run1(struct bpf_prog *prog, u64 arg1); -void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2); -void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run1(struct bpf_raw_tp_link *link, u64 arg1); +void bpf_trace_run2(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2); +void bpf_trace_run3(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3); -void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run4(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4); -void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run5(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5); -void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run6(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6); -void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run7(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7); -void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run8(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8); -void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run9(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8, u64 arg9); -void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run10(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8, u64 arg9, u64 arg10); -void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run11(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8, u64 arg9, u64 arg10, u64 arg11); -void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2, +void bpf_trace_run12(struct bpf_raw_tp_link *link, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12); void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index e609cd7da47e..a2ea11cc912e 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -46,8 +46,7 @@ static notrace void \ __bpf_trace_##call(void *__data, proto) \ { \ - struct bpf_prog *prog = __data; \ - CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(prog, CAST_TO_U64(args)); \ + CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(__data, CAST_TO_U64(args)); \ } #undef DECLARE_EVENT_CLASS diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ae2ff73bde7e..1cb4c3809af4 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3469,17 +3469,12 @@ out_put_prog: return err; } -struct bpf_raw_tp_link { - struct bpf_link link; - struct bpf_raw_event_map *btp; -}; - static void bpf_raw_tp_link_release(struct bpf_link *link) { struct bpf_raw_tp_link *raw_tp = container_of(link, struct bpf_raw_tp_link, link); - bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog); + bpf_probe_unregister(raw_tp->btp, raw_tp); bpf_put_raw_tracepoint(raw_tp->btp); } @@ -3833,7 +3828,7 @@ static int bpf_raw_tp_link_attach(struct bpf_prog *prog, goto out_put_btp; } - err = bpf_probe_register(link->btp, prog); + err = bpf_probe_register(link->btp, link); if (err) { bpf_link_cleanup(&link_primer); goto out_put_btp; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 30ecf62f8a17..17de91ad4a1f 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2366,8 +2366,10 @@ void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) } static __always_inline -void __bpf_trace_run(struct bpf_prog *prog, u64 *args) +void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args) { + struct bpf_prog *prog = link->link.prog; + cant_sleep(); if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) { bpf_prog_inc_misses_counter(prog); @@ -2404,12 +2406,12 @@ out: #define __SEQ_0_11 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 #define BPF_TRACE_DEFN_x(x) \ - void bpf_trace_run##x(struct bpf_prog *prog, \ + void bpf_trace_run##x(struct bpf_raw_tp_link *link, \ REPEAT(x, SARG, __DL_COM, __SEQ_0_11)) \ { \ u64 args[x]; \ REPEAT(x, COPY, __DL_SEM, __SEQ_0_11); \ - __bpf_trace_run(prog, args); \ + __bpf_trace_run(link, args); \ } \ EXPORT_SYMBOL_GPL(bpf_trace_run##x) BPF_TRACE_DEFN_x(1); @@ -2425,9 +2427,10 @@ BPF_TRACE_DEFN_x(10); BPF_TRACE_DEFN_x(11); BPF_TRACE_DEFN_x(12); -int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog) +int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link) { struct tracepoint *tp = btp->tp; + struct bpf_prog *prog = link->link.prog; /* * check that program doesn't access arguments beyond what's @@ -2439,13 +2442,12 @@ int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog) if (prog->aux->max_tp_access > btp->writable_size) return -EINVAL; - return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func, - prog); + return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func, link); } -int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog) +int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_raw_tp_link *link) { - return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog); + return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, link); } int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, -- cgit v1.2.3 From 68ca5d4eebb8c4de246ee5f634eee26bc689562d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 19 Mar 2024 16:38:50 -0700 Subject: bpf: support BPF cookie in raw tracepoint (raw_tp, tp_btf) programs Wire up BPF cookie for raw tracepoint programs (both BTF and non-BTF aware variants). This brings them up to part w.r.t. BPF cookie usage with classic tracepoint and fentry/fexit programs. Acked-by: Stanislav Fomichev Acked-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Message-ID: <20240319233852.1977493-4-andrii@kernel.org> Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 6 ++++-- kernel/bpf/syscall.c | 13 +++++++++---- kernel/trace/bpf_trace.c | 13 +++++++++++++ tools/include/uapi/linux/bpf.h | 1 + 5 files changed, 28 insertions(+), 6 deletions(-) (limited to 'include/linux/bpf.h') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2ea8ce59f582..62762390c93d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1610,6 +1610,7 @@ struct bpf_tracing_link { struct bpf_raw_tp_link { struct bpf_link link; struct bpf_raw_event_map *btp; + u64 cookie; }; struct bpf_link_primer { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3c42b9f1bada..9585f5345353 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1662,8 +1662,10 @@ union bpf_attr { } query; struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ - __u64 name; - __u32 prog_fd; + __u64 name; + __u32 prog_fd; + __u32 :32; + __aligned_u64 cookie; } raw_tracepoint; struct { /* anonymous struct for BPF_BTF_LOAD */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 1cb4c3809af4..e44c276e8617 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3774,7 +3774,7 @@ static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *pro #endif /* CONFIG_PERF_EVENTS */ static int bpf_raw_tp_link_attach(struct bpf_prog *prog, - const char __user *user_tp_name) + const char __user *user_tp_name, u64 cookie) { struct bpf_link_primer link_primer; struct bpf_raw_tp_link *link; @@ -3821,6 +3821,7 @@ static int bpf_raw_tp_link_attach(struct bpf_prog *prog, bpf_link_init(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT, &bpf_raw_tp_link_lops, prog); link->btp = btp; + link->cookie = cookie; err = bpf_link_prime(&link->link, &link_primer); if (err) { @@ -3841,11 +3842,13 @@ out_put_btp: return err; } -#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd +#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.cookie static int bpf_raw_tracepoint_open(const union bpf_attr *attr) { struct bpf_prog *prog; + void __user *tp_name; + __u64 cookie; int fd; if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) @@ -3855,7 +3858,9 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) if (IS_ERR(prog)) return PTR_ERR(prog); - fd = bpf_raw_tp_link_attach(prog, u64_to_user_ptr(attr->raw_tracepoint.name)); + tp_name = u64_to_user_ptr(attr->raw_tracepoint.name); + cookie = attr->raw_tracepoint.cookie; + fd = bpf_raw_tp_link_attach(prog, tp_name, cookie); if (fd < 0) bpf_prog_put(prog); return fd; @@ -5193,7 +5198,7 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) goto out; } if (prog->expected_attach_type == BPF_TRACE_RAW_TP) - ret = bpf_raw_tp_link_attach(prog, NULL); + ret = bpf_raw_tp_link_attach(prog, NULL, attr->link_create.tracing.cookie); else if (prog->expected_attach_type == BPF_TRACE_ITER) ret = bpf_iter_link_attach(attr, uattr, prog); else if (prog->expected_attach_type == BPF_LSM_CGROUP) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 17de91ad4a1f..434e3ece6688 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2004,6 +2004,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_stackid_proto_raw_tp; case BPF_FUNC_get_stack: return &bpf_get_stack_proto_raw_tp; + case BPF_FUNC_get_attach_cookie: + return &bpf_get_attach_cookie_proto_tracing; default: return bpf_tracing_func_proto(func_id, prog); } @@ -2066,6 +2068,9 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_func_arg_cnt: return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_cnt_proto : NULL; case BPF_FUNC_get_attach_cookie: + if (prog->type == BPF_PROG_TYPE_TRACING && + prog->expected_attach_type == BPF_TRACE_RAW_TP) + return &bpf_get_attach_cookie_proto_tracing; return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto_tracing : NULL; default: fn = raw_tp_prog_func_proto(func_id, prog); @@ -2369,15 +2374,23 @@ static __always_inline void __bpf_trace_run(struct bpf_raw_tp_link *link, u64 *args) { struct bpf_prog *prog = link->link.prog; + struct bpf_run_ctx *old_run_ctx; + struct bpf_trace_run_ctx run_ctx; cant_sleep(); if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) { bpf_prog_inc_misses_counter(prog); goto out; } + + run_ctx.bpf_cookie = link->cookie; + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + rcu_read_lock(); (void) bpf_prog_run(prog, args); rcu_read_unlock(); + + bpf_reset_run_ctx(old_run_ctx); out: this_cpu_dec(*(prog->active)); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3c42b9f1bada..bf80b614c4db 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1664,6 +1664,7 @@ union bpf_attr { struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */ __u64 name; __u32 prog_fd; + __aligned_u64 cookie; } raw_tracepoint; struct { /* anonymous struct for BPF_BTF_LOAD */ -- cgit v1.2.3 From 1a80dbcb2dbaf6e4c216e62e30fa7d3daa8001ce Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 27 Mar 2024 22:24:26 -0700 Subject: bpf: support deferring bpf_link dealloc to after RCU grace period BPF link for some program types is passed as a "context" which can be used by those BPF programs to look up additional information. E.g., for multi-kprobes and multi-uprobes, link is used to fetch BPF cookie values. Because of this runtime dependency, when bpf_link refcnt drops to zero there could still be active BPF programs running accessing link data. This patch adds generic support to defer bpf_link dealloc callback to after RCU GP, if requested. This is done by exposing two different deallocation callbacks, one synchronous and one deferred. If deferred one is provided, bpf_link_free() will schedule dealloc_deferred() callback to happen after RCU GP. BPF is using two flavors of RCU: "classic" non-sleepable one and RCU tasks trace one. The latter is used when sleepable BPF programs are used. bpf_link_free() accommodates that by checking underlying BPF program's sleepable flag, and goes either through normal RCU GP only for non-sleepable, or through RCU tasks trace GP *and* then normal RCU GP (taking into account rcu_trace_implies_rcu_gp() optimization), if BPF program is sleepable. We use this for multi-kprobe and multi-uprobe links, which dereference link during program run. We also preventively switch raw_tp link to use deferred dealloc callback, as upcoming changes in bpf-next tree expose raw_tp link data (specifically, cookie value) to BPF program at runtime as well. Fixes: 0dcac2725406 ("bpf: Add multi kprobe link") Fixes: 89ae89f53d20 ("bpf: Add multi uprobe link") Reported-by: syzbot+981935d9485a560bfbcb@syzkaller.appspotmail.com Reported-by: syzbot+2cb5a6c573e98db598cc@syzkaller.appspotmail.com Reported-by: syzbot+62d8b26793e8a2bd0516@syzkaller.appspotmail.com Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Link: https://lore.kernel.org/r/20240328052426.3042617-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 16 +++++++++++++++- kernel/bpf/syscall.c | 35 ++++++++++++++++++++++++++++++++--- kernel/trace/bpf_trace.c | 4 ++-- 3 files changed, 49 insertions(+), 6 deletions(-) (limited to 'include/linux/bpf.h') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4f20f62f9d63..890e152d553e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1574,12 +1574,26 @@ struct bpf_link { enum bpf_link_type type; const struct bpf_link_ops *ops; struct bpf_prog *prog; - struct work_struct work; + /* rcu is used before freeing, work can be used to schedule that + * RCU-based freeing before that, so they never overlap + */ + union { + struct rcu_head rcu; + struct work_struct work; + }; }; struct bpf_link_ops { void (*release)(struct bpf_link *link); + /* deallocate link resources callback, called without RCU grace period + * waiting + */ void (*dealloc)(struct bpf_link *link); + /* deallocate link resources callback, called after RCU grace period; + * if underlying BPF program is sleepable we go through tasks trace + * RCU GP and then "classic" RCU GP + */ + void (*dealloc_deferred)(struct bpf_link *link); int (*detach)(struct bpf_link *link); int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ae2ff73bde7e..c287925471f6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3024,17 +3024,46 @@ void bpf_link_inc(struct bpf_link *link) atomic64_inc(&link->refcnt); } +static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu) +{ + struct bpf_link *link = container_of(rcu, struct bpf_link, rcu); + + /* free bpf_link and its containing memory */ + link->ops->dealloc_deferred(link); +} + +static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) +{ + if (rcu_trace_implies_rcu_gp()) + bpf_link_defer_dealloc_rcu_gp(rcu); + else + call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp); +} + /* bpf_link_free is guaranteed to be called from process context */ static void bpf_link_free(struct bpf_link *link) { + bool sleepable = false; + bpf_link_free_id(link->id); if (link->prog) { + sleepable = link->prog->sleepable; /* detach BPF program, clean up used resources */ link->ops->release(link); bpf_prog_put(link->prog); } - /* free bpf_link and its containing memory */ - link->ops->dealloc(link); + if (link->ops->dealloc_deferred) { + /* schedule BPF link deallocation; if underlying BPF program + * is sleepable, we need to first wait for RCU tasks trace + * sync, then go through "classic" RCU grace period + */ + if (sleepable) + call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp); + else + call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); + } + if (link->ops->dealloc) + link->ops->dealloc(link); } static void bpf_link_put_deferred(struct work_struct *work) @@ -3544,7 +3573,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, static const struct bpf_link_ops bpf_raw_tp_link_lops = { .release = bpf_raw_tp_link_release, - .dealloc = bpf_raw_tp_link_dealloc, + .dealloc_deferred = bpf_raw_tp_link_dealloc, .show_fdinfo = bpf_raw_tp_link_show_fdinfo, .fill_link_info = bpf_raw_tp_link_fill_link_info, }; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 0b73fe5f7206..9dc605f08a23 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2728,7 +2728,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link, static const struct bpf_link_ops bpf_kprobe_multi_link_lops = { .release = bpf_kprobe_multi_link_release, - .dealloc = bpf_kprobe_multi_link_dealloc, + .dealloc_deferred = bpf_kprobe_multi_link_dealloc, .fill_link_info = bpf_kprobe_multi_link_fill_link_info, }; @@ -3242,7 +3242,7 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link, static const struct bpf_link_ops bpf_uprobe_multi_link_lops = { .release = bpf_uprobe_multi_link_release, - .dealloc = bpf_uprobe_multi_link_dealloc, + .dealloc_deferred = bpf_uprobe_multi_link_dealloc, .fill_link_info = bpf_uprobe_multi_link_fill_link_info, }; -- cgit v1.2.3 From 699c23f02c65cbfc3e638f14ce0d70c23a2e1f02 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 9 Apr 2024 21:35:27 -0700 Subject: bpf: Add bpf_link support for sk_msg and sk_skb progs Add bpf_link support for sk_msg and sk_skb programs. We have an internal request to support bpf_link for sk_msg programs so user space can have a uniform handling with bpf_link based libbpf APIs. Using bpf_link based libbpf API also has a benefit which makes system robust by decoupling prog life cycle and attachment life cycle. Reviewed-by: John Fastabend Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20240410043527.3737160-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 6 + include/linux/skmsg.h | 4 + include/uapi/linux/bpf.h | 5 + kernel/bpf/syscall.c | 4 + net/core/sock_map.c | 263 ++++++++++++++++++++++++++++++++++++++--- tools/include/uapi/linux/bpf.h | 5 + 6 files changed, 271 insertions(+), 16 deletions(-) (limited to 'include/linux/bpf.h') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 62762390c93d..5034c1b4ded7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2996,6 +2996,7 @@ int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); int sock_map_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); +int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog); void sock_map_unhash(struct sock *sk); void sock_map_destroy(struct sock *sk); @@ -3094,6 +3095,11 @@ static inline int sock_map_bpf_prog_query(const union bpf_attr *attr, { return -EINVAL; } + +static inline int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_BPF_SYSCALL */ #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index e65ec3fd2799..9c8dd4c01412 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -58,6 +58,10 @@ struct sk_psock_progs { struct bpf_prog *stream_parser; struct bpf_prog *stream_verdict; struct bpf_prog *skb_verdict; + struct bpf_link *msg_parser_link; + struct bpf_link *stream_parser_link; + struct bpf_link *stream_verdict_link; + struct bpf_link *skb_verdict_link; }; enum sk_psock_state_bits { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6fe9f11c8abe..cee0a7915c08 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1135,6 +1135,7 @@ enum bpf_link_type { BPF_LINK_TYPE_TCX = 11, BPF_LINK_TYPE_UPROBE_MULTI = 12, BPF_LINK_TYPE_NETKIT = 13, + BPF_LINK_TYPE_SOCKMAP = 14, __MAX_BPF_LINK_TYPE, }; @@ -6724,6 +6725,10 @@ struct bpf_link_info { __u32 ifindex; __u32 attach_type; } netkit; + struct { + __u32 map_id; + __u32 attach_type; + } sockmap; }; } __attribute__((aligned(8))); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e44c276e8617..7d392ec83655 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5213,6 +5213,10 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) case BPF_PROG_TYPE_SK_LOOKUP: ret = netns_bpf_link_create(attr, prog); break; + case BPF_PROG_TYPE_SK_MSG: + case BPF_PROG_TYPE_SK_SKB: + ret = sock_map_link_create(attr, prog); + break; #ifdef CONFIG_NET case BPF_PROG_TYPE_XDP: ret = bpf_xdp_link_attach(attr, prog); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 27d733c0f65e..63c016b4c169 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -24,8 +24,16 @@ struct bpf_stab { #define SOCK_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) +/* This mutex is used to + * - protect race between prog/link attach/detach and link prog update, and + * - protect race between releasing and accessing map in bpf_link. + * A single global mutex lock is used since it is expected contention is low. + */ +static DEFINE_MUTEX(sockmap_mutex); + static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - struct bpf_prog *old, u32 which); + struct bpf_prog *old, struct bpf_link *link, + u32 which); static struct sk_psock_progs *sock_map_progs(struct bpf_map *map); static struct bpf_map *sock_map_alloc(union bpf_attr *attr) @@ -71,7 +79,9 @@ int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - ret = sock_map_prog_update(map, prog, NULL, attr->attach_type); + mutex_lock(&sockmap_mutex); + ret = sock_map_prog_update(map, prog, NULL, NULL, attr->attach_type); + mutex_unlock(&sockmap_mutex); fdput(f); return ret; } @@ -103,7 +113,9 @@ int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) goto put_prog; } - ret = sock_map_prog_update(map, NULL, prog, attr->attach_type); + mutex_lock(&sockmap_mutex); + ret = sock_map_prog_update(map, NULL, prog, NULL, attr->attach_type); + mutex_unlock(&sockmap_mutex); put_prog: bpf_prog_put(prog); put_map: @@ -1454,55 +1466,84 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map) return NULL; } -static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog, - u32 which) +static int sock_map_prog_link_lookup(struct bpf_map *map, struct bpf_prog ***pprog, + struct bpf_link ***plink, u32 which) { struct sk_psock_progs *progs = sock_map_progs(map); + struct bpf_prog **cur_pprog; + struct bpf_link **cur_plink; if (!progs) return -EOPNOTSUPP; switch (which) { case BPF_SK_MSG_VERDICT: - *pprog = &progs->msg_parser; + cur_pprog = &progs->msg_parser; + cur_plink = &progs->msg_parser_link; break; #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) case BPF_SK_SKB_STREAM_PARSER: - *pprog = &progs->stream_parser; + cur_pprog = &progs->stream_parser; + cur_plink = &progs->stream_parser_link; break; #endif case BPF_SK_SKB_STREAM_VERDICT: if (progs->skb_verdict) return -EBUSY; - *pprog = &progs->stream_verdict; + cur_pprog = &progs->stream_verdict; + cur_plink = &progs->stream_verdict_link; break; case BPF_SK_SKB_VERDICT: if (progs->stream_verdict) return -EBUSY; - *pprog = &progs->skb_verdict; + cur_pprog = &progs->skb_verdict; + cur_plink = &progs->skb_verdict_link; break; default: return -EOPNOTSUPP; } + *pprog = cur_pprog; + if (plink) + *plink = cur_plink; return 0; } +/* Handle the following four cases: + * prog_attach: prog != NULL, old == NULL, link == NULL + * prog_detach: prog == NULL, old != NULL, link == NULL + * link_attach: prog != NULL, old == NULL, link != NULL + * link_detach: prog == NULL, old != NULL, link != NULL + */ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - struct bpf_prog *old, u32 which) + struct bpf_prog *old, struct bpf_link *link, + u32 which) { struct bpf_prog **pprog; + struct bpf_link **plink; int ret; - ret = sock_map_prog_lookup(map, &pprog, which); + ret = sock_map_prog_link_lookup(map, &pprog, &plink, which); if (ret) return ret; - if (old) - return psock_replace_prog(pprog, prog, old); + /* for prog_attach/prog_detach/link_attach, return error if a bpf_link + * exists for that prog. + */ + if ((!link || prog) && *plink) + return -EBUSY; - psock_set_prog(pprog, prog); - return 0; + if (old) { + ret = psock_replace_prog(pprog, prog, old); + if (!ret) + *plink = NULL; + } else { + psock_set_prog(pprog, prog); + if (link) + *plink = link; + } + + return ret; } int sock_map_bpf_prog_query(const union bpf_attr *attr, @@ -1527,7 +1568,7 @@ int sock_map_bpf_prog_query(const union bpf_attr *attr, rcu_read_lock(); - ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type); + ret = sock_map_prog_link_lookup(map, &pprog, NULL, attr->query.attach_type); if (ret) goto end; @@ -1657,6 +1698,196 @@ void sock_map_close(struct sock *sk, long timeout) } EXPORT_SYMBOL_GPL(sock_map_close); +struct sockmap_link { + struct bpf_link link; + struct bpf_map *map; + enum bpf_attach_type attach_type; +}; + +static void sock_map_link_release(struct bpf_link *link) +{ + struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link); + + mutex_lock(&sockmap_mutex); + if (!sockmap_link->map) + goto out; + + WARN_ON_ONCE(sock_map_prog_update(sockmap_link->map, NULL, link->prog, link, + sockmap_link->attach_type)); + + bpf_map_put_with_uref(sockmap_link->map); + sockmap_link->map = NULL; +out: + mutex_unlock(&sockmap_mutex); +} + +static int sock_map_link_detach(struct bpf_link *link) +{ + sock_map_link_release(link); + return 0; +} + +static void sock_map_link_dealloc(struct bpf_link *link) +{ + kfree(link); +} + +/* Handle the following two cases: + * case 1: link != NULL, prog != NULL, old != NULL + * case 2: link != NULL, prog != NULL, old == NULL + */ +static int sock_map_link_update_prog(struct bpf_link *link, + struct bpf_prog *prog, + struct bpf_prog *old) +{ + const struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link); + struct bpf_prog **pprog, *old_link_prog; + struct bpf_link **plink; + int ret = 0; + + mutex_lock(&sockmap_mutex); + + /* If old prog is not NULL, ensure old prog is the same as link->prog. */ + if (old && link->prog != old) { + ret = -EPERM; + goto out; + } + /* Ensure link->prog has the same type/attach_type as the new prog. */ + if (link->prog->type != prog->type || + link->prog->expected_attach_type != prog->expected_attach_type) { + ret = -EINVAL; + goto out; + } + + ret = sock_map_prog_link_lookup(sockmap_link->map, &pprog, &plink, + sockmap_link->attach_type); + if (ret) + goto out; + + /* return error if the stored bpf_link does not match the incoming bpf_link. */ + if (link != *plink) { + ret = -EBUSY; + goto out; + } + + if (old) { + ret = psock_replace_prog(pprog, prog, old); + if (ret) + goto out; + } else { + psock_set_prog(pprog, prog); + } + + bpf_prog_inc(prog); + old_link_prog = xchg(&link->prog, prog); + bpf_prog_put(old_link_prog); + +out: + mutex_unlock(&sockmap_mutex); + return ret; +} + +static u32 sock_map_link_get_map_id(const struct sockmap_link *sockmap_link) +{ + u32 map_id = 0; + + mutex_lock(&sockmap_mutex); + if (sockmap_link->map) + map_id = sockmap_link->map->id; + mutex_unlock(&sockmap_mutex); + return map_id; +} + +static int sock_map_link_fill_info(const struct bpf_link *link, + struct bpf_link_info *info) +{ + const struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link); + u32 map_id = sock_map_link_get_map_id(sockmap_link); + + info->sockmap.map_id = map_id; + info->sockmap.attach_type = sockmap_link->attach_type; + return 0; +} + +static void sock_map_link_show_fdinfo(const struct bpf_link *link, + struct seq_file *seq) +{ + const struct sockmap_link *sockmap_link = container_of(link, struct sockmap_link, link); + u32 map_id = sock_map_link_get_map_id(sockmap_link); + + seq_printf(seq, "map_id:\t%u\n", map_id); + seq_printf(seq, "attach_type:\t%u\n", sockmap_link->attach_type); +} + +static const struct bpf_link_ops sock_map_link_ops = { + .release = sock_map_link_release, + .dealloc = sock_map_link_dealloc, + .detach = sock_map_link_detach, + .update_prog = sock_map_link_update_prog, + .fill_link_info = sock_map_link_fill_info, + .show_fdinfo = sock_map_link_show_fdinfo, +}; + +int sock_map_link_create(const union bpf_attr *attr, struct bpf_prog *prog) +{ + struct bpf_link_primer link_primer; + struct sockmap_link *sockmap_link; + enum bpf_attach_type attach_type; + struct bpf_map *map; + int ret; + + if (attr->link_create.flags) + return -EINVAL; + + map = bpf_map_get_with_uref(attr->link_create.target_fd); + if (IS_ERR(map)) + return PTR_ERR(map); + if (map->map_type != BPF_MAP_TYPE_SOCKMAP && map->map_type != BPF_MAP_TYPE_SOCKHASH) { + ret = -EINVAL; + goto out; + } + + sockmap_link = kzalloc(sizeof(*sockmap_link), GFP_USER); + if (!sockmap_link) { + ret = -ENOMEM; + goto out; + } + + attach_type = attr->link_create.attach_type; + bpf_link_init(&sockmap_link->link, BPF_LINK_TYPE_SOCKMAP, &sock_map_link_ops, prog); + sockmap_link->map = map; + sockmap_link->attach_type = attach_type; + + ret = bpf_link_prime(&sockmap_link->link, &link_primer); + if (ret) { + kfree(sockmap_link); + goto out; + } + + mutex_lock(&sockmap_mutex); + ret = sock_map_prog_update(map, prog, NULL, &sockmap_link->link, attach_type); + mutex_unlock(&sockmap_mutex); + if (ret) { + bpf_link_cleanup(&link_primer); + goto out; + } + + /* Increase refcnt for the prog since when old prog is replaced with + * psock_replace_prog() and psock_set_prog() its refcnt will be decreased. + * + * Actually, we do not need to increase refcnt for the prog since bpf_link + * will hold a reference. But in order to have less complexity w.r.t. + * replacing/setting prog, let us increase the refcnt to make things simpler. + */ + bpf_prog_inc(prog); + + return bpf_link_settle(&link_primer); + +out: + bpf_map_put_with_uref(map); + return ret; +} + static int sock_map_iter_attach_target(struct bpf_prog *prog, union bpf_iter_link_info *linfo, struct bpf_iter_aux_info *aux) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6fe9f11c8abe..cee0a7915c08 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1135,6 +1135,7 @@ enum bpf_link_type { BPF_LINK_TYPE_TCX = 11, BPF_LINK_TYPE_UPROBE_MULTI = 12, BPF_LINK_TYPE_NETKIT = 13, + BPF_LINK_TYPE_SOCKMAP = 14, __MAX_BPF_LINK_TYPE, }; @@ -6724,6 +6725,10 @@ struct bpf_link_info { __u32 ifindex; __u32 attach_type; } netkit; + struct { + __u32 map_id; + __u32 attach_type; + } sockmap; }; } __attribute__((aligned(8))); -- cgit v1.2.3 From d56b63cf0c0f71e1b2e04dd8220b408f049e67ff Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Sat, 20 Apr 2024 11:09:05 +0200 Subject: bpf: add support for bpf_wq user type Mostly a copy/paste from the bpf_timer API, without the initialization and free, as they will be done in a separate patch. Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20240420-bpf_wq-v2-5-6c986a5a741f@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 11 ++++++++++- include/uapi/linux/bpf.h | 4 ++++ kernel/bpf/btf.c | 17 +++++++++++++++++ kernel/bpf/syscall.c | 6 +++++- kernel/bpf/verifier.c | 9 +++++++++ 5 files changed, 45 insertions(+), 2 deletions(-) (limited to 'include/linux/bpf.h') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5034c1b4ded7..c7dcfd395555 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -185,7 +185,7 @@ struct bpf_map_ops { enum { /* Support at most 10 fields in a BTF type */ - BTF_FIELDS_MAX = 10, + BTF_FIELDS_MAX = 11, }; enum btf_field_type { @@ -202,6 +202,7 @@ enum btf_field_type { BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE, BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD, BPF_REFCOUNT = (1 << 9), + BPF_WORKQUEUE = (1 << 10), }; typedef void (*btf_dtor_kfunc_t)(void *); @@ -238,6 +239,7 @@ struct btf_record { u32 field_mask; int spin_lock_off; int timer_off; + int wq_off; int refcount_off; struct btf_field fields[]; }; @@ -312,6 +314,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) return "bpf_spin_lock"; case BPF_TIMER: return "bpf_timer"; + case BPF_WORKQUEUE: + return "bpf_wq"; case BPF_KPTR_UNREF: case BPF_KPTR_REF: return "kptr"; @@ -340,6 +344,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type) return sizeof(struct bpf_spin_lock); case BPF_TIMER: return sizeof(struct bpf_timer); + case BPF_WORKQUEUE: + return sizeof(struct bpf_wq); case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: @@ -367,6 +373,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type) return __alignof__(struct bpf_spin_lock); case BPF_TIMER: return __alignof__(struct bpf_timer); + case BPF_WORKQUEUE: + return __alignof__(struct bpf_wq); case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: @@ -406,6 +414,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) /* RB_ROOT_CACHED 0-inits, no need to do anything after memset */ case BPF_SPIN_LOCK: case BPF_TIMER: + case BPF_WORKQUEUE: case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index cee0a7915c08..e4ae83550fb3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7306,6 +7306,10 @@ struct bpf_timer { __u64 __opaque[2]; } __attribute__((aligned(8))); +struct bpf_wq { + __u64 __opaque[2]; +} __attribute__((aligned(8))); + struct bpf_dynptr { __u64 __opaque[2]; } __attribute__((aligned(8))); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 6d46cee47ae3..8291fbfd27b1 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3464,6 +3464,15 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask, goto end; } } + if (field_mask & BPF_WORKQUEUE) { + if (!strcmp(name, "bpf_wq")) { + if (*seen_mask & BPF_WORKQUEUE) + return -E2BIG; + *seen_mask |= BPF_WORKQUEUE; + type = BPF_WORKQUEUE; + goto end; + } + } field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head"); field_mask_test_name(BPF_LIST_NODE, "bpf_list_node"); field_mask_test_name(BPF_RB_ROOT, "bpf_rb_root"); @@ -3515,6 +3524,7 @@ static int btf_find_struct_field(const struct btf *btf, switch (field_type) { case BPF_SPIN_LOCK: case BPF_TIMER: + case BPF_WORKQUEUE: case BPF_LIST_NODE: case BPF_RB_NODE: case BPF_REFCOUNT: @@ -3582,6 +3592,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t, switch (field_type) { case BPF_SPIN_LOCK: case BPF_TIMER: + case BPF_WORKQUEUE: case BPF_LIST_NODE: case BPF_RB_NODE: case BPF_REFCOUNT: @@ -3816,6 +3827,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type rec->spin_lock_off = -EINVAL; rec->timer_off = -EINVAL; + rec->wq_off = -EINVAL; rec->refcount_off = -EINVAL; for (i = 0; i < cnt; i++) { field_type_size = btf_field_type_size(info_arr[i].type); @@ -3846,6 +3858,11 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type /* Cache offset for faster lookup at runtime */ rec->timer_off = rec->fields[i].offset; break; + case BPF_WORKQUEUE: + WARN_ON_ONCE(rec->wq_off >= 0); + /* Cache offset for faster lookup at runtime */ + rec->wq_off = rec->fields[i].offset; + break; case BPF_REFCOUNT: WARN_ON_ONCE(rec->refcount_off >= 0); /* Cache offset for faster lookup at runtime */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7d392ec83655..0848e4141b00 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -559,6 +559,7 @@ void btf_record_free(struct btf_record *rec) case BPF_SPIN_LOCK: case BPF_TIMER: case BPF_REFCOUNT: + case BPF_WORKQUEUE: /* Nothing to release */ break; default: @@ -608,6 +609,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec) case BPF_SPIN_LOCK: case BPF_TIMER: case BPF_REFCOUNT: + case BPF_WORKQUEUE: /* Nothing to acquire */ break; default: @@ -679,6 +681,8 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) case BPF_TIMER: bpf_timer_cancel_and_free(field_ptr); break; + case BPF_WORKQUEUE: + break; case BPF_KPTR_UNREF: WRITE_ONCE(*(u64 *)field_ptr, 0); break; @@ -1085,7 +1089,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token, map->record = btf_parse_fields(btf, value_type, BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD | - BPF_RB_ROOT | BPF_REFCOUNT, + BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE, map->value_size); if (!IS_ERR_OR_NULL(map->record)) { int i; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5a7e34e83a5b..89490a95b120 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1838,6 +1838,8 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) */ if (btf_record_has_field(map->inner_map_meta->record, BPF_TIMER)) reg->map_uid = reg->id; + if (btf_record_has_field(map->inner_map_meta->record, BPF_WORKQUEUE)) + reg->map_uid = reg->id; } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { reg->type = PTR_TO_XDP_SOCK; } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP || @@ -18141,6 +18143,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, } } + if (btf_record_has_field(map->record, BPF_WORKQUEUE)) { + if (is_tracing_prog_type(prog_type)) { + verbose(env, "tracing progs cannot use bpf_wq yet\n"); + return -EINVAL; + } + } + if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) && !bpf_offload_prog_map_match(prog, map)) { verbose(env, "offload device mismatch between prog and map\n"); -- cgit v1.2.3 From 246331e3f1eac905170a923f0ec76725c2558232 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Sat, 20 Apr 2024 11:09:09 +0200 Subject: bpf: allow struct bpf_wq to be embedded in arraymaps and hashmaps Currently bpf_wq_cancel_and_free() is just a placeholder as there is no memory allocation for bpf_wq just yet. Again, duplication of the bpf_timer approach Signed-off-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20240420-bpf_wq-v2-9-6c986a5a741f@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 ++ kernel/bpf/arraymap.c | 18 ++++++++++------- kernel/bpf/hashtab.c | 55 ++++++++++++++++++++++++++++++++++++++++----------- kernel/bpf/helpers.c | 8 ++++++++ kernel/bpf/syscall.c | 9 +++++++++ 5 files changed, 73 insertions(+), 19 deletions(-) (limited to 'include/linux/bpf.h') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c7dcfd395555..64bf0cf3ee95 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -534,6 +534,7 @@ static inline void zero_map_value(struct bpf_map *map, void *dst) void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); void bpf_timer_cancel_and_free(void *timer); +void bpf_wq_cancel_and_free(void *timer); void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock); void bpf_rb_root_free(const struct btf_field *field, void *rb_root, @@ -2204,6 +2205,7 @@ void bpf_map_free_record(struct bpf_map *map); struct btf_record *btf_record_dup(const struct btf_record *rec); bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b); void bpf_obj_free_timer(const struct btf_record *rec, void *obj); +void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj); void bpf_obj_free_fields(const struct btf_record *rec, void *obj); void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 8c1e6d7654bb..580d07b15471 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -428,17 +428,21 @@ static void *array_map_vmalloc_addr(struct bpf_array *array) return (void *)round_down((unsigned long)array, PAGE_SIZE); } -static void array_map_free_timers(struct bpf_map *map) +static void array_map_free_timers_wq(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; - /* We don't reset or free fields other than timer on uref dropping to zero. */ - if (!btf_record_has_field(map->record, BPF_TIMER)) - return; + /* We don't reset or free fields other than timer and workqueue + * on uref dropping to zero. + */ + if (btf_record_has_field(map->record, BPF_TIMER)) + for (i = 0; i < array->map.max_entries; i++) + bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i)); - for (i = 0; i < array->map.max_entries; i++) - bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i)); + if (btf_record_has_field(map->record, BPF_WORKQUEUE)) + for (i = 0; i < array->map.max_entries; i++) + bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i)); } /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ @@ -782,7 +786,7 @@ const struct bpf_map_ops array_map_ops = { .map_alloc = array_map_alloc, .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, - .map_release_uref = array_map_free_timers, + .map_release_uref = array_map_free_timers_wq, .map_lookup_elem = array_map_lookup_elem, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index c3e79a0b9361..0179183c543a 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -240,6 +240,26 @@ static void htab_free_prealloced_timers(struct bpf_htab *htab) } } +static void htab_free_prealloced_wq(struct bpf_htab *htab) +{ + u32 num_entries = htab->map.max_entries; + int i; + + if (!btf_record_has_field(htab->map.record, BPF_WORKQUEUE)) + return; + if (htab_has_extra_elems(htab)) + num_entries += num_possible_cpus(); + + for (i = 0; i < num_entries; i++) { + struct htab_elem *elem; + + elem = get_htab_elem(htab, i); + bpf_obj_free_workqueue(htab->map.record, + elem->key + round_up(htab->map.key_size, 8)); + cond_resched(); + } +} + static void htab_free_prealloced_fields(struct bpf_htab *htab) { u32 num_entries = htab->map.max_entries; @@ -1495,7 +1515,7 @@ static void delete_all_elements(struct bpf_htab *htab) migrate_enable(); } -static void htab_free_malloced_timers(struct bpf_htab *htab) +static void htab_free_malloced_timers_or_wq(struct bpf_htab *htab, bool is_timer) { int i; @@ -1507,24 +1527,35 @@ static void htab_free_malloced_timers(struct bpf_htab *htab) hlist_nulls_for_each_entry(l, n, head, hash_node) { /* We only free timer on uref dropping to zero */ - bpf_obj_free_timer(htab->map.record, l->key + round_up(htab->map.key_size, 8)); + if (is_timer) + bpf_obj_free_timer(htab->map.record, + l->key + round_up(htab->map.key_size, 8)); + else + bpf_obj_free_workqueue(htab->map.record, + l->key + round_up(htab->map.key_size, 8)); } cond_resched_rcu(); } rcu_read_unlock(); } -static void htab_map_free_timers(struct bpf_map *map) +static void htab_map_free_timers_and_wq(struct bpf_map *map) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - /* We only free timer on uref dropping to zero */ - if (!btf_record_has_field(htab->map.record, BPF_TIMER)) - return; - if (!htab_is_prealloc(htab)) - htab_free_malloced_timers(htab); - else - htab_free_prealloced_timers(htab); + /* We only free timer and workqueue on uref dropping to zero */ + if (btf_record_has_field(htab->map.record, BPF_TIMER)) { + if (!htab_is_prealloc(htab)) + htab_free_malloced_timers_or_wq(htab, true); + else + htab_free_prealloced_timers(htab); + } + if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE)) { + if (!htab_is_prealloc(htab)) + htab_free_malloced_timers_or_wq(htab, false); + else + htab_free_prealloced_wq(htab); + } } /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ @@ -2260,7 +2291,7 @@ const struct bpf_map_ops htab_map_ops = { .map_alloc = htab_map_alloc, .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, - .map_release_uref = htab_map_free_timers, + .map_release_uref = htab_map_free_timers_and_wq, .map_lookup_elem = htab_map_lookup_elem, .map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem, .map_update_elem = htab_map_update_elem, @@ -2281,7 +2312,7 @@ const struct bpf_map_ops htab_lru_map_ops = { .map_alloc = htab_map_alloc, .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, - .map_release_uref = htab_map_free_timers, + .map_release_uref = htab_map_free_timers_and_wq, .map_lookup_elem = htab_lru_map_lookup_elem, .map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem, .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 0b8ba6c81994..fe827f6e5234 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1468,6 +1468,14 @@ void bpf_timer_cancel_and_free(void *val) kfree_rcu(t, cb.rcu); } +/* This function is called by map_delete/update_elem for individual element and + * by ops->map_release_uref when the user space reference to a map reaches zero. + */ +void bpf_wq_cancel_and_free(void *val) +{ + BTF_TYPE_EMIT(struct bpf_wq); +} + BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) { unsigned long *kptr = map_value; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0848e4141b00..63e368337483 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -661,6 +661,13 @@ void bpf_obj_free_timer(const struct btf_record *rec, void *obj) bpf_timer_cancel_and_free(obj + rec->timer_off); } +void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj) +{ + if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_WORKQUEUE))) + return; + bpf_wq_cancel_and_free(obj + rec->wq_off); +} + void bpf_obj_free_fields(const struct btf_record *rec, void *obj) { const struct btf_field *fields; @@ -682,6 +689,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) bpf_timer_cancel_and_free(field_ptr); break; case BPF_WORKQUEUE: + bpf_wq_cancel_and_free(field_ptr); break; case BPF_KPTR_UNREF: WRITE_ONCE(*(u64 *)field_ptr, 0); @@ -1119,6 +1127,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token, } break; case BPF_TIMER: + case BPF_WORKQUEUE: if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_LRU_HASH && map->map_type != BPF_MAP_TYPE_ARRAY) { -- cgit v1.2.3 From 95c07d58250ca3ed01855c20be568cf04e15382f Mon Sep 17 00:00:00 2001 From: Haiyue Wang Date: Wed, 24 Apr 2024 13:45:07 +0800 Subject: bpf: update the comment for BTF_FIELDS_MAX The commit d56b63cf0c0f ("bpf: add support for bpf_wq user type") changes the fields support number to 11, just sync the comment. Signed-off-by: Haiyue Wang Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20240424054526.8031-1-haiyue.wang@intel.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/bpf.h') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 64bf0cf3ee95..978200f6d925 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -184,7 +184,7 @@ struct bpf_map_ops { }; enum { - /* Support at most 10 fields in a BTF type */ + /* Support at most 11 fields in a BTF type */ BTF_FIELDS_MAX = 11, }; -- cgit v1.2.3 From 3e1c6f35409f9e447bf37f64840f5b65576bfb78 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Mon, 22 Apr 2024 15:50:21 -0700 Subject: bpf: make common crypto API for TC/XDP programs Add crypto API support to BPF to be able to decrypt or encrypt packets in TC/XDP BPF programs. Special care should be taken for initialization part of crypto algo because crypto alloc) doesn't work with preemtion disabled, it can be run only in sleepable BPF program. Also async crypto is not supported because of the very same issue - TC/XDP BPF programs are not sleepable. Signed-off-by: Vadim Fedorenko Link: https://lore.kernel.org/r/20240422225024.2847039-2-vadfed@meta.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 1 + include/linux/bpf_crypto.h | 24 +++ kernel/bpf/Makefile | 3 + kernel/bpf/crypto.c | 385 +++++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/helpers.c | 2 +- kernel/bpf/verifier.c | 1 + 6 files changed, 415 insertions(+), 1 deletion(-) create mode 100644 include/linux/bpf_crypto.h create mode 100644 kernel/bpf/crypto.c (limited to 'include/linux/bpf.h') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 978200f6d925..364563b74db6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1275,6 +1275,7 @@ int bpf_dynptr_check_size(u32 size); u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len); void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len); +bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr); #ifdef CONFIG_BPF_JIT int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); diff --git a/include/linux/bpf_crypto.h b/include/linux/bpf_crypto.h new file mode 100644 index 000000000000..a41e71d4e2d9 --- /dev/null +++ b/include/linux/bpf_crypto.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#ifndef _BPF_CRYPTO_H +#define _BPF_CRYPTO_H + +struct bpf_crypto_type { + void *(*alloc_tfm)(const char *algo); + void (*free_tfm)(void *tfm); + int (*has_algo)(const char *algo); + int (*setkey)(void *tfm, const u8 *key, unsigned int keylen); + int (*setauthsize)(void *tfm, unsigned int authsize); + int (*encrypt)(void *tfm, const u8 *src, u8 *dst, unsigned int len, u8 *iv); + int (*decrypt)(void *tfm, const u8 *src, u8 *dst, unsigned int len, u8 *iv); + unsigned int (*ivsize)(void *tfm); + unsigned int (*statesize)(void *tfm); + u32 (*get_flags)(void *tfm); + struct module *owner; + char name[14]; +}; + +int bpf_crypto_register_type(const struct bpf_crypto_type *type); +int bpf_crypto_unregister_type(const struct bpf_crypto_type *type); + +#endif /* _BPF_CRYPTO_H */ diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 368c5d86b5b7..736bd22e5ce0 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -44,6 +44,9 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_struct_ops.o obj-$(CONFIG_BPF_SYSCALL) += cpumask.o obj-${CONFIG_BPF_LSM} += bpf_lsm.o endif +ifeq ($(CONFIG_CRYPTO),y) +obj-$(CONFIG_BPF_SYSCALL) += crypto.o +endif obj-$(CONFIG_BPF_PRELOAD) += preload/ obj-$(CONFIG_BPF_SYSCALL) += relo_core.o diff --git a/kernel/bpf/crypto.c b/kernel/bpf/crypto.c new file mode 100644 index 000000000000..2bee4af91e38 --- /dev/null +++ b/kernel/bpf/crypto.c @@ -0,0 +1,385 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2024 Meta, Inc */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct bpf_crypto_type_list { + const struct bpf_crypto_type *type; + struct list_head list; +}; + +/* BPF crypto initialization parameters struct */ +/** + * struct bpf_crypto_params - BPF crypto initialization parameters structure + * @type: The string of crypto operation type. + * @reserved: Reserved member, will be reused for more options in future + * Values: + * 0 + * @algo: The string of algorithm to initialize. + * @key: The cipher key used to init crypto algorithm. + * @key_len: The length of cipher key. + * @authsize: The length of authentication tag used by algorithm. + */ +struct bpf_crypto_params { + char type[14]; + u8 reserved[2]; + char algo[128]; + u8 key[256]; + u32 key_len; + u32 authsize; +}; + +static LIST_HEAD(bpf_crypto_types); +static DECLARE_RWSEM(bpf_crypto_types_sem); + +/** + * struct bpf_crypto_ctx - refcounted BPF crypto context structure + * @type: The pointer to bpf crypto type + * @tfm: The pointer to instance of crypto API struct. + * @siv_len: Size of IV and state storage for cipher + * @rcu: The RCU head used to free the crypto context with RCU safety. + * @usage: Object reference counter. When the refcount goes to 0, the + * memory is released back to the BPF allocator, which provides + * RCU safety. + */ +struct bpf_crypto_ctx { + const struct bpf_crypto_type *type; + void *tfm; + u32 siv_len; + struct rcu_head rcu; + refcount_t usage; +}; + +int bpf_crypto_register_type(const struct bpf_crypto_type *type) +{ + struct bpf_crypto_type_list *node; + int err = -EEXIST; + + down_write(&bpf_crypto_types_sem); + list_for_each_entry(node, &bpf_crypto_types, list) { + if (!strcmp(node->type->name, type->name)) + goto unlock; + } + + node = kmalloc(sizeof(*node), GFP_KERNEL); + err = -ENOMEM; + if (!node) + goto unlock; + + node->type = type; + list_add(&node->list, &bpf_crypto_types); + err = 0; + +unlock: + up_write(&bpf_crypto_types_sem); + + return err; +} +EXPORT_SYMBOL_GPL(bpf_crypto_register_type); + +int bpf_crypto_unregister_type(const struct bpf_crypto_type *type) +{ + struct bpf_crypto_type_list *node; + int err = -ENOENT; + + down_write(&bpf_crypto_types_sem); + list_for_each_entry(node, &bpf_crypto_types, list) { + if (strcmp(node->type->name, type->name)) + continue; + + list_del(&node->list); + kfree(node); + err = 0; + break; + } + up_write(&bpf_crypto_types_sem); + + return err; +} +EXPORT_SYMBOL_GPL(bpf_crypto_unregister_type); + +static const struct bpf_crypto_type *bpf_crypto_get_type(const char *name) +{ + const struct bpf_crypto_type *type = ERR_PTR(-ENOENT); + struct bpf_crypto_type_list *node; + + down_read(&bpf_crypto_types_sem); + list_for_each_entry(node, &bpf_crypto_types, list) { + if (strcmp(node->type->name, name)) + continue; + + if (try_module_get(node->type->owner)) + type = node->type; + break; + } + up_read(&bpf_crypto_types_sem); + + return type; +} + +__bpf_kfunc_start_defs(); + +/** + * bpf_crypto_ctx_create() - Create a mutable BPF crypto context. + * + * Allocates a crypto context that can be used, acquired, and released by + * a BPF program. The crypto context returned by this function must either + * be embedded in a map as a kptr, or freed with bpf_crypto_ctx_release(). + * As crypto API functions use GFP_KERNEL allocations, this function can + * only be used in sleepable BPF programs. + * + * bpf_crypto_ctx_create() allocates memory for crypto context. + * It may return NULL if no memory is available. + * @params: pointer to struct bpf_crypto_params which contains all the + * details needed to initialise crypto context. + * @params__sz: size of steuct bpf_crypto_params usef by bpf program + * @err: integer to store error code when NULL is returned. + */ +__bpf_kfunc struct bpf_crypto_ctx * +bpf_crypto_ctx_create(const struct bpf_crypto_params *params, u32 params__sz, + int *err) +{ + const struct bpf_crypto_type *type; + struct bpf_crypto_ctx *ctx; + + if (!params || params->reserved[0] || params->reserved[1] || + params__sz != sizeof(struct bpf_crypto_params)) { + *err = -EINVAL; + return NULL; + } + + type = bpf_crypto_get_type(params->type); + if (IS_ERR(type)) { + *err = PTR_ERR(type); + return NULL; + } + + if (!type->has_algo(params->algo)) { + *err = -EOPNOTSUPP; + goto err_module_put; + } + + if (!!params->authsize ^ !!type->setauthsize) { + *err = -EOPNOTSUPP; + goto err_module_put; + } + + if (!params->key_len || params->key_len > sizeof(params->key)) { + *err = -EINVAL; + goto err_module_put; + } + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + *err = -ENOMEM; + goto err_module_put; + } + + ctx->type = type; + ctx->tfm = type->alloc_tfm(params->algo); + if (IS_ERR(ctx->tfm)) { + *err = PTR_ERR(ctx->tfm); + goto err_free_ctx; + } + + if (params->authsize) { + *err = type->setauthsize(ctx->tfm, params->authsize); + if (*err) + goto err_free_tfm; + } + + *err = type->setkey(ctx->tfm, params->key, params->key_len); + if (*err) + goto err_free_tfm; + + if (type->get_flags(ctx->tfm) & CRYPTO_TFM_NEED_KEY) { + *err = -EINVAL; + goto err_free_tfm; + } + + ctx->siv_len = type->ivsize(ctx->tfm) + type->statesize(ctx->tfm); + + refcount_set(&ctx->usage, 1); + + return ctx; + +err_free_tfm: + type->free_tfm(ctx->tfm); +err_free_ctx: + kfree(ctx); +err_module_put: + module_put(type->owner); + + return NULL; +} + +static void crypto_free_cb(struct rcu_head *head) +{ + struct bpf_crypto_ctx *ctx; + + ctx = container_of(head, struct bpf_crypto_ctx, rcu); + ctx->type->free_tfm(ctx->tfm); + module_put(ctx->type->owner); + kfree(ctx); +} + +/** + * bpf_crypto_ctx_acquire() - Acquire a reference to a BPF crypto context. + * @ctx: The BPF crypto context being acquired. The ctx must be a trusted + * pointer. + * + * Acquires a reference to a BPF crypto context. The context returned by this function + * must either be embedded in a map as a kptr, or freed with + * bpf_crypto_ctx_release(). + */ +__bpf_kfunc struct bpf_crypto_ctx * +bpf_crypto_ctx_acquire(struct bpf_crypto_ctx *ctx) +{ + if (!refcount_inc_not_zero(&ctx->usage)) + return NULL; + return ctx; +} + +/** + * bpf_crypto_ctx_release() - Release a previously acquired BPF crypto context. + * @ctx: The crypto context being released. + * + * Releases a previously acquired reference to a BPF crypto context. When the final + * reference of the BPF crypto context has been released, its memory + * will be released. + */ +__bpf_kfunc void bpf_crypto_ctx_release(struct bpf_crypto_ctx *ctx) +{ + if (refcount_dec_and_test(&ctx->usage)) + call_rcu(&ctx->rcu, crypto_free_cb); +} + +static int bpf_crypto_crypt(const struct bpf_crypto_ctx *ctx, + const struct bpf_dynptr_kern *src, + const struct bpf_dynptr_kern *dst, + const struct bpf_dynptr_kern *siv, + bool decrypt) +{ + u32 src_len, dst_len, siv_len; + const u8 *psrc; + u8 *pdst, *piv; + int err; + + if (__bpf_dynptr_is_rdonly(dst)) + return -EINVAL; + + siv_len = __bpf_dynptr_size(siv); + src_len = __bpf_dynptr_size(src); + dst_len = __bpf_dynptr_size(dst); + if (!src_len || !dst_len) + return -EINVAL; + + if (siv_len != ctx->siv_len) + return -EINVAL; + + psrc = __bpf_dynptr_data(src, src_len); + if (!psrc) + return -EINVAL; + pdst = __bpf_dynptr_data_rw(dst, dst_len); + if (!pdst) + return -EINVAL; + + piv = siv_len ? __bpf_dynptr_data_rw(siv, siv_len) : NULL; + if (siv_len && !piv) + return -EINVAL; + + err = decrypt ? ctx->type->decrypt(ctx->tfm, psrc, pdst, src_len, piv) + : ctx->type->encrypt(ctx->tfm, psrc, pdst, src_len, piv); + + return err; +} + +/** + * bpf_crypto_decrypt() - Decrypt buffer using configured context and IV provided. + * @ctx: The crypto context being used. The ctx must be a trusted pointer. + * @src: bpf_dynptr to the encrypted data. Must be a trusted pointer. + * @dst: bpf_dynptr to the buffer where to store the result. Must be a trusted pointer. + * @siv: bpf_dynptr to IV data and state data to be used by decryptor. + * + * Decrypts provided buffer using IV data and the crypto context. Crypto context must be configured. + */ +__bpf_kfunc int bpf_crypto_decrypt(struct bpf_crypto_ctx *ctx, + const struct bpf_dynptr_kern *src, + const struct bpf_dynptr_kern *dst, + const struct bpf_dynptr_kern *siv) +{ + return bpf_crypto_crypt(ctx, src, dst, siv, true); +} + +/** + * bpf_crypto_encrypt() - Encrypt buffer using configured context and IV provided. + * @ctx: The crypto context being used. The ctx must be a trusted pointer. + * @src: bpf_dynptr to the plain data. Must be a trusted pointer. + * @dst: bpf_dynptr to buffer where to store the result. Must be a trusted pointer. + * @siv: bpf_dynptr to IV data and state data to be used by decryptor. + * + * Encrypts provided buffer using IV data and the crypto context. Crypto context must be configured. + */ +__bpf_kfunc int bpf_crypto_encrypt(struct bpf_crypto_ctx *ctx, + const struct bpf_dynptr_kern *src, + const struct bpf_dynptr_kern *dst, + const struct bpf_dynptr_kern *siv) +{ + return bpf_crypto_crypt(ctx, src, dst, siv, false); +} + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(crypt_init_kfunc_btf_ids) +BTF_ID_FLAGS(func, bpf_crypto_ctx_create, KF_ACQUIRE | KF_RET_NULL | KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_crypto_ctx_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_crypto_ctx_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL) +BTF_KFUNCS_END(crypt_init_kfunc_btf_ids) + +static const struct btf_kfunc_id_set crypt_init_kfunc_set = { + .owner = THIS_MODULE, + .set = &crypt_init_kfunc_btf_ids, +}; + +BTF_KFUNCS_START(crypt_kfunc_btf_ids) +BTF_ID_FLAGS(func, bpf_crypto_decrypt, KF_RCU) +BTF_ID_FLAGS(func, bpf_crypto_encrypt, KF_RCU) +BTF_KFUNCS_END(crypt_kfunc_btf_ids) + +static const struct btf_kfunc_id_set crypt_kfunc_set = { + .owner = THIS_MODULE, + .set = &crypt_kfunc_btf_ids, +}; + +BTF_ID_LIST(bpf_crypto_dtor_ids) +BTF_ID(struct, bpf_crypto_ctx) +BTF_ID(func, bpf_crypto_ctx_release) + +static int __init crypto_kfunc_init(void) +{ + int ret; + const struct btf_id_dtor_kfunc bpf_crypto_dtors[] = { + { + .btf_id = bpf_crypto_dtor_ids[0], + .kfunc_btf_id = bpf_crypto_dtor_ids[1] + }, + }; + + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &crypt_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_ACT, &crypt_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &crypt_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, + &crypt_init_kfunc_set); + return ret ?: register_btf_id_dtor_kfuncs(bpf_crypto_dtors, + ARRAY_SIZE(bpf_crypto_dtors), + THIS_MODULE); +} + +late_initcall(crypto_kfunc_init); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 14c401b78e12..2a69a9a36c0f 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1583,7 +1583,7 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = { #define DYNPTR_SIZE_MASK 0xFFFFFF #define DYNPTR_RDONLY_BIT BIT(31) -static bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr) +bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr) { return ptr->size & DYNPTR_RDONLY_BIT; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8312c7a0ee19..4e474ef44e9c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5310,6 +5310,7 @@ BTF_ID(struct, cgroup) BTF_ID(struct, bpf_cpumask) #endif BTF_ID(struct, task_struct) +BTF_ID(struct, bpf_crypto_ctx) BTF_SET_END(rcu_protected_types) static bool rcu_protected_object(const struct btf *btf, u32 btf_id) -- cgit v1.2.3 From 2c321f3f70bc284510598f712b702ce8d60c4d14 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Sun, 14 Apr 2024 19:07:31 -0700 Subject: mm: change inlined allocation helpers to account at the call site Main goal of memory allocation profiling patchset is to provide accounting that is cheap enough to run in production. To achieve that we inject counters using codetags at the allocation call sites to account every time allocation is made. This injection allows us to perform accounting efficiently because injected counters are immediately available as opposed to the alternative methods, such as using _RET_IP_, which would require counter lookup and appropriate locking that makes accounting much more expensive. This method requires all allocation functions to inject separate counters at their call sites so that their callers can be individually accounted. Counter injection is implemented by allocation hooks which should wrap all allocation functions. Inlined functions which perform allocations but do not use allocation hooks are directly charged for the allocations they perform. In most cases these functions are just specialized allocation wrappers used from multiple places to allocate objects of a specific type. It would be more useful to do the accounting at their call sites instead. Instrument these helpers to do accounting at the call site. Simple inlined allocation wrappers are converted directly into macros. More complex allocators or allocators with documentation are converted into _noprof versions and allocation hooks are added. This allows memory allocation profiling mechanism to charge allocations to the callers of these functions. Link: https://lkml.kernel.org/r/20240415020731.1152108-1-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Jan Kara [jbd2] Cc: Anna Schumaker Cc: Arnd Bergmann Cc: Benjamin Tissoires Cc: Christoph Lameter Cc: David Rientjes Cc: David S. Miller Cc: Dennis Zhou Cc: Eric Dumazet Cc: Herbert Xu Cc: Jakub Kicinski Cc: Jakub Sitnicki Cc: Jiri Kosina Cc: Joerg Roedel Cc: Joonsoo Kim Cc: Kent Overstreet Cc: Matthew Wilcox (Oracle) Cc: Paolo Abeni Cc: Pekka Enberg Cc: Tejun Heo Cc: Theodore Ts'o Cc: Trond Myklebust Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton --- drivers/iommu/amd/amd_iommu.h | 5 +++-- fs/nfs/iostat.h | 5 +---- include/acpi/platform/aclinuxex.h | 19 ++++++------------ include/asm-generic/pgalloc.h | 35 ++++++++++++++++++++------------- include/crypto/hash.h | 7 ++++--- include/crypto/internal/acompress.h | 5 +++-- include/crypto/skcipher.h | 7 ++++--- include/linux/bpf.h | 33 ++++++++----------------------- include/linux/bpfptr.h | 5 +++-- include/linux/dma-fence-chain.h | 6 ++---- include/linux/hid_bpf.h | 6 ++---- include/linux/jbd2.h | 12 ++++------- include/linux/mm.h | 5 +++-- include/linux/mm_types.h | 5 +++-- include/linux/percpu.h | 3 +++ include/linux/ptr_ring.h | 28 +++++++++++++++----------- include/linux/skb_array.h | 19 ++++++++++-------- include/linux/skbuff.h | 20 ++++++++----------- include/linux/skmsg.h | 8 +++----- include/linux/slab.h | 5 +++++ include/linux/sockptr.h | 10 ++++++---- include/net/netlabel.h | 16 +++++++++------ include/net/netlink.h | 5 +++-- include/net/request_sock.h | 5 +++-- include/net/tcx.h | 5 +++-- net/sunrpc/auth_gss/auth_gss_internal.h | 6 ++++-- 26 files changed, 142 insertions(+), 143 deletions(-) (limited to 'include/linux/bpf.h') diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index f482aab420f7..52575ba9a141 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -134,13 +134,14 @@ static inline int get_pci_sbdf_id(struct pci_dev *pdev) return PCI_SEG_DEVID_TO_SBDF(seg, devid); } -static inline void *alloc_pgtable_page(int nid, gfp_t gfp) +static inline void *alloc_pgtable_page_noprof(int nid, gfp_t gfp) { struct page *page; - page = alloc_pages_node(nid, gfp | __GFP_ZERO, 0); + page = alloc_pages_node_noprof(nid, gfp | __GFP_ZERO, 0); return page ? page_address(page) : NULL; } +#define alloc_pgtable_page(...) alloc_hooks(alloc_pgtable_page_noprof(__VA_ARGS__)) /* * This must be called after device probe completes. During probe diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index 5aa776b5a3e7..b17a9eb9b148 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h @@ -46,10 +46,7 @@ static inline void nfs_add_stats(const struct inode *inode, nfs_add_server_stats(NFS_SERVER(inode), stat, addend); } -static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void) -{ - return alloc_percpu(struct nfs_iostats); -} +#define nfs_alloc_iostats() alloc_percpu(struct nfs_iostats) static inline void nfs_free_iostats(struct nfs_iostats __percpu *stats) { diff --git a/include/acpi/platform/aclinuxex.h b/include/acpi/platform/aclinuxex.h index 600d4e2641da..62cac266a1c8 100644 --- a/include/acpi/platform/aclinuxex.h +++ b/include/acpi/platform/aclinuxex.h @@ -47,26 +47,19 @@ acpi_status acpi_os_terminate(void); * However, boot has (system_state != SYSTEM_RUNNING) * to quiet __might_sleep() in kmalloc() and resume does not. */ -static inline void *acpi_os_allocate(acpi_size size) -{ - return kmalloc(size, irqs_disabled()? GFP_ATOMIC : GFP_KERNEL); -} +#define acpi_os_allocate(_size) \ + kmalloc(_size, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL) -static inline void *acpi_os_allocate_zeroed(acpi_size size) -{ - return kzalloc(size, irqs_disabled()? GFP_ATOMIC : GFP_KERNEL); -} +#define acpi_os_allocate_zeroed(_size) \ + kzalloc(_size, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL) static inline void acpi_os_free(void *memory) { kfree(memory); } -static inline void *acpi_os_acquire_object(acpi_cache_t * cache) -{ - return kmem_cache_zalloc(cache, - irqs_disabled()? GFP_ATOMIC : GFP_KERNEL); -} +#define acpi_os_acquire_object(_cache) \ + kmem_cache_zalloc(_cache, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL) static inline acpi_thread_id acpi_os_get_thread_id(void) { diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h index 879e5f8aa5e9..7c48f5fbf8aa 100644 --- a/include/asm-generic/pgalloc.h +++ b/include/asm-generic/pgalloc.h @@ -16,15 +16,16 @@ * * Return: pointer to the allocated memory or %NULL on error */ -static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm) +static inline pte_t *__pte_alloc_one_kernel_noprof(struct mm_struct *mm) { - struct ptdesc *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & + struct ptdesc *ptdesc = pagetable_alloc_noprof(GFP_PGTABLE_KERNEL & ~__GFP_HIGHMEM, 0); if (!ptdesc) return NULL; return ptdesc_address(ptdesc); } +#define __pte_alloc_one_kernel(...) alloc_hooks(__pte_alloc_one_kernel_noprof(__VA_ARGS__)) #ifndef __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL /** @@ -33,10 +34,11 @@ static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm) * * Return: pointer to the allocated memory or %NULL on error */ -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) +static inline pte_t *pte_alloc_one_kernel_noprof(struct mm_struct *mm) { - return __pte_alloc_one_kernel(mm); + return __pte_alloc_one_kernel_noprof(mm); } +#define pte_alloc_one_kernel(...) alloc_hooks(pte_alloc_one_kernel_noprof(__VA_ARGS__)) #endif /** @@ -61,11 +63,11 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) * * Return: `struct page` referencing the ptdesc or %NULL on error */ -static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp) +static inline pgtable_t __pte_alloc_one_noprof(struct mm_struct *mm, gfp_t gfp) { struct ptdesc *ptdesc; - ptdesc = pagetable_alloc(gfp, 0); + ptdesc = pagetable_alloc_noprof(gfp, 0); if (!ptdesc) return NULL; if (!pagetable_pte_ctor(ptdesc)) { @@ -75,6 +77,7 @@ static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp) return ptdesc_page(ptdesc); } +#define __pte_alloc_one(...) alloc_hooks(__pte_alloc_one_noprof(__VA_ARGS__)) #ifndef __HAVE_ARCH_PTE_ALLOC_ONE /** @@ -85,10 +88,11 @@ static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp) * * Return: `struct page` referencing the ptdesc or %NULL on error */ -static inline pgtable_t pte_alloc_one(struct mm_struct *mm) +static inline pgtable_t pte_alloc_one_noprof(struct mm_struct *mm) { - return __pte_alloc_one(mm, GFP_PGTABLE_USER); + return __pte_alloc_one_noprof(mm, GFP_PGTABLE_USER); } +#define pte_alloc_one(...) alloc_hooks(pte_alloc_one_noprof(__VA_ARGS__)) #endif /* @@ -124,14 +128,14 @@ static inline void pte_free(struct mm_struct *mm, struct page *pte_page) * * Return: pointer to the allocated memory or %NULL on error */ -static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +static inline pmd_t *pmd_alloc_one_noprof(struct mm_struct *mm, unsigned long addr) { struct ptdesc *ptdesc; gfp_t gfp = GFP_PGTABLE_USER; if (mm == &init_mm) gfp = GFP_PGTABLE_KERNEL; - ptdesc = pagetable_alloc(gfp, 0); + ptdesc = pagetable_alloc_noprof(gfp, 0); if (!ptdesc) return NULL; if (!pagetable_pmd_ctor(ptdesc)) { @@ -140,6 +144,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) } return ptdesc_address(ptdesc); } +#define pmd_alloc_one(...) alloc_hooks(pmd_alloc_one_noprof(__VA_ARGS__)) #endif #ifndef __HAVE_ARCH_PMD_FREE @@ -157,7 +162,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) #if CONFIG_PGTABLE_LEVELS > 3 -static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr) +static inline pud_t *__pud_alloc_one_noprof(struct mm_struct *mm, unsigned long addr) { gfp_t gfp = GFP_PGTABLE_USER; struct ptdesc *ptdesc; @@ -166,13 +171,14 @@ static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr) gfp = GFP_PGTABLE_KERNEL; gfp &= ~__GFP_HIGHMEM; - ptdesc = pagetable_alloc(gfp, 0); + ptdesc = pagetable_alloc_noprof(gfp, 0); if (!ptdesc) return NULL; pagetable_pud_ctor(ptdesc); return ptdesc_address(ptdesc); } +#define __pud_alloc_one(...) alloc_hooks(__pud_alloc_one_noprof(__VA_ARGS__)) #ifndef __HAVE_ARCH_PUD_ALLOC_ONE /** @@ -184,10 +190,11 @@ static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr) * * Return: pointer to the allocated memory or %NULL on error */ -static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +static inline pud_t *pud_alloc_one_noprof(struct mm_struct *mm, unsigned long addr) { - return __pud_alloc_one(mm, addr); + return __pud_alloc_one_noprof(mm, addr); } +#define pud_alloc_one(...) alloc_hooks(pud_alloc_one_noprof(__VA_ARGS__)) #endif static inline void __pud_free(struct mm_struct *mm, pud_t *pud) diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 5d61f576cfc8..e5181cc9b7c5 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -578,19 +578,20 @@ static inline void ahash_request_set_tfm(struct ahash_request *req, * * Return: allocated request handle in case of success, or NULL if out of memory */ -static inline struct ahash_request *ahash_request_alloc( +static inline struct ahash_request *ahash_request_alloc_noprof( struct crypto_ahash *tfm, gfp_t gfp) { struct ahash_request *req; - req = kmalloc(sizeof(struct ahash_request) + - crypto_ahash_reqsize(tfm), gfp); + req = kmalloc_noprof(sizeof(struct ahash_request) + + crypto_ahash_reqsize(tfm), gfp); if (likely(req)) ahash_request_set_tfm(req, tfm); return req; } +#define ahash_request_alloc(...) alloc_hooks(ahash_request_alloc_noprof(__VA_ARGS__)) /** * ahash_request_free() - zeroize and free the request data structure diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index 4ac46bafba9d..2a67793f52ad 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -69,15 +69,16 @@ static inline void acomp_request_complete(struct acomp_req *req, crypto_request_complete(&req->base, err); } -static inline struct acomp_req *__acomp_request_alloc(struct crypto_acomp *tfm) +static inline struct acomp_req *__acomp_request_alloc_noprof(struct crypto_acomp *tfm) { struct acomp_req *req; - req = kzalloc(sizeof(*req) + crypto_acomp_reqsize(tfm), GFP_KERNEL); + req = kzalloc_noprof(sizeof(*req) + crypto_acomp_reqsize(tfm), GFP_KERNEL); if (likely(req)) acomp_request_set_tfm(req, tfm); return req; } +#define __acomp_request_alloc(...) alloc_hooks(__acomp_request_alloc_noprof(__VA_ARGS__)) static inline void __acomp_request_free(struct acomp_req *req) { diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h index c8857d7bdb37..6c5330e316b0 100644 --- a/include/crypto/skcipher.h +++ b/include/crypto/skcipher.h @@ -861,19 +861,20 @@ static inline struct skcipher_request *skcipher_request_cast( * * Return: allocated request handle in case of success, or NULL if out of memory */ -static inline struct skcipher_request *skcipher_request_alloc( +static inline struct skcipher_request *skcipher_request_alloc_noprof( struct crypto_skcipher *tfm, gfp_t gfp) { struct skcipher_request *req; - req = kmalloc(sizeof(struct skcipher_request) + - crypto_skcipher_reqsize(tfm), gfp); + req = kmalloc_noprof(sizeof(struct skcipher_request) + + crypto_skcipher_reqsize(tfm), gfp); if (likely(req)) skcipher_request_set_tfm(req, tfm); return req; } +#define skcipher_request_alloc(...) alloc_hooks(skcipher_request_alloc_noprof(__VA_ARGS__)) /** * skcipher_request_free() - zeroize and free request data structure diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 890e152d553e..a86da1f38b3c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2244,31 +2244,14 @@ void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align, gfp_t flags); #else -static inline void * -bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, - int node) -{ - return kmalloc_node(size, flags, node); -} - -static inline void * -bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) -{ - return kzalloc(size, flags); -} - -static inline void * -bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, gfp_t flags) -{ - return kvcalloc(n, size, flags); -} - -static inline void __percpu * -bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align, - gfp_t flags) -{ - return __alloc_percpu_gfp(size, align, flags); -} +#define bpf_map_kmalloc_node(_map, _size, _flags, _node) \ + kmalloc_node(_size, _flags, _node) +#define bpf_map_kzalloc(_map, _size, _flags) \ + kzalloc(_size, _flags) +#define bpf_map_kvcalloc(_map, _n, _size, _flags) \ + kvcalloc(_n, _size, _flags) +#define bpf_map_alloc_percpu(_map, _size, _align, _flags) \ + __alloc_percpu_gfp(_size, _align, _flags) #endif static inline int diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h index 79b2f78eec1a..1af241525a17 100644 --- a/include/linux/bpfptr.h +++ b/include/linux/bpfptr.h @@ -65,9 +65,9 @@ static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset, return copy_to_sockptr_offset((sockptr_t) dst, offset, src, size); } -static inline void *kvmemdup_bpfptr(bpfptr_t src, size_t len) +static inline void *kvmemdup_bpfptr_noprof(bpfptr_t src, size_t len) { - void *p = kvmalloc(len, GFP_USER | __GFP_NOWARN); + void *p = kvmalloc_noprof(len, GFP_USER | __GFP_NOWARN); if (!p) return ERR_PTR(-ENOMEM); @@ -77,6 +77,7 @@ static inline void *kvmemdup_bpfptr(bpfptr_t src, size_t len) } return p; } +#define kvmemdup_bpfptr(...) alloc_hooks(kvmemdup_bpfptr_noprof(__VA_ARGS__)) static inline long strncpy_from_bpfptr(char *dst, bpfptr_t src, size_t count) { diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h index 4bdf0b96da28..ad9e2506c2f4 100644 --- a/include/linux/dma-fence-chain.h +++ b/include/linux/dma-fence-chain.h @@ -86,10 +86,8 @@ dma_fence_chain_contained(struct dma_fence *fence) * * Returns a new struct dma_fence_chain object or NULL on failure. */ -static inline struct dma_fence_chain *dma_fence_chain_alloc(void) -{ - return kmalloc(sizeof(struct dma_fence_chain), GFP_KERNEL); -}; +#define dma_fence_chain_alloc() \ + ((struct dma_fence_chain *)kmalloc(sizeof(struct dma_fence_chain), GFP_KERNEL)) /** * dma_fence_chain_free diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 7118ac28d468..ca70eeb6393e 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -149,10 +149,8 @@ static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; } static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {} static inline void hid_bpf_destroy_device(struct hid_device *hid) {} static inline void hid_bpf_device_init(struct hid_device *hid) {} -static inline u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *size) -{ - return kmemdup(rdesc, *size, GFP_KERNEL); -} +#define call_hid_bpf_rdesc_fixup(_hdev, _rdesc, _size) \ + ((u8 *)kmemdup(_rdesc, *(_size), GFP_KERNEL)) #endif /* CONFIG_HID_BPF */ diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 971f3e826e15..9512fe332668 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1586,10 +1586,8 @@ void jbd2_journal_put_journal_head(struct journal_head *jh); */ extern struct kmem_cache *jbd2_handle_cache; -static inline handle_t *jbd2_alloc_handle(gfp_t gfp_flags) -{ - return kmem_cache_zalloc(jbd2_handle_cache, gfp_flags); -} +#define jbd2_alloc_handle(_gfp_flags) \ + ((handle_t *)kmem_cache_zalloc(jbd2_handle_cache, _gfp_flags)) static inline void jbd2_free_handle(handle_t *handle) { @@ -1602,10 +1600,8 @@ static inline void jbd2_free_handle(handle_t *handle) */ extern struct kmem_cache *jbd2_inode_cache; -static inline struct jbd2_inode *jbd2_alloc_inode(gfp_t gfp_flags) -{ - return kmem_cache_alloc(jbd2_inode_cache, gfp_flags); -} +#define jbd2_alloc_inode(_gfp_flags) \ + ((struct jbd2_inode *)kmem_cache_alloc(jbd2_inode_cache, _gfp_flags)) static inline void jbd2_free_inode(struct jbd2_inode *jinode) { diff --git a/include/linux/mm.h b/include/linux/mm.h index 0dbd737cfabd..60eabc6c8e00 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2860,12 +2860,13 @@ static inline bool pagetable_is_reserved(struct ptdesc *pt) * * Return: The ptdesc describing the allocated page tables. */ -static inline struct ptdesc *pagetable_alloc(gfp_t gfp, unsigned int order) +static inline struct ptdesc *pagetable_alloc_noprof(gfp_t gfp, unsigned int order) { - struct page *page = alloc_pages(gfp | __GFP_COMP, order); + struct page *page = alloc_pages_noprof(gfp | __GFP_COMP, order); return page_ptdesc(page); } +#define pagetable_alloc(...) alloc_hooks(pagetable_alloc_noprof(__VA_ARGS__)) /** * pagetable_free - Free pagetables diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 4ae4684d1add..6fef56938a17 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1170,14 +1170,15 @@ static inline void mm_init_cid(struct mm_struct *mm) cpumask_clear(mm_cidmask(mm)); } -static inline int mm_alloc_cid(struct mm_struct *mm) +static inline int mm_alloc_cid_noprof(struct mm_struct *mm) { - mm->pcpu_cid = alloc_percpu(struct mm_cid); + mm->pcpu_cid = alloc_percpu_noprof(struct mm_cid); if (!mm->pcpu_cid) return -ENOMEM; mm_init_cid(mm); return 0; } +#define mm_alloc_cid(...) alloc_hooks(mm_alloc_cid_noprof(__VA_ARGS__)) static inline void mm_destroy_cid(struct mm_struct *mm) { diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 13a82f11e4fd..03053de557cf 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -151,6 +151,9 @@ extern size_t pcpu_alloc_size(void __percpu *__pdata); #define alloc_percpu(type) \ (typeof(type) __percpu *)__alloc_percpu(sizeof(type), \ __alignof__(type)) +#define alloc_percpu_noprof(type) \ + ((typeof(type) __percpu *)pcpu_alloc_noprof(sizeof(type), \ + __alignof__(type), false, GFP_KERNEL)) extern void free_percpu(void __percpu *__pdata); diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 808f9d3ee546..fd037c127bb0 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -464,11 +464,11 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r, /* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See * documentation for vmalloc for which of them are legal. */ -static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp) +static inline void **__ptr_ring_init_queue_alloc_noprof(unsigned int size, gfp_t gfp) { if (size > KMALLOC_MAX_SIZE / sizeof(void *)) return NULL; - return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO); + return kvmalloc_array_noprof(size, sizeof(void *), gfp | __GFP_ZERO); } static inline void __ptr_ring_set_size(struct ptr_ring *r, int size) @@ -484,9 +484,9 @@ static inline void __ptr_ring_set_size(struct ptr_ring *r, int size) r->batch = 1; } -static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) +static inline int ptr_ring_init_noprof(struct ptr_ring *r, int size, gfp_t gfp) { - r->queue = __ptr_ring_init_queue_alloc(size, gfp); + r->queue = __ptr_ring_init_queue_alloc_noprof(size, gfp); if (!r->queue) return -ENOMEM; @@ -497,6 +497,7 @@ static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) return 0; } +#define ptr_ring_init(...) alloc_hooks(ptr_ring_init_noprof(__VA_ARGS__)) /* * Return entries into ring. Destroy entries that don't fit. @@ -587,11 +588,11 @@ static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, * In particular if you consume ring in interrupt or BH context, you must * disable interrupts/BH when doing so. */ -static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, +static inline int ptr_ring_resize_noprof(struct ptr_ring *r, int size, gfp_t gfp, void (*destroy)(void *)) { unsigned long flags; - void **queue = __ptr_ring_init_queue_alloc(size, gfp); + void **queue = __ptr_ring_init_queue_alloc_noprof(size, gfp); void **old; if (!queue) @@ -609,6 +610,7 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, return 0; } +#define ptr_ring_resize(...) alloc_hooks(ptr_ring_resize_noprof(__VA_ARGS__)) /* * Note: producer lock is nested within consumer lock, so if you @@ -616,21 +618,21 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, * In particular if you consume ring in interrupt or BH context, you must * disable interrupts/BH when doing so. */ -static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, - unsigned int nrings, - int size, - gfp_t gfp, void (*destroy)(void *)) +static inline int ptr_ring_resize_multiple_noprof(struct ptr_ring **rings, + unsigned int nrings, + int size, + gfp_t gfp, void (*destroy)(void *)) { unsigned long flags; void ***queues; int i; - queues = kmalloc_array(nrings, sizeof(*queues), gfp); + queues = kmalloc_array_noprof(nrings, sizeof(*queues), gfp); if (!queues) goto noqueues; for (i = 0; i < nrings; ++i) { - queues[i] = __ptr_ring_init_queue_alloc(size, gfp); + queues[i] = __ptr_ring_init_queue_alloc_noprof(size, gfp); if (!queues[i]) goto nomem; } @@ -660,6 +662,8 @@ nomem: noqueues: return -ENOMEM; } +#define ptr_ring_resize_multiple(...) \ + alloc_hooks(ptr_ring_resize_multiple_noprof(__VA_ARGS__)) static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *)) { diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index e2d45b7cb619..926496c9cc9c 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -177,10 +177,11 @@ static inline int skb_array_peek_len_any(struct skb_array *a) return PTR_RING_PEEK_CALL_ANY(&a->ring, __skb_array_len_with_tag); } -static inline int skb_array_init(struct skb_array *a, int size, gfp_t gfp) +static inline int skb_array_init_noprof(struct skb_array *a, int size, gfp_t gfp) { - return ptr_ring_init(&a->ring, size, gfp); + return ptr_ring_init_noprof(&a->ring, size, gfp); } +#define skb_array_init(...) alloc_hooks(skb_array_init_noprof(__VA_ARGS__)) static void __skb_array_destroy_skb(void *ptr) { @@ -198,15 +199,17 @@ static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb); } -static inline int skb_array_resize_multiple(struct skb_array **rings, - int nrings, unsigned int size, - gfp_t gfp) +static inline int skb_array_resize_multiple_noprof(struct skb_array **rings, + int nrings, unsigned int size, + gfp_t gfp) { BUILD_BUG_ON(offsetof(struct skb_array, ring)); - return ptr_ring_resize_multiple((struct ptr_ring **)rings, - nrings, size, gfp, - __skb_array_destroy_skb); + return ptr_ring_resize_multiple_noprof((struct ptr_ring **)rings, + nrings, size, gfp, + __skb_array_destroy_skb); } +#define skb_array_resize_multiple(...) \ + alloc_hooks(skb_array_resize_multiple_noprof(__VA_ARGS__)) static inline void skb_array_cleanup(struct skb_array *a) { diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9d24aec064e8..b72920c9887b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3371,7 +3371,7 @@ void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason); * * %NULL is returned if there is no free memory. */ -static inline struct page *__dev_alloc_pages(gfp_t gfp_mask, +static inline struct page *__dev_alloc_pages_noprof(gfp_t gfp_mask, unsigned int order) { /* This piece of code contains several assumptions. @@ -3384,13 +3384,11 @@ static inline struct page *__dev_alloc_pages(gfp_t gfp_mask, */ gfp_mask |= __GFP_COMP | __GFP_MEMALLOC; - return alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); + return alloc_pages_node_noprof(NUMA_NO_NODE, gfp_mask, order); } +#define __dev_alloc_pages(...) alloc_hooks(__dev_alloc_pages_noprof(__VA_ARGS__)) -static inline struct page *dev_alloc_pages(unsigned int order) -{ - return __dev_alloc_pages(GFP_ATOMIC | __GFP_NOWARN, order); -} +#define dev_alloc_pages(_order) __dev_alloc_pages(GFP_ATOMIC | __GFP_NOWARN, _order) /** * __dev_alloc_page - allocate a page for network Rx @@ -3400,15 +3398,13 @@ static inline struct page *dev_alloc_pages(unsigned int order) * * %NULL is returned if there is no free memory. */ -static inline struct page *__dev_alloc_page(gfp_t gfp_mask) +static inline struct page *__dev_alloc_page_noprof(gfp_t gfp_mask) { - return __dev_alloc_pages(gfp_mask, 0); + return __dev_alloc_pages_noprof(gfp_mask, 0); } +#define __dev_alloc_page(...) alloc_hooks(__dev_alloc_page_noprof(__VA_ARGS__)) -static inline struct page *dev_alloc_page(void) -{ - return dev_alloc_pages(0); -} +#define dev_alloc_page() dev_alloc_pages(0) /** * dev_page_is_reusable - check whether a page can be reused for network Rx diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index e65ec3fd2799..78efc5b20284 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -410,11 +410,9 @@ void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock); int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock, struct sk_msg *msg); -static inline struct sk_psock_link *sk_psock_init_link(void) -{ - return kzalloc(sizeof(struct sk_psock_link), - GFP_ATOMIC | __GFP_NOWARN); -} +#define sk_psock_init_link() \ + ((struct sk_psock_link *)kzalloc(sizeof(struct sk_psock_link), \ + GFP_ATOMIC | __GFP_NOWARN)) static inline void sk_psock_free_link(struct sk_psock_link *link) { diff --git a/include/linux/slab.h b/include/linux/slab.h index c0be1cd03cf6..4cc37ef22aae 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -744,6 +744,9 @@ void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, int node, */ #define kmalloc_track_caller(...) kmalloc_node_track_caller(__VA_ARGS__, NUMA_NO_NODE) +#define kmalloc_track_caller_noprof(...) \ + kmalloc_node_track_caller_noprof(__VA_ARGS__, NUMA_NO_NODE, _RET_IP_) + static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) { @@ -781,6 +784,7 @@ extern void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node) __alloc_si #define kvmalloc_node(...) alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__)) #define kvmalloc(_size, _flags) kvmalloc_node(_size, _flags, NUMA_NO_NODE) +#define kvmalloc_noprof(_size, _flags) kvmalloc_node_noprof(_size, _flags, NUMA_NO_NODE) #define kvzalloc(_size, _flags) kvmalloc(_size, _flags|__GFP_ZERO) #define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, _flags|__GFP_ZERO, _node) @@ -797,6 +801,7 @@ static inline __alloc_size(1, 2) void *kvmalloc_array_noprof(size_t n, size_t si #define kvmalloc_array(...) alloc_hooks(kvmalloc_array_noprof(__VA_ARGS__)) #define kvcalloc(_n, _size, _flags) kvmalloc_array(_n, _size, _flags|__GFP_ZERO) +#define kvcalloc_noprof(_n, _size, _flags) kvmalloc_array_noprof(_n, _size, _flags|__GFP_ZERO) extern void *kvrealloc_noprof(const void *p, size_t oldsize, size_t newsize, gfp_t flags) __realloc_size(3); diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index 317200cd3a60..fc5a206c4043 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -117,9 +117,9 @@ static inline int copy_to_sockptr(sockptr_t dst, const void *src, size_t size) return copy_to_sockptr_offset(dst, 0, src, size); } -static inline void *memdup_sockptr(sockptr_t src, size_t len) +static inline void *memdup_sockptr_noprof(sockptr_t src, size_t len) { - void *p = kmalloc_track_caller(len, GFP_USER | __GFP_NOWARN); + void *p = kmalloc_track_caller_noprof(len, GFP_USER | __GFP_NOWARN); if (!p) return ERR_PTR(-ENOMEM); @@ -129,10 +129,11 @@ static inline void *memdup_sockptr(sockptr_t src, size_t len) } return p; } +#define memdup_sockptr(...) alloc_hooks(memdup_sockptr_noprof(__VA_ARGS__)) -static inline void *memdup_sockptr_nul(sockptr_t src, size_t len) +static inline void *memdup_sockptr_nul_noprof(sockptr_t src, size_t len) { - char *p = kmalloc_track_caller(len + 1, GFP_KERNEL); + char *p = kmalloc_track_caller_noprof(len + 1, GFP_KERNEL); if (!p) return ERR_PTR(-ENOMEM); @@ -143,6 +144,7 @@ static inline void *memdup_sockptr_nul(sockptr_t src, size_t len) p[len] = '\0'; return p; } +#define memdup_sockptr_nul(...) alloc_hooks(memdup_sockptr_nul_noprof(__VA_ARGS__)) static inline long strncpy_from_sockptr(char *dst, sockptr_t src, size_t count) { diff --git a/include/net/netlabel.h b/include/net/netlabel.h index f3ab0b8a4b18..c31bd96dafdb 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -274,15 +274,17 @@ struct netlbl_calipso_ops { * on success, NULL on failure. * */ -static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags) +static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc_noprof(gfp_t flags) { struct netlbl_lsm_cache *cache; - cache = kzalloc(sizeof(*cache), flags); + cache = kzalloc_noprof(sizeof(*cache), flags); if (cache) refcount_set(&cache->refcount, 1); return cache; } +#define netlbl_secattr_cache_alloc(...) \ + alloc_hooks(netlbl_secattr_cache_alloc_noprof(__VA_ARGS__)) /** * netlbl_secattr_cache_free - Frees a netlbl_lsm_cache struct @@ -311,10 +313,11 @@ static inline void netlbl_secattr_cache_free(struct netlbl_lsm_cache *cache) * on failure. * */ -static inline struct netlbl_lsm_catmap *netlbl_catmap_alloc(gfp_t flags) +static inline struct netlbl_lsm_catmap *netlbl_catmap_alloc_noprof(gfp_t flags) { - return kzalloc(sizeof(struct netlbl_lsm_catmap), flags); + return kzalloc_noprof(sizeof(struct netlbl_lsm_catmap), flags); } +#define netlbl_catmap_alloc(...) alloc_hooks(netlbl_catmap_alloc_noprof(__VA_ARGS__)) /** * netlbl_catmap_free - Free a LSM secattr catmap @@ -376,10 +379,11 @@ static inline void netlbl_secattr_destroy(struct netlbl_lsm_secattr *secattr) * pointer on success, or NULL on failure. * */ -static inline struct netlbl_lsm_secattr *netlbl_secattr_alloc(gfp_t flags) +static inline struct netlbl_lsm_secattr *netlbl_secattr_alloc_noprof(gfp_t flags) { - return kzalloc(sizeof(struct netlbl_lsm_secattr), flags); + return kzalloc_noprof(sizeof(struct netlbl_lsm_secattr), flags); } +#define netlbl_secattr_alloc(...) alloc_hooks(netlbl_secattr_alloc_noprof(__VA_ARGS__)) /** * netlbl_secattr_free - Frees a netlbl_lsm_secattr struct diff --git a/include/net/netlink.h b/include/net/netlink.h index c19ff921b661..972b5484fa6f 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -1891,10 +1891,11 @@ static inline struct nla_bitfield32 nla_get_bitfield32(const struct nlattr *nla) * @src: netlink attribute to duplicate from * @gfp: GFP mask */ -static inline void *nla_memdup(const struct nlattr *src, gfp_t gfp) +static inline void *nla_memdup_noprof(const struct nlattr *src, gfp_t gfp) { - return kmemdup(nla_data(src), nla_len(src), gfp); + return kmemdup_noprof(nla_data(src), nla_len(src), gfp); } +#define nla_memdup(...) alloc_hooks(nla_memdup_noprof(__VA_ARGS__)) /** * nla_nest_start_noflag - Start a new level of nested attributes diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 004e651e6067..29495c331d20 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -127,12 +127,12 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb, } static inline struct request_sock * -reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, +reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener, bool attach_listener) { struct request_sock *req; - req = kmem_cache_alloc(ops->slab, GFP_ATOMIC | __GFP_NOWARN); + req = kmem_cache_alloc_noprof(ops->slab, GFP_ATOMIC | __GFP_NOWARN); if (!req) return NULL; req->rsk_listener = NULL; @@ -157,6 +157,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, return req; } +#define reqsk_alloc(...) alloc_hooks(reqsk_alloc_noprof(__VA_ARGS__)) static inline void __reqsk_free(struct request_sock *req) { diff --git a/include/net/tcx.h b/include/net/tcx.h index 04be9377785d..72a3e75e539f 100644 --- a/include/net/tcx.h +++ b/include/net/tcx.h @@ -75,9 +75,9 @@ tcx_entry_fetch(struct net_device *dev, bool ingress) return rcu_dereference_rtnl(dev->tcx_egress); } -static inline struct bpf_mprog_entry *tcx_entry_create(void) +static inline struct bpf_mprog_entry *tcx_entry_create_noprof(void) { - struct tcx_entry *tcx = kzalloc(sizeof(*tcx), GFP_KERNEL); + struct tcx_entry *tcx = kzalloc_noprof(sizeof(*tcx), GFP_KERNEL); if (tcx) { bpf_mprog_bundle_init(&tcx->bundle); @@ -85,6 +85,7 @@ static inline struct bpf_mprog_entry *tcx_entry_create(void) } return NULL; } +#define tcx_entry_create(...) alloc_hooks(tcx_entry_create_noprof(__VA_ARGS__)) static inline void tcx_entry_free(struct bpf_mprog_entry *entry) { diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h index c53b329092d4..4ebc1b7043d9 100644 --- a/net/sunrpc/auth_gss/auth_gss_internal.h +++ b/net/sunrpc/auth_gss/auth_gss_internal.h @@ -23,7 +23,7 @@ simple_get_bytes(const void *p, const void *end, void *res, size_t len) } static inline const void * -simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) +simple_get_netobj_noprof(const void *p, const void *end, struct xdr_netobj *dest) { const void *q; unsigned int len; @@ -35,7 +35,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); if (len) { - dest->data = kmemdup(p, len, GFP_KERNEL); + dest->data = kmemdup_noprof(p, len, GFP_KERNEL); if (unlikely(dest->data == NULL)) return ERR_PTR(-ENOMEM); } else @@ -43,3 +43,5 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) dest->len = len; return q; } + +#define simple_get_netobj(...) alloc_hooks(simple_get_netobj_noprof(__VA_ARGS__)) -- cgit v1.2.3