commit    1ef0736c0711e2633a59b540931406de626f2836
tree      3c230f459eda15a2263bb1fb9ac99eb0f79d802b
parent    9fa87dd23251574a29cf948fd16cf39075762f3e
parent    608b638ebf368f18431f47bbbd0d93828cbbdf83
author    Jakub Kicinski <kuba@kernel.org>  2022-05-24 02:07:13 +0300
committer Jakub Kicinski <kuba@kernel.org>  2022-05-24 02:07:14 +0300
Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:
====================
pull-request: bpf-next 2022-05-23
We've added 113 non-merge commits during the last 26 day(s) which contain
a total of 121 files changed, 7425 insertions(+), 1586 deletions(-).
The main changes are:
1) Speed up symbol resolution for kprobes multi-link attachments, from Jiri Olsa.
2) Add BPF dynamic pointer infrastructure e.g. to allow for dynamically sized ringbuf
reservations without extra memory copies, from Joanne Koong (usage sketch below).
3) Big batch of libbpf improvements towards libbpf 1.0 release, from Andrii Nakryiko.
4) Add BPF link iterator to traverse links via seq_file ops, from Dmitrii Dolgov.
5) Add source IP address to BPF tunnel key infrastructure, from Kaixi Fan.
6) Refine unprivileged BPF to disable only object-creating commands, from Alan Maguire.
7) Fix JIT blinding of ld_imm64 when they point to subprogs, from Alexei Starovoitov.
8) Add BPF access to mptcp_sock structures and their meta data, from Geliang Tang.
9) Add new BPF helper for access to remote CPU's BPF map elements, from Feng Zhou (usage sketch below).
10) Allow attaching 64-bit cookie to BPF link of fentry/fexit/fmod_ret, from Kui-Feng Lee.
11) Follow-ups to typed pointer support in BPF maps, from Kumar Kartikeya Dwivedi.
12) Add busy-poll test cases to the XSK selftest suite, from Magnus Karlsson.
13) Improvements in BPF selftest test_progs subtest output, from Mykola Lysenko.
14) Fill bpf_prog_pack allocator areas with illegal instructions, from Song Liu.
15) Add generic batch operations for BPF map-in-map cases, from Takshak Chahande.
16) Make bpf_jit_enable more user friendly when permanently on 1, from Tiezhu Yang.
17) Fix an array overflow in bpf_trampoline_get_progs(), from Yuntao Wang.
====================
Link: https://lore.kernel.org/r/20220523223805.27931-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
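
To make item 2 above concrete, here is a minimal, hypothetical BPF-C sketch of the
dynptr ringbuf flow. It is not taken from this commit; it is written only against the
helper signatures added in this series (bpf_ringbuf_reserve_dynptr(), bpf_dynptr_data(),
bpf_ringbuf_submit_dynptr()/bpf_ringbuf_discard_dynptr()). The map name, program
section and event layout are illustrative assumptions.

  /* Hypothetical sketch, not part of this commit. */
  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  struct {
          __uint(type, BPF_MAP_TYPE_RINGBUF);
          __uint(max_entries, 4096);
  } rb SEC(".maps");

  struct event {
          __u32 pid;
  };

  SEC("tracepoint/syscalls/sys_enter_execve")
  int log_execve(void *ctx)
  {
          struct bpf_dynptr ptr;
          struct event *e;

          /* A matching submit/discard is required even when the
           * reservation fails; the verifier enforces this.
           */
          if (bpf_ringbuf_reserve_dynptr(&rb, sizeof(*e), 0, &ptr)) {
                  bpf_ringbuf_discard_dynptr(&ptr, 0);
                  return 0;
          }

          /* Get a direct slice into the reserved sample; len must be
           * statically known.
           */
          e = bpf_dynptr_data(&ptr, 0, sizeof(*e));
          if (!e) {
                  bpf_ringbuf_discard_dynptr(&ptr, 0);
                  return 0;
          }

          e->pid = bpf_get_current_pid_tgid() >> 32;
          bpf_ringbuf_submit_dynptr(&ptr, 0);
          return 0;
  }

  char LICENSE[] SEC("license") = "GPL";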
121 files changed, 7422 insertions, 1582 deletions
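
Likewise for item 9, a hypothetical sketch of the new bpf_map_lookup_percpu_elem()
helper, which looks up a per-CPU map entry for an explicitly chosen CPU. The map
definition, attach point and fixed CPU bound are assumptions for illustration only;
the headers and license line from the previous sketch are assumed.

  struct {
          __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
          __uint(max_entries, 1);
          __type(key, __u32);
          __type(value, __u64);
  } counters SEC(".maps");

  SEC("raw_tracepoint/sys_enter")
  int sum_counters(void *ctx)
  {
          __u64 total = 0;
          __u32 key = 0, cpu;

          /* Walk a fixed number of CPUs; the helper returns NULL for
           * an invalid cpu or a missing entry.
           */
          for (cpu = 0; cpu < 8; cpu++) {
                  __u64 *val = bpf_map_lookup_percpu_elem(&counters, &key, cpu);

                  if (val)
                          total += *val;
          }

          bpf_printk("total across CPUs: %llu", total);
          return 0;
  }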
diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst index 5300837ac2c9..1de6a57c7e1e 100644 --- a/Documentation/bpf/instruction-set.rst +++ b/Documentation/bpf/instruction-set.rst @@ -130,7 +130,7 @@ Byte swap instructions The byte swap instructions use an instruction class of ``BFP_ALU`` and a 4-bit code field of ``BPF_END``. -The byte swap instructions instructions operate on the destination register +The byte swap instructions operate on the destination register only and do not use a separate source register or immediate value. The 1-bit source operand field in the opcode is used to to select what byte @@ -157,7 +157,7 @@ Examples: dst_reg = htobe64(dst_reg) ``BPF_FROM_LE`` and ``BPF_FROM_BE`` exist as aliases for ``BPF_TO_LE`` and -``BPF_TO_LE`` respetively. +``BPF_TO_BE`` respectively. Jump instructions diff --git a/MAINTAINERS b/MAINTAINERS index 234380c959c8..a73058b5cc3a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13799,6 +13799,7 @@ F: include/net/mptcp.h F: include/trace/events/mptcp.h F: include/uapi/linux/mptcp.h F: net/mptcp/ +F: tools/testing/selftests/bpf/*/*mptcp*.c F: tools/testing/selftests/net/mptcp/ NETWORKING [TCP] @@ -21551,6 +21552,7 @@ K: (?:\b|_)xdp(?:\b|_) XDP SOCKETS (AF_XDP) M: Björn Töpel <bjorn@kernel.org> M: Magnus Karlsson <magnus.karlsson@intel.com> +M: Maciej Fijalkowski <maciej.fijalkowski@intel.com> R: Jonathan Lemon <jonathan.lemon@gmail.com> L: netdev@vger.kernel.org L: bpf@vger.kernel.org diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index aede9a3ca3f7..af35052d06ed 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1809,7 +1809,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) /* * Three initial passes: * - 1/2: Determine clobbered registers - * - 3: Calculate program size and addrs arrray + * - 3: Calculate program size and addrs array */ for (pass = 1; pass <= 3; pass++) { if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) { diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index d20ab0921480..1cc15528ce29 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -45,6 +45,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len); extern void text_poke_sync(void); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern void *text_poke_copy(void *addr, const void *opcode, size_t len); +extern void *text_poke_set(void *addr, int c, size_t len); extern int poke_int3_handler(struct pt_regs *regs); extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index d374cb3cf024..7563b5bc8328 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -994,7 +994,21 @@ static inline void unuse_temporary_mm(temp_mm_state_t prev_state) __ro_after_init struct mm_struct *poking_mm; __ro_after_init unsigned long poking_addr; -static void *__text_poke(void *addr, const void *opcode, size_t len) +static void text_poke_memcpy(void *dst, const void *src, size_t len) +{ + memcpy(dst, src, len); +} + +static void text_poke_memset(void *dst, const void *src, size_t len) +{ + int c = *(const int *)src; + + memset(dst, c, len); +} + +typedef void text_poke_f(void *dst, const void *src, size_t len); + +static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t len) { bool cross_page_boundary = 
offset_in_page(addr) + len > PAGE_SIZE; struct page *pages[2] = {NULL}; @@ -1059,7 +1073,7 @@ static void *__text_poke(void *addr, const void *opcode, size_t len) prev = use_temporary_mm(poking_mm); kasan_disable_current(); - memcpy((u8 *)poking_addr + offset_in_page(addr), opcode, len); + func((u8 *)poking_addr + offset_in_page(addr), src, len); kasan_enable_current(); /* @@ -1087,11 +1101,13 @@ static void *__text_poke(void *addr, const void *opcode, size_t len) (cross_page_boundary ? 2 : 1) * PAGE_SIZE, PAGE_SHIFT, false); - /* - * If the text does not match what we just wrote then something is - * fundamentally screwy; there's nothing we can really do about that. - */ - BUG_ON(memcmp(addr, opcode, len)); + if (func == text_poke_memcpy) { + /* + * If the text does not match what we just wrote then something is + * fundamentally screwy; there's nothing we can really do about that. + */ + BUG_ON(memcmp(addr, src, len)); + } local_irq_restore(flags); pte_unmap_unlock(ptep, ptl); @@ -1118,7 +1134,7 @@ void *text_poke(void *addr, const void *opcode, size_t len) { lockdep_assert_held(&text_mutex); - return __text_poke(addr, opcode, len); + return __text_poke(text_poke_memcpy, addr, opcode, len); } /** @@ -1137,7 +1153,7 @@ void *text_poke(void *addr, const void *opcode, size_t len) */ void *text_poke_kgdb(void *addr, const void *opcode, size_t len) { - return __text_poke(addr, opcode, len); + return __text_poke(text_poke_memcpy, addr, opcode, len); } /** @@ -1167,7 +1183,38 @@ void *text_poke_copy(void *addr, const void *opcode, size_t len) s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched); - __text_poke((void *)ptr, opcode + patched, s); + __text_poke(text_poke_memcpy, (void *)ptr, opcode + patched, s); + patched += s; + } + mutex_unlock(&text_mutex); + return addr; +} + +/** + * text_poke_set - memset into (an unused part of) RX memory + * @addr: address to modify + * @c: the byte to fill the area with + * @len: length to copy, could be more than 2x PAGE_SIZE + * + * This is useful to overwrite unused regions of RX memory with illegal + * instructions. 
+ */ +void *text_poke_set(void *addr, int c, size_t len) +{ + unsigned long start = (unsigned long)addr; + size_t patched = 0; + + if (WARN_ON_ONCE(core_kernel_text(start))) + return NULL; + + mutex_lock(&text_mutex); + while (patched < len) { + unsigned long ptr = start + patched; + size_t s; + + s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched); + + __text_poke(text_poke_memset, (void *)ptr, (void *)&c, s); patched += s; } mutex_unlock(&text_mutex); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 16b6efacf7c6..f298b18a9a3d 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -228,6 +228,11 @@ static void jit_fill_hole(void *area, unsigned int size) memset(area, 0xcc, size); } +int bpf_arch_text_invalidate(void *dst, size_t len) +{ + return IS_ERR_OR_NULL(text_poke_set(dst, 0xcc, len)); +} + struct jit_context { int cleanup_addr; /* Epilogue code offset */ @@ -1762,13 +1767,32 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args, } static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, - struct bpf_prog *p, int stack_size, bool save_ret) + struct bpf_tramp_link *l, int stack_size, + int run_ctx_off, bool save_ret) { u8 *prog = *pprog; u8 *jmp_insn; + int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); + struct bpf_prog *p = l->link.prog; + u64 cookie = l->cookie; + + /* mov rdi, cookie */ + emit_mov_imm64(&prog, BPF_REG_1, (long) cookie >> 32, (u32) (long) cookie); + + /* Prepare struct bpf_tramp_run_ctx. + * + * bpf_tramp_run_ctx is already preserved by + * arch_prepare_bpf_trampoline(). + * + * mov QWORD PTR [rbp - run_ctx_off + ctx_cookie_off], rdi + */ + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_1, -run_ctx_off + ctx_cookie_off); /* arg1: mov rdi, progs[i] */ emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); + /* arg2: lea rsi, [rbp - ctx_cookie_off] */ + EMIT4(0x48, 0x8D, 0x75, -run_ctx_off); + if (emit_call(&prog, p->aux->sleepable ? __bpf_prog_enter_sleepable : __bpf_prog_enter, prog)) @@ -1814,6 +1838,8 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); /* arg2: mov rsi, rbx <- start time in nsec */ emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); + /* arg3: lea rdx, [rbp - run_ctx_off] */ + EMIT4(0x48, 0x8D, 0x55, -run_ctx_off); if (emit_call(&prog, p->aux->sleepable ? 
__bpf_prog_exit_sleepable : __bpf_prog_exit, prog)) @@ -1850,15 +1876,15 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) } static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, - struct bpf_tramp_progs *tp, int stack_size, - bool save_ret) + struct bpf_tramp_links *tl, int stack_size, + int run_ctx_off, bool save_ret) { int i; u8 *prog = *pprog; - for (i = 0; i < tp->nr_progs; i++) { - if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, - save_ret)) + for (i = 0; i < tl->nr_links; i++) { + if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, + run_ctx_off, save_ret)) return -EINVAL; } *pprog = prog; @@ -1866,8 +1892,8 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, } static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, - struct bpf_tramp_progs *tp, int stack_size, - u8 **branches) + struct bpf_tramp_links *tl, int stack_size, + int run_ctx_off, u8 **branches) { u8 *prog = *pprog; int i; @@ -1877,8 +1903,8 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, */ emit_mov_imm32(&prog, false, BPF_REG_0, 0); emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); - for (i = 0; i < tp->nr_progs; i++) { - if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, true)) + for (i = 0; i < tl->nr_links; i++) { + if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true)) return -EINVAL; /* mod_ret prog stored return value into [rbp - 8]. Emit: @@ -1980,14 +2006,14 @@ static bool is_valid_bpf_tramp_flags(unsigned int flags) */ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_tramp_progs *tprogs, + struct bpf_tramp_links *tlinks, void *orig_call) { int ret, i, nr_args = m->nr_args; - int regs_off, ip_off, args_off, stack_size = nr_args * 8; - struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY]; - struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT]; - struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN]; + int regs_off, ip_off, args_off, stack_size = nr_args * 8, run_ctx_off; + struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; + struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; + struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; u8 **branches = NULL; u8 *prog; bool save_ret; @@ -2014,6 +2040,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i * RBP - args_off [ args count ] always * * RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag + * + * RBP - run_ctx_off [ bpf_tramp_run_ctx ] */ /* room for return value of orig_call or fentry prog */ @@ -2032,6 +2060,9 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i ip_off = stack_size; + stack_size += (sizeof(struct bpf_tramp_run_ctx) + 7) & ~0x7; + run_ctx_off = stack_size; + if (flags & BPF_TRAMP_F_SKIP_FRAME) { /* skip patched call instruction and point orig_call to actual * body of the kernel function. 
@@ -2078,19 +2109,19 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i } } - if (fentry->nr_progs) - if (invoke_bpf(m, &prog, fentry, regs_off, + if (fentry->nr_links) + if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET)) return -EINVAL; - if (fmod_ret->nr_progs) { - branches = kcalloc(fmod_ret->nr_progs, sizeof(u8 *), + if (fmod_ret->nr_links) { + branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *), GFP_KERNEL); if (!branches) return -ENOMEM; if (invoke_bpf_mod_ret(m, &prog, fmod_ret, regs_off, - branches)) { + run_ctx_off, branches)) { ret = -EINVAL; goto cleanup; } @@ -2111,7 +2142,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i prog += X86_PATCH_SIZE; } - if (fmod_ret->nr_progs) { + if (fmod_ret->nr_links) { /* From Intel 64 and IA-32 Architectures Optimization * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler * Coding Rule 11: All branch targets should be 16-byte @@ -2121,13 +2152,13 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i /* Update the branches saved in invoke_bpf_mod_ret with the * aligned address of do_fexit. */ - for (i = 0; i < fmod_ret->nr_progs; i++) + for (i = 0; i < fmod_ret->nr_links; i++) emit_cond_near_jump(&branches[i], prog, branches[i], X86_JNE); } - if (fexit->nr_progs) - if (invoke_bpf(m, &prog, fexit, regs_off, false)) { + if (fexit->nr_links) + if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, false)) { ret = -EINVAL; goto cleanup; } diff --git a/include/linux/bpf.h b/include/linux/bpf.h index be94833d390a..a7080c86fa76 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -89,6 +89,7 @@ struct bpf_map_ops { int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); int (*map_pop_elem)(struct bpf_map *map, void *value); int (*map_peek_elem)(struct bpf_map *map, void *value); + void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu); /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, @@ -389,9 +390,20 @@ enum bpf_type_flag { */ PTR_UNTRUSTED = BIT(6 + BPF_BASE_TYPE_BITS), - __BPF_TYPE_LAST_FLAG = PTR_UNTRUSTED, + MEM_UNINIT = BIT(7 + BPF_BASE_TYPE_BITS), + + /* DYNPTR points to memory local to the bpf program. */ + DYNPTR_TYPE_LOCAL = BIT(8 + BPF_BASE_TYPE_BITS), + + /* DYNPTR points to a ringbuf record. */ + DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS), + + __BPF_TYPE_FLAG_MAX, + __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; +#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF) + /* Max number of base types. */ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -408,16 +420,11 @@ enum bpf_arg_type { ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */ ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ - ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ - /* the following constraints used to prototype bpf_memcmp() and other - * functions that access data on eBPF program stack + /* Used to prototype bpf_memcmp() and other functions that access data + * on eBPF program stack */ ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ - ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized, - * helper function must fill all bytes or clear - * them in error case. 
- */ ARG_CONST_SIZE, /* number of bytes accessed from memory */ ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ @@ -439,6 +446,7 @@ enum bpf_arg_type { ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ ARG_PTR_TO_KPTR, /* pointer to referenced kptr */ + ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */ __BPF_ARG_TYPE_MAX, /* Extended arg_types. */ @@ -449,6 +457,10 @@ enum bpf_arg_type { ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM, ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, + /* pointer to memory does not need to be initialized, helper function must fill + * all bytes or clear them in error case. + */ + ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM, /* This must be the last entry. Its purpose is to ensure the enum is * wide enough to hold the higher bits reserved for bpf_type_flag. @@ -476,6 +488,7 @@ enum bpf_return_type { RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM, + RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM, RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, /* This must be the last entry. Its purpose is to ensure the enum is @@ -721,15 +734,17 @@ struct btf_func_model { #define BPF_TRAMP_F_RET_FENTRY_RET BIT(4) /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 - * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 + * bytes on x86. */ -#define BPF_MAX_TRAMP_PROGS 38 +#define BPF_MAX_TRAMP_LINKS 38 -struct bpf_tramp_progs { - struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS]; - int nr_progs; +struct bpf_tramp_links { + struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS]; + int nr_links; }; +struct bpf_tramp_run_ctx; + /* Different use cases for BPF trampoline: * 1. 
replace nop at the function entry (kprobe equivalent) * flags = BPF_TRAMP_F_RESTORE_REGS @@ -753,13 +768,14 @@ struct bpf_tramp_progs { struct bpf_tramp_image; int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_tramp_progs *tprogs, + struct bpf_tramp_links *tlinks, void *orig_call); /* these two functions are called from generated trampoline */ -u64 notrace __bpf_prog_enter(struct bpf_prog *prog); -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); -u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog); -void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start); +u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); +void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx); +u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); +void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr); @@ -852,9 +868,10 @@ static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( { return bpf_func(ctx, insnsi); } + #ifdef CONFIG_BPF_JIT -int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr); -int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr); +int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); +int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); @@ -905,12 +922,12 @@ int bpf_jit_charge_modmem(u32 size); void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); #else -static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, +static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { return -ENOTSUPP; } -static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog, +static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { return -ENOTSUPP; @@ -1009,7 +1026,6 @@ struct bpf_prog_aux { bool tail_call_reachable; bool xdp_has_frags; bool use_bpf_prog_pack; - struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -1096,6 +1112,19 @@ struct bpf_link_ops { struct bpf_link_info *info); }; +struct bpf_tramp_link { + struct bpf_link link; + struct hlist_node tramp_hlist; + u64 cookie; +}; + +struct bpf_tracing_link { + struct bpf_tramp_link link; + enum bpf_attach_type attach_type; + struct bpf_trampoline *trampoline; + struct bpf_prog *tgt_prog; +}; + struct bpf_link_primer { struct bpf_link *link; struct file *file; @@ -1133,8 +1162,8 @@ bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value); -int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs, - struct bpf_prog *prog, +int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, + struct bpf_tramp_link *link, const struct btf_func_model *model, void *image, void *image_end); 
static inline bool bpf_try_module_get(const void *data, struct module *owner) @@ -1339,6 +1368,12 @@ struct bpf_trace_run_ctx { u64 bpf_cookie; }; +struct bpf_tramp_run_ctx { + struct bpf_run_ctx run_ctx; + u64 bpf_cookie; + struct bpf_run_ctx *saved_run_ctx; +}; + static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx) { struct bpf_run_ctx *old_ctx = NULL; @@ -1544,6 +1579,7 @@ void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd); struct bpf_link *bpf_link_get_from_fd(u32 ufd); +struct bpf_link *bpf_link_get_curr_or_next(u32 *id); int bpf_obj_pin_user(u32 ufd, const char __user *pathname); int bpf_obj_get_user(const char __user *pathname, int flags); @@ -2161,6 +2197,7 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_map_push_elem_proto; extern const struct bpf_func_proto bpf_map_pop_elem_proto; extern const struct bpf_func_proto bpf_map_peek_elem_proto; +extern const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; @@ -2198,12 +2235,16 @@ extern const struct bpf_func_proto bpf_ringbuf_reserve_proto; extern const struct bpf_func_proto bpf_ringbuf_submit_proto; extern const struct bpf_func_proto bpf_ringbuf_discard_proto; extern const struct bpf_func_proto bpf_ringbuf_query_proto; +extern const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto; +extern const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto; +extern const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto; extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; extern const struct bpf_func_proto bpf_skc_to_unix_sock_proto; +extern const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_snprintf_proto; @@ -2223,6 +2264,7 @@ extern const struct bpf_func_proto bpf_find_vma_proto; extern const struct bpf_func_proto bpf_loop_proto; extern const struct bpf_func_proto bpf_strncmp_proto; extern const struct bpf_func_proto bpf_copy_from_user_task_proto; +extern const struct bpf_func_proto bpf_kptr_xchg_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); @@ -2336,6 +2378,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); void *bpf_arch_text_copy(void *dst, void *src, size_t len); +int bpf_arch_text_invalidate(void *dst, size_t len); struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); @@ -2346,4 +2389,33 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, u32 **bin_buf, u32 num_args); void bpf_bprintf_cleanup(void); +/* the implementation of the opaque uapi struct bpf_dynptr */ +struct bpf_dynptr_kern { + void *data; + /* Size represents the number of usable bytes of dynptr data. + * If for example the offset is at 4 for a local dynptr whose data is + * of type u64, the number of usable bytes is 4. 
+ * + * The upper 8 bits are reserved. It is as follows: + * Bits 0 - 23 = size + * Bits 24 - 30 = dynptr type + * Bit 31 = whether dynptr is read-only + */ + u32 size; + u32 offset; +} __aligned(8); + +enum bpf_dynptr_type { + BPF_DYNPTR_TYPE_INVALID, + /* Points to memory that is local to the bpf program */ + BPF_DYNPTR_TYPE_LOCAL, + /* Underlying data is a ringbuf record */ + BPF_DYNPTR_TYPE_RINGBUF, +}; + +void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, + enum bpf_dynptr_type type, u32 offset, u32 size); +void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); +int bpf_dynptr_check_size(u32 size); + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 3e24ad0c4b3c..2b9112b80171 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -141,3 +141,4 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp) BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf) #endif BPF_LINK_TYPE(BPF_LINK_TYPE_KPROBE_MULTI, kprobe_multi) +BPF_LINK_TYPE(BPF_LINK_TYPE_STRUCT_OPS, struct_ops) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 1f1e7f2ea967..e8439f6cbe57 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -72,6 +72,18 @@ struct bpf_reg_state { u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */ + /* For dynptr stack slots */ + struct { + enum bpf_dynptr_type type; + /* A dynptr is 16 bytes so it takes up 2 stack slots. + * We need to track which slot is the first slot + * to protect against cases where the user may try to + * pass in an address starting at the second slot of the + * dynptr. + */ + bool first_slot; + } dynptr; + /* Max size from any of the above. */ struct { unsigned long raw1; @@ -88,6 +100,8 @@ struct bpf_reg_state { * for the purpose of tracking that it's freed. * For PTR_TO_SOCKET this is used to share which pointers retain the * same reference to the socket, to determine proper reference freeing. + * For stack slots that are dynptrs, this is used to track references to + * the dynptr to determine proper reference freeing. */ u32 id; /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned @@ -174,9 +188,15 @@ enum bpf_stack_slot_type { STACK_SPILL, /* register spilled into stack */ STACK_MISC, /* BPF program wrote some data into this slot */ STACK_ZERO, /* BPF program wrote constant zero */ + /* A dynptr is stored in this stack slot. 
The type of dynptr + * is stored in bpf_stack_state->spilled_ptr.dynptr.type + */ + STACK_DYNPTR, }; #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ +#define BPF_DYNPTR_SIZE sizeof(struct bpf_dynptr_kern) +#define BPF_DYNPTR_NR_SLOTS (BPF_DYNPTR_SIZE / BPF_REG_SIZE) struct bpf_stack_state { struct bpf_reg_state spilled_ptr; diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index bc5d9cc34e4c..335a19092368 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -178,7 +178,8 @@ extern struct btf_id_set name; BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock) + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock) enum { #define BTF_SOCK_TYPE(name, str) name, diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4816b7e11047..820500430eae 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -303,6 +303,8 @@ int unregister_ftrace_function(struct ftrace_ops *ops); extern void ftrace_stub(unsigned long a0, unsigned long a1, struct ftrace_ops *op, struct ftrace_regs *fregs); + +int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *addrs); #else /* !CONFIG_FUNCTION_TRACER */ /* * (un)register_ftrace_function must be a macro since the ops parameter @@ -313,6 +315,10 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1, static inline void ftrace_kill(void) { } static inline void ftrace_free_init_mem(void) { } static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { } +static inline int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *addrs) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_FUNCTION_TRACER */ struct ftrace_func_entry { diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index ce1bd2fbf23e..ad39636e0c3f 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -65,11 +65,11 @@ static inline void *dereference_symbol_descriptor(void *ptr) return ptr; } +#ifdef CONFIG_KALLSYMS int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data); -#ifdef CONFIG_KALLSYMS /* Lookup the address for a symbol. Returns 0 if not found. 
*/ unsigned long kallsyms_lookup_name(const char *name); @@ -163,6 +163,11 @@ static inline bool kallsyms_show_value(const struct cred *cred) return false; } +static inline int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, + unsigned long), void *data) +{ + return -EOPNOTSUPP; +} #endif /*CONFIG_KALLSYMS*/ static inline void print_ip_sym(const char *loglvl, unsigned long ip) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index d4ec894ce67b..4d761ad530c9 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -284,4 +284,10 @@ static inline int mptcpv6_init(void) { return 0; } static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { } #endif +#if defined(CONFIG_MPTCP) && defined(CONFIG_BPF_SYSCALL) +struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk); +#else +static inline struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk) { return NULL; } +#endif + #endif /* __NET_MPTCP_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 444fe6f1cf35..f4009dbdf62d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1013,6 +1013,7 @@ enum bpf_link_type { BPF_LINK_TYPE_XDP = 6, BPF_LINK_TYPE_PERF_EVENT = 7, BPF_LINK_TYPE_KPROBE_MULTI = 8, + BPF_LINK_TYPE_STRUCT_OPS = 9, MAX_BPF_LINK_TYPE, }; @@ -1489,6 +1490,15 @@ union bpf_attr { __aligned_u64 addrs; __aligned_u64 cookies; } kprobe_multi; + struct { + /* this is overlaid with the target_btf_id above. */ + __u32 target_btf_id; + /* black box user-provided value passed through + * to BPF program at the execution time and + * accessible through bpf_get_attach_cookie() BPF helper + */ + __u64 cookie; + } tracing; }; } link_create; @@ -5154,6 +5164,91 @@ union bpf_attr { * if not NULL, is a reference which must be released using its * corresponding release function, or moved into a BPF map before * program exit. + * + * void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu) + * Description + * Perform a lookup in *percpu map* for an entry associated to + * *key* on *cpu*. + * Return + * Map value associated to *key* on *cpu*, or **NULL** if no entry + * was found or *cpu* is invalid. + * + * struct mptcp_sock *bpf_skc_to_mptcp_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *mptcp_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Get a dynptr to local memory *data*. + * + * *data* must be a ptr to a map value. + * The maximum *size* supported is DYNPTR_MAX_SIZE. + * *flags* is currently unused. + * Return + * 0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE, + * -EINVAL if flags is not 0. + * + * long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Reserve *size* bytes of payload in a ring buffer *ringbuf* + * through the dynptr interface. *flags* must be 0. + * + * Please note that a corresponding bpf_ringbuf_submit_dynptr or + * bpf_ringbuf_discard_dynptr must be called on *ptr*, even if the + * reservation fails. This is enforced by the verifier. + * Return + * 0 on success, or a negative error in case of failure. + * + * void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Submit reserved ring buffer sample, pointed to by *data*, + * through the dynptr interface. This is a no-op if the dynptr is + * invalid/null. 
+ * + * For more information on *flags*, please see + * 'bpf_ringbuf_submit'. + * Return + * Nothing. Always succeeds. + * + * void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Discard reserved ring buffer sample through the dynptr + * interface. This is a no-op if the dynptr is invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_discard'. + * Return + * Nothing. Always succeeds. + * + * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset) + * Description + * Read *len* bytes from *src* into *dst*, starting from *offset* + * into *src*. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *src*'s data, -EINVAL if *src* is an invalid dynptr. + * + * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len) + * Description + * Write *len* bytes from *src* into *dst*, starting from *offset* + * into *dst*. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* + * is a read-only dynptr. + * + * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) + * Description + * Get a pointer to the underlying dynptr data. + * + * *len* must be a statically known value. The returned data slice + * is invalidated whenever the dynptr is invalidated. + * Return + * Pointer to the underlying dynptr data, NULL if the dynptr is + * read-only, if the dynptr is invalid, or if the offset and length + * is out of bounds. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5351,6 +5446,15 @@ union bpf_attr { FN(skb_set_tstamp), \ FN(ima_file_hash), \ FN(kptr_xchg), \ + FN(map_lookup_percpu_elem), \ + FN(skc_to_mptcp_sock), \ + FN(dynptr_from_mem), \ + FN(ringbuf_reserve_dynptr), \ + FN(ringbuf_submit_dynptr), \ + FN(ringbuf_discard_dynptr), \ + FN(dynptr_read), \ + FN(dynptr_write), \ + FN(dynptr_data), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5604,6 +5708,10 @@ struct bpf_tunnel_key { __u8 tunnel_ttl; __u16 tunnel_ext; /* Padding, future use. */ __u32 tunnel_label; + union { + __u32 local_ipv4; + __u32 local_ipv6[4]; + }; }; /* user accessible mirror of in-kernel xfrm_state. @@ -6498,6 +6606,11 @@ struct bpf_timer { __u64 :64; } __attribute__((aligned(8))); +struct bpf_dynptr { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. 
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index c1a9be6a4b9f..057ba8e01e70 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse endif CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index b3bf31fd9458..fe40d3b9458f 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -243,6 +243,20 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) return this_cpu_ptr(array->pptrs[index & array->index_mask]); } +static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + u32 index = *(u32 *)key; + + if (cpu >= nr_cpu_ids) + return NULL; + + if (unlikely(index >= array->map.max_entries)) + return NULL; + + return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu); +} + int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) { struct bpf_array *array = container_of(map, struct bpf_array, map); @@ -725,6 +739,7 @@ const struct bpf_map_ops percpu_array_map_ops = { .map_lookup_elem = percpu_array_map_lookup_elem, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, + .map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem, .map_seq_show_elem = percpu_array_map_seq_show_elem, .map_check_btf = array_map_check_btf, .map_lookup_batch = generic_map_lookup_batch, @@ -1345,6 +1360,8 @@ const struct bpf_map_ops array_of_maps_map_ops = { .map_fd_put_ptr = bpf_map_fd_put_ptr, .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, .map_gen_lookup = array_of_map_gen_lookup, + .map_lookup_batch = generic_map_lookup_batch, + .map_update_batch = generic_map_update_batch, .map_check_btf = map_check_no_btf, .map_btf_id = &array_map_btf_ids[0], }; diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 064eccba641d..c1351df9f7ee 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -117,6 +117,21 @@ static const struct bpf_func_proto bpf_ima_file_hash_proto = { .allowed = bpf_ima_inode_hash_allowed, }; +BPF_CALL_1(bpf_get_attach_cookie, void *, ctx) +{ + struct bpf_trace_run_ctx *run_ctx; + + run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx); + return run_ctx->bpf_cookie; +} + +static const struct bpf_func_proto bpf_get_attach_cookie_proto = { + .func = bpf_get_attach_cookie, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + static const struct bpf_func_proto * bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -141,6 +156,8 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL; case BPF_FUNC_ima_file_hash: return prog->aux->sleepable ? &bpf_ima_file_hash_proto : NULL; + case BPF_FUNC_get_attach_cookie: + return bpf_prog_has_trampoline(prog) ? 
&bpf_get_attach_cookie_proto : NULL; default: return tracing_prog_func_proto(func_id, prog); } diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 3a0103ad97bc..d9a3c9207240 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -33,15 +33,15 @@ struct bpf_struct_ops_map { const struct bpf_struct_ops *st_ops; /* protect map_update */ struct mutex lock; - /* progs has all the bpf_prog that is populated + /* link has all the bpf_links that is populated * to the func ptr of the kernel's struct * (in kvalue.data). */ - struct bpf_prog **progs; + struct bpf_link **links; /* image is a page that has all the trampolines * that stores the func args before calling the bpf_prog. * A PAGE_SIZE "image" is enough to store all trampoline for - * "progs[]". + * "links[]". */ void *image; /* uvalue->data stores the kernel struct @@ -283,9 +283,9 @@ static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map) u32 i; for (i = 0; i < btf_type_vlen(t); i++) { - if (st_map->progs[i]) { - bpf_prog_put(st_map->progs[i]); - st_map->progs[i] = NULL; + if (st_map->links[i]) { + bpf_link_put(st_map->links[i]); + st_map->links[i] = NULL; } } } @@ -316,18 +316,34 @@ static int check_zero_holes(const struct btf_type *t, void *data) return 0; } -int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs, - struct bpf_prog *prog, +static void bpf_struct_ops_link_release(struct bpf_link *link) +{ +} + +static void bpf_struct_ops_link_dealloc(struct bpf_link *link) +{ + struct bpf_tramp_link *tlink = container_of(link, struct bpf_tramp_link, link); + + kfree(tlink); +} + +const struct bpf_link_ops bpf_struct_ops_link_lops = { + .release = bpf_struct_ops_link_release, + .dealloc = bpf_struct_ops_link_dealloc, +}; + +int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, + struct bpf_tramp_link *link, const struct btf_func_model *model, void *image, void *image_end) { u32 flags; - tprogs[BPF_TRAMP_FENTRY].progs[0] = prog; - tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; + tlinks[BPF_TRAMP_FENTRY].links[0] = link; + tlinks[BPF_TRAMP_FENTRY].nr_links = 1; flags = model->ret_size > 0 ? 
BPF_TRAMP_F_RET_FENTRY_RET : 0; return arch_prepare_bpf_trampoline(NULL, image, image_end, - model, flags, tprogs, NULL); + model, flags, tlinks, NULL); } static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, @@ -338,7 +354,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, struct bpf_struct_ops_value *uvalue, *kvalue; const struct btf_member *member; const struct btf_type *t = st_ops->type; - struct bpf_tramp_progs *tprogs = NULL; + struct bpf_tramp_links *tlinks = NULL; void *udata, *kdata; int prog_fd, err = 0; void *image, *image_end; @@ -362,8 +378,8 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, if (uvalue->state || refcount_read(&uvalue->refcnt)) return -EINVAL; - tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL); - if (!tprogs) + tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL); + if (!tlinks) return -ENOMEM; uvalue = (struct bpf_struct_ops_value *)st_map->uvalue; @@ -386,6 +402,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, for_each_member(i, t, member) { const struct btf_type *mtype, *ptype; struct bpf_prog *prog; + struct bpf_tramp_link *link; u32 moff; moff = __btf_member_bit_offset(t, member) / 8; @@ -439,16 +456,26 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, err = PTR_ERR(prog); goto reset_unlock; } - st_map->progs[i] = prog; if (prog->type != BPF_PROG_TYPE_STRUCT_OPS || prog->aux->attach_btf_id != st_ops->type_id || prog->expected_attach_type != i) { + bpf_prog_put(prog); err = -EINVAL; goto reset_unlock; } - err = bpf_struct_ops_prepare_trampoline(tprogs, prog, + link = kzalloc(sizeof(*link), GFP_USER); + if (!link) { + bpf_prog_put(prog); + err = -ENOMEM; + goto reset_unlock; + } + bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, + &bpf_struct_ops_link_lops, prog); + st_map->links[i] = &link->link; + + err = bpf_struct_ops_prepare_trampoline(tlinks, link, &st_ops->func_models[i], image, image_end); if (err < 0) @@ -491,7 +518,7 @@ reset_unlock: memset(uvalue, 0, map->value_size); memset(kvalue, 0, map->value_size); unlock: - kfree(tprogs); + kfree(tlinks); mutex_unlock(&st_map->lock); return err; } @@ -546,9 +573,9 @@ static void bpf_struct_ops_map_free(struct bpf_map *map) { struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; - if (st_map->progs) + if (st_map->links) bpf_struct_ops_map_put_progs(st_map); - bpf_map_area_free(st_map->progs); + bpf_map_area_free(st_map->links); bpf_jit_free_exec(st_map->image); bpf_map_area_free(st_map->uvalue); bpf_map_area_free(st_map); @@ -597,11 +624,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) map = &st_map->map; st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE); - st_map->progs = - bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_prog *), + st_map->links = + bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_links *), NUMA_NO_NODE); st_map->image = bpf_jit_alloc_exec(PAGE_SIZE); - if (!st_map->uvalue || !st_map->progs || !st_map->image) { + if (!st_map->uvalue || !st_map->links || !st_map->image) { bpf_struct_ops_map_free(map); return ERR_PTR(-ENOMEM); } diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 2f0b0440131c..7bccaa4646e5 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -202,6 +202,8 @@ enum btf_kfunc_hook { BTF_KFUNC_HOOK_XDP, BTF_KFUNC_HOOK_TC, BTF_KFUNC_HOOK_STRUCT_OPS, + BTF_KFUNC_HOOK_TRACING, + BTF_KFUNC_HOOK_SYSCALL, BTF_KFUNC_HOOK_MAX, }; @@ -7110,6 +7112,10 @@ static int 
bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) return BTF_KFUNC_HOOK_TC; case BPF_PROG_TYPE_STRUCT_OPS: return BTF_KFUNC_HOOK_STRUCT_OPS; + case BPF_PROG_TYPE_TRACING: + return BTF_KFUNC_HOOK_TRACING; + case BPF_PROG_TYPE_SYSCALL: + return BTF_KFUNC_HOOK_SYSCALL; default: return BTF_KFUNC_HOOK_MAX; } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 13e9dbeeedf3..cacd8684c3c4 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -873,7 +873,7 @@ static size_t select_bpf_prog_pack_size(void) return size; } -static struct bpf_prog_pack *alloc_new_pack(void) +static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_insns) { struct bpf_prog_pack *pack; @@ -886,6 +886,7 @@ static struct bpf_prog_pack *alloc_new_pack(void) kfree(pack); return NULL; } + bpf_fill_ill_insns(pack->ptr, bpf_prog_pack_size); bitmap_zero(pack->bitmap, bpf_prog_pack_size / BPF_PROG_CHUNK_SIZE); list_add_tail(&pack->list, &pack_list); @@ -895,7 +896,7 @@ static struct bpf_prog_pack *alloc_new_pack(void) return pack; } -static void *bpf_prog_pack_alloc(u32 size) +static void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns) { unsigned int nbits = BPF_PROG_SIZE_TO_NBITS(size); struct bpf_prog_pack *pack; @@ -910,6 +911,7 @@ static void *bpf_prog_pack_alloc(u32 size) size = round_up(size, PAGE_SIZE); ptr = module_alloc(size); if (ptr) { + bpf_fill_ill_insns(ptr, size); set_vm_flush_reset_perms(ptr); set_memory_ro((unsigned long)ptr, size / PAGE_SIZE); set_memory_x((unsigned long)ptr, size / PAGE_SIZE); @@ -923,7 +925,7 @@ static void *bpf_prog_pack_alloc(u32 size) goto found_free_area; } - pack = alloc_new_pack(); + pack = alloc_new_pack(bpf_fill_ill_insns); if (!pack) goto out; @@ -966,6 +968,9 @@ static void bpf_prog_pack_free(struct bpf_binary_header *hdr) nbits = BPF_PROG_SIZE_TO_NBITS(hdr->size); pos = ((unsigned long)hdr - (unsigned long)pack_ptr) >> BPF_PROG_CHUNK_SHIFT; + WARN_ONCE(bpf_arch_text_invalidate(hdr, hdr->size), + "bpf_prog_pack bug: missing bpf_arch_text_invalidate?\n"); + bitmap_clear(pack->bitmap, pos, nbits); if (bitmap_find_next_zero_area(pack->bitmap, bpf_prog_chunk_count(), 0, bpf_prog_chunk_count(), 0) == 0) { @@ -1102,7 +1107,7 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr, if (bpf_jit_charge_modmem(size)) return NULL; - ro_header = bpf_prog_pack_alloc(size); + ro_header = bpf_prog_pack_alloc(size, bpf_fill_ill_insns); if (!ro_header) { bpf_jit_uncharge_modmem(size); return NULL; @@ -1434,6 +1439,16 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) insn = clone->insnsi; for (i = 0; i < insn_cnt; i++, insn++) { + if (bpf_pseudo_func(insn)) { + /* ld_imm64 with an address of bpf subprog is not + * a user controlled constant. Don't randomize it, + * since it will conflict with jit_subprogs() logic. + */ + insn++; + i++; + continue; + } + /* We temporarily need to hold the original ld64 insn * so that we can still access the first part in the * second blinding run. 
@@ -2619,6 +2634,7 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_map_push_elem_proto __weak; const struct bpf_func_proto bpf_map_pop_elem_proto __weak; const struct bpf_func_proto bpf_map_peek_elem_proto __weak; +const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto __weak; const struct bpf_func_proto bpf_spin_lock_proto __weak; const struct bpf_func_proto bpf_spin_unlock_proto __weak; const struct bpf_func_proto bpf_jiffies64_proto __weak; @@ -2727,6 +2743,11 @@ void * __weak bpf_arch_text_copy(void *dst, void *src, size_t len) return ERR_PTR(-ENOTSUPP); } +int __weak bpf_arch_text_invalidate(void *dst, size_t len) +{ + return -ENOTSUPP; +} + DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key); EXPORT_SYMBOL(bpf_stats_enabled_key); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 3e00e62b2218..17fb69c0e0dc 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -140,7 +140,7 @@ static inline bool htab_use_raw_lock(const struct bpf_htab *htab) static void htab_init_buckets(struct bpf_htab *htab) { - unsigned i; + unsigned int i; for (i = 0; i < htab->n_buckets; i++) { INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i); @@ -1627,7 +1627,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map, void __user *uvalues = u64_to_user_ptr(attr->batch.values); void __user *ukeys = u64_to_user_ptr(attr->batch.keys); void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); - u32 batch, max_count, size, bucket_size; + u32 batch, max_count, size, bucket_size, map_id; struct htab_elem *node_to_free = NULL; u64 elem_map_flags, map_flags; struct hlist_nulls_head *head; @@ -1752,6 +1752,14 @@ again_nocopy: } } else { value = l->key + roundup_key_size; + if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { + struct bpf_map **inner_map = value; + + /* Actual value is the id of the inner map */ + map_id = map->ops->map_fd_sys_lookup_elem(*inner_map); + value = &map_id; + } + if (elem_map_flags & BPF_F_LOCK) copy_map_value_locked(map, dst_val, value, true); @@ -2191,6 +2199,20 @@ static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key) return NULL; } +static void *htab_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) +{ + struct htab_elem *l; + + if (cpu >= nr_cpu_ids) + return NULL; + + l = __htab_map_lookup_elem(map, key); + if (l) + return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); + else + return NULL; +} + static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key) { struct htab_elem *l = __htab_map_lookup_elem(map, key); @@ -2203,6 +2225,22 @@ static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key) return NULL; } +static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) +{ + struct htab_elem *l; + + if (cpu >= nr_cpu_ids) + return NULL; + + l = __htab_map_lookup_elem(map, key); + if (l) { + bpf_lru_node_set_ref(&l->lru_node); + return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); + } + + return NULL; +} + int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) { struct htab_elem *l; @@ -2292,6 +2330,7 @@ const struct bpf_map_ops htab_percpu_map_ops = { .map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem, .map_update_elem = htab_percpu_map_update_elem, .map_delete_elem = htab_map_delete_elem, + .map_lookup_percpu_elem = htab_percpu_map_lookup_percpu_elem, .map_seq_show_elem = htab_percpu_map_seq_show_elem, .map_set_for_each_callback_args = 
map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, @@ -2310,6 +2349,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = { .map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem, .map_update_elem = htab_lru_percpu_map_update_elem, .map_delete_elem = htab_lru_map_delete_elem, + .map_lookup_percpu_elem = htab_lru_percpu_map_lookup_percpu_elem, .map_seq_show_elem = htab_percpu_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, @@ -2450,5 +2490,6 @@ const struct bpf_map_ops htab_of_maps_map_ops = { .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, .map_gen_lookup = htab_of_map_gen_lookup, .map_check_btf = map_check_no_btf, + BATCH_OPS(htab), .map_btf_id = &htab_map_btf_ids[0], }; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 3e709fed5306..225806a02efb 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -103,7 +103,7 @@ const struct bpf_func_proto bpf_map_pop_elem_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, + .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT, }; BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) @@ -116,7 +116,23 @@ const struct bpf_func_proto bpf_map_peek_elem_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, + .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT, +}; + +BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu) +{ + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu); +} + +const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto = { + .func = bpf_map_lookup_percpu_elem, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_MAP_KEY, + .arg3_type = ARG_ANYTHING, }; const struct bpf_func_proto bpf_get_prandom_u32_proto = { @@ -1396,6 +1412,169 @@ const struct bpf_func_proto bpf_kptr_xchg_proto = { .arg2_btf_id = BPF_PTR_POISON, }; +/* Since the upper 8 bits of dynptr->size is reserved, the + * maximum supported size is 2^24 - 1. + */ +#define DYNPTR_MAX_SIZE ((1UL << 24) - 1) +#define DYNPTR_TYPE_SHIFT 28 +#define DYNPTR_SIZE_MASK 0xFFFFFF +#define DYNPTR_RDONLY_BIT BIT(31) + +static bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr) +{ + return ptr->size & DYNPTR_RDONLY_BIT; +} + +static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type) +{ + ptr->size |= type << DYNPTR_TYPE_SHIFT; +} + +static u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr) +{ + return ptr->size & DYNPTR_SIZE_MASK; +} + +int bpf_dynptr_check_size(u32 size) +{ + return size > DYNPTR_MAX_SIZE ? 
-E2BIG : 0; +} + +void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, + enum bpf_dynptr_type type, u32 offset, u32 size) +{ + ptr->data = data; + ptr->offset = offset; + ptr->size = size; + bpf_dynptr_set_type(ptr, type); +} + +void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) +{ + memset(ptr, 0, sizeof(*ptr)); +} + +static int bpf_dynptr_check_off_len(struct bpf_dynptr_kern *ptr, u32 offset, u32 len) +{ + u32 size = bpf_dynptr_get_size(ptr); + + if (len > size || offset > size - len) + return -E2BIG; + + return 0; +} + +BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr) +{ + int err; + + err = bpf_dynptr_check_size(size); + if (err) + goto error; + + /* flags is currently unsupported */ + if (flags) { + err = -EINVAL; + goto error; + } + + bpf_dynptr_init(ptr, data, BPF_DYNPTR_TYPE_LOCAL, 0, size); + + return 0; + +error: + bpf_dynptr_set_null(ptr); + return err; +} + +const struct bpf_func_proto bpf_dynptr_from_mem_proto = { + .func = bpf_dynptr_from_mem, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT, +}; + +BPF_CALL_4(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src, u32, offset) +{ + int err; + + if (!src->data) + return -EINVAL; + + err = bpf_dynptr_check_off_len(src, offset, len); + if (err) + return err; + + memcpy(dst, src->data + src->offset + offset, len); + + return 0; +} + +const struct bpf_func_proto bpf_dynptr_read_proto = { + .func = bpf_dynptr_read, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, + .arg3_type = ARG_PTR_TO_DYNPTR, + .arg4_type = ARG_ANYTHING, +}; + +BPF_CALL_4(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src, u32, len) +{ + int err; + + if (!dst->data || bpf_dynptr_is_rdonly(dst)) + return -EINVAL; + + err = bpf_dynptr_check_off_len(dst, offset, len); + if (err) + return err; + + memcpy(dst->data + dst->offset + offset, src, len); + + return 0; +} + +const struct bpf_func_proto bpf_dynptr_write_proto = { + .func = bpf_dynptr_write, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_DYNPTR, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_CONST_SIZE_OR_ZERO, +}; + +BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len) +{ + int err; + + if (!ptr->data) + return 0; + + err = bpf_dynptr_check_off_len(ptr, offset, len); + if (err) + return 0; + + if (bpf_dynptr_is_rdonly(ptr)) + return 0; + + return (unsigned long)(ptr->data + ptr->offset + offset); +} + +const struct bpf_func_proto bpf_dynptr_data_proto = { + .func = bpf_dynptr_data, + .gpl_only = false, + .ret_type = RET_PTR_TO_DYNPTR_MEM_OR_NULL, + .arg1_type = ARG_PTR_TO_DYNPTR, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_CONST_ALLOC_SIZE_OR_ZERO, +}; + const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct bpf_func_proto bpf_get_current_task_btf_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; @@ -1420,6 +1599,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_map_pop_elem_proto; case BPF_FUNC_map_peek_elem: return &bpf_map_peek_elem_proto; + case BPF_FUNC_map_lookup_percpu_elem: + return &bpf_map_lookup_percpu_elem_proto; case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; case 
BPF_FUNC_get_smp_processor_id: @@ -1442,12 +1623,26 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_ringbuf_discard_proto; case BPF_FUNC_ringbuf_query: return &bpf_ringbuf_query_proto; + case BPF_FUNC_ringbuf_reserve_dynptr: + return &bpf_ringbuf_reserve_dynptr_proto; + case BPF_FUNC_ringbuf_submit_dynptr: + return &bpf_ringbuf_submit_dynptr_proto; + case BPF_FUNC_ringbuf_discard_dynptr: + return &bpf_ringbuf_discard_dynptr_proto; case BPF_FUNC_for_each_map_elem: return &bpf_for_each_map_elem_proto; case BPF_FUNC_loop: return &bpf_loop_proto; case BPF_FUNC_strncmp: return &bpf_strncmp_proto; + case BPF_FUNC_dynptr_from_mem: + return &bpf_dynptr_from_mem_proto; + case BPF_FUNC_dynptr_read: + return &bpf_dynptr_read_proto; + case BPF_FUNC_dynptr_write: + return &bpf_dynptr_write_proto; + case BPF_FUNC_dynptr_data: + return &bpf_dynptr_data_proto; default: break; } diff --git a/kernel/bpf/link_iter.c b/kernel/bpf/link_iter.c new file mode 100644 index 000000000000..fec8005a121c --- /dev/null +++ b/kernel/bpf/link_iter.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2022 Red Hat, Inc. */ +#include <linux/bpf.h> +#include <linux/fs.h> +#include <linux/filter.h> +#include <linux/kernel.h> +#include <linux/btf_ids.h> + +struct bpf_iter_seq_link_info { + u32 link_id; +}; + +static void *bpf_link_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct bpf_iter_seq_link_info *info = seq->private; + struct bpf_link *link; + + link = bpf_link_get_curr_or_next(&info->link_id); + if (!link) + return NULL; + + if (*pos == 0) + ++*pos; + return link; +} + +static void *bpf_link_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct bpf_iter_seq_link_info *info = seq->private; + + ++*pos; + ++info->link_id; + bpf_link_put((struct bpf_link *)v); + return bpf_link_get_curr_or_next(&info->link_id); +} + +struct bpf_iter__bpf_link { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct bpf_link *, link); +}; + +DEFINE_BPF_ITER_FUNC(bpf_link, struct bpf_iter_meta *meta, struct bpf_link *link) + +static int __bpf_link_seq_show(struct seq_file *seq, void *v, bool in_stop) +{ + struct bpf_iter__bpf_link ctx; + struct bpf_iter_meta meta; + struct bpf_prog *prog; + int ret = 0; + + ctx.meta = &meta; + ctx.link = v; + meta.seq = seq; + prog = bpf_iter_get_info(&meta, in_stop); + if (prog) + ret = bpf_iter_run_prog(prog, &ctx); + + return ret; +} + +static int bpf_link_seq_show(struct seq_file *seq, void *v) +{ + return __bpf_link_seq_show(seq, v, false); +} + +static void bpf_link_seq_stop(struct seq_file *seq, void *v) +{ + if (!v) + (void)__bpf_link_seq_show(seq, v, true); + else + bpf_link_put((struct bpf_link *)v); +} + +static const struct seq_operations bpf_link_seq_ops = { + .start = bpf_link_seq_start, + .next = bpf_link_seq_next, + .stop = bpf_link_seq_stop, + .show = bpf_link_seq_show, +}; + +BTF_ID_LIST(btf_bpf_link_id) +BTF_ID(struct, bpf_link) + +static const struct bpf_iter_seq_info bpf_link_seq_info = { + .seq_ops = &bpf_link_seq_ops, + .init_seq_private = NULL, + .fini_seq_private = NULL, + .seq_priv_size = sizeof(struct bpf_iter_seq_link_info), +}; + +static struct bpf_iter_reg bpf_link_reg_info = { + .target = "bpf_link", + .ctx_arg_info_size = 1, + .ctx_arg_info = { + { offsetof(struct bpf_iter__bpf_link, link), + PTR_TO_BTF_ID_OR_NULL }, + }, + .seq_info = &bpf_link_seq_info, +}; + +static int __init bpf_link_iter_init(void) +{ + bpf_link_reg_info.ctx_arg_info[0].btf_id = *btf_bpf_link_id; + return 
bpf_iter_reg_target(&bpf_link_reg_info); +} + +late_initcall(bpf_link_iter_init); diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 311264ab80c4..ded4faeca192 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -475,3 +475,81 @@ const struct bpf_func_proto bpf_ringbuf_query_proto = { .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_ANYTHING, }; + +BPF_CALL_4(bpf_ringbuf_reserve_dynptr, struct bpf_map *, map, u32, size, u64, flags, + struct bpf_dynptr_kern *, ptr) +{ + struct bpf_ringbuf_map *rb_map; + void *sample; + int err; + + if (unlikely(flags)) { + bpf_dynptr_set_null(ptr); + return -EINVAL; + } + + err = bpf_dynptr_check_size(size); + if (err) { + bpf_dynptr_set_null(ptr); + return err; + } + + rb_map = container_of(map, struct bpf_ringbuf_map, map); + + sample = __bpf_ringbuf_reserve(rb_map->rb, size); + if (!sample) { + bpf_dynptr_set_null(ptr); + return -EINVAL; + } + + bpf_dynptr_init(ptr, sample, BPF_DYNPTR_TYPE_RINGBUF, 0, size); + + return 0; +} + +const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto = { + .func = bpf_ringbuf_reserve_dynptr, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | MEM_UNINIT, +}; + +BPF_CALL_2(bpf_ringbuf_submit_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags) +{ + if (!ptr->data) + return 0; + + bpf_ringbuf_commit(ptr->data, flags, false /* discard */); + + bpf_dynptr_set_null(ptr); + + return 0; +} + +const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto = { + .func = bpf_ringbuf_submit_dynptr, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | OBJ_RELEASE, + .arg2_type = ARG_ANYTHING, +}; + +BPF_CALL_2(bpf_ringbuf_discard_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags) +{ + if (!ptr->data) + return 0; + + bpf_ringbuf_commit(ptr->data, flags, true /* discard */); + + bpf_dynptr_set_null(ptr); + + return 0; +} + +const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto = { + .func = bpf_ringbuf_discard_dynptr, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | OBJ_RELEASE, + .arg2_type = ARG_ANYTHING, +}; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e0aead17dff4..2b69306d3c6e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2864,19 +2864,12 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd) } EXPORT_SYMBOL(bpf_link_get_from_fd); -struct bpf_tracing_link { - struct bpf_link link; - enum bpf_attach_type attach_type; - struct bpf_trampoline *trampoline; - struct bpf_prog *tgt_prog; -}; - static void bpf_tracing_link_release(struct bpf_link *link) { struct bpf_tracing_link *tr_link = - container_of(link, struct bpf_tracing_link, link); + container_of(link, struct bpf_tracing_link, link.link); - WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog, + WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link, tr_link->trampoline)); bpf_trampoline_put(tr_link->trampoline); @@ -2889,7 +2882,7 @@ static void bpf_tracing_link_release(struct bpf_link *link) static void bpf_tracing_link_dealloc(struct bpf_link *link) { struct bpf_tracing_link *tr_link = - container_of(link, struct bpf_tracing_link, link); + container_of(link, struct bpf_tracing_link, link.link); kfree(tr_link); } @@ -2898,7 +2891,7 @@ static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, struct seq_file *seq) { struct bpf_tracing_link *tr_link = - container_of(link, struct bpf_tracing_link, link); + container_of(link, 
struct bpf_tracing_link, link.link); seq_printf(seq, "attach_type:\t%d\n", @@ -2909,7 +2902,7 @@ static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, struct bpf_link_info *info) { struct bpf_tracing_link *tr_link = - container_of(link, struct bpf_tracing_link, link); + container_of(link, struct bpf_tracing_link, link.link); info->tracing.attach_type = tr_link->attach_type; bpf_trampoline_unpack_key(tr_link->trampoline->key, @@ -2928,7 +2921,8 @@ static const struct bpf_link_ops bpf_tracing_link_lops = { static int bpf_tracing_prog_attach(struct bpf_prog *prog, int tgt_prog_fd, - u32 btf_id) + u32 btf_id, + u64 bpf_cookie) { struct bpf_link_primer link_primer; struct bpf_prog *tgt_prog = NULL; @@ -2990,9 +2984,10 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, err = -ENOMEM; goto out_put_prog; } - bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING, + bpf_link_init(&link->link.link, BPF_LINK_TYPE_TRACING, &bpf_tracing_link_lops, prog); link->attach_type = prog->expected_attach_type; + link->link.cookie = bpf_cookie; mutex_lock(&prog->aux->dst_mutex); @@ -3060,11 +3055,11 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, tgt_prog = prog->aux->dst_prog; } - err = bpf_link_prime(&link->link, &link_primer); + err = bpf_link_prime(&link->link.link, &link_primer); if (err) goto out_unlock; - err = bpf_trampoline_link_prog(prog, tr); + err = bpf_trampoline_link_prog(&link->link, tr); if (err) { bpf_link_cleanup(&link_primer); link = NULL; @@ -3278,7 +3273,7 @@ static int bpf_raw_tp_link_attach(struct bpf_prog *prog, tp_name = prog->aux->attach_func_name; break; } - return bpf_tracing_prog_attach(prog, 0, 0); + return bpf_tracing_prog_attach(prog, 0, 0, 0); case BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: if (strncpy_from_user(buf, user_tp_name, sizeof(buf) - 1) < 0) @@ -4531,7 +4526,8 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) case BPF_PROG_TYPE_EXT: ret = bpf_tracing_prog_attach(prog, attr->link_create.target_fd, - attr->link_create.target_btf_id); + attr->link_create.target_btf_id, + attr->link_create.tracing.cookie); break; case BPF_PROG_TYPE_LSM: case BPF_PROG_TYPE_TRACING: @@ -4546,7 +4542,8 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) else ret = bpf_tracing_prog_attach(prog, attr->link_create.target_fd, - attr->link_create.target_btf_id); + attr->link_create.target_btf_id, + attr->link_create.tracing.cookie); break; case BPF_PROG_TYPE_FLOW_DISSECTOR: case BPF_PROG_TYPE_SK_LOOKUP: @@ -4680,6 +4677,25 @@ struct bpf_link *bpf_link_by_id(u32 id) return link; } +struct bpf_link *bpf_link_get_curr_or_next(u32 *id) +{ + struct bpf_link *link; + + spin_lock_bh(&link_idr_lock); +again: + link = idr_get_next(&link_idr, id); + if (link) { + link = bpf_link_inc_not_zero(link); + if (IS_ERR(link)) { + (*id)++; + goto again; + } + } + spin_unlock_bh(&link_idr_lock); + + return link; +} + #define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id static int bpf_link_get_fd_by_id(const union bpf_attr *attr) @@ -4847,9 +4863,21 @@ out_prog_put: static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) { union bpf_attr attr; + bool capable; int err; - if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) + capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled; + + /* Intent here is for unprivileged_bpf_disabled to block key object + * creation commands for unprivileged users; other actions depend + * of fd availability and access to bpffs, so are dependent on + * object creation success. 
Capabilities are later verified for + * operations such as load and map create, so even with unprivileged + * BPF disabled, capability checks are still carried out for these + * and other operations. + */ + if (!capable && + (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD)) return -EPERM; err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); @@ -5008,6 +5036,7 @@ static bool syscall_prog_is_valid_access(int off, int size, BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size) { struct bpf_prog * __maybe_unused prog; + struct bpf_tramp_run_ctx __maybe_unused run_ctx; switch (cmd) { case BPF_MAP_CREATE: @@ -5035,13 +5064,15 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size) return -EINVAL; } - if (!__bpf_prog_enter_sleepable(prog)) { + run_ctx.bpf_cookie = 0; + run_ctx.saved_run_ctx = NULL; + if (!__bpf_prog_enter_sleepable(prog, &run_ctx)) { /* recursion detected */ bpf_prog_put(prog); return -EBUSY; } attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in); - __bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */); + __bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */, &run_ctx); bpf_prog_put(prog); return 0; #endif diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index ada97751ae1b..93c7675f0c9e 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -30,9 +30,12 @@ static DEFINE_MUTEX(trampoline_mutex); bool bpf_prog_has_trampoline(const struct bpf_prog *prog) { enum bpf_attach_type eatype = prog->expected_attach_type; + enum bpf_prog_type ptype = prog->type; - return eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT || - eatype == BPF_MODIFY_RETURN; + return (ptype == BPF_PROG_TYPE_TRACING && + (eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT || + eatype == BPF_MODIFY_RETURN)) || + (ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC); } void *bpf_jit_alloc_exec_page(void) @@ -168,30 +171,30 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) return ret; } -static struct bpf_tramp_progs * +static struct bpf_tramp_links * bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg) { - const struct bpf_prog_aux *aux; - struct bpf_tramp_progs *tprogs; - struct bpf_prog **progs; + struct bpf_tramp_link *link; + struct bpf_tramp_links *tlinks; + struct bpf_tramp_link **links; int kind; *total = 0; - tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL); - if (!tprogs) + tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL); + if (!tlinks) return ERR_PTR(-ENOMEM); for (kind = 0; kind < BPF_TRAMP_MAX; kind++) { - tprogs[kind].nr_progs = tr->progs_cnt[kind]; + tlinks[kind].nr_links = tr->progs_cnt[kind]; *total += tr->progs_cnt[kind]; - progs = tprogs[kind].progs; + links = tlinks[kind].links; - hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) { - *ip_arg |= aux->prog->call_get_func_ip; - *progs++ = aux->prog; + hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) { + *ip_arg |= link->link.prog->call_get_func_ip; + *links++ = link; } } - return tprogs; + return tlinks; } static void __bpf_tramp_image_put_deferred(struct work_struct *work) @@ -330,14 +333,14 @@ out: static int bpf_trampoline_update(struct bpf_trampoline *tr) { struct bpf_tramp_image *im; - struct bpf_tramp_progs *tprogs; + struct bpf_tramp_links *tlinks; u32 flags = BPF_TRAMP_F_RESTORE_REGS; bool ip_arg = false; int err, total; - tprogs = bpf_trampoline_get_progs(tr, &total, &ip_arg); - if 
(IS_ERR(tprogs)) - return PTR_ERR(tprogs); + tlinks = bpf_trampoline_get_progs(tr, &total, &ip_arg); + if (IS_ERR(tlinks)) + return PTR_ERR(tlinks); if (total == 0) { err = unregister_fentry(tr, tr->cur_image->image); @@ -353,15 +356,15 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) goto out; } - if (tprogs[BPF_TRAMP_FEXIT].nr_progs || - tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs) + if (tlinks[BPF_TRAMP_FEXIT].nr_links || + tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; if (ip_arg) flags |= BPF_TRAMP_F_IP_ARG; err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE, - &tr->func.model, flags, tprogs, + &tr->func.model, flags, tlinks, tr->func.addr); if (err < 0) goto out; @@ -381,7 +384,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) tr->cur_image = im; tr->selector++; out: - kfree(tprogs); + kfree(tlinks); return err; } @@ -407,13 +410,14 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog) } } -int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) +int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { enum bpf_tramp_prog_type kind; + struct bpf_tramp_link *link_exiting; int err = 0; - int cnt; + int cnt = 0, i; - kind = bpf_attach_type_to_tramp(prog); + kind = bpf_attach_type_to_tramp(link->link.prog); mutex_lock(&tr->mutex); if (tr->extension_prog) { /* cannot attach fentry/fexit if extension prog is attached. @@ -422,32 +426,43 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) err = -EBUSY; goto out; } - cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT]; + + for (i = 0; i < BPF_TRAMP_MAX; i++) + cnt += tr->progs_cnt[i]; + if (kind == BPF_TRAMP_REPLACE) { /* Cannot attach extension if fentry/fexit are in use. */ if (cnt) { err = -EBUSY; goto out; } - tr->extension_prog = prog; + tr->extension_prog = link->link.prog; err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL, - prog->bpf_func); + link->link.prog->bpf_func); goto out; } - if (cnt >= BPF_MAX_TRAMP_PROGS) { + if (cnt >= BPF_MAX_TRAMP_LINKS) { err = -E2BIG; goto out; } - if (!hlist_unhashed(&prog->aux->tramp_hlist)) { + if (!hlist_unhashed(&link->tramp_hlist)) { + /* prog already linked */ + err = -EBUSY; + goto out; + } + hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) { + if (link_exiting->link.prog != link->link.prog) + continue; /* prog already linked */ err = -EBUSY; goto out; } - hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]); + + hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]); tr->progs_cnt[kind]++; err = bpf_trampoline_update(tr); if (err) { - hlist_del_init(&prog->aux->tramp_hlist); + hlist_del_init(&link->tramp_hlist); tr->progs_cnt[kind]--; } out: @@ -456,12 +471,12 @@ out: } /* bpf_trampoline_unlink_prog() should never fail. 
*/ -int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) +int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { enum bpf_tramp_prog_type kind; int err; - kind = bpf_attach_type_to_tramp(prog); + kind = bpf_attach_type_to_tramp(link->link.prog); mutex_lock(&tr->mutex); if (kind == BPF_TRAMP_REPLACE) { WARN_ON_ONCE(!tr->extension_prog); @@ -470,7 +485,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) tr->extension_prog = NULL; goto out; } - hlist_del_init(&prog->aux->tramp_hlist); + hlist_del_init(&link->tramp_hlist); tr->progs_cnt[kind]--; err = bpf_trampoline_update(tr); out: @@ -500,16 +515,19 @@ out: void bpf_trampoline_put(struct bpf_trampoline *tr) { + int i; + if (!tr) return; mutex_lock(&trampoline_mutex); if (!refcount_dec_and_test(&tr->refcnt)) goto out; WARN_ON_ONCE(mutex_is_locked(&tr->mutex)); - if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY]))) - goto out; - if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT]))) - goto out; + + for (i = 0; i < BPF_TRAMP_MAX; i++) + if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i]))) + goto out; + /* This code will be executed even when the last bpf_tramp_image * is alive. All progs are detached from the trampoline and the * trampoline image is patched with jmp into epilogue to skip @@ -559,11 +577,14 @@ static void notrace inc_misses_counter(struct bpf_prog *prog) * [2..MAX_U64] - execute bpf prog and record execution time. * This is start time. */ -u64 notrace __bpf_prog_enter(struct bpf_prog *prog) +u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx) __acquires(RCU) { rcu_read_lock(); migrate_disable(); + + run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); + if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) { inc_misses_counter(prog); return 0; @@ -593,29 +614,38 @@ static void notrace update_prog_stats(struct bpf_prog *prog, } } -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start) +void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx) __releases(RCU) { + bpf_reset_run_ctx(run_ctx->saved_run_ctx); + update_prog_stats(prog, start); __this_cpu_dec(*(prog->active)); migrate_enable(); rcu_read_unlock(); } -u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog) +u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx) { rcu_read_lock_trace(); migrate_disable(); might_fault(); + if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) { inc_misses_counter(prog); return 0; } + + run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); + return bpf_prog_start_time(); } -void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start) +void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx) { + bpf_reset_run_ctx(run_ctx->saved_run_ctx); + update_prog_stats(prog, start); __this_cpu_dec(*(prog->active)); migrate_enable(); @@ -635,7 +665,7 @@ void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr) int __weak arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_tramp_progs *tprogs, + struct bpf_tramp_links *tlinks, void *orig_call) { return -ENOTSUPP; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 813f6ee80419..aedac2ac02b9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -187,6 
+187,9 @@ struct bpf_verifier_stack_elem { POISON_POINTER_DELTA)) #define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV)) +static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx); +static int release_reference(struct bpf_verifier_env *env, int ref_obj_id); + static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) { return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON; @@ -259,6 +262,7 @@ struct bpf_call_arg_meta { u32 ret_btf_id; u32 subprogno; struct bpf_map_value_off_desc *kptr_off_desc; + u8 uninit_dynptr_regno; }; struct btf *btf_vmlinux; @@ -509,6 +513,7 @@ static bool is_ptr_cast_function(enum bpf_func_id func_id) func_id == BPF_FUNC_skc_to_tcp_sock || func_id == BPF_FUNC_skc_to_tcp6_sock || func_id == BPF_FUNC_skc_to_udp6_sock || + func_id == BPF_FUNC_skc_to_mptcp_sock || func_id == BPF_FUNC_skc_to_tcp_timewait_sock || func_id == BPF_FUNC_skc_to_tcp_request_sock; } @@ -580,6 +585,7 @@ static char slot_type_char[] = { [STACK_SPILL] = 'r', [STACK_MISC] = 'm', [STACK_ZERO] = '0', + [STACK_DYNPTR] = 'd', }; static void print_liveness(struct bpf_verifier_env *env, @@ -595,6 +601,25 @@ static void print_liveness(struct bpf_verifier_env *env, verbose(env, "D"); } +static int get_spi(s32 off) +{ + return (-off - 1) / BPF_REG_SIZE; +} + +static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots) +{ + int allocated_slots = state->allocated_stack / BPF_REG_SIZE; + + /* We need to check that slots between [spi - nr_slots + 1, spi] are + * within [0, allocated_stack). + * + * Please note that the spi grows downwards. For example, a dynptr + * takes the size of two stack slots; the first slot will be at + * spi and the second slot will be at spi - 1. + */ + return spi - nr_slots + 1 >= 0 && spi < allocated_slots; +} + static struct bpf_func_state *func(struct bpf_verifier_env *env, const struct bpf_reg_state *reg) { @@ -646,6 +671,132 @@ static void mark_verifier_state_scratched(struct bpf_verifier_env *env) env->scratched_stack_slots = ~0ULL; } +static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) +{ + switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { + case DYNPTR_TYPE_LOCAL: + return BPF_DYNPTR_TYPE_LOCAL; + case DYNPTR_TYPE_RINGBUF: + return BPF_DYNPTR_TYPE_RINGBUF; + default: + return BPF_DYNPTR_TYPE_INVALID; + } +} + +static bool dynptr_type_refcounted(enum bpf_dynptr_type type) +{ + return type == BPF_DYNPTR_TYPE_RINGBUF; +} + +static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + enum bpf_arg_type arg_type, int insn_idx) +{ + struct bpf_func_state *state = func(env, reg); + enum bpf_dynptr_type type; + int spi, i, id; + + spi = get_spi(reg->off); + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS)) + return -EINVAL; + + for (i = 0; i < BPF_REG_SIZE; i++) { + state->stack[spi].slot_type[i] = STACK_DYNPTR; + state->stack[spi - 1].slot_type[i] = STACK_DYNPTR; + } + + type = arg_to_dynptr_type(arg_type); + if (type == BPF_DYNPTR_TYPE_INVALID) + return -EINVAL; + + state->stack[spi].spilled_ptr.dynptr.first_slot = true; + state->stack[spi].spilled_ptr.dynptr.type = type; + state->stack[spi - 1].spilled_ptr.dynptr.type = type; + + if (dynptr_type_refcounted(type)) { + /* The id is used to track proper releasing */ + id = acquire_reference_state(env, insn_idx); + if (id < 0) + return id; + + state->stack[spi].spilled_ptr.id = id; + state->stack[spi - 1].spilled_ptr.id = id; + } + + return 0; +} + +static int unmark_stack_slots_dynptr(struct 
bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi, i; + + spi = get_spi(reg->off); + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS)) + return -EINVAL; + + for (i = 0; i < BPF_REG_SIZE; i++) { + state->stack[spi].slot_type[i] = STACK_INVALID; + state->stack[spi - 1].slot_type[i] = STACK_INVALID; + } + + /* Invalidate any slices associated with this dynptr */ + if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) { + release_reference(env, state->stack[spi].spilled_ptr.id); + state->stack[spi].spilled_ptr.id = 0; + state->stack[spi - 1].spilled_ptr.id = 0; + } + + state->stack[spi].spilled_ptr.dynptr.first_slot = false; + state->stack[spi].spilled_ptr.dynptr.type = 0; + state->stack[spi - 1].spilled_ptr.dynptr.type = 0; + + return 0; +} + +static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + int i; + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS)) + return true; + + for (i = 0; i < BPF_REG_SIZE; i++) { + if (state->stack[spi].slot_type[i] == STACK_DYNPTR || + state->stack[spi - 1].slot_type[i] == STACK_DYNPTR) + return false; + } + + return true; +} + +static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + enum bpf_arg_type arg_type) +{ + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + int i; + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) || + !state->stack[spi].spilled_ptr.dynptr.first_slot) + return false; + + for (i = 0; i < BPF_REG_SIZE; i++) { + if (state->stack[spi].slot_type[i] != STACK_DYNPTR || + state->stack[spi - 1].slot_type[i] != STACK_DYNPTR) + return false; + } + + /* ARG_PTR_TO_DYNPTR takes any type of dynptr */ + if (arg_type == ARG_PTR_TO_DYNPTR) + return true; + + return state->stack[spi].spilled_ptr.dynptr.type == arg_to_dynptr_type(arg_type); +} + /* The reg state of a pointer or a bounded scalar was saved when * it was spilled to the stack. 
*/ @@ -1815,8 +1966,7 @@ void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab) kfree(tab); } -static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, - u32 func_id, s16 offset) +static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset) { if (offset) { if (offset < 0) { @@ -1891,7 +2041,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) prog_aux->kfunc_btf_tab = btf_tab; } - desc_btf = find_kfunc_desc_btf(env, func_id, offset); + desc_btf = find_kfunc_desc_btf(env, offset); if (IS_ERR(desc_btf)) { verbose(env, "failed to find BTF for kernel function\n"); return PTR_ERR(desc_btf); @@ -2360,7 +2510,7 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL) return NULL; - desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off); + desc_btf = find_kfunc_desc_btf(data, insn->off); if (IS_ERR(desc_btf)) return "<error>"; @@ -5352,7 +5502,7 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno, return -EINVAL; } if (!map_value_has_kptrs(map_ptr)) { - ret = PTR_ERR(map_ptr->kptr_off_tab); + ret = PTR_ERR_OR_ZERO(map_ptr->kptr_off_tab); if (ret == -E2BIG) verbose(env, "map '%s' has more than %d kptr\n", map_ptr->name, BPF_MAP_VALUE_OFF_MAX); @@ -5378,12 +5528,6 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno, return 0; } -static bool arg_type_is_mem_ptr(enum bpf_arg_type type) -{ - return base_type(type) == ARG_PTR_TO_MEM || - base_type(type) == ARG_PTR_TO_UNINIT_MEM; -} - static bool arg_type_is_mem_size(enum bpf_arg_type type) { return type == ARG_CONST_SIZE || @@ -5406,6 +5550,11 @@ static bool arg_type_is_release(enum bpf_arg_type type) return type & OBJ_RELEASE; } +static bool arg_type_is_dynptr(enum bpf_arg_type type) +{ + return base_type(type) == ARG_PTR_TO_DYNPTR; +} + static int int_ptr_type_to_size(enum bpf_arg_type type) { if (type == ARG_PTR_TO_INT) @@ -5523,7 +5672,6 @@ static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } } static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types, - [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types, [ARG_CONST_SIZE] = &scalar_types, [ARG_CONST_SIZE_OR_ZERO] = &scalar_types, [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types, @@ -5537,7 +5685,6 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_BTF_ID] = &btf_ptr_types, [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, [ARG_PTR_TO_MEM] = &mem_types, - [ARG_PTR_TO_UNINIT_MEM] = &mem_types, [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types, [ARG_PTR_TO_INT] = &int_ptr_types, [ARG_PTR_TO_LONG] = &int_ptr_types, @@ -5547,6 +5694,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, [ARG_PTR_TO_TIMER] = &timer_types, [ARG_PTR_TO_KPTR] = &kptr_types, + [ARG_PTR_TO_DYNPTR] = &stack_ptr_types, }; static int check_reg_type(struct bpf_verifier_env *env, u32 regno, @@ -5636,8 +5784,13 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, bool fixed_off_ok = false; switch ((u32)type) { - case SCALAR_VALUE: /* Pointer types where reg offset is explicitly allowed: */ + case PTR_TO_STACK: + if (arg_type_is_dynptr(arg_type) && reg->off % BPF_REG_SIZE) { + verbose(env, "cannot pass in dynptr at an offset\n"); + return -EINVAL; + } + fallthrough; case PTR_TO_PACKET: case 
PTR_TO_PACKET_META: case PTR_TO_MAP_KEY: @@ -5647,7 +5800,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, case PTR_TO_MEM | MEM_ALLOC: case PTR_TO_BUF: case PTR_TO_BUF | MEM_RDONLY: - case PTR_TO_STACK: + case SCALAR_VALUE: /* Some of the argument types nevertheless require a * zero register offset. */ @@ -5679,6 +5832,14 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, return __check_ptr_off_reg(env, reg, regno, fixed_off_ok); } +static u32 stack_slot_get_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + + return state->stack[spi].spilled_ptr.id; +} + static int check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_call_arg_meta *meta, const struct bpf_func_proto *fn) @@ -5711,8 +5872,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, return -EACCES; } - if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || - base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) { + if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) { err = resolve_map_arg_type(env, meta, &arg_type); if (err) return err; @@ -5734,7 +5894,16 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, skip_type_check: if (arg_type_is_release(arg_type)) { - if (!reg->ref_obj_id && !register_is_null(reg)) { + if (arg_type_is_dynptr(arg_type)) { + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) || + !state->stack[spi].spilled_ptr.id) { + verbose(env, "arg %d is an unacquired reference\n", regno); + return -EINVAL; + } + } else if (!reg->ref_obj_id && !register_is_null(reg)) { verbose(env, "R%d must be referenced when passed to release function\n", regno); return -EINVAL; @@ -5798,8 +5967,7 @@ skip_type_check: err = check_helper_mem_access(env, regno, meta->map_ptr->key_size, false, NULL); - } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || - base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) { + } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) { if (type_may_be_null(arg_type) && register_is_null(reg)) return 0; @@ -5811,7 +5979,7 @@ skip_type_check: verbose(env, "invalid map_ptr to access map->value\n"); return -EACCES; } - meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE); + meta->raw_mode = arg_type & MEM_UNINIT; err = check_helper_mem_access(env, regno, meta->map_ptr->value_size, false, meta); @@ -5838,15 +6006,49 @@ skip_type_check: return -EACCES; } else if (arg_type == ARG_PTR_TO_FUNC) { meta->subprogno = reg->subprogno; - } else if (arg_type_is_mem_ptr(arg_type)) { + } else if (base_type(arg_type) == ARG_PTR_TO_MEM) { /* The access to this pointer is only checked when we hit the * next is_mem_size argument below. */ - meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM); + meta->raw_mode = arg_type & MEM_UNINIT; } else if (arg_type_is_mem_size(arg_type)) { bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta); + } else if (arg_type_is_dynptr(arg_type)) { + if (arg_type & MEM_UNINIT) { + if (!is_dynptr_reg_valid_uninit(env, reg)) { + verbose(env, "Dynptr has to be an uninitialized dynptr\n"); + return -EINVAL; + } + + /* We only support one dynptr being uninitialized at the moment, + * which is sufficient for the helper functions we have right now. 
+ */ + if (meta->uninit_dynptr_regno) { + verbose(env, "verifier internal error: multiple uninitialized dynptr args\n"); + return -EFAULT; + } + + meta->uninit_dynptr_regno = regno; + } else if (!is_dynptr_reg_valid_init(env, reg, arg_type)) { + const char *err_extra = ""; + + switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { + case DYNPTR_TYPE_LOCAL: + err_extra = "local "; + break; + case DYNPTR_TYPE_RINGBUF: + err_extra = "ringbuf "; + break; + default: + break; + } + + verbose(env, "Expected an initialized %sdynptr as arg #%d\n", + err_extra, arg + 1); + return -EINVAL; + } } else if (arg_type_is_alloc_size(arg_type)) { if (!tnum_is_const(reg->var_off)) { verbose(env, "R%d is not a known constant'\n", @@ -5968,7 +6170,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_RINGBUF: if (func_id != BPF_FUNC_ringbuf_output && func_id != BPF_FUNC_ringbuf_reserve && - func_id != BPF_FUNC_ringbuf_query) + func_id != BPF_FUNC_ringbuf_query && + func_id != BPF_FUNC_ringbuf_reserve_dynptr && + func_id != BPF_FUNC_ringbuf_submit_dynptr && + func_id != BPF_FUNC_ringbuf_discard_dynptr) goto error; break; case BPF_MAP_TYPE_STACK_TRACE: @@ -6084,6 +6289,9 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_FUNC_ringbuf_output: case BPF_FUNC_ringbuf_reserve: case BPF_FUNC_ringbuf_query: + case BPF_FUNC_ringbuf_reserve_dynptr: + case BPF_FUNC_ringbuf_submit_dynptr: + case BPF_FUNC_ringbuf_discard_dynptr: if (map->map_type != BPF_MAP_TYPE_RINGBUF) goto error; break; @@ -6138,6 +6346,12 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, map->map_type != BPF_MAP_TYPE_BLOOM_FILTER) goto error; break; + case BPF_FUNC_map_lookup_percpu_elem: + if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY && + map->map_type != BPF_MAP_TYPE_PERCPU_HASH && + map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH) + goto error; + break; case BPF_FUNC_sk_storage_get: case BPF_FUNC_sk_storage_delete: if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) @@ -6189,10 +6403,8 @@ static bool check_raw_mode_ok(const struct bpf_func_proto *fn) static bool check_args_pair_invalid(enum bpf_arg_type arg_curr, enum bpf_arg_type arg_next) { - return (arg_type_is_mem_ptr(arg_curr) && - !arg_type_is_mem_size(arg_next)) || - (!arg_type_is_mem_ptr(arg_curr) && - arg_type_is_mem_size(arg_next)); + return (base_type(arg_curr) == ARG_PTR_TO_MEM) != + arg_type_is_mem_size(arg_next); } static bool check_arg_pair_ok(const struct bpf_func_proto *fn) @@ -6203,7 +6415,7 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn) * helper function specification. 
*/ if (arg_type_is_mem_size(fn->arg1_type) || - arg_type_is_mem_ptr(fn->arg5_type) || + base_type(fn->arg5_type) == ARG_PTR_TO_MEM || check_args_pair_invalid(fn->arg1_type, fn->arg2_type) || check_args_pair_invalid(fn->arg2_type, fn->arg3_type) || check_args_pair_invalid(fn->arg3_type, fn->arg4_type) || @@ -6751,7 +6963,8 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, func_id != BPF_FUNC_map_pop_elem && func_id != BPF_FUNC_map_peek_elem && func_id != BPF_FUNC_for_each_map_elem && - func_id != BPF_FUNC_redirect_map) + func_id != BPF_FUNC_redirect_map && + func_id != BPF_FUNC_map_lookup_percpu_elem) return 0; if (map == NULL) { @@ -6975,9 +7188,27 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs = cur_regs(env); + if (meta.uninit_dynptr_regno) { + /* we write BPF_DW bits (8 bytes) at a time */ + for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) { + err = check_mem_access(env, insn_idx, meta.uninit_dynptr_regno, + i, BPF_DW, BPF_WRITE, -1, false); + if (err) + return err; + } + + err = mark_stack_slots_dynptr(env, ®s[meta.uninit_dynptr_regno], + fn->arg_type[meta.uninit_dynptr_regno - BPF_REG_1], + insn_idx); + if (err) + return err; + } + if (meta.release_regno) { err = -EINVAL; - if (meta.ref_obj_id) + if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) + err = unmark_stack_slots_dynptr(env, ®s[meta.release_regno]); + else if (meta.ref_obj_id) err = release_reference(env, meta.ref_obj_id); /* meta.ref_obj_id can only be 0 if register that is meant to be * released is NULL, which must be > R0. @@ -7027,6 +7258,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_loop_callback_state); break; + case BPF_FUNC_dynptr_from_mem: + if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) { + verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n", + reg_type_str(env, regs[BPF_REG_1].type)); + return -EACCES; + } } if (err) @@ -7155,6 +7392,21 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].id = id; /* For release_reference() */ regs[BPF_REG_0].ref_obj_id = id; + } else if (func_id == BPF_FUNC_dynptr_data) { + int dynptr_id = 0, i; + + /* Find the id of the dynptr we're acquiring a reference to */ + for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { + if (arg_type_is_dynptr(fn->arg_type[i])) { + if (dynptr_id) { + verbose(env, "verifier internal error: multiple dynptr args in func\n"); + return -EFAULT; + } + dynptr_id = stack_slot_get_id(env, ®s[BPF_REG_1 + i]); + } + } + /* For release_reference() */ + regs[BPF_REG_0].ref_obj_id = dynptr_id; } do_refine_retval_range(regs, fn->ret_type, func_id, &meta); @@ -7237,7 +7489,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (!insn->imm) return 0; - desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off); + desc_btf = find_kfunc_desc_btf(env, insn->off); if (IS_ERR(desc_btf)) return PTR_ERR(desc_btf); @@ -13811,7 +14063,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env) insn->imm == BPF_FUNC_map_pop_elem || insn->imm == BPF_FUNC_map_peek_elem || insn->imm == BPF_FUNC_redirect_map || - insn->imm == BPF_FUNC_for_each_map_elem)) { + insn->imm == BPF_FUNC_for_each_map_elem || + insn->imm == BPF_FUNC_map_lookup_percpu_elem)) { aux = &env->insn_aux_data[i + delta]; if (bpf_map_ptr_poisoned(aux)) goto patch_call_imm; @@ -13860,6 +14113,8 @@ static int do_misc_fixups(struct 
bpf_verifier_env *env) bpf_callback_t callback_fn, void *callback_ctx, u64 flags))NULL)); + BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem, + (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL)); patch_map_ops_generic: switch (insn->imm) { @@ -13887,6 +14142,9 @@ patch_map_ops_generic: case BPF_FUNC_for_each_map_elem: insn->imm = BPF_CALL_IMM(ops->map_for_each_callback); continue; + case BPF_FUNC_map_lookup_percpu_elem: + insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem); + continue; } goto patch_call_imm; diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 79f2eb617a62..fbdf8d3279ac 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -29,6 +29,7 @@ #include <linux/compiler.h> #include <linux/module.h> #include <linux/kernel.h> +#include <linux/bsearch.h> /* * These will be re-linked against their real values @@ -228,7 +229,6 @@ unsigned long kallsyms_lookup_name(const char *name) return module_kallsyms_lookup_name(name); } -#ifdef CONFIG_LIVEPATCH /* * Iterate over all symbols in vmlinux. For symbols from modules use * module_kallsyms_on_each_symbol instead. @@ -251,7 +251,6 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, } return 0; } -#endif /* CONFIG_LIVEPATCH */ static unsigned long get_symbol_pos(unsigned long addr, unsigned long *symbolsize, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f15b826f9899..10b157a6d73e 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1091,6 +1091,21 @@ static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = { .arg1_type = ARG_PTR_TO_CTX, }; +BPF_CALL_1(bpf_get_attach_cookie_tracing, void *, ctx) +{ + struct bpf_trace_run_ctx *run_ctx; + + run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx); + return run_ctx->bpf_cookie; +} + +static const struct bpf_func_proto bpf_get_attach_cookie_proto_tracing = { + .func = bpf_get_attach_cookie_tracing, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags) { #ifndef CONFIG_X86 @@ -1182,6 +1197,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_map_pop_elem_proto; case BPF_FUNC_map_peek_elem: return &bpf_map_peek_elem_proto; + case BPF_FUNC_map_lookup_percpu_elem: + return &bpf_map_lookup_percpu_elem_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; case BPF_FUNC_ktime_get_boot_ns: @@ -1688,6 +1705,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skc_to_udp6_sock_proto; case BPF_FUNC_skc_to_unix_sock: return &bpf_skc_to_unix_sock_proto; + case BPF_FUNC_skc_to_mptcp_sock: + return &bpf_skc_to_mptcp_sock_proto; case BPF_FUNC_sk_storage_get: return &bpf_sk_storage_get_tracing_proto; case BPF_FUNC_sk_storage_delete: @@ -1719,6 +1738,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL; case BPF_FUNC_get_func_arg_cnt: return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_cnt_proto : NULL; + case BPF_FUNC_get_attach_cookie: + return bpf_prog_has_trampoline(prog) ? 
&bpf_get_attach_cookie_proto_tracing : NULL; default: fn = raw_tp_prog_func_proto(func_id, prog); if (!fn && prog->expected_attach_type == BPF_TRACE_ITER) @@ -2229,6 +2250,59 @@ struct bpf_kprobe_multi_run_ctx { unsigned long entry_ip; }; +struct user_syms { + const char **syms; + char *buf; +}; + +static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 cnt) +{ + unsigned long __user usymbol; + const char **syms = NULL; + char *buf = NULL, *p; + int err = -ENOMEM; + unsigned int i; + + syms = kvmalloc(cnt * sizeof(*syms), GFP_KERNEL); + if (!syms) + goto error; + + buf = kvmalloc(cnt * KSYM_NAME_LEN, GFP_KERNEL); + if (!buf) + goto error; + + for (p = buf, i = 0; i < cnt; i++) { + if (__get_user(usymbol, usyms + i)) { + err = -EFAULT; + goto error; + } + err = strncpy_from_user(p, (const char __user *) usymbol, KSYM_NAME_LEN); + if (err == KSYM_NAME_LEN) + err = -E2BIG; + if (err < 0) + goto error; + syms[i] = p; + p += err + 1; + } + + us->syms = syms; + us->buf = buf; + return 0; + +error: + if (err) { + kvfree(syms); + kvfree(buf); + } + return err; +} + +static void free_user_syms(struct user_syms *us) +{ + kvfree(us->syms); + kvfree(us->buf); +} + static void bpf_kprobe_multi_link_release(struct bpf_link *link) { struct bpf_kprobe_multi_link *kmulti_link; @@ -2349,53 +2423,12 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned long entry_ip, kprobe_multi_link_prog_run(link, entry_ip, regs); } -static int -kprobe_multi_resolve_syms(const void __user *usyms, u32 cnt, - unsigned long *addrs) +static int symbols_cmp(const void *a, const void *b) { - unsigned long addr, size; - const char __user **syms; - int err = -ENOMEM; - unsigned int i; - char *func; - - size = cnt * sizeof(*syms); - syms = kvzalloc(size, GFP_KERNEL); - if (!syms) - return -ENOMEM; + const char **str_a = (const char **) a; + const char **str_b = (const char **) b; - func = kmalloc(KSYM_NAME_LEN, GFP_KERNEL); - if (!func) - goto error; - - if (copy_from_user(syms, usyms, size)) { - err = -EFAULT; - goto error; - } - - for (i = 0; i < cnt; i++) { - err = strncpy_from_user(func, syms[i], KSYM_NAME_LEN); - if (err == KSYM_NAME_LEN) - err = -E2BIG; - if (err < 0) - goto error; - err = -EINVAL; - addr = kallsyms_lookup_name(func); - if (!addr) - goto error; - if (!kallsyms_lookup_size_offset(addr, &size, NULL)) - goto error; - addr = ftrace_location_range(addr, addr + size - 1); - if (!addr) - goto error; - addrs[i] = addr; - } - - err = 0; -error: - kvfree(syms); - kfree(func); - return err; + return strcmp(*str_a, *str_b); } int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) @@ -2441,7 +2474,15 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr goto error; } } else { - err = kprobe_multi_resolve_syms(usyms, cnt, addrs); + struct user_syms us; + + err = copy_user_syms(&us, usyms, cnt); + if (err) + goto error; + + sort(us.syms, cnt, sizeof(*us.syms), symbols_cmp, NULL); + err = ftrace_lookup_symbols(us.syms, cnt, addrs); + free_user_syms(&us); if (err) goto error; } diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 89d9f994ebb0..aac63ca9c3d1 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -85,39 +85,31 @@ static void fprobe_exit_handler(struct rethook_node *rh, void *data, } NOKPROBE_SYMBOL(fprobe_exit_handler); +static int symbols_cmp(const void *a, const void *b) +{ + const char **str_a = (const char **) a; + const char **str_b = (const char **) b; + + return strcmp(*str_a, *str_b); +} + /* 
Convert ftrace location address from symbols */ static unsigned long *get_ftrace_locations(const char **syms, int num) { - unsigned long addr, size; unsigned long *addrs; - int i; /* Convert symbols to symbol address */ addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL); if (!addrs) return ERR_PTR(-ENOMEM); - for (i = 0; i < num; i++) { - addr = kallsyms_lookup_name(syms[i]); - if (!addr) /* Maybe wrong symbol */ - goto error; - - /* Convert symbol address to ftrace location. */ - if (!kallsyms_lookup_size_offset(addr, &size, NULL) || !size) - goto error; + /* ftrace_lookup_symbols expects sorted symbols */ + sort(syms, num, sizeof(*syms), symbols_cmp, NULL); - addr = ftrace_location_range(addr, addr + size - 1); - if (!addr) /* No dynamic ftrace there. */ - goto error; + if (!ftrace_lookup_symbols(syms, num, addrs)) + return addrs; - addrs[i] = addr; - } - - return addrs; - -error: kfree(addrs); - return ERR_PTR(-ENOENT); } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index af899b058c8d..674add0aafb3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7964,3 +7964,65 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, mutex_unlock(&ftrace_lock); return ret; } + +static int symbols_cmp(const void *a, const void *b) +{ + const char **str_a = (const char **) a; + const char **str_b = (const char **) b; + + return strcmp(*str_a, *str_b); +} + +struct kallsyms_data { + unsigned long *addrs; + const char **syms; + size_t cnt; + size_t found; +}; + +static int kallsyms_callback(void *data, const char *name, + struct module *mod, unsigned long addr) +{ + struct kallsyms_data *args = data; + + if (!bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp)) + return 0; + + addr = ftrace_location(addr); + if (!addr) + return 0; + + args->addrs[args->found++] = addr; + return args->found == args->cnt ? 1 : 0; +} + +/** + * ftrace_lookup_symbols - Lookup addresses for array of symbols + * + * @sorted_syms: array of symbols pointers symbols to resolve, + * must be alphabetically sorted + * @cnt: number of symbols/addresses in @syms/@addrs arrays + * @addrs: array for storing resulting addresses + * + * This function looks up addresses for array of symbols provided in + * @syms array (must be alphabetically sorted) and stores them in + * @addrs array, which needs to be big enough to store at least @cnt + * addresses. + * + * This function returns 0 if all provided symbols are found, + * -ESRCH otherwise. + */ +int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *addrs) +{ + struct kallsyms_data args; + int err; + + args.addrs = addrs; + args.syms = sorted_syms; + args.cnt = cnt; + args.found = 0; + err = kallsyms_on_each_symbol(kallsyms_callback, &args); + if (err < 0) + return err; + return args.found == args.cnt ? 
0 : -ESRCH; +} diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index d0e54e30658a..e78dadfc5829 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -72,13 +72,16 @@ static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args) args->args[3], args->args[4]); } +extern const struct bpf_link_ops bpf_struct_ops_link_lops; + int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { const struct bpf_struct_ops *st_ops = &bpf_bpf_dummy_ops; const struct btf_type *func_proto; struct bpf_dummy_ops_test_args *args; - struct bpf_tramp_progs *tprogs; + struct bpf_tramp_links *tlinks; + struct bpf_tramp_link *link = NULL; void *image = NULL; unsigned int op_idx; int prog_ret; @@ -92,8 +95,8 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, if (IS_ERR(args)) return PTR_ERR(args); - tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL); - if (!tprogs) { + tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL); + if (!tlinks) { err = -ENOMEM; goto out; } @@ -105,8 +108,17 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, } set_vm_flush_reset_perms(image); + link = kzalloc(sizeof(*link), GFP_USER); + if (!link) { + err = -ENOMEM; + goto out; + } + /* prog doesn't take the ownership of the reference from caller */ + bpf_prog_inc(prog); + bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_link_lops, prog); + op_idx = prog->expected_attach_type; - err = bpf_struct_ops_prepare_trampoline(tprogs, prog, + err = bpf_struct_ops_prepare_trampoline(tlinks, link, &st_ops->func_models[op_idx], image, image + PAGE_SIZE); if (err < 0) @@ -124,7 +136,9 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, out: kfree(args); bpf_jit_free_exec(image); - kfree(tprogs); + if (link) + bpf_link_put(&link->link); + kfree(tlinks); return err; } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 9b5a1f630bb0..56f059b3c242 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -565,31 +565,36 @@ struct prog_test_ref_kfunc { int b; struct prog_test_member memb; struct prog_test_ref_kfunc *next; + refcount_t cnt; }; static struct prog_test_ref_kfunc prog_test_struct = { .a = 42, .b = 108, .next = &prog_test_struct, + .cnt = REFCOUNT_INIT(1), }; noinline struct prog_test_ref_kfunc * bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) { - /* randomly return NULL */ - if (get_jiffies_64() % 2) - return NULL; + refcount_inc(&prog_test_struct.cnt); return &prog_test_struct; } noinline struct prog_test_member * bpf_kfunc_call_memb_acquire(void) { - return &prog_test_struct.memb; + WARN_ON_ONCE(1); + return NULL; } noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) { + if (!p) + return; + + refcount_dec(&p->cnt); } noinline void bpf_kfunc_call_memb_release(struct prog_test_member *p) @@ -598,12 +603,18 @@ noinline void bpf_kfunc_call_memb_release(struct prog_test_member *p) noinline void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p) { + WARN_ON_ONCE(1); } noinline struct prog_test_ref_kfunc * -bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) +bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **pp, int a, int b) { - return &prog_test_struct; + struct prog_test_ref_kfunc *p = READ_ONCE(*pp); + + if (!p) + return NULL; + refcount_inc(&p->cnt); + return p; } struct prog_test_pass1 { diff --git 
a/net/core/filter.c b/net/core/filter.c index b741b9f7e6a9..5af58eb48587 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -78,6 +78,7 @@ #include <linux/btf_ids.h> #include <net/tls.h> #include <net/xdp.h> +#include <net/mptcp.h> static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id); @@ -4498,6 +4499,7 @@ BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key if (unlikely(size != sizeof(struct bpf_tunnel_key))) { err = -EINVAL; switch (size) { + case offsetof(struct bpf_tunnel_key, local_ipv6[0]): case offsetof(struct bpf_tunnel_key, tunnel_label): case offsetof(struct bpf_tunnel_key, tunnel_ext): goto set_compat; @@ -4523,10 +4525,14 @@ set_compat: if (flags & BPF_F_TUNINFO_IPV6) { memcpy(to->remote_ipv6, &info->key.u.ipv6.src, sizeof(to->remote_ipv6)); + memcpy(to->local_ipv6, &info->key.u.ipv6.dst, + sizeof(to->local_ipv6)); to->tunnel_label = be32_to_cpu(info->key.label); } else { to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3); + to->local_ipv4 = be32_to_cpu(info->key.u.ipv4.dst); + memset(&to->local_ipv6[1], 0, sizeof(__u32) * 3); to->tunnel_label = 0; } @@ -4597,6 +4603,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, return -EINVAL; if (unlikely(size != sizeof(struct bpf_tunnel_key))) { switch (size) { + case offsetof(struct bpf_tunnel_key, local_ipv6[0]): case offsetof(struct bpf_tunnel_key, tunnel_label): case offsetof(struct bpf_tunnel_key, tunnel_ext): case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): @@ -4639,10 +4646,13 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, info->mode |= IP_TUNNEL_INFO_IPV6; memcpy(&info->key.u.ipv6.dst, from->remote_ipv6, sizeof(from->remote_ipv6)); + memcpy(&info->key.u.ipv6.src, from->local_ipv6, + sizeof(from->local_ipv6)); info->key.label = cpu_to_be32(from->tunnel_label) & IPV6_FLOWLABEL_MASK; } else { info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); + info->key.u.ipv4.src = cpu_to_be32(from->local_ipv4); } return 0; @@ -11272,6 +11282,20 @@ const struct bpf_func_proto bpf_skc_to_unix_sock_proto = { .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UNIX], }; +BPF_CALL_1(bpf_skc_to_mptcp_sock, struct sock *, sk) +{ + BTF_TYPE_EMIT(struct mptcp_sock); + return (unsigned long)bpf_mptcp_sock_from_subflow(sk); +} + +const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = { + .func = bpf_skc_to_mptcp_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, + .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_MPTCP], +}; + BPF_CALL_1(bpf_sock_from_file, struct file *, file) { return (unsigned long)sock_from_file(file); @@ -11314,6 +11338,9 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id) case BPF_FUNC_skc_to_unix_sock: func = &bpf_skc_to_unix_sock_proto; break; + case BPF_FUNC_skc_to_mptcp_sock: + func = &bpf_skc_to_mptcp_sock_proto; + break; case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: diff --git a/net/core/skmsg.c b/net/core/skmsg.c index cc381165ea08..22b983ade0e7 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -524,16 +524,20 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, { int num_sge, copied; - /* skb linearize may fail with ENOMEM, but lets simply try again - * later if this happens. Under memory pressure we don't want to - * drop the skb. We need to linearize the skb so that the mapping - * in skb_to_sgvec can not error. 
- */ - if (skb_linearize(skb)) - return -EAGAIN; num_sge = skb_to_sgvec(skb, msg->sg.data, off, len); - if (unlikely(num_sge < 0)) - return num_sge; + if (num_sge < 0) { + /* skb linearize may fail with ENOMEM, but lets simply try again + * later if this happens. Under memory pressure we don't want to + * drop the skb. We need to linearize the skb so that the mapping + * in skb_to_sgvec can not error. + */ + if (skb_linearize(skb)) + return -EAGAIN; + + num_sge = skb_to_sgvec(skb, msg->sg.data, off, len); + if (unlikely(num_sge < 0)) + return num_sge; + } copied = len; msg->sg.start = 0; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index ca8d38325e1e..71a13596ea2b 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -264,6 +264,8 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, loff_t *ppos) { int ret, jit_enable = *(int *)table->data; + int min = *(int *)table->extra1; + int max = *(int *)table->extra2; struct ctl_table tmp = *table; if (write && !capable(CAP_SYS_ADMIN)) @@ -281,6 +283,10 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, ret = -EPERM; } } + + if (write && ret && min == max) + pr_info_once("CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1.\n"); + return ret; } diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index cb7f53f6ab22..6e7df47c9584 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -10,3 +10,5 @@ obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o mptcp_crypto_test-objs := crypto_test.o mptcp_token_test-objs := token_test.o obj-$(CONFIG_MPTCP_KUNIT_TEST) += mptcp_crypto_test.o mptcp_token_test.o + +obj-$(CONFIG_BPF_SYSCALL) += bpf.o diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c new file mode 100644 index 000000000000..5a0a84ad94af --- /dev/null +++ b/net/mptcp/bpf.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Multipath TCP + * + * Copyright (c) 2020, Tessares SA. + * Copyright (c) 2022, SUSE. 
+ * + * Author: Nicolas Rybowski <nicolas.rybowski@tessares.net> + */ + +#define pr_fmt(fmt) "MPTCP: " fmt + +#include <linux/bpf.h> +#include "protocol.h" + +struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk) +{ + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk)) + return mptcp_sk(mptcp_subflow_ctx(sk)->conn); + + return NULL; +} diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 8fff5ad3444b..03e3d3529ac9 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -369,16 +369,15 @@ VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) $(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) ifeq ($(VMLINUX_H),) +ifeq ($(VMLINUX_BTF),) + $(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)",\ + build the kernel or set VMLINUX_BTF or VMLINUX_H variable) +endif $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ else $(Q)cp "$(VMLINUX_H)" $@ endif -ifeq ($(VMLINUX_BTF),) - $(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)",\ - build the kernel or set VMLINUX_BTF variable) -endif - clean-files += vmlinux.h # Get Clang's default includes on this system, as opposed to those seen by diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index 05a24a712d7d..08f5331d2b00 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -17,7 +17,7 @@ static const char *__doc__ = " XDP RX-queue info extract example\n\n" #include <getopt.h> #include <net/if.h> #include <time.h> - +#include <limits.h> #include <arpa/inet.h> #include <linux/if_link.h> @@ -43,6 +43,9 @@ static struct bpf_map *rx_queue_index_map; #define EXIT_FAIL_BPF 4 #define EXIT_FAIL_MEM 5 +#define FAIL_MEM_SIG INT_MAX +#define FAIL_STAT_SIG (INT_MAX - 1) + static const struct option long_options[] = { {"help", no_argument, NULL, 'h' }, {"dev", required_argument, NULL, 'd' }, @@ -76,6 +79,12 @@ static void int_exit(int sig) printf("program on interface changed, not removing\n"); } } + + if (sig == FAIL_MEM_SIG) + exit(EXIT_FAIL_MEM); + else if (sig == FAIL_STAT_SIG) + exit(EXIT_FAIL); + exit(EXIT_OK); } @@ -140,7 +149,8 @@ static char* options2str(enum cfg_options_flags flag) if (flag & READ_MEM) return "read"; fprintf(stderr, "ERR: Unknown config option flags"); - exit(EXIT_FAIL); + int_exit(FAIL_STAT_SIG); + return "unknown"; } static void usage(char *argv[]) @@ -173,7 +183,7 @@ static __u64 gettime(void) res = clock_gettime(CLOCK_MONOTONIC, &t); if (res < 0) { fprintf(stderr, "Error with gettimeofday! 
(%i)\n", res); - exit(EXIT_FAIL); + int_exit(FAIL_STAT_SIG); } return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; } @@ -201,7 +211,7 @@ static struct datarec *alloc_record_per_cpu(void) array = calloc(nr_cpus, sizeof(struct datarec)); if (!array) { fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); - exit(EXIT_FAIL_MEM); + int_exit(FAIL_MEM_SIG); } return array; } @@ -214,7 +224,7 @@ static struct record *alloc_record_per_rxq(void) array = calloc(nr_rxqs, sizeof(struct record)); if (!array) { fprintf(stderr, "Mem alloc error (nr_rxqs:%u)\n", nr_rxqs); - exit(EXIT_FAIL_MEM); + int_exit(FAIL_MEM_SIG); } return array; } @@ -228,7 +238,7 @@ static struct stats_record *alloc_stats_record(void) rec = calloc(1, sizeof(struct stats_record)); if (!rec) { fprintf(stderr, "Mem alloc error\n"); - exit(EXIT_FAIL_MEM); + int_exit(FAIL_MEM_SIG); } rec->rxq = alloc_record_per_rxq(); for (i = 0; i < nr_rxqs; i++) diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 096625242475..855b937e7585 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -633,6 +633,8 @@ class PrinterHelpers(Printer): 'struct socket', 'struct file', 'struct bpf_timer', + 'struct mptcp_sock', + 'struct bpf_dynptr', ] known_types = { '...', @@ -682,6 +684,8 @@ class PrinterHelpers(Printer): 'struct socket', 'struct file', 'struct bpf_timer', + 'struct mptcp_sock', + 'struct bpf_dynptr', } mapped_types = { 'u8': '__u8', diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index a2c665beda87..7e6accb9d9f7 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -459,6 +459,51 @@ done: return err; } +static const char sysfs_vmlinux[] = "/sys/kernel/btf/vmlinux"; + +static struct btf *get_vmlinux_btf_from_sysfs(void) +{ + struct btf *base; + + base = btf__parse(sysfs_vmlinux, NULL); + if (libbpf_get_error(base)) { + p_err("failed to parse vmlinux BTF at '%s': %ld\n", + sysfs_vmlinux, libbpf_get_error(base)); + base = NULL; + } + + return base; +} + +#define BTF_NAME_BUFF_LEN 64 + +static bool btf_is_kernel_module(__u32 btf_id) +{ + struct bpf_btf_info btf_info = {}; + char btf_name[BTF_NAME_BUFF_LEN]; + int btf_fd; + __u32 len; + int err; + + btf_fd = bpf_btf_get_fd_by_id(btf_id); + if (btf_fd < 0) { + p_err("can't get BTF object by id (%u): %s", btf_id, strerror(errno)); + return false; + } + + len = sizeof(btf_info); + btf_info.name = ptr_to_u64(btf_name); + btf_info.name_len = sizeof(btf_name); + err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); + close(btf_fd); + if (err) { + p_err("can't get BTF (ID %u) object info: %s", btf_id, strerror(errno)); + return false; + } + + return btf_info.kernel_btf && strncmp(btf_name, "vmlinux", sizeof(btf_name)) != 0; +} + static int do_dump(int argc, char **argv) { struct btf *btf = NULL, *base = NULL; @@ -536,18 +581,11 @@ static int do_dump(int argc, char **argv) NEXT_ARG(); } else if (is_prefix(src, "file")) { const char sysfs_prefix[] = "/sys/kernel/btf/"; - const char sysfs_vmlinux[] = "/sys/kernel/btf/vmlinux"; if (!base_btf && strncmp(*argv, sysfs_prefix, sizeof(sysfs_prefix) - 1) == 0 && - strcmp(*argv, sysfs_vmlinux) != 0) { - base = btf__parse(sysfs_vmlinux, NULL); - if (libbpf_get_error(base)) { - p_err("failed to parse vmlinux BTF at '%s': %ld\n", - sysfs_vmlinux, libbpf_get_error(base)); - base = NULL; - } - } + strcmp(*argv, sysfs_vmlinux) != 0) + base = get_vmlinux_btf_from_sysfs(); btf = btf__parse_split(*argv, base ?: base_btf); err = libbpf_get_error(btf); @@ -591,6 +629,12 @@ static int do_dump(int argc, char **argv) } if (!btf) 
{ + if (!base_btf && btf_is_kernel_module(btf_id)) { + p_info("Warning: valid base BTF was not specified with -B option, falling back to standard base BTF (%s)", + sysfs_vmlinux); + base_btf = get_vmlinux_btf_from_sysfs(); + } + btf = btf__load_from_kernel_by_id_split(btf_id, base_btf); err = libbpf_get_error(btf); if (err) { diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index be130e35462f..d12f46051aac 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -638,7 +638,7 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix, res = probe_map_type_ifindex(map_type, ifindex); } else { - res = libbpf_probe_bpf_map_type(map_type, NULL); + res = libbpf_probe_bpf_map_type(map_type, NULL) > 0; } /* Probe result depends on the success of map creation, no additional @@ -690,7 +690,7 @@ probe_helper_ifindex(enum bpf_func_id id, enum bpf_prog_type prog_type, return res; } -static void +static bool probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type, const char *define_prefix, unsigned int id, const char *ptype_name, __u32 ifindex) @@ -701,7 +701,7 @@ probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type, if (ifindex) res = probe_helper_ifindex(id, prog_type, ifindex); else - res = libbpf_probe_bpf_helper(prog_type, id, NULL); + res = libbpf_probe_bpf_helper(prog_type, id, NULL) > 0; #ifdef USE_LIBCAP /* Probe may succeed even if program load fails, for * unprivileged users check that we did not fail because of @@ -723,6 +723,8 @@ probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type, if (res) printf("\n\t- %s", helper_name[id]); } + + return res; } static void @@ -732,6 +734,7 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, const char *ptype_name = prog_type_name[prog_type]; char feat_name[128]; unsigned int id; + bool probe_res = false; if (ifindex) /* Only test helpers for offload-able program types */ @@ -764,7 +767,7 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, continue; /* fallthrough */ default: - probe_helper_for_progtype(prog_type, supported_type, + probe_res |= probe_helper_for_progtype(prog_type, supported_type, define_prefix, id, ptype_name, ifindex); } @@ -772,8 +775,17 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, if (json_output) jsonw_end_array(json_wtr); - else if (!define_prefix) + else if (!define_prefix) { printf("\n"); + if (!probe_res) { + if (!supported_type) + printf("\tProgram type not supported\n"); + else + printf("\tCould not determine which helpers are available\n"); + } + } + + } static void diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 7678af364793..4c9477ff748d 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -549,6 +549,7 @@ static void codegen_attach_detach(struct bpf_object *obj, const char *obj_name) printf("\tint fd = skel_raw_tracepoint_open(\"%s\", prog_fd);\n", tp_name); break; case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_LSM: if (bpf_program__expected_attach_type(prog) == BPF_TRACE_ITER) printf("\tint fd = skel_link_create(prog_fd, 0, BPF_TRACE_ITER);\n"); else @@ -999,7 +1000,7 @@ static int do_skeleton(int argc, char **argv) codegen("\ \n\ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ \n\ - /* THIS FILE IS AUTOGENERATED! */ \n\ + /* THIS FILE IS AUTOGENERATED BY BPFTOOL! 
*/ \n\ #ifndef %2$s \n\ #define %2$s \n\ \n\ @@ -1015,7 +1016,7 @@ static int do_skeleton(int argc, char **argv) \n\ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ \n\ \n\ - /* THIS FILE IS AUTOGENERATED! */ \n\ + /* THIS FILE IS AUTOGENERATED BY BPFTOOL! */ \n\ #ifndef %2$s \n\ #define %2$s \n\ \n\ diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 8fb0116f9136..6353a789322b 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -23,6 +23,7 @@ static const char * const link_type_name[] = { [BPF_LINK_TYPE_XDP] = "xdp", [BPF_LINK_TYPE_PERF_EVENT] = "perf_event", [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi", + [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops", }; static struct hashmap *link_table; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 444fe6f1cf35..f4009dbdf62d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1013,6 +1013,7 @@ enum bpf_link_type { BPF_LINK_TYPE_XDP = 6, BPF_LINK_TYPE_PERF_EVENT = 7, BPF_LINK_TYPE_KPROBE_MULTI = 8, + BPF_LINK_TYPE_STRUCT_OPS = 9, MAX_BPF_LINK_TYPE, }; @@ -1489,6 +1490,15 @@ union bpf_attr { __aligned_u64 addrs; __aligned_u64 cookies; } kprobe_multi; + struct { + /* this is overlaid with the target_btf_id above. */ + __u32 target_btf_id; + /* black box user-provided value passed through + * to BPF program at the execution time and + * accessible through bpf_get_attach_cookie() BPF helper + */ + __u64 cookie; + } tracing; }; } link_create; @@ -5154,6 +5164,91 @@ union bpf_attr { * if not NULL, is a reference which must be released using its * corresponding release function, or moved into a BPF map before * program exit. + * + * void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu) + * Description + * Perform a lookup in *percpu map* for an entry associated to + * *key* on *cpu*. + * Return + * Map value associated to *key* on *cpu*, or **NULL** if no entry + * was found or *cpu* is invalid. + * + * struct mptcp_sock *bpf_skc_to_mptcp_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *mptcp_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Get a dynptr to local memory *data*. + * + * *data* must be a ptr to a map value. + * The maximum *size* supported is DYNPTR_MAX_SIZE. + * *flags* is currently unused. + * Return + * 0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE, + * -EINVAL if flags is not 0. + * + * long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Reserve *size* bytes of payload in a ring buffer *ringbuf* + * through the dynptr interface. *flags* must be 0. + * + * Please note that a corresponding bpf_ringbuf_submit_dynptr or + * bpf_ringbuf_discard_dynptr must be called on *ptr*, even if the + * reservation fails. This is enforced by the verifier. + * Return + * 0 on success, or a negative error in case of failure. + * + * void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Submit reserved ring buffer sample, pointed to by *data*, + * through the dynptr interface. This is a no-op if the dynptr is + * invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_submit'. + * Return + * Nothing. Always succeeds. 
+ * + * void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Discard reserved ring buffer sample through the dynptr + * interface. This is a no-op if the dynptr is invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_discard'. + * Return + * Nothing. Always succeeds. + * + * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset) + * Description + * Read *len* bytes from *src* into *dst*, starting from *offset* + * into *src*. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *src*'s data, -EINVAL if *src* is an invalid dynptr. + * + * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len) + * Description + * Write *len* bytes from *src* into *dst*, starting from *offset* + * into *dst*. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* + * is a read-only dynptr. + * + * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) + * Description + * Get a pointer to the underlying dynptr data. + * + * *len* must be a statically known value. The returned data slice + * is invalidated whenever the dynptr is invalidated. + * Return + * Pointer to the underlying dynptr data, NULL if the dynptr is + * read-only, if the dynptr is invalid, or if the offset and length + * is out of bounds. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5351,6 +5446,15 @@ union bpf_attr { FN(skb_set_tstamp), \ FN(ima_file_hash), \ FN(kptr_xchg), \ + FN(map_lookup_percpu_elem), \ + FN(skc_to_mptcp_sock), \ + FN(dynptr_from_mem), \ + FN(ringbuf_reserve_dynptr), \ + FN(ringbuf_submit_dynptr), \ + FN(ringbuf_discard_dynptr), \ + FN(dynptr_read), \ + FN(dynptr_write), \ + FN(dynptr_data), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5604,6 +5708,10 @@ struct bpf_tunnel_key { __u8 tunnel_ttl; __u16 tunnel_ext; /* Padding, future use. */ __u32 tunnel_label; + union { + __u32 local_ipv4; + __u32 local_ipv6[4]; + }; }; /* user accessible mirror of in-kernel xfrm_state. @@ -6498,6 +6606,11 @@ struct bpf_timer { __u64 :64; } __attribute__((aligned(8))); +struct bpf_dynptr { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. 
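The dynptr helper descriptions above are easier to follow with a usage sketch. The following hypothetical BPF program reserves a ring buffer sample through the dynptr interface and publishes it; the map name, section and payload are placeholders, and the reservation size could just as well be computed at run time.

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  struct {
          __uint(type, BPF_MAP_TYPE_RINGBUF);
          __uint(max_entries, 256 * 1024);
  } rb SEC(".maps");

  SEC("tp/syscalls/sys_enter_execve")
  int dynptr_demo(void *ctx)
  {
          struct bpf_dynptr ptr;
          char data[64] = "hello";
          __u32 len = sizeof(data);   /* could be computed at run time */

          /* Reserve 'len' bytes; a matching submit or discard is mandatory
           * even when the reservation fails (enforced by the verifier).
           */
          if (bpf_ringbuf_reserve_dynptr(&rb, len, 0, &ptr)) {
                  bpf_ringbuf_discard_dynptr(&ptr, 0);
                  return 0;
          }

          /* Fill the reservation through the dynptr and publish it. */
          bpf_dynptr_write(&ptr, 0, data, sizeof(data));
          bpf_ringbuf_submit_dynptr(&ptr, 0);
          return 0;
  }

  char _license[] SEC("license") = "GPL";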
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 64741c55b8e3..a1265b152027 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -127,7 +127,7 @@ TAGS_PROG := $(if $(shell which etags 2>/dev/null),etags,ctags) GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ sed 's/\[.*\]//' | \ - awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}' | \ sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ sed 's/\[.*\]//' | \ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index a9d292c106c2..240186aac8e6 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -208,86 +208,6 @@ int bpf_map_create(enum bpf_map_type map_type, return libbpf_err_errno(fd); } -int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) -{ - LIBBPF_OPTS(bpf_map_create_opts, p); - - p.map_flags = create_attr->map_flags; - p.numa_node = create_attr->numa_node; - p.btf_fd = create_attr->btf_fd; - p.btf_key_type_id = create_attr->btf_key_type_id; - p.btf_value_type_id = create_attr->btf_value_type_id; - p.map_ifindex = create_attr->map_ifindex; - if (create_attr->map_type == BPF_MAP_TYPE_STRUCT_OPS) - p.btf_vmlinux_value_type_id = create_attr->btf_vmlinux_value_type_id; - else - p.inner_map_fd = create_attr->inner_map_fd; - - return bpf_map_create(create_attr->map_type, create_attr->name, - create_attr->key_size, create_attr->value_size, - create_attr->max_entries, &p); -} - -int bpf_create_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, int max_entries, - __u32 map_flags, int node) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts); - - opts.map_flags = map_flags; - if (node >= 0) { - opts.numa_node = node; - opts.map_flags |= BPF_F_NUMA_NODE; - } - - return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts); -} - -int bpf_create_map(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags); - - return bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts); -} - -int bpf_create_map_name(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, int max_entries, - __u32 map_flags) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags); - - return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts); -} - -int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int inner_map_fd, int max_entries, - __u32 map_flags, int node) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts); - - opts.inner_map_fd = inner_map_fd; - opts.map_flags = map_flags; - if (node >= 0) { - opts.map_flags |= BPF_F_NUMA_NODE; - opts.numa_node = node; - } - - return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts); -} - -int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, - int key_size, int inner_map_fd, int max_entries, - __u32 map_flags) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, - .inner_map_fd = inner_map_fd, - .map_flags = map_flags, - ); - - return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts); -} - static void * alloc_zero_tailing_info(const void *orecord, __u32 cnt, __u32 actual_rec_size, __u32 expected_rec_size) @@ -639,6 +559,20 @@ int bpf_map_delete_elem(int fd, const void *key) return libbpf_err_errno(ret); } +int 
bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags) +{ + union bpf_attr attr; + int ret; + + memset(&attr, 0, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + attr.flags = flags; + + ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); + return libbpf_err_errno(ret); +} + int bpf_map_get_next_key(int fd, const void *key, void *next_key) { union bpf_attr attr; @@ -863,6 +797,14 @@ int bpf_link_create(int prog_fd, int target_fd, if (!OPTS_ZEROED(opts, kprobe_multi)) return libbpf_err(-EINVAL); break; + case BPF_TRACE_FENTRY: + case BPF_TRACE_FEXIT: + case BPF_MODIFY_RETURN: + case BPF_LSM_MAC: + attr.link_create.tracing.cookie = OPTS_GET(opts, tracing.cookie, 0); + if (!OPTS_ZEROED(opts, tracing)) + return libbpf_err(-EINVAL); + break; default: if (!OPTS_ZEROED(opts, flags)) return libbpf_err(-EINVAL); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index f4b4afb6d4ba..cabc03703e29 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -61,48 +61,6 @@ LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, __u32 max_entries, const struct bpf_map_create_opts *opts); -struct bpf_create_map_attr { - const char *name; - enum bpf_map_type map_type; - __u32 map_flags; - __u32 key_size; - __u32 value_size; - __u32 max_entries; - __u32 numa_node; - __u32 btf_fd; - __u32 btf_key_type_id; - __u32 btf_value_type_id; - __u32 map_ifindex; - union { - __u32 inner_map_fd; - __u32 btf_vmlinux_value_type_id; - }; -}; - -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, - int max_entries, __u32 map_flags, int node); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, - int max_entries, __u32 map_flags); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type, - const char *name, int key_size, - int inner_map_fd, int max_entries, - __u32 map_flags, int node); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type, - const char *name, int key_size, - int inner_map_fd, int max_entries, - __u32 map_flags); - struct bpf_prog_load_opts { size_t sz; /* size of this struct for forward/backward compatibility */ @@ -244,6 +202,7 @@ LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, LIBBPF_API int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags); LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); +LIBBPF_API int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags); LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); LIBBPF_API int bpf_map_freeze(int fd); @@ -420,6 +379,9 @@ struct bpf_link_create_opts { const unsigned long *addrs; const __u64 *cookies; } kprobe_multi; + struct { + __u64 cookie; + } tracing; }; size_t :0; }; diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index e4aa9996a550..fd48b1ff59ca 100644 --- 
a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -110,21 +110,50 @@ enum bpf_enum_value_kind { val; \ }) +#define ___bpf_field_ref1(field) (field) +#define ___bpf_field_ref2(type, field) (((typeof(type) *)0)->field) +#define ___bpf_field_ref(args...) \ + ___bpf_apply(___bpf_field_ref, ___bpf_narg(args))(args) + /* * Convenience macro to check that field actually exists in target kernel's. * Returns: * 1, if matching field is present in target kernel; * 0, if no matching field found. + * + * Supports two forms: + * - field reference through variable access: + * bpf_core_field_exists(p->my_field); + * - field reference through type and field names: + * bpf_core_field_exists(struct my_type, my_field). */ -#define bpf_core_field_exists(field) \ - __builtin_preserve_field_info(field, BPF_FIELD_EXISTS) +#define bpf_core_field_exists(field...) \ + __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_EXISTS) /* * Convenience macro to get the byte size of a field. Works for integers, * struct/unions, pointers, arrays, and enums. + * + * Supports two forms: + * - field reference through variable access: + * bpf_core_field_size(p->my_field); + * - field reference through type and field names: + * bpf_core_field_size(struct my_type, my_field). + */ +#define bpf_core_field_size(field...) \ + __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_SIZE) + +/* + * Convenience macro to get field's byte offset. + * + * Supports two forms: + * - field reference through variable access: + * bpf_core_field_offset(p->my_field); + * - field reference through type and field names: + * bpf_core_field_offset(struct my_type, my_field). */ -#define bpf_core_field_size(field) \ - __builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE) +#define bpf_core_field_offset(field...) \ + __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_OFFSET) /* * Convenience macro to get BTF type ID of a specified type, using a local BTF diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 5de3eb267125..fb04eaf367f1 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -76,6 +76,30 @@ #endif /* + * Compiler (optimization) barrier. + */ +#ifndef barrier +#define barrier() asm volatile("" ::: "memory") +#endif + +/* Variable-specific compiler (optimization) barrier. It's a no-op which makes + * compiler believe that there is some black box modification of a given + * variable and thus prevents compiler from making extra assumption about its + * value and potential simplifications and optimizations on this variable. + * + * E.g., compiler might often delay or even omit 32-bit to 64-bit casting of + * a variable, making some code patterns unverifiable. Putting barrier_var() + * in place will ensure that cast is performed before the barrier_var() + * invocation, because compiler has to pessimistically assume that embedded + * asm section might perform some extra operations on that variable. + * + * This is a variable-specific variant of more global barrier(). + */ +#ifndef barrier_var +#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) +#endif + +/* * Helper macro to throw a compilation error if __bpf_unreachable() gets * built into the resulting code. This works given BPF back end does not * implement __builtin_trap(). 
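A rough illustration of the new type-based form of the CO-RE field macros documented above; the traced function, struct and field are picked purely for demonstration.

  #include "vmlinux.h"
  #include <bpf/bpf_core_read.h>
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  SEC("fentry/do_unlinkat")
  int BPF_PROG(core_field_demo, int dfd, struct filename *name)
  {
          /* Type-based form: no instance of the struct is required. */
          if (bpf_core_field_exists(struct task_struct, comm)) {
                  __u32 off = bpf_core_field_offset(struct task_struct, comm);
                  __u32 sz  = bpf_core_field_size(struct task_struct, comm);

                  bpf_printk("comm at offset %u, size %u", off, sz);
          }
          return 0;
  }

  char _license[] SEC("license") = "GPL";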
This is useful to assert that certain paths @@ -149,13 +173,8 @@ enum libbpf_tristate { #define __kconfig __attribute__((section(".kconfig"))) #define __ksym __attribute__((section(".ksyms"))) -#if __has_attribute(btf_type_tag) #define __kptr __attribute__((btf_type_tag("kptr"))) #define __kptr_ref __attribute__((btf_type_tag("kptr_ref"))) -#else -#define __kptr -#define __kptr_ref -#endif #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 73a5192defb3..e89cc9c885b3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -357,6 +357,7 @@ enum libbpf_map_type { }; struct bpf_map { + struct bpf_object *obj; char *name; /* real_name is defined for special internal maps (.rodata*, * .data*, .bss, .kconfig) and preserves their original ELF section @@ -386,7 +387,7 @@ struct bpf_map { char *pin_path; bool pinned; bool reused; - bool skipped; + bool autocreate; __u64 map_extra; }; @@ -1433,36 +1434,21 @@ static int find_elf_var_offset(const struct bpf_object *obj, const char *name, _ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) { - struct bpf_map *new_maps; - size_t new_cap; - int i; - - if (obj->nr_maps < obj->maps_cap) - return &obj->maps[obj->nr_maps++]; - - new_cap = max((size_t)4, obj->maps_cap * 3 / 2); - new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps)); - if (!new_maps) { - pr_warn("alloc maps for object failed\n"); - return ERR_PTR(-ENOMEM); - } + struct bpf_map *map; + int err; - obj->maps_cap = new_cap; - obj->maps = new_maps; + err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap, + sizeof(*obj->maps), obj->nr_maps + 1); + if (err) + return ERR_PTR(err); - /* zero out new maps */ - memset(obj->maps + obj->nr_maps, 0, - (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps)); - /* - * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin) - * when failure (zclose won't close negative fd)). - */ - for (i = obj->nr_maps; i < obj->maps_cap; i++) { - obj->maps[i].fd = -1; - obj->maps[i].inner_map_fd = -1; - } + map = &obj->maps[obj->nr_maps++]; + map->obj = obj; + map->fd = -1; + map->inner_map_fd = -1; + map->autocreate = true; - return &obj->maps[obj->nr_maps++]; + return map; } static size_t bpf_map_mmap_sz(const struct bpf_map *map) @@ -4324,6 +4310,20 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) return 0; } +bool bpf_map__autocreate(const struct bpf_map *map) +{ + return map->autocreate; +} + +int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) +{ + if (map->obj->loaded) + return libbpf_err(-EBUSY); + + map->autocreate = autocreate; + return 0; +} + int bpf_map__reuse_fd(struct bpf_map *map, int fd) { struct bpf_map_info info = {}; @@ -4943,6 +4943,42 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) static void bpf_map__destroy(struct bpf_map *map); +static bool is_pow_of_2(size_t x) +{ + return x && (x & (x - 1)); +} + +static size_t adjust_ringbuf_sz(size_t sz) +{ + __u32 page_sz = sysconf(_SC_PAGE_SIZE); + __u32 mul; + + /* if user forgot to set any size, make sure they see error */ + if (sz == 0) + return 0; + /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be + * a power-of-2 multiple of kernel's page size. If user diligently + * satisified these conditions, pass the size through. 
+ */ + if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz)) + return sz; + + /* Otherwise find closest (page_sz * power_of_2) product bigger than + * user-set size to satisfy both user size request and kernel + * requirements and substitute correct max_entries for map creation. + */ + for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) { + if (mul * page_sz > sz) + return mul * page_sz; + } + + /* if it's impossible to satisfy the conditions (i.e., user size is + * very close to UINT_MAX but is not a power-of-2 multiple of + * page_size) then just return original size and let kernel reject it + */ + return sz; +} + static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { LIBBPF_OPTS(bpf_map_create_opts, create_attr); @@ -4981,6 +5017,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b } switch (def->type) { + case BPF_MAP_TYPE_RINGBUF: + map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); + /* fallthrough */ case BPF_MAP_TYPE_PERF_EVENT_ARRAY: case BPF_MAP_TYPE_CGROUP_ARRAY: case BPF_MAP_TYPE_STACK_TRACE: @@ -4994,7 +5033,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_QUEUE: case BPF_MAP_TYPE_STACK: - case BPF_MAP_TYPE_RINGBUF: create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; @@ -5180,9 +5218,11 @@ bpf_object__create_maps(struct bpf_object *obj) * bpf_object loading will succeed just fine even on old * kernels. */ - if (bpf_map__is_internal(map) && - !kernel_supports(obj, FEAT_GLOBAL_DATA)) { - map->skipped = true; + if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA)) + map->autocreate = false; + + if (!map->autocreate) { + pr_debug("map '%s': skipped auto-creating...\n", map->name); continue; } @@ -5805,6 +5845,36 @@ out: return err; } +/* base map load ldimm64 special constant, used also for log fixup logic */ +#define MAP_LDIMM64_POISON_BASE 2001000000 +#define MAP_LDIMM64_POISON_PFX "200100" + +static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx, + int insn_idx, struct bpf_insn *insn, + int map_idx, const struct bpf_map *map) +{ + int i; + + pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n", + prog->name, relo_idx, insn_idx, map_idx, map->name); + + /* we turn single ldimm64 into two identical invalid calls */ + for (i = 0; i < 2; i++) { + insn->code = BPF_JMP | BPF_CALL; + insn->dst_reg = 0; + insn->src_reg = 0; + insn->off = 0; + /* if this instruction is reachable (not a dead code), + * verifier will complain with something like: + * invalid func unknown#2001000123 + * where lower 123 is map index into obj->maps[] array + */ + insn->imm = MAP_LDIMM64_POISON_BASE + map_idx; + + insn++; + } +} + /* Relocate data references within program code: * - map references; * - global variable references; @@ -5818,33 +5888,35 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) for (i = 0; i < prog->nr_reloc; i++) { struct reloc_desc *relo = &prog->reloc_desc[i]; struct bpf_insn *insn = &prog->insns[relo->insn_idx]; + const struct bpf_map *map; struct extern_desc *ext; switch (relo->type) { case RELO_LD64: + map = &obj->maps[relo->map_idx]; if (obj->gen_loader) { insn[0].src_reg = BPF_PSEUDO_MAP_IDX; insn[0].imm = relo->map_idx; - } else { + } else if (map->autocreate) { insn[0].src_reg = BPF_PSEUDO_MAP_FD; - insn[0].imm = obj->maps[relo->map_idx].fd; + insn[0].imm = map->fd; + } else { + 
poison_map_ldimm64(prog, i, relo->insn_idx, insn, + relo->map_idx, map); } break; case RELO_DATA: + map = &obj->maps[relo->map_idx]; insn[1].imm = insn[0].imm + relo->sym_off; if (obj->gen_loader) { insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; insn[0].imm = relo->map_idx; - } else { - const struct bpf_map *map = &obj->maps[relo->map_idx]; - - if (map->skipped) { - pr_warn("prog '%s': relo #%d: kernel doesn't support global data\n", - prog->name, i); - return -ENOTSUP; - } + } else if (map->autocreate) { insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; - insn[0].imm = obj->maps[relo->map_idx].fd; + insn[0].imm = map->fd; + } else { + poison_map_ldimm64(prog, i, relo->insn_idx, insn, + relo->map_idx, map); } break; case RELO_EXTERN_VAR: @@ -6682,17 +6754,32 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog, if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; - if (def & SEC_DEPRECATED) + if (def & SEC_DEPRECATED) { pr_warn("SEC(\"%s\") is deprecated, please see https://github.com/libbpf/libbpf/wiki/Libbpf-1.0-migration-guide#bpf-program-sec-annotation-deprecations for details\n", prog->sec_name); + } - if ((prog->type == BPF_PROG_TYPE_TRACING || - prog->type == BPF_PROG_TYPE_LSM || - prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { + if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) { int btf_obj_fd = 0, btf_type_id = 0, err; const char *attach_name; - attach_name = strchr(prog->sec_name, '/') + 1; + attach_name = strchr(prog->sec_name, '/'); + if (!attach_name) { + /* if BPF program is annotated with just SEC("fentry") + * (or similar) without declaratively specifying + * target, then it is expected that target will be + * specified with bpf_program__set_attach_target() at + * runtime before BPF object load step. If not, then + * there is nothing to load into the kernel as BPF + * verifier won't be able to validate BPF program + * correctness anyways. 
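A hedged userspace sketch of how the new bpf_map__set_autocreate() and bpf_map__update_elem() APIs added in this series might be combined; the object file and map names are placeholders and error handling is abbreviated.

  #include <errno.h>
  #include <bpf/libbpf.h>

  int demo(void)
  {
          struct bpf_object *obj;
          struct bpf_map *opt_map, *vals;
          __u32 key = 0, value = 42;
          int err;

          obj = bpf_object__open_file("demo.bpf.o", NULL);
          err = libbpf_get_error(obj);
          if (err)
                  return err;

          /* Skip creating a map the running kernel can't support; BPF-side
           * code that would use it must be dead code from the verifier's
           * point of view, otherwise the poisoned ldimm64 is rejected.
           */
          opt_map = bpf_object__find_map_by_name(obj, "optional_map");
          if (opt_map)
                  bpf_map__set_autocreate(opt_map, false);

          err = bpf_object__load(obj);
          if (err)
                  goto out;

          vals = bpf_object__find_map_by_name(obj, "values");
          if (!vals) {
                  err = -ENOENT;
                  goto out;
          }

          /* Size-checked high-level element update added in this series. */
          err = bpf_map__update_elem(vals, &key, sizeof(key),
                                     &value, sizeof(value), BPF_ANY);
  out:
          bpf_object__close(obj);
          return err;
  }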
+ */ + pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n", + prog->name); + return -EINVAL; + } + attach_name++; /* skip over / */ + err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id); if (err) return err; @@ -6773,6 +6860,8 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog prog->name, err); return err; } + insns = prog->insns; + insns_cnt = prog->insns_cnt; } if (obj->gen_loader) { @@ -6784,7 +6873,7 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog } retry_load: - /* if log_level is zero, we don't request logs initiallly even if + /* if log_level is zero, we don't request logs initially even if * custom log_buf is specified; if the program load fails, then we'll * bump log_level to 1 and use either custom log_buf or we'll allocate * our own and retry the load to get details on what failed @@ -6947,7 +7036,7 @@ static void fixup_log_failed_core_relo(struct bpf_program *prog, const struct bpf_core_relo *relo; struct bpf_core_spec spec; char patch[512], spec_buf[256]; - int insn_idx, err; + int insn_idx, err, spec_len; if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1) return; @@ -6960,11 +7049,44 @@ static void fixup_log_failed_core_relo(struct bpf_program *prog, if (err) return; - bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); + spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); snprintf(patch, sizeof(patch), "%d: <invalid CO-RE relocation>\n" - "failed to resolve CO-RE relocation %s\n", - insn_idx, spec_buf); + "failed to resolve CO-RE relocation %s%s\n", + insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : ""); + + patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); +} + +static void fixup_log_missing_map_load(struct bpf_program *prog, + char *buf, size_t buf_sz, size_t log_sz, + char *line1, char *line2, char *line3) +{ + /* Expected log for failed and not properly guarded CO-RE relocation: + * line1 -> 123: (85) call unknown#2001000345 + * line2 -> invalid func unknown#2001000345 + * line3 -> <anything else or end of buffer> + * + * "123" is the index of the instruction that was poisoned. + * "345" in "2001000345" are map index in obj->maps to fetch map name. 
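As a small companion to the SEC("fentry") handling above, one possible way to set the attach target programmatically before load; the program and kernel function names here are hypothetical.

  #include <errno.h>
  #include <bpf/libbpf.h>

  /* For a program annotated as just SEC("fentry"), a target must be set
   * before bpf_object__load(), for example:
   */
  static int set_target(struct bpf_object *obj)
  {
          struct bpf_program *prog;

          prog = bpf_object__find_program_by_name(obj, "handle_fentry");
          if (!prog)
                  return -ENOENT;

          /* attach_prog_fd == 0: attach to a kernel function by name
           * rather than to another BPF program.
           */
          return bpf_program__set_attach_target(prog, 0, "do_unlinkat");
  }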
+ */ + struct bpf_object *obj = prog->obj; + const struct bpf_map *map; + int insn_idx, map_idx; + char patch[128]; + + if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2) + return; + + map_idx -= MAP_LDIMM64_POISON_BASE; + if (map_idx < 0 || map_idx >= obj->nr_maps) + return; + map = &obj->maps[map_idx]; + + snprintf(patch, sizeof(patch), + "%d: <invalid BPF map reference>\n" + "BPF map '%s' is referenced but wasn't created\n", + insn_idx, map->name); patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); } @@ -6997,6 +7119,14 @@ static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_s fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz, prev_line, cur_line, next_line); return; + } else if (str_has_pfx(cur_line, "invalid func unknown#"MAP_LDIMM64_POISON_PFX)) { + prev_line = find_prev_line(buf, cur_line); + if (!prev_line) + continue; + + fixup_log_missing_map_load(prog, buf, buf_sz, log_sz, + prev_line, cur_line, next_line); + return; } } } @@ -8185,7 +8315,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) char *pin_path = NULL; char buf[PATH_MAX]; - if (map->skipped) + if (!map->autocreate) continue; if (path) { @@ -8660,6 +8790,26 @@ size_t bpf_program__insn_cnt(const struct bpf_program *prog) return prog->insns_cnt; } +int bpf_program__set_insns(struct bpf_program *prog, + struct bpf_insn *new_insns, size_t new_insn_cnt) +{ + struct bpf_insn *insns; + + if (prog->obj->loaded) + return -EBUSY; + + insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns)); + if (!insns) { + pr_warn("prog '%s': failed to realloc prog code\n", prog->name); + return -ENOMEM; + } + memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns)); + + prog->insns = insns; + prog->insns_cnt = new_insn_cnt; + return 0; +} + int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, bpf_program_prep_t prep) { @@ -8853,34 +9003,34 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE | SEC_SLOPPY_PFX), SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("kprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), + SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), - SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), + SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), - SEC_DEF("kprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), - SEC_DEF("kretprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), + SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), + SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt), SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX | SEC_DEPRECATED), SEC_DEF("action", SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("tracepoint/", TRACEPOINT, 0, SEC_NONE, attach_tp), - SEC_DEF("tp/", TRACEPOINT, 0, SEC_NONE, attach_tp), - SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("raw_tp/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("raw_tracepoint.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, 
attach_raw_tp), - SEC_DEF("raw_tp.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("tp_btf/", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fentry/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fmod_ret/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fexit/", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fentry.s/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("fmod_ret.s/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("fexit.s/", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("freplace/", EXT, 0, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("lsm/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), - SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), - SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), - SEC_DEF("iter.s/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter), + SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp), + SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp), + SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), + SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), + SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), + SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter), SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS), SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), @@ -9799,6 +9949,110 @@ bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset) return libbpf_err_ptr(-ENOTSUP); } +static int validate_map_op(const struct bpf_map *map, size_t key_sz, + size_t value_sz, bool check_value_sz) +{ + if (map->fd <= 0) + return -ENOENT; + + if (map->def.key_size != key_sz) { + pr_warn("map '%s': unexpected key size %zu provided, expected %u\n", + map->name, key_sz, map->def.key_size); + return -EINVAL; + } + + if (!check_value_sz) + return 0; + + switch (map->def.type) { + case BPF_MAP_TYPE_PERCPU_ARRAY: + case BPF_MAP_TYPE_PERCPU_HASH: + case BPF_MAP_TYPE_LRU_PERCPU_HASH: + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: { + int num_cpu = libbpf_num_possible_cpus(); + size_t elem_sz = roundup(map->def.value_size, 8); + + if (value_sz != num_cpu * elem_sz) { + pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = 
%zd\n", + map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz); + return -EINVAL; + } + break; + } + default: + if (map->def.value_size != value_sz) { + pr_warn("map '%s': unexpected value size %zu provided, expected %u\n", + map->name, value_sz, map->def.value_size); + return -EINVAL; + } + break; + } + return 0; +} + +int bpf_map__lookup_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags) +{ + int err; + + err = validate_map_op(map, key_sz, value_sz, true); + if (err) + return libbpf_err(err); + + return bpf_map_lookup_elem_flags(map->fd, key, value, flags); +} + +int bpf_map__update_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + const void *value, size_t value_sz, __u64 flags) +{ + int err; + + err = validate_map_op(map, key_sz, value_sz, true); + if (err) + return libbpf_err(err); + + return bpf_map_update_elem(map->fd, key, value, flags); +} + +int bpf_map__delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, __u64 flags) +{ + int err; + + err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); + if (err) + return libbpf_err(err); + + return bpf_map_delete_elem_flags(map->fd, key, flags); +} + +int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags) +{ + int err; + + err = validate_map_op(map, key_sz, value_sz, true); + if (err) + return libbpf_err(err); + + return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags); +} + +int bpf_map__get_next_key(const struct bpf_map *map, + const void *cur_key, void *next_key, size_t key_sz) +{ + int err; + + err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); + if (err) + return libbpf_err(err); + + return bpf_map_get_next_key(map->fd, cur_key, next_key); +} + long libbpf_get_error(const void *ptr) { if (!IS_ERR_OR_NULL(ptr)) @@ -10595,6 +10849,12 @@ static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf char *func; int n; + *link = NULL; + + /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */ + if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0) + return 0; + opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/"); if (opts.retprobe) func_name = prog->sec_name + sizeof("kretprobe/") - 1; @@ -10625,6 +10885,13 @@ static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, stru char *pattern; int n; + *link = NULL; + + /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */ + if (strcmp(prog->sec_name, "kprobe.multi") == 0 || + strcmp(prog->sec_name, "kretprobe.multi") == 0) + return 0; + opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/"); if (opts.retprobe) spec = prog->sec_name + sizeof("kretprobe.multi/") - 1; @@ -11325,6 +11592,12 @@ static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_lin { char *sec_name, *tp_cat, *tp_name; + *link = NULL; + + /* no auto-attach for SEC("tp") or SEC("tracepoint") */ + if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0) + return 0; + sec_name = strdup(prog->sec_name); if (!sec_name) return -ENOMEM; @@ -11380,20 +11653,34 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *pr static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) { static const char *const prefixes[] = { - "raw_tp/", - "raw_tracepoint/", - "raw_tp.w/", - "raw_tracepoint.w/", + 
"raw_tp", + "raw_tracepoint", + "raw_tp.w", + "raw_tracepoint.w", }; size_t i; const char *tp_name = NULL; + *link = NULL; + for (i = 0; i < ARRAY_SIZE(prefixes); i++) { - if (str_has_pfx(prog->sec_name, prefixes[i])) { - tp_name = prog->sec_name + strlen(prefixes[i]); - break; - } + size_t pfx_len; + + if (!str_has_pfx(prog->sec_name, prefixes[i])) + continue; + + pfx_len = strlen(prefixes[i]); + /* no auto-attach case of, e.g., SEC("raw_tp") */ + if (prog->sec_name[pfx_len] == '\0') + return 0; + + if (prog->sec_name[pfx_len] != '/') + continue; + + tp_name = prog->sec_name + pfx_len + 1; + break; } + if (!tp_name) { pr_warn("prog '%s': invalid section name '%s'\n", prog->name, prog->sec_name); @@ -11405,12 +11692,17 @@ static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf } /* Common logic for all BPF program types that attach to a btf_id */ -static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog) +static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog, + const struct bpf_trace_opts *opts) { + LIBBPF_OPTS(bpf_link_create_opts, link_opts); char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, pfd; + if (!OPTS_VALID(opts, bpf_trace_opts)) + return libbpf_err_ptr(-EINVAL); + prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { pr_warn("prog '%s': can't attach before loaded\n", prog->name); @@ -11423,7 +11715,8 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro link->detach = &bpf_link__detach_fd; /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */ - pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), NULL); + link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0); + pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts); if (pfd < 0) { pfd = -errno; free(link); @@ -11437,12 +11730,18 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog) { - return bpf_program__attach_btf_id(prog); + return bpf_program__attach_btf_id(prog, NULL); +} + +struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog, + const struct bpf_trace_opts *opts) +{ + return bpf_program__attach_btf_id(prog, opts); } struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog) { - return bpf_program__attach_btf_id(prog); + return bpf_program__attach_btf_id(prog, NULL); } static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index cdbfee60ea3e..9e9a3fd3edd8 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -323,6 +323,24 @@ struct bpf_insn; * different. */ LIBBPF_API const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog); + +/** + * @brief **bpf_program__set_insns()** can set BPF program's underlying + * BPF instructions. + * + * WARNING: This is a very advanced libbpf API and users need to know + * what they are doing. This should be used from prog_prepare_load_fn + * callback only. 
+ * + * @param prog BPF program for which to return instructions + * @param new_insns a pointer to an array of BPF instructions + * @param new_insn_cnt number of `struct bpf_insn`'s that form + * specified BPF program + * @return 0, on success; negative error code, otherwise + */ +LIBBPF_API int bpf_program__set_insns(struct bpf_program *prog, + struct bpf_insn *new_insns, size_t new_insn_cnt); + /** * @brief **bpf_program__insn_cnt()** returns number of `struct bpf_insn`'s * that form specified BPF program. @@ -603,9 +621,21 @@ bpf_program__attach_tracepoint_opts(const struct bpf_program *prog, LIBBPF_API struct bpf_link * bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, const char *tp_name); + +struct bpf_trace_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + /* custom user-provided value fetchable through bpf_get_attach_cookie() */ + __u64 cookie; +}; +#define bpf_trace_opts__last_field cookie + LIBBPF_API struct bpf_link * bpf_program__attach_trace(const struct bpf_program *prog); LIBBPF_API struct bpf_link * +bpf_program__attach_trace_opts(const struct bpf_program *prog, const struct bpf_trace_opts *opts); + +LIBBPF_API struct bpf_link * bpf_program__attach_lsm(const struct bpf_program *prog); LIBBPF_API struct bpf_link * bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd); @@ -867,6 +897,28 @@ LIBBPF_API struct bpf_map * bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map); /** + * @brief **bpf_map__set_autocreate()** sets whether libbpf has to auto-create + * BPF map during BPF object load phase. + * @param map the BPF map instance + * @param autocreate whether to create BPF map during BPF object load + * @return 0 on success; -EBUSY if BPF object was already loaded + * + * **bpf_map__set_autocreate()** allows to opt-out from libbpf auto-creating + * BPF map. By default, libbpf will attempt to create every single BPF map + * defined in BPF object file using BPF_MAP_CREATE command of bpf() syscall + * and fill in map FD in BPF instructions. + * + * This API allows to opt-out of this process for specific map instance. This + * can be useful if host kernel doesn't support such BPF map type or used + * combination of flags and user application wants to avoid creating such + * a map in the first place. User is still responsible to make sure that their + * BPF-side code that expects to use such missing BPF map is recognized by BPF + * verifier as dead code, otherwise BPF verifier will reject such BPF program. + */ +LIBBPF_API int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate); +LIBBPF_API bool bpf_map__autocreate(const struct bpf_map *map); + +/** * @brief **bpf_map__fd()** gets the file descriptor of the passed * BPF map * @param map the BPF map instance @@ -939,6 +991,110 @@ LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd); LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map); /** + * @brief **bpf_map__lookup_elem()** allows to lookup BPF map value + * corresponding to provided key. + * @param map BPF map to lookup element in + * @param key pointer to memory containing bytes of the key used for lookup + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @param value pointer to memory in which looked up value will be stored + * @param value_sz size in byte of value data memory; it has to match BPF map + * definition's **value_size**. 
For per-CPU BPF maps value size has to be + * a product of BPF map value size and number of possible CPUs in the system + * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for + * per-CPU values value size has to be aligned up to closest 8 bytes for + * alignment reasons, so expected size is: `round_up(value_size, 8) + * * libbpf_num_possible_cpus()`. + * @flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise + * + * **bpf_map__lookup_elem()** is high-level equivalent of + * **bpf_map_lookup_elem()** API with added check for key and value size. + */ +LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags); + +/** + * @brief **bpf_map__update_elem()** allows to insert or update value in BPF + * map that corresponds to provided key. + * @param map BPF map to insert to or update element in + * @param key pointer to memory containing bytes of the key + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @param value pointer to memory containing bytes of the value + * @param value_sz size in byte of value data memory; it has to match BPF map + * definition's **value_size**. For per-CPU BPF maps value size has to be + * a product of BPF map value size and number of possible CPUs in the system + * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for + * per-CPU values value size has to be aligned up to closest 8 bytes for + * alignment reasons, so expected size is: `round_up(value_size, 8) + * * libbpf_num_possible_cpus()`. + * @flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise + * + * **bpf_map__update_elem()** is high-level equivalent of + * **bpf_map_update_elem()** API with added check for key and value size. + */ +LIBBPF_API int bpf_map__update_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + const void *value, size_t value_sz, __u64 flags); + +/** + * @brief **bpf_map__delete_elem()** allows to delete element in BPF map that + * corresponds to provided key. + * @param map BPF map to delete element from + * @param key pointer to memory containing bytes of the key + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise + * + * **bpf_map__delete_elem()** is high-level equivalent of + * **bpf_map_delete_elem()** API with added check for key size. + */ +LIBBPF_API int bpf_map__delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, __u64 flags); + +/** + * @brief **bpf_map__lookup_and_delete_elem()** allows to lookup BPF map value + * corresponding to provided key and atomically delete it afterwards. + * @param map BPF map to lookup element in + * @param key pointer to memory containing bytes of the key used for lookup + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @param value pointer to memory in which looked up value will be stored + * @param value_sz size in byte of value data memory; it has to match BPF map + * definition's **value_size**. For per-CPU BPF maps value size has to be + * a product of BPF map value size and number of possible CPUs in the system + * (could be fetched with **libbpf_num_possible_cpus()**). 
Note also that for + * per-CPU values value size has to be aligned up to closest 8 bytes for + * alignment reasons, so expected size is: `round_up(value_size, 8) + * * libbpf_num_possible_cpus()`. + * @flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise + * + * **bpf_map__lookup_and_delete_elem()** is high-level equivalent of + * **bpf_map_lookup_and_delete_elem()** API with added check for key and value size. + */ +LIBBPF_API int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags); + +/** + * @brief **bpf_map__get_next_key()** allows to iterate BPF map keys by + * fetching next key that follows current key. + * @param map BPF map to fetch next key from + * @param cur_key pointer to memory containing bytes of current key or NULL to + * fetch the first key + * @param next_key pointer to memory to write next key into + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @return 0, on success; -ENOENT if **cur_key** is the last key in BPF map; + * negative error, otherwise + * + * **bpf_map__get_next_key()** is high-level equivalent of + * **bpf_map_get_next_key()** API with added check for key size. + */ +LIBBPF_API int bpf_map__get_next_key(const struct bpf_map *map, + const void *cur_key, void *next_key, size_t key_sz); + +/** * @brief **libbpf_get_error()** extracts the error code from the passed * pointer * @param ptr pointer returned from libbpf API function diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 82f6d62176dd..52973cffc20c 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -442,10 +442,24 @@ LIBBPF_0.7.0 { LIBBPF_0.8.0 { global: + bpf_map__autocreate; + bpf_map__get_next_key; + bpf_map__delete_elem; + bpf_map__lookup_and_delete_elem; + bpf_map__lookup_elem; + bpf_map__set_autocreate; + bpf_map__update_elem; + bpf_map_delete_elem_flags; bpf_object__destroy_subskeleton; bpf_object__open_subskeleton; + bpf_program__attach_kprobe_multi_opts; + bpf_program__attach_trace_opts; bpf_program__attach_usdt; + bpf_program__set_insns; libbpf_register_prog_handler; libbpf_unregister_prog_handler; - bpf_program__attach_kprobe_multi_opts; } LIBBPF_0.7.0; + +LIBBPF_1.0.0 { + local: *; +}; diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index 61f2039404b6..2fb2f4290080 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -3,7 +3,7 @@ #ifndef __LIBBPF_VERSION_H #define __LIBBPF_VERSION_H -#define LIBBPF_MAJOR_VERSION 0 -#define LIBBPF_MINOR_VERSION 8 +#define LIBBPF_MAJOR_VERSION 1 +#define LIBBPF_MINOR_VERSION 0 #endif /* __LIBBPF_VERSION_H */ diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index bafdc5373a13..2d3c8c8f558a 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -75,7 +75,7 @@ TEST_PROGS := test_kmod.sh \ test_xsk.sh TEST_PROGS_EXTENDED := with_addr.sh \ - with_tunnels.sh \ + with_tunnels.sh ima_setup.sh \ test_xdp_vlan.sh test_bpftool.py # Compile but not part of 'make run_tests' @@ -168,13 +168,15 @@ $(OUTPUT)/%:%.c $(call msg,BINARY,,$@) $(Q)$(LINK.c) $^ $(LDLIBS) -o $@ +# Filter out -static for liburandom_read.so and its dependent targets so that static builds +# do not fail. Static builds leave urandom_read relying on system-wide shared libraries. 
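As a usage illustration for the new high-level per-element map API declared in libbpf.h above (not part of this patch): the sketch below assumes an already loaded bpf_object containing a HASH map named "counters" (__u32 key, __u64 value) and a PERCPU_ARRAY map named "stats" (__u32 key, __u64 value) — the map names and layout are hypothetical. It shows the explicit key/value size arguments and the `round_up(value_size, 8) * libbpf_num_possible_cpus()` buffer sizing that per-CPU maps require. (If a map should not be created at all, bpf_map__set_autocreate(map, false) has to be called after bpf_object__open() but before bpf_object__load().)

#include <errno.h>
#include <stdlib.h>
#include <linux/bpf.h>
#include <bpf/libbpf.h>

static int demo_map_element_api(struct bpf_object *obj)
{
	/* "counters" and "stats" are hypothetical map names */
	struct bpf_map *counters = bpf_object__find_map_by_name(obj, "counters");
	struct bpf_map *stats = bpf_object__find_map_by_name(obj, "stats");
	__u32 key = 0, next_key;
	__u64 val = 1, *pcpu_vals;
	size_t pcpu_sz;
	int err;

	if (!counters || !stats)
		return -ENOENT;

	/* key_sz/value_sz must match the map definition, otherwise libbpf errors out */
	err = bpf_map__update_elem(counters, &key, sizeof(key), &val, sizeof(val), BPF_ANY);
	if (err)
		return err;
	err = bpf_map__lookup_elem(counters, &key, sizeof(key), &val, sizeof(val), 0);
	if (err)
		return err;

	/* per-CPU map: one 8-byte-aligned value slot per possible CPU */
	pcpu_sz = 8 * libbpf_num_possible_cpus(); /* round_up(sizeof(__u64), 8) == 8 */
	pcpu_vals = calloc(1, pcpu_sz);
	if (!pcpu_vals)
		return -ENOMEM;
	err = bpf_map__lookup_elem(stats, &key, sizeof(key), pcpu_vals, pcpu_sz, 0);
	free(pcpu_vals);
	if (err)
		return err;

	/* iterate all keys; -ENOENT signals that the last key was reached */
	err = bpf_map__get_next_key(counters, NULL, &next_key, sizeof(next_key));
	while (!err) {
		key = next_key;
		err = bpf_map__get_next_key(counters, &key, &next_key, sizeof(next_key));
	}
	return err == -ENOENT ? 0 : err;
}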
$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c $(call msg,LIB,,$@) - $(Q)$(CC) $(CFLAGS) -fPIC $(LDFLAGS) $^ $(LDLIBS) --shared -o $@ + $(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) -fPIC -shared -o $@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so $(call msg,BINARY,,$@) - $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.c,$^) \ + $(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ liburandom_read.so $(LDLIBS) \ -Wl,-rpath=. -Wl,--build-id=sha1 -o $@ @@ -423,11 +425,11 @@ $(TRUNNER_BPF_SKELS): %.skel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $(TRUNNER_BPF_LSKELS): %.lskel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) - $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked1.o) $$< - $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o) - $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o) - $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o) - $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=_lskel)) > $$@ + $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$< + $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o) + $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o) + $(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) + $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.o=_lskel)) > $$@ $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o)) diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h index b1ede6f0b821..82a7c9de95f9 100644 --- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h @@ -16,6 +16,10 @@ BPF_PROG(name, args) #define SOL_TCP 6 #endif +#ifndef TCP_CA_NAME_MAX +#define TCP_CA_NAME_MAX 16 +#endif + #define tcp_jiffies32 ((__u32)bpf_jiffies64()) struct sock_common { @@ -81,6 +85,7 @@ struct tcp_sock { __u32 lsndtime; __u32 prior_cwnd; __u64 tcp_mstamp; /* most recent packet received/sent */ + bool is_mptcp; } __attribute__((preserve_access_index)); static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk) @@ -225,4 +230,12 @@ static __always_inline bool tcp_cc_eq(const char *a, const char *b) extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym; extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym; +struct mptcp_sock { + struct inet_connection_sock sk; + + __u32 token; + struct sock *first; + char ca_name[TCP_CA_NAME_MAX]; +} __attribute__((preserve_access_index)); + #endif diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 763db63a3890..3b3edc0fc8a6 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -53,3 +53,7 @@ CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_DEFRAG_IPV6=y CONFIG_NF_CONNTRACK=y CONFIG_USERFAULTFD=y +CONFIG_FPROBE=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_MPTCP=y diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c new file mode 100644 index 000000000000..f472d28ad11a --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> 
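Before the body of the new map-in-map batch test, a condensed sketch of the pattern it exercises may help (an illustration, not part of the patch; the "outer_sketch" name and the helper are hypothetical): the outer map is created with an inner_map_fd template, batch-updated with inner map fds, and batch lookups hand back the inner maps' IDs, which can be resolved to fds again with bpf_map_get_fd_by_id().

#include <unistd.h>
#include <bpf/bpf.h>

static int map_in_map_sketch(int inner_fd0, int inner_fd1)
{
	LIBBPF_OPTS(bpf_map_create_opts, copts, .inner_map_fd = inner_fd0);
	__u32 keys[2] = { 1, 2 };
	__u32 fds[2] = { inner_fd0, inner_fd1 };
	__u32 out_keys[2], ids[2], count = 2, batch;
	int outer_fd, resolved_fd, err;

	/* outer map values are 4 bytes: fds when written, map IDs when read back */
	outer_fd = bpf_map_create(BPF_MAP_TYPE_HASH_OF_MAPS, "outer_sketch",
				  sizeof(__u32), sizeof(__u32), 2, &copts);
	if (outer_fd < 0)
		return outer_fd;

	err = bpf_map_update_batch(outer_fd, keys, fds, &count, NULL);
	if (err)
		goto out;

	count = 2;
	err = bpf_map_lookup_batch(outer_fd, NULL, &batch, out_keys, ids, &count, NULL);
	if (err && err != -ENOENT) /* -ENOENT just means the whole map was read */
		goto out;
	err = 0;

	/* values came back as map IDs; resolve one to an fd to inspect the inner map */
	resolved_fd = bpf_map_get_fd_by_id(ids[0]);
	if (resolved_fd >= 0)
		close(resolved_fd);
out:
	close(outer_fd);
	return err;
}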
+ +#include <test_maps.h> + +#define OUTER_MAP_ENTRIES 10 + +static __u32 get_map_id_from_fd(int map_fd) +{ + struct bpf_map_info map_info = {}; + uint32_t info_len = sizeof(map_info); + int ret; + + ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len); + CHECK(ret < 0, "Finding map info failed", "error:%s\n", + strerror(errno)); + + return map_info.id; +} + +/* This creates number of OUTER_MAP_ENTRIES maps that will be stored + * in outer map and return the created map_fds + */ +static void create_inner_maps(enum bpf_map_type map_type, + __u32 *inner_map_fds) +{ + int map_fd, map_index, ret; + __u32 map_key = 0, map_id; + char map_name[15]; + + for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++) { + memset(map_name, 0, sizeof(map_name)); + sprintf(map_name, "inner_map_fd_%d", map_index); + map_fd = bpf_map_create(map_type, map_name, sizeof(__u32), + sizeof(__u32), 1, NULL); + CHECK(map_fd < 0, + "inner bpf_map_create() failed", + "map_type=(%d) map_name(%s), error:%s\n", + map_type, map_name, strerror(errno)); + + /* keep track of the inner map fd as it is required + * to add records in outer map + */ + inner_map_fds[map_index] = map_fd; + + /* Add entry into this created map + * eg: map1 key = 0, value = map1's map id + * map2 key = 0, value = map2's map id + */ + map_id = get_map_id_from_fd(map_fd); + ret = bpf_map_update_elem(map_fd, &map_key, &map_id, 0); + CHECK(ret != 0, + "bpf_map_update_elem failed", + "map_type=(%d) map_name(%s), error:%s\n", + map_type, map_name, strerror(errno)); + } +} + +static int create_outer_map(enum bpf_map_type map_type, __u32 inner_map_fd) +{ + int outer_map_fd; + LIBBPF_OPTS(bpf_map_create_opts, attr); + + attr.inner_map_fd = inner_map_fd; + outer_map_fd = bpf_map_create(map_type, "outer_map", sizeof(__u32), + sizeof(__u32), OUTER_MAP_ENTRIES, + &attr); + CHECK(outer_map_fd < 0, + "outer bpf_map_create()", + "map_type=(%d), error:%s\n", + map_type, strerror(errno)); + + return outer_map_fd; +} + +static void validate_fetch_results(int outer_map_fd, + __u32 *fetched_keys, __u32 *fetched_values, + __u32 max_entries_fetched) +{ + __u32 inner_map_key, inner_map_value; + int inner_map_fd, entry, err; + __u32 outer_map_value; + + for (entry = 0; entry < max_entries_fetched; ++entry) { + outer_map_value = fetched_values[entry]; + inner_map_fd = bpf_map_get_fd_by_id(outer_map_value); + CHECK(inner_map_fd < 0, + "Failed to get inner map fd", + "from id(%d), error=%s\n", + outer_map_value, strerror(errno)); + err = bpf_map_get_next_key(inner_map_fd, NULL, &inner_map_key); + CHECK(err != 0, + "Failed to get inner map key", + "error=%s\n", strerror(errno)); + + err = bpf_map_lookup_elem(inner_map_fd, &inner_map_key, + &inner_map_value); + + close(inner_map_fd); + + CHECK(err != 0, + "Failed to get inner map value", + "for key(%d), error=%s\n", + inner_map_key, strerror(errno)); + + /* Actual value validation */ + CHECK(outer_map_value != inner_map_value, + "Failed to validate inner map value", + "fetched(%d) and lookedup(%d)!\n", + outer_map_value, inner_map_value); + } +} + +static void fetch_and_validate(int outer_map_fd, + struct bpf_map_batch_opts *opts, + __u32 batch_size, bool delete_entries) +{ + __u32 *fetched_keys, *fetched_values, total_fetched = 0; + __u32 batch_key = 0, fetch_count, step_size; + int err, max_entries = OUTER_MAP_ENTRIES; + __u32 value_size = sizeof(__u32); + + /* Total entries needs to be fetched */ + fetched_keys = calloc(max_entries, value_size); + fetched_values = calloc(max_entries, value_size); + 
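For readers following the fetch loop that comes next, a brief annotation of its batch semantics (not part of the patch):

	/* Each bpf_map_lookup_batch()/bpf_map_lookup_and_delete_batch() call copies up to
	 * fetch_count entries, resuming from the opaque token kept in batch_key (NULL on
	 * the first call). ENOENT marks the end of the map; ENOSPC asks the caller to
	 * retry with a larger batch, which fetch_and_validate() does by resetting
	 * total_fetched and continuing with a bigger step_size.
	 */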
CHECK((!fetched_keys || !fetched_values), + "Memory allocation failed for fetched_keys or fetched_values", + "error=%s\n", strerror(errno)); + + for (step_size = batch_size; + step_size <= max_entries; + step_size += batch_size) { + fetch_count = step_size; + err = delete_entries + ? bpf_map_lookup_and_delete_batch(outer_map_fd, + total_fetched ? &batch_key : NULL, + &batch_key, + fetched_keys + total_fetched, + fetched_values + total_fetched, + &fetch_count, opts) + : bpf_map_lookup_batch(outer_map_fd, + total_fetched ? &batch_key : NULL, + &batch_key, + fetched_keys + total_fetched, + fetched_values + total_fetched, + &fetch_count, opts); + + if (err && errno == ENOSPC) { + /* Fetch again with higher batch size */ + total_fetched = 0; + continue; + } + + CHECK((err < 0 && (errno != ENOENT)), + "lookup with steps failed", + "error: %s\n", strerror(errno)); + + /* Update the total fetched number */ + total_fetched += fetch_count; + if (err) + break; + } + + CHECK((total_fetched != max_entries), + "Unable to fetch expected entries !", + "total_fetched(%d) and max_entries(%d) error: (%d):%s\n", + total_fetched, max_entries, errno, strerror(errno)); + + /* validate the fetched entries */ + validate_fetch_results(outer_map_fd, fetched_keys, + fetched_values, total_fetched); + printf("batch_op(%s) is successful with batch_size(%d)\n", + delete_entries ? "LOOKUP_AND_DELETE" : "LOOKUP", batch_size); + + free(fetched_keys); + free(fetched_values); +} + +static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type, + enum bpf_map_type inner_map_type) +{ + __u32 *outer_map_keys, *inner_map_fds; + __u32 max_entries = OUTER_MAP_ENTRIES; + LIBBPF_OPTS(bpf_map_batch_opts, opts); + __u32 value_size = sizeof(__u32); + int batch_size[2] = {5, 10}; + __u32 map_index, op_index; + int outer_map_fd, ret; + + outer_map_keys = calloc(max_entries, value_size); + inner_map_fds = calloc(max_entries, value_size); + CHECK((!outer_map_keys || !inner_map_fds), + "Memory allocation failed for outer_map_keys or inner_map_fds", + "error=%s\n", strerror(errno)); + + create_inner_maps(inner_map_type, inner_map_fds); + + outer_map_fd = create_outer_map(outer_map_type, *inner_map_fds); + /* create outer map keys */ + for (map_index = 0; map_index < max_entries; map_index++) + outer_map_keys[map_index] = + ((outer_map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) + ? 
9 : 1000) - map_index; + + /* batch operation - map_update */ + ret = bpf_map_update_batch(outer_map_fd, outer_map_keys, + inner_map_fds, &max_entries, &opts); + CHECK(ret != 0, + "Failed to update the outer map batch ops", + "error=%s\n", strerror(errno)); + + /* batch operation - map_lookup */ + for (op_index = 0; op_index < 2; ++op_index) + fetch_and_validate(outer_map_fd, &opts, + batch_size[op_index], false); + + /* batch operation - map_lookup_delete */ + if (outer_map_type == BPF_MAP_TYPE_HASH_OF_MAPS) + fetch_and_validate(outer_map_fd, &opts, + max_entries, true /*delete*/); + + /* close all map fds */ + for (map_index = 0; map_index < max_entries; map_index++) + close(inner_map_fds[map_index]); + close(outer_map_fd); + + free(inner_map_fds); + free(outer_map_keys); +} + +void test_map_in_map_batch_ops_array(void) +{ + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY); + printf("%s:PASS with inner ARRAY map\n", __func__); + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH); + printf("%s:PASS with inner HASH map\n", __func__); +} + +void test_map_in_map_batch_ops_hash(void) +{ + _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY); + printf("%s:PASS with inner ARRAY map\n", __func__); + _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH); + printf("%s:PASS with inner HASH map\n", __func__); +} diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 2bb1f9b3841d..59cf81ec55af 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -21,6 +21,10 @@ #include "network_helpers.h" #include "test_progs.h" +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif + #define clean_errno() (errno == 0 ? "None" : strerror(errno)) #define log_err(MSG, ...) 
({ \ int __save = errno; \ @@ -73,13 +77,13 @@ int settimeo(int fd, int timeout_ms) #define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; }) -static int __start_server(int type, const struct sockaddr *addr, +static int __start_server(int type, int protocol, const struct sockaddr *addr, socklen_t addrlen, int timeout_ms, bool reuseport) { int on = 1; int fd; - fd = socket(addr->sa_family, type, 0); + fd = socket(addr->sa_family, type, protocol); if (fd < 0) { log_err("Failed to create server socket"); return -1; @@ -113,8 +117,8 @@ error_close: return -1; } -int start_server(int family, int type, const char *addr_str, __u16 port, - int timeout_ms) +static int start_server_proto(int family, int type, int protocol, + const char *addr_str, __u16 port, int timeout_ms) { struct sockaddr_storage addr; socklen_t addrlen; @@ -122,10 +126,23 @@ int start_server(int family, int type, const char *addr_str, __u16 port, if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) return -1; - return __start_server(type, (struct sockaddr *)&addr, + return __start_server(type, protocol, (struct sockaddr *)&addr, addrlen, timeout_ms, false); } +int start_server(int family, int type, const char *addr_str, __u16 port, + int timeout_ms) +{ + return start_server_proto(family, type, 0, addr_str, port, timeout_ms); +} + +int start_mptcp_server(int family, const char *addr_str, __u16 port, + int timeout_ms) +{ + return start_server_proto(family, SOCK_STREAM, IPPROTO_MPTCP, addr_str, + port, timeout_ms); +} + int *start_reuseport_server(int family, int type, const char *addr_str, __u16 port, int timeout_ms, unsigned int nr_listens) { @@ -144,7 +161,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str, if (!fds) return NULL; - fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen, + fds[0] = __start_server(type, 0, (struct sockaddr *)&addr, addrlen, timeout_ms, true); if (fds[0] == -1) goto close_fds; @@ -154,7 +171,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str, goto close_fds; for (; nr_fds < nr_listens; nr_fds++) { - fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr, + fds[nr_fds] = __start_server(type, 0, (struct sockaddr *)&addr, addrlen, timeout_ms, true); if (fds[nr_fds] == -1) goto close_fds; @@ -247,7 +264,7 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) struct sockaddr_storage addr; struct sockaddr_in *addr_in; socklen_t addrlen, optlen; - int fd, type; + int fd, type, protocol; if (!opts) opts = &default_opts; @@ -258,6 +275,11 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) return -1; } + if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) { + log_err("getsockopt(SOL_PROTOCOL)"); + return -1; + } + addrlen = sizeof(addr); if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { log_err("Failed to get server addr"); @@ -265,7 +287,7 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) } addr_in = (struct sockaddr_in *)&addr; - fd = socket(addr_in->sin_family, type, 0); + fd = socket(addr_in->sin_family, type, protocol); if (fd < 0) { log_err("Failed to create client socket"); return -1; diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index a4b3b2f9877b..f882c691b790 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -42,6 +42,8 @@ extern struct ipv6_packet pkt_v6; int 
settimeo(int fd, int timeout_ms); int start_server(int family, int type, const char *addr, __u16 port, int timeout_ms); +int start_mptcp_server(int family, const char *addr, __u16 port, + int timeout_ms); int *start_reuseport_server(int family, int type, const char *addr_str, __u16 port, int timeout_ms, unsigned int nr_listens); diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index c0c6d410751d..08c0601b3e84 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -55,6 +55,7 @@ void test_attach_probe(void) if (!ASSERT_OK_PTR(skel->bss, "check_bss")) goto cleanup; + /* manual-attach kprobe/kretprobe */ kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe, false /* retprobe */, SYS_NANOSLEEP_KPROBE_NAME); @@ -69,6 +70,13 @@ void test_attach_probe(void) goto cleanup; skel->links.handle_kretprobe = kretprobe_link; + /* auto-attachable kprobe and kretprobe */ + skel->links.handle_kprobe_auto = bpf_program__attach(skel->progs.handle_kprobe_auto); + ASSERT_OK_PTR(skel->links.handle_kprobe_auto, "attach_kprobe_auto"); + + skel->links.handle_kretprobe_auto = bpf_program__attach(skel->progs.handle_kretprobe_auto); + ASSERT_OK_PTR(skel->links.handle_kretprobe_auto, "attach_kretprobe_auto"); + if (!legacy) ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before"); @@ -157,7 +165,9 @@ void test_attach_probe(void) trigger_func2(); ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res"); + ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res"); ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res"); + ASSERT_EQ(skel->bss->kretprobe2_res, 22, "check_kretprobe_auto_res"); ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res"); ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res"); ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res"); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 923a6139b2d8..83ef55e3caa4 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -4,8 +4,11 @@ #include <pthread.h> #include <sched.h> #include <sys/syscall.h> +#include <sys/mman.h> #include <unistd.h> #include <test_progs.h> +#include <network_helpers.h> +#include <bpf/btf.h> #include "test_bpf_cookie.skel.h" #include "kprobe_multi.skel.h" @@ -410,6 +413,88 @@ cleanup: bpf_link__destroy(link); } +static void tracing_subtest(struct test_bpf_cookie *skel) +{ + __u64 cookie; + int prog_fd; + int fentry_fd = -1, fexit_fd = -1, fmod_ret_fd = -1; + LIBBPF_OPTS(bpf_test_run_opts, opts); + LIBBPF_OPTS(bpf_link_create_opts, link_opts); + + skel->bss->fentry_res = 0; + skel->bss->fexit_res = 0; + + cookie = 0x10000000000000L; + prog_fd = bpf_program__fd(skel->progs.fentry_test1); + link_opts.tracing.cookie = cookie; + fentry_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_FENTRY, &link_opts); + if (!ASSERT_GE(fentry_fd, 0, "fentry.link_create")) + goto cleanup; + + cookie = 0x20000000000000L; + prog_fd = bpf_program__fd(skel->progs.fexit_test1); + link_opts.tracing.cookie = cookie; + fexit_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_FEXIT, &link_opts); + if (!ASSERT_GE(fexit_fd, 0, "fexit.link_create")) + goto cleanup; + + cookie = 0x30000000000000L; + prog_fd = bpf_program__fd(skel->progs.fmod_ret_test); + link_opts.tracing.cookie = cookie; + fmod_ret_fd = bpf_link_create(prog_fd, 0, 
BPF_MODIFY_RETURN, &link_opts); + if (!ASSERT_GE(fmod_ret_fd, 0, "fmod_ret.link_create")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.fentry_test1); + bpf_prog_test_run_opts(prog_fd, &opts); + + prog_fd = bpf_program__fd(skel->progs.fmod_ret_test); + bpf_prog_test_run_opts(prog_fd, &opts); + + ASSERT_EQ(skel->bss->fentry_res, 0x10000000000000L, "fentry_res"); + ASSERT_EQ(skel->bss->fexit_res, 0x20000000000000L, "fexit_res"); + ASSERT_EQ(skel->bss->fmod_ret_res, 0x30000000000000L, "fmod_ret_res"); + +cleanup: + if (fentry_fd >= 0) + close(fentry_fd); + if (fexit_fd >= 0) + close(fexit_fd); + if (fmod_ret_fd >= 0) + close(fmod_ret_fd); +} + +int stack_mprotect(void); + +static void lsm_subtest(struct test_bpf_cookie *skel) +{ + __u64 cookie; + int prog_fd; + int lsm_fd = -1; + LIBBPF_OPTS(bpf_link_create_opts, link_opts); + + skel->bss->lsm_res = 0; + + cookie = 0x90000000000090L; + prog_fd = bpf_program__fd(skel->progs.test_int_hook); + link_opts.tracing.cookie = cookie; + lsm_fd = bpf_link_create(prog_fd, 0, BPF_LSM_MAC, &link_opts); + if (!ASSERT_GE(lsm_fd, 0, "lsm.link_create")) + goto cleanup; + + stack_mprotect(); + if (!ASSERT_EQ(errno, EPERM, "stack_mprotect")) + goto cleanup; + + usleep(1); + + ASSERT_EQ(skel->bss->lsm_res, 0x90000000000090L, "fentry_res"); + +cleanup: + if (lsm_fd >= 0) + close(lsm_fd); +} + void test_bpf_cookie(void) { struct test_bpf_cookie *skel; @@ -432,6 +517,10 @@ void test_bpf_cookie(void) tp_subtest(skel); if (test__start_subtest("perf_event")) pe_subtest(skel); + if (test__start_subtest("trampoline")) + tracing_subtest(skel); + if (test__start_subtest("lsm")) + lsm_subtest(skel); test_bpf_cookie__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 2c403ddc8076..7ff5fa93d056 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -26,6 +26,7 @@ #include "bpf_iter_bpf_sk_storage_map.skel.h" #include "bpf_iter_test_kern5.skel.h" #include "bpf_iter_test_kern6.skel.h" +#include "bpf_iter_bpf_link.skel.h" static int duration; @@ -34,8 +35,7 @@ static void test_btf_id_or_null(void) struct bpf_iter_test_kern3 *skel; skel = bpf_iter_test_kern3__open_and_load(); - if (CHECK(skel, "bpf_iter_test_kern3__open_and_load", - "skeleton open_and_load unexpectedly succeeded\n")) { + if (!ASSERT_ERR_PTR(skel, "bpf_iter_test_kern3__open_and_load")) { bpf_iter_test_kern3__destroy(skel); return; } @@ -52,7 +52,7 @@ static void do_dummy_read(struct bpf_program *prog) return; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto free_link; /* not check contents, but ensure read() ends without error */ @@ -87,8 +87,7 @@ static void test_ipv6_route(void) struct bpf_iter_ipv6_route *skel; skel = bpf_iter_ipv6_route__open_and_load(); - if (CHECK(!skel, "bpf_iter_ipv6_route__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_ipv6_route__open_and_load")) return; do_dummy_read(skel->progs.dump_ipv6_route); @@ -101,8 +100,7 @@ static void test_netlink(void) struct bpf_iter_netlink *skel; skel = bpf_iter_netlink__open_and_load(); - if (CHECK(!skel, "bpf_iter_netlink__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_netlink__open_and_load")) return; do_dummy_read(skel->progs.dump_netlink); @@ -115,8 +113,7 @@ static void test_bpf_map(void) 
struct bpf_iter_bpf_map *skel; skel = bpf_iter_bpf_map__open_and_load(); - if (CHECK(!skel, "bpf_iter_bpf_map__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_map__open_and_load")) return; do_dummy_read(skel->progs.dump_bpf_map); @@ -129,8 +126,7 @@ static void test_task(void) struct bpf_iter_task *skel; skel = bpf_iter_task__open_and_load(); - if (CHECK(!skel, "bpf_iter_task__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load")) return; do_dummy_read(skel->progs.dump_task); @@ -161,8 +157,7 @@ static void test_task_stack(void) struct bpf_iter_task_stack *skel; skel = bpf_iter_task_stack__open_and_load(); - if (CHECK(!skel, "bpf_iter_task_stack__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_task_stack__open_and_load")) return; do_dummy_read(skel->progs.dump_task_stack); @@ -183,24 +178,22 @@ static void test_task_file(void) void *ret; skel = bpf_iter_task_file__open_and_load(); - if (CHECK(!skel, "bpf_iter_task_file__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_task_file__open_and_load")) return; skel->bss->tgid = getpid(); - if (CHECK(pthread_create(&thread_id, NULL, &do_nothing, NULL), - "pthread_create", "pthread_create failed\n")) + if (!ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing, NULL), + "pthread_create")) goto done; do_dummy_read(skel->progs.dump_task_file); - if (CHECK(pthread_join(thread_id, &ret) || ret != NULL, - "pthread_join", "pthread_join failed\n")) + if (!ASSERT_FALSE(pthread_join(thread_id, &ret) || ret != NULL, + "pthread_join")) goto done; - CHECK(skel->bss->count != 0, "check_count", - "invalid non pthread file visit count %d\n", skel->bss->count); + ASSERT_EQ(skel->bss->count, 0, "check_count"); done: bpf_iter_task_file__destroy(skel); @@ -224,7 +217,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel) return ret; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto free_link; err = read_fd_into_buffer(iter_fd, buf, TASKBUFSZ); @@ -238,9 +231,8 @@ static int do_btf_read(struct bpf_iter_task_btf *skel) if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno))) goto free_link; - CHECK(strstr(taskbuf, "(struct task_struct)") == NULL, - "check for btf representation of task_struct in iter data", - "struct task_struct not found"); + ASSERT_HAS_SUBSTR(taskbuf, "(struct task_struct)", + "check for btf representation of task_struct in iter data"); free_link: if (iter_fd > 0) close(iter_fd); @@ -255,8 +247,7 @@ static void test_task_btf(void) int ret; skel = bpf_iter_task_btf__open_and_load(); - if (CHECK(!skel, "bpf_iter_task_btf__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_task_btf__open_and_load")) return; bss = skel->bss; @@ -265,12 +256,10 @@ static void test_task_btf(void) if (ret) goto cleanup; - if (CHECK(bss->tasks == 0, "check if iterated over tasks", - "no task iteration, did BPF program run?\n")) + if (!ASSERT_NEQ(bss->tasks, 0, "no task iteration, did BPF program run?")) goto cleanup; - CHECK(bss->seq_err != 0, "check for unexpected err", - "bpf_seq_printf_btf returned %ld", bss->seq_err); + ASSERT_EQ(bss->seq_err, 0, "check for unexpected err"); cleanup: bpf_iter_task_btf__destroy(skel); @@ -281,8 +270,7 @@ static void test_tcp4(void) struct bpf_iter_tcp4 *skel; skel = 
bpf_iter_tcp4__open_and_load(); - if (CHECK(!skel, "bpf_iter_tcp4__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_tcp4__open_and_load")) return; do_dummy_read(skel->progs.dump_tcp4); @@ -295,8 +283,7 @@ static void test_tcp6(void) struct bpf_iter_tcp6 *skel; skel = bpf_iter_tcp6__open_and_load(); - if (CHECK(!skel, "bpf_iter_tcp6__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_tcp6__open_and_load")) return; do_dummy_read(skel->progs.dump_tcp6); @@ -309,8 +296,7 @@ static void test_udp4(void) struct bpf_iter_udp4 *skel; skel = bpf_iter_udp4__open_and_load(); - if (CHECK(!skel, "bpf_iter_udp4__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_udp4__open_and_load")) return; do_dummy_read(skel->progs.dump_udp4); @@ -323,8 +309,7 @@ static void test_udp6(void) struct bpf_iter_udp6 *skel; skel = bpf_iter_udp6__open_and_load(); - if (CHECK(!skel, "bpf_iter_udp6__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_udp6__open_and_load")) return; do_dummy_read(skel->progs.dump_udp6); @@ -349,7 +334,7 @@ static void test_unix(void) static int do_read_with_fd(int iter_fd, const char *expected, bool read_one_char) { - int err = -1, len, read_buf_len, start; + int len, read_buf_len, start; char buf[16] = {}; read_buf_len = read_one_char ? 1 : 16; @@ -363,9 +348,7 @@ static int do_read_with_fd(int iter_fd, const char *expected, if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) return -1; - err = strcmp(buf, expected); - if (CHECK(err, "read", "incorrect read result: buf %s, expected %s\n", - buf, expected)) + if (!ASSERT_STREQ(buf, expected, "read")) return -1; return 0; @@ -378,19 +361,17 @@ static void test_anon_iter(bool read_one_char) int iter_fd, err; skel = bpf_iter_test_kern1__open_and_load(); - if (CHECK(!skel, "bpf_iter_test_kern1__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_test_kern1__open_and_load")) return; err = bpf_iter_test_kern1__attach(skel); - if (CHECK(err, "bpf_iter_test_kern1__attach", - "skeleton attach failed\n")) { + if (!ASSERT_OK(err, "bpf_iter_test_kern1__attach")) { goto out; } link = skel->links.dump_task; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto out; do_read_with_fd(iter_fd, "abcd", read_one_char); @@ -423,8 +404,7 @@ static void test_file_iter(void) int err; skel1 = bpf_iter_test_kern1__open_and_load(); - if (CHECK(!skel1, "bpf_iter_test_kern1__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel1, "bpf_iter_test_kern1__open_and_load")) return; link = bpf_program__attach_iter(skel1->progs.dump_task, NULL); @@ -447,12 +427,11 @@ static void test_file_iter(void) * should change. 
*/ skel2 = bpf_iter_test_kern2__open_and_load(); - if (CHECK(!skel2, "bpf_iter_test_kern2__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel2, "bpf_iter_test_kern2__open_and_load")) goto unlink_path; err = bpf_link__update_program(link, skel2->progs.dump_task); - if (CHECK(err, "update_prog", "update_prog failed\n")) + if (!ASSERT_OK(err, "update_prog")) goto destroy_skel2; do_read(path, "ABCD"); @@ -478,8 +457,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) char *buf; skel = bpf_iter_test_kern4__open(); - if (CHECK(!skel, "bpf_iter_test_kern4__open", - "skeleton open failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_test_kern4__open")) return; /* create two maps: bpf program will only do bpf_seq_write @@ -515,8 +493,8 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) } skel->rodata->ret1 = ret1; - if (CHECK(bpf_iter_test_kern4__load(skel), - "bpf_iter_test_kern4__load", "skeleton load failed\n")) + if (!ASSERT_OK(bpf_iter_test_kern4__load(skel), + "bpf_iter_test_kern4__load")) goto free_map2; /* setup filtering map_id in bpf program */ @@ -538,7 +516,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) goto free_map2; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto free_link; buf = malloc(expected_read_len); @@ -574,22 +552,16 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) goto free_buf; } - if (CHECK(total_read_len != expected_read_len, "read", - "total len %u, expected len %u\n", total_read_len, - expected_read_len)) + if (!ASSERT_EQ(total_read_len, expected_read_len, "read")) goto free_buf; - if (CHECK(skel->bss->map1_accessed != 1, "map1_accessed", - "expected 1 actual %d\n", skel->bss->map1_accessed)) + if (!ASSERT_EQ(skel->bss->map1_accessed, 1, "map1_accessed")) goto free_buf; - if (CHECK(skel->bss->map2_accessed != 2, "map2_accessed", - "expected 2 actual %d\n", skel->bss->map2_accessed)) + if (!ASSERT_EQ(skel->bss->map2_accessed, 2, "map2_accessed")) goto free_buf; - CHECK(skel->bss->map2_seqnum1 != skel->bss->map2_seqnum2, - "map2_seqnum", "two different seqnum %lld %lld\n", - skel->bss->map2_seqnum1, skel->bss->map2_seqnum2); + ASSERT_EQ(skel->bss->map2_seqnum1, skel->bss->map2_seqnum2, "map2_seqnum"); free_buf: free(buf); @@ -622,15 +594,13 @@ static void test_bpf_hash_map(void) char buf[64]; skel = bpf_iter_bpf_hash_map__open(); - if (CHECK(!skel, "bpf_iter_bpf_hash_map__open", - "skeleton open failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_hash_map__open")) return; skel->bss->in_test_mode = true; err = bpf_iter_bpf_hash_map__load(skel); - if (CHECK(!skel, "bpf_iter_bpf_hash_map__load", - "skeleton load failed\n")) + if (!ASSERT_OK(err, "bpf_iter_bpf_hash_map__load")) goto out; /* iterator with hashmap2 and hashmap3 should fail */ @@ -659,7 +629,7 @@ static void test_bpf_hash_map(void) expected_val += val; err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY); - if (CHECK(err, "map_update", "map_update failed\n")) + if (!ASSERT_OK(err, "map_update")) goto out; } @@ -669,7 +639,7 @@ static void test_bpf_hash_map(void) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto free_link; /* do some tests */ @@ -679,17 +649,11 @@ static void test_bpf_hash_map(void) goto close_iter; /* test results */ - if (CHECK(skel->bss->key_sum_a != expected_key_a, 
- "key_sum_a", "got %u expected %u\n", - skel->bss->key_sum_a, expected_key_a)) + if (!ASSERT_EQ(skel->bss->key_sum_a, expected_key_a, "key_sum_a")) goto close_iter; - if (CHECK(skel->bss->key_sum_b != expected_key_b, - "key_sum_b", "got %u expected %u\n", - skel->bss->key_sum_b, expected_key_b)) + if (!ASSERT_EQ(skel->bss->key_sum_b, expected_key_b, "key_sum_b")) goto close_iter; - if (CHECK(skel->bss->val_sum != expected_val, - "val_sum", "got %llu expected %llu\n", - skel->bss->val_sum, expected_val)) + if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum")) goto close_iter; close_iter: @@ -718,16 +682,14 @@ static void test_bpf_percpu_hash_map(void) void *val; skel = bpf_iter_bpf_percpu_hash_map__open(); - if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__open", - "skeleton open failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__open")) return; skel->rodata->num_cpus = bpf_num_possible_cpus(); val = malloc(8 * bpf_num_possible_cpus()); err = bpf_iter_bpf_percpu_hash_map__load(skel); - if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__load", - "skeleton load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__load")) goto out; /* update map values here */ @@ -745,7 +707,7 @@ static void test_bpf_percpu_hash_map(void) } err = bpf_map_update_elem(map_fd, &key, val, BPF_ANY); - if (CHECK(err, "map_update", "map_update failed\n")) + if (!ASSERT_OK(err, "map_update")) goto out; } @@ -758,7 +720,7 @@ static void test_bpf_percpu_hash_map(void) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto free_link; /* do some tests */ @@ -768,17 +730,11 @@ static void test_bpf_percpu_hash_map(void) goto close_iter; /* test results */ - if (CHECK(skel->bss->key_sum_a != expected_key_a, - "key_sum_a", "got %u expected %u\n", - skel->bss->key_sum_a, expected_key_a)) + if (!ASSERT_EQ(skel->bss->key_sum_a, expected_key_a, "key_sum_a")) goto close_iter; - if (CHECK(skel->bss->key_sum_b != expected_key_b, - "key_sum_b", "got %u expected %u\n", - skel->bss->key_sum_b, expected_key_b)) + if (!ASSERT_EQ(skel->bss->key_sum_b, expected_key_b, "key_sum_b")) goto close_iter; - if (CHECK(skel->bss->val_sum != expected_val, - "val_sum", "got %u expected %u\n", - skel->bss->val_sum, expected_val)) + if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum")) goto close_iter; close_iter: @@ -803,8 +759,7 @@ static void test_bpf_array_map(void) int len, start; skel = bpf_iter_bpf_array_map__open_and_load(); - if (CHECK(!skel, "bpf_iter_bpf_array_map__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_array_map__open_and_load")) return; map_fd = bpf_map__fd(skel->maps.arraymap1); @@ -817,7 +772,7 @@ static void test_bpf_array_map(void) first_val = val; err = bpf_map_update_elem(map_fd, &i, &val, BPF_ANY); - if (CHECK(err, "map_update", "map_update failed\n")) + if (!ASSERT_OK(err, "map_update")) goto out; } @@ -830,7 +785,7 @@ static void test_bpf_array_map(void) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto free_link; /* do some tests */ @@ -850,21 +805,16 @@ static void test_bpf_array_map(void) res_first_key, res_first_val, first_val)) goto close_iter; - if (CHECK(skel->bss->key_sum != expected_key, - "key_sum", "got %u expected %u\n", - skel->bss->key_sum, expected_key)) + if 
(!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum")) goto close_iter; - if (CHECK(skel->bss->val_sum != expected_val, - "val_sum", "got %llu expected %llu\n", - skel->bss->val_sum, expected_val)) + if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum")) goto close_iter; for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) { err = bpf_map_lookup_elem(map_fd, &i, &val); - if (CHECK(err, "map_lookup", "map_lookup failed\n")) + if (!ASSERT_OK(err, "map_lookup")) goto out; - if (CHECK(i != val, "invalid_val", - "got value %llu expected %u\n", val, i)) + if (!ASSERT_EQ(i, val, "invalid_val")) goto out; } @@ -889,16 +839,14 @@ static void test_bpf_percpu_array_map(void) int len; skel = bpf_iter_bpf_percpu_array_map__open(); - if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__open", - "skeleton open failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__open")) return; skel->rodata->num_cpus = bpf_num_possible_cpus(); val = malloc(8 * bpf_num_possible_cpus()); err = bpf_iter_bpf_percpu_array_map__load(skel); - if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__load", - "skeleton load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__load")) goto out; /* update map values here */ @@ -912,7 +860,7 @@ static void test_bpf_percpu_array_map(void) } err = bpf_map_update_elem(map_fd, &i, val, BPF_ANY); - if (CHECK(err, "map_update", "map_update failed\n")) + if (!ASSERT_OK(err, "map_update")) goto out; } @@ -925,7 +873,7 @@ static void test_bpf_percpu_array_map(void) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto free_link; /* do some tests */ @@ -935,13 +883,9 @@ static void test_bpf_percpu_array_map(void) goto close_iter; /* test results */ - if (CHECK(skel->bss->key_sum != expected_key, - "key_sum", "got %u expected %u\n", - skel->bss->key_sum, expected_key)) + if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum")) goto close_iter; - if (CHECK(skel->bss->val_sum != expected_val, - "val_sum", "got %u expected %u\n", - skel->bss->val_sum, expected_val)) + if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum")) goto close_iter; close_iter: @@ -966,17 +910,16 @@ static void test_bpf_sk_storage_delete(void) char buf[64]; skel = bpf_iter_bpf_sk_storage_helpers__open_and_load(); - if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load")) return; map_fd = bpf_map__fd(skel->maps.sk_stg_map); sock_fd = socket(AF_INET6, SOCK_STREAM, 0); - if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno)) + if (!ASSERT_GE(sock_fd, 0, "socket")) goto out; err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST); - if (CHECK(err, "map_update", "map_update failed\n")) + if (!ASSERT_OK(err, "map_update")) goto out; memset(&linfo, 0, sizeof(linfo)); @@ -989,7 +932,7 @@ static void test_bpf_sk_storage_delete(void) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto free_link; /* do some tests */ @@ -1027,22 +970,21 @@ static void test_bpf_sk_storage_get(void) int sock_fd = -1; skel = bpf_iter_bpf_sk_storage_helpers__open_and_load(); - if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, 
"bpf_iter_bpf_sk_storage_helpers__open_and_load")) return; sock_fd = socket(AF_INET6, SOCK_STREAM, 0); - if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno)) + if (!ASSERT_GE(sock_fd, 0, "socket")) goto out; err = listen(sock_fd, 1); - if (CHECK(err != 0, "listen", "errno: %d\n", errno)) + if (!ASSERT_OK(err, "listen")) goto close_socket; map_fd = bpf_map__fd(skel->maps.sk_stg_map); err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST); - if (CHECK(err, "bpf_map_update_elem", "map_update_failed\n")) + if (!ASSERT_OK(err, "bpf_map_update_elem")) goto close_socket; do_dummy_read(skel->progs.fill_socket_owner); @@ -1078,15 +1020,14 @@ static void test_bpf_sk_storage_map(void) char buf[64]; skel = bpf_iter_bpf_sk_storage_map__open_and_load(); - if (CHECK(!skel, "bpf_iter_bpf_sk_storage_map__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_map__open_and_load")) return; map_fd = bpf_map__fd(skel->maps.sk_stg_map); num_sockets = ARRAY_SIZE(sock_fd); for (i = 0; i < num_sockets; i++) { sock_fd[i] = socket(AF_INET6, SOCK_STREAM, 0); - if (CHECK(sock_fd[i] < 0, "socket", "errno: %d\n", errno)) + if (!ASSERT_GE(sock_fd[i], 0, "socket")) goto out; val = i + 1; @@ -1094,7 +1035,7 @@ static void test_bpf_sk_storage_map(void) err = bpf_map_update_elem(map_fd, &sock_fd[i], &val, BPF_NOEXIST); - if (CHECK(err, "map_update", "map_update failed\n")) + if (!ASSERT_OK(err, "map_update")) goto out; } @@ -1107,7 +1048,7 @@ static void test_bpf_sk_storage_map(void) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto free_link; /* do some tests */ @@ -1117,14 +1058,10 @@ static void test_bpf_sk_storage_map(void) goto close_iter; /* test results */ - if (CHECK(skel->bss->ipv6_sk_count != num_sockets, - "ipv6_sk_count", "got %u expected %u\n", - skel->bss->ipv6_sk_count, num_sockets)) + if (!ASSERT_EQ(skel->bss->ipv6_sk_count, num_sockets, "ipv6_sk_count")) goto close_iter; - if (CHECK(skel->bss->val_sum != expected_val, - "val_sum", "got %u expected %u\n", - skel->bss->val_sum, expected_val)) + if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum")) goto close_iter; close_iter: @@ -1147,8 +1084,7 @@ static void test_rdonly_buf_out_of_bound(void) struct bpf_link *link; skel = bpf_iter_test_kern5__open_and_load(); - if (CHECK(!skel, "bpf_iter_test_kern5__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_test_kern5__open_and_load")) return; memset(&linfo, 0, sizeof(linfo)); @@ -1167,11 +1103,23 @@ static void test_buf_neg_offset(void) struct bpf_iter_test_kern6 *skel; skel = bpf_iter_test_kern6__open_and_load(); - if (CHECK(skel, "bpf_iter_test_kern6__open_and_load", - "skeleton open_and_load unexpected success\n")) + if (!ASSERT_ERR_PTR(skel, "bpf_iter_test_kern6__open_and_load")) bpf_iter_test_kern6__destroy(skel); } +static void test_link_iter(void) +{ + struct bpf_iter_bpf_link *skel; + + skel = bpf_iter_bpf_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_link__open_and_load")) + return; + + do_dummy_read(skel->progs.dump_bpf_link); + + bpf_iter_bpf_link__destroy(skel); +} + #define CMP_BUFFER_SIZE 1024 static char task_vma_output[CMP_BUFFER_SIZE]; static char proc_maps_output[CMP_BUFFER_SIZE]; @@ -1200,13 +1148,13 @@ static void test_task_vma(void) char maps_path[64]; skel = bpf_iter_task_vma__open(); - if (CHECK(!skel, "bpf_iter_task_vma__open", "skeleton open 
failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma__open")) return; skel->bss->pid = getpid(); err = bpf_iter_task_vma__load(skel); - if (CHECK(err, "bpf_iter_task_vma__load", "skeleton load failed\n")) + if (!ASSERT_OK(err, "bpf_iter_task_vma__load")) goto out; skel->links.proc_maps = bpf_program__attach_iter( @@ -1218,7 +1166,7 @@ static void test_task_vma(void) } iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto out; /* Read CMP_BUFFER_SIZE (1kB) from bpf_iter. Read in small chunks @@ -1230,7 +1178,7 @@ static void test_task_vma(void) MIN(read_size, CMP_BUFFER_SIZE - len)); if (!err) break; - if (CHECK(err < 0, "read_iter_fd", "read_iter_fd failed\n")) + if (!ASSERT_GE(err, 0, "read_iter_fd")) goto out; len += err; } @@ -1238,18 +1186,17 @@ static void test_task_vma(void) /* read CMP_BUFFER_SIZE (1kB) from /proc/pid/maps */ snprintf(maps_path, 64, "/proc/%u/maps", skel->bss->pid); proc_maps_fd = open(maps_path, O_RDONLY); - if (CHECK(proc_maps_fd < 0, "open_proc_maps", "open_proc_maps failed\n")) + if (!ASSERT_GE(proc_maps_fd, 0, "open_proc_maps")) goto out; err = read_fd_into_buffer(proc_maps_fd, proc_maps_output, CMP_BUFFER_SIZE); - if (CHECK(err < 0, "read_prog_maps_fd", "read_prog_maps_fd failed\n")) + if (!ASSERT_GE(err, 0, "read_prog_maps_fd")) goto out; /* strip and compare the first line of the two files */ str_strip_first_line(task_vma_output); str_strip_first_line(proc_maps_output); - CHECK(strcmp(task_vma_output, proc_maps_output), "compare_output", - "found mismatch\n"); + ASSERT_STREQ(task_vma_output, proc_maps_output, "compare_output"); out: close(proc_maps_fd); close(iter_fd); @@ -1318,4 +1265,6 @@ void test_bpf_iter(void) test_rdonly_buf_out_of_bound(); if (test__start_subtest("buf-neg-offset")) test_buf_neg_offset(); + if (test__start_subtest("link-iter")) + test_link_iter(); } diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c index 1dfe14ff6aa4..f2ce4fd1cdae 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_autosize.c +++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c @@ -167,7 +167,7 @@ void test_core_autosize(void) if (!ASSERT_OK_PTR(bss_map, "bss_map_find")) goto cleanup; - err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &zero, (void *)&out); + err = bpf_map__lookup_elem(bss_map, &zero, sizeof(zero), &out, sizeof(out), 0); if (!ASSERT_OK(err, "bss_lookup")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index f28f75aa9154..3712dfe1be59 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -277,13 +277,21 @@ static int duration = 0; #define SIZE_OUTPUT_DATA(type) \ STRUCT_TO_CHAR_PTR(core_reloc_size_output) { \ .int_sz = sizeof(((type *)0)->int_field), \ + .int_off = offsetof(type, int_field), \ .struct_sz = sizeof(((type *)0)->struct_field), \ + .struct_off = offsetof(type, struct_field), \ .union_sz = sizeof(((type *)0)->union_field), \ + .union_off = offsetof(type, union_field), \ .arr_sz = sizeof(((type *)0)->arr_field), \ - .arr_elem_sz = sizeof(((type *)0)->arr_field[0]), \ + .arr_off = offsetof(type, arr_field), \ + .arr_elem_sz = sizeof(((type *)0)->arr_field[1]), \ + .arr_elem_off = offsetof(type, arr_field[1]), \ .ptr_sz = 8, /* always 8-byte pointer for BPF */ \ + .ptr_off = 
offsetof(type, ptr_field), \ .enum_sz = sizeof(((type *)0)->enum_field), \ + .enum_off = offsetof(type, enum_field), \ .float_sz = sizeof(((type *)0)->float_field), \ + .float_off = offsetof(type, float_field), \ } #define SIZE_CASE(name) { \ @@ -714,9 +722,10 @@ static const struct core_reloc_test_case test_cases[] = { }), BITFIELDS_ERR_CASE(bitfields___err_too_big_bitfield), - /* size relocation checks */ + /* field size and offset relocation checks */ SIZE_CASE(size), SIZE_CASE(size___diff_sz), + SIZE_CASE(size___diff_offs), SIZE_ERR_CASE(size___err_ambiguous), /* validate type existence and size relocations */ diff --git a/tools/testing/selftests/bpf/prog_tests/core_retro.c b/tools/testing/selftests/bpf/prog_tests/core_retro.c index 6acb0e94d4d7..4a2c256c8db6 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_retro.c +++ b/tools/testing/selftests/bpf/prog_tests/core_retro.c @@ -6,31 +6,32 @@ void test_core_retro(void) { - int err, zero = 0, res, duration = 0, my_pid = getpid(); + int err, zero = 0, res, my_pid = getpid(); struct test_core_retro *skel; /* load program */ skel = test_core_retro__open_and_load(); - if (CHECK(!skel, "skel_load", "skeleton open/load failed\n")) + if (!ASSERT_OK_PTR(skel, "skel_load")) goto out_close; - err = bpf_map_update_elem(bpf_map__fd(skel->maps.exp_tgid_map), &zero, &my_pid, 0); - if (CHECK(err, "map_update", "failed to set expected PID: %d\n", errno)) + err = bpf_map__update_elem(skel->maps.exp_tgid_map, &zero, sizeof(zero), + &my_pid, sizeof(my_pid), 0); + if (!ASSERT_OK(err, "map_update")) goto out_close; /* attach probe */ err = test_core_retro__attach(skel); - if (CHECK(err, "attach_kprobe", "err %d\n", err)) + if (!ASSERT_OK(err, "attach_kprobe")) goto out_close; /* trigger */ usleep(1); - err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.results), &zero, &res); - if (CHECK(err, "map_lookup", "failed to lookup result: %d\n", errno)) + err = bpf_map__lookup_elem(skel->maps.results, &zero, sizeof(zero), &res, sizeof(res), 0); + if (!ASSERT_OK(err, "map_lookup")) goto out_close; - CHECK(res != my_pid, "pid_check", "got %d != exp %d\n", res, my_pid); + ASSERT_EQ(res, my_pid, "pid_check"); out_close: test_core_retro__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c new file mode 100644 index 000000000000..3c7aa82b98e2 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Facebook */ + +#include <test_progs.h> +#include "dynptr_fail.skel.h" +#include "dynptr_success.skel.h" + +static size_t log_buf_sz = 1048576; /* 1 MB */ +static char obj_log_buf[1048576]; + +static struct { + const char *prog_name; + const char *expected_err_msg; +} dynptr_tests[] = { + /* failure cases */ + {"ringbuf_missing_release1", "Unreleased reference id=1"}, + {"ringbuf_missing_release2", "Unreleased reference id=2"}, + {"ringbuf_missing_release_callback", "Unreleased reference id"}, + {"use_after_invalid", "Expected an initialized dynptr as arg #3"}, + {"ringbuf_invalid_api", "type=mem expected=alloc_mem"}, + {"add_dynptr_to_map1", "invalid indirect read from stack"}, + {"add_dynptr_to_map2", "invalid indirect read from stack"}, + {"data_slice_out_of_bounds_ringbuf", "value is outside of the allowed memory range"}, + {"data_slice_out_of_bounds_map_value", "value is outside of the allowed memory range"}, + {"data_slice_use_after_release", "invalid mem access 'scalar'"}, + 
{"data_slice_missing_null_check1", "invalid mem access 'mem_or_null'"}, + {"data_slice_missing_null_check2", "invalid mem access 'mem_or_null'"}, + {"invalid_helper1", "invalid indirect read from stack"}, + {"invalid_helper2", "Expected an initialized dynptr as arg #3"}, + {"invalid_write1", "Expected an initialized dynptr as arg #1"}, + {"invalid_write2", "Expected an initialized dynptr as arg #3"}, + {"invalid_write3", "Expected an initialized ringbuf dynptr as arg #1"}, + {"invalid_write4", "arg 1 is an unacquired reference"}, + {"invalid_read1", "invalid read from stack"}, + {"invalid_read2", "cannot pass in dynptr at an offset"}, + {"invalid_read3", "invalid read from stack"}, + {"invalid_read4", "invalid read from stack"}, + {"invalid_offset", "invalid write to stack"}, + {"global", "type=map_value expected=fp"}, + {"release_twice", "arg 1 is an unacquired reference"}, + {"release_twice_callback", "arg 1 is an unacquired reference"}, + {"dynptr_from_mem_invalid_api", + "Unsupported reg type fp for bpf_dynptr_from_mem data"}, + + /* success cases */ + {"test_read_write", NULL}, + {"test_data_slice", NULL}, + {"test_ringbuf", NULL}, +}; + +static void verify_fail(const char *prog_name, const char *expected_err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct bpf_program *prog; + struct dynptr_fail *skel; + int err; + + opts.kernel_log_buf = obj_log_buf; + opts.kernel_log_size = log_buf_sz; + opts.kernel_log_level = 1; + + skel = dynptr_fail__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "dynptr_fail__open_opts")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + bpf_program__set_autoload(prog, true); + + bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize()); + + err = dynptr_fail__load(skel); + if (!ASSERT_ERR(err, "unexpected load success")) + goto cleanup; + + if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) { + fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg); + fprintf(stderr, "Verifier output: %s\n", obj_log_buf); + } + +cleanup: + dynptr_fail__destroy(skel); +} + +static void verify_success(const char *prog_name) +{ + struct dynptr_success *skel; + struct bpf_program *prog; + struct bpf_link *link; + + skel = dynptr_success__open(); + if (!ASSERT_OK_PTR(skel, "dynptr_success__open")) + return; + + skel->bss->pid = getpid(); + + bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize()); + + dynptr_success__load(skel); + if (!ASSERT_OK_PTR(skel, "dynptr_success__load")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "bpf_program__attach")) + goto cleanup; + + usleep(1); + + ASSERT_EQ(skel->bss->err, 0, "err"); + + bpf_link__destroy(link); + +cleanup: + dynptr_success__destroy(skel); +} + +void test_dynptr(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(dynptr_tests); i++) { + if (!test__start_subtest(dynptr_tests[i].prog_name)) + continue; + + if (dynptr_tests[i].expected_err_msg) + verify_fail(dynptr_tests[i].prog_name, + dynptr_tests[i].expected_err_msg); + else + verify_success(dynptr_tests[i].prog_name); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c index fe1f0f26ea14..a7e74297f15f 100644 --- 
a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -5,7 +5,7 @@ /* that's kernel internal BPF_MAX_TRAMP_PROGS define */ #define CNT 38 -void test_fexit_stress(void) +void serial_test_fexit_stress(void) { char test_skb[128] = {}; int fexit_fd[CNT] = {}; diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c index 754e80937e5d..8963f8a549f2 100644 --- a/tools/testing/selftests/bpf/prog_tests/for_each.c +++ b/tools/testing/selftests/bpf/prog_tests/for_each.c @@ -10,9 +10,10 @@ static unsigned int duration; static void test_hash_map(void) { - int i, err, hashmap_fd, max_entries, percpu_map_fd; + int i, err, max_entries; struct for_each_hash_map_elem *skel; __u64 *percpu_valbuf = NULL; + size_t percpu_val_sz; __u32 key, num_cpus; __u64 val; LIBBPF_OPTS(bpf_test_run_opts, topts, @@ -25,26 +26,27 @@ static void test_hash_map(void) if (!ASSERT_OK_PTR(skel, "for_each_hash_map_elem__open_and_load")) return; - hashmap_fd = bpf_map__fd(skel->maps.hashmap); max_entries = bpf_map__max_entries(skel->maps.hashmap); for (i = 0; i < max_entries; i++) { key = i; val = i + 1; - err = bpf_map_update_elem(hashmap_fd, &key, &val, BPF_ANY); + err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key), + &val, sizeof(val), BPF_ANY); if (!ASSERT_OK(err, "map_update")) goto out; } num_cpus = bpf_num_possible_cpus(); - percpu_map_fd = bpf_map__fd(skel->maps.percpu_map); - percpu_valbuf = malloc(sizeof(__u64) * num_cpus); + percpu_val_sz = sizeof(__u64) * num_cpus; + percpu_valbuf = malloc(percpu_val_sz); if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf")) goto out; key = 1; for (i = 0; i < num_cpus; i++) percpu_valbuf[i] = i + 1; - err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY); + err = bpf_map__update_elem(skel->maps.percpu_map, &key, sizeof(key), + percpu_valbuf, percpu_val_sz, BPF_ANY); if (!ASSERT_OK(err, "percpu_map_update")) goto out; @@ -58,7 +60,7 @@ static void test_hash_map(void) ASSERT_EQ(skel->bss->hashmap_elems, max_entries, "hashmap_elems"); key = 1; - err = bpf_map_lookup_elem(hashmap_fd, &key, &val); + err = bpf_map__lookup_elem(skel->maps.hashmap, &key, sizeof(key), &val, sizeof(val), 0); ASSERT_ERR(err, "hashmap_lookup"); ASSERT_EQ(skel->bss->percpu_called, 1, "percpu_called"); @@ -75,9 +77,10 @@ out: static void test_array_map(void) { __u32 key, num_cpus, max_entries; - int i, arraymap_fd, percpu_map_fd, err; + int i, err; struct for_each_array_map_elem *skel; __u64 *percpu_valbuf = NULL; + size_t percpu_val_sz; __u64 val, expected_total; LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v4, @@ -89,7 +92,6 @@ static void test_array_map(void) if (!ASSERT_OK_PTR(skel, "for_each_array_map_elem__open_and_load")) return; - arraymap_fd = bpf_map__fd(skel->maps.arraymap); expected_total = 0; max_entries = bpf_map__max_entries(skel->maps.arraymap); for (i = 0; i < max_entries; i++) { @@ -98,21 +100,23 @@ static void test_array_map(void) /* skip the last iteration for expected total */ if (i != max_entries - 1) expected_total += val; - err = bpf_map_update_elem(arraymap_fd, &key, &val, BPF_ANY); + err = bpf_map__update_elem(skel->maps.arraymap, &key, sizeof(key), + &val, sizeof(val), BPF_ANY); if (!ASSERT_OK(err, "map_update")) goto out; } num_cpus = bpf_num_possible_cpus(); - percpu_map_fd = bpf_map__fd(skel->maps.percpu_map); - percpu_valbuf = malloc(sizeof(__u64) * num_cpus); + percpu_val_sz = sizeof(__u64) * num_cpus; + percpu_valbuf = 
malloc(percpu_val_sz); if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf")) goto out; key = 0; for (i = 0; i < num_cpus; i++) percpu_valbuf[i] = i + 1; - err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY); + err = bpf_map__update_elem(skel->maps.percpu_map, &key, sizeof(key), + percpu_valbuf, percpu_val_sz, BPF_ANY); if (!ASSERT_OK(err, "percpu_map_update")) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index b9876b55fc0c..586dc52d6fb9 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -2,6 +2,9 @@ #include <test_progs.h> #include "kprobe_multi.skel.h" #include "trace_helpers.h" +#include "kprobe_multi_empty.skel.h" +#include "bpf/libbpf_internal.h" +#include "bpf/hashmap.h" static void kprobe_multi_test_run(struct kprobe_multi *skel, bool test_return) { @@ -140,14 +143,14 @@ test_attach_api(const char *pattern, struct bpf_kprobe_multi_opts *opts) goto cleanup; skel->bss->pid = getpid(); - link1 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe, + link1 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, pattern, opts); if (!ASSERT_OK_PTR(link1, "bpf_program__attach_kprobe_multi_opts")) goto cleanup; if (opts) { opts->retprobe = true; - link2 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kretprobe, + link2 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kretprobe_manual, pattern, opts); if (!ASSERT_OK_PTR(link2, "bpf_program__attach_kprobe_multi_opts")) goto cleanup; @@ -232,7 +235,7 @@ static void test_attach_api_fails(void) skel->bss->pid = getpid(); /* fail_1 - pattern and opts NULL */ - link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe, + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, NULL, NULL); if (!ASSERT_ERR_PTR(link, "fail_1")) goto cleanup; @@ -246,7 +249,7 @@ static void test_attach_api_fails(void) opts.cnt = ARRAY_SIZE(syms); opts.cookies = NULL; - link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe, + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, NULL, &opts); if (!ASSERT_ERR_PTR(link, "fail_2")) goto cleanup; @@ -260,7 +263,7 @@ static void test_attach_api_fails(void) opts.cnt = ARRAY_SIZE(syms); opts.cookies = NULL; - link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe, + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, "ksys_*", &opts); if (!ASSERT_ERR_PTR(link, "fail_3")) goto cleanup; @@ -274,7 +277,7 @@ static void test_attach_api_fails(void) opts.cnt = ARRAY_SIZE(syms); opts.cookies = NULL; - link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe, + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, "ksys_*", &opts); if (!ASSERT_ERR_PTR(link, "fail_4")) goto cleanup; @@ -288,7 +291,7 @@ static void test_attach_api_fails(void) opts.cnt = 0; opts.cookies = cookies; - link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe, + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, "ksys_*", &opts); if (!ASSERT_ERR_PTR(link, "fail_5")) goto cleanup; @@ -301,6 +304,146 @@ cleanup: kprobe_multi__destroy(skel); } +static inline __u64 get_time_ns(void) +{ + struct timespec t; + + clock_gettime(CLOCK_MONOTONIC, &t); + return (__u64) t.tv_sec * 1000000000 + t.tv_nsec; +} + +static size_t symbol_hash(const void 
*key, void *ctx __maybe_unused) +{ + return str_hash((const char *) key); +} + +static bool symbol_equal(const void *key1, const void *key2, void *ctx __maybe_unused) +{ + return strcmp((const char *) key1, (const char *) key2) == 0; +} + +static int get_syms(char ***symsp, size_t *cntp) +{ + size_t cap = 0, cnt = 0, i; + char *name, **syms = NULL; + struct hashmap *map; + char buf[256]; + FILE *f; + int err; + + /* + * The available_filter_functions file contains many duplicates, + * but other than that all symbols are usable in the kprobe multi + * interface. + * Duplicates are filtered out by using hashmap__add, which won't + * add an existing entry. + */ + f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r"); + if (!f) + return -EINVAL; + + map = hashmap__new(symbol_hash, symbol_equal, NULL); + if (IS_ERR(map)) { + err = libbpf_get_error(map); + goto error; + } + + while (fgets(buf, sizeof(buf), f)) { + /* skip modules */ + if (strchr(buf, '[')) + continue; + if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1) + continue; + /* + * We attach to almost all kernel functions and some of them + * will cause 'suspicious RCU usage' when fprobe is attached + * to them. Filter out the current culprits - arch_cpu_idle + * and rcu_* functions. + */ + if (!strcmp(name, "arch_cpu_idle")) + continue; + if (!strncmp(name, "rcu_", 4)) + continue; + err = hashmap__add(map, name, NULL); + if (err) { + free(name); + if (err == -EEXIST) + continue; + goto error; + } + err = libbpf_ensure_mem((void **) &syms, &cap, + sizeof(*syms), cnt + 1); + if (err) { + free(name); + goto error; + } + syms[cnt] = name; + cnt++; + } + + *symsp = syms; + *cntp = cnt; + +error: + fclose(f); + hashmap__free(map); + if (err) { + for (i = 0; i < cnt; i++) + free(syms[i]); + free(syms); + } + return err; +} + +static void test_bench_attach(void) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + struct kprobe_multi_empty *skel = NULL; + long attach_start_ns, attach_end_ns; + long detach_start_ns, detach_end_ns; + double attach_delta, detach_delta; + struct bpf_link *link = NULL; + char **syms = NULL; + size_t cnt, i; + + if (!ASSERT_OK(get_syms(&syms, &cnt), "get_syms")) + return; + + skel = kprobe_multi_empty__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load")) + goto cleanup; + + opts.syms = (const char **) syms; + opts.cnt = cnt; + + attach_start_ns = get_time_ns(); + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_empty, + NULL, &opts); + attach_end_ns = get_time_ns(); + + if (!ASSERT_OK_PTR(link, "bpf_program__attach_kprobe_multi_opts")) + goto cleanup; + + detach_start_ns = get_time_ns(); + bpf_link__destroy(link); + detach_end_ns = get_time_ns(); + + attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0; + detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0; + + printf("%s: found %lu functions\n", __func__, cnt); + printf("%s: attached in %7.3lfs\n", __func__, attach_delta); + printf("%s: detached in %7.3lfs\n", __func__, detach_delta); + +cleanup: + kprobe_multi_empty__destroy(skel); + if (syms) { + for (i = 0; i < cnt; i++) + free(syms[i]); + free(syms); + } +} + void test_kprobe_multi_test(void) { if (!ASSERT_OK(load_kallsyms(), "load_kallsyms")) @@ -320,4 +463,6 @@ void test_kprobe_multi_test(void) test_attach_api_syms(); if (test__start_subtest("attach_api_fails")) test_attach_api_fails(); + if (test__start_subtest("bench_attach")) + test_bench_attach(); } diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c 
b/tools/testing/selftests/bpf/prog_tests/log_fixup.c index be3a956cb3a5..f4ffdcabf4e4 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c +++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c @@ -85,7 +85,6 @@ static void bad_core_relo_subprog(void) if (!ASSERT_ERR(err, "load_fail")) goto cleanup; - /* there should be no prog loading log because we specified per-prog log buf */ ASSERT_HAS_SUBSTR(log_buf, ": <invalid CO-RE relocation>\n" "failed to resolve CO-RE relocation <byte_off> ", @@ -101,6 +100,40 @@ cleanup: test_log_fixup__destroy(skel); } +static void missing_map(void) +{ + char log_buf[8 * 1024]; + struct test_log_fixup* skel; + int err; + + skel = test_log_fixup__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_map__set_autocreate(skel->maps.missing_map, false); + + bpf_program__set_autoload(skel->progs.use_missing_map, true); + bpf_program__set_log_buf(skel->progs.use_missing_map, log_buf, sizeof(log_buf)); + + err = test_log_fixup__load(skel); + if (!ASSERT_ERR(err, "load_fail")) + goto cleanup; + + ASSERT_TRUE(bpf_map__autocreate(skel->maps.existing_map), "existing_map_autocreate"); + ASSERT_FALSE(bpf_map__autocreate(skel->maps.missing_map), "missing_map_autocreate"); + + ASSERT_HAS_SUBSTR(log_buf, + "8: <invalid BPF map reference>\n" + "BPF map 'missing_map' is referenced but wasn't created\n", + "log_buf"); + + if (env.verbosity > VERBOSE_NONE) + printf("LOG: \n=================\n%s=================\n", log_buf); + +cleanup: + test_log_fixup__destroy(skel); +} + void test_log_fixup(void) { if (test__start_subtest("bad_core_relo_trunc_none")) @@ -111,4 +144,6 @@ void test_log_fixup(void) bad_core_relo(250, TRUNC_FULL /* truncate also libbpf's message patch */); if (test__start_subtest("bad_core_relo_subprog")) bad_core_relo_subprog(); + if (test__start_subtest("missing_map")) + missing_map(); } diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c index beebfa9730e1..a767bb4a271c 100644 --- a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c +++ b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c @@ -112,7 +112,8 @@ static void test_lookup_and_delete_hash(void) /* Lookup and delete element. */ key = 1; - err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value); + err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map, + &key, sizeof(key), &value, sizeof(value), 0); if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) goto cleanup; @@ -147,7 +148,8 @@ static void test_lookup_and_delete_percpu_hash(void) /* Lookup and delete element. */ key = 1; - err = bpf_map_lookup_and_delete_elem(map_fd, &key, value); + err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map, + &key, sizeof(key), value, sizeof(value), 0); if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) goto cleanup; @@ -191,7 +193,8 @@ static void test_lookup_and_delete_lru_hash(void) goto cleanup; /* Lookup and delete element 3. */ - err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value); + err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map, + &key, sizeof(key), &value, sizeof(value), 0); if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) goto cleanup; @@ -240,10 +243,10 @@ static void test_lookup_and_delete_lru_percpu_hash(void) value[i] = 0; /* Lookup and delete element 3. 
*/ - err = bpf_map_lookup_and_delete_elem(map_fd, &key, value); - if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) { + err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map, + &key, sizeof(key), value, sizeof(value), 0); + if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) goto cleanup; - } /* Check if only one CPU has set the value. */ for (i = 0; i < nr_cpus; i++) { diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c index 9e2fbda64a65..fdcea7a61491 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c @@ -1,37 +1,148 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> #include "map_kptr.skel.h" +#include "map_kptr_fail.skel.h" -void test_map_kptr(void) +static char log_buf[1024 * 1024]; + +struct { + const char *prog_name; + const char *err_msg; +} map_kptr_fail_tests[] = { + { "size_not_bpf_dw", "kptr access size must be BPF_DW" }, + { "non_const_var_off", "kptr access cannot have variable offset" }, + { "non_const_var_off_kptr_xchg", "R1 doesn't have constant offset. kptr has to be" }, + { "misaligned_access_write", "kptr access misaligned expected=8 off=7" }, + { "misaligned_access_read", "kptr access misaligned expected=8 off=1" }, + { "reject_var_off_store", "variable untrusted_ptr_ access var_off=(0x0; 0x1e0)" }, + { "reject_bad_type_match", "invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc" }, + { "marked_as_untrusted_or_null", "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_" }, + { "correct_btf_id_check_size", "access beyond struct prog_test_ref_kfunc at off 32 size 4" }, + { "inherit_untrusted_on_walk", "R1 type=untrusted_ptr_ expected=percpu_ptr_" }, + { "reject_kptr_xchg_on_unref", "off=8 kptr isn't referenced kptr" }, + { "reject_kptr_get_no_map_val", "arg#0 expected pointer to map value" }, + { "reject_kptr_get_no_null_map_val", "arg#0 expected pointer to map value" }, + { "reject_kptr_get_no_kptr", "arg#0 no referenced kptr at map value offset=0" }, + { "reject_kptr_get_on_unref", "arg#0 no referenced kptr at map value offset=8" }, + { "reject_kptr_get_bad_type_match", "kernel function bpf_kfunc_call_test_kptr_get args#0" }, + { "mark_ref_as_untrusted_or_null", "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_" }, + { "reject_untrusted_store_to_ref", "store to referenced kptr disallowed" }, + { "reject_bad_type_xchg", "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member" }, + { "reject_untrusted_xchg", "R2 type=untrusted_ptr_ expected=ptr_" }, + { "reject_member_of_ref_xchg", "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc" }, + { "reject_indirect_helper_access", "kptr cannot be accessed indirectly by helper" }, + { "reject_indirect_global_func_access", "kptr cannot be accessed indirectly by helper" }, + { "kptr_xchg_ref_state", "Unreleased reference id=5 alloc_insn=" }, + { "kptr_get_ref_state", "Unreleased reference id=3 alloc_insn=" }, +}; + +static void test_map_kptr_fail_prog(const char *prog_name, const char *err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf, + .kernel_log_size = sizeof(log_buf), + .kernel_log_level = 1); + struct map_kptr_fail *skel; + struct bpf_program *prog; + int ret; + + skel = map_kptr_fail__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "map_kptr_fail__open_opts")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, 
"bpf_object__find_program_by_name")) + goto end; + + bpf_program__set_autoload(prog, true); + + ret = map_kptr_fail__load(skel); + if (!ASSERT_ERR(ret, "map_kptr__load must fail")) + goto end; + + if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { + fprintf(stderr, "Expected: %s\n", err_msg); + fprintf(stderr, "Verifier: %s\n", log_buf); + } + +end: + map_kptr_fail__destroy(skel); +} + +static void test_map_kptr_fail(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(map_kptr_fail_tests); i++) { + if (!test__start_subtest(map_kptr_fail_tests[i].prog_name)) + continue; + test_map_kptr_fail_prog(map_kptr_fail_tests[i].prog_name, + map_kptr_fail_tests[i].err_msg); + } +} + +static void test_map_kptr_success(bool test_run) { + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); struct map_kptr *skel; int key = 0, ret; - char buf[24]; + char buf[16]; skel = map_kptr__open_and_load(); if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load")) return; - ret = bpf_map_update_elem(bpf_map__fd(skel->maps.array_map), &key, buf, 0); + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref), &opts); + ASSERT_OK(ret, "test_map_kptr_ref refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref retval"); + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref2), &opts); + ASSERT_OK(ret, "test_map_kptr_ref2 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref2 retval"); + + if (test_run) + return; + + ret = bpf_map__update_elem(skel->maps.array_map, + &key, sizeof(key), buf, sizeof(buf), 0); ASSERT_OK(ret, "array_map update"); - ret = bpf_map_update_elem(bpf_map__fd(skel->maps.array_map), &key, buf, 0); + ret = bpf_map__update_elem(skel->maps.array_map, + &key, sizeof(key), buf, sizeof(buf), 0); ASSERT_OK(ret, "array_map update2"); - ret = bpf_map_update_elem(bpf_map__fd(skel->maps.hash_map), &key, buf, 0); + ret = bpf_map__update_elem(skel->maps.hash_map, + &key, sizeof(key), buf, sizeof(buf), 0); ASSERT_OK(ret, "hash_map update"); - ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.hash_map), &key); + ret = bpf_map__delete_elem(skel->maps.hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "hash_map delete"); - ret = bpf_map_update_elem(bpf_map__fd(skel->maps.hash_malloc_map), &key, buf, 0); + ret = bpf_map__update_elem(skel->maps.hash_malloc_map, + &key, sizeof(key), buf, sizeof(buf), 0); ASSERT_OK(ret, "hash_malloc_map update"); - ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.hash_malloc_map), &key); + ret = bpf_map__delete_elem(skel->maps.hash_malloc_map, &key, sizeof(key), 0); ASSERT_OK(ret, "hash_malloc_map delete"); - ret = bpf_map_update_elem(bpf_map__fd(skel->maps.lru_hash_map), &key, buf, 0); + ret = bpf_map__update_elem(skel->maps.lru_hash_map, + &key, sizeof(key), buf, sizeof(buf), 0); ASSERT_OK(ret, "lru_hash_map update"); - ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.lru_hash_map), &key); + ret = bpf_map__delete_elem(skel->maps.lru_hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "lru_hash_map delete"); map_kptr__destroy(skel); } + +void test_map_kptr(void) +{ + if (test__start_subtest("success")) { + test_map_kptr_success(false); + /* Do test_run twice, so that we see refcount going back to 1 + * after we leave it in map from first iteration. 
+ */ + test_map_kptr_success(true); + } + test_map_kptr_fail(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c b/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c new file mode 100644 index 000000000000..bfb1bf3fd427 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Bytedance */ + +#include <test_progs.h> +#include "test_map_lookup_percpu_elem.skel.h" + +void test_map_lookup_percpu_elem(void) +{ + struct test_map_lookup_percpu_elem *skel; + __u64 key = 0, sum; + int ret, i, nr_cpus = libbpf_num_possible_cpus(); + __u64 *buf; + + buf = malloc(nr_cpus*sizeof(__u64)); + if (!ASSERT_OK_PTR(buf, "malloc")) + return; + + for (i = 0; i < nr_cpus; i++) + buf[i] = i; + sum = (nr_cpus - 1) * nr_cpus / 2; + + skel = test_map_lookup_percpu_elem__open(); + if (!ASSERT_OK_PTR(skel, "test_map_lookup_percpu_elem__open")) + goto exit; + + skel->rodata->my_pid = getpid(); + skel->rodata->nr_cpus = nr_cpus; + + ret = test_map_lookup_percpu_elem__load(skel); + if (!ASSERT_OK(ret, "test_map_lookup_percpu_elem__load")) + goto cleanup; + + ret = test_map_lookup_percpu_elem__attach(skel); + if (!ASSERT_OK(ret, "test_map_lookup_percpu_elem__attach")) + goto cleanup; + + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_array_map), &key, buf, 0); + ASSERT_OK(ret, "percpu_array_map update"); + + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_hash_map), &key, buf, 0); + ASSERT_OK(ret, "percpu_hash_map update"); + + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_lru_hash_map), &key, buf, 0); + ASSERT_OK(ret, "percpu_lru_hash_map update"); + + syscall(__NR_getuid); + + test_map_lookup_percpu_elem__detach(skel); + + ASSERT_EQ(skel->bss->percpu_array_elem_sum, sum, "percpu_array lookup percpu elem"); + ASSERT_EQ(skel->bss->percpu_hash_elem_sum, sum, "percpu_hash lookup percpu elem"); + ASSERT_EQ(skel->bss->percpu_lru_hash_elem_sum, sum, "percpu_lru_hash lookup percpu elem"); + +cleanup: + test_map_lookup_percpu_elem__destroy(skel); +exit: + free(buf); +} diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c new file mode 100644 index 000000000000..59f08d6d1d53 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Tessares SA. */ +/* Copyright (c) 2022, SUSE. 
*/ + +#include <test_progs.h> +#include "cgroup_helpers.h" +#include "network_helpers.h" +#include "mptcp_sock.skel.h" + +#ifndef TCP_CA_NAME_MAX +#define TCP_CA_NAME_MAX 16 +#endif + +struct mptcp_storage { + __u32 invoked; + __u32 is_mptcp; + struct sock *sk; + __u32 token; + struct sock *first; + char ca_name[TCP_CA_NAME_MAX]; +}; + +static int verify_tsk(int map_fd, int client_fd) +{ + int err, cfd = client_fd; + struct mptcp_storage val; + + err = bpf_map_lookup_elem(map_fd, &cfd, &val); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + return err; + + if (!ASSERT_EQ(val.invoked, 1, "unexpected invoked count")) + err++; + + if (!ASSERT_EQ(val.is_mptcp, 0, "unexpected is_mptcp")) + err++; + + return err; +} + +static void get_msk_ca_name(char ca_name[]) +{ + size_t len; + int fd; + + fd = open("/proc/sys/net/ipv4/tcp_congestion_control", O_RDONLY); + if (!ASSERT_GE(fd, 0, "failed to open tcp_congestion_control")) + return; + + len = read(fd, ca_name, TCP_CA_NAME_MAX); + if (!ASSERT_GT(len, 0, "failed to read ca_name")) + goto err; + + if (len > 0 && ca_name[len - 1] == '\n') + ca_name[len - 1] = '\0'; + +err: + close(fd); +} + +static int verify_msk(int map_fd, int client_fd, __u32 token) +{ + char ca_name[TCP_CA_NAME_MAX]; + int err, cfd = client_fd; + struct mptcp_storage val; + + if (!ASSERT_GT(token, 0, "invalid token")) + return -1; + + get_msk_ca_name(ca_name); + + err = bpf_map_lookup_elem(map_fd, &cfd, &val); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + return err; + + if (!ASSERT_EQ(val.invoked, 1, "unexpected invoked count")) + err++; + + if (!ASSERT_EQ(val.is_mptcp, 1, "unexpected is_mptcp")) + err++; + + if (!ASSERT_EQ(val.token, token, "unexpected token")) + err++; + + if (!ASSERT_EQ(val.first, val.sk, "unexpected first")) + err++; + + if (!ASSERT_STRNEQ(val.ca_name, ca_name, TCP_CA_NAME_MAX, "unexpected ca_name")) + err++; + + return err; +} + +static int run_test(int cgroup_fd, int server_fd, bool is_mptcp) +{ + int client_fd, prog_fd, map_fd, err; + struct mptcp_sock *sock_skel; + + sock_skel = mptcp_sock__open_and_load(); + if (!ASSERT_OK_PTR(sock_skel, "skel_open_load")) + return -EIO; + + err = mptcp_sock__attach(sock_skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + prog_fd = bpf_program__fd(sock_skel->progs._sockops); + if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd")) { + err = -EIO; + goto out; + } + + map_fd = bpf_map__fd(sock_skel->maps.socket_storage_map); + if (!ASSERT_GE(map_fd, 0, "bpf_map__fd")) { + err = -EIO; + goto out; + } + + err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0); + if (!ASSERT_OK(err, "bpf_prog_attach")) + goto out; + + client_fd = connect_to_fd(server_fd, 0); + if (!ASSERT_GE(client_fd, 0, "connect to fd")) { + err = -EIO; + goto out; + } + + err += is_mptcp ? 
verify_msk(map_fd, client_fd, sock_skel->bss->token) : + verify_tsk(map_fd, client_fd); + + close(client_fd); + +out: + mptcp_sock__destroy(sock_skel); + return err; +} + +static void test_base(void) +{ + int server_fd, cgroup_fd; + + cgroup_fd = test__join_cgroup("/mptcp"); + if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) + return; + + /* without MPTCP */ + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_GE(server_fd, 0, "start_server")) + goto with_mptcp; + + ASSERT_OK(run_test(cgroup_fd, server_fd, false), "run_test tcp"); + + close(server_fd); + +with_mptcp: + /* with MPTCP */ + server_fd = start_mptcp_server(AF_INET, NULL, 0, 0); + if (!ASSERT_GE(server_fd, 0, "start_mptcp_server")) + goto close_cgroup_fd; + + ASSERT_OK(run_test(cgroup_fd, server_fd, true), "run_test mptcp"); + + close(server_fd); + +close_cgroup_fd: + close(cgroup_fd); +} + +void test_mptcp(void) +{ + if (test__start_subtest("base")) + test_base(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c index e945195b24c9..eb5f7f5aa81a 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c @@ -50,18 +50,6 @@ void test_ringbuf_multi(void) if (CHECK(!skel, "skel_open", "skeleton open failed\n")) return; - err = bpf_map__set_max_entries(skel->maps.ringbuf1, page_size); - if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) - goto cleanup; - - err = bpf_map__set_max_entries(skel->maps.ringbuf2, page_size); - if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) - goto cleanup; - - err = bpf_map__set_max_entries(bpf_map__inner_map(skel->maps.ringbuf_arr), page_size); - if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) - goto cleanup; - proto_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, page_size, NULL); if (CHECK(proto_fd < 0, "bpf_map_create", "bpf_map_create failed\n")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c index e8399ae50e77..9ad09a6c538a 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c @@ -8,7 +8,7 @@ void test_stacktrace_build_id(void) int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; struct test_stacktrace_build_id *skel; int err, stack_trace_len; - __u32 key, previous_key, val, duration = 0; + __u32 key, prev_key, val, duration = 0; char buf[256]; int i, j; struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH]; @@ -58,7 +58,7 @@ retry: "err %d errno %d\n", err, errno)) goto cleanup; - err = bpf_map_get_next_key(stackmap_fd, NULL, &key); + err = bpf_map__get_next_key(skel->maps.stackmap, NULL, &key, sizeof(key)); if (CHECK(err, "get_next_key from stackmap", "err %d, errno %d\n", err, errno)) goto cleanup; @@ -79,8 +79,8 @@ retry: if (strstr(buf, build_id) != NULL) build_id_matches = 1; } - previous_key = key; - } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); + prev_key = key; + } while (bpf_map__get_next_key(skel->maps.stackmap, &prev_key, &key, sizeof(key)) == 0); /* stack_map_get_build_id_offset() is racy and sometimes can return * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID; diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c 
b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index f45a1d7b0a28..f4ea1a215ce4 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -27,7 +27,7 @@ void test_stacktrace_build_id_nmi(void) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, }; - __u32 key, previous_key, val, duration = 0; + __u32 key, prev_key, val, duration = 0; char buf[256]; int i, j; struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH]; @@ -100,7 +100,7 @@ retry: "err %d errno %d\n", err, errno)) goto cleanup; - err = bpf_map_get_next_key(stackmap_fd, NULL, &key); + err = bpf_map__get_next_key(skel->maps.stackmap, NULL, &key, sizeof(key)); if (CHECK(err, "get_next_key from stackmap", "err %d, errno %d\n", err, errno)) goto cleanup; @@ -108,7 +108,8 @@ retry: do { char build_id[64]; - err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs); + err = bpf_map__lookup_elem(skel->maps.stackmap, &key, sizeof(key), + id_offs, sizeof(id_offs), 0); if (CHECK(err, "lookup_elem from stackmap", "err %d, errno %d\n", err, errno)) goto cleanup; @@ -121,8 +122,8 @@ retry: if (strstr(buf, build_id) != NULL) build_id_matches = 1; } - previous_key = key; - } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); + prev_key = key; + } while (bpf_map__get_next_key(skel->maps.stackmap, &prev_key, &key, sizeof(key)) == 0); /* stack_map_get_build_id_offset() is racy and sometimes can return * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID; diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c new file mode 100644 index 000000000000..3bba4a2a0530 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -0,0 +1,423 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* + * End-to-end eBPF tunnel test suite + * The file tests the BPF network tunnel implementation. + * + * Topology: + * --------- + * root namespace | at_ns0 namespace + * | + * ----------- | ----------- + * | tnl dev | | | tnl dev | (overlay network) + * ----------- | ----------- + * metadata-mode | metadata-mode + * with bpf | with bpf + * | + * ---------- | ---------- + * | veth1 | --------- | veth0 | (underlay network) + * ---------- peer ---------- + * + * + * Device Configuration + * -------------------- + * root namespace with metadata-mode tunnel + BPF + * Device names and addresses: + * veth1 IP 1: 172.16.1.200, IPv6: 00::22 (underlay) + * IP 2: 172.16.1.20, IPv6: 00::bb (underlay) + * tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay) + * + * Namespace at_ns0 with native tunnel + * Device names and addresses: + * veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay) + * tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay) + * + * + * End-to-end ping packet flow + * --------------------------- + * Most of the tests start with namespace creation and device configuration, + * then ping the underlay and overlay network. When doing 'ping 10.1.1.100' + * from root namespace, the following operations happen: + * 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev. + * 2) Tnl device's egress BPF program is triggered and sets the tunnel metadata, + * with local_ip=172.16.1.200, remote_ip=172.16.1.100. The BPF program chooses + * the primary or secondary ip of veth1 as the local ip of the tunnel. The + * choice is made based on the value of bpf map local_ip_map. 
+ 3) The outer tunnel header is prepended and the packet is routed to veth1's egress. + 4) veth0's ingress queue receives the tunneled packet in namespace at_ns0. + 5) The tunnel protocol handler, ex: vxlan_rcv, decaps the packet. + 6) The packet is forwarded to the overlay tnl dev. + */ + +#include <arpa/inet.h> +#include <linux/if_tun.h> +#include <linux/limits.h> +#include <linux/sysctl.h> +#include <linux/time_types.h> +#include <linux/net_tstamp.h> +#include <net/if.h> +#include <stdbool.h> +#include <stdio.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "test_progs.h" +#include "network_helpers.h" +#include "test_tunnel_kern.skel.h" + +#define IP4_ADDR_VETH0 "172.16.1.100" +#define IP4_ADDR1_VETH1 "172.16.1.200" +#define IP4_ADDR2_VETH1 "172.16.1.20" +#define IP4_ADDR_TUNL_DEV0 "10.1.1.100" +#define IP4_ADDR_TUNL_DEV1 "10.1.1.200" + +#define IP6_ADDR_VETH0 "::11" +#define IP6_ADDR1_VETH1 "::22" +#define IP6_ADDR2_VETH1 "::bb" + +#define IP4_ADDR1_HEX_VETH1 0xac1001c8 +#define IP4_ADDR2_HEX_VETH1 0xac100114 +#define IP6_ADDR1_HEX_VETH1 0x22 +#define IP6_ADDR2_HEX_VETH1 0xbb + +#define MAC_TUNL_DEV0 "52:54:00:d9:01:00" +#define MAC_TUNL_DEV1 "52:54:00:d9:02:00" + +#define VXLAN_TUNL_DEV0 "vxlan00" +#define VXLAN_TUNL_DEV1 "vxlan11" +#define IP6VXLAN_TUNL_DEV0 "ip6vxlan00" +#define IP6VXLAN_TUNL_DEV1 "ip6vxlan11" + +#define PING_ARGS "-i 0.01 -c 3 -w 10 -q" + +#define SYS(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_OK(system(cmd), cmd)) \ + goto fail; \ + }) + +#define SYS_NOFAIL(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + system(cmd); \ + }) + +static int config_device(void) +{ + SYS("ip netns add at_ns0"); + SYS("ip link add veth0 type veth peer name veth1"); + SYS("ip link set veth0 netns at_ns0"); + SYS("ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1"); + SYS("ip addr add " IP4_ADDR2_VETH1 "/24 dev veth1"); + SYS("ip link set dev veth1 up mtu 1500"); + SYS("ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0"); + SYS("ip netns exec at_ns0 ip link set dev veth0 up mtu 1500"); + + return 0; +fail: + return -1; +} + +static void cleanup(void) +{ + SYS_NOFAIL("test -f /var/run/netns/at_ns0 && ip netns delete at_ns0"); + SYS_NOFAIL("ip link del veth1 2> /dev/null"); + SYS_NOFAIL("ip link del %s 2> /dev/null", VXLAN_TUNL_DEV1); + SYS_NOFAIL("ip link del %s 2> /dev/null", IP6VXLAN_TUNL_DEV1); +} + +static int add_vxlan_tunnel(void) +{ + /* at_ns0 namespace */ + SYS("ip netns exec at_ns0 ip link add dev %s type vxlan external gbp dstport 4789", + VXLAN_TUNL_DEV0); + SYS("ip netns exec at_ns0 ip link set dev %s address %s up", + VXLAN_TUNL_DEV0, MAC_TUNL_DEV0); + SYS("ip netns exec at_ns0 ip addr add dev %s %s/24", + VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0); + SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s", + IP4_ADDR_TUNL_DEV1, MAC_TUNL_DEV1, VXLAN_TUNL_DEV0); + + /* root namespace */ + SYS("ip link add dev %s type vxlan external gbp dstport 4789", + VXLAN_TUNL_DEV1); + SYS("ip link set dev %s address %s up", VXLAN_TUNL_DEV1, MAC_TUNL_DEV1); + SYS("ip addr add dev %s %s/24", VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1); + SYS("ip neigh add %s lladdr %s dev %s", + IP4_ADDR_TUNL_DEV0, MAC_TUNL_DEV0, VXLAN_TUNL_DEV1); + + return 0; +fail: + return -1; +} + +static void delete_vxlan_tunnel(void) +{ + SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s", + VXLAN_TUNL_DEV0); + SYS_NOFAIL("ip link delete dev %s", VXLAN_TUNL_DEV1); +} + +static int add_ip6vxlan_tunnel(void) 
+{ + SYS("ip netns exec at_ns0 ip -6 addr add %s/96 dev veth0", + IP6_ADDR_VETH0); + SYS("ip netns exec at_ns0 ip link set dev veth0 up"); + SYS("ip -6 addr add %s/96 dev veth1", IP6_ADDR1_VETH1); + SYS("ip -6 addr add %s/96 dev veth1", IP6_ADDR2_VETH1); + SYS("ip link set dev veth1 up"); + + /* at_ns0 namespace */ + SYS("ip netns exec at_ns0 ip link add dev %s type vxlan external dstport 4789", + IP6VXLAN_TUNL_DEV0); + SYS("ip netns exec at_ns0 ip addr add dev %s %s/24", + IP6VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0); + SYS("ip netns exec at_ns0 ip link set dev %s address %s up", + IP6VXLAN_TUNL_DEV0, MAC_TUNL_DEV0); + + /* root namespace */ + SYS("ip link add dev %s type vxlan external dstport 4789", + IP6VXLAN_TUNL_DEV1); + SYS("ip addr add dev %s %s/24", IP6VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1); + SYS("ip link set dev %s address %s up", + IP6VXLAN_TUNL_DEV1, MAC_TUNL_DEV1); + + return 0; +fail: + return -1; +} + +static void delete_ip6vxlan_tunnel(void) +{ + SYS_NOFAIL("ip netns exec at_ns0 ip -6 addr delete %s/96 dev veth0", + IP6_ADDR_VETH0); + SYS_NOFAIL("ip -6 addr delete %s/96 dev veth1", IP6_ADDR1_VETH1); + SYS_NOFAIL("ip -6 addr delete %s/96 dev veth1", IP6_ADDR2_VETH1); + SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s", + IP6VXLAN_TUNL_DEV0); + SYS_NOFAIL("ip link delete dev %s", IP6VXLAN_TUNL_DEV1); +} + +static int test_ping(int family, const char *addr) +{ + SYS("%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr); + return 0; +fail: + return -1; +} + +static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, + .priority = 1, .prog_fd = igr_fd); + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1, + .priority = 1, .prog_fd = egr_fd); + int ret; + + ret = bpf_tc_hook_create(hook); + if (!ASSERT_OK(ret, "create tc hook")) + return ret; + + if (igr_fd >= 0) { + hook->attach_point = BPF_TC_INGRESS; + ret = bpf_tc_attach(hook, &opts1); + if (!ASSERT_OK(ret, "bpf_tc_attach")) { + bpf_tc_hook_destroy(hook); + return ret; + } + } + + if (egr_fd >= 0) { + hook->attach_point = BPF_TC_EGRESS; + ret = bpf_tc_attach(hook, &opts2); + if (!ASSERT_OK(ret, "bpf_tc_attach")) { + bpf_tc_hook_destroy(hook); + return ret; + } + } + + return 0; +} + +static void test_vxlan_tunnel(void) +{ + struct test_tunnel_kern *skel = NULL; + struct nstoken *nstoken; + int local_ip_map_fd = -1; + int set_src_prog_fd, get_src_prog_fd; + int set_dst_prog_fd; + int key = 0, ifindex = -1; + uint local_ip; + int err; + DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, + .attach_point = BPF_TC_INGRESS); + + /* add vxlan tunnel */ + err = add_vxlan_tunnel(); + if (!ASSERT_OK(err, "add vxlan tunnel")) + goto done; + + /* load and attach bpf prog to tunnel dev tc hook point */ + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + goto done; + ifindex = if_nametoindex(VXLAN_TUNL_DEV1); + if (!ASSERT_NEQ(ifindex, 0, "vxlan11 ifindex")) + goto done; + tc_hook.ifindex = ifindex; + get_src_prog_fd = bpf_program__fd(skel->progs.vxlan_get_tunnel_src); + set_src_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_src); + if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd")) + goto done; + if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) + goto done; + + /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ + nstoken = open_netns("at_ns0"); + if (!ASSERT_OK_PTR(nstoken, "setns src")) + goto 
done; + ifindex = if_nametoindex(VXLAN_TUNL_DEV0); + if (!ASSERT_NEQ(ifindex, 0, "vxlan00 ifindex")) + goto done; + tc_hook.ifindex = ifindex; + set_dst_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_dst); + if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd)) + goto done; + close_netns(nstoken); + + /* use veth1 ip 2 as tunnel source ip */ + local_ip_map_fd = bpf_map__fd(skel->maps.local_ip_map); + if (!ASSERT_GE(local_ip_map_fd, 0, "bpf_map__fd")) + goto done; + local_ip = IP4_ADDR2_HEX_VETH1; + err = bpf_map_update_elem(local_ip_map_fd, &key, &local_ip, BPF_ANY); + if (!ASSERT_OK(err, "update bpf local_ip_map")) + goto done; + + /* ping test */ + err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); + if (!ASSERT_OK(err, "test_ping")) + goto done; + +done: + /* delete vxlan tunnel */ + delete_vxlan_tunnel(); + if (local_ip_map_fd >= 0) + close(local_ip_map_fd); + if (skel) + test_tunnel_kern__destroy(skel); +} + +static void test_ip6vxlan_tunnel(void) +{ + struct test_tunnel_kern *skel = NULL; + struct nstoken *nstoken; + int local_ip_map_fd = -1; + int set_src_prog_fd, get_src_prog_fd; + int set_dst_prog_fd; + int key = 0, ifindex = -1; + uint local_ip; + int err; + DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, + .attach_point = BPF_TC_INGRESS); + + /* add vxlan tunnel */ + err = add_ip6vxlan_tunnel(); + if (!ASSERT_OK(err, "add_ip6vxlan_tunnel")) + goto done; + + /* load and attach bpf prog to tunnel dev tc hook point */ + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + goto done; + ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV1); + if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan11 ifindex")) + goto done; + tc_hook.ifindex = ifindex; + get_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_get_tunnel_src); + set_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_src); + if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd")) + goto done; + if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) + goto done; + + /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ + nstoken = open_netns("at_ns0"); + if (!ASSERT_OK_PTR(nstoken, "setns src")) + goto done; + ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV0); + if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan00 ifindex")) + goto done; + tc_hook.ifindex = ifindex; + set_dst_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_dst); + if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd)) + goto done; + close_netns(nstoken); + + /* use veth1 ip 2 as tunnel source ip */ + local_ip_map_fd = bpf_map__fd(skel->maps.local_ip_map); + if (!ASSERT_GE(local_ip_map_fd, 0, "get local_ip_map fd")) + goto done; + local_ip = IP6_ADDR2_HEX_VETH1; + err = bpf_map_update_elem(local_ip_map_fd, &key, &local_ip, BPF_ANY); + if (!ASSERT_OK(err, "update bpf local_ip_map")) + goto done; + + /* ping test */ + err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); + if (!ASSERT_OK(err, "test_ping")) + goto done; + +done: + /* delete ipv6 vxlan tunnel */ + delete_ip6vxlan_tunnel(); + if (local_ip_map_fd >= 0) + close(local_ip_map_fd); + if (skel) + test_tunnel_kern__destroy(skel); +} + +#define RUN_TEST(name) \ + ({ \ + if (test__start_subtest(#name)) { \ + test_ ## name(); \ + } \ + }) + +static void *test_tunnel_run_tests(void *arg) +{ + cleanup(); + config_device(); + + RUN_TEST(vxlan_tunnel); + 
RUN_TEST(ip6vxlan_tunnel); + + cleanup(); + + return NULL; +} + +void serial_test_tunnel(void) +{ + pthread_t test_thread; + int err; + + /* Run the tests in their own thread to isolate the namespace changes + * so they do not affect the environment of other tests. + * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) + */ + err = pthread_create(&test_thread, NULL, &test_tunnel_run_tests, NULL); + if (ASSERT_OK(err, "pthread_create")) + ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); +} diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c index 2ee5f5ae11d4..9ff7843909e7 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c @@ -35,7 +35,7 @@ static int timer_mim(struct timer_mim *timer_skel) ASSERT_EQ(timer_skel->bss->ok, 1 | 2, "ok"); close(bpf_map__fd(timer_skel->maps.inner_htab)); - err = bpf_map_delete_elem(bpf_map__fd(timer_skel->maps.outer_arr), &key1); + err = bpf_map__delete_elem(timer_skel->maps.outer_arr, &key1, sizeof(key1), 0); ASSERT_EQ(err, 0, "delete inner map"); /* check that timer_cb[12] are no longer running */ diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c index 9c795ee52b7b..b0acbda6dbf5 100644 --- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c @@ -1,126 +1,94 @@ // SPDX-License-Identifier: GPL-2.0-only #define _GNU_SOURCE -#include <sched.h> -#include <sys/prctl.h> #include <test_progs.h> #define MAX_TRAMP_PROGS 38 struct inst { struct bpf_object *obj; - struct bpf_link *link_fentry; - struct bpf_link *link_fexit; + struct bpf_link *link; }; -static int test_task_rename(void) -{ - int fd, duration = 0, err; - char buf[] = "test_overhead"; - - fd = open("/proc/self/comm", O_WRONLY|O_TRUNC); - if (CHECK(fd < 0, "open /proc", "err %d", errno)) - return -1; - err = write(fd, buf, sizeof(buf)); - if (err < 0) { - CHECK(err < 0, "task rename", "err %d", errno); - close(fd); - return -1; - } - close(fd); - return 0; -} - -static struct bpf_link *load(struct bpf_object *obj, const char *name) +static struct bpf_program *load_prog(char *file, char *name, struct inst *inst) { + struct bpf_object *obj; struct bpf_program *prog; - int duration = 0; + int err; + + obj = bpf_object__open_file(file, NULL); + if (!ASSERT_OK_PTR(obj, "obj_open_file")) + return NULL; + + inst->obj = obj; + + err = bpf_object__load(obj); + if (!ASSERT_OK(err, "obj_load")) + return NULL; prog = bpf_object__find_program_by_name(obj, name); - if (CHECK(!prog, "find_probe", "prog '%s' not found\n", name)) - return ERR_PTR(-EINVAL); - return bpf_program__attach_trace(prog); + if (!ASSERT_OK_PTR(prog, "obj_find_prog")) + return NULL; + + return prog; } /* TODO: use different target function to run in concurrent mode */ void serial_test_trampoline_count(void) { - const char *fentry_name = "prog1"; - const char *fexit_name = "prog2"; - const char *object = "test_trampoline_count.o"; - struct inst inst[MAX_TRAMP_PROGS] = {}; - int err, i = 0, duration = 0; - struct bpf_object *obj; + char *file = "test_trampoline_count.o"; + char *const progs[] = { "fentry_test", "fmod_ret_test", "fexit_test" }; + struct inst inst[MAX_TRAMP_PROGS + 1] = {}; + struct bpf_program *prog; struct bpf_link *link; - char comm[16] = {}; + int prog_fd, err, i; + LIBBPF_OPTS(bpf_test_run_opts, opts); /* attach 'allowed' 
trampoline programs */ for (i = 0; i < MAX_TRAMP_PROGS; i++) { - obj = bpf_object__open_file(object, NULL); - if (!ASSERT_OK_PTR(obj, "obj_open_file")) { - obj = NULL; + prog = load_prog(file, progs[i % ARRAY_SIZE(progs)], &inst[i]); + if (!prog) goto cleanup; - } - err = bpf_object__load(obj); - if (CHECK(err, "obj_load", "err %d\n", err)) + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "attach_prog")) goto cleanup; - inst[i].obj = obj; - obj = NULL; - - if (rand() % 2) { - link = load(inst[i].obj, fentry_name); - if (!ASSERT_OK_PTR(link, "attach_prog")) { - link = NULL; - goto cleanup; - } - inst[i].link_fentry = link; - } else { - link = load(inst[i].obj, fexit_name); - if (!ASSERT_OK_PTR(link, "attach_prog")) { - link = NULL; - goto cleanup; - } - inst[i].link_fexit = link; - } + + inst[i].link = link; } /* and try 1 extra.. */ - obj = bpf_object__open_file(object, NULL); - if (!ASSERT_OK_PTR(obj, "obj_open_file")) { - obj = NULL; + prog = load_prog(file, "fmod_ret_test", &inst[i]); + if (!prog) goto cleanup; - } - - err = bpf_object__load(obj); - if (CHECK(err, "obj_load", "err %d\n", err)) - goto cleanup_extra; /* ..that needs to fail */ - link = load(obj, fentry_name); - err = libbpf_get_error(link); - if (!ASSERT_ERR_PTR(link, "cannot attach over the limit")) { - bpf_link__destroy(link); - goto cleanup_extra; + link = bpf_program__attach(prog); + if (!ASSERT_ERR_PTR(link, "attach_prog")) { + inst[i].link = link; + goto cleanup; } /* with E2BIG error */ - ASSERT_EQ(err, -E2BIG, "proper error check"); - ASSERT_EQ(link, NULL, "ptr_is_null"); + if (!ASSERT_EQ(libbpf_get_error(link), -E2BIG, "E2BIG")) + goto cleanup; + if (!ASSERT_EQ(link, NULL, "ptr_is_null")) + goto cleanup; /* and finaly execute the probe */ - if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L))) - goto cleanup_extra; - CHECK_FAIL(test_task_rename()); - CHECK_FAIL(prctl(PR_SET_NAME, comm, 0L, 0L, 0L)); + prog_fd = bpf_program__fd(prog); + if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd")) + goto cleanup; + + err = bpf_prog_test_run_opts(prog_fd, &opts); + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) + goto cleanup; + + ASSERT_EQ(opts.retval & 0xffff, 4, "bpf_modify_return_test.result"); + ASSERT_EQ(opts.retval >> 16, 1, "bpf_modify_return_test.side_effect"); -cleanup_extra: - bpf_object__close(obj); cleanup: - if (i >= MAX_TRAMP_PROGS) - i = MAX_TRAMP_PROGS - 1; for (; i >= 0; i--) { - bpf_link__destroy(inst[i].link_fentry); - bpf_link__destroy(inst[i].link_fexit); + bpf_link__destroy(inst[i].link); bpf_object__close(inst[i].obj); } } diff --git a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c new file mode 100644 index 000000000000..1ed3cc2092db --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c @@ -0,0 +1,312 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022, Oracle and/or its affiliates. */ + +#include <test_progs.h> +#include <bpf/btf.h> + +#include "test_unpriv_bpf_disabled.skel.h" + +#include "cap_helpers.h" + +/* Using CAP_LAST_CAP is risky here, since it can get pulled in from + * an old /usr/include/linux/capability.h and be < CAP_BPF; as a result + * CAP_BPF would not be included in ALL_CAPS. Instead use CAP_BPF as + * we know its value is correct since it is explicitly defined in + * cap_helpers.h. 
+ */ +#define ALL_CAPS ((2ULL << CAP_BPF) - 1) + +#define PINPATH "/sys/fs/bpf/unpriv_bpf_disabled_" +#define NUM_MAPS 7 + +static __u32 got_perfbuf_val; +static __u32 got_ringbuf_val; + +static int process_ringbuf(void *ctx, void *data, size_t len) +{ + if (ASSERT_EQ(len, sizeof(__u32), "ringbuf_size_valid")) + got_ringbuf_val = *(__u32 *)data; + return 0; +} + +static void process_perfbuf(void *ctx, int cpu, void *data, __u32 len) +{ + if (ASSERT_EQ(len, sizeof(__u32), "perfbuf_size_valid")) + got_perfbuf_val = *(__u32 *)data; +} + +static int sysctl_set(const char *sysctl_path, char *old_val, const char *new_val) +{ + int ret = 0; + FILE *fp; + + fp = fopen(sysctl_path, "r+"); + if (!fp) + return -errno; + if (old_val && fscanf(fp, "%s", old_val) <= 0) { + ret = -ENOENT; + } else if (!old_val || strcmp(old_val, new_val) != 0) { + fseek(fp, 0, SEEK_SET); + if (fprintf(fp, "%s", new_val) < 0) + ret = -errno; + } + fclose(fp); + + return ret; +} + +static void test_unpriv_bpf_disabled_positive(struct test_unpriv_bpf_disabled *skel, + __u32 prog_id, int prog_fd, int perf_fd, + char **map_paths, int *map_fds) +{ + struct perf_buffer *perfbuf = NULL; + struct ring_buffer *ringbuf = NULL; + int i, nr_cpus, link_fd = -1; + + nr_cpus = bpf_num_possible_cpus(); + + skel->bss->perfbuf_val = 1; + skel->bss->ringbuf_val = 2; + + /* Positive tests for unprivileged BPF disabled. Verify we can + * - retrieve and interact with pinned maps; + * - set up and interact with perf buffer; + * - set up and interact with ring buffer; + * - create a link + */ + perfbuf = perf_buffer__new(bpf_map__fd(skel->maps.perfbuf), 8, process_perfbuf, NULL, NULL, + NULL); + if (!ASSERT_OK_PTR(perfbuf, "perf_buffer__new")) + goto cleanup; + + ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf), process_ringbuf, NULL, NULL); + if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new")) + goto cleanup; + + /* trigger & validate perf event, ringbuf output */ + usleep(1); + + ASSERT_GT(perf_buffer__poll(perfbuf, 100), -1, "perf_buffer__poll"); + ASSERT_EQ(got_perfbuf_val, skel->bss->perfbuf_val, "check_perfbuf_val"); + ASSERT_EQ(ring_buffer__consume(ringbuf), 1, "ring_buffer__consume"); + ASSERT_EQ(got_ringbuf_val, skel->bss->ringbuf_val, "check_ringbuf_val"); + + for (i = 0; i < NUM_MAPS; i++) { + map_fds[i] = bpf_obj_get(map_paths[i]); + if (!ASSERT_GT(map_fds[i], -1, "obj_get")) + goto cleanup; + } + + for (i = 0; i < NUM_MAPS; i++) { + bool prog_array = strstr(map_paths[i], "prog_array") != NULL; + bool array = strstr(map_paths[i], "array") != NULL; + bool buf = strstr(map_paths[i], "buf") != NULL; + __u32 key = 0, vals[nr_cpus], lookup_vals[nr_cpus]; + __u32 expected_val = 1; + int j; + + /* skip ringbuf, perfbuf */ + if (buf) + continue; + + for (j = 0; j < nr_cpus; j++) + vals[j] = expected_val; + + if (prog_array) { + /* need valid prog array value */ + vals[0] = prog_fd; + /* prog array lookup returns prog id, not fd */ + expected_val = prog_id; + } + ASSERT_OK(bpf_map_update_elem(map_fds[i], &key, vals, 0), "map_update_elem"); + ASSERT_OK(bpf_map_lookup_elem(map_fds[i], &key, &lookup_vals), "map_lookup_elem"); + ASSERT_EQ(lookup_vals[0], expected_val, "map_lookup_elem_values"); + if (!array) + ASSERT_OK(bpf_map_delete_elem(map_fds[i], &key), "map_delete_elem"); + } + + link_fd = bpf_link_create(bpf_program__fd(skel->progs.handle_perf_event), perf_fd, + BPF_PERF_EVENT, NULL); + ASSERT_GT(link_fd, 0, "link_create"); + +cleanup: + if (link_fd) + close(link_fd); + if (perfbuf) + perf_buffer__free(perfbuf); + if (ringbuf) + 
ring_buffer__free(ringbuf); +} + +static void test_unpriv_bpf_disabled_negative(struct test_unpriv_bpf_disabled *skel, + __u32 prog_id, int prog_fd, int perf_fd, + char **map_paths, int *map_fds) +{ + const struct bpf_insn prog_insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn); + LIBBPF_OPTS(bpf_prog_load_opts, load_opts); + struct bpf_map_info map_info = {}; + __u32 map_info_len = sizeof(map_info); + struct bpf_link_info link_info = {}; + __u32 link_info_len = sizeof(link_info); + struct btf *btf = NULL; + __u32 attach_flags = 0; + __u32 prog_ids[3] = {}; + __u32 prog_cnt = 3; + __u32 next; + int i; + + /* Negative tests for unprivileged BPF disabled. Verify we cannot + * - load BPF programs; + * - create BPF maps; + * - get a prog/map/link fd by id; + * - get next prog/map/link id + * - query prog + * - BTF load + */ + ASSERT_EQ(bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "simple_prog", "GPL", + prog_insns, prog_insn_cnt, &load_opts), + -EPERM, "prog_load_fails"); + + for (i = BPF_MAP_TYPE_HASH; i <= BPF_MAP_TYPE_BLOOM_FILTER; i++) + ASSERT_EQ(bpf_map_create(i, NULL, sizeof(int), sizeof(int), 1, NULL), + -EPERM, "map_create_fails"); + + ASSERT_EQ(bpf_prog_get_fd_by_id(prog_id), -EPERM, "prog_get_fd_by_id_fails"); + ASSERT_EQ(bpf_prog_get_next_id(prog_id, &next), -EPERM, "prog_get_next_id_fails"); + ASSERT_EQ(bpf_prog_get_next_id(0, &next), -EPERM, "prog_get_next_id_fails"); + + if (ASSERT_OK(bpf_obj_get_info_by_fd(map_fds[0], &map_info, &map_info_len), + "obj_get_info_by_fd")) { + ASSERT_EQ(bpf_map_get_fd_by_id(map_info.id), -EPERM, "map_get_fd_by_id_fails"); + ASSERT_EQ(bpf_map_get_next_id(map_info.id, &next), -EPERM, + "map_get_next_id_fails"); + } + ASSERT_EQ(bpf_map_get_next_id(0, &next), -EPERM, "map_get_next_id_fails"); + + if (ASSERT_OK(bpf_obj_get_info_by_fd(bpf_link__fd(skel->links.sys_nanosleep_enter), + &link_info, &link_info_len), + "obj_get_info_by_fd")) { + ASSERT_EQ(bpf_link_get_fd_by_id(link_info.id), -EPERM, "link_get_fd_by_id_fails"); + ASSERT_EQ(bpf_link_get_next_id(link_info.id, &next), -EPERM, + "link_get_next_id_fails"); + } + ASSERT_EQ(bpf_link_get_next_id(0, &next), -EPERM, "link_get_next_id_fails"); + + ASSERT_EQ(bpf_prog_query(prog_fd, BPF_TRACE_FENTRY, 0, &attach_flags, prog_ids, + &prog_cnt), -EPERM, "prog_query_fails"); + + btf = btf__new_empty(); + if (ASSERT_OK_PTR(btf, "empty_btf") && + ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "unpriv_int_type")) { + const void *raw_btf_data; + __u32 raw_btf_size; + + raw_btf_data = btf__raw_data(btf, &raw_btf_size); + if (ASSERT_OK_PTR(raw_btf_data, "raw_btf_data_good")) + ASSERT_EQ(bpf_btf_load(raw_btf_data, raw_btf_size, NULL), -EPERM, + "bpf_btf_load_fails"); + } + btf__free(btf); +} + +void test_unpriv_bpf_disabled(void) +{ + char *map_paths[NUM_MAPS] = { PINPATH "array", + PINPATH "percpu_array", + PINPATH "hash", + PINPATH "percpu_hash", + PINPATH "perfbuf", + PINPATH "ringbuf", + PINPATH "prog_array" }; + int map_fds[NUM_MAPS]; + struct test_unpriv_bpf_disabled *skel; + char unprivileged_bpf_disabled_orig[32] = {}; + char perf_event_paranoid_orig[32] = {}; + struct bpf_prog_info prog_info = {}; + __u32 prog_info_len = sizeof(prog_info); + struct perf_event_attr attr = {}; + int prog_fd, perf_fd = -1, i, ret; + __u64 save_caps = 0; + __u32 prog_id; + + skel = test_unpriv_bpf_disabled__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->test_pid = getpid(); + + map_fds[0] = 
bpf_map__fd(skel->maps.array); + map_fds[1] = bpf_map__fd(skel->maps.percpu_array); + map_fds[2] = bpf_map__fd(skel->maps.hash); + map_fds[3] = bpf_map__fd(skel->maps.percpu_hash); + map_fds[4] = bpf_map__fd(skel->maps.perfbuf); + map_fds[5] = bpf_map__fd(skel->maps.ringbuf); + map_fds[6] = bpf_map__fd(skel->maps.prog_array); + + for (i = 0; i < NUM_MAPS; i++) + ASSERT_OK(bpf_obj_pin(map_fds[i], map_paths[i]), "pin map_fd"); + + /* allow user without caps to use perf events */ + if (!ASSERT_OK(sysctl_set("/proc/sys/kernel/perf_event_paranoid", perf_event_paranoid_orig, + "-1"), + "set_perf_event_paranoid")) + goto cleanup; + /* ensure unprivileged bpf disabled is set */ + ret = sysctl_set("/proc/sys/kernel/unprivileged_bpf_disabled", + unprivileged_bpf_disabled_orig, "2"); + if (ret == -EPERM) { + /* if unprivileged_bpf_disabled=1, we get -EPERM back; that's okay. */ + if (!ASSERT_OK(strcmp(unprivileged_bpf_disabled_orig, "1"), + "unprivileged_bpf_disabled_on")) + goto cleanup; + } else { + if (!ASSERT_OK(ret, "set unprivileged_bpf_disabled")) + goto cleanup; + } + + prog_fd = bpf_program__fd(skel->progs.sys_nanosleep_enter); + ASSERT_OK(bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len), + "obj_get_info_by_fd"); + prog_id = prog_info.id; + ASSERT_GT(prog_id, 0, "valid_prog_id"); + + attr.size = sizeof(attr); + attr.type = PERF_TYPE_SOFTWARE; + attr.config = PERF_COUNT_SW_CPU_CLOCK; + attr.freq = 1; + attr.sample_freq = 1000; + perf_fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); + if (!ASSERT_GE(perf_fd, 0, "perf_fd")) + goto cleanup; + + if (!ASSERT_OK(test_unpriv_bpf_disabled__attach(skel), "skel_attach")) + goto cleanup; + + if (!ASSERT_OK(cap_disable_effective(ALL_CAPS, &save_caps), "disable caps")) + goto cleanup; + + if (test__start_subtest("unpriv_bpf_disabled_positive")) + test_unpriv_bpf_disabled_positive(skel, prog_id, prog_fd, perf_fd, map_paths, + map_fds); + + if (test__start_subtest("unpriv_bpf_disabled_negative")) + test_unpriv_bpf_disabled_negative(skel, prog_id, prog_fd, perf_fd, map_paths, + map_fds); + +cleanup: + close(perf_fd); + if (save_caps) + cap_enable_effective(save_caps, NULL); + if (strlen(perf_event_paranoid_orig) > 0) + sysctl_set("/proc/sys/kernel/perf_event_paranoid", NULL, perf_event_paranoid_orig); + if (strlen(unprivileged_bpf_disabled_orig) > 0) + sysctl_set("/proc/sys/kernel/unprivileged_bpf_disabled", NULL, + unprivileged_bpf_disabled_orig); + for (i = 0; i < NUM_MAPS; i++) + unlink(map_paths[i]); + test_unpriv_bpf_disabled__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c index a71f51bdc08d..5f733d50b0d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -190,9 +190,7 @@ __weak void trigger_300_usdts(void) static void __always_inline f400(int x __attribute__((unused))) { - static int y; - - STAP_PROBE1(test, usdt_400, y++); + STAP_PROBE1(test, usdt_400, 400); } /* this time we have 400 different USDT call sites, but they have uniform @@ -299,7 +297,7 @@ static void subtest_multispec_usdt(void) trigger_400_usdts(); ASSERT_EQ(bss->usdt_100_called, 400, "usdt_400_called"); - ASSERT_EQ(bss->usdt_100_sum, 399 * 400 / 2, "usdt_400_sum"); + ASSERT_EQ(bss->usdt_100_sum, 400 * 400, "usdt_400_sum"); cleanup: test_usdt__destroy(skel); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h index 8cfaeba1ddbf..97ec8bc76ae6 100644 --- 
a/tools/testing/selftests/bpf/progs/bpf_iter.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h @@ -16,6 +16,7 @@ #define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used #define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used #define bpf_iter__sockmap bpf_iter__sockmap___not_used +#define bpf_iter__bpf_link bpf_iter__bpf_link___not_used #define btf_ptr btf_ptr___not_used #define BTF_F_COMPACT BTF_F_COMPACT___not_used #define BTF_F_NONAME BTF_F_NONAME___not_used @@ -37,6 +38,7 @@ #undef bpf_iter__bpf_map_elem #undef bpf_iter__bpf_sk_storage_map #undef bpf_iter__sockmap +#undef bpf_iter__bpf_link #undef btf_ptr #undef BTF_F_COMPACT #undef BTF_F_NONAME @@ -132,6 +134,11 @@ struct bpf_iter__sockmap { struct sock *sk; }; +struct bpf_iter__bpf_link { + struct bpf_iter_meta *meta; + struct bpf_link *link; +}; + struct btf_ptr { void *ptr; __u32 type_id; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c new file mode 100644 index 000000000000..e1af2f8f75a6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Red Hat, Inc. */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +SEC("iter/bpf_link") +int dump_bpf_link(struct bpf_iter__bpf_link *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct bpf_link *link = ctx->link; + int link_id; + + if (!link) + return 0; + + link_id = link->id; + bpf_seq_write(seq, &link_id, sizeof(link_id)); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c new file mode 100644 index 000000000000..3824345d82ab --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_size___diff_offs x) {} diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index 1c7105fcae3c..4ee4748133fe 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@ -94,7 +94,7 @@ typedef void (* (*signal_t)(int, void (*)(int)))(int); typedef char * (*fn_ptr_arr1_t[10])(int **); -typedef char * (* const (* const fn_ptr_arr2_t[5])())(char * (*)(int)); +typedef char * (* (* const fn_ptr_arr2_t[5])())(char * (*)(int)); struct struct_w_typedefs { int_t a; diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index c95c0cabe951..f9dc9766546e 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -785,13 +785,21 @@ struct core_reloc_bitfields___err_too_big_bitfield { */ struct core_reloc_size_output { int int_sz; + int int_off; int struct_sz; + int struct_off; int union_sz; + int union_off; int arr_sz; + int arr_off; int arr_elem_sz; + int arr_elem_off; int ptr_sz; + int ptr_off; int enum_sz; + int enum_off; int float_sz; + int float_off; }; struct core_reloc_size { @@ -814,6 +822,16 @@ struct core_reloc_size___diff_sz { double float_field; }; +struct core_reloc_size___diff_offs { + float float_field; + enum { YET_OTHER_VALUE = 123 } enum_field; + void *ptr_field; + int arr_field[4]; + union { int x; } union_field; + struct { int x; } 
struct_field; + int int_field; +}; + /* Error case of two candidates with the fields (int_field) at the same * offset, but with differing final relocation values: size 4 vs size 1 */ diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c new file mode 100644 index 000000000000..d811cff73597 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -0,0 +1,588 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Facebook */ + +#include <errno.h> +#include <string.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct test_info { + int x; + struct bpf_dynptr ptr; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct bpf_dynptr); +} array_map1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct test_info); +} array_map2 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} array_map3 SEC(".maps"); + +struct sample { + int pid; + long value; + char comm[16]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); +} ringbuf SEC(".maps"); + +int err, val; + +static int get_map_val_dynptr(struct bpf_dynptr *ptr) +{ + __u32 key = 0, *map_val; + + bpf_map_update_elem(&array_map3, &key, &val, 0); + + map_val = bpf_map_lookup_elem(&array_map3, &key); + if (!map_val) + return -ENOENT; + + bpf_dynptr_from_mem(map_val, sizeof(*map_val), 0, ptr); + + return 0; +} + +/* Every bpf_ringbuf_reserve_dynptr call must have a corresponding + * bpf_ringbuf_submit/discard_dynptr call + */ +SEC("?raw_tp/sys_nanosleep") +int ringbuf_missing_release1(void *ctx) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + /* missing a call to bpf_ringbuf_discard/submit_dynptr */ + + return 0; +} + +SEC("?raw_tp/sys_nanosleep") +int ringbuf_missing_release2(void *ctx) +{ + struct bpf_dynptr ptr1, ptr2; + struct sample *sample; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr1); + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr2); + + sample = bpf_dynptr_data(&ptr1, 0, sizeof(*sample)); + if (!sample) { + bpf_ringbuf_discard_dynptr(&ptr1, 0); + bpf_ringbuf_discard_dynptr(&ptr2, 0); + return 0; + } + + bpf_ringbuf_submit_dynptr(&ptr1, 0); + + /* missing a call to bpf_ringbuf_discard/submit_dynptr on ptr2 */ + + return 0; +} + +static int missing_release_callback_fn(__u32 index, void *data) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + /* missing a call to bpf_ringbuf_discard/submit_dynptr */ + + return 0; +} + +/* Any dynptr initialized within a callback must have bpf_dynptr_put called */ +SEC("?raw_tp/sys_nanosleep") +int ringbuf_missing_release_callback(void *ctx) +{ + bpf_loop(10, missing_release_callback_fn, NULL, 0); + return 0; +} + +/* Can't call bpf_ringbuf_submit/discard_dynptr on a non-initialized dynptr */ +SEC("?raw_tp/sys_nanosleep") +int ringbuf_release_uninit_dynptr(void *ctx) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_ringbuf_submit_dynptr(&ptr, 0); + + return 0; +} + +/* A dynptr can't be used after it has been invalidated */ +SEC("?raw_tp/sys_nanosleep") +int use_after_invalid(void *ctx) +{ + struct bpf_dynptr ptr; + char read_data[64]; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(read_data), 0, &ptr); 
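+	/* The first read below operates on a valid dynptr and passes the
+	 * verifier; bpf_ringbuf_submit_dynptr() then releases the sample and
+	 * invalidates the dynptr, so the second, identical read is rejected
+	 * at load time.
+	 */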
+ + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + + bpf_ringbuf_submit_dynptr(&ptr, 0); + + /* this should fail */ + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + + return 0; +} + +/* Can't call non-dynptr ringbuf APIs on a dynptr ringbuf sample */ +SEC("?raw_tp/sys_nanosleep") +int ringbuf_invalid_api(void *ctx) +{ + struct bpf_dynptr ptr; + struct sample *sample; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr); + sample = bpf_dynptr_data(&ptr, 0, sizeof(*sample)); + if (!sample) + goto done; + + sample->pid = 123; + + /* invalid API use. need to use dynptr API to submit/discard */ + bpf_ringbuf_submit(sample, 0); + +done: + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} + +/* Can't add a dynptr to a map */ +SEC("?raw_tp/sys_nanosleep") +int add_dynptr_to_map1(void *ctx) +{ + struct bpf_dynptr ptr; + int key = 0; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + /* this should fail */ + bpf_map_update_elem(&array_map1, &key, &ptr, 0); + + bpf_ringbuf_submit_dynptr(&ptr, 0); + + return 0; +} + +/* Can't add a struct with an embedded dynptr to a map */ +SEC("?raw_tp/sys_nanosleep") +int add_dynptr_to_map2(void *ctx) +{ + struct test_info x; + int key = 0; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &x.ptr); + + /* this should fail */ + bpf_map_update_elem(&array_map2, &key, &x, 0); + + bpf_ringbuf_submit_dynptr(&x.ptr, 0); + + return 0; +} + +/* A data slice can't be accessed out of bounds */ +SEC("?raw_tp/sys_nanosleep") +int data_slice_out_of_bounds_ringbuf(void *ctx) +{ + struct bpf_dynptr ptr; + void *data; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr); + + data = bpf_dynptr_data(&ptr, 0, 8); + if (!data) + goto done; + + /* can't index out of bounds of the data slice */ + val = *((char *)data + 8); + +done: + bpf_ringbuf_submit_dynptr(&ptr, 0); + return 0; +} + +SEC("?raw_tp/sys_nanosleep") +int data_slice_out_of_bounds_map_value(void *ctx) +{ + __u32 key = 0, map_val; + struct bpf_dynptr ptr; + void *data; + + get_map_val_dynptr(&ptr); + + data = bpf_dynptr_data(&ptr, 0, sizeof(map_val)); + if (!data) + return 0; + + /* can't index out of bounds of the data slice */ + val = *((char *)data + (sizeof(map_val) + 1)); + + return 0; +} + +/* A data slice can't be used after it has been released */ +SEC("?raw_tp/sys_nanosleep") +int data_slice_use_after_release(void *ctx) +{ + struct bpf_dynptr ptr; + struct sample *sample; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr); + sample = bpf_dynptr_data(&ptr, 0, sizeof(*sample)); + if (!sample) + goto done; + + sample->pid = 123; + + bpf_ringbuf_submit_dynptr(&ptr, 0); + + /* this should fail */ + val = sample->pid; + + return 0; + +done: + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} + +/* A data slice must be first checked for NULL */ +SEC("?raw_tp/sys_nanosleep") +int data_slice_missing_null_check1(void *ctx) +{ + struct bpf_dynptr ptr; + void *data; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr); + + data = bpf_dynptr_data(&ptr, 0, 8); + + /* missing if (!data) check */ + + /* this should fail */ + *(__u8 *)data = 3; + + bpf_ringbuf_submit_dynptr(&ptr, 0); + return 0; +} + +/* A data slice can't be dereferenced if it wasn't checked for null */ +SEC("?raw_tp/sys_nanosleep") +int data_slice_missing_null_check2(void *ctx) +{ + struct bpf_dynptr ptr; + __u64 *data1, *data2; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr); + + data1 = bpf_dynptr_data(&ptr, 0, 8); + data2 = bpf_dynptr_data(&ptr, 0, 8); + if (data1) + /* this should fail */ + 
*data2 = 3; + +done: + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} + +/* Can't pass in a dynptr as an arg to a helper function that doesn't take in a + * dynptr argument + */ +SEC("?raw_tp/sys_nanosleep") +int invalid_helper1(void *ctx) +{ + struct bpf_dynptr ptr; + + get_map_val_dynptr(&ptr); + + /* this should fail */ + bpf_strncmp((const char *)&ptr, sizeof(ptr), "hello!"); + + return 0; +} + +/* A dynptr can't be passed into a helper function at a non-zero offset */ +SEC("?raw_tp/sys_nanosleep") +int invalid_helper2(void *ctx) +{ + struct bpf_dynptr ptr; + char read_data[64]; + + get_map_val_dynptr(&ptr); + + /* this should fail */ + bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0); + + return 0; +} + +/* A bpf_dynptr is invalidated if it's been written into */ +SEC("?raw_tp/sys_nanosleep") +int invalid_write1(void *ctx) +{ + struct bpf_dynptr ptr; + void *data; + __u8 x = 0; + + get_map_val_dynptr(&ptr); + + memcpy(&ptr, &x, sizeof(x)); + + /* this should fail */ + data = bpf_dynptr_data(&ptr, 0, 1); + + return 0; +} + +/* + * A bpf_dynptr can't be used as a dynptr if it has been written into at a fixed + * offset + */ +SEC("?raw_tp/sys_nanosleep") +int invalid_write2(void *ctx) +{ + struct bpf_dynptr ptr; + char read_data[64]; + __u8 x = 0; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr); + + memcpy((void *)&ptr + 8, &x, sizeof(x)); + + /* this should fail */ + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + + bpf_ringbuf_submit_dynptr(&ptr, 0); + + return 0; +} + +/* + * A bpf_dynptr can't be used as a dynptr if it has been written into at a + * non-const offset + */ +SEC("?raw_tp/sys_nanosleep") +int invalid_write3(void *ctx) +{ + struct bpf_dynptr ptr; + char stack_buf[16]; + unsigned long len; + __u8 x = 0; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr); + + memcpy(stack_buf, &val, sizeof(val)); + len = stack_buf[0] & 0xf; + + memcpy((void *)&ptr + len, &x, sizeof(x)); + + /* this should fail */ + bpf_ringbuf_submit_dynptr(&ptr, 0); + + return 0; +} + +static int invalid_write4_callback(__u32 index, void *data) +{ + *(__u32 *)data = 123; + + return 0; +} + +/* If the dynptr is written into in a callback function, it should + * be invalidated as a dynptr + */ +SEC("?raw_tp/sys_nanosleep") +int invalid_write4(void *ctx) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr); + + bpf_loop(10, invalid_write4_callback, &ptr, 0); + + /* this should fail */ + bpf_ringbuf_submit_dynptr(&ptr, 0); + + return 0; +} + +/* A globally-defined bpf_dynptr can't be used (it must reside as a stack frame) */ +struct bpf_dynptr global_dynptr; +SEC("?raw_tp/sys_nanosleep") +int global(void *ctx) +{ + /* this should fail */ + bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &global_dynptr); + + bpf_ringbuf_discard_dynptr(&global_dynptr, 0); + + return 0; +} + +/* A direct read should fail */ +SEC("?raw_tp/sys_nanosleep") +int invalid_read1(void *ctx) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr); + + /* this should fail */ + val = *(int *)&ptr; + + bpf_ringbuf_discard_dynptr(&ptr, 0); + + return 0; +} + +/* A direct read at an offset should fail */ +SEC("?raw_tp/sys_nanosleep") +int invalid_read2(void *ctx) +{ + struct bpf_dynptr ptr; + char read_data[64]; + + get_map_val_dynptr(&ptr); + + /* this should fail */ + bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0); + + return 0; +} + +/* A direct read at an offset into the lower stack slot should fail */ 
+SEC("?raw_tp/sys_nanosleep") +int invalid_read3(void *ctx) +{ + struct bpf_dynptr ptr1, ptr2; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr1); + bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr2); + + /* this should fail */ + memcpy(&val, (void *)&ptr1 + 8, sizeof(val)); + + bpf_ringbuf_discard_dynptr(&ptr1, 0); + bpf_ringbuf_discard_dynptr(&ptr2, 0); + + return 0; +} + +static int invalid_read4_callback(__u32 index, void *data) +{ + /* this should fail */ + val = *(__u32 *)data; + + return 0; +} + +/* A direct read within a callback function should fail */ +SEC("?raw_tp/sys_nanosleep") +int invalid_read4(void *ctx) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr); + + bpf_loop(10, invalid_read4_callback, &ptr, 0); + + bpf_ringbuf_submit_dynptr(&ptr, 0); + + return 0; +} + +/* Initializing a dynptr on an offset should fail */ +SEC("?raw_tp/sys_nanosleep") +int invalid_offset(void *ctx) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr + 1); + + bpf_ringbuf_discard_dynptr(&ptr, 0); + + return 0; +} + +/* Can't release a dynptr twice */ +SEC("?raw_tp/sys_nanosleep") +int release_twice(void *ctx) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr); + + bpf_ringbuf_discard_dynptr(&ptr, 0); + + /* this second release should fail */ + bpf_ringbuf_discard_dynptr(&ptr, 0); + + return 0; +} + +static int release_twice_callback_fn(__u32 index, void *data) +{ + /* this should fail */ + bpf_ringbuf_discard_dynptr(data, 0); + + return 0; +} + +/* Test that releasing a dynptr twice, where one of the releases happens + * within a calback function, fails + */ +SEC("?raw_tp/sys_nanosleep") +int release_twice_callback(void *ctx) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 32, 0, &ptr); + + bpf_ringbuf_discard_dynptr(&ptr, 0); + + bpf_loop(10, release_twice_callback_fn, &ptr, 0); + + return 0; +} + +/* Reject unsupported local mem types for dynptr_from_mem API */ +SEC("?raw_tp/sys_nanosleep") +int dynptr_from_mem_invalid_api(void *ctx) +{ + struct bpf_dynptr ptr; + int x = 0; + + /* this should fail */ + bpf_dynptr_from_mem(&x, sizeof(x), 0, &ptr); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c new file mode 100644 index 000000000000..d67be48df4b2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Facebook */ + +#include <string.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "errno.h" + +char _license[] SEC("license") = "GPL"; + +int pid, err, val; + +struct sample { + int pid; + int seq; + long value; + char comm[16]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); +} ringbuf SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} array_map SEC(".maps"); + +SEC("tp/syscalls/sys_enter_nanosleep") +int test_read_write(void *ctx) +{ + char write_data[64] = "hello there, world!!"; + char read_data[64] = {}, buf[64] = {}; + struct bpf_dynptr ptr; + int i; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr); + + /* Write data into the dynptr */ + err = err ?: bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data)); + + /* Read the data that was written into the dynptr */ + 
err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + + /* Ensure the data we read matches the data we wrote */ + for (i = 0; i < sizeof(read_data); i++) { + if (read_data[i] != write_data[i]) { + err = 1; + break; + } + } + + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int test_data_slice(void *ctx) +{ + __u32 key = 0, val = 235, *map_val; + struct bpf_dynptr ptr; + __u32 map_val_size; + void *data; + + map_val_size = sizeof(*map_val); + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + bpf_map_update_elem(&array_map, &key, &val, 0); + + map_val = bpf_map_lookup_elem(&array_map, &key); + if (!map_val) { + err = 1; + return 0; + } + + bpf_dynptr_from_mem(map_val, map_val_size, 0, &ptr); + + /* Try getting a data slice that is out of range */ + data = bpf_dynptr_data(&ptr, map_val_size + 1, 1); + if (data) { + err = 2; + return 0; + } + + /* Try getting more bytes than available */ + data = bpf_dynptr_data(&ptr, 0, map_val_size + 1); + if (data) { + err = 3; + return 0; + } + + data = bpf_dynptr_data(&ptr, 0, sizeof(__u32)); + if (!data) { + err = 4; + return 0; + } + + *(__u32 *)data = 999; + + err = bpf_probe_read_kernel(&val, sizeof(val), data); + if (err) + return 0; + + if (val != *(int *)data) + err = 5; + + return 0; +} + +static int ringbuf_callback(__u32 index, void *data) +{ + struct sample *sample; + + struct bpf_dynptr *ptr = (struct bpf_dynptr *)data; + + sample = bpf_dynptr_data(ptr, 0, sizeof(*sample)); + if (!sample) + err = 2; + else + sample->pid += index; + + return 0; +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int test_ringbuf(void *ctx) +{ + struct bpf_dynptr ptr; + struct sample *sample; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + val = 100; + + /* check that you can reserve a dynamic size reservation */ + err = bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + sample = err ? 
NULL : bpf_dynptr_data(&ptr, 0, sizeof(*sample)); + if (!sample) { + err = 1; + goto done; + } + + sample->pid = 10; + + /* Can pass dynptr to callback functions */ + bpf_loop(10, ringbuf_callback, &ptr, 0); + + if (sample->pid != 55) + err = 2; + +done: + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/exhandler_kern.c b/tools/testing/selftests/bpf/progs/exhandler_kern.c index dd9b30a0f0fc..20d009e2d266 100644 --- a/tools/testing/selftests/bpf/progs/exhandler_kern.c +++ b/tools/testing/selftests/bpf/progs/exhandler_kern.c @@ -7,8 +7,6 @@ #include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> -#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) - char _license[] SEC("license") = "GPL"; unsigned int exception_triggered; diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c b/tools/testing/selftests/bpf/progs/kprobe_multi.c index 600be50800f8..93510f4f0f3a 100644 --- a/tools/testing/selftests/bpf/progs/kprobe_multi.c +++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c @@ -98,3 +98,17 @@ int test_kretprobe(struct pt_regs *ctx) kprobe_multi_check(ctx, true); return 0; } + +SEC("kprobe.multi") +int test_kprobe_manual(struct pt_regs *ctx) +{ + kprobe_multi_check(ctx, false); + return 0; +} + +SEC("kretprobe.multi") +int test_kretprobe_manual(struct pt_regs *ctx) +{ + kprobe_multi_check(ctx, true); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c b/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c new file mode 100644 index 000000000000..e76e499aca39 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +SEC("kprobe.multi/") +int test_kprobe_empty(struct pt_regs *ctx) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/loop5.c b/tools/testing/selftests/bpf/progs/loop5.c index 913791923fa3..1b13f37f85ec 100644 --- a/tools/testing/selftests/bpf/progs/loop5.c +++ b/tools/testing/selftests/bpf/progs/loop5.c @@ -2,7 +2,6 @@ // Copyright (c) 2019 Facebook #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -#define barrier() __asm__ __volatile__("": : :"memory") char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c index 1b0e0409eaa5..eb8217803493 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr.c +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -141,7 +141,7 @@ SEC("tc") int test_map_kptr(struct __sk_buff *ctx) { struct map_value *v; - int i, key = 0; + int key = 0; #define TEST(map) \ v = bpf_map_lookup_elem(&map, &key); \ @@ -162,7 +162,7 @@ SEC("tc") int test_map_in_map_kptr(struct __sk_buff *ctx) { struct map_value *v; - int i, key = 0; + int key = 0; void *map; #define TEST(map_in_map) \ @@ -187,4 +187,106 @@ int test_map_in_map_kptr(struct __sk_buff *ctx) return 0; } +SEC("tc") +int test_map_kptr_ref(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *p, *p_st; + unsigned long arg = 0; + struct map_value *v; + int key = 0, ret; + + p = bpf_kfunc_call_test_acquire(&arg); + if (!p) + return 1; + + p_st = p->next; + if (p_st->cnt.refs.counter != 2) { + ret = 2; + goto end; + } + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) { + ret = 3; + goto end; + } + + p = bpf_kptr_xchg(&v->ref_ptr, p); + if (p) { + ret = 4; + goto end; + } + if 
(p_st->cnt.refs.counter != 2) + return 5; + + p = bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0); + if (!p) + return 6; + if (p_st->cnt.refs.counter != 3) { + ret = 7; + goto end; + } + bpf_kfunc_call_test_release(p); + if (p_st->cnt.refs.counter != 2) + return 8; + + p = bpf_kptr_xchg(&v->ref_ptr, NULL); + if (!p) + return 9; + bpf_kfunc_call_test_release(p); + if (p_st->cnt.refs.counter != 1) + return 10; + + p = bpf_kfunc_call_test_acquire(&arg); + if (!p) + return 11; + p = bpf_kptr_xchg(&v->ref_ptr, p); + if (p) { + ret = 12; + goto end; + } + if (p_st->cnt.refs.counter != 2) + return 13; + /* Leave in map */ + + return 0; +end: + bpf_kfunc_call_test_release(p); + return ret; +} + +SEC("tc") +int test_map_kptr_ref2(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *p, *p_st; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 1; + + p_st = v->ref_ptr; + if (!p_st || p_st->cnt.refs.counter != 2) + return 2; + + p = bpf_kptr_xchg(&v->ref_ptr, NULL); + if (!p) + return 3; + if (p_st->cnt.refs.counter != 2) { + bpf_kfunc_call_test_release(p); + return 4; + } + + p = bpf_kptr_xchg(&v->ref_ptr, p); + if (p) { + bpf_kfunc_call_test_release(p); + return 5; + } + if (p_st->cnt.refs.counter != 2) + return 6; + + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c new file mode 100644 index 000000000000..05e209b1b12a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c @@ -0,0 +1,418 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +struct map_value { + char buf[8]; + struct prog_test_ref_kfunc __kptr *unref_ptr; + struct prog_test_ref_kfunc __kptr_ref *ref_ptr; + struct prog_test_member __kptr_ref *ref_memb_ptr; +}; + +struct array_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} array_map SEC(".maps"); + +extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; +extern struct prog_test_ref_kfunc * +bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym; + +SEC("?tc") +int size_not_bpf_dw(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + *(u32 *)&v->unref_ptr = 0; + return 0; +} + +SEC("?tc") +int non_const_var_off(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0, id; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + id = ctx->protocol; + if (id < 4 || id > 12) + return 0; + *(u64 *)((void *)v + id) = 0; + + return 0; +} + +SEC("?tc") +int non_const_var_off_kptr_xchg(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0, id; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + id = ctx->protocol; + if (id < 4 || id > 12) + return 0; + bpf_kptr_xchg((void *)v + id, NULL); + + return 0; +} + +SEC("?tc") +int misaligned_access_write(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + *(void **)((void *)v + 7) = NULL; + + return 0; +} + +SEC("?tc") +int misaligned_access_read(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + return *(u64 *)((void *)v 
+ 1); +} + +SEC("?tc") +int reject_var_off_store(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *unref_ptr; + struct map_value *v; + int key = 0, id; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + unref_ptr = v->unref_ptr; + if (!unref_ptr) + return 0; + id = ctx->protocol; + if (id < 4 || id > 12) + return 0; + unref_ptr += id; + v->unref_ptr = unref_ptr; + + return 0; +} + +SEC("?tc") +int reject_bad_type_match(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *unref_ptr; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + unref_ptr = v->unref_ptr; + if (!unref_ptr) + return 0; + unref_ptr = (void *)unref_ptr + 4; + v->unref_ptr = unref_ptr; + + return 0; +} + +SEC("?tc") +int marked_as_untrusted_or_null(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + bpf_this_cpu_ptr(v->unref_ptr); + return 0; +} + +SEC("?tc") +int correct_btf_id_check_size(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *p; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + p = v->unref_ptr; + if (!p) + return 0; + return *(int *)((void *)p + bpf_core_type_size(struct prog_test_ref_kfunc)); +} + +SEC("?tc") +int inherit_untrusted_on_walk(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *unref_ptr; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + unref_ptr = v->unref_ptr; + if (!unref_ptr) + return 0; + unref_ptr = unref_ptr->next; + bpf_this_cpu_ptr(unref_ptr); + return 0; +} + +SEC("?tc") +int reject_kptr_xchg_on_unref(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + bpf_kptr_xchg(&v->unref_ptr, NULL); + return 0; +} + +SEC("?tc") +int reject_kptr_get_no_map_val(struct __sk_buff *ctx) +{ + bpf_kfunc_call_test_kptr_get((void *)&ctx, 0, 0); + return 0; +} + +SEC("?tc") +int reject_kptr_get_no_null_map_val(struct __sk_buff *ctx) +{ + bpf_kfunc_call_test_kptr_get(bpf_map_lookup_elem(&array_map, &(int){0}), 0, 0); + return 0; +} + +SEC("?tc") +int reject_kptr_get_no_kptr(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + bpf_kfunc_call_test_kptr_get((void *)v, 0, 0); + return 0; +} + +SEC("?tc") +int reject_kptr_get_on_unref(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + bpf_kfunc_call_test_kptr_get(&v->unref_ptr, 0, 0); + return 0; +} + +SEC("?tc") +int reject_kptr_get_bad_type_match(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + bpf_kfunc_call_test_kptr_get((void *)&v->ref_memb_ptr, 0, 0); + return 0; +} + +SEC("?tc") +int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + bpf_this_cpu_ptr(v->ref_ptr); + return 0; +} + +SEC("?tc") +int reject_untrusted_store_to_ref(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *p; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + p = v->ref_ptr; + if (!p) + return 0; + /* Checkmate, clang */ + *(struct prog_test_ref_kfunc * volatile 
*)&v->ref_ptr = p; + return 0; +} + +SEC("?tc") +int reject_untrusted_xchg(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *p; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + p = v->ref_ptr; + if (!p) + return 0; + bpf_kptr_xchg(&v->ref_ptr, p); + return 0; +} + +SEC("?tc") +int reject_bad_type_xchg(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *ref_ptr; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + ref_ptr = bpf_kfunc_call_test_acquire(&(unsigned long){0}); + if (!ref_ptr) + return 0; + bpf_kptr_xchg(&v->ref_memb_ptr, ref_ptr); + return 0; +} + +SEC("?tc") +int reject_member_of_ref_xchg(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *ref_ptr; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + ref_ptr = bpf_kfunc_call_test_acquire(&(unsigned long){0}); + if (!ref_ptr) + return 0; + bpf_kptr_xchg(&v->ref_memb_ptr, &ref_ptr->memb); + return 0; +} + +SEC("?syscall") +int reject_indirect_helper_access(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + bpf_get_current_comm(v, sizeof(v->buf) + 1); + return 0; +} + +__noinline +int write_func(int *p) +{ + return p ? *p = 42 : 0; +} + +SEC("?tc") +int reject_indirect_global_func_access(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + return write_func((void *)v + 5); +} + +SEC("?tc") +int kptr_xchg_ref_state(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *p; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + p = bpf_kfunc_call_test_acquire(&(unsigned long){0}); + if (!p) + return 0; + bpf_kptr_xchg(&v->ref_ptr, p); + return 0; +} + +SEC("?tc") +int kptr_get_ref_state(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/mptcp_sock.c b/tools/testing/selftests/bpf/progs/mptcp_sock.c new file mode 100644 index 000000000000..91a0d7eff2ac --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcp_sock.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Tessares SA. */ +/* Copyright (c) 2022, SUSE. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "GPL"; +__u32 token = 0; + +struct mptcp_storage { + __u32 invoked; + __u32 is_mptcp; + struct sock *sk; + __u32 token; + struct sock *first; + char ca_name[TCP_CA_NAME_MAX]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct mptcp_storage); +} socket_storage_map SEC(".maps"); + +SEC("sockops") +int _sockops(struct bpf_sock_ops *ctx) +{ + struct mptcp_storage *storage; + struct mptcp_sock *msk; + int op = (int)ctx->op; + struct tcp_sock *tsk; + struct bpf_sock *sk; + bool is_mptcp; + + if (op != BPF_SOCK_OPS_TCP_CONNECT_CB) + return 1; + + sk = ctx->sk; + if (!sk) + return 1; + + tsk = bpf_skc_to_tcp_sock(sk); + if (!tsk) + return 1; + + is_mptcp = bpf_core_field_exists(tsk->is_mptcp) ? 
tsk->is_mptcp : 0; + if (!is_mptcp) { + storage = bpf_sk_storage_get(&socket_storage_map, sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!storage) + return 1; + + storage->token = 0; + __builtin_memset(storage->ca_name, 0, TCP_CA_NAME_MAX); + storage->first = NULL; + } else { + msk = bpf_skc_to_mptcp_sock(sk); + if (!msk) + return 1; + + storage = bpf_sk_storage_get(&socket_storage_map, msk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!storage) + return 1; + + storage->token = msk->token; + __builtin_memcpy(storage->ca_name, msk->ca_name, TCP_CA_NAME_MAX); + storage->first = msk->first; + } + storage->invoked++; + storage->is_mptcp = is_mptcp; + storage->sk = (struct sock *)sk; + + return 1; +} + +SEC("fentry/mptcp_pm_new_connection") +int BPF_PROG(trace_mptcp_pm_new_connection, struct mptcp_sock *msk, + const struct sock *ssk, int server_side) +{ + if (!server_side) + token = msk->token; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/profiler1.c b/tools/testing/selftests/bpf/progs/profiler1.c index 4df9088bfc00..fb6b13522949 100644 --- a/tools/testing/selftests/bpf/progs/profiler1.c +++ b/tools/testing/selftests/bpf/progs/profiler1.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) #define UNROLL #define INLINE __always_inline #include "profiler.inc.h" diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index 5d3dc4d66d47..6c7b1fb268d6 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -171,8 +171,6 @@ struct process_frame_ctx { bool done; }; -#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) - static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx) { int zero = 0; diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index af994d16bb10..ce9acf4db8d2 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -5,9 +5,12 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include "bpf_misc.h" int kprobe_res = 0; +int kprobe2_res = 0; int kretprobe_res = 0; +int kretprobe2_res = 0; int uprobe_res = 0; int uretprobe_res = 0; int uprobe_byname_res = 0; @@ -15,20 +18,34 @@ int uretprobe_byname_res = 0; int uprobe_byname2_res = 0; int uretprobe_byname2_res = 0; -SEC("kprobe/sys_nanosleep") +SEC("kprobe") int handle_kprobe(struct pt_regs *ctx) { kprobe_res = 1; return 0; } -SEC("kretprobe/sys_nanosleep") -int BPF_KRETPROBE(handle_kretprobe) +SEC("kprobe/" SYS_PREFIX "sys_nanosleep") +int BPF_KPROBE(handle_kprobe_auto) +{ + kprobe2_res = 11; + return 0; +} + +SEC("kretprobe") +int handle_kretprobe(struct pt_regs *ctx) { kretprobe_res = 2; return 0; } +SEC("kretprobe/" SYS_PREFIX "sys_nanosleep") +int BPF_KRETPROBE(handle_kretprobe_auto) +{ + kretprobe2_res = 22; + return 0; +} + SEC("uprobe") int handle_uprobe(struct pt_regs *ctx) { diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c index 0e2222968918..22d0ac8709b4 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c @@ -4,18 +4,23 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include <errno.h> int my_tid; -int kprobe_res; -int kprobe_multi_res; -int 
kretprobe_res; -int uprobe_res; -int uretprobe_res; -int tp_res; -int pe_res; +__u64 kprobe_res; +__u64 kprobe_multi_res; +__u64 kretprobe_res; +__u64 uprobe_res; +__u64 uretprobe_res; +__u64 tp_res; +__u64 pe_res; +__u64 fentry_res; +__u64 fexit_res; +__u64 fmod_ret_res; +__u64 lsm_res; -static void update(void *ctx, int *res) +static void update(void *ctx, __u64 *res) { if (my_tid != (u32)bpf_get_current_pid_tgid()) return; @@ -82,4 +87,35 @@ int handle_pe(struct pt_regs *ctx) return 0; } +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(fentry_test1, int a) +{ + update(ctx, &fentry_res); + return 0; +} + +SEC("fexit/bpf_fentry_test1") +int BPF_PROG(fexit_test1, int a, int ret) +{ + update(ctx, &fexit_res); + return 0; +} + +SEC("fmod_ret/bpf_modify_return_test") +int BPF_PROG(fmod_ret_test, int _a, int *_b, int _ret) +{ + update(ctx, &fmod_ret_res); + return 1234; +} + +SEC("lsm/file_mprotect") +int BPF_PROG(test_int_hook, struct vm_area_struct *vma, + unsigned long reqprot, unsigned long prot, int ret) +{ + if (my_tid != (u32)bpf_get_current_pid_tgid()) + return ret; + update(ctx, &lsm_res); + return -EPERM; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c index 7e45e2bdf6cd..5b8a75097ea3 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c @@ -45,35 +45,34 @@ int test_core_existence(void *ctx) struct core_reloc_existence_output *out = (void *)&data.out; out->a_exists = bpf_core_field_exists(in->a); - if (bpf_core_field_exists(in->a)) + if (bpf_core_field_exists(struct core_reloc_existence, a)) out->a_value = BPF_CORE_READ(in, a); else out->a_value = 0xff000001u; out->b_exists = bpf_core_field_exists(in->b); - if (bpf_core_field_exists(in->b)) + if (bpf_core_field_exists(struct core_reloc_existence, b)) out->b_value = BPF_CORE_READ(in, b); else out->b_value = 0xff000002u; out->c_exists = bpf_core_field_exists(in->c); - if (bpf_core_field_exists(in->c)) + if (bpf_core_field_exists(struct core_reloc_existence, c)) out->c_value = BPF_CORE_READ(in, c); else out->c_value = 0xff000003u; out->arr_exists = bpf_core_field_exists(in->arr); - if (bpf_core_field_exists(in->arr)) + if (bpf_core_field_exists(struct core_reloc_existence, arr)) out->arr_value = BPF_CORE_READ(in, arr[0]); else out->arr_value = 0xff000004u; out->s_exists = bpf_core_field_exists(in->s); - if (bpf_core_field_exists(in->s)) + if (bpf_core_field_exists(struct core_reloc_existence, s)) out->s_value = BPF_CORE_READ(in, s.x); else out->s_value = 0xff000005u; return 0; } - diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c index 7b2d576aeea1..5b686053ce42 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c @@ -15,13 +15,21 @@ struct { struct core_reloc_size_output { int int_sz; + int int_off; int struct_sz; + int struct_off; int union_sz; + int union_off; int arr_sz; + int arr_off; int arr_elem_sz; + int arr_elem_off; int ptr_sz; + int ptr_off; int enum_sz; + int enum_off; int float_sz; + int float_off; }; struct core_reloc_size { @@ -41,13 +49,28 @@ int test_core_size(void *ctx) struct core_reloc_size_output *out = (void *)&data.out; out->int_sz = bpf_core_field_size(in->int_field); + out->int_off = bpf_core_field_offset(in->int_field); + out->struct_sz 
= bpf_core_field_size(in->struct_field); + out->struct_off = bpf_core_field_offset(in->struct_field); + out->union_sz = bpf_core_field_size(in->union_field); + out->union_off = bpf_core_field_offset(in->union_field); + out->arr_sz = bpf_core_field_size(in->arr_field); - out->arr_elem_sz = bpf_core_field_size(in->arr_field[0]); - out->ptr_sz = bpf_core_field_size(in->ptr_field); - out->enum_sz = bpf_core_field_size(in->enum_field); - out->float_sz = bpf_core_field_size(in->float_field); + out->arr_off = bpf_core_field_offset(in->arr_field); + + out->arr_elem_sz = bpf_core_field_size(struct core_reloc_size, arr_field[1]); + out->arr_elem_off = bpf_core_field_offset(struct core_reloc_size, arr_field[1]); + + out->ptr_sz = bpf_core_field_size(struct core_reloc_size, ptr_field); + out->ptr_off = bpf_core_field_offset(struct core_reloc_size, ptr_field); + + out->enum_sz = bpf_core_field_size(struct core_reloc_size, enum_field); + out->enum_off = bpf_core_field_offset(struct core_reloc_size, enum_field); + + out->float_sz = bpf_core_field_size(struct core_reloc_size, float_field); + out->float_off = bpf_core_field_offset(struct core_reloc_size, float_field); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_log_fixup.c b/tools/testing/selftests/bpf/progs/test_log_fixup.c index a78980d897b3..60450cb0e72e 100644 --- a/tools/testing/selftests/bpf/progs/test_log_fixup.c +++ b/tools/testing/selftests/bpf/progs/test_log_fixup.c @@ -35,4 +35,30 @@ int bad_relo_subprog(const void *ctx) return bad_subprog() + bpf_core_field_size(t->pid); } +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} existing_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} missing_map SEC(".maps"); + +SEC("?raw_tp/sys_enter") +int use_missing_map(const void *ctx) +{ + int zero = 0, *value; + + value = bpf_map_lookup_elem(&existing_map, &zero); + + value = bpf_map_lookup_elem(&missing_map, &zero); + + return value != NULL; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c b/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c new file mode 100644 index 000000000000..ca827b1092da --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Bytedance */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +__u64 percpu_array_elem_sum = 0; +__u64 percpu_hash_elem_sum = 0; +__u64 percpu_lru_hash_elem_sum = 0; +const volatile int nr_cpus; +const volatile int my_pid; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} percpu_array_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 1); + __type(key, __u64); + __type(value, __u64); +} percpu_hash_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH); + __uint(max_entries, 1); + __type(key, __u64); + __type(value, __u64); +} percpu_lru_hash_map SEC(".maps"); + +struct read_percpu_elem_ctx { + void *map; + __u64 sum; +}; + +static int read_percpu_elem_callback(__u32 index, struct read_percpu_elem_ctx *ctx) +{ + __u64 key = 0; + __u64 *value; + + value = bpf_map_lookup_percpu_elem(ctx->map, &key, index); + if (value) + ctx->sum += *value; + return 0; +} + +SEC("tp/syscalls/sys_enter_getuid") +int 
sysenter_getuid(const void *ctx) +{ + struct read_percpu_elem_ctx map_ctx; + + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + map_ctx.map = &percpu_array_map; + map_ctx.sum = 0; + bpf_loop(nr_cpus, read_percpu_elem_callback, &map_ctx, 0); + percpu_array_elem_sum = map_ctx.sum; + + map_ctx.map = &percpu_hash_map; + map_ctx.sum = 0; + bpf_loop(nr_cpus, read_percpu_elem_callback, &map_ctx, 0); + percpu_hash_elem_sum = map_ctx.sum; + + map_ctx.map = &percpu_lru_hash_map; + map_ctx.sum = 0; + bpf_loop(nr_cpus, read_percpu_elem_callback, &map_ctx, 0); + percpu_lru_hash_elem_sum = map_ctx.sum; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index 50ce16d02da7..08628afedb77 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -64,7 +64,7 @@ int BPF_PROG(handle_fentry, __u32 fentry_manual_read_sz = 0; -SEC("fentry/placeholder") +SEC("fentry") int BPF_PROG(handle_fentry_manual, struct file *file, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c index 0558544e1ff0..5cd7c096f62d 100644 --- a/tools/testing/selftests/bpf/progs/test_pkt_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c @@ -14,8 +14,6 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -#define barrier() __asm__ __volatile__("": : :"memory") - /* llvm will optimize both subprograms into exactly the same BPF assembly * * Disassembly of section .text: diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c index 197b86546dca..e416e0ce12b7 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c @@ -15,6 +15,8 @@ struct sample { struct ringbuf_map { __uint(type, BPF_MAP_TYPE_RINGBUF); + /* libbpf will adjust to valid page size */ + __uint(max_entries, 1000); } ringbuf1 SEC(".maps"), ringbuf2 SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c index b7c37ca09544..f8e9256cf18d 100644 --- a/tools/testing/selftests/bpf/progs/test_subprogs.c +++ b/tools/testing/selftests/bpf/progs/test_subprogs.c @@ -89,6 +89,11 @@ int prog2(void *ctx) return 0; } +static int empty_callback(__u32 index, void *data) +{ + return 0; +} + /* prog3 has the same section name as prog1 */ SEC("raw_tp/sys_enter") int prog3(void *ctx) @@ -98,6 +103,9 @@ int prog3(void *ctx) if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t)) return 1; + /* test that ld_imm64 with BPF_PSEUDO_FUNC doesn't get blinded */ + bpf_loop(1, empty_callback, NULL, 0); + res3 = sub3(5) + 6; /* (5 + 3 + (4 + 1)) + 6 = 19 */ return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_trampoline_count.c b/tools/testing/selftests/bpf/progs/test_trampoline_count.c index f030e469d05b..7765720da7d5 100644 --- a/tools/testing/selftests/bpf/progs/test_trampoline_count.c +++ b/tools/testing/selftests/bpf/progs/test_trampoline_count.c @@ -1,20 +1,22 @@ // SPDX-License-Identifier: GPL-2.0 -#include <stdbool.h> -#include <stddef.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -struct task_struct; +SEC("fentry/bpf_modify_return_test") +int 
BPF_PROG(fentry_test, int a, int *b) +{ + return 0; +} -SEC("fentry/__set_task_comm") -int BPF_PROG(prog1, struct task_struct *tsk, const char *buf, bool exec) +SEC("fmod_ret/bpf_modify_return_test") +int BPF_PROG(fmod_ret_test, int a, int *b, int ret) { return 0; } -SEC("fexit/__set_task_comm") -int BPF_PROG(prog2, struct task_struct *tsk, const char *buf, bool exec) +SEC("fexit/bpf_modify_return_test") +int BPF_PROG(fexit_test, int a, int *b, int ret) { return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index ef0dde83b85a..17f2f325b3f3 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -21,10 +21,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -#define ERROR(ret) do {\ - char fmt[] = "ERROR line:%d ret:%d\n";\ - bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \ - } while (0) +#define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret) struct geneve_opt { __be16 opt_class; @@ -40,8 +37,15 @@ struct vxlan_metadata { __u32 gbp; }; -SEC("gre_set_tunnel") -int _gre_set_tunnel(struct __sk_buff *skb) +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} local_ip_map SEC(".maps"); + +SEC("tc") +int gre_set_tunnel(struct __sk_buff *skb) { int ret; struct bpf_tunnel_key key; @@ -55,32 +59,31 @@ int _gre_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("gre_get_tunnel") -int _gre_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int gre_get_tunnel(struct __sk_buff *skb) { int ret; struct bpf_tunnel_key key; - char fmt[] = "key %d remote ip 0x%x\n"; ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4); + bpf_printk("key %d remote ip 0x%x\n", key.tunnel_id, key.remote_ipv4); return TC_ACT_OK; } -SEC("ip6gretap_set_tunnel") -int _ip6gretap_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip6gretap_set_tunnel(struct __sk_buff *skb) { struct bpf_tunnel_key key; int ret; @@ -96,35 +99,34 @@ int _ip6gretap_set_tunnel(struct __sk_buff *skb) BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("ip6gretap_get_tunnel") -int _ip6gretap_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip6gretap_get_tunnel(struct __sk_buff *skb) { - char fmt[] = "key %d remote ip6 ::%x label %x\n"; struct bpf_tunnel_key key; int ret; ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), - key.tunnel_id, key.remote_ipv6[3], key.tunnel_label); + bpf_printk("key %d remote ip6 ::%x label %x\n", + key.tunnel_id, key.remote_ipv6[3], key.tunnel_label); return TC_ACT_OK; } -SEC("erspan_set_tunnel") -int _erspan_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int erspan_set_tunnel(struct __sk_buff *skb) { struct bpf_tunnel_key key; struct erspan_metadata md; @@ -139,7 +141,7 @@ int _erspan_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } 
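+	/* log_err() (defined above) wraps bpf_printk(), which takes a format
+	 * string directly, so the per-call "char fmt[]" buffers and explicit
+	 * bpf_trace_printk() size arguments used by the old ERROR() macro are
+	 * no longer needed.
+	 */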
@@ -159,17 +161,16 @@ int _erspan_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("erspan_get_tunnel") -int _erspan_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int erspan_get_tunnel(struct __sk_buff *skb) { - char fmt[] = "key %d remote ip 0x%x erspan version %d\n"; struct bpf_tunnel_key key; struct erspan_metadata md; __u32 index; @@ -177,38 +178,34 @@ int _erspan_get_tunnel(struct __sk_buff *skb) ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), - key.tunnel_id, key.remote_ipv4, md.version); + bpf_printk("key %d remote ip 0x%x erspan version %d\n", + key.tunnel_id, key.remote_ipv4, md.version); #ifdef ERSPAN_V1 - char fmt2[] = "\tindex %x\n"; - index = bpf_ntohl(md.u.index); - bpf_trace_printk(fmt2, sizeof(fmt2), index); + bpf_printk("\tindex %x\n", index); #else - char fmt2[] = "\tdirection %d hwid %x timestamp %u\n"; - - bpf_trace_printk(fmt2, sizeof(fmt2), - md.u.md2.dir, - (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, - bpf_ntohl(md.u.md2.timestamp)); + bpf_printk("\tdirection %d hwid %x timestamp %u\n", + md.u.md2.dir, + (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, + bpf_ntohl(md.u.md2.timestamp)); #endif return TC_ACT_OK; } -SEC("ip4ip6erspan_set_tunnel") -int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip4ip6erspan_set_tunnel(struct __sk_buff *skb) { struct bpf_tunnel_key key; struct erspan_metadata md; @@ -223,7 +220,7 @@ int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } @@ -244,17 +241,16 @@ int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("ip4ip6erspan_get_tunnel") -int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip4ip6erspan_get_tunnel(struct __sk_buff *skb) { - char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n"; struct bpf_tunnel_key key; struct erspan_metadata md; __u32 index; @@ -263,44 +259,88 @@ int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb) ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), - key.tunnel_id, key.remote_ipv4, md.version); + bpf_printk("ip6erspan get key %d remote ip6 ::%x erspan version %d\n", + key.tunnel_id, key.remote_ipv4, md.version); #ifdef ERSPAN_V1 - char fmt2[] = "\tindex %x\n"; - index = bpf_ntohl(md.u.index); - bpf_trace_printk(fmt2, sizeof(fmt2), index); + bpf_printk("\tindex %x\n", index); #else - char fmt2[] = "\tdirection %d hwid %x timestamp %u\n"; - - bpf_trace_printk(fmt2, sizeof(fmt2), - md.u.md2.dir, - (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, - bpf_ntohl(md.u.md2.timestamp)); + bpf_printk("\tdirection %d hwid %x timestamp %u\n", + md.u.md2.dir, + (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, + bpf_ntohl(md.u.md2.timestamp)); #endif return TC_ACT_OK; } -SEC("vxlan_set_tunnel") -int 
_vxlan_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int vxlan_set_tunnel_dst(struct __sk_buff *skb) { int ret; struct bpf_tunnel_key key; struct vxlan_metadata md; + __u32 index = 0; + __u32 *local_ip = NULL; + + local_ip = bpf_map_lookup_elem(&local_ip_map, &index); + if (!local_ip) { + log_err(ret); + return TC_ACT_SHOT; + } + + __builtin_memset(&key, 0x0, sizeof(key)); + key.local_ipv4 = 0xac100164; /* 172.16.1.100 */ + key.remote_ipv4 = *local_ip; + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_ZERO_CSUM_TX); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */ + ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("tc") +int vxlan_set_tunnel_src(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + struct vxlan_metadata md; + __u32 index = 0; + __u32 *local_ip = NULL; + + local_ip = bpf_map_lookup_elem(&local_ip_map, &index); + if (!local_ip) { + log_err(ret); + return TC_ACT_SHOT; + } __builtin_memset(&key, 0x0, sizeof(key)); + key.local_ipv4 = *local_ip; key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ key.tunnel_id = 2; key.tunnel_tos = 0; @@ -309,53 +349,106 @@ int _vxlan_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */ ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("vxlan_get_tunnel") -int _vxlan_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int vxlan_get_tunnel_src(struct __sk_buff *skb) { int ret; struct bpf_tunnel_key key; struct vxlan_metadata md; - char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n"; + __u32 index = 0; + __u32 *local_ip = NULL; + + local_ip = bpf_map_lookup_elem(&local_ip_map, &index); + if (!local_ip) { + log_err(ret); + return TC_ACT_SHOT; + } ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), - key.tunnel_id, key.remote_ipv4, md.gbp); + if (key.local_ipv4 != *local_ip || md.gbp != 0x800FF) { + bpf_printk("vxlan key %d local ip 0x%x remote ip 0x%x gbp 0x%x\n", + key.tunnel_id, key.local_ipv4, + key.remote_ipv4, md.gbp); + bpf_printk("local_ip 0x%x\n", *local_ip); + log_err(ret); + return TC_ACT_SHOT; + } return TC_ACT_OK; } -SEC("ip6vxlan_set_tunnel") -int _ip6vxlan_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip6vxlan_set_tunnel_dst(struct __sk_buff *skb) { struct bpf_tunnel_key key; int ret; + __u32 index = 0; + __u32 *local_ip; + + local_ip = bpf_map_lookup_elem(&local_ip_map, &index); + if (!local_ip) { + log_err(ret); + return TC_ACT_SHOT; + } __builtin_memset(&key, 0x0, sizeof(key)); + key.local_ipv6[3] = bpf_htonl(0x11); /* ::11 */ + key.remote_ipv6[3] = bpf_htonl(*local_ip); + key.tunnel_id = 22; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("tc") +int ip6vxlan_set_tunnel_src(struct __sk_buff *skb) +{ + struct bpf_tunnel_key 
key; + int ret; + __u32 index = 0; + __u32 *local_ip; + + local_ip = bpf_map_lookup_elem(&local_ip_map, &index); + if (!local_ip) { + log_err(ret); + return TC_ACT_SHOT; + } + + __builtin_memset(&key, 0x0, sizeof(key)); + key.local_ipv6[3] = bpf_htonl(*local_ip); key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ key.tunnel_id = 22; key.tunnel_tos = 0; @@ -364,35 +457,48 @@ int _ip6vxlan_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("ip6vxlan_get_tunnel") -int _ip6vxlan_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip6vxlan_get_tunnel_src(struct __sk_buff *skb) { - char fmt[] = "key %d remote ip6 ::%x label %x\n"; struct bpf_tunnel_key key; int ret; + __u32 index = 0; + __u32 *local_ip; + + local_ip = bpf_map_lookup_elem(&local_ip_map, &index); + if (!local_ip) { + log_err(ret); + return TC_ACT_SHOT; + } ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), - key.tunnel_id, key.remote_ipv6[3], key.tunnel_label); + if (bpf_ntohl(key.local_ipv6[3]) != *local_ip) { + bpf_printk("ip6vxlan key %d local ip6 ::%x remote ip6 ::%x label 0x%x\n", + key.tunnel_id, bpf_ntohl(key.local_ipv6[3]), + bpf_ntohl(key.remote_ipv6[3]), key.tunnel_label); + bpf_printk("local_ip 0x%x\n", *local_ip); + log_err(ret); + return TC_ACT_SHOT; + } return TC_ACT_OK; } -SEC("geneve_set_tunnel") -int _geneve_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int geneve_set_tunnel(struct __sk_buff *skb) { int ret; struct bpf_tunnel_key key; @@ -416,30 +522,29 @@ int _geneve_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt)); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("geneve_get_tunnel") -int _geneve_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int geneve_get_tunnel(struct __sk_buff *skb) { int ret; struct bpf_tunnel_key key; struct geneve_opt gopt; - char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n"; ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } @@ -447,13 +552,13 @@ int _geneve_get_tunnel(struct __sk_buff *skb) if (ret < 0) gopt.opt_class = 0; - bpf_trace_printk(fmt, sizeof(fmt), - key.tunnel_id, key.remote_ipv4, gopt.opt_class); + bpf_printk("key %d remote ip 0x%x geneve class 0x%x\n", + key.tunnel_id, key.remote_ipv4, gopt.opt_class); return TC_ACT_OK; } -SEC("ip6geneve_set_tunnel") -int _ip6geneve_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip6geneve_set_tunnel(struct __sk_buff *skb) { struct bpf_tunnel_key key; struct geneve_opt gopt; @@ -468,7 +573,7 @@ int _ip6geneve_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } @@ -483,17 +588,16 @@ int _ip6geneve_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt)); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("ip6geneve_get_tunnel") -int _ip6geneve_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip6geneve_get_tunnel(struct __sk_buff *skb) { - char fmt[] = "key %d remote ip 0x%x geneve 
class 0x%x\n"; struct bpf_tunnel_key key; struct geneve_opt gopt; int ret; @@ -501,7 +605,7 @@ int _ip6geneve_get_tunnel(struct __sk_buff *skb) ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } @@ -509,14 +613,14 @@ int _ip6geneve_get_tunnel(struct __sk_buff *skb) if (ret < 0) gopt.opt_class = 0; - bpf_trace_printk(fmt, sizeof(fmt), - key.tunnel_id, key.remote_ipv4, gopt.opt_class); + bpf_printk("key %d remote ip 0x%x geneve class 0x%x\n", + key.tunnel_id, key.remote_ipv4, gopt.opt_class); return TC_ACT_OK; } -SEC("ipip_set_tunnel") -int _ipip_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int ipip_set_tunnel(struct __sk_buff *skb) { struct bpf_tunnel_key key = {}; void *data = (void *)(long)skb->data; @@ -526,7 +630,7 @@ int _ipip_set_tunnel(struct __sk_buff *skb) /* single length check */ if (data + sizeof(*iph) > data_end) { - ERROR(1); + log_err(1); return TC_ACT_SHOT; } @@ -537,32 +641,31 @@ int _ipip_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("ipip_get_tunnel") -int _ipip_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int ipip_get_tunnel(struct __sk_buff *skb) { int ret; struct bpf_tunnel_key key; - char fmt[] = "remote ip 0x%x\n"; ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4); + bpf_printk("remote ip 0x%x\n", key.remote_ipv4); return TC_ACT_OK; } -SEC("ipip6_set_tunnel") -int _ipip6_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int ipip6_set_tunnel(struct __sk_buff *skb) { struct bpf_tunnel_key key = {}; void *data = (void *)(long)skb->data; @@ -572,7 +675,7 @@ int _ipip6_set_tunnel(struct __sk_buff *skb) /* single length check */ if (data + sizeof(*iph) > data_end) { - ERROR(1); + log_err(1); return TC_ACT_SHOT; } @@ -585,34 +688,33 @@ int _ipip6_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("ipip6_get_tunnel") -int _ipip6_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int ipip6_get_tunnel(struct __sk_buff *skb) { int ret; struct bpf_tunnel_key key; - char fmt[] = "remote ip6 %x::%x\n"; ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]), - bpf_htonl(key.remote_ipv6[3])); + bpf_printk("remote ip6 %x::%x\n", bpf_htonl(key.remote_ipv6[0]), + bpf_htonl(key.remote_ipv6[3])); return TC_ACT_OK; } -SEC("ip6ip6_set_tunnel") -int _ip6ip6_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip6ip6_set_tunnel(struct __sk_buff *skb) { struct bpf_tunnel_key key = {}; void *data = (void *)(long)skb->data; @@ -622,7 +724,7 @@ int _ip6ip6_set_tunnel(struct __sk_buff *skb) /* single length check */ if (data + sizeof(*iph) > data_end) { - ERROR(1); + log_err(1); return TC_ACT_SHOT; } @@ -634,45 +736,44 @@ int _ip6ip6_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("ip6ip6_get_tunnel") -int _ip6ip6_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip6ip6_get_tunnel(struct __sk_buff *skb) { int 
ret; struct bpf_tunnel_key key; - char fmt[] = "remote ip6 %x::%x\n"; ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]), - bpf_htonl(key.remote_ipv6[3])); + bpf_printk("remote ip6 %x::%x\n", bpf_htonl(key.remote_ipv6[0]), + bpf_htonl(key.remote_ipv6[3])); return TC_ACT_OK; } -SEC("xfrm_get_state") -int _xfrm_get_state(struct __sk_buff *skb) +SEC("tc") +int xfrm_get_state(struct __sk_buff *skb) { struct bpf_xfrm_state x; - char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n"; int ret; ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0); if (ret < 0) return TC_ACT_OK; - bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi), - bpf_ntohl(x.remote_ipv4)); + bpf_printk("reqid %d spi 0x%x remote ip 0x%x\n", + x.reqid, bpf_ntohl(x.spi), + bpf_ntohl(x.remote_ipv4)); return TC_ACT_OK; } diff --git a/tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c new file mode 100644 index 000000000000..fc423e43a3cd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022, Oracle and/or its affiliates. */ + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +__u32 perfbuf_val = 0; +__u32 ringbuf_val = 0; + +int test_pid; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} percpu_array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} hash SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} percpu_hash SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __type(key, __u32); + __type(value, __u32); +} perfbuf SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 12); +} ringbuf SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} prog_array SEC(".maps"); + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int sys_nanosleep_enter(void *ctx) +{ + int cur_pid; + + cur_pid = bpf_get_current_pid_tgid() >> 32; + + if (cur_pid != test_pid) + return 0; + + bpf_perf_event_output(ctx, &perfbuf, BPF_F_CURRENT_CPU, &perfbuf_val, sizeof(perfbuf_val)); + bpf_ringbuf_output(&ringbuf, &ringbuf_val, sizeof(ringbuf_val), 0); + + return 0; +} + +SEC("perf_event") +int handle_perf_event(void *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index c536d1d29d57..c639f2e56fc5 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -18,6 +18,93 @@ #include <sys/socket.h> #include <sys/un.h> +static bool verbose(void) +{ + return env.verbosity > VERBOSE_NONE; +} + +static void stdio_hijack_init(char **log_buf, size_t *log_cnt) +{ +#ifdef __GLIBC__ + if (verbose() && env.worker_id == -1) { + /* nothing to do, output to stdout by default 
*/ + return; + } + + fflush(stdout); + fflush(stderr); + + stdout = open_memstream(log_buf, log_cnt); + if (!stdout) { + stdout = env.stdout; + perror("open_memstream"); + return; + } + + if (env.subtest_state) + env.subtest_state->stdout = stdout; + else + env.test_state->stdout = stdout; + + stderr = stdout; +#endif +} + +static void stdio_hijack(char **log_buf, size_t *log_cnt) +{ +#ifdef __GLIBC__ + if (verbose() && env.worker_id == -1) { + /* nothing to do, output to stdout by default */ + return; + } + + env.stdout = stdout; + env.stderr = stderr; + + stdio_hijack_init(log_buf, log_cnt); +#endif +} + +static void stdio_restore_cleanup(void) +{ +#ifdef __GLIBC__ + if (verbose() && env.worker_id == -1) { + /* nothing to do, output to stdout by default */ + return; + } + + fflush(stdout); + + if (env.subtest_state) { + fclose(env.subtest_state->stdout); + env.subtest_state->stdout = NULL; + stdout = env.test_state->stdout; + stderr = env.test_state->stdout; + } else { + fclose(env.test_state->stdout); + env.test_state->stdout = NULL; + } +#endif +} + +static void stdio_restore(void) +{ +#ifdef __GLIBC__ + if (verbose() && env.worker_id == -1) { + /* nothing to do, output to stdout by default */ + return; + } + + if (stdout == env.stdout) + return; + + stdio_restore_cleanup(); + + stdout = env.stdout; + stderr = env.stderr; +#endif +} + /* Adapted from perf/util/string.c */ static bool glob_match(const char *str, const char *pat) { @@ -130,30 +217,98 @@ static bool should_run_subtest(struct test_selector *sel, return subtest_num < subtest_sel->num_set_len && subtest_sel->num_set[subtest_num]; } +static char *test_result(bool failed, bool skipped) +{ + return failed ? "FAIL" : (skipped ? "SKIP" : "OK"); +} + +static void print_test_log(char *log_buf, size_t log_cnt) +{ + log_buf[log_cnt] = '\0'; + fprintf(env.stdout, "%s", log_buf); + if (log_buf[log_cnt - 1] != '\n') + fprintf(env.stdout, "\n"); +} + +#define TEST_NUM_WIDTH 7 + +static void print_test_name(int test_num, const char *test_name, char *result) +{ + fprintf(env.stdout, "#%-*d %s", TEST_NUM_WIDTH, test_num, test_name); + + if (result) + fprintf(env.stdout, ":%s", result); + + fprintf(env.stdout, "\n"); +} + +static void print_subtest_name(int test_num, int subtest_num, + const char *test_name, char *subtest_name, + char *result) +{ + char test_num_str[TEST_NUM_WIDTH + 1]; + + snprintf(test_num_str, sizeof(test_num_str), "%d/%d", test_num, subtest_num); + + fprintf(env.stdout, "#%-*s %s/%s", + TEST_NUM_WIDTH, test_num_str, + test_name, subtest_name); + + if (result) + fprintf(env.stdout, ":%s", result); + + fprintf(env.stdout, "\n"); +} + static void dump_test_log(const struct prog_test_def *test, const struct test_state *test_state, - bool force_failed) + bool skip_ok_subtests, + bool par_exec_result) { - bool failed = test_state->error_cnt > 0 || force_failed; + bool test_failed = test_state->error_cnt > 0; + bool force_log = test_state->force_log; + bool print_test = verbose() || force_log || test_failed; + int i; + struct subtest_state *subtest_state; + bool subtest_failed; + bool subtest_filtered; + bool print_subtest; - /* worker always holds log */ + /* we do not print anything in the worker thread */ if (env.worker_id != -1) return; - fflush(stdout); /* exports test_state->log_buf & test_state->log_cnt */ + /* there is nothing to print when verbose log is used and execution + * is not in parallel mode + */ + if (verbose() && !par_exec_result) + return; + + if (test_state->log_cnt && print_test) + 
print_test_log(test_state->log_buf, test_state->log_cnt); - fprintf(env.stdout, "#%-3d %s:%s\n", - test->test_num, test->test_name, - failed ? "FAIL" : (test_state->skip_cnt ? "SKIP" : "OK")); + for (i = 0; i < test_state->subtest_num; i++) { + subtest_state = &test_state->subtest_states[i]; + subtest_failed = subtest_state->error_cnt; + subtest_filtered = subtest_state->filtered; + print_subtest = verbose() || force_log || subtest_failed; - if (env.verbosity > VERBOSE_NONE || test_state->force_log || failed) { - if (test_state->log_cnt) { - test_state->log_buf[test_state->log_cnt] = '\0'; - fprintf(env.stdout, "%s", test_state->log_buf); - if (test_state->log_buf[test_state->log_cnt - 1] != '\n') - fprintf(env.stdout, "\n"); + if ((skip_ok_subtests && !subtest_failed) || subtest_filtered) + continue; + + if (subtest_state->log_cnt && print_subtest) { + print_test_log(subtest_state->log_buf, + subtest_state->log_cnt); } + + print_subtest_name(test->test_num, i + 1, + test->test_name, subtest_state->name, + test_result(subtest_state->error_cnt, + subtest_state->skipped)); } + + print_test_name(test->test_num, test->test_name, + test_result(test_failed, test_state->skip_cnt)); } static void stdio_restore(void); @@ -205,35 +360,50 @@ static void restore_netns(void) void test__end_subtest(void) { struct prog_test_def *test = env.test; - struct test_state *state = env.test_state; - int sub_error_cnt = state->error_cnt - state->old_error_cnt; - - fprintf(stdout, "#%d/%d %s/%s:%s\n", - test->test_num, state->subtest_num, test->test_name, state->subtest_name, - sub_error_cnt ? "FAIL" : (state->subtest_skip_cnt ? "SKIP" : "OK")); - - if (sub_error_cnt == 0) { - if (state->subtest_skip_cnt == 0) { - state->sub_succ_cnt++; - } else { - state->subtest_skip_cnt = 0; - state->skip_cnt++; - } + struct test_state *test_state = env.test_state; + struct subtest_state *subtest_state = env.subtest_state; + + if (subtest_state->error_cnt) { + test_state->error_cnt++; + } else { + if (!subtest_state->skipped) + test_state->sub_succ_cnt++; + else + test_state->skip_cnt++; } - free(state->subtest_name); - state->subtest_name = NULL; + if (verbose() && !env.workers) + print_subtest_name(test->test_num, test_state->subtest_num, + test->test_name, subtest_state->name, + test_result(subtest_state->error_cnt, + subtest_state->skipped)); + + stdio_restore_cleanup(); + env.subtest_state = NULL; } bool test__start_subtest(const char *subtest_name) { struct prog_test_def *test = env.test; struct test_state *state = env.test_state; + struct subtest_state *subtest_state; + size_t sub_state_size = sizeof(*subtest_state); - if (state->subtest_name) + if (env.subtest_state) test__end_subtest(); state->subtest_num++; + state->subtest_states = + realloc(state->subtest_states, + state->subtest_num * sub_state_size); + if (!state->subtest_states) { + fprintf(stderr, "Not enough memory to allocate subtest result\n"); + return false; + } + + subtest_state = &state->subtest_states[state->subtest_num - 1]; + + memset(subtest_state, 0, sub_state_size); if (!subtest_name || !subtest_name[0]) { fprintf(env.stderr, @@ -242,21 +412,25 @@ bool test__start_subtest(const char *subtest_name) return false; } + subtest_state->name = strdup(subtest_name); + if (!subtest_state->name) { + fprintf(env.stderr, + "Subtest #%d: failed to copy subtest name!\n", + state->subtest_num); + return false; + } + if (!should_run_subtest(&env.test_selector, &env.subtest_selector, state->subtest_num, test->test_name, - subtest_name)) - return false; - - 
state->subtest_name = strdup(subtest_name); - if (!state->subtest_name) { - fprintf(env.stderr, - "Subtest #%d: failed to copy subtest name!\n", - state->subtest_num); + subtest_name)) { + subtest_state->filtered = true; return false; } - state->old_error_cnt = state->error_cnt; + + env.subtest_state = subtest_state; + stdio_hijack_init(&subtest_state->log_buf, &subtest_state->log_cnt); return true; } @@ -268,15 +442,18 @@ void test__force_log(void) void test__skip(void) { - if (env.test_state->subtest_name) - env.test_state->subtest_skip_cnt++; + if (env.subtest_state) + env.subtest_state->skipped = true; else env.test_state->skip_cnt++; } void test__fail(void) { - env.test_state->error_cnt++; + if (env.subtest_state) + env.subtest_state->error_cnt++; + else + env.test_state->error_cnt++; } int test__join_cgroup(const char *path) @@ -455,14 +632,14 @@ static void unload_bpf_testmod(void) fprintf(env.stderr, "Failed to trigger kernel-side RCU sync!\n"); if (delete_module("bpf_testmod", 0)) { if (errno == ENOENT) { - if (env.verbosity > VERBOSE_NONE) + if (verbose()) fprintf(stdout, "bpf_testmod.ko is already unloaded.\n"); return; } fprintf(env.stderr, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno); return; } - if (env.verbosity > VERBOSE_NONE) + if (verbose()) fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n"); } @@ -473,7 +650,7 @@ static int load_bpf_testmod(void) /* ensure previous instance of the module is unloaded */ unload_bpf_testmod(); - if (env.verbosity > VERBOSE_NONE) + if (verbose()) fprintf(stdout, "Loading bpf_testmod.ko...\n"); fd = open("bpf_testmod.ko", O_RDONLY); @@ -488,7 +665,7 @@ static int load_bpf_testmod(void) } close(fd); - if (env.verbosity > VERBOSE_NONE) + if (verbose()) fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n"); return 0; } @@ -655,7 +832,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) } } - if (env->verbosity > VERBOSE_NONE) { + if (verbose()) { if (setenv("SELFTESTS_VERBOSE", "1", 1) == -1) { fprintf(stderr, "Unable to setenv SELFTESTS_VERBOSE=1 (errno=%d)", @@ -696,44 +873,6 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) return 0; } -static void stdio_hijack(char **log_buf, size_t *log_cnt) -{ -#ifdef __GLIBC__ - env.stdout = stdout; - env.stderr = stderr; - - if (env.verbosity > VERBOSE_NONE && env.worker_id == -1) { - /* nothing to do, output to stdout by default */ - return; - } - - /* stdout and stderr -> buffer */ - fflush(stdout); - - stdout = open_memstream(log_buf, log_cnt); - if (!stdout) { - stdout = env.stdout; - perror("open_memstream"); - return; - } - - stderr = stdout; -#endif -} - -static void stdio_restore(void) -{ -#ifdef __GLIBC__ - if (stdout == env.stdout) - return; - - fclose(stdout); - - stdout = env.stdout; - stderr = env.stderr; -#endif -} - /* * Determine if test_progs is running as a "flavored" test runner and switch * into corresponding sub-directory to load correct BPF objects. 
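(Illustrative aside, not part of the patch: the stdio_hijack()/stdio_restore() helpers removed in the hunk above were moved earlier in the file and split into per-test and per-subtest variants. Both rely on glibc's open_memstream() plus the glibc-specific ability to reassign the stdout FILE pointer, which is why they are wrapped in #ifdef __GLIBC__. A minimal standalone sketch of that capture/restore mechanism, assuming a glibc system:)

/* sketch.c -- capture printf() output into a heap buffer, then restore */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	char *log_buf = NULL;
	size_t log_cnt = 0;
	FILE *real_stdout = stdout;
	FILE *mem;

	fflush(stdout);
	mem = open_memstream(&log_buf, &log_cnt);
	if (!mem)
		return 1;

	stdout = mem;			/* glibc-only: printf() now writes into log_buf */
	printf("captured line\n");

	fclose(mem);			/* flushes and finalizes log_buf / log_cnt */
	stdout = real_stdout;		/* restore the real stdout */

	printf("got %zu bytes: %s", log_cnt, log_buf);
	free(log_buf);
	return 0;
}

Reassigning stdout is not portable C; on non-glibc systems the helpers compile to empty bodies, which matches the #ifdef guards in the patch.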
@@ -759,7 +898,7 @@ int cd_flavor_subdir(const char *exec_name) if (!flavor) return 0; flavor++; - if (env.verbosity > VERBOSE_NONE) + if (verbose()) fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor); return chdir(flavor); @@ -812,8 +951,10 @@ void crash_handler(int signum) sz = backtrace(bt, ARRAY_SIZE(bt)); - if (env.test) - dump_test_log(env.test, env.test_state, true); + if (env.test) { + env.test_state->error_cnt++; + dump_test_log(env.test, env.test_state, true, false); + } if (env.stdout) stdio_restore(); if (env.worker_id != -1) @@ -839,13 +980,18 @@ static inline const char *str_msg(const struct msg *msg, char *buf) { switch (msg->type) { case MSG_DO_TEST: - sprintf(buf, "MSG_DO_TEST %d", msg->do_test.test_num); + sprintf(buf, "MSG_DO_TEST %d", msg->do_test.num); break; case MSG_TEST_DONE: sprintf(buf, "MSG_TEST_DONE %d (log: %d)", - msg->test_done.test_num, + msg->test_done.num, msg->test_done.have_log); break; + case MSG_SUBTEST_DONE: + sprintf(buf, "MSG_SUBTEST_DONE %d (log: %d)", + msg->subtest_done.num, + msg->subtest_done.have_log); + break; case MSG_TEST_LOG: sprintf(buf, "MSG_TEST_LOG (cnt: %ld, last: %d)", strlen(msg->test_log.log_buf), @@ -901,12 +1047,14 @@ static void run_one_test(int test_num) test->run_serial_test(); /* ensure last sub-test is finalized properly */ - if (state->subtest_name) + if (env.subtest_state) test__end_subtest(); state->tested = true; - dump_test_log(test, state, false); + if (verbose() && env.worker_id == -1) + print_test_name(test_num + 1, test->test_name, + test_result(state->error_cnt, state->skip_cnt)); reset_affinity(); restore_netns(); @@ -914,6 +1062,8 @@ static void run_one_test(int test_num) cleanup_cgroup_environment(); stdio_restore(); + + dump_test_log(test, state, false, false); } struct dispatch_data { @@ -921,11 +1071,83 @@ struct dispatch_data { int sock_fd; }; +static int read_prog_test_msg(int sock_fd, struct msg *msg, enum msg_type type) +{ + if (recv_message(sock_fd, msg) < 0) + return 1; + + if (msg->type != type) { + printf("%s: unexpected message type %d. 
expected %d\n", __func__, msg->type, type); + return 1; + } + + return 0; +} + +static int dispatch_thread_read_log(int sock_fd, char **log_buf, size_t *log_cnt) +{ + FILE *log_fp = NULL; + int result = 0; + + log_fp = open_memstream(log_buf, log_cnt); + if (!log_fp) + return 1; + + while (true) { + struct msg msg; + + if (read_prog_test_msg(sock_fd, &msg, MSG_TEST_LOG)) { + result = 1; + goto out; + } + + fprintf(log_fp, "%s", msg.test_log.log_buf); + if (msg.test_log.is_last) + break; + } + +out: + fclose(log_fp); + log_fp = NULL; + return result; +} + +static int dispatch_thread_send_subtests(int sock_fd, struct test_state *state) +{ + struct msg msg; + struct subtest_state *subtest_state; + int subtest_num = state->subtest_num; + + state->subtest_states = malloc(subtest_num * sizeof(*subtest_state)); + + for (int i = 0; i < subtest_num; i++) { + subtest_state = &state->subtest_states[i]; + + memset(subtest_state, 0, sizeof(*subtest_state)); + + if (read_prog_test_msg(sock_fd, &msg, MSG_SUBTEST_DONE)) + return 1; + + subtest_state->name = strdup(msg.subtest_done.name); + subtest_state->error_cnt = msg.subtest_done.error_cnt; + subtest_state->skipped = msg.subtest_done.skipped; + subtest_state->filtered = msg.subtest_done.filtered; + + /* collect all logs */ + if (msg.subtest_done.have_log) + if (dispatch_thread_read_log(sock_fd, + &subtest_state->log_buf, + &subtest_state->log_cnt)) + return 1; + } + + return 0; +} + static void *dispatch_thread(void *ctx) { struct dispatch_data *data = ctx; int sock_fd; - FILE *log_fp = NULL; sock_fd = data->sock_fd; @@ -957,8 +1179,9 @@ static void *dispatch_thread(void *ctx) { struct msg msg_do_test; + memset(&msg_do_test, 0, sizeof(msg_do_test)); msg_do_test.type = MSG_DO_TEST; - msg_do_test.do_test.test_num = test_to_run; + msg_do_test.do_test.num = test_to_run; if (send_message(sock_fd, &msg_do_test) < 0) { perror("Fail to send command"); goto done; @@ -967,57 +1190,45 @@ static void *dispatch_thread(void *ctx) } /* wait for test done */ - { - int err; - struct msg msg_test_done; + do { + struct msg msg; - err = recv_message(sock_fd, &msg_test_done); - if (err < 0) - goto error; - if (msg_test_done.type != MSG_TEST_DONE) + if (read_prog_test_msg(sock_fd, &msg, MSG_TEST_DONE)) goto error; - if (test_to_run != msg_test_done.test_done.test_num) + if (test_to_run != msg.test_done.num) goto error; state = &test_states[test_to_run]; state->tested = true; - state->error_cnt = msg_test_done.test_done.error_cnt; - state->skip_cnt = msg_test_done.test_done.skip_cnt; - state->sub_succ_cnt = msg_test_done.test_done.sub_succ_cnt; + state->error_cnt = msg.test_done.error_cnt; + state->skip_cnt = msg.test_done.skip_cnt; + state->sub_succ_cnt = msg.test_done.sub_succ_cnt; + state->subtest_num = msg.test_done.subtest_num; /* collect all logs */ - if (msg_test_done.test_done.have_log) { - log_fp = open_memstream(&state->log_buf, &state->log_cnt); - if (!log_fp) + if (msg.test_done.have_log) { + if (dispatch_thread_read_log(sock_fd, + &state->log_buf, + &state->log_cnt)) goto error; + } - while (true) { - struct msg msg_log; - - if (recv_message(sock_fd, &msg_log) < 0) - goto error; - if (msg_log.type != MSG_TEST_LOG) - goto error; + /* collect all subtests and subtest logs */ + if (!state->subtest_num) + break; - fprintf(log_fp, "%s", msg_log.test_log.log_buf); - if (msg_log.test_log.is_last) - break; - } - fclose(log_fp); - log_fp = NULL; - } - } /* wait for test done */ + if (dispatch_thread_send_subtests(sock_fd, state)) + goto error; + } while (false); 
pthread_mutex_lock(&stdout_output_lock); - dump_test_log(test, state, false); + dump_test_log(test, state, false, true); pthread_mutex_unlock(&stdout_output_lock); } /* while (true) */ error: if (env.debug) fprintf(stderr, "[%d]: Protocol/IO error: %s.\n", data->worker_id, strerror(errno)); - if (log_fp) - fclose(log_fp); done: { struct msg msg_exit; @@ -1052,18 +1263,24 @@ static void calculate_summary_and_print_errors(struct test_env *env) succ_cnt++; } - if (fail_cnt) + /* + * We only print error logs summary when there are failed tests and + * verbose mode is not enabled. Otherwise, results may be incosistent. + * + */ + if (!verbose() && fail_cnt) { printf("\nAll error logs:\n"); - /* print error logs again */ - for (i = 0; i < prog_test_cnt; i++) { - struct prog_test_def *test = &prog_test_defs[i]; - struct test_state *state = &test_states[i]; + /* print error logs again */ + for (i = 0; i < prog_test_cnt; i++) { + struct prog_test_def *test = &prog_test_defs[i]; + struct test_state *state = &test_states[i]; - if (!state->tested || !state->error_cnt) - continue; + if (!state->tested || !state->error_cnt) + continue; - dump_test_log(test, state, true); + dump_test_log(test, state, true, true); + } } printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", @@ -1154,6 +1371,91 @@ static void server_main(void) } } +static void worker_main_send_log(int sock, char *log_buf, size_t log_cnt) +{ + char *src; + size_t slen; + + src = log_buf; + slen = log_cnt; + while (slen) { + struct msg msg_log; + char *dest; + size_t len; + + memset(&msg_log, 0, sizeof(msg_log)); + msg_log.type = MSG_TEST_LOG; + dest = msg_log.test_log.log_buf; + len = slen >= MAX_LOG_TRUNK_SIZE ? MAX_LOG_TRUNK_SIZE : slen; + memcpy(dest, src, len); + + src += len; + slen -= len; + if (!slen) + msg_log.test_log.is_last = true; + + assert(send_message(sock, &msg_log) >= 0); + } +} + +static void free_subtest_state(struct subtest_state *state) +{ + if (state->log_buf) { + free(state->log_buf); + state->log_buf = NULL; + state->log_cnt = 0; + } + free(state->name); + state->name = NULL; +} + +static int worker_main_send_subtests(int sock, struct test_state *state) +{ + int i, result = 0; + struct msg msg; + struct subtest_state *subtest_state; + + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_SUBTEST_DONE; + + for (i = 0; i < state->subtest_num; i++) { + subtest_state = &state->subtest_states[i]; + + msg.subtest_done.num = i; + + strncpy(msg.subtest_done.name, subtest_state->name, MAX_SUBTEST_NAME); + + msg.subtest_done.error_cnt = subtest_state->error_cnt; + msg.subtest_done.skipped = subtest_state->skipped; + msg.subtest_done.filtered = subtest_state->filtered; + msg.subtest_done.have_log = false; + + if (verbose() || state->force_log || subtest_state->error_cnt) { + if (subtest_state->log_cnt) + msg.subtest_done.have_log = true; + } + + if (send_message(sock, &msg) < 0) { + perror("Fail to send message done"); + result = 1; + goto out; + } + + /* send logs */ + if (msg.subtest_done.have_log) + worker_main_send_log(sock, subtest_state->log_buf, subtest_state->log_cnt); + + free_subtest_state(subtest_state); + free(subtest_state->name); + } + +out: + for (; i < state->subtest_num; i++) + free_subtest_state(&state->subtest_states[i]); + free(state->subtest_states); + return result; +} + static int worker_main(int sock) { save_netns(); @@ -1172,10 +1474,10 @@ static int worker_main(int sock) env.worker_id); goto out; case MSG_DO_TEST: { - int test_to_run = msg.do_test.test_num; + int test_to_run = msg.do_test.num; struct 
prog_test_def *test = &prog_test_defs[test_to_run]; struct test_state *state = &test_states[test_to_run]; - struct msg msg_done; + struct msg msg; if (env.debug) fprintf(stderr, "[%d]: #%d:%s running.\n", @@ -1185,54 +1487,38 @@ static int worker_main(int sock) run_one_test(test_to_run); - memset(&msg_done, 0, sizeof(msg_done)); - msg_done.type = MSG_TEST_DONE; - msg_done.test_done.test_num = test_to_run; - msg_done.test_done.error_cnt = state->error_cnt; - msg_done.test_done.skip_cnt = state->skip_cnt; - msg_done.test_done.sub_succ_cnt = state->sub_succ_cnt; - msg_done.test_done.have_log = false; + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_TEST_DONE; + msg.test_done.num = test_to_run; + msg.test_done.error_cnt = state->error_cnt; + msg.test_done.skip_cnt = state->skip_cnt; + msg.test_done.sub_succ_cnt = state->sub_succ_cnt; + msg.test_done.subtest_num = state->subtest_num; + msg.test_done.have_log = false; - if (env.verbosity > VERBOSE_NONE || state->force_log || state->error_cnt) { + if (verbose() || state->force_log || state->error_cnt) { if (state->log_cnt) - msg_done.test_done.have_log = true; + msg.test_done.have_log = true; } - if (send_message(sock, &msg_done) < 0) { + if (send_message(sock, &msg) < 0) { perror("Fail to send message done"); goto out; } /* send logs */ - if (msg_done.test_done.have_log) { - char *src; - size_t slen; - - src = state->log_buf; - slen = state->log_cnt; - while (slen) { - struct msg msg_log; - char *dest; - size_t len; - - memset(&msg_log, 0, sizeof(msg_log)); - msg_log.type = MSG_TEST_LOG; - dest = msg_log.test_log.log_buf; - len = slen >= MAX_LOG_TRUNK_SIZE ? MAX_LOG_TRUNK_SIZE : slen; - memcpy(dest, src, len); - - src += len; - slen -= len; - if (!slen) - msg_log.test_log.is_last = true; - - assert(send_message(sock, &msg_log) >= 0); - } - } + if (msg.test_done.have_log) + worker_main_send_log(sock, state->log_buf, state->log_cnt); + if (state->log_buf) { free(state->log_buf); state->log_buf = NULL; state->log_cnt = 0; } + + if (state->subtest_num) + if (worker_main_send_subtests(sock, state)) + goto out; + if (env.debug) fprintf(stderr, "[%d]: #%d:%s done.\n", env.worker_id, @@ -1250,6 +1536,23 @@ out: return 0; } +static void free_test_states(void) +{ + int i, j; + + for (i = 0; i < ARRAY_SIZE(prog_test_defs); i++) { + struct test_state *test_state = &test_states[i]; + + for (j = 0; j < test_state->subtest_num; j++) + free_subtest_state(&test_state->subtest_states[j]); + + free(test_state->subtest_states); + free(test_state->log_buf); + test_state->subtest_states = NULL; + test_state->log_buf = NULL; + } +} + int main(int argc, char **argv) { static const struct argp argp = { @@ -1396,6 +1699,8 @@ out: unload_bpf_testmod(); free_test_selector(&env.test_selector); + free_test_selector(&env.subtest_selector); + free_test_states(); if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0) return EXIT_NO_TEST; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index d3fee3b98888..5fe1365c2bb1 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -64,23 +64,32 @@ struct test_selector { int num_set_len; }; +struct subtest_state { + char *name; + size_t log_cnt; + char *log_buf; + int error_cnt; + bool skipped; + bool filtered; + + FILE *stdout; +}; + struct test_state { bool tested; bool force_log; int error_cnt; int skip_cnt; - int subtest_skip_cnt; int sub_succ_cnt; - char *subtest_name; + struct subtest_state *subtest_states; int subtest_num; - /* store 
counts before subtest started */ - int old_error_cnt; - size_t log_cnt; char *log_buf; + + FILE *stdout; }; struct test_env { @@ -96,7 +105,8 @@ struct test_env { bool list_test_names; struct prog_test_def *test; /* current running test */ - struct test_state *test_state; /* current running test result */ + struct test_state *test_state; /* current running test state */ + struct subtest_state *subtest_state; /* current running subtest state */ FILE *stdout; FILE *stderr; @@ -116,29 +126,40 @@ struct test_env { }; #define MAX_LOG_TRUNK_SIZE 8192 +#define MAX_SUBTEST_NAME 1024 enum msg_type { MSG_DO_TEST = 0, MSG_TEST_DONE = 1, MSG_TEST_LOG = 2, + MSG_SUBTEST_DONE = 3, MSG_EXIT = 255, }; struct msg { enum msg_type type; union { struct { - int test_num; + int num; } do_test; struct { - int test_num; + int num; int sub_succ_cnt; int error_cnt; int skip_cnt; bool have_log; + int subtest_num; } test_done; struct { char log_buf[MAX_LOG_TRUNK_SIZE + 1]; bool is_last; } test_log; + struct { + int num; + char name[MAX_SUBTEST_NAME + 1]; + int error_cnt; + bool skipped; + bool filtered; + bool have_log; + } subtest_done; }; }; diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index 2817d9948d59..e9ebc67d73f7 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -45,6 +45,7 @@ # 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet # 6) Forward the packet to the overlay tnl dev +BPF_PIN_TUNNEL_DIR="/sys/fs/bpf/tc/tunnel" PING_ARG="-c 3 -w 10 -q" ret=0 GREEN='\033[0;92m' @@ -155,52 +156,6 @@ add_ip6erspan_tunnel() ip link set dev $DEV up } -add_vxlan_tunnel() -{ - # Set static ARP entry here because iptables set-mark works - # on L3 packet, as a result not applying to ARP packets, - # causing errors at get_tunnel_{key/opt}. - - # at_ns0 namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE \ - id 2 dstport 4789 gbp remote 172.16.1.200 - ip netns exec at_ns0 \ - ip link set dev $DEV_NS address 52:54:00:d9:01:00 up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 \ - ip neigh add 10.1.1.200 lladdr 52:54:00:d9:02:00 dev $DEV_NS - ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF - - # root namespace - ip link add dev $DEV type $TYPE external gbp dstport 4789 - ip link set dev $DEV address 52:54:00:d9:02:00 up - ip addr add dev $DEV 10.1.1.200/24 - ip neigh add 10.1.1.100 lladdr 52:54:00:d9:01:00 dev $DEV -} - -add_ip6vxlan_tunnel() -{ - #ip netns exec at_ns0 ip -4 addr del 172.16.1.100 dev veth0 - ip netns exec at_ns0 ip -6 addr add ::11/96 dev veth0 - ip netns exec at_ns0 ip link set dev veth0 up - #ip -4 addr del 172.16.1.200 dev veth1 - ip -6 addr add dev veth1 ::22/96 - ip link set dev veth1 up - - # at_ns0 namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE id 22 dstport 4789 \ - local ::11 remote ::22 - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 ip link set dev $DEV_NS up - - # root namespace - ip link add dev $DEV type $TYPE external dstport 4789 - ip addr add dev $DEV 10.1.1.200/24 - ip link set dev $DEV up -} - add_geneve_tunnel() { # at_ns0 namespace @@ -403,58 +358,6 @@ test_ip6erspan() echo -e ${GREEN}"PASS: $TYPE"${NC} } -test_vxlan() -{ - TYPE=vxlan - DEV_NS=vxlan00 - DEV=vxlan11 - ret=0 - - check $TYPE - config_device - add_vxlan_tunnel - attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel - ping $PING_ARG 10.1.1.100 - check_err $? 
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_ip6vxlan() -{ - TYPE=vxlan - DEV_NS=ip6vxlan00 - DEV=ip6vxlan11 - ret=0 - - check $TYPE - config_device - add_ip6vxlan_tunnel - ip link set dev veth1 mtu 1500 - attach_bpf $DEV ip6vxlan_set_tunnel ip6vxlan_get_tunnel - # underlay - ping6 $PING_ARG ::11 - # ip4 over ip6 - ping $PING_ARG 10.1.1.100 - check_err $? - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: ip6$TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: ip6$TYPE"${NC} -} - test_geneve() { TYPE=geneve @@ -641,9 +544,11 @@ test_xfrm_tunnel() config_device > /sys/kernel/debug/tracing/trace setup_xfrm_tunnel + mkdir -p ${BPF_PIN_TUNNEL_DIR} + bpftool prog loadall ./test_tunnel_kern.o ${BPF_PIN_TUNNEL_DIR} tc qdisc add dev veth1 clsact - tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \ - sec xfrm_get_state + tc filter add dev veth1 proto ip ingress bpf da object-pinned \ + ${BPF_PIN_TUNNEL_DIR}/xfrm_get_state ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 sleep 1 grep "reqid 1" /sys/kernel/debug/tracing/trace @@ -666,13 +571,17 @@ attach_bpf() DEV=$1 SET=$2 GET=$3 + mkdir -p ${BPF_PIN_TUNNEL_DIR} + bpftool prog loadall ./test_tunnel_kern.o ${BPF_PIN_TUNNEL_DIR}/ tc qdisc add dev $DEV clsact - tc filter add dev $DEV egress bpf da obj test_tunnel_kern.o sec $SET - tc filter add dev $DEV ingress bpf da obj test_tunnel_kern.o sec $GET + tc filter add dev $DEV egress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$SET + tc filter add dev $DEV ingress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$GET } cleanup() { + rm -rf ${BPF_PIN_TUNNEL_DIR} + ip netns delete at_ns0 2> /dev/null ip link del veth1 2> /dev/null ip link del ipip11 2> /dev/null @@ -681,8 +590,6 @@ cleanup() ip link del gretap11 2> /dev/null ip link del ip6gre11 2> /dev/null ip link del ip6gretap11 2> /dev/null - ip link del vxlan11 2> /dev/null - ip link del ip6vxlan11 2> /dev/null ip link del geneve11 2> /dev/null ip link del ip6geneve11 2> /dev/null ip link del erspan11 2> /dev/null @@ -714,7 +621,6 @@ enable_debug() { echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control - echo 'file vxlan.c +p' > /sys/kernel/debug/dynamic_debug/control echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control } @@ -750,14 +656,6 @@ bpf_tunnel_test() test_ip6erspan v2 errors=$(( $errors + $? )) - echo "Testing VXLAN tunnel..." - test_vxlan - errors=$(( $errors + $? )) - - echo "Testing IP6VXLAN tunnel..." - test_ip6vxlan - errors=$(( $errors + $? )) - echo "Testing GENEVE tunnel..." test_geneve errors=$(( $errors + $? )) diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index cd7bf32e6a17..567500299231 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -43,7 +43,6 @@ # ** veth<xxxx> in root namespace # ** veth<yyyy> in af_xdp<xxxx> namespace # ** namespace af_xdp<xxxx> -# * create a spec file veth.spec that includes this run-time configuration # *** xxxx and yyyy are randomly generated 4 digit numbers used to avoid # conflict with any existing interface # * tests the veth and xsk layers of the topology @@ -77,7 +76,7 @@ . 
xsk_prereqs.sh -while getopts "cvD" flag +while getopts "vD" flag do case "${flag}" in v) verbose=1;; @@ -88,7 +87,7 @@ done TEST_NAME="PREREQUISITES" URANDOM=/dev/urandom -[ ! -e "${URANDOM}" ] && { echo "${URANDOM} not found. Skipping tests."; test_exit 1 1; } +[ ! -e "${URANDOM}" ] && { echo "${URANDOM} not found. Skipping tests."; test_exit $ksft_fail; } VETH0_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4) VETH0=ve${VETH0_POSTFIX} @@ -98,6 +97,13 @@ NS0=root NS1=af_xdp${VETH1_POSTFIX} MTU=1500 +trap ctrl_c INT + +function ctrl_c() { + cleanup_exit ${VETH0} ${VETH1} ${NS1} + exit 1 +} + setup_vethPairs() { if [[ $verbose -eq 1 ]]; then echo "setting up ${VETH0}: namespace: ${NS0}" @@ -110,6 +116,14 @@ setup_vethPairs() { if [[ $verbose -eq 1 ]]; then echo "setting up ${VETH1}: namespace: ${NS1}" fi + + if [[ $busy_poll -eq 1 ]]; then + echo 2 > /sys/class/net/${VETH0}/napi_defer_hard_irqs + echo 200000 > /sys/class/net/${VETH0}/gro_flush_timeout + echo 2 > /sys/class/net/${VETH1}/napi_defer_hard_irqs + echo 200000 > /sys/class/net/${VETH1}/gro_flush_timeout + fi + ip link set ${VETH1} netns ${NS1} ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU} ip link set ${VETH0} mtu ${MTU} @@ -130,17 +144,12 @@ if [ $retval -ne 0 ]; then exit $retval fi -echo "${VETH0}:${VETH1},${NS1}" > ${SPECFILE} - -validate_veth_spec_file - if [[ $verbose -eq 1 ]]; then - echo "Spec file created: ${SPECFILE}" - VERBOSE_ARG="-v" + ARGS+="-v " fi if [[ $dump_pkts -eq 1 ]]; then - DUMP_PKTS_ARG="-D" + ARGS="-D " fi test_status $retval "${TEST_NAME}" @@ -149,23 +158,31 @@ test_status $retval "${TEST_NAME}" statusList=() -TEST_NAME="XSK KSELFTESTS" +TEST_NAME="XSK_SELFTESTS_SOFTIRQ" execxdpxceiver -retval=$? -test_status $retval "${TEST_NAME}" -statusList+=($retval) +cleanup_exit ${VETH0} ${VETH1} ${NS1} +TEST_NAME="XSK_SELFTESTS_BUSY_POLL" +busy_poll=1 + +setup_vethPairs +execxdpxceiver ## END TESTS cleanup_exit ${VETH0} ${VETH1} ${NS1} -for _status in "${statusList[@]}" +failures=0 +echo -e "\nSummary:" +for i in "${!statusList[@]}" do - if [ $_status -ne 0 ]; then - test_exit $ksft_fail 0 + if [ ${statusList[$i]} -ne 0 ]; then + test_status ${statusList[$i]} ${nameList[$i]} + failures=1 fi done -test_exit $ksft_pass 0 +if [ $failures -eq 0 ]; then + echo "All tests successful!" 
+fi diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c index 9113834640e6..6914904344c0 100644 --- a/tools/testing/selftests/bpf/verifier/map_kptr.c +++ b/tools/testing/selftests/bpf/verifier/map_kptr.c @@ -212,13 +212,13 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 24), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 32), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .fixup_map_kptr = { 1 }, .result = REJECT, - .errstr = "access beyond struct prog_test_ref_kfunc at off 24 size 8", + .errstr = "access beyond struct prog_test_ref_kfunc at off 32 size 8", }, { "map_kptr: unref: inherit PTR_UNTRUSTED on struct walk", diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index cfcb031323c5..e5992a6b5e09 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -90,6 +90,8 @@ #include <string.h> #include <stddef.h> #include <sys/mman.h> +#include <sys/socket.h> +#include <sys/time.h> #include <sys/types.h> #include <sys/queue.h> #include <time.h> @@ -122,9 +124,17 @@ static void __exit_with_error(int error, const char *file, const char *func, int #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) #define mode_string(test) (test)->ifobj_tx->xdp_flags & XDP_FLAGS_SKB_MODE ? "SKB" : "DRV" +#define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : "" -#define print_ksft_result(test) \ - (ksft_test_result_pass("PASS: %s %s\n", mode_string(test), (test)->name)) +static void report_failure(struct test_spec *test) +{ + if (test->fail) + return; + + ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test), busy_poll_string(test), + test->name); + test->fail = true; +} static void memset32_htonl(void *dest, u32 val, u32 size) { @@ -264,6 +274,26 @@ static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size return 0; } +static void enable_busy_poll(struct xsk_socket_info *xsk) +{ + int sock_opt; + + sock_opt = 1; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + exit_with_error(errno); + + sock_opt = 20; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + exit_with_error(errno); + + sock_opt = BATCH_SIZE; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + exit_with_error(errno); +} + static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, struct ifobject *ifobject, bool shared) { @@ -287,8 +317,8 @@ static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_inf static struct option long_options[] = { {"interface", required_argument, 0, 'i'}, - {"queue", optional_argument, 0, 'q'}, - {"dump-pkts", optional_argument, 0, 'D'}, + {"busy-poll", no_argument, 0, 'b'}, + {"dump-pkts", no_argument, 0, 'D'}, {"verbose", no_argument, 0, 'v'}, {0, 0, 0, 0} }; @@ -299,9 +329,9 @@ static void usage(const char *prog) " Usage: %s [OPTIONS]\n" " Options:\n" " -i, --interface Use interface\n" - " -q, --queue=n Use queue n (default 0)\n" " -D, --dump-pkts Dump packets L2 - L5\n" - " -v, --verbose Verbose output\n"; + " -v, --verbose Verbose output\n" + " -b, --busy-poll Enable busy poll\n"; ksft_print_msg(str, prog); } @@ -347,7 +377,7 @@ 
static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj for (;;) { char *sptr, *token; - c = getopt_long(argc, argv, "i:Dv", long_options, &option_index); + c = getopt_long(argc, argv, "i:Dvb", long_options, &option_index); if (c == -1) break; @@ -373,6 +403,10 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj case 'v': opt_verbose = true; break; + case 'b': + ifobj_tx->busy_poll = true; + ifobj_rx->busy_poll = true; + break; default: usage(basename(argv[0])); ksft_exit_xfail(); @@ -390,8 +424,10 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, ifobj->xsk = &ifobj->xsk_arr[0]; ifobj->use_poll = false; - ifobj->pacing_on = true; + ifobj->use_fill_ring = true; + ifobj->release_rx = true; ifobj->pkt_stream = test->pkt_stream_default; + ifobj->validation_func = NULL; if (i == 0) { ifobj->rx_on = false; @@ -416,6 +452,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, test->current_step = 0; test->total_steps = 1; test->nb_sockets = 1; + test->fail = false; } static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, @@ -467,9 +504,10 @@ static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb) return &pkt_stream->pkts[pkt_nb]; } -static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream) +static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent) { while (pkt_stream->rx_pkt_nb < pkt_stream->nb_pkts) { + (*pkts_sent)++; if (pkt_stream->pkts[pkt_stream->rx_pkt_nb].valid) return &pkt_stream->pkts[pkt_stream->rx_pkt_nb++]; pkt_stream->rx_pkt_nb++; @@ -485,10 +523,16 @@ static void pkt_stream_delete(struct pkt_stream *pkt_stream) static void pkt_stream_restore_default(struct test_spec *test) { - if (test->ifobj_tx->pkt_stream != test->pkt_stream_default) { + struct pkt_stream *tx_pkt_stream = test->ifobj_tx->pkt_stream; + + if (tx_pkt_stream != test->pkt_stream_default) { pkt_stream_delete(test->ifobj_tx->pkt_stream); test->ifobj_tx->pkt_stream = test->pkt_stream_default; } + + if (test->ifobj_rx->pkt_stream != test->pkt_stream_default && + test->ifobj_rx->pkt_stream != tx_pkt_stream) + pkt_stream_delete(test->ifobj_rx->pkt_stream); test->ifobj_rx->pkt_stream = test->pkt_stream_default; } @@ -510,6 +554,16 @@ static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts) return pkt_stream; } +static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 len) +{ + pkt->addr = addr; + pkt->len = len; + if (len > umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 2 - umem->frame_headroom) + pkt->valid = false; + else + pkt->valid = true; +} + static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len) { struct pkt_stream *pkt_stream; @@ -521,14 +575,9 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb pkt_stream->nb_pkts = nb_pkts; for (i = 0; i < nb_pkts; i++) { - pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size; - pkt_stream->pkts[i].len = pkt_len; + pkt_set(umem, &pkt_stream->pkts[i], (i % umem->num_frames) * umem->frame_size, + pkt_len); pkt_stream->pkts[i].payload = i; - - if (pkt_len > umem->frame_size) - pkt_stream->pkts[i].valid = false; - else - pkt_stream->pkts[i].valid = true; } return pkt_stream; @@ -556,15 +605,27 @@ static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int off u32 i; pkt_stream = pkt_stream_clone(umem, 
test->pkt_stream_default); - for (i = 1; i < test->pkt_stream_default->nb_pkts; i += 2) { - pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size + offset; - pkt_stream->pkts[i].len = pkt_len; - } + for (i = 1; i < test->pkt_stream_default->nb_pkts; i += 2) + pkt_set(umem, &pkt_stream->pkts[i], + (i % umem->num_frames) * umem->frame_size + offset, pkt_len); test->ifobj_tx->pkt_stream = pkt_stream; test->ifobj_rx->pkt_stream = pkt_stream; } +static void pkt_stream_receive_half(struct test_spec *test) +{ + struct xsk_umem_info *umem = test->ifobj_rx->umem; + struct pkt_stream *pkt_stream = test->ifobj_tx->pkt_stream; + u32 i; + + test->ifobj_rx->pkt_stream = pkt_stream_generate(umem, pkt_stream->nb_pkts, + pkt_stream->pkts[0].len); + pkt_stream = test->ifobj_rx->pkt_stream; + for (i = 1; i < pkt_stream->nb_pkts; i += 2) + pkt_stream->pkts[i].valid = false; +} + static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb) { struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb); @@ -575,7 +636,7 @@ static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb) if (!pkt) return NULL; - if (!pkt->valid || pkt->len < PKT_SIZE) + if (!pkt->valid || pkt->len < MIN_PKT_SIZE) return pkt; data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr); @@ -662,8 +723,7 @@ static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt if (offset == expected_offset) return true; - ksft_test_result_fail("ERROR: [%s] expected [%u], got [%u]\n", __func__, expected_offset, - offset); + ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset); return false; } @@ -673,19 +733,18 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr)); if (!pkt) { - ksft_test_result_fail("ERROR: [%s] too many packets received\n", __func__); + ksft_print_msg("[%s] too many packets received\n", __func__); return false; } - if (len < PKT_SIZE) { - /*Do not try to verify packets that are smaller than minimum size. */ + if (len < MIN_PKT_SIZE || pkt->len < MIN_PKT_SIZE) { + /* Do not try to verify packets that are smaller than minimum size. 
*/ return true; } if (pkt->len != len) { - ksft_test_result_fail - ("ERROR: [%s] expected length [%d], got length [%d]\n", - __func__, pkt->len, len); + ksft_print_msg("[%s] expected length [%d], got length [%d]\n", + __func__, pkt->len, len); return false; } @@ -696,9 +755,8 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) pkt_dump(data, PKT_SIZE); if (pkt->payload != seqnum) { - ksft_test_result_fail - ("ERROR: [%s] expected seqnum [%d], got seqnum [%d]\n", - __func__, pkt->payload, seqnum); + ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n", + __func__, pkt->payload, seqnum); return false; } } else { @@ -716,12 +774,25 @@ static void kick_tx(struct xsk_socket_info *xsk) int ret; ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); - if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) + if (ret >= 0) return; + if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) { + usleep(100); + return; + } exit_with_error(errno); } -static void complete_pkts(struct xsk_socket_info *xsk, int batch_size) +static void kick_rx(struct xsk_socket_info *xsk) +{ + int ret; + + ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); + if (ret < 0) + exit_with_error(errno); +} + +static int complete_pkts(struct xsk_socket_info *xsk, int batch_size) { unsigned int rcvd; u32 idx; @@ -734,26 +805,45 @@ static void complete_pkts(struct xsk_socket_info *xsk, int batch_size) if (rcvd > xsk->outstanding_tx) { u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1); - ksft_test_result_fail("ERROR: [%s] Too many packets completed\n", - __func__); + ksft_print_msg("[%s] Too many packets completed\n", __func__); ksft_print_msg("Last completion address: %llx\n", addr); - return; + return TEST_FAILURE; } xsk_ring_cons__release(&xsk->umem->cq, rcvd); xsk->outstanding_tx -= rcvd; } + + return TEST_PASS; } -static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *xsk, - struct pollfd *fds) +static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds) { - struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream); + struct timeval tv_end, tv_now, tv_timeout = {RECV_TMOUT, 0}; + u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkts_sent = 0; + struct pkt_stream *pkt_stream = ifobj->pkt_stream; + struct xsk_socket_info *xsk = ifobj->xsk; struct xsk_umem_info *umem = xsk->umem; - u32 idx_rx = 0, idx_fq = 0, rcvd, i; + struct pkt *pkt; int ret; + ret = gettimeofday(&tv_now, NULL); + if (ret) + exit_with_error(errno); + timeradd(&tv_now, &tv_timeout, &tv_end); + + pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); while (pkt) { + ret = gettimeofday(&tv_now, NULL); + if (ret) + exit_with_error(errno); + if (timercmp(&tv_now, &tv_end, >)) { + ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__); + return TEST_FAILURE; + } + + kick_rx(xsk); + rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); if (!rcvd) { if (xsk_ring_prod__needs_wakeup(&umem->fq)) { @@ -764,54 +854,53 @@ static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info * continue; } - ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); - while (ret != rcvd) { - if (ret < 0) - exit_with_error(-ret); - if (xsk_ring_prod__needs_wakeup(&umem->fq)) { - ret = poll(fds, 1, POLL_TMOUT); + if (ifobj->use_fill_ring) { + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); + while (ret != rcvd) { if (ret < 0) exit_with_error(-ret); + if 
(xsk_ring_prod__needs_wakeup(&umem->fq)) { + ret = poll(fds, 1, POLL_TMOUT); + if (ret < 0) + exit_with_error(-ret); + } + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); } - ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); } for (i = 0; i < rcvd; i++) { const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); u64 addr = desc->addr, orig; - if (!pkt) { - ksft_test_result_fail("ERROR: [%s] Received too many packets.\n", - __func__); - ksft_print_msg("Last packet has addr: %llx len: %u\n", - addr, desc->len); - return; - } - orig = xsk_umem__extract_addr(addr); addr = xsk_umem__add_offset_to_addr(addr); - if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len)) - return; - if (!is_offset_correct(umem, pkt_stream, addr, pkt->addr)) - return; + if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) || + !is_offset_correct(umem, pkt_stream, addr, pkt->addr)) + return TEST_FAILURE; - *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; - pkt = pkt_stream_get_next_rx_pkt(pkt_stream); + if (ifobj->use_fill_ring) + *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; + pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); } - xsk_ring_prod__submit(&umem->fq, rcvd); - xsk_ring_cons__release(&xsk->rx, rcvd); + if (ifobj->use_fill_ring) + xsk_ring_prod__submit(&umem->fq, rcvd); + if (ifobj->release_rx) + xsk_ring_cons__release(&xsk->rx, rcvd); pthread_mutex_lock(&pacing_mutex); - pkts_in_flight -= rcvd; + pkts_in_flight -= pkts_sent; if (pkts_in_flight < umem->num_frames) pthread_cond_signal(&pacing_cond); pthread_mutex_unlock(&pacing_mutex); + pkts_sent = 0; } + + return TEST_PASS; } -static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb) +static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb) { struct xsk_socket_info *xsk = ifobject->xsk; u32 i, idx, valid_pkts = 0; @@ -821,21 +910,22 @@ static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb) for (i = 0; i < BATCH_SIZE; i++) { struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); - struct pkt *pkt = pkt_generate(ifobject, pkt_nb); + struct pkt *pkt = pkt_generate(ifobject, *pkt_nb); if (!pkt) break; tx_desc->addr = pkt->addr; tx_desc->len = pkt->len; - pkt_nb++; + (*pkt_nb)++; if (pkt->valid) valid_pkts++; } pthread_mutex_lock(&pacing_mutex); pkts_in_flight += valid_pkts; - if (ifobject->pacing_on && pkts_in_flight >= ifobject->umem->num_frames - BATCH_SIZE) { + /* pkts_in_flight might be negative if many invalid packets are sent */ + if (pkts_in_flight >= (int)(ifobject->umem->num_frames - BATCH_SIZE)) { kick_tx(xsk); pthread_cond_wait(&pacing_cond, &pacing_mutex); } @@ -843,10 +933,11 @@ static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb) xsk_ring_prod__submit(&xsk->tx, i); xsk->outstanding_tx += valid_pkts; - complete_pkts(xsk, i); + if (complete_pkts(xsk, i)) + return TEST_FAILURE; usleep(10); - return i; + return TEST_PASS; } static void wait_for_tx_completion(struct xsk_socket_info *xsk) @@ -855,7 +946,7 @@ static void wait_for_tx_completion(struct xsk_socket_info *xsk) complete_pkts(xsk, BATCH_SIZE); } -static void send_pkts(struct ifobject *ifobject) +static int send_pkts(struct test_spec *test, struct ifobject *ifobject) { struct pollfd fds = { }; u32 pkt_cnt = 0; @@ -864,6 +955,8 @@ static void send_pkts(struct ifobject *ifobject) fds.events = POLLOUT; while (pkt_cnt < ifobject->pkt_stream->nb_pkts) { + int err; + if (ifobject->use_poll) { int ret; @@ -875,58 +968,95 @@ static void send_pkts(struct ifobject *ifobject) continue; } - pkt_cnt += 
__send_pkts(ifobject, pkt_cnt); + err = __send_pkts(ifobject, &pkt_cnt); + if (err || test->fail) + return TEST_FAILURE; } wait_for_tx_completion(ifobject->xsk); + return TEST_PASS; } -static bool rx_stats_are_valid(struct ifobject *ifobject) +static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats) +{ + int fd = xsk_socket__fd(xsk), err; + socklen_t optlen, expected_len; + + optlen = sizeof(*stats); + err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen); + if (err) { + ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n", + __func__, -err, strerror(-err)); + return TEST_FAILURE; + } + + expected_len = sizeof(struct xdp_statistics); + if (optlen != expected_len) { + ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n", + __func__, expected_len, optlen); + return TEST_FAILURE; + } + + return TEST_PASS; +} + +static int validate_rx_dropped(struct ifobject *ifobject) { - u32 xsk_stat = 0, expected_stat = ifobject->pkt_stream->nb_pkts; struct xsk_socket *xsk = ifobject->xsk->xsk; - int fd = xsk_socket__fd(xsk); struct xdp_statistics stats; - socklen_t optlen; int err; - optlen = sizeof(stats); - err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); - if (err) { - ksft_test_result_fail("ERROR Rx: [%s] getsockopt(XDP_STATISTICS) error %u %s\n", - __func__, -err, strerror(-err)); - return true; - } + kick_rx(ifobject->xsk); - if (optlen == sizeof(struct xdp_statistics)) { - switch (stat_test_type) { - case STAT_TEST_RX_DROPPED: - xsk_stat = stats.rx_dropped; - break; - case STAT_TEST_TX_INVALID: - return true; - case STAT_TEST_RX_FULL: - xsk_stat = stats.rx_ring_full; - if (ifobject->umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) - expected_stat = ifobject->umem->num_frames - RX_FULL_RXQSIZE; - else - expected_stat = XSK_RING_PROD__DEFAULT_NUM_DESCS - RX_FULL_RXQSIZE; - break; - case STAT_TEST_RX_FILL_EMPTY: - xsk_stat = stats.rx_fill_ring_empty_descs; - break; - default: - break; - } + err = get_xsk_stats(xsk, &stats); + if (err) + return TEST_FAILURE; - if (xsk_stat == expected_stat) - return true; - } + if (stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2) + return TEST_PASS; - return false; + return TEST_FAILURE; } -static void tx_stats_validate(struct ifobject *ifobject) +static int validate_rx_full(struct ifobject *ifobject) +{ + struct xsk_socket *xsk = ifobject->xsk->xsk; + struct xdp_statistics stats; + int err; + + usleep(1000); + kick_rx(ifobject->xsk); + + err = get_xsk_stats(xsk, &stats); + if (err) + return TEST_FAILURE; + + if (stats.rx_ring_full) + return TEST_PASS; + + return TEST_FAILURE; +} + +static int validate_fill_empty(struct ifobject *ifobject) +{ + struct xsk_socket *xsk = ifobject->xsk->xsk; + struct xdp_statistics stats; + int err; + + usleep(1000); + kick_rx(ifobject->xsk); + + err = get_xsk_stats(xsk, &stats); + if (err) + return TEST_FAILURE; + + if (stats.rx_fill_ring_empty_descs) + return TEST_PASS; + + return TEST_FAILURE; +} + +static int validate_tx_invalid_descs(struct ifobject *ifobject) { struct xsk_socket *xsk = ifobject->xsk->xsk; int fd = xsk_socket__fd(xsk); @@ -937,16 +1067,18 @@ static void tx_stats_validate(struct ifobject *ifobject) optlen = sizeof(stats); err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); if (err) { - ksft_test_result_fail("ERROR Tx: [%s] getsockopt(XDP_STATISTICS) error %u %s\n", - __func__, -err, strerror(-err)); - return; + ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n", + __func__, -err, strerror(-err)); + return 
TEST_FAILURE; } - if (stats.tx_invalid_descs == ifobject->pkt_stream->nb_pkts) - return; + if (stats.tx_invalid_descs != ifobject->pkt_stream->nb_pkts / 2) { + ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n", + __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts); + return TEST_FAILURE; + } - ksft_test_result_fail("ERROR: [%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n", - __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts); + return TEST_PASS; } static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) @@ -984,6 +1116,9 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) exit_with_error(-ret); usleep(USLEEP_MAX); } + + if (ifobject->busy_poll) + enable_busy_poll(&ifobject->xsk_arr[i]); } ifobject->xsk = &ifobject->xsk_arr[0]; @@ -1016,18 +1151,21 @@ static void *worker_testapp_validate_tx(void *arg) { struct test_spec *test = (struct test_spec *)arg; struct ifobject *ifobject = test->ifobj_tx; + int err; if (test->current_step == 1) thread_common_ops(test, ifobject); print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts, ifobject->ifname); - send_pkts(ifobject); + err = send_pkts(test, ifobject); - if (stat_test_type == STAT_TEST_TX_INVALID) - tx_stats_validate(ifobject); + if (!err && ifobject->validation_func) + err = ifobject->validation_func(ifobject); + if (err) + report_failure(test); - if (test->total_steps == test->current_step) + if (test->total_steps == test->current_step || err) testapp_cleanup_xsk_res(ifobject); pthread_exit(NULL); } @@ -1068,6 +1206,7 @@ static void *worker_testapp_validate_rx(void *arg) struct test_spec *test = (struct test_spec *)arg; struct ifobject *ifobject = test->ifobj_rx; struct pollfd fds = { }; + int err; if (test->current_step == 1) thread_common_ops(test, ifobject); @@ -1079,18 +1218,23 @@ static void *worker_testapp_validate_rx(void *arg) pthread_barrier_wait(&barr); - if (test_type == TEST_TYPE_STATS) - while (!rx_stats_are_valid(ifobject)) - continue; - else - receive_pkts(ifobject->pkt_stream, ifobject->xsk, &fds); + err = receive_pkts(ifobject, &fds); - if (test->total_steps == test->current_step) + if (!err && ifobject->validation_func) + err = ifobject->validation_func(ifobject); + if (err) { + report_failure(test); + pthread_mutex_lock(&pacing_mutex); + pthread_cond_signal(&pacing_cond); + pthread_mutex_unlock(&pacing_mutex); + } + + if (test->total_steps == test->current_step || err) testapp_cleanup_xsk_res(ifobject); pthread_exit(NULL); } -static void testapp_validate_traffic(struct test_spec *test) +static int testapp_validate_traffic(struct test_spec *test) { struct ifobject *ifobj_tx = test->ifobj_tx; struct ifobject *ifobj_rx = test->ifobj_rx; @@ -1115,6 +1259,8 @@ static void testapp_validate_traffic(struct test_spec *test) pthread_join(t1, NULL); pthread_join(t0, NULL); + + return !!test->fail; } static void testapp_teardown(struct test_spec *test) @@ -1123,7 +1269,8 @@ static void testapp_teardown(struct test_spec *test) test_spec_set_name(test, "TEARDOWN"); for (i = 0; i < MAX_TEARDOWN_ITER; i++) { - testapp_validate_traffic(test); + if (testapp_validate_traffic(test)) + return; test_spec_reset(test); } } @@ -1146,7 +1293,8 @@ static void testapp_bidi(struct test_spec *test) test->ifobj_tx->rx_on = true; test->ifobj_rx->tx_on = true; test->total_steps = 2; - testapp_validate_traffic(test); + if (testapp_validate_traffic(test)) + return; print_verbose("Switching Tx/Rx vectors\n"); 
swap_directions(&test->ifobj_rx, &test->ifobj_tx); @@ -1174,7 +1322,8 @@ static void testapp_bpf_res(struct test_spec *test) test_spec_set_name(test, "BPF_RES"); test->total_steps = 2; test->nb_sockets = 2; - testapp_validate_traffic(test); + if (testapp_validate_traffic(test)) + return; swap_xsk_resources(test->ifobj_tx, test->ifobj_rx); testapp_validate_traffic(test); @@ -1187,53 +1336,58 @@ static void testapp_headroom(struct test_spec *test) testapp_validate_traffic(test); } -static void testapp_stats(struct test_spec *test) +static void testapp_stats_rx_dropped(struct test_spec *test) { - int i; + test_spec_set_name(test, "STAT_RX_DROPPED"); + test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size - + XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3; + pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0); + pkt_stream_receive_half(test); + test->ifobj_rx->validation_func = validate_rx_dropped; + testapp_validate_traffic(test); +} - for (i = 0; i < STAT_TEST_TYPE_MAX; i++) { - test_spec_reset(test); - stat_test_type = i; - /* No or few packets will be received so cannot pace packets */ - test->ifobj_tx->pacing_on = false; - - switch (stat_test_type) { - case STAT_TEST_RX_DROPPED: - test_spec_set_name(test, "STAT_RX_DROPPED"); - test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size - - XDP_PACKET_HEADROOM - 1; - testapp_validate_traffic(test); - break; - case STAT_TEST_RX_FULL: - test_spec_set_name(test, "STAT_RX_FULL"); - test->ifobj_rx->xsk->rxqsize = RX_FULL_RXQSIZE; - testapp_validate_traffic(test); - break; - case STAT_TEST_TX_INVALID: - test_spec_set_name(test, "STAT_TX_INVALID"); - pkt_stream_replace(test, DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE); - testapp_validate_traffic(test); +static void testapp_stats_tx_invalid_descs(struct test_spec *test) +{ + test_spec_set_name(test, "STAT_TX_INVALID"); + pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0); + test->ifobj_tx->validation_func = validate_tx_invalid_descs; + testapp_validate_traffic(test); - pkt_stream_restore_default(test); - break; - case STAT_TEST_RX_FILL_EMPTY: - test_spec_set_name(test, "STAT_RX_FILL_EMPTY"); - test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, 0, - MIN_PKT_SIZE); - if (!test->ifobj_rx->pkt_stream) - exit_with_error(ENOMEM); - test->ifobj_rx->pkt_stream->use_addr_for_fill = true; - testapp_validate_traffic(test); - - pkt_stream_restore_default(test); - break; - default: - break; - } - } + pkt_stream_restore_default(test); +} - /* To only see the whole stat set being completed unless an individual test fails. 
*/ - test_spec_set_name(test, "STATS"); +static void testapp_stats_rx_full(struct test_spec *test) +{ + test_spec_set_name(test, "STAT_RX_FULL"); + pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE); + test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, + DEFAULT_UMEM_BUFFERS, PKT_SIZE); + if (!test->ifobj_rx->pkt_stream) + exit_with_error(ENOMEM); + + test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS; + test->ifobj_rx->release_rx = false; + test->ifobj_rx->validation_func = validate_rx_full; + testapp_validate_traffic(test); + + pkt_stream_restore_default(test); +} + +static void testapp_stats_fill_empty(struct test_spec *test) +{ + test_spec_set_name(test, "STAT_RX_FILL_EMPTY"); + pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE); + test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, + DEFAULT_UMEM_BUFFERS, PKT_SIZE); + if (!test->ifobj_rx->pkt_stream) + exit_with_error(ENOMEM); + + test->ifobj_rx->use_fill_ring = false; + test->ifobj_rx->validation_func = validate_fill_empty; + testapp_validate_traffic(test); + + pkt_stream_restore_default(test); } /* Simple test */ @@ -1282,10 +1436,10 @@ static void testapp_single_pkt(struct test_spec *test) static void testapp_invalid_desc(struct test_spec *test) { struct pkt pkts[] = { - /* Zero packet length at address zero allowed */ - {0, 0, 0, true}, - /* Zero packet length allowed */ - {0x1000, 0, 0, true}, + /* Zero packet address allowed */ + {0, PKT_SIZE, 0, true}, + /* Allowed packet */ + {0x1000, PKT_SIZE, 0, true}, /* Straddling the start of umem */ {-2, PKT_SIZE, 0, false}, /* Packet too large */ @@ -1338,14 +1492,18 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char * static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type) { - test_type = type; - - /* reset defaults after potential previous test */ - stat_test_type = -1; - - switch (test_type) { - case TEST_TYPE_STATS: - testapp_stats(test); + switch (type) { + case TEST_TYPE_STATS_RX_DROPPED: + testapp_stats_rx_dropped(test); + break; + case TEST_TYPE_STATS_TX_INVALID_DESCS: + testapp_stats_tx_invalid_descs(test); + break; + case TEST_TYPE_STATS_RX_FULL: + testapp_stats_rx_full(test); + break; + case TEST_TYPE_STATS_FILL_EMPTY: + testapp_stats_fill_empty(test); break; case TEST_TYPE_TEARDOWN: testapp_teardown(test); @@ -1368,7 +1526,7 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_ test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE"); test->ifobj_tx->umem->frame_size = 2048; test->ifobj_rx->umem->frame_size = 2048; - pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE); + pkt_stream_replace(test, DEFAULT_PKT_CNT, PKT_SIZE); testapp_validate_traffic(test); pkt_stream_restore_default(test); @@ -1410,7 +1568,9 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_ break; } - print_ksft_result(test); + if (!test->fail) + ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test), + test->name); } static struct ifobject *ifobject_create(void) @@ -1449,8 +1609,8 @@ int main(int argc, char **argv) { struct pkt_stream *pkt_stream_default; struct ifobject *ifobj_tx, *ifobj_rx; + u32 i, j, failed_tests = 0; struct test_spec test; - u32 i, j; /* Use libbpf 1.0 API mode */ libbpf_set_strict_mode(LIBBPF_STRICT_ALL); @@ -1489,12 +1649,17 @@ int main(int argc, char **argv) test_spec_init(&test, ifobj_tx, ifobj_rx, i); 
run_pkt_test(&test, i, j); usleep(USLEEP_MAX); + + if (test.fail) + failed_tests++; } pkt_stream_delete(pkt_stream_default); ifobject_delete(ifobj_tx); ifobject_delete(ifobj_rx); - ksft_exit_pass(); - return 0; + if (failed_tests) + ksft_exit_fail(); + else + ksft_exit_pass(); } diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h index 62a3e6388632..8f672b0fe0e1 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ b/tools/testing/selftests/bpf/xdpxceiver.h @@ -17,6 +17,16 @@ #define PF_XDP AF_XDP #endif +#ifndef SO_BUSY_POLL_BUDGET +#define SO_BUSY_POLL_BUDGET 70 +#endif + +#ifndef SO_PREFER_BUSY_POLL +#define SO_PREFER_BUSY_POLL 69 +#endif + +#define TEST_PASS 0 +#define TEST_FAILURE -1 #define MAX_INTERFACES 2 #define MAX_INTERFACE_NAME_CHARS 7 #define MAX_INTERFACES_NAMESPACE_CHARS 10 @@ -25,9 +35,10 @@ #define MAX_TEARDOWN_ITER 10 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ sizeof(struct udphdr)) -#define MIN_PKT_SIZE 64 +#define MIN_ETH_PKT_SIZE 64 #define ETH_FCS_SIZE 4 -#define PKT_SIZE (MIN_PKT_SIZE - ETH_FCS_SIZE) +#define MIN_PKT_SIZE (MIN_ETH_PKT_SIZE - ETH_FCS_SIZE) +#define PKT_SIZE (MIN_PKT_SIZE) #define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) #define IP_PKT_VER 0x4 #define IP_PKT_TOS 0x9 @@ -37,6 +48,7 @@ #define SOCK_RECONF_CTR 10 #define BATCH_SIZE 64 #define POLL_TMOUT 1000 +#define RECV_TMOUT 3 #define DEFAULT_PKT_CNT (4 * 1024) #define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4) #define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE) @@ -64,24 +76,16 @@ enum test_type { TEST_TYPE_HEADROOM, TEST_TYPE_TEARDOWN, TEST_TYPE_BIDI, - TEST_TYPE_STATS, + TEST_TYPE_STATS_RX_DROPPED, + TEST_TYPE_STATS_TX_INVALID_DESCS, + TEST_TYPE_STATS_RX_FULL, + TEST_TYPE_STATS_FILL_EMPTY, TEST_TYPE_BPF_RES, TEST_TYPE_MAX }; -enum stat_test_type { - STAT_TEST_RX_DROPPED, - STAT_TEST_TX_INVALID, - STAT_TEST_RX_FULL, - STAT_TEST_RX_FILL_EMPTY, - STAT_TEST_TYPE_MAX -}; - static bool opt_pkt_dump; -static int test_type; - static bool opt_verbose; -static int stat_test_type; struct xsk_umem_info { struct xsk_ring_prod fq; @@ -117,6 +121,8 @@ struct pkt_stream { bool use_addr_for_fill; }; +struct ifobject; +typedef int (*validation_func_t)(struct ifobject *ifobj); typedef void *(*thread_func_t)(void *arg); struct ifobject { @@ -126,6 +132,7 @@ struct ifobject { struct xsk_socket_info *xsk_arr; struct xsk_umem_info *umem; thread_func_t func_ptr; + validation_func_t validation_func; struct pkt_stream *pkt_stream; int ns_fd; int xsk_map_fd; @@ -138,7 +145,9 @@ struct ifobject { bool tx_on; bool rx_on; bool use_poll; - bool pacing_on; + bool busy_poll; + bool use_fill_ring; + bool release_rx; u8 dst_mac[ETH_ALEN]; u8 src_mac[ETH_ALEN]; }; @@ -150,6 +159,7 @@ struct test_spec { u16 total_steps; u16 current_step; u16 nb_sockets; + bool fail; char name[MAX_TEST_NAME_SIZE]; }; @@ -157,6 +167,6 @@ pthread_barrier_t barr; pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER; pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER; -u32 pkts_in_flight; +int pkts_in_flight; #endif /* XDPXCEIVER_H */ diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh index bf29d2549bee..684e813803ec 100755 --- a/tools/testing/selftests/bpf/xsk_prereqs.sh +++ b/tools/testing/selftests/bpf/xsk_prereqs.sh @@ -8,7 +8,6 @@ ksft_xfail=2 ksft_xpass=3 ksft_skip=4 -SPECFILE=veth.spec XSKOBJ=xdpxceiver validate_root_exec() @@ -16,7 +15,7 @@ validate_root_exec() msg="skip all tests:" if [ $UID 
!= 0 ]; then echo $msg must be run as root >&2 - test_exit $ksft_fail 2 + test_exit $ksft_fail else return $ksft_pass fi } @@ -27,39 +26,31 @@ validate_veth_support() { msg="skip all tests:" if [ $(ip link add $1 type veth 2>/dev/null; echo $?;) != 0 ]; then echo $msg veth kernel support not available >&2 - test_exit $ksft_skip 1 + test_exit $ksft_skip else ip link del $1 return $ksft_pass fi } -validate_veth_spec_file() -{ - if [ ! -f ${SPECFILE} ]; then - test_exit $ksft_skip 1 - fi -} - test_status() { statusval=$1 - if [ $statusval -eq 2 ]; then - echo -e "$2: [ FAIL ]" - elif [ $statusval -eq 1 ]; then - echo -e "$2: [ SKIPPED ]" - elif [ $statusval -eq 0 ]; then - echo -e "$2: [ PASS ]" + if [ $statusval -eq $ksft_fail ]; then + echo "$2: [ FAIL ]" + elif [ $statusval -eq $ksft_skip ]; then + echo "$2: [ SKIPPED ]" + elif [ $statusval -eq $ksft_pass ]; then + echo "$2: [ PASS ]" fi } test_exit() { - retval=$1 - if [ $2 -ne 0 ]; then - test_status $2 $(basename $0) + if [ $1 -ne 0 ]; then + test_status $1 $(basename $0) fi - exit $retval + exit 1 } clear_configs() @@ -74,9 +65,6 @@ clear_configs() #veth node inside NS won't get removed so we explicitly remove it [ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] && { ip link del $1; } - if [ -f ${SPECFILE} ]; then - rm -f ${SPECFILE} - fi } cleanup_exit() @@ -86,10 +74,19 @@ cleanup_exit() validate_ip_utility() { - [ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip 1; } + [ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip; } } execxdpxceiver() { - ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${VERBOSE_ARG} ${DUMP_PKTS_ARG} + if [[ $busy_poll -eq 1 ]]; then + ARGS+="-b " + fi + + ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${ARGS} + + retval=$? + test_status $retval "${TEST_NAME}" + statusList+=($retval) + nameList+=(${TEST_NAME}) }
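
The xdpxceiver rework above replaces the global stat_test_type switch with a per-test validation callback: each stats test registers one of the new validate_*() functions on the relevant ifobject, and the Tx/Rx worker threads call it once traffic has completed, folding the result into TEST_PASS/TEST_FAILURE. A minimal, self-contained sketch of that callback pattern follows; the struct below is a simplified stand-in for the real ifobject in xdpxceiver.h, and the counter fields are placeholders for the XDP_STATISTICS values read via getsockopt().

/* Sketch of the per-test validation callback pattern used by the reworked
 * xdpxceiver. Simplified stand-in types; the real ones live in xdpxceiver.h.
 */
#include <stdio.h>

#define TEST_PASS	0
#define TEST_FAILURE	-1

struct ifobject;
typedef int (*validation_func_t)(struct ifobject *ifobj);

struct ifobject {
	validation_func_t validation_func;
	unsigned int rx_dropped;	/* stand-in for stats.rx_dropped */
	unsigned int nb_pkts;		/* stand-in for pkt_stream->nb_pkts */
};

/* Mirrors validate_rx_dropped(): half of the stream is expected to be dropped. */
static int validate_rx_dropped(struct ifobject *ifobj)
{
	return ifobj->rx_dropped == ifobj->nb_pkts / 2 ? TEST_PASS : TEST_FAILURE;
}

/* Mirrors the tail of the worker threads: run the registered check, if any. */
static int run_validation(struct ifobject *ifobj)
{
	return ifobj->validation_func ? ifobj->validation_func(ifobj) : TEST_PASS;
}

int main(void)
{
	struct ifobject rx = { .validation_func = validate_rx_dropped,
			       .rx_dropped = 2048, .nb_pkts = 4096 };

	printf("STAT_RX_DROPPED: %s\n",
	       run_validation(&rx) == TEST_PASS ? "PASS" : "FAIL");
	return 0;
}

The other stats checks follow the same shape: validate_rx_full(), validate_fill_empty() and validate_tx_invalid_descs() differ only in which XDP_STATISTICS counter they compare and against what.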
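
The busy-poll mode wired into the script (-b) and the new SO_* fallbacks in xdpxceiver.h depend on an enable_busy_poll() helper whose body is not part of the hunks shown here. On kernels that support preferred busy polling, such a helper can be built from three plain SOL_SOCKET options; the sketch below is an assumption about its shape rather than the selftest's actual implementation, and the 20 µs timeout and 64-descriptor budget are illustrative values only.

/* Hedged sketch: enable preferred busy polling on an already-created AF_XDP
 * socket fd using standard SOL_SOCKET options. The fallback defines mirror
 * the ones added to xdpxceiver.h for older uapi headers.
 */
#include <errno.h>
#include <sys/socket.h>

#ifndef SO_BUSY_POLL
#define SO_BUSY_POLL 46
#endif
#ifndef SO_PREFER_BUSY_POLL
#define SO_PREFER_BUSY_POLL 69
#endif
#ifndef SO_BUSY_POLL_BUDGET
#define SO_BUSY_POLL_BUDGET 70
#endif

int enable_busy_poll_sketch(int fd)
{
	int sock_opt;

	sock_opt = 1;	/* ask the kernel to prefer busy polling on this socket */
	if (setsockopt(fd, SOL_SOCKET, SO_PREFER_BUSY_POLL,
		       &sock_opt, sizeof(sock_opt)) < 0)
		return -errno;

	sock_opt = 20;	/* busy-poll timeout in microseconds (illustrative) */
	if (setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL,
		       &sock_opt, sizeof(sock_opt)) < 0)
		return -errno;

	sock_opt = 64;	/* descriptors handled per busy-poll budget, e.g. BATCH_SIZE (illustrative) */
	if (setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL_BUDGET,
		       &sock_opt, sizeof(sock_opt)) < 0)
		return -errno;

	return 0;
}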