121 files changed, 7422 insertions, 1582 deletions
diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst index 5300837ac2c9..1de6a57c7e1e 100644 --- a/Documentation/bpf/instruction-set.rst +++ b/Documentation/bpf/instruction-set.rst @@ -130,7 +130,7 @@ Byte swap instructions The byte swap instructions use an instruction class of ``BFP_ALU`` and a 4-bit code field of ``BPF_END``. -The byte swap instructions instructions operate on the destination register +The byte swap instructions operate on the destination register only and do not use a separate source register or immediate value. The 1-bit source operand field in the opcode is used to to select what byte @@ -157,7 +157,7 @@ Examples: dst_reg = htobe64(dst_reg) ``BPF_FROM_LE`` and ``BPF_FROM_BE`` exist as aliases for ``BPF_TO_LE`` and -``BPF_TO_LE`` respetively. +``BPF_TO_BE`` respectively. Jump instructions diff --git a/MAINTAINERS b/MAINTAINERS index 234380c959c8..a73058b5cc3a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13799,6 +13799,7 @@ F: include/net/mptcp.h F: include/trace/events/mptcp.h F: include/uapi/linux/mptcp.h F: net/mptcp/ +F: tools/testing/selftests/bpf/*/*mptcp*.c F: tools/testing/selftests/net/mptcp/ NETWORKING [TCP] @@ -21551,6 +21552,7 @@ K: (?:\b|_)xdp(?:\b|_) XDP SOCKETS (AF_XDP) M: Björn Töpel <bjorn@kernel.org> M: Magnus Karlsson <magnus.karlsson@intel.com> +M: Maciej Fijalkowski <maciej.fijalkowski@intel.com> R: Jonathan Lemon <jonathan.lemon@gmail.com> L: netdev@vger.kernel.org L: bpf@vger.kernel.org diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index aede9a3ca3f7..af35052d06ed 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1809,7 +1809,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) /* * Three initial passes: * - 1/2: Determine clobbered registers - * - 3: Calculate program size and addrs arrray + * - 3: Calculate program size and addrs array */ for (pass = 1; pass <= 3; pass++) { if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) { diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index d20ab0921480..1cc15528ce29 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -45,6 +45,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len); extern void text_poke_sync(void); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern void *text_poke_copy(void *addr, const void *opcode, size_t len); +extern void *text_poke_set(void *addr, int c, size_t len); extern int poke_int3_handler(struct pt_regs *regs); extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index d374cb3cf024..7563b5bc8328 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -994,7 +994,21 @@ static inline void unuse_temporary_mm(temp_mm_state_t prev_state) __ro_after_init struct mm_struct *poking_mm; __ro_after_init unsigned long poking_addr; -static void *__text_poke(void *addr, const void *opcode, size_t len) +static void text_poke_memcpy(void *dst, const void *src, size_t len) +{ + memcpy(dst, src, len); +} + +static void text_poke_memset(void *dst, const void *src, size_t len) +{ + int c = *(const int *)src; + + memset(dst, c, len); +} + +typedef void text_poke_f(void *dst, const void *src, size_t len); + +static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t len) { bool cross_page_boundary = 
offset_in_page(addr) + len > PAGE_SIZE; struct page *pages[2] = {NULL}; @@ -1059,7 +1073,7 @@ static void *__text_poke(void *addr, const void *opcode, size_t len) prev = use_temporary_mm(poking_mm); kasan_disable_current(); - memcpy((u8 *)poking_addr + offset_in_page(addr), opcode, len); + func((u8 *)poking_addr + offset_in_page(addr), src, len); kasan_enable_current(); /* @@ -1087,11 +1101,13 @@ static void *__text_poke(void *addr, const void *opcode, size_t len) (cross_page_boundary ? 2 : 1) * PAGE_SIZE, PAGE_SHIFT, false); - /* - * If the text does not match what we just wrote then something is - * fundamentally screwy; there's nothing we can really do about that. - */ - BUG_ON(memcmp(addr, opcode, len)); + if (func == text_poke_memcpy) { + /* + * If the text does not match what we just wrote then something is + * fundamentally screwy; there's nothing we can really do about that. + */ + BUG_ON(memcmp(addr, src, len)); + } local_irq_restore(flags); pte_unmap_unlock(ptep, ptl); @@ -1118,7 +1134,7 @@ void *text_poke(void *addr, const void *opcode, size_t len) { lockdep_assert_held(&text_mutex); - return __text_poke(addr, opcode, len); + return __text_poke(text_poke_memcpy, addr, opcode, len); } /** @@ -1137,7 +1153,7 @@ void *text_poke(void *addr, const void *opcode, size_t len) */ void *text_poke_kgdb(void *addr, const void *opcode, size_t len) { - return __text_poke(addr, opcode, len); + return __text_poke(text_poke_memcpy, addr, opcode, len); } /** @@ -1167,7 +1183,38 @@ void *text_poke_copy(void *addr, const void *opcode, size_t len) s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched); - __text_poke((void *)ptr, opcode + patched, s); + __text_poke(text_poke_memcpy, (void *)ptr, opcode + patched, s); + patched += s; + } + mutex_unlock(&text_mutex); + return addr; +} + +/** + * text_poke_set - memset into (an unused part of) RX memory + * @addr: address to modify + * @c: the byte to fill the area with + * @len: length to copy, could be more than 2x PAGE_SIZE + * + * This is useful to overwrite unused regions of RX memory with illegal + * instructions. 
+ */ +void *text_poke_set(void *addr, int c, size_t len) +{ + unsigned long start = (unsigned long)addr; + size_t patched = 0; + + if (WARN_ON_ONCE(core_kernel_text(start))) + return NULL; + + mutex_lock(&text_mutex); + while (patched < len) { + unsigned long ptr = start + patched; + size_t s; + + s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched); + + __text_poke(text_poke_memset, (void *)ptr, (void *)&c, s); patched += s; } mutex_unlock(&text_mutex); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 16b6efacf7c6..f298b18a9a3d 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -228,6 +228,11 @@ static void jit_fill_hole(void *area, unsigned int size) memset(area, 0xcc, size); } +int bpf_arch_text_invalidate(void *dst, size_t len) +{ + return IS_ERR_OR_NULL(text_poke_set(dst, 0xcc, len)); +} + struct jit_context { int cleanup_addr; /* Epilogue code offset */ @@ -1762,13 +1767,32 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args, } static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, - struct bpf_prog *p, int stack_size, bool save_ret) + struct bpf_tramp_link *l, int stack_size, + int run_ctx_off, bool save_ret) { u8 *prog = *pprog; u8 *jmp_insn; + int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); + struct bpf_prog *p = l->link.prog; + u64 cookie = l->cookie; + + /* mov rdi, cookie */ + emit_mov_imm64(&prog, BPF_REG_1, (long) cookie >> 32, (u32) (long) cookie); + + /* Prepare struct bpf_tramp_run_ctx. + * + * bpf_tramp_run_ctx is already preserved by + * arch_prepare_bpf_trampoline(). + * + * mov QWORD PTR [rbp - run_ctx_off + ctx_cookie_off], rdi + */ + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_1, -run_ctx_off + ctx_cookie_off); /* arg1: mov rdi, progs[i] */ emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); + /* arg2: lea rsi, [rbp - ctx_cookie_off] */ + EMIT4(0x48, 0x8D, 0x75, -run_ctx_off); + if (emit_call(&prog, p->aux->sleepable ? __bpf_prog_enter_sleepable : __bpf_prog_enter, prog)) @@ -1814,6 +1838,8 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); /* arg2: mov rsi, rbx <- start time in nsec */ emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); + /* arg3: lea rdx, [rbp - run_ctx_off] */ + EMIT4(0x48, 0x8D, 0x55, -run_ctx_off); if (emit_call(&prog, p->aux->sleepable ? 
__bpf_prog_exit_sleepable : __bpf_prog_exit, prog)) @@ -1850,15 +1876,15 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) } static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, - struct bpf_tramp_progs *tp, int stack_size, - bool save_ret) + struct bpf_tramp_links *tl, int stack_size, + int run_ctx_off, bool save_ret) { int i; u8 *prog = *pprog; - for (i = 0; i < tp->nr_progs; i++) { - if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, - save_ret)) + for (i = 0; i < tl->nr_links; i++) { + if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, + run_ctx_off, save_ret)) return -EINVAL; } *pprog = prog; @@ -1866,8 +1892,8 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, } static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, - struct bpf_tramp_progs *tp, int stack_size, - u8 **branches) + struct bpf_tramp_links *tl, int stack_size, + int run_ctx_off, u8 **branches) { u8 *prog = *pprog; int i; @@ -1877,8 +1903,8 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, */ emit_mov_imm32(&prog, false, BPF_REG_0, 0); emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); - for (i = 0; i < tp->nr_progs; i++) { - if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, true)) + for (i = 0; i < tl->nr_links; i++) { + if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true)) return -EINVAL; /* mod_ret prog stored return value into [rbp - 8]. Emit: @@ -1980,14 +2006,14 @@ static bool is_valid_bpf_tramp_flags(unsigned int flags) */ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_tramp_progs *tprogs, + struct bpf_tramp_links *tlinks, void *orig_call) { int ret, i, nr_args = m->nr_args; - int regs_off, ip_off, args_off, stack_size = nr_args * 8; - struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY]; - struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT]; - struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN]; + int regs_off, ip_off, args_off, stack_size = nr_args * 8, run_ctx_off; + struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; + struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; + struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; u8 **branches = NULL; u8 *prog; bool save_ret; @@ -2014,6 +2040,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i * RBP - args_off [ args count ] always * * RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag + * + * RBP - run_ctx_off [ bpf_tramp_run_ctx ] */ /* room for return value of orig_call or fentry prog */ @@ -2032,6 +2060,9 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i ip_off = stack_size; + stack_size += (sizeof(struct bpf_tramp_run_ctx) + 7) & ~0x7; + run_ctx_off = stack_size; + if (flags & BPF_TRAMP_F_SKIP_FRAME) { /* skip patched call instruction and point orig_call to actual * body of the kernel function. 
@@ -2078,19 +2109,19 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i } } - if (fentry->nr_progs) - if (invoke_bpf(m, &prog, fentry, regs_off, + if (fentry->nr_links) + if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET)) return -EINVAL; - if (fmod_ret->nr_progs) { - branches = kcalloc(fmod_ret->nr_progs, sizeof(u8 *), + if (fmod_ret->nr_links) { + branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *), GFP_KERNEL); if (!branches) return -ENOMEM; if (invoke_bpf_mod_ret(m, &prog, fmod_ret, regs_off, - branches)) { + run_ctx_off, branches)) { ret = -EINVAL; goto cleanup; } @@ -2111,7 +2142,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i prog += X86_PATCH_SIZE; } - if (fmod_ret->nr_progs) { + if (fmod_ret->nr_links) { /* From Intel 64 and IA-32 Architectures Optimization * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler * Coding Rule 11: All branch targets should be 16-byte @@ -2121,13 +2152,13 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i /* Update the branches saved in invoke_bpf_mod_ret with the * aligned address of do_fexit. */ - for (i = 0; i < fmod_ret->nr_progs; i++) + for (i = 0; i < fmod_ret->nr_links; i++) emit_cond_near_jump(&branches[i], prog, branches[i], X86_JNE); } - if (fexit->nr_progs) - if (invoke_bpf(m, &prog, fexit, regs_off, false)) { + if (fexit->nr_links) + if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, false)) { ret = -EINVAL; goto cleanup; } diff --git a/include/linux/bpf.h b/include/linux/bpf.h index be94833d390a..a7080c86fa76 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -89,6 +89,7 @@ struct bpf_map_ops { int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); int (*map_pop_elem)(struct bpf_map *map, void *value); int (*map_peek_elem)(struct bpf_map *map, void *value); + void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu); /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, @@ -389,9 +390,20 @@ enum bpf_type_flag { */ PTR_UNTRUSTED = BIT(6 + BPF_BASE_TYPE_BITS), - __BPF_TYPE_LAST_FLAG = PTR_UNTRUSTED, + MEM_UNINIT = BIT(7 + BPF_BASE_TYPE_BITS), + + /* DYNPTR points to memory local to the bpf program. */ + DYNPTR_TYPE_LOCAL = BIT(8 + BPF_BASE_TYPE_BITS), + + /* DYNPTR points to a ringbuf record. */ + DYNPTR_TYPE_RINGBUF = BIT(9 + BPF_BASE_TYPE_BITS), + + __BPF_TYPE_FLAG_MAX, + __BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1, }; +#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF) + /* Max number of base types. */ #define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) @@ -408,16 +420,11 @@ enum bpf_arg_type { ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */ ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ - ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ - /* the following constraints used to prototype bpf_memcmp() and other - * functions that access data on eBPF program stack + /* Used to prototype bpf_memcmp() and other functions that access data + * on eBPF program stack */ ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ - ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized, - * helper function must fill all bytes or clear - * them in error case. 
- */ ARG_CONST_SIZE, /* number of bytes accessed from memory */ ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ @@ -439,6 +446,7 @@ enum bpf_arg_type { ARG_PTR_TO_CONST_STR, /* pointer to a null terminated read-only string */ ARG_PTR_TO_TIMER, /* pointer to bpf_timer */ ARG_PTR_TO_KPTR, /* pointer to referenced kptr */ + ARG_PTR_TO_DYNPTR, /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */ __BPF_ARG_TYPE_MAX, /* Extended arg_types. */ @@ -449,6 +457,10 @@ enum bpf_arg_type { ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM, ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK, ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, + /* pointer to memory does not need to be initialized, helper function must fill + * all bytes or clear them in error case. + */ + ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM, /* This must be the last entry. Its purpose is to ensure the enum is * wide enough to hold the higher bits reserved for bpf_type_flag. @@ -476,6 +488,7 @@ enum bpf_return_type { RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM, + RET_PTR_TO_DYNPTR_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM, RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, /* This must be the last entry. Its purpose is to ensure the enum is @@ -721,15 +734,17 @@ struct btf_func_model { #define BPF_TRAMP_F_RET_FENTRY_RET BIT(4) /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 - * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 + * bytes on x86. */ -#define BPF_MAX_TRAMP_PROGS 38 +#define BPF_MAX_TRAMP_LINKS 38 -struct bpf_tramp_progs { - struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS]; - int nr_progs; +struct bpf_tramp_links { + struct bpf_tramp_link *links[BPF_MAX_TRAMP_LINKS]; + int nr_links; }; +struct bpf_tramp_run_ctx; + /* Different use cases for BPF trampoline: * 1. 
replace nop at the function entry (kprobe equivalent) * flags = BPF_TRAMP_F_RESTORE_REGS @@ -753,13 +768,14 @@ struct bpf_tramp_progs { struct bpf_tramp_image; int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_tramp_progs *tprogs, + struct bpf_tramp_links *tlinks, void *orig_call); /* these two functions are called from generated trampoline */ -u64 notrace __bpf_prog_enter(struct bpf_prog *prog); -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); -u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog); -void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start); +u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); +void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx); +u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); +void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr); @@ -852,9 +868,10 @@ static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( { return bpf_func(ctx, insnsi); } + #ifdef CONFIG_BPF_JIT -int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr); -int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr); +int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); +int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); @@ -905,12 +922,12 @@ int bpf_jit_charge_modmem(u32 size); void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); #else -static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, +static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { return -ENOTSUPP; } -static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog, +static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { return -ENOTSUPP; @@ -1009,7 +1026,6 @@ struct bpf_prog_aux { bool tail_call_reachable; bool xdp_has_frags; bool use_bpf_prog_pack; - struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ @@ -1096,6 +1112,19 @@ struct bpf_link_ops { struct bpf_link_info *info); }; +struct bpf_tramp_link { + struct bpf_link link; + struct hlist_node tramp_hlist; + u64 cookie; +}; + +struct bpf_tracing_link { + struct bpf_tramp_link link; + enum bpf_attach_type attach_type; + struct bpf_trampoline *trampoline; + struct bpf_prog *tgt_prog; +}; + struct bpf_link_primer { struct bpf_link *link; struct file *file; @@ -1133,8 +1162,8 @@ bool bpf_struct_ops_get(const void *kdata); void bpf_struct_ops_put(const void *kdata); int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key, void *value); -int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs, - struct bpf_prog *prog, +int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, + struct bpf_tramp_link *link, const struct btf_func_model *model, void *image, void *image_end); 
static inline bool bpf_try_module_get(const void *data, struct module *owner) @@ -1339,6 +1368,12 @@ struct bpf_trace_run_ctx { u64 bpf_cookie; }; +struct bpf_tramp_run_ctx { + struct bpf_run_ctx run_ctx; + u64 bpf_cookie; + struct bpf_run_ctx *saved_run_ctx; +}; + static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx) { struct bpf_run_ctx *old_ctx = NULL; @@ -1544,6 +1579,7 @@ void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd); struct bpf_link *bpf_link_get_from_fd(u32 ufd); +struct bpf_link *bpf_link_get_curr_or_next(u32 *id); int bpf_obj_pin_user(u32 ufd, const char __user *pathname); int bpf_obj_get_user(const char __user *pathname, int flags); @@ -2161,6 +2197,7 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_map_push_elem_proto; extern const struct bpf_func_proto bpf_map_pop_elem_proto; extern const struct bpf_func_proto bpf_map_peek_elem_proto; +extern const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; @@ -2198,12 +2235,16 @@ extern const struct bpf_func_proto bpf_ringbuf_reserve_proto; extern const struct bpf_func_proto bpf_ringbuf_submit_proto; extern const struct bpf_func_proto bpf_ringbuf_discard_proto; extern const struct bpf_func_proto bpf_ringbuf_query_proto; +extern const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto; +extern const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto; +extern const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto; extern const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto; extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto; extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; extern const struct bpf_func_proto bpf_skc_to_unix_sock_proto; +extern const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto; extern const struct bpf_func_proto bpf_copy_from_user_proto; extern const struct bpf_func_proto bpf_snprintf_btf_proto; extern const struct bpf_func_proto bpf_snprintf_proto; @@ -2223,6 +2264,7 @@ extern const struct bpf_func_proto bpf_find_vma_proto; extern const struct bpf_func_proto bpf_loop_proto; extern const struct bpf_func_proto bpf_strncmp_proto; extern const struct bpf_func_proto bpf_copy_from_user_task_proto; +extern const struct bpf_func_proto bpf_kptr_xchg_proto; const struct bpf_func_proto *tracing_prog_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); @@ -2336,6 +2378,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); void *bpf_arch_text_copy(void *dst, void *src, size_t len); +int bpf_arch_text_invalidate(void *dst, size_t len); struct btf_id_set; bool btf_id_set_contains(const struct btf_id_set *set, u32 id); @@ -2346,4 +2389,33 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args, u32 **bin_buf, u32 num_args); void bpf_bprintf_cleanup(void); +/* the implementation of the opaque uapi struct bpf_dynptr */ +struct bpf_dynptr_kern { + void *data; + /* Size represents the number of usable bytes of dynptr data. + * If for example the offset is at 4 for a local dynptr whose data is + * of type u64, the number of usable bytes is 4. 
+ * + * The upper 8 bits are reserved. It is as follows: + * Bits 0 - 23 = size + * Bits 24 - 30 = dynptr type + * Bit 31 = whether dynptr is read-only + */ + u32 size; + u32 offset; +} __aligned(8); + +enum bpf_dynptr_type { + BPF_DYNPTR_TYPE_INVALID, + /* Points to memory that is local to the bpf program */ + BPF_DYNPTR_TYPE_LOCAL, + /* Underlying data is a ringbuf record */ + BPF_DYNPTR_TYPE_RINGBUF, +}; + +void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, + enum bpf_dynptr_type type, u32 offset, u32 size); +void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); +int bpf_dynptr_check_size(u32 size); + #endif /* _LINUX_BPF_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 3e24ad0c4b3c..2b9112b80171 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -141,3 +141,4 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp) BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf) #endif BPF_LINK_TYPE(BPF_LINK_TYPE_KPROBE_MULTI, kprobe_multi) +BPF_LINK_TYPE(BPF_LINK_TYPE_STRUCT_OPS, struct_ops) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 1f1e7f2ea967..e8439f6cbe57 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -72,6 +72,18 @@ struct bpf_reg_state { u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */ + /* For dynptr stack slots */ + struct { + enum bpf_dynptr_type type; + /* A dynptr is 16 bytes so it takes up 2 stack slots. + * We need to track which slot is the first slot + * to protect against cases where the user may try to + * pass in an address starting at the second slot of the + * dynptr. + */ + bool first_slot; + } dynptr; + /* Max size from any of the above. */ struct { unsigned long raw1; @@ -88,6 +100,8 @@ struct bpf_reg_state { * for the purpose of tracking that it's freed. * For PTR_TO_SOCKET this is used to share which pointers retain the * same reference to the socket, to determine proper reference freeing. + * For stack slots that are dynptrs, this is used to track references to + * the dynptr to determine proper reference freeing. */ u32 id; /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned @@ -174,9 +188,15 @@ enum bpf_stack_slot_type { STACK_SPILL, /* register spilled into stack */ STACK_MISC, /* BPF program wrote some data into this slot */ STACK_ZERO, /* BPF program wrote constant zero */ + /* A dynptr is stored in this stack slot. 
The type of dynptr + * is stored in bpf_stack_state->spilled_ptr.dynptr.type + */ + STACK_DYNPTR, }; #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ +#define BPF_DYNPTR_SIZE sizeof(struct bpf_dynptr_kern) +#define BPF_DYNPTR_NR_SLOTS (BPF_DYNPTR_SIZE / BPF_REG_SIZE) struct bpf_stack_state { struct bpf_reg_state spilled_ptr; diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h index bc5d9cc34e4c..335a19092368 100644 --- a/include/linux/btf_ids.h +++ b/include/linux/btf_ids.h @@ -178,7 +178,8 @@ extern struct btf_id_set name; BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) \ - BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock) + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock) \ + BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock) enum { #define BTF_SOCK_TYPE(name, str) name, diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4816b7e11047..820500430eae 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -303,6 +303,8 @@ int unregister_ftrace_function(struct ftrace_ops *ops); extern void ftrace_stub(unsigned long a0, unsigned long a1, struct ftrace_ops *op, struct ftrace_regs *fregs); + +int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *addrs); #else /* !CONFIG_FUNCTION_TRACER */ /* * (un)register_ftrace_function must be a macro since the ops parameter @@ -313,6 +315,10 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1, static inline void ftrace_kill(void) { } static inline void ftrace_free_init_mem(void) { } static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { } +static inline int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *addrs) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_FUNCTION_TRACER */ struct ftrace_func_entry { diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index ce1bd2fbf23e..ad39636e0c3f 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -65,11 +65,11 @@ static inline void *dereference_symbol_descriptor(void *ptr) return ptr; } +#ifdef CONFIG_KALLSYMS int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), void *data); -#ifdef CONFIG_KALLSYMS /* Lookup the address for a symbol. Returns 0 if not found. 
*/ unsigned long kallsyms_lookup_name(const char *name); @@ -163,6 +163,11 @@ static inline bool kallsyms_show_value(const struct cred *cred) return false; } +static inline int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, + unsigned long), void *data) +{ + return -EOPNOTSUPP; +} #endif /*CONFIG_KALLSYMS*/ static inline void print_ip_sym(const char *loglvl, unsigned long ip) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index d4ec894ce67b..4d761ad530c9 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -284,4 +284,10 @@ static inline int mptcpv6_init(void) { return 0; } static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { } #endif +#if defined(CONFIG_MPTCP) && defined(CONFIG_BPF_SYSCALL) +struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk); +#else +static inline struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk) { return NULL; } +#endif + #endif /* __NET_MPTCP_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 444fe6f1cf35..f4009dbdf62d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1013,6 +1013,7 @@ enum bpf_link_type { BPF_LINK_TYPE_XDP = 6, BPF_LINK_TYPE_PERF_EVENT = 7, BPF_LINK_TYPE_KPROBE_MULTI = 8, + BPF_LINK_TYPE_STRUCT_OPS = 9, MAX_BPF_LINK_TYPE, }; @@ -1489,6 +1490,15 @@ union bpf_attr { __aligned_u64 addrs; __aligned_u64 cookies; } kprobe_multi; + struct { + /* this is overlaid with the target_btf_id above. */ + __u32 target_btf_id; + /* black box user-provided value passed through + * to BPF program at the execution time and + * accessible through bpf_get_attach_cookie() BPF helper + */ + __u64 cookie; + } tracing; }; } link_create; @@ -5154,6 +5164,91 @@ union bpf_attr { * if not NULL, is a reference which must be released using its * corresponding release function, or moved into a BPF map before * program exit. + * + * void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu) + * Description + * Perform a lookup in *percpu map* for an entry associated to + * *key* on *cpu*. + * Return + * Map value associated to *key* on *cpu*, or **NULL** if no entry + * was found or *cpu* is invalid. + * + * struct mptcp_sock *bpf_skc_to_mptcp_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *mptcp_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Get a dynptr to local memory *data*. + * + * *data* must be a ptr to a map value. + * The maximum *size* supported is DYNPTR_MAX_SIZE. + * *flags* is currently unused. + * Return + * 0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE, + * -EINVAL if flags is not 0. + * + * long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Reserve *size* bytes of payload in a ring buffer *ringbuf* + * through the dynptr interface. *flags* must be 0. + * + * Please note that a corresponding bpf_ringbuf_submit_dynptr or + * bpf_ringbuf_discard_dynptr must be called on *ptr*, even if the + * reservation fails. This is enforced by the verifier. + * Return + * 0 on success, or a negative error in case of failure. + * + * void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Submit reserved ring buffer sample, pointed to by *data*, + * through the dynptr interface. This is a no-op if the dynptr is + * invalid/null. 
+ * + * For more information on *flags*, please see + * 'bpf_ringbuf_submit'. + * Return + * Nothing. Always succeeds. + * + * void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Discard reserved ring buffer sample through the dynptr + * interface. This is a no-op if the dynptr is invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_discard'. + * Return + * Nothing. Always succeeds. + * + * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset) + * Description + * Read *len* bytes from *src* into *dst*, starting from *offset* + * into *src*. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *src*'s data, -EINVAL if *src* is an invalid dynptr. + * + * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len) + * Description + * Write *len* bytes from *src* into *dst*, starting from *offset* + * into *dst*. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* + * is a read-only dynptr. + * + * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) + * Description + * Get a pointer to the underlying dynptr data. + * + * *len* must be a statically known value. The returned data slice + * is invalidated whenever the dynptr is invalidated. + * Return + * Pointer to the underlying dynptr data, NULL if the dynptr is + * read-only, if the dynptr is invalid, or if the offset and length + * is out of bounds. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5351,6 +5446,15 @@ union bpf_attr { FN(skb_set_tstamp), \ FN(ima_file_hash), \ FN(kptr_xchg), \ + FN(map_lookup_percpu_elem), \ + FN(skc_to_mptcp_sock), \ + FN(dynptr_from_mem), \ + FN(ringbuf_reserve_dynptr), \ + FN(ringbuf_submit_dynptr), \ + FN(ringbuf_discard_dynptr), \ + FN(dynptr_read), \ + FN(dynptr_write), \ + FN(dynptr_data), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5604,6 +5708,10 @@ struct bpf_tunnel_key { __u8 tunnel_ttl; __u16 tunnel_ext; /* Padding, future use. */ __u32 tunnel_label; + union { + __u32 local_ipv4; + __u32 local_ipv6[4]; + }; }; /* user accessible mirror of in-kernel xfrm_state. @@ -6498,6 +6606,11 @@ struct bpf_timer { __u64 :64; } __attribute__((aligned(8))); +struct bpf_dynptr { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. 
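The dynptr helpers documented in the uapi header above can be exercised from a BPF program once libbpf's generated helper declarations include them. The sketch below is illustrative only and not part of this patch: the ring buffer map name "rb", the "struct event" layout, and the kprobe attach point are all hypothetical, and it assumes a bpf_helpers.h built against this kernel. It shows the reserve/data/submit-or-discard pattern that the verifier enforces for ringbuf-backed dynptrs.

    /* Minimal sketch, assuming a libbpf toolchain whose helper definitions
     * already carry the dynptr helpers added by this series.
     */
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct event {            /* hypothetical sample layout */
    	__u32 pid;
    	__u64 ts;
    };

    struct {
    	__uint(type, BPF_MAP_TYPE_RINGBUF);
    	__uint(max_entries, 256 * 1024);
    } rb SEC(".maps");        /* hypothetical map name */

    SEC("kprobe/do_sys_openat2")   /* arbitrary attach point for illustration */
    int probe_open(void *ctx)
    {
    	struct bpf_dynptr ptr;
    	struct event *e;

    	/* Reserve through the dynptr interface. A matching submit or
    	 * discard must follow even if the reservation fails; the
    	 * verifier enforces this, and discard on a null dynptr is a no-op.
    	 */
    	if (bpf_ringbuf_reserve_dynptr(&rb, sizeof(*e), 0, &ptr)) {
    		bpf_ringbuf_discard_dynptr(&ptr, 0);
    		return 0;
    	}

    	/* Get a direct slice into the reserved sample; NULL means the
    	 * request was out of bounds or the dynptr is read-only/invalid.
    	 */
    	e = bpf_dynptr_data(&ptr, 0, sizeof(*e));
    	if (!e) {
    		bpf_ringbuf_discard_dynptr(&ptr, 0);
    		return 0;
    	}

    	e->pid = bpf_get_current_pid_tgid() >> 32;
    	e->ts = bpf_ktime_get_ns();

    	bpf_ringbuf_submit_dynptr(&ptr, 0);
    	return 0;
    }

    char LICENSE[] SEC("license") = "GPL";

Instead of the bpf_dynptr_data() slice, bpf_dynptr_write()/bpf_dynptr_read() (in the four-argument form documented by this patch) could be used to copy into or out of the reserved sample.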
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index c1a9be6a4b9f..057ba8e01e70 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -6,7 +6,7 @@ cflags-nogcse-$(CONFIG_X86)$(CONFIG_CC_IS_GCC) := -fno-gcse endif CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy) -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o link_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index b3bf31fd9458..fe40d3b9458f 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -243,6 +243,20 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) return this_cpu_ptr(array->pptrs[index & array->index_mask]); } +static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + u32 index = *(u32 *)key; + + if (cpu >= nr_cpu_ids) + return NULL; + + if (unlikely(index >= array->map.max_entries)) + return NULL; + + return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu); +} + int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) { struct bpf_array *array = container_of(map, struct bpf_array, map); @@ -725,6 +739,7 @@ const struct bpf_map_ops percpu_array_map_ops = { .map_lookup_elem = percpu_array_map_lookup_elem, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, + .map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem, .map_seq_show_elem = percpu_array_map_seq_show_elem, .map_check_btf = array_map_check_btf, .map_lookup_batch = generic_map_lookup_batch, @@ -1345,6 +1360,8 @@ const struct bpf_map_ops array_of_maps_map_ops = { .map_fd_put_ptr = bpf_map_fd_put_ptr, .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, .map_gen_lookup = array_of_map_gen_lookup, + .map_lookup_batch = generic_map_lookup_batch, + .map_update_batch = generic_map_update_batch, .map_check_btf = map_check_no_btf, .map_btf_id = &array_map_btf_ids[0], }; diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 064eccba641d..c1351df9f7ee 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -117,6 +117,21 @@ static const struct bpf_func_proto bpf_ima_file_hash_proto = { .allowed = bpf_ima_inode_hash_allowed, }; +BPF_CALL_1(bpf_get_attach_cookie, void *, ctx) +{ + struct bpf_trace_run_ctx *run_ctx; + + run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx); + return run_ctx->bpf_cookie; +} + +static const struct bpf_func_proto bpf_get_attach_cookie_proto = { + .func = bpf_get_attach_cookie, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + static const struct bpf_func_proto * bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -141,6 +156,8 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL; case BPF_FUNC_ima_file_hash: return prog->aux->sleepable ? &bpf_ima_file_hash_proto : NULL; + case BPF_FUNC_get_attach_cookie: + return bpf_prog_has_trampoline(prog) ? 
&bpf_get_attach_cookie_proto : NULL; default: return tracing_prog_func_proto(func_id, prog); } diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 3a0103ad97bc..d9a3c9207240 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -33,15 +33,15 @@ struct bpf_struct_ops_map { const struct bpf_struct_ops *st_ops; /* protect map_update */ struct mutex lock; - /* progs has all the bpf_prog that is populated + /* link has all the bpf_links that is populated * to the func ptr of the kernel's struct * (in kvalue.data). */ - struct bpf_prog **progs; + struct bpf_link **links; /* image is a page that has all the trampolines * that stores the func args before calling the bpf_prog. * A PAGE_SIZE "image" is enough to store all trampoline for - * "progs[]". + * "links[]". */ void *image; /* uvalue->data stores the kernel struct @@ -283,9 +283,9 @@ static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map) u32 i; for (i = 0; i < btf_type_vlen(t); i++) { - if (st_map->progs[i]) { - bpf_prog_put(st_map->progs[i]); - st_map->progs[i] = NULL; + if (st_map->links[i]) { + bpf_link_put(st_map->links[i]); + st_map->links[i] = NULL; } } } @@ -316,18 +316,34 @@ static int check_zero_holes(const struct btf_type *t, void *data) return 0; } -int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs, - struct bpf_prog *prog, +static void bpf_struct_ops_link_release(struct bpf_link *link) +{ +} + +static void bpf_struct_ops_link_dealloc(struct bpf_link *link) +{ + struct bpf_tramp_link *tlink = container_of(link, struct bpf_tramp_link, link); + + kfree(tlink); +} + +const struct bpf_link_ops bpf_struct_ops_link_lops = { + .release = bpf_struct_ops_link_release, + .dealloc = bpf_struct_ops_link_dealloc, +}; + +int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, + struct bpf_tramp_link *link, const struct btf_func_model *model, void *image, void *image_end) { u32 flags; - tprogs[BPF_TRAMP_FENTRY].progs[0] = prog; - tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; + tlinks[BPF_TRAMP_FENTRY].links[0] = link; + tlinks[BPF_TRAMP_FENTRY].nr_links = 1; flags = model->ret_size > 0 ? 
BPF_TRAMP_F_RET_FENTRY_RET : 0; return arch_prepare_bpf_trampoline(NULL, image, image_end, - model, flags, tprogs, NULL); + model, flags, tlinks, NULL); } static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, @@ -338,7 +354,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, struct bpf_struct_ops_value *uvalue, *kvalue; const struct btf_member *member; const struct btf_type *t = st_ops->type; - struct bpf_tramp_progs *tprogs = NULL; + struct bpf_tramp_links *tlinks = NULL; void *udata, *kdata; int prog_fd, err = 0; void *image, *image_end; @@ -362,8 +378,8 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, if (uvalue->state || refcount_read(&uvalue->refcnt)) return -EINVAL; - tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL); - if (!tprogs) + tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL); + if (!tlinks) return -ENOMEM; uvalue = (struct bpf_struct_ops_value *)st_map->uvalue; @@ -386,6 +402,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, for_each_member(i, t, member) { const struct btf_type *mtype, *ptype; struct bpf_prog *prog; + struct bpf_tramp_link *link; u32 moff; moff = __btf_member_bit_offset(t, member) / 8; @@ -439,16 +456,26 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, err = PTR_ERR(prog); goto reset_unlock; } - st_map->progs[i] = prog; if (prog->type != BPF_PROG_TYPE_STRUCT_OPS || prog->aux->attach_btf_id != st_ops->type_id || prog->expected_attach_type != i) { + bpf_prog_put(prog); err = -EINVAL; goto reset_unlock; } - err = bpf_struct_ops_prepare_trampoline(tprogs, prog, + link = kzalloc(sizeof(*link), GFP_USER); + if (!link) { + bpf_prog_put(prog); + err = -ENOMEM; + goto reset_unlock; + } + bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, + &bpf_struct_ops_link_lops, prog); + st_map->links[i] = &link->link; + + err = bpf_struct_ops_prepare_trampoline(tlinks, link, &st_ops->func_models[i], image, image_end); if (err < 0) @@ -491,7 +518,7 @@ reset_unlock: memset(uvalue, 0, map->value_size); memset(kvalue, 0, map->value_size); unlock: - kfree(tprogs); + kfree(tlinks); mutex_unlock(&st_map->lock); return err; } @@ -546,9 +573,9 @@ static void bpf_struct_ops_map_free(struct bpf_map *map) { struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map; - if (st_map->progs) + if (st_map->links) bpf_struct_ops_map_put_progs(st_map); - bpf_map_area_free(st_map->progs); + bpf_map_area_free(st_map->links); bpf_jit_free_exec(st_map->image); bpf_map_area_free(st_map->uvalue); bpf_map_area_free(st_map); @@ -597,11 +624,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) map = &st_map->map; st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE); - st_map->progs = - bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_prog *), + st_map->links = + bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_links *), NUMA_NO_NODE); st_map->image = bpf_jit_alloc_exec(PAGE_SIZE); - if (!st_map->uvalue || !st_map->progs || !st_map->image) { + if (!st_map->uvalue || !st_map->links || !st_map->image) { bpf_struct_ops_map_free(map); return ERR_PTR(-ENOMEM); } diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 2f0b0440131c..7bccaa4646e5 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -202,6 +202,8 @@ enum btf_kfunc_hook { BTF_KFUNC_HOOK_XDP, BTF_KFUNC_HOOK_TC, BTF_KFUNC_HOOK_STRUCT_OPS, + BTF_KFUNC_HOOK_TRACING, + BTF_KFUNC_HOOK_SYSCALL, BTF_KFUNC_HOOK_MAX, }; @@ -7110,6 +7112,10 @@ static int 
bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) return BTF_KFUNC_HOOK_TC; case BPF_PROG_TYPE_STRUCT_OPS: return BTF_KFUNC_HOOK_STRUCT_OPS; + case BPF_PROG_TYPE_TRACING: + return BTF_KFUNC_HOOK_TRACING; + case BPF_PROG_TYPE_SYSCALL: + return BTF_KFUNC_HOOK_SYSCALL; default: return BTF_KFUNC_HOOK_MAX; } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 13e9dbeeedf3..cacd8684c3c4 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -873,7 +873,7 @@ static size_t select_bpf_prog_pack_size(void) return size; } -static struct bpf_prog_pack *alloc_new_pack(void) +static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_insns) { struct bpf_prog_pack *pack; @@ -886,6 +886,7 @@ static struct bpf_prog_pack *alloc_new_pack(void) kfree(pack); return NULL; } + bpf_fill_ill_insns(pack->ptr, bpf_prog_pack_size); bitmap_zero(pack->bitmap, bpf_prog_pack_size / BPF_PROG_CHUNK_SIZE); list_add_tail(&pack->list, &pack_list); @@ -895,7 +896,7 @@ static struct bpf_prog_pack *alloc_new_pack(void) return pack; } -static void *bpf_prog_pack_alloc(u32 size) +static void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns) { unsigned int nbits = BPF_PROG_SIZE_TO_NBITS(size); struct bpf_prog_pack *pack; @@ -910,6 +911,7 @@ static void *bpf_prog_pack_alloc(u32 size) size = round_up(size, PAGE_SIZE); ptr = module_alloc(size); if (ptr) { + bpf_fill_ill_insns(ptr, size); set_vm_flush_reset_perms(ptr); set_memory_ro((unsigned long)ptr, size / PAGE_SIZE); set_memory_x((unsigned long)ptr, size / PAGE_SIZE); @@ -923,7 +925,7 @@ static void *bpf_prog_pack_alloc(u32 size) goto found_free_area; } - pack = alloc_new_pack(); + pack = alloc_new_pack(bpf_fill_ill_insns); if (!pack) goto out; @@ -966,6 +968,9 @@ static void bpf_prog_pack_free(struct bpf_binary_header *hdr) nbits = BPF_PROG_SIZE_TO_NBITS(hdr->size); pos = ((unsigned long)hdr - (unsigned long)pack_ptr) >> BPF_PROG_CHUNK_SHIFT; + WARN_ONCE(bpf_arch_text_invalidate(hdr, hdr->size), + "bpf_prog_pack bug: missing bpf_arch_text_invalidate?\n"); + bitmap_clear(pack->bitmap, pos, nbits); if (bitmap_find_next_zero_area(pack->bitmap, bpf_prog_chunk_count(), 0, bpf_prog_chunk_count(), 0) == 0) { @@ -1102,7 +1107,7 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr, if (bpf_jit_charge_modmem(size)) return NULL; - ro_header = bpf_prog_pack_alloc(size); + ro_header = bpf_prog_pack_alloc(size, bpf_fill_ill_insns); if (!ro_header) { bpf_jit_uncharge_modmem(size); return NULL; @@ -1434,6 +1439,16 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) insn = clone->insnsi; for (i = 0; i < insn_cnt; i++, insn++) { + if (bpf_pseudo_func(insn)) { + /* ld_imm64 with an address of bpf subprog is not + * a user controlled constant. Don't randomize it, + * since it will conflict with jit_subprogs() logic. + */ + insn++; + i++; + continue; + } + /* We temporarily need to hold the original ld64 insn * so that we can still access the first part in the * second blinding run. 
@@ -2619,6 +2634,7 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_map_push_elem_proto __weak; const struct bpf_func_proto bpf_map_pop_elem_proto __weak; const struct bpf_func_proto bpf_map_peek_elem_proto __weak; +const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto __weak; const struct bpf_func_proto bpf_spin_lock_proto __weak; const struct bpf_func_proto bpf_spin_unlock_proto __weak; const struct bpf_func_proto bpf_jiffies64_proto __weak; @@ -2727,6 +2743,11 @@ void * __weak bpf_arch_text_copy(void *dst, void *src, size_t len) return ERR_PTR(-ENOTSUPP); } +int __weak bpf_arch_text_invalidate(void *dst, size_t len) +{ + return -ENOTSUPP; +} + DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key); EXPORT_SYMBOL(bpf_stats_enabled_key); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 3e00e62b2218..17fb69c0e0dc 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -140,7 +140,7 @@ static inline bool htab_use_raw_lock(const struct bpf_htab *htab) static void htab_init_buckets(struct bpf_htab *htab) { - unsigned i; + unsigned int i; for (i = 0; i < htab->n_buckets; i++) { INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i); @@ -1627,7 +1627,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map, void __user *uvalues = u64_to_user_ptr(attr->batch.values); void __user *ukeys = u64_to_user_ptr(attr->batch.keys); void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); - u32 batch, max_count, size, bucket_size; + u32 batch, max_count, size, bucket_size, map_id; struct htab_elem *node_to_free = NULL; u64 elem_map_flags, map_flags; struct hlist_nulls_head *head; @@ -1752,6 +1752,14 @@ again_nocopy: } } else { value = l->key + roundup_key_size; + if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { + struct bpf_map **inner_map = value; + + /* Actual value is the id of the inner map */ + map_id = map->ops->map_fd_sys_lookup_elem(*inner_map); + value = &map_id; + } + if (elem_map_flags & BPF_F_LOCK) copy_map_value_locked(map, dst_val, value, true); @@ -2191,6 +2199,20 @@ static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key) return NULL; } +static void *htab_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) +{ + struct htab_elem *l; + + if (cpu >= nr_cpu_ids) + return NULL; + + l = __htab_map_lookup_elem(map, key); + if (l) + return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); + else + return NULL; +} + static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key) { struct htab_elem *l = __htab_map_lookup_elem(map, key); @@ -2203,6 +2225,22 @@ static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key) return NULL; } +static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) +{ + struct htab_elem *l; + + if (cpu >= nr_cpu_ids) + return NULL; + + l = __htab_map_lookup_elem(map, key); + if (l) { + bpf_lru_node_set_ref(&l->lru_node); + return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); + } + + return NULL; +} + int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) { struct htab_elem *l; @@ -2292,6 +2330,7 @@ const struct bpf_map_ops htab_percpu_map_ops = { .map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem, .map_update_elem = htab_percpu_map_update_elem, .map_delete_elem = htab_map_delete_elem, + .map_lookup_percpu_elem = htab_percpu_map_lookup_percpu_elem, .map_seq_show_elem = htab_percpu_map_seq_show_elem, .map_set_for_each_callback_args = 
map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, @@ -2310,6 +2349,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = { .map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem, .map_update_elem = htab_lru_percpu_map_update_elem, .map_delete_elem = htab_lru_map_delete_elem, + .map_lookup_percpu_elem = htab_lru_percpu_map_lookup_percpu_elem, .map_seq_show_elem = htab_percpu_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, @@ -2450,5 +2490,6 @@ const struct bpf_map_ops htab_of_maps_map_ops = { .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, .map_gen_lookup = htab_of_map_gen_lookup, .map_check_btf = map_check_no_btf, + BATCH_OPS(htab), .map_btf_id = &htab_map_btf_ids[0], }; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 3e709fed5306..225806a02efb 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -103,7 +103,7 @@ const struct bpf_func_proto bpf_map_pop_elem_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, + .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT, }; BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) @@ -116,7 +116,23 @@ const struct bpf_func_proto bpf_map_peek_elem_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, + .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT, +}; + +BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu) +{ + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu); +} + +const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto = { + .func = bpf_map_lookup_percpu_elem, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_MAP_KEY, + .arg3_type = ARG_ANYTHING, }; const struct bpf_func_proto bpf_get_prandom_u32_proto = { @@ -1396,6 +1412,169 @@ const struct bpf_func_proto bpf_kptr_xchg_proto = { .arg2_btf_id = BPF_PTR_POISON, }; +/* Since the upper 8 bits of dynptr->size is reserved, the + * maximum supported size is 2^24 - 1. + */ +#define DYNPTR_MAX_SIZE ((1UL << 24) - 1) +#define DYNPTR_TYPE_SHIFT 28 +#define DYNPTR_SIZE_MASK 0xFFFFFF +#define DYNPTR_RDONLY_BIT BIT(31) + +static bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr) +{ + return ptr->size & DYNPTR_RDONLY_BIT; +} + +static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type) +{ + ptr->size |= type << DYNPTR_TYPE_SHIFT; +} + +static u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr) +{ + return ptr->size & DYNPTR_SIZE_MASK; +} + +int bpf_dynptr_check_size(u32 size) +{ + return size > DYNPTR_MAX_SIZE ? 
-E2BIG : 0; +} + +void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, + enum bpf_dynptr_type type, u32 offset, u32 size) +{ + ptr->data = data; + ptr->offset = offset; + ptr->size = size; + bpf_dynptr_set_type(ptr, type); +} + +void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr) +{ + memset(ptr, 0, sizeof(*ptr)); +} + +static int bpf_dynptr_check_off_len(struct bpf_dynptr_kern *ptr, u32 offset, u32 len) +{ + u32 size = bpf_dynptr_get_size(ptr); + + if (len > size || offset > size - len) + return -E2BIG; + + return 0; +} + +BPF_CALL_4(bpf_dynptr_from_mem, void *, data, u32, size, u64, flags, struct bpf_dynptr_kern *, ptr) +{ + int err; + + err = bpf_dynptr_check_size(size); + if (err) + goto error; + + /* flags is currently unsupported */ + if (flags) { + err = -EINVAL; + goto error; + } + + bpf_dynptr_init(ptr, data, BPF_DYNPTR_TYPE_LOCAL, 0, size); + + return 0; + +error: + bpf_dynptr_set_null(ptr); + return err; +} + +const struct bpf_func_proto bpf_dynptr_from_mem_proto = { + .func = bpf_dynptr_from_mem, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT, +}; + +BPF_CALL_4(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src, u32, offset) +{ + int err; + + if (!src->data) + return -EINVAL; + + err = bpf_dynptr_check_off_len(src, offset, len); + if (err) + return err; + + memcpy(dst, src->data + src->offset + offset, len); + + return 0; +} + +const struct bpf_func_proto bpf_dynptr_read_proto = { + .func = bpf_dynptr_read, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, + .arg3_type = ARG_PTR_TO_DYNPTR, + .arg4_type = ARG_ANYTHING, +}; + +BPF_CALL_4(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src, u32, len) +{ + int err; + + if (!dst->data || bpf_dynptr_is_rdonly(dst)) + return -EINVAL; + + err = bpf_dynptr_check_off_len(dst, offset, len); + if (err) + return err; + + memcpy(dst->data + dst->offset + offset, src, len); + + return 0; +} + +const struct bpf_func_proto bpf_dynptr_write_proto = { + .func = bpf_dynptr_write, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_DYNPTR, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_CONST_SIZE_OR_ZERO, +}; + +BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len) +{ + int err; + + if (!ptr->data) + return 0; + + err = bpf_dynptr_check_off_len(ptr, offset, len); + if (err) + return 0; + + if (bpf_dynptr_is_rdonly(ptr)) + return 0; + + return (unsigned long)(ptr->data + ptr->offset + offset); +} + +const struct bpf_func_proto bpf_dynptr_data_proto = { + .func = bpf_dynptr_data, + .gpl_only = false, + .ret_type = RET_PTR_TO_DYNPTR_MEM_OR_NULL, + .arg1_type = ARG_PTR_TO_DYNPTR, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_CONST_ALLOC_SIZE_OR_ZERO, +}; + const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct bpf_func_proto bpf_get_current_task_btf_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; @@ -1420,6 +1599,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_map_pop_elem_proto; case BPF_FUNC_map_peek_elem: return &bpf_map_peek_elem_proto; + case BPF_FUNC_map_lookup_percpu_elem: + return &bpf_map_lookup_percpu_elem_proto; case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; case 
BPF_FUNC_get_smp_processor_id: @@ -1442,12 +1623,26 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_ringbuf_discard_proto; case BPF_FUNC_ringbuf_query: return &bpf_ringbuf_query_proto; + case BPF_FUNC_ringbuf_reserve_dynptr: + return &bpf_ringbuf_reserve_dynptr_proto; + case BPF_FUNC_ringbuf_submit_dynptr: + return &bpf_ringbuf_submit_dynptr_proto; + case BPF_FUNC_ringbuf_discard_dynptr: + return &bpf_ringbuf_discard_dynptr_proto; case BPF_FUNC_for_each_map_elem: return &bpf_for_each_map_elem_proto; case BPF_FUNC_loop: return &bpf_loop_proto; case BPF_FUNC_strncmp: return &bpf_strncmp_proto; + case BPF_FUNC_dynptr_from_mem: + return &bpf_dynptr_from_mem_proto; + case BPF_FUNC_dynptr_read: + return &bpf_dynptr_read_proto; + case BPF_FUNC_dynptr_write: + return &bpf_dynptr_write_proto; + case BPF_FUNC_dynptr_data: + return &bpf_dynptr_data_proto; default: break; } diff --git a/kernel/bpf/link_iter.c b/kernel/bpf/link_iter.c new file mode 100644 index 000000000000..fec8005a121c --- /dev/null +++ b/kernel/bpf/link_iter.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2022 Red Hat, Inc. */ +#include <linux/bpf.h> +#include <linux/fs.h> +#include <linux/filter.h> +#include <linux/kernel.h> +#include <linux/btf_ids.h> + +struct bpf_iter_seq_link_info { + u32 link_id; +}; + +static void *bpf_link_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct bpf_iter_seq_link_info *info = seq->private; + struct bpf_link *link; + + link = bpf_link_get_curr_or_next(&info->link_id); + if (!link) + return NULL; + + if (*pos == 0) + ++*pos; + return link; +} + +static void *bpf_link_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct bpf_iter_seq_link_info *info = seq->private; + + ++*pos; + ++info->link_id; + bpf_link_put((struct bpf_link *)v); + return bpf_link_get_curr_or_next(&info->link_id); +} + +struct bpf_iter__bpf_link { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct bpf_link *, link); +}; + +DEFINE_BPF_ITER_FUNC(bpf_link, struct bpf_iter_meta *meta, struct bpf_link *link) + +static int __bpf_link_seq_show(struct seq_file *seq, void *v, bool in_stop) +{ + struct bpf_iter__bpf_link ctx; + struct bpf_iter_meta meta; + struct bpf_prog *prog; + int ret = 0; + + ctx.meta = &meta; + ctx.link = v; + meta.seq = seq; + prog = bpf_iter_get_info(&meta, in_stop); + if (prog) + ret = bpf_iter_run_prog(prog, &ctx); + + return ret; +} + +static int bpf_link_seq_show(struct seq_file *seq, void *v) +{ + return __bpf_link_seq_show(seq, v, false); +} + +static void bpf_link_seq_stop(struct seq_file *seq, void *v) +{ + if (!v) + (void)__bpf_link_seq_show(seq, v, true); + else + bpf_link_put((struct bpf_link *)v); +} + +static const struct seq_operations bpf_link_seq_ops = { + .start = bpf_link_seq_start, + .next = bpf_link_seq_next, + .stop = bpf_link_seq_stop, + .show = bpf_link_seq_show, +}; + +BTF_ID_LIST(btf_bpf_link_id) +BTF_ID(struct, bpf_link) + +static const struct bpf_iter_seq_info bpf_link_seq_info = { + .seq_ops = &bpf_link_seq_ops, + .init_seq_private = NULL, + .fini_seq_private = NULL, + .seq_priv_size = sizeof(struct bpf_iter_seq_link_info), +}; + +static struct bpf_iter_reg bpf_link_reg_info = { + .target = "bpf_link", + .ctx_arg_info_size = 1, + .ctx_arg_info = { + { offsetof(struct bpf_iter__bpf_link, link), + PTR_TO_BTF_ID_OR_NULL }, + }, + .seq_info = &bpf_link_seq_info, +}; + +static int __init bpf_link_iter_init(void) +{ + bpf_link_reg_info.ctx_arg_info[0].btf_id = *btf_bpf_link_id; + return 
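The bpf_link iterator target registered above is consumed like any other BPF iterator; a minimal, illustrative iterator program (not taken from this series) would look like this:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";

SEC("iter/bpf_link")
int dump_bpf_link(struct bpf_iter__bpf_link *ctx)
{
	struct seq_file *seq = ctx->meta->seq;
	struct bpf_link *link = ctx->link;
	__u32 link_id;

	/* ctx->link is NULL on the final (stop) invocation. */
	if (!link)
		return 0;

	link_id = link->id;
	bpf_seq_write(seq, &link_id, sizeof(link_id));
	return 0;
}

Userspace then attaches the program as a BPF_TRACE_ITER link (e.g. libbpf's bpf_program__attach_iter()) and reads the per-link records through the fd returned by bpf_iter_create().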
bpf_iter_reg_target(&bpf_link_reg_info); +} + +late_initcall(bpf_link_iter_init); diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 311264ab80c4..ded4faeca192 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -475,3 +475,81 @@ const struct bpf_func_proto bpf_ringbuf_query_proto = { .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_ANYTHING, }; + +BPF_CALL_4(bpf_ringbuf_reserve_dynptr, struct bpf_map *, map, u32, size, u64, flags, + struct bpf_dynptr_kern *, ptr) +{ + struct bpf_ringbuf_map *rb_map; + void *sample; + int err; + + if (unlikely(flags)) { + bpf_dynptr_set_null(ptr); + return -EINVAL; + } + + err = bpf_dynptr_check_size(size); + if (err) { + bpf_dynptr_set_null(ptr); + return err; + } + + rb_map = container_of(map, struct bpf_ringbuf_map, map); + + sample = __bpf_ringbuf_reserve(rb_map->rb, size); + if (!sample) { + bpf_dynptr_set_null(ptr); + return -EINVAL; + } + + bpf_dynptr_init(ptr, sample, BPF_DYNPTR_TYPE_RINGBUF, 0, size); + + return 0; +} + +const struct bpf_func_proto bpf_ringbuf_reserve_dynptr_proto = { + .func = bpf_ringbuf_reserve_dynptr, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | MEM_UNINIT, +}; + +BPF_CALL_2(bpf_ringbuf_submit_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags) +{ + if (!ptr->data) + return 0; + + bpf_ringbuf_commit(ptr->data, flags, false /* discard */); + + bpf_dynptr_set_null(ptr); + + return 0; +} + +const struct bpf_func_proto bpf_ringbuf_submit_dynptr_proto = { + .func = bpf_ringbuf_submit_dynptr, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | OBJ_RELEASE, + .arg2_type = ARG_ANYTHING, +}; + +BPF_CALL_2(bpf_ringbuf_discard_dynptr, struct bpf_dynptr_kern *, ptr, u64, flags) +{ + if (!ptr->data) + return 0; + + bpf_ringbuf_commit(ptr->data, flags, true /* discard */); + + bpf_dynptr_set_null(ptr); + + return 0; +} + +const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto = { + .func = bpf_ringbuf_discard_dynptr, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | OBJ_RELEASE, + .arg2_type = ARG_ANYTHING, +}; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e0aead17dff4..2b69306d3c6e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2864,19 +2864,12 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd) } EXPORT_SYMBOL(bpf_link_get_from_fd); -struct bpf_tracing_link { - struct bpf_link link; - enum bpf_attach_type attach_type; - struct bpf_trampoline *trampoline; - struct bpf_prog *tgt_prog; -}; - static void bpf_tracing_link_release(struct bpf_link *link) { struct bpf_tracing_link *tr_link = - container_of(link, struct bpf_tracing_link, link); + container_of(link, struct bpf_tracing_link, link.link); - WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog, + WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link, tr_link->trampoline)); bpf_trampoline_put(tr_link->trampoline); @@ -2889,7 +2882,7 @@ static void bpf_tracing_link_release(struct bpf_link *link) static void bpf_tracing_link_dealloc(struct bpf_link *link) { struct bpf_tracing_link *tr_link = - container_of(link, struct bpf_tracing_link, link); + container_of(link, struct bpf_tracing_link, link.link); kfree(tr_link); } @@ -2898,7 +2891,7 @@ static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, struct seq_file *seq) { struct bpf_tracing_link *tr_link = - container_of(link, struct bpf_tracing_link, link); + container_of(link, 
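The ringbuf dynptr flavour above pairs every reserve with a mandatory submit or discard; an illustrative producer follows (map size, event layout and attach point are assumptions). Note the discard on the failure path: the verifier still treats the dynptr as a held reference when the reservation fails, and discarding a NULL dynptr is a no-op.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";

struct event {
	__u32 pid;
	__u64 ts;
};

struct {
	__uint(type, BPF_MAP_TYPE_RINGBUF);
	__uint(max_entries, 4096);
} events SEC(".maps");

SEC("tp/syscalls/sys_enter_nanosleep")
int ringbuf_dynptr_example(void *ctx)
{
	struct bpf_dynptr dptr;
	struct event e = {};

	if (bpf_ringbuf_reserve_dynptr(&events, sizeof(e), 0, &dptr)) {
		/* Reservation failed: dptr is NULL but must still be released. */
		bpf_ringbuf_discard_dynptr(&dptr, 0);
		return 0;
	}

	e.pid = bpf_get_current_pid_tgid() >> 32;
	e.ts = bpf_ktime_get_ns();
	bpf_dynptr_write(&dptr, 0, &e, sizeof(e));

	bpf_ringbuf_submit_dynptr(&dptr, 0);
	return 0;
}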
struct bpf_tracing_link, link.link); seq_printf(seq, "attach_type:\t%d\n", @@ -2909,7 +2902,7 @@ static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, struct bpf_link_info *info) { struct bpf_tracing_link *tr_link = - container_of(link, struct bpf_tracing_link, link); + container_of(link, struct bpf_tracing_link, link.link); info->tracing.attach_type = tr_link->attach_type; bpf_trampoline_unpack_key(tr_link->trampoline->key, @@ -2928,7 +2921,8 @@ static const struct bpf_link_ops bpf_tracing_link_lops = { static int bpf_tracing_prog_attach(struct bpf_prog *prog, int tgt_prog_fd, - u32 btf_id) + u32 btf_id, + u64 bpf_cookie) { struct bpf_link_primer link_primer; struct bpf_prog *tgt_prog = NULL; @@ -2990,9 +2984,10 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, err = -ENOMEM; goto out_put_prog; } - bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING, + bpf_link_init(&link->link.link, BPF_LINK_TYPE_TRACING, &bpf_tracing_link_lops, prog); link->attach_type = prog->expected_attach_type; + link->link.cookie = bpf_cookie; mutex_lock(&prog->aux->dst_mutex); @@ -3060,11 +3055,11 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, tgt_prog = prog->aux->dst_prog; } - err = bpf_link_prime(&link->link, &link_primer); + err = bpf_link_prime(&link->link.link, &link_primer); if (err) goto out_unlock; - err = bpf_trampoline_link_prog(prog, tr); + err = bpf_trampoline_link_prog(&link->link, tr); if (err) { bpf_link_cleanup(&link_primer); link = NULL; @@ -3278,7 +3273,7 @@ static int bpf_raw_tp_link_attach(struct bpf_prog *prog, tp_name = prog->aux->attach_func_name; break; } - return bpf_tracing_prog_attach(prog, 0, 0); + return bpf_tracing_prog_attach(prog, 0, 0, 0); case BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: if (strncpy_from_user(buf, user_tp_name, sizeof(buf) - 1) < 0) @@ -4531,7 +4526,8 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) case BPF_PROG_TYPE_EXT: ret = bpf_tracing_prog_attach(prog, attr->link_create.target_fd, - attr->link_create.target_btf_id); + attr->link_create.target_btf_id, + attr->link_create.tracing.cookie); break; case BPF_PROG_TYPE_LSM: case BPF_PROG_TYPE_TRACING: @@ -4546,7 +4542,8 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) else ret = bpf_tracing_prog_attach(prog, attr->link_create.target_fd, - attr->link_create.target_btf_id); + attr->link_create.target_btf_id, + attr->link_create.tracing.cookie); break; case BPF_PROG_TYPE_FLOW_DISSECTOR: case BPF_PROG_TYPE_SK_LOOKUP: @@ -4680,6 +4677,25 @@ struct bpf_link *bpf_link_by_id(u32 id) return link; } +struct bpf_link *bpf_link_get_curr_or_next(u32 *id) +{ + struct bpf_link *link; + + spin_lock_bh(&link_idr_lock); +again: + link = idr_get_next(&link_idr, id); + if (link) { + link = bpf_link_inc_not_zero(link); + if (IS_ERR(link)) { + (*id)++; + goto again; + } + } + spin_unlock_bh(&link_idr_lock); + + return link; +} + #define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id static int bpf_link_get_fd_by_id(const union bpf_attr *attr) @@ -4847,9 +4863,21 @@ out_prog_put: static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) { union bpf_attr attr; + bool capable; int err; - if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) + capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled; + + /* Intent here is for unprivileged_bpf_disabled to block key object + * creation commands for unprivileged users; other actions depend + * of fd availability and access to bpffs, so are dependent on + * object creation success. 
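With the tracing cookie plumbed through link_create above, a userspace sketch of attaching an already-loaded fentry program with a cookie could look as follows (raw bpf(2) call; assumes UAPI headers carrying the link_create.tracing.cookie member added in this series, and a program loaded with expected_attach_type BPF_TRACE_FENTRY and a resolved attach_btf_id):

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int attach_fentry_with_cookie(int prog_fd, __u64 cookie)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.link_create.prog_fd = prog_fd;
	attr.link_create.target_fd = 0;		/* target was fixed at load time */
	attr.link_create.attach_type = BPF_TRACE_FENTRY;
	attr.link_create.tracing.cookie = cookie;

	/* Returns a link fd; the program can read the value back with
	 * bpf_get_attach_cookie(). */
	return syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));
}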
Capabilities are later verified for + * operations such as load and map create, so even with unprivileged + * BPF disabled, capability checks are still carried out for these + * and other operations. + */ + if (!capable && + (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD)) return -EPERM; err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); @@ -5008,6 +5036,7 @@ static bool syscall_prog_is_valid_access(int off, int size, BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size) { struct bpf_prog * __maybe_unused prog; + struct bpf_tramp_run_ctx __maybe_unused run_ctx; switch (cmd) { case BPF_MAP_CREATE: @@ -5035,13 +5064,15 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size) return -EINVAL; } - if (!__bpf_prog_enter_sleepable(prog)) { + run_ctx.bpf_cookie = 0; + run_ctx.saved_run_ctx = NULL; + if (!__bpf_prog_enter_sleepable(prog, &run_ctx)) { /* recursion detected */ bpf_prog_put(prog); return -EBUSY; } attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in); - __bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */); + __bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */, &run_ctx); bpf_prog_put(prog); return 0; #endif diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index ada97751ae1b..93c7675f0c9e 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -30,9 +30,12 @@ static DEFINE_MUTEX(trampoline_mutex); bool bpf_prog_has_trampoline(const struct bpf_prog *prog) { enum bpf_attach_type eatype = prog->expected_attach_type; + enum bpf_prog_type ptype = prog->type; - return eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT || - eatype == BPF_MODIFY_RETURN; + return (ptype == BPF_PROG_TYPE_TRACING && + (eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT || + eatype == BPF_MODIFY_RETURN)) || + (ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC); } void *bpf_jit_alloc_exec_page(void) @@ -168,30 +171,30 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) return ret; } -static struct bpf_tramp_progs * +static struct bpf_tramp_links * bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total, bool *ip_arg) { - const struct bpf_prog_aux *aux; - struct bpf_tramp_progs *tprogs; - struct bpf_prog **progs; + struct bpf_tramp_link *link; + struct bpf_tramp_links *tlinks; + struct bpf_tramp_link **links; int kind; *total = 0; - tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL); - if (!tprogs) + tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL); + if (!tlinks) return ERR_PTR(-ENOMEM); for (kind = 0; kind < BPF_TRAMP_MAX; kind++) { - tprogs[kind].nr_progs = tr->progs_cnt[kind]; + tlinks[kind].nr_links = tr->progs_cnt[kind]; *total += tr->progs_cnt[kind]; - progs = tprogs[kind].progs; + links = tlinks[kind].links; - hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) { - *ip_arg |= aux->prog->call_get_func_ip; - *progs++ = aux->prog; + hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) { + *ip_arg |= link->link.prog->call_get_func_ip; + *links++ = link; } } - return tprogs; + return tlinks; } static void __bpf_tramp_image_put_deferred(struct work_struct *work) @@ -330,14 +333,14 @@ out: static int bpf_trampoline_update(struct bpf_trampoline *tr) { struct bpf_tramp_image *im; - struct bpf_tramp_progs *tprogs; + struct bpf_tramp_links *tlinks; u32 flags = BPF_TRAMP_F_RESTORE_REGS; bool ip_arg = false; int err, total; - tprogs = bpf_trampoline_get_progs(tr, &total, &ip_arg); - if 
(IS_ERR(tprogs)) - return PTR_ERR(tprogs); + tlinks = bpf_trampoline_get_progs(tr, &total, &ip_arg); + if (IS_ERR(tlinks)) + return PTR_ERR(tlinks); if (total == 0) { err = unregister_fentry(tr, tr->cur_image->image); @@ -353,15 +356,15 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) goto out; } - if (tprogs[BPF_TRAMP_FEXIT].nr_progs || - tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs) + if (tlinks[BPF_TRAMP_FEXIT].nr_links || + tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; if (ip_arg) flags |= BPF_TRAMP_F_IP_ARG; err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE, - &tr->func.model, flags, tprogs, + &tr->func.model, flags, tlinks, tr->func.addr); if (err < 0) goto out; @@ -381,7 +384,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) tr->cur_image = im; tr->selector++; out: - kfree(tprogs); + kfree(tlinks); return err; } @@ -407,13 +410,14 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog) } } -int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) +int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { enum bpf_tramp_prog_type kind; + struct bpf_tramp_link *link_exiting; int err = 0; - int cnt; + int cnt = 0, i; - kind = bpf_attach_type_to_tramp(prog); + kind = bpf_attach_type_to_tramp(link->link.prog); mutex_lock(&tr->mutex); if (tr->extension_prog) { /* cannot attach fentry/fexit if extension prog is attached. @@ -422,32 +426,43 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) err = -EBUSY; goto out; } - cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT]; + + for (i = 0; i < BPF_TRAMP_MAX; i++) + cnt += tr->progs_cnt[i]; + if (kind == BPF_TRAMP_REPLACE) { /* Cannot attach extension if fentry/fexit are in use. */ if (cnt) { err = -EBUSY; goto out; } - tr->extension_prog = prog; + tr->extension_prog = link->link.prog; err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL, - prog->bpf_func); + link->link.prog->bpf_func); goto out; } - if (cnt >= BPF_MAX_TRAMP_PROGS) { + if (cnt >= BPF_MAX_TRAMP_LINKS) { err = -E2BIG; goto out; } - if (!hlist_unhashed(&prog->aux->tramp_hlist)) { + if (!hlist_unhashed(&link->tramp_hlist)) { + /* prog already linked */ + err = -EBUSY; + goto out; + } + hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) { + if (link_exiting->link.prog != link->link.prog) + continue; /* prog already linked */ err = -EBUSY; goto out; } - hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]); + + hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]); tr->progs_cnt[kind]++; err = bpf_trampoline_update(tr); if (err) { - hlist_del_init(&prog->aux->tramp_hlist); + hlist_del_init(&link->tramp_hlist); tr->progs_cnt[kind]--; } out: @@ -456,12 +471,12 @@ out: } /* bpf_trampoline_unlink_prog() should never fail. 
*/ -int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) +int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) { enum bpf_tramp_prog_type kind; int err; - kind = bpf_attach_type_to_tramp(prog); + kind = bpf_attach_type_to_tramp(link->link.prog); mutex_lock(&tr->mutex); if (kind == BPF_TRAMP_REPLACE) { WARN_ON_ONCE(!tr->extension_prog); @@ -470,7 +485,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) tr->extension_prog = NULL; goto out; } - hlist_del_init(&prog->aux->tramp_hlist); + hlist_del_init(&link->tramp_hlist); tr->progs_cnt[kind]--; err = bpf_trampoline_update(tr); out: @@ -500,16 +515,19 @@ out: void bpf_trampoline_put(struct bpf_trampoline *tr) { + int i; + if (!tr) return; mutex_lock(&trampoline_mutex); if (!refcount_dec_and_test(&tr->refcnt)) goto out; WARN_ON_ONCE(mutex_is_locked(&tr->mutex)); - if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY]))) - goto out; - if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT]))) - goto out; + + for (i = 0; i < BPF_TRAMP_MAX; i++) + if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[i]))) + goto out; + /* This code will be executed even when the last bpf_tramp_image * is alive. All progs are detached from the trampoline and the * trampoline image is patched with jmp into epilogue to skip @@ -559,11 +577,14 @@ static void notrace inc_misses_counter(struct bpf_prog *prog) * [2..MAX_U64] - execute bpf prog and record execution time. * This is start time. */ -u64 notrace __bpf_prog_enter(struct bpf_prog *prog) +u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx) __acquires(RCU) { rcu_read_lock(); migrate_disable(); + + run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); + if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) { inc_misses_counter(prog); return 0; @@ -593,29 +614,38 @@ static void notrace update_prog_stats(struct bpf_prog *prog, } } -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start) +void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx) __releases(RCU) { + bpf_reset_run_ctx(run_ctx->saved_run_ctx); + update_prog_stats(prog, start); __this_cpu_dec(*(prog->active)); migrate_enable(); rcu_read_unlock(); } -u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog) +u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx) { rcu_read_lock_trace(); migrate_disable(); might_fault(); + if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) { inc_misses_counter(prog); return 0; } + + run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); + return bpf_prog_start_time(); } -void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start) +void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx) { + bpf_reset_run_ctx(run_ctx->saved_run_ctx); + update_prog_stats(prog, start); __this_cpu_dec(*(prog->active)); migrate_enable(); @@ -635,7 +665,7 @@ void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr) int __weak arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_tramp_progs *tprogs, + struct bpf_tramp_links *tlinks, void *orig_call) { return -ENOTSUPP; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 813f6ee80419..aedac2ac02b9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -187,6 
+187,9 @@ struct bpf_verifier_stack_elem { POISON_POINTER_DELTA)) #define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV)) +static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx); +static int release_reference(struct bpf_verifier_env *env, int ref_obj_id); + static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) { return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON; @@ -259,6 +262,7 @@ struct bpf_call_arg_meta { u32 ret_btf_id; u32 subprogno; struct bpf_map_value_off_desc *kptr_off_desc; + u8 uninit_dynptr_regno; }; struct btf *btf_vmlinux; @@ -509,6 +513,7 @@ static bool is_ptr_cast_function(enum bpf_func_id func_id) func_id == BPF_FUNC_skc_to_tcp_sock || func_id == BPF_FUNC_skc_to_tcp6_sock || func_id == BPF_FUNC_skc_to_udp6_sock || + func_id == BPF_FUNC_skc_to_mptcp_sock || func_id == BPF_FUNC_skc_to_tcp_timewait_sock || func_id == BPF_FUNC_skc_to_tcp_request_sock; } @@ -580,6 +585,7 @@ static char slot_type_char[] = { [STACK_SPILL] = 'r', [STACK_MISC] = 'm', [STACK_ZERO] = '0', + [STACK_DYNPTR] = 'd', }; static void print_liveness(struct bpf_verifier_env *env, @@ -595,6 +601,25 @@ static void print_liveness(struct bpf_verifier_env *env, verbose(env, "D"); } +static int get_spi(s32 off) +{ + return (-off - 1) / BPF_REG_SIZE; +} + +static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots) +{ + int allocated_slots = state->allocated_stack / BPF_REG_SIZE; + + /* We need to check that slots between [spi - nr_slots + 1, spi] are + * within [0, allocated_stack). + * + * Please note that the spi grows downwards. For example, a dynptr + * takes the size of two stack slots; the first slot will be at + * spi and the second slot will be at spi - 1. + */ + return spi - nr_slots + 1 >= 0 && spi < allocated_slots; +} + static struct bpf_func_state *func(struct bpf_verifier_env *env, const struct bpf_reg_state *reg) { @@ -646,6 +671,132 @@ static void mark_verifier_state_scratched(struct bpf_verifier_env *env) env->scratched_stack_slots = ~0ULL; } +static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) +{ + switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { + case DYNPTR_TYPE_LOCAL: + return BPF_DYNPTR_TYPE_LOCAL; + case DYNPTR_TYPE_RINGBUF: + return BPF_DYNPTR_TYPE_RINGBUF; + default: + return BPF_DYNPTR_TYPE_INVALID; + } +} + +static bool dynptr_type_refcounted(enum bpf_dynptr_type type) +{ + return type == BPF_DYNPTR_TYPE_RINGBUF; +} + +static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + enum bpf_arg_type arg_type, int insn_idx) +{ + struct bpf_func_state *state = func(env, reg); + enum bpf_dynptr_type type; + int spi, i, id; + + spi = get_spi(reg->off); + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS)) + return -EINVAL; + + for (i = 0; i < BPF_REG_SIZE; i++) { + state->stack[spi].slot_type[i] = STACK_DYNPTR; + state->stack[spi - 1].slot_type[i] = STACK_DYNPTR; + } + + type = arg_to_dynptr_type(arg_type); + if (type == BPF_DYNPTR_TYPE_INVALID) + return -EINVAL; + + state->stack[spi].spilled_ptr.dynptr.first_slot = true; + state->stack[spi].spilled_ptr.dynptr.type = type; + state->stack[spi - 1].spilled_ptr.dynptr.type = type; + + if (dynptr_type_refcounted(type)) { + /* The id is used to track proper releasing */ + id = acquire_reference_state(env, insn_idx); + if (id < 0) + return id; + + state->stack[spi].spilled_ptr.id = id; + state->stack[spi - 1].spilled_ptr.id = id; + } + + return 0; +} + +static int unmark_stack_slots_dynptr(struct 
bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi, i; + + spi = get_spi(reg->off); + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS)) + return -EINVAL; + + for (i = 0; i < BPF_REG_SIZE; i++) { + state->stack[spi].slot_type[i] = STACK_INVALID; + state->stack[spi - 1].slot_type[i] = STACK_INVALID; + } + + /* Invalidate any slices associated with this dynptr */ + if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) { + release_reference(env, state->stack[spi].spilled_ptr.id); + state->stack[spi].spilled_ptr.id = 0; + state->stack[spi - 1].spilled_ptr.id = 0; + } + + state->stack[spi].spilled_ptr.dynptr.first_slot = false; + state->stack[spi].spilled_ptr.dynptr.type = 0; + state->stack[spi - 1].spilled_ptr.dynptr.type = 0; + + return 0; +} + +static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + int i; + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS)) + return true; + + for (i = 0; i < BPF_REG_SIZE; i++) { + if (state->stack[spi].slot_type[i] == STACK_DYNPTR || + state->stack[spi - 1].slot_type[i] == STACK_DYNPTR) + return false; + } + + return true; +} + +static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg, + enum bpf_arg_type arg_type) +{ + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + int i; + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) || + !state->stack[spi].spilled_ptr.dynptr.first_slot) + return false; + + for (i = 0; i < BPF_REG_SIZE; i++) { + if (state->stack[spi].slot_type[i] != STACK_DYNPTR || + state->stack[spi - 1].slot_type[i] != STACK_DYNPTR) + return false; + } + + /* ARG_PTR_TO_DYNPTR takes any type of dynptr */ + if (arg_type == ARG_PTR_TO_DYNPTR) + return true; + + return state->stack[spi].spilled_ptr.dynptr.type == arg_to_dynptr_type(arg_type); +} + /* The reg state of a pointer or a bounded scalar was saved when * it was spilled to the stack. 
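As a worked instance of the slot bookkeeping above: a dynptr stored at stack offset -16 spans BPF_DYNPTR_NR_SLOTS (two) slots of BPF_REG_SIZE (8) bytes, and get_spi(-16) = (16 - 1) / 8 = 1, so the object occupies slots spi = 1 and spi - 1 = 0; is_spi_bounds_valid() then reduces to 1 - 2 + 1 >= 0 and 1 < allocated_stack / 8, i.e. at least 16 bytes of allocated stack.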
*/ @@ -1815,8 +1966,7 @@ void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab) kfree(tab); } -static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, - u32 func_id, s16 offset) +static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset) { if (offset) { if (offset < 0) { @@ -1891,7 +2041,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) prog_aux->kfunc_btf_tab = btf_tab; } - desc_btf = find_kfunc_desc_btf(env, func_id, offset); + desc_btf = find_kfunc_desc_btf(env, offset); if (IS_ERR(desc_btf)) { verbose(env, "failed to find BTF for kernel function\n"); return PTR_ERR(desc_btf); @@ -2360,7 +2510,7 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL) return NULL; - desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off); + desc_btf = find_kfunc_desc_btf(data, insn->off); if (IS_ERR(desc_btf)) return "<error>"; @@ -5352,7 +5502,7 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno, return -EINVAL; } if (!map_value_has_kptrs(map_ptr)) { - ret = PTR_ERR(map_ptr->kptr_off_tab); + ret = PTR_ERR_OR_ZERO(map_ptr->kptr_off_tab); if (ret == -E2BIG) verbose(env, "map '%s' has more than %d kptr\n", map_ptr->name, BPF_MAP_VALUE_OFF_MAX); @@ -5378,12 +5528,6 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno, return 0; } -static bool arg_type_is_mem_ptr(enum bpf_arg_type type) -{ - return base_type(type) == ARG_PTR_TO_MEM || - base_type(type) == ARG_PTR_TO_UNINIT_MEM; -} - static bool arg_type_is_mem_size(enum bpf_arg_type type) { return type == ARG_CONST_SIZE || @@ -5406,6 +5550,11 @@ static bool arg_type_is_release(enum bpf_arg_type type) return type & OBJ_RELEASE; } +static bool arg_type_is_dynptr(enum bpf_arg_type type) +{ + return base_type(type) == ARG_PTR_TO_DYNPTR; +} + static int int_ptr_type_to_size(enum bpf_arg_type type) { if (type == ARG_PTR_TO_INT) @@ -5523,7 +5672,6 @@ static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } } static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types, - [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types, [ARG_CONST_SIZE] = &scalar_types, [ARG_CONST_SIZE_OR_ZERO] = &scalar_types, [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types, @@ -5537,7 +5685,6 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_BTF_ID] = &btf_ptr_types, [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, [ARG_PTR_TO_MEM] = &mem_types, - [ARG_PTR_TO_UNINIT_MEM] = &mem_types, [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types, [ARG_PTR_TO_INT] = &int_ptr_types, [ARG_PTR_TO_LONG] = &int_ptr_types, @@ -5547,6 +5694,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types, [ARG_PTR_TO_TIMER] = &timer_types, [ARG_PTR_TO_KPTR] = &kptr_types, + [ARG_PTR_TO_DYNPTR] = &stack_ptr_types, }; static int check_reg_type(struct bpf_verifier_env *env, u32 regno, @@ -5636,8 +5784,13 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, bool fixed_off_ok = false; switch ((u32)type) { - case SCALAR_VALUE: /* Pointer types where reg offset is explicitly allowed: */ + case PTR_TO_STACK: + if (arg_type_is_dynptr(arg_type) && reg->off % BPF_REG_SIZE) { + verbose(env, "cannot pass in dynptr at an offset\n"); + return -EINVAL; + } + fallthrough; case PTR_TO_PACKET: case 
PTR_TO_PACKET_META: case PTR_TO_MAP_KEY: @@ -5647,7 +5800,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, case PTR_TO_MEM | MEM_ALLOC: case PTR_TO_BUF: case PTR_TO_BUF | MEM_RDONLY: - case PTR_TO_STACK: + case SCALAR_VALUE: /* Some of the argument types nevertheless require a * zero register offset. */ @@ -5679,6 +5832,14 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env, return __check_ptr_off_reg(env, reg, regno, fixed_off_ok); } +static u32 stack_slot_get_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + + return state->stack[spi].spilled_ptr.id; +} + static int check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_call_arg_meta *meta, const struct bpf_func_proto *fn) @@ -5711,8 +5872,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, return -EACCES; } - if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || - base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) { + if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) { err = resolve_map_arg_type(env, meta, &arg_type); if (err) return err; @@ -5734,7 +5894,16 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, skip_type_check: if (arg_type_is_release(arg_type)) { - if (!reg->ref_obj_id && !register_is_null(reg)) { + if (arg_type_is_dynptr(arg_type)) { + struct bpf_func_state *state = func(env, reg); + int spi = get_spi(reg->off); + + if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) || + !state->stack[spi].spilled_ptr.id) { + verbose(env, "arg %d is an unacquired reference\n", regno); + return -EINVAL; + } + } else if (!reg->ref_obj_id && !register_is_null(reg)) { verbose(env, "R%d must be referenced when passed to release function\n", regno); return -EINVAL; @@ -5798,8 +5967,7 @@ skip_type_check: err = check_helper_mem_access(env, regno, meta->map_ptr->key_size, false, NULL); - } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || - base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) { + } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) { if (type_may_be_null(arg_type) && register_is_null(reg)) return 0; @@ -5811,7 +5979,7 @@ skip_type_check: verbose(env, "invalid map_ptr to access map->value\n"); return -EACCES; } - meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE); + meta->raw_mode = arg_type & MEM_UNINIT; err = check_helper_mem_access(env, regno, meta->map_ptr->value_size, false, meta); @@ -5838,15 +6006,49 @@ skip_type_check: return -EACCES; } else if (arg_type == ARG_PTR_TO_FUNC) { meta->subprogno = reg->subprogno; - } else if (arg_type_is_mem_ptr(arg_type)) { + } else if (base_type(arg_type) == ARG_PTR_TO_MEM) { /* The access to this pointer is only checked when we hit the * next is_mem_size argument below. */ - meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM); + meta->raw_mode = arg_type & MEM_UNINIT; } else if (arg_type_is_mem_size(arg_type)) { bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta); + } else if (arg_type_is_dynptr(arg_type)) { + if (arg_type & MEM_UNINIT) { + if (!is_dynptr_reg_valid_uninit(env, reg)) { + verbose(env, "Dynptr has to be an uninitialized dynptr\n"); + return -EINVAL; + } + + /* We only support one dynptr being uninitialized at the moment, + * which is sufficient for the helper functions we have right now. 
+ */ + if (meta->uninit_dynptr_regno) { + verbose(env, "verifier internal error: multiple uninitialized dynptr args\n"); + return -EFAULT; + } + + meta->uninit_dynptr_regno = regno; + } else if (!is_dynptr_reg_valid_init(env, reg, arg_type)) { + const char *err_extra = ""; + + switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { + case DYNPTR_TYPE_LOCAL: + err_extra = "local "; + break; + case DYNPTR_TYPE_RINGBUF: + err_extra = "ringbuf "; + break; + default: + break; + } + + verbose(env, "Expected an initialized %sdynptr as arg #%d\n", + err_extra, arg + 1); + return -EINVAL; + } } else if (arg_type_is_alloc_size(arg_type)) { if (!tnum_is_const(reg->var_off)) { verbose(env, "R%d is not a known constant'\n", @@ -5968,7 +6170,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_RINGBUF: if (func_id != BPF_FUNC_ringbuf_output && func_id != BPF_FUNC_ringbuf_reserve && - func_id != BPF_FUNC_ringbuf_query) + func_id != BPF_FUNC_ringbuf_query && + func_id != BPF_FUNC_ringbuf_reserve_dynptr && + func_id != BPF_FUNC_ringbuf_submit_dynptr && + func_id != BPF_FUNC_ringbuf_discard_dynptr) goto error; break; case BPF_MAP_TYPE_STACK_TRACE: @@ -6084,6 +6289,9 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_FUNC_ringbuf_output: case BPF_FUNC_ringbuf_reserve: case BPF_FUNC_ringbuf_query: + case BPF_FUNC_ringbuf_reserve_dynptr: + case BPF_FUNC_ringbuf_submit_dynptr: + case BPF_FUNC_ringbuf_discard_dynptr: if (map->map_type != BPF_MAP_TYPE_RINGBUF) goto error; break; @@ -6138,6 +6346,12 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, map->map_type != BPF_MAP_TYPE_BLOOM_FILTER) goto error; break; + case BPF_FUNC_map_lookup_percpu_elem: + if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY && + map->map_type != BPF_MAP_TYPE_PERCPU_HASH && + map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH) + goto error; + break; case BPF_FUNC_sk_storage_get: case BPF_FUNC_sk_storage_delete: if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) @@ -6189,10 +6403,8 @@ static bool check_raw_mode_ok(const struct bpf_func_proto *fn) static bool check_args_pair_invalid(enum bpf_arg_type arg_curr, enum bpf_arg_type arg_next) { - return (arg_type_is_mem_ptr(arg_curr) && - !arg_type_is_mem_size(arg_next)) || - (!arg_type_is_mem_ptr(arg_curr) && - arg_type_is_mem_size(arg_next)); + return (base_type(arg_curr) == ARG_PTR_TO_MEM) != + arg_type_is_mem_size(arg_next); } static bool check_arg_pair_ok(const struct bpf_func_proto *fn) @@ -6203,7 +6415,7 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn) * helper function specification. 
*/ if (arg_type_is_mem_size(fn->arg1_type) || - arg_type_is_mem_ptr(fn->arg5_type) || + base_type(fn->arg5_type) == ARG_PTR_TO_MEM || check_args_pair_invalid(fn->arg1_type, fn->arg2_type) || check_args_pair_invalid(fn->arg2_type, fn->arg3_type) || check_args_pair_invalid(fn->arg3_type, fn->arg4_type) || @@ -6751,7 +6963,8 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, func_id != BPF_FUNC_map_pop_elem && func_id != BPF_FUNC_map_peek_elem && func_id != BPF_FUNC_for_each_map_elem && - func_id != BPF_FUNC_redirect_map) + func_id != BPF_FUNC_redirect_map && + func_id != BPF_FUNC_map_lookup_percpu_elem) return 0; if (map == NULL) { @@ -6975,9 +7188,27 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs = cur_regs(env); + if (meta.uninit_dynptr_regno) { + /* we write BPF_DW bits (8 bytes) at a time */ + for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) { + err = check_mem_access(env, insn_idx, meta.uninit_dynptr_regno, + i, BPF_DW, BPF_WRITE, -1, false); + if (err) + return err; + } + + err = mark_stack_slots_dynptr(env, ®s[meta.uninit_dynptr_regno], + fn->arg_type[meta.uninit_dynptr_regno - BPF_REG_1], + insn_idx); + if (err) + return err; + } + if (meta.release_regno) { err = -EINVAL; - if (meta.ref_obj_id) + if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) + err = unmark_stack_slots_dynptr(env, ®s[meta.release_regno]); + else if (meta.ref_obj_id) err = release_reference(env, meta.ref_obj_id); /* meta.ref_obj_id can only be 0 if register that is meant to be * released is NULL, which must be > R0. @@ -7027,6 +7258,12 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_loop_callback_state); break; + case BPF_FUNC_dynptr_from_mem: + if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) { + verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n", + reg_type_str(env, regs[BPF_REG_1].type)); + return -EACCES; + } } if (err) @@ -7155,6 +7392,21 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn regs[BPF_REG_0].id = id; /* For release_reference() */ regs[BPF_REG_0].ref_obj_id = id; + } else if (func_id == BPF_FUNC_dynptr_data) { + int dynptr_id = 0, i; + + /* Find the id of the dynptr we're acquiring a reference to */ + for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { + if (arg_type_is_dynptr(fn->arg_type[i])) { + if (dynptr_id) { + verbose(env, "verifier internal error: multiple dynptr args in func\n"); + return -EFAULT; + } + dynptr_id = stack_slot_get_id(env, ®s[BPF_REG_1 + i]); + } + } + /* For release_reference() */ + regs[BPF_REG_0].ref_obj_id = dynptr_id; } do_refine_retval_range(regs, fn->ret_type, func_id, &meta); @@ -7237,7 +7489,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (!insn->imm) return 0; - desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off); + desc_btf = find_kfunc_desc_btf(env, insn->off); if (IS_ERR(desc_btf)) return PTR_ERR(desc_btf); @@ -13811,7 +14063,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env) insn->imm == BPF_FUNC_map_pop_elem || insn->imm == BPF_FUNC_map_peek_elem || insn->imm == BPF_FUNC_redirect_map || - insn->imm == BPF_FUNC_for_each_map_elem)) { + insn->imm == BPF_FUNC_for_each_map_elem || + insn->imm == BPF_FUNC_map_lookup_percpu_elem)) { aux = &env->insn_aux_data[i + delta]; if (bpf_map_ptr_poisoned(aux)) goto patch_call_imm; @@ -13860,6 +14113,8 @@ static int do_misc_fixups(struct 
bpf_verifier_env *env) bpf_callback_t callback_fn, void *callback_ctx, u64 flags))NULL)); + BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem, + (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL)); patch_map_ops_generic: switch (insn->imm) { @@ -13887,6 +14142,9 @@ patch_map_ops_generic: case BPF_FUNC_for_each_map_elem: insn->imm = BPF_CALL_IMM(ops->map_for_each_callback); continue; + case BPF_FUNC_map_lookup_percpu_elem: + insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem); + continue; } goto patch_call_imm; diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 79f2eb617a62..fbdf8d3279ac 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -29,6 +29,7 @@ #include <linux/compiler.h> #include <linux/module.h> #include <linux/kernel.h> +#include <linux/bsearch.h> /* * These will be re-linked against their real values @@ -228,7 +229,6 @@ unsigned long kallsyms_lookup_name(const char *name) return module_kallsyms_lookup_name(name); } -#ifdef CONFIG_LIVEPATCH /* * Iterate over all symbols in vmlinux. For symbols from modules use * module_kallsyms_on_each_symbol instead. @@ -251,7 +251,6 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, } return 0; } -#endif /* CONFIG_LIVEPATCH */ static unsigned long get_symbol_pos(unsigned long addr, unsigned long *symbolsize, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f15b826f9899..10b157a6d73e 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1091,6 +1091,21 @@ static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = { .arg1_type = ARG_PTR_TO_CTX, }; +BPF_CALL_1(bpf_get_attach_cookie_tracing, void *, ctx) +{ + struct bpf_trace_run_ctx *run_ctx; + + run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx); + return run_ctx->bpf_cookie; +} + +static const struct bpf_func_proto bpf_get_attach_cookie_proto_tracing = { + .func = bpf_get_attach_cookie_tracing, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags) { #ifndef CONFIG_X86 @@ -1182,6 +1197,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_map_pop_elem_proto; case BPF_FUNC_map_peek_elem: return &bpf_map_peek_elem_proto; + case BPF_FUNC_map_lookup_percpu_elem: + return &bpf_map_lookup_percpu_elem_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; case BPF_FUNC_ktime_get_boot_ns: @@ -1688,6 +1705,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skc_to_udp6_sock_proto; case BPF_FUNC_skc_to_unix_sock: return &bpf_skc_to_unix_sock_proto; + case BPF_FUNC_skc_to_mptcp_sock: + return &bpf_skc_to_mptcp_sock_proto; case BPF_FUNC_sk_storage_get: return &bpf_sk_storage_get_tracing_proto; case BPF_FUNC_sk_storage_delete: @@ -1719,6 +1738,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL; case BPF_FUNC_get_func_arg_cnt: return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_cnt_proto : NULL; + case BPF_FUNC_get_attach_cookie: + return bpf_prog_has_trampoline(prog) ? 
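A minimal consumer of the tracing-cookie helper wired up above (illustrative, not from this series; the attach target is an arbitrary choice, and the cookie value is whatever was supplied via link_create.tracing.cookie at attach time):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

SEC("fentry/bpf_fentry_test1")
int BPF_PROG(read_cookie)
{
	/* Returns 0 if no cookie was supplied at attach time. */
	__u64 cookie = bpf_get_attach_cookie(ctx);

	bpf_printk("attach cookie: %llu", cookie);
	return 0;
}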
&bpf_get_attach_cookie_proto_tracing : NULL; default: fn = raw_tp_prog_func_proto(func_id, prog); if (!fn && prog->expected_attach_type == BPF_TRACE_ITER) @@ -2229,6 +2250,59 @@ struct bpf_kprobe_multi_run_ctx { unsigned long entry_ip; }; +struct user_syms { + const char **syms; + char *buf; +}; + +static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 cnt) +{ + unsigned long __user usymbol; + const char **syms = NULL; + char *buf = NULL, *p; + int err = -ENOMEM; + unsigned int i; + + syms = kvmalloc(cnt * sizeof(*syms), GFP_KERNEL); + if (!syms) + goto error; + + buf = kvmalloc(cnt * KSYM_NAME_LEN, GFP_KERNEL); + if (!buf) + goto error; + + for (p = buf, i = 0; i < cnt; i++) { + if (__get_user(usymbol, usyms + i)) { + err = -EFAULT; + goto error; + } + err = strncpy_from_user(p, (const char __user *) usymbol, KSYM_NAME_LEN); + if (err == KSYM_NAME_LEN) + err = -E2BIG; + if (err < 0) + goto error; + syms[i] = p; + p += err + 1; + } + + us->syms = syms; + us->buf = buf; + return 0; + +error: + if (err) { + kvfree(syms); + kvfree(buf); + } + return err; +} + +static void free_user_syms(struct user_syms *us) +{ + kvfree(us->syms); + kvfree(us->buf); +} + static void bpf_kprobe_multi_link_release(struct bpf_link *link) { struct bpf_kprobe_multi_link *kmulti_link; @@ -2349,53 +2423,12 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned long entry_ip, kprobe_multi_link_prog_run(link, entry_ip, regs); } -static int -kprobe_multi_resolve_syms(const void __user *usyms, u32 cnt, - unsigned long *addrs) +static int symbols_cmp(const void *a, const void *b) { - unsigned long addr, size; - const char __user **syms; - int err = -ENOMEM; - unsigned int i; - char *func; - - size = cnt * sizeof(*syms); - syms = kvzalloc(size, GFP_KERNEL); - if (!syms) - return -ENOMEM; + const char **str_a = (const char **) a; + const char **str_b = (const char **) b; - func = kmalloc(KSYM_NAME_LEN, GFP_KERNEL); - if (!func) - goto error; - - if (copy_from_user(syms, usyms, size)) { - err = -EFAULT; - goto error; - } - - for (i = 0; i < cnt; i++) { - err = strncpy_from_user(func, syms[i], KSYM_NAME_LEN); - if (err == KSYM_NAME_LEN) - err = -E2BIG; - if (err < 0) - goto error; - err = -EINVAL; - addr = kallsyms_lookup_name(func); - if (!addr) - goto error; - if (!kallsyms_lookup_size_offset(addr, &size, NULL)) - goto error; - addr = ftrace_location_range(addr, addr + size - 1); - if (!addr) - goto error; - addrs[i] = addr; - } - - err = 0; -error: - kvfree(syms); - kfree(func); - return err; + return strcmp(*str_a, *str_b); } int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) @@ -2441,7 +2474,15 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr goto error; } } else { - err = kprobe_multi_resolve_syms(usyms, cnt, addrs); + struct user_syms us; + + err = copy_user_syms(&us, usyms, cnt); + if (err) + goto error; + + sort(us.syms, cnt, sizeof(*us.syms), symbols_cmp, NULL); + err = ftrace_lookup_symbols(us.syms, cnt, addrs); + free_user_syms(&us); if (err) goto error; } diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 89d9f994ebb0..aac63ca9c3d1 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -85,39 +85,31 @@ static void fprobe_exit_handler(struct rethook_node *rh, void *data, } NOKPROBE_SYMBOL(fprobe_exit_handler); +static int symbols_cmp(const void *a, const void *b) +{ + const char **str_a = (const char **) a; + const char **str_b = (const char **) b; + + return strcmp(*str_a, *str_b); +} + /* 
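On the userspace side, the kprobe-multi path above is typically driven through libbpf; a hedged sketch follows (assumes a libbpf with bpf_kprobe_multi_opts support; the symbol names are purely illustrative). The symbol strings are handed to the kernel as-is; after this change the kernel copies and sorts them, then resolves all addresses in a single kallsyms pass via ftrace_lookup_symbols().

#include <stddef.h>
#include <bpf/libbpf.h>

/* prog is a SEC("kprobe.multi") program from an already-opened object. */
static struct bpf_link *attach_by_symbol_list(struct bpf_program *prog)
{
	const char *syms[] = { "vfs_read", "vfs_write" };
	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts,
		.syms = syms,
		.cnt = 2,
	);

	return bpf_program__attach_kprobe_multi_opts(prog, NULL, &opts);
}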
Convert ftrace location address from symbols */ static unsigned long *get_ftrace_locations(const char **syms, int num) { - unsigned long addr, size; unsigned long *addrs; - int i; /* Convert symbols to symbol address */ addrs = kcalloc(num, sizeof(*addrs), GFP_KERNEL); if (!addrs) return ERR_PTR(-ENOMEM); - for (i = 0; i < num; i++) { - addr = kallsyms_lookup_name(syms[i]); - if (!addr) /* Maybe wrong symbol */ - goto error; - - /* Convert symbol address to ftrace location. */ - if (!kallsyms_lookup_size_offset(addr, &size, NULL) || !size) - goto error; + /* ftrace_lookup_symbols expects sorted symbols */ + sort(syms, num, sizeof(*syms), symbols_cmp, NULL); - addr = ftrace_location_range(addr, addr + size - 1); - if (!addr) /* No dynamic ftrace there. */ - goto error; + if (!ftrace_lookup_symbols(syms, num, addrs)) + return addrs; - addrs[i] = addr; - } - - return addrs; - -error: kfree(addrs); - return ERR_PTR(-ENOENT); } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index af899b058c8d..674add0aafb3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7964,3 +7964,65 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, mutex_unlock(&ftrace_lock); return ret; } + +static int symbols_cmp(const void *a, const void *b) +{ + const char **str_a = (const char **) a; + const char **str_b = (const char **) b; + + return strcmp(*str_a, *str_b); +} + +struct kallsyms_data { + unsigned long *addrs; + const char **syms; + size_t cnt; + size_t found; +}; + +static int kallsyms_callback(void *data, const char *name, + struct module *mod, unsigned long addr) +{ + struct kallsyms_data *args = data; + + if (!bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp)) + return 0; + + addr = ftrace_location(addr); + if (!addr) + return 0; + + args->addrs[args->found++] = addr; + return args->found == args->cnt ? 1 : 0; +} + +/** + * ftrace_lookup_symbols - Lookup addresses for array of symbols + * + * @sorted_syms: array of symbols pointers symbols to resolve, + * must be alphabetically sorted + * @cnt: number of symbols/addresses in @syms/@addrs arrays + * @addrs: array for storing resulting addresses + * + * This function looks up addresses for array of symbols provided in + * @syms array (must be alphabetically sorted) and stores them in + * @addrs array, which needs to be big enough to store at least @cnt + * addresses. + * + * This function returns 0 if all provided symbols are found, + * -ESRCH otherwise. + */ +int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *addrs) +{ + struct kallsyms_data args; + int err; + + args.addrs = addrs; + args.syms = sorted_syms; + args.cnt = cnt; + args.found = 0; + err = kallsyms_on_each_symbol(kallsyms_callback, &args); + if (err < 0) + return err; + return args.found == args.cnt ? 
0 : -ESRCH; +} diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index d0e54e30658a..e78dadfc5829 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -72,13 +72,16 @@ static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args) args->args[3], args->args[4]); } +extern const struct bpf_link_ops bpf_struct_ops_link_lops; + int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { const struct bpf_struct_ops *st_ops = &bpf_bpf_dummy_ops; const struct btf_type *func_proto; struct bpf_dummy_ops_test_args *args; - struct bpf_tramp_progs *tprogs; + struct bpf_tramp_links *tlinks; + struct bpf_tramp_link *link = NULL; void *image = NULL; unsigned int op_idx; int prog_ret; @@ -92,8 +95,8 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, if (IS_ERR(args)) return PTR_ERR(args); - tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL); - if (!tprogs) { + tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL); + if (!tlinks) { err = -ENOMEM; goto out; } @@ -105,8 +108,17 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, } set_vm_flush_reset_perms(image); + link = kzalloc(sizeof(*link), GFP_USER); + if (!link) { + err = -ENOMEM; + goto out; + } + /* prog doesn't take the ownership of the reference from caller */ + bpf_prog_inc(prog); + bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_link_lops, prog); + op_idx = prog->expected_attach_type; - err = bpf_struct_ops_prepare_trampoline(tprogs, prog, + err = bpf_struct_ops_prepare_trampoline(tlinks, link, &st_ops->func_models[op_idx], image, image + PAGE_SIZE); if (err < 0) @@ -124,7 +136,9 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, out: kfree(args); bpf_jit_free_exec(image); - kfree(tprogs); + if (link) + bpf_link_put(&link->link); + kfree(tlinks); return err; } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 9b5a1f630bb0..56f059b3c242 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -565,31 +565,36 @@ struct prog_test_ref_kfunc { int b; struct prog_test_member memb; struct prog_test_ref_kfunc *next; + refcount_t cnt; }; static struct prog_test_ref_kfunc prog_test_struct = { .a = 42, .b = 108, .next = &prog_test_struct, + .cnt = REFCOUNT_INIT(1), }; noinline struct prog_test_ref_kfunc * bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) { - /* randomly return NULL */ - if (get_jiffies_64() % 2) - return NULL; + refcount_inc(&prog_test_struct.cnt); return &prog_test_struct; } noinline struct prog_test_member * bpf_kfunc_call_memb_acquire(void) { - return &prog_test_struct.memb; + WARN_ON_ONCE(1); + return NULL; } noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) { + if (!p) + return; + + refcount_dec(&p->cnt); } noinline void bpf_kfunc_call_memb_release(struct prog_test_member *p) @@ -598,12 +603,18 @@ noinline void bpf_kfunc_call_memb_release(struct prog_test_member *p) noinline void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p) { + WARN_ON_ONCE(1); } noinline struct prog_test_ref_kfunc * -bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) +bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **pp, int a, int b) { - return &prog_test_struct; + struct prog_test_ref_kfunc *p = READ_ONCE(*pp); + + if (!p) + return NULL; + refcount_inc(&p->cnt); + return p; } struct prog_test_pass1 { diff --git 
a/net/core/filter.c b/net/core/filter.c index b741b9f7e6a9..5af58eb48587 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -78,6 +78,7 @@ #include <linux/btf_ids.h> #include <net/tls.h> #include <net/xdp.h> +#include <net/mptcp.h> static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id); @@ -4498,6 +4499,7 @@ BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key if (unlikely(size != sizeof(struct bpf_tunnel_key))) { err = -EINVAL; switch (size) { + case offsetof(struct bpf_tunnel_key, local_ipv6[0]): case offsetof(struct bpf_tunnel_key, tunnel_label): case offsetof(struct bpf_tunnel_key, tunnel_ext): goto set_compat; @@ -4523,10 +4525,14 @@ set_compat: if (flags & BPF_F_TUNINFO_IPV6) { memcpy(to->remote_ipv6, &info->key.u.ipv6.src, sizeof(to->remote_ipv6)); + memcpy(to->local_ipv6, &info->key.u.ipv6.dst, + sizeof(to->local_ipv6)); to->tunnel_label = be32_to_cpu(info->key.label); } else { to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3); + to->local_ipv4 = be32_to_cpu(info->key.u.ipv4.dst); + memset(&to->local_ipv6[1], 0, sizeof(__u32) * 3); to->tunnel_label = 0; } @@ -4597,6 +4603,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, return -EINVAL; if (unlikely(size != sizeof(struct bpf_tunnel_key))) { switch (size) { + case offsetof(struct bpf_tunnel_key, local_ipv6[0]): case offsetof(struct bpf_tunnel_key, tunnel_label): case offsetof(struct bpf_tunnel_key, tunnel_ext): case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): @@ -4639,10 +4646,13 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, info->mode |= IP_TUNNEL_INFO_IPV6; memcpy(&info->key.u.ipv6.dst, from->remote_ipv6, sizeof(from->remote_ipv6)); + memcpy(&info->key.u.ipv6.src, from->local_ipv6, + sizeof(from->local_ipv6)); info->key.label = cpu_to_be32(from->tunnel_label) & IPV6_FLOWLABEL_MASK; } else { info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); + info->key.u.ipv4.src = cpu_to_be32(from->local_ipv4); } return 0; @@ -11272,6 +11282,20 @@ const struct bpf_func_proto bpf_skc_to_unix_sock_proto = { .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UNIX], }; +BPF_CALL_1(bpf_skc_to_mptcp_sock, struct sock *, sk) +{ + BTF_TYPE_EMIT(struct mptcp_sock); + return (unsigned long)bpf_mptcp_sock_from_subflow(sk); +} + +const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = { + .func = bpf_skc_to_mptcp_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, + .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_MPTCP], +}; + BPF_CALL_1(bpf_sock_from_file, struct file *, file) { return (unsigned long)sock_from_file(file); @@ -11314,6 +11338,9 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id) case BPF_FUNC_skc_to_unix_sock: func = &bpf_skc_to_unix_sock_proto; break; + case BPF_FUNC_skc_to_mptcp_sock: + func = &bpf_skc_to_mptcp_sock_proto; + break; case BPF_FUNC_ktime_get_coarse_ns: return &bpf_ktime_get_coarse_ns_proto; default: diff --git a/net/core/skmsg.c b/net/core/skmsg.c index cc381165ea08..22b983ade0e7 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -524,16 +524,20 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, { int num_sge, copied; - /* skb linearize may fail with ENOMEM, but lets simply try again - * later if this happens. Under memory pressure we don't want to - * drop the skb. We need to linearize the skb so that the mapping - * in skb_to_sgvec can not error. 
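The new local_ipv4/local_ipv6 members let a TC program choose the tunnel source address as well as the destination; an illustrative egress classifier (addresses, tunnel id and flags are placeholders, not from this series) is:

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";

SEC("tc")
int set_tunnel_src(struct __sk_buff *skb)
{
	struct bpf_tunnel_key key = {};

	key.tunnel_id = 42;
	key.remote_ipv4 = 0xac100164;	/* 172.16.1.100, host byte order */
	key.local_ipv4 = 0xac100101;	/* 172.16.1.1, new in this series */
	key.tunnel_ttl = 64;

	if (bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
				   BPF_F_ZERO_CSUM_TX))
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}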
- */ - if (skb_linearize(skb)) - return -EAGAIN; num_sge = skb_to_sgvec(skb, msg->sg.data, off, len); - if (unlikely(num_sge < 0)) - return num_sge; + if (num_sge < 0) { + /* skb linearize may fail with ENOMEM, but lets simply try again + * later if this happens. Under memory pressure we don't want to + * drop the skb. We need to linearize the skb so that the mapping + * in skb_to_sgvec can not error. + */ + if (skb_linearize(skb)) + return -EAGAIN; + + num_sge = skb_to_sgvec(skb, msg->sg.data, off, len); + if (unlikely(num_sge < 0)) + return num_sge; + } copied = len; msg->sg.start = 0; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index ca8d38325e1e..71a13596ea2b 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -264,6 +264,8 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, loff_t *ppos) { int ret, jit_enable = *(int *)table->data; + int min = *(int *)table->extra1; + int max = *(int *)table->extra2; struct ctl_table tmp = *table; if (write && !capable(CAP_SYS_ADMIN)) @@ -281,6 +283,10 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, ret = -EPERM; } } + + if (write && ret && min == max) + pr_info_once("CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1.\n"); + return ret; } diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index cb7f53f6ab22..6e7df47c9584 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -10,3 +10,5 @@ obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o mptcp_crypto_test-objs := crypto_test.o mptcp_token_test-objs := token_test.o obj-$(CONFIG_MPTCP_KUNIT_TEST) += mptcp_crypto_test.o mptcp_token_test.o + +obj-$(CONFIG_BPF_SYSCALL) += bpf.o diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c new file mode 100644 index 000000000000..5a0a84ad94af --- /dev/null +++ b/net/mptcp/bpf.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Multipath TCP + * + * Copyright (c) 2020, Tessares SA. + * Copyright (c) 2022, SUSE. 
+ * + * Author: Nicolas Rybowski <nicolas.rybowski@tessares.net> + */ + +#define pr_fmt(fmt) "MPTCP: " fmt + +#include <linux/bpf.h> +#include "protocol.h" + +struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk) +{ + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk)) + return mptcp_sk(mptcp_subflow_ctx(sk)->conn); + + return NULL; +} diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 8fff5ad3444b..03e3d3529ac9 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -369,16 +369,15 @@ VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) $(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) ifeq ($(VMLINUX_H),) +ifeq ($(VMLINUX_BTF),) + $(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)",\ + build the kernel or set VMLINUX_BTF or VMLINUX_H variable) +endif $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ else $(Q)cp "$(VMLINUX_H)" $@ endif -ifeq ($(VMLINUX_BTF),) - $(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)",\ - build the kernel or set VMLINUX_BTF variable) -endif - clean-files += vmlinux.h # Get Clang's default includes on this system, as opposed to those seen by diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index 05a24a712d7d..08f5331d2b00 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -17,7 +17,7 @@ static const char *__doc__ = " XDP RX-queue info extract example\n\n" #include <getopt.h> #include <net/if.h> #include <time.h> - +#include <limits.h> #include <arpa/inet.h> #include <linux/if_link.h> @@ -43,6 +43,9 @@ static struct bpf_map *rx_queue_index_map; #define EXIT_FAIL_BPF 4 #define EXIT_FAIL_MEM 5 +#define FAIL_MEM_SIG INT_MAX +#define FAIL_STAT_SIG (INT_MAX - 1) + static const struct option long_options[] = { {"help", no_argument, NULL, 'h' }, {"dev", required_argument, NULL, 'd' }, @@ -76,6 +79,12 @@ static void int_exit(int sig) printf("program on interface changed, not removing\n"); } } + + if (sig == FAIL_MEM_SIG) + exit(EXIT_FAIL_MEM); + else if (sig == FAIL_STAT_SIG) + exit(EXIT_FAIL); + exit(EXIT_OK); } @@ -140,7 +149,8 @@ static char* options2str(enum cfg_options_flags flag) if (flag & READ_MEM) return "read"; fprintf(stderr, "ERR: Unknown config option flags"); - exit(EXIT_FAIL); + int_exit(FAIL_STAT_SIG); + return "unknown"; } static void usage(char *argv[]) @@ -173,7 +183,7 @@ static __u64 gettime(void) res = clock_gettime(CLOCK_MONOTONIC, &t); if (res < 0) { fprintf(stderr, "Error with gettimeofday! 
(%i)\n", res); - exit(EXIT_FAIL); + int_exit(FAIL_STAT_SIG); } return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; } @@ -201,7 +211,7 @@ static struct datarec *alloc_record_per_cpu(void) array = calloc(nr_cpus, sizeof(struct datarec)); if (!array) { fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); - exit(EXIT_FAIL_MEM); + int_exit(FAIL_MEM_SIG); } return array; } @@ -214,7 +224,7 @@ static struct record *alloc_record_per_rxq(void) array = calloc(nr_rxqs, sizeof(struct record)); if (!array) { fprintf(stderr, "Mem alloc error (nr_rxqs:%u)\n", nr_rxqs); - exit(EXIT_FAIL_MEM); + int_exit(FAIL_MEM_SIG); } return array; } @@ -228,7 +238,7 @@ static struct stats_record *alloc_stats_record(void) rec = calloc(1, sizeof(struct stats_record)); if (!rec) { fprintf(stderr, "Mem alloc error\n"); - exit(EXIT_FAIL_MEM); + int_exit(FAIL_MEM_SIG); } rec->rxq = alloc_record_per_rxq(); for (i = 0; i < nr_rxqs; i++) diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 096625242475..855b937e7585 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -633,6 +633,8 @@ class PrinterHelpers(Printer): 'struct socket', 'struct file', 'struct bpf_timer', + 'struct mptcp_sock', + 'struct bpf_dynptr', ] known_types = { '...', @@ -682,6 +684,8 @@ class PrinterHelpers(Printer): 'struct socket', 'struct file', 'struct bpf_timer', + 'struct mptcp_sock', + 'struct bpf_dynptr', } mapped_types = { 'u8': '__u8', diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index a2c665beda87..7e6accb9d9f7 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -459,6 +459,51 @@ done: return err; } +static const char sysfs_vmlinux[] = "/sys/kernel/btf/vmlinux"; + +static struct btf *get_vmlinux_btf_from_sysfs(void) +{ + struct btf *base; + + base = btf__parse(sysfs_vmlinux, NULL); + if (libbpf_get_error(base)) { + p_err("failed to parse vmlinux BTF at '%s': %ld\n", + sysfs_vmlinux, libbpf_get_error(base)); + base = NULL; + } + + return base; +} + +#define BTF_NAME_BUFF_LEN 64 + +static bool btf_is_kernel_module(__u32 btf_id) +{ + struct bpf_btf_info btf_info = {}; + char btf_name[BTF_NAME_BUFF_LEN]; + int btf_fd; + __u32 len; + int err; + + btf_fd = bpf_btf_get_fd_by_id(btf_id); + if (btf_fd < 0) { + p_err("can't get BTF object by id (%u): %s", btf_id, strerror(errno)); + return false; + } + + len = sizeof(btf_info); + btf_info.name = ptr_to_u64(btf_name); + btf_info.name_len = sizeof(btf_name); + err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); + close(btf_fd); + if (err) { + p_err("can't get BTF (ID %u) object info: %s", btf_id, strerror(errno)); + return false; + } + + return btf_info.kernel_btf && strncmp(btf_name, "vmlinux", sizeof(btf_name)) != 0; +} + static int do_dump(int argc, char **argv) { struct btf *btf = NULL, *base = NULL; @@ -536,18 +581,11 @@ static int do_dump(int argc, char **argv) NEXT_ARG(); } else if (is_prefix(src, "file")) { const char sysfs_prefix[] = "/sys/kernel/btf/"; - const char sysfs_vmlinux[] = "/sys/kernel/btf/vmlinux"; if (!base_btf && strncmp(*argv, sysfs_prefix, sizeof(sysfs_prefix) - 1) == 0 && - strcmp(*argv, sysfs_vmlinux) != 0) { - base = btf__parse(sysfs_vmlinux, NULL); - if (libbpf_get_error(base)) { - p_err("failed to parse vmlinux BTF at '%s': %ld\n", - sysfs_vmlinux, libbpf_get_error(base)); - base = NULL; - } - } + strcmp(*argv, sysfs_vmlinux) != 0) + base = get_vmlinux_btf_from_sysfs(); btf = btf__parse_split(*argv, base ?: base_btf); err = libbpf_get_error(btf); @@ -591,6 +629,12 @@ static int do_dump(int argc, char **argv) } if (!btf) 
{ + if (!base_btf && btf_is_kernel_module(btf_id)) { + p_info("Warning: valid base BTF was not specified with -B option, falling back to standard base BTF (%s)", + sysfs_vmlinux); + base_btf = get_vmlinux_btf_from_sysfs(); + } + btf = btf__load_from_kernel_by_id_split(btf_id, base_btf); err = libbpf_get_error(btf); if (err) { diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index be130e35462f..d12f46051aac 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -638,7 +638,7 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix, res = probe_map_type_ifindex(map_type, ifindex); } else { - res = libbpf_probe_bpf_map_type(map_type, NULL); + res = libbpf_probe_bpf_map_type(map_type, NULL) > 0; } /* Probe result depends on the success of map creation, no additional @@ -690,7 +690,7 @@ probe_helper_ifindex(enum bpf_func_id id, enum bpf_prog_type prog_type, return res; } -static void +static bool probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type, const char *define_prefix, unsigned int id, const char *ptype_name, __u32 ifindex) @@ -701,7 +701,7 @@ probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type, if (ifindex) res = probe_helper_ifindex(id, prog_type, ifindex); else - res = libbpf_probe_bpf_helper(prog_type, id, NULL); + res = libbpf_probe_bpf_helper(prog_type, id, NULL) > 0; #ifdef USE_LIBCAP /* Probe may succeed even if program load fails, for * unprivileged users check that we did not fail because of @@ -723,6 +723,8 @@ probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type, if (res) printf("\n\t- %s", helper_name[id]); } + + return res; } static void @@ -732,6 +734,7 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, const char *ptype_name = prog_type_name[prog_type]; char feat_name[128]; unsigned int id; + bool probe_res = false; if (ifindex) /* Only test helpers for offload-able program types */ @@ -764,7 +767,7 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, continue; /* fallthrough */ default: - probe_helper_for_progtype(prog_type, supported_type, + probe_res |= probe_helper_for_progtype(prog_type, supported_type, define_prefix, id, ptype_name, ifindex); } @@ -772,8 +775,17 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, if (json_output) jsonw_end_array(json_wtr); - else if (!define_prefix) + else if (!define_prefix) { printf("\n"); + if (!probe_res) { + if (!supported_type) + printf("\tProgram type not supported\n"); + else + printf("\tCould not determine which helpers are available\n"); + } + } + + } static void diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 7678af364793..4c9477ff748d 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -549,6 +549,7 @@ static void codegen_attach_detach(struct bpf_object *obj, const char *obj_name) printf("\tint fd = skel_raw_tracepoint_open(\"%s\", prog_fd);\n", tp_name); break; case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_LSM: if (bpf_program__expected_attach_type(prog) == BPF_TRACE_ITER) printf("\tint fd = skel_link_create(prog_fd, 0, BPF_TRACE_ITER);\n"); else @@ -999,7 +1000,7 @@ static int do_skeleton(int argc, char **argv) codegen("\ \n\ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ \n\ - /* THIS FILE IS AUTOGENERATED! */ \n\ + /* THIS FILE IS AUTOGENERATED BY BPFTOOL! 
*/ \n\ #ifndef %2$s \n\ #define %2$s \n\ \n\ @@ -1015,7 +1016,7 @@ static int do_skeleton(int argc, char **argv) \n\ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ \n\ \n\ - /* THIS FILE IS AUTOGENERATED! */ \n\ + /* THIS FILE IS AUTOGENERATED BY BPFTOOL! */ \n\ #ifndef %2$s \n\ #define %2$s \n\ \n\ diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 8fb0116f9136..6353a789322b 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -23,6 +23,7 @@ static const char * const link_type_name[] = { [BPF_LINK_TYPE_XDP] = "xdp", [BPF_LINK_TYPE_PERF_EVENT] = "perf_event", [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi", + [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops", }; static struct hashmap *link_table; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 444fe6f1cf35..f4009dbdf62d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1013,6 +1013,7 @@ enum bpf_link_type { BPF_LINK_TYPE_XDP = 6, BPF_LINK_TYPE_PERF_EVENT = 7, BPF_LINK_TYPE_KPROBE_MULTI = 8, + BPF_LINK_TYPE_STRUCT_OPS = 9, MAX_BPF_LINK_TYPE, }; @@ -1489,6 +1490,15 @@ union bpf_attr { __aligned_u64 addrs; __aligned_u64 cookies; } kprobe_multi; + struct { + /* this is overlaid with the target_btf_id above. */ + __u32 target_btf_id; + /* black box user-provided value passed through + * to BPF program at the execution time and + * accessible through bpf_get_attach_cookie() BPF helper + */ + __u64 cookie; + } tracing; }; } link_create; @@ -5154,6 +5164,91 @@ union bpf_attr { * if not NULL, is a reference which must be released using its * corresponding release function, or moved into a BPF map before * program exit. + * + * void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu) + * Description + * Perform a lookup in *percpu map* for an entry associated to + * *key* on *cpu*. + * Return + * Map value associated to *key* on *cpu*, or **NULL** if no entry + * was found or *cpu* is invalid. + * + * struct mptcp_sock *bpf_skc_to_mptcp_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *mptcp_sock* pointer. + * Return + * *sk* if casting is valid, or **NULL** otherwise. + * + * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Get a dynptr to local memory *data*. + * + * *data* must be a ptr to a map value. + * The maximum *size* supported is DYNPTR_MAX_SIZE. + * *flags* is currently unused. + * Return + * 0 on success, -E2BIG if the size exceeds DYNPTR_MAX_SIZE, + * -EINVAL if flags is not 0. + * + * long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags, struct bpf_dynptr *ptr) + * Description + * Reserve *size* bytes of payload in a ring buffer *ringbuf* + * through the dynptr interface. *flags* must be 0. + * + * Please note that a corresponding bpf_ringbuf_submit_dynptr or + * bpf_ringbuf_discard_dynptr must be called on *ptr*, even if the + * reservation fails. This is enforced by the verifier. + * Return + * 0 on success, or a negative error in case of failure. + * + * void bpf_ringbuf_submit_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Submit reserved ring buffer sample, pointed to by *data*, + * through the dynptr interface. This is a no-op if the dynptr is + * invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_submit'. + * Return + * Nothing. Always succeeds. 
+ * + * void bpf_ringbuf_discard_dynptr(struct bpf_dynptr *ptr, u64 flags) + * Description + * Discard reserved ring buffer sample through the dynptr + * interface. This is a no-op if the dynptr is invalid/null. + * + * For more information on *flags*, please see + * 'bpf_ringbuf_discard'. + * Return + * Nothing. Always succeeds. + * + * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset) + * Description + * Read *len* bytes from *src* into *dst*, starting from *offset* + * into *src*. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *src*'s data, -EINVAL if *src* is an invalid dynptr. + * + * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len) + * Description + * Write *len* bytes from *src* into *dst*, starting from *offset* + * into *dst*. + * Return + * 0 on success, -E2BIG if *offset* + *len* exceeds the length + * of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst* + * is a read-only dynptr. + * + * void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len) + * Description + * Get a pointer to the underlying dynptr data. + * + * *len* must be a statically known value. The returned data slice + * is invalidated whenever the dynptr is invalidated. + * Return + * Pointer to the underlying dynptr data, NULL if the dynptr is + * read-only, if the dynptr is invalid, or if the offset and length + * is out of bounds. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5351,6 +5446,15 @@ union bpf_attr { FN(skb_set_tstamp), \ FN(ima_file_hash), \ FN(kptr_xchg), \ + FN(map_lookup_percpu_elem), \ + FN(skc_to_mptcp_sock), \ + FN(dynptr_from_mem), \ + FN(ringbuf_reserve_dynptr), \ + FN(ringbuf_submit_dynptr), \ + FN(ringbuf_discard_dynptr), \ + FN(dynptr_read), \ + FN(dynptr_write), \ + FN(dynptr_data), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5604,6 +5708,10 @@ struct bpf_tunnel_key { __u8 tunnel_ttl; __u16 tunnel_ext; /* Padding, future use. */ __u32 tunnel_label; + union { + __u32 local_ipv4; + __u32 local_ipv6[4]; + }; }; /* user accessible mirror of in-kernel xfrm_state. @@ -6498,6 +6606,11 @@ struct bpf_timer { __u64 :64; } __attribute__((aligned(8))); +struct bpf_dynptr { + __u64 :64; + __u64 :64; +} __attribute__((aligned(8))); + struct bpf_sysctl { __u32 write; /* Sysctl is being read (= 0) or written (= 1). * Allows 1,2,4-byte read, but no write. 
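The dynptr helpers documented above are designed to compose: reserve a ring buffer sample through a dynptr, write into it, then submit (or discard) it. Below is a minimal BPF-side sketch of that flow; the map name, program name, and attach point are illustrative only, and it assumes the helper declarations from a current bpf_helpers.h/bpf_helper_defs.h.

/* Illustrative sketch only: ring buffer producer via the new dynptr helpers. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_RINGBUF);
	__uint(max_entries, 256 * 1024);
} rb SEC(".maps");

SEC("tp/syscalls/sys_enter_write")
int produce_sample(void *ctx)
{
	struct bpf_dynptr ptr;
	__u64 val = 42;

	/* Reserve 8 bytes; a matching submit or discard is mandatory even on failure. */
	if (bpf_ringbuf_reserve_dynptr(&rb, sizeof(val), 0, &ptr)) {
		bpf_ringbuf_discard_dynptr(&ptr, 0);
		return 0;
	}

	/* Write through the dynptr, then hand the sample to consumers. */
	bpf_dynptr_write(&ptr, 0, &val, sizeof(val));
	bpf_ringbuf_submit_dynptr(&ptr, 0);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";

Note that the verifier enforces that every dynptr reservation is either submitted or discarded, including when the reservation itself fails, which is why the discard call appears on the error path above.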
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 64741c55b8e3..a1265b152027 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -127,7 +127,7 @@ TAGS_PROG := $(if $(shell which etags 2>/dev/null),etags,ctags) GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ sed 's/\[.*\]//' | \ - awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}' | \ sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ sed 's/\[.*\]//' | \ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index a9d292c106c2..240186aac8e6 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -208,86 +208,6 @@ int bpf_map_create(enum bpf_map_type map_type, return libbpf_err_errno(fd); } -int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) -{ - LIBBPF_OPTS(bpf_map_create_opts, p); - - p.map_flags = create_attr->map_flags; - p.numa_node = create_attr->numa_node; - p.btf_fd = create_attr->btf_fd; - p.btf_key_type_id = create_attr->btf_key_type_id; - p.btf_value_type_id = create_attr->btf_value_type_id; - p.map_ifindex = create_attr->map_ifindex; - if (create_attr->map_type == BPF_MAP_TYPE_STRUCT_OPS) - p.btf_vmlinux_value_type_id = create_attr->btf_vmlinux_value_type_id; - else - p.inner_map_fd = create_attr->inner_map_fd; - - return bpf_map_create(create_attr->map_type, create_attr->name, - create_attr->key_size, create_attr->value_size, - create_attr->max_entries, &p); -} - -int bpf_create_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, int max_entries, - __u32 map_flags, int node) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts); - - opts.map_flags = map_flags; - if (node >= 0) { - opts.numa_node = node; - opts.map_flags |= BPF_F_NUMA_NODE; - } - - return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts); -} - -int bpf_create_map(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags); - - return bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts); -} - -int bpf_create_map_name(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, int max_entries, - __u32 map_flags) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags); - - return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts); -} - -int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int inner_map_fd, int max_entries, - __u32 map_flags, int node) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts); - - opts.inner_map_fd = inner_map_fd; - opts.map_flags = map_flags; - if (node >= 0) { - opts.map_flags |= BPF_F_NUMA_NODE; - opts.numa_node = node; - } - - return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts); -} - -int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, - int key_size, int inner_map_fd, int max_entries, - __u32 map_flags) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, - .inner_map_fd = inner_map_fd, - .map_flags = map_flags, - ); - - return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts); -} - static void * alloc_zero_tailing_info(const void *orecord, __u32 cnt, __u32 actual_rec_size, __u32 expected_rec_size) @@ -639,6 +559,20 @@ int bpf_map_delete_elem(int fd, const void *key) return libbpf_err_errno(ret); } +int 
bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags) +{ + union bpf_attr attr; + int ret; + + memset(&attr, 0, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + attr.flags = flags; + + ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); + return libbpf_err_errno(ret); +} + int bpf_map_get_next_key(int fd, const void *key, void *next_key) { union bpf_attr attr; @@ -863,6 +797,14 @@ int bpf_link_create(int prog_fd, int target_fd, if (!OPTS_ZEROED(opts, kprobe_multi)) return libbpf_err(-EINVAL); break; + case BPF_TRACE_FENTRY: + case BPF_TRACE_FEXIT: + case BPF_MODIFY_RETURN: + case BPF_LSM_MAC: + attr.link_create.tracing.cookie = OPTS_GET(opts, tracing.cookie, 0); + if (!OPTS_ZEROED(opts, tracing)) + return libbpf_err(-EINVAL); + break; default: if (!OPTS_ZEROED(opts, flags)) return libbpf_err(-EINVAL); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index f4b4afb6d4ba..cabc03703e29 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -61,48 +61,6 @@ LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, __u32 max_entries, const struct bpf_map_create_opts *opts); -struct bpf_create_map_attr { - const char *name; - enum bpf_map_type map_type; - __u32 map_flags; - __u32 key_size; - __u32 value_size; - __u32 max_entries; - __u32 numa_node; - __u32 btf_fd; - __u32 btf_key_type_id; - __u32 btf_value_type_id; - __u32 map_ifindex; - union { - __u32 inner_map_fd; - __u32 btf_vmlinux_value_type_id; - }; -}; - -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, - int max_entries, __u32 map_flags, int node); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, - int max_entries, __u32 map_flags); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type, - const char *name, int key_size, - int inner_map_fd, int max_entries, - __u32 map_flags, int node); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type, - const char *name, int key_size, - int inner_map_fd, int max_entries, - __u32 map_flags); - struct bpf_prog_load_opts { size_t sz; /* size of this struct for forward/backward compatibility */ @@ -244,6 +202,7 @@ LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, LIBBPF_API int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags); LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); +LIBBPF_API int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags); LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); LIBBPF_API int bpf_map_freeze(int fd); @@ -420,6 +379,9 @@ struct bpf_link_create_opts { const unsigned long *addrs; const __u64 *cookies; } kprobe_multi; + struct { + __u64 cookie; + } tracing; }; size_t :0; }; diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index e4aa9996a550..fd48b1ff59ca 100644 --- 
a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -110,21 +110,50 @@ enum bpf_enum_value_kind { val; \ }) +#define ___bpf_field_ref1(field) (field) +#define ___bpf_field_ref2(type, field) (((typeof(type) *)0)->field) +#define ___bpf_field_ref(args...) \ + ___bpf_apply(___bpf_field_ref, ___bpf_narg(args))(args) + /* * Convenience macro to check that field actually exists in target kernel's. * Returns: * 1, if matching field is present in target kernel; * 0, if no matching field found. + * + * Supports two forms: + * - field reference through variable access: + * bpf_core_field_exists(p->my_field); + * - field reference through type and field names: + * bpf_core_field_exists(struct my_type, my_field). */ -#define bpf_core_field_exists(field) \ - __builtin_preserve_field_info(field, BPF_FIELD_EXISTS) +#define bpf_core_field_exists(field...) \ + __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_EXISTS) /* * Convenience macro to get the byte size of a field. Works for integers, * struct/unions, pointers, arrays, and enums. + * + * Supports two forms: + * - field reference through variable access: + * bpf_core_field_size(p->my_field); + * - field reference through type and field names: + * bpf_core_field_size(struct my_type, my_field). + */ +#define bpf_core_field_size(field...) \ + __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_SIZE) + +/* + * Convenience macro to get field's byte offset. + * + * Supports two forms: + * - field reference through variable access: + * bpf_core_field_offset(p->my_field); + * - field reference through type and field names: + * bpf_core_field_offset(struct my_type, my_field). */ -#define bpf_core_field_size(field) \ - __builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE) +#define bpf_core_field_offset(field...) \ + __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_OFFSET) /* * Convenience macro to get BTF type ID of a specified type, using a local BTF diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 5de3eb267125..fb04eaf367f1 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -76,6 +76,30 @@ #endif /* + * Compiler (optimization) barrier. + */ +#ifndef barrier +#define barrier() asm volatile("" ::: "memory") +#endif + +/* Variable-specific compiler (optimization) barrier. It's a no-op which makes + * compiler believe that there is some black box modification of a given + * variable and thus prevents compiler from making extra assumption about its + * value and potential simplifications and optimizations on this variable. + * + * E.g., compiler might often delay or even omit 32-bit to 64-bit casting of + * a variable, making some code patterns unverifiable. Putting barrier_var() + * in place will ensure that cast is performed before the barrier_var() + * invocation, because compiler has to pessimistically assume that embedded + * asm section might perform some extra operations on that variable. + * + * This is a variable-specific variant of more global barrier(). + */ +#ifndef barrier_var +#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) +#endif + +/* * Helper macro to throw a compilation error if __bpf_unreachable() gets * built into the resulting code. This works given BPF back end does not * implement __builtin_trap(). 
This is useful to assert that certain paths @@ -149,13 +173,8 @@ enum libbpf_tristate { #define __kconfig __attribute__((section(".kconfig"))) #define __ksym __attribute__((section(".ksyms"))) -#if __has_attribute(btf_type_tag) #define __kptr __attribute__((btf_type_tag("kptr"))) #define __kptr_ref __attribute__((btf_type_tag("kptr_ref"))) -#else -#define __kptr -#define __kptr_ref -#endif #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 73a5192defb3..e89cc9c885b3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -357,6 +357,7 @@ enum libbpf_map_type { }; struct bpf_map { + struct bpf_object *obj; char *name; /* real_name is defined for special internal maps (.rodata*, * .data*, .bss, .kconfig) and preserves their original ELF section @@ -386,7 +387,7 @@ struct bpf_map { char *pin_path; bool pinned; bool reused; - bool skipped; + bool autocreate; __u64 map_extra; }; @@ -1433,36 +1434,21 @@ static int find_elf_var_offset(const struct bpf_object *obj, const char *name, _ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) { - struct bpf_map *new_maps; - size_t new_cap; - int i; - - if (obj->nr_maps < obj->maps_cap) - return &obj->maps[obj->nr_maps++]; - - new_cap = max((size_t)4, obj->maps_cap * 3 / 2); - new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps)); - if (!new_maps) { - pr_warn("alloc maps for object failed\n"); - return ERR_PTR(-ENOMEM); - } + struct bpf_map *map; + int err; - obj->maps_cap = new_cap; - obj->maps = new_maps; + err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap, + sizeof(*obj->maps), obj->nr_maps + 1); + if (err) + return ERR_PTR(err); - /* zero out new maps */ - memset(obj->maps + obj->nr_maps, 0, - (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps)); - /* - * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin) - * when failure (zclose won't close negative fd)). - */ - for (i = obj->nr_maps; i < obj->maps_cap; i++) { - obj->maps[i].fd = -1; - obj->maps[i].inner_map_fd = -1; - } + map = &obj->maps[obj->nr_maps++]; + map->obj = obj; + map->fd = -1; + map->inner_map_fd = -1; + map->autocreate = true; - return &obj->maps[obj->nr_maps++]; + return map; } static size_t bpf_map_mmap_sz(const struct bpf_map *map) @@ -4324,6 +4310,20 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) return 0; } +bool bpf_map__autocreate(const struct bpf_map *map) +{ + return map->autocreate; +} + +int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) +{ + if (map->obj->loaded) + return libbpf_err(-EBUSY); + + map->autocreate = autocreate; + return 0; +} + int bpf_map__reuse_fd(struct bpf_map *map, int fd) { struct bpf_map_info info = {}; @@ -4943,6 +4943,42 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) static void bpf_map__destroy(struct bpf_map *map); +static bool is_pow_of_2(size_t x) +{ + return x && (x & (x - 1)); +} + +static size_t adjust_ringbuf_sz(size_t sz) +{ + __u32 page_sz = sysconf(_SC_PAGE_SIZE); + __u32 mul; + + /* if user forgot to set any size, make sure they see error */ + if (sz == 0) + return 0; + /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be + * a power-of-2 multiple of kernel's page size. If user diligently + * satisified these conditions, pass the size through. 
+ */ + if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz)) + return sz; + + /* Otherwise find closest (page_sz * power_of_2) product bigger than + * user-set size to satisfy both user size request and kernel + * requirements and substitute correct max_entries for map creation. + */ + for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) { + if (mul * page_sz > sz) + return mul * page_sz; + } + + /* if it's impossible to satisfy the conditions (i.e., user size is + * very close to UINT_MAX but is not a power-of-2 multiple of + * page_size) then just return original size and let kernel reject it + */ + return sz; +} + static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { LIBBPF_OPTS(bpf_map_create_opts, create_attr); @@ -4981,6 +5017,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b } switch (def->type) { + case BPF_MAP_TYPE_RINGBUF: + map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); + /* fallthrough */ case BPF_MAP_TYPE_PERF_EVENT_ARRAY: case BPF_MAP_TYPE_CGROUP_ARRAY: case BPF_MAP_TYPE_STACK_TRACE: @@ -4994,7 +5033,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_QUEUE: case BPF_MAP_TYPE_STACK: - case BPF_MAP_TYPE_RINGBUF: create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; @@ -5180,9 +5218,11 @@ bpf_object__create_maps(struct bpf_object *obj) * bpf_object loading will succeed just fine even on old * kernels. */ - if (bpf_map__is_internal(map) && - !kernel_supports(obj, FEAT_GLOBAL_DATA)) { - map->skipped = true; + if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA)) + map->autocreate = false; + + if (!map->autocreate) { + pr_debug("map '%s': skipped auto-creating...\n", map->name); continue; } @@ -5805,6 +5845,36 @@ out: return err; } +/* base map load ldimm64 special constant, used also for log fixup logic */ +#define MAP_LDIMM64_POISON_BASE 2001000000 +#define MAP_LDIMM64_POISON_PFX "200100" + +static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx, + int insn_idx, struct bpf_insn *insn, + int map_idx, const struct bpf_map *map) +{ + int i; + + pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n", + prog->name, relo_idx, insn_idx, map_idx, map->name); + + /* we turn single ldimm64 into two identical invalid calls */ + for (i = 0; i < 2; i++) { + insn->code = BPF_JMP | BPF_CALL; + insn->dst_reg = 0; + insn->src_reg = 0; + insn->off = 0; + /* if this instruction is reachable (not a dead code), + * verifier will complain with something like: + * invalid func unknown#2001000123 + * where lower 123 is map index into obj->maps[] array + */ + insn->imm = MAP_LDIMM64_POISON_BASE + map_idx; + + insn++; + } +} + /* Relocate data references within program code: * - map references; * - global variable references; @@ -5818,33 +5888,35 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) for (i = 0; i < prog->nr_reloc; i++) { struct reloc_desc *relo = &prog->reloc_desc[i]; struct bpf_insn *insn = &prog->insns[relo->insn_idx]; + const struct bpf_map *map; struct extern_desc *ext; switch (relo->type) { case RELO_LD64: + map = &obj->maps[relo->map_idx]; if (obj->gen_loader) { insn[0].src_reg = BPF_PSEUDO_MAP_IDX; insn[0].imm = relo->map_idx; - } else { + } else if (map->autocreate) { insn[0].src_reg = BPF_PSEUDO_MAP_FD; - insn[0].imm = obj->maps[relo->map_idx].fd; + insn[0].imm = map->fd; + } else { + 
poison_map_ldimm64(prog, i, relo->insn_idx, insn, + relo->map_idx, map); } break; case RELO_DATA: + map = &obj->maps[relo->map_idx]; insn[1].imm = insn[0].imm + relo->sym_off; if (obj->gen_loader) { insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; insn[0].imm = relo->map_idx; - } else { - const struct bpf_map *map = &obj->maps[relo->map_idx]; - - if (map->skipped) { - pr_warn("prog '%s': relo #%d: kernel doesn't support global data\n", - prog->name, i); - return -ENOTSUP; - } + } else if (map->autocreate) { insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; - insn[0].imm = obj->maps[relo->map_idx].fd; + insn[0].imm = map->fd; + } else { + poison_map_ldimm64(prog, i, relo->insn_idx, insn, + relo->map_idx, map); } break; case RELO_EXTERN_VAR: @@ -6682,17 +6754,32 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog, if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; - if (def & SEC_DEPRECATED) + if (def & SEC_DEPRECATED) { pr_warn("SEC(\"%s\") is deprecated, please see https://github.com/libbpf/libbpf/wiki/Libbpf-1.0-migration-guide#bpf-program-sec-annotation-deprecations for details\n", prog->sec_name); + } - if ((prog->type == BPF_PROG_TYPE_TRACING || - prog->type == BPF_PROG_TYPE_LSM || - prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { + if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) { int btf_obj_fd = 0, btf_type_id = 0, err; const char *attach_name; - attach_name = strchr(prog->sec_name, '/') + 1; + attach_name = strchr(prog->sec_name, '/'); + if (!attach_name) { + /* if BPF program is annotated with just SEC("fentry") + * (or similar) without declaratively specifying + * target, then it is expected that target will be + * specified with bpf_program__set_attach_target() at + * runtime before BPF object load step. If not, then + * there is nothing to load into the kernel as BPF + * verifier won't be able to validate BPF program + * correctness anyways. 
+ */ + pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n", + prog->name); + return -EINVAL; + } + attach_name++; /* skip over / */ + err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id); if (err) return err; @@ -6773,6 +6860,8 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog prog->name, err); return err; } + insns = prog->insns; + insns_cnt = prog->insns_cnt; } if (obj->gen_loader) { @@ -6784,7 +6873,7 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog } retry_load: - /* if log_level is zero, we don't request logs initiallly even if + /* if log_level is zero, we don't request logs initially even if * custom log_buf is specified; if the program load fails, then we'll * bump log_level to 1 and use either custom log_buf or we'll allocate * our own and retry the load to get details on what failed @@ -6947,7 +7036,7 @@ static void fixup_log_failed_core_relo(struct bpf_program *prog, const struct bpf_core_relo *relo; struct bpf_core_spec spec; char patch[512], spec_buf[256]; - int insn_idx, err; + int insn_idx, err, spec_len; if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1) return; @@ -6960,11 +7049,44 @@ static void fixup_log_failed_core_relo(struct bpf_program *prog, if (err) return; - bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); + spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); snprintf(patch, sizeof(patch), "%d: <invalid CO-RE relocation>\n" - "failed to resolve CO-RE relocation %s\n", - insn_idx, spec_buf); + "failed to resolve CO-RE relocation %s%s\n", + insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : ""); + + patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); +} + +static void fixup_log_missing_map_load(struct bpf_program *prog, + char *buf, size_t buf_sz, size_t log_sz, + char *line1, char *line2, char *line3) +{ + /* Expected log for failed and not properly guarded CO-RE relocation: + * line1 -> 123: (85) call unknown#2001000345 + * line2 -> invalid func unknown#2001000345 + * line3 -> <anything else or end of buffer> + * + * "123" is the index of the instruction that was poisoned. + * "345" in "2001000345" are map index in obj->maps to fetch map name. 
+ */ + struct bpf_object *obj = prog->obj; + const struct bpf_map *map; + int insn_idx, map_idx; + char patch[128]; + + if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2) + return; + + map_idx -= MAP_LDIMM64_POISON_BASE; + if (map_idx < 0 || map_idx >= obj->nr_maps) + return; + map = &obj->maps[map_idx]; + + snprintf(patch, sizeof(patch), + "%d: <invalid BPF map reference>\n" + "BPF map '%s' is referenced but wasn't created\n", + insn_idx, map->name); patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); } @@ -6997,6 +7119,14 @@ static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_s fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz, prev_line, cur_line, next_line); return; + } else if (str_has_pfx(cur_line, "invalid func unknown#"MAP_LDIMM64_POISON_PFX)) { + prev_line = find_prev_line(buf, cur_line); + if (!prev_line) + continue; + + fixup_log_missing_map_load(prog, buf, buf_sz, log_sz, + prev_line, cur_line, next_line); + return; } } } @@ -8185,7 +8315,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) char *pin_path = NULL; char buf[PATH_MAX]; - if (map->skipped) + if (!map->autocreate) continue; if (path) { @@ -8660,6 +8790,26 @@ size_t bpf_program__insn_cnt(const struct bpf_program *prog) return prog->insns_cnt; } +int bpf_program__set_insns(struct bpf_program *prog, + struct bpf_insn *new_insns, size_t new_insn_cnt) +{ + struct bpf_insn *insns; + + if (prog->obj->loaded) + return -EBUSY; + + insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns)); + if (!insns) { + pr_warn("prog '%s': failed to realloc prog code\n", prog->name); + return -ENOMEM; + } + memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns)); + + prog->insns = insns; + prog->insns_cnt = new_insn_cnt; + return 0; +} + int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, bpf_program_prep_t prep) { @@ -8853,34 +9003,34 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE | SEC_SLOPPY_PFX), SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("kprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), + SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), - SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), + SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), - SEC_DEF("kprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), - SEC_DEF("kretprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), + SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), + SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt), SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX | SEC_DEPRECATED), SEC_DEF("action", SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("tracepoint/", TRACEPOINT, 0, SEC_NONE, attach_tp), - SEC_DEF("tp/", TRACEPOINT, 0, SEC_NONE, attach_tp), - SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("raw_tp/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("raw_tracepoint.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, 
attach_raw_tp), - SEC_DEF("raw_tp.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("tp_btf/", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fentry/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fmod_ret/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fexit/", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fentry.s/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("fmod_ret.s/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("fexit.s/", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("freplace/", EXT, 0, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("lsm/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), - SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), - SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), - SEC_DEF("iter.s/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter), + SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp), + SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp), + SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), + SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), + SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), + SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter), SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS), SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), @@ -9799,6 +9949,110 @@ bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset) return libbpf_err_ptr(-ENOTSUP); } +static int validate_map_op(const struct bpf_map *map, size_t key_sz, + size_t value_sz, bool check_value_sz) +{ + if (map->fd <= 0) + return -ENOENT; + + if (map->def.key_size != key_sz) { + pr_warn("map '%s': unexpected key size %zu provided, expected %u\n", + map->name, key_sz, map->def.key_size); + return -EINVAL; + } + + if (!check_value_sz) + return 0; + + switch (map->def.type) { + case BPF_MAP_TYPE_PERCPU_ARRAY: + case BPF_MAP_TYPE_PERCPU_HASH: + case BPF_MAP_TYPE_LRU_PERCPU_HASH: + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: { + int num_cpu = libbpf_num_possible_cpus(); + size_t elem_sz = roundup(map->def.value_size, 8); + + if (value_sz != num_cpu * elem_sz) { + pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = 
%zd\n", + map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz); + return -EINVAL; + } + break; + } + default: + if (map->def.value_size != value_sz) { + pr_warn("map '%s': unexpected value size %zu provided, expected %u\n", + map->name, value_sz, map->def.value_size); + return -EINVAL; + } + break; + } + return 0; +} + +int bpf_map__lookup_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags) +{ + int err; + + err = validate_map_op(map, key_sz, value_sz, true); + if (err) + return libbpf_err(err); + + return bpf_map_lookup_elem_flags(map->fd, key, value, flags); +} + +int bpf_map__update_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + const void *value, size_t value_sz, __u64 flags) +{ + int err; + + err = validate_map_op(map, key_sz, value_sz, true); + if (err) + return libbpf_err(err); + + return bpf_map_update_elem(map->fd, key, value, flags); +} + +int bpf_map__delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, __u64 flags) +{ + int err; + + err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); + if (err) + return libbpf_err(err); + + return bpf_map_delete_elem_flags(map->fd, key, flags); +} + +int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags) +{ + int err; + + err = validate_map_op(map, key_sz, value_sz, true); + if (err) + return libbpf_err(err); + + return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags); +} + +int bpf_map__get_next_key(const struct bpf_map *map, + const void *cur_key, void *next_key, size_t key_sz) +{ + int err; + + err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); + if (err) + return libbpf_err(err); + + return bpf_map_get_next_key(map->fd, cur_key, next_key); +} + long libbpf_get_error(const void *ptr) { if (!IS_ERR_OR_NULL(ptr)) @@ -10595,6 +10849,12 @@ static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf char *func; int n; + *link = NULL; + + /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */ + if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0) + return 0; + opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/"); if (opts.retprobe) func_name = prog->sec_name + sizeof("kretprobe/") - 1; @@ -10625,6 +10885,13 @@ static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, stru char *pattern; int n; + *link = NULL; + + /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */ + if (strcmp(prog->sec_name, "kprobe.multi") == 0 || + strcmp(prog->sec_name, "kretprobe.multi") == 0) + return 0; + opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/"); if (opts.retprobe) spec = prog->sec_name + sizeof("kretprobe.multi/") - 1; @@ -11325,6 +11592,12 @@ static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_lin { char *sec_name, *tp_cat, *tp_name; + *link = NULL; + + /* no auto-attach for SEC("tp") or SEC("tracepoint") */ + if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0) + return 0; + sec_name = strdup(prog->sec_name); if (!sec_name) return -ENOMEM; @@ -11380,20 +11653,34 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *pr static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) { static const char *const prefixes[] = { - "raw_tp/", - "raw_tracepoint/", - "raw_tp.w/", - "raw_tracepoint.w/", + 
"raw_tp", + "raw_tracepoint", + "raw_tp.w", + "raw_tracepoint.w", }; size_t i; const char *tp_name = NULL; + *link = NULL; + for (i = 0; i < ARRAY_SIZE(prefixes); i++) { - if (str_has_pfx(prog->sec_name, prefixes[i])) { - tp_name = prog->sec_name + strlen(prefixes[i]); - break; - } + size_t pfx_len; + + if (!str_has_pfx(prog->sec_name, prefixes[i])) + continue; + + pfx_len = strlen(prefixes[i]); + /* no auto-attach case of, e.g., SEC("raw_tp") */ + if (prog->sec_name[pfx_len] == '\0') + return 0; + + if (prog->sec_name[pfx_len] != '/') + continue; + + tp_name = prog->sec_name + pfx_len + 1; + break; } + if (!tp_name) { pr_warn("prog '%s': invalid section name '%s'\n", prog->name, prog->sec_name); @@ -11405,12 +11692,17 @@ static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf } /* Common logic for all BPF program types that attach to a btf_id */ -static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog) +static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog, + const struct bpf_trace_opts *opts) { + LIBBPF_OPTS(bpf_link_create_opts, link_opts); char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, pfd; + if (!OPTS_VALID(opts, bpf_trace_opts)) + return libbpf_err_ptr(-EINVAL); + prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { pr_warn("prog '%s': can't attach before loaded\n", prog->name); @@ -11423,7 +11715,8 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro link->detach = &bpf_link__detach_fd; /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */ - pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), NULL); + link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0); + pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts); if (pfd < 0) { pfd = -errno; free(link); @@ -11437,12 +11730,18 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog) { - return bpf_program__attach_btf_id(prog); + return bpf_program__attach_btf_id(prog, NULL); +} + +struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog, + const struct bpf_trace_opts *opts) +{ + return bpf_program__attach_btf_id(prog, opts); } struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog) { - return bpf_program__attach_btf_id(prog); + return bpf_program__attach_btf_id(prog, NULL); } static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index cdbfee60ea3e..9e9a3fd3edd8 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -323,6 +323,24 @@ struct bpf_insn; * different. */ LIBBPF_API const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog); + +/** + * @brief **bpf_program__set_insns()** can set BPF program's underlying + * BPF instructions. + * + * WARNING: This is a very advanced libbpf API and users need to know + * what they are doing. This should be used from prog_prepare_load_fn + * callback only. 
+ * + * @param prog BPF program for which to return instructions + * @param new_insns a pointer to an array of BPF instructions + * @param new_insn_cnt number of `struct bpf_insn`'s that form + * specified BPF program + * @return 0, on success; negative error code, otherwise + */ +LIBBPF_API int bpf_program__set_insns(struct bpf_program *prog, + struct bpf_insn *new_insns, size_t new_insn_cnt); + /** * @brief **bpf_program__insn_cnt()** returns number of `struct bpf_insn`'s * that form specified BPF program. @@ -603,9 +621,21 @@ bpf_program__attach_tracepoint_opts(const struct bpf_program *prog, LIBBPF_API struct bpf_link * bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, const char *tp_name); + +struct bpf_trace_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + /* custom user-provided value fetchable through bpf_get_attach_cookie() */ + __u64 cookie; +}; +#define bpf_trace_opts__last_field cookie + LIBBPF_API struct bpf_link * bpf_program__attach_trace(const struct bpf_program *prog); LIBBPF_API struct bpf_link * +bpf_program__attach_trace_opts(const struct bpf_program *prog, const struct bpf_trace_opts *opts); + +LIBBPF_API struct bpf_link * bpf_program__attach_lsm(const struct bpf_program *prog); LIBBPF_API struct bpf_link * bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd); @@ -867,6 +897,28 @@ LIBBPF_API struct bpf_map * bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map); /** + * @brief **bpf_map__set_autocreate()** sets whether libbpf has to auto-create + * BPF map during BPF object load phase. + * @param map the BPF map instance + * @param autocreate whether to create BPF map during BPF object load + * @return 0 on success; -EBUSY if BPF object was already loaded + * + * **bpf_map__set_autocreate()** allows to opt-out from libbpf auto-creating + * BPF map. By default, libbpf will attempt to create every single BPF map + * defined in BPF object file using BPF_MAP_CREATE command of bpf() syscall + * and fill in map FD in BPF instructions. + * + * This API allows to opt-out of this process for specific map instance. This + * can be useful if host kernel doesn't support such BPF map type or used + * combination of flags and user application wants to avoid creating such + * a map in the first place. User is still responsible to make sure that their + * BPF-side code that expects to use such missing BPF map is recognized by BPF + * verifier as dead code, otherwise BPF verifier will reject such BPF program. + */ +LIBBPF_API int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate); +LIBBPF_API bool bpf_map__autocreate(const struct bpf_map *map); + +/** * @brief **bpf_map__fd()** gets the file descriptor of the passed * BPF map * @param map the BPF map instance @@ -939,6 +991,110 @@ LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd); LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map); /** + * @brief **bpf_map__lookup_elem()** allows to lookup BPF map value + * corresponding to provided key. + * @param map BPF map to lookup element in + * @param key pointer to memory containing bytes of the key used for lookup + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @param value pointer to memory in which looked up value will be stored + * @param value_sz size in byte of value data memory; it has to match BPF map + * definition's **value_size**. 
For per-CPU BPF maps value size has to be + * a product of BPF map value size and number of possible CPUs in the system + * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for + * per-CPU values value size has to be aligned up to closest 8 bytes for + * alignment reasons, so expected size is: `round_up(value_size, 8) + * * libbpf_num_possible_cpus()`. + * @flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise + * + * **bpf_map__lookup_elem()** is high-level equivalent of + * **bpf_map_lookup_elem()** API with added check for key and value size. + */ +LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags); + +/** + * @brief **bpf_map__update_elem()** allows to insert or update value in BPF + * map that corresponds to provided key. + * @param map BPF map to insert to or update element in + * @param key pointer to memory containing bytes of the key + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @param value pointer to memory containing bytes of the value + * @param value_sz size in byte of value data memory; it has to match BPF map + * definition's **value_size**. For per-CPU BPF maps value size has to be + * a product of BPF map value size and number of possible CPUs in the system + * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for + * per-CPU values value size has to be aligned up to closest 8 bytes for + * alignment reasons, so expected size is: `round_up(value_size, 8) + * * libbpf_num_possible_cpus()`. + * @flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise + * + * **bpf_map__update_elem()** is high-level equivalent of + * **bpf_map_update_elem()** API with added check for key and value size. + */ +LIBBPF_API int bpf_map__update_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + const void *value, size_t value_sz, __u64 flags); + +/** + * @brief **bpf_map__delete_elem()** allows to delete element in BPF map that + * corresponds to provided key. + * @param map BPF map to delete element from + * @param key pointer to memory containing bytes of the key + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise + * + * **bpf_map__delete_elem()** is high-level equivalent of + * **bpf_map_delete_elem()** API with added check for key size. + */ +LIBBPF_API int bpf_map__delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, __u64 flags); + +/** + * @brief **bpf_map__lookup_and_delete_elem()** allows to lookup BPF map value + * corresponding to provided key and atomically delete it afterwards. + * @param map BPF map to lookup element in + * @param key pointer to memory containing bytes of the key used for lookup + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @param value pointer to memory in which looked up value will be stored + * @param value_sz size in byte of value data memory; it has to match BPF map + * definition's **value_size**. For per-CPU BPF maps value size has to be + * a product of BPF map value size and number of possible CPUs in the system + * (could be fetched with **libbpf_num_possible_cpus()**). 
Note also that for + * per-CPU values value size has to be aligned up to closest 8 bytes for + * alignment reasons, so expected size is: `round_up(value_size, 8) + * * libbpf_num_possible_cpus()`. + * @flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise + * + * **bpf_map__lookup_and_delete_elem()** is high-level equivalent of + * **bpf_map_lookup_and_delete_elem()** API with added check for key and value size. + */ +LIBBPF_API int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags); + +/** + * @brief **bpf_map__get_next_key()** allows to iterate BPF map keys by + * fetching next key that follows current key. + * @param map BPF map to fetch next key from + * @param cur_key pointer to memory containing bytes of current key or NULL to + * fetch the first key + * @param next_key pointer to memory to write next key into + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @return 0, on success; -ENOENT if **cur_key** is the last key in BPF map; + * negative error, otherwise + * + * **bpf_map__get_next_key()** is high-level equivalent of + * **bpf_map_get_next_key()** API with added check for key size. + */ +LIBBPF_API int bpf_map__get_next_key(const struct bpf_map *map, + const void *cur_key, void *next_key, size_t key_sz); + +/** * @brief **libbpf_get_error()** extracts the error code from the passed * pointer * @param ptr pointer returned from libbpf API function diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 82f6d62176dd..52973cffc20c 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -442,10 +442,24 @@ LIBBPF_0.7.0 { LIBBPF_0.8.0 { global: + bpf_map__autocreate; + bpf_map__get_next_key; + bpf_map__delete_elem; + bpf_map__lookup_and_delete_elem; + bpf_map__lookup_elem; + bpf_map__set_autocreate; + bpf_map__update_elem; + bpf_map_delete_elem_flags; bpf_object__destroy_subskeleton; bpf_object__open_subskeleton; + bpf_program__attach_kprobe_multi_opts; + bpf_program__attach_trace_opts; bpf_program__attach_usdt; + bpf_program__set_insns; libbpf_register_prog_handler; libbpf_unregister_prog_handler; - bpf_program__attach_kprobe_multi_opts; } LIBBPF_0.7.0; + +LIBBPF_1.0.0 { + local: *; +}; diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index 61f2039404b6..2fb2f4290080 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -3,7 +3,7 @@ #ifndef __LIBBPF_VERSION_H #define __LIBBPF_VERSION_H -#define LIBBPF_MAJOR_VERSION 0 -#define LIBBPF_MINOR_VERSION 8 +#define LIBBPF_MAJOR_VERSION 1 +#define LIBBPF_MINOR_VERSION 0 #endif /* __LIBBPF_VERSION_H */ diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index bafdc5373a13..2d3c8c8f558a 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -75,7 +75,7 @@ TEST_PROGS := test_kmod.sh \ test_xsk.sh TEST_PROGS_EXTENDED := with_addr.sh \ - with_tunnels.sh \ + with_tunnels.sh ima_setup.sh \ test_xdp_vlan.sh test_bpftool.py # Compile but not part of 'make run_tests' @@ -168,13 +168,15 @@ $(OUTPUT)/%:%.c $(call msg,BINARY,,$@) $(Q)$(LINK.c) $^ $(LDLIBS) -o $@ +# Filter out -static for liburandom_read.so and its dependent targets so that static builds +# do not fail. Static builds leave urandom_read relying on system-wide shared libraries. 
$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c $(call msg,LIB,,$@) - $(Q)$(CC) $(CFLAGS) -fPIC $(LDFLAGS) $^ $(LDLIBS) --shared -o $@ + $(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) -fPIC -shared -o $@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so $(call msg,BINARY,,$@) - $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.c,$^) \ + $(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ liburandom_read.so $(LDLIBS) \ -Wl,-rpath=. -Wl,--build-id=sha1 -o $@ @@ -423,11 +425,11 @@ $(TRUNNER_BPF_SKELS): %.skel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $(TRUNNER_BPF_LSKELS): %.lskel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) - $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked1.o) $$< - $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o) - $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o) - $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o) - $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=_lskel)) > $$@ + $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$< + $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o) + $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o) + $(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o) + $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.o=_lskel)) > $$@ $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o)) diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h index b1ede6f0b821..82a7c9de95f9 100644 --- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h @@ -16,6 +16,10 @@ BPF_PROG(name, args) #define SOL_TCP 6 #endif +#ifndef TCP_CA_NAME_MAX +#define TCP_CA_NAME_MAX 16 +#endif + #define tcp_jiffies32 ((__u32)bpf_jiffies64()) struct sock_common { @@ -81,6 +85,7 @@ struct tcp_sock { __u32 lsndtime; __u32 prior_cwnd; __u64 tcp_mstamp; /* most recent packet received/sent */ + bool is_mptcp; } __attribute__((preserve_access_index)); static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk) @@ -225,4 +230,12 @@ static __always_inline bool tcp_cc_eq(const char *a, const char *b) extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym; extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym; +struct mptcp_sock { + struct inet_connection_sock sk; + + __u32 token; + struct sock *first; + char ca_name[TCP_CA_NAME_MAX]; +} __attribute__((preserve_access_index)); + #endif diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 763db63a3890..3b3edc0fc8a6 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -53,3 +53,7 @@ CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_DEFRAG_IPV6=y CONFIG_NF_CONNTRACK=y CONFIG_USERFAULTFD=y +CONFIG_FPROBE=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_MPTCP=y diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c new file mode 100644 index 000000000000..f472d28ad11a --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> 
+ +#include <test_maps.h> + +#define OUTER_MAP_ENTRIES 10 + +static __u32 get_map_id_from_fd(int map_fd) +{ + struct bpf_map_info map_info = {}; + uint32_t info_len = sizeof(map_info); + int ret; + + ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len); + CHECK(ret < 0, "Finding map info failed", "error:%s\n", + strerror(errno)); + + return map_info.id; +} + +/* This creates number of OUTER_MAP_ENTRIES maps that will be stored + * in outer map and return the created map_fds + */ +static void create_inner_maps(enum bpf_map_type map_type, + __u32 *inner_map_fds) +{ + int map_fd, map_index, ret; + __u32 map_key = 0, map_id; + char map_name[15]; + + for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++) { + memset(map_name, 0, sizeof(map_name)); + sprintf(map_name, "inner_map_fd_%d", map_index); + map_fd = bpf_map_create(map_type, map_name, sizeof(__u32), + sizeof(__u32), 1, NULL); + CHECK(map_fd < 0, + "inner bpf_map_create() failed", + "map_type=(%d) map_name(%s), error:%s\n", + map_type, map_name, strerror(errno)); + + /* keep track of the inner map fd as it is required + * to add records in outer map + */ + inner_map_fds[map_index] = map_fd; + + /* Add entry into this created map + * eg: map1 key = 0, value = map1's map id + * map2 key = 0, value = map2's map id + */ + map_id = get_map_id_from_fd(map_fd); + ret = bpf_map_update_elem(map_fd, &map_key, &map_id, 0); + CHECK(ret != 0, + "bpf_map_update_elem failed", + "map_type=(%d) map_name(%s), error:%s\n", + map_type, map_name, strerror(errno)); + } +} + +static int create_outer_map(enum bpf_map_type map_type, __u32 inner_map_fd) +{ + int outer_map_fd; + LIBBPF_OPTS(bpf_map_create_opts, attr); + + attr.inner_map_fd = inner_map_fd; + outer_map_fd = bpf_map_create(map_type, "outer_map", sizeof(__u32), + sizeof(__u32), OUTER_MAP_ENTRIES, + &attr); + CHECK(outer_map_fd < 0, + "outer bpf_map_create()", + "map_type=(%d), error:%s\n", + map_type, strerror(errno)); + + return outer_map_fd; +} + +static void validate_fetch_results(int outer_map_fd, + __u32 *fetched_keys, __u32 *fetched_values, + __u32 max_entries_fetched) +{ + __u32 inner_map_key, inner_map_value; + int inner_map_fd, entry, err; + __u32 outer_map_value; + + for (entry = 0; entry < max_entries_fetched; ++entry) { + outer_map_value = fetched_values[entry]; + inner_map_fd = bpf_map_get_fd_by_id(outer_map_value); + CHECK(inner_map_fd < 0, + "Failed to get inner map fd", + "from id(%d), error=%s\n", + outer_map_value, strerror(errno)); + err = bpf_map_get_next_key(inner_map_fd, NULL, &inner_map_key); + CHECK(err != 0, + "Failed to get inner map key", + "error=%s\n", strerror(errno)); + + err = bpf_map_lookup_elem(inner_map_fd, &inner_map_key, + &inner_map_value); + + close(inner_map_fd); + + CHECK(err != 0, + "Failed to get inner map value", + "for key(%d), error=%s\n", + inner_map_key, strerror(errno)); + + /* Actual value validation */ + CHECK(outer_map_value != inner_map_value, + "Failed to validate inner map value", + "fetched(%d) and lookedup(%d)!\n", + outer_map_value, inner_map_value); + } +} + +static void fetch_and_validate(int outer_map_fd, + struct bpf_map_batch_opts *opts, + __u32 batch_size, bool delete_entries) +{ + __u32 *fetched_keys, *fetched_values, total_fetched = 0; + __u32 batch_key = 0, fetch_count, step_size; + int err, max_entries = OUTER_MAP_ENTRIES; + __u32 value_size = sizeof(__u32); + + /* Total entries needs to be fetched */ + fetched_keys = calloc(max_entries, value_size); + fetched_values = calloc(max_entries, value_size); + 
CHECK((!fetched_keys || !fetched_values), + "Memory allocation failed for fetched_keys or fetched_values", + "error=%s\n", strerror(errno)); + + for (step_size = batch_size; + step_size <= max_entries; + step_size += batch_size) { + fetch_count = step_size; + err = delete_entries + ? bpf_map_lookup_and_delete_batch(outer_map_fd, + total_fetched ? &batch_key : NULL, + &batch_key, + fetched_keys + total_fetched, + fetched_values + total_fetched, + &fetch_count, opts) + : bpf_map_lookup_batch(outer_map_fd, + total_fetched ? &batch_key : NULL, + &batch_key, + fetched_keys + total_fetched, + fetched_values + total_fetched, + &fetch_count, opts); + + if (err && errno == ENOSPC) { + /* Fetch again with higher batch size */ + total_fetched = 0; + continue; + } + + CHECK((err < 0 && (errno != ENOENT)), + "lookup with steps failed", + "error: %s\n", strerror(errno)); + + /* Update the total fetched number */ + total_fetched += fetch_count; + if (err) + break; + } + + CHECK((total_fetched != max_entries), + "Unable to fetch expected entries !", + "total_fetched(%d) and max_entries(%d) error: (%d):%s\n", + total_fetched, max_entries, errno, strerror(errno)); + + /* validate the fetched entries */ + validate_fetch_results(outer_map_fd, fetched_keys, + fetched_values, total_fetched); + printf("batch_op(%s) is successful with batch_size(%d)\n", + delete_entries ? "LOOKUP_AND_DELETE" : "LOOKUP", batch_size); + + free(fetched_keys); + free(fetched_values); +} + +static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type, + enum bpf_map_type inner_map_type) +{ + __u32 *outer_map_keys, *inner_map_fds; + __u32 max_entries = OUTER_MAP_ENTRIES; + LIBBPF_OPTS(bpf_map_batch_opts, opts); + __u32 value_size = sizeof(__u32); + int batch_size[2] = {5, 10}; + __u32 map_index, op_index; + int outer_map_fd, ret; + + outer_map_keys = calloc(max_entries, value_size); + inner_map_fds = calloc(max_entries, value_size); + CHECK((!outer_map_keys || !inner_map_fds), + "Memory allocation failed for outer_map_keys or inner_map_fds", + "error=%s\n", strerror(errno)); + + create_inner_maps(inner_map_type, inner_map_fds); + + outer_map_fd = create_outer_map(outer_map_type, *inner_map_fds); + /* create outer map keys */ + for (map_index = 0; map_index < max_entries; map_index++) + outer_map_keys[map_index] = + ((outer_map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) + ? 
9 : 1000) - map_index; + + /* batch operation - map_update */ + ret = bpf_map_update_batch(outer_map_fd, outer_map_keys, + inner_map_fds, &max_entries, &opts); + CHECK(ret != 0, + "Failed to update the outer map batch ops", + "error=%s\n", strerror(errno)); + + /* batch operation - map_lookup */ + for (op_index = 0; op_index < 2; ++op_index) + fetch_and_validate(outer_map_fd, &opts, + batch_size[op_index], false); + + /* batch operation - map_lookup_delete */ + if (outer_map_type == BPF_MAP_TYPE_HASH_OF_MAPS) + fetch_and_validate(outer_map_fd, &opts, + max_entries, true /*delete*/); + + /* close all map fds */ + for (map_index = 0; map_index < max_entries; map_index++) + close(inner_map_fds[map_index]); + close(outer_map_fd); + + free(inner_map_fds); + free(outer_map_keys); +} + +void test_map_in_map_batch_ops_array(void) +{ + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY); + printf("%s:PASS with inner ARRAY map\n", __func__); + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH); + printf("%s:PASS with inner HASH map\n", __func__); +} + +void test_map_in_map_batch_ops_hash(void) +{ + _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY); + printf("%s:PASS with inner ARRAY map\n", __func__); + _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH); + printf("%s:PASS with inner HASH map\n", __func__); +} diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 2bb1f9b3841d..59cf81ec55af 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -21,6 +21,10 @@ #include "network_helpers.h" #include "test_progs.h" +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif + #define clean_errno() (errno == 0 ? "None" : strerror(errno)) #define log_err(MSG, ...) 
({ \ int __save = errno; \ @@ -73,13 +77,13 @@ int settimeo(int fd, int timeout_ms) #define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; }) -static int __start_server(int type, const struct sockaddr *addr, +static int __start_server(int type, int protocol, const struct sockaddr *addr, socklen_t addrlen, int timeout_ms, bool reuseport) { int on = 1; int fd; - fd = socket(addr->sa_family, type, 0); + fd = socket(addr->sa_family, type, protocol); if (fd < 0) { log_err("Failed to create server socket"); return -1; @@ -113,8 +117,8 @@ error_close: return -1; } -int start_server(int family, int type, const char *addr_str, __u16 port, - int timeout_ms) +static int start_server_proto(int family, int type, int protocol, + const char *addr_str, __u16 port, int timeout_ms) { struct sockaddr_storage addr; socklen_t addrlen; @@ -122,10 +126,23 @@ int start_server(int family, int type, const char *addr_str, __u16 port, if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) return -1; - return __start_server(type, (struct sockaddr *)&addr, + return __start_server(type, protocol, (struct sockaddr *)&addr, addrlen, timeout_ms, false); } +int start_server(int family, int type, const char *addr_str, __u16 port, + int timeout_ms) +{ + return start_server_proto(family, type, 0, addr_str, port, timeout_ms); +} + +int start_mptcp_server(int family, const char *addr_str, __u16 port, + int timeout_ms) +{ + return start_server_proto(family, SOCK_STREAM, IPPROTO_MPTCP, addr_str, + port, timeout_ms); +} + int *start_reuseport_server(int family, int type, const char *addr_str, __u16 port, int timeout_ms, unsigned int nr_listens) { @@ -144,7 +161,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str, if (!fds) return NULL; - fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen, + fds[0] = __start_server(type, 0, (struct sockaddr *)&addr, addrlen, timeout_ms, true); if (fds[0] == -1) goto close_fds; @@ -154,7 +171,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str, goto close_fds; for (; nr_fds < nr_listens; nr_fds++) { - fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr, + fds[nr_fds] = __start_server(type, 0, (struct sockaddr *)&addr, addrlen, timeout_ms, true); if (fds[nr_fds] == -1) goto close_fds; @@ -247,7 +264,7 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) struct sockaddr_storage addr; struct sockaddr_in *addr_in; socklen_t addrlen, optlen; - int fd, type; + int fd, type, protocol; if (!opts) opts = &default_opts; @@ -258,6 +275,11 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) return -1; } + if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) { + log_err("getsockopt(SOL_PROTOCOL)"); + return -1; + } + addrlen = sizeof(addr); if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { log_err("Failed to get server addr"); @@ -265,7 +287,7 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) } addr_in = (struct sockaddr_in *)&addr; - fd = socket(addr_in->sin_family, type, 0); + fd = socket(addr_in->sin_family, type, protocol); if (fd < 0) { log_err("Failed to create client socket"); return -1; diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index a4b3b2f9877b..f882c691b790 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -42,6 +42,8 @@ extern struct ipv6_packet pkt_v6; int 
settimeo(int fd, int timeout_ms); int start_server(int family, int type, const char *addr, __u16 port, int timeout_ms); +int start_mptcp_server(int family, const char *addr, __u16 port, + int timeout_ms); int *start_reuseport_server(int family, int type, const char *addr_str, __u16 port, int timeout_ms, unsigned int nr_listens); diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index c0c6d410751d..08c0601b3e84 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -55,6 +55,7 @@ void test_attach_probe(void) if (!ASSERT_OK_PTR(skel->bss, "check_bss")) goto cleanup; + /* manual-attach kprobe/kretprobe */ kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe, false /* retprobe */, SYS_NANOSLEEP_KPROBE_NAME); @@ -69,6 +70,13 @@ void test_attach_probe(void) goto cleanup; skel->links.handle_kretprobe = kretprobe_link; + /* auto-attachable kprobe and kretprobe */ + skel->links.handle_kprobe_auto = bpf_program__attach(skel->progs.handle_kprobe_auto); + ASSERT_OK_PTR(skel->links.handle_kprobe_auto, "attach_kprobe_auto"); + + skel->links.handle_kretprobe_auto = bpf_program__attach(skel->progs.handle_kretprobe_auto); + ASSERT_OK_PTR(skel->links.handle_kretprobe_auto, "attach_kretprobe_auto"); + if (!legacy) ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before"); @@ -157,7 +165,9 @@ void test_attach_probe(void) trigger_func2(); ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res"); + ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res"); ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res"); + ASSERT_EQ(skel->bss->kretprobe2_res, 22, "check_kretprobe_auto_res"); ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res"); ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res"); ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res"); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 923a6139b2d8..83ef55e3caa4 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -4,8 +4,11 @@ #include <pthread.h> #include <sched.h> #include <sys/syscall.h> +#include <sys/mman.h> #include <unistd.h> #include <test_progs.h> +#include <network_helpers.h> +#include <bpf/btf.h> #include "test_bpf_cookie.skel.h" #include "kprobe_multi.skel.h" @@ -410,6 +413,88 @@ cleanup: bpf_link__destroy(link); } +static void tracing_subtest(struct test_bpf_cookie *skel) +{ + __u64 cookie; + int prog_fd; + int fentry_fd = -1, fexit_fd = -1, fmod_ret_fd = -1; + LIBBPF_OPTS(bpf_test_run_opts, opts); + LIBBPF_OPTS(bpf_link_create_opts, link_opts); + + skel->bss->fentry_res = 0; + skel->bss->fexit_res = 0; + + cookie = 0x10000000000000L; + prog_fd = bpf_program__fd(skel->progs.fentry_test1); + link_opts.tracing.cookie = cookie; + fentry_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_FENTRY, &link_opts); + if (!ASSERT_GE(fentry_fd, 0, "fentry.link_create")) + goto cleanup; + + cookie = 0x20000000000000L; + prog_fd = bpf_program__fd(skel->progs.fexit_test1); + link_opts.tracing.cookie = cookie; + fexit_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_FEXIT, &link_opts); + if (!ASSERT_GE(fexit_fd, 0, "fexit.link_create")) + goto cleanup; + + cookie = 0x30000000000000L; + prog_fd = bpf_program__fd(skel->progs.fmod_ret_test); + link_opts.tracing.cookie = cookie; + fmod_ret_fd = bpf_link_create(prog_fd, 0, 
BPF_MODIFY_RETURN, &link_opts); + if (!ASSERT_GE(fmod_ret_fd, 0, "fmod_ret.link_create")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.fentry_test1); + bpf_prog_test_run_opts(prog_fd, &opts); + + prog_fd = bpf_program__fd(skel->progs.fmod_ret_test); + bpf_prog_test_run_opts(prog_fd, &opts); + + ASSERT_EQ(skel->bss->fentry_res, 0x10000000000000L, "fentry_res"); + ASSERT_EQ(skel->bss->fexit_res, 0x20000000000000L, "fexit_res"); + ASSERT_EQ(skel->bss->fmod_ret_res, 0x30000000000000L, "fmod_ret_res"); + +cleanup: + if (fentry_fd >= 0) + close(fentry_fd); + if (fexit_fd >= 0) + close(fexit_fd); + if (fmod_ret_fd >= 0) + close(fmod_ret_fd); +} + +int stack_mprotect(void); + +static void lsm_subtest(struct test_bpf_cookie *skel) +{ + __u64 cookie; + int prog_fd; + int lsm_fd = -1; + LIBBPF_OPTS(bpf_link_create_opts, link_opts); + + skel->bss->lsm_res = 0; + + cookie = 0x90000000000090L; + prog_fd = bpf_program__fd(skel->progs.test_int_hook); + link_opts.tracing.cookie = cookie; + lsm_fd = bpf_link_create(prog_fd, 0, BPF_LSM_MAC, &link_opts); + if (!ASSERT_GE(lsm_fd, 0, "lsm.link_create")) + goto cleanup; + + stack_mprotect(); + if (!ASSERT_EQ(errno, EPERM, "stack_mprotect")) + goto cleanup; + + usleep(1); + + ASSERT_EQ(skel->bss->lsm_res, 0x90000000000090L, "fentry_res"); + +cleanup: + if (lsm_fd >= 0) + close(lsm_fd); +} + void test_bpf_cookie(void) { struct test_bpf_cookie *skel; @@ -432,6 +517,10 @@ void test_bpf_cookie(void) tp_subtest(skel); if (test__start_subtest("perf_event")) pe_subtest(skel); + if (test__start_subtest("trampoline")) + tracing_subtest(skel); + if (test__start_subtest("lsm")) + lsm_subtest(skel); test_bpf_cookie__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 2c403ddc8076..7ff5fa93d056 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -26,6 +26,7 @@ #include "bpf_iter_bpf_sk_storage_map.skel.h" #include "bpf_iter_test_kern5.skel.h" #include "bpf_iter_test_kern6.skel.h" +#include "bpf_iter_bpf_link.skel.h" static int duration; @@ -34,8 +35,7 @@ static void test_btf_id_or_null(void) struct bpf_iter_test_kern3 *skel; skel = bpf_iter_test_kern3__open_and_load(); - if (CHECK(skel, "bpf_iter_test_kern3__open_and_load", - "skeleton open_and_load unexpectedly succeeded\n")) { + if (!ASSERT_ERR_PTR(skel, "bpf_iter_test_kern3__open_and_load")) { bpf_iter_test_kern3__destroy(skel); return; } @@ -52,7 +52,7 @@ static void do_dummy_read(struct bpf_program *prog) return; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto free_link; /* not check contents, but ensure read() ends without error */ @@ -87,8 +87,7 @@ static void test_ipv6_route(void) struct bpf_iter_ipv6_route *skel; skel = bpf_iter_ipv6_route__open_and_load(); - if (CHECK(!skel, "bpf_iter_ipv6_route__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_ipv6_route__open_and_load")) return; do_dummy_read(skel->progs.dump_ipv6_route); @@ -101,8 +100,7 @@ static void test_netlink(void) struct bpf_iter_netlink *skel; skel = bpf_iter_netlink__open_and_load(); - if (CHECK(!skel, "bpf_iter_netlink__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_netlink__open_and_load")) return; do_dummy_read(skel->progs.dump_netlink); @@ -115,8 +113,7 @@ static void test_bpf_map(void) 
struct bpf_iter_bpf_map *skel; skel = bpf_iter_bpf_map__open_and_load(); - if (CHECK(!skel, "bpf_iter_bpf_map__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_map__open_and_load")) return; do_dummy_read(skel->progs.dump_bpf_map); @@ -129,8 +126,7 @@ static void test_task(void) struct bpf_iter_task *skel; skel = bpf_iter_task__open_and_load(); - if (CHECK(!skel, "bpf_iter_task__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load")) return; do_dummy_read(skel->progs.dump_task); @@ -161,8 +157,7 @@ static void test_task_stack(void) struct bpf_iter_task_stack *skel; skel = bpf_iter_task_stack__open_and_load(); - if (CHECK(!skel, "bpf_iter_task_stack__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_task_stack__open_and_load")) return; do_dummy_read(skel->progs.dump_task_stack); @@ -183,24 +178,22 @@ static void test_task_file(void) void *ret; skel = bpf_iter_task_file__open_and_load(); - if (CHECK(!skel, "bpf_iter_task_file__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_task_file__open_and_load")) return; skel->bss->tgid = getpid(); - if (CHECK(pthread_create(&thread_id, NULL, &do_nothing, NULL), - "pthread_create", "pthread_create failed\n")) + if (!ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing, NULL), + "pthread_create")) goto done; do_dummy_read(skel->progs.dump_task_file); - if (CHECK(pthread_join(thread_id, &ret) || ret != NULL, - "pthread_join", "pthread_join failed\n")) + if (!ASSERT_FALSE(pthread_join(thread_id, &ret) || ret != NULL, + "pthread_join")) goto done; - CHECK(skel->bss->count != 0, "check_count", - "invalid non pthread file visit count %d\n", skel->bss->count); + ASSERT_EQ(skel->bss->count, 0, "check_count"); done: bpf_iter_task_file__destroy(skel); @@ -224,7 +217,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel) return ret; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto free_link; err = read_fd_into_buffer(iter_fd, buf, TASKBUFSZ); @@ -238,9 +231,8 @@ static int do_btf_read(struct bpf_iter_task_btf *skel) if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno))) goto free_link; - CHECK(strstr(taskbuf, "(struct task_struct)") == NULL, - "check for btf representation of task_struct in iter data", - "struct task_struct not found"); + ASSERT_HAS_SUBSTR(taskbuf, "(struct task_struct)", + "check for btf representation of task_struct in iter data"); free_link: if (iter_fd > 0) close(iter_fd); @@ -255,8 +247,7 @@ static void test_task_btf(void) int ret; skel = bpf_iter_task_btf__open_and_load(); - if (CHECK(!skel, "bpf_iter_task_btf__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_task_btf__open_and_load")) return; bss = skel->bss; @@ -265,12 +256,10 @@ static void test_task_btf(void) if (ret) goto cleanup; - if (CHECK(bss->tasks == 0, "check if iterated over tasks", - "no task iteration, did BPF program run?\n")) + if (!ASSERT_NEQ(bss->tasks, 0, "no task iteration, did BPF program run?")) goto cleanup; - CHECK(bss->seq_err != 0, "check for unexpected err", - "bpf_seq_printf_btf returned %ld", bss->seq_err); + ASSERT_EQ(bss->seq_err, 0, "check for unexpected err"); cleanup: bpf_iter_task_btf__destroy(skel); @@ -281,8 +270,7 @@ static void test_tcp4(void) struct bpf_iter_tcp4 *skel; skel = 
bpf_iter_tcp4__open_and_load(); - if (CHECK(!skel, "bpf_iter_tcp4__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_tcp4__open_and_load")) return; do_dummy_read(skel->progs.dump_tcp4); @@ -295,8 +283,7 @@ static void test_tcp6(void) struct bpf_iter_tcp6 *skel; skel = bpf_iter_tcp6__open_and_load(); - if (CHECK(!skel, "bpf_iter_tcp6__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_tcp6__open_and_load")) return; do_dummy_read(skel->progs.dump_tcp6); @@ -309,8 +296,7 @@ static void test_udp4(void) struct bpf_iter_udp4 *skel; skel = bpf_iter_udp4__open_and_load(); - if (CHECK(!skel, "bpf_iter_udp4__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_udp4__open_and_load")) return; do_dummy_read(skel->progs.dump_udp4); @@ -323,8 +309,7 @@ static void test_udp6(void) struct bpf_iter_udp6 *skel; skel = bpf_iter_udp6__open_and_load(); - if (CHECK(!skel, "bpf_iter_udp6__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_udp6__open_and_load")) return; do_dummy_read(skel->progs.dump_udp6); @@ -349,7 +334,7 @@ static void test_unix(void) static int do_read_with_fd(int iter_fd, const char *expected, bool read_one_char) { - int err = -1, len, read_buf_len, start; + int len, read_buf_len, start; char buf[16] = {}; read_buf_len = read_one_char ? 1 : 16; @@ -363,9 +348,7 @@ static int do_read_with_fd(int iter_fd, const char *expected, if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) return -1; - err = strcmp(buf, expected); - if (CHECK(err, "read", "incorrect read result: buf %s, expected %s\n", - buf, expected)) + if (!ASSERT_STREQ(buf, expected, "read")) return -1; return 0; @@ -378,19 +361,17 @@ static void test_anon_iter(bool read_one_char) int iter_fd, err; skel = bpf_iter_test_kern1__open_and_load(); - if (CHECK(!skel, "bpf_iter_test_kern1__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_test_kern1__open_and_load")) return; err = bpf_iter_test_kern1__attach(skel); - if (CHECK(err, "bpf_iter_test_kern1__attach", - "skeleton attach failed\n")) { + if (!ASSERT_OK(err, "bpf_iter_test_kern1__attach")) { goto out; } link = skel->links.dump_task; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto out; do_read_with_fd(iter_fd, "abcd", read_one_char); @@ -423,8 +404,7 @@ static void test_file_iter(void) int err; skel1 = bpf_iter_test_kern1__open_and_load(); - if (CHECK(!skel1, "bpf_iter_test_kern1__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel1, "bpf_iter_test_kern1__open_and_load")) return; link = bpf_program__attach_iter(skel1->progs.dump_task, NULL); @@ -447,12 +427,11 @@ static void test_file_iter(void) * should change. 
*/ skel2 = bpf_iter_test_kern2__open_and_load(); - if (CHECK(!skel2, "bpf_iter_test_kern2__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel2, "bpf_iter_test_kern2__open_and_load")) goto unlink_path; err = bpf_link__update_program(link, skel2->progs.dump_task); - if (CHECK(err, "update_prog", "update_prog failed\n")) + if (!ASSERT_OK(err, "update_prog")) goto destroy_skel2; do_read(path, "ABCD"); @@ -478,8 +457,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) char *buf; skel = bpf_iter_test_kern4__open(); - if (CHECK(!skel, "bpf_iter_test_kern4__open", - "skeleton open failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_test_kern4__open")) return; /* create two maps: bpf program will only do bpf_seq_write @@ -515,8 +493,8 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) } skel->rodata->ret1 = ret1; - if (CHECK(bpf_iter_test_kern4__load(skel), - "bpf_iter_test_kern4__load", "skeleton load failed\n")) + if (!ASSERT_OK(bpf_iter_test_kern4__load(skel), + "bpf_iter_test_kern4__load")) goto free_map2; /* setup filtering map_id in bpf program */ @@ -538,7 +516,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) goto free_map2; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto free_link; buf = malloc(expected_read_len); @@ -574,22 +552,16 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) goto free_buf; } - if (CHECK(total_read_len != expected_read_len, "read", - "total len %u, expected len %u\n", total_read_len, - expected_read_len)) + if (!ASSERT_EQ(total_read_len, expected_read_len, "read")) goto free_buf; - if (CHECK(skel->bss->map1_accessed != 1, "map1_accessed", - "expected 1 actual %d\n", skel->bss->map1_accessed)) + if (!ASSERT_EQ(skel->bss->map1_accessed, 1, "map1_accessed")) goto free_buf; - if (CHECK(skel->bss->map2_accessed != 2, "map2_accessed", - "expected 2 actual %d\n", skel->bss->map2_accessed)) + if (!ASSERT_EQ(skel->bss->map2_accessed, 2, "map2_accessed")) goto free_buf; - CHECK(skel->bss->map2_seqnum1 != skel->bss->map2_seqnum2, - "map2_seqnum", "two different seqnum %lld %lld\n", - skel->bss->map2_seqnum1, skel->bss->map2_seqnum2); + ASSERT_EQ(skel->bss->map2_seqnum1, skel->bss->map2_seqnum2, "map2_seqnum"); free_buf: free(buf); @@ -622,15 +594,13 @@ static void test_bpf_hash_map(void) char buf[64]; skel = bpf_iter_bpf_hash_map__open(); - if (CHECK(!skel, "bpf_iter_bpf_hash_map__open", - "skeleton open failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_hash_map__open")) return; skel->bss->in_test_mode = true; err = bpf_iter_bpf_hash_map__load(skel); - if (CHECK(!skel, "bpf_iter_bpf_hash_map__load", - "skeleton load failed\n")) + if (!ASSERT_OK(err, "bpf_iter_bpf_hash_map__load")) goto out; /* iterator with hashmap2 and hashmap3 should fail */ @@ -659,7 +629,7 @@ static void test_bpf_hash_map(void) expected_val += val; err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY); - if (CHECK(err, "map_update", "map_update failed\n")) + if (!ASSERT_OK(err, "map_update")) goto out; } @@ -669,7 +639,7 @@ static void test_bpf_hash_map(void) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto free_link; /* do some tests */ @@ -679,17 +649,11 @@ static void test_bpf_hash_map(void) goto close_iter; /* test results */ - if (CHECK(skel->bss->key_sum_a != expected_key_a, 
- "key_sum_a", "got %u expected %u\n", - skel->bss->key_sum_a, expected_key_a)) + if (!ASSERT_EQ(skel->bss->key_sum_a, expected_key_a, "key_sum_a")) goto close_iter; - if (CHECK(skel->bss->key_sum_b != expected_key_b, - "key_sum_b", "got %u expected %u\n", - skel->bss->key_sum_b, expected_key_b)) + if (!ASSERT_EQ(skel->bss->key_sum_b, expected_key_b, "key_sum_b")) goto close_iter; - if (CHECK(skel->bss->val_sum != expected_val, - "val_sum", "got %llu expected %llu\n", - skel->bss->val_sum, expected_val)) + if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum")) goto close_iter; close_iter: @@ -718,16 +682,14 @@ static void test_bpf_percpu_hash_map(void) void *val; skel = bpf_iter_bpf_percpu_hash_map__open(); - if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__open", - "skeleton open failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__open")) return; skel->rodata->num_cpus = bpf_num_possible_cpus(); val = malloc(8 * bpf_num_possible_cpus()); err = bpf_iter_bpf_percpu_hash_map__load(skel); - if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__load", - "skeleton load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__load")) goto out; /* update map values here */ @@ -745,7 +707,7 @@ static void test_bpf_percpu_hash_map(void) } err = bpf_map_update_elem(map_fd, &key, val, BPF_ANY); - if (CHECK(err, "map_update", "map_update failed\n")) + if (!ASSERT_OK(err, "map_update")) goto out; } @@ -758,7 +720,7 @@ static void test_bpf_percpu_hash_map(void) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto free_link; /* do some tests */ @@ -768,17 +730,11 @@ static void test_bpf_percpu_hash_map(void) goto close_iter; /* test results */ - if (CHECK(skel->bss->key_sum_a != expected_key_a, - "key_sum_a", "got %u expected %u\n", - skel->bss->key_sum_a, expected_key_a)) + if (!ASSERT_EQ(skel->bss->key_sum_a, expected_key_a, "key_sum_a")) goto close_iter; - if (CHECK(skel->bss->key_sum_b != expected_key_b, - "key_sum_b", "got %u expected %u\n", - skel->bss->key_sum_b, expected_key_b)) + if (!ASSERT_EQ(skel->bss->key_sum_b, expected_key_b, "key_sum_b")) goto close_iter; - if (CHECK(skel->bss->val_sum != expected_val, - "val_sum", "got %u expected %u\n", - skel->bss->val_sum, expected_val)) + if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum")) goto close_iter; close_iter: @@ -803,8 +759,7 @@ static void test_bpf_array_map(void) int len, start; skel = bpf_iter_bpf_array_map__open_and_load(); - if (CHECK(!skel, "bpf_iter_bpf_array_map__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_array_map__open_and_load")) return; map_fd = bpf_map__fd(skel->maps.arraymap1); @@ -817,7 +772,7 @@ static void test_bpf_array_map(void) first_val = val; err = bpf_map_update_elem(map_fd, &i, &val, BPF_ANY); - if (CHECK(err, "map_update", "map_update failed\n")) + if (!ASSERT_OK(err, "map_update")) goto out; } @@ -830,7 +785,7 @@ static void test_bpf_array_map(void) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto free_link; /* do some tests */ @@ -850,21 +805,16 @@ static void test_bpf_array_map(void) res_first_key, res_first_val, first_val)) goto close_iter; - if (CHECK(skel->bss->key_sum != expected_key, - "key_sum", "got %u expected %u\n", - skel->bss->key_sum, expected_key)) + if 
(!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum")) goto close_iter; - if (CHECK(skel->bss->val_sum != expected_val, - "val_sum", "got %llu expected %llu\n", - skel->bss->val_sum, expected_val)) + if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum")) goto close_iter; for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) { err = bpf_map_lookup_elem(map_fd, &i, &val); - if (CHECK(err, "map_lookup", "map_lookup failed\n")) + if (!ASSERT_OK(err, "map_lookup")) goto out; - if (CHECK(i != val, "invalid_val", - "got value %llu expected %u\n", val, i)) + if (!ASSERT_EQ(i, val, "invalid_val")) goto out; } @@ -889,16 +839,14 @@ static void test_bpf_percpu_array_map(void) int len; skel = bpf_iter_bpf_percpu_array_map__open(); - if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__open", - "skeleton open failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__open")) return; skel->rodata->num_cpus = bpf_num_possible_cpus(); val = malloc(8 * bpf_num_possible_cpus()); err = bpf_iter_bpf_percpu_array_map__load(skel); - if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__load", - "skeleton load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__load")) goto out; /* update map values here */ @@ -912,7 +860,7 @@ static void test_bpf_percpu_array_map(void) } err = bpf_map_update_elem(map_fd, &i, val, BPF_ANY); - if (CHECK(err, "map_update", "map_update failed\n")) + if (!ASSERT_OK(err, "map_update")) goto out; } @@ -925,7 +873,7 @@ static void test_bpf_percpu_array_map(void) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto free_link; /* do some tests */ @@ -935,13 +883,9 @@ static void test_bpf_percpu_array_map(void) goto close_iter; /* test results */ - if (CHECK(skel->bss->key_sum != expected_key, - "key_sum", "got %u expected %u\n", - skel->bss->key_sum, expected_key)) + if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum")) goto close_iter; - if (CHECK(skel->bss->val_sum != expected_val, - "val_sum", "got %u expected %u\n", - skel->bss->val_sum, expected_val)) + if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum")) goto close_iter; close_iter: @@ -966,17 +910,16 @@ static void test_bpf_sk_storage_delete(void) char buf[64]; skel = bpf_iter_bpf_sk_storage_helpers__open_and_load(); - if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load")) return; map_fd = bpf_map__fd(skel->maps.sk_stg_map); sock_fd = socket(AF_INET6, SOCK_STREAM, 0); - if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno)) + if (!ASSERT_GE(sock_fd, 0, "socket")) goto out; err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST); - if (CHECK(err, "map_update", "map_update failed\n")) + if (!ASSERT_OK(err, "map_update")) goto out; memset(&linfo, 0, sizeof(linfo)); @@ -989,7 +932,7 @@ static void test_bpf_sk_storage_delete(void) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto free_link; /* do some tests */ @@ -1027,22 +970,21 @@ static void test_bpf_sk_storage_get(void) int sock_fd = -1; skel = bpf_iter_bpf_sk_storage_helpers__open_and_load(); - if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, 
"bpf_iter_bpf_sk_storage_helpers__open_and_load")) return; sock_fd = socket(AF_INET6, SOCK_STREAM, 0); - if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno)) + if (!ASSERT_GE(sock_fd, 0, "socket")) goto out; err = listen(sock_fd, 1); - if (CHECK(err != 0, "listen", "errno: %d\n", errno)) + if (!ASSERT_OK(err, "listen")) goto close_socket; map_fd = bpf_map__fd(skel->maps.sk_stg_map); err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST); - if (CHECK(err, "bpf_map_update_elem", "map_update_failed\n")) + if (!ASSERT_OK(err, "bpf_map_update_elem")) goto close_socket; do_dummy_read(skel->progs.fill_socket_owner); @@ -1078,15 +1020,14 @@ static void test_bpf_sk_storage_map(void) char buf[64]; skel = bpf_iter_bpf_sk_storage_map__open_and_load(); - if (CHECK(!skel, "bpf_iter_bpf_sk_storage_map__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_map__open_and_load")) return; map_fd = bpf_map__fd(skel->maps.sk_stg_map); num_sockets = ARRAY_SIZE(sock_fd); for (i = 0; i < num_sockets; i++) { sock_fd[i] = socket(AF_INET6, SOCK_STREAM, 0); - if (CHECK(sock_fd[i] < 0, "socket", "errno: %d\n", errno)) + if (!ASSERT_GE(sock_fd[i], 0, "socket")) goto out; val = i + 1; @@ -1094,7 +1035,7 @@ static void test_bpf_sk_storage_map(void) err = bpf_map_update_elem(map_fd, &sock_fd[i], &val, BPF_NOEXIST); - if (CHECK(err, "map_update", "map_update failed\n")) + if (!ASSERT_OK(err, "map_update")) goto out; } @@ -1107,7 +1048,7 @@ static void test_bpf_sk_storage_map(void) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto free_link; /* do some tests */ @@ -1117,14 +1058,10 @@ static void test_bpf_sk_storage_map(void) goto close_iter; /* test results */ - if (CHECK(skel->bss->ipv6_sk_count != num_sockets, - "ipv6_sk_count", "got %u expected %u\n", - skel->bss->ipv6_sk_count, num_sockets)) + if (!ASSERT_EQ(skel->bss->ipv6_sk_count, num_sockets, "ipv6_sk_count")) goto close_iter; - if (CHECK(skel->bss->val_sum != expected_val, - "val_sum", "got %u expected %u\n", - skel->bss->val_sum, expected_val)) + if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum")) goto close_iter; close_iter: @@ -1147,8 +1084,7 @@ static void test_rdonly_buf_out_of_bound(void) struct bpf_link *link; skel = bpf_iter_test_kern5__open_and_load(); - if (CHECK(!skel, "bpf_iter_test_kern5__open_and_load", - "skeleton open_and_load failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_test_kern5__open_and_load")) return; memset(&linfo, 0, sizeof(linfo)); @@ -1167,11 +1103,23 @@ static void test_buf_neg_offset(void) struct bpf_iter_test_kern6 *skel; skel = bpf_iter_test_kern6__open_and_load(); - if (CHECK(skel, "bpf_iter_test_kern6__open_and_load", - "skeleton open_and_load unexpected success\n")) + if (!ASSERT_ERR_PTR(skel, "bpf_iter_test_kern6__open_and_load")) bpf_iter_test_kern6__destroy(skel); } +static void test_link_iter(void) +{ + struct bpf_iter_bpf_link *skel; + + skel = bpf_iter_bpf_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_link__open_and_load")) + return; + + do_dummy_read(skel->progs.dump_bpf_link); + + bpf_iter_bpf_link__destroy(skel); +} + #define CMP_BUFFER_SIZE 1024 static char task_vma_output[CMP_BUFFER_SIZE]; static char proc_maps_output[CMP_BUFFER_SIZE]; @@ -1200,13 +1148,13 @@ static void test_task_vma(void) char maps_path[64]; skel = bpf_iter_task_vma__open(); - if (CHECK(!skel, "bpf_iter_task_vma__open", "skeleton open 
failed\n")) + if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma__open")) return; skel->bss->pid = getpid(); err = bpf_iter_task_vma__load(skel); - if (CHECK(err, "bpf_iter_task_vma__load", "skeleton load failed\n")) + if (!ASSERT_OK(err, "bpf_iter_task_vma__load")) goto out; skel->links.proc_maps = bpf_program__attach_iter( @@ -1218,7 +1166,7 @@ static void test_task_vma(void) } iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps)); - if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + if (!ASSERT_GE(iter_fd, 0, "create_iter")) goto out; /* Read CMP_BUFFER_SIZE (1kB) from bpf_iter. Read in small chunks @@ -1230,7 +1178,7 @@ static void test_task_vma(void) MIN(read_size, CMP_BUFFER_SIZE - len)); if (!err) break; - if (CHECK(err < 0, "read_iter_fd", "read_iter_fd failed\n")) + if (!ASSERT_GE(err, 0, "read_iter_fd")) goto out; len += err; } @@ -1238,18 +1186,17 @@ static void test_task_vma(void) /* read CMP_BUFFER_SIZE (1kB) from /proc/pid/maps */ snprintf(maps_path, 64, "/proc/%u/maps", skel->bss->pid); proc_maps_fd = open(maps_path, O_RDONLY); - if (CHECK(proc_maps_fd < 0, "open_proc_maps", "open_proc_maps failed\n")) + if (!ASSERT_GE(proc_maps_fd, 0, "open_proc_maps")) goto out; err = read_fd_into_buffer(proc_maps_fd, proc_maps_output, CMP_BUFFER_SIZE); - if (CHECK(err < 0, "read_prog_maps_fd", "read_prog_maps_fd failed\n")) + if (!ASSERT_GE(err, 0, "read_prog_maps_fd")) goto out; /* strip and compare the first line of the two files */ str_strip_first_line(task_vma_output); str_strip_first_line(proc_maps_output); - CHECK(strcmp(task_vma_output, proc_maps_output), "compare_output", - "found mismatch\n"); + ASSERT_STREQ(task_vma_output, proc_maps_output, "compare_output"); out: close(proc_maps_fd); close(iter_fd); @@ -1318,4 +1265,6 @@ void test_bpf_iter(void) test_rdonly_buf_out_of_bound(); if (test__start_subtest("buf-neg-offset")) test_buf_neg_offset(); + if (test__start_subtest("link-iter")) + test_link_iter(); } diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c index 1dfe14ff6aa4..f2ce4fd1cdae 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_autosize.c +++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c @@ -167,7 +167,7 @@ void test_core_autosize(void) if (!ASSERT_OK_PTR(bss_map, "bss_map_find")) goto cleanup; - err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &zero, (void *)&out); + err = bpf_map__lookup_elem(bss_map, &zero, sizeof(zero), &out, sizeof(out), 0); if (!ASSERT_OK(err, "bss_lookup")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index f28f75aa9154..3712dfe1be59 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -277,13 +277,21 @@ static int duration = 0; #define SIZE_OUTPUT_DATA(type) \ STRUCT_TO_CHAR_PTR(core_reloc_size_output) { \ .int_sz = sizeof(((type *)0)->int_field), \ + .int_off = offsetof(type, int_field), \ .struct_sz = sizeof(((type *)0)->struct_field), \ + .struct_off = offsetof(type, struct_field), \ .union_sz = sizeof(((type *)0)->union_field), \ + .union_off = offsetof(type, union_field), \ .arr_sz = sizeof(((type *)0)->arr_field), \ - .arr_elem_sz = sizeof(((type *)0)->arr_field[0]), \ + .arr_off = offsetof(type, arr_field), \ + .arr_elem_sz = sizeof(((type *)0)->arr_field[1]), \ + .arr_elem_off = offsetof(type, arr_field[1]), \ .ptr_sz = 8, /* always 8-byte pointer for BPF */ \ + .ptr_off = 
offsetof(type, ptr_field), \ .enum_sz = sizeof(((type *)0)->enum_field), \ + .enum_off = offsetof(type, enum_field), \ .float_sz = sizeof(((type *)0)->float_field), \ + .float_off = offsetof(type, float_field), \ } #define SIZE_CASE(name) { \ @@ -714,9 +722,10 @@ static const struct core_reloc_test_case test_cases[] = { }), BITFIELDS_ERR_CASE(bitfields___err_too_big_bitfield), - /* size relocation checks */ + /* field size and offset relocation checks */ SIZE_CASE(size), SIZE_CASE(size___diff_sz), + SIZE_CASE(size___diff_offs), SIZE_ERR_CASE(size___err_ambiguous), /* validate type existence and size relocations */ diff --git a/tools/testing/selftests/bpf/prog_tests/core_retro.c b/tools/testing/selftests/bpf/prog_tests/core_retro.c index 6acb0e94d4d7..4a2c256c8db6 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_retro.c +++ b/tools/testing/selftests/bpf/prog_tests/core_retro.c @@ -6,31 +6,32 @@ void test_core_retro(void) { - int err, zero = 0, res, duration = 0, my_pid = getpid(); + int err, zero = 0, res, my_pid = getpid(); struct test_core_retro *skel; /* load program */ skel = test_core_retro__open_and_load(); - if (CHECK(!skel, "skel_load", "skeleton open/load failed\n")) + if (!ASSERT_OK_PTR(skel, "skel_load")) goto out_close; - err = bpf_map_update_elem(bpf_map__fd(skel->maps.exp_tgid_map), &zero, &my_pid, 0); - if (CHECK(err, "map_update", "failed to set expected PID: %d\n", errno)) + err = bpf_map__update_elem(skel->maps.exp_tgid_map, &zero, sizeof(zero), + &my_pid, sizeof(my_pid), 0); + if (!ASSERT_OK(err, "map_update")) goto out_close; /* attach probe */ err = test_core_retro__attach(skel); - if (CHECK(err, "attach_kprobe", "err %d\n", err)) + if (!ASSERT_OK(err, "attach_kprobe")) goto out_close; /* trigger */ usleep(1); - err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.results), &zero, &res); - if (CHECK(err, "map_lookup", "failed to lookup result: %d\n", errno)) + err = bpf_map__lookup_elem(skel->maps.results, &zero, sizeof(zero), &res, sizeof(res), 0); + if (!ASSERT_OK(err, "map_lookup")) goto out_close; - CHECK(res != my_pid, "pid_check", "got %d != exp %d\n", res, my_pid); + ASSERT_EQ(res, my_pid, "pid_check"); out_close: test_core_retro__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c new file mode 100644 index 000000000000..3c7aa82b98e2 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Facebook */ + +#include <test_progs.h> +#include "dynptr_fail.skel.h" +#include "dynptr_success.skel.h" + +static size_t log_buf_sz = 1048576; /* 1 MB */ +static char obj_log_buf[1048576]; + +static struct { + const char *prog_name; + const char *expected_err_msg; +} dynptr_tests[] = { + /* failure cases */ + {"ringbuf_missing_release1", "Unreleased reference id=1"}, + {"ringbuf_missing_release2", "Unreleased reference id=2"}, + {"ringbuf_missing_release_callback", "Unreleased reference id"}, + {"use_after_invalid", "Expected an initialized dynptr as arg #3"}, + {"ringbuf_invalid_api", "type=mem expected=alloc_mem"}, + {"add_dynptr_to_map1", "invalid indirect read from stack"}, + {"add_dynptr_to_map2", "invalid indirect read from stack"}, + {"data_slice_out_of_bounds_ringbuf", "value is outside of the allowed memory range"}, + {"data_slice_out_of_bounds_map_value", "value is outside of the allowed memory range"}, + {"data_slice_use_after_release", "invalid mem access 'scalar'"}, + 
{"data_slice_missing_null_check1", "invalid mem access 'mem_or_null'"}, + {"data_slice_missing_null_check2", "invalid mem access 'mem_or_null'"}, + {"invalid_helper1", "invalid indirect read from stack"}, + {"invalid_helper2", "Expected an initialized dynptr as arg #3"}, + {"invalid_write1", "Expected an initialized dynptr as arg #1"}, + {"invalid_write2", "Expected an initialized dynptr as arg #3"}, + {"invalid_write3", "Expected an initialized ringbuf dynptr as arg #1"}, + {"invalid_write4", "arg 1 is an unacquired reference"}, + {"invalid_read1", "invalid read from stack"}, + {"invalid_read2", "cannot pass in dynptr at an offset"}, + {"invalid_read3", "invalid read from stack"}, + {"invalid_read4", "invalid read from stack"}, + {"invalid_offset", "invalid write to stack"}, + {"global", "type=map_value expected=fp"}, + {"release_twice", "arg 1 is an unacquired reference"}, + {"release_twice_callback", "arg 1 is an unacquired reference"}, + {"dynptr_from_mem_invalid_api", + "Unsupported reg type fp for bpf_dynptr_from_mem data"}, + + /* success cases */ + {"test_read_write", NULL}, + {"test_data_slice", NULL}, + {"test_ringbuf", NULL}, +}; + +static void verify_fail(const char *prog_name, const char *expected_err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct bpf_program *prog; + struct dynptr_fail *skel; + int err; + + opts.kernel_log_buf = obj_log_buf; + opts.kernel_log_size = log_buf_sz; + opts.kernel_log_level = 1; + + skel = dynptr_fail__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "dynptr_fail__open_opts")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + bpf_program__set_autoload(prog, true); + + bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize()); + + err = dynptr_fail__load(skel); + if (!ASSERT_ERR(err, "unexpected load success")) + goto cleanup; + + if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) { + fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg); + fprintf(stderr, "Verifier output: %s\n", obj_log_buf); + } + +cleanup: + dynptr_fail__destroy(skel); +} + +static void verify_success(const char *prog_name) +{ + struct dynptr_success *skel; + struct bpf_program *prog; + struct bpf_link *link; + + skel = dynptr_success__open(); + if (!ASSERT_OK_PTR(skel, "dynptr_success__open")) + return; + + skel->bss->pid = getpid(); + + bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize()); + + dynptr_success__load(skel); + if (!ASSERT_OK_PTR(skel, "dynptr_success__load")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "bpf_program__attach")) + goto cleanup; + + usleep(1); + + ASSERT_EQ(skel->bss->err, 0, "err"); + + bpf_link__destroy(link); + +cleanup: + dynptr_success__destroy(skel); +} + +void test_dynptr(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(dynptr_tests); i++) { + if (!test__start_subtest(dynptr_tests[i].prog_name)) + continue; + + if (dynptr_tests[i].expected_err_msg) + verify_fail(dynptr_tests[i].prog_name, + dynptr_tests[i].expected_err_msg); + else + verify_success(dynptr_tests[i].prog_name); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c index fe1f0f26ea14..a7e74297f15f 100644 --- 
a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -5,7 +5,7 @@ /* that's kernel internal BPF_MAX_TRAMP_PROGS define */ #define CNT 38 -void test_fexit_stress(void) +void serial_test_fexit_stress(void) { char test_skb[128] = {}; int fexit_fd[CNT] = {}; diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c index 754e80937e5d..8963f8a549f2 100644 --- a/tools/testing/selftests/bpf/prog_tests/for_each.c +++ b/tools/testing/selftests/bpf/prog_tests/for_each.c @@ -10,9 +10,10 @@ static unsigned int duration; static void test_hash_map(void) { - int i, err, hashmap_fd, max_entries, percpu_map_fd; + int i, err, max_entries; struct for_each_hash_map_elem *skel; __u64 *percpu_valbuf = NULL; + size_t percpu_val_sz; __u32 key, num_cpus; __u64 val; LIBBPF_OPTS(bpf_test_run_opts, topts, @@ -25,26 +26,27 @@ static void test_hash_map(void) if (!ASSERT_OK_PTR(skel, "for_each_hash_map_elem__open_and_load")) return; - hashmap_fd = bpf_map__fd(skel->maps.hashmap); max_entries = bpf_map__max_entries(skel->maps.hashmap); for (i = 0; i < max_entries; i++) { key = i; val = i + 1; - err = bpf_map_update_elem(hashmap_fd, &key, &val, BPF_ANY); + err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key), + &val, sizeof(val), BPF_ANY); if (!ASSERT_OK(err, "map_update")) goto out; } num_cpus = bpf_num_possible_cpus(); - percpu_map_fd = bpf_map__fd(skel->maps.percpu_map); - percpu_valbuf = malloc(sizeof(__u64) * num_cpus); + percpu_val_sz = sizeof(__u64) * num_cpus; + percpu_valbuf = malloc(percpu_val_sz); if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf")) goto out; key = 1; for (i = 0; i < num_cpus; i++) percpu_valbuf[i] = i + 1; - err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY); + err = bpf_map__update_elem(skel->maps.percpu_map, &key, sizeof(key), + percpu_valbuf, percpu_val_sz, BPF_ANY); if (!ASSERT_OK(err, "percpu_map_update")) goto out; @@ -58,7 +60,7 @@ static void test_hash_map(void) ASSERT_EQ(skel->bss->hashmap_elems, max_entries, "hashmap_elems"); key = 1; - err = bpf_map_lookup_elem(hashmap_fd, &key, &val); + err = bpf_map__lookup_elem(skel->maps.hashmap, &key, sizeof(key), &val, sizeof(val), 0); ASSERT_ERR(err, "hashmap_lookup"); ASSERT_EQ(skel->bss->percpu_called, 1, "percpu_called"); @@ -75,9 +77,10 @@ out: static void test_array_map(void) { __u32 key, num_cpus, max_entries; - int i, arraymap_fd, percpu_map_fd, err; + int i, err; struct for_each_array_map_elem *skel; __u64 *percpu_valbuf = NULL; + size_t percpu_val_sz; __u64 val, expected_total; LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v4, @@ -89,7 +92,6 @@ static void test_array_map(void) if (!ASSERT_OK_PTR(skel, "for_each_array_map_elem__open_and_load")) return; - arraymap_fd = bpf_map__fd(skel->maps.arraymap); expected_total = 0; max_entries = bpf_map__max_entries(skel->maps.arraymap); for (i = 0; i < max_entries; i++) { @@ -98,21 +100,23 @@ static void test_array_map(void) /* skip the last iteration for expected total */ if (i != max_entries - 1) expected_total += val; - err = bpf_map_update_elem(arraymap_fd, &key, &val, BPF_ANY); + err = bpf_map__update_elem(skel->maps.arraymap, &key, sizeof(key), + &val, sizeof(val), BPF_ANY); if (!ASSERT_OK(err, "map_update")) goto out; } num_cpus = bpf_num_possible_cpus(); - percpu_map_fd = bpf_map__fd(skel->maps.percpu_map); - percpu_valbuf = malloc(sizeof(__u64) * num_cpus); + percpu_val_sz = sizeof(__u64) * num_cpus; + percpu_valbuf = 
malloc(percpu_val_sz); if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf")) goto out; key = 0; for (i = 0; i < num_cpus; i++) percpu_valbuf[i] = i + 1; - err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY); + err = bpf_map__update_elem(skel->maps.percpu_map, &key, sizeof(key), + percpu_valbuf, percpu_val_sz, BPF_ANY); if (!ASSERT_OK(err, "percpu_map_update")) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index b9876b55fc0c..586dc52d6fb9 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -2,6 +2,9 @@ #include <test_progs.h> #include "kprobe_multi.skel.h" #include "trace_helpers.h" +#include "kprobe_multi_empty.skel.h" +#include "bpf/libbpf_internal.h" +#include "bpf/hashmap.h" static void kprobe_multi_test_run(struct kprobe_multi *skel, bool test_return) { @@ -140,14 +143,14 @@ test_attach_api(const char *pattern, struct bpf_kprobe_multi_opts *opts) goto cleanup; skel->bss->pid = getpid(); - link1 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe, + link1 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, pattern, opts); if (!ASSERT_OK_PTR(link1, "bpf_program__attach_kprobe_multi_opts")) goto cleanup; if (opts) { opts->retprobe = true; - link2 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kretprobe, + link2 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kretprobe_manual, pattern, opts); if (!ASSERT_OK_PTR(link2, "bpf_program__attach_kprobe_multi_opts")) goto cleanup; @@ -232,7 +235,7 @@ static void test_attach_api_fails(void) skel->bss->pid = getpid(); /* fail_1 - pattern and opts NULL */ - link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe, + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, NULL, NULL); if (!ASSERT_ERR_PTR(link, "fail_1")) goto cleanup; @@ -246,7 +249,7 @@ static void test_attach_api_fails(void) opts.cnt = ARRAY_SIZE(syms); opts.cookies = NULL; - link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe, + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, NULL, &opts); if (!ASSERT_ERR_PTR(link, "fail_2")) goto cleanup; @@ -260,7 +263,7 @@ static void test_attach_api_fails(void) opts.cnt = ARRAY_SIZE(syms); opts.cookies = NULL; - link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe, + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, "ksys_*", &opts); if (!ASSERT_ERR_PTR(link, "fail_3")) goto cleanup; @@ -274,7 +277,7 @@ static void test_attach_api_fails(void) opts.cnt = ARRAY_SIZE(syms); opts.cookies = NULL; - link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe, + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, "ksys_*", &opts); if (!ASSERT_ERR_PTR(link, "fail_4")) goto cleanup; @@ -288,7 +291,7 @@ static void test_attach_api_fails(void) opts.cnt = 0; opts.cookies = cookies; - link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe, + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, "ksys_*", &opts); if (!ASSERT_ERR_PTR(link, "fail_5")) goto cleanup; @@ -301,6 +304,146 @@ cleanup: kprobe_multi__destroy(skel); } +static inline __u64 get_time_ns(void) +{ + struct timespec t; + + clock_gettime(CLOCK_MONOTONIC, &t); + return (__u64) t.tv_sec * 1000000000 + t.tv_nsec; +} + +static size_t symbol_hash(const void 
*key, void *ctx __maybe_unused) +{ + return str_hash((const char *) key); +} + +static bool symbol_equal(const void *key1, const void *key2, void *ctx __maybe_unused) +{ + return strcmp((const char *) key1, (const char *) key2) == 0; +} + +static int get_syms(char ***symsp, size_t *cntp) +{ + size_t cap = 0, cnt = 0, i; + char *name, **syms = NULL; + struct hashmap *map; + char buf[256]; + FILE *f; + int err; + + /* + * The available_filter_functions contains many duplicates, + * but other than that all symbols are usable in kprobe multi + * interface. + * Filtering out duplicates by using hashmap__add, which won't + * add existing entry. + */ + f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r"); + if (!f) + return -EINVAL; + + map = hashmap__new(symbol_hash, symbol_equal, NULL); + if (IS_ERR(map)) { + err = libbpf_get_error(map); + goto error; + } + + while (fgets(buf, sizeof(buf), f)) { + /* skip modules */ + if (strchr(buf, '[')) + continue; + if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1) + continue; + /* + * We attach to almost all kernel functions and some of them + * will cause 'suspicious RCU usage' when fprobe is attached + * to them. Filter out the current culprits - arch_cpu_idle + * and rcu_* functions. + */ + if (!strcmp(name, "arch_cpu_idle")) + continue; + if (!strncmp(name, "rcu_", 4)) + continue; + err = hashmap__add(map, name, NULL); + if (err) { + free(name); + if (err == -EEXIST) + continue; + goto error; + } + err = libbpf_ensure_mem((void **) &syms, &cap, + sizeof(*syms), cnt + 1); + if (err) { + free(name); + goto error; + } + syms[cnt] = name; + cnt++; + } + + *symsp = syms; + *cntp = cnt; + +error: + fclose(f); + hashmap__free(map); + if (err) { + for (i = 0; i < cnt; i++) + free(syms[cnt]); + free(syms); + } + return err; +} + +static void test_bench_attach(void) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + struct kprobe_multi_empty *skel = NULL; + long attach_start_ns, attach_end_ns; + long detach_start_ns, detach_end_ns; + double attach_delta, detach_delta; + struct bpf_link *link = NULL; + char **syms = NULL; + size_t cnt, i; + + if (!ASSERT_OK(get_syms(&syms, &cnt), "get_syms")) + return; + + skel = kprobe_multi_empty__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load")) + goto cleanup; + + opts.syms = (const char **) syms; + opts.cnt = cnt; + + attach_start_ns = get_time_ns(); + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_empty, + NULL, &opts); + attach_end_ns = get_time_ns(); + + if (!ASSERT_OK_PTR(link, "bpf_program__attach_kprobe_multi_opts")) + goto cleanup; + + detach_start_ns = get_time_ns(); + bpf_link__destroy(link); + detach_end_ns = get_time_ns(); + + attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0; + detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0; + + printf("%s: found %lu functions\n", __func__, cnt); + printf("%s: attached in %7.3lfs\n", __func__, attach_delta); + printf("%s: detached in %7.3lfs\n", __func__, detach_delta); + +cleanup: + kprobe_multi_empty__destroy(skel); + if (syms) { + for (i = 0; i < cnt; i++) + free(syms[i]); + free(syms); + } +} + void test_kprobe_multi_test(void) { if (!ASSERT_OK(load_kallsyms(), "load_kallsyms")) @@ -320,4 +463,6 @@ void test_kprobe_multi_test(void) test_attach_api_syms(); if (test__start_subtest("attach_api_fails")) test_attach_api_fails(); + if (test__start_subtest("bench_attach")) + test_bench_attach(); } diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c 
b/tools/testing/selftests/bpf/prog_tests/log_fixup.c index be3a956cb3a5..f4ffdcabf4e4 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c +++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c @@ -85,7 +85,6 @@ static void bad_core_relo_subprog(void) if (!ASSERT_ERR(err, "load_fail")) goto cleanup; - /* there should be no prog loading log because we specified per-prog log buf */ ASSERT_HAS_SUBSTR(log_buf, ": <invalid CO-RE relocation>\n" "failed to resolve CO-RE relocation <byte_off> ", @@ -101,6 +100,40 @@ cleanup: test_log_fixup__destroy(skel); } +static void missing_map(void) +{ + char log_buf[8 * 1024]; + struct test_log_fixup* skel; + int err; + + skel = test_log_fixup__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_map__set_autocreate(skel->maps.missing_map, false); + + bpf_program__set_autoload(skel->progs.use_missing_map, true); + bpf_program__set_log_buf(skel->progs.use_missing_map, log_buf, sizeof(log_buf)); + + err = test_log_fixup__load(skel); + if (!ASSERT_ERR(err, "load_fail")) + goto cleanup; + + ASSERT_TRUE(bpf_map__autocreate(skel->maps.existing_map), "existing_map_autocreate"); + ASSERT_FALSE(bpf_map__autocreate(skel->maps.missing_map), "missing_map_autocreate"); + + ASSERT_HAS_SUBSTR(log_buf, + "8: <invalid BPF map reference>\n" + "BPF map 'missing_map' is referenced but wasn't created\n", + "log_buf"); + + if (env.verbosity > VERBOSE_NONE) + printf("LOG: \n=================\n%s=================\n", log_buf); + +cleanup: + test_log_fixup__destroy(skel); +} + void test_log_fixup(void) { if (test__start_subtest("bad_core_relo_trunc_none")) @@ -111,4 +144,6 @@ void test_log_fixup(void) bad_core_relo(250, TRUNC_FULL /* truncate also libbpf's message patch */); if (test__start_subtest("bad_core_relo_subprog")) bad_core_relo_subprog(); + if (test__start_subtest("missing_map")) + missing_map(); } diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c index beebfa9730e1..a767bb4a271c 100644 --- a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c +++ b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c @@ -112,7 +112,8 @@ static void test_lookup_and_delete_hash(void) /* Lookup and delete element. */ key = 1; - err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value); + err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map, + &key, sizeof(key), &value, sizeof(value), 0); if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) goto cleanup; @@ -147,7 +148,8 @@ static void test_lookup_and_delete_percpu_hash(void) /* Lookup and delete element. */ key = 1; - err = bpf_map_lookup_and_delete_elem(map_fd, &key, value); + err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map, + &key, sizeof(key), value, sizeof(value), 0); if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) goto cleanup; @@ -191,7 +193,8 @@ static void test_lookup_and_delete_lru_hash(void) goto cleanup; /* Lookup and delete element 3. */ - err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value); + err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map, + &key, sizeof(key), &value, sizeof(value), 0); if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) goto cleanup; @@ -240,10 +243,10 @@ static void test_lookup_and_delete_lru_percpu_hash(void) value[i] = 0; /* Lookup and delete element 3. 
*/ - err = bpf_map_lookup_and_delete_elem(map_fd, &key, value); - if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) { + err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map, + &key, sizeof(key), value, sizeof(value), 0); + if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) goto cleanup; - } /* Check if only one CPU has set the value. */ for (i = 0; i < nr_cpus; i++) { diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c index 9e2fbda64a65..fdcea7a61491 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c @@ -1,37 +1,148 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> #include "map_kptr.skel.h" +#include "map_kptr_fail.skel.h" -void test_map_kptr(void) +static char log_buf[1024 * 1024]; + +struct { + const char *prog_name; + const char *err_msg; +} map_kptr_fail_tests[] = { + { "size_not_bpf_dw", "kptr access size must be BPF_DW" }, + { "non_const_var_off", "kptr access cannot have variable offset" }, + { "non_const_var_off_kptr_xchg", "R1 doesn't have constant offset. kptr has to be" }, + { "misaligned_access_write", "kptr access misaligned expected=8 off=7" }, + { "misaligned_access_read", "kptr access misaligned expected=8 off=1" }, + { "reject_var_off_store", "variable untrusted_ptr_ access var_off=(0x0; 0x1e0)" }, + { "reject_bad_type_match", "invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc" }, + { "marked_as_untrusted_or_null", "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_" }, + { "correct_btf_id_check_size", "access beyond struct prog_test_ref_kfunc at off 32 size 4" }, + { "inherit_untrusted_on_walk", "R1 type=untrusted_ptr_ expected=percpu_ptr_" }, + { "reject_kptr_xchg_on_unref", "off=8 kptr isn't referenced kptr" }, + { "reject_kptr_get_no_map_val", "arg#0 expected pointer to map value" }, + { "reject_kptr_get_no_null_map_val", "arg#0 expected pointer to map value" }, + { "reject_kptr_get_no_kptr", "arg#0 no referenced kptr at map value offset=0" }, + { "reject_kptr_get_on_unref", "arg#0 no referenced kptr at map value offset=8" }, + { "reject_kptr_get_bad_type_match", "kernel function bpf_kfunc_call_test_kptr_get args#0" }, + { "mark_ref_as_untrusted_or_null", "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_" }, + { "reject_untrusted_store_to_ref", "store to referenced kptr disallowed" }, + { "reject_bad_type_xchg", "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member" }, + { "reject_untrusted_xchg", "R2 type=untrusted_ptr_ expected=ptr_" }, + { "reject_member_of_ref_xchg", "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc" }, + { "reject_indirect_helper_access", "kptr cannot be accessed indirectly by helper" }, + { "reject_indirect_global_func_access", "kptr cannot be accessed indirectly by helper" }, + { "kptr_xchg_ref_state", "Unreleased reference id=5 alloc_insn=" }, + { "kptr_get_ref_state", "Unreleased reference id=3 alloc_insn=" }, +}; + +static void test_map_kptr_fail_prog(const char *prog_name, const char *err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf, + .kernel_log_size = sizeof(log_buf), + .kernel_log_level = 1); + struct map_kptr_fail *skel; + struct bpf_program *prog; + int ret; + + skel = map_kptr_fail__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "map_kptr_fail__open_opts")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, 
"bpf_object__find_program_by_name")) + goto end; + + bpf_program__set_autoload(prog, true); + + ret = map_kptr_fail__load(skel); + if (!ASSERT_ERR(ret, "map_kptr__load must fail")) + goto end; + + if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { + fprintf(stderr, "Expected: %s\n", err_msg); + fprintf(stderr, "Verifier: %s\n", log_buf); + } + +end: + map_kptr_fail__destroy(skel); +} + +static void test_map_kptr_fail(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(map_kptr_fail_tests); i++) { + if (!test__start_subtest(map_kptr_fail_tests[i].prog_name)) + continue; + test_map_kptr_fail_prog(map_kptr_fail_tests[i].prog_name, + map_kptr_fail_tests[i].err_msg); + } +} + +static void test_map_kptr_success(bool test_run) { + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); struct map_kptr *skel; int key = 0, ret; - char buf[24]; + char buf[16]; skel = map_kptr__open_and_load(); if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load")) return; - ret = bpf_map_update_elem(bpf_map__fd(skel->maps.array_map), &key, buf, 0); + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref), &opts); + ASSERT_OK(ret, "test_map_kptr_ref refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref retval"); + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref2), &opts); + ASSERT_OK(ret, "test_map_kptr_ref2 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref2 retval"); + + if (test_run) + return; + + ret = bpf_map__update_elem(skel->maps.array_map, + &key, sizeof(key), buf, sizeof(buf), 0); ASSERT_OK(ret, "array_map update"); - ret = bpf_map_update_elem(bpf_map__fd(skel->maps.array_map), &key, buf, 0); + ret = bpf_map__update_elem(skel->maps.array_map, + &key, sizeof(key), buf, sizeof(buf), 0); ASSERT_OK(ret, "array_map update2"); - ret = bpf_map_update_elem(bpf_map__fd(skel->maps.hash_map), &key, buf, 0); + ret = bpf_map__update_elem(skel->maps.hash_map, + &key, sizeof(key), buf, sizeof(buf), 0); ASSERT_OK(ret, "hash_map update"); - ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.hash_map), &key); + ret = bpf_map__delete_elem(skel->maps.hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "hash_map delete"); - ret = bpf_map_update_elem(bpf_map__fd(skel->maps.hash_malloc_map), &key, buf, 0); + ret = bpf_map__update_elem(skel->maps.hash_malloc_map, + &key, sizeof(key), buf, sizeof(buf), 0); ASSERT_OK(ret, "hash_malloc_map update"); - ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.hash_malloc_map), &key); + ret = bpf_map__delete_elem(skel->maps.hash_malloc_map, &key, sizeof(key), 0); ASSERT_OK(ret, "hash_malloc_map delete"); - ret = bpf_map_update_elem(bpf_map__fd(skel->maps.lru_hash_map), &key, buf, 0); + ret = bpf_map__update_elem(skel->maps.lru_hash_map, + &key, sizeof(key), buf, sizeof(buf), 0); ASSERT_OK(ret, "lru_hash_map update"); - ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.lru_hash_map), &key); + ret = bpf_map__delete_elem(skel->maps.lru_hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "lru_hash_map delete"); map_kptr__destroy(skel); } + +void test_map_kptr(void) +{ + if (test__start_subtest("success")) { + test_map_kptr_success(false); + /* Do test_run twice, so that we see refcount going back to 1 + * after we leave it in map from first iteration. 
+ */ + test_map_kptr_success(true); + } + test_map_kptr_fail(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c b/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c new file mode 100644 index 000000000000..bfb1bf3fd427 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Bytedance */ + +#include <test_progs.h> +#include "test_map_lookup_percpu_elem.skel.h" + +void test_map_lookup_percpu_elem(void) +{ + struct test_map_lookup_percpu_elem *skel; + __u64 key = 0, sum; + int ret, i, nr_cpus = libbpf_num_possible_cpus(); + __u64 *buf; + + buf = malloc(nr_cpus*sizeof(__u64)); + if (!ASSERT_OK_PTR(buf, "malloc")) + return; + + for (i = 0; i < nr_cpus; i++) + buf[i] = i; + sum = (nr_cpus - 1) * nr_cpus / 2; + + skel = test_map_lookup_percpu_elem__open(); + if (!ASSERT_OK_PTR(skel, "test_map_lookup_percpu_elem__open")) + goto exit; + + skel->rodata->my_pid = getpid(); + skel->rodata->nr_cpus = nr_cpus; + + ret = test_map_lookup_percpu_elem__load(skel); + if (!ASSERT_OK(ret, "test_map_lookup_percpu_elem__load")) + goto cleanup; + + ret = test_map_lookup_percpu_elem__attach(skel); + if (!ASSERT_OK(ret, "test_map_lookup_percpu_elem__attach")) + goto cleanup; + + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_array_map), &key, buf, 0); + ASSERT_OK(ret, "percpu_array_map update"); + + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_hash_map), &key, buf, 0); + ASSERT_OK(ret, "percpu_hash_map update"); + + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_lru_hash_map), &key, buf, 0); + ASSERT_OK(ret, "percpu_lru_hash_map update"); + + syscall(__NR_getuid); + + test_map_lookup_percpu_elem__detach(skel); + + ASSERT_EQ(skel->bss->percpu_array_elem_sum, sum, "percpu_array lookup percpu elem"); + ASSERT_EQ(skel->bss->percpu_hash_elem_sum, sum, "percpu_hash lookup percpu elem"); + ASSERT_EQ(skel->bss->percpu_lru_hash_elem_sum, sum, "percpu_lru_hash lookup percpu elem"); + +cleanup: + test_map_lookup_percpu_elem__destroy(skel); +exit: + free(buf); +} diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c new file mode 100644 index 000000000000..59f08d6d1d53 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Tessares SA. */ +/* Copyright (c) 2022, SUSE. 
*/ + +#include <test_progs.h> +#include "cgroup_helpers.h" +#include "network_helpers.h" +#include "mptcp_sock.skel.h" + +#ifndef TCP_CA_NAME_MAX +#define TCP_CA_NAME_MAX 16 +#endif + +struct mptcp_storage { + __u32 invoked; + __u32 is_mptcp; + struct sock *sk; + __u32 token; + struct sock *first; + char ca_name[TCP_CA_NAME_MAX]; +}; + +static int verify_tsk(int map_fd, int client_fd) +{ + int err, cfd = client_fd; + struct mptcp_storage val; + + err = bpf_map_lookup_elem(map_fd, &cfd, &val); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + return err; + + if (!ASSERT_EQ(val.invoked, 1, "unexpected invoked count")) + err++; + + if (!ASSERT_EQ(val.is_mptcp, 0, "unexpected is_mptcp")) + err++; + + return err; +} + +static void get_msk_ca_name(char ca_name[]) +{ + size_t len; + int fd; + + fd = open("/proc/sys/net/ipv4/tcp_congestion_control", O_RDONLY); + if (!ASSERT_GE(fd, 0, "failed to open tcp_congestion_control")) + return; + + len = read(fd, ca_name, TCP_CA_NAME_MAX); + if (!ASSERT_GT(len, 0, "failed to read ca_name")) + goto err; + + if (len > 0 && ca_name[len - 1] == '\n') + ca_name[len - 1] = '\0'; + +err: + close(fd); +} + +static int verify_msk(int map_fd, int client_fd, __u32 token) +{ + char ca_name[TCP_CA_NAME_MAX]; + int err, cfd = client_fd; + struct mptcp_storage val; + + if (!ASSERT_GT(token, 0, "invalid token")) + return -1; + + get_msk_ca_name(ca_name); + + err = bpf_map_lookup_elem(map_fd, &cfd, &val); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + return err; + + if (!ASSERT_EQ(val.invoked, 1, "unexpected invoked count")) + err++; + + if (!ASSERT_EQ(val.is_mptcp, 1, "unexpected is_mptcp")) + err++; + + if (!ASSERT_EQ(val.token, token, "unexpected token")) + err++; + + if (!ASSERT_EQ(val.first, val.sk, "unexpected first")) + err++; + + if (!ASSERT_STRNEQ(val.ca_name, ca_name, TCP_CA_NAME_MAX, "unexpected ca_name")) + err++; + + return err; +} + +static int run_test(int cgroup_fd, int server_fd, bool is_mptcp) +{ + int client_fd, prog_fd, map_fd, err; + struct mptcp_sock *sock_skel; + + sock_skel = mptcp_sock__open_and_load(); + if (!ASSERT_OK_PTR(sock_skel, "skel_open_load")) + return -EIO; + + err = mptcp_sock__attach(sock_skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + prog_fd = bpf_program__fd(sock_skel->progs._sockops); + if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd")) { + err = -EIO; + goto out; + } + + map_fd = bpf_map__fd(sock_skel->maps.socket_storage_map); + if (!ASSERT_GE(map_fd, 0, "bpf_map__fd")) { + err = -EIO; + goto out; + } + + err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0); + if (!ASSERT_OK(err, "bpf_prog_attach")) + goto out; + + client_fd = connect_to_fd(server_fd, 0); + if (!ASSERT_GE(client_fd, 0, "connect to fd")) { + err = -EIO; + goto out; + } + + err += is_mptcp ? 
verify_msk(map_fd, client_fd, sock_skel->bss->token) : + verify_tsk(map_fd, client_fd); + + close(client_fd); + +out: + mptcp_sock__destroy(sock_skel); + return err; +} + +static void test_base(void) +{ + int server_fd, cgroup_fd; + + cgroup_fd = test__join_cgroup("/mptcp"); + if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) + return; + + /* without MPTCP */ + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_GE(server_fd, 0, "start_server")) + goto with_mptcp; + + ASSERT_OK(run_test(cgroup_fd, server_fd, false), "run_test tcp"); + + close(server_fd); + +with_mptcp: + /* with MPTCP */ + server_fd = start_mptcp_server(AF_INET, NULL, 0, 0); + if (!ASSERT_GE(server_fd, 0, "start_mptcp_server")) + goto close_cgroup_fd; + + ASSERT_OK(run_test(cgroup_fd, server_fd, true), "run_test mptcp"); + + close(server_fd); + +close_cgroup_fd: + close(cgroup_fd); +} + +void test_mptcp(void) +{ + if (test__start_subtest("base")) + test_base(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c index e945195b24c9..eb5f7f5aa81a 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c @@ -50,18 +50,6 @@ void test_ringbuf_multi(void) if (CHECK(!skel, "skel_open", "skeleton open failed\n")) return; - err = bpf_map__set_max_entries(skel->maps.ringbuf1, page_size); - if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) - goto cleanup; - - err = bpf_map__set_max_entries(skel->maps.ringbuf2, page_size); - if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) - goto cleanup; - - err = bpf_map__set_max_entries(bpf_map__inner_map(skel->maps.ringbuf_arr), page_size); - if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) - goto cleanup; - proto_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, page_size, NULL); if (CHECK(proto_fd < 0, "bpf_map_create", "bpf_map_create failed\n")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c index e8399ae50e77..9ad09a6c538a 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c @@ -8,7 +8,7 @@ void test_stacktrace_build_id(void) int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; struct test_stacktrace_build_id *skel; int err, stack_trace_len; - __u32 key, previous_key, val, duration = 0; + __u32 key, prev_key, val, duration = 0; char buf[256]; int i, j; struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH]; @@ -58,7 +58,7 @@ retry: "err %d errno %d\n", err, errno)) goto cleanup; - err = bpf_map_get_next_key(stackmap_fd, NULL, &key); + err = bpf_map__get_next_key(skel->maps.stackmap, NULL, &key, sizeof(key)); if (CHECK(err, "get_next_key from stackmap", "err %d, errno %d\n", err, errno)) goto cleanup; @@ -79,8 +79,8 @@ retry: if (strstr(buf, build_id) != NULL) build_id_matches = 1; } - previous_key = key; - } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); + prev_key = key; + } while (bpf_map__get_next_key(skel->maps.stackmap, &prev_key, &key, sizeof(key)) == 0); /* stack_map_get_build_id_offset() is racy and sometimes can return * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID; diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c 
b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index f45a1d7b0a28..f4ea1a215ce4 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -27,7 +27,7 @@ void test_stacktrace_build_id_nmi(void) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, }; - __u32 key, previous_key, val, duration = 0; + __u32 key, prev_key, val, duration = 0; char buf[256]; int i, j; struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH]; @@ -100,7 +100,7 @@ retry: "err %d errno %d\n", err, errno)) goto cleanup; - err = bpf_map_get_next_key(stackmap_fd, NULL, &key); + err = bpf_map__get_next_key(skel->maps.stackmap, NULL, &key, sizeof(key)); if (CHECK(err, "get_next_key from stackmap", "err %d, errno %d\n", err, errno)) goto cleanup; @@ -108,7 +108,8 @@ retry: do { char build_id[64]; - err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs); + err = bpf_map__lookup_elem(skel->maps.stackmap, &key, sizeof(key), + id_offs, sizeof(id_offs), 0); if (CHECK(err, "lookup_elem from stackmap", "err %d, errno %d\n", err, errno)) goto cleanup; @@ -121,8 +122,8 @@ retry: if (strstr(buf, build_id) != NULL) build_id_matches = 1; } - previous_key = key; - } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); + prev_key = key; + } while (bpf_map__get_next_key(skel->maps.stackmap, &prev_key, &key, sizeof(key)) == 0); /* stack_map_get_build_id_offset() is racy and sometimes can return * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID; diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c new file mode 100644 index 000000000000..3bba4a2a0530 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -0,0 +1,423 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* + * End-to-end eBPF tunnel test suite + * The file tests BPF network tunnel implementation. + * + * Topology: + * --------- + * root namespace | at_ns0 namespace + * | + * ----------- | ----------- + * | tnl dev | | | tnl dev | (overlay network) + * ----------- | ----------- + * metadata-mode | metadata-mode + * with bpf | with bpf + * | + * ---------- | ---------- + * | veth1 | --------- | veth0 | (underlay network) + * ---------- peer ---------- + * + * + * Device Configuration + * -------------------- + * root namespace with metadata-mode tunnel + BPF + * Device names and addresses: + * veth1 IP 1: 172.16.1.200, IPv6: 00::22 (underlay) + * IP 2: 172.16.1.20, IPv6: 00::bb (underlay) + * tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay) + * + * Namespace at_ns0 with native tunnel + * Device names and addresses: + * veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay) + * tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay) + * + * + * End-to-end ping packet flow + * --------------------------- + * Most of the tests start by namespace creation, device configuration, + * then ping the underlay and overlay network. When doing 'ping 10.1.1.100' + * from root namespace, the following operations happen: + * 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev. + * 2) Tnl device's egress BPF program is triggered and set the tunnel metadata, + * with local_ip=172.16.1.200, remote_ip=172.16.1.100. BPF program choose + * the primary or secondary ip of veth1 as the local ip of tunnel. The + * choice is made based on the value of bpf map local_ip_map. 
+ * 3) Outer tunnel header is prepended and route the packet to veth1's egress. + * 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0. + * 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet. + * 6) Forward the packet to the overlay tnl dev. + */ + +#include <arpa/inet.h> +#include <linux/if_tun.h> +#include <linux/limits.h> +#include <linux/sysctl.h> +#include <linux/time_types.h> +#include <linux/net_tstamp.h> +#include <net/if.h> +#include <stdbool.h> +#include <stdio.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "test_progs.h" +#include "network_helpers.h" +#include "test_tunnel_kern.skel.h" + +#define IP4_ADDR_VETH0 "172.16.1.100" +#define IP4_ADDR1_VETH1 "172.16.1.200" +#define IP4_ADDR2_VETH1 "172.16.1.20" +#define IP4_ADDR_TUNL_DEV0 "10.1.1.100" +#define IP4_ADDR_TUNL_DEV1 "10.1.1.200" + +#define IP6_ADDR_VETH0 "::11" +#define IP6_ADDR1_VETH1 "::22" +#define IP6_ADDR2_VETH1 "::bb" + +#define IP4_ADDR1_HEX_VETH1 0xac1001c8 +#define IP4_ADDR2_HEX_VETH1 0xac100114 +#define IP6_ADDR1_HEX_VETH1 0x22 +#define IP6_ADDR2_HEX_VETH1 0xbb + +#define MAC_TUNL_DEV0 "52:54:00:d9:01:00" +#define MAC_TUNL_DEV1 "52:54:00:d9:02:00" + +#define VXLAN_TUNL_DEV0 "vxlan00" +#define VXLAN_TUNL_DEV1 "vxlan11" +#define IP6VXLAN_TUNL_DEV0 "ip6vxlan00" +#define IP6VXLAN_TUNL_DEV1 "ip6vxlan11" + +#define PING_ARGS "-i 0.01 -c 3 -w 10 -q" + +#define SYS(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_OK(system(cmd), cmd)) \ + goto fail; \ + }) + +#define SYS_NOFAIL(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + system(cmd); \ + }) + +static int config_device(void) +{ + SYS("ip netns add at_ns0"); + SYS("ip link add veth0 type veth peer name veth1"); + SYS("ip link set veth0 netns at_ns0"); + SYS("ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1"); + SYS("ip addr add " IP4_ADDR2_VETH1 "/24 dev veth1"); + SYS("ip link set dev veth1 up mtu 1500"); + SYS("ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0"); + SYS("ip netns exec at_ns0 ip link set dev veth0 up mtu 1500"); + + return 0; +fail: + return -1; +} + +static void cleanup(void) +{ + SYS_NOFAIL("test -f /var/run/netns/at_ns0 && ip netns delete at_ns0"); + SYS_NOFAIL("ip link del veth1 2> /dev/null"); + SYS_NOFAIL("ip link del %s 2> /dev/null", VXLAN_TUNL_DEV1); + SYS_NOFAIL("ip link del %s 2> /dev/null", IP6VXLAN_TUNL_DEV1); +} + +static int add_vxlan_tunnel(void) +{ + /* at_ns0 namespace */ + SYS("ip netns exec at_ns0 ip link add dev %s type vxlan external gbp dstport 4789", + VXLAN_TUNL_DEV0); + SYS("ip netns exec at_ns0 ip link set dev %s address %s up", + VXLAN_TUNL_DEV0, MAC_TUNL_DEV0); + SYS("ip netns exec at_ns0 ip addr add dev %s %s/24", + VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0); + SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s", + IP4_ADDR_TUNL_DEV1, MAC_TUNL_DEV1, VXLAN_TUNL_DEV0); + + /* root namespace */ + SYS("ip link add dev %s type vxlan external gbp dstport 4789", + VXLAN_TUNL_DEV1); + SYS("ip link set dev %s address %s up", VXLAN_TUNL_DEV1, MAC_TUNL_DEV1); + SYS("ip addr add dev %s %s/24", VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1); + SYS("ip neigh add %s lladdr %s dev %s", + IP4_ADDR_TUNL_DEV0, MAC_TUNL_DEV0, VXLAN_TUNL_DEV1); + + return 0; +fail: + return -1; +} + +static void delete_vxlan_tunnel(void) +{ + SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s", + VXLAN_TUNL_DEV0); + SYS_NOFAIL("ip link delete dev %s", VXLAN_TUNL_DEV1); +} + +static int add_ip6vxlan_tunnel(void) 
+{ + SYS("ip netns exec at_ns0 ip -6 addr add %s/96 dev veth0", + IP6_ADDR_VETH0); + SYS("ip netns exec at_ns0 ip link set dev veth0 up"); + SYS("ip -6 addr add %s/96 dev veth1", IP6_ADDR1_VETH1); + SYS("ip -6 addr add %s/96 dev veth1", IP6_ADDR2_VETH1); + SYS("ip link set dev veth1 up"); + + /* at_ns0 namespace */ + SYS("ip netns exec at_ns0 ip link add dev %s type vxlan external dstport 4789", + IP6VXLAN_TUNL_DEV0); + SYS("ip netns exec at_ns0 ip addr add dev %s %s/24", + IP6VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0); + SYS("ip netns exec at_ns0 ip link set dev %s address %s up", + IP6VXLAN_TUNL_DEV0, MAC_TUNL_DEV0); + + /* root namespace */ + SYS("ip link add dev %s type vxlan external dstport 4789", + IP6VXLAN_TUNL_DEV1); + SYS("ip addr add dev %s %s/24", IP6VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1); + SYS("ip link set dev %s address %s up", + IP6VXLAN_TUNL_DEV1, MAC_TUNL_DEV1); + + return 0; +fail: + return -1; +} + +static void delete_ip6vxlan_tunnel(void) +{ + SYS_NOFAIL("ip netns exec at_ns0 ip -6 addr delete %s/96 dev veth0", + IP6_ADDR_VETH0); + SYS_NOFAIL("ip -6 addr delete %s/96 dev veth1", IP6_ADDR1_VETH1); + SYS_NOFAIL("ip -6 addr delete %s/96 dev veth1", IP6_ADDR2_VETH1); + SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s", + IP6VXLAN_TUNL_DEV0); + SYS_NOFAIL("ip link delete dev %s", IP6VXLAN_TUNL_DEV1); +} + +static int test_ping(int family, const char *addr) +{ + SYS("%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr); + return 0; +fail: + return -1; +} + +static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, + .priority = 1, .prog_fd = igr_fd); + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1, + .priority = 1, .prog_fd = egr_fd); + int ret; + + ret = bpf_tc_hook_create(hook); + if (!ASSERT_OK(ret, "create tc hook")) + return ret; + + if (igr_fd >= 0) { + hook->attach_point = BPF_TC_INGRESS; + ret = bpf_tc_attach(hook, &opts1); + if (!ASSERT_OK(ret, "bpf_tc_attach")) { + bpf_tc_hook_destroy(hook); + return ret; + } + } + + if (egr_fd >= 0) { + hook->attach_point = BPF_TC_EGRESS; + ret = bpf_tc_attach(hook, &opts2); + if (!ASSERT_OK(ret, "bpf_tc_attach")) { + bpf_tc_hook_destroy(hook); + return ret; + } + } + + return 0; +} + +static void test_vxlan_tunnel(void) +{ + struct test_tunnel_kern *skel = NULL; + struct nstoken *nstoken; + int local_ip_map_fd = -1; + int set_src_prog_fd, get_src_prog_fd; + int set_dst_prog_fd; + int key = 0, ifindex = -1; + uint local_ip; + int err; + DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, + .attach_point = BPF_TC_INGRESS); + + /* add vxlan tunnel */ + err = add_vxlan_tunnel(); + if (!ASSERT_OK(err, "add vxlan tunnel")) + goto done; + + /* load and attach bpf prog to tunnel dev tc hook point */ + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + goto done; + ifindex = if_nametoindex(VXLAN_TUNL_DEV1); + if (!ASSERT_NEQ(ifindex, 0, "vxlan11 ifindex")) + goto done; + tc_hook.ifindex = ifindex; + get_src_prog_fd = bpf_program__fd(skel->progs.vxlan_get_tunnel_src); + set_src_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_src); + if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd")) + goto done; + if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) + goto done; + + /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ + nstoken = open_netns("at_ns0"); + if (!ASSERT_OK_PTR(nstoken, "setns src")) + goto 
done; + ifindex = if_nametoindex(VXLAN_TUNL_DEV0); + if (!ASSERT_NEQ(ifindex, 0, "vxlan00 ifindex")) + goto done; + tc_hook.ifindex = ifindex; + set_dst_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_dst); + if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd)) + goto done; + close_netns(nstoken); + + /* use veth1 ip 2 as tunnel source ip */ + local_ip_map_fd = bpf_map__fd(skel->maps.local_ip_map); + if (!ASSERT_GE(local_ip_map_fd, 0, "bpf_map__fd")) + goto done; + local_ip = IP4_ADDR2_HEX_VETH1; + err = bpf_map_update_elem(local_ip_map_fd, &key, &local_ip, BPF_ANY); + if (!ASSERT_OK(err, "update bpf local_ip_map")) + goto done; + + /* ping test */ + err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); + if (!ASSERT_OK(err, "test_ping")) + goto done; + +done: + /* delete vxlan tunnel */ + delete_vxlan_tunnel(); + if (local_ip_map_fd >= 0) + close(local_ip_map_fd); + if (skel) + test_tunnel_kern__destroy(skel); +} + +static void test_ip6vxlan_tunnel(void) +{ + struct test_tunnel_kern *skel = NULL; + struct nstoken *nstoken; + int local_ip_map_fd = -1; + int set_src_prog_fd, get_src_prog_fd; + int set_dst_prog_fd; + int key = 0, ifindex = -1; + uint local_ip; + int err; + DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, + .attach_point = BPF_TC_INGRESS); + + /* add vxlan tunnel */ + err = add_ip6vxlan_tunnel(); + if (!ASSERT_OK(err, "add_ip6vxlan_tunnel")) + goto done; + + /* load and attach bpf prog to tunnel dev tc hook point */ + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + goto done; + ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV1); + if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan11 ifindex")) + goto done; + tc_hook.ifindex = ifindex; + get_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_get_tunnel_src); + set_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_src); + if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd")) + goto done; + if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) + goto done; + + /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ + nstoken = open_netns("at_ns0"); + if (!ASSERT_OK_PTR(nstoken, "setns src")) + goto done; + ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV0); + if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan00 ifindex")) + goto done; + tc_hook.ifindex = ifindex; + set_dst_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_dst); + if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd)) + goto done; + close_netns(nstoken); + + /* use veth1 ip 2 as tunnel source ip */ + local_ip_map_fd = bpf_map__fd(skel->maps.local_ip_map); + if (!ASSERT_GE(local_ip_map_fd, 0, "get local_ip_map fd")) + goto done; + local_ip = IP6_ADDR2_HEX_VETH1; + err = bpf_map_update_elem(local_ip_map_fd, &key, &local_ip, BPF_ANY); + if (!ASSERT_OK(err, "update bpf local_ip_map")) + goto done; + + /* ping test */ + err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0); + if (!ASSERT_OK(err, "test_ping")) + goto done; + +done: + /* delete ipv6 vxlan tunnel */ + delete_ip6vxlan_tunnel(); + if (local_ip_map_fd >= 0) + close(local_ip_map_fd); + if (skel) + test_tunnel_kern__destroy(skel); +} + +#define RUN_TEST(name) \ + ({ \ + if (test__start_subtest(#name)) { \ + test_ ## name(); \ + } \ + }) + +static void *test_tunnel_run_tests(void *arg) +{ + cleanup(); + config_device(); + + RUN_TEST(vxlan_tunnel); + 
RUN_TEST(ip6vxlan_tunnel); + + cleanup(); + + return NULL; +} + +void serial_test_tunnel(void) +{ + pthread_t test_thread; + int err; + + /* Run the tests in their own thread to isolate the namespace changes + * so they do not affect the environment of other tests. + * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) + */ + err = pthread_create(&test_thread, NULL, &test_tunnel_run_tests, NULL); + if (ASSERT_OK(err, "pthread_create")) + ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); +} diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c index 2ee5f5ae11d4..9ff7843909e7 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c @@ -35,7 +35,7 @@ static int timer_mim(struct timer_mim *timer_skel) ASSERT_EQ(timer_skel->bss->ok, 1 | 2, "ok"); close(bpf_map__fd(timer_skel->maps.inner_htab)); - err = bpf_map_delete_elem(bpf_map__fd(timer_skel->maps.outer_arr), &key1); + err = bpf_map__delete_elem(timer_skel->maps.outer_arr, &key1, sizeof(key1), 0); ASSERT_EQ(err, 0, "delete inner map"); /* check that timer_cb[12] are no longer running */ diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c index 9c795ee52b7b..b0acbda6dbf5 100644 --- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c @@ -1,126 +1,94 @@ // SPDX-License-Identifier: GPL-2.0-only #define _GNU_SOURCE -#include <sched.h> -#include <sys/prctl.h> #include <test_progs.h> #define MAX_TRAMP_PROGS 38 struct inst { struct bpf_object *obj; - struct bpf_link *link_fentry; - struct bpf_link *link_fexit; + struct bpf_link *link; }; -static int test_task_rename(void) -{ - int fd, duration = 0, err; - char buf[] = "test_overhead"; - - fd = open("/proc/self/comm", O_WRONLY|O_TRUNC); - if (CHECK(fd < 0, "open /proc", "err %d", errno)) - return -1; - err = write(fd, buf, sizeof(buf)); - if (err < 0) { - CHECK(err < 0, "task rename", "err %d", errno); - close(fd); - return -1; - } - close(fd); - return 0; -} - -static struct bpf_link *load(struct bpf_object *obj, const char *name) +static struct bpf_program *load_prog(char *file, char *name, struct inst *inst) { + struct bpf_object *obj; struct bpf_program *prog; - int duration = 0; + int err; + + obj = bpf_object__open_file(file, NULL); + if (!ASSERT_OK_PTR(obj, "obj_open_file")) + return NULL; + + inst->obj = obj; + + err = bpf_object__load(obj); + if (!ASSERT_OK(err, "obj_load")) + return NULL; prog = bpf_object__find_program_by_name(obj, name); - if (CHECK(!prog, "find_probe", "prog '%s' not found\n", name)) - return ERR_PTR(-EINVAL); - return bpf_program__attach_trace(prog); + if (!ASSERT_OK_PTR(prog, "obj_find_prog")) + return NULL; + + return prog; } /* TODO: use different target function to run in concurrent mode */ void serial_test_trampoline_count(void) { - const char *fentry_name = "prog1"; - const char *fexit_name = "prog2"; - const char *object = "test_trampoline_count.o"; - struct inst inst[MAX_TRAMP_PROGS] = {}; - int err, i = 0, duration = 0; - struct bpf_object *obj; + char *file = "test_trampoline_count.o"; + char *const progs[] = { "fentry_test", "fmod_ret_test", "fexit_test" }; + struct inst inst[MAX_TRAMP_PROGS + 1] = {}; + struct bpf_program *prog; struct bpf_link *link; - char comm[16] = {}; + int prog_fd, err, i; + LIBBPF_OPTS(bpf_test_run_opts, opts); /* attach 'allowed' 
trampoline programs */ for (i = 0; i < MAX_TRAMP_PROGS; i++) { - obj = bpf_object__open_file(object, NULL); - if (!ASSERT_OK_PTR(obj, "obj_open_file")) { - obj = NULL; + prog = load_prog(file, progs[i % ARRAY_SIZE(progs)], &inst[i]); + if (!prog) goto cleanup; - } - err = bpf_object__load(obj); - if (CHECK(err, "obj_load", "err %d\n", err)) + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "attach_prog")) goto cleanup; - inst[i].obj = obj; - obj = NULL; - - if (rand() % 2) { - link = load(inst[i].obj, fentry_name); - if (!ASSERT_OK_PTR(link, "attach_prog")) { - link = NULL; - goto cleanup; - } - inst[i].link_fentry = link; - } else { - link = load(inst[i].obj, fexit_name); - if (!ASSERT_OK_PTR(link, "attach_prog")) { - link = NULL; - goto cleanup; - } - inst[i].link_fexit = link; - } + + inst[i].link = link; } /* and try 1 extra.. */ - obj = bpf_object__open_file(object, NULL); - if (!ASSERT_OK_PTR(obj, "obj_open_file")) { - obj = NULL; + prog = load_prog(file, "fmod_ret_test", &inst[i]); + if (!prog) goto cleanup; - } - - err = bpf_object__load(obj); - if (CHECK(err, "obj_load", "err %d\n", err)) - goto cleanup_extra; /* ..that needs to fail */ - link = load(obj, fentry_name); - err = libbpf_get_error(link); - if (!ASSERT_ERR_PTR(link, "cannot attach over the limit")) { - bpf_link__destroy(link); - goto cleanup_extra; + link = bpf_program__attach(prog); + if (!ASSERT_ERR_PTR(link, "attach_prog")) { + inst[i].link = link; + goto cleanup; } /* with E2BIG error */ - ASSERT_EQ(err, -E2BIG, "proper error check"); - ASSERT_EQ(link, NULL, "ptr_is_null"); + if (!ASSERT_EQ(libbpf_get_error(link), -E2BIG, "E2BIG")) + goto cleanup; + if (!ASSERT_EQ(link, NULL, "ptr_is_null")) + goto cleanup; /* and finaly execute the probe */ - if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L))) - goto cleanup_extra; - CHECK_FAIL(test_task_rename()); - CHECK_FAIL(prctl(PR_SET_NAME, comm, 0L, 0L, 0L)); + prog_fd = bpf_program__fd(prog); + if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd")) + goto cleanup; + + err = bpf_prog_test_run_opts(prog_fd, &opts); + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) + goto cleanup; + + ASSERT_EQ(opts.retval & 0xffff, 4, "bpf_modify_return_test.result"); + ASSERT_EQ(opts.retval >> 16, 1, "bpf_modify_return_test.side_effect"); -cleanup_extra: - bpf_object__close(obj); cleanup: - if (i >= MAX_TRAMP_PROGS) - i = MAX_TRAMP_PROGS - 1; for (; i >= 0; i--) { - bpf_link__destroy(inst[i].link_fentry); - bpf_link__destroy(inst[i].link_fexit); + bpf_link__destroy(inst[i].link); bpf_object__close(inst[i].obj); } } diff --git a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c new file mode 100644 index 000000000000..1ed3cc2092db --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c @@ -0,0 +1,312 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022, Oracle and/or its affiliates. */ + +#include <test_progs.h> +#include <bpf/btf.h> + +#include "test_unpriv_bpf_disabled.skel.h" + +#include "cap_helpers.h" + +/* Using CAP_LAST_CAP is risky here, since it can get pulled in from + * an old /usr/include/linux/capability.h and be < CAP_BPF; as a result + * CAP_BPF would not be included in ALL_CAPS. Instead use CAP_BPF as + * we know its value is correct since it is explicitly defined in + * cap_helpers.h. 
+ */ +#define ALL_CAPS ((2ULL << CAP_BPF) - 1) + +#define PINPATH "/sys/fs/bpf/unpriv_bpf_disabled_" +#define NUM_MAPS 7 + +static __u32 got_perfbuf_val; +static __u32 got_ringbuf_val; + +static int process_ringbuf(void *ctx, void *data, size_t len) +{ + if (ASSERT_EQ(len, sizeof(__u32), "ringbuf_size_valid")) + got_ringbuf_val = *(__u32 *)data; + return 0; +} + +static void process_perfbuf(void *ctx, int cpu, void *data, __u32 len) +{ + if (ASSERT_EQ(len, sizeof(__u32), "perfbuf_size_valid")) + got_perfbuf_val = *(__u32 *)data; +} + +static int sysctl_set(const char *sysctl_path, char *old_val, const char *new_val) +{ + int ret = 0; + FILE *fp; + + fp = fopen(sysctl_path, "r+"); + if (!fp) + return -errno; + if (old_val && fscanf(fp, "%s", old_val) <= 0) { + ret = -ENOENT; + } else if (!old_val || strcmp(old_val, new_val) != 0) { + fseek(fp, 0, SEEK_SET); + if (fprintf(fp, "%s", new_val) < 0) + ret = -errno; + } + fclose(fp); + + return ret; +} + +static void test_unpriv_bpf_disabled_positive(struct test_unpriv_bpf_disabled *skel, + __u32 prog_id, int prog_fd, int perf_fd, + char **map_paths, int *map_fds) +{ + struct perf_buffer *perfbuf = NULL; + struct ring_buffer *ringbuf = NULL; + int i, nr_cpus, link_fd = -1; + + nr_cpus = bpf_num_possible_cpus(); + + skel->bss->perfbuf_val = 1; + skel->bss->ringbuf_val = 2; + + /* Positive tests for unprivileged BPF disabled. Verify we can + * - retrieve and interact with pinned maps; + * - set up and interact with perf buffer; + * - set up and interact with ring buffer; + * - create a link + */ + perfbuf = perf_buffer__new(bpf_map__fd(skel->maps.perfbuf), 8, process_perfbuf, NULL, NULL, + NULL); + if (!ASSERT_OK_PTR(perfbuf, "perf_buffer__new")) + goto cleanup; + + ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf), process_ringbuf, NULL, NULL); + if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new")) + goto cleanup; + + /* trigger & validate perf event, ringbuf output */ + usleep(1); + + ASSERT_GT(perf_buffer__poll(perfbuf, 100), -1, "perf_buffer__poll"); + ASSERT_EQ(got_perfbuf_val, skel->bss->perfbuf_val, "check_perfbuf_val"); + ASSERT_EQ(ring_buffer__consume(ringbuf), 1, "ring_buffer__consume"); + ASSERT_EQ(got_ringbuf_val, skel->bss->ringbuf_val, "check_ringbuf_val"); + + for (i = 0; i < NUM_MAPS; i++) { + map_fds[i] = bpf_obj_get(map_paths[i]); + if (!ASSERT_GT(map_fds[i], -1, "obj_get")) + goto cleanup; + } + + for (i = 0; i < NUM_MAPS; i++) { + bool prog_array = strstr(map_paths[i], "prog_array") != NULL; + bool array = strstr(map_paths[i], "array") != NULL; + bool buf = strstr(map_paths[i], "buf") != NULL; + __u32 key = 0, vals[nr_cpus], lookup_vals[nr_cpus]; + __u32 expected_val = 1; + int j; + + /* skip ringbuf, perfbuf */ + if (buf) + continue; + + for (j = 0; j < nr_cpus; j++) + vals[j] = expected_val; + + if (prog_array) { + /* need valid prog array value */ + vals[0] = prog_fd; + /* prog array lookup returns prog id, not fd */ + expected_val = prog_id; + } + ASSERT_OK(bpf_map_update_elem(map_fds[i], &key, vals, 0), "map_update_elem"); + ASSERT_OK(bpf_map_lookup_elem(map_fds[i], &key, &lookup_vals), "map_lookup_elem"); + ASSERT_EQ(lookup_vals[0], expected_val, "map_lookup_elem_values"); + if (!array) + ASSERT_OK(bpf_map_delete_elem(map_fds[i], &key), "map_delete_elem"); + } + + link_fd = bpf_link_create(bpf_program__fd(skel->progs.handle_perf_event), perf_fd, + BPF_PERF_EVENT, NULL); + ASSERT_GT(link_fd, 0, "link_create"); + +cleanup: + if (link_fd) + close(link_fd); + if (perfbuf) + perf_buffer__free(perfbuf); + if (ringbuf) + 
ring_buffer__free(ringbuf); +} + +static void test_unpriv_bpf_disabled_negative(struct test_unpriv_bpf_disabled *skel, + __u32 prog_id, int prog_fd, int perf_fd, + char **map_paths, int *map_fds) +{ + const struct bpf_insn prog_insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn); + LIBBPF_OPTS(bpf_prog_load_opts, load_opts); + struct bpf_map_info map_info = {}; + __u32 map_info_len = sizeof(map_info); + struct bpf_link_info link_info = {}; + __u32 link_info_len = sizeof(link_info); + struct btf *btf = NULL; + __u32 attach_flags = 0; + __u32 prog_ids[3] = {}; + __u32 prog_cnt = 3; + __u32 next; + int i; + + /* Negative tests for unprivileged BPF disabled. Verify we cannot + * - load BPF programs; + * - create BPF maps; + * - get a prog/map/link fd by id; + * - get next prog/map/link id + * - query prog + * - BTF load + */ + ASSERT_EQ(bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "simple_prog", "GPL", + prog_insns, prog_insn_cnt, &load_opts), + -EPERM, "prog_load_fails"); + + for (i = BPF_MAP_TYPE_HASH; i <= BPF_MAP_TYPE_BLOOM_FILTER; i++) + ASSERT_EQ(bpf_map_create(i, NULL, sizeof(int), sizeof(int), 1, NULL), + -EPERM, "map_create_fails"); + + ASSERT_EQ(bpf_prog_get_fd_by_id(prog_id), -EPERM, "prog_get_fd_by_id_fails"); + ASSERT_EQ(bpf_prog_get_next_id(prog_id, &next), -EPERM, "prog_get_next_id_fails"); + ASSERT_EQ(bpf_prog_get_next_id(0, &next), -EPERM, "prog_get_next_id_fails"); + + if (ASSERT_OK(bpf_obj_get_info_by_fd(map_fds[0], &map_info, &map_info_len), + "obj_get_info_by_fd")) { + ASSERT_EQ(bpf_map_get_fd_by_id(map_info.id), -EPERM, "map_get_fd_by_id_fails"); + ASSERT_EQ(bpf_map_get_next_id(map_info.id, &next), -EPERM, + "map_get_next_id_fails"); + } + ASSERT_EQ(bpf_map_get_next_id(0, &next), -EPERM, "map_get_next_id_fails"); + + if (ASSERT_OK(bpf_obj_get_info_by_fd(bpf_link__fd(skel->links.sys_nanosleep_enter), + &link_info, &link_info_len), + "obj_get_info_by_fd")) { + ASSERT_EQ(bpf_link_get_fd_by_id(link_info.id), -EPERM, "link_get_fd_by_id_fails"); + ASSERT_EQ(bpf_link_get_next_id(link_info.id, &next), -EPERM, + "link_get_next_id_fails"); + } + ASSERT_EQ(bpf_link_get_next_id(0, &next), -EPERM, "link_get_next_id_fails"); + + ASSERT_EQ(bpf_prog_query(prog_fd, BPF_TRACE_FENTRY, 0, &attach_flags, prog_ids, + &prog_cnt), -EPERM, "prog_query_fails"); + + btf = btf__new_empty(); + if (ASSERT_OK_PTR(btf, "empty_btf") && + ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "unpriv_int_type")) { + const void *raw_btf_data; + __u32 raw_btf_size; + + raw_btf_data = btf__raw_data(btf, &raw_btf_size); + if (ASSERT_OK_PTR(raw_btf_data, "raw_btf_data_good")) + ASSERT_EQ(bpf_btf_load(raw_btf_data, raw_btf_size, NULL), -EPERM, + "bpf_btf_load_fails"); + } + btf__free(btf); +} + +void test_unpriv_bpf_disabled(void) +{ + char *map_paths[NUM_MAPS] = { PINPATH "array", + PINPATH "percpu_array", + PINPATH "hash", + PINPATH "percpu_hash", + PINPATH "perfbuf", + PINPATH "ringbuf", + PINPATH "prog_array" }; + int map_fds[NUM_MAPS]; + struct test_unpriv_bpf_disabled *skel; + char unprivileged_bpf_disabled_orig[32] = {}; + char perf_event_paranoid_orig[32] = {}; + struct bpf_prog_info prog_info = {}; + __u32 prog_info_len = sizeof(prog_info); + struct perf_event_attr attr = {}; + int prog_fd, perf_fd = -1, i, ret; + __u64 save_caps = 0; + __u32 prog_id; + + skel = test_unpriv_bpf_disabled__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->bss->test_pid = getpid(); + + map_fds[0] = 
bpf_map__fd(skel->maps.array); + map_fds[1] = bpf_map__fd(skel->maps.percpu_array); + map_fds[2] = bpf_map__fd(skel->maps.hash); + map_fds[3] = bpf_map__fd(skel->maps.percpu_hash); + map_fds[4] = bpf_map__fd(skel->maps.perfbuf); + map_fds[5] = bpf_map__fd(skel->maps.ringbuf); + map_fds[6] = bpf_map__fd(skel->maps.prog_array); + + for (i = 0; i < NUM_MAPS; i++) + ASSERT_OK(bpf_obj_pin(map_fds[i], map_paths[i]), "pin map_fd"); + + /* allow user without caps to use perf events */ + if (!ASSERT_OK(sysctl_set("/proc/sys/kernel/perf_event_paranoid", perf_event_paranoid_orig, + "-1"), + "set_perf_event_paranoid")) + goto cleanup; + /* ensure unprivileged bpf disabled is set */ + ret = sysctl_set("/proc/sys/kernel/unprivileged_bpf_disabled", + unprivileged_bpf_disabled_orig, "2"); + if (ret == -EPERM) { + /* if unprivileged_bpf_disabled=1, we get -EPERM back; that's okay. */ + if (!ASSERT_OK(strcmp(unprivileged_bpf_disabled_orig, "1"), + "unprivileged_bpf_disabled_on")) + goto cleanup; + } else { + if (!ASSERT_OK(ret, "set unprivileged_bpf_disabled")) + goto cleanup; + } + + prog_fd = bpf_program__fd(skel->progs.sys_nanosleep_enter); + ASSERT_OK(bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len), + "obj_get_info_by_fd"); + prog_id = prog_info.id; + ASSERT_GT(prog_id, 0, "valid_prog_id"); + + attr.size = sizeof(attr); + attr.type = PERF_TYPE_SOFTWARE; + attr.config = PERF_COUNT_SW_CPU_CLOCK; + attr.freq = 1; + attr.sample_freq = 1000; + perf_fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); + if (!ASSERT_GE(perf_fd, 0, "perf_fd")) + goto cleanup; + + if (!ASSERT_OK(test_unpriv_bpf_disabled__attach(skel), "skel_attach")) + goto cleanup; + + if (!ASSERT_OK(cap_disable_effective(ALL_CAPS, &save_caps), "disable caps")) + goto cleanup; + + if (test__start_subtest("unpriv_bpf_disabled_positive")) + test_unpriv_bpf_disabled_positive(skel, prog_id, prog_fd, perf_fd, map_paths, + map_fds); + + if (test__start_subtest("unpriv_bpf_disabled_negative")) + test_unpriv_bpf_disabled_negative(skel, prog_id, prog_fd, perf_fd, map_paths, + map_fds); + +cleanup: + close(perf_fd); + if (save_caps) + cap_enable_effective(save_caps, NULL); + if (strlen(perf_event_paranoid_orig) > 0) + sysctl_set("/proc/sys/kernel/perf_event_paranoid", NULL, perf_event_paranoid_orig); + if (strlen(unprivileged_bpf_disabled_orig) > 0) + sysctl_set("/proc/sys/kernel/unprivileged_bpf_disabled", NULL, + unprivileged_bpf_disabled_orig); + for (i = 0; i < NUM_MAPS; i++) + unlink(map_paths[i]); + test_unpriv_bpf_disabled__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c index a71f51bdc08d..5f733d50b0d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -190,9 +190,7 @@ __weak void trigger_300_usdts(void) static void __always_inline f400(int x __attribute__((unused))) { - static int y; - - STAP_PROBE1(test, usdt_400, y++); + STAP_PROBE1(test, usdt_400, 400); } /* this time we have 400 different USDT call sites, but they have uniform @@ -299,7 +297,7 @@ static void subtest_multispec_usdt(void) trigger_400_usdts(); ASSERT_EQ(bss->usdt_100_called, 400, "usdt_400_called"); - ASSERT_EQ(bss->usdt_100_sum, 399 * 400 / 2, "usdt_400_sum"); + ASSERT_EQ(bss->usdt_100_sum, 400 * 400, "usdt_400_sum"); cleanup: test_usdt__destroy(skel); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h index 8cfaeba1ddbf..97ec8bc76ae6 100644 --- 
a/tools/testing/selftests/bpf/progs/bpf_iter.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h @@ -16,6 +16,7 @@ #define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used #define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used #define bpf_iter__sockmap bpf_iter__sockmap___not_used +#define bpf_iter__bpf_link bpf_iter__bpf_link___not_used #define btf_ptr btf_ptr___not_used #define BTF_F_COMPACT BTF_F_COMPACT___not_used #define BTF_F_NONAME BTF_F_NONAME___not_used @@ -37,6 +38,7 @@ #undef bpf_iter__bpf_map_elem #undef bpf_iter__bpf_sk_storage_map #undef bpf_iter__sockmap +#undef bpf_iter__bpf_link #undef btf_ptr #undef BTF_F_COMPACT #undef BTF_F_NONAME @@ -132,6 +134,11 @@ struct bpf_iter__sockmap { struct sock *sk; }; +struct bpf_iter__bpf_link { + struct bpf_iter_meta *meta; + struct bpf_link *link; +}; + struct btf_ptr { void *ptr; __u32 type_id; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c new file mode 100644 index 000000000000..e1af2f8f75a6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Red Hat, Inc. */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +SEC("iter/bpf_link") +int dump_bpf_link(struct bpf_iter__bpf_link *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct bpf_link *link = ctx->link; + int link_id; + + if (!link) + return 0; + + link_id = link->id; + bpf_seq_write(seq, &link_id, sizeof(link_id)); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c new file mode 100644 index 000000000000..3824345d82ab --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_size___diff_offs x) {} diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index 1c7105fcae3c..4ee4748133fe 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@ -94,7 +94,7 @@ typedef void (* (*signal_t)(int, void (*)(int)))(int); typedef char * (*fn_ptr_arr1_t[10])(int **); -typedef char * (* const (* const fn_ptr_arr2_t[5])())(char * (*)(int)); +typedef char * (* (* const fn_ptr_arr2_t[5])())(char * (*)(int)); struct struct_w_typedefs { int_t a; diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index c95c0cabe951..f9dc9766546e 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -785,13 +785,21 @@ struct core_reloc_bitfields___err_too_big_bitfield { */ struct core_reloc_size_output { int int_sz; + int int_off; int struct_sz; + int struct_off; int union_sz; + int union_off; int arr_sz; + int arr_off; int arr_elem_sz; + int arr_elem_off; int ptr_sz; + int ptr_off; int enum_sz; + int enum_off; int float_sz; + int float_off; }; struct core_reloc_size { @@ -814,6 +822,16 @@ struct core_reloc_size___diff_sz { double float_field; }; +struct core_reloc_size___diff_offs { + float float_field; + enum { YET_OTHER_VALUE = 123 } enum_field; + void *ptr_field; + int arr_field[4]; + union { int x; } union_field; + struct { int x; } 
struct_field; + int int_field; +}; + /* Error case of two candidates with the fields (int_field) at the same * offset, but with differing final relocation values: size 4 vs size 1 */ diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c new file mode 100644 index 000000000000..d811cff73597 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -0,0 +1,588 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Facebook */ + +#include <errno.h> +#include <string.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct test_info { + int x; + struct bpf_dynptr ptr; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct bpf_dynptr); +} array_map1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct test_info); +} array_map2 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} array_map3 SEC(".maps"); + +struct sample { + int pid; + long value; + char comm[16]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); +} ringbuf SEC(".maps"); + +int err, val; + +static int get_map_val_dynptr(struct bpf_dynptr *ptr) +{ + __u32 key = 0, *map_val; + + bpf_map_update_elem(&array_map3, &key, &val, 0); + + map_val = bpf_map_lookup_elem(&array_map3, &key); + if (!map_val) + return -ENOENT; + + bpf_dynptr_from_mem(map_val, sizeof(*map_val), 0, ptr); + + return 0; +} + +/* Every bpf_ringbuf_reserve_dynptr call must have a corresponding + * bpf_ringbuf_submit/discard_dynptr call + */ +SEC("?raw_tp/sys_nanosleep") +int ringbuf_missing_release1(void *ctx) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + /* missing a call to bpf_ringbuf_discard/submit_dynptr */ + + return 0; +} + +SEC("?raw_tp/sys_nanosleep") +int ringbuf_missing_release2(void *ctx) +{ + struct bpf_dynptr ptr1, ptr2; + struct sample *sample; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr1); + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr2); + + sample = bpf_dynptr_data(&ptr1, 0, sizeof(*sample)); + if (!sample) { + bpf_ringbuf_discard_dynptr(&ptr1, 0); + bpf_ringbuf_discard_dynptr(&ptr2, 0); + return 0; + } + + bpf_ringbuf_submit_dynptr(&ptr1, 0); + + /* missing a call to bpf_ringbuf_discard/submit_dynptr on ptr2 */ + + return 0; +} + +static int missing_release_callback_fn(__u32 index, void *data) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + /* missing a call to bpf_ringbuf_discard/submit_dynptr */ + + return 0; +} + +/* Any dynptr initialized within a callback must have bpf_dynptr_put called */ +SEC("?raw_tp/sys_nanosleep") +int ringbuf_missing_release_callback(void *ctx) +{ + bpf_loop(10, missing_release_callback_fn, NULL, 0); + return 0; +} + +/* Can't call bpf_ringbuf_submit/discard_dynptr on a non-initialized dynptr */ +SEC("?raw_tp/sys_nanosleep") +int ringbuf_release_uninit_dynptr(void *ctx) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_ringbuf_submit_dynptr(&ptr, 0); + + return 0; +} + +/* A dynptr can't be used after it has been invalidated */ +SEC("?raw_tp/sys_nanosleep") +int use_after_invalid(void *ctx) +{ + struct bpf_dynptr ptr; + char read_data[64]; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(read_data), 0, &ptr); 
+ + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + + bpf_ringbuf_submit_dynptr(&ptr, 0); + + /* this should fail */ + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + + return 0; +} + +/* Can't call non-dynptr ringbuf APIs on a dynptr ringbuf sample */ +SEC("?raw_tp/sys_nanosleep") +int ringbuf_invalid_api(void *ctx) +{ + struct bpf_dynptr ptr; + struct sample *sample; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr); + sample = bpf_dynptr_data(&ptr, 0, sizeof(*sample)); + if (!sample) + goto done; + + sample->pid = 123; + + /* invalid API use. need to use dynptr API to submit/discard */ + bpf_ringbuf_submit(sample, 0); + +done: + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} + +/* Can't add a dynptr to a map */ +SEC("?raw_tp/sys_nanosleep") +int add_dynptr_to_map1(void *ctx) +{ + struct bpf_dynptr ptr; + int key = 0; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + /* this should fail */ + bpf_map_update_elem(&array_map1, &key, &ptr, 0); + + bpf_ringbuf_submit_dynptr(&ptr, 0); + + return 0; +} + +/* Can't add a struct with an embedded dynptr to a map */ +SEC("?raw_tp/sys_nanosleep") +int add_dynptr_to_map2(void *ctx) +{ + struct test_info x; + int key = 0; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &x.ptr); + + /* this should fail */ + bpf_map_update_elem(&array_map2, &key, &x, 0); + + bpf_ringbuf_submit_dynptr(&x.ptr, 0); + + return 0; +} + +/* A data slice can't be accessed out of bounds */ +SEC("?raw_tp/sys_nanosleep") +int data_slice_out_of_bounds_ringbuf(void *ctx) +{ + struct bpf_dynptr ptr; + void *data; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr); + + data = bpf_dynptr_data(&ptr, 0, 8); + if (!data) + goto done; + + /* can't index out of bounds of the data slice */ + val = *((char *)data + 8); + +done: + bpf_ringbuf_submit_dynptr(&ptr, 0); + return 0; +} + +SEC("?raw_tp/sys_nanosleep") +int data_slice_out_of_bounds_map_value(void *ctx) +{ + __u32 key = 0, map_val; + struct bpf_dynptr ptr; + void *data; + + get_map_val_dynptr(&ptr); + + data = bpf_dynptr_data(&ptr, 0, sizeof(map_val)); + if (!data) + return 0; + + /* can't index out of bounds of the data slice */ + val = *((char *)data + (sizeof(map_val) + 1)); + + return 0; +} + +/* A data slice can't be used after it has been released */ +SEC("?raw_tp/sys_nanosleep") +int data_slice_use_after_release(void *ctx) +{ + struct bpf_dynptr ptr; + struct sample *sample; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr); + sample = bpf_dynptr_data(&ptr, 0, sizeof(*sample)); + if (!sample) + goto done; + + sample->pid = 123; + + bpf_ringbuf_submit_dynptr(&ptr, 0); + + /* this should fail */ + val = sample->pid; + + return 0; + +done: + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} + +/* A data slice must be first checked for NULL */ +SEC("?raw_tp/sys_nanosleep") +int data_slice_missing_null_check1(void *ctx) +{ + struct bpf_dynptr ptr; + void *data; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr); + + data = bpf_dynptr_data(&ptr, 0, 8); + + /* missing if (!data) check */ + + /* this should fail */ + *(__u8 *)data = 3; + + bpf_ringbuf_submit_dynptr(&ptr, 0); + return 0; +} + +/* A data slice can't be dereferenced if it wasn't checked for null */ +SEC("?raw_tp/sys_nanosleep") +int data_slice_missing_null_check2(void *ctx) +{ + struct bpf_dynptr ptr; + __u64 *data1, *data2; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr); + + data1 = bpf_dynptr_data(&ptr, 0, 8); + data2 = bpf_dynptr_data(&ptr, 0, 8); + if (data1) + /* this should fail */ + 
*data2 = 3; + +done: + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} + +/* Can't pass in a dynptr as an arg to a helper function that doesn't take in a + * dynptr argument + */ +SEC("?raw_tp/sys_nanosleep") +int invalid_helper1(void *ctx) +{ + struct bpf_dynptr ptr; + + get_map_val_dynptr(&ptr); + + /* this should fail */ + bpf_strncmp((const char *)&ptr, sizeof(ptr), "hello!"); + + return 0; +} + +/* A dynptr can't be passed into a helper function at a non-zero offset */ +SEC("?raw_tp/sys_nanosleep") +int invalid_helper2(void *ctx) +{ + struct bpf_dynptr ptr; + char read_data[64]; + + get_map_val_dynptr(&ptr); + + /* this should fail */ + bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0); + + return 0; +} + +/* A bpf_dynptr is invalidated if it's been written into */ +SEC("?raw_tp/sys_nanosleep") +int invalid_write1(void *ctx) +{ + struct bpf_dynptr ptr; + void *data; + __u8 x = 0; + + get_map_val_dynptr(&ptr); + + memcpy(&ptr, &x, sizeof(x)); + + /* this should fail */ + data = bpf_dynptr_data(&ptr, 0, 1); + + return 0; +} + +/* + * A bpf_dynptr can't be used as a dynptr if it has been written into at a fixed + * offset + */ +SEC("?raw_tp/sys_nanosleep") +int invalid_write2(void *ctx) +{ + struct bpf_dynptr ptr; + char read_data[64]; + __u8 x = 0; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr); + + memcpy((void *)&ptr + 8, &x, sizeof(x)); + + /* this should fail */ + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + + bpf_ringbuf_submit_dynptr(&ptr, 0); + + return 0; +} + +/* + * A bpf_dynptr can't be used as a dynptr if it has been written into at a + * non-const offset + */ +SEC("?raw_tp/sys_nanosleep") +int invalid_write3(void *ctx) +{ + struct bpf_dynptr ptr; + char stack_buf[16]; + unsigned long len; + __u8 x = 0; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr); + + memcpy(stack_buf, &val, sizeof(val)); + len = stack_buf[0] & 0xf; + + memcpy((void *)&ptr + len, &x, sizeof(x)); + + /* this should fail */ + bpf_ringbuf_submit_dynptr(&ptr, 0); + + return 0; +} + +static int invalid_write4_callback(__u32 index, void *data) +{ + *(__u32 *)data = 123; + + return 0; +} + +/* If the dynptr is written into in a callback function, it should + * be invalidated as a dynptr + */ +SEC("?raw_tp/sys_nanosleep") +int invalid_write4(void *ctx) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr); + + bpf_loop(10, invalid_write4_callback, &ptr, 0); + + /* this should fail */ + bpf_ringbuf_submit_dynptr(&ptr, 0); + + return 0; +} + +/* A globally-defined bpf_dynptr can't be used (it must reside as a stack frame) */ +struct bpf_dynptr global_dynptr; +SEC("?raw_tp/sys_nanosleep") +int global(void *ctx) +{ + /* this should fail */ + bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &global_dynptr); + + bpf_ringbuf_discard_dynptr(&global_dynptr, 0); + + return 0; +} + +/* A direct read should fail */ +SEC("?raw_tp/sys_nanosleep") +int invalid_read1(void *ctx) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr); + + /* this should fail */ + val = *(int *)&ptr; + + bpf_ringbuf_discard_dynptr(&ptr, 0); + + return 0; +} + +/* A direct read at an offset should fail */ +SEC("?raw_tp/sys_nanosleep") +int invalid_read2(void *ctx) +{ + struct bpf_dynptr ptr; + char read_data[64]; + + get_map_val_dynptr(&ptr); + + /* this should fail */ + bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0); + + return 0; +} + +/* A direct read at an offset into the lower stack slot should fail */ 
+SEC("?raw_tp/sys_nanosleep") +int invalid_read3(void *ctx) +{ + struct bpf_dynptr ptr1, ptr2; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr1); + bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr2); + + /* this should fail */ + memcpy(&val, (void *)&ptr1 + 8, sizeof(val)); + + bpf_ringbuf_discard_dynptr(&ptr1, 0); + bpf_ringbuf_discard_dynptr(&ptr2, 0); + + return 0; +} + +static int invalid_read4_callback(__u32 index, void *data) +{ + /* this should fail */ + val = *(__u32 *)data; + + return 0; +} + +/* A direct read within a callback function should fail */ +SEC("?raw_tp/sys_nanosleep") +int invalid_read4(void *ctx) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr); + + bpf_loop(10, invalid_read4_callback, &ptr, 0); + + bpf_ringbuf_submit_dynptr(&ptr, 0); + + return 0; +} + +/* Initializing a dynptr at an offset should fail */ +SEC("?raw_tp/sys_nanosleep") +int invalid_offset(void *ctx) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr + 1); + + bpf_ringbuf_discard_dynptr(&ptr, 0); + + return 0; +} + +/* Can't release a dynptr twice */ +SEC("?raw_tp/sys_nanosleep") +int release_twice(void *ctx) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr); + + bpf_ringbuf_discard_dynptr(&ptr, 0); + + /* this second release should fail */ + bpf_ringbuf_discard_dynptr(&ptr, 0); + + return 0; +} + +static int release_twice_callback_fn(__u32 index, void *data) +{ + /* this should fail */ + bpf_ringbuf_discard_dynptr(data, 0); + + return 0; +} + +/* Test that releasing a dynptr twice, where one of the releases happens + * within a callback function, fails + */ +SEC("?raw_tp/sys_nanosleep") +int release_twice_callback(void *ctx) +{ + struct bpf_dynptr ptr; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 32, 0, &ptr); + + bpf_ringbuf_discard_dynptr(&ptr, 0); + + bpf_loop(10, release_twice_callback_fn, &ptr, 0); + + return 0; +} + +/* Reject unsupported local mem types for dynptr_from_mem API */ +SEC("?raw_tp/sys_nanosleep") +int dynptr_from_mem_invalid_api(void *ctx) +{ + struct bpf_dynptr ptr; + int x = 0; + + /* this should fail */ + bpf_dynptr_from_mem(&x, sizeof(x), 0, &ptr); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c new file mode 100644 index 000000000000..d67be48df4b2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Facebook */ + +#include <string.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "errno.h" + +char _license[] SEC("license") = "GPL"; + +int pid, err, val; + +struct sample { + int pid; + int seq; + long value; + char comm[16]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); +} ringbuf SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} array_map SEC(".maps"); + +SEC("tp/syscalls/sys_enter_nanosleep") +int test_read_write(void *ctx) +{ + char write_data[64] = "hello there, world!!"; + char read_data[64] = {}, buf[64] = {}; + struct bpf_dynptr ptr; + int i; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr); + + /* Write data into the dynptr */ + err = err ?: bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data)); + + /* Read the data that was written into the dynptr */ + 
err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + + /* Ensure the data we read matches the data we wrote */ + for (i = 0; i < sizeof(read_data); i++) { + if (read_data[i] != write_data[i]) { + err = 1; + break; + } + } + + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int test_data_slice(void *ctx) +{ + __u32 key = 0, val = 235, *map_val; + struct bpf_dynptr ptr; + __u32 map_val_size; + void *data; + + map_val_size = sizeof(*map_val); + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + bpf_map_update_elem(&array_map, &key, &val, 0); + + map_val = bpf_map_lookup_elem(&array_map, &key); + if (!map_val) { + err = 1; + return 0; + } + + bpf_dynptr_from_mem(map_val, map_val_size, 0, &ptr); + + /* Try getting a data slice that is out of range */ + data = bpf_dynptr_data(&ptr, map_val_size + 1, 1); + if (data) { + err = 2; + return 0; + } + + /* Try getting more bytes than available */ + data = bpf_dynptr_data(&ptr, 0, map_val_size + 1); + if (data) { + err = 3; + return 0; + } + + data = bpf_dynptr_data(&ptr, 0, sizeof(__u32)); + if (!data) { + err = 4; + return 0; + } + + *(__u32 *)data = 999; + + err = bpf_probe_read_kernel(&val, sizeof(val), data); + if (err) + return 0; + + if (val != *(int *)data) + err = 5; + + return 0; +} + +static int ringbuf_callback(__u32 index, void *data) +{ + struct sample *sample; + + struct bpf_dynptr *ptr = (struct bpf_dynptr *)data; + + sample = bpf_dynptr_data(ptr, 0, sizeof(*sample)); + if (!sample) + err = 2; + else + sample->pid += index; + + return 0; +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int test_ringbuf(void *ctx) +{ + struct bpf_dynptr ptr; + struct sample *sample; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + val = 100; + + /* check that you can reserve a dynamic size reservation */ + err = bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + sample = err ? 
NULL : bpf_dynptr_data(&ptr, 0, sizeof(*sample)); + if (!sample) { + err = 1; + goto done; + } + + sample->pid = 10; + + /* Can pass dynptr to callback functions */ + bpf_loop(10, ringbuf_callback, &ptr, 0); + + if (sample->pid != 55) + err = 2; + +done: + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/exhandler_kern.c b/tools/testing/selftests/bpf/progs/exhandler_kern.c index dd9b30a0f0fc..20d009e2d266 100644 --- a/tools/testing/selftests/bpf/progs/exhandler_kern.c +++ b/tools/testing/selftests/bpf/progs/exhandler_kern.c @@ -7,8 +7,6 @@ #include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> -#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) - char _license[] SEC("license") = "GPL"; unsigned int exception_triggered; diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c b/tools/testing/selftests/bpf/progs/kprobe_multi.c index 600be50800f8..93510f4f0f3a 100644 --- a/tools/testing/selftests/bpf/progs/kprobe_multi.c +++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c @@ -98,3 +98,17 @@ int test_kretprobe(struct pt_regs *ctx) kprobe_multi_check(ctx, true); return 0; } + +SEC("kprobe.multi") +int test_kprobe_manual(struct pt_regs *ctx) +{ + kprobe_multi_check(ctx, false); + return 0; +} + +SEC("kretprobe.multi") +int test_kretprobe_manual(struct pt_regs *ctx) +{ + kprobe_multi_check(ctx, true); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c b/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c new file mode 100644 index 000000000000..e76e499aca39 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +SEC("kprobe.multi/") +int test_kprobe_empty(struct pt_regs *ctx) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/loop5.c b/tools/testing/selftests/bpf/progs/loop5.c index 913791923fa3..1b13f37f85ec 100644 --- a/tools/testing/selftests/bpf/progs/loop5.c +++ b/tools/testing/selftests/bpf/progs/loop5.c @@ -2,7 +2,6 @@ // Copyright (c) 2019 Facebook #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -#define barrier() __asm__ __volatile__("": : :"memory") char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c index 1b0e0409eaa5..eb8217803493 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr.c +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -141,7 +141,7 @@ SEC("tc") int test_map_kptr(struct __sk_buff *ctx) { struct map_value *v; - int i, key = 0; + int key = 0; #define TEST(map) \ v = bpf_map_lookup_elem(&map, &key); \ @@ -162,7 +162,7 @@ SEC("tc") int test_map_in_map_kptr(struct __sk_buff *ctx) { struct map_value *v; - int i, key = 0; + int key = 0; void *map; #define TEST(map_in_map) \ @@ -187,4 +187,106 @@ int test_map_in_map_kptr(struct __sk_buff *ctx) return 0; } +SEC("tc") +int test_map_kptr_ref(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *p, *p_st; + unsigned long arg = 0; + struct map_value *v; + int key = 0, ret; + + p = bpf_kfunc_call_test_acquire(&arg); + if (!p) + return 1; + + p_st = p->next; + if (p_st->cnt.refs.counter != 2) { + ret = 2; + goto end; + } + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) { + ret = 3; + goto end; + } + + p = bpf_kptr_xchg(&v->ref_ptr, p); + if (p) { + ret = 4; + goto end; + } + if 
(p_st->cnt.refs.counter != 2) + return 5; + + p = bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0); + if (!p) + return 6; + if (p_st->cnt.refs.counter != 3) { + ret = 7; + goto end; + } + bpf_kfunc_call_test_release(p); + if (p_st->cnt.refs.counter != 2) + return 8; + + p = bpf_kptr_xchg(&v->ref_ptr, NULL); + if (!p) + return 9; + bpf_kfunc_call_test_release(p); + if (p_st->cnt.refs.counter != 1) + return 10; + + p = bpf_kfunc_call_test_acquire(&arg); + if (!p) + return 11; + p = bpf_kptr_xchg(&v->ref_ptr, p); + if (p) { + ret = 12; + goto end; + } + if (p_st->cnt.refs.counter != 2) + return 13; + /* Leave in map */ + + return 0; +end: + bpf_kfunc_call_test_release(p); + return ret; +} + +SEC("tc") +int test_map_kptr_ref2(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *p, *p_st; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 1; + + p_st = v->ref_ptr; + if (!p_st || p_st->cnt.refs.counter != 2) + return 2; + + p = bpf_kptr_xchg(&v->ref_ptr, NULL); + if (!p) + return 3; + if (p_st->cnt.refs.counter != 2) { + bpf_kfunc_call_test_release(p); + return 4; + } + + p = bpf_kptr_xchg(&v->ref_ptr, p); + if (p) { + bpf_kfunc_call_test_release(p); + return 5; + } + if (p_st->cnt.refs.counter != 2) + return 6; + + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c new file mode 100644 index 000000000000..05e209b1b12a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c @@ -0,0 +1,418 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +struct map_value { + char buf[8]; + struct prog_test_ref_kfunc __kptr *unref_ptr; + struct prog_test_ref_kfunc __kptr_ref *ref_ptr; + struct prog_test_member __kptr_ref *ref_memb_ptr; +}; + +struct array_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} array_map SEC(".maps"); + +extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; +extern struct prog_test_ref_kfunc * +bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym; + +SEC("?tc") +int size_not_bpf_dw(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + *(u32 *)&v->unref_ptr = 0; + return 0; +} + +SEC("?tc") +int non_const_var_off(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0, id; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + id = ctx->protocol; + if (id < 4 || id > 12) + return 0; + *(u64 *)((void *)v + id) = 0; + + return 0; +} + +SEC("?tc") +int non_const_var_off_kptr_xchg(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0, id; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + id = ctx->protocol; + if (id < 4 || id > 12) + return 0; + bpf_kptr_xchg((void *)v + id, NULL); + + return 0; +} + +SEC("?tc") +int misaligned_access_write(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + *(void **)((void *)v + 7) = NULL; + + return 0; +} + +SEC("?tc") +int misaligned_access_read(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + return *(u64 *)((void *)v 
+ 1); +} + +SEC("?tc") +int reject_var_off_store(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *unref_ptr; + struct map_value *v; + int key = 0, id; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + unref_ptr = v->unref_ptr; + if (!unref_ptr) + return 0; + id = ctx->protocol; + if (id < 4 || id > 12) + return 0; + unref_ptr += id; + v->unref_ptr = unref_ptr; + + return 0; +} + +SEC("?tc") +int reject_bad_type_match(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *unref_ptr; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + unref_ptr = v->unref_ptr; + if (!unref_ptr) + return 0; + unref_ptr = (void *)unref_ptr + 4; + v->unref_ptr = unref_ptr; + + return 0; +} + +SEC("?tc") +int marked_as_untrusted_or_null(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + bpf_this_cpu_ptr(v->unref_ptr); + return 0; +} + +SEC("?tc") +int correct_btf_id_check_size(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *p; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + p = v->unref_ptr; + if (!p) + return 0; + return *(int *)((void *)p + bpf_core_type_size(struct prog_test_ref_kfunc)); +} + +SEC("?tc") +int inherit_untrusted_on_walk(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *unref_ptr; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + unref_ptr = v->unref_ptr; + if (!unref_ptr) + return 0; + unref_ptr = unref_ptr->next; + bpf_this_cpu_ptr(unref_ptr); + return 0; +} + +SEC("?tc") +int reject_kptr_xchg_on_unref(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + bpf_kptr_xchg(&v->unref_ptr, NULL); + return 0; +} + +SEC("?tc") +int reject_kptr_get_no_map_val(struct __sk_buff *ctx) +{ + bpf_kfunc_call_test_kptr_get((void *)&ctx, 0, 0); + return 0; +} + +SEC("?tc") +int reject_kptr_get_no_null_map_val(struct __sk_buff *ctx) +{ + bpf_kfunc_call_test_kptr_get(bpf_map_lookup_elem(&array_map, &(int){0}), 0, 0); + return 0; +} + +SEC("?tc") +int reject_kptr_get_no_kptr(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + bpf_kfunc_call_test_kptr_get((void *)v, 0, 0); + return 0; +} + +SEC("?tc") +int reject_kptr_get_on_unref(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + bpf_kfunc_call_test_kptr_get(&v->unref_ptr, 0, 0); + return 0; +} + +SEC("?tc") +int reject_kptr_get_bad_type_match(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + bpf_kfunc_call_test_kptr_get((void *)&v->ref_memb_ptr, 0, 0); + return 0; +} + +SEC("?tc") +int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + bpf_this_cpu_ptr(v->ref_ptr); + return 0; +} + +SEC("?tc") +int reject_untrusted_store_to_ref(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *p; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + p = v->ref_ptr; + if (!p) + return 0; + /* Checkmate, clang */ + *(struct prog_test_ref_kfunc * volatile 
*)&v->ref_ptr = p; + return 0; +} + +SEC("?tc") +int reject_untrusted_xchg(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *p; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + p = v->ref_ptr; + if (!p) + return 0; + bpf_kptr_xchg(&v->ref_ptr, p); + return 0; +} + +SEC("?tc") +int reject_bad_type_xchg(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *ref_ptr; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + ref_ptr = bpf_kfunc_call_test_acquire(&(unsigned long){0}); + if (!ref_ptr) + return 0; + bpf_kptr_xchg(&v->ref_memb_ptr, ref_ptr); + return 0; +} + +SEC("?tc") +int reject_member_of_ref_xchg(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *ref_ptr; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + ref_ptr = bpf_kfunc_call_test_acquire(&(unsigned long){0}); + if (!ref_ptr) + return 0; + bpf_kptr_xchg(&v->ref_memb_ptr, &ref_ptr->memb); + return 0; +} + +SEC("?syscall") +int reject_indirect_helper_access(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + bpf_get_current_comm(v, sizeof(v->buf) + 1); + return 0; +} + +__noinline +int write_func(int *p) +{ + return p ? *p = 42 : 0; +} + +SEC("?tc") +int reject_indirect_global_func_access(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + return write_func((void *)v + 5); +} + +SEC("?tc") +int kptr_xchg_ref_state(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *p; + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + p = bpf_kfunc_call_test_acquire(&(unsigned long){0}); + if (!p) + return 0; + bpf_kptr_xchg(&v->ref_ptr, p); + return 0; +} + +SEC("?tc") +int kptr_get_ref_state(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&array_map, &key); + if (!v) + return 0; + + bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/mptcp_sock.c b/tools/testing/selftests/bpf/progs/mptcp_sock.c new file mode 100644 index 000000000000..91a0d7eff2ac --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcp_sock.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Tessares SA. */ +/* Copyright (c) 2022, SUSE. */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "GPL"; +__u32 token = 0; + +struct mptcp_storage { + __u32 invoked; + __u32 is_mptcp; + struct sock *sk; + __u32 token; + struct sock *first; + char ca_name[TCP_CA_NAME_MAX]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct mptcp_storage); +} socket_storage_map SEC(".maps"); + +SEC("sockops") +int _sockops(struct bpf_sock_ops *ctx) +{ + struct mptcp_storage *storage; + struct mptcp_sock *msk; + int op = (int)ctx->op; + struct tcp_sock *tsk; + struct bpf_sock *sk; + bool is_mptcp; + + if (op != BPF_SOCK_OPS_TCP_CONNECT_CB) + return 1; + + sk = ctx->sk; + if (!sk) + return 1; + + tsk = bpf_skc_to_tcp_sock(sk); + if (!tsk) + return 1; + + is_mptcp = bpf_core_field_exists(tsk->is_mptcp) ? 
tsk->is_mptcp : 0; + if (!is_mptcp) { + storage = bpf_sk_storage_get(&socket_storage_map, sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!storage) + return 1; + + storage->token = 0; + __builtin_memset(storage->ca_name, 0, TCP_CA_NAME_MAX); + storage->first = NULL; + } else { + msk = bpf_skc_to_mptcp_sock(sk); + if (!msk) + return 1; + + storage = bpf_sk_storage_get(&socket_storage_map, msk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!storage) + return 1; + + storage->token = msk->token; + __builtin_memcpy(storage->ca_name, msk->ca_name, TCP_CA_NAME_MAX); + storage->first = msk->first; + } + storage->invoked++; + storage->is_mptcp = is_mptcp; + storage->sk = (struct sock *)sk; + + return 1; +} + +SEC("fentry/mptcp_pm_new_connection") +int BPF_PROG(trace_mptcp_pm_new_connection, struct mptcp_sock *msk, + const struct sock *ssk, int server_side) +{ + if (!server_side) + token = msk->token; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/profiler1.c b/tools/testing/selftests/bpf/progs/profiler1.c index 4df9088bfc00..fb6b13522949 100644 --- a/tools/testing/selftests/bpf/progs/profiler1.c +++ b/tools/testing/selftests/bpf/progs/profiler1.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) #define UNROLL #define INLINE __always_inline #include "profiler.inc.h" diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index 5d3dc4d66d47..6c7b1fb268d6 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -171,8 +171,6 @@ struct process_frame_ctx { bool done; }; -#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) - static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx) { int zero = 0; diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index af994d16bb10..ce9acf4db8d2 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -5,9 +5,12 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include "bpf_misc.h" int kprobe_res = 0; +int kprobe2_res = 0; int kretprobe_res = 0; +int kretprobe2_res = 0; int uprobe_res = 0; int uretprobe_res = 0; int uprobe_byname_res = 0; @@ -15,20 +18,34 @@ int uretprobe_byname_res = 0; int uprobe_byname2_res = 0; int uretprobe_byname2_res = 0; -SEC("kprobe/sys_nanosleep") +SEC("kprobe") int handle_kprobe(struct pt_regs *ctx) { kprobe_res = 1; return 0; } -SEC("kretprobe/sys_nanosleep") -int BPF_KRETPROBE(handle_kretprobe) +SEC("kprobe/" SYS_PREFIX "sys_nanosleep") +int BPF_KPROBE(handle_kprobe_auto) +{ + kprobe2_res = 11; + return 0; +} + +SEC("kretprobe") +int handle_kretprobe(struct pt_regs *ctx) { kretprobe_res = 2; return 0; } +SEC("kretprobe/" SYS_PREFIX "sys_nanosleep") +int BPF_KRETPROBE(handle_kretprobe_auto) +{ + kretprobe2_res = 22; + return 0; +} + SEC("uprobe") int handle_uprobe(struct pt_regs *ctx) { diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c index 0e2222968918..22d0ac8709b4 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c @@ -4,18 +4,23 @@ #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include <errno.h> int my_tid; -int kprobe_res; -int kprobe_multi_res; -int 
kretprobe_res; -int uprobe_res; -int uretprobe_res; -int tp_res; -int pe_res; +__u64 kprobe_res; +__u64 kprobe_multi_res; +__u64 kretprobe_res; +__u64 uprobe_res; +__u64 uretprobe_res; +__u64 tp_res; +__u64 pe_res; +__u64 fentry_res; +__u64 fexit_res; +__u64 fmod_ret_res; +__u64 lsm_res; -static void update(void *ctx, int *res) +static void update(void *ctx, __u64 *res) { if (my_tid != (u32)bpf_get_current_pid_tgid()) return; @@ -82,4 +87,35 @@ int handle_pe(struct pt_regs *ctx) return 0; } +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(fentry_test1, int a) +{ + update(ctx, &fentry_res); + return 0; +} + +SEC("fexit/bpf_fentry_test1") +int BPF_PROG(fexit_test1, int a, int ret) +{ + update(ctx, &fexit_res); + return 0; +} + +SEC("fmod_ret/bpf_modify_return_test") +int BPF_PROG(fmod_ret_test, int _a, int *_b, int _ret) +{ + update(ctx, &fmod_ret_res); + return 1234; +} + +SEC("lsm/file_mprotect") +int BPF_PROG(test_int_hook, struct vm_area_struct *vma, + unsigned long reqprot, unsigned long prot, int ret) +{ + if (my_tid != (u32)bpf_get_current_pid_tgid()) + return ret; + update(ctx, &lsm_res); + return -EPERM; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c index 7e45e2bdf6cd..5b8a75097ea3 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c @@ -45,35 +45,34 @@ int test_core_existence(void *ctx) struct core_reloc_existence_output *out = (void *)&data.out; out->a_exists = bpf_core_field_exists(in->a); - if (bpf_core_field_exists(in->a)) + if (bpf_core_field_exists(struct core_reloc_existence, a)) out->a_value = BPF_CORE_READ(in, a); else out->a_value = 0xff000001u; out->b_exists = bpf_core_field_exists(in->b); - if (bpf_core_field_exists(in->b)) + if (bpf_core_field_exists(struct core_reloc_existence, b)) out->b_value = BPF_CORE_READ(in, b); else out->b_value = 0xff000002u; out->c_exists = bpf_core_field_exists(in->c); - if (bpf_core_field_exists(in->c)) + if (bpf_core_field_exists(struct core_reloc_existence, c)) out->c_value = BPF_CORE_READ(in, c); else out->c_value = 0xff000003u; out->arr_exists = bpf_core_field_exists(in->arr); - if (bpf_core_field_exists(in->arr)) + if (bpf_core_field_exists(struct core_reloc_existence, arr)) out->arr_value = BPF_CORE_READ(in, arr[0]); else out->arr_value = 0xff000004u; out->s_exists = bpf_core_field_exists(in->s); - if (bpf_core_field_exists(in->s)) + if (bpf_core_field_exists(struct core_reloc_existence, s)) out->s_value = BPF_CORE_READ(in, s.x); else out->s_value = 0xff000005u; return 0; } - diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c index 7b2d576aeea1..5b686053ce42 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c @@ -15,13 +15,21 @@ struct { struct core_reloc_size_output { int int_sz; + int int_off; int struct_sz; + int struct_off; int union_sz; + int union_off; int arr_sz; + int arr_off; int arr_elem_sz; + int arr_elem_off; int ptr_sz; + int ptr_off; int enum_sz; + int enum_off; int float_sz; + int float_off; }; struct core_reloc_size { @@ -41,13 +49,28 @@ int test_core_size(void *ctx) struct core_reloc_size_output *out = (void *)&data.out; out->int_sz = bpf_core_field_size(in->int_field); + out->int_off = bpf_core_field_offset(in->int_field); + out->struct_sz 
= bpf_core_field_size(in->struct_field); + out->struct_off = bpf_core_field_offset(in->struct_field); + out->union_sz = bpf_core_field_size(in->union_field); + out->union_off = bpf_core_field_offset(in->union_field); + out->arr_sz = bpf_core_field_size(in->arr_field); - out->arr_elem_sz = bpf_core_field_size(in->arr_field[0]); - out->ptr_sz = bpf_core_field_size(in->ptr_field); - out->enum_sz = bpf_core_field_size(in->enum_field); - out->float_sz = bpf_core_field_size(in->float_field); + out->arr_off = bpf_core_field_offset(in->arr_field); + + out->arr_elem_sz = bpf_core_field_size(struct core_reloc_size, arr_field[1]); + out->arr_elem_off = bpf_core_field_offset(struct core_reloc_size, arr_field[1]); + + out->ptr_sz = bpf_core_field_size(struct core_reloc_size, ptr_field); + out->ptr_off = bpf_core_field_offset(struct core_reloc_size, ptr_field); + + out->enum_sz = bpf_core_field_size(struct core_reloc_size, enum_field); + out->enum_off = bpf_core_field_offset(struct core_reloc_size, enum_field); + + out->float_sz = bpf_core_field_size(struct core_reloc_size, float_field); + out->float_off = bpf_core_field_offset(struct core_reloc_size, float_field); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_log_fixup.c b/tools/testing/selftests/bpf/progs/test_log_fixup.c index a78980d897b3..60450cb0e72e 100644 --- a/tools/testing/selftests/bpf/progs/test_log_fixup.c +++ b/tools/testing/selftests/bpf/progs/test_log_fixup.c @@ -35,4 +35,30 @@ int bad_relo_subprog(const void *ctx) return bad_subprog() + bpf_core_field_size(t->pid); } +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} existing_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} missing_map SEC(".maps"); + +SEC("?raw_tp/sys_enter") +int use_missing_map(const void *ctx) +{ + int zero = 0, *value; + + value = bpf_map_lookup_elem(&existing_map, &zero); + + value = bpf_map_lookup_elem(&missing_map, &zero); + + return value != NULL; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c b/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c new file mode 100644 index 000000000000..ca827b1092da --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Bytedance */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +__u64 percpu_array_elem_sum = 0; +__u64 percpu_hash_elem_sum = 0; +__u64 percpu_lru_hash_elem_sum = 0; +const volatile int nr_cpus; +const volatile int my_pid; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} percpu_array_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 1); + __type(key, __u64); + __type(value, __u64); +} percpu_hash_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH); + __uint(max_entries, 1); + __type(key, __u64); + __type(value, __u64); +} percpu_lru_hash_map SEC(".maps"); + +struct read_percpu_elem_ctx { + void *map; + __u64 sum; +}; + +static int read_percpu_elem_callback(__u32 index, struct read_percpu_elem_ctx *ctx) +{ + __u64 key = 0; + __u64 *value; + + value = bpf_map_lookup_percpu_elem(ctx->map, &key, index); + if (value) + ctx->sum += *value; + return 0; +} + +SEC("tp/syscalls/sys_enter_getuid") +int 
sysenter_getuid(const void *ctx) +{ + struct read_percpu_elem_ctx map_ctx; + + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + map_ctx.map = &percpu_array_map; + map_ctx.sum = 0; + bpf_loop(nr_cpus, read_percpu_elem_callback, &map_ctx, 0); + percpu_array_elem_sum = map_ctx.sum; + + map_ctx.map = &percpu_hash_map; + map_ctx.sum = 0; + bpf_loop(nr_cpus, read_percpu_elem_callback, &map_ctx, 0); + percpu_hash_elem_sum = map_ctx.sum; + + map_ctx.map = &percpu_lru_hash_map; + map_ctx.sum = 0; + bpf_loop(nr_cpus, read_percpu_elem_callback, &map_ctx, 0); + percpu_lru_hash_elem_sum = map_ctx.sum; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index 50ce16d02da7..08628afedb77 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -64,7 +64,7 @@ int BPF_PROG(handle_fentry, __u32 fentry_manual_read_sz = 0; -SEC("fentry/placeholder") +SEC("fentry") int BPF_PROG(handle_fentry_manual, struct file *file, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c index 0558544e1ff0..5cd7c096f62d 100644 --- a/tools/testing/selftests/bpf/progs/test_pkt_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c @@ -14,8 +14,6 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -#define barrier() __asm__ __volatile__("": : :"memory") - /* llvm will optimize both subprograms into exactly the same BPF assembly * * Disassembly of section .text: diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c index 197b86546dca..e416e0ce12b7 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c @@ -15,6 +15,8 @@ struct sample { struct ringbuf_map { __uint(type, BPF_MAP_TYPE_RINGBUF); + /* libbpf will adjust to valid page size */ + __uint(max_entries, 1000); } ringbuf1 SEC(".maps"), ringbuf2 SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c index b7c37ca09544..f8e9256cf18d 100644 --- a/tools/testing/selftests/bpf/progs/test_subprogs.c +++ b/tools/testing/selftests/bpf/progs/test_subprogs.c @@ -89,6 +89,11 @@ int prog2(void *ctx) return 0; } +static int empty_callback(__u32 index, void *data) +{ + return 0; +} + /* prog3 has the same section name as prog1 */ SEC("raw_tp/sys_enter") int prog3(void *ctx) @@ -98,6 +103,9 @@ int prog3(void *ctx) if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t)) return 1; + /* test that ld_imm64 with BPF_PSEUDO_FUNC doesn't get blinded */ + bpf_loop(1, empty_callback, NULL, 0); + res3 = sub3(5) + 6; /* (5 + 3 + (4 + 1)) + 6 = 19 */ return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_trampoline_count.c b/tools/testing/selftests/bpf/progs/test_trampoline_count.c index f030e469d05b..7765720da7d5 100644 --- a/tools/testing/selftests/bpf/progs/test_trampoline_count.c +++ b/tools/testing/selftests/bpf/progs/test_trampoline_count.c @@ -1,20 +1,22 @@ // SPDX-License-Identifier: GPL-2.0 -#include <stdbool.h> -#include <stddef.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -struct task_struct; +SEC("fentry/bpf_modify_return_test") +int 
BPF_PROG(fentry_test, int a, int *b) +{ + return 0; +} -SEC("fentry/__set_task_comm") -int BPF_PROG(prog1, struct task_struct *tsk, const char *buf, bool exec) +SEC("fmod_ret/bpf_modify_return_test") +int BPF_PROG(fmod_ret_test, int a, int *b, int ret) { return 0; } -SEC("fexit/__set_task_comm") -int BPF_PROG(prog2, struct task_struct *tsk, const char *buf, bool exec) +SEC("fexit/bpf_modify_return_test") +int BPF_PROG(fexit_test, int a, int *b, int ret) { return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index ef0dde83b85a..17f2f325b3f3 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -21,10 +21,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -#define ERROR(ret) do {\ - char fmt[] = "ERROR line:%d ret:%d\n";\ - bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \ - } while (0) +#define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret) struct geneve_opt { __be16 opt_class; @@ -40,8 +37,15 @@ struct vxlan_metadata { __u32 gbp; }; -SEC("gre_set_tunnel") -int _gre_set_tunnel(struct __sk_buff *skb) +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} local_ip_map SEC(".maps"); + +SEC("tc") +int gre_set_tunnel(struct __sk_buff *skb) { int ret; struct bpf_tunnel_key key; @@ -55,32 +59,31 @@ int _gre_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("gre_get_tunnel") -int _gre_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int gre_get_tunnel(struct __sk_buff *skb) { int ret; struct bpf_tunnel_key key; - char fmt[] = "key %d remote ip 0x%x\n"; ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4); + bpf_printk("key %d remote ip 0x%x\n", key.tunnel_id, key.remote_ipv4); return TC_ACT_OK; } -SEC("ip6gretap_set_tunnel") -int _ip6gretap_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip6gretap_set_tunnel(struct __sk_buff *skb) { struct bpf_tunnel_key key; int ret; @@ -96,35 +99,34 @@ int _ip6gretap_set_tunnel(struct __sk_buff *skb) BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("ip6gretap_get_tunnel") -int _ip6gretap_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip6gretap_get_tunnel(struct __sk_buff *skb) { - char fmt[] = "key %d remote ip6 ::%x label %x\n"; struct bpf_tunnel_key key; int ret; ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), - key.tunnel_id, key.remote_ipv6[3], key.tunnel_label); + bpf_printk("key %d remote ip6 ::%x label %x\n", + key.tunnel_id, key.remote_ipv6[3], key.tunnel_label); return TC_ACT_OK; } -SEC("erspan_set_tunnel") -int _erspan_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int erspan_set_tunnel(struct __sk_buff *skb) { struct bpf_tunnel_key key; struct erspan_metadata md; @@ -139,7 +141,7 @@ int _erspan_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } 
@@ -159,17 +161,16 @@ int _erspan_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("erspan_get_tunnel") -int _erspan_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int erspan_get_tunnel(struct __sk_buff *skb) { - char fmt[] = "key %d remote ip 0x%x erspan version %d\n"; struct bpf_tunnel_key key; struct erspan_metadata md; __u32 index; @@ -177,38 +178,34 @@ int _erspan_get_tunnel(struct __sk_buff *skb) ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), - key.tunnel_id, key.remote_ipv4, md.version); + bpf_printk("key %d remote ip 0x%x erspan version %d\n", + key.tunnel_id, key.remote_ipv4, md.version); #ifdef ERSPAN_V1 - char fmt2[] = "\tindex %x\n"; - index = bpf_ntohl(md.u.index); - bpf_trace_printk(fmt2, sizeof(fmt2), index); + bpf_printk("\tindex %x\n", index); #else - char fmt2[] = "\tdirection %d hwid %x timestamp %u\n"; - - bpf_trace_printk(fmt2, sizeof(fmt2), - md.u.md2.dir, - (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, - bpf_ntohl(md.u.md2.timestamp)); + bpf_printk("\tdirection %d hwid %x timestamp %u\n", + md.u.md2.dir, + (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, + bpf_ntohl(md.u.md2.timestamp)); #endif return TC_ACT_OK; } -SEC("ip4ip6erspan_set_tunnel") -int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip4ip6erspan_set_tunnel(struct __sk_buff *skb) { struct bpf_tunnel_key key; struct erspan_metadata md; @@ -223,7 +220,7 @@ int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } @@ -244,17 +241,16 @@ int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("ip4ip6erspan_get_tunnel") -int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip4ip6erspan_get_tunnel(struct __sk_buff *skb) { - char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n"; struct bpf_tunnel_key key; struct erspan_metadata md; __u32 index; @@ -263,44 +259,88 @@ int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb) ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), - key.tunnel_id, key.remote_ipv4, md.version); + bpf_printk("ip6erspan get key %d remote ip6 ::%x erspan version %d\n", + key.tunnel_id, key.remote_ipv4, md.version); #ifdef ERSPAN_V1 - char fmt2[] = "\tindex %x\n"; - index = bpf_ntohl(md.u.index); - bpf_trace_printk(fmt2, sizeof(fmt2), index); + bpf_printk("\tindex %x\n", index); #else - char fmt2[] = "\tdirection %d hwid %x timestamp %u\n"; - - bpf_trace_printk(fmt2, sizeof(fmt2), - md.u.md2.dir, - (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, - bpf_ntohl(md.u.md2.timestamp)); + bpf_printk("\tdirection %d hwid %x timestamp %u\n", + md.u.md2.dir, + (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, + bpf_ntohl(md.u.md2.timestamp)); #endif return TC_ACT_OK; } -SEC("vxlan_set_tunnel") -int 
_vxlan_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int vxlan_set_tunnel_dst(struct __sk_buff *skb) { int ret; struct bpf_tunnel_key key; struct vxlan_metadata md; + __u32 index = 0; + __u32 *local_ip = NULL; + + local_ip = bpf_map_lookup_elem(&local_ip_map, &index); + if (!local_ip) { + log_err(ret); + return TC_ACT_SHOT; + } + + __builtin_memset(&key, 0x0, sizeof(key)); + key.local_ipv4 = 0xac100164; /* 172.16.1.100 */ + key.remote_ipv4 = *local_ip; + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_ZERO_CSUM_TX); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */ + ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("tc") +int vxlan_set_tunnel_src(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + struct vxlan_metadata md; + __u32 index = 0; + __u32 *local_ip = NULL; + + local_ip = bpf_map_lookup_elem(&local_ip_map, &index); + if (!local_ip) { + log_err(ret); + return TC_ACT_SHOT; + } __builtin_memset(&key, 0x0, sizeof(key)); + key.local_ipv4 = *local_ip; key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ key.tunnel_id = 2; key.tunnel_tos = 0; @@ -309,53 +349,106 @@ int _vxlan_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */ ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("vxlan_get_tunnel") -int _vxlan_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int vxlan_get_tunnel_src(struct __sk_buff *skb) { int ret; struct bpf_tunnel_key key; struct vxlan_metadata md; - char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n"; + __u32 index = 0; + __u32 *local_ip = NULL; + + local_ip = bpf_map_lookup_elem(&local_ip_map, &index); + if (!local_ip) { + log_err(ret); + return TC_ACT_SHOT; + } ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), - key.tunnel_id, key.remote_ipv4, md.gbp); + if (key.local_ipv4 != *local_ip || md.gbp != 0x800FF) { + bpf_printk("vxlan key %d local ip 0x%x remote ip 0x%x gbp 0x%x\n", + key.tunnel_id, key.local_ipv4, + key.remote_ipv4, md.gbp); + bpf_printk("local_ip 0x%x\n", *local_ip); + log_err(ret); + return TC_ACT_SHOT; + } return TC_ACT_OK; } -SEC("ip6vxlan_set_tunnel") -int _ip6vxlan_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip6vxlan_set_tunnel_dst(struct __sk_buff *skb) { struct bpf_tunnel_key key; int ret; + __u32 index = 0; + __u32 *local_ip; + + local_ip = bpf_map_lookup_elem(&local_ip_map, &index); + if (!local_ip) { + log_err(ret); + return TC_ACT_SHOT; + } __builtin_memset(&key, 0x0, sizeof(key)); + key.local_ipv6[3] = bpf_htonl(0x11); /* ::11 */ + key.remote_ipv6[3] = bpf_htonl(*local_ip); + key.tunnel_id = 22; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("tc") +int ip6vxlan_set_tunnel_src(struct __sk_buff *skb) +{ + struct bpf_tunnel_key 
key; + int ret; + __u32 index = 0; + __u32 *local_ip; + + local_ip = bpf_map_lookup_elem(&local_ip_map, &index); + if (!local_ip) { + log_err(ret); + return TC_ACT_SHOT; + } + + __builtin_memset(&key, 0x0, sizeof(key)); + key.local_ipv6[3] = bpf_htonl(*local_ip); key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ key.tunnel_id = 22; key.tunnel_tos = 0; @@ -364,35 +457,48 @@ int _ip6vxlan_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("ip6vxlan_get_tunnel") -int _ip6vxlan_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip6vxlan_get_tunnel_src(struct __sk_buff *skb) { - char fmt[] = "key %d remote ip6 ::%x label %x\n"; struct bpf_tunnel_key key; int ret; + __u32 index = 0; + __u32 *local_ip; + + local_ip = bpf_map_lookup_elem(&local_ip_map, &index); + if (!local_ip) { + log_err(ret); + return TC_ACT_SHOT; + } ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), - key.tunnel_id, key.remote_ipv6[3], key.tunnel_label); + if (bpf_ntohl(key.local_ipv6[3]) != *local_ip) { + bpf_printk("ip6vxlan key %d local ip6 ::%x remote ip6 ::%x label 0x%x\n", + key.tunnel_id, bpf_ntohl(key.local_ipv6[3]), + bpf_ntohl(key.remote_ipv6[3]), key.tunnel_label); + bpf_printk("local_ip 0x%x\n", *local_ip); + log_err(ret); + return TC_ACT_SHOT; + } return TC_ACT_OK; } -SEC("geneve_set_tunnel") -int _geneve_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int geneve_set_tunnel(struct __sk_buff *skb) { int ret; struct bpf_tunnel_key key; @@ -416,30 +522,29 @@ int _geneve_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt)); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("geneve_get_tunnel") -int _geneve_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int geneve_get_tunnel(struct __sk_buff *skb) { int ret; struct bpf_tunnel_key key; struct geneve_opt gopt; - char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n"; ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } @@ -447,13 +552,13 @@ int _geneve_get_tunnel(struct __sk_buff *skb) if (ret < 0) gopt.opt_class = 0; - bpf_trace_printk(fmt, sizeof(fmt), - key.tunnel_id, key.remote_ipv4, gopt.opt_class); + bpf_printk("key %d remote ip 0x%x geneve class 0x%x\n", + key.tunnel_id, key.remote_ipv4, gopt.opt_class); return TC_ACT_OK; } -SEC("ip6geneve_set_tunnel") -int _ip6geneve_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip6geneve_set_tunnel(struct __sk_buff *skb) { struct bpf_tunnel_key key; struct geneve_opt gopt; @@ -468,7 +573,7 @@ int _ip6geneve_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } @@ -483,17 +588,16 @@ int _ip6geneve_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt)); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("ip6geneve_get_tunnel") -int _ip6geneve_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip6geneve_get_tunnel(struct __sk_buff *skb) { - char fmt[] = "key %d remote ip 0x%x geneve 
class 0x%x\n"; struct bpf_tunnel_key key; struct geneve_opt gopt; int ret; @@ -501,7 +605,7 @@ int _ip6geneve_get_tunnel(struct __sk_buff *skb) ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } @@ -509,14 +613,14 @@ int _ip6geneve_get_tunnel(struct __sk_buff *skb) if (ret < 0) gopt.opt_class = 0; - bpf_trace_printk(fmt, sizeof(fmt), - key.tunnel_id, key.remote_ipv4, gopt.opt_class); + bpf_printk("key %d remote ip 0x%x geneve class 0x%x\n", + key.tunnel_id, key.remote_ipv4, gopt.opt_class); return TC_ACT_OK; } -SEC("ipip_set_tunnel") -int _ipip_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int ipip_set_tunnel(struct __sk_buff *skb) { struct bpf_tunnel_key key = {}; void *data = (void *)(long)skb->data; @@ -526,7 +630,7 @@ int _ipip_set_tunnel(struct __sk_buff *skb) /* single length check */ if (data + sizeof(*iph) > data_end) { - ERROR(1); + log_err(1); return TC_ACT_SHOT; } @@ -537,32 +641,31 @@ int _ipip_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("ipip_get_tunnel") -int _ipip_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int ipip_get_tunnel(struct __sk_buff *skb) { int ret; struct bpf_tunnel_key key; - char fmt[] = "remote ip 0x%x\n"; ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4); + bpf_printk("remote ip 0x%x\n", key.remote_ipv4); return TC_ACT_OK; } -SEC("ipip6_set_tunnel") -int _ipip6_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int ipip6_set_tunnel(struct __sk_buff *skb) { struct bpf_tunnel_key key = {}; void *data = (void *)(long)skb->data; @@ -572,7 +675,7 @@ int _ipip6_set_tunnel(struct __sk_buff *skb) /* single length check */ if (data + sizeof(*iph) > data_end) { - ERROR(1); + log_err(1); return TC_ACT_SHOT; } @@ -585,34 +688,33 @@ int _ipip6_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("ipip6_get_tunnel") -int _ipip6_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int ipip6_get_tunnel(struct __sk_buff *skb) { int ret; struct bpf_tunnel_key key; - char fmt[] = "remote ip6 %x::%x\n"; ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]), - bpf_htonl(key.remote_ipv6[3])); + bpf_printk("remote ip6 %x::%x\n", bpf_htonl(key.remote_ipv6[0]), + bpf_htonl(key.remote_ipv6[3])); return TC_ACT_OK; } -SEC("ip6ip6_set_tunnel") -int _ip6ip6_set_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip6ip6_set_tunnel(struct __sk_buff *skb) { struct bpf_tunnel_key key = {}; void *data = (void *)(long)skb->data; @@ -622,7 +724,7 @@ int _ip6ip6_set_tunnel(struct __sk_buff *skb) /* single length check */ if (data + sizeof(*iph) > data_end) { - ERROR(1); + log_err(1); return TC_ACT_SHOT; } @@ -634,45 +736,44 @@ int _ip6ip6_set_tunnel(struct __sk_buff *skb) ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } return TC_ACT_OK; } -SEC("ip6ip6_get_tunnel") -int _ip6ip6_get_tunnel(struct __sk_buff *skb) +SEC("tc") +int ip6ip6_get_tunnel(struct __sk_buff *skb) { int 
ret; struct bpf_tunnel_key key; - char fmt[] = "remote ip6 %x::%x\n"; ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); if (ret < 0) { - ERROR(ret); + log_err(ret); return TC_ACT_SHOT; } - bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]), - bpf_htonl(key.remote_ipv6[3])); + bpf_printk("remote ip6 %x::%x\n", bpf_htonl(key.remote_ipv6[0]), + bpf_htonl(key.remote_ipv6[3])); return TC_ACT_OK; } -SEC("xfrm_get_state") -int _xfrm_get_state(struct __sk_buff *skb) +SEC("tc") +int xfrm_get_state(struct __sk_buff *skb) { struct bpf_xfrm_state x; - char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n"; int ret; ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0); if (ret < 0) return TC_ACT_OK; - bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi), - bpf_ntohl(x.remote_ipv4)); + bpf_printk("reqid %d spi 0x%x remote ip 0x%x\n", + x.reqid, bpf_ntohl(x.spi), + bpf_ntohl(x.remote_ipv4)); return TC_ACT_OK; } diff --git a/tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c new file mode 100644 index 000000000000..fc423e43a3cd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022, Oracle and/or its affiliates. */ + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +__u32 perfbuf_val = 0; +__u32 ringbuf_val = 0; + +int test_pid; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} percpu_array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} hash SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} percpu_hash SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __type(key, __u32); + __type(value, __u32); +} perfbuf SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 12); +} ringbuf SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} prog_array SEC(".maps"); + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int sys_nanosleep_enter(void *ctx) +{ + int cur_pid; + + cur_pid = bpf_get_current_pid_tgid() >> 32; + + if (cur_pid != test_pid) + return 0; + + bpf_perf_event_output(ctx, &perfbuf, BPF_F_CURRENT_CPU, &perfbuf_val, sizeof(perfbuf_val)); + bpf_ringbuf_output(&ringbuf, &ringbuf_val, sizeof(ringbuf_val), 0); + + return 0; +} + +SEC("perf_event") +int handle_perf_event(void *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index c536d1d29d57..c639f2e56fc5 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -18,6 +18,93 @@ #include <sys/socket.h> #include <sys/un.h> +static bool verbose(void) +{ + return env.verbosity > VERBOSE_NONE; +} + +static void stdio_hijack_init(char **log_buf, size_t *log_cnt) +{ +#ifdef __GLIBC__ + if (verbose() && env.worker_id == -1) { + /* nothing to do, output to stdout by default 
*/ + return; + } + + fflush(stdout); + fflush(stderr); + + stdout = open_memstream(log_buf, log_cnt); + if (!stdout) { + stdout = env.stdout; + perror("open_memstream"); + return; + } + + if (env.subtest_state) + env.subtest_state->stdout = stdout; + else + env.test_state->stdout = stdout; + + stderr = stdout; +#endif +} + +static void stdio_hijack(char **log_buf, size_t *log_cnt) +{ +#ifdef __GLIBC__ + if (verbose() && env.worker_id == -1) { + /* nothing to do, output to stdout by default */ + return; + } + + env.stdout = stdout; + env.stderr = stderr; + + stdio_hijack_init(log_buf, log_cnt); +#endif +} + +static void stdio_restore_cleanup(void) +{ +#ifdef __GLIBC__ + if (verbose() && env.worker_id == -1) { + /* nothing to do, output to stdout by default */ + return; + } + + fflush(stdout); + + if (env.subtest_state) { + fclose(env.subtest_state->stdout); + env.subtest_state->stdout = NULL; + stdout = env.test_state->stdout; + stderr = env.test_state->stdout; + } else { + fclose(env.test_state->stdout); + env.test_state->stdout = NULL; + } +#endif +} + +static void stdio_restore(void) +{ +#ifdef __GLIBC__ + if (verbose() && env.worker_id == -1) { + /* nothing to do, output to stdout by default */ + return; + } + + if (stdout == env.stdout) + return; + + stdio_restore_cleanup(); + + stdout = env.stdout; + stderr = env.stderr; +#endif +} + /* Adapted from perf/util/string.c */ static bool glob_match(const char *str, const char *pat) { @@ -130,30 +217,98 @@ static bool should_run_subtest(struct test_selector *sel, return subtest_num < subtest_sel->num_set_len && subtest_sel->num_set[subtest_num]; } +static char *test_result(bool failed, bool skipped) +{ + return failed ? "FAIL" : (skipped ? "SKIP" : "OK"); +} + +static void print_test_log(char *log_buf, size_t log_cnt) +{ + log_buf[log_cnt] = '\0'; + fprintf(env.stdout, "%s", log_buf); + if (log_buf[log_cnt - 1] != '\n') + fprintf(env.stdout, "\n"); +} + +#define TEST_NUM_WIDTH 7 + +static void print_test_name(int test_num, const char *test_name, char *result) +{ + fprintf(env.stdout, "#%-*d %s", TEST_NUM_WIDTH, test_num, test_name); + + if (result) + fprintf(env.stdout, ":%s", result); + + fprintf(env.stdout, "\n"); +} + +static void print_subtest_name(int test_num, int subtest_num, + const char *test_name, char *subtest_name, + char *result) +{ + char test_num_str[TEST_NUM_WIDTH + 1]; + + snprintf(test_num_str, sizeof(test_num_str), "%d/%d", test_num, subtest_num); + + fprintf(env.stdout, "#%-*s %s/%s", + TEST_NUM_WIDTH, test_num_str, + test_name, subtest_name); + + if (result) + fprintf(env.stdout, ":%s", result); + + fprintf(env.stdout, "\n"); +} + static void dump_test_log(const struct prog_test_def *test, const struct test_state *test_state, - bool force_failed) + bool skip_ok_subtests, + bool par_exec_result) { - bool failed = test_state->error_cnt > 0 || force_failed; + bool test_failed = test_state->error_cnt > 0; + bool force_log = test_state->force_log; + bool print_test = verbose() || force_log || test_failed; + int i; + struct subtest_state *subtest_state; + bool subtest_failed; + bool subtest_filtered; + bool print_subtest; - /* worker always holds log */ + /* we do not print anything in the worker thread */ if (env.worker_id != -1) return; - fflush(stdout); /* exports test_state->log_buf & test_state->log_cnt */ + /* there is nothing to print when verbose log is used and execution + * is not in parallel mode + */ + if (verbose() && !par_exec_result) + return; + + if (test_state->log_cnt && print_test) + 
print_test_log(test_state->log_buf, test_state->log_cnt); - fprintf(env.stdout, "#%-3d %s:%s\n", - test->test_num, test->test_name, - failed ? "FAIL" : (test_state->skip_cnt ? "SKIP" : "OK")); + for (i = 0; i < test_state->subtest_num; i++) { + subtest_state = &test_state->subtest_states[i]; + subtest_failed = subtest_state->error_cnt; + subtest_filtered = subtest_state->filtered; + print_subtest = verbose() || force_log || subtest_failed; - if (env.verbosity > VERBOSE_NONE || test_state->force_log || failed) { - if (test_state->log_cnt) { - test_state->log_buf[test_state->log_cnt] = '\0'; - fprintf(env.stdout, "%s", test_state->log_buf); - if (test_state->log_buf[test_state->log_cnt - 1] != '\n') - fprintf(env.stdout, "\n"); + if ((skip_ok_subtests && !subtest_failed) || subtest_filtered) + continue; + + if (subtest_state->log_cnt && print_subtest) { + print_test_log(subtest_state->log_buf, + subtest_state->log_cnt); } + + print_subtest_name(test->test_num, i + 1, + test->test_name, subtest_state->name, + test_result(subtest_state->error_cnt, + subtest_state->skipped)); } + + print_test_name(test->test_num, test->test_name, + test_result(test_failed, test_state->skip_cnt)); } static void stdio_restore(void); @@ -205,35 +360,50 @@ static void restore_netns(void) void test__end_subtest(void) { struct prog_test_def *test = env.test; - struct test_state *state = env.test_state; - int sub_error_cnt = state->error_cnt - state->old_error_cnt; - - fprintf(stdout, "#%d/%d %s/%s:%s\n", - test->test_num, state->subtest_num, test->test_name, state->subtest_name, - sub_error_cnt ? "FAIL" : (state->subtest_skip_cnt ? "SKIP" : "OK")); - - if (sub_error_cnt == 0) { - if (state->subtest_skip_cnt == 0) { - state->sub_succ_cnt++; - } else { - state->subtest_skip_cnt = 0; - state->skip_cnt++; - } + struct test_state *test_state = env.test_state; + struct subtest_state *subtest_state = env.subtest_state; + + if (subtest_state->error_cnt) { + test_state->error_cnt++; + } else { + if (!subtest_state->skipped) + test_state->sub_succ_cnt++; + else + test_state->skip_cnt++; } - free(state->subtest_name); - state->subtest_name = NULL; + if (verbose() && !env.workers) + print_subtest_name(test->test_num, test_state->subtest_num, + test->test_name, subtest_state->name, + test_result(subtest_state->error_cnt, + subtest_state->skipped)); + + stdio_restore_cleanup(); + env.subtest_state = NULL; } bool test__start_subtest(const char *subtest_name) { struct prog_test_def *test = env.test; struct test_state *state = env.test_state; + struct subtest_state *subtest_state; + size_t sub_state_size = sizeof(*subtest_state); - if (state->subtest_name) + if (env.subtest_state) test__end_subtest(); state->subtest_num++; + state->subtest_states = + realloc(state->subtest_states, + state->subtest_num * sub_state_size); + if (!state->subtest_states) { + fprintf(stderr, "Not enough memory to allocate subtest result\n"); + return false; + } + + subtest_state = &state->subtest_states[state->subtest_num - 1]; + + memset(subtest_state, 0, sub_state_size); if (!subtest_name || !subtest_name[0]) { fprintf(env.stderr, @@ -242,21 +412,25 @@ bool test__start_subtest(const char *subtest_name) return false; } + subtest_state->name = strdup(subtest_name); + if (!subtest_state->name) { + fprintf(env.stderr, + "Subtest #%d: failed to copy subtest name!\n", + state->subtest_num); + return false; + } + if (!should_run_subtest(&env.test_selector, &env.subtest_selector, state->subtest_num, test->test_name, - subtest_name)) - return false; - - 
state->subtest_name = strdup(subtest_name); - if (!state->subtest_name) { - fprintf(env.stderr, - "Subtest #%d: failed to copy subtest name!\n", - state->subtest_num); + subtest_name)) { + subtest_state->filtered = true; return false; } - state->old_error_cnt = state->error_cnt; + + env.subtest_state = subtest_state; + stdio_hijack_init(&subtest_state->log_buf, &subtest_state->log_cnt); return true; } @@ -268,15 +442,18 @@ void test__force_log(void) void test__skip(void) { - if (env.test_state->subtest_name) - env.test_state->subtest_skip_cnt++; + if (env.subtest_state) + env.subtest_state->skipped = true; else env.test_state->skip_cnt++; } void test__fail(void) { - env.test_state->error_cnt++; + if (env.subtest_state) + env.subtest_state->error_cnt++; + else + env.test_state->error_cnt++; } int test__join_cgroup(const char *path) @@ -455,14 +632,14 @@ static void unload_bpf_testmod(void) fprintf(env.stderr, "Failed to trigger kernel-side RCU sync!\n"); if (delete_module("bpf_testmod", 0)) { if (errno == ENOENT) { - if (env.verbosity > VERBOSE_NONE) + if (verbose()) fprintf(stdout, "bpf_testmod.ko is already unloaded.\n"); return; } fprintf(env.stderr, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno); return; } - if (env.verbosity > VERBOSE_NONE) + if (verbose()) fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n"); } @@ -473,7 +650,7 @@ static int load_bpf_testmod(void) /* ensure previous instance of the module is unloaded */ unload_bpf_testmod(); - if (env.verbosity > VERBOSE_NONE) + if (verbose()) fprintf(stdout, "Loading bpf_testmod.ko...\n"); fd = open("bpf_testmod.ko", O_RDONLY); @@ -488,7 +665,7 @@ static int load_bpf_testmod(void) } close(fd); - if (env.verbosity > VERBOSE_NONE) + if (verbose()) fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n"); return 0; } @@ -655,7 +832,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) } } - if (env->verbosity > VERBOSE_NONE) { + if (verbose()) { if (setenv("SELFTESTS_VERBOSE", "1", 1) == -1) { fprintf(stderr, "Unable to setenv SELFTESTS_VERBOSE=1 (errno=%d)", @@ -696,44 +873,6 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) return 0; } -static void stdio_hijack(char **log_buf, size_t *log_cnt) -{ -#ifdef __GLIBC__ - env.stdout = stdout; - env.stderr = stderr; - - if (env.verbosity > VERBOSE_NONE && env.worker_id == -1) { - /* nothing to do, output to stdout by default */ - return; - } - - /* stdout and stderr -> buffer */ - fflush(stdout); - - stdout = open_memstream(log_buf, log_cnt); - if (!stdout) { - stdout = env.stdout; - perror("open_memstream"); - return; - } - - stderr = stdout; -#endif -} - -static void stdio_restore(void) -{ -#ifdef __GLIBC__ - if (stdout == env.stdout) - return; - - fclose(stdout); - - stdout = env.stdout; - stderr = env.stderr; -#endif -} - /* * Determine if test_progs is running as a "flavored" test runner and switch * into corresponding sub-directory to load correct BPF objects. 
@@ -759,7 +898,7 @@ int cd_flavor_subdir(const char *exec_name) if (!flavor) return 0; flavor++; - if (env.verbosity > VERBOSE_NONE) + if (verbose()) fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor); return chdir(flavor); @@ -812,8 +951,10 @@ void crash_handler(int signum) sz = backtrace(bt, ARRAY_SIZE(bt)); - if (env.test) - dump_test_log(env.test, env.test_state, true); + if (env.test) { + env.test_state->error_cnt++; + dump_test_log(env.test, env.test_state, true, false); + } if (env.stdout) stdio_restore(); if (env.worker_id != -1) @@ -839,13 +980,18 @@ static inline const char *str_msg(const struct msg *msg, char *buf) { switch (msg->type) { case MSG_DO_TEST: - sprintf(buf, "MSG_DO_TEST %d", msg->do_test.test_num); + sprintf(buf, "MSG_DO_TEST %d", msg->do_test.num); break; case MSG_TEST_DONE: sprintf(buf, "MSG_TEST_DONE %d (log: %d)", - msg->test_done.test_num, + msg->test_done.num, msg->test_done.have_log); break; + case MSG_SUBTEST_DONE: + sprintf(buf, "MSG_SUBTEST_DONE %d (log: %d)", + msg->subtest_done.num, + msg->subtest_done.have_log); + break; case MSG_TEST_LOG: sprintf(buf, "MSG_TEST_LOG (cnt: %ld, last: %d)", strlen(msg->test_log.log_buf), @@ -901,12 +1047,14 @@ static void run_one_test(int test_num) test->run_serial_test(); /* ensure last sub-test is finalized properly */ - if (state->subtest_name) + if (env.subtest_state) test__end_subtest(); state->tested = true; - dump_test_log(test, state, false); + if (verbose() && env.worker_id == -1) + print_test_name(test_num + 1, test->test_name, + test_result(state->error_cnt, state->skip_cnt)); reset_affinity(); restore_netns(); @@ -914,6 +1062,8 @@ static void run_one_test(int test_num) cleanup_cgroup_environment(); stdio_restore(); + + dump_test_log(test, state, false, false); } struct dispatch_data { @@ -921,11 +1071,83 @@ struct dispatch_data { int sock_fd; }; +static int read_prog_test_msg(int sock_fd, struct msg *msg, enum msg_type type) +{ + if (recv_message(sock_fd, msg) < 0) + return 1; + + if (msg->type != type) { + printf("%s: unexpected message type %d. 
expected %d\n", __func__, msg->type, type); + return 1; + } + + return 0; +} + +static int dispatch_thread_read_log(int sock_fd, char **log_buf, size_t *log_cnt) +{ + FILE *log_fp = NULL; + int result = 0; + + log_fp = open_memstream(log_buf, log_cnt); + if (!log_fp) + return 1; + + while (true) { + struct msg msg; + + if (read_prog_test_msg(sock_fd, &msg, MSG_TEST_LOG)) { + result = 1; + goto out; + } + + fprintf(log_fp, "%s", msg.test_log.log_buf); + if (msg.test_log.is_last) + break; + } + +out: + fclose(log_fp); + log_fp = NULL; + return result; +} + +static int dispatch_thread_send_subtests(int sock_fd, struct test_state *state) +{ + struct msg msg; + struct subtest_state *subtest_state; + int subtest_num = state->subtest_num; + + state->subtest_states = malloc(subtest_num * sizeof(*subtest_state)); + + for (int i = 0; i < subtest_num; i++) { + subtest_state = &state->subtest_states[i]; + + memset(subtest_state, 0, sizeof(*subtest_state)); + + if (read_prog_test_msg(sock_fd, &msg, MSG_SUBTEST_DONE)) + return 1; + + subtest_state->name = strdup(msg.subtest_done.name); + subtest_state->error_cnt = msg.subtest_done.error_cnt; + subtest_state->skipped = msg.subtest_done.skipped; + subtest_state->filtered = msg.subtest_done.filtered; + + /* collect all logs */ + if (msg.subtest_done.have_log) + if (dispatch_thread_read_log(sock_fd, + &subtest_state->log_buf, + &subtest_state->log_cnt)) + return 1; + } + + return 0; +} + static void *dispatch_thread(void *ctx) { struct dispatch_data *data = ctx; int sock_fd; - FILE *log_fp = NULL; sock_fd = data->sock_fd; @@ -957,8 +1179,9 @@ static void *dispatch_thread(void *ctx) { struct msg msg_do_test; + memset(&msg_do_test, 0, sizeof(msg_do_test)); msg_do_test.type = MSG_DO_TEST; - msg_do_test.do_test.test_num = test_to_run; + msg_do_test.do_test.num = test_to_run; if (send_message(sock_fd, &msg_do_test) < 0) { perror("Fail to send command"); goto done; @@ -967,57 +1190,45 @@ static void *dispatch_thread(void *ctx) } /* wait for test done */ - { - int err; - struct msg msg_test_done; + do { + struct msg msg; - err = recv_message(sock_fd, &msg_test_done); - if (err < 0) - goto error; - if (msg_test_done.type != MSG_TEST_DONE) + if (read_prog_test_msg(sock_fd, &msg, MSG_TEST_DONE)) goto error; - if (test_to_run != msg_test_done.test_done.test_num) + if (test_to_run != msg.test_done.num) goto error; state = &test_states[test_to_run]; state->tested = true; - state->error_cnt = msg_test_done.test_done.error_cnt; - state->skip_cnt = msg_test_done.test_done.skip_cnt; - state->sub_succ_cnt = msg_test_done.test_done.sub_succ_cnt; + state->error_cnt = msg.test_done.error_cnt; + state->skip_cnt = msg.test_done.skip_cnt; + state->sub_succ_cnt = msg.test_done.sub_succ_cnt; + state->subtest_num = msg.test_done.subtest_num; /* collect all logs */ - if (msg_test_done.test_done.have_log) { - log_fp = open_memstream(&state->log_buf, &state->log_cnt); - if (!log_fp) + if (msg.test_done.have_log) { + if (dispatch_thread_read_log(sock_fd, + &state->log_buf, + &state->log_cnt)) goto error; + } - while (true) { - struct msg msg_log; - - if (recv_message(sock_fd, &msg_log) < 0) - goto error; - if (msg_log.type != MSG_TEST_LOG) - goto error; + /* collect all subtests and subtest logs */ + if (!state->subtest_num) + break; - fprintf(log_fp, "%s", msg_log.test_log.log_buf); - if (msg_log.test_log.is_last) - break; - } - fclose(log_fp); - log_fp = NULL; - } - } /* wait for test done */ + if (dispatch_thread_send_subtests(sock_fd, state)) + goto error; + } while (false); 
pthread_mutex_lock(&stdout_output_lock); - dump_test_log(test, state, false); + dump_test_log(test, state, false, true); pthread_mutex_unlock(&stdout_output_lock); } /* while (true) */ error: if (env.debug) fprintf(stderr, "[%d]: Protocol/IO error: %s.\n", data->worker_id, strerror(errno)); - if (log_fp) - fclose(log_fp); done: { struct msg msg_exit; @@ -1052,18 +1263,24 @@ static void calculate_summary_and_print_errors(struct test_env *env) succ_cnt++; } - if (fail_cnt) + /* + * We only print the error log summary when there are failed tests and + * verbose mode is not enabled. Otherwise, results may be inconsistent. + * + */ + if (!verbose() && fail_cnt) { printf("\nAll error logs:\n"); - /* print error logs again */ - for (i = 0; i < prog_test_cnt; i++) { - struct prog_test_def *test = &prog_test_defs[i]; - struct test_state *state = &test_states[i]; + /* print error logs again */ + for (i = 0; i < prog_test_cnt; i++) { + struct prog_test_def *test = &prog_test_defs[i]; + struct test_state *state = &test_states[i]; - if (!state->tested || !state->error_cnt) - continue; + if (!state->tested || !state->error_cnt) + continue; - dump_test_log(test, state, true); + dump_test_log(test, state, true, true); + } } printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", @@ -1154,6 +1371,91 @@ static void server_main(void) } } +static void worker_main_send_log(int sock, char *log_buf, size_t log_cnt) +{ + char *src; + size_t slen; + + src = log_buf; + slen = log_cnt; + while (slen) { + struct msg msg_log; + char *dest; + size_t len; + + memset(&msg_log, 0, sizeof(msg_log)); + msg_log.type = MSG_TEST_LOG; + dest = msg_log.test_log.log_buf; + len = slen >= MAX_LOG_TRUNK_SIZE ? MAX_LOG_TRUNK_SIZE : slen; + memcpy(dest, src, len); + + src += len; + slen -= len; + if (!slen) + msg_log.test_log.is_last = true; + + assert(send_message(sock, &msg_log) >= 0); + } +} + +static void free_subtest_state(struct subtest_state *state) +{ + if (state->log_buf) { + free(state->log_buf); + state->log_buf = NULL; + state->log_cnt = 0; + } + free(state->name); + state->name = NULL; +} + +static int worker_main_send_subtests(int sock, struct test_state *state) +{ + int i, result = 0; + struct msg msg; + struct subtest_state *subtest_state; + + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_SUBTEST_DONE; + + for (i = 0; i < state->subtest_num; i++) { + subtest_state = &state->subtest_states[i]; + + msg.subtest_done.num = i; + + strncpy(msg.subtest_done.name, subtest_state->name, MAX_SUBTEST_NAME); + + msg.subtest_done.error_cnt = subtest_state->error_cnt; + msg.subtest_done.skipped = subtest_state->skipped; + msg.subtest_done.filtered = subtest_state->filtered; + msg.subtest_done.have_log = false; + + if (verbose() || state->force_log || subtest_state->error_cnt) { + if (subtest_state->log_cnt) + msg.subtest_done.have_log = true; + } + + if (send_message(sock, &msg) < 0) { + perror("Fail to send message done"); + result = 1; + goto out; + } + + /* send logs */ + if (msg.subtest_done.have_log) + worker_main_send_log(sock, subtest_state->log_buf, subtest_state->log_cnt); + + free_subtest_state(subtest_state); + free(subtest_state->name); + } + +out: + for (; i < state->subtest_num; i++) + free_subtest_state(&state->subtest_states[i]); + free(state->subtest_states); + return result; +} + static int worker_main(int sock) { save_netns(); @@ -1172,10 +1474,10 @@ static int worker_main(int sock) env.worker_id); goto out; case MSG_DO_TEST: { - int test_to_run = msg.do_test.test_num; + int test_to_run = msg.do_test.num; struct 
prog_test_def *test = &prog_test_defs[test_to_run]; struct test_state *state = &test_states[test_to_run]; - struct msg msg_done; + struct msg msg; if (env.debug) fprintf(stderr, "[%d]: #%d:%s running.\n", @@ -1185,54 +1487,38 @@ static int worker_main(int sock) run_one_test(test_to_run); - memset(&msg_done, 0, sizeof(msg_done)); - msg_done.type = MSG_TEST_DONE; - msg_done.test_done.test_num = test_to_run; - msg_done.test_done.error_cnt = state->error_cnt; - msg_done.test_done.skip_cnt = state->skip_cnt; - msg_done.test_done.sub_succ_cnt = state->sub_succ_cnt; - msg_done.test_done.have_log = false; + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_TEST_DONE; + msg.test_done.num = test_to_run; + msg.test_done.error_cnt = state->error_cnt; + msg.test_done.skip_cnt = state->skip_cnt; + msg.test_done.sub_succ_cnt = state->sub_succ_cnt; + msg.test_done.subtest_num = state->subtest_num; + msg.test_done.have_log = false; - if (env.verbosity > VERBOSE_NONE || state->force_log || state->error_cnt) { + if (verbose() || state->force_log || state->error_cnt) { if (state->log_cnt) - msg_done.test_done.have_log = true; + msg.test_done.have_log = true; } - if (send_message(sock, &msg_done) < 0) { + if (send_message(sock, &msg) < 0) { perror("Fail to send message done"); goto out; } /* send logs */ - if (msg_done.test_done.have_log) { - char *src; - size_t slen; - - src = state->log_buf; - slen = state->log_cnt; - while (slen) { - struct msg msg_log; - char *dest; - size_t len; - - memset(&msg_log, 0, sizeof(msg_log)); - msg_log.type = MSG_TEST_LOG; - dest = msg_log.test_log.log_buf; - len = slen >= MAX_LOG_TRUNK_SIZE ? MAX_LOG_TRUNK_SIZE : slen; - memcpy(dest, src, len); - - src += len; - slen -= len; - if (!slen) - msg_log.test_log.is_last = true; - - assert(send_message(sock, &msg_log) >= 0); - } - } + if (msg.test_done.have_log) + worker_main_send_log(sock, state->log_buf, state->log_cnt); + if (state->log_buf) { free(state->log_buf); state->log_buf = NULL; state->log_cnt = 0; } + + if (state->subtest_num) + if (worker_main_send_subtests(sock, state)) + goto out; + if (env.debug) fprintf(stderr, "[%d]: #%d:%s done.\n", env.worker_id, @@ -1250,6 +1536,23 @@ out: return 0; } +static void free_test_states(void) +{ + int i, j; + + for (i = 0; i < ARRAY_SIZE(prog_test_defs); i++) { + struct test_state *test_state = &test_states[i]; + + for (j = 0; j < test_state->subtest_num; j++) + free_subtest_state(&test_state->subtest_states[j]); + + free(test_state->subtest_states); + free(test_state->log_buf); + test_state->subtest_states = NULL; + test_state->log_buf = NULL; + } +} + int main(int argc, char **argv) { static const struct argp argp = { @@ -1396,6 +1699,8 @@ out: unload_bpf_testmod(); free_test_selector(&env.test_selector); + free_test_selector(&env.subtest_selector); + free_test_states(); if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0) return EXIT_NO_TEST; diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index d3fee3b98888..5fe1365c2bb1 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -64,23 +64,32 @@ struct test_selector { int num_set_len; }; +struct subtest_state { + char *name; + size_t log_cnt; + char *log_buf; + int error_cnt; + bool skipped; + bool filtered; + + FILE *stdout; +}; + struct test_state { bool tested; bool force_log; int error_cnt; int skip_cnt; - int subtest_skip_cnt; int sub_succ_cnt; - char *subtest_name; + struct subtest_state *subtest_states; int subtest_num; - /* store 
counts before subtest started */ - int old_error_cnt; - size_t log_cnt; char *log_buf; + + FILE *stdout; }; struct test_env { @@ -96,7 +105,8 @@ struct test_env { bool list_test_names; struct prog_test_def *test; /* current running test */ - struct test_state *test_state; /* current running test result */ + struct test_state *test_state; /* current running test state */ + struct subtest_state *subtest_state; /* current running subtest state */ FILE *stdout; FILE *stderr; @@ -116,29 +126,40 @@ struct test_env { }; #define MAX_LOG_TRUNK_SIZE 8192 +#define MAX_SUBTEST_NAME 1024 enum msg_type { MSG_DO_TEST = 0, MSG_TEST_DONE = 1, MSG_TEST_LOG = 2, + MSG_SUBTEST_DONE = 3, MSG_EXIT = 255, }; struct msg { enum msg_type type; union { struct { - int test_num; + int num; } do_test; struct { - int test_num; + int num; int sub_succ_cnt; int error_cnt; int skip_cnt; bool have_log; + int subtest_num; } test_done; struct { char log_buf[MAX_LOG_TRUNK_SIZE + 1]; bool is_last; } test_log; + struct { + int num; + char name[MAX_SUBTEST_NAME + 1]; + int error_cnt; + bool skipped; + bool filtered; + bool have_log; + } subtest_done; }; }; diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index 2817d9948d59..e9ebc67d73f7 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -45,6 +45,7 @@ # 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet # 6) Forward the packet to the overlay tnl dev +BPF_PIN_TUNNEL_DIR="/sys/fs/bpf/tc/tunnel" PING_ARG="-c 3 -w 10 -q" ret=0 GREEN='\033[0;92m' @@ -155,52 +156,6 @@ add_ip6erspan_tunnel() ip link set dev $DEV up } -add_vxlan_tunnel() -{ - # Set static ARP entry here because iptables set-mark works - # on L3 packet, as a result not applying to ARP packets, - # causing errors at get_tunnel_{key/opt}. - - # at_ns0 namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE \ - id 2 dstport 4789 gbp remote 172.16.1.200 - ip netns exec at_ns0 \ - ip link set dev $DEV_NS address 52:54:00:d9:01:00 up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 \ - ip neigh add 10.1.1.200 lladdr 52:54:00:d9:02:00 dev $DEV_NS - ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF - - # root namespace - ip link add dev $DEV type $TYPE external gbp dstport 4789 - ip link set dev $DEV address 52:54:00:d9:02:00 up - ip addr add dev $DEV 10.1.1.200/24 - ip neigh add 10.1.1.100 lladdr 52:54:00:d9:01:00 dev $DEV -} - -add_ip6vxlan_tunnel() -{ - #ip netns exec at_ns0 ip -4 addr del 172.16.1.100 dev veth0 - ip netns exec at_ns0 ip -6 addr add ::11/96 dev veth0 - ip netns exec at_ns0 ip link set dev veth0 up - #ip -4 addr del 172.16.1.200 dev veth1 - ip -6 addr add dev veth1 ::22/96 - ip link set dev veth1 up - - # at_ns0 namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE id 22 dstport 4789 \ - local ::11 remote ::22 - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 ip link set dev $DEV_NS up - - # root namespace - ip link add dev $DEV type $TYPE external dstport 4789 - ip addr add dev $DEV 10.1.1.200/24 - ip link set dev $DEV up -} - add_geneve_tunnel() { # at_ns0 namespace @@ -403,58 +358,6 @@ test_ip6erspan() echo -e ${GREEN}"PASS: $TYPE"${NC} } -test_vxlan() -{ - TYPE=vxlan - DEV_NS=vxlan00 - DEV=vxlan11 - ret=0 - - check $TYPE - config_device - add_vxlan_tunnel - attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel - ping $PING_ARG 10.1.1.100 - check_err $? 
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: $TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: $TYPE"${NC} -} - -test_ip6vxlan() -{ - TYPE=vxlan - DEV_NS=ip6vxlan00 - DEV=ip6vxlan11 - ret=0 - - check $TYPE - config_device - add_ip6vxlan_tunnel - ip link set dev veth1 mtu 1500 - attach_bpf $DEV ip6vxlan_set_tunnel ip6vxlan_get_tunnel - # underlay - ping6 $PING_ARG ::11 - # ip4 over ip6 - ping $PING_ARG 10.1.1.100 - check_err $? - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: ip6$TYPE"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: ip6$TYPE"${NC} -} - test_geneve() { TYPE=geneve @@ -641,9 +544,11 @@ test_xfrm_tunnel() config_device > /sys/kernel/debug/tracing/trace setup_xfrm_tunnel + mkdir -p ${BPF_PIN_TUNNEL_DIR} + bpftool prog loadall ./test_tunnel_kern.o ${BPF_PIN_TUNNEL_DIR} tc qdisc add dev veth1 clsact - tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \ - sec xfrm_get_state + tc filter add dev veth1 proto ip ingress bpf da object-pinned \ + ${BPF_PIN_TUNNEL_DIR}/xfrm_get_state ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 sleep 1 grep "reqid 1" /sys/kernel/debug/tracing/trace @@ -666,13 +571,17 @@ attach_bpf() DEV=$1 SET=$2 GET=$3 + mkdir -p ${BPF_PIN_TUNNEL_DIR} + bpftool prog loadall ./test_tunnel_kern.o ${BPF_PIN_TUNNEL_DIR}/ tc qdisc add dev $DEV clsact - tc filter add dev $DEV egress bpf da obj test_tunnel_kern.o sec $SET - tc filter add dev $DEV ingress bpf da obj test_tunnel_kern.o sec $GET + tc filter add dev $DEV egress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$SET + tc filter add dev $DEV ingress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$GET } cleanup() { + rm -rf ${BPF_PIN_TUNNEL_DIR} + ip netns delete at_ns0 2> /dev/null ip link del veth1 2> /dev/null ip link del ipip11 2> /dev/null @@ -681,8 +590,6 @@ cleanup() ip link del gretap11 2> /dev/null ip link del ip6gre11 2> /dev/null ip link del ip6gretap11 2> /dev/null - ip link del vxlan11 2> /dev/null - ip link del ip6vxlan11 2> /dev/null ip link del geneve11 2> /dev/null ip link del ip6geneve11 2> /dev/null ip link del erspan11 2> /dev/null @@ -714,7 +621,6 @@ enable_debug() { echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control - echo 'file vxlan.c +p' > /sys/kernel/debug/dynamic_debug/control echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control } @@ -750,14 +656,6 @@ bpf_tunnel_test() test_ip6erspan v2 errors=$(( $errors + $? )) - echo "Testing VXLAN tunnel..." - test_vxlan - errors=$(( $errors + $? )) - - echo "Testing IP6VXLAN tunnel..." - test_ip6vxlan - errors=$(( $errors + $? )) - echo "Testing GENEVE tunnel..." test_geneve errors=$(( $errors + $? )) diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index cd7bf32e6a17..567500299231 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -43,7 +43,6 @@ # ** veth<xxxx> in root namespace # ** veth<yyyy> in af_xdp<xxxx> namespace # ** namespace af_xdp<xxxx> -# * create a spec file veth.spec that includes this run-time configuration # *** xxxx and yyyy are randomly generated 4 digit numbers used to avoid # conflict with any existing interface # * tests the veth and xsk layers of the topology @@ -77,7 +76,7 @@ . 
xsk_prereqs.sh -while getopts "cvD" flag +while getopts "vD" flag do case "${flag}" in v) verbose=1;; @@ -88,7 +87,7 @@ done TEST_NAME="PREREQUISITES" URANDOM=/dev/urandom -[ ! -e "${URANDOM}" ] && { echo "${URANDOM} not found. Skipping tests."; test_exit 1 1; } +[ ! -e "${URANDOM}" ] && { echo "${URANDOM} not found. Skipping tests."; test_exit $ksft_fail; } VETH0_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4) VETH0=ve${VETH0_POSTFIX} @@ -98,6 +97,13 @@ NS0=root NS1=af_xdp${VETH1_POSTFIX} MTU=1500 +trap ctrl_c INT + +function ctrl_c() { + cleanup_exit ${VETH0} ${VETH1} ${NS1} + exit 1 +} + setup_vethPairs() { if [[ $verbose -eq 1 ]]; then echo "setting up ${VETH0}: namespace: ${NS0}" @@ -110,6 +116,14 @@ setup_vethPairs() { if [[ $verbose -eq 1 ]]; then echo "setting up ${VETH1}: namespace: ${NS1}" fi + + if [[ $busy_poll -eq 1 ]]; then + echo 2 > /sys/class/net/${VETH0}/napi_defer_hard_irqs + echo 200000 > /sys/class/net/${VETH0}/gro_flush_timeout + echo 2 > /sys/class/net/${VETH1}/napi_defer_hard_irqs + echo 200000 > /sys/class/net/${VETH1}/gro_flush_timeout + fi + ip link set ${VETH1} netns ${NS1} ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU} ip link set ${VETH0} mtu ${MTU} @@ -130,17 +144,12 @@ if [ $retval -ne 0 ]; then exit $retval fi -echo "${VETH0}:${VETH1},${NS1}" > ${SPECFILE} - -validate_veth_spec_file - if [[ $verbose -eq 1 ]]; then - echo "Spec file created: ${SPECFILE}" - VERBOSE_ARG="-v" + ARGS+="-v " fi if [[ $dump_pkts -eq 1 ]]; then - DUMP_PKTS_ARG="-D" + ARGS="-D " fi test_status $retval "${TEST_NAME}" @@ -149,23 +158,31 @@ test_status $retval "${TEST_NAME}" statusList=() -TEST_NAME="XSK KSELFTESTS" +TEST_NAME="XSK_SELFTESTS_SOFTIRQ" execxdpxceiver -retval=$? -test_status $retval "${TEST_NAME}" -statusList+=($retval) +cleanup_exit ${VETH0} ${VETH1} ${NS1} +TEST_NAME="XSK_SELFTESTS_BUSY_POLL" +busy_poll=1 + +setup_vethPairs +execxdpxceiver ## END TESTS cleanup_exit ${VETH0} ${VETH1} ${NS1} -for _status in "${statusList[@]}" +failures=0 +echo -e "\nSummary:" +for i in "${!statusList[@]}" do - if [ $_status -ne 0 ]; then - test_exit $ksft_fail 0 + if [ ${statusList[$i]} -ne 0 ]; then + test_status ${statusList[$i]} ${nameList[$i]} + failures=1 fi done -test_exit $ksft_pass 0 +if [ $failures -eq 0 ]; then + echo "All tests successful!" 
+fi diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c index 9113834640e6..6914904344c0 100644 --- a/tools/testing/selftests/bpf/verifier/map_kptr.c +++ b/tools/testing/selftests/bpf/verifier/map_kptr.c @@ -212,13 +212,13 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 24), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 32), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .fixup_map_kptr = { 1 }, .result = REJECT, - .errstr = "access beyond struct prog_test_ref_kfunc at off 24 size 8", + .errstr = "access beyond struct prog_test_ref_kfunc at off 32 size 8", }, { "map_kptr: unref: inherit PTR_UNTRUSTED on struct walk", diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index cfcb031323c5..e5992a6b5e09 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -90,6 +90,8 @@ #include <string.h> #include <stddef.h> #include <sys/mman.h> +#include <sys/socket.h> +#include <sys/time.h> #include <sys/types.h> #include <sys/queue.h> #include <time.h> @@ -122,9 +124,17 @@ static void __exit_with_error(int error, const char *file, const char *func, int #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) #define mode_string(test) (test)->ifobj_tx->xdp_flags & XDP_FLAGS_SKB_MODE ? "SKB" : "DRV" +#define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : "" -#define print_ksft_result(test) \ - (ksft_test_result_pass("PASS: %s %s\n", mode_string(test), (test)->name)) +static void report_failure(struct test_spec *test) +{ + if (test->fail) + return; + + ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test), busy_poll_string(test), + test->name); + test->fail = true; +} static void memset32_htonl(void *dest, u32 val, u32 size) { @@ -264,6 +274,26 @@ static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size return 0; } +static void enable_busy_poll(struct xsk_socket_info *xsk) +{ + int sock_opt; + + sock_opt = 1; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + exit_with_error(errno); + + sock_opt = 20; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + exit_with_error(errno); + + sock_opt = BATCH_SIZE; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + exit_with_error(errno); +} + static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, struct ifobject *ifobject, bool shared) { @@ -287,8 +317,8 @@ static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_inf static struct option long_options[] = { {"interface", required_argument, 0, 'i'}, - {"queue", optional_argument, 0, 'q'}, - {"dump-pkts", optional_argument, 0, 'D'}, + {"busy-poll", no_argument, 0, 'b'}, + {"dump-pkts", no_argument, 0, 'D'}, {"verbose", no_argument, 0, 'v'}, {0, 0, 0, 0} }; @@ -299,9 +329,9 @@ static void usage(const char *prog) " Usage: %s [OPTIONS]\n" " Options:\n" " -i, --interface Use interface\n" - " -q, --queue=n Use queue n (default 0)\n" " -D, --dump-pkts Dump packets L2 - L5\n" - " -v, --verbose Verbose output\n"; + " -v, --verbose Verbose output\n" + " -b, --busy-poll Enable busy poll\n"; ksft_print_msg(str, prog); } @@ -347,7 +377,7 @@ 
static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj for (;;) { char *sptr, *token; - c = getopt_long(argc, argv, "i:Dv", long_options, &option_index); + c = getopt_long(argc, argv, "i:Dvb", long_options, &option_index); if (c == -1) break; @@ -373,6 +403,10 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj case 'v': opt_verbose = true; break; + case 'b': + ifobj_tx->busy_poll = true; + ifobj_rx->busy_poll = true; + break; default: usage(basename(argv[0])); ksft_exit_xfail(); @@ -390,8 +424,10 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, ifobj->xsk = &ifobj->xsk_arr[0]; ifobj->use_poll = false; - ifobj->pacing_on = true; + ifobj->use_fill_ring = true; + ifobj->release_rx = true; ifobj->pkt_stream = test->pkt_stream_default; + ifobj->validation_func = NULL; if (i == 0) { ifobj->rx_on = false; @@ -416,6 +452,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, test->current_step = 0; test->total_steps = 1; test->nb_sockets = 1; + test->fail = false; } static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, @@ -467,9 +504,10 @@ static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb) return &pkt_stream->pkts[pkt_nb]; } -static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream) +static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent) { while (pkt_stream->rx_pkt_nb < pkt_stream->nb_pkts) { + (*pkts_sent)++; if (pkt_stream->pkts[pkt_stream->rx_pkt_nb].valid) return &pkt_stream->pkts[pkt_stream->rx_pkt_nb++]; pkt_stream->rx_pkt_nb++; @@ -485,10 +523,16 @@ static void pkt_stream_delete(struct pkt_stream *pkt_stream) static void pkt_stream_restore_default(struct test_spec *test) { - if (test->ifobj_tx->pkt_stream != test->pkt_stream_default) { + struct pkt_stream *tx_pkt_stream = test->ifobj_tx->pkt_stream; + + if (tx_pkt_stream != test->pkt_stream_default) { pkt_stream_delete(test->ifobj_tx->pkt_stream); test->ifobj_tx->pkt_stream = test->pkt_stream_default; } + + if (test->ifobj_rx->pkt_stream != test->pkt_stream_default && + test->ifobj_rx->pkt_stream != tx_pkt_stream) + pkt_stream_delete(test->ifobj_rx->pkt_stream); test->ifobj_rx->pkt_stream = test->pkt_stream_default; } @@ -510,6 +554,16 @@ static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts) return pkt_stream; } +static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 len) +{ + pkt->addr = addr; + pkt->len = len; + if (len > umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 2 - umem->frame_headroom) + pkt->valid = false; + else + pkt->valid = true; +} + static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len) { struct pkt_stream *pkt_stream; @@ -521,14 +575,9 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb pkt_stream->nb_pkts = nb_pkts; for (i = 0; i < nb_pkts; i++) { - pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size; - pkt_stream->pkts[i].len = pkt_len; + pkt_set(umem, &pkt_stream->pkts[i], (i % umem->num_frames) * umem->frame_size, + pkt_len); pkt_stream->pkts[i].payload = i; - - if (pkt_len > umem->frame_size) - pkt_stream->pkts[i].valid = false; - else - pkt_stream->pkts[i].valid = true; } return pkt_stream; @@ -556,15 +605,27 @@ static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int off u32 i; pkt_stream = pkt_stream_clone(umem, 
test->pkt_stream_default); - for (i = 1; i < test->pkt_stream_default->nb_pkts; i += 2) { - pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size + offset; - pkt_stream->pkts[i].len = pkt_len; - } + for (i = 1; i < test->pkt_stream_default->nb_pkts; i += 2) + pkt_set(umem, &pkt_stream->pkts[i], + (i % umem->num_frames) * umem->frame_size + offset, pkt_len); test->ifobj_tx->pkt_stream = pkt_stream; test->ifobj_rx->pkt_stream = pkt_stream; } +static void pkt_stream_receive_half(struct test_spec *test) +{ + struct xsk_umem_info *umem = test->ifobj_rx->umem; + struct pkt_stream *pkt_stream = test->ifobj_tx->pkt_stream; + u32 i; + + test->ifobj_rx->pkt_stream = pkt_stream_generate(umem, pkt_stream->nb_pkts, + pkt_stream->pkts[0].len); + pkt_stream = test->ifobj_rx->pkt_stream; + for (i = 1; i < pkt_stream->nb_pkts; i += 2) + pkt_stream->pkts[i].valid = false; +} + static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb) { struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb); @@ -575,7 +636,7 @@ static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb) if (!pkt) return NULL; - if (!pkt->valid || pkt->len < PKT_SIZE) + if (!pkt->valid || pkt->len < MIN_PKT_SIZE) return pkt; data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr); @@ -662,8 +723,7 @@ static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt if (offset == expected_offset) return true; - ksft_test_result_fail("ERROR: [%s] expected [%u], got [%u]\n", __func__, expected_offset, - offset); + ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset); return false; } @@ -673,19 +733,18 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr)); if (!pkt) { - ksft_test_result_fail("ERROR: [%s] too many packets received\n", __func__); + ksft_print_msg("[%s] too many packets received\n", __func__); return false; } - if (len < PKT_SIZE) { - /*Do not try to verify packets that are smaller than minimum size. */ + if (len < MIN_PKT_SIZE || pkt->len < MIN_PKT_SIZE) { + /* Do not try to verify packets that are smaller than minimum size. 
*/ return true; } if (pkt->len != len) { - ksft_test_result_fail - ("ERROR: [%s] expected length [%d], got length [%d]\n", - __func__, pkt->len, len); + ksft_print_msg("[%s] expected length [%d], got length [%d]\n", + __func__, pkt->len, len); return false; } @@ -696,9 +755,8 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) pkt_dump(data, PKT_SIZE); if (pkt->payload != seqnum) { - ksft_test_result_fail - ("ERROR: [%s] expected seqnum [%d], got seqnum [%d]\n", - __func__, pkt->payload, seqnum); + ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n", + __func__, pkt->payload, seqnum); return false; } } else { @@ -716,12 +774,25 @@ static void kick_tx(struct xsk_socket_info *xsk) int ret; ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); - if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) + if (ret >= 0) return; + if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) { + usleep(100); + return; + } exit_with_error(errno); } -static void complete_pkts(struct xsk_socket_info *xsk, int batch_size) +static void kick_rx(struct xsk_socket_info *xsk) +{ + int ret; + + ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); + if (ret < 0) + exit_with_error(errno); +} + +static int complete_pkts(struct xsk_socket_info *xsk, int batch_size) { unsigned int rcvd; u32 idx; @@ -734,26 +805,45 @@ static void complete_pkts(struct xsk_socket_info *xsk, int batch_size) if (rcvd > xsk->outstanding_tx) { u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1); - ksft_test_result_fail("ERROR: [%s] Too many packets completed\n", - __func__); + ksft_print_msg("[%s] Too many packets completed\n", __func__); ksft_print_msg("Last completion address: %llx\n", addr); - return; + return TEST_FAILURE; } xsk_ring_cons__release(&xsk->umem->cq, rcvd); xsk->outstanding_tx -= rcvd; } + + return TEST_PASS; } -static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *xsk, - struct pollfd *fds) +static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds) { - struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream); + struct timeval tv_end, tv_now, tv_timeout = {RECV_TMOUT, 0}; + u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkts_sent = 0; + struct pkt_stream *pkt_stream = ifobj->pkt_stream; + struct xsk_socket_info *xsk = ifobj->xsk; struct xsk_umem_info *umem = xsk->umem; - u32 idx_rx = 0, idx_fq = 0, rcvd, i; + struct pkt *pkt; int ret; + ret = gettimeofday(&tv_now, NULL); + if (ret) + exit_with_error(errno); + timeradd(&tv_now, &tv_timeout, &tv_end); + + pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); while (pkt) { + ret = gettimeofday(&tv_now, NULL); + if (ret) + exit_with_error(errno); + if (timercmp(&tv_now, &tv_end, >)) { + ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__); + return TEST_FAILURE; + } + + kick_rx(xsk); + rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); if (!rcvd) { if (xsk_ring_prod__needs_wakeup(&umem->fq)) { @@ -764,54 +854,53 @@ static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info * continue; } - ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); - while (ret != rcvd) { - if (ret < 0) - exit_with_error(-ret); - if (xsk_ring_prod__needs_wakeup(&umem->fq)) { - ret = poll(fds, 1, POLL_TMOUT); + if (ifobj->use_fill_ring) { + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); + while (ret != rcvd) { if (ret < 0) exit_with_error(-ret); + if 
(xsk_ring_prod__needs_wakeup(&umem->fq)) { + ret = poll(fds, 1, POLL_TMOUT); + if (ret < 0) + exit_with_error(-ret); + } + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); } - ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); } for (i = 0; i < rcvd; i++) { const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); u64 addr = desc->addr, orig; - if (!pkt) { - ksft_test_result_fail("ERROR: [%s] Received too many packets.\n", - __func__); - ksft_print_msg("Last packet has addr: %llx len: %u\n", - addr, desc->len); - return; - } - orig = xsk_umem__extract_addr(addr); addr = xsk_umem__add_offset_to_addr(addr); - if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len)) - return; - if (!is_offset_correct(umem, pkt_stream, addr, pkt->addr)) - return; + if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) || + !is_offset_correct(umem, pkt_stream, addr, pkt->addr)) + return TEST_FAILURE; - *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; - pkt = pkt_stream_get_next_rx_pkt(pkt_stream); + if (ifobj->use_fill_ring) + *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; + pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); } - xsk_ring_prod__submit(&umem->fq, rcvd); - xsk_ring_cons__release(&xsk->rx, rcvd); + if (ifobj->use_fill_ring) + xsk_ring_prod__submit(&umem->fq, rcvd); + if (ifobj->release_rx) + xsk_ring_cons__release(&xsk->rx, rcvd); pthread_mutex_lock(&pacing_mutex); - pkts_in_flight -= rcvd; + pkts_in_flight -= pkts_sent; if (pkts_in_flight < umem->num_frames) pthread_cond_signal(&pacing_cond); pthread_mutex_unlock(&pacing_mutex); + pkts_sent = 0; } + + return TEST_PASS; } -static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb) +static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb) { struct xsk_socket_info *xsk = ifobject->xsk; u32 i, idx, valid_pkts = 0; @@ -821,21 +910,22 @@ static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb) for (i = 0; i < BATCH_SIZE; i++) { struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); - struct pkt *pkt = pkt_generate(ifobject, pkt_nb); + struct pkt *pkt = pkt_generate(ifobject, *pkt_nb); if (!pkt) break; tx_desc->addr = pkt->addr; tx_desc->len = pkt->len; - pkt_nb++; + (*pkt_nb)++; if (pkt->valid) valid_pkts++; } pthread_mutex_lock(&pacing_mutex); pkts_in_flight += valid_pkts; - if (ifobject->pacing_on && pkts_in_flight >= ifobject->umem->num_frames - BATCH_SIZE) { + /* pkts_in_flight might be negative if many invalid packets are sent */ + if (pkts_in_flight >= (int)(ifobject->umem->num_frames - BATCH_SIZE)) { kick_tx(xsk); pthread_cond_wait(&pacing_cond, &pacing_mutex); } @@ -843,10 +933,11 @@ static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb) xsk_ring_prod__submit(&xsk->tx, i); xsk->outstanding_tx += valid_pkts; - complete_pkts(xsk, i); + if (complete_pkts(xsk, i)) + return TEST_FAILURE; usleep(10); - return i; + return TEST_PASS; } static void wait_for_tx_completion(struct xsk_socket_info *xsk) @@ -855,7 +946,7 @@ static void wait_for_tx_completion(struct xsk_socket_info *xsk) complete_pkts(xsk, BATCH_SIZE); } -static void send_pkts(struct ifobject *ifobject) +static int send_pkts(struct test_spec *test, struct ifobject *ifobject) { struct pollfd fds = { }; u32 pkt_cnt = 0; @@ -864,6 +955,8 @@ static void send_pkts(struct ifobject *ifobject) fds.events = POLLOUT; while (pkt_cnt < ifobject->pkt_stream->nb_pkts) { + int err; + if (ifobject->use_poll) { int ret; @@ -875,58 +968,95 @@ static void send_pkts(struct ifobject *ifobject) continue; } - pkt_cnt += 
__send_pkts(ifobject, pkt_cnt); + err = __send_pkts(ifobject, &pkt_cnt); + if (err || test->fail) + return TEST_FAILURE; } wait_for_tx_completion(ifobject->xsk); + return TEST_PASS; } -static bool rx_stats_are_valid(struct ifobject *ifobject) +static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats) +{ + int fd = xsk_socket__fd(xsk), err; + socklen_t optlen, expected_len; + + optlen = sizeof(*stats); + err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen); + if (err) { + ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n", + __func__, -err, strerror(-err)); + return TEST_FAILURE; + } + + expected_len = sizeof(struct xdp_statistics); + if (optlen != expected_len) { + ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n", + __func__, expected_len, optlen); + return TEST_FAILURE; + } + + return TEST_PASS; +} + +static int validate_rx_dropped(struct ifobject *ifobject) { - u32 xsk_stat = 0, expected_stat = ifobject->pkt_stream->nb_pkts; struct xsk_socket *xsk = ifobject->xsk->xsk; - int fd = xsk_socket__fd(xsk); struct xdp_statistics stats; - socklen_t optlen; int err; - optlen = sizeof(stats); - err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); - if (err) { - ksft_test_result_fail("ERROR Rx: [%s] getsockopt(XDP_STATISTICS) error %u %s\n", - __func__, -err, strerror(-err)); - return true; - } + kick_rx(ifobject->xsk); - if (optlen == sizeof(struct xdp_statistics)) { - switch (stat_test_type) { - case STAT_TEST_RX_DROPPED: - xsk_stat = stats.rx_dropped; - break; - case STAT_TEST_TX_INVALID: - return true; - case STAT_TEST_RX_FULL: - xsk_stat = stats.rx_ring_full; - if (ifobject->umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) - expected_stat = ifobject->umem->num_frames - RX_FULL_RXQSIZE; - else - expected_stat = XSK_RING_PROD__DEFAULT_NUM_DESCS - RX_FULL_RXQSIZE; - break; - case STAT_TEST_RX_FILL_EMPTY: - xsk_stat = stats.rx_fill_ring_empty_descs; - break; - default: - break; - } + err = get_xsk_stats(xsk, &stats); + if (err) + return TEST_FAILURE; - if (xsk_stat == expected_stat) - return true; - } + if (stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2) + return TEST_PASS; - return false; + return TEST_FAILURE; } -static void tx_stats_validate(struct ifobject *ifobject) +static int validate_rx_full(struct ifobject *ifobject) +{ + struct xsk_socket *xsk = ifobject->xsk->xsk; + struct xdp_statistics stats; + int err; + + usleep(1000); + kick_rx(ifobject->xsk); + + err = get_xsk_stats(xsk, &stats); + if (err) + return TEST_FAILURE; + + if (stats.rx_ring_full) + return TEST_PASS; + + return TEST_FAILURE; +} + +static int validate_fill_empty(struct ifobject *ifobject) +{ + struct xsk_socket *xsk = ifobject->xsk->xsk; + struct xdp_statistics stats; + int err; + + usleep(1000); + kick_rx(ifobject->xsk); + + err = get_xsk_stats(xsk, &stats); + if (err) + return TEST_FAILURE; + + if (stats.rx_fill_ring_empty_descs) + return TEST_PASS; + + return TEST_FAILURE; +} + +static int validate_tx_invalid_descs(struct ifobject *ifobject) { struct xsk_socket *xsk = ifobject->xsk->xsk; int fd = xsk_socket__fd(xsk); @@ -937,16 +1067,18 @@ static void tx_stats_validate(struct ifobject *ifobject) optlen = sizeof(stats); err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); if (err) { - ksft_test_result_fail("ERROR Tx: [%s] getsockopt(XDP_STATISTICS) error %u %s\n", - __func__, -err, strerror(-err)); - return; + ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n", + __func__, -err, strerror(-err)); + return 
TEST_FAILURE; } - if (stats.tx_invalid_descs == ifobject->pkt_stream->nb_pkts) - return; + if (stats.tx_invalid_descs != ifobject->pkt_stream->nb_pkts / 2) { + ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n", + __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts); + return TEST_FAILURE; + } - ksft_test_result_fail("ERROR: [%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n", - __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts); + return TEST_PASS; } static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) @@ -984,6 +1116,9 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) exit_with_error(-ret); usleep(USLEEP_MAX); } + + if (ifobject->busy_poll) + enable_busy_poll(&ifobject->xsk_arr[i]); } ifobject->xsk = &ifobject->xsk_arr[0]; @@ -1016,18 +1151,21 @@ static void *worker_testapp_validate_tx(void *arg) { struct test_spec *test = (struct test_spec *)arg; struct ifobject *ifobject = test->ifobj_tx; + int err; if (test->current_step == 1) thread_common_ops(test, ifobject); print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts, ifobject->ifname); - send_pkts(ifobject); + err = send_pkts(test, ifobject); - if (stat_test_type == STAT_TEST_TX_INVALID) - tx_stats_validate(ifobject); + if (!err && ifobject->validation_func) + err = ifobject->validation_func(ifobject); + if (err) + report_failure(test); - if (test->total_steps == test->current_step) + if (test->total_steps == test->current_step || err) testapp_cleanup_xsk_res(ifobject); pthread_exit(NULL); } @@ -1068,6 +1206,7 @@ static void *worker_testapp_validate_rx(void *arg) struct test_spec *test = (struct test_spec *)arg; struct ifobject *ifobject = test->ifobj_rx; struct pollfd fds = { }; + int err; if (test->current_step == 1) thread_common_ops(test, ifobject); @@ -1079,18 +1218,23 @@ static void *worker_testapp_validate_rx(void *arg) pthread_barrier_wait(&barr); - if (test_type == TEST_TYPE_STATS) - while (!rx_stats_are_valid(ifobject)) - continue; - else - receive_pkts(ifobject->pkt_stream, ifobject->xsk, &fds); + err = receive_pkts(ifobject, &fds); - if (test->total_steps == test->current_step) + if (!err && ifobject->validation_func) + err = ifobject->validation_func(ifobject); + if (err) { + report_failure(test); + pthread_mutex_lock(&pacing_mutex); + pthread_cond_signal(&pacing_cond); + pthread_mutex_unlock(&pacing_mutex); + } + + if (test->total_steps == test->current_step || err) testapp_cleanup_xsk_res(ifobject); pthread_exit(NULL); } -static void testapp_validate_traffic(struct test_spec *test) +static int testapp_validate_traffic(struct test_spec *test) { struct ifobject *ifobj_tx = test->ifobj_tx; struct ifobject *ifobj_rx = test->ifobj_rx; @@ -1115,6 +1259,8 @@ static void testapp_validate_traffic(struct test_spec *test) pthread_join(t1, NULL); pthread_join(t0, NULL); + + return !!test->fail; } static void testapp_teardown(struct test_spec *test) @@ -1123,7 +1269,8 @@ static void testapp_teardown(struct test_spec *test) test_spec_set_name(test, "TEARDOWN"); for (i = 0; i < MAX_TEARDOWN_ITER; i++) { - testapp_validate_traffic(test); + if (testapp_validate_traffic(test)) + return; test_spec_reset(test); } } @@ -1146,7 +1293,8 @@ static void testapp_bidi(struct test_spec *test) test->ifobj_tx->rx_on = true; test->ifobj_rx->tx_on = true; test->total_steps = 2; - testapp_validate_traffic(test); + if (testapp_validate_traffic(test)) + return; print_verbose("Switching Tx/Rx vectors\n"); 
swap_directions(&test->ifobj_rx, &test->ifobj_tx); @@ -1174,7 +1322,8 @@ static void testapp_bpf_res(struct test_spec *test) test_spec_set_name(test, "BPF_RES"); test->total_steps = 2; test->nb_sockets = 2; - testapp_validate_traffic(test); + if (testapp_validate_traffic(test)) + return; swap_xsk_resources(test->ifobj_tx, test->ifobj_rx); testapp_validate_traffic(test); @@ -1187,53 +1336,58 @@ static void testapp_headroom(struct test_spec *test) testapp_validate_traffic(test); } -static void testapp_stats(struct test_spec *test) +static void testapp_stats_rx_dropped(struct test_spec *test) { - int i; + test_spec_set_name(test, "STAT_RX_DROPPED"); + test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size - + XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3; + pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0); + pkt_stream_receive_half(test); + test->ifobj_rx->validation_func = validate_rx_dropped; + testapp_validate_traffic(test); +} - for (i = 0; i < STAT_TEST_TYPE_MAX; i++) { - test_spec_reset(test); - stat_test_type = i; - /* No or few packets will be received so cannot pace packets */ - test->ifobj_tx->pacing_on = false; - - switch (stat_test_type) { - case STAT_TEST_RX_DROPPED: - test_spec_set_name(test, "STAT_RX_DROPPED"); - test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size - - XDP_PACKET_HEADROOM - 1; - testapp_validate_traffic(test); - break; - case STAT_TEST_RX_FULL: - test_spec_set_name(test, "STAT_RX_FULL"); - test->ifobj_rx->xsk->rxqsize = RX_FULL_RXQSIZE; - testapp_validate_traffic(test); - break; - case STAT_TEST_TX_INVALID: - test_spec_set_name(test, "STAT_TX_INVALID"); - pkt_stream_replace(test, DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE); - testapp_validate_traffic(test); +static void testapp_stats_tx_invalid_descs(struct test_spec *test) +{ + test_spec_set_name(test, "STAT_TX_INVALID"); + pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0); + test->ifobj_tx->validation_func = validate_tx_invalid_descs; + testapp_validate_traffic(test); - pkt_stream_restore_default(test); - break; - case STAT_TEST_RX_FILL_EMPTY: - test_spec_set_name(test, "STAT_RX_FILL_EMPTY"); - test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, 0, - MIN_PKT_SIZE); - if (!test->ifobj_rx->pkt_stream) - exit_with_error(ENOMEM); - test->ifobj_rx->pkt_stream->use_addr_for_fill = true; - testapp_validate_traffic(test); - - pkt_stream_restore_default(test); - break; - default: - break; - } - } + pkt_stream_restore_default(test); +} - /* To only see the whole stat set being completed unless an individual test fails. 
*/ - test_spec_set_name(test, "STATS"); +static void testapp_stats_rx_full(struct test_spec *test) +{ + test_spec_set_name(test, "STAT_RX_FULL"); + pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE); + test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, + DEFAULT_UMEM_BUFFERS, PKT_SIZE); + if (!test->ifobj_rx->pkt_stream) + exit_with_error(ENOMEM); + + test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS; + test->ifobj_rx->release_rx = false; + test->ifobj_rx->validation_func = validate_rx_full; + testapp_validate_traffic(test); + + pkt_stream_restore_default(test); +} + +static void testapp_stats_fill_empty(struct test_spec *test) +{ + test_spec_set_name(test, "STAT_RX_FILL_EMPTY"); + pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE); + test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, + DEFAULT_UMEM_BUFFERS, PKT_SIZE); + if (!test->ifobj_rx->pkt_stream) + exit_with_error(ENOMEM); + + test->ifobj_rx->use_fill_ring = false; + test->ifobj_rx->validation_func = validate_fill_empty; + testapp_validate_traffic(test); + + pkt_stream_restore_default(test); } /* Simple test */ @@ -1282,10 +1436,10 @@ static void testapp_single_pkt(struct test_spec *test) static void testapp_invalid_desc(struct test_spec *test) { struct pkt pkts[] = { - /* Zero packet length at address zero allowed */ - {0, 0, 0, true}, - /* Zero packet length allowed */ - {0x1000, 0, 0, true}, + /* Zero packet address allowed */ + {0, PKT_SIZE, 0, true}, + /* Allowed packet */ + {0x1000, PKT_SIZE, 0, true}, /* Straddling the start of umem */ {-2, PKT_SIZE, 0, false}, /* Packet too large */ @@ -1338,14 +1492,18 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char * static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type) { - test_type = type; - - /* reset defaults after potential previous test */ - stat_test_type = -1; - - switch (test_type) { - case TEST_TYPE_STATS: - testapp_stats(test); + switch (type) { + case TEST_TYPE_STATS_RX_DROPPED: + testapp_stats_rx_dropped(test); + break; + case TEST_TYPE_STATS_TX_INVALID_DESCS: + testapp_stats_tx_invalid_descs(test); + break; + case TEST_TYPE_STATS_RX_FULL: + testapp_stats_rx_full(test); + break; + case TEST_TYPE_STATS_FILL_EMPTY: + testapp_stats_fill_empty(test); break; case TEST_TYPE_TEARDOWN: testapp_teardown(test); @@ -1368,7 +1526,7 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_ test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE"); test->ifobj_tx->umem->frame_size = 2048; test->ifobj_rx->umem->frame_size = 2048; - pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE); + pkt_stream_replace(test, DEFAULT_PKT_CNT, PKT_SIZE); testapp_validate_traffic(test); pkt_stream_restore_default(test); @@ -1410,7 +1568,9 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_ break; } - print_ksft_result(test); + if (!test->fail) + ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test), + test->name); } static struct ifobject *ifobject_create(void) @@ -1449,8 +1609,8 @@ int main(int argc, char **argv) { struct pkt_stream *pkt_stream_default; struct ifobject *ifobj_tx, *ifobj_rx; + u32 i, j, failed_tests = 0; struct test_spec test; - u32 i, j; /* Use libbpf 1.0 API mode */ libbpf_set_strict_mode(LIBBPF_STRICT_ALL); @@ -1489,12 +1649,17 @@ int main(int argc, char **argv) test_spec_init(&test, ifobj_tx, ifobj_rx, i); 
run_pkt_test(&test, i, j); usleep(USLEEP_MAX); + + if (test.fail) + failed_tests++; } pkt_stream_delete(pkt_stream_default); ifobject_delete(ifobj_tx); ifobject_delete(ifobj_rx); - ksft_exit_pass(); - return 0; + if (failed_tests) + ksft_exit_fail(); + else + ksft_exit_pass(); } diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h index 62a3e6388632..8f672b0fe0e1 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ b/tools/testing/selftests/bpf/xdpxceiver.h @@ -17,6 +17,16 @@ #define PF_XDP AF_XDP #endif +#ifndef SO_BUSY_POLL_BUDGET +#define SO_BUSY_POLL_BUDGET 70 +#endif + +#ifndef SO_PREFER_BUSY_POLL +#define SO_PREFER_BUSY_POLL 69 +#endif + +#define TEST_PASS 0 +#define TEST_FAILURE -1 #define MAX_INTERFACES 2 #define MAX_INTERFACE_NAME_CHARS 7 #define MAX_INTERFACES_NAMESPACE_CHARS 10 @@ -25,9 +35,10 @@ #define MAX_TEARDOWN_ITER 10 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ sizeof(struct udphdr)) -#define MIN_PKT_SIZE 64 +#define MIN_ETH_PKT_SIZE 64 #define ETH_FCS_SIZE 4 -#define PKT_SIZE (MIN_PKT_SIZE - ETH_FCS_SIZE) +#define MIN_PKT_SIZE (MIN_ETH_PKT_SIZE - ETH_FCS_SIZE) +#define PKT_SIZE (MIN_PKT_SIZE) #define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) #define IP_PKT_VER 0x4 #define IP_PKT_TOS 0x9 @@ -37,6 +48,7 @@ #define SOCK_RECONF_CTR 10 #define BATCH_SIZE 64 #define POLL_TMOUT 1000 +#define RECV_TMOUT 3 #define DEFAULT_PKT_CNT (4 * 1024) #define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4) #define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE) @@ -64,24 +76,16 @@ enum test_type { TEST_TYPE_HEADROOM, TEST_TYPE_TEARDOWN, TEST_TYPE_BIDI, - TEST_TYPE_STATS, + TEST_TYPE_STATS_RX_DROPPED, + TEST_TYPE_STATS_TX_INVALID_DESCS, + TEST_TYPE_STATS_RX_FULL, + TEST_TYPE_STATS_FILL_EMPTY, TEST_TYPE_BPF_RES, TEST_TYPE_MAX }; -enum stat_test_type { - STAT_TEST_RX_DROPPED, - STAT_TEST_TX_INVALID, - STAT_TEST_RX_FULL, - STAT_TEST_RX_FILL_EMPTY, - STAT_TEST_TYPE_MAX -}; - static bool opt_pkt_dump; -static int test_type; - static bool opt_verbose; -static int stat_test_type; struct xsk_umem_info { struct xsk_ring_prod fq; @@ -117,6 +121,8 @@ struct pkt_stream { bool use_addr_for_fill; }; +struct ifobject; +typedef int (*validation_func_t)(struct ifobject *ifobj); typedef void *(*thread_func_t)(void *arg); struct ifobject { @@ -126,6 +132,7 @@ struct ifobject { struct xsk_socket_info *xsk_arr; struct xsk_umem_info *umem; thread_func_t func_ptr; + validation_func_t validation_func; struct pkt_stream *pkt_stream; int ns_fd; int xsk_map_fd; @@ -138,7 +145,9 @@ struct ifobject { bool tx_on; bool rx_on; bool use_poll; - bool pacing_on; + bool busy_poll; + bool use_fill_ring; + bool release_rx; u8 dst_mac[ETH_ALEN]; u8 src_mac[ETH_ALEN]; }; @@ -150,6 +159,7 @@ struct test_spec { u16 total_steps; u16 current_step; u16 nb_sockets; + bool fail; char name[MAX_TEST_NAME_SIZE]; }; @@ -157,6 +167,6 @@ pthread_barrier_t barr; pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER; pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER; -u32 pkts_in_flight; +int pkts_in_flight; #endif /* XDPXCEIVER_H */ diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh index bf29d2549bee..684e813803ec 100755 --- a/tools/testing/selftests/bpf/xsk_prereqs.sh +++ b/tools/testing/selftests/bpf/xsk_prereqs.sh @@ -8,7 +8,6 @@ ksft_xfail=2 ksft_xpass=3 ksft_skip=4 -SPECFILE=veth.spec XSKOBJ=xdpxceiver validate_root_exec() @@ -16,7 +15,7 @@ validate_root_exec() msg="skip all tests:" if [ $UID 
!= 0 ]; then echo $msg must be run as root >&2 - test_exit $ksft_fail 2 + test_exit $ksft_fail else return $ksft_pass fi @@ -27,39 +26,31 @@ validate_veth_support() msg="skip all tests:" if [ $(ip link add $1 type veth 2>/dev/null; echo $?;) != 0 ]; then echo $msg veth kernel support not available >&2 - test_exit $ksft_skip 1 + test_exit $ksft_skip else ip link del $1 return $ksft_pass fi } -validate_veth_spec_file() -{ - if [ ! -f ${SPECFILE} ]; then - test_exit $ksft_skip 1 - fi -} - test_status() { statusval=$1 - if [ $statusval -eq 2 ]; then - echo -e "$2: [ FAIL ]" - elif [ $statusval -eq 1 ]; then - echo -e "$2: [ SKIPPED ]" - elif [ $statusval -eq 0 ]; then - echo -e "$2: [ PASS ]" + if [ $statusval -eq $ksft_fail ]; then + echo "$2: [ FAIL ]" + elif [ $statusval -eq $ksft_skip ]; then + echo "$2: [ SKIPPED ]" + elif [ $statusval -eq $ksft_pass ]; then + echo "$2: [ PASS ]" fi } test_exit() { - retval=$1 - if [ $2 -ne 0 ]; then - test_status $2 $(basename $0) + if [ $1 -ne 0 ]; then + test_status $1 $(basename $0) fi - exit $retval + exit 1 } clear_configs() @@ -74,9 +65,6 @@ clear_configs() #veth node inside NS won't get removed so we explicitly remove it [ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] && { ip link del $1; } - if [ -f ${SPECFILE} ]; then - rm -f ${SPECFILE} - fi } cleanup_exit() @@ -86,10 +74,19 @@ cleanup_exit() validate_ip_utility() { - [ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip 1; } + [ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip; } } execxdpxceiver() { - ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${VERBOSE_ARG} ${DUMP_PKTS_ARG} + if [[ $busy_poll -eq 1 ]]; then + ARGS+="-b " + fi + + ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${ARGS} + + retval=$? + test_status $retval "${TEST_NAME}" + statusList+=($retval) + nameList+=(${TEST_NAME}) } |
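
Note on the new busy-poll path: the header hunk above adds fallback defines for SO_PREFER_BUSY_POLL (69) and SO_BUSY_POLL_BUDGET (70) plus an ifobject->busy_poll flag, and thread_common_ops() now calls enable_busy_poll() on each socket when that flag is set. The body of enable_busy_poll() is not part of the hunks shown here, so the following is only a hedged sketch of what switching an AF_XDP socket fd to preferred busy polling typically looks like with these socket options; the 20 us poll time, the 64-packet budget, and the function name are illustrative assumptions, not the patch's exact values.

/*
 * Sketch only: enable preferred busy polling on an already-created
 * AF_XDP socket fd (e.g. xsk_socket__fd(xsk->xsk)).  Values for the
 * busy-poll time and budget are placeholders chosen for illustration.
 */
#include <errno.h>
#include <sys/socket.h>

#ifndef SO_BUSY_POLL
#define SO_BUSY_POLL 46			/* standard asm-generic/socket.h value */
#endif
#ifndef SO_PREFER_BUSY_POLL
#define SO_PREFER_BUSY_POLL 69
#endif
#ifndef SO_BUSY_POLL_BUDGET
#define SO_BUSY_POLL_BUDGET 70
#endif

static int enable_busy_poll_sketch(int fd)
{
	int sock_opt;

	/* Prefer busy polling over interrupt-driven napi processing. */
	sock_opt = 1;
	if (setsockopt(fd, SOL_SOCKET, SO_PREFER_BUSY_POLL,
		       (void *)&sock_opt, sizeof(sock_opt)) < 0)
		return -errno;

	/* Busy-poll for up to 20 us per syscall (illustrative value). */
	sock_opt = 20;
	if (setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL,
		       (void *)&sock_opt, sizeof(sock_opt)) < 0)
		return -errno;

	/* Process at most 64 packets per busy-poll cycle (illustrative value). */
	sock_opt = 64;
	if (setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL_BUDGET,
		       (void *)&sock_opt, sizeof(sock_opt)) < 0)
		return -errno;

	return 0;
}

In the selftest this would be reached only when the wrapper script passes the new "-b" argument (appended to ARGS in execxdpxceiver() above), which is what sets ifobject->busy_poll before thread_common_ops() runs.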