diff options
Diffstat (limited to 'tools/lib/bpf')
-rw-r--r-- | tools/lib/bpf/.gitignore | 1 | ||||
-rw-r--r-- | tools/lib/bpf/Makefile | 47 | ||||
-rw-r--r-- | tools/lib/bpf/bpf_core_read.h | 169 | ||||
-rw-r--r-- | tools/lib/bpf/bpf_helpers.h | 2 | ||||
-rw-r--r-- | tools/lib/bpf/btf.c | 12 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.c | 72 | ||||
-rw-r--r-- | tools/lib/bpf/xsk.c | 83 |
7 files changed, 267 insertions, 119 deletions
diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore index 8a81b3679d2b..5d4cfac671d5 100644 --- a/tools/lib/bpf/.gitignore +++ b/tools/lib/bpf/.gitignore @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only libbpf_version.h libbpf.pc -FEATURE-DUMP.libbpf libbpf.so.* TAGS tags diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 55bd78b3496f..887a494ad5fc 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -58,28 +58,7 @@ ifndef VERBOSE VERBOSE = 0 endif -FEATURE_USER = .libbpf -FEATURE_TESTS = libelf zlib bpf -FEATURE_DISPLAY = libelf zlib bpf - INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi -FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES) - -check_feat := 1 -NON_CHECK_FEAT_TARGETS := clean TAGS tags cscope help -ifdef MAKECMDGOALS -ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),) - check_feat := 0 -endif -endif - -ifeq ($(check_feat),1) -ifeq ($(FEATURES_DUMP),) -include $(srctree)/tools/build/Makefile.feature -else -include $(FEATURES_DUMP) -endif -endif export prefix libdir src obj @@ -157,7 +136,7 @@ all: fixdep all_cmd: $(CMD_TARGETS) check -$(BPF_IN_SHARED): force elfdep zdep bpfdep $(BPF_HELPER_DEFS) +$(BPF_IN_SHARED): force $(BPF_HELPER_DEFS) @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true @@ -175,7 +154,7 @@ $(BPF_IN_SHARED): force elfdep zdep bpfdep $(BPF_HELPER_DEFS) echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)" -$(BPF_IN_STATIC): force elfdep zdep bpfdep $(BPF_HELPER_DEFS) +$(BPF_IN_STATIC): force $(BPF_HELPER_DEFS) $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h @@ -264,34 +243,16 @@ install_pkgconfig: $(PC_FILE) install: install_lib install_pkgconfig install_headers -### Cleaning rules - -config-clean: - $(call QUIET_CLEAN, feature-detect) - $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null - -clean: config-clean +clean: $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ *~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \ $(SHARED_OBJDIR) $(STATIC_OBJDIR) \ $(addprefix $(OUTPUT), \ *.o *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) *.pc) - $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf - - -PHONY += force elfdep zdep bpfdep cscope tags +PHONY += force cscope tags force: -elfdep: - @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi - -zdep: - @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi - -bpfdep: - @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi - cscope: ls *.c *.h > cscope.files cscope -b -q -I $(srctree)/include -f cscope.out diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index bbcefb3ff5a5..53b3e199fb25 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -195,17 +195,22 @@ enum bpf_enum_value_kind { * (local) BTF, used to record relocation. */ #define bpf_core_read(dst, sz, src) \ - bpf_probe_read_kernel(dst, sz, \ - (const void *)__builtin_preserve_access_index(src)) + bpf_probe_read_kernel(dst, sz, (const void *)__builtin_preserve_access_index(src)) +/* NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. */ +#define bpf_core_read_user(dst, sz, src) \ + bpf_probe_read_user(dst, sz, (const void *)__builtin_preserve_access_index(src)) /* * bpf_core_read_str() is a thin wrapper around bpf_probe_read_str() * additionally emitting BPF CO-RE field relocation for specified source * argument. */ #define bpf_core_read_str(dst, sz, src) \ - bpf_probe_read_kernel_str(dst, sz, \ - (const void *)__builtin_preserve_access_index(src)) + bpf_probe_read_kernel_str(dst, sz, (const void *)__builtin_preserve_access_index(src)) + +/* NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. */ +#define bpf_core_read_user_str(dst, sz, src) \ + bpf_probe_read_user_str(dst, sz, (const void *)__builtin_preserve_access_index(src)) #define ___concat(a, b) a ## b #define ___apply(fn, n) ___concat(fn, n) @@ -264,30 +269,29 @@ enum bpf_enum_value_kind { read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor) /* "recursively" read a sequence of inner pointers using local __t var */ -#define ___rd_first(src, a) ___read(bpf_core_read, &__t, ___type(src), src, a); -#define ___rd_last(...) \ - ___read(bpf_core_read, &__t, \ - ___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__)); -#define ___rd_p1(...) const void *__t; ___rd_first(__VA_ARGS__) -#define ___rd_p2(...) ___rd_p1(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p3(...) ___rd_p2(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p4(...) ___rd_p3(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p5(...) ___rd_p4(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p6(...) ___rd_p5(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p7(...) ___rd_p6(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p8(...) ___rd_p7(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___rd_p9(...) ___rd_p8(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) -#define ___read_ptrs(src, ...) \ - ___apply(___rd_p, ___narg(__VA_ARGS__))(src, __VA_ARGS__) - -#define ___core_read0(fn, dst, src, a) \ +#define ___rd_first(fn, src, a) ___read(fn, &__t, ___type(src), src, a); +#define ___rd_last(fn, ...) \ + ___read(fn, &__t, ___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__)); +#define ___rd_p1(fn, ...) const void *__t; ___rd_first(fn, __VA_ARGS__) +#define ___rd_p2(fn, ...) ___rd_p1(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p3(fn, ...) ___rd_p2(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p4(fn, ...) ___rd_p3(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p5(fn, ...) ___rd_p4(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p6(fn, ...) ___rd_p5(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p7(fn, ...) ___rd_p6(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p8(fn, ...) ___rd_p7(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___rd_p9(fn, ...) ___rd_p8(fn, ___nolast(__VA_ARGS__)) ___rd_last(fn, __VA_ARGS__) +#define ___read_ptrs(fn, src, ...) \ + ___apply(___rd_p, ___narg(__VA_ARGS__))(fn, src, __VA_ARGS__) + +#define ___core_read0(fn, fn_ptr, dst, src, a) \ ___read(fn, dst, ___type(src), src, a); -#define ___core_readN(fn, dst, src, ...) \ - ___read_ptrs(src, ___nolast(__VA_ARGS__)) \ +#define ___core_readN(fn, fn_ptr, dst, src, ...) \ + ___read_ptrs(fn_ptr, src, ___nolast(__VA_ARGS__)) \ ___read(fn, dst, ___type(src, ___nolast(__VA_ARGS__)), __t, \ ___last(__VA_ARGS__)); -#define ___core_read(fn, dst, src, a, ...) \ - ___apply(___core_read, ___empty(__VA_ARGS__))(fn, dst, \ +#define ___core_read(fn, fn_ptr, dst, src, a, ...) \ + ___apply(___core_read, ___empty(__VA_ARGS__))(fn, fn_ptr, dst, \ src, a, ##__VA_ARGS__) /* @@ -295,20 +299,73 @@ enum bpf_enum_value_kind { * BPF_CORE_READ(), in which final field is read into user-provided storage. * See BPF_CORE_READ() below for more details on general usage. */ -#define BPF_CORE_READ_INTO(dst, src, a, ...) \ - ({ \ - ___core_read(bpf_core_read, dst, (src), a, ##__VA_ARGS__) \ - }) +#define BPF_CORE_READ_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_core_read, bpf_core_read, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* + * Variant of BPF_CORE_READ_INTO() for reading from user-space memory. + * + * NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. + */ +#define BPF_CORE_READ_USER_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_core_read_user, bpf_core_read_user, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* Non-CO-RE variant of BPF_CORE_READ_INTO() */ +#define BPF_PROBE_READ_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_probe_read, bpf_probe_read, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* Non-CO-RE variant of BPF_CORE_READ_USER_INTO(). + * + * As no CO-RE relocations are emitted, source types can be arbitrary and are + * not restricted to kernel types only. + */ +#define BPF_PROBE_READ_USER_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_probe_read_user, bpf_probe_read_user, \ + dst, (src), a, ##__VA_ARGS__) \ +}) /* * BPF_CORE_READ_STR_INTO() does same "pointer chasing" as * BPF_CORE_READ() for intermediate pointers, but then executes (and returns * corresponding error code) bpf_core_read_str() for final string read. */ -#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) \ - ({ \ - ___core_read(bpf_core_read_str, dst, (src), a, ##__VA_ARGS__)\ - }) +#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_core_read_str, bpf_core_read, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* + * Variant of BPF_CORE_READ_STR_INTO() for reading from user-space memory. + * + * NOTE: see comments for BPF_CORE_READ_USER() about the proper types use. + */ +#define BPF_CORE_READ_USER_STR_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_core_read_user_str, bpf_core_read_user, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* Non-CO-RE variant of BPF_CORE_READ_STR_INTO() */ +#define BPF_PROBE_READ_STR_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_probe_read_str, bpf_probe_read, \ + dst, (src), a, ##__VA_ARGS__) \ +}) + +/* + * Non-CO-RE variant of BPF_CORE_READ_USER_STR_INTO(). + * + * As no CO-RE relocations are emitted, source types can be arbitrary and are + * not restricted to kernel types only. + */ +#define BPF_PROBE_READ_USER_STR_INTO(dst, src, a, ...) ({ \ + ___core_read(bpf_probe_read_user_str, bpf_probe_read_user, \ + dst, (src), a, ##__VA_ARGS__) \ +}) /* * BPF_CORE_READ() is used to simplify BPF CO-RE relocatable read, especially @@ -334,12 +391,46 @@ enum bpf_enum_value_kind { * N.B. Only up to 9 "field accessors" are supported, which should be more * than enough for any practical purpose. */ -#define BPF_CORE_READ(src, a, ...) \ - ({ \ - ___type((src), a, ##__VA_ARGS__) __r; \ - BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \ - __r; \ - }) +#define BPF_CORE_READ(src, a, ...) ({ \ + ___type((src), a, ##__VA_ARGS__) __r; \ + BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \ + __r; \ +}) + +/* + * Variant of BPF_CORE_READ() for reading from user-space memory. + * + * NOTE: all the source types involved are still *kernel types* and need to + * exist in kernel (or kernel module) BTF, otherwise CO-RE relocation will + * fail. Custom user types are not relocatable with CO-RE. + * The typical situation in which BPF_CORE_READ_USER() might be used is to + * read kernel UAPI types from the user-space memory passed in as a syscall + * input argument. + */ +#define BPF_CORE_READ_USER(src, a, ...) ({ \ + ___type((src), a, ##__VA_ARGS__) __r; \ + BPF_CORE_READ_USER_INTO(&__r, (src), a, ##__VA_ARGS__); \ + __r; \ +}) + +/* Non-CO-RE variant of BPF_CORE_READ() */ +#define BPF_PROBE_READ(src, a, ...) ({ \ + ___type((src), a, ##__VA_ARGS__) __r; \ + BPF_PROBE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \ + __r; \ +}) + +/* + * Non-CO-RE variant of BPF_CORE_READ_USER(). + * + * As no CO-RE relocations are emitted, source types can be arbitrary and are + * not restricted to kernel types only. + */ +#define BPF_PROBE_READ_USER(src, a, ...) ({ \ + ___type((src), a, ##__VA_ARGS__) __r; \ + BPF_PROBE_READ_USER_INTO(&__r, (src), a, ##__VA_ARGS__); \ + __r; \ +}) #endif diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 72b251110c4d..ae6c975e0b87 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -30,7 +30,7 @@ #define SEC(NAME) __attribute__((section(NAME), used)) #ifndef __always_inline -#define __always_inline __attribute__((always_inline)) +#define __always_inline inline __attribute__((always_inline)) #endif #ifndef __noinline #define __noinline __attribute__((noinline)) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 9970a288dda5..d9c10830d749 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -858,6 +858,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, Elf_Scn *scn = NULL; Elf *elf = NULL; GElf_Ehdr ehdr; + size_t shstrndx; if (elf_version(EV_CURRENT) == EV_NONE) { pr_warn("failed to init libelf for %s\n", path); @@ -882,7 +883,14 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, pr_warn("failed to get EHDR from %s\n", path); goto done; } - if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) { + + if (elf_getshdrstrndx(elf, &shstrndx)) { + pr_warn("failed to get section names section index for %s\n", + path); + goto done; + } + + if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL)) { pr_warn("failed to get e_shstrndx from %s\n", path); goto done; } @@ -897,7 +905,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, idx, path); goto done; } - name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name); + name = elf_strptr(elf, shstrndx, sh.sh_name); if (!name) { pr_warn("failed to get section(%d) name from %s\n", idx, path); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6ae748f6ea11..d43cc3f29dae 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -395,7 +395,8 @@ struct extern_desc { unsigned long long addr; /* target btf_id of the corresponding kernel var. */ - int vmlinux_btf_id; + int kernel_btf_obj_fd; + int kernel_btf_id; /* local btf_id of the ksym extern's type. */ __u32 type_id; @@ -883,24 +884,24 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, if (btf_is_ptr(mtype)) { struct bpf_program *prog; - mtype = skip_mods_and_typedefs(btf, mtype->type, &mtype_id); + prog = st_ops->progs[i]; + if (!prog) + continue; + kern_mtype = skip_mods_and_typedefs(kern_btf, kern_mtype->type, &kern_mtype_id); - if (!btf_is_func_proto(mtype) || - !btf_is_func_proto(kern_mtype)) { - pr_warn("struct_ops init_kern %s: non func ptr %s is not supported\n", + + /* mtype->type must be a func_proto which was + * guaranteed in bpf_object__collect_st_ops_relos(), + * so only check kern_mtype for func_proto here. + */ + if (!btf_is_func_proto(kern_mtype)) { + pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n", map->name, mname); return -ENOTSUP; } - prog = st_ops->progs[i]; - if (!prog) { - pr_debug("struct_ops init_kern %s: func ptr %s is not set\n", - map->name, mname); - continue; - } - prog->attach_btf_id = kern_type_id; prog->expected_attach_type = kern_member_idx; @@ -6162,7 +6163,8 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) } else /* EXT_KSYM */ { if (ext->ksym.type_id) { /* typed ksyms */ insn[0].src_reg = BPF_PSEUDO_BTF_ID; - insn[0].imm = ext->ksym.vmlinux_btf_id; + insn[0].imm = ext->ksym.kernel_btf_id; + insn[1].imm = ext->ksym.kernel_btf_obj_fd; } else { /* typeless ksyms */ insn[0].imm = (__u32)ext->ksym.addr; insn[1].imm = ext->ksym.addr >> 32; @@ -7319,7 +7321,8 @@ out: static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) { struct extern_desc *ext; - int i, id; + struct btf *btf; + int i, j, id, btf_fd, err; for (i = 0; i < obj->nr_extern; i++) { const struct btf_type *targ_var, *targ_type; @@ -7331,10 +7334,25 @@ static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) if (ext->type != EXT_KSYM || !ext->ksym.type_id) continue; - id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name, - BTF_KIND_VAR); + btf = obj->btf_vmlinux; + btf_fd = 0; + id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR); + if (id == -ENOENT) { + err = load_module_btfs(obj); + if (err) + return err; + + for (j = 0; j < obj->btf_module_cnt; j++) { + btf = obj->btf_modules[j].btf; + /* we assume module BTF FD is always >0 */ + btf_fd = obj->btf_modules[j].fd; + id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR); + if (id != -ENOENT) + break; + } + } if (id <= 0) { - pr_warn("extern (ksym) '%s': failed to find BTF ID in vmlinux BTF.\n", + pr_warn("extern (ksym) '%s': failed to find BTF ID in kernel BTF(s).\n", ext->name); return -ESRCH; } @@ -7343,24 +7361,19 @@ static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) local_type_id = ext->ksym.type_id; /* find target type_id */ - targ_var = btf__type_by_id(obj->btf_vmlinux, id); - targ_var_name = btf__name_by_offset(obj->btf_vmlinux, - targ_var->name_off); - targ_type = skip_mods_and_typedefs(obj->btf_vmlinux, - targ_var->type, - &targ_type_id); + targ_var = btf__type_by_id(btf, id); + targ_var_name = btf__name_by_offset(btf, targ_var->name_off); + targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id); ret = bpf_core_types_are_compat(obj->btf, local_type_id, - obj->btf_vmlinux, targ_type_id); + btf, targ_type_id); if (ret <= 0) { const struct btf_type *local_type; const char *targ_name, *local_name; local_type = btf__type_by_id(obj->btf, local_type_id); - local_name = btf__name_by_offset(obj->btf, - local_type->name_off); - targ_name = btf__name_by_offset(obj->btf_vmlinux, - targ_type->name_off); + local_name = btf__name_by_offset(obj->btf, local_type->name_off); + targ_name = btf__name_by_offset(btf, targ_type->name_off); pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n", ext->name, local_type_id, @@ -7370,7 +7383,8 @@ static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) } ext->is_set = true; - ext->ksym.vmlinux_btf_id = id; + ext->ksym.kernel_btf_obj_fd = btf_fd; + ext->ksym.kernel_btf_id = id; pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n", ext->name, id, btf_kind_str(targ_var), targ_var_name); } diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index e3e41ceeb1bc..ffbb588724d8 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -46,6 +46,11 @@ #define PF_XDP AF_XDP #endif +enum xsk_prog { + XSK_PROG_FALLBACK, + XSK_PROG_REDIRECT_FLAGS, +}; + struct xsk_umem { struct xsk_ring_prod *fill_save; struct xsk_ring_cons *comp_save; @@ -351,6 +356,54 @@ int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2) DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4) +static enum xsk_prog get_xsk_prog(void) +{ + enum xsk_prog detected = XSK_PROG_FALLBACK; + struct bpf_load_program_attr prog_attr; + struct bpf_create_map_attr map_attr; + __u32 size_out, retval, duration; + char data_in = 0, data_out; + struct bpf_insn insns[] = { + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, XDP_PASS), + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + BPF_EXIT_INSN(), + }; + int prog_fd, map_fd, ret; + + memset(&map_attr, 0, sizeof(map_attr)); + map_attr.map_type = BPF_MAP_TYPE_XSKMAP; + map_attr.key_size = sizeof(int); + map_attr.value_size = sizeof(int); + map_attr.max_entries = 1; + + map_fd = bpf_create_map_xattr(&map_attr); + if (map_fd < 0) + return detected; + + insns[0].imm = map_fd; + + memset(&prog_attr, 0, sizeof(prog_attr)); + prog_attr.prog_type = BPF_PROG_TYPE_XDP; + prog_attr.insns = insns; + prog_attr.insns_cnt = ARRAY_SIZE(insns); + prog_attr.license = "GPL"; + + prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0); + if (prog_fd < 0) { + close(map_fd); + return detected; + } + + ret = bpf_prog_test_run(prog_fd, 0, &data_in, 1, &data_out, &size_out, &retval, &duration); + if (!ret && retval == XDP_PASS) + detected = XSK_PROG_REDIRECT_FLAGS; + close(prog_fd); + close(map_fd); + return detected; +} + static int xsk_load_xdp_prog(struct xsk_socket *xsk) { static const int log_buf_size = 16 * 1024; @@ -358,7 +411,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) char log_buf[log_buf_size]; int err, prog_fd; - /* This is the C-program: + /* This is the fallback C-program: * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) * { * int ret, index = ctx->rx_queue_index; @@ -414,9 +467,31 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) /* The jumps are to this instruction */ BPF_EXIT_INSN(), }; - size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt, + /* This is the post-5.3 kernel C-program: + * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) + * { + * return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS); + * } + */ + struct bpf_insn prog_redirect_flags[] = { + /* r2 = *(u32 *)(r1 + 16) */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), + /* r3 = XDP_PASS */ + BPF_MOV64_IMM(BPF_REG_3, 2), + /* call bpf_redirect_map */ + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + BPF_EXIT_INSN(), + }; + size_t insns_cnt[] = {sizeof(prog) / sizeof(struct bpf_insn), + sizeof(prog_redirect_flags) / sizeof(struct bpf_insn), + }; + struct bpf_insn *progs[] = {prog, prog_redirect_flags}; + enum xsk_prog option = get_xsk_prog(); + + prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, progs[option], insns_cnt[option], "LGPL-2.1 or BSD-2-Clause", 0, log_buf, log_buf_size); if (prog_fd < 0) { @@ -442,7 +517,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk) struct ifreq ifr = {}; int fd, err, ret; - fd = socket(AF_INET, SOCK_DGRAM, 0); + fd = socket(AF_LOCAL, SOCK_DGRAM, 0); if (fd < 0) return -errno; |