| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2022-08-02 20:06:12 +0300 |
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2022-08-02 20:06:12 +0300 |
| commit | 8bb5e7f4dcd9b9ef22a3ea25c9066a8a968f12dd | |
| tree | 0f1383880607a227142f9388a066959926233ff1 /tools/lib | |
| parent | 2a96271fb66c499e4a89d76a89d3d01170c10bef | |
| parent | 7c744d00990ea999d27f306f6db5ccb61b1304b2 | |
Merge branch 'next' into for-linus
Prepare input updates for 5.20 (or 6.0) merge window.
Diffstat (limited to 'tools/lib')
62 files changed, 6715 insertions, 858 deletions
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index a13e9c7f1fc5..e21e1b40b525 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -60,7 +60,7 @@ $(LIBFILE): $(API_IN)
 clean:
 	$(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \
-	find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
+	find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
 
 FORCE:
 
diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c
index 5e6cb9debe37..f0f195207fca 100644
--- a/tools/lib/api/fd/array.c
+++ b/tools/lib/api/fd/array.c
@@ -88,6 +88,23 @@ int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags
 	return pos;
 }
 
+int fdarray__dup_entry_from(struct fdarray *fda, int pos, struct fdarray *from)
+{
+	struct pollfd *entry;
+	int npos;
+
+	if (pos >= from->nr)
+		return -EINVAL;
+
+	entry = &from->entries[pos];
+
+	npos = fdarray__add(fda, entry->fd, entry->events, from->priv[pos].flags);
+	if (npos >= 0)
+		fda->priv[npos] = from->priv[pos];
+
+	return npos;
+}
+
 int fdarray__filter(struct fdarray *fda, short revents,
 		    void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
 		    void *arg)
diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h
index 7fcf21a33c0c..60ad197c8ee9 100644
--- a/tools/lib/api/fd/array.h
+++ b/tools/lib/api/fd/array.h
@@ -42,6 +42,7 @@ struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow);
 void fdarray__delete(struct fdarray *fda);
 
 int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags flags);
+int fdarray__dup_entry_from(struct fdarray *fda, int pos, struct fdarray *from);
 int fdarray__poll(struct fdarray *fda, int timeout);
 int fdarray__filter(struct fdarray *fda, short revents,
 		    void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c
index db466ef7be9d..354f8cdc0880 100644
--- a/tools/lib/bitmap.c
+++ b/tools/lib/bitmap.c
@@ -72,31 +72,31 @@ int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
 	return result != 0;
 }
 
-int __bitmap_equal(const unsigned long *bitmap1,
-		   const unsigned long *bitmap2, unsigned int bits)
+bool __bitmap_equal(const unsigned long *bitmap1,
+		    const unsigned long *bitmap2, unsigned int bits)
 {
 	unsigned int k, lim = bits/BITS_PER_LONG;
 	for (k = 0; k < lim; ++k)
 		if (bitmap1[k] != bitmap2[k])
-			return 0;
+			return false;
 
 	if (bits % BITS_PER_LONG)
 		if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits))
-			return 0;
+			return false;
 
-	return 1;
+	return true;
 }
 
-int __bitmap_intersects(const unsigned long *bitmap1,
-			const unsigned long *bitmap2, unsigned int bits)
+bool __bitmap_intersects(const unsigned long *bitmap1,
+			 const unsigned long *bitmap2, unsigned int bits)
 {
 	unsigned int k, lim = bits/BITS_PER_LONG;
 	for (k = 0; k < lim; ++k)
 		if (bitmap1[k] & bitmap2[k])
-			return 1;
+			return true;
 
 	if (bits % BITS_PER_LONG)
 		if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits))
-			return 1;
-	return 0;
+			return true;
+	return false;
 }
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 94f0a146bb7b..31a1a9015902 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,3 +1,4 @@
 libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
 	    netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
-	    btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o
+	    btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
+	    usdt.o
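The new fdarray__dup_entry_from() above copies a single entry — fd, requested events, and the per-entry priv cookie — from one fdarray into another. A minimal usage sketch (error handling elided):

```c
#include <poll.h>
#include <api/fd/array.h>

static void dup_stdin_entry(void)
{
	struct fdarray *a = fdarray__new(4, 4);
	struct fdarray *b = fdarray__new(4, 4);
	int pos;

	pos = fdarray__add(a, 0 /* stdin */, POLLIN, fdarray_flag__default);
	if (pos >= 0)
		fdarray__dup_entry_from(b, pos, a); /* b now polls stdin too */

	fdarray__delete(a);
	fdarray__delete(b);
}
```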
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index f947b61b2107..a1265b152027 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -60,7 +60,7 @@ ifndef VERBOSE
   VERBOSE = 0
 endif
 
-INCLUDES = -I$(if $(OUTPUT),$(OUTPUT),.) \
+INCLUDES = -I$(or $(OUTPUT),.) \
 	   -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi
 
 export prefix libdir src obj
@@ -127,11 +127,11 @@ TAGS_PROG := $(if $(shell which etags 2>/dev/null),etags,ctags)
 GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
 			   cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \
 			   sed 's/\[.*\]//' | \
-			   awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
+			   awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}' | \
 			   sort -u | wc -l)
 VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
 			      sed 's/\[.*\]//' | \
-			      awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
+			      awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}' | \
 			      grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
 
 CMD_TARGETS = $(LIB_TARGET) $(PC_FILE)
@@ -194,7 +194,7 @@ check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT)
 		     sort -u > $(OUTPUT)libbpf_global_syms.tmp; \
 		readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
 		sed 's/\[.*\]//' | \
-		awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
+		awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}'| \
 		grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \
 		sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \
 		diff -u $(OUTPUT)libbpf_global_syms.tmp \
@@ -239,7 +239,7 @@ install_lib: all_cmd
 
 SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \
 	    bpf_helpers.h bpf_tracing.h bpf_endian.h bpf_core_read.h \
-	    skel_internal.h libbpf_version.h
+	    skel_internal.h libbpf_version.h usdt.bpf.h
 GEN_HDRS := $(BPF_GENERATED)
 
 INSTALL_PFX := $(DESTDIR)$(prefix)/include/bpf
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 550b4cbb6c99..240186aac8e6 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -29,6 +29,7 @@
 #include <errno.h>
 #include <linux/bpf.h>
 #include <linux/filter.h>
+#include <linux/kernel.h>
 #include <limits.h>
 #include <sys/resource.h>
 #include "bpf.h"
@@ -111,7 +112,7 @@ int probe_memcg_account(void)
 		BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns),
 		BPF_EXIT_INSN(),
 	};
-	size_t insn_cnt = sizeof(insns) / sizeof(insns[0]);
+	size_t insn_cnt = ARRAY_SIZE(insns);
 	union bpf_attr attr;
 	int prog_fd;
 
@@ -207,86 +208,6 @@ int bpf_map_create(enum bpf_map_type map_type,
 	return libbpf_err_errno(fd);
 }
 
-int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
-{
-	LIBBPF_OPTS(bpf_map_create_opts, p);
-
-	p.map_flags = create_attr->map_flags;
-	p.numa_node = create_attr->numa_node;
-	p.btf_fd = create_attr->btf_fd;
-	p.btf_key_type_id = create_attr->btf_key_type_id;
-	p.btf_value_type_id = create_attr->btf_value_type_id;
-	p.map_ifindex = create_attr->map_ifindex;
-	if (create_attr->map_type == BPF_MAP_TYPE_STRUCT_OPS)
-		p.btf_vmlinux_value_type_id = create_attr->btf_vmlinux_value_type_id;
-	else
-		p.inner_map_fd = create_attr->inner_map_fd;
-
-	return bpf_map_create(create_attr->map_type, create_attr->name,
-			      create_attr->key_size, create_attr->value_size,
-			      create_attr->max_entries, &p);
-}
-
-int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
-			int key_size, int value_size, int max_entries,
-			__u32 map_flags, int node)
-{
-	LIBBPF_OPTS(bpf_map_create_opts, opts);
-
-	opts.map_flags = map_flags;
-	if (node >= 0) {
-		opts.numa_node = node;
-		opts.map_flags |= BPF_F_NUMA_NODE;
-	}
-
-	return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts);
-}
-
-int bpf_create_map(enum bpf_map_type map_type, int key_size,
-		   int value_size, int max_entries, __u32 map_flags)
-{
-	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags);
-
-	return bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts);
-}
-
-int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
-			int key_size, int value_size, int max_entries,
-			__u32 map_flags)
-{
-	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags);
-
-	return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts);
-}
-
-int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
-			       int key_size, int inner_map_fd, int max_entries,
-			       __u32 map_flags, int node)
-{
-	LIBBPF_OPTS(bpf_map_create_opts, opts);
-
-	opts.inner_map_fd = inner_map_fd;
-	opts.map_flags = map_flags;
-	if (node >= 0) {
-		opts.map_flags |= BPF_F_NUMA_NODE;
-		opts.numa_node = node;
-	}
-
-	return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts);
-}
-
-int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
-			  int key_size, int inner_map_fd, int max_entries,
-			  __u32 map_flags)
-{
-	LIBBPF_OPTS(bpf_map_create_opts, opts,
-		.inner_map_fd = inner_map_fd,
-		.map_flags = map_flags,
-	);
-
-	return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts);
-}
-
 static void *
 alloc_zero_tailing_info(const void *orecord, __u32 cnt,
 			__u32 actual_rec_size, __u32 expected_rec_size)
@@ -638,6 +559,20 @@ int bpf_map_delete_elem(int fd, const void *key)
 	return libbpf_err_errno(ret);
 }
 
+int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags)
+{
+	union bpf_attr attr;
+	int ret;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.map_fd = fd;
+	attr.key = ptr_to_u64(key);
+	attr.flags = flags;
+
+	ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+	return libbpf_err_errno(ret);
+}
+
 int bpf_map_get_next_key(int fd, const void *key, void *next_key)
{
 	union bpf_attr attr;
@@ -754,10 +689,10 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
 		.flags = flags,
 	);
 
-	return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts);
+	return bpf_prog_attach_opts(prog_fd, target_fd, type, &opts);
 }
 
-int bpf_prog_attach_xattr(int prog_fd, int target_fd,
+int bpf_prog_attach_opts(int prog_fd, int target_fd,
 			  enum bpf_attach_type type,
 			  const struct bpf_prog_attach_opts *opts)
 {
@@ -778,6 +713,11 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd,
 	return libbpf_err_errno(ret);
 }
 
+__attribute__((alias("bpf_prog_attach_opts")))
+int bpf_prog_attach_xattr(int prog_fd, int target_fd,
+			  enum bpf_attach_type type,
+			  const struct bpf_prog_attach_opts *opts);
+
 int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
 {
 	union bpf_attr attr;
@@ -811,7 +751,7 @@ int bpf_link_create(int prog_fd, int target_fd,
 {
 	__u32 target_btf_id, iter_info_len;
 	union bpf_attr attr;
-	int fd;
+	int fd, err;
 
 	if (!OPTS_VALID(opts, bpf_link_create_opts))
 		return libbpf_err(-EINVAL);
@@ -848,6 +788,23 @@ int bpf_link_create(int prog_fd, int target_fd,
 		if (!OPTS_ZEROED(opts, perf_event))
 			return libbpf_err(-EINVAL);
 		break;
+	case BPF_TRACE_KPROBE_MULTI:
+		attr.link_create.kprobe_multi.flags = OPTS_GET(opts, kprobe_multi.flags, 0);
+		attr.link_create.kprobe_multi.cnt = OPTS_GET(opts, kprobe_multi.cnt, 0);
+		attr.link_create.kprobe_multi.syms = ptr_to_u64(OPTS_GET(opts, kprobe_multi.syms, 0));
+		attr.link_create.kprobe_multi.addrs = ptr_to_u64(OPTS_GET(opts, kprobe_multi.addrs, 0));
+		attr.link_create.kprobe_multi.cookies = ptr_to_u64(OPTS_GET(opts, kprobe_multi.cookies, 0));
+		if (!OPTS_ZEROED(opts, kprobe_multi))
+			return libbpf_err(-EINVAL);
+		break;
+	case BPF_TRACE_FENTRY:
+	case BPF_TRACE_FEXIT:
+	case BPF_MODIFY_RETURN:
+	case BPF_LSM_MAC:
+		attr.link_create.tracing.cookie = OPTS_GET(opts, tracing.cookie, 0);
+		if (!OPTS_ZEROED(opts, tracing))
+			return libbpf_err(-EINVAL);
+		break;
 	default:
 		if (!OPTS_ZEROED(opts, flags))
 			return libbpf_err(-EINVAL);
@@ -855,7 +812,37 @@ int bpf_link_create(int prog_fd, int target_fd,
 	}
 proceed:
 	fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, sizeof(attr));
-	return libbpf_err_errno(fd);
+	if (fd >= 0)
+		return fd;
+	/* we'll get EINVAL if LINK_CREATE doesn't support attaching fentry
+	 * and other similar programs
+	 */
+	err = -errno;
+	if (err != -EINVAL)
+		return libbpf_err(err);
+
+	/* if user used features not supported by
+	 * BPF_RAW_TRACEPOINT_OPEN command, then just give up immediately
+	 */
+	if (attr.link_create.target_fd || attr.link_create.target_btf_id)
+		return libbpf_err(err);
+	if (!OPTS_ZEROED(opts, sz))
+		return libbpf_err(err);
+
+	/* otherwise, for few select kinds of programs that can be
+	 * attached using BPF_RAW_TRACEPOINT_OPEN command, try that as
+	 * a fallback for older kernels
+	 */
+	switch (attach_type) {
+	case BPF_TRACE_RAW_TP:
+	case BPF_LSM_MAC:
+	case BPF_TRACE_FENTRY:
+	case BPF_TRACE_FEXIT:
+	case BPF_MODIFY_RETURN:
+		return bpf_raw_tracepoint_open(NULL, prog_fd);
+	default:
+		return libbpf_err(err);
+	}
 }
 
 int bpf_link_detach(int link_fd)
@@ -989,6 +976,7 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
 
 	memset(&attr, 0, sizeof(attr));
 	attr.test.prog_fd = prog_fd;
+	attr.test.batch_size = OPTS_GET(opts, batch_size, 0);
 	attr.test.cpu = OPTS_GET(opts, cpu, 0);
 	attr.test.flags = OPTS_GET(opts, flags, 0);
 	attr.test.repeat = OPTS_GET(opts, repeat, 0);
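The bpf_link_create() changes above add BPF_TRACE_KPROBE_MULTI and tracing-cookie handling, plus a BPF_RAW_TRACEPOINT_OPEN fallback for kernels without LINK_CREATE support for fentry-style programs. A hedged sketch of attaching one program to several kprobes through the new opts (the symbol names are illustrative):

```c
#include <bpf/bpf.h>

static int attach_multi(int prog_fd)
{
	const char *syms[] = { "do_unlinkat", "do_rmdir" }; /* example symbols */
	LIBBPF_OPTS(bpf_link_create_opts, opts,
		.kprobe_multi.cnt = 2,
		.kprobe_multi.syms = syms,
	);

	/* target_fd is unused for kprobe-multi links, so pass 0 */
	return bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &opts);
}
```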
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 14e0d97ad2cf..cabc03703e29 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -61,48 +61,6 @@ LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
 			      __u32 max_entries,
 			      const struct bpf_map_create_opts *opts);
 
-struct bpf_create_map_attr {
-	const char *name;
-	enum bpf_map_type map_type;
-	__u32 map_flags;
-	__u32 key_size;
-	__u32 value_size;
-	__u32 max_entries;
-	__u32 numa_node;
-	__u32 btf_fd;
-	__u32 btf_key_type_id;
-	__u32 btf_value_type_id;
-	__u32 map_ifindex;
-	union {
-		__u32 inner_map_fd;
-		__u32 btf_vmlinux_value_type_id;
-	};
-};
-
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
-LIBBPF_API int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
-LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
-				   int key_size, int value_size,
-				   int max_entries, __u32 map_flags, int node);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
-LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
-				   int key_size, int value_size,
-				   int max_entries, __u32 map_flags);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
-LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size,
-			      int value_size, int max_entries, __u32 map_flags);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
-LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type,
-					  const char *name, int key_size,
-					  int inner_map_fd, int max_entries,
-					  __u32 map_flags, int node);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")
-LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type,
-				     const char *name, int key_size,
-				     int inner_map_fd, int max_entries,
-				     __u32 map_flags);
-
 struct bpf_prog_load_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
@@ -244,6 +202,7 @@ LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
 LIBBPF_API int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key,
 						    void *value, __u64 flags);
 LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
+LIBBPF_API int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags);
 LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
 LIBBPF_API int bpf_map_freeze(int fd);
 
@@ -391,6 +350,10 @@ struct bpf_prog_attach_opts {
 LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
 			       enum bpf_attach_type type, unsigned int flags);
+LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int attachable_fd,
+				     enum bpf_attach_type type,
+				     const struct bpf_prog_attach_opts *opts);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_prog_attach_opts() instead")
 LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd,
 				     enum bpf_attach_type type,
 				     const struct bpf_prog_attach_opts *opts);
@@ -409,10 +372,20 @@ struct bpf_link_create_opts {
 		struct {
 			__u64 bpf_cookie;
 		} perf_event;
+		struct {
+			__u32 flags;
+			__u32 cnt;
+			const char **syms;
+			const unsigned long *addrs;
+			const __u64 *cookies;
+		} kprobe_multi;
+		struct {
+			__u64 cookie;
+		} tracing;
 	};
 	size_t :0;
 };
-#define bpf_link_create_opts__last_field perf_event
+#define bpf_link_create_opts__last_field kprobe_multi.cookies
 
 LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
 			       enum bpf_attach_type attach_type,
@@ -449,12 +422,14 @@ struct bpf_prog_test_run_attr {
 			      * out: length of cxt_out */
 };
 
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead")
 LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr);
 
 /*
  * bpf_prog_test_run does not check that data_out is large enough. Consider
- * using bpf_prog_test_run_xattr instead.
+ * using bpf_prog_test_run_opts instead.
  */
+LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead")
 LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
 				 __u32 size, void *data_out, __u32 *size_out,
 				 __u32 *retval, __u32 *duration);
@@ -506,8 +481,9 @@ struct bpf_test_run_opts {
 	__u32 duration; /* out: average per repetition in ns */
 	__u32 flags;
 	__u32 cpu;
+	__u32 batch_size;
 };
-#define bpf_test_run_opts__last_field cpu
+#define bpf_test_run_opts__last_field batch_size
 
 LIBBPF_API int bpf_prog_test_run_opts(int prog_fd,
 				      struct bpf_test_run_opts *opts);
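With the bpf_create_map*() family dropped from bpf.h above, bpf_map_create() plus bpf_map_create_opts is the one remaining map-creation entry point. A minimal migration sketch (the map name and sizes are illustrative):

```c
#include <bpf/bpf.h>

static int create_hash_map(void)
{
	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);

	/* was: bpf_create_map(BPF_MAP_TYPE_HASH, 4, 8, 1024, BPF_F_NO_PREALLOC); */
	return bpf_map_create(BPF_MAP_TYPE_HASH, "example_map",
			      sizeof(__u32), sizeof(__u64), 1024, &opts);
}
```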
diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index e4aa9996a550..fd48b1ff59ca 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -110,21 +110,50 @@ enum bpf_enum_value_kind {
 	val;								      \
 })
 
+#define ___bpf_field_ref1(field)	(field)
+#define ___bpf_field_ref2(type, field)	(((typeof(type) *)0)->field)
+#define ___bpf_field_ref(args...)					    \
+	___bpf_apply(___bpf_field_ref, ___bpf_narg(args))(args)
+
 /*
  * Convenience macro to check that field actually exists in target kernel's.
  * Returns:
  *    1, if matching field is present in target kernel;
  *    0, if no matching field found.
+ *
+ * Supports two forms:
+ *   - field reference through variable access:
+ *       bpf_core_field_exists(p->my_field);
+ *   - field reference through type and field names:
+ *       bpf_core_field_exists(struct my_type, my_field).
 */
-#define bpf_core_field_exists(field)					    \
-	__builtin_preserve_field_info(field, BPF_FIELD_EXISTS)
+#define bpf_core_field_exists(field...)					    \
+	__builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_EXISTS)
 
 /*
  * Convenience macro to get the byte size of a field. Works for integers,
  * struct/unions, pointers, arrays, and enums.
+ *
+ * Supports two forms:
+ *   - field reference through variable access:
+ *       bpf_core_field_size(p->my_field);
+ *   - field reference through type and field names:
+ *       bpf_core_field_size(struct my_type, my_field).
+ */
+#define bpf_core_field_size(field...)					    \
+	__builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_SIZE)
+
+/*
+ * Convenience macro to get field's byte offset.
+ *
+ * Supports two forms:
+ *   - field reference through variable access:
+ *       bpf_core_field_offset(p->my_field);
+ *   - field reference through type and field names:
+ *       bpf_core_field_offset(struct my_type, my_field).
  */
-#define bpf_core_field_size(field)					    \
-	__builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE)
+#define bpf_core_field_offset(field...)					    \
+	__builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_OFFSET)
 
 /*
  * Convenience macro to get BTF type ID of a specified type, using a local BTF
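The reworked macros accept either an lvalue or a type/field pair; the second form avoids materializing a pointer just to name the field. A short sketch of both forms (a BPF-side snippet built against a generated vmlinux.h is assumed):

```c
#include "vmlinux.h"
#include <bpf/bpf_core_read.h>

static __always_inline int fields_demo(struct task_struct *t)
{
	int sz = 0, off;

	if (bpf_core_field_exists(t->comm))                       /* variable form */
		sz = bpf_core_field_size(struct task_struct, comm);   /* type form */

	off = bpf_core_field_offset(struct task_struct, comm);
	return sz + off;
}
```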
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 963b1060d944..fb04eaf367f1 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -76,6 +76,30 @@
 #endif
 
 /*
+ * Compiler (optimization) barrier.
+ */
+#ifndef barrier
+#define barrier() asm volatile("" ::: "memory")
+#endif
+
+/* Variable-specific compiler (optimization) barrier. It's a no-op which makes
+ * compiler believe that there is some black box modification of a given
+ * variable and thus prevents compiler from making extra assumption about its
+ * value and potential simplifications and optimizations on this variable.
+ *
+ * E.g., compiler might often delay or even omit 32-bit to 64-bit casting of
+ * a variable, making some code patterns unverifiable. Putting barrier_var()
+ * in place will ensure that cast is performed before the barrier_var()
+ * invocation, because compiler has to pessimistically assume that embedded
+ * asm section might perform some extra operations on that variable.
+ *
+ * This is a variable-specific variant of more global barrier().
+ */
+#ifndef barrier_var
+#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
+#endif
+
+/*
  * Helper macro to throw a compilation error if __bpf_unreachable() gets
  * built into the resulting code. This works given BPF back end does not
  * implement __builtin_trap(). This is useful to assert that certain paths
@@ -133,7 +157,7 @@ struct bpf_map_def {
 	unsigned int value_size;
 	unsigned int max_entries;
 	unsigned int map_flags;
-};
+} __attribute__((deprecated("use BTF-defined maps in .maps section")));
 
 enum libbpf_pin_type {
 	LIBBPF_PIN_NONE,
@@ -149,6 +173,8 @@ enum libbpf_tristate {
 
 #define __kconfig __attribute__((section(".kconfig")))
 #define __ksym __attribute__((section(".ksyms")))
+#define __kptr __attribute__((btf_type_tag("kptr")))
+#define __kptr_ref __attribute__((btf_type_tag("kptr_ref")))
 
 #ifndef ___bpf_concat
 #define ___bpf_concat(a, b) a ## b
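barrier_var() is typically used to defeat exactly the verifier-unfriendly optimizations the comment above describes, e.g. forcing a bounds-checked index to be fully materialized before use. A hedged sketch:

```c
#include <bpf/bpf_helpers.h>

#define DATA_LEN 64
char data[DATA_LEN] SEC(".data");

static __always_inline char read_at(unsigned int idx)
{
	/* make the compiler commit to idx's value before the bounds check */
	barrier_var(idx);
	if (idx >= DATA_LEN)
		return 0;
	return data[idx];
}
```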
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 90f56b0f585f..01ce121c302d 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -27,6 +27,9 @@
 #elif defined(__TARGET_ARCH_riscv)
 	#define bpf_target_riscv
 	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_arc)
+	#define bpf_target_arc
+	#define bpf_target_defined
 #else
 
 /* Fall back to what the compiler says */
@@ -54,6 +57,9 @@
 #elif defined(__riscv) && __riscv_xlen == 64
 	#define bpf_target_riscv
 	#define bpf_target_defined
+#elif defined(__arc__)
+	#define bpf_target_arc
+	#define bpf_target_defined
 #endif /* no compiler target */
 
 #endif
@@ -76,6 +82,9 @@
 #define __PT_RC_REG ax
 #define __PT_SP_REG sp
 #define __PT_IP_REG ip
+/* syscall uses r10 for PARM4 */
+#define PT_REGS_PARM4_SYSCALL(x) ((x)->r10)
+#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(x, r10)
 
 #else
 
@@ -105,6 +114,9 @@
 #define __PT_RC_REG rax
 #define __PT_SP_REG rsp
 #define __PT_IP_REG rip
+/* syscall uses r10 for PARM4 */
+#define PT_REGS_PARM4_SYSCALL(x) ((x)->r10)
+#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(x, r10)
 
 #endif /* __i386__ */
 
@@ -112,6 +124,10 @@
 
 #elif defined(bpf_target_s390)
 
+struct pt_regs___s390 {
+	unsigned long orig_gpr2;
+};
+
 /* s390 provides user_pt_regs instead of struct pt_regs to userspace */
 #define __PT_REGS_CAST(x) ((const user_pt_regs *)(x))
 #define __PT_PARM1_REG gprs[2]
@@ -124,6 +140,8 @@
 #define __PT_RC_REG gprs[2]
 #define __PT_SP_REG gprs[15]
 #define __PT_IP_REG psw.addr
+#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; })
+#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___s390 *)(x), orig_gpr2)
 
 #elif defined(bpf_target_arm)
 
@@ -140,6 +158,10 @@
 
 #elif defined(bpf_target_arm64)
 
+struct pt_regs___arm64 {
+	unsigned long orig_x0;
+};
+
 /* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */
 #define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x))
 #define __PT_PARM1_REG regs[0]
@@ -152,6 +174,8 @@
 #define __PT_RC_REG regs[0]
 #define __PT_SP_REG sp
 #define __PT_IP_REG pc
+#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma("GCC error \"use PT_REGS_PARM1_CORE_SYSCALL() instead\""); 0l; })
+#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___arm64 *)(x), orig_x0)
 
 #elif defined(bpf_target_mips)
 
@@ -178,6 +202,8 @@
 #define __PT_RC_REG gpr[3]
 #define __PT_SP_REG sp
 #define __PT_IP_REG nip
+/* powerpc does not select ARCH_HAS_SYSCALL_WRAPPER. */
+#define PT_REGS_SYSCALL_REGS(ctx) ctx
 
 #elif defined(bpf_target_sparc)
 
@@ -206,10 +232,29 @@
 #define __PT_PARM4_REG a3
 #define __PT_PARM5_REG a4
 #define __PT_RET_REG ra
-#define __PT_FP_REG fp
+#define __PT_FP_REG s0
 #define __PT_RC_REG a5
 #define __PT_SP_REG sp
-#define __PT_IP_REG epc
+#define __PT_IP_REG pc
+/* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */
+#define PT_REGS_SYSCALL_REGS(ctx) ctx
+
+#elif defined(bpf_target_arc)
+
+/* arc provides struct user_pt_regs instead of struct pt_regs to userspace */
+#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x))
+#define __PT_PARM1_REG scratch.r0
+#define __PT_PARM2_REG scratch.r1
+#define __PT_PARM3_REG scratch.r2
+#define __PT_PARM4_REG scratch.r3
+#define __PT_PARM5_REG scratch.r4
+#define __PT_RET_REG scratch.blink
+#define __PT_FP_REG __unsupported__
+#define __PT_RC_REG scratch.r0
+#define __PT_SP_REG scratch.sp
+#define __PT_IP_REG scratch.ret
+/* arc does not select ARCH_HAS_SYSCALL_WRAPPER. */
+#define PT_REGS_SYSCALL_REGS(ctx) ctx
 
 #endif
 
@@ -263,6 +308,26 @@ struct pt_regs;
 
 #endif
 
+#ifndef PT_REGS_PARM1_SYSCALL
+#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1(x)
+#endif
+#define PT_REGS_PARM2_SYSCALL(x) PT_REGS_PARM2(x)
+#define PT_REGS_PARM3_SYSCALL(x) PT_REGS_PARM3(x)
+#ifndef PT_REGS_PARM4_SYSCALL
+#define PT_REGS_PARM4_SYSCALL(x) PT_REGS_PARM4(x)
+#endif
+#define PT_REGS_PARM5_SYSCALL(x) PT_REGS_PARM5(x)
+
+#ifndef PT_REGS_PARM1_CORE_SYSCALL
+#define PT_REGS_PARM1_CORE_SYSCALL(x) PT_REGS_PARM1_CORE(x)
+#endif
+#define PT_REGS_PARM2_CORE_SYSCALL(x) PT_REGS_PARM2_CORE(x)
+#define PT_REGS_PARM3_CORE_SYSCALL(x) PT_REGS_PARM3_CORE(x)
+#ifndef PT_REGS_PARM4_CORE_SYSCALL
+#define PT_REGS_PARM4_CORE_SYSCALL(x) PT_REGS_PARM4_CORE(x)
+#endif
+#define PT_REGS_PARM5_CORE_SYSCALL(x) PT_REGS_PARM5_CORE(x)
+
 #else /* defined(bpf_target_defined) */
 
 #define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
@@ -290,8 +355,30 @@ struct pt_regs;
 #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
 
+#define PT_REGS_PARM1_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#define PT_REGS_PARM1_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
 #endif /* defined(bpf_target_defined) */
 
+/*
+ * When invoked from a syscall handler kprobe, returns a pointer to a
+ * struct pt_regs containing syscall arguments and suitable for passing to
+ * PT_REGS_PARMn_SYSCALL() and PT_REGS_PARMn_CORE_SYSCALL().
+ */
+#ifndef PT_REGS_SYSCALL_REGS
+/* By default, assume that the arch selects ARCH_HAS_SYSCALL_WRAPPER. */
+#define PT_REGS_SYSCALL_REGS(ctx) ((struct pt_regs *)PT_REGS_PARM1(ctx))
+#endif
+
 #ifndef ___bpf_concat
 #define ___bpf_concat(a, b) a ## b
 #endif
@@ -406,4 +493,39 @@ typeof(name(0)) name(struct pt_regs *ctx)				    \
 }									    \
 static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
 
+#define ___bpf_syscall_args0()           ctx
+#define ___bpf_syscall_args1(x)          ___bpf_syscall_args0(), (void *)PT_REGS_PARM1_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args2(x, args...) ___bpf_syscall_args1(args), (void *)PT_REGS_PARM2_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs)
+#define ___bpf_syscall_args(args...)     ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args)
+
+/*
+ * BPF_KPROBE_SYSCALL is a variant of BPF_KPROBE, which is intended for
+ * tracing syscall functions, like __x64_sys_close. It hides the underlying
+ * platform-specific low-level way of getting syscall input arguments from
+ * struct pt_regs, and provides a familiar typed and named function arguments
+ * syntax and semantics of accessing syscall input parameters.
+ *
+ * Original struct pt_regs* context is preserved as 'ctx' argument. This might
+ * be necessary when using BPF helpers like bpf_perf_event_output().
+ *
+ * This macro relies on BPF CO-RE support.
+ */
+#define BPF_KPROBE_SYSCALL(name, args...)				    \
+name(struct pt_regs *ctx);						    \
+static __attribute__((always_inline)) typeof(name(0))			    \
+____##name(struct pt_regs *ctx, ##args);				    \
+typeof(name(0)) name(struct pt_regs *ctx)				    \
+{									    \
+	struct pt_regs *regs = PT_REGS_SYSCALL_REGS(ctx);		    \
+	_Pragma("GCC diagnostic push")					    \
+	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		    \
+	return ____##name(___bpf_syscall_args(args));			    \
+	_Pragma("GCC diagnostic pop")					    \
+}									    \
+static __attribute__((always_inline)) typeof(name(0))			    \
+____##name(struct pt_regs *ctx, ##args)
+
 #endif
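A usage sketch for the new macro: the generated wrapper fetches the real syscall pt_regs via PT_REGS_SYSCALL_REGS() and reads each argument with CO-RE, so the handler body sees typed, named parameters (the traced symbol and handler name are illustrative):

```c
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>

SEC("kprobe/__x64_sys_openat")
int BPF_KPROBE_SYSCALL(handle_openat, int dfd, const char *filename, int flags)
{
	char buf[64];

	/* 'filename' is a user pointer taken from the syscall's pt_regs */
	bpf_probe_read_user_str(buf, sizeof(buf), filename);
	bpf_printk("openat: %s flags=%d", buf, flags);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
```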
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 9aa19c89f758..bb1e06eb1eca 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1620,20 +1620,37 @@ static int btf_commit_type(struct btf *btf, int data_sz)
 struct btf_pipe {
 	const struct btf *src;
 	struct btf *dst;
+	struct hashmap *str_off_map; /* map string offsets from src to dst */
 };
 
 static int btf_rewrite_str(__u32 *str_off, void *ctx)
 {
 	struct btf_pipe *p = ctx;
-	int off;
+	void *mapped_off;
+	int off, err;
 
 	if (!*str_off) /* nothing to do for empty strings */
 		return 0;
 
+	if (p->str_off_map &&
+	    hashmap__find(p->str_off_map, (void *)(long)*str_off, &mapped_off)) {
+		*str_off = (__u32)(long)mapped_off;
+		return 0;
+	}
+
 	off = btf__add_str(p->dst, btf__str_by_offset(p->src, *str_off));
 	if (off < 0)
 		return off;
 
+	/* Remember string mapping from src to dst. It avoids
+	 * performing expensive string comparisons.
+	 */
+	if (p->str_off_map) {
+		err = hashmap__append(p->str_off_map, (void *)(long)*str_off, (void *)(long)off);
+		if (err)
+			return err;
+	}
+
 	*str_off = off;
 	return 0;
 }
@@ -1680,6 +1697,9 @@ static int btf_rewrite_type_ids(__u32 *type_id, void *ctx)
 	return 0;
 }
 
+static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx);
+static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx);
+
 int btf__add_btf(struct btf *btf, const struct btf *src_btf)
 {
 	struct btf_pipe p = { .src = src_btf, .dst = btf };
@@ -1713,6 +1733,11 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
 	if (!off)
 		return libbpf_err(-ENOMEM);
 
+	/* Map the string offsets from src_btf to the offsets from btf to improve performance */
+	p.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL);
+	if (IS_ERR(p.str_off_map))
+		return libbpf_err(-ENOMEM);
+
 	/* bulk copy types data for all types from src_btf */
 	memcpy(t, src_btf->types_data, data_sz);
 
@@ -1754,6 +1779,8 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
 	btf->hdr->str_off += data_sz;
 	btf->nr_types += cnt;
 
+	hashmap__free(p.str_off_map);
+
 	/* return type ID of the first added BTF type */
 	return btf->start_id + btf->nr_types - cnt;
 err_out:
@@ -1767,6 +1794,8 @@ err_out:
 	 * wasn't modified, so doesn't need restoring, see big comment above */
 	btf->hdr->str_len = old_strs_len;
 
+	hashmap__free(p.str_off_map);
+
 	return libbpf_err(err);
 }
 
@@ -2597,6 +2626,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext,
 	const struct btf_ext_info_sec *sinfo;
 	struct btf_ext_info *ext_info;
 	__u32 info_left, record_size;
+	size_t sec_cnt = 0;
 	/* The start of the info sec (including the __u32 record_size). */
 	void *info;
 
@@ -2660,8 +2690,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext,
 			return -EINVAL;
 		}
 
-		total_record_size = sec_hdrlen +
-				    (__u64)num_records * record_size;
+		total_record_size = sec_hdrlen + (__u64)num_records * record_size;
 		if (info_left < total_record_size) {
 			pr_debug("%s section has incorrect num_records in .BTF.ext\n",
 				 ext_sec->desc);
@@ -2670,12 +2699,14 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext,
 
 		info_left -= total_record_size;
 		sinfo = (void *)sinfo + total_record_size;
+		sec_cnt++;
 	}
 
 	ext_info = ext_sec->ext_info;
 	ext_info->len = ext_sec->len - sizeof(__u32);
 	ext_info->rec_size = record_size;
 	ext_info->info = info + sizeof(__u32);
+	ext_info->sec_cnt = sec_cnt;
 
 	return 0;
 }
@@ -2759,6 +2790,9 @@ void btf_ext__free(struct btf_ext *btf_ext)
 {
 	if (IS_ERR_OR_NULL(btf_ext))
 		return;
+	free(btf_ext->func_info.sec_idxs);
+	free(btf_ext->line_info.sec_idxs);
+	free(btf_ext->core_relo_info.sec_idxs);
 	free(btf_ext->data);
 	free(btf_ext);
 }
@@ -2797,10 +2831,8 @@ struct btf_ext *btf_ext__new(const __u8 *data, __u32 size)
 	if (err)
 		goto done;
 
-	if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) {
-		err = -EINVAL;
-		goto done;
-	}
+	if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
+		goto done; /* skip core relos parsing */
 
 	err = btf_ext_setup_core_relos(btf_ext);
 	if (err)
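The btf__add_btf() change above builds a temporary src-offset → dst-offset hashmap so each source string is deduplicated once instead of through repeated string lookups. The caller-visible API is unchanged; a minimal sketch of merging one BTF object into another:

```c
#include <bpf/btf.h>

static int merge_btf(struct btf *dst, const struct btf *src)
{
	int first_id;

	/* appends all types from src; returns the ID of the first added type */
	first_id = btf__add_btf(dst, src);
	return first_id < 0 ? first_id : 0;
}
```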
instead") -LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const void *btf__raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); +LIBBPF_DEPRECATED_SINCE(0, 7, "this API is not necessary when BTF-defined maps are used") LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, __u32 expected_key_size, __u32 expected_value_size, @@ -159,8 +158,7 @@ LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size); LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); -LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, - __u32 *size); +LIBBPF_API const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size); LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions") int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ext, @@ -171,8 +169,10 @@ int btf_ext__reloc_line_info(const struct btf *btf, const struct btf_ext *btf_ext, const char *sec_name, __u32 insns_cnt, void **line_info, __u32 *cnt); -LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); -LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); +LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info is deprecated; write custom func_info parsing to fetch rec_size") +__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); +LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info is deprecated; write custom line_info parsing to fetch rec_size") +__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); LIBBPF_API int btf__find_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_str(struct btf *btf, const char *s); @@ -375,8 +375,28 @@ btf_dump__dump_type_data(struct btf_dump *d, __u32 id, const struct btf_dump_type_data_opts *opts); /* - * A set of helpers for easier BTF types handling + * A set of helpers for easier BTF types handling. + * + * The inline functions below rely on constants from the kernel headers which + * may not be available for applications including this header file. To avoid + * compilation errors, we define all the constants here that were added after + * the initial introduction of the BTF_KIND* constants. */ +#ifndef BTF_KIND_FUNC +#define BTF_KIND_FUNC 12 /* Function */ +#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ +#endif +#ifndef BTF_KIND_VAR +#define BTF_KIND_VAR 14 /* Variable */ +#define BTF_KIND_DATASEC 15 /* Section */ +#endif +#ifndef BTF_KIND_FLOAT +#define BTF_KIND_FLOAT 16 /* Floating point */ +#endif +/* The kernel header switched to enums, so these two were never #defined */ +#define BTF_KIND_DECL_TAG 17 /* Decl Tag */ +#define BTF_KIND_TYPE_TAG 18 /* Type Tag */ + static inline __u16 btf_kind(const struct btf_type *t) { return BTF_INFO_KIND(t->info); diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index b9a3260c83cb..6b1bc1f43728 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -1505,6 +1505,11 @@ static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id, if (s->name_resolved) return *cached_name ? 
*cached_name : orig_name; + if (btf_is_fwd(t) || (btf_is_enum(t) && btf_vlen(t) == 0)) { + s->name_resolved = 1; + return orig_name; + } + dup_cnt = btf_dump_name_dups(d, name_map, orig_name); if (dup_cnt > 1) { const size_t max_len = 256; @@ -1861,14 +1866,16 @@ static int btf_dump_array_data(struct btf_dump *d, { const struct btf_array *array = btf_array(t); const struct btf_type *elem_type; - __u32 i, elem_size = 0, elem_type_id; + __u32 i, elem_type_id; + __s64 elem_size; bool is_array_member; elem_type_id = array->type; elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL); elem_size = btf__resolve_size(d->btf, elem_type_id); if (elem_size <= 0) { - pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id); + pr_warn("unexpected elem size %zd for array type [%u]\n", + (ssize_t)elem_size, id); return -EINVAL; } diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 8ecef1088ba2..927745b08014 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -1043,18 +1043,27 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, value = add_data(gen, pvalue, value_size); key = add_data(gen, &zero, sizeof(zero)); - /* if (map_desc[map_idx].initial_value) - * copy_from_user(value, initial_value, value_size); + /* if (map_desc[map_idx].initial_value) { + * if (ctx->flags & BPF_SKEL_KERNEL) + * bpf_probe_read_kernel(value, value_size, initial_value); + * else + * bpf_copy_from_user(value, value_size, initial_value); + * } */ emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, sizeof(struct bpf_loader_ctx) + sizeof(struct bpf_map_desc) * map_idx + offsetof(struct bpf_map_desc, initial_value))); - emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 4)); + emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 8)); emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, value)); emit(gen, BPF_MOV64_IMM(BPF_REG_2, value_size)); + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, + offsetof(struct bpf_loader_ctx, flags))); + emit(gen, BPF_JMP_IMM(BPF_JSET, BPF_REG_0, BPF_SKEL_KERNEL, 2)); emit(gen, BPF_EMIT_CALL(BPF_FUNC_copy_from_user)); + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel)); map_update_attr = add_data(gen, &attr, attr_size); move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c index 3c20b126d60d..aeb09c288716 100644 --- a/tools/lib/bpf/hashmap.c +++ b/tools/lib/bpf/hashmap.c @@ -75,7 +75,7 @@ void hashmap__clear(struct hashmap *map) void hashmap__free(struct hashmap *map) { - if (!map) + if (IS_ERR_OR_NULL(map)) return; hashmap__clear(map); @@ -238,4 +238,3 @@ bool hashmap__delete(struct hashmap *map, const void *key, return true; } - diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7f10dd501a52..e89cc9c885b3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -156,14 +156,6 @@ enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE; int libbpf_set_strict_mode(enum libbpf_strict_mode mode) { - /* __LIBBPF_STRICT_LAST is the last power-of-2 value used + 1, so to - * get all possible values we compensate last +1, and then (2*x - 1) - * to get the bit mask - */ - if (mode != LIBBPF_STRICT_ALL - && (mode & ~((__LIBBPF_STRICT_LAST - 1) * 2 - 1))) - return errno = EINVAL, -EINVAL; - libbpf_mode = mode; return 0; } @@ -209,12 +201,6 @@ struct reloc_desc { }; }; -struct bpf_sec_def; - -typedef int (*init_fn_t)(struct bpf_program *prog, long cookie); 
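The hashmap__free() change pairs with the btf.c code above: hashmap__new() reports allocation failure as an ERR_PTR rather than NULL, and btf__add_btf() now stores that result in p.str_off_map and frees it on every exit path. Letting the destructor tolerate both sentinels keeps those paths branch-free; a sketch of the calling pattern (the trivial callbacks are illustrative):

```c
#include <linux/err.h>
#include "hashmap.h"

static size_t id_hash(const void *key, void *ctx) { return (size_t)key; }
static bool id_eq(const void *a, const void *b, void *ctx) { return a == b; }

static void demo(void)
{
	struct hashmap *map = hashmap__new(id_hash, id_eq, NULL);

	/* hashmap__new() returns ERR_PTR(-ENOMEM) on failure, never NULL,
	 * so an unconditional free on the error path is now safe:
	 */
	hashmap__free(map);
}
```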
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7f10dd501a52..e89cc9c885b3 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -156,14 +156,6 @@ enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE;
 
 int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
 {
-	/* __LIBBPF_STRICT_LAST is the last power-of-2 value used + 1, so to
-	 * get all possible values we compensate last +1, and then (2*x - 1)
-	 * to get the bit mask
-	 */
-	if (mode != LIBBPF_STRICT_ALL
-	    && (mode & ~((__LIBBPF_STRICT_LAST - 1) * 2 - 1)))
-		return errno = EINVAL, -EINVAL;
-
 	libbpf_mode = mode;
 	return 0;
 }
@@ -209,12 +201,6 @@ struct reloc_desc {
 	};
 };
 
-struct bpf_sec_def;
-
-typedef int (*init_fn_t)(struct bpf_program *prog, long cookie);
-typedef int (*preload_fn_t)(struct bpf_program *prog, struct bpf_prog_load_opts *opts, long cookie);
-typedef struct bpf_link *(*attach_fn_t)(const struct bpf_program *prog, long cookie);
-
 /* stored as sec_def->cookie for all libbpf-supported SEC()s */
 enum sec_def_flags {
 	SEC_NONE = 0,
@@ -235,17 +221,22 @@ enum sec_def_flags {
 	SEC_SLEEPABLE = 8,
 	/* allow non-strict prefix matching */
 	SEC_SLOPPY_PFX = 16,
+	/* BPF program support non-linear XDP buffer */
+	SEC_XDP_FRAGS = 32,
+	/* deprecated sec definitions not supposed to be used */
+	SEC_DEPRECATED = 64,
 };
 
 struct bpf_sec_def {
-	const char *sec;
+	char *sec;
 	enum bpf_prog_type prog_type;
 	enum bpf_attach_type expected_attach_type;
 	long cookie;
+	int handler_id;
 
-	init_fn_t init_fn;
-	preload_fn_t preload_fn;
-	attach_fn_t attach_fn;
+	libbpf_prog_setup_fn_t prog_setup_fn;
+	libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
+	libbpf_prog_attach_fn_t prog_attach_fn;
 };
 
 /*
@@ -311,7 +302,7 @@ struct bpf_program {
 	void *priv;
 	bpf_program_clear_priv_t clear_priv;
 
-	bool load;
+	bool autoload;
 	bool mark_btf_static;
 	enum bpf_prog_type type;
 	enum bpf_attach_type expected_attach_type;
@@ -366,6 +357,7 @@ enum libbpf_map_type {
 };
 
 struct bpf_map {
+	struct bpf_object *obj;
 	char *name;
 	/* real_name is defined for special internal maps (.rodata*,
 	 * .data*, .bss, .kconfig) and preserves their original ELF section
@@ -395,7 +387,7 @@ struct bpf_map {
 	char *pin_path;
 	bool pinned;
 	bool reused;
-	bool skipped;
+	bool autocreate;
 	__u64 map_extra;
 };
 
@@ -492,6 +484,8 @@ struct elf_state {
 	int st_ops_shndx;
 };
 
+struct usdt_manager;
+
 struct bpf_object {
 	char name[BPF_OBJ_NAME_LEN];
 	char license[64];
@@ -554,6 +548,8 @@ struct bpf_object {
 	size_t fd_array_cap;
 	size_t fd_array_cnt;
 
+	struct usdt_manager *usdt_man;
+
 	char path[];
 };
 
@@ -677,7 +673,18 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
 	prog->insns_cnt = prog->sec_insn_cnt;
 
 	prog->type = BPF_PROG_TYPE_UNSPEC;
-	prog->load = true;
+
+	/* libbpf's convention for SEC("?abc...") is that it's just like
+	 * SEC("abc...") but the corresponding bpf_program starts out with
+	 * autoload set to false.
+	 */
+	if (sec_name[0] == '?') {
+		prog->autoload = false;
+		/* from now on forget there was ? in section name */
+		sec_name++;
+	} else {
+		prog->autoload = true;
+	}
 
 	prog->instances.fds = NULL;
 	prog->instances.nr = -1;
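The SEC("?...") convention introduced above lets a program ship disabled by default and be switched on at runtime. A minimal sketch of how a caller might opt such a program back in before load, assuming a plain bpf_object flow and an illustrative program name:

```c
#include <bpf/libbpf.h>

/* 'obj' was opened from an ELF containing SEC("?kprobe/do_unlinkat") */
static int enable_optional_prog(struct bpf_object *obj)
{
	struct bpf_program *prog;

	prog = bpf_object__find_program_by_name(obj, "maybe_probe"); /* hypothetical name */
	if (!prog)
		return -ENOENT;

	/* flip autoload back on before bpf_object__load() */
	return bpf_program__set_autoload(prog, true);
}
```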
@@ -1227,10 +1234,8 @@ static void bpf_object__elf_finish(struct bpf_object *obj)
 	if (!obj->efile.elf)
 		return;
 
-	if (obj->efile.elf) {
-		elf_end(obj->efile.elf);
-		obj->efile.elf = NULL;
-	}
+	elf_end(obj->efile.elf);
+	obj->efile.elf = NULL;
 
 	obj->efile.symbols = NULL;
 	obj->efile.st_ops_data = NULL;
@@ -1378,22 +1383,20 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
 
 static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
 {
-	int ret = -ENOENT;
 	Elf_Data *data;
 	Elf_Scn *scn;
 
-	*size = 0;
 	if (!name)
 		return -EINVAL;
 
 	scn = elf_sec_by_name(obj, name);
 	data = elf_sec_data(obj, scn);
 	if (data) {
-		ret = 0; /* found it */
 		*size = data->d_size;
+		return 0; /* found it */
 	}
 
-	return *size ? 0 : ret;
+	return -ENOENT;
 }
 
 static int find_elf_var_offset(const struct bpf_object *obj, const char *name, __u32 *off)
@@ -1408,8 +1411,11 @@ static int find_elf_var_offset(const struct bpf_object *obj, const char *name, _
 	for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
 		Elf64_Sym *sym = elf_sym_by_idx(obj, si);
 
-		if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL ||
-		    ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
+		if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
+			continue;
+
+		if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
+		    ELF64_ST_BIND(sym->st_info) != STB_WEAK)
 			continue;
 
 		sname = elf_sym_str(obj, sym->st_name);
@@ -1428,36 +1434,21 @@ static int find_elf_var_offset(const struct bpf_object *obj, const char *name, _
 
 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
 {
-	struct bpf_map *new_maps;
-	size_t new_cap;
-	int i;
-
-	if (obj->nr_maps < obj->maps_cap)
-		return &obj->maps[obj->nr_maps++];
-
-	new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
-	new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps));
-	if (!new_maps) {
-		pr_warn("alloc maps for object failed\n");
-		return ERR_PTR(-ENOMEM);
-	}
+	struct bpf_map *map;
+	int err;
 
-	obj->maps_cap = new_cap;
-	obj->maps = new_maps;
+	err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
+				sizeof(*obj->maps), obj->nr_maps + 1);
+	if (err)
+		return ERR_PTR(err);
 
-	/* zero out new maps */
-	memset(obj->maps + obj->nr_maps, 0,
-	       (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
-	/*
-	 * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin)
-	 * when failure (zclose won't close negative fd)).
-	 */
-	for (i = obj->nr_maps; i < obj->maps_cap; i++) {
-		obj->maps[i].fd = -1;
-		obj->maps[i].inner_map_fd = -1;
-	}
+	map = &obj->maps[obj->nr_maps++];
+	map->obj = obj;
+	map->fd = -1;
+	map->inner_map_fd = -1;
+	map->autocreate = true;
 
-	return &obj->maps[obj->nr_maps++];
+	return map;
 }
 
 static size_t bpf_map_mmap_sz(const struct bpf_map *map)
@@ -1529,6 +1520,9 @@ static char *internal_map_name(struct bpf_object *obj, const char *real_name)
 }
 
 static int
+bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map);
+
+static int
 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 			      const char *real_name, int sec_idx, void *data, size_t data_sz)
 {
@@ -1575,6 +1569,9 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 		return err;
 	}
 
+	/* failures are fine because of maps like .rodata.str1.1 */
+	(void) bpf_map_find_btf_info(obj, map);
+
 	if (data)
 		memcpy(map->mmaped, data, data_sz);
 
@@ -1937,6 +1934,11 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
 	if (obj->efile.maps_shndx < 0)
 		return 0;
 
+	if (libbpf_mode & LIBBPF_STRICT_MAP_DEFINITIONS) {
+		pr_warn("legacy map definitions in SEC(\"maps\") are not supported\n");
+		return -EOPNOTSUPP;
+	}
+
 	if (!symbols)
 		return -EINVAL;
 
@@ -1999,6 +2001,8 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
 			return -LIBBPF_ERRNO__FORMAT;
 		}
 
+		pr_warn("map '%s' (legacy): legacy map definitions are deprecated, use BTF-defined maps instead\n", map_name);
+
 		if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
 			pr_warn("map '%s' (legacy): static maps are not supported\n", map_name);
 			return -ENOTSUP;
@@ -2050,6 +2054,9 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
 		}
 		memcpy(&map->def, def, sizeof(struct bpf_map_def));
+
+		/* btf info may not exist but fill it in if it does exist */
+		(void) bpf_map_find_btf_info(obj, map);
 	}
 	return 0;
 }
@@ -2538,6 +2545,10 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 		fill_map_from_def(map->inner_map, &inner_def);
 	}
 
+	err = bpf_map_find_btf_info(obj, map);
+	if (err)
+		return err;
+
 	return 0;
 }
 
@@ -2740,6 +2751,9 @@ static int bpf_object__init_btf(struct bpf_object *obj,
 		btf__set_pointer_size(obj->btf, 8);
 	}
 	if (btf_ext_data) {
+		struct btf_ext_info *ext_segs[3];
+		int seg_num, sec_num;
+
 		if (!obj->btf) {
 			pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
 				 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
@@ -2753,6 +2767,43 @@ static int bpf_object__init_btf(struct bpf_object *obj,
 			obj->btf_ext = NULL;
 			goto out;
 		}
+
+		/* setup .BTF.ext to ELF section mapping */
+		ext_segs[0] = &obj->btf_ext->func_info;
+		ext_segs[1] = &obj->btf_ext->line_info;
+		ext_segs[2] = &obj->btf_ext->core_relo_info;
+		for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
+			struct btf_ext_info *seg = ext_segs[seg_num];
+			const struct btf_ext_info_sec *sec;
+			const char *sec_name;
+			Elf_Scn *scn;
+
+			if (seg->sec_cnt == 0)
+				continue;
+
+			seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
+			if (!seg->sec_idxs) {
+				err = -ENOMEM;
+				goto out;
+			}
+
+			sec_num = 0;
+			for_each_btf_ext_sec(seg, sec) {
+				/* preventively increment index to avoid doing
+				 * this before every continue below
+				 */
+				sec_num++;
+
+				sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
+				if (str_is_empty(sec_name))
+					continue;
+				scn = elf_sec_by_name(obj, sec_name);
+				if (!scn)
+					continue;
+
+				seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
+			}
+		}
 	}
 out:
 	if (err && libbpf_needs_btf(obj)) {
@@ -2792,7 +2843,7 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
 		goto sort_vars;
 
 	ret = find_elf_sec_sz(obj, name, &size);
-	if (ret || !size || (t->size && t->size != size)) {
+	if (ret || !size) {
 		pr_debug("Invalid size for section %s: %u bytes\n", name, size);
 		return -ENOENT;
 	}
@@ -2911,7 +2962,7 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
 	}
 
 	bpf_object__for_each_program(prog, obj) {
-		if (!prog->load)
+		if (!prog->autoload)
 			continue;
 		if (prog_needs_vmlinux_btf(prog))
 			return true;
@@ -3836,7 +3887,14 @@ static bool prog_is_subprog(const struct bpf_object *obj,
 	 * .text programs are subprograms (even if they are not called from
 	 * other programs), because libbpf never explicitly supported mixing
 	 * SEC()-designated BPF programs and .text entry-point BPF programs.
+	 *
+	 * In libbpf 1.0 strict mode, we always consider .text
+	 * programs to be subprograms.
 	 */
+
+	if (libbpf_mode & LIBBPF_STRICT_SEC_NAME)
+		return prog->sec_idx == obj->efile.text_shndx;
+
 	return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
 }
 
@@ -4181,6 +4239,9 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
 	__u32 key_type_id = 0, value_type_id = 0;
 	int ret;
 
+	if (!obj->btf)
+		return -ENOENT;
+
 	/* if it's BTF-defined map, we don't need to search for type IDs.
 	 * For struct_ops map, it does not need btf_key_type_id and
 	 * btf_value_type_id.
 	 */
@@ -4190,9 +4251,13 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
 		return 0;
 
 	if (!bpf_map__is_internal(map)) {
+		pr_warn("Use of BPF_ANNOTATE_KV_PAIR is deprecated, use BTF-defined maps in .maps section instead\n");
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 		ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
 					   def->value_size, &key_type_id,
 					   &value_type_id);
+#pragma GCC diagnostic pop
 	} else {
 		/*
 		 * LLVM annotates global data differently in BTF, that is,
@@ -4245,6 +4310,20 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
 	return 0;
 }
 
+bool bpf_map__autocreate(const struct bpf_map *map)
+{
+	return map->autocreate;
+}
+
+int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
+{
+	if (map->obj->loaded)
+		return libbpf_err(-EBUSY);
+
+	map->autocreate = autocreate;
+	return 0;
+}
+
 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
 {
 	struct bpf_map_info info = {};
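bpf_map__set_autocreate() added above is the map-side counterpart of program autoload; as the map->obj->loaded check shows, it must be called before the object is loaded or it returns -EBUSY. A brief sketch with an illustrative map name:

```c
#include <bpf/libbpf.h>

static int skip_optional_map(struct bpf_object *obj)
{
	struct bpf_map *map = bpf_object__find_map_by_name(obj, "optional_map");

	if (!map)
		return -ENOENT;

	/* don't create this map on load; must run before bpf_object__load() */
	return bpf_map__set_autocreate(map, false);
}
```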
@@ -4564,7 +4643,7 @@ static int probe_kern_probe_read_kernel(void)
 	};
 	int fd, insn_cnt = ARRAY_SIZE(insns);
 
-	fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL);
+	fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
 	return probe_fd(fd);
 }
 
@@ -4655,6 +4734,18 @@ static int probe_perf_link(void)
 	return link_fd < 0 && err == -EBADF;
 }
 
+static int probe_kern_bpf_cookie(void)
+{
+	struct bpf_insn insns[] = {
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
+		BPF_EXIT_INSN(),
+	};
+	int ret, insn_cnt = ARRAY_SIZE(insns);
+
+	ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL);
+	return probe_fd(ret);
+}
+
 enum kern_feature_result {
 	FEAT_UNKNOWN = 0,
 	FEAT_SUPPORTED = 1,
@@ -4717,6 +4808,9 @@ static struct kern_feature_desc {
 	[FEAT_MEMCG_ACCOUNT] = {
 		"memcg-based memory accounting", probe_memcg_account,
 	},
+	[FEAT_BPF_COOKIE] = {
+		"BPF cookie support", probe_kern_bpf_cookie,
+	},
 };
 
 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
@@ -4800,8 +4894,8 @@ bpf_object__reuse_map(struct bpf_map *map)
 	}
 
 	err = bpf_map__reuse_fd(map, pin_fd);
+	close(pin_fd);
 	if (err) {
-		close(pin_fd);
 		return err;
 	}
 	map->pinned = true;
@@ -4849,12 +4943,47 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 
 static void bpf_map__destroy(struct bpf_map *map);
 
+static bool is_pow_of_2(size_t x)
+{
+	return x && (x & (x - 1)) == 0;
+}
+
+static size_t adjust_ringbuf_sz(size_t sz)
+{
+	__u32 page_sz = sysconf(_SC_PAGE_SIZE);
+	__u32 mul;
+
+	/* if user forgot to set any size, make sure they see error */
+	if (sz == 0)
+		return 0;
+	/* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
+	 * a power-of-2 multiple of kernel's page size. If user diligently
+	 * satisfied these conditions, pass the size through.
+	 */
+	if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
+		return sz;
+
+	/* Otherwise find closest (page_sz * power_of_2) product bigger than
+	 * user-set size to satisfy both user size request and kernel
+	 * requirements and substitute correct max_entries for map creation.
+	 */
+	for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
+		if (mul * page_sz > sz)
+			return mul * page_sz;
+	}
+
+	/* if it's impossible to satisfy the conditions (i.e., user size is
+	 * very close to UINT_MAX but is not a power-of-2 multiple of
+	 * page_size) then just return original size and let kernel reject it
+	 */
+	return sz;
+}
+
 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
 {
 	LIBBPF_OPTS(bpf_map_create_opts, create_attr);
 	struct bpf_map_def *def = &map->def;
 	const char *map_name = NULL;
-	__u32 max_entries;
 	int err = 0;
 
 	if (kernel_supports(obj, FEAT_PROG_NAME))
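adjust_ringbuf_sz() rounds a user-requested size up to the nearest page_sz * 2^n. With 4096-byte pages, for example, a request of 1,000,000 bytes becomes 4096 * 256 = 1,048,576. A standalone sketch of the same rounding rule, for illustration only (it folds the exact-fit check into the >= comparison):

```c
#include <stdint.h>
#include <unistd.h>

/* Mirror of the policy above: smallest page_sz * 2^n that is >= sz. */
static size_t round_ringbuf_sz(size_t sz)
{
	size_t page_sz = sysconf(_SC_PAGE_SIZE);
	size_t mul;

	for (mul = 1; mul <= UINT32_MAX / page_sz; mul <<= 1) {
		if (mul * page_sz >= sz)
			return mul * page_sz;
	}
	return sz; /* let the kernel reject sizes too close to UINT32_MAX */
}
```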
-errno : 0; @@ -5058,6 +5174,24 @@ static int bpf_object_init_prog_arrays(struct bpf_object *obj) return 0; } +static int map_set_def_max_entries(struct bpf_map *map) +{ + if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) { + int nr_cpus; + + nr_cpus = libbpf_num_possible_cpus(); + if (nr_cpus < 0) { + pr_warn("map '%s': failed to determine number of system CPUs: %d\n", + map->name, nr_cpus); + return nr_cpus; + } + pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); + map->def.max_entries = nr_cpus; + } + + return 0; +} + static int bpf_object__create_maps(struct bpf_object *obj) { @@ -5084,12 +5218,18 @@ bpf_object__create_maps(struct bpf_object *obj) * bpf_object loading will succeed just fine even on old * kernels. */ - if (bpf_map__is_internal(map) && - !kernel_supports(obj, FEAT_GLOBAL_DATA)) { - map->skipped = true; + if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA)) + map->autocreate = false; + + if (!map->autocreate) { + pr_debug("map '%s': skipped auto-creating...\n", map->name); continue; } + err = map_set_def_max_entries(map); + if (err) + goto err_out; + retried = false; retry: if (map->pin_path) { @@ -5185,18 +5325,21 @@ size_t bpf_core_essential_name_len(const char *name) return n; } -static void bpf_core_free_cands(struct bpf_core_cand_list *cands) +void bpf_core_free_cands(struct bpf_core_cand_list *cands) { + if (!cands) + return; + free(cands->cands); free(cands); } -static int bpf_core_add_cands(struct bpf_core_cand *local_cand, - size_t local_essent_len, - const struct btf *targ_btf, - const char *targ_btf_name, - int targ_start_id, - struct bpf_core_cand_list *cands) +int bpf_core_add_cands(struct bpf_core_cand *local_cand, + size_t local_essent_len, + const struct btf *targ_btf, + const char *targ_btf_name, + int targ_start_id, + struct bpf_core_cand_list *cands) { struct bpf_core_cand *new_cands, *cand; const struct btf_type *t, *local_t; @@ -5523,11 +5666,28 @@ static int record_relo_core(struct bpf_program *prog, return 0; } -static int bpf_core_apply_relo(struct bpf_program *prog, - const struct bpf_core_relo *relo, - int relo_idx, - const struct btf *local_btf, - struct hashmap *cand_cache) +static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx) +{ + struct reloc_desc *relo; + int i; + + for (i = 0; i < prog->nr_reloc; i++) { + relo = &prog->reloc_desc[i]; + if (relo->type != RELO_CORE || relo->insn_idx != insn_idx) + continue; + + return relo->core_relo; + } + + return NULL; +} + +static int bpf_core_resolve_relo(struct bpf_program *prog, + const struct bpf_core_relo *relo, + int relo_idx, + const struct btf *local_btf, + struct hashmap *cand_cache, + struct bpf_core_relo_res *targ_res) { struct bpf_core_spec specs_scratch[3] = {}; const void *type_key = u32_as_hash_key(relo->type_id); @@ -5536,20 +5696,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, const struct btf_type *local_type; const char *local_name; __u32 local_id = relo->type_id; - struct bpf_insn *insn; - int insn_idx, err; - - if (relo->insn_off % BPF_INSN_SZ) - return -EINVAL; - insn_idx = relo->insn_off / BPF_INSN_SZ; - /* adjust insn_idx from section frame of reference to the local - * program's frame of reference; (sub-)program code is not yet - * relocated, so it's enough to just subtract in-section offset - */ - insn_idx = insn_idx - prog->sec_insn_off; - if (insn_idx >= prog->insns_cnt) - return -EINVAL; - insn = &prog->insns[insn_idx]; + int err; local_type = 
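
/* A minimal usage sketch of the new autocreate knob; the object file and map
 * names are hypothetical. Opting a map out of creation is useful when a map
 * type is only supported on newer kernels and every BPF-side access to it is
 * guarded (otherwise the poisoned ldimm64 described below fails the load).
 */
#include <bpf/libbpf.h>

struct bpf_object *open_and_load_without_optional_map(void)
{
	struct bpf_object *obj;
	struct bpf_map *map;

	obj = bpf_object__open_file("prog.bpf.o", NULL);
	if (libbpf_get_error(obj))
		return NULL;

	map = bpf_object__find_map_by_name(obj, "optional_map");
	if (map)
		bpf_map__set_autocreate(map, false); /* returns -EBUSY after load */

	if (bpf_object__load(obj)) {
		bpf_object__close(obj);
		return NULL;
	}
	return obj;
}
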
btf__type_by_id(local_btf, local_id); if (!local_type) @@ -5559,15 +5706,6 @@ static int bpf_core_apply_relo(struct bpf_program *prog, if (!local_name) return -EINVAL; - if (prog->obj->gen_loader) { - const char *spec_str = btf__name_by_offset(local_btf, relo->access_str_off); - - pr_debug("record_relo_core: prog %td insn[%d] %s %s %s final insn_idx %d\n", - prog - prog->obj->programs, relo->insn_off / 8, - btf_kind_str(local_type), local_name, spec_str, insn_idx); - return record_relo_core(prog, relo, insn_idx); - } - if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && !hashmap__find(cand_cache, type_key, (void **)&cands)) { cands = bpf_core_find_cands(prog->obj, local_btf, local_id); @@ -5584,21 +5722,23 @@ static int bpf_core_apply_relo(struct bpf_program *prog, } } - return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, - relo_idx, local_btf, cands, specs_scratch); + return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch, + targ_res); } static int bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) { const struct btf_ext_info_sec *sec; + struct bpf_core_relo_res targ_res; const struct bpf_core_relo *rec; const struct btf_ext_info *seg; struct hashmap_entry *entry; struct hashmap *cand_cache = NULL; struct bpf_program *prog; + struct bpf_insn *insn; const char *sec_name; - int i, err = 0, insn_idx, sec_idx; + int i, err = 0, insn_idx, sec_idx, sec_num; if (obj->btf_ext->core_relo_info.len == 0) return 0; @@ -5619,54 +5759,75 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) } seg = &obj->btf_ext->core_relo_info; + sec_num = 0; for_each_btf_ext_sec(seg, sec) { + sec_idx = seg->sec_idxs[sec_num]; + sec_num++; + sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); if (str_is_empty(sec_name)) { err = -EINVAL; goto out; } - /* bpf_object's ELF is gone by now so it's not easy to find - * section index by section name, but we can find *any* - * bpf_program within desired section name and use it's - * prog->sec_idx to do a proper search by section index and - * instruction offset - */ - prog = NULL; - for (i = 0; i < obj->nr_programs; i++) { - prog = &obj->programs[i]; - if (strcmp(prog->sec_name, sec_name) == 0) - break; - } - if (!prog) { - pr_warn("sec '%s': failed to find a BPF program\n", sec_name); - return -ENOENT; - } - sec_idx = prog->sec_idx; - pr_debug("sec '%s': found %d CO-RE relocations\n", - sec_name, sec->num_info); + pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info); for_each_btf_ext_rec(seg, sec, i, rec) { + if (rec->insn_off % BPF_INSN_SZ) + return -EINVAL; insn_idx = rec->insn_off / BPF_INSN_SZ; prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); if (!prog) { - pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n", - sec_name, insn_idx, i); - err = -EINVAL; - goto out; + /* When __weak subprog is "overridden" by another instance + * of the subprog from a different object file, linker still + * appends all the .BTF.ext info that used to belong to that + * eliminated subprogram. + * This is similar to what x86-64 linker does for relocations. + * So just ignore such relocations just like we ignore + * subprog instructions when discovering subprograms. 
+ */ + pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n", + sec_name, i, insn_idx); + continue; } /* no need to apply CO-RE relocation if the program is * not going to be loaded */ - if (!prog->load) + if (!prog->autoload) + continue; + + /* adjust insn_idx from section frame of reference to the local + * program's frame of reference; (sub-)program code is not yet + * relocated, so it's enough to just subtract in-section offset + */ + insn_idx = insn_idx - prog->sec_insn_off; + if (insn_idx >= prog->insns_cnt) + return -EINVAL; + insn = &prog->insns[insn_idx]; + + err = record_relo_core(prog, rec, insn_idx); + if (err) { + pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n", + prog->name, i, err); + goto out; + } + + if (prog->obj->gen_loader) continue; - err = bpf_core_apply_relo(prog, rec, i, obj->btf, cand_cache); + err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res); if (err) { pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", prog->name, i, err); goto out; } + + err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res); + if (err) { + pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n", + prog->name, i, insn_idx, err); + goto out; + } } } @@ -5684,6 +5845,36 @@ out: return err; } +/* base map load ldimm64 special constant, used also for log fixup logic */ +#define MAP_LDIMM64_POISON_BASE 2001000000 +#define MAP_LDIMM64_POISON_PFX "200100" + +static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx, + int insn_idx, struct bpf_insn *insn, + int map_idx, const struct bpf_map *map) +{ + int i; + + pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n", + prog->name, relo_idx, insn_idx, map_idx, map->name); + + /* we turn single ldimm64 into two identical invalid calls */ + for (i = 0; i < 2; i++) { + insn->code = BPF_JMP | BPF_CALL; + insn->dst_reg = 0; + insn->src_reg = 0; + insn->off = 0; + /* if this instruction is reachable (not a dead code), + * verifier will complain with something like: + * invalid func unknown#2001000123 + * where lower 123 is map index into obj->maps[] array + */ + insn->imm = MAP_LDIMM64_POISON_BASE + map_idx; + + insn++; + } +} + /* Relocate data references within program code: * - map references; * - global variable references; @@ -5697,33 +5888,35 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) for (i = 0; i < prog->nr_reloc; i++) { struct reloc_desc *relo = &prog->reloc_desc[i]; struct bpf_insn *insn = &prog->insns[relo->insn_idx]; + const struct bpf_map *map; struct extern_desc *ext; switch (relo->type) { case RELO_LD64: + map = &obj->maps[relo->map_idx]; if (obj->gen_loader) { insn[0].src_reg = BPF_PSEUDO_MAP_IDX; insn[0].imm = relo->map_idx; - } else { + } else if (map->autocreate) { insn[0].src_reg = BPF_PSEUDO_MAP_FD; - insn[0].imm = obj->maps[relo->map_idx].fd; + insn[0].imm = map->fd; + } else { + poison_map_ldimm64(prog, i, relo->insn_idx, insn, + relo->map_idx, map); } break; case RELO_DATA: + map = &obj->maps[relo->map_idx]; insn[1].imm = insn[0].imm + relo->sym_off; if (obj->gen_loader) { insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; insn[0].imm = relo->map_idx; - } else { - const struct bpf_map *map = &obj->maps[relo->map_idx]; - - if (map->skipped) { - pr_warn("prog '%s': relo #%d: kernel doesn't support global data\n", - prog->name, i); - return -ENOTSUP; - } + } else if (map->autocreate) { insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; - insn[0].imm = 
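
/* Background sketch for the poisoning logic above: a BPF ld_imm64 occupies
 * two consecutive struct bpf_insn slots (low 32 bits of the immediate in
 * insn[0].imm, high 32 bits in insn[1].imm), which is why
 * poison_map_ldimm64() rewrites two instructions. For illustration only,
 * emitting a well-formed map-fd load into r1:
 */
#include <linux/bpf.h>

static void emit_ld_map_fd(struct bpf_insn *insn, int map_fd)
{
	insn[0] = (struct bpf_insn) {
		.code = BPF_LD | BPF_DW | BPF_IMM,
		.dst_reg = BPF_REG_1,
		.src_reg = BPF_PSEUDO_MAP_FD, /* imm holds a map fd, not a constant */
		.imm = map_fd,
	};
	insn[1] = (struct bpf_insn) { .imm = 0 }; /* upper 32 bits of the immediate */
}
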
obj->maps[relo->map_idx].fd; + insn[0].imm = map->fd; + } else { + poison_map_ldimm64(prog, i, relo->insn_idx, insn, + relo->map_idx, map); } break; case RELO_EXTERN_VAR: @@ -5793,14 +5986,13 @@ static int adjust_prog_btf_ext_info(const struct bpf_object *obj, void *rec, *rec_end, *new_prog_info; const struct btf_ext_info_sec *sec; size_t old_sz, new_sz; - const char *sec_name; - int i, off_adj; + int i, sec_num, sec_idx, off_adj; + sec_num = 0; for_each_btf_ext_sec(ext_info, sec) { - sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); - if (!sec_name) - return -EINVAL; - if (strcmp(sec_name, prog->sec_name) != 0) + sec_idx = ext_info->sec_idxs[sec_num]; + sec_num++; + if (prog->sec_idx != sec_idx) continue; for_each_btf_ext_rec(ext_info, sec, i, rec) { @@ -6195,7 +6387,6 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) if (err) return err; - return 0; } @@ -6256,8 +6447,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) err); return err; } - if (obj->gen_loader) - bpf_object__sort_relos(obj); + bpf_object__sort_relos(obj); } /* Before relocating calls pre-process relocations and mark @@ -6293,7 +6483,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) */ if (prog_is_subprog(obj, prog)) continue; - if (!prog->load) + if (!prog->autoload) continue; err = bpf_object__relocate_calls(obj, prog); @@ -6308,7 +6498,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) prog = &obj->programs[i]; if (prog_is_subprog(obj, prog)) continue; - if (!prog->load) + if (!prog->autoload) continue; err = bpf_object__relocate_data(obj, prog); if (err) { @@ -6317,8 +6507,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) return err; } } - if (!obj->gen_loader) - bpf_object__free_relocs(obj); + return 0; } @@ -6549,9 +6738,9 @@ static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name, int *btf_obj_fd, int *btf_type_id); -/* this is called as prog->sec_def->preload_fn for libbpf-supported sec_defs */ -static int libbpf_preload_prog(struct bpf_program *prog, - struct bpf_prog_load_opts *opts, long cookie) +/* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */ +static int libbpf_prepare_prog_load(struct bpf_program *prog, + struct bpf_prog_load_opts *opts, long cookie) { enum sec_def_flags def = cookie; @@ -6562,13 +6751,35 @@ static int libbpf_preload_prog(struct bpf_program *prog, if (def & SEC_SLEEPABLE) opts->prog_flags |= BPF_F_SLEEPABLE; - if ((prog->type == BPF_PROG_TYPE_TRACING || - prog->type == BPF_PROG_TYPE_LSM || - prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { + if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) + opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; + + if (def & SEC_DEPRECATED) { + pr_warn("SEC(\"%s\") is deprecated, please see https://github.com/libbpf/libbpf/wiki/Libbpf-1.0-migration-guide#bpf-program-sec-annotation-deprecations for details\n", + prog->sec_name); + } + + if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) { int btf_obj_fd = 0, btf_type_id = 0, err; const char *attach_name; - attach_name = strchr(prog->sec_name, '/') + 1; + attach_name = strchr(prog->sec_name, '/'); + if (!attach_name) { + /* if BPF program is annotated with just SEC("fentry") + * (or similar) without declaratively specifying + * target, then it is expected that target will be + * specified with 
bpf_program__set_attach_target() at + * runtime before BPF object load step. If not, then + * there is nothing to load into the kernel as BPF + * verifier won't be able to validate BPF program + * correctness anyways. + */ + pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n", + prog->name); + return -EINVAL; + } + attach_name++; /* skip over / */ + err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id); if (err) return err; @@ -6588,6 +6799,8 @@ static int libbpf_preload_prog(struct bpf_program *prog, return 0; } +static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz); + static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, const char *license, __u32 kern_version, @@ -6640,13 +6853,15 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog load_attr.fd_array = obj->fd_array; /* adjust load_attr if sec_def provides custom preload callback */ - if (prog->sec_def && prog->sec_def->preload_fn) { - err = prog->sec_def->preload_fn(prog, &load_attr, prog->sec_def->cookie); + if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) { + err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie); if (err < 0) { pr_warn("prog '%s': failed to prepare load attributes: %d\n", prog->name, err); return err; } + insns = prog->insns; + insns_cnt = prog->insns_cnt; } if (obj->gen_loader) { @@ -6658,7 +6873,7 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog } retry_load: - /* if log_level is zero, we don't request logs initiallly even if + /* if log_level is zero, we don't request logs initially even if * custom log_buf is specified; if the program load fails, then we'll * bump log_level to 1 and use either custom log_buf or we'll allocate * our own and retry the load to get details on what failed @@ -6734,6 +6949,10 @@ retry_load: goto retry_load; ret = -errno; + + /* post-process verifier log to improve error descriptions */ + fixup_verifier_log(prog, log_buf, log_buf_size); + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); pr_perm_msg(ret); @@ -6742,10 +6961,6 @@ retry_load: pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", prog->name, log_buf); } - if (insns_cnt >= BPF_MAXINSNS) { - pr_warn("prog '%s': program too large (%d insns), at most %d insns\n", - prog->name, insns_cnt, BPF_MAXINSNS); - } out: if (own_log_buf) @@ -6753,6 +6968,169 @@ out: return ret; } +static char *find_prev_line(char *buf, char *cur) +{ + char *p; + + if (cur == buf) /* end of a log buf */ + return NULL; + + p = cur - 1; + while (p - 1 >= buf && *(p - 1) != '\n') + p--; + + return p; +} + +static void patch_log(char *buf, size_t buf_sz, size_t log_sz, + char *orig, size_t orig_sz, const char *patch) +{ + /* size of the remaining log content to the right from the to-be-replaced part */ + size_t rem_sz = (buf + log_sz) - (orig + orig_sz); + size_t patch_sz = strlen(patch); + + if (patch_sz != orig_sz) { + /* If patch line(s) are longer than original piece of verifier log, + * shift log contents by (patch_sz - orig_sz) bytes to the right + * starting from after to-be-replaced part of the log. 
+ * + * If patch line(s) are shorter than original piece of verifier log, + * shift log contents by (orig_sz - patch_sz) bytes to the left + * starting from after to-be-replaced part of the log. + * + * We need to be careful about not overflowing available + * buf_sz capacity. If that's the case, we'll truncate the end + * of the original log, as necessary. + */ + if (patch_sz > orig_sz) { + if (orig + patch_sz >= buf + buf_sz) { + /* patch is big enough to cover remaining space completely */ + patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1; + rem_sz = 0; + } else if (patch_sz - orig_sz > buf_sz - log_sz) { + /* patch causes part of remaining log to be truncated */ + rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz); + } + } + /* shift remaining log to the right by calculated amount */ + memmove(orig + patch_sz, orig + orig_sz, rem_sz); + } + + memcpy(orig, patch, patch_sz); +} + +static void fixup_log_failed_core_relo(struct bpf_program *prog, + char *buf, size_t buf_sz, size_t log_sz, + char *line1, char *line2, char *line3) +{ + /* Expected log for failed and not properly guarded CO-RE relocation: + * line1 -> 123: (85) call unknown#195896080 + * line2 -> invalid func unknown#195896080 + * line3 -> <anything else or end of buffer> + * + * "123" is the index of the instruction that was poisoned. We extract + * instruction index to find corresponding CO-RE relocation and + * replace this part of the log with more relevant information about + * failed CO-RE relocation. + */ + const struct bpf_core_relo *relo; + struct bpf_core_spec spec; + char patch[512], spec_buf[256]; + int insn_idx, err, spec_len; + + if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1) + return; + + relo = find_relo_core(prog, insn_idx); + if (!relo) + return; + + err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec); + if (err) + return; + + spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); + snprintf(patch, sizeof(patch), + "%d: <invalid CO-RE relocation>\n" + "failed to resolve CO-RE relocation %s%s\n", + insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : ""); + + patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); +} + +static void fixup_log_missing_map_load(struct bpf_program *prog, + char *buf, size_t buf_sz, size_t log_sz, + char *line1, char *line2, char *line3) +{ + /* Expected log for a failed and not properly guarded map reference: + * line1 -> 123: (85) call unknown#2001000345 + * line2 -> invalid func unknown#2001000345 + * line3 -> <anything else or end of buffer> + * + * "123" is the index of the instruction that was poisoned. + * "345" in "2001000345" is a map index in obj->maps to fetch map name.
+ */ + struct bpf_object *obj = prog->obj; + const struct bpf_map *map; + int insn_idx, map_idx; + char patch[128]; + + if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2) + return; + + map_idx -= MAP_LDIMM64_POISON_BASE; + if (map_idx < 0 || map_idx >= obj->nr_maps) + return; + map = &obj->maps[map_idx]; + + snprintf(patch, sizeof(patch), + "%d: <invalid BPF map reference>\n" + "BPF map '%s' is referenced but wasn't created\n", + insn_idx, map->name); + + patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); +} + +static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz) +{ + /* look for familiar error patterns in last N lines of the log */ + const size_t max_last_line_cnt = 10; + char *prev_line, *cur_line, *next_line; + size_t log_sz; + int i; + + if (!buf) + return; + + log_sz = strlen(buf) + 1; + next_line = buf + log_sz - 1; + + for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) { + cur_line = find_prev_line(buf, next_line); + if (!cur_line) + return; + + /* failed CO-RE relocation case */ + if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) { + prev_line = find_prev_line(buf, cur_line); + if (!prev_line) + continue; + + fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz, + prev_line, cur_line, next_line); + return; + } else if (str_has_pfx(cur_line, "invalid func unknown#"MAP_LDIMM64_POISON_PFX)) { + prev_line = find_prev_line(buf, cur_line); + if (!prev_line) + continue; + + fixup_log_missing_map_load(prog, buf, buf_sz, log_sz, + prev_line, cur_line, next_line); + return; + } + } +} + static int bpf_program_record_relos(struct bpf_program *prog) { struct bpf_object *obj = prog->obj; @@ -6898,7 +7276,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) prog = &obj->programs[i]; if (prog_is_subprog(obj, prog)) continue; - if (!prog->load) { + if (!prog->autoload) { pr_debug("prog '%s': skipped loading\n", prog->name); continue; } @@ -6907,8 +7285,8 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) if (err) return err; } - if (obj->gen_loader) - bpf_object__free_relocs(obj); + + bpf_object__free_relocs(obj); return 0; } @@ -6928,8 +7306,8 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object continue; } - bpf_program__set_type(prog, prog->sec_def->prog_type); - bpf_program__set_expected_attach_type(prog, prog->sec_def->expected_attach_type); + prog->type = prog->sec_def->prog_type; + prog->expected_attach_type = prog->sec_def->expected_attach_type; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" @@ -6941,8 +7319,8 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object /* sec_def can have custom callback which should be called * after bpf_program is initialized to adjust its properties */ - if (prog->sec_def->init_fn) { - err = prog->sec_def->init_fn(prog, prog->sec_def->cookie); + if (prog->sec_def->prog_setup_fn) { + err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie); if (err < 0) { pr_warn("prog '%s': failed to initialize: %d\n", prog->name, err); @@ -7146,12 +7524,10 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj) return 0; } -static int bpf_object__read_kallsyms_file(struct bpf_object *obj) +int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx) { char sym_type, sym_name[500]; unsigned long long sym_addr; - const struct btf_type *t; - struct extern_desc *ext; int ret, err = 0; FILE *f; @@ -7170,35 +7546,51 @@ static int 
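
/* Sketch of a callback compatible with the kallsyms_cb_t shape consumed by
 * libbpf_kallsyms_parse() above (an internal helper shared with the
 * multi-kprobe resolver below, not public API); 'struct sym_counter' is
 * hypothetical.
 */
#include <string.h>

struct sym_counter { const char *name; int hits; };

static int count_sym_cb(unsigned long long sym_addr, char sym_type,
			const char *sym_name, void *ctx)
{
	struct sym_counter *c = ctx;

	if ((sym_type == 't' || sym_type == 'T') && strcmp(sym_name, c->name) == 0)
		c->hits++;
	return 0; /* returning non-zero stops the /proc/kallsyms walk */
}
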
bpf_object__read_kallsyms_file(struct bpf_object *obj) if (ret != 3) { pr_warn("failed to read kallsyms entry: %d\n", ret); err = -EINVAL; - goto out; + break; } - ext = find_extern_by_name(obj, sym_name); - if (!ext || ext->type != EXT_KSYM) - continue; - - t = btf__type_by_id(obj->btf, ext->btf_id); - if (!btf_is_var(t)) - continue; - - if (ext->is_set && ext->ksym.addr != sym_addr) { - pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n", - sym_name, ext->ksym.addr, sym_addr); - err = -EINVAL; - goto out; - } - if (!ext->is_set) { - ext->is_set = true; - ext->ksym.addr = sym_addr; - pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr); - } + err = cb(sym_addr, sym_type, sym_name, ctx); + if (err) + break; } -out: fclose(f); return err; } +static int kallsyms_cb(unsigned long long sym_addr, char sym_type, + const char *sym_name, void *ctx) +{ + struct bpf_object *obj = ctx; + const struct btf_type *t; + struct extern_desc *ext; + + ext = find_extern_by_name(obj, sym_name); + if (!ext || ext->type != EXT_KSYM) + return 0; + + t = btf__type_by_id(obj->btf, ext->btf_id); + if (!btf_is_var(t)) + return 0; + + if (ext->is_set && ext->ksym.addr != sym_addr) { + pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n", + sym_name, ext->ksym.addr, sym_addr); + return -EINVAL; + } + if (!ext->is_set) { + ext->is_set = true; + ext->ksym.addr = sym_addr; + pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr); + } + return 0; +} + +static int bpf_object__read_kallsyms_file(struct bpf_object *obj) +{ + return libbpf_kallsyms_parse(kallsyms_cb, obj); +} + static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, __u16 kind, struct btf **res_btf, struct module_btf **res_mod_btf) @@ -7883,10 +8275,8 @@ int bpf_map__set_pin_path(struct bpf_map *map, const char *path) return 0; } -const char *bpf_map__get_pin_path(const struct bpf_map *map) -{ - return map->pin_path; -} +__alias(bpf_map__pin_path) +const char *bpf_map__get_pin_path(const struct bpf_map *map); const char *bpf_map__pin_path(const struct bpf_map *map) { @@ -7925,7 +8315,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) char *pin_path = NULL; char buf[PATH_MAX]; - if (map->skipped) + if (!map->autocreate) continue; if (path) { @@ -8140,6 +8530,9 @@ void bpf_object__close(struct bpf_object *obj) if (obj->clear_priv) obj->clear_priv(obj, obj->priv); + usdt_manager_free(obj->usdt_man); + obj->usdt_man = NULL; + bpf_gen__free(obj->gen_loader); bpf_object__elf_finish(obj); bpf_object_unload(obj); @@ -8363,7 +8756,7 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) bool bpf_program__autoload(const struct bpf_program *prog) { - return prog->load; + return prog->autoload; } int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) @@ -8371,7 +8764,7 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) if (prog->obj->loaded) return libbpf_err(-EINVAL); - prog->load = autoload; + prog->autoload = autoload; return 0; } @@ -8397,6 +8790,26 @@ size_t bpf_program__insn_cnt(const struct bpf_program *prog) return prog->insns_cnt; } +int bpf_program__set_insns(struct bpf_program *prog, + struct bpf_insn *new_insns, size_t new_insn_cnt) +{ + struct bpf_insn *insns; + + if (prog->obj->loaded) + return -EBUSY; + + insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns)); + if (!insns) { + pr_warn("prog '%s': failed to realloc prog code\n", prog->name); + return -ENOMEM; + } + memcpy(insns, 
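
/* Usage sketch for bpf_program__set_insns() above, an advanced API meant
 * mainly for tooling that rewrites program code wholesale before load: here,
 * stubbing a program out with "r0 = 0; exit".
 */
#include <linux/bpf.h>
#include <bpf/libbpf.h>

int stub_out_prog(struct bpf_program *prog)
{
	struct bpf_insn insns[] = {
		{ .code = BPF_ALU64 | BPF_MOV | BPF_K }, /* r0 = 0 */
		{ .code = BPF_JMP | BPF_EXIT },          /* exit */
	};

	return bpf_program__set_insns(prog, insns, 2); /* -EBUSY once loaded */
}
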
new_insns, new_insn_cnt * sizeof(*insns)); + + prog->insns = insns; + prog->insns_cnt = new_insn_cnt; + return 0; +} + int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, bpf_program_prep_t prep) { @@ -8451,14 +8864,21 @@ static int bpf_program_nth_fd(const struct bpf_program *prog, int n) return fd; } -enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog) +__alias(bpf_program__type) +enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); + +enum bpf_prog_type bpf_program__type(const struct bpf_program *prog) { return prog->type; } -void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) +int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) { + if (prog->obj->loaded) + return libbpf_err(-EBUSY); + prog->type = type; + return 0; } static bool bpf_program__is_type(const struct bpf_program *prog, @@ -8472,8 +8892,7 @@ int bpf_program__set_##NAME(struct bpf_program *prog) \ { \ if (!prog) \ return libbpf_err(-EINVAL); \ - bpf_program__set_type(prog, TYPE); \ - return 0; \ + return bpf_program__set_type(prog, TYPE); \ } \ \ bool bpf_program__is_##NAME(const struct bpf_program *prog) \ @@ -8495,16 +8914,22 @@ BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS); BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT); BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP); -enum bpf_attach_type -bpf_program__get_expected_attach_type(const struct bpf_program *prog) +__alias(bpf_program__expected_attach_type) +enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog); + +enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog) { return prog->expected_attach_type; } -void bpf_program__set_expected_attach_type(struct bpf_program *prog, +int bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type) { + if (prog->obj->loaded) + return libbpf_err(-EBUSY); + prog->expected_attach_type = type; + return 0; } __u32 bpf_program__flags(const struct bpf_program *prog) @@ -8556,52 +8981,64 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log } #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) 
{ \ - .sec = sec_pfx, \ + .sec = (char *)sec_pfx, \ .prog_type = BPF_PROG_TYPE_##ptype, \ .expected_attach_type = atype, \ .cookie = (long)(flags), \ - .preload_fn = libbpf_preload_prog, \ + .prog_prepare_load_fn = libbpf_prepare_prog_load, \ __VA_ARGS__ \ } -static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_raw_tp(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_trace(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_lsm(const struct bpf_program *prog, long cookie); -static struct bpf_link *attach_iter(const struct bpf_program *prog, long cookie); +static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link); static const struct bpf_sec_def section_defs[] = { SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE | SEC_SLOPPY_PFX), SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("kprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), - SEC_DEF("uprobe/", KPROBE, 0, SEC_NONE), - SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), - SEC_DEF("uretprobe/", KPROBE, 0, SEC_NONE), + SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), + SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), + SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), + SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), + SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), + SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), + SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt), SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), - SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX), + SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX | SEC_DEPRECATED), SEC_DEF("action", SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("tracepoint/", TRACEPOINT, 0, SEC_NONE, attach_tp), - SEC_DEF("tp/", TRACEPOINT, 0, SEC_NONE, attach_tp), - SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("raw_tp/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("raw_tracepoint.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("raw_tp.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("tp_btf/", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fentry/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fmod_ret/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), - 
SEC_DEF("fexit/", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fentry.s/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("fmod_ret.s/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("fexit.s/", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("freplace/", EXT, 0, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("lsm/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), - SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), - SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), + SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp), + SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp), + SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), + SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), + SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), + SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter), SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), - SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), - SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), + SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS), + SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), + SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE | SEC_DEPRECATED), + SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS), + SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), + SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE | SEC_DEPRECATED), + SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS), SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX), SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX), @@ -8643,61 +9080,167 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE | SEC_SLOPPY_PFX), }; -#define MAX_TYPE_NAME_SIZE 32 +static size_t custom_sec_def_cnt; +static struct bpf_sec_def *custom_sec_defs; +static struct bpf_sec_def custom_fallback_def; +static bool has_custom_fallback_def; -static const struct bpf_sec_def *find_sec_def(const char *sec_name) +static int last_custom_sec_def_handler_id; + +int libbpf_register_prog_handler(const char *sec, + enum bpf_prog_type prog_type, + enum bpf_attach_type exp_attach_type, + const struct libbpf_prog_handler_opts *opts) { - const struct 
bpf_sec_def *sec_def; - enum sec_def_flags sec_flags; - int i, n = ARRAY_SIZE(section_defs), len; - bool strict = libbpf_mode & LIBBPF_STRICT_SEC_NAME; + struct bpf_sec_def *sec_def; - for (i = 0; i < n; i++) { - sec_def = &section_defs[i]; - sec_flags = sec_def->cookie; - len = strlen(sec_def->sec); + if (!OPTS_VALID(opts, libbpf_prog_handler_opts)) + return libbpf_err(-EINVAL); - /* "type/" always has to have proper SEC("type/extras") form */ - if (sec_def->sec[len - 1] == '/') { - if (str_has_pfx(sec_name, sec_def->sec)) - return sec_def; - continue; - } + if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */ + return libbpf_err(-E2BIG); - /* "type+" means it can be either exact SEC("type") or - * well-formed SEC("type/extras") with proper '/' separator - */ - if (sec_def->sec[len - 1] == '+') { - len--; - /* not even a prefix */ - if (strncmp(sec_name, sec_def->sec, len) != 0) - continue; - /* exact match or has '/' separator */ - if (sec_name[len] == '\0' || sec_name[len] == '/') - return sec_def; - continue; - } + if (sec) { + sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1, + sizeof(*sec_def)); + if (!sec_def) + return libbpf_err(-ENOMEM); - /* SEC_SLOPPY_PFX definitions are allowed to be just prefix - * matches, unless strict section name mode - * (LIBBPF_STRICT_SEC_NAME) is enabled, in which case the - * match has to be exact. - */ - if ((sec_flags & SEC_SLOPPY_PFX) && !strict) { - if (str_has_pfx(sec_name, sec_def->sec)) - return sec_def; - continue; - } + custom_sec_defs = sec_def; + sec_def = &custom_sec_defs[custom_sec_def_cnt]; + } else { + if (has_custom_fallback_def) + return libbpf_err(-EBUSY); - /* Definitions not marked SEC_SLOPPY_PFX (e.g., - * SEC("syscall")) are exact matches in both modes. - */ - if (strcmp(sec_name, sec_def->sec) == 0) + sec_def = &custom_fallback_def; + } + + sec_def->sec = sec ?
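
/* Usage sketch for the custom section handler API being defined here; the
 * SEC("mycustom+") convention and setup callback are hypothetical.
 */
#include <bpf/libbpf.h>

static int mycustom_setup(struct bpf_program *prog, long cookie)
{
	return 0; /* adjust program properties right after bpf_object open */
}

int register_mycustom_handler(void)
{
	LIBBPF_OPTS(libbpf_prog_handler_opts, opts,
		.prog_setup_fn = mycustom_setup,
	);

	/* returns a positive handler ID for libbpf_unregister_prog_handler() */
	return libbpf_register_prog_handler("mycustom+", BPF_PROG_TYPE_KPROBE, 0, &opts);
}
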
strdup(sec) : NULL; + if (sec && !sec_def->sec) + return libbpf_err(-ENOMEM); + + sec_def->prog_type = prog_type; + sec_def->expected_attach_type = exp_attach_type; + sec_def->cookie = OPTS_GET(opts, cookie, 0); + + sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL); + sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL); + sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL); + + sec_def->handler_id = ++last_custom_sec_def_handler_id; + + if (sec) + custom_sec_def_cnt++; + else + has_custom_fallback_def = true; + + return sec_def->handler_id; +} + +int libbpf_unregister_prog_handler(int handler_id) +{ + struct bpf_sec_def *sec_defs; + int i; + + if (handler_id <= 0) + return libbpf_err(-EINVAL); + + if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) { + memset(&custom_fallback_def, 0, sizeof(custom_fallback_def)); + has_custom_fallback_def = false; + return 0; + } + + for (i = 0; i < custom_sec_def_cnt; i++) { + if (custom_sec_defs[i].handler_id == handler_id) + break; + } + + if (i == custom_sec_def_cnt) + return libbpf_err(-ENOENT); + + free(custom_sec_defs[i].sec); + for (i = i + 1; i < custom_sec_def_cnt; i++) + custom_sec_defs[i - 1] = custom_sec_defs[i]; + custom_sec_def_cnt--; + + /* try to shrink the array, but it's ok if we couldn't */ + sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs)); + if (sec_defs) + custom_sec_defs = sec_defs; + + return 0; +} + +static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name, + bool allow_sloppy) +{ + size_t len = strlen(sec_def->sec); + + /* "type/" always has to have proper SEC("type/extras") form */ + if (sec_def->sec[len - 1] == '/') { + if (str_has_pfx(sec_name, sec_def->sec)) + return true; + return false; + } + + /* "type+" means it can be either exact SEC("type") or + * well-formed SEC("type/extras") with proper '/' separator + */ + if (sec_def->sec[len - 1] == '+') { + len--; + /* not even a prefix */ + if (strncmp(sec_name, sec_def->sec, len) != 0) + return false; + /* exact match or has '/' separator */ + if (sec_name[len] == '\0' || sec_name[len] == '/') + return true; + return false; + } + + /* SEC_SLOPPY_PFX definitions are allowed to be just prefix + * matches, unless strict section name mode + * (LIBBPF_STRICT_SEC_NAME) is enabled, in which case the + * match has to be exact. + */ + if (allow_sloppy && str_has_pfx(sec_name, sec_def->sec)) + return true; + + /* Definitions not marked SEC_SLOPPY_PFX (e.g., + * SEC("syscall")) are exact matches in both modes. 
+ */ + return strcmp(sec_name, sec_def->sec) == 0; +} + +static const struct bpf_sec_def *find_sec_def(const char *sec_name) +{ + const struct bpf_sec_def *sec_def; + int i, n; + bool strict = libbpf_mode & LIBBPF_STRICT_SEC_NAME, allow_sloppy; + + n = custom_sec_def_cnt; + for (i = 0; i < n; i++) { + sec_def = &custom_sec_defs[i]; + if (sec_def_matches(sec_def, sec_name, false)) return sec_def; } + + n = ARRAY_SIZE(section_defs); + for (i = 0; i < n; i++) { + sec_def = &section_defs[i]; + allow_sloppy = (sec_def->cookie & SEC_SLOPPY_PFX) && !strict; + if (sec_def_matches(sec_def, sec_name, allow_sloppy)) + return sec_def; + } + + if (has_custom_fallback_def) + return &custom_fallback_def; + return NULL; } +#define MAX_TYPE_NAME_SIZE 32 + static char *libbpf_get_type_names(bool attach_type) { int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE; @@ -8713,7 +9256,7 @@ static char *libbpf_get_type_names(bool attach_type) const struct bpf_sec_def *sec_def = &section_defs[i]; if (attach_type) { - if (sec_def->preload_fn != libbpf_preload_prog) + if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load) continue; if (!(sec_def->cookie & SEC_ATTACHABLE)) @@ -9096,7 +9639,7 @@ int libbpf_attach_type_by_name(const char *name, return libbpf_err(-EINVAL); } - if (sec_def->preload_fn != libbpf_preload_prog) + if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load) return libbpf_err(-EINVAL); if (!(sec_def->cookie & SEC_ATTACHABLE)) return libbpf_err(-EINVAL); @@ -9406,6 +9949,110 @@ bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset) return libbpf_err_ptr(-ENOTSUP); } +static int validate_map_op(const struct bpf_map *map, size_t key_sz, + size_t value_sz, bool check_value_sz) +{ + if (map->fd <= 0) + return -ENOENT; + + if (map->def.key_size != key_sz) { + pr_warn("map '%s': unexpected key size %zu provided, expected %u\n", + map->name, key_sz, map->def.key_size); + return -EINVAL; + } + + if (!check_value_sz) + return 0; + + switch (map->def.type) { + case BPF_MAP_TYPE_PERCPU_ARRAY: + case BPF_MAP_TYPE_PERCPU_HASH: + case BPF_MAP_TYPE_LRU_PERCPU_HASH: + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: { + int num_cpu = libbpf_num_possible_cpus(); + size_t elem_sz = roundup(map->def.value_size, 8); + + if (value_sz != num_cpu * elem_sz) { + pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n", + map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz); + return -EINVAL; + } + break; + } + default: + if (map->def.value_size != value_sz) { + pr_warn("map '%s': unexpected value size %zu provided, expected %u\n", + map->name, value_sz, map->def.value_size); + return -EINVAL; + } + break; + } + return 0; +} + +int bpf_map__lookup_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags) +{ + int err; + + err = validate_map_op(map, key_sz, value_sz, true); + if (err) + return libbpf_err(err); + + return bpf_map_lookup_elem_flags(map->fd, key, value, flags); +} + +int bpf_map__update_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + const void *value, size_t value_sz, __u64 flags) +{ + int err; + + err = validate_map_op(map, key_sz, value_sz, true); + if (err) + return libbpf_err(err); + + return bpf_map_update_elem(map->fd, key, value, flags); +} + +int bpf_map__delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, __u64 flags) +{ + int err; + + err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); + if (err) + return libbpf_err(err); + +
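
/* Usage sketch for the size-validated accessors above; 'counters' is a
 * hypothetical HASH map with __u32 keys and __u64 values in a loaded object.
 * Passing explicit sizes lets libbpf reject mismatched types with -EINVAL.
 */
#include <errno.h>
#include <bpf/libbpf.h>

int bump_counter(struct bpf_object *obj, __u32 key)
{
	struct bpf_map *map = bpf_object__find_map_by_name(obj, "counters");
	__u64 val = 0;
	int err;

	if (!map)
		return -ENOENT;

	err = bpf_map__lookup_elem(map, &key, sizeof(key), &val, sizeof(val), 0);
	if (err && err != -ENOENT)
		return err;

	val++;
	return bpf_map__update_elem(map, &key, sizeof(key), &val, sizeof(val), BPF_ANY);
}
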
return bpf_map_delete_elem_flags(map->fd, key, flags); +} + +int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags) +{ + int err; + + err = validate_map_op(map, key_sz, value_sz, true); + if (err) + return libbpf_err(err); + + return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags); +} + +int bpf_map__get_next_key(const struct bpf_map *map, + const void *cur_key, void *next_key, size_t key_sz) +{ + int err; + + err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); + if (err) + return libbpf_err(err); + + return bpf_map_get_next_key(map->fd, cur_key, next_key); +} + long libbpf_get_error(const void *ptr) { if (!IS_ERR_OR_NULL(ptr)) @@ -9443,7 +10090,7 @@ static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, open_attr.file = attr->file; open_attr.prog_type = attr->prog_type; - obj = bpf_object__open_xattr(&open_attr); + obj = __bpf_object__open_xattr(&open_attr, 0); err = libbpf_get_error(obj); if (err) return libbpf_err(-ENOENT); @@ -9456,11 +10103,10 @@ static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, * bpf_object__open guessed */ if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) { - bpf_program__set_type(prog, attr->prog_type); - bpf_program__set_expected_attach_type(prog, - attach_type); + prog->type = attr->prog_type; + prog->expected_attach_type = attach_type; } - if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) { + if (bpf_program__type(prog) == BPF_PROG_TYPE_UNSPEC) { /* * we haven't guessed from section name and user * didn't provide a fallback type, too bad... @@ -9477,7 +10123,7 @@ static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, } bpf_object__for_each_map(map, obj) { - if (!bpf_map__is_offload_neutral(map)) + if (map->def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) map->map_ifindex = attr->ifindex; } @@ -9512,14 +10158,6 @@ int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, return bpf_prog_load_xattr2(&attr, pobj, prog_fd); } -struct bpf_link { - int (*detach)(struct bpf_link *link); - void (*dealloc)(struct bpf_link *link); - char *pin_path; /* NULL, if not pinned */ - int fd; /* hook FD, -1 if not applicable */ - bool disconnected; -}; - /* Replace link's underlying BPF program with the new one */ int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog) { @@ -10070,14 +10708,152 @@ struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog, return bpf_program__attach_kprobe_opts(prog, func_name, &opts); } -static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cookie) +/* Adapted from perf/util/string.c */ +static bool glob_match(const char *str, const char *pat) +{ + while (*str && *pat && *pat != '*') { + if (*pat == '?') { /* Matches any single character */ + str++; + pat++; + continue; + } + if (*str != *pat) + return false; + str++; + pat++; + } + /* Check wild card */ + if (*pat == '*') { + while (*pat == '*') + pat++; + if (!*pat) /* Tail wild card matches all */ + return true; + while (*str) + if (glob_match(str++, pat)) + return true; + } + return !*str && !*pat; +} + +struct kprobe_multi_resolve { + const char *pattern; + unsigned long *addrs; + size_t cap; + size_t cnt; +}; + +static int +resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type, + const char *sym_name, void *ctx) +{ + struct kprobe_multi_resolve *res = ctx; + int err; + + if (!glob_match(sym_name, res->pattern)) + return 0; + + err = 
libbpf_ensure_mem((void **) &res->addrs, &res->cap, sizeof(unsigned long), + res->cnt + 1); + if (err) + return err; + + res->addrs[res->cnt++] = (unsigned long) sym_addr; + return 0; +} + +struct bpf_link * +bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, + const char *pattern, + const struct bpf_kprobe_multi_opts *opts) +{ + LIBBPF_OPTS(bpf_link_create_opts, lopts); + struct kprobe_multi_resolve res = { + .pattern = pattern, + }; + struct bpf_link *link = NULL; + char errmsg[STRERR_BUFSIZE]; + const unsigned long *addrs; + int err, link_fd, prog_fd; + const __u64 *cookies; + const char **syms; + bool retprobe; + size_t cnt; + + if (!OPTS_VALID(opts, bpf_kprobe_multi_opts)) + return libbpf_err_ptr(-EINVAL); + + syms = OPTS_GET(opts, syms, false); + addrs = OPTS_GET(opts, addrs, false); + cnt = OPTS_GET(opts, cnt, false); + cookies = OPTS_GET(opts, cookies, false); + + if (!pattern && !addrs && !syms) + return libbpf_err_ptr(-EINVAL); + if (pattern && (addrs || syms || cookies || cnt)) + return libbpf_err_ptr(-EINVAL); + if (!pattern && !cnt) + return libbpf_err_ptr(-EINVAL); + if (addrs && syms) + return libbpf_err_ptr(-EINVAL); + + if (pattern) { + err = libbpf_kallsyms_parse(resolve_kprobe_multi_cb, &res); + if (err) + goto error; + if (!res.cnt) { + err = -ENOENT; + goto error; + } + addrs = res.addrs; + cnt = res.cnt; + } + + retprobe = OPTS_GET(opts, retprobe, false); + + lopts.kprobe_multi.syms = syms; + lopts.kprobe_multi.addrs = addrs; + lopts.kprobe_multi.cookies = cookies; + lopts.kprobe_multi.cnt = cnt; + lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0; + + link = calloc(1, sizeof(*link)); + if (!link) { + err = -ENOMEM; + goto error; + } + link->detach = &bpf_link__detach_fd; + + prog_fd = bpf_program__fd(prog); + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &lopts); + if (link_fd < 0) { + err = -errno; + pr_warn("prog '%s': failed to attach: %s\n", + prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + goto error; + } + link->fd = link_fd; + free(res.addrs); + return link; + +error: + free(link); + free(res.addrs); + return libbpf_err_ptr(err); +} + +static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) { DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); unsigned long offset = 0; - struct bpf_link *link; const char *func_name; char *func; - int n, err; + int n; + + *link = NULL; + + /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */ + if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0) + return 0; opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/"); if (opts.retprobe) @@ -10087,21 +10863,50 @@ static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cooki n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); if (n < 1) { - err = -EINVAL; pr_warn("kprobe name is invalid: %s\n", func_name); - return libbpf_err_ptr(err); + return -EINVAL; } if (opts.retprobe && offset != 0) { free(func); - err = -EINVAL; pr_warn("kretprobes do not support offset specification\n"); - return libbpf_err_ptr(err); + return -EINVAL; } opts.offset = offset; - link = bpf_program__attach_kprobe_opts(prog, func, &opts); + *link = bpf_program__attach_kprobe_opts(prog, func, &opts); free(func); - return link; + return libbpf_get_error(*link); +} + +static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + const char *spec; + char *pattern; + 
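
/* Usage sketch for the multi-kprobe attach API defined above: one link
 * covering every kallsyms function that matches a glob. Assumes 'prog' was
 * loaded with SEC("kprobe.multi") (BPF_TRACE_KPROBE_MULTI attach type).
 */
#include <bpf/libbpf.h>

struct bpf_link *attach_tcp_probes(struct bpf_program *prog)
{
	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts,
		.retprobe = false,
	);

	/* pattern mode: syms/addrs/cookies/cnt must stay unset */
	return bpf_program__attach_kprobe_multi_opts(prog, "tcp_send*", &opts);
}
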
int n; + + *link = NULL; + + /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */ + if (strcmp(prog->sec_name, "kprobe.multi") == 0 || + strcmp(prog->sec_name, "kretprobe.multi") == 0) + return 0; + + opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/"); + if (opts.retprobe) + spec = prog->sec_name + sizeof("kretprobe.multi/") - 1; + else + spec = prog->sec_name + sizeof("kprobe.multi/") - 1; + + n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern); + if (n < 1) { + pr_warn("kprobe multi pattern is invalid: %s\n", spec); + return -EINVAL; + } + + *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts); + free(pattern); + return libbpf_get_error(*link); } static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, @@ -10183,6 +10988,273 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, return pfd; } +/* uprobes deal in relative offsets; subtract the base address associated with + * the mapped binary. See Documentation/trace/uprobetracer.rst for more + * details. + */ +static long elf_find_relative_offset(const char *filename, Elf *elf, long addr) +{ + size_t n; + int i; + + if (elf_getphdrnum(elf, &n)) { + pr_warn("elf: failed to find program headers for '%s': %s\n", filename, + elf_errmsg(-1)); + return -ENOENT; + } + + for (i = 0; i < n; i++) { + int seg_start, seg_end, seg_offset; + GElf_Phdr phdr; + + if (!gelf_getphdr(elf, i, &phdr)) { + pr_warn("elf: failed to get program header %d from '%s': %s\n", i, filename, + elf_errmsg(-1)); + return -ENOENT; + } + if (phdr.p_type != PT_LOAD || !(phdr.p_flags & PF_X)) + continue; + + seg_start = phdr.p_vaddr; + seg_end = seg_start + phdr.p_memsz; + seg_offset = phdr.p_offset; + if (addr >= seg_start && addr < seg_end) + return addr - seg_start + seg_offset; + } + pr_warn("elf: failed to find prog header containing 0x%lx in '%s'\n", addr, filename); + return -ENOENT; +} + +/* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */ +static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn) +{ + while ((scn = elf_nextscn(elf, scn)) != NULL) { + GElf_Shdr sh; + + if (!gelf_getshdr(scn, &sh)) + continue; + if (sh.sh_type == sh_type) + return scn; + } + return NULL; +} + +/* Find offset of function name in object specified by path. "name" matches + * symbol name or name@@LIB for library functions. + */ +static long elf_find_func_offset(const char *binary_path, const char *name) +{ + int fd, i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; + bool is_shared_lib, is_name_qualified; + char errmsg[STRERR_BUFSIZE]; + long ret = -ENOENT; + size_t name_len; + GElf_Ehdr ehdr; + Elf *elf; + + fd = open(binary_path, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + ret = -errno; + pr_warn("failed to open %s: %s\n", binary_path, + libbpf_strerror_r(ret, errmsg, sizeof(errmsg))); + return ret; + } + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) { + pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1)); + close(fd); + return -LIBBPF_ERRNO__FORMAT; + } + if (!gelf_getehdr(elf, &ehdr)) { + pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1)); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } + /* for shared lib case, we do not need to calculate relative offset */ + is_shared_lib = ehdr.e_type == ET_DYN; + + name_len = strlen(name); + /* Does name specify "@@LIB"? */ + is_name_qualified = strstr(name, "@@") != NULL; + + /* Search SHT_DYNSYM, SHT_SYMTAB for symbol.
This search order is used because if + * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically + * linked binary may not have SHT_DYNSYM, so absence of a section should not be + * reported as a warning/error. + */ + for (i = 0; i < ARRAY_SIZE(sh_types); i++) { + size_t nr_syms, strtabidx, idx; + Elf_Data *symbols = NULL; + Elf_Scn *scn = NULL; + int last_bind = -1; + const char *sname; + GElf_Shdr sh; + + scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL); + if (!scn) { + pr_debug("elf: failed to find symbol table ELF sections in '%s'\n", + binary_path); + continue; + } + if (!gelf_getshdr(scn, &sh)) + continue; + strtabidx = sh.sh_link; + symbols = elf_getdata(scn, 0); + if (!symbols) { + pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n", + binary_path, elf_errmsg(-1)); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } + nr_syms = symbols->d_size / sh.sh_entsize; + + for (idx = 0; idx < nr_syms; idx++) { + int curr_bind; + GElf_Sym sym; + + if (!gelf_getsym(symbols, idx, &sym)) + continue; + + if (GELF_ST_TYPE(sym.st_info) != STT_FUNC) + continue; + + sname = elf_strptr(elf, strtabidx, sym.st_name); + if (!sname) + continue; + + curr_bind = GELF_ST_BIND(sym.st_info); + + /* User can specify func, func@@LIB or func@@LIB_VERSION. */ + if (strncmp(sname, name, name_len) != 0) + continue; + /* ...but we don't want a search for "foo" to match "foo2" also, so any + * additional characters in sname should be of the form "@@LIB". + */ + if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@') + continue; + + if (ret >= 0) { + /* handle multiple matches */ + if (last_bind != STB_WEAK && curr_bind != STB_WEAK) { + /* Only accept one non-weak bind. */ + pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n", + sname, name, binary_path); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } else if (curr_bind == STB_WEAK) { + /* already have a non-weak bind, and + * this is a weak bind, so ignore. + */ + continue; + } + } + ret = sym.st_value; + last_bind = curr_bind; + } + /* For binaries that are not shared libraries, we need relative offset */ + if (ret > 0 && !is_shared_lib) + ret = elf_find_relative_offset(binary_path, elf, ret); + if (ret > 0) + break; + } + + if (ret > 0) { + pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path, + ret); + } else { + if (ret == 0) { + pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path, + is_shared_lib ? "should not be 0 in a shared library" : + "try using shared library path instead"); + ret = -ENOENT; + } else { + pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path); + } + } +out: + elf_end(elf); + close(fd); + return ret; +} + +static const char *arch_specific_lib_paths(void) +{ + /* + * Based on https://packages.debian.org/sid/libc6. + * + * Assume that the traced program is built for the same architecture + * as libbpf, which should cover the vast majority of cases.
+ */ +#if defined(__x86_64__) + return "/lib/x86_64-linux-gnu"; +#elif defined(__i386__) + return "/lib/i386-linux-gnu"; +#elif defined(__s390x__) + return "/lib/s390x-linux-gnu"; +#elif defined(__s390__) + return "/lib/s390-linux-gnu"; +#elif defined(__arm__) && defined(__SOFTFP__) + return "/lib/arm-linux-gnueabi"; +#elif defined(__arm__) && !defined(__SOFTFP__) + return "/lib/arm-linux-gnueabihf"; +#elif defined(__aarch64__) + return "/lib/aarch64-linux-gnu"; +#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64 + return "/lib/mips64el-linux-gnuabi64"; +#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32 + return "/lib/mipsel-linux-gnu"; +#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return "/lib/powerpc64le-linux-gnu"; +#elif defined(__sparc__) && defined(__arch64__) + return "/lib/sparc64-linux-gnu"; +#elif defined(__riscv) && __riscv_xlen == 64 + return "/lib/riscv64-linux-gnu"; +#else + return NULL; +#endif +} + +/* Get full path to program/shared library. */ +static int resolve_full_path(const char *file, char *result, size_t result_sz) +{ + const char *search_paths[3] = {}; + int i; + + if (str_has_sfx(file, ".so") || strstr(file, ".so.")) { + search_paths[0] = getenv("LD_LIBRARY_PATH"); + search_paths[1] = "/usr/lib64:/usr/lib"; + search_paths[2] = arch_specific_lib_paths(); + } else { + search_paths[0] = getenv("PATH"); + search_paths[1] = "/usr/bin:/usr/sbin"; + } + + for (i = 0; i < ARRAY_SIZE(search_paths); i++) { + const char *s; + + if (!search_paths[i]) + continue; + for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) { + char *next_path; + int seg_len; + + if (s[0] == ':') + s++; + next_path = strchr(s, ':'); + seg_len = next_path ? next_path - s : strlen(s); + if (!seg_len) + continue; + snprintf(result, result_sz, "%.*s/%s", seg_len, s, file); + /* ensure it is an executable file/link */ + if (access(result, R_OK | X_OK) < 0) + continue; + pr_debug("resolved '%s' to '%s'\n", file, result); + return 0; + } + } + return -ENOENT; +} + LIBBPF_API struct bpf_link * bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, @@ -10190,10 +11262,12 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; + char full_binary_path[PATH_MAX]; struct bpf_link *link; size_t ref_ctr_off; int pfd, err; bool retprobe, legacy; + const char *func_name; if (!OPTS_VALID(opts, bpf_uprobe_opts)) return libbpf_err_ptr(-EINVAL); @@ -10202,12 +11276,37 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); + if (binary_path && !strchr(binary_path, '/')) { + err = resolve_full_path(binary_path, full_binary_path, + sizeof(full_binary_path)); + if (err) { + pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", + prog->name, binary_path, err); + return libbpf_err_ptr(err); + } + binary_path = full_binary_path; + } + func_name = OPTS_GET(opts, func_name, NULL); + if (func_name) { + long sym_off; + + if (!binary_path) { + pr_warn("prog '%s': name-based attach requires binary_path\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + sym_off = elf_find_func_offset(binary_path, func_name); + if (sym_off < 0) + return libbpf_err_ptr(sym_off); + func_offset += sym_off; + } + legacy = determine_uprobe_perf_type() < 0; 
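+
+	/* A minimal usage sketch for the name-based lookup implemented above
+	 * (binary and function names are illustrative):
+	 *
+	 *	LIBBPF_OPTS(bpf_uprobe_opts, uopts,
+	 *		.func_name = "malloc",	// or "malloc@@GLIBC_2.2.5"
+	 *	);
+	 *	link = bpf_program__attach_uprobe_opts(prog, -1, "libc.so.6",
+	 *						0, &uopts);
+	 */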
if (!legacy) {
		pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
					    func_offset, pid, ref_ctr_off);
	} else {
-		char probe_name[512];
+		char probe_name[PATH_MAX + 64];
 
 		if (ref_ctr_off)
 			return libbpf_err_ptr(-EINVAL);
@@ -10255,6 +11354,60 @@ err_out:
 }
 
+/* Format of u[ret]probe section definition supporting auto-attach:
+ * u[ret]probe/binary:function[+offset]
+ *
+ * binary can be an absolute/relative path or a filename; the latter is resolved to a
+ * full binary path via bpf_program__attach_uprobe_opts.
+ *
+ * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
+ * specified (and auto-attach is not possible) or the above format is specified for
+ * auto-attach.
+ */
+static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
+{
+	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
+	char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
+	int n, ret = -EINVAL;
+	long offset = 0;
+
+	*link = NULL;
+
+	n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li",
+		   &probe_type, &binary_path, &func_name, &offset);
+	switch (n) {
+	case 1:
+		/* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
+		ret = 0;
+		break;
+	case 2:
+		pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
+			prog->name, prog->sec_name);
+		break;
+	case 3:
+	case 4:
+		opts.retprobe = strcmp(probe_type, "uretprobe") == 0;
+		if (opts.retprobe && offset != 0) {
+			pr_warn("prog '%s': uretprobes do not support offset specification\n",
+				prog->name);
+			break;
+		}
+		opts.func_name = func_name;
+		*link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
+		ret = libbpf_get_error(*link);
+		break;
+	default:
+		pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
+			prog->sec_name);
+		break;
+	}
+	free(probe_type);
+	free(binary_path);
+	free(func_name);
+
+	return ret;
+}
+
 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
 					    bool retprobe, pid_t pid,
 					    const char *binary_path,
@@ -10265,6 +11418,85 @@ struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
 	return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
 }
 
+struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
+					  pid_t pid, const char *binary_path,
+					  const char *usdt_provider, const char *usdt_name,
+					  const struct bpf_usdt_opts *opts)
+{
+	char resolved_path[512];
+	struct bpf_object *obj = prog->obj;
+	struct bpf_link *link;
+	__u64 usdt_cookie;
+	int err;
+
+	if (!OPTS_VALID(opts, bpf_usdt_opts))
+		return libbpf_err_ptr(-EINVAL);
+
+	if (bpf_program__fd(prog) < 0) {
+		pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
+			prog->name);
+		return libbpf_err_ptr(-EINVAL);
+	}
+
+	if (!strchr(binary_path, '/')) {
+		err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
+		if (err) {
+			pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
+				prog->name, binary_path, err);
+			return libbpf_err_ptr(err);
+		}
+		binary_path = resolved_path;
+	}
+
+	/* USDT manager is instantiated lazily on first USDT attach. It will
+	 * be destroyed together with BPF object in bpf_object__close().
+ */ + if (IS_ERR(obj->usdt_man)) + return libbpf_ptr(obj->usdt_man); + if (!obj->usdt_man) { + obj->usdt_man = usdt_manager_new(obj); + if (IS_ERR(obj->usdt_man)) + return libbpf_ptr(obj->usdt_man); + } + + usdt_cookie = OPTS_GET(opts, usdt_cookie, 0); + link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path, + usdt_provider, usdt_name, usdt_cookie); + err = libbpf_get_error(link); + if (err) + return libbpf_err_ptr(err); + return link; +} + +static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + char *path = NULL, *provider = NULL, *name = NULL; + const char *sec_name; + int n, err; + + sec_name = bpf_program__section_name(prog); + if (strcmp(sec_name, "usdt") == 0) { + /* no auto-attach for just SEC("usdt") */ + *link = NULL; + return 0; + } + + n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name); + if (n != 3) { + pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n", + sec_name); + err = -EINVAL; + } else { + *link = bpf_program__attach_usdt(prog, -1 /* any process */, path, + provider, name, NULL); + err = libbpf_get_error(*link); + } + free(path); + free(provider); + free(name); + return err; +} + static int determine_tracepoint_id(const char *tp_category, const char *tp_name) { @@ -10356,14 +11588,19 @@ struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog, return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL); } -static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie) +static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) { char *sec_name, *tp_cat, *tp_name; - struct bpf_link *link; + + *link = NULL; + + /* no auto-attach for SEC("tp") or SEC("tracepoint") */ + if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0) + return 0; sec_name = strdup(prog->sec_name); if (!sec_name) - return libbpf_err_ptr(-ENOMEM); + return -ENOMEM; /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */ if (str_has_pfx(prog->sec_name, "tp/")) @@ -10373,14 +11610,14 @@ static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie) tp_name = strchr(tp_cat, '/'); if (!tp_name) { free(sec_name); - return libbpf_err_ptr(-EINVAL); + return -EINVAL; } *tp_name = '\0'; tp_name++; - link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name); + *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name); free(sec_name); - return link; + return libbpf_get_error(*link); } struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, @@ -10413,39 +11650,59 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *pr return link; } -static struct bpf_link *attach_raw_tp(const struct bpf_program *prog, long cookie) +static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) { static const char *const prefixes[] = { - "raw_tp/", - "raw_tracepoint/", - "raw_tp.w/", - "raw_tracepoint.w/", + "raw_tp", + "raw_tracepoint", + "raw_tp.w", + "raw_tracepoint.w", }; size_t i; const char *tp_name = NULL; + *link = NULL; + for (i = 0; i < ARRAY_SIZE(prefixes); i++) { - if (str_has_pfx(prog->sec_name, prefixes[i])) { - tp_name = prog->sec_name + strlen(prefixes[i]); - break; - } + size_t pfx_len; + + if (!str_has_pfx(prog->sec_name, prefixes[i])) + continue; + + pfx_len = strlen(prefixes[i]); + /* no auto-attach case of, e.g., SEC("raw_tp") */ + if (prog->sec_name[pfx_len] == 
'\0')
+			return 0;
+
+		if (prog->sec_name[pfx_len] != '/')
+			continue;
+
+		tp_name = prog->sec_name + pfx_len + 1;
+		break;
 	}
 
 	if (!tp_name) {
 		pr_warn("prog '%s': invalid section name '%s'\n",
 			prog->name, prog->sec_name);
-		return libbpf_err_ptr(-EINVAL);
+		return -EINVAL;
 	}
 
-	return bpf_program__attach_raw_tracepoint(prog, tp_name);
+	*link = bpf_program__attach_raw_tracepoint(prog, tp_name);
+	return libbpf_get_error(*link);
 }
 
 /* Common logic for all BPF program types that attach to a btf_id */
-static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog)
+static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
+						   const struct bpf_trace_opts *opts)
 {
+	LIBBPF_OPTS(bpf_link_create_opts, link_opts);
 	char errmsg[STRERR_BUFSIZE];
 	struct bpf_link *link;
 	int prog_fd, pfd;
 
+	if (!OPTS_VALID(opts, bpf_trace_opts))
+		return libbpf_err_ptr(-EINVAL);
+
 	prog_fd = bpf_program__fd(prog);
 	if (prog_fd < 0) {
 		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
@@ -10457,7 +11714,9 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro
 		return libbpf_err_ptr(-ENOMEM);
 	link->detach = &bpf_link__detach_fd;
 
-	pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
+	/* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
+	link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
+	pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
 	if (pfd < 0) {
 		pfd = -errno;
 		free(link);
@@ -10466,27 +11725,35 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro
 		return libbpf_err_ptr(pfd);
 	}
 	link->fd = pfd;
-	return (struct bpf_link *)link;
+	return link;
 }
 
 struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
 {
-	return bpf_program__attach_btf_id(prog);
+	return bpf_program__attach_btf_id(prog, NULL);
+}
+
+struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
+						const struct bpf_trace_opts *opts)
+{
+	return bpf_program__attach_btf_id(prog, opts);
 }
 
 struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
 {
-	return bpf_program__attach_btf_id(prog);
+	return bpf_program__attach_btf_id(prog, NULL);
 }
 
-static struct bpf_link *attach_trace(const struct bpf_program *prog, long cookie)
+static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
 {
-	return bpf_program__attach_trace(prog);
+	*link = bpf_program__attach_trace(prog);
+	return libbpf_get_error(*link);
 }
 
-static struct bpf_link *attach_lsm(const struct bpf_program *prog, long cookie)
+static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link)
 {
-	return bpf_program__attach_lsm(prog);
+	*link = bpf_program__attach_lsm(prog);
+	return libbpf_get_error(*link);
 }
 
 static struct bpf_link *
@@ -10511,7 +11778,7 @@ bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id
 		return libbpf_err_ptr(-ENOMEM);
 	link->detach = &bpf_link__detach_fd;
 
-	attach_type = bpf_program__get_expected_attach_type(prog);
+	attach_type = bpf_program__expected_attach_type(prog);
 	link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
 	if (link_fd < 0) {
 		link_fd = -errno;
@@ -10615,17 +11882,33 @@ bpf_program__attach_iter(const struct bpf_program *prog,
 	return link;
 }
 
-static struct bpf_link *attach_iter(const struct bpf_program *prog, long cookie)
+static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link)
 {
-	return
bpf_program__attach_iter(prog, NULL); + *link = bpf_program__attach_iter(prog, NULL); + return libbpf_get_error(*link); } struct bpf_link *bpf_program__attach(const struct bpf_program *prog) { - if (!prog->sec_def || !prog->sec_def->attach_fn) - return libbpf_err_ptr(-ESRCH); + struct bpf_link *link = NULL; + int err; + + if (!prog->sec_def || !prog->sec_def->prog_attach_fn) + return libbpf_err_ptr(-EOPNOTSUPP); - return prog->sec_def->attach_fn(prog, prog->sec_def->cookie); + err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link); + if (err) + return libbpf_err_ptr(err); + + /* When calling bpf_program__attach() explicitly, auto-attach support + * is expected to work, so NULL returned link is considered an error. + * This is different for skeleton's attach, see comment in + * bpf_object__attach_skeleton(). + */ + if (!link) + return libbpf_err_ptr(-EOPNOTSUPP); + + return link; } static int bpf_link__detach_struct_ops(struct bpf_link *link) @@ -10912,7 +12195,7 @@ struct perf_buffer *perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, { struct perf_buffer_params p = {}; - if (page_cnt == 0 || !attr) + if (!attr) return libbpf_err_ptr(-EINVAL); if (!OPTS_VALID(opts, perf_buffer_raw_opts)) @@ -10953,7 +12236,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, __u32 map_info_len; int err, i, j, n; - if (page_cnt & (page_cnt - 1)) { + if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) { pr_warn("page count should be power of two, but is %zu\n", page_cnt); return ERR_PTR(-EINVAL); @@ -11640,6 +12923,49 @@ int libbpf_num_possible_cpus(void) return tmp_cpus; } +static int populate_skeleton_maps(const struct bpf_object *obj, + struct bpf_map_skeleton *maps, + size_t map_cnt) +{ + int i; + + for (i = 0; i < map_cnt; i++) { + struct bpf_map **map = maps[i].map; + const char *name = maps[i].name; + void **mmaped = maps[i].mmaped; + + *map = bpf_object__find_map_by_name(obj, name); + if (!*map) { + pr_warn("failed to find skeleton map '%s'\n", name); + return -ESRCH; + } + + /* externs shouldn't be pre-setup from user code */ + if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG) + *mmaped = (*map)->mmaped; + } + return 0; +} + +static int populate_skeleton_progs(const struct bpf_object *obj, + struct bpf_prog_skeleton *progs, + size_t prog_cnt) +{ + int i; + + for (i = 0; i < prog_cnt; i++) { + struct bpf_program **prog = progs[i].prog; + const char *name = progs[i].name; + + *prog = bpf_object__find_program_by_name(obj, name); + if (!*prog) { + pr_warn("failed to find skeleton program '%s'\n", name); + return -ESRCH; + } + } + return 0; +} + int bpf_object__open_skeleton(struct bpf_object_skeleton *s, const struct bpf_object_open_opts *opts) { @@ -11647,7 +12973,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s, .object_name = s->name, ); struct bpf_object *obj; - int i, err; + int err; /* Attempt to preserve opts->object_name, unless overriden by user * explicitly. 
Overwriting object name for skeletons is discouraged,
@@ -11670,37 +12996,91 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
 	}
 
 	*s->obj = obj;
+	err = populate_skeleton_maps(obj, s->maps, s->map_cnt);
+	if (err) {
+		pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err);
+		return libbpf_err(err);
+	}
 
-	for (i = 0; i < s->map_cnt; i++) {
-		struct bpf_map **map = s->maps[i].map;
-		const char *name = s->maps[i].name;
-		void **mmaped = s->maps[i].mmaped;
+	err = populate_skeleton_progs(obj, s->progs, s->prog_cnt);
+	if (err) {
+		pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err);
+		return libbpf_err(err);
+	}
 
-		*map = bpf_object__find_map_by_name(obj, name);
-		if (!*map) {
-			pr_warn("failed to find skeleton map '%s'\n", name);
-			return libbpf_err(-ESRCH);
-		}
+	return 0;
+}
 
-		/* externs shouldn't be pre-setup from user code */
-		if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
-			*mmaped = (*map)->mmaped;
+int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
+{
+	int err, len, var_idx, i;
+	const char *var_name;
+	const struct bpf_map *map;
+	struct btf *btf;
+	__u32 map_type_id;
+	const struct btf_type *map_type, *var_type;
+	const struct bpf_var_skeleton *var_skel;
+	struct btf_var_secinfo *var;
+
+	if (!s->obj)
+		return libbpf_err(-EINVAL);
+
+	btf = bpf_object__btf(s->obj);
+	if (!btf) {
+		pr_warn("subskeletons require BTF at runtime (object %s)\n",
+			bpf_object__name(s->obj));
+		return libbpf_err(-errno);
 	}
 
-	for (i = 0; i < s->prog_cnt; i++) {
-		struct bpf_program **prog = s->progs[i].prog;
-		const char *name = s->progs[i].name;
+	err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt);
+	if (err) {
+		pr_warn("failed to populate subskeleton maps: %d\n", err);
+		return libbpf_err(err);
+	}
 
-		*prog = bpf_object__find_program_by_name(obj, name);
-		if (!*prog) {
-			pr_warn("failed to find skeleton program '%s'\n", name);
-			return libbpf_err(-ESRCH);
-		}
+	err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt);
+	if (err) {
+		pr_warn("failed to populate subskeleton progs: %d\n", err);
+		return libbpf_err(err);
 	}
 
+	for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
+		var_skel = &s->vars[var_idx];
+		map = *var_skel->map;
+		map_type_id = bpf_map__btf_value_type_id(map);
+		map_type = btf__type_by_id(btf, map_type_id);
+
+		if (!btf_is_datasec(map_type)) {
+			pr_warn("type for map '%1$s' is not a datasec: %2$s\n",
+				bpf_map__name(map),
+				__btf_kind_str(btf_kind(map_type)));
+			return libbpf_err(-EINVAL);
+		}
+
+		len = btf_vlen(map_type);
+		var = btf_var_secinfos(map_type);
+		for (i = 0; i < len; i++, var++) {
+			var_type = btf__type_by_id(btf, var->type);
+			var_name = btf__name_by_offset(btf, var_type->name_off);
+			if (strcmp(var_name, var_skel->name) == 0) {
+				*var_skel->addr = map->mmaped + var->offset;
+				break;
+			}
+		}
+	}
 	return 0;
 }
 
+void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
+{
+	if (!s)
+		return;
+	free(s->maps);
+	free(s->progs);
+	free(s->vars);
+	free(s);
+}
+
 int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
 {
 	int i, err;
@@ -11762,20 +13142,34 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
 		struct bpf_program *prog = *s->progs[i].prog;
 		struct bpf_link **link = s->progs[i].link;
 
-		if (!prog->load)
+		if (!prog->autoload)
 			continue;
 
 		/* auto-attaching not supported for this program */
-		if (!prog->sec_def || !prog->sec_def->attach_fn)
+		if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
 			continue;
 
-		*link = bpf_program__attach(prog);
-		err =
libbpf_get_error(*link); + /* if user already set the link manually, don't attempt auto-attach */ + if (*link) + continue; + + err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link); if (err) { - pr_warn("failed to auto-attach program '%s': %d\n", + pr_warn("prog '%s': failed to auto-attach: %d\n", bpf_program__name(prog), err); return libbpf_err(err); } + + /* It's possible that for some SEC() definitions auto-attach + * is supported in some cases (e.g., if definition completely + * specifies target information), but is not in other cases. + * SEC("uprobe") is one such case. If user specified target + * binary and function name, such BPF program can be + * auto-attached. But if not, it shouldn't trigger skeleton's + * attach to fail. It should just be skipped. + * attach_fn signals such case with returning 0 (no error) and + * setting link to NULL. + */ } return 0; @@ -11795,6 +13189,9 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) { + if (!s) + return; + if (s->progs) bpf_object__detach_skeleton(s); if (s->obj) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 8b9bc5e90c2b..9e9a3fd3edd8 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -180,9 +180,11 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts); /* deprecated bpf_object__open variants */ +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__open_mem() instead") LIBBPF_API struct bpf_object * bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, const char *name); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open_file() instead") LIBBPF_API struct bpf_object * bpf_object__open_xattr(struct bpf_object_open_attr *attr); @@ -244,8 +246,10 @@ struct bpf_object *bpf_object__next(struct bpf_object *prev); (pos) = (tmp), (tmp) = bpf_object__next(tmp)) typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *); +LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv, bpf_object_clear_priv_t clear_priv); +LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog); LIBBPF_API int @@ -277,9 +281,10 @@ bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *prog) typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *); +LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv, bpf_program_clear_priv_t clear_priv); - +LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog); LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex); @@ -318,6 +323,24 @@ struct bpf_insn; * different. */ LIBBPF_API const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog); + +/** + * @brief **bpf_program__set_insns()** can set BPF program's underlying + * BPF instructions. + * + * WARNING: This is a very advanced libbpf API and users need to know + * what they are doing. This should be used from prog_prepare_load_fn + * callback only. 
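+ *
+ * A sketch of a prog_prepare_load_fn callback using it (the callback name
+ * and the instruction rewrite are illustrative only):
+ *
+ *	static int my_prepare_load(struct bpf_program *prog,
+ *				   struct bpf_prog_load_opts *opts, long cookie)
+ *	{
+ *		// replace the program body with "r0 = 0; exit;"
+ *		struct bpf_insn insns[] = {
+ *			{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0 },
+ *			{ .code = BPF_JMP | BPF_EXIT },
+ *		};
+ *
+ *		return bpf_program__set_insns(prog, insns, 2);
+ *	}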
+ *
+ * @param prog BPF program to set instructions for
+ * @param new_insns a pointer to an array of BPF instructions
+ * @param new_insn_cnt number of `struct bpf_insn`'s that form
+ * specified BPF program
+ * @return 0, on success; negative error code, otherwise
+ */
+LIBBPF_API int bpf_program__set_insns(struct bpf_program *prog,
+				      struct bpf_insn *new_insns, size_t new_insn_cnt);
+
 /**
  * @brief **bpf_program__insn_cnt()** returns number of `struct bpf_insn`'s
  * that form specified BPF program.
@@ -373,7 +396,31 @@ struct bpf_link;
 LIBBPF_API struct bpf_link *bpf_link__open(const char *path);
 LIBBPF_API int bpf_link__fd(const struct bpf_link *link);
 LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link);
+/**
+ * @brief **bpf_link__pin()** pins the BPF link to a file
+ * in the BPF FS specified by a path. This increments the link's
+ * reference count, allowing it to stay loaded after the process
+ * which loaded it has exited.
+ *
+ * @param link BPF link to pin, must already be loaded
+ * @param path file path in a BPF file system
+ * @return 0, on success; negative error code, otherwise
+ */
 LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path);
+
+/**
+ * @brief **bpf_link__unpin()** unpins the BPF link from a file
+ * in the BPF FS specified by a path. This decrements the link's
+ * reference count.
+ *
+ * The file pinning the BPF link can also be unlinked by a different
+ * process in which case this function will return an error.
+ *
+ * @param link BPF link to unpin
+ * @param path file path to the pin in a BPF file system
+ * @return 0, on success; negative error code, otherwise
+ */
 LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
 LIBBPF_API int bpf_link__update_program(struct bpf_link *link,
 					struct bpf_program *prog);
@@ -381,6 +428,22 @@ LIBBPF_API void bpf_link__disconnect(struct bpf_link *link);
 LIBBPF_API int bpf_link__detach(struct bpf_link *link);
 LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
 
+/**
+ * @brief **bpf_program__attach()** is a generic function for attaching
+ * a BPF program based on auto-detection of program type, attach type,
+ * and extra parameters, where applicable.
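+ *
+ * A typical call site, combined with the pinning API above (a sketch; the
+ * pin path is illustrative):
+ *
+ *	struct bpf_link *link = bpf_program__attach(prog);
+ *
+ *	if (!link)
+ *		return -errno;
+ *	err = bpf_link__pin(link, "/sys/fs/bpf/my_prog_link");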
+ * + * @param prog BPF program to attach + * @return Reference to the newly created BPF link; or NULL is returned on error, + * error code is stored in errno + * + * This is supported for: + * - kprobe/kretprobe (depends on SEC() definition) + * - uprobe/uretprobe (depends on SEC() definition) + * - tracepoint + * - raw tracepoint + * - tracing programs (typed raw TP/fentry/fexit/fmod_ret) + */ LIBBPF_API struct bpf_link * bpf_program__attach(const struct bpf_program *prog); @@ -420,6 +483,29 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, const char *func_name, const struct bpf_kprobe_opts *opts); +struct bpf_kprobe_multi_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + /* array of function symbols to attach */ + const char **syms; + /* array of function addresses to attach */ + const unsigned long *addrs; + /* array of user-provided values fetchable through bpf_get_attach_cookie */ + const __u64 *cookies; + /* number of elements in syms/addrs/cookies arrays */ + size_t cnt; + /* create return kprobes */ + bool retprobe; + size_t :0; +}; + +#define bpf_kprobe_multi_opts__last_field retprobe + +LIBBPF_API struct bpf_link * +bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, + const char *pattern, + const struct bpf_kprobe_multi_opts *opts); + struct bpf_uprobe_opts { /* size of this struct, for forward/backward compatiblity */ size_t sz; @@ -431,9 +517,17 @@ struct bpf_uprobe_opts { __u64 bpf_cookie; /* uprobe is return probe, invoked at function return time */ bool retprobe; + /* Function name to attach to. Could be an unqualified ("abc") or library-qualified + * "abc@LIBXYZ" name. To specify function entry, func_name should be set while + * func_offset argument to bpf_prog__attach_uprobe_opts() should be 0. To trace an + * offset within a function, specify func_name and use func_offset argument to specify + * offset within the function. Shared library functions must specify the shared library + * binary_path. + */ + const char *func_name; size_t :0; }; -#define bpf_uprobe_opts__last_field retprobe +#define bpf_uprobe_opts__last_field func_name /** * @brief **bpf_program__attach_uprobe()** attaches a BPF program @@ -475,6 +569,37 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, const struct bpf_uprobe_opts *opts); +struct bpf_usdt_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + /* custom user-provided value accessible through usdt_cookie() */ + __u64 usdt_cookie; + size_t :0; +}; +#define bpf_usdt_opts__last_field usdt_cookie + +/** + * @brief **bpf_program__attach_usdt()** is just like + * bpf_program__attach_uprobe_opts() except it covers USDT (User-space + * Statically Defined Tracepoint) attachment, instead of attaching to + * user-space function entry or exit. 
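+ *
+ * A minimal sketch (binary, provider and probe names are illustrative):
+ *
+ *	LIBBPF_OPTS(bpf_usdt_opts, uopts, .usdt_cookie = 0x100);
+ *
+ *	link = bpf_program__attach_usdt(prog, -1, "/usr/sbin/mysqld",
+ *					"mysql", "query__start", &uopts);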
+ * + * @param prog BPF program to attach + * @param pid Process ID to attach the uprobe to, 0 for self (own process), + * -1 for all processes + * @param binary_path Path to binary that contains provided USDT probe + * @param usdt_provider USDT provider name + * @param usdt_name USDT probe name + * @param opts Options for altering program attachment + * @return Reference to the newly created BPF link; or NULL is returned on error, + * error code is stored in errno + */ +LIBBPF_API struct bpf_link * +bpf_program__attach_usdt(const struct bpf_program *prog, + pid_t pid, const char *binary_path, + const char *usdt_provider, const char *usdt_name, + const struct bpf_usdt_opts *opts); + struct bpf_tracepoint_opts { /* size of this struct, for forward/backward compatiblity */ size_t sz; @@ -496,9 +621,21 @@ bpf_program__attach_tracepoint_opts(const struct bpf_program *prog, LIBBPF_API struct bpf_link * bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, const char *tp_name); + +struct bpf_trace_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + /* custom user-provided value fetchable through bpf_get_attach_cookie() */ + __u64 cookie; +}; +#define bpf_trace_opts__last_field cookie + LIBBPF_API struct bpf_link * bpf_program__attach_trace(const struct bpf_program *prog); LIBBPF_API struct bpf_link * +bpf_program__attach_trace_opts(const struct bpf_program *prog, const struct bpf_trace_opts *opts); + +LIBBPF_API struct bpf_link * bpf_program__attach_lsm(const struct bpf_program *prog); LIBBPF_API struct bpf_link * bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd); @@ -591,27 +728,65 @@ LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n); /* * Adjust type of BPF program. Default is kprobe. 
*/
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
 LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
 LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
 LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
 LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
 LIBBPF_API int bpf_program__set_lsm(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
 LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
 LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
 LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
 LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
 LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
 LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
 LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
 LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);
 
-LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
-LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
-				      enum bpf_prog_type type);
+LIBBPF_API enum bpf_prog_type bpf_program__type(const struct bpf_program *prog);
+
+/**
+ * @brief **bpf_program__set_type()** sets the program
+ * type of the passed BPF program.
+ * @param prog BPF program to set the program type for
+ * @param type program type to set the BPF program to have
+ * @return error code; or 0 if no error. An error occurs
+ * if the object is already loaded.
+ *
+ * This must be called before the BPF object is loaded,
+ * otherwise it has no effect and an error is returned.
+ */
+LIBBPF_API int bpf_program__set_type(struct bpf_program *prog,
+				     enum bpf_prog_type type);
 
 LIBBPF_API enum bpf_attach_type
-bpf_program__get_expected_attach_type(const struct bpf_program *prog);
-LIBBPF_API void
+bpf_program__expected_attach_type(const struct bpf_program *prog);
+
+/**
+ * @brief **bpf_program__set_expected_attach_type()** sets the
+ * attach type of the passed BPF program. This is used for
+ * auto-detection of attachment when programs are loaded.
+ * @param prog BPF program to set the attach type for
+ * @param type attach type to set the BPF program to have
+ * @return error code; or 0 if no error. An error occurs
+ * if the object is already loaded.
+ *
+ * This must be called before the BPF object is loaded,
+ * otherwise it has no effect and an error is returned.
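+ *
+ * For example (a sketch; must run before bpf_object__load()):
+ *
+ *	err = bpf_program__set_expected_attach_type(prog, BPF_TRACE_FENTRY);
+ *	if (err)
+ *		goto cleanup; // object was already loaded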
+ */
+LIBBPF_API int
+bpf_program__set_expected_attach_type(struct bpf_program *prog,
+				      enum bpf_attach_type type);
@@ -627,22 +802,46 @@ LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_le
 LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size);
 LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size);
 
+/**
+ * @brief **bpf_program__set_attach_target()** sets BTF-based attach target
+ * for supported BPF program types:
+ *   - BTF-aware raw tracepoints (tp_btf);
+ *   - fentry/fexit/fmod_ret;
+ *   - lsm;
+ *   - freplace.
+ * @param prog BPF program to set the attach target for
+ * @param attach_prog_fd FD of the target BPF program (for freplace), or zero
+ * for kernel-side BTF targets
+ * @param attach_func_name name of the function to attach to
+ * @return error code; or 0 if no error occurred.
+ */
 LIBBPF_API int
 bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd,
 			       const char *attach_func_name);
 
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
 LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
 LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
 LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
 LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
 LIBBPF_API bool bpf_program__is_lsm(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
 LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
 LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
 LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
 LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
 LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
 LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
 LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
 LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog);
 
 /*
@@ -698,6 +897,28 @@ LIBBPF_API struct bpf_map *
 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map);
 
 /**
+ * @brief **bpf_map__set_autocreate()** sets whether libbpf has to auto-create
+ * BPF map during BPF object load phase.
+ * @param map the BPF map instance
+ * @param autocreate whether to create BPF map during BPF object load
+ * @return 0 on success; -EBUSY if BPF object was already loaded
+ *
+ * **bpf_map__set_autocreate()** allows to opt-out from libbpf auto-creating
+ * BPF map. By default, libbpf will attempt to create every single BPF map
+ * defined in BPF object file using BPF_MAP_CREATE command of bpf() syscall
+ * and fill in map FD in BPF instructions.
+ *
+ * This API allows to opt-out of this process for specific map instance.
This + * can be useful if host kernel doesn't support such BPF map type or used + * combination of flags and user application wants to avoid creating such + * a map in the first place. User is still responsible to make sure that their + * BPF-side code that expects to use such missing BPF map is recognized by BPF + * verifier as dead code, otherwise BPF verifier will reject such BPF program. + */ +LIBBPF_API int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate); +LIBBPF_API bool bpf_map__autocreate(const struct bpf_map *map); + +/** * @brief **bpf_map__fd()** gets the file descriptor of the passed * BPF map * @param map the BPF map instance @@ -706,7 +927,8 @@ bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map); LIBBPF_API int bpf_map__fd(const struct bpf_map *map); LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); /* get map definition */ -LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map); +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use appropriate getters or setters instead") +const struct bpf_map_def *bpf_map__def(const struct bpf_map *map); /* get map name */ LIBBPF_API const char *bpf_map__name(const struct bpf_map *map); /* get/set map type */ @@ -715,6 +937,7 @@ LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type); /* get/set map size (max_entries) */ LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map); LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__set_max_entries() instead") LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); /* get/set map flags */ LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map); @@ -739,8 +962,10 @@ LIBBPF_API __u64 bpf_map__map_extra(const struct bpf_map *map); LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra); typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); +LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, bpf_map_clear_priv_t clear_priv); +LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated") LIBBPF_API void *bpf_map__priv(const struct bpf_map *map); LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); @@ -757,7 +982,6 @@ LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); */ LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); -LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); LIBBPF_API const char *bpf_map__pin_path(const struct bpf_map *map); LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map); LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); @@ -767,6 +991,110 @@ LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd); LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map); /** + * @brief **bpf_map__lookup_elem()** allows to lookup BPF map value + * corresponding to provided key. 
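+ *
+ * For example (a sketch assuming a map with 4-byte keys and 8-byte values):
+ *
+ *	__u32 key = 0;
+ *	__u64 value;
+ *	int err;
+ *
+ *	err = bpf_map__lookup_elem(map, &key, sizeof(key),
+ *				   &value, sizeof(value), 0);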
+ * @param map BPF map to lookup element in + * @param key pointer to memory containing bytes of the key used for lookup + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @param value pointer to memory in which looked up value will be stored + * @param value_sz size in byte of value data memory; it has to match BPF map + * definition's **value_size**. For per-CPU BPF maps value size has to be + * a product of BPF map value size and number of possible CPUs in the system + * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for + * per-CPU values value size has to be aligned up to closest 8 bytes for + * alignment reasons, so expected size is: `round_up(value_size, 8) + * * libbpf_num_possible_cpus()`. + * @flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise + * + * **bpf_map__lookup_elem()** is high-level equivalent of + * **bpf_map_lookup_elem()** API with added check for key and value size. + */ +LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags); + +/** + * @brief **bpf_map__update_elem()** allows to insert or update value in BPF + * map that corresponds to provided key. + * @param map BPF map to insert to or update element in + * @param key pointer to memory containing bytes of the key + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @param value pointer to memory containing bytes of the value + * @param value_sz size in byte of value data memory; it has to match BPF map + * definition's **value_size**. For per-CPU BPF maps value size has to be + * a product of BPF map value size and number of possible CPUs in the system + * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for + * per-CPU values value size has to be aligned up to closest 8 bytes for + * alignment reasons, so expected size is: `round_up(value_size, 8) + * * libbpf_num_possible_cpus()`. + * @flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise + * + * **bpf_map__update_elem()** is high-level equivalent of + * **bpf_map_update_elem()** API with added check for key and value size. + */ +LIBBPF_API int bpf_map__update_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + const void *value, size_t value_sz, __u64 flags); + +/** + * @brief **bpf_map__delete_elem()** allows to delete element in BPF map that + * corresponds to provided key. + * @param map BPF map to delete element from + * @param key pointer to memory containing bytes of the key + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise + * + * **bpf_map__delete_elem()** is high-level equivalent of + * **bpf_map_delete_elem()** API with added check for key size. + */ +LIBBPF_API int bpf_map__delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, __u64 flags); + +/** + * @brief **bpf_map__lookup_and_delete_elem()** allows to lookup BPF map value + * corresponding to provided key and atomically delete it afterwards. 
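+ *
+ * For per-CPU maps, size the value buffer as described above for
+ * **bpf_map__lookup_elem()**; a sketch for a per-CPU map with 8-byte values
+ * (assuming the kernel supports this operation for the map type in question):
+ *
+ *	int ncpus = libbpf_num_possible_cpus();
+ *	__u32 key = 0;
+ *	__u64 *values = calloc(ncpus, sizeof(__u64));
+ *
+ *	err = bpf_map__lookup_and_delete_elem(map, &key, sizeof(key),
+ *					      values, ncpus * sizeof(__u64), 0);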
+ * @param map BPF map to lookup element in + * @param key pointer to memory containing bytes of the key used for lookup + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @param value pointer to memory in which looked up value will be stored + * @param value_sz size in byte of value data memory; it has to match BPF map + * definition's **value_size**. For per-CPU BPF maps value size has to be + * a product of BPF map value size and number of possible CPUs in the system + * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for + * per-CPU values value size has to be aligned up to closest 8 bytes for + * alignment reasons, so expected size is: `round_up(value_size, 8) + * * libbpf_num_possible_cpus()`. + * @flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise + * + * **bpf_map__lookup_and_delete_elem()** is high-level equivalent of + * **bpf_map_lookup_and_delete_elem()** API with added check for key and value size. + */ +LIBBPF_API int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags); + +/** + * @brief **bpf_map__get_next_key()** allows to iterate BPF map keys by + * fetching next key that follows current key. + * @param map BPF map to fetch next key from + * @param cur_key pointer to memory containing bytes of current key or NULL to + * fetch the first key + * @param next_key pointer to memory to write next key into + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @return 0, on success; -ENOENT if **cur_key** is the last key in BPF map; + * negative error, otherwise + * + * **bpf_map__get_next_key()** is high-level equivalent of + * **bpf_map_get_next_key()** API with added check for key size. 
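+ *
+ * A typical iteration loop (a sketch for a map with 4-byte keys):
+ *
+ *	__u32 cur, next;
+ *	int err = bpf_map__get_next_key(map, NULL, &next, sizeof(next));
+ *
+ *	while (!err) {
+ *		cur = next;
+ *		// ... process cur ...
+ *		err = bpf_map__get_next_key(map, &cur, &next, sizeof(next));
+ *	}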
+ */ +LIBBPF_API int bpf_map__get_next_key(const struct bpf_map *map, + const void *cur_key, void *next_key, size_t key_sz); + +/** * @brief **libbpf_get_error()** extracts the error code from the passed * pointer * @param ptr pointer returned from libbpf API function @@ -832,13 +1160,42 @@ struct bpf_xdp_set_link_opts { }; #define bpf_xdp_set_link_opts__last_field old_fd +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead") LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead") LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags, const struct bpf_xdp_set_link_opts *opts); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query_id() instead") LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query() instead") LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, size_t info_size, __u32 flags); +struct bpf_xdp_attach_opts { + size_t sz; + int old_prog_fd; + size_t :0; +}; +#define bpf_xdp_attach_opts__last_field old_prog_fd + +struct bpf_xdp_query_opts { + size_t sz; + __u32 prog_id; /* output */ + __u32 drv_prog_id; /* output */ + __u32 hw_prog_id; /* output */ + __u32 skb_prog_id; /* output */ + __u8 attach_mode; /* output */ + size_t :0; +}; +#define bpf_xdp_query_opts__last_field attach_mode + +LIBBPF_API int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, + const struct bpf_xdp_attach_opts *opts); +LIBBPF_API int bpf_xdp_detach(int ifindex, __u32 flags, + const struct bpf_xdp_attach_opts *opts); +LIBBPF_API int bpf_xdp_query(int ifindex, int flags, struct bpf_xdp_query_opts *opts); +LIBBPF_API int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id); + /* TC related API */ enum bpf_tc_attach_point { BPF_TC_INGRESS = 1 << 0, @@ -1226,6 +1583,35 @@ LIBBPF_API int bpf_object__attach_skeleton(struct bpf_object_skeleton *s); LIBBPF_API void bpf_object__detach_skeleton(struct bpf_object_skeleton *s); LIBBPF_API void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s); +struct bpf_var_skeleton { + const char *name; + struct bpf_map **map; + void **addr; +}; + +struct bpf_object_subskeleton { + size_t sz; /* size of this struct, for forward/backward compatibility */ + + const struct bpf_object *obj; + + int map_cnt; + int map_skel_sz; /* sizeof(struct bpf_map_skeleton) */ + struct bpf_map_skeleton *maps; + + int prog_cnt; + int prog_skel_sz; /* sizeof(struct bpf_prog_skeleton) */ + struct bpf_prog_skeleton *progs; + + int var_cnt; + int var_skel_sz; /* sizeof(struct bpf_var_skeleton) */ + struct bpf_var_skeleton *vars; +}; + +LIBBPF_API int +bpf_object__open_subskeleton(struct bpf_object_subskeleton *s); +LIBBPF_API void +bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s); + struct gen_loader_opts { size_t sz; /* size of this struct, for forward/backward compatiblity */ const char *data; @@ -1265,6 +1651,115 @@ LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); +/* + * Custom handling of BPF program's SEC() definitions + */ + +struct bpf_prog_load_opts; /* defined in bpf.h */ + +/* Called during bpf_object__open() for each recognized BPF program. Callback + * can use various bpf_program__set_*() setters to adjust whatever properties + * are necessary. 
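+ *
+ * For example (a sketch; the callback name is illustrative):
+ *
+ *	static int my_setup(struct bpf_program *prog, long cookie)
+ *	{
+ *		return bpf_program__set_log_level(prog, 1); // verbose verifier log
+ *	}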
+ */ +typedef int (*libbpf_prog_setup_fn_t)(struct bpf_program *prog, long cookie); + +/* Called right before libbpf performs bpf_prog_load() to load BPF program + * into the kernel. Callback can adjust opts as necessary. + */ +typedef int (*libbpf_prog_prepare_load_fn_t)(struct bpf_program *prog, + struct bpf_prog_load_opts *opts, long cookie); + +/* Called during skeleton attach or through bpf_program__attach(). If + * auto-attach is not supported, callback should return 0 and set link to + * NULL (it's not considered an error during skeleton attach, but it will be + * an error for bpf_program__attach() calls). On error, error should be + * returned directly and link set to NULL. On success, return 0 and set link + * to a valid struct bpf_link. + */ +typedef int (*libbpf_prog_attach_fn_t)(const struct bpf_program *prog, long cookie, + struct bpf_link **link); + +struct libbpf_prog_handler_opts { + /* size of this struct, for forward/backward compatiblity */ + size_t sz; + /* User-provided value that is passed to prog_setup_fn, + * prog_prepare_load_fn, and prog_attach_fn callbacks. Allows user to + * register one set of callbacks for multiple SEC() definitions and + * still be able to distinguish them, if necessary. For example, + * libbpf itself is using this to pass necessary flags (e.g., + * sleepable flag) to a common internal SEC() handler. + */ + long cookie; + /* BPF program initialization callback (see libbpf_prog_setup_fn_t). + * Callback is optional, pass NULL if it's not necessary. + */ + libbpf_prog_setup_fn_t prog_setup_fn; + /* BPF program loading callback (see libbpf_prog_prepare_load_fn_t). + * Callback is optional, pass NULL if it's not necessary. + */ + libbpf_prog_prepare_load_fn_t prog_prepare_load_fn; + /* BPF program attach callback (see libbpf_prog_attach_fn_t). + * Callback is optional, pass NULL if it's not necessary. + */ + libbpf_prog_attach_fn_t prog_attach_fn; +}; +#define libbpf_prog_handler_opts__last_field prog_attach_fn + +/** + * @brief **libbpf_register_prog_handler()** registers a custom BPF program + * SEC() handler. + * @param sec section prefix for which custom handler is registered + * @param prog_type BPF program type associated with specified section + * @param exp_attach_type Expected BPF attach type associated with specified section + * @param opts optional cookie, callbacks, and other extra options + * @return Non-negative handler ID is returned on success. This handler ID has + * to be passed to *libbpf_unregister_prog_handler()* to unregister such + * custom handler. Negative error code is returned on error. + * + * *sec* defines which SEC() definitions are handled by this custom handler + * registration. *sec* can have few different forms: + * - if *sec* is just a plain string (e.g., "abc"), it will match only + * SEC("abc"). If BPF program specifies SEC("abc/whatever") it will result + * in an error; + * - if *sec* is of the form "abc/", proper SEC() form is + * SEC("abc/something"), where acceptable "something" should be checked by + * *prog_init_fn* callback, if there are additional restrictions; + * - if *sec* is of the form "abc+", it will successfully match both + * SEC("abc") and SEC("abc/whatever") forms; + * - if *sec* is NULL, custom handler is registered for any BPF program that + * doesn't match any of the registered (custom or libbpf's own) SEC() + * handlers. There could be only one such generic custom handler registered + * at any given time. 
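+ *
+ * Putting it together (a sketch; section prefix, program type and callback
+ * are illustrative):
+ *
+ *	LIBBPF_OPTS(libbpf_prog_handler_opts, hopts,
+ *		.prog_setup_fn = my_setup,
+ *	);
+ *	int id = libbpf_register_prog_handler("mysec+", BPF_PROG_TYPE_KPROBE,
+ *					      0, &hopts);
+ *
+ *	// ... later, when no longer needed ...
+ *	libbpf_unregister_prog_handler(id);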
+ * + * All custom handlers (except the one with *sec* == NULL) are processed + * before libbpf's own SEC() handlers. It is allowed to "override" libbpf's + * SEC() handlers by registering custom ones for the same section prefix + * (i.e., it's possible to have custom SEC("perf_event/LLC-load-misses") + * handler). + * + * Note, like much of global libbpf APIs (e.g., libbpf_set_print(), + * libbpf_set_strict_mode(), etc)) these APIs are not thread-safe. User needs + * to ensure synchronization if there is a risk of running this API from + * multiple threads simultaneously. + */ +LIBBPF_API int libbpf_register_prog_handler(const char *sec, + enum bpf_prog_type prog_type, + enum bpf_attach_type exp_attach_type, + const struct libbpf_prog_handler_opts *opts); +/** + * @brief *libbpf_unregister_prog_handler()* unregisters previously registered + * custom BPF program SEC() handler. + * @param handler_id handler ID returned by *libbpf_register_prog_handler()* + * after successful registration + * @return 0 on success, negative error code if handler isn't found + * + * Note, like much of global libbpf APIs (e.g., libbpf_set_print(), + * libbpf_set_strict_mode(), etc)) these APIs are not thread-safe. User needs + * to ensure synchronization if there is a risk of running this API from + * multiple threads simultaneously. + */ +LIBBPF_API int libbpf_unregister_prog_handler(int handler_id); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 529783967793..52973cffc20c 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -247,6 +247,7 @@ LIBBPF_0.0.8 { bpf_link_create; bpf_link_update; bpf_map__set_initial_value; + bpf_prog_attach_opts; bpf_program__attach_cgroup; bpf_program__attach_lsm; bpf_program__is_lsm; @@ -423,12 +424,42 @@ LIBBPF_0.6.0 { LIBBPF_0.7.0 { global: bpf_btf_load; + bpf_program__expected_attach_type; bpf_program__log_buf; bpf_program__log_level; bpf_program__set_log_buf; bpf_program__set_log_level; + bpf_program__type; + bpf_xdp_attach; + bpf_xdp_detach; + bpf_xdp_query; + bpf_xdp_query_id; libbpf_probe_bpf_helper; libbpf_probe_bpf_map_type; libbpf_probe_bpf_prog_type; libbpf_set_memlock_rlim_max; +} LIBBPF_0.6.0; + +LIBBPF_0.8.0 { + global: + bpf_map__autocreate; + bpf_map__get_next_key; + bpf_map__delete_elem; + bpf_map__lookup_and_delete_elem; + bpf_map__lookup_elem; + bpf_map__set_autocreate; + bpf_map__update_elem; + bpf_map_delete_elem_flags; + bpf_object__destroy_subskeleton; + bpf_object__open_subskeleton; + bpf_program__attach_kprobe_multi_opts; + bpf_program__attach_trace_opts; + bpf_program__attach_usdt; + bpf_program__set_insns; + libbpf_register_prog_handler; + libbpf_unregister_prog_handler; +} LIBBPF_0.7.0; + +LIBBPF_1.0.0 { + local: *; }; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 1565679eb432..4abdbe2fea9d 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -92,6 +92,9 @@ # define offsetofend(TYPE, FIELD) \ (offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD)) #endif +#ifndef __alias +#define __alias(symbol) __attribute__((alias(#symbol))) +#endif /* Check whether a string `str` has prefix `pfx`, regardless if `pfx` is * a string literal known at compilation time or char * pointer known only at @@ -100,6 +103,17 @@ #define str_has_pfx(str, pfx) \ (strncmp(str, pfx, __builtin_constant_p(pfx) ? 
sizeof(pfx) - 1 : strlen(pfx)) == 0)
 
+/* suffix check */
+static inline bool str_has_sfx(const char *str, const char *sfx)
+{
+	size_t str_len = strlen(str);
+	size_t sfx_len = strlen(sfx);
+
+	if (sfx_len <= str_len)
+		return strcmp(str + str_len - sfx_len, sfx) == 0;
+	return false;
+}
+
 /* Symbol versioning is different between static and shared library.
  * Properly versioned symbols are needed for shared library, but
  * only the symbol of the new version is needed for static library.
@@ -145,6 +159,15 @@ do {								\
 #ifndef __has_builtin
 #define __has_builtin(x) 0
 #endif
+
+struct bpf_link {
+	int (*detach)(struct bpf_link *link);
+	void (*dealloc)(struct bpf_link *link);
+	char *pin_path;		/* NULL, if not pinned */
+	int fd;			/* hook FD, -1 if not applicable */
+	bool disconnected;
+};
+
 /*
  * Re-implement glibc's reallocarray() for libbpf internal-only use.
  * reallocarray(), unfortunately, is not available in all versions of glibc,
@@ -326,6 +349,8 @@ enum kern_feature_id {
 	FEAT_BTF_TYPE_TAG,
 	/* memcg-based accounting for BPF maps and progs */
 	FEAT_MEMCG_ACCOUNT,
+	/* BPF cookie (bpf_get_attach_cookie() BPF helper) support */
+	FEAT_BPF_COOKIE,
 	__FEAT_CNT,
 };
 
@@ -351,6 +376,13 @@ struct btf_ext_info {
 	void *info;
 	__u32 rec_size;
 	__u32 len;
+	/* optional (maintained internally by libbpf) mapping between .BTF.ext
+	 * section and corresponding ELF section. This is used to join
+	 * information like CO-RE relocation records with corresponding BPF
+	 * programs defined in ELF sections
+	 */
+	__u32 *sec_idxs;
+	int sec_cnt;
 };
 
 #define for_each_btf_ext_sec(seg, sec)					\
@@ -446,6 +478,11 @@ __s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name,
 
 extern enum libbpf_strict_mode libbpf_mode;
 
+typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type,
+			     const char *sym_name, void *ctx);
+
+int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *arg);
+
 /* handle direct returned errors */
 static inline int libbpf_err(int ret)
 {
@@ -526,4 +563,21 @@ static inline int ensure_good_fd(int fd)
 	return fd;
 }
 
+/* The following two functions are exposed to bpftool */
+int bpf_core_add_cands(struct bpf_core_cand *local_cand,
+		       size_t local_essent_len,
+		       const struct btf *targ_btf,
+		       const char *targ_btf_name,
+		       int targ_start_id,
+		       struct bpf_core_cand_list *cands);
+void bpf_core_free_cands(struct bpf_core_cand_list *cands);
+
+struct usdt_manager *usdt_manager_new(struct bpf_object *obj);
+void usdt_manager_free(struct usdt_manager *man);
+struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man,
+					  const struct bpf_program *prog,
+					  pid_t pid, const char *path,
+					  const char *usdt_provider, const char *usdt_name,
+					  __u64 usdt_cookie);
+
 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h
index 79131f761a27..d7bcbd01f66f 100644
--- a/tools/lib/bpf/libbpf_legacy.h
+++ b/tools/lib/bpf/libbpf_legacy.h
@@ -54,6 +54,10 @@ enum libbpf_strict_mode {
 	 *
 	 * Note, in this mode the program pin path will be based on the
 	 * function name instead of section name.
+	 *
+	 * Additionally, routines in the .text section are always considered
+	 * sub-programs. Legacy behavior allows for a single routine in .text
+	 * to be a program.
 	 */
 	LIBBPF_STRICT_SEC_NAME = 0x04,
 	/*
@@ -73,6 +77,11 @@ enum libbpf_strict_mode {
 	 * operation.
 	 */
 	LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10,
+	/*
+	 * Error out on any SEC("maps") map definition, which are deprecated
+	 * in favor of BTF-defined map definitions in SEC(".maps").
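+	 *
+	 * For example, the supported BTF-defined form looks like this in
+	 * BPF-side C (a sketch; map name and types are illustrative):
+	 *
+	 *	struct {
+	 *		__uint(type, BPF_MAP_TYPE_HASH);
+	 *		__uint(max_entries, 1024);
+	 *		__type(key, __u32);
+	 *		__type(value, __u64);
+	 *	} my_map SEC(".maps");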
+/* "Discouraged" APIs which don't follow consistent libbpf naming patterns. + * They are normally trivial aliases or wrappers for proper APIs and are + * left to minimize unnecessary disruption for users of libbpf. But they + * shouldn't be used going forward. + */ + +struct bpf_program; +struct bpf_map; +struct btf; +struct btf_ext; + +LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); +LIBBPF_API enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog); +LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); +LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); +LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index 0fefefc3500b..2fb2f4290080 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -3,7 +3,7 @@ #ifndef __LIBBPF_VERSION_H #define __LIBBPF_VERSION_H -#define LIBBPF_MAJOR_VERSION 0 -#define LIBBPF_MINOR_VERSION 7 +#define LIBBPF_MAJOR_VERSION 1 +#define LIBBPF_MINOR_VERSION 0 #endif /* __LIBBPF_VERSION_H */ diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 39f25e09b51e..cbc8967d5402 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -87,29 +87,75 @@ enum { NL_DONE, }; +static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags) +{ + int len; + + do { + len = recvmsg(sock, mhdr, flags); + } while (len < 0 && (errno == EINTR || errno == EAGAIN)); + + if (len < 0) + return -errno; + return len; +} + +static int alloc_iov(struct iovec *iov, int len) +{ + void *nbuf; + + nbuf = realloc(iov->iov_base, len); + if (!nbuf) + return -ENOMEM; + + iov->iov_base = nbuf; + iov->iov_len = len; + return 0; +} + static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq, __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn, void *cookie) { + struct iovec iov = {}; + struct msghdr mhdr = { + .msg_iov = &iov, + .msg_iovlen = 1, + }; bool multipart = true; struct nlmsgerr *err; struct nlmsghdr *nh; - char buf[4096]; int len, ret; + ret = alloc_iov(&iov, 4096); + if (ret) + goto done; + while (multipart) { start: multipart = false; - len = recv(sock, buf, sizeof(buf), 0); + len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC); + if (len < 0) { - ret = -errno; + ret = len; + goto done; + } + + if (len > iov.iov_len) { + ret = alloc_iov(&iov, len); + if (ret) + goto done; + } + + len = netlink_recvmsg(sock, &mhdr, 0); + if (len < 0) { + ret = len; goto done; } if (len == 0) break; - for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len); nh = NLMSG_NEXT(nh, len)) { if (nh->nlmsg_pid != nl_pid) { ret = -LIBBPF_ERRNO__WRNGPID; @@ -130,7 +176,8 @@ start: libbpf_nla_dump_errormsg(nh); goto done; case NLMSG_DONE: - return 0; + ret = 0; + goto done; default: break; } @@ -142,15 +189,17 @@ start: case NL_NEXT: goto start; case NL_DONE: - return 0; + ret = 0; + goto done; default: - return ret; + goto done; } } } } ret = 0; done: + free(iov.iov_base); return ret; } @@ -217,6 +266,28 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd, return libbpf_netlink_send_recv(&req, NULL, NULL,
NULL); } +int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, const struct bpf_xdp_attach_opts *opts) +{ + int old_prog_fd, err; + + if (!OPTS_VALID(opts, bpf_xdp_attach_opts)) + return libbpf_err(-EINVAL); + + old_prog_fd = OPTS_GET(opts, old_prog_fd, 0); + if (old_prog_fd) + flags |= XDP_FLAGS_REPLACE; + else + old_prog_fd = -1; + + err = __bpf_set_link_xdp_fd_replace(ifindex, prog_fd, old_prog_fd, flags); + return libbpf_err(err); +} + +int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *opts) +{ + return bpf_xdp_attach(ifindex, -1, flags, opts); +} + int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags, const struct bpf_xdp_set_link_opts *opts) { @@ -303,69 +374,98 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb) return 0; } -int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, - size_t info_size, __u32 flags) +int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts) { - struct xdp_id_md xdp_id = {}; - __u32 mask; - int ret; struct libbpf_nla_req req = { .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), .nh.nlmsg_type = RTM_GETLINK, .nh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, .ifinfo.ifi_family = AF_PACKET, }; + struct xdp_id_md xdp_id = {}; + int err; - if (flags & ~XDP_FLAGS_MASK || !info_size) + if (!OPTS_VALID(opts, bpf_xdp_query_opts)) + return libbpf_err(-EINVAL); + + if (xdp_flags & ~XDP_FLAGS_MASK) return libbpf_err(-EINVAL); /* Check whether the single {HW,DRV,SKB} mode is set */ - flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE); - mask = flags - 1; - if (flags && flags & mask) + xdp_flags &= XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE; + if (xdp_flags & (xdp_flags - 1)) return libbpf_err(-EINVAL); xdp_id.ifindex = ifindex; - xdp_id.flags = flags; + xdp_id.flags = xdp_flags; - ret = libbpf_netlink_send_recv(&req, __dump_link_nlmsg, + err = libbpf_netlink_send_recv(&req, __dump_link_nlmsg, get_xdp_info, &xdp_id); - if (!ret) { - size_t sz = min(info_size, sizeof(xdp_id.info)); + if (err) + return libbpf_err(err); - memcpy(info, &xdp_id.info, sz); - memset((void *) info + sz, 0, info_size - sz); - } + OPTS_SET(opts, prog_id, xdp_id.info.prog_id); + OPTS_SET(opts, drv_prog_id, xdp_id.info.drv_prog_id); + OPTS_SET(opts, hw_prog_id, xdp_id.info.hw_prog_id); + OPTS_SET(opts, skb_prog_id, xdp_id.info.skb_prog_id); + OPTS_SET(opts, attach_mode, xdp_id.info.attach_mode); - return libbpf_err(ret); + return 0; } -static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags) +int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, + size_t info_size, __u32 flags) { - flags &= XDP_FLAGS_MODES; + LIBBPF_OPTS(bpf_xdp_query_opts, opts); + size_t sz; + int err; - if (info->attach_mode != XDP_ATTACHED_MULTI && !flags) - return info->prog_id; - if (flags & XDP_FLAGS_DRV_MODE) - return info->drv_prog_id; - if (flags & XDP_FLAGS_HW_MODE) - return info->hw_prog_id; - if (flags & XDP_FLAGS_SKB_MODE) - return info->skb_prog_id; + if (!info_size) + return libbpf_err(-EINVAL); + + err = bpf_xdp_query(ifindex, flags, &opts); + if (err) + return libbpf_err(err); + + /* struct xdp_link_info field layout matches struct bpf_xdp_query_opts + * layout after sz field + */ + sz = min(info_size, offsetofend(struct xdp_link_info, attach_mode)); + memcpy(info, &opts.prog_id, sz); + memset((void *)info + sz, 0, info_size - sz); return 0; } -int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +int bpf_xdp_query_id(int ifindex, int flags, __u32 
*prog_id) { - struct xdp_link_info info; + LIBBPF_OPTS(bpf_xdp_query_opts, opts); int ret; - ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags); - if (!ret) - *prog_id = get_xdp_id(&info, flags); + ret = bpf_xdp_query(ifindex, flags, &opts); + if (ret) + return libbpf_err(ret); - return libbpf_err(ret); + flags &= XDP_FLAGS_MODES; + + if (opts.attach_mode != XDP_ATTACHED_MULTI && !flags) + *prog_id = opts.prog_id; + else if (flags & XDP_FLAGS_DRV_MODE) + *prog_id = opts.drv_prog_id; + else if (flags & XDP_FLAGS_HW_MODE) + *prog_id = opts.hw_prog_id; + else if (flags & XDP_FLAGS_SKB_MODE) + *prog_id = opts.skb_prog_id; + else + *prog_id = 0; + + return 0; +} + + +int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +{ + return bpf_xdp_query_id(ifindex, flags, prog_id); } typedef int (*qdisc_config_t)(struct libbpf_nla_req *req); diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index 910865e29edc..ba4453dfd1ed 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -178,29 +178,28 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access * string to specify enumerator's value index that need to be relocated. */ -static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, - __u32 type_id, - const char *spec_str, - enum bpf_core_relo_kind relo_kind, - struct bpf_core_spec *spec) +int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, + const struct bpf_core_relo *relo, + struct bpf_core_spec *spec) { int access_idx, parsed_len, i; struct bpf_core_accessor *acc; const struct btf_type *t; - const char *name; + const char *name, *spec_str; __u32 id; __s64 sz; + spec_str = btf__name_by_offset(btf, relo->access_str_off); if (str_is_empty(spec_str) || *spec_str == ':') return -EINVAL; memset(spec, 0, sizeof(*spec)); spec->btf = btf; - spec->root_type_id = type_id; - spec->relo_kind = relo_kind; + spec->root_type_id = relo->type_id; + spec->relo_kind = relo->kind; /* type-based relocations don't have a field access string */ - if (core_relo_is_type_based(relo_kind)) { + if (core_relo_is_type_based(relo->kind)) { if (strcmp(spec_str, "0")) return -EINVAL; return 0; @@ -221,7 +220,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, if (spec->raw_len == 0) return -EINVAL; - t = skip_mods_and_typedefs(btf, type_id, &id); + t = skip_mods_and_typedefs(btf, relo->type_id, &id); if (!t) return -EINVAL; @@ -231,7 +230,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, acc->idx = access_idx; spec->len++; - if (core_relo_is_enumval_based(relo_kind)) { + if (core_relo_is_enumval_based(relo->kind)) { if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) return -EINVAL; @@ -240,7 +239,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, return 0; } - if (!core_relo_is_field_based(relo_kind)) + if (!core_relo_is_field_based(relo->kind)) return -EINVAL; sz = btf__resolve_size(btf, id); @@ -301,7 +300,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, spec->bit_offset += access_idx * sz * 8; } else { pr_warn("prog '%s': relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", - prog_name, type_id, spec_str, i, id, btf_kind_str(t)); + prog_name, relo->type_id, spec_str, i, id, btf_kind_str(t)); return -EINVAL; } } @@ -775,31 +774,6 @@ static int bpf_core_calc_enumval_relo(const struct 
bpf_core_relo *relo, return 0; } -struct bpf_core_relo_res -{ - /* expected value in the instruction, unless validate == false */ - __u32 orig_val; - /* new value that needs to be patched up to */ - __u32 new_val; - /* relocation unsuccessful, poison instruction, but don't fail load */ - bool poison; - /* some relocations can't be validated against orig_val */ - bool validate; - /* for field byte offset relocations or the forms: - * *(T *)(rX + <off>) = rY - * rX = *(T *)(rY + <off>), - * we remember original and resolved field size to adjust direct - * memory loads of pointers and integers; this is necessary for 32-bit - * host kernel architectures, but also allows to automatically - * relocate fields that were resized from, e.g., u32 to u64, etc. - */ - bool fail_memsz_adjust; - __u32 orig_sz; - __u32 orig_type_id; - __u32 new_sz; - __u32 new_type_id; -}; - /* Calculate original and target relocation values, given local and target * specs and relocation kind. These values are calculated for each candidate. * If there are multiple candidates, resulting values should all be consistent @@ -951,9 +925,9 @@ static int insn_bytes_to_bpf_size(__u32 sz) * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64}; * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}. */ -static int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, - int insn_idx, const struct bpf_core_relo *relo, - int relo_idx, const struct bpf_core_relo_res *res) +int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, + int insn_idx, const struct bpf_core_relo *relo, + int relo_idx, const struct bpf_core_relo_res *res) { __u32 orig_val, new_val; __u8 class; @@ -1080,55 +1054,70 @@ poison: * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b */ -static void bpf_core_dump_spec(const char *prog_name, int level, const struct bpf_core_spec *spec) +int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec) { const struct btf_type *t; const struct btf_enum *e; const char *s; __u32 type_id; - int i; + int i, len = 0; + +#define append_buf(fmt, args...) \ + ({ \ + int r; \ + r = snprintf(buf, buf_sz, fmt, ##args); \ + len += r; \ + if (r >= buf_sz) \ + r = buf_sz; \ + buf += r; \ + buf_sz -= r; \ + }) type_id = spec->root_type_id; t = btf_type_by_id(spec->btf, type_id); s = btf__name_by_offset(spec->btf, t->name_off); - libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s); + append_buf("<%s> [%u] %s %s", + core_relo_kind_str(spec->relo_kind), + type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s); if (core_relo_is_type_based(spec->relo_kind)) - return; + return len; if (core_relo_is_enumval_based(spec->relo_kind)) { t = skip_mods_and_typedefs(spec->btf, type_id, NULL); e = btf_enum(t) + spec->raw_spec[0]; s = btf__name_by_offset(spec->btf, e->name_off); - libbpf_print(level, "::%s = %u", s, e->val); - return; + append_buf("::%s = %u", s, e->val); + return len; } if (core_relo_is_field_based(spec->relo_kind)) { for (i = 0; i < spec->len; i++) { if (spec->spec[i].name) - libbpf_print(level, ".%s", spec->spec[i].name); + append_buf(".%s", spec->spec[i].name); else if (i > 0 || spec->spec[i].idx > 0) - libbpf_print(level, "[%u]", spec->spec[i].idx); + append_buf("[%u]", spec->spec[i].idx); } - libbpf_print(level, " ("); + append_buf(" ("); for (i = 0; i < spec->raw_len; i++) - libbpf_print(level, "%s%d", i == 0 ? 
"" : ":", spec->raw_spec[i]); + append_buf("%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); if (spec->bit_offset % 8) - libbpf_print(level, " @ offset %u.%u)", - spec->bit_offset / 8, spec->bit_offset % 8); + append_buf(" @ offset %u.%u)", spec->bit_offset / 8, spec->bit_offset % 8); else - libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); - return; + append_buf(" @ offset %u)", spec->bit_offset / 8); + return len; } + + return len; +#undef append_buf } /* - * CO-RE relocate single instruction. + * Calculate CO-RE relocation target result. * * The outline and important points of the algorithm: * 1. For given local type, find corresponding candidate target types. @@ -1177,22 +1166,22 @@ static void bpf_core_dump_spec(const char *prog_name, int level, const struct bp * between multiple relocations for the same type ID and is updated as some * of the candidates are pruned due to structural incompatibility. */ -int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, - int insn_idx, - const struct bpf_core_relo *relo, - int relo_idx, - const struct btf *local_btf, - struct bpf_core_cand_list *cands, - struct bpf_core_spec *specs_scratch) +int bpf_core_calc_relo_insn(const char *prog_name, + const struct bpf_core_relo *relo, + int relo_idx, + const struct btf *local_btf, + struct bpf_core_cand_list *cands, + struct bpf_core_spec *specs_scratch, + struct bpf_core_relo_res *targ_res) { struct bpf_core_spec *local_spec = &specs_scratch[0]; struct bpf_core_spec *cand_spec = &specs_scratch[1]; struct bpf_core_spec *targ_spec = &specs_scratch[2]; - struct bpf_core_relo_res cand_res, targ_res; + struct bpf_core_relo_res cand_res; const struct btf_type *local_type; const char *local_name; __u32 local_id; - const char *spec_str; + char spec_buf[256]; int i, j, err; local_id = relo->type_id; @@ -1201,38 +1190,34 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, if (!local_name) return -EINVAL; - spec_str = btf__name_by_offset(local_btf, relo->access_str_off); - if (str_is_empty(spec_str)) - return -EINVAL; - - err = bpf_core_parse_spec(prog_name, local_btf, local_id, spec_str, - relo->kind, local_spec); + err = bpf_core_parse_spec(prog_name, local_btf, relo, local_spec); if (err) { + const char *spec_str; + + spec_str = btf__name_by_offset(local_btf, relo->access_str_off); pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", prog_name, relo_idx, local_id, btf_kind_str(local_type), str_is_empty(local_name) ? 
"<anon>" : local_name, - spec_str, err); + spec_str ?: "<?>", err); return -EINVAL; } - pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name, - relo_idx, core_relo_kind_str(relo->kind), relo->kind); - bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, local_spec); - libbpf_print(LIBBPF_DEBUG, "\n"); + bpf_core_format_spec(spec_buf, sizeof(spec_buf), local_spec); + pr_debug("prog '%s': relo #%d: %s\n", prog_name, relo_idx, spec_buf); /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) { /* bpf_insn's imm value could get out of sync during linking */ - memset(&targ_res, 0, sizeof(targ_res)); - targ_res.validate = false; - targ_res.poison = false; - targ_res.orig_val = local_spec->root_type_id; - targ_res.new_val = local_spec->root_type_id; - goto patch_insn; + memset(targ_res, 0, sizeof(*targ_res)); + targ_res->validate = false; + targ_res->poison = false; + targ_res->orig_val = local_spec->root_type_id; + targ_res->new_val = local_spec->root_type_id; + return 0; } /* libbpf doesn't support candidate search for anonymous types */ - if (str_is_empty(spec_str)) { + if (str_is_empty(local_name)) { pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n", prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); return -EOPNOTSUPP; @@ -1242,17 +1227,15 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, err = bpf_core_spec_match(local_spec, cands->cands[i].btf, cands->cands[i].id, cand_spec); if (err < 0) { - pr_warn("prog '%s': relo #%d: error matching candidate #%d ", - prog_name, relo_idx, i); - bpf_core_dump_spec(prog_name, LIBBPF_WARN, cand_spec); - libbpf_print(LIBBPF_WARN, ": %d\n", err); + bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec); + pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n ", + prog_name, relo_idx, i, spec_buf, err); return err; } - pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name, - relo_idx, err == 0 ? "non-matching" : "matching", i); - bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, cand_spec); - libbpf_print(LIBBPF_DEBUG, "\n"); + bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec); + pr_debug("prog '%s': relo #%d: %s candidate #%d %s\n", prog_name, + relo_idx, err == 0 ? "non-matching" : "matching", i, spec_buf); if (err == 0) continue; @@ -1262,7 +1245,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, return err; if (j == 0) { - targ_res = cand_res; + *targ_res = cand_res; *targ_spec = *cand_spec; } else if (cand_spec->bit_offset != targ_spec->bit_offset) { /* if there are many field relo candidates, they @@ -1272,7 +1255,8 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, prog_name, relo_idx, cand_spec->bit_offset, targ_spec->bit_offset); return -EINVAL; - } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { + } else if (cand_res.poison != targ_res->poison || + cand_res.new_val != targ_res->new_val) { /* all candidates should result in the same relocation * decision and value, otherwise it's dangerous to * proceed due to ambiguity @@ -1280,7 +1264,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n", prog_name, relo_idx, cand_res.poison ? "failure" : "success", cand_res.new_val, - targ_res.poison ? "failure" : "success", targ_res.new_val); + targ_res->poison ? 
"failure" : "success", targ_res->new_val); return -EINVAL; } @@ -1314,19 +1298,10 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, prog_name, relo_idx); /* calculate single target relo result explicitly */ - err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, NULL, &targ_res); + err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, NULL, targ_res); if (err) return err; } -patch_insn: - /* bpf_core_patch_insn() should know how to handle missing targ_spec */ - err = bpf_core_patch_insn(prog_name, insn, insn_idx, relo, relo_idx, &targ_res); - if (err) { - pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n", - prog_name, relo_idx, relo->insn_off / 8, err); - return -EINVAL; - } - return 0; } diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h index 17799819ad7c..073039d8ca4f 100644 --- a/tools/lib/bpf/relo_core.h +++ b/tools/lib/bpf/relo_core.h @@ -44,14 +44,50 @@ struct bpf_core_spec { __u32 bit_offset; }; -int bpf_core_apply_relo_insn(const char *prog_name, - struct bpf_insn *insn, int insn_idx, - const struct bpf_core_relo *relo, int relo_idx, - const struct btf *local_btf, - struct bpf_core_cand_list *cands, - struct bpf_core_spec *specs_scratch); +struct bpf_core_relo_res { + /* expected value in the instruction, unless validate == false */ + __u32 orig_val; + /* new value that needs to be patched up to */ + __u32 new_val; + /* relocation unsuccessful, poison instruction, but don't fail load */ + bool poison; + /* some relocations can't be validated against orig_val */ + bool validate; + /* for field byte offset relocations or the forms: + * *(T *)(rX + <off>) = rY + * rX = *(T *)(rY + <off>), + * we remember original and resolved field size to adjust direct + * memory loads of pointers and integers; this is necessary for 32-bit + * host kernel architectures, but also allows to automatically + * relocate fields that were resized from, e.g., u32 to u64, etc. 
+ */ + bool fail_memsz_adjust; + __u32 orig_sz; + __u32 orig_type_id; + __u32 new_sz; + __u32 new_type_id; +}; + int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, __u32 targ_id); size_t bpf_core_essential_name_len(const char *name); + +int bpf_core_calc_relo_insn(const char *prog_name, + const struct bpf_core_relo *relo, int relo_idx, + const struct btf *local_btf, + struct bpf_core_cand_list *cands, + struct bpf_core_spec *specs_scratch, + struct bpf_core_relo_res *targ_res); + +int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, + int insn_idx, const struct bpf_core_relo *relo, + int relo_idx, const struct bpf_core_relo_res *res); + +int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, + const struct bpf_core_relo *relo, + struct bpf_core_spec *spec); + +int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec); + #endif diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h index 0b84d8e6b72a..bd6f4505e7b1 100644 --- a/tools/lib/bpf/skel_internal.h +++ b/tools/lib/bpf/skel_internal.h @@ -3,9 +3,19 @@ #ifndef __SKEL_INTERNAL_H #define __SKEL_INTERNAL_H +#ifdef __KERNEL__ +#include <linux/fdtable.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/slab.h> +#include <linux/bpf.h> +#else #include <unistd.h> #include <sys/syscall.h> #include <sys/mman.h> +#include <stdlib.h> +#include "bpf.h" +#endif #ifndef __NR_bpf # if defined(__mips__) && defined(_ABIO32) @@ -25,24 +35,23 @@ * requested during loader program generation. */ struct bpf_map_desc { - union { - /* input for the loader prog */ - struct { - __aligned_u64 initial_value; - __u32 max_entries; - }; - /* output of the loader prog */ - struct { - int map_fd; - }; - }; + /* output of the loader prog */ + int map_fd; + /* input for the loader prog */ + __u32 max_entries; + __aligned_u64 initial_value; }; struct bpf_prog_desc { int prog_fd; }; +enum { + BPF_SKEL_KERNEL = (1ULL << 0), +}; + struct bpf_loader_ctx { - size_t sz; + __u32 sz; + __u32 flags; __u32 log_level; __u32 log_size; __u64 log_buf; @@ -57,12 +66,144 @@ struct bpf_load_and_run_opts { const char *errstr; }; +long bpf_sys_bpf(__u32 cmd, void *attr, __u32 attr_size); + static inline int skel_sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, unsigned int size) { +#ifdef __KERNEL__ + return bpf_sys_bpf(cmd, attr, size); +#else return syscall(__NR_bpf, cmd, attr, size); +#endif +} + +#ifdef __KERNEL__ +static inline int close(int fd) +{ + return close_fd(fd); +} + +static inline void *skel_alloc(size_t size) +{ + struct bpf_loader_ctx *ctx = kzalloc(size, GFP_KERNEL); + + if (!ctx) + return NULL; + ctx->flags |= BPF_SKEL_KERNEL; + return ctx; +} + +static inline void skel_free(const void *p) +{ + kfree(p); +} + +/* skel->bss/rodata maps are populated the following way: + * + * For kernel use: + * skel_prep_map_data() allocates kernel memory that kernel module can directly access. + * Generated lskel stores the pointer in skel->rodata and in skel->maps.rodata.initial_value. + * The loader program will perform probe_read_kernel() from maps.rodata.initial_value. + * skel_finalize_map_data() sets skel->rodata to point to actual value in a bpf map and + * does maps.rodata.initial_value = ~0ULL to signal skel_free_map_data() that kvfree + * is not necessary. + * + * For user space: + * skel_prep_map_data() mmaps anon memory into skel->rodata that can be accessed directly.
+ * Generated lskel stores the pointer in skel->rodata and in skel->maps.rodata.initial_value. + * The loader program will perform copy_from_user() from maps.rodata.initial_value. + * skel_finalize_map_data() remaps bpf array map value from the kernel memory into + * skel->rodata address. + * + * The "bpftool gen skeleton -L" command generates lskel.h that is suitable for + * both kernel and user space. The generated loader program does + * either bpf_probe_read_kernel() or bpf_copy_from_user() from initial_value + * depending on bpf_loader_ctx->flags. + */ +static inline void skel_free_map_data(void *p, __u64 addr, size_t sz) +{ + if (addr != ~0ULL) + kvfree(p); + /* When addr == ~0ULL the 'p' points to + * ((struct bpf_array *)map)->value. See skel_finalize_map_data. + */ +} + +static inline void *skel_prep_map_data(const void *val, size_t mmap_sz, size_t val_sz) +{ + void *addr; + + addr = kvmalloc(val_sz, GFP_KERNEL); + if (!addr) + return NULL; + memcpy(addr, val, val_sz); + return addr; +} + +static inline void *skel_finalize_map_data(__u64 *init_val, size_t mmap_sz, int flags, int fd) +{ + struct bpf_map *map; + void *addr = NULL; + + kvfree((void *) (long) *init_val); + *init_val = ~0ULL; + + /* At this point bpf_load_and_run() finished without error and + * 'fd' is a valid bpf map FD. All sanity checks below should succeed. + */ + map = bpf_map_get(fd); + if (IS_ERR(map)) + return NULL; + if (map->map_type != BPF_MAP_TYPE_ARRAY) + goto out; + addr = ((struct bpf_array *)map)->value; + /* the addr stays valid, since FD is not closed */ +out: + bpf_map_put(map); + return addr; +} + +#else + +static inline void *skel_alloc(size_t size) +{ + return calloc(1, size); +} + +static inline void skel_free(void *p) +{ + free(p); +} + +static inline void skel_free_map_data(void *p, __u64 addr, size_t sz) +{ + munmap(p, sz); +} + +static inline void *skel_prep_map_data(const void *val, size_t mmap_sz, size_t val_sz) +{ + void *addr; + + addr = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (addr == (void *) -1) + return NULL; + memcpy(addr, val, val_sz); + return addr; } +static inline void *skel_finalize_map_data(__u64 *init_val, size_t mmap_sz, int flags, int fd) +{ + void *addr; + + addr = mmap((void *) (long) *init_val, mmap_sz, flags, MAP_SHARED | MAP_FIXED, fd, 0); + if (addr == (void *) -1) + return NULL; + return addr; +} +#endif + static inline int skel_closenz(int fd) { if (fd > 0) @@ -70,22 +211,94 @@ static inline int skel_closenz(int fd) return -EINVAL; } +#ifndef offsetofend +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER))) +#endif + +static inline int skel_map_create(enum bpf_map_type map_type, + const char *map_name, + __u32 key_size, + __u32 value_size, + __u32 max_entries) +{ + const size_t attr_sz = offsetofend(union bpf_attr, map_extra); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + + attr.map_type = map_type; + strncpy(attr.map_name, map_name, sizeof(attr.map_name)); + attr.key_size = key_size; + attr.value_size = value_size; + attr.max_entries = max_entries; + + return skel_sys_bpf(BPF_MAP_CREATE, &attr, attr_sz); +} + +static inline int skel_map_update_elem(int fd, const void *key, + const void *value, __u64 flags) +{ + const size_t attr_sz = offsetofend(union bpf_attr, flags); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + attr.map_fd = fd; + attr.key = (long) key; + attr.value = (long) value; + attr.flags = flags; + + return skel_sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, 
attr_sz); +} + +static inline int skel_raw_tracepoint_open(const char *name, int prog_fd) +{ + const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint.prog_fd); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + attr.raw_tracepoint.name = (long) name; + attr.raw_tracepoint.prog_fd = prog_fd; + + return skel_sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, attr_sz); +} + +static inline int skel_link_create(int prog_fd, int target_fd, + enum bpf_attach_type attach_type) +{ + const size_t attr_sz = offsetofend(union bpf_attr, link_create.iter_info_len); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + attr.link_create.prog_fd = prog_fd; + attr.link_create.target_fd = target_fd; + attr.link_create.attach_type = attach_type; + + return skel_sys_bpf(BPF_LINK_CREATE, &attr, attr_sz); +} + +#ifdef __KERNEL__ +#define set_err +#else +#define set_err err = -errno +#endif + static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) { int map_fd = -1, prog_fd = -1, key = 0, err; union bpf_attr attr; - map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1, NULL); + err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1); if (map_fd < 0) { opts->errstr = "failed to create loader map"; - err = -errno; + set_err; goto out; } - err = bpf_map_update_elem(map_fd, &key, opts->data, 0); + err = skel_map_update_elem(map_fd, &key, opts->data, 0); if (err < 0) { opts->errstr = "failed to update loader map"; - err = -errno; + set_err; goto out; } @@ -100,10 +313,10 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) attr.log_size = opts->ctx->log_size; attr.log_buf = opts->ctx->log_buf; attr.prog_flags = BPF_F_SLEEPABLE; - prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + err = prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); if (prog_fd < 0) { opts->errstr = "failed to load loader prog"; - err = -errno; + set_err; goto out; } @@ -115,10 +328,12 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) if (err < 0 || (int)attr.test.retval < 0) { opts->errstr = "failed to execute loader prog"; if (err < 0) { - err = -errno; + set_err; } else { err = (int)attr.test.retval; +#ifndef __KERNEL__ errno = -err; +#endif } goto out; } diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h new file mode 100644 index 000000000000..4181fddb3687 --- /dev/null +++ b/tools/lib/bpf/usdt.bpf.h @@ -0,0 +1,259 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#ifndef __USDT_BPF_H__ +#define __USDT_BPF_H__ + +#include <linux/errno.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +/* Below types and maps are internal implementation details of libbpf's USDT + * support and are subjects to change. Also, bpf_usdt_xxx() API helpers should + * be considered an unstable API as well and might be adjusted based on user + * feedback from using libbpf's USDT support in production. + */ + +/* User can override BPF_USDT_MAX_SPEC_CNT to change default size of internal + * map that keeps track of USDT argument specifications. This might be + * necessary if there are a lot of USDT attachments. + */ +#ifndef BPF_USDT_MAX_SPEC_CNT +#define BPF_USDT_MAX_SPEC_CNT 256 +#endif +/* User can override BPF_USDT_MAX_IP_CNT to change default size of internal + * map that keeps track of IP (memory address) mapping to USDT argument + * specification. 
+/* User can override BPF_USDT_MAX_IP_CNT to change default size of internal + * map that keeps track of IP (memory address) mapping to USDT argument + * specification. + * Note, if kernel supports BPF cookies, this map is not used and could be + * resized all the way to 1 to save a bit of memory. + */ +#ifndef BPF_USDT_MAX_IP_CNT +#define BPF_USDT_MAX_IP_CNT (4 * BPF_USDT_MAX_SPEC_CNT) +#endif +/* We use BPF CO-RE to detect support for BPF cookie from BPF side. This is + * the only dependency on CO-RE, so if it's undesirable, user can override + * BPF_USDT_HAS_BPF_COOKIE to specify whether BPF cookie is supported or not. + */ +#ifndef BPF_USDT_HAS_BPF_COOKIE +#define BPF_USDT_HAS_BPF_COOKIE \ + bpf_core_enum_value_exists(enum bpf_func_id___usdt, BPF_FUNC_get_attach_cookie___usdt) +#endif + +enum __bpf_usdt_arg_type { + BPF_USDT_ARG_CONST, + BPF_USDT_ARG_REG, + BPF_USDT_ARG_REG_DEREF, +}; + +struct __bpf_usdt_arg_spec { + /* u64 scalar interpreted depending on arg_type, see below */ + __u64 val_off; + /* arg location case, see bpf_usdt_arg() for details */ + enum __bpf_usdt_arg_type arg_type; + /* offset of referenced register within struct pt_regs */ + short reg_off; + /* whether arg should be interpreted as signed value */ + bool arg_signed; + /* number of bits that need to be cleared and, optionally, + * sign-extended to cast arguments that are 1, 2, or 4 bytes + * long into final 8-byte u64/s64 value returned to user + */ + char arg_bitshift; +}; + +/* should match USDT_MAX_ARG_CNT in usdt.c exactly */ +#define BPF_USDT_MAX_ARG_CNT 12 +struct __bpf_usdt_spec { + struct __bpf_usdt_arg_spec args[BPF_USDT_MAX_ARG_CNT]; + __u64 usdt_cookie; + short arg_cnt; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, BPF_USDT_MAX_SPEC_CNT); + __type(key, int); + __type(value, struct __bpf_usdt_spec); +} __bpf_usdt_specs SEC(".maps") __weak; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, BPF_USDT_MAX_IP_CNT); + __type(key, long); + __type(value, __u32); +} __bpf_usdt_ip_to_spec_id SEC(".maps") __weak; + +/* don't rely on user's BPF code to have latest definition of bpf_func_id */ +enum bpf_func_id___usdt { + BPF_FUNC_get_attach_cookie___usdt = 0xBAD, /* value doesn't matter */ +}; + +static __always_inline +int __bpf_usdt_spec_id(struct pt_regs *ctx) +{ + if (!BPF_USDT_HAS_BPF_COOKIE) { + long ip = PT_REGS_IP(ctx); + int *spec_id_ptr; + + spec_id_ptr = bpf_map_lookup_elem(&__bpf_usdt_ip_to_spec_id, &ip); + return spec_id_ptr ? *spec_id_ptr : -ESRCH; + } + + return bpf_get_attach_cookie(ctx); +} + +/* Return number of USDT arguments defined for currently traced USDT. */ +__weak __hidden +int bpf_usdt_arg_cnt(struct pt_regs *ctx) +{ + struct __bpf_usdt_spec *spec; + int spec_id; + + spec_id = __bpf_usdt_spec_id(ctx); + if (spec_id < 0) + return -ESRCH; + + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); + if (!spec) + return -ESRCH; + + return spec->arg_cnt; +} + +/* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res. + * Returns 0 on success; negative error, otherwise. + * On error *res is guaranteed to be set to zero.
+ */ +__weak __hidden +int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) +{ + struct __bpf_usdt_spec *spec; + struct __bpf_usdt_arg_spec *arg_spec; + unsigned long val; + int err, spec_id; + + *res = 0; + + spec_id = __bpf_usdt_spec_id(ctx); + if (spec_id < 0) + return -ESRCH; + + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); + if (!spec) + return -ESRCH; + + if (arg_num >= BPF_USDT_MAX_ARG_CNT || arg_num >= spec->arg_cnt) + return -ENOENT; + + arg_spec = &spec->args[arg_num]; + switch (arg_spec->arg_type) { + case BPF_USDT_ARG_CONST: + /* Arg is just a constant ("-4@$-9" in USDT arg spec). + * value is recorded in arg_spec->val_off directly. + */ + val = arg_spec->val_off; + break; + case BPF_USDT_ARG_REG: + /* Arg is in a register (e.g, "8@%rax" in USDT arg spec), + * so we read the contents of that register directly from + * struct pt_regs. To keep things simple user-space parts + * record offsetof(struct pt_regs, <regname>) in arg_spec->reg_off. + */ + err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off); + if (err) + return err; + break; + case BPF_USDT_ARG_REG_DEREF: + /* Arg is in memory addressed by register, plus some offset + * (e.g., "-4@-1204(%rbp)" in USDT arg spec). Register is + * identified like with BPF_USDT_ARG_REG case, and the offset + * is in arg_spec->val_off. We first fetch register contents + * from pt_regs, then do another user-space probe read to + * fetch argument value itself. + */ + err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off); + if (err) + return err; + err = bpf_probe_read_user(&val, sizeof(val), (void *)val + arg_spec->val_off); + if (err) + return err; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + val >>= arg_spec->arg_bitshift; +#endif + break; + default: + return -EINVAL; + } + + /* cast arg from 1, 2, or 4 bytes to final 8 byte size clearing + * necessary upper arg_bitshift bits, with sign extension if argument + * is signed + */ + val <<= arg_spec->arg_bitshift; + if (arg_spec->arg_signed) + val = ((long)val) >> arg_spec->arg_bitshift; + else + val = val >> arg_spec->arg_bitshift; + *res = val; + return 0; +} + +/* Retrieve user-specified cookie value provided during attach as + * bpf_usdt_opts.usdt_cookie. This serves the same purpose as BPF cookie + * returned by bpf_get_attach_cookie(). Libbpf's support for USDT is itself + * utilizing BPF cookies internally, so user can't use BPF cookie directly + * for USDT programs and has to use bpf_usdt_cookie() API instead. + */ +__weak __hidden +long bpf_usdt_cookie(struct pt_regs *ctx) +{ + struct __bpf_usdt_spec *spec; + int spec_id; + + spec_id = __bpf_usdt_spec_id(ctx); + if (spec_id < 0) + return 0; + + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); + if (!spec) + return 0; + + return spec->usdt_cookie; +} + +/* we rely on ___bpf_apply() and ___bpf_narg() macros already defined in bpf_tracing.h */ +#define ___bpf_usdt_args0() ctx +#define ___bpf_usdt_args1(x) ___bpf_usdt_args0(), ({ long _x; bpf_usdt_arg(ctx, 0, &_x); (void *)_x; }) +#define ___bpf_usdt_args2(x, args...) ___bpf_usdt_args1(args), ({ long _x; bpf_usdt_arg(ctx, 1, &_x); (void *)_x; }) +#define ___bpf_usdt_args3(x, args...) ___bpf_usdt_args2(args), ({ long _x; bpf_usdt_arg(ctx, 2, &_x); (void *)_x; }) +#define ___bpf_usdt_args4(x, args...) ___bpf_usdt_args3(args), ({ long _x; bpf_usdt_arg(ctx, 3, &_x); (void *)_x; }) +#define ___bpf_usdt_args5(x, args...) 
___bpf_usdt_args4(args), ({ long _x; bpf_usdt_arg(ctx, 4, &_x); (void *)_x; }) +#define ___bpf_usdt_args6(x, args...) ___bpf_usdt_args5(args), ({ long _x; bpf_usdt_arg(ctx, 5, &_x); (void *)_x; }) +#define ___bpf_usdt_args7(x, args...) ___bpf_usdt_args6(args), ({ long _x; bpf_usdt_arg(ctx, 6, &_x); (void *)_x; }) +#define ___bpf_usdt_args8(x, args...) ___bpf_usdt_args7(args), ({ long _x; bpf_usdt_arg(ctx, 7, &_x); (void *)_x; }) +#define ___bpf_usdt_args9(x, args...) ___bpf_usdt_args8(args), ({ long _x; bpf_usdt_arg(ctx, 8, &_x); (void *)_x; }) +#define ___bpf_usdt_args10(x, args...) ___bpf_usdt_args9(args), ({ long _x; bpf_usdt_arg(ctx, 9, &_x); (void *)_x; }) +#define ___bpf_usdt_args11(x, args...) ___bpf_usdt_args10(args), ({ long _x; bpf_usdt_arg(ctx, 10, &_x); (void *)_x; }) +#define ___bpf_usdt_args12(x, args...) ___bpf_usdt_args11(args), ({ long _x; bpf_usdt_arg(ctx, 11, &_x); (void *)_x; }) +#define ___bpf_usdt_args(args...) ___bpf_apply(___bpf_usdt_args, ___bpf_narg(args))(args) + +/* + * BPF_USDT serves the same purpose for USDT handlers as BPF_PROG for + * tp_btf/fentry/fexit BPF programs and BPF_KPROBE for kprobes. + * Original struct pt_regs * context is preserved as 'ctx' argument. + */ +#define BPF_USDT(name, args...) \ +name(struct pt_regs *ctx); \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args); \ +typeof(name(0)) name(struct pt_regs *ctx) \ +{ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_usdt_args(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args) + +#endif /* __USDT_BPF_H__ */ diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c new file mode 100644 index 000000000000..f1c9339cfbbc --- /dev/null +++ b/tools/lib/bpf/usdt.c @@ -0,0 +1,1518 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <libelf.h> +#include <gelf.h> +#include <unistd.h> +#include <linux/ptrace.h> +#include <linux/kernel.h> + +/* s8 will be marked as poison while it's a reg of riscv */ +#if defined(__riscv) +#define rv_s8 s8 +#endif + +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_common.h" +#include "libbpf_internal.h" +#include "hashmap.h" + +/* libbpf's USDT support consists of BPF-side state/code and user-space + * state/code working together in concert. BPF-side parts are defined in + * usdt.bpf.h header library. User-space state is encapsulated by struct + * usdt_manager and all the supporting code centered around usdt_manager. + * + * usdt.bpf.h defines two BPF maps that usdt_manager expects: USDT spec map + * and IP-to-spec-ID map, which is an auxiliary map necessary for kernels that + * don't support BPF cookie (see below). These two maps are implicitly + * embedded into user's end BPF object file when the user's code includes + * usdt.bpf.h. This means that libbpf doesn't do anything special to create + * these USDT support maps. They are created by normal libbpf logic of + * instantiating BPF maps when opening and loading BPF object.
+ * + * As such, libbpf is basically unaware of the need to do anything + * USDT-related until the very first call to bpf_program__attach_usdt(), which + * can be called by user explicitly or happen automatically during skeleton + * attach (or, equivalently, through generic bpf_program__attach() call). At + * this point, libbpf will instantiate and initialize struct usdt_manager and + * store it in bpf_object. USDT manager is per-BPF object construct, as each + * independent BPF object might or might not have USDT programs, and thus all + * the expected USDT-related state. There is no coordination between two + * bpf_object in parts of USDT attachment, they are oblivious of each other's + * existence and libbpf is just oblivious, dealing with bpf_object-specific + * USDT state. + * + * Quick crash course on USDTs. + * + * From user-space application's point of view, USDT is essentially just + * a slightly special function call that normally has zero overhead, unless it + * is being traced by some external entity (e.g, BPF-based tool). Here's how + * a typical application can trigger USDT probe: + * + * #include <sys/sdt.h> // provided by systemtap-sdt-devel package + * // folly also provide similar functionality in folly/tracing/StaticTracepoint.h + * + * STAP_PROBE3(my_usdt_provider, my_usdt_probe_name, 123, x, &y); + * + * USDT is identified by it's <provider-name>:<probe-name> pair of names. Each + * individual USDT has a fixed number of arguments (3 in the above example) + * and specifies values of each argument as if it was a function call. + * + * USDT call is actually not a function call, but is instead replaced by + * a single NOP instruction (thus zero overhead, effectively). But in addition + * to that, those USDT macros generate special SHT_NOTE ELF records in + * .note.stapsdt ELF section. Here's an example USDT definition as emitted by + * `readelf -n <binary>`: + * + * stapsdt 0x00000089 NT_STAPSDT (SystemTap probe descriptors) + * Provider: test + * Name: usdt12 + * Location: 0x0000000000549df3, Base: 0x00000000008effa4, Semaphore: 0x0000000000a4606e + * Arguments: -4@-1204(%rbp) -4@%edi -8@-1216(%rbp) -8@%r8 -4@$5 -8@%r9 8@%rdx 8@%r10 -4@$-9 -2@%cx -2@%ax -1@%sil + * + * In this case we have USDT test:usdt12 with 12 arguments. + * + * Location and base are offsets used to calculate absolute IP address of that + * NOP instruction that kernel can replace with an interrupt instruction to + * trigger instrumentation code (BPF program for all that we care about). + * + * Semaphore above is and optional feature. It records an address of a 2-byte + * refcount variable (normally in '.probes' ELF section) used for signaling if + * there is anything that is attached to USDT. This is useful for user + * applications if, for example, they need to prepare some arguments that are + * passed only to USDTs and preparation is expensive. By checking if USDT is + * "activated", an application can avoid paying those costs unnecessarily. + * Recent enough kernel has built-in support for automatically managing this + * refcount, which libbpf expects and relies on. If USDT is defined without + * associated semaphore, this value will be zero. See selftests for semaphore + * examples. + * + * Arguments is the most interesting part. This USDT specification string is + * providing information about all the USDT arguments and their locations. The + * part before @ sign defined byte size of the argument (1, 2, 4, or 8) and + * whether the argument is signed or unsigned (negative size means signed). 
+ * The part after the @ sign is an assembly-like definition of the argument's location + * (see [0] for more details). Technically, the assembler can provide some pretty + * advanced definitions, but libbpf currently supports the three most common + * cases: + * 1) immediate constant, see 5th and 9th args above (-4@$5 and -4@-9); + * 2) register value, e.g., 8@%rdx, which means "unsigned 8-byte integer + * whose value is in register %rdx"; + * 3) memory dereference addressed by register, e.g., -4@-1204(%rbp), which + * specifies a signed 32-bit integer stored at offset -1204 bytes from the + * memory address stored in %rbp. + * + * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation + * + * During attachment, libbpf parses all the relevant USDT specifications and + * prepares `struct usdt_spec` (USDT spec), which is then provided to BPF-side + * code through the spec map. This allows BPF applications to quickly fetch the + * actual value at runtime using simple BPF-side code. + * + * With basics out of the way, let's go over less immediately obvious aspects + * of supporting USDTs. + * + * First, there is no special USDT BPF program type. It is actually just + * a uprobe BPF program (which for the kernel, at least currently, is just a kprobe + * program, so BPF_PROG_TYPE_KPROBE program type), with the only difference being + * that a uprobe is usually attached at the function entry, while a USDT will + * normally be somewhere inside the function. But it should always be + * pointing to a NOP instruction, which makes such uprobes the fastest uprobe + * kind. + * + * Second, it's important to realize that such STAP_PROBEn(provider, name, ...) + * macro invocations can end up being inlined many times, depending on + * specifics of each individual user application. So a single conceptual USDT + * (identified by its provider:name pair of identifiers) is, generally speaking, + * multiple uprobe locations (USDT call sites) in different places in the user + * application. Further, again due to inlining, each USDT call site might end + * up having the same argument #N located in a different place. In one call + * site it could be a constant, in another it will end up in a register, and in + * yet another it could be in some other register or even somewhere on the stack. + * + * As such, "attaching to USDT" means (in the general case) attaching the same + * uprobe BPF program to multiple target locations in the user application, each + * potentially having a completely different USDT spec associated with it. + * To wire all this up together, libbpf allocates a unique integer spec ID for + * each unique USDT spec. Spec IDs are allocated as sequential small integers + * so that they can be used as keys in an array BPF map (for performance reasons). + * Spec ID allocation and accounting is a big part of what usdt_manager is + * about. This state has to be maintained per-BPF object and coordinated + * across different USDT attachments within the same BPF object. + * + * The spec ID is the key in the spec BPF map; the value is the actual USDT spec laid out + * as struct usdt_spec. Each invocation of the BPF program at runtime needs to + * know its associated spec ID. It gets it either through the BPF cookie, which + * libbpf sets to the spec ID at attach time, or, if the kernel is too old to + * support BPF cookies, through the IP-to-spec-ID map that libbpf maintains in such + * a case. The latter means that some modes of operation can't be supported + * without BPF cookies. One such mode is attaching to a shared library "generically", + * without specifying a target process.
In such a case, it's impossible to + * calculate absolute IP addresses for the IP-to-spec-ID map, and thus such a mode + * is not supported without BPF cookie support. + * + * Note that libbpf is using BPF cookie functionality for its own internal + * needs, so users themselves can't rely on the BPF cookie feature. To that end, libbpf + * provides conceptually equivalent USDT cookie support. It's still a u64 + * user-provided value that can be associated with a USDT attachment. Note that + * this will be the same value for all USDT call sites within the same single + * *logical* USDT attachment. This makes sense because, to the user, attaching to + * a USDT means a single BPF program triggered for a singular USDT probe. The fact + * that this is done at multiple actual locations is a mostly hidden + * implementation detail. This USDT cookie value can be fetched with the + * bpf_usdt_cookie(ctx) API provided by usdt.bpf.h. + * + * Lastly, while a single USDT can have tons of USDT call sites, it doesn't + * necessarily have that many different USDT specs. It very well might be + * that 1000 USDT call sites only need 5 different USDT specs, because all the + * arguments are typically contained in a small set of registers or stack + * locations. As such, it's wasteful to allocate as many USDT spec IDs as + * there are USDT call sites. So libbpf tries to be frugal and performs + * on-the-fly deduplication during a single USDT attachment to only allocate + * the minimal required number of unique USDT specs (and thus spec IDs). This + * is trivially achieved by using the USDT spec string (Arguments string from the USDT + * note) as a lookup key in a hashmap. The USDT spec string uniquely defines + * everything about how to fetch USDT arguments, so two USDT call sites + * sharing a USDT spec string can safely share the same USDT spec and spec ID. + * Note, this spec string deduplication happens only within the same USDT + * attachment, so each USDT spec shares the same USDT cookie value. This is + * not generally true for other USDT attachments within the same BPF object, + * as even if the USDT spec string is the same, the USDT cookie value can be + * different. It was deemed excessive to try to deduplicate across independent + * USDT attachments by taking into account USDT spec string *and* USDT cookie + * value, which would complicate spec ID accounting significantly for little + * gain. + */
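To make the flow above concrete, here is a sketch of both halves of a USDT attachment, reusing the test:usdt12 probe from the readelf example; the handler and file names are made up, and only the first argument is declared for brevity. On the BPF side:

	/* my_tracer.bpf.c, sketch: handler built from the usdt.bpf.h helpers */
	#include <linux/ptrace.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/usdt.bpf.h>

	SEC("usdt")	/* no auto-attach spec, attached manually below */
	int BPF_USDT(handle_usdt12, int first_arg)
	{
		/* original struct pt_regs * context is still available as 'ctx' */
		bpf_printk("args=%d first=%d cookie=%lx",
			   bpf_usdt_arg_cnt(ctx), first_arg, bpf_usdt_cookie(ctx));
		return 0;
	}

	char LICENSE[] SEC("license") = "GPL";

User space then performs the single *logical* attachment, with libbpf fanning it out to every call site behind the scenes ('skel' is a hypothetical generated skeleton; pid -1 is assumed to request tracing across all processes):

	LIBBPF_OPTS(bpf_usdt_opts, opts, .usdt_cookie = 0xcafe);
	struct bpf_link *link;

	link = bpf_program__attach_usdt(skel->progs.handle_usdt12,
					-1 /* any process */, "./my_app",
					"test", "usdt12", &opts);
	if (!link) {
		/* attach failed; libbpf reports the error via errno */
	}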
+ +#define USDT_BASE_SEC ".stapsdt.base" +#define USDT_SEMA_SEC ".probes" +#define USDT_NOTE_SEC ".note.stapsdt" +#define USDT_NOTE_TYPE 3 +#define USDT_NOTE_NAME "stapsdt" + +/* should match exactly enum __bpf_usdt_arg_type from usdt.bpf.h */ +enum usdt_arg_type { + USDT_ARG_CONST, + USDT_ARG_REG, + USDT_ARG_REG_DEREF, +}; + +/* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */ +struct usdt_arg_spec { + __u64 val_off; + enum usdt_arg_type arg_type; + short reg_off; + bool arg_signed; + char arg_bitshift; +}; + +/* should match BPF_USDT_MAX_ARG_CNT in usdt.bpf.h */ +#define USDT_MAX_ARG_CNT 12 + +/* should match struct __bpf_usdt_spec from usdt.bpf.h */ +struct usdt_spec { + struct usdt_arg_spec args[USDT_MAX_ARG_CNT]; + __u64 usdt_cookie; + short arg_cnt; +}; + +struct usdt_note { + const char *provider; + const char *name; + /* USDT args specification string, e.g.: + * "-4@%esi -4@-24(%rbp) -4@%ecx 2@%ax 8@%rdx" + */ + const char *args; + long loc_addr; + long base_addr; + long sema_addr; +}; + +struct usdt_target { + long abs_ip; + long rel_ip; + long sema_off; + struct usdt_spec spec; + const char *spec_str; +}; + +struct usdt_manager { + struct bpf_map *specs_map; + struct bpf_map *ip_to_spec_id_map; + + int *free_spec_ids; + size_t free_spec_cnt; + size_t next_free_spec_id; + + bool has_bpf_cookie; + bool has_sema_refcnt; +}; + +struct usdt_manager *usdt_manager_new(struct bpf_object *obj) +{ + static const char *ref_ctr_sysfs_path = "/sys/bus/event_source/devices/uprobe/format/ref_ctr_offset"; + struct usdt_manager *man; + struct bpf_map *specs_map, *ip_to_spec_id_map; + + specs_map = bpf_object__find_map_by_name(obj, "__bpf_usdt_specs"); + ip_to_spec_id_map = bpf_object__find_map_by_name(obj, "__bpf_usdt_ip_to_spec_id"); + if (!specs_map || !ip_to_spec_id_map) { + pr_warn("usdt: failed to find USDT support BPF maps, did you forget to include bpf/usdt.bpf.h?\n"); + return ERR_PTR(-ESRCH); + } + + man = calloc(1, sizeof(*man)); + if (!man) + return ERR_PTR(-ENOMEM); + + man->specs_map = specs_map; + man->ip_to_spec_id_map = ip_to_spec_id_map; + + /* Detect if BPF cookie is supported for kprobes. + * We don't need IP-to-ID mapping if we can use BPF cookies. + * Added in: 7adfc6c9b315 ("bpf: Add bpf_get_attach_cookie() BPF helper to access bpf_cookie value") + */ + man->has_bpf_cookie = kernel_supports(obj, FEAT_BPF_COOKIE); + + /* Detect kernel support for automatic refcounting of USDT semaphore. + * If this is not supported, USDTs with semaphores will not be supported.
+ * Added in: a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe") + */ + man->has_sema_refcnt = access(ref_ctr_sysfs_path, F_OK) == 0; + + return man; +} + +void usdt_manager_free(struct usdt_manager *man) +{ + if (IS_ERR_OR_NULL(man)) + return; + + free(man->free_spec_ids); + free(man); +} + +static int sanity_check_usdt_elf(Elf *elf, const char *path) +{ + GElf_Ehdr ehdr; + int endianness; + + if (elf_kind(elf) != ELF_K_ELF) { + pr_warn("usdt: unrecognized ELF kind %d for '%s'\n", elf_kind(elf), path); + return -EBADF; + } + + switch (gelf_getclass(elf)) { + case ELFCLASS64: + if (sizeof(void *) != 8) { + pr_warn("usdt: attaching to 64-bit ELF binary '%s' is not supported\n", path); + return -EBADF; + } + break; + case ELFCLASS32: + if (sizeof(void *) != 4) { + pr_warn("usdt: attaching to 32-bit ELF binary '%s' is not supported\n", path); + return -EBADF; + } + break; + default: + pr_warn("usdt: unsupported ELF class for '%s'\n", path); + return -EBADF; + } + + if (!gelf_getehdr(elf, &ehdr)) + return -EINVAL; + + if (ehdr.e_type != ET_EXEC && ehdr.e_type != ET_DYN) { + pr_warn("usdt: unsupported type of ELF binary '%s' (%d), only ET_EXEC and ET_DYN are supported\n", + path, ehdr.e_type); + return -EBADF; + } + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + endianness = ELFDATA2LSB; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + endianness = ELFDATA2MSB; +#else +# error "Unrecognized __BYTE_ORDER__" +#endif + if (endianness != ehdr.e_ident[EI_DATA]) { + pr_warn("usdt: ELF endianness mismatch for '%s'\n", path); + return -EBADF; + } + + return 0; +} + +static int find_elf_sec_by_name(Elf *elf, const char *sec_name, GElf_Shdr *shdr, Elf_Scn **scn) +{ + Elf_Scn *sec = NULL; + size_t shstrndx; + + if (elf_getshdrstrndx(elf, &shstrndx)) + return -EINVAL; + + /* check if ELF is corrupted and avoid calling elf_strptr if yes */ + if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL)) + return -EINVAL; + + while ((sec = elf_nextscn(elf, sec)) != NULL) { + char *name; + + if (!gelf_getshdr(sec, shdr)) + return -EINVAL; + + name = elf_strptr(elf, shstrndx, shdr->sh_name); + if (name && strcmp(sec_name, name) == 0) { + *scn = sec; + return 0; + } + } + + return -ENOENT; +} + +struct elf_seg { + long start; + long end; + long offset; + bool is_exec; +}; + +static int cmp_elf_segs(const void *_a, const void *_b) +{ + const struct elf_seg *a = _a; + const struct elf_seg *b = _b; + + return a->start < b->start ? 
-1 : 1; +} + +static int parse_elf_segs(Elf *elf, const char *path, struct elf_seg **segs, size_t *seg_cnt) +{ + GElf_Phdr phdr; + size_t n; + int i, err; + struct elf_seg *seg; + void *tmp; + + *seg_cnt = 0; + + if (elf_getphdrnum(elf, &n)) { + err = -errno; + return err; + } + + for (i = 0; i < n; i++) { + if (!gelf_getphdr(elf, i, &phdr)) { + err = -errno; + return err; + } + + pr_debug("usdt: discovered PHDR #%d in '%s': vaddr 0x%lx memsz 0x%lx offset 0x%lx type 0x%lx flags 0x%lx\n", + i, path, (long)phdr.p_vaddr, (long)phdr.p_memsz, (long)phdr.p_offset, + (long)phdr.p_type, (long)phdr.p_flags); + if (phdr.p_type != PT_LOAD) + continue; + + tmp = libbpf_reallocarray(*segs, *seg_cnt + 1, sizeof(**segs)); + if (!tmp) + return -ENOMEM; + + *segs = tmp; + seg = *segs + *seg_cnt; + (*seg_cnt)++; + + seg->start = phdr.p_vaddr; + seg->end = phdr.p_vaddr + phdr.p_memsz; + seg->offset = phdr.p_offset; + seg->is_exec = phdr.p_flags & PF_X; + } + + if (*seg_cnt == 0) { + pr_warn("usdt: failed to find PT_LOAD program headers in '%s'\n", path); + return -ESRCH; + } + + qsort(*segs, *seg_cnt, sizeof(**segs), cmp_elf_segs); + return 0; +} + +static int parse_lib_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt) +{ + char path[PATH_MAX], line[PATH_MAX], mode[16]; + size_t seg_start, seg_end, seg_off; + struct elf_seg *seg; + int tmp_pid, i, err; + FILE *f; + + *seg_cnt = 0; + + /* Handle containerized binaries only accessible from + * /proc/<pid>/root/<path>. They will be reported as just /<path> in + * /proc/<pid>/maps. + */ + if (sscanf(lib_path, "/proc/%d/root%s", &tmp_pid, path) == 2 && pid == tmp_pid) + goto proceed; + + if (!realpath(lib_path, path)) { + pr_warn("usdt: failed to get absolute path of '%s' (err %d), using path as is...\n", + lib_path, -errno); + libbpf_strlcpy(path, lib_path, sizeof(path)); + } + +proceed: + sprintf(line, "/proc/%d/maps", pid); + f = fopen(line, "r"); + if (!f) { + err = -errno; + pr_warn("usdt: failed to open '%s' to get base addr of '%s': %d\n", + line, lib_path, err); + return err; + } + + /* We need to handle lines with no path at the end: + * + * 7f5c6f5d1000-7f5c6f5d3000 rw-p 001c7000 08:04 21238613 /usr/lib64/libc-2.17.so + * 7f5c6f5d3000-7f5c6f5d8000 rw-p 00000000 00:00 0 + * 7f5c6f5d8000-7f5c6f5d9000 r-xp 00000000 103:01 362990598 /data/users/andriin/linux/tools/bpf/usdt/libhello_usdt.so + */ + while (fscanf(f, "%zx-%zx %s %zx %*s %*d%[^\n]\n", + &seg_start, &seg_end, mode, &seg_off, line) == 5) { + void *tmp; + + /* to handle no path case (see above) we need to capture line + * without skipping any whitespaces. 
So we need to strip
+ * leading whitespaces manually here
+ */
+ i = 0;
+ while (isblank(line[i]))
+ i++;
+ if (strcmp(line + i, path) != 0)
+ continue;
+
+ pr_debug("usdt: discovered segment for lib '%s': addrs %zx-%zx mode %s offset %zx\n",
+ path, seg_start, seg_end, mode, seg_off);
+
+ /* ignore non-executable sections for shared libs */
+ if (mode[2] != 'x')
+ continue;
+
+ tmp = libbpf_reallocarray(*segs, *seg_cnt + 1, sizeof(**segs));
+ if (!tmp) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ *segs = tmp;
+ seg = *segs + *seg_cnt;
+ *seg_cnt += 1;
+
+ seg->start = seg_start;
+ seg->end = seg_end;
+ seg->offset = seg_off;
+ seg->is_exec = true;
+ }
+
+ if (*seg_cnt == 0) {
+ pr_warn("usdt: failed to find '%s' (resolved to '%s') within PID %d memory mappings\n",
+ lib_path, path, pid);
+ err = -ESRCH;
+ goto err_out;
+ }
+
+ qsort(*segs, *seg_cnt, sizeof(**segs), cmp_elf_segs);
+ err = 0;
+err_out:
+ fclose(f);
+ return err;
+}
+
+static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long addr, bool relative)
+{
+ struct elf_seg *seg;
+ int i;
+
+ if (relative) {
+ /* for shared libraries, address is a relative offset and thus
+ * should fall within the logical offset-based range of
+ * [offset_start, offset_end)
+ */
+ for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
+ if (seg->offset <= addr && addr < seg->offset + (seg->end - seg->start))
+ return seg;
+ }
+ } else {
+ /* for binaries, address is absolute and thus should be within
+ * the absolute address range of [seg_start, seg_end)
+ */
+ for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
+ if (seg->start <= addr && addr < seg->end)
+ return seg;
+ }
+ }
+
+ return NULL;
+}
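
find_elf_seg() above encodes a small address-translation invariant: an absolute address lies in [start, end), its file-offset-based counterpart lies in [offset, offset + (end - start)), and the two differ by the segment's load bias (start - offset). A self-contained sketch with invented numbers:

#include <assert.h>

struct seg { long start, end, offset; };

int main(void)
{
	/* a PT_LOAD segment loaded at vaddr 0x401000, backed by file offset 0x1000 */
	struct seg s = { .start = 0x401000, .end = 0x405000, .offset = 0x1000 };
	long abs_ip = 0x401234;                      /* virtual address of a probe */
	long rel_ip = abs_ip - (s.start - s.offset); /* 0x1234: offset-style IP */

	assert(s.start <= abs_ip && abs_ip < s.end);            /* absolute lookup */
	assert(s.offset <= rel_ip &&
	       rel_ip < s.offset + (s.end - s.start));          /* relative lookup */
	assert(s.start + (rel_ip - s.offset) == abs_ip);        /* round trip */
	return 0;
}
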
+
+static int parse_usdt_note(Elf *elf, const char *path, long base_addr,
+ GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off,
+ struct usdt_note *usdt_note);
+
+static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie);
+
+static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *path, pid_t pid,
+ const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie,
+ struct usdt_target **out_targets, size_t *out_target_cnt)
+{
+ size_t off, name_off, desc_off, seg_cnt = 0, lib_seg_cnt = 0, target_cnt = 0;
+ struct elf_seg *segs = NULL, *lib_segs = NULL;
+ struct usdt_target *targets = NULL, *target;
+ long base_addr = 0;
+ Elf_Scn *notes_scn, *base_scn;
+ GElf_Shdr base_shdr, notes_shdr;
+ GElf_Ehdr ehdr;
+ GElf_Nhdr nhdr;
+ Elf_Data *data;
+ int err;
+
+ *out_targets = NULL;
+ *out_target_cnt = 0;
+
+ err = find_elf_sec_by_name(elf, USDT_NOTE_SEC, &notes_shdr, &notes_scn);
+ if (err) {
+ pr_warn("usdt: no USDT notes section (%s) found in '%s'\n", USDT_NOTE_SEC, path);
+ return err;
+ }
+
+ if (notes_shdr.sh_type != SHT_NOTE || !gelf_getehdr(elf, &ehdr)) {
+ pr_warn("usdt: invalid USDT notes section (%s) in '%s'\n", USDT_NOTE_SEC, path);
+ return -EINVAL;
+ }
+
+ err = parse_elf_segs(elf, path, &segs, &seg_cnt);
+ if (err) {
+ pr_warn("usdt: failed to process ELF program segments for '%s': %d\n", path, err);
+ goto err_out;
+ }
+
+ /* .stapsdt.base ELF section is optional, but is used for prelink
+ * offset compensation (see a big comment further below)
+ */
+ if (find_elf_sec_by_name(elf, USDT_BASE_SEC, &base_shdr, &base_scn) == 0)
+ base_addr = base_shdr.sh_addr;
+
+ data = elf_getdata(notes_scn, 0);
+ off = 0;
+ while ((off = gelf_getnote(data, off, &nhdr, &name_off, &desc_off)) > 0) {
+ long usdt_abs_ip, usdt_rel_ip, usdt_sema_off = 0;
+ struct usdt_note note;
+ struct elf_seg *seg = NULL;
+ void *tmp;
+
+ err = parse_usdt_note(elf, path, base_addr, &nhdr,
+ data->d_buf, name_off, desc_off, &note);
+ if (err)
+ goto err_out;
+
+ if (strcmp(note.provider, usdt_provider) != 0 || strcmp(note.name, usdt_name) != 0)
+ continue;
+
+ /* We need to compensate "prelink effect". See [0] for details,
+ * relevant parts quoted here:
+ *
+ * Each SDT probe also expands into a non-allocated ELF note. You can
+ * find this by looking at SHT_NOTE sections and decoding the format;
+ * see below for details. Because the note is non-allocated, it means
+ * there is no runtime cost, and also preserved in both stripped files
+ * and .debug files.
+ *
+ * However, this means that prelink won't adjust the note's contents
+ * for address offsets. Instead, this is done via the .stapsdt.base
+ * section. This is a special section that is added to the text. We
+ * will only ever have one of these sections in a final link and it
+ * will only ever be one byte long. Nothing about this section itself
+ * matters, we just use it as a marker to detect prelink address
+ * adjustments.
+ *
+ * Each probe note records the link-time address of the .stapsdt.base
+ * section alongside the probe PC address. The decoder compares the
+ * base address stored in the note with the .stapsdt.base section's
+ * sh_addr. Initially these are the same, but the section header will
+ * be adjusted by prelink. So the decoder applies the difference to
+ * the probe PC address to get the correct prelinked PC address; the
+ * same adjustment is applied to the semaphore address, if any.
+ *
+ * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+ */
+ usdt_rel_ip = usdt_abs_ip = note.loc_addr;
+ if (base_addr) {
+ usdt_abs_ip += base_addr - note.base_addr;
+ usdt_rel_ip += base_addr - note.base_addr;
+ }
+
+ if (ehdr.e_type == ET_EXEC) {
+ /* When attaching uprobes (which is what USDTs basically
+ * are) the kernel expects a relative IP to be specified,
+ * so if we are attaching to an executable ELF binary
+ * (i.e., not a shared library), we need to calculate
+ * the proper relative IP based on the ELF's load address
+ */
+ seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip, false /* relative */);
+ if (!seg) {
+ err = -ESRCH;
+ pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n",
+ usdt_provider, usdt_name, path, usdt_abs_ip);
+ goto err_out;
+ }
+ if (!seg->is_exec) {
+ err = -ESRCH;
+ pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n",
+ path, seg->start, seg->end, usdt_provider, usdt_name,
+ usdt_abs_ip);
+ goto err_out;
+ }
+
+ usdt_rel_ip = usdt_abs_ip - (seg->start - seg->offset);
+ } else if (!man->has_bpf_cookie) { /* ehdr.e_type == ET_DYN */
+ /* If we don't have BPF cookie support but need to
+ * attach to a shared library, we'll need to know and
+ * record absolute addresses of attach points due to
+ * the need to look up the USDT spec by the absolute IP
+ * of the triggered uprobe. Doing this resolution is
+ * only possible when we have a specific PID of the
+ * process that's using the specified shared library.
+ * BPF cookie removes the absolute address limitation,
+ * as we don't need to do this lookup (we just use the
+ * BPF cookie as an index of the USDT spec), so for
+ * newer kernels with BPF cookie support libbpf supports
+ * USDT attachment to shared libraries with no PID filter.
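
The prelink compensation quoted above reduces to a single delta: the difference between the current .stapsdt.base sh_addr and the base address recorded in the note gets added to the recorded probe (and semaphore) addresses. A tiny worked example, all addresses invented:

#include <assert.h>

int main(void)
{
	long note_loc = 0x1234;   /* probe PC recorded in the note at link time */
	long note_base = 0x5000;  /* .stapsdt.base address recorded alongside it */
	long sh_addr = 0x15000;   /* .stapsdt.base sh_addr after prelink moved things */

	long delta = sh_addr - note_base; /* 0x10000 */
	long ip = note_loc + delta;       /* same adjustment as usdt_abs_ip above */

	assert(ip == 0x11234);
	return 0;
}
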
+ */ + if (pid < 0) { + pr_warn("usdt: attaching to shared libraries without specific PID is not supported on current kernel\n"); + err = -ENOTSUP; + goto err_out; + } + + /* lib_segs are lazily initialized only if necessary */ + if (lib_seg_cnt == 0) { + err = parse_lib_segs(pid, path, &lib_segs, &lib_seg_cnt); + if (err) { + pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %d\n", + pid, path, err); + goto err_out; + } + } + + seg = find_elf_seg(lib_segs, lib_seg_cnt, usdt_rel_ip, true /* relative */); + if (!seg) { + err = -ESRCH; + pr_warn("usdt: failed to find shared lib memory segment for '%s:%s' in '%s' at relative IP 0x%lx\n", + usdt_provider, usdt_name, path, usdt_rel_ip); + goto err_out; + } + + usdt_abs_ip = seg->start + (usdt_rel_ip - seg->offset); + } + + pr_debug("usdt: probe for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved abs_ip 0x%lx rel_ip 0x%lx) args '%s' in segment [0x%lx, 0x%lx) at offset 0x%lx\n", + usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ", path, + note.loc_addr, note.base_addr, usdt_abs_ip, usdt_rel_ip, note.args, + seg ? seg->start : 0, seg ? seg->end : 0, seg ? seg->offset : 0); + + /* Adjust semaphore address to be a relative offset */ + if (note.sema_addr) { + if (!man->has_sema_refcnt) { + pr_warn("usdt: kernel doesn't support USDT semaphore refcounting for '%s:%s' in '%s'\n", + usdt_provider, usdt_name, path); + err = -ENOTSUP; + goto err_out; + } + + seg = find_elf_seg(segs, seg_cnt, note.sema_addr, false /* relative */); + if (!seg) { + err = -ESRCH; + pr_warn("usdt: failed to find ELF loadable segment with semaphore of '%s:%s' in '%s' at 0x%lx\n", + usdt_provider, usdt_name, path, note.sema_addr); + goto err_out; + } + if (seg->is_exec) { + err = -ESRCH; + pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx] for semaphore of '%s:%s' at 0x%lx is executable\n", + path, seg->start, seg->end, usdt_provider, usdt_name, + note.sema_addr); + goto err_out; + } + + usdt_sema_off = note.sema_addr - (seg->start - seg->offset); + + pr_debug("usdt: sema for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved 0x%lx) in segment [0x%lx, 0x%lx] at offset 0x%lx\n", + usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? 
"exec" : "lib ", + path, note.sema_addr, note.base_addr, usdt_sema_off, + seg->start, seg->end, seg->offset); + } + + /* Record adjusted addresses and offsets and parse USDT spec */ + tmp = libbpf_reallocarray(targets, target_cnt + 1, sizeof(*targets)); + if (!tmp) { + err = -ENOMEM; + goto err_out; + } + targets = tmp; + + target = &targets[target_cnt]; + memset(target, 0, sizeof(*target)); + + target->abs_ip = usdt_abs_ip; + target->rel_ip = usdt_rel_ip; + target->sema_off = usdt_sema_off; + + /* notes->args references strings from Elf itself, so they can + * be referenced safely until elf_end() call + */ + target->spec_str = note.args; + + err = parse_usdt_spec(&target->spec, ¬e, usdt_cookie); + if (err) + goto err_out; + + target_cnt++; + } + + *out_targets = targets; + *out_target_cnt = target_cnt; + err = target_cnt; + +err_out: + free(segs); + free(lib_segs); + if (err < 0) + free(targets); + return err; +} + +struct bpf_link_usdt { + struct bpf_link link; + + struct usdt_manager *usdt_man; + + size_t spec_cnt; + int *spec_ids; + + size_t uprobe_cnt; + struct { + long abs_ip; + struct bpf_link *link; + } *uprobes; +}; + +static int bpf_link_usdt_detach(struct bpf_link *link) +{ + struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link); + struct usdt_manager *man = usdt_link->usdt_man; + int i; + + for (i = 0; i < usdt_link->uprobe_cnt; i++) { + /* detach underlying uprobe link */ + bpf_link__destroy(usdt_link->uprobes[i].link); + /* there is no need to update specs map because it will be + * unconditionally overwritten on subsequent USDT attaches, + * but if BPF cookies are not used we need to remove entry + * from ip_to_spec_id map, otherwise we'll run into false + * conflicting IP errors + */ + if (!man->has_bpf_cookie) { + /* not much we can do about errors here */ + (void)bpf_map_delete_elem(bpf_map__fd(man->ip_to_spec_id_map), + &usdt_link->uprobes[i].abs_ip); + } + } + + /* try to return the list of previously used spec IDs to usdt_manager + * for future reuse for subsequent USDT attaches + */ + if (!man->free_spec_ids) { + /* if there were no free spec IDs yet, just transfer our IDs */ + man->free_spec_ids = usdt_link->spec_ids; + man->free_spec_cnt = usdt_link->spec_cnt; + usdt_link->spec_ids = NULL; + } else { + /* otherwise concat IDs */ + size_t new_cnt = man->free_spec_cnt + usdt_link->spec_cnt; + int *new_free_ids; + + new_free_ids = libbpf_reallocarray(man->free_spec_ids, new_cnt, + sizeof(*new_free_ids)); + /* If we couldn't resize free_spec_ids, we'll just leak + * a bunch of free IDs; this is very unlikely to happen and if + * system is so exhausted on memory, it's the least of user's + * concerns, probably. + * So just do our best here to return those IDs to usdt_manager. 
+ */ + if (new_free_ids) { + memcpy(new_free_ids + man->free_spec_cnt, usdt_link->spec_ids, + usdt_link->spec_cnt * sizeof(*usdt_link->spec_ids)); + man->free_spec_ids = new_free_ids; + man->free_spec_cnt = new_cnt; + } + } + + return 0; +} + +static void bpf_link_usdt_dealloc(struct bpf_link *link) +{ + struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link); + + free(usdt_link->spec_ids); + free(usdt_link->uprobes); + free(usdt_link); +} + +static size_t specs_hash_fn(const void *key, void *ctx) +{ + const char *s = key; + + return str_hash(s); +} + +static bool specs_equal_fn(const void *key1, const void *key2, void *ctx) +{ + const char *s1 = key1; + const char *s2 = key2; + + return strcmp(s1, s2) == 0; +} + +static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash, + struct bpf_link_usdt *link, struct usdt_target *target, + int *spec_id, bool *is_new) +{ + void *tmp; + int err; + + /* check if we already allocated spec ID for this spec string */ + if (hashmap__find(specs_hash, target->spec_str, &tmp)) { + *spec_id = (long)tmp; + *is_new = false; + return 0; + } + + /* otherwise it's a new ID that needs to be set up in specs map and + * returned back to usdt_manager when USDT link is detached + */ + tmp = libbpf_reallocarray(link->spec_ids, link->spec_cnt + 1, sizeof(*link->spec_ids)); + if (!tmp) + return -ENOMEM; + link->spec_ids = tmp; + + /* get next free spec ID, giving preference to free list, if not empty */ + if (man->free_spec_cnt) { + *spec_id = man->free_spec_ids[man->free_spec_cnt - 1]; + + /* cache spec ID for current spec string for future lookups */ + err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id); + if (err) + return err; + + man->free_spec_cnt--; + } else { + /* don't allocate spec ID bigger than what fits in specs map */ + if (man->next_free_spec_id >= bpf_map__max_entries(man->specs_map)) + return -E2BIG; + + *spec_id = man->next_free_spec_id; + + /* cache spec ID for current spec string for future lookups */ + err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id); + if (err) + return err; + + man->next_free_spec_id++; + } + + /* remember new spec ID in the link for later return back to free list on detach */ + link->spec_ids[link->spec_cnt] = *spec_id; + link->spec_cnt++; + *is_new = true; + return 0; +} + +struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct bpf_program *prog, + pid_t pid, const char *path, + const char *usdt_provider, const char *usdt_name, + __u64 usdt_cookie) +{ + int i, fd, err, spec_map_fd, ip_map_fd; + LIBBPF_OPTS(bpf_uprobe_opts, opts); + struct hashmap *specs_hash = NULL; + struct bpf_link_usdt *link = NULL; + struct usdt_target *targets = NULL; + size_t target_cnt; + Elf *elf; + + spec_map_fd = bpf_map__fd(man->specs_map); + ip_map_fd = bpf_map__fd(man->ip_to_spec_id_map); + + /* TODO: perform path resolution similar to uprobe's */ + fd = open(path, O_RDONLY); + if (fd < 0) { + err = -errno; + pr_warn("usdt: failed to open ELF binary '%s': %d\n", path, err); + return libbpf_err_ptr(err); + } + + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) { + err = -EBADF; + pr_warn("usdt: failed to parse ELF binary '%s': %s\n", path, elf_errmsg(-1)); + goto err_out; + } + + err = sanity_check_usdt_elf(elf, path); + if (err) + goto err_out; + + /* normalize PID filter */ + if (pid < 0) + pid = -1; + else if (pid == 0) + pid = getpid(); + + /* discover USDT in given binary, optionally limiting + * activations to a given PID, 
if pid > 0 + */ + err = collect_usdt_targets(man, elf, path, pid, usdt_provider, usdt_name, + usdt_cookie, &targets, &target_cnt); + if (err <= 0) { + err = (err == 0) ? -ENOENT : err; + goto err_out; + } + + specs_hash = hashmap__new(specs_hash_fn, specs_equal_fn, NULL); + if (IS_ERR(specs_hash)) { + err = PTR_ERR(specs_hash); + goto err_out; + } + + link = calloc(1, sizeof(*link)); + if (!link) { + err = -ENOMEM; + goto err_out; + } + + link->usdt_man = man; + link->link.detach = &bpf_link_usdt_detach; + link->link.dealloc = &bpf_link_usdt_dealloc; + + link->uprobes = calloc(target_cnt, sizeof(*link->uprobes)); + if (!link->uprobes) { + err = -ENOMEM; + goto err_out; + } + + for (i = 0; i < target_cnt; i++) { + struct usdt_target *target = &targets[i]; + struct bpf_link *uprobe_link; + bool is_new; + int spec_id; + + /* Spec ID can be either reused or newly allocated. If it is + * newly allocated, we'll need to fill out spec map, otherwise + * entire spec should be valid and can be just used by a new + * uprobe. We reuse spec when USDT arg spec is identical. We + * also never share specs between two different USDT + * attachments ("links"), so all the reused specs already + * share USDT cookie value implicitly. + */ + err = allocate_spec_id(man, specs_hash, link, target, &spec_id, &is_new); + if (err) + goto err_out; + + if (is_new && bpf_map_update_elem(spec_map_fd, &spec_id, &target->spec, BPF_ANY)) { + err = -errno; + pr_warn("usdt: failed to set USDT spec #%d for '%s:%s' in '%s': %d\n", + spec_id, usdt_provider, usdt_name, path, err); + goto err_out; + } + if (!man->has_bpf_cookie && + bpf_map_update_elem(ip_map_fd, &target->abs_ip, &spec_id, BPF_NOEXIST)) { + err = -errno; + if (err == -EEXIST) { + pr_warn("usdt: IP collision detected for spec #%d for '%s:%s' in '%s'\n", + spec_id, usdt_provider, usdt_name, path); + } else { + pr_warn("usdt: failed to map IP 0x%lx to spec #%d for '%s:%s' in '%s': %d\n", + target->abs_ip, spec_id, usdt_provider, usdt_name, + path, err); + } + goto err_out; + } + + opts.ref_ctr_offset = target->sema_off; + opts.bpf_cookie = man->has_bpf_cookie ? spec_id : 0; + uprobe_link = bpf_program__attach_uprobe_opts(prog, pid, path, + target->rel_ip, &opts); + err = libbpf_get_error(uprobe_link); + if (err) { + pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %d\n", + i, usdt_provider, usdt_name, path, err); + goto err_out; + } + + link->uprobes[i].link = uprobe_link; + link->uprobes[i].abs_ip = target->abs_ip; + link->uprobe_cnt++; + } + + free(targets); + hashmap__free(specs_hash); + elf_end(elf); + close(fd); + + return &link->link; + +err_out: + if (link) + bpf_link__destroy(&link->link); + free(targets); + hashmap__free(specs_hash); + if (elf) + elf_end(elf); + close(fd); + return libbpf_err_ptr(err); +} + +/* Parse out USDT ELF note from '.note.stapsdt' section. + * Logic inspired by perf's code. 
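
Applications don't call usdt_manager_attach_usdt() directly; it sits behind libbpf's public bpf_program__attach_usdt() entry point added in the same series (not shown in this diff). A hedged usage sketch, where the object, program name, library path, cookie, and provider/probe names are all hypothetical:

#include <bpf/libbpf.h>

/* 'obj' is assumed to be an already-loaded bpf_object whose BPF side
 * includes bpf/usdt.bpf.h (so the __bpf_usdt_* maps exist) */
static struct bpf_link *attach_my_usdt(struct bpf_object *obj, pid_t pid)
{
	LIBBPF_OPTS(bpf_usdt_opts, opts, .usdt_cookie = 0xcafe);
	struct bpf_program *prog;
	struct bpf_link *link;

	prog = bpf_object__find_program_by_name(obj, "handle_usdt");
	if (!prog)
		return NULL;

	/* resolves .note.stapsdt entries in libfoo.so and attaches one
	 * uprobe per 'my_provider:my_probe' call site */
	link = bpf_program__attach_usdt(prog, pid, "/usr/lib/libfoo.so",
					"my_provider", "my_probe", &opts);
	return libbpf_get_error(link) ? NULL : link;
}
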
+ */ +static int parse_usdt_note(Elf *elf, const char *path, long base_addr, + GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off, + struct usdt_note *note) +{ + const char *provider, *name, *args; + long addrs[3]; + size_t len; + + /* sanity check USDT note name and type first */ + if (strncmp(data + name_off, USDT_NOTE_NAME, nhdr->n_namesz) != 0) + return -EINVAL; + if (nhdr->n_type != USDT_NOTE_TYPE) + return -EINVAL; + + /* sanity check USDT note contents ("description" in ELF terminology) */ + len = nhdr->n_descsz; + data = data + desc_off; + + /* +3 is the very minimum required to store three empty strings */ + if (len < sizeof(addrs) + 3) + return -EINVAL; + + /* get location, base, and semaphore addrs */ + memcpy(&addrs, data, sizeof(addrs)); + + /* parse string fields: provider, name, args */ + provider = data + sizeof(addrs); + + name = (const char *)memchr(provider, '\0', data + len - provider); + if (!name) /* non-zero-terminated provider */ + return -EINVAL; + name++; + if (name >= data + len || *name == '\0') /* missing or empty name */ + return -EINVAL; + + args = memchr(name, '\0', data + len - name); + if (!args) /* non-zero-terminated name */ + return -EINVAL; + ++args; + if (args >= data + len) /* missing arguments spec */ + return -EINVAL; + + note->provider = provider; + note->name = name; + if (*args == '\0' || *args == ':') + note->args = ""; + else + note->args = args; + note->loc_addr = addrs[0]; + note->base_addr = addrs[1]; + note->sema_addr = addrs[2]; + + return 0; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg); + +static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie) +{ + const char *s; + int len; + + spec->usdt_cookie = usdt_cookie; + spec->arg_cnt = 0; + + s = note->args; + while (s[0]) { + if (spec->arg_cnt >= USDT_MAX_ARG_CNT) { + pr_warn("usdt: too many USDT arguments (> %d) for '%s:%s' with args spec '%s'\n", + USDT_MAX_ARG_CNT, note->provider, note->name, note->args); + return -E2BIG; + } + + len = parse_usdt_arg(s, spec->arg_cnt, &spec->args[spec->arg_cnt]); + if (len < 0) + return len; + + s += len; + spec->arg_cnt++; + } + + return 0; +} + +/* Architecture-specific logic for parsing USDT argument location specs */ + +#if defined(__x86_64__) || defined(__i386__) + +static int calc_pt_regs_off(const char *reg_name) +{ + static struct { + const char *names[4]; + size_t pt_regs_off; + } reg_map[] = { +#ifdef __x86_64__ +#define reg_off(reg64, reg32) offsetof(struct pt_regs, reg64) +#else +#define reg_off(reg64, reg32) offsetof(struct pt_regs, reg32) +#endif + { {"rip", "eip", "", ""}, reg_off(rip, eip) }, + { {"rax", "eax", "ax", "al"}, reg_off(rax, eax) }, + { {"rbx", "ebx", "bx", "bl"}, reg_off(rbx, ebx) }, + { {"rcx", "ecx", "cx", "cl"}, reg_off(rcx, ecx) }, + { {"rdx", "edx", "dx", "dl"}, reg_off(rdx, edx) }, + { {"rsi", "esi", "si", "sil"}, reg_off(rsi, esi) }, + { {"rdi", "edi", "di", "dil"}, reg_off(rdi, edi) }, + { {"rbp", "ebp", "bp", "bpl"}, reg_off(rbp, ebp) }, + { {"rsp", "esp", "sp", "spl"}, reg_off(rsp, esp) }, +#undef reg_off +#ifdef __x86_64__ + { {"r8", "r8d", "r8w", "r8b"}, offsetof(struct pt_regs, r8) }, + { {"r9", "r9d", "r9w", "r9b"}, offsetof(struct pt_regs, r9) }, + { {"r10", "r10d", "r10w", "r10b"}, offsetof(struct pt_regs, r10) }, + { {"r11", "r11d", "r11w", "r11b"}, offsetof(struct pt_regs, r11) }, + { {"r12", "r12d", "r12w", "r12b"}, offsetof(struct pt_regs, r12) }, + { {"r13", "r13d", "r13w", "r13b"}, offsetof(struct 
pt_regs, r13) },
+ { {"r14", "r14d", "r14w", "r14b"}, offsetof(struct pt_regs, r14) },
+ { {"r15", "r15d", "r15w", "r15b"}, offsetof(struct pt_regs, r15) },
+#endif
+ };
+ int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(reg_map); i++) {
+ for (j = 0; j < ARRAY_SIZE(reg_map[i].names); j++) {
+ if (strcmp(reg_name, reg_map[i].names[j]) == 0)
+ return reg_map[i].pt_regs_off;
+ }
+ }
+
+ pr_warn("usdt: unrecognized register '%s'\n", reg_name);
+ return -ENOENT;
+}
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
+{
+ char *reg_name = NULL;
+ int arg_sz, len, reg_off;
+ long off;
+
+ if (sscanf(arg_str, " %d @ %ld ( %%%m[^)] ) %n", &arg_sz, &off, &reg_name, &len) == 3) {
+ /* Memory dereference case, e.g., -4@-20(%rbp) */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = off;
+ reg_off = calc_pt_regs_off(reg_name);
+ free(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ %%%ms %n", &arg_sz, &reg_name, &len) == 2) {
+ /* Register read case, e.g., -4@%eax */
+ arg->arg_type = USDT_ARG_REG;
+ arg->val_off = 0;
+
+ reg_off = calc_pt_regs_off(reg_name);
+ free(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ $%ld %n", &arg_sz, &off, &len) == 2) {
+ /* Constant value case, e.g., 4@$71 */
+ arg->arg_type = USDT_ARG_CONST;
+ arg->val_off = off;
+ arg->reg_off = 0;
+ } else {
+ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str);
+ return -EINVAL;
+ }
+
+ arg->arg_signed = arg_sz < 0;
+ if (arg_sz < 0)
+ arg_sz = -arg_sz;
+
+ switch (arg_sz) {
+ case 1: case 2: case 4: case 8:
+ arg->arg_bitshift = 64 - arg_sz * 8;
+ break;
+ default:
+ pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n",
+ arg_num, arg_str, arg_sz);
+ return -EINVAL;
+ }
+
+ return len;
+}
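
To see what the x86 branch above extracts, here is the first sscanf() pattern run on a typical spec; this standalone version swaps the auto-allocating %m conversion for a fixed buffer so it compiles anywhere:

#include <stdio.h>

int main(void)
{
	const char *spec = "-4@-20(%rbp)";
	char reg[16];
	int sz, len;
	long off;

	/* same shape as the first sscanf() above, minus %m allocation */
	if (sscanf(spec, " %d @ %ld ( %%%15[^)] ) %n", &sz, &off, reg, &len) == 3)
		printf("deref: sz=%d off=%ld reg=%s consumed=%d\n", sz, off, reg, len);
	/* -> deref: sz=-4 off=-20 reg=rbp consumed=12 */
	return 0;
}

The negative size marks the argument as signed, and the %n-reported length is what parse_usdt_spec() uses to advance to the next token.
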
+
+#elif defined(__s390x__)
+
+/* Do not support __s390__ for now, since user_pt_regs is broken with -m31.
+ */
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
+{
+ unsigned int reg;
+ int arg_sz, len;
+ long off;
+
+ if (sscanf(arg_str, " %d @ %ld ( %%r%u ) %n", &arg_sz, &off, &reg, &len) == 3) {
+ /* Memory dereference case, e.g., -2@-28(%r15) */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = off;
+ if (reg > 15) {
+ pr_warn("usdt: unrecognized register '%%r%u'\n", reg);
+ return -EINVAL;
+ }
+ arg->reg_off = offsetof(user_pt_regs, gprs[reg]);
+ } else if (sscanf(arg_str, " %d @ %%r%u %n", &arg_sz, &reg, &len) == 2) {
+ /* Register read case, e.g., -8@%r0 */
+ arg->arg_type = USDT_ARG_REG;
+ arg->val_off = 0;
+ if (reg > 15) {
+ pr_warn("usdt: unrecognized register '%%r%u'\n", reg);
+ return -EINVAL;
+ }
+ arg->reg_off = offsetof(user_pt_regs, gprs[reg]);
+ } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) {
+ /* Constant value case, e.g., 4@71 */
+ arg->arg_type = USDT_ARG_CONST;
+ arg->val_off = off;
+ arg->reg_off = 0;
+ } else {
+ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str);
+ return -EINVAL;
+ }
+
+ arg->arg_signed = arg_sz < 0;
+ if (arg_sz < 0)
+ arg_sz = -arg_sz;
+
+ switch (arg_sz) {
+ case 1: case 2: case 4: case 8:
+ arg->arg_bitshift = 64 - arg_sz * 8;
+ break;
+ default:
+ pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n",
+ arg_num, arg_str, arg_sz);
+ return -EINVAL;
+ }
+
+ return len;
+}
+
+#elif defined(__aarch64__)
+
+static int calc_pt_regs_off(const char *reg_name)
+{
+ int reg_num;
+
+ if (sscanf(reg_name, "x%d", &reg_num) == 1) {
+ if (reg_num >= 0 && reg_num < 31)
+ return offsetof(struct user_pt_regs, regs[reg_num]);
+ } else if (strcmp(reg_name, "sp") == 0) {
+ return offsetof(struct user_pt_regs, sp);
+ }
+ pr_warn("usdt: unrecognized register '%s'\n", reg_name);
+ return -ENOENT;
+}
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
+{
+ char *reg_name = NULL;
+ int arg_sz, len, reg_off;
+ long off;
+
+ if (sscanf(arg_str, " %d @ \[ %m[a-z0-9], %ld ] %n", &arg_sz, &reg_name, &off, &len) == 3) {
+ /* Memory dereference case, e.g., -4@[sp, 96] */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = off;
+ reg_off = calc_pt_regs_off(reg_name);
+ free(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ \[ %m[a-z0-9] ] %n", &arg_sz, &reg_name, &len) == 2) {
+ /* Memory dereference case, e.g., -4@[sp] */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = 0;
+ reg_off = calc_pt_regs_off(reg_name);
+ free(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) {
+ /* Constant value case, e.g., 4@5 */
+ arg->arg_type = USDT_ARG_CONST;
+ arg->val_off = off;
+ arg->reg_off = 0;
+ } else if (sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, &reg_name, &len) == 2) {
+ /* Register read case, e.g., -8@x4 */
+ arg->arg_type = USDT_ARG_REG;
+ arg->val_off = 0;
+ reg_off = calc_pt_regs_off(reg_name);
+ free(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else {
+ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str);
+ return -EINVAL;
+ }
+
+ arg->arg_signed = arg_sz < 0;
+ if (arg_sz < 0)
+ arg_sz = -arg_sz;
+
+ switch (arg_sz) {
+ case 1: case 2: case 4: case 8:
+ arg->arg_bitshift = 64 - arg_sz * 8;
+ break;
+ default:
+ pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n",
+ arg_num, arg_str, arg_sz);
+ return -EINVAL;
+ }
+
+ return len;
+}
+
+#elif defined(__riscv)
+
+static int calc_pt_regs_off(const char *reg_name)
+{
+ static struct {
+ const char *name;
+ size_t pt_regs_off;
+ } reg_map[] = {
+ { "ra", offsetof(struct user_regs_struct, ra) },
+ { "sp", offsetof(struct user_regs_struct, sp) },
+ { "gp", offsetof(struct user_regs_struct, gp) },
+ { "tp", offsetof(struct user_regs_struct, tp) },
+ { "a0", offsetof(struct user_regs_struct, a0) },
+ { "a1", offsetof(struct user_regs_struct, a1) },
+ { "a2", offsetof(struct user_regs_struct, a2) },
+ { "a3", offsetof(struct user_regs_struct, a3) },
+ { "a4", offsetof(struct user_regs_struct, a4) },
+ { "a5", offsetof(struct user_regs_struct, a5) },
+ { "a6", offsetof(struct user_regs_struct, a6) },
+ { "a7", offsetof(struct user_regs_struct, a7) },
+ { "s0", offsetof(struct user_regs_struct, s0) },
+ { "s1", offsetof(struct user_regs_struct, s1) },
+ { "s2", offsetof(struct user_regs_struct, s2) },
+ { "s3", offsetof(struct user_regs_struct, s3) },
+ { "s4", offsetof(struct user_regs_struct, s4) },
+ { "s5", offsetof(struct user_regs_struct, s5) },
+ { "s6", offsetof(struct user_regs_struct, s6) },
+ { "s7", offsetof(struct user_regs_struct, s7) },
+ { "s8", offsetof(struct user_regs_struct, rv_s8) },
+ { "s9", offsetof(struct user_regs_struct, s9) },
+ { "s10", offsetof(struct user_regs_struct, s10) },
+ { "s11", offsetof(struct user_regs_struct, s11) },
+ { "t0", offsetof(struct user_regs_struct, t0) },
+ { "t1", offsetof(struct user_regs_struct, t1) },
+ { "t2", offsetof(struct user_regs_struct, t2) },
+ { "t3", offsetof(struct user_regs_struct, t3) },
+ { "t4", offsetof(struct user_regs_struct, t4) },
+ { "t5", offsetof(struct user_regs_struct, t5) },
+ { "t6", offsetof(struct user_regs_struct, t6) },
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(reg_map); i++) {
+ if (strcmp(reg_name, reg_map[i].name) == 0)
+ return reg_map[i].pt_regs_off;
+ }
+
+ pr_warn("usdt: unrecognized register '%s'\n", reg_name);
+ return -ENOENT;
+}
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
+{
+ char *reg_name = NULL;
+ int arg_sz, len, reg_off;
+ long off;
+
+ if (sscanf(arg_str, " %d @ %ld ( %m[a-z0-9] ) %n", &arg_sz, &off, &reg_name, &len) == 3) {
+ /* Memory dereference case, e.g., -8@-88(s0) */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = off;
+ reg_off = calc_pt_regs_off(reg_name);
+ free(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) {
+ /* Constant value case, e.g., 4@5 */
+ arg->arg_type = USDT_ARG_CONST;
+ arg->val_off = off;
+ arg->reg_off = 0;
+ } else if (sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, &reg_name, &len) == 2) {
+ /* Register read case, e.g., -8@a1 */
+ arg->arg_type = USDT_ARG_REG;
+ arg->val_off = 0;
+ reg_off = calc_pt_regs_off(reg_name);
+ free(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else {
+ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str);
+ return -EINVAL;
+ }
+
+ arg->arg_signed = arg_sz < 0;
+ if (arg_sz < 0)
+ arg_sz = -arg_sz;
+
+ switch (arg_sz) {
+ case 1: case 2: case 4: case 8:
+ arg->arg_bitshift = 64 - arg_sz * 8;
+ break;
+ default:
+ pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n",
+ arg_num, arg_str, arg_sz);
+ return -EINVAL;
+ }
+
+ return len;
+}
+
+#else
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
+{
+ pr_warn("usdt: libbpf doesn't support USDTs on current architecture\n");
+ return -ENOTSUP;
+}
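
Every per-arch parser above ends with the same arg_bitshift = 64 - arg_sz * 8 computation. The consuming side (usdt.bpf.h, not part of this file) presumably uses that shift pair to narrow a raw 64-bit read and sign- or zero-extend it per arg_signed; a minimal model of the arithmetic, relying on arithmetic right shift for the signed case as the kernel does:

#include <assert.h>

static long long apply(unsigned long long raw, int bitshift, int is_signed)
{
	unsigned long long v = raw << bitshift; /* drop the unused high bytes */

	return is_signed ? (long long)v >> bitshift   /* sign-extend */
			 : (long long)(v >> bitshift); /* zero-extend */
}

int main(void)
{
	/* a "-4@..." arg: 4 bytes, signed -> bitshift = 64 - 32 = 32;
	 * the low 32 bits of the raw value hold -10 */
	unsigned long long raw = 0x00000000fffffff6ULL;

	assert(apply(raw, 32, 1) == -10);
	assert(apply(raw, 32, 0) == 0xfffffff6LL);
	return 0;
}
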
+ +#endif diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index edafe56664f3..af136f73b09d 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -481,8 +481,8 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) BPF_EMIT_CALL(BPF_FUNC_redirect_map), BPF_EXIT_INSN(), }; - size_t insns_cnt[] = {sizeof(prog) / sizeof(struct bpf_insn), - sizeof(prog_redirect_flags) / sizeof(struct bpf_insn), + size_t insns_cnt[] = {ARRAY_SIZE(prog), + ARRAY_SIZE(prog_redirect_flags), }; struct bpf_insn *progs[] = {prog, prog_redirect_flags}; enum xsk_prog option = get_xsk_prog(); @@ -1193,12 +1193,23 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, int xsk_umem__delete(struct xsk_umem *umem) { + struct xdp_mmap_offsets off; + int err; + if (!umem) return 0; if (umem->refcount) return -EBUSY; + err = xsk_get_mmap_offsets(umem->fd, &off); + if (!err && umem->fill_save && umem->comp_save) { + munmap(umem->fill_save->ring - off.fr.desc, + off.fr.desc + umem->config.fill_size * sizeof(__u64)); + munmap(umem->comp_save->ring - off.cr.desc, + off.cr.desc + umem->config.comp_size * sizeof(__u64)); + } + close(umem->fd); free(umem); diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index 32c5051c24eb..a8f1a237931b 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ b/tools/lib/perf/Documentation/libperf.txt @@ -62,11 +62,12 @@ SYNOPSIS struct perf_thread_map; struct perf_thread_map *perf_thread_map__new_dummy(void); + struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array); - void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid); - char *perf_thread_map__comm(struct perf_thread_map *map, int thread); + void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid); + char *perf_thread_map__comm(struct perf_thread_map *map, int idx); int perf_thread_map__nr(struct perf_thread_map *threads); - pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread); + pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx); struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map); void perf_thread_map__put(struct perf_thread_map *map); diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index 08fe6e3c4089..21df023a2103 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -153,7 +153,7 @@ $(TESTS_STATIC): $(TESTS_IN) $(LIBPERF_A) $(LIBAPI) $(QUIET_LINK)$(CC) -o $@ $^ $(TESTS_SHARED): $(TESTS_IN) $(LIBAPI) - $(QUIET_LINK)$(CC) -o $@ -L$(if $(OUTPUT),$(OUTPUT),.) $^ -lperf + $(QUIET_LINK)$(CC) -o $@ -L$(or $(OUTPUT),.) $^ -lperf make-tests: libs $(TESTS_SHARED) $(TESTS_STATIC) diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index ee66760f1e63..384d5e076ee4 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -319,6 +319,26 @@ struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map) return map->nr > 0 ? map->map[map->nr - 1] : result; } +/** Is 'b' a subset of 'a'. 
*/ +bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b) +{ + if (a == b || !b) + return true; + if (!a || b->nr > a->nr) + return false; + + for (int i = 0, j = 0; i < a->nr; i++) { + if (a->map[i].cpu > b->map[j].cpu) + return false; + if (a->map[i].cpu == b->map[j].cpu) { + j++; + if (j == b->nr) + return true; + } + } + return false; +} + /* * Merge two cpumaps * @@ -335,17 +355,12 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, int i, j, k; struct perf_cpu_map *merged; - if (!orig && !other) - return NULL; - if (!orig) { - perf_cpu_map__get(other); - return other; - } - if (!other) - return orig; - if (orig->nr == other->nr && - !memcmp(orig->map, other->map, orig->nr * sizeof(struct perf_cpu))) + if (perf_cpu_map__is_subset(orig, other)) return orig; + if (perf_cpu_map__is_subset(other, orig)) { + perf_cpu_map__put(orig); + return perf_cpu_map__get(other); + } tmp_len = orig->nr + other->nr; tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu)); diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 9a770bfdc804..e6c98a6e3908 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -23,6 +23,7 @@ #include <perf/cpumap.h> #include <perf/threadmap.h> #include <api/fd/array.h> +#include "internal.h" void perf_evlist__init(struct perf_evlist *evlist) { @@ -39,19 +40,23 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, * We already have cpus for evsel (via PMU sysfs) so * keep it, if there's no target cpu list defined. */ - if (!evsel->own_cpus || evlist->has_user_cpus) { + if (!evsel->own_cpus || + (!evsel->system_wide && evlist->has_user_cpus) || + (!evsel->system_wide && + !evsel->requires_cpu && + perf_cpu_map__empty(evlist->user_requested_cpus))) { perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__get(evlist->cpus); - } else if (!evsel->system_wide && perf_cpu_map__empty(evlist->cpus)) { - perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__get(evlist->cpus); + evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); } else if (evsel->cpus != evsel->own_cpus) { perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__get(evsel->own_cpus); } - perf_thread_map__put(evsel->threads); - evsel->threads = perf_thread_map__get(evlist->threads); + if (!evsel->system_wide) { + perf_thread_map__put(evsel->threads); + evsel->threads = perf_thread_map__get(evlist->threads); + } + evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus); } @@ -59,6 +64,10 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist) { struct perf_evsel *evsel; + /* Recomputing all_cpus, so start with a blank slate. */ + perf_cpu_map__put(evlist->all_cpus); + evlist->all_cpus = NULL; + perf_evlist__for_each_evsel(evlist, evsel) __perf_evlist__propagate_maps(evlist, evsel); } @@ -123,10 +132,10 @@ static void perf_evlist__purge(struct perf_evlist *evlist) void perf_evlist__exit(struct perf_evlist *evlist) { - perf_cpu_map__put(evlist->cpus); + perf_cpu_map__put(evlist->user_requested_cpus); perf_cpu_map__put(evlist->all_cpus); perf_thread_map__put(evlist->threads); - evlist->cpus = NULL; + evlist->user_requested_cpus = NULL; evlist->all_cpus = NULL; evlist->threads = NULL; fdarray__exit(&evlist->pollfd); @@ -155,9 +164,9 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, * original reference count of 1. If that is not the case it is up to * the caller to increase the reference count. 
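
Returning to the new perf_cpu_map__is_subset() above: it is a single merged walk over two sorted arrays with two early exits. The same walk over plain int arrays, as a standalone check:

#include <assert.h>
#include <stdbool.h>

static bool is_subset(const int *a, int na, const int *b, int nb)
{
	if (!nb)
		return true;  /* the empty map is a subset of anything */
	if (nb > na)
		return false; /* b can't fit inside a */
	for (int i = 0, j = 0; i < na; i++) {
		if (a[i] > b[j])
			return false; /* sorted, so b[j] can no longer appear */
		if (a[i] == b[j] && ++j == nb)
			return true;  /* every element of b was found */
	}
	return false;
}

int main(void)
{
	int a[] = {0, 1, 2, 4, 5}, b[] = {1, 4}, c[] = {1, 3};

	assert(is_subset(a, 5, b, 2));
	assert(!is_subset(a, 5, c, 2));
	return 0;
}

This is what lets the reworked perf_cpu_map__merge() return one of the inputs outright whenever the other adds no CPUs.
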
*/ - if (cpus != evlist->cpus) { - perf_cpu_map__put(evlist->cpus); - evlist->cpus = perf_cpu_map__get(cpus); + if (cpus != evlist->user_requested_cpus) { + perf_cpu_map__put(evlist->user_requested_cpus); + evlist->user_requested_cpus = perf_cpu_map__get(cpus); } if (threads != evlist->threads) { @@ -294,7 +303,7 @@ add: int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) { - int nr_cpus = perf_cpu_map__nr(evlist->cpus); + int nr_cpus = perf_cpu_map__nr(evlist->all_cpus); int nr_threads = perf_thread_map__nr(evlist->threads); int nfds = 0; struct perf_evsel *evsel; @@ -424,9 +433,9 @@ static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_ static int mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, int idx, struct perf_mmap_param *mp, int cpu_idx, - int thread, int *_output, int *_output_overwrite) + int thread, int *_output, int *_output_overwrite, int *nr_mmaps) { - struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx); + struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->all_cpus, cpu_idx); struct perf_evsel *evsel; int revent; @@ -474,9 +483,14 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, */ refcount_set(&map->refcnt, 2); + if (ops->idx) + ops->idx(evlist, evsel, mp, idx); + if (ops->mmap(map, mp, *output, evlist_cpu) < 0) return -1; + *nr_mmaps += 1; + if (!idx) perf_evlist__set_mmap_first(evlist, map, overwrite); } else { @@ -506,53 +520,28 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, } static int -mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, - struct perf_mmap_param *mp) -{ - int thread; - int nr_threads = perf_thread_map__nr(evlist->threads); - - for (thread = 0; thread < nr_threads; thread++) { - int output = -1; - int output_overwrite = -1; - - if (ops->idx) - ops->idx(evlist, mp, thread, false); - - if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread, - &output, &output_overwrite)) - goto out_unmap; - } - - return 0; - -out_unmap: - perf_evlist__munmap(evlist); - return -1; -} - -static int mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, struct perf_mmap_param *mp) { int nr_threads = perf_thread_map__nr(evlist->threads); - int nr_cpus = perf_cpu_map__nr(evlist->cpus); + int nr_cpus = perf_cpu_map__nr(evlist->all_cpus); + int nr_mmaps = 0; int cpu, thread; for (cpu = 0; cpu < nr_cpus; cpu++) { int output = -1; int output_overwrite = -1; - if (ops->idx) - ops->idx(evlist, mp, cpu, true); - for (thread = 0; thread < nr_threads; thread++) { if (mmap_per_evsel(evlist, ops, cpu, mp, cpu, - thread, &output, &output_overwrite)) + thread, &output, &output_overwrite, &nr_mmaps)) goto out_unmap; } } + if (nr_mmaps != evlist->nr_mmaps) + pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps); + return 0; out_unmap: @@ -564,9 +553,14 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist) { int nr_mmaps; - nr_mmaps = perf_cpu_map__nr(evlist->cpus); - if (perf_cpu_map__empty(evlist->cpus)) - nr_mmaps = perf_thread_map__nr(evlist->threads); + /* One for each CPU */ + nr_mmaps = perf_cpu_map__nr(evlist->all_cpus); + if (perf_cpu_map__empty(evlist->all_cpus)) { + /* Plus one for each thread */ + nr_mmaps += perf_thread_map__nr(evlist->threads); + /* Minus the per-thread CPU (-1) */ + nr_mmaps -= 1; + } return nr_mmaps; } @@ -576,8 +570,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, struct perf_mmap_param *mp) { struct perf_evsel *evsel; - const struct 
perf_cpu_map *cpus = evlist->cpus; - const struct perf_thread_map *threads = evlist->threads; if (!ops || !ops->get || !ops->mmap) return -EINVAL; @@ -589,16 +581,13 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, perf_evlist__for_each_entry(evlist, evsel) { if ((evsel->attr.read_format & PERF_FORMAT_ID) && evsel->sample_id == NULL && - perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0) + perf_evsel__alloc_id(evsel, evsel->fd->max_x, evsel->fd->max_y) < 0) return -ENOMEM; } if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; - if (perf_cpu_map__empty(cpus)) - return mmap_per_thread(evlist, ops, mp); - return mmap_per_cpu(evlist, ops, mp); } diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 210ea7c06ce8..952f3520d5c2 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -149,23 +149,30 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, int fd, group_fd, *evsel_fd; evsel_fd = FD(evsel, idx, thread); - if (evsel_fd == NULL) - return -EINVAL; + if (evsel_fd == NULL) { + err = -EINVAL; + goto out; + } err = get_group_fd(evsel, idx, thread, &group_fd); if (err < 0) - return err; + goto out; fd = sys_perf_event_open(&evsel->attr, threads->map[thread].pid, cpu, group_fd, 0); - if (fd < 0) - return -errno; + if (fd < 0) { + err = -errno; + goto out; + } *evsel_fd = fd; } } +out: + if (err) + perf_evsel__close(evsel); return err; } @@ -328,6 +335,17 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, return 0; } +static int perf_evsel__ioctl(struct perf_evsel *evsel, int ioc, void *arg, + int cpu_map_idx, int thread) +{ + int *fd = FD(evsel, cpu_map_idx, thread); + + if (fd == NULL || *fd < 0) + return -1; + + return ioctl(*fd, ioc, arg); +} + static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ioc, void *arg, int cpu_map_idx) @@ -335,13 +353,7 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int err; - int *fd = FD(evsel, cpu_map_idx, thread); - - if (fd == NULL || *fd < 0) - return -1; - - err = ioctl(*fd, ioc, arg); + int err = perf_evsel__ioctl(evsel, ioc, arg, cpu_map_idx, thread); if (err) return err; @@ -355,6 +367,21 @@ int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx) return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu_map_idx); } +int perf_evsel__enable_thread(struct perf_evsel *evsel, int thread) +{ + struct perf_cpu cpu __maybe_unused; + int idx; + int err; + + perf_cpu_map__for_each_cpu(cpu, idx, evsel->cpus) { + err = perf_evsel__ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, idx, thread); + if (err) + return err; + } + + return 0; +} + int perf_evsel__enable(struct perf_evsel *evsel) { int i; diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h index 1973a18c096b..35dd29642296 100644 --- a/tools/lib/perf/include/internal/cpumap.h +++ b/tools/lib/perf/include/internal/cpumap.h @@ -25,5 +25,6 @@ struct perf_cpu_map { #endif int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu); +bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b); #endif /* __LIBPERF_INTERNAL_CPUMAP_H */ diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index 4cefade540bd..6f89aec3e608 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ 
-19,7 +19,12 @@ struct perf_evlist { int nr_entries; int nr_groups; bool has_user_cpus; - struct perf_cpu_map *cpus; + /** + * The cpus passed from the command line or all online CPUs by + * default. + */ + struct perf_cpu_map *user_requested_cpus; + /** The union of all evsel cpu maps. */ struct perf_cpu_map *all_cpus; struct perf_thread_map *threads; int nr_mmaps; @@ -33,7 +38,8 @@ struct perf_evlist { }; typedef void -(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int, bool); +(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_evsel*, + struct perf_mmap_param*, int); typedef struct perf_mmap* (*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int); typedef int diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index cfc9ebd7968e..2a912a1f1989 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -49,7 +49,18 @@ struct perf_evsel { /* parse modifier helper */ int nr_members; + /* + * system_wide is for events that need to be on every CPU, irrespective + * of user requested CPUs or threads. Map propagation will set cpus to + * this event's own_cpus, whereby they will contribute to evlist + * all_cpus. + */ bool system_wide; + /* + * Some events, for example uncore events, require a CPU. + * i.e. it cannot be the 'any CPU' value of -1. + */ + bool requires_cpu; int idx; }; diff --git a/tools/lib/perf/include/internal/lib.h b/tools/lib/perf/include/internal/lib.h index 5175d491b2d4..85471a4b900f 100644 --- a/tools/lib/perf/include/internal/lib.h +++ b/tools/lib/perf/include/internal/lib.h @@ -9,4 +9,6 @@ extern unsigned int page_size; ssize_t readn(int fd, void *buf, size_t n); ssize_t writen(int fd, const void *buf, size_t n); +ssize_t preadn(int fd, void *buf, size_t n, off_t offs); + #endif /* __LIBPERF_INTERNAL_CPUMAP_H */ diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 4a2edbdb5e2b..24de795b09bb 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -31,4 +31,7 @@ LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_c (idx) < perf_cpu_map__nr(cpus); \ (idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx)) +#define perf_cpu_map__for_each_idx(idx, cpus) \ + for ((idx) = 0; (idx) < perf_cpu_map__nr(cpus); (idx)++) + #endif /* __LIBPERF_CPUMAP_H */ diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 75ee385fb078..e7758707cadd 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -240,7 +240,7 @@ struct id_index_entry { struct perf_record_id_index { struct perf_event_header header; __u64 nr; - struct id_index_entry entries[0]; + struct id_index_entry entries[]; }; struct perf_record_auxtrace_info { diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h index 2a9516b42d15..699c0ed97d34 100644 --- a/tools/lib/perf/include/perf/evsel.h +++ b/tools/lib/perf/include/perf/evsel.h @@ -36,6 +36,7 @@ LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int struct perf_counts_values *count); LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel); LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx); +LIBPERF_API int perf_evsel__enable_thread(struct perf_evsel *evsel, int thread); LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel); LIBPERF_API int perf_evsel__disable_cpu(struct 
perf_evsel *evsel, int cpu_map_idx); LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel); diff --git a/tools/lib/perf/include/perf/threadmap.h b/tools/lib/perf/include/perf/threadmap.h index a7c50de8d010..8b40e7777cea 100644 --- a/tools/lib/perf/include/perf/threadmap.h +++ b/tools/lib/perf/include/perf/threadmap.h @@ -8,11 +8,12 @@ struct perf_thread_map; LIBPERF_API struct perf_thread_map *perf_thread_map__new_dummy(void); +LIBPERF_API struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array); -LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid); -LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int thread); +LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid); +LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int idx); LIBPERF_API int perf_thread_map__nr(struct perf_thread_map *threads); -LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread); +LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx); LIBPERF_API struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map); LIBPERF_API void perf_thread_map__put(struct perf_thread_map *map); diff --git a/tools/lib/perf/lib.c b/tools/lib/perf/lib.c index 18658931fc71..696fb0ea67c6 100644 --- a/tools/lib/perf/lib.c +++ b/tools/lib/perf/lib.c @@ -38,6 +38,26 @@ ssize_t readn(int fd, void *buf, size_t n) return ion(true, fd, buf, n); } +ssize_t preadn(int fd, void *buf, size_t n, off_t offs) +{ + size_t left = n; + + while (left) { + ssize_t ret = pread(fd, buf, left, offs); + + if (ret < 0 && errno == EINTR) + continue; + if (ret <= 0) + return ret; + + left -= ret; + buf += ret; + offs += ret; + } + + return n; +} + /* * Write exactly 'n' bytes or return an error. 
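
preadn() mirrors readn() but is built on pread(): it never moves the descriptor's file offset, restarts on EINTR, and loops over short reads until 'n' bytes arrive. A hypothetical caller (the prototype is the one added to internal/lib.h above):

#include <sys/types.h>

/* prototype from tools/lib/perf/include/internal/lib.h */
ssize_t preadn(int fd, void *buf, size_t n, off_t offs);

/* hypothetical caller: fetch a fixed-size record at a known offset
 * without disturbing the file position used elsewhere */
static int read_record(int fd, void *rec, size_t sz, off_t where)
{
	return preadn(fd, rec, sz, where) == (ssize_t)sz ? 0 : -1;
}
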
*/ diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map index 6fa0d651576b..190b56ae923a 100644 --- a/tools/lib/perf/libperf.map +++ b/tools/lib/perf/libperf.map @@ -12,6 +12,7 @@ LIBPERF_0.0.1 { perf_cpu_map__empty; perf_cpu_map__max; perf_cpu_map__has; + perf_thread_map__new_array; perf_thread_map__new_dummy; perf_thread_map__set_pid; perf_thread_map__comm; diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index fa854c83b7e7..ed616fc19b4f 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -69,7 +69,7 @@ static int test_stat_cpu(void) perf_evlist__set_maps(evlist, cpus, NULL); err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); + __T("failed to open evlist", err == 0); perf_evlist__for_each_evsel(evlist, evsel) { cpus = perf_evsel__cpus(evsel); @@ -130,7 +130,7 @@ static int test_stat_thread(void) perf_evlist__set_maps(evlist, NULL, threads); err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); + __T("failed to open evlist", err == 0); perf_evlist__for_each_evsel(evlist, evsel) { perf_evsel__read(evsel, 0, 0, &counts); @@ -187,7 +187,7 @@ static int test_stat_thread_enable(void) perf_evlist__set_maps(evlist, NULL, threads); err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); + __T("failed to open evlist", err == 0); perf_evlist__for_each_evsel(evlist, evsel) { perf_evsel__read(evsel, 0, 0, &counts); @@ -507,7 +507,7 @@ static int test_stat_multiplexing(void) perf_evlist__set_maps(evlist, NULL, threads); err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); + __T("failed to open evlist", err == 0); perf_evlist__enable(evlist); diff --git a/tools/lib/perf/tests/test-threadmap.c b/tools/lib/perf/tests/test-threadmap.c index 5e2a0291e94c..f728ad7002bb 100644 --- a/tools/lib/perf/tests/test-threadmap.c +++ b/tools/lib/perf/tests/test-threadmap.c @@ -11,9 +11,43 @@ static int libperf_print(enum libperf_print_level level, return vfprintf(stderr, fmt, ap); } +static int test_threadmap_array(int nr, pid_t *array) +{ + struct perf_thread_map *threads; + int i; + + threads = perf_thread_map__new_array(nr, array); + __T("Failed to allocate new thread map", threads); + + __T("Unexpected number of threads", perf_thread_map__nr(threads) == nr); + + for (i = 0; i < nr; i++) { + __T("Unexpected initial value of thread", + perf_thread_map__pid(threads, i) == (array ? array[i] : -1)); + } + + for (i = 1; i < nr; i++) + perf_thread_map__set_pid(threads, i, i * 100); + + __T("Unexpected value of thread 0", + perf_thread_map__pid(threads, 0) == (array ? array[0] : -1)); + + for (i = 1; i < nr; i++) { + __T("Unexpected thread value", + perf_thread_map__pid(threads, i) == i * 100); + } + + perf_thread_map__put(threads); + + return 0; +} + +#define THREADS_NR 10 int test_threadmap(int argc, char **argv) { struct perf_thread_map *threads; + pid_t thr_array[THREADS_NR]; + int i; __T_START; @@ -27,6 +61,13 @@ int test_threadmap(int argc, char **argv) perf_thread_map__put(threads); perf_thread_map__put(threads); + test_threadmap_array(THREADS_NR, NULL); + + for (i = 0; i < THREADS_NR; i++) + thr_array[i] = i + 100; + + test_threadmap_array(THREADS_NR, thr_array); + __T_END; return tests_failed == 0 ? 
0 : -1; } diff --git a/tools/lib/perf/threadmap.c b/tools/lib/perf/threadmap.c index e92c368b0a6c..07968f3ea093 100644 --- a/tools/lib/perf/threadmap.c +++ b/tools/lib/perf/threadmap.c @@ -32,28 +32,38 @@ struct perf_thread_map *perf_thread_map__realloc(struct perf_thread_map *map, in #define thread_map__alloc(__nr) perf_thread_map__realloc(NULL, __nr) -void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid) +void perf_thread_map__set_pid(struct perf_thread_map *map, int idx, pid_t pid) { - map->map[thread].pid = pid; + map->map[idx].pid = pid; } -char *perf_thread_map__comm(struct perf_thread_map *map, int thread) +char *perf_thread_map__comm(struct perf_thread_map *map, int idx) { - return map->map[thread].comm; + return map->map[idx].comm; } -struct perf_thread_map *perf_thread_map__new_dummy(void) +struct perf_thread_map *perf_thread_map__new_array(int nr_threads, pid_t *array) { - struct perf_thread_map *threads = thread_map__alloc(1); + struct perf_thread_map *threads = thread_map__alloc(nr_threads); + int i; + + if (!threads) + return NULL; + + for (i = 0; i < nr_threads; i++) + perf_thread_map__set_pid(threads, i, array ? array[i] : -1); + + threads->nr = nr_threads; + refcount_set(&threads->refcnt, 1); - if (threads != NULL) { - perf_thread_map__set_pid(threads, 0, -1); - threads->nr = 1; - refcount_set(&threads->refcnt, 1); - } return threads; } +struct perf_thread_map *perf_thread_map__new_dummy(void) +{ + return perf_thread_map__new_array(1, NULL); +} + static void perf_thread_map__delete(struct perf_thread_map *threads) { if (threads) { @@ -85,7 +95,7 @@ int perf_thread_map__nr(struct perf_thread_map *threads) return threads ? threads->nr : 1; } -pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread) +pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx) { - return map->map[thread].pid; + return map->map[idx].pid; } diff --git a/tools/lib/slab.c b/tools/lib/slab.c new file mode 100644 index 000000000000..959997fb0652 --- /dev/null +++ b/tools/lib/slab.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <string.h> + +#include <urcu/uatomic.h> +#include <linux/slab.h> +#include <malloc.h> +#include <linux/gfp.h> + +int kmalloc_nr_allocated; +int kmalloc_verbose; + +void *kmalloc(size_t size, gfp_t gfp) +{ + void *ret; + + if (!(gfp & __GFP_DIRECT_RECLAIM)) + return NULL; + + ret = malloc(size); + uatomic_inc(&kmalloc_nr_allocated); + if (kmalloc_verbose) + printf("Allocating %p from malloc\n", ret); + if (gfp & __GFP_ZERO) + memset(ret, 0, size); + return ret; +} + +void kfree(void *p) +{ + if (!p) + return; + uatomic_dec(&kmalloc_nr_allocated); + if (kmalloc_verbose) + printf("Freeing %p to malloc\n", p); + free(p); +} diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index 1c777a72bb39..8f1a09cdfd17 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -63,7 +63,7 @@ $(LIBFILE): $(SUBCMD_IN) clean: $(call QUIET_CLEAN, libsubcmd) $(RM) $(LIBFILE); \ - find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) + find $(or $(OUTPUT),.) 
-name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) FORCE: diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c index 39ebf6192016..9fa75943f2ed 100644 --- a/tools/lib/subcmd/parse-options.c +++ b/tools/lib/subcmd/parse-options.c @@ -806,9 +806,9 @@ static int option__cmp(const void *va, const void *vb) static struct option *options__order(const struct option *opts) { - int nr_opts = 0, len; + int nr_opts = 0, nr_group = 0, len; const struct option *o = opts; - struct option *ordered; + struct option *opt, *ordered, *group; for (o = opts; o->type != OPTION_END; o++) ++nr_opts; @@ -819,7 +819,18 @@ static struct option *options__order(const struct option *opts) goto out; memcpy(ordered, opts, len); - qsort(ordered, nr_opts, sizeof(*o), option__cmp); + /* sort each option group individually */ + for (opt = group = ordered; opt->type != OPTION_END; opt++) { + if (opt->type == OPTION_GROUP) { + qsort(group, nr_group, sizeof(*opt), option__cmp); + group = opt + 1; + nr_group = 0; + continue; + } + nr_group++; + } + qsort(group, nr_group, sizeof(*opt), option__cmp); + out: return ordered; } diff --git a/tools/lib/thermal/.gitignore b/tools/lib/thermal/.gitignore new file mode 100644 index 000000000000..5d2aeda80fea --- /dev/null +++ b/tools/lib/thermal/.gitignore @@ -0,0 +1,2 @@ +libthermal.so* +libthermal.pc diff --git a/tools/lib/thermal/Build b/tools/lib/thermal/Build new file mode 100644 index 000000000000..4a892d9e24f9 --- /dev/null +++ b/tools/lib/thermal/Build @@ -0,0 +1,5 @@ +libthermal-y += commands.o +libthermal-y += events.o +libthermal-y += thermal_nl.o +libthermal-y += sampling.o +libthermal-y += thermal.o diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile new file mode 100644 index 000000000000..2d0d255fd0e1 --- /dev/null +++ b/tools/lib/thermal/Makefile @@ -0,0 +1,165 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +# Most of this file is copied from tools/lib/perf/Makefile + +LIBTHERMAL_VERSION = 0 +LIBTHERMAL_PATCHLEVEL = 0 +LIBTHERMAL_EXTRAVERSION = 1 + +MAKEFLAGS += --no-print-directory + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +# $(info Determined 'srctree' to be $(srctree)) +endif + +INSTALL = install + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. 
+DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +include $(srctree)/tools/scripts/Makefile.include +include $(srctree)/tools/scripts/Makefile.arch + +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= +libdir = $(prefix)/$(libdir_relative) + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) +libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) + +ifeq ("$(origin V)", "command line") + VERBOSE = $(V) +endif +ifndef VERBOSE + VERBOSE = 0 +endif + +ifeq ($(VERBOSE),1) + Q = +else + Q = @ +endif + +# Set compile option CFLAGS +ifdef EXTRA_CFLAGS + CFLAGS := $(EXTRA_CFLAGS) +else + CFLAGS := -g -Wall +endif + +INCLUDES = \ +-I/usr/include/libnl3 \ +-I$(srctree)/tools/lib/thermal/include \ +-I$(srctree)/tools/lib/ \ +-I$(srctree)/tools/include \ +-I$(srctree)/tools/arch/$(SRCARCH)/include/ \ +-I$(srctree)/tools/arch/$(SRCARCH)/include/uapi \ +-I$(srctree)/tools/include/uapi + +# Append required CFLAGS +override CFLAGS += $(EXTRA_WARNINGS) +override CFLAGS += -Werror -Wall +override CFLAGS += -fPIC +override CFLAGS += $(INCLUDES) +override CFLAGS += -fvisibility=hidden +override CFLAGS += -Wl,-L. +override CFLAGS += -Wl,-lthermal + +all: + +export srctree OUTPUT CC LD CFLAGS V +export DESTDIR DESTDIR_SQ + +include $(srctree)/tools/build/Makefile.include + +VERSION_SCRIPT := libthermal.map + +PATCHLEVEL = $(LIBTHERMAL_PATCHLEVEL) +EXTRAVERSION = $(LIBTHERMAL_EXTRAVERSION) +VERSION = $(LIBTHERMAL_VERSION).$(LIBTHERMAL_PATCHLEVEL).$(LIBTHERMAL_EXTRAVERSION) + +LIBTHERMAL_SO := $(OUTPUT)libthermal.so.$(VERSION) +LIBTHERMAL_A := $(OUTPUT)libthermal.a +LIBTHERMAL_IN := $(OUTPUT)libthermal-in.o +LIBTHERMAL_PC := $(OUTPUT)libthermal.pc +LIBTHERMAL_ALL := $(LIBTHERMAL_A) $(OUTPUT)libthermal.so* + +THERMAL_UAPI := include/uapi/linux/thermal.h + +$(THERMAL_UAPI): FORCE + ln -sf $(srctree)/$@ $(srctree)/tools/$@ + +$(LIBTHERMAL_IN): FORCE + $(Q)$(MAKE) $(build)=libthermal + +$(LIBTHERMAL_A): $(LIBTHERMAL_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBTHERMAL_IN) + +$(LIBTHERMAL_SO): $(LIBTHERMAL_IN) + $(QUIET_LINK)$(CC) --shared -Wl,-soname,libthermal.so \ -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@ + @ln -sf $(@F) $(OUTPUT)libthermal.so + @ln -sf $(@F) $(OUTPUT)libthermal.so.$(LIBTHERMAL_VERSION) + + +libs: $(THERMAL_UAPI) $(LIBTHERMAL_A) $(LIBTHERMAL_SO) $(LIBTHERMAL_PC) + +all: fixdep + $(Q)$(MAKE) libs + +clean: + $(call QUIET_CLEAN, libthermal) $(RM) $(LIBTHERMAL_A) \ *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBTHERMAL_VERSION) .*.d .*.cmd LIBTHERMAL-CFLAGS $(LIBTHERMAL_PC) + +$(LIBTHERMAL_PC): + $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ -e "s|@LIBDIR@|$(libdir_SQ)|" \ -e "s|@VERSION@|$(VERSION)|" \ < libthermal.pc.template > $@ + +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ fi +endef + +define do_install + if [ ! 
-d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' +endef + +install_lib: libs + $(call QUIET_INSTALL, $(LIBTHERMAL_ALL)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIBTHERMAL_ALL) $(DESTDIR)$(libdir_SQ) + +install_headers: + $(call QUIET_INSTALL, headers) \ + $(call do_install,include/thermal.h,$(prefix)/include/thermal,644); \ + +install_pkgconfig: $(LIBTHERMAL_PC) + $(call QUIET_INSTALL, $(LIBTHERMAL_PC)) \ + $(call do_install,$(LIBTHERMAL_PC),$(libdir_SQ)/pkgconfig,644) + +install_doc: + $(Q)$(MAKE) -C Documentation install-man install-html install-examples + +install: install_lib install_headers install_pkgconfig + +FORCE: + +.PHONY: all install clean FORCE diff --git a/tools/lib/thermal/commands.c b/tools/lib/thermal/commands.c new file mode 100644 index 000000000000..73d4d4e8d6ec --- /dev/null +++ b/tools/lib/thermal/commands.c @@ -0,0 +1,349 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#define _GNU_SOURCE +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <thermal.h> +#include "thermal_nl.h" + +static struct nla_policy thermal_genl_policy[THERMAL_GENL_ATTR_MAX + 1] = { + /* Thermal zone */ + [THERMAL_GENL_ATTR_TZ] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_TZ_ID] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TEMP] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TRIP] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_TZ_TRIP_ID] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TRIP_TEMP] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TRIP_TYPE] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_TRIP_HYST] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_MODE] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_CDEV_WEIGHT] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_TZ_NAME] = { .type = NLA_STRING }, + + /* Governor(s) */ + [THERMAL_GENL_ATTR_TZ_GOV] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_TZ_GOV_NAME] = { .type = NLA_STRING }, + + /* Cooling devices */ + [THERMAL_GENL_ATTR_CDEV] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_CDEV_ID] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_CDEV_CUR_STATE] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_CDEV_MAX_STATE] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_CDEV_NAME] = { .type = NLA_STRING }, +}; + +static int parse_tz_get(struct genl_info *info, struct thermal_zone **tz) +{ + struct nlattr *attr; + struct thermal_zone *__tz = NULL; + size_t size = 0; + int rem; + + nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_TZ], rem) { + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_ID) { + + size++; + + __tz = realloc(__tz, sizeof(*__tz) * (size + 2)); + if (!__tz) + return THERMAL_ERROR; + + __tz[size - 1].id = nla_get_u32(attr); + } + + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_NAME) + nla_strlcpy(__tz[size - 1].name, attr, + THERMAL_NAME_LENGTH); + } + + if (__tz) + __tz[size].id = -1; + + *tz = __tz; + + return THERMAL_SUCCESS; +} + +static int parse_cdev_get(struct genl_info *info, struct thermal_cdev **cdev) +{ + struct nlattr *attr; + struct thermal_cdev *__cdev = NULL; + size_t size = 0; + int rem; + + nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_CDEV], rem) { + + if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_ID) { + + size++; + + __cdev = realloc(__cdev, sizeof(*__cdev) * (size + 2)); + if (!__cdev) + return THERMAL_ERROR; + + __cdev[size - 1].id = nla_get_u32(attr); + } + + if (nla_type(attr) == 
THERMAL_GENL_ATTR_CDEV_NAME) { + nla_strlcpy(__cdev[size - 1].name, attr, + THERMAL_NAME_LENGTH); + } + + if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_CUR_STATE) + __cdev[size - 1].cur_state = nla_get_u32(attr); + + if (nla_type(attr) == THERMAL_GENL_ATTR_CDEV_MAX_STATE) + __cdev[size - 1].max_state = nla_get_u32(attr); + } + + if (__cdev) + __cdev[size].id = -1; + + *cdev = __cdev; + + return THERMAL_SUCCESS; +} + +static int parse_tz_get_trip(struct genl_info *info, struct thermal_zone *tz) +{ + struct nlattr *attr; + struct thermal_trip *__tt = NULL; + size_t size = 0; + int rem; + + nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_TZ_TRIP], rem) { + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_ID) { + + size++; + + __tt = realloc(__tt, sizeof(*__tt) * (size + 2)); + if (!__tt) + return THERMAL_ERROR; + + __tt[size - 1].id = nla_get_u32(attr); + } + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_TYPE) + __tt[size - 1].type = nla_get_u32(attr); + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_TEMP) + __tt[size - 1].temp = nla_get_u32(attr); + + if (nla_type(attr) == THERMAL_GENL_ATTR_TZ_TRIP_HYST) + __tt[size - 1].hyst = nla_get_u32(attr); + } + + if (__tt) + __tt[size].id = -1; + + tz->trip = __tt; + + return THERMAL_SUCCESS; +} + +static int parse_tz_get_temp(struct genl_info *info, struct thermal_zone *tz) +{ + int id = -1; + + if (info->attrs[THERMAL_GENL_ATTR_TZ_ID]) + id = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_ID]); + + if (tz->id != id) + return THERMAL_ERROR; + + if (info->attrs[THERMAL_GENL_ATTR_TZ_TEMP]) + tz->temp = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_TEMP]); + + return THERMAL_SUCCESS; +} + +static int parse_tz_get_gov(struct genl_info *info, struct thermal_zone *tz) +{ + int id = -1; + + if (info->attrs[THERMAL_GENL_ATTR_TZ_ID]) + id = nla_get_u32(info->attrs[THERMAL_GENL_ATTR_TZ_ID]); + + if (tz->id != id) + return THERMAL_ERROR; + + if (info->attrs[THERMAL_GENL_ATTR_TZ_GOV_NAME]) { + nla_strlcpy(tz->governor, + info->attrs[THERMAL_GENL_ATTR_TZ_GOV_NAME], + THERMAL_NAME_LENGTH); + } + + return THERMAL_SUCCESS; +} + +static int handle_netlink(struct nl_cache_ops *unused, + struct genl_cmd *cmd, + struct genl_info *info, void *arg) +{ + int ret; + + switch (cmd->c_id) { + + case THERMAL_GENL_CMD_TZ_GET_ID: + ret = parse_tz_get(info, arg); + break; + + case THERMAL_GENL_CMD_CDEV_GET: + ret = parse_cdev_get(info, arg); + break; + + case THERMAL_GENL_CMD_TZ_GET_TEMP: + ret = parse_tz_get_temp(info, arg); + break; + + case THERMAL_GENL_CMD_TZ_GET_TRIP: + ret = parse_tz_get_trip(info, arg); + break; + + case THERMAL_GENL_CMD_TZ_GET_GOV: + ret = parse_tz_get_gov(info, arg); + break; + + default: + return THERMAL_ERROR; + } + + return ret; +} + +static struct genl_cmd thermal_cmds[] = { + { + .c_id = THERMAL_GENL_CMD_TZ_GET_ID, + .c_name = (char *)"List thermal zones", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_TZ_GET_GOV, + .c_name = (char *)"Get governor", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_TZ_GET_TEMP, + .c_name = (char *)"Get thermal zone temperature", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_TZ_GET_TRIP, + .c_name = (char *)"Get thermal zone trip points", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, 
+ .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_CDEV_GET, + .c_name = (char *)"Get cooling devices", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, +}; + +static struct genl_ops thermal_cmd_ops = { + .o_name = (char *)"thermal", + .o_cmds = thermal_cmds, + .o_ncmds = ARRAY_SIZE(thermal_cmds), +}; + +static thermal_error_t thermal_genl_auto(struct thermal_handler *th, int id, int cmd, + int flags, void *arg) +{ + struct nl_msg *msg; + void *hdr; + + msg = nlmsg_alloc(); + if (!msg) + return THERMAL_ERROR; + + hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, thermal_cmd_ops.o_id, + 0, flags, cmd, THERMAL_GENL_VERSION); + if (!hdr) + return THERMAL_ERROR; + + if (id >= 0 && nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_ID, id)) + return THERMAL_ERROR; + + if (nl_send_msg(th->sk_cmd, th->cb_cmd, msg, genl_handle_msg, arg)) + return THERMAL_ERROR; + + nlmsg_free(msg); + + return THERMAL_SUCCESS; +} + +thermal_error_t thermal_cmd_get_tz(struct thermal_handler *th, struct thermal_zone **tz) +{ + return thermal_genl_auto(th, -1, THERMAL_GENL_CMD_TZ_GET_ID, + NLM_F_DUMP | NLM_F_ACK, tz); +} + +thermal_error_t thermal_cmd_get_cdev(struct thermal_handler *th, struct thermal_cdev **tc) +{ + return thermal_genl_auto(th, -1, THERMAL_GENL_CMD_CDEV_GET, + NLM_F_DUMP | NLM_F_ACK, tc); +} + +thermal_error_t thermal_cmd_get_trip(struct thermal_handler *th, struct thermal_zone *tz) +{ + return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_TRIP, + 0, tz); +} + +thermal_error_t thermal_cmd_get_governor(struct thermal_handler *th, struct thermal_zone *tz) +{ + return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_GOV, 0, tz); +} + +thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th, struct thermal_zone *tz) +{ + return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_TEMP, 0, tz); +} + +thermal_error_t thermal_cmd_exit(struct thermal_handler *th) +{ + if (genl_unregister_family(&thermal_cmd_ops)) + return THERMAL_ERROR; + + nl_thermal_disconnect(th->sk_cmd, th->cb_cmd); + + return THERMAL_SUCCESS; +} + +thermal_error_t thermal_cmd_init(struct thermal_handler *th) +{ + int ret; + int family; + + if (nl_thermal_connect(&th->sk_cmd, &th->cb_cmd)) + return THERMAL_ERROR; + + ret = genl_register_family(&thermal_cmd_ops); + if (ret) + return THERMAL_ERROR; + + ret = genl_ops_resolve(th->sk_cmd, &thermal_cmd_ops); + if (ret) + return THERMAL_ERROR; + + family = genl_ctrl_resolve(th->sk_cmd, "nlctrl"); + if (family != GENL_ID_CTRL) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} diff --git a/tools/lib/thermal/events.c b/tools/lib/thermal/events.c new file mode 100644 index 000000000000..a7a55d1a0c4c --- /dev/null +++ b/tools/lib/thermal/events.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <linux/netlink.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + + +#include <thermal.h> +#include "thermal_nl.h" + +/* + * Optimization: fill this array to tell which event we do want to pay + * attention to. That happens at init time with the ops + * structure. Each ops will enable the event and the general handler + * will be able to discard the event if there is not ops associated + * with it. 
+ */ +static int enabled_ops[__THERMAL_GENL_EVENT_MAX]; + +static int handle_thermal_event(struct nl_msg *n, void *arg) +{ + struct nlmsghdr *nlh = nlmsg_hdr(n); + struct genlmsghdr *genlhdr = genlmsg_hdr(nlh); + struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1]; + struct thermal_handler_param *thp = arg; + struct thermal_events_ops *ops = &thp->th->ops->events; + + genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL); + + arg = thp->arg; + + /* + * This is an event we don't care of, bail out. + */ + if (!enabled_ops[genlhdr->cmd]) + return THERMAL_SUCCESS; + + switch (genlhdr->cmd) { + + case THERMAL_GENL_EVENT_TZ_CREATE: + return ops->tz_create(nla_get_string(attrs[THERMAL_GENL_ATTR_TZ_NAME]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_DELETE: + return ops->tz_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_ENABLE: + return ops->tz_enable(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_DISABLE: + return ops->tz_disable(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_CHANGE: + return ops->trip_change(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TYPE]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_HYST]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_ADD: + return ops->trip_add(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TYPE]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_HYST]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_DELETE: + return ops->trip_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_UP: + return ops->trip_high(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg); + + case THERMAL_GENL_EVENT_TZ_TRIP_DOWN: + return ops->trip_low(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TRIP_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg); + + case THERMAL_GENL_EVENT_CDEV_ADD: + return ops->cdev_add(nla_get_string(attrs[THERMAL_GENL_ATTR_CDEV_NAME]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_MAX_STATE]), arg); + + case THERMAL_GENL_EVENT_CDEV_DELETE: + return ops->cdev_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]), arg); + + case THERMAL_GENL_EVENT_CDEV_STATE_UPDATE: + return ops->cdev_update(nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_CDEV_CUR_STATE]), arg); + + case THERMAL_GENL_EVENT_TZ_GOV_CHANGE: + return ops->gov_change(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_string(attrs[THERMAL_GENL_ATTR_GOV_NAME]), arg); + default: + return -1; + } +} + +static void thermal_events_ops_init(struct thermal_events_ops *ops) +{ + enabled_ops[THERMAL_GENL_EVENT_TZ_CREATE] = !!ops->tz_create; + enabled_ops[THERMAL_GENL_EVENT_TZ_DELETE] = !!ops->tz_delete; + enabled_ops[THERMAL_GENL_EVENT_TZ_DISABLE] = !!ops->tz_disable; + enabled_ops[THERMAL_GENL_EVENT_TZ_ENABLE] = !!ops->tz_enable; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_UP] = !!ops->trip_high; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DOWN] = 
!!ops->trip_low; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_CHANGE] = !!ops->trip_change; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_ADD] = !!ops->trip_add; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DELETE] = !!ops->trip_delete; + enabled_ops[THERMAL_GENL_EVENT_CDEV_ADD] = !!ops->cdev_add; + enabled_ops[THERMAL_GENL_EVENT_CDEV_DELETE] = !!ops->cdev_delete; + enabled_ops[THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = !!ops->cdev_update; + enabled_ops[THERMAL_GENL_EVENT_TZ_GOV_CHANGE] = !!ops->gov_change; +} + +thermal_error_t thermal_events_handle(struct thermal_handler *th, void *arg) +{ + struct thermal_handler_param thp = { .th = th, .arg = arg }; + + if (!th) + return THERMAL_ERROR; + + if (nl_cb_set(th->cb_event, NL_CB_VALID, NL_CB_CUSTOM, + handle_thermal_event, &thp)) + return THERMAL_ERROR; + + return nl_recvmsgs(th->sk_event, th->cb_event); +} + +int thermal_events_fd(struct thermal_handler *th) +{ + if (!th) + return -1; + + return nl_socket_get_fd(th->sk_event); +} + +thermal_error_t thermal_events_exit(struct thermal_handler *th) +{ + if (nl_unsubscribe_thermal(th->sk_event, th->cb_event, + THERMAL_GENL_EVENT_GROUP_NAME)) + return THERMAL_ERROR; + + nl_thermal_disconnect(th->sk_event, th->cb_event); + + return THERMAL_SUCCESS; +} + +thermal_error_t thermal_events_init(struct thermal_handler *th) +{ + thermal_events_ops_init(&th->ops->events); + + if (nl_thermal_connect(&th->sk_event, &th->cb_event)) + return THERMAL_ERROR; + + if (nl_subscribe_thermal(th->sk_event, th->cb_event, + THERMAL_GENL_EVENT_GROUP_NAME)) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} diff --git a/tools/lib/thermal/include/thermal.h b/tools/lib/thermal/include/thermal.h new file mode 100644 index 000000000000..1abc560602cf --- /dev/null +++ b/tools/lib/thermal/include/thermal.h @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */ +#ifndef __LIBTHERMAL_H +#define __LIBTHERMAL_H + +#include <linux/thermal.h> + +#ifndef LIBTHERMAL_API +#define LIBTHERMAL_API __attribute__((visibility("default"))) +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +struct thermal_sampling_ops { + int (*tz_temp)(int tz_id, int temp, void *arg); +}; + +struct thermal_events_ops { + int (*tz_create)(const char *name, int tz_id, void *arg); + int (*tz_delete)(int tz_id, void *arg); + int (*tz_enable)(int tz_id, void *arg); + int (*tz_disable)(int tz_id, void *arg); + int (*trip_high)(int tz_id, int trip_id, int temp, void *arg); + int (*trip_low)(int tz_id, int trip_id, int temp, void *arg); + int (*trip_add)(int tz_id, int trip_id, int type, int temp, int hyst, void *arg); + int (*trip_change)(int tz_id, int trip_id, int type, int temp, int hyst, void *arg); + int (*trip_delete)(int tz_id, int trip_id, void *arg); + int (*cdev_add)(const char *name, int cdev_id, int max_state, void *arg); + int (*cdev_delete)(int cdev_id, void *arg); + int (*cdev_update)(int cdev_id, int cur_state, void *arg); + int (*gov_change)(int tz_id, const char *gov_name, void *arg); +}; + +struct thermal_ops { + struct thermal_sampling_ops sampling; + struct thermal_events_ops events; +}; + +struct thermal_trip { + int id; + int type; + int temp; + int hyst; +}; + +struct thermal_zone { + int id; + int temp; + char name[THERMAL_NAME_LENGTH]; + char governor[THERMAL_NAME_LENGTH]; + struct thermal_trip *trip; +}; + +struct thermal_cdev { + int id; + char name[THERMAL_NAME_LENGTH]; + int max_state; + int min_state; + int cur_state; +}; + +typedef enum { + THERMAL_ERROR 
= -1, + THERMAL_SUCCESS = 0, +} thermal_error_t; + +struct thermal_handler; + +typedef int (*cb_tz_t)(struct thermal_zone *, void *); + +typedef int (*cb_tt_t)(struct thermal_trip *, void *); + +typedef int (*cb_tc_t)(struct thermal_cdev *, void *); + +LIBTHERMAL_API int for_each_thermal_zone(struct thermal_zone *tz, cb_tz_t cb, void *arg); + +LIBTHERMAL_API int for_each_thermal_trip(struct thermal_trip *tt, cb_tt_t cb, void *arg); + +LIBTHERMAL_API int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg); + +LIBTHERMAL_API struct thermal_zone *thermal_zone_find_by_name(struct thermal_zone *tz, + const char *name); + +LIBTHERMAL_API struct thermal_zone *thermal_zone_find_by_id(struct thermal_zone *tz, int id); + +LIBTHERMAL_API struct thermal_zone *thermal_zone_discover(struct thermal_handler *th); + +LIBTHERMAL_API struct thermal_handler *thermal_init(struct thermal_ops *ops); + +LIBTHERMAL_API void thermal_exit(struct thermal_handler *th); + +/* + * Netlink thermal events + */ +LIBTHERMAL_API thermal_error_t thermal_events_exit(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_events_init(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_events_handle(struct thermal_handler *th, void *arg); + +LIBTHERMAL_API int thermal_events_fd(struct thermal_handler *th); + +/* + * Netlink thermal commands + */ +LIBTHERMAL_API thermal_error_t thermal_cmd_exit(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_cmd_init(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_tz(struct thermal_handler *th, + struct thermal_zone **tz); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_cdev(struct thermal_handler *th, + struct thermal_cdev **tc); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_trip(struct thermal_handler *th, + struct thermal_zone *tz); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_governor(struct thermal_handler *th, + struct thermal_zone *tz); + +LIBTHERMAL_API thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th, + struct thermal_zone *tz); + +/* + * Netlink thermal samples + */ +LIBTHERMAL_API thermal_error_t thermal_sampling_exit(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_sampling_init(struct thermal_handler *th); + +LIBTHERMAL_API thermal_error_t thermal_sampling_handle(struct thermal_handler *th, void *arg); + +LIBTHERMAL_API int thermal_sampling_fd(struct thermal_handler *th); + +#ifdef __cplusplus +} +#endif + +#endif /* __LIBTHERMAL_H */ diff --git a/tools/lib/thermal/libthermal.map b/tools/lib/thermal/libthermal.map new file mode 100644 index 000000000000..d5e77738c7a4 --- /dev/null +++ b/tools/lib/thermal/libthermal.map @@ -0,0 +1,25 @@ +LIBTHERMAL_0.0.1 { + global: + thermal_init; + for_each_thermal_zone; + for_each_thermal_trip; + for_each_thermal_cdev; + thermal_zone_find_by_name; + thermal_zone_find_by_id; + thermal_zone_discover; + thermal_exit; + thermal_events_init; + thermal_events_handle; + thermal_events_fd; + thermal_cmd_init; + thermal_cmd_get_tz; + thermal_cmd_get_cdev; + thermal_cmd_get_trip; + thermal_cmd_get_governor; + thermal_cmd_get_temp; + thermal_sampling_init; + thermal_sampling_handle; + thermal_sampling_fd; +local: + *; +};
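The libthermal.pc.template below is expanded by the sed rule in the Makefile above and installed by install_pkgconfig, so a client can pick up include and linker flags without hard-coding paths. A hypothetical invocation, assuming libthermal.pc is on the pkg-config search path and the application file name is made up:

	cc -o thermal_app thermal_app.c $(pkg-config --cflags --libs libthermal)

The Requires line makes pkg-config also pull in the libnl-3.0 and libnl-genl-3.0 flags that the library depends on.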
diff --git a/tools/lib/thermal/libthermal.pc.template b/tools/lib/thermal/libthermal.pc.template new file mode 100644 index 000000000000..6f3769731b59 --- /dev/null +++ b/tools/lib/thermal/libthermal.pc.template @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +prefix=@PREFIX@ +libdir=@LIBDIR@ +includedir=${prefix}/include + +Name: libthermal +Description: thermal library +Requires: libnl-3.0 libnl-genl-3.0 +Version: @VERSION@ +Libs: -L${libdir} -lthermal -lnl-genl-3 -lnl-3 +Cflags: -I${includedir} -I${includedir}/libnl3 diff --git a/tools/lib/thermal/sampling.c b/tools/lib/thermal/sampling.c new file mode 100644 index 000000000000..ee818f4e9654 --- /dev/null +++ b/tools/lib/thermal/sampling.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <thermal.h> +#include "thermal_nl.h" + +static int handle_thermal_sample(struct nl_msg *n, void *arg) +{ + struct nlmsghdr *nlh = nlmsg_hdr(n); + struct genlmsghdr *genlhdr = genlmsg_hdr(nlh); + struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1]; + struct thermal_handler_param *thp = arg; + struct thermal_handler *th = thp->th; + + genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL); + + switch (genlhdr->cmd) { + + case THERMAL_GENL_SAMPLING_TEMP: + return th->ops->sampling.tz_temp( + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), arg); + default: + return THERMAL_ERROR; + } +} + +thermal_error_t thermal_sampling_handle(struct thermal_handler *th, void *arg) +{ + struct thermal_handler_param thp = { .th = th, .arg = arg }; + + if (!th) + return THERMAL_ERROR; + + if (nl_cb_set(th->cb_sampling, NL_CB_VALID, NL_CB_CUSTOM, + handle_thermal_sample, &thp)) + return THERMAL_ERROR; + + return nl_recvmsgs(th->sk_sampling, th->cb_sampling); +} + +int thermal_sampling_fd(struct thermal_handler *th) +{ + if (!th) + return -1; + + return nl_socket_get_fd(th->sk_sampling); +} + +thermal_error_t thermal_sampling_exit(struct thermal_handler *th) +{ + if (nl_unsubscribe_thermal(th->sk_sampling, th->cb_sampling, + THERMAL_GENL_SAMPLING_GROUP_NAME)) + return THERMAL_ERROR; + + nl_thermal_disconnect(th->sk_sampling, th->cb_sampling); + + return THERMAL_SUCCESS; +} + +thermal_error_t thermal_sampling_init(struct thermal_handler *th) +{ + if (nl_thermal_connect(&th->sk_sampling, &th->cb_sampling)) + return THERMAL_ERROR; + + if (nl_subscribe_thermal(th->sk_sampling, th->cb_sampling, + THERMAL_GENL_SAMPLING_GROUP_NAME)) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} diff --git a/tools/lib/thermal/thermal.c b/tools/lib/thermal/thermal.c new file mode 100644 index 000000000000..72a76dc205bc --- /dev/null +++ b/tools/lib/thermal/thermal.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <thermal.h> + +#include "thermal_nl.h" + +int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg) +{ + int i, ret = 0; + + if (!cdev) + return 0; + + for (i = 0; cdev[i].id != -1; i++) + ret |= cb(&cdev[i], arg); + + return ret; +} + +int for_each_thermal_trip(struct thermal_trip *tt, cb_tt_t cb, void *arg) +{ + int i, ret = 0; + + if (!tt) + return 0; + + for (i = 0; tt[i].id != -1; i++) + ret |= cb(&tt[i], arg); + + return ret; +} + +int for_each_thermal_zone(struct thermal_zone *tz, cb_tz_t cb, void *arg) +{ + int i, ret = 0; + + if (!tz) + return 0; + + for (i = 0; tz[i].id != -1; i++) + ret |= cb(&tz[i], arg); + + return ret; +} + +struct thermal_zone *thermal_zone_find_by_name(struct thermal_zone *tz, + const char *name) +{ + int i; + + if (!tz || !name) + return NULL; + + for (i = 0; 
tz[i].id != -1; i++) { + if (!strcmp(tz[i].name, name)) + return &tz[i]; + } + + return NULL; +} + +struct thermal_zone *thermal_zone_find_by_id(struct thermal_zone *tz, int id) +{ + int i; + + if (!tz || id < 0) + return NULL; + + for (i = 0; tz[i].id != -1; i++) { + if (tz[i].id == id) + return &tz[i]; + } + + return NULL; +} + +static int __thermal_zone_discover(struct thermal_zone *tz, void *th) +{ + if (thermal_cmd_get_trip(th, tz) < 0) + return -1; + + if (thermal_cmd_get_governor(th, tz)) + return -1; + + return 0; +} + +struct thermal_zone *thermal_zone_discover(struct thermal_handler *th) +{ + struct thermal_zone *tz; + + if (thermal_cmd_get_tz(th, &tz) < 0) + return NULL; + + if (for_each_thermal_zone(tz, __thermal_zone_discover, th)) + return NULL; + + return tz; +} + +void thermal_exit(struct thermal_handler *th) +{ + thermal_cmd_exit(th); + thermal_events_exit(th); + thermal_sampling_exit(th); + + free(th); +} + +struct thermal_handler *thermal_init(struct thermal_ops *ops) +{ + struct thermal_handler *th; + + th = malloc(sizeof(*th)); + if (!th) + return NULL; + th->ops = ops; + + if (thermal_events_init(th)) + goto out_free; + + if (thermal_sampling_init(th)) + goto out_free; + + if (thermal_cmd_init(th)) + goto out_free; + + return th; + +out_free: + free(th); + + return NULL; +} diff --git a/tools/lib/thermal/thermal_nl.c b/tools/lib/thermal/thermal_nl.c new file mode 100644 index 000000000000..b05cf9569858 --- /dev/null +++ b/tools/lib/thermal/thermal_nl.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: LGPL-2.1+ +// Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <thermal.h> +#include "thermal_nl.h" + +struct handler_args { + const char *group; + int id; +}; + +static __thread int err; +static __thread int done; + +static int nl_seq_check_handler(struct nl_msg *msg, void *arg) +{ + return NL_OK; +} + +static int nl_error_handler(struct sockaddr_nl *nla, struct nlmsgerr *nl_err, + void *arg) +{ + int *ret = arg; + + if (ret) + *ret = nl_err->error; + + return NL_STOP; +} + +static int nl_finish_handler(struct nl_msg *msg, void *arg) +{ + int *ret = arg; + + if (ret) + *ret = 1; + + return NL_OK; +} + +static int nl_ack_handler(struct nl_msg *msg, void *arg) +{ + int *ret = arg; + + if (ret) + *ret = 1; + + return NL_OK; +} + +int nl_send_msg(struct nl_sock *sock, struct nl_cb *cb, struct nl_msg *msg, + int (*rx_handler)(struct nl_msg *, void *), void *data) +{ + if (!rx_handler) + return THERMAL_ERROR; + + err = nl_send_auto_complete(sock, msg); + if (err < 0) + return err; + + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, rx_handler, data); + + err = done = 0; + + while (err == 0 && done == 0) + nl_recvmsgs(sock, cb); + + return err; +} + +static int nl_family_handler(struct nl_msg *msg, void *arg) +{ + struct handler_args *grp = arg; + struct nlattr *tb[CTRL_ATTR_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *mcgrp; + int rem_mcgrp; + + nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + + if (!tb[CTRL_ATTR_MCAST_GROUPS]) + return THERMAL_ERROR; + + nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], rem_mcgrp) { + + struct nlattr *tb_mcgrp[CTRL_ATTR_MCAST_GRP_MAX + 1]; + + nla_parse(tb_mcgrp, CTRL_ATTR_MCAST_GRP_MAX, + nla_data(mcgrp), nla_len(mcgrp), NULL); + + if (!tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME] || + !tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]) + continue; + + if 
(strncmp(nla_data(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]), + grp->group, + nla_len(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]))) + continue; + + grp->id = nla_get_u32(tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]); + + break; + } + + return THERMAL_SUCCESS; +} + +static int nl_get_multicast_id(struct nl_sock *sock, struct nl_cb *cb, + const char *family, const char *group) +{ + struct nl_msg *msg; + int ret = 0, ctrlid; + struct handler_args grp = { + .group = group, + .id = -ENOENT, + }; + + msg = nlmsg_alloc(); + if (!msg) + return THERMAL_ERROR; + + ctrlid = genl_ctrl_resolve(sock, "nlctrl"); + + genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); + + nla_put_string(msg, CTRL_ATTR_FAMILY_NAME, family); + + ret = nl_send_msg(sock, cb, msg, nl_family_handler, &grp); + if (ret) + goto nla_put_failure; + + ret = grp.id; + +nla_put_failure: + nlmsg_free(msg); + return ret; +} + +int nl_thermal_connect(struct nl_sock **nl_sock, struct nl_cb **nl_cb) +{ + struct nl_cb *cb; + struct nl_sock *sock; + + cb = nl_cb_alloc(NL_CB_DEFAULT); + if (!cb) + return THERMAL_ERROR; + + sock = nl_socket_alloc(); + if (!sock) + goto out_cb_free; + + if (genl_connect(sock)) + goto out_socket_free; + + if (nl_cb_err(cb, NL_CB_CUSTOM, nl_error_handler, &err) || + nl_cb_set(cb, NL_CB_FINISH, NL_CB_CUSTOM, nl_finish_handler, &done) || + nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, nl_ack_handler, &done) || + nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check_handler, &done)) + return THERMAL_ERROR; + + *nl_sock = sock; + *nl_cb = cb; + + return THERMAL_SUCCESS; + +out_socket_free: + nl_socket_free(sock); +out_cb_free: + nl_cb_put(cb); + return THERMAL_ERROR; +} + +void nl_thermal_disconnect(struct nl_sock *nl_sock, struct nl_cb *nl_cb) +{ + nl_close(nl_sock); + nl_socket_free(nl_sock); + nl_cb_put(nl_cb); +} + +int nl_unsubscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb, + const char *group) +{ + int mcid; + + mcid = nl_get_multicast_id(nl_sock, nl_cb, THERMAL_GENL_FAMILY_NAME, + group); + if (mcid < 0) + return THERMAL_ERROR; + + if (nl_socket_drop_membership(nl_sock, mcid)) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} + +int nl_subscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb, + const char *group) +{ + int mcid; + + mcid = nl_get_multicast_id(nl_sock, nl_cb, THERMAL_GENL_FAMILY_NAME, + group); + if (mcid < 0) + return THERMAL_ERROR; + + if (nl_socket_add_membership(nl_sock, mcid)) + return THERMAL_ERROR; + + return THERMAL_SUCCESS; +} diff --git a/tools/lib/thermal/thermal_nl.h b/tools/lib/thermal/thermal_nl.h new file mode 100644 index 000000000000..ddf635642f07 --- /dev/null +++ b/tools/lib/thermal/thermal_nl.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +/* Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> */ +#ifndef __THERMAL_H +#define __THERMAL_H + +#include <netlink/netlink.h> +#include <netlink/genl/genl.h> +#include <netlink/genl/mngt.h> +#include <netlink/genl/ctrl.h> + +struct thermal_handler { + int done; + int error; + struct thermal_ops *ops; + struct nl_msg *msg; + struct nl_sock *sk_event; + struct nl_sock *sk_sampling; + struct nl_sock *sk_cmd; + struct nl_cb *cb_cmd; + struct nl_cb *cb_event; + struct nl_cb *cb_sampling; +}; + +struct thermal_handler_param { + struct thermal_handler *th; + void *arg; +}; + +/* + * Low level netlink + */ +extern int nl_subscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb, + const char *group); + +extern int nl_unsubscribe_thermal(struct nl_sock *nl_sock, struct nl_cb *nl_cb, + const char 
*group); + +extern int nl_thermal_connect(struct nl_sock **nl_sock, struct nl_cb **nl_cb); + +extern void nl_thermal_disconnect(struct nl_sock *nl_sock, struct nl_cb *nl_cb); + +extern int nl_send_msg(struct nl_sock *sock, struct nl_cb *nl_cb, struct nl_msg *msg, + int (*rx_handler)(struct nl_msg *, void *), + void *data); + +#endif /* __THERMAL_H */
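The exported API splits into three planes: commands (synchronous queries such as thermal_cmd_get_tz()), events (asynchronous netlink notifications) and sampling (periodic temperature reports). Here is a minimal sketch of a client combining the first two, assuming libthermal is built and installed as described by the Makefile above; the callback choice and the unbounded poll loop are illustrative only:

	#include <poll.h>
	#include <stdio.h>
	#include <thermal.h>

	/* for_each_thermal_zone() callback: print one discovered zone. */
	static int show_tz(struct thermal_zone *tz, void *arg)
	{
		printf("zone %d: %s (governor %s)\n", tz->id, tz->name, tz->governor);
		return 0;
	}

	static int tz_create_cb(const char *name, int tz_id, void *arg)
	{
		printf("thermal zone %d created: %s\n", tz_id, name);
		return 0;
	}

	/* A sparse ops structure is fine: only installed callbacks are dispatched. */
	static struct thermal_ops ops = {
		.events.tz_create = tz_create_cb,
	};

	int main(void)
	{
		struct thermal_handler *th;
		struct thermal_zone *tz;
		struct pollfd pfd;

		th = thermal_init(&ops);
		if (!th)
			return 1;

		/* Fill in trip points and governor for every zone, then list them. */
		tz = thermal_zone_discover(th);
		for_each_thermal_zone(tz, show_tz, NULL);

		pfd.fd = thermal_events_fd(th);
		pfd.events = POLLIN;

		/* Block on the event socket; libthermal dispatches to ops.events. */
		while (poll(&pfd, 1, -1) > 0)
			thermal_events_handle(th, NULL);

		thermal_exit(th);
		return 0;
	}

handle_thermal_event() consults the enabled_ops table filled in at thermal_events_init() time, so events without an installed handler are silently accepted rather than dereferencing a NULL callback.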
