From 1ce3a60e3c287479f15147ffc61c35b2e436a0d5 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 30 Mar 2022 16:26:36 +0100 Subject: libbpf: auto-resolve programs/libraries when necessary for uprobes bpf_program__attach_uprobe_opts() requires a binary_path argument specifying binary to instrument. Supporting simply specifying "libc.so.6" or "foo" should be possible too. Library search checks LD_LIBRARY_PATH, then /usr/lib64, /usr/lib. This allows users to run BPF programs prefixed with LD_LIBRARY_PATH=/path2/lib while still searching standard locations. Similarly for non .so files, we check PATH and /usr/bin, /usr/sbin. Path determination will be useful for auto-attach of BPF uprobe programs using SEC() definition. Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1648654000-21758-2-git-send-email-alan.maguire@oracle.com --- tools/lib/bpf/libbpf.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 809fe209cdcc..ba6b61336bc7 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10517,6 +10517,46 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, return pfd; } +/* Get full path to program/shared library. */ +static int resolve_full_path(const char *file, char *result, size_t result_sz) +{ + const char *search_paths[2]; + int i; + + if (strstr(file, ".so")) { + search_paths[0] = getenv("LD_LIBRARY_PATH"); + search_paths[1] = "/usr/lib64:/usr/lib"; + } else { + search_paths[0] = getenv("PATH"); + search_paths[1] = "/usr/bin:/usr/sbin"; + } + + for (i = 0; i < ARRAY_SIZE(search_paths); i++) { + const char *s; + + if (!search_paths[i]) + continue; + for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) { + char *next_path; + int seg_len; + + if (s[0] == ':') + s++; + next_path = strchr(s, ':'); + seg_len = next_path ? 
next_path - s : strlen(s); + if (!seg_len) + continue; + snprintf(result, result_sz, "%.*s/%s", seg_len, s, file); + /* ensure it is an executable file/link */ + if (access(result, R_OK | X_OK) < 0) + continue; + pr_debug("resolved '%s' to '%s'\n", file, result); + return 0; + } + } + return -ENOENT; +} + LIBBPF_API struct bpf_link * bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, @@ -10524,6 +10564,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; + char full_binary_path[PATH_MAX]; struct bpf_link *link; size_t ref_ctr_off; int pfd, err; @@ -10536,12 +10577,23 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); + if (binary_path && !strchr(binary_path, '/')) { + err = resolve_full_path(binary_path, full_binary_path, + sizeof(full_binary_path)); + if (err) { + pr_warn("prog '%s': failed to resolve full path for '%s'\n", + prog->name, binary_path); + return libbpf_err_ptr(err); + } + binary_path = full_binary_path; + } + legacy = determine_uprobe_perf_type() < 0; if (!legacy) { pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, func_offset, pid, ref_ctr_off); } else { - char probe_name[512]; + char probe_name[PATH_MAX + 64]; if (ref_ctr_off) return libbpf_err_ptr(-EINVAL); -- cgit v1.2.3 From 433966e3ae04165811b116af492a684bad7a158c Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 30 Mar 2022 16:26:37 +0100 Subject: libbpf: Support function name-based attach uprobes kprobe attach is name-based, using lookups of kallsyms to translate a function name to an address. Currently uprobe attach is done via an offset value as described in [1]. 
Extend uprobe opts for attach to include a function name which can then be converted into a uprobe-friendly offset. The calculation is done in several steps: 1. First, determine the symbol address using libelf; this gives us the offset as reported by objdump 2. If the function is a shared library function - and the binary provided is a shared library - no further work is required; the address found is the required address 3. Finally, if the function is local, subtract the base address associated with the object, retrieved from ELF program headers. The resultant value is then added to the func_offset value passed in to specify the uprobe attach address. So specifying a func_offset of 0 along with a function name "printf" will attach to printf entry. The modes of operation supported are then 1. to attach to a local function in a binary; function "foo1" in "/usr/bin/foo" 2. to attach to a shared library function in a shared library - function "malloc" in libc. [1] https://www.kernel.org/doc/html/latest/trace/uprobetracer.html Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1648654000-21758-3-git-send-email-alan.maguire@oracle.com --- tools/lib/bpf/libbpf.c | 204 +++++++++++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.h | 10 ++- 2 files changed, 213 insertions(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ba6b61336bc7..f6e5a0217841 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10517,6 +10517,195 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, return pfd; } +/* uprobes deal in relative offsets; subtract the base address associated with + * the mapped binary. See Documentation/trace/uprobetracer.rst for more + * details. 
+ */ +static long elf_find_relative_offset(const char *filename, Elf *elf, long addr) +{ + size_t n; + int i; + + if (elf_getphdrnum(elf, &n)) { + pr_warn("elf: failed to find program headers for '%s': %s\n", filename, + elf_errmsg(-1)); + return -ENOENT; + } + + for (i = 0; i < n; i++) { + long seg_start, seg_end, seg_offset; /* long, not int: 64-bit p_vaddr/p_memsz/p_offset must not be truncated */ + GElf_Phdr phdr; + + if (!gelf_getphdr(elf, i, &phdr)) { + pr_warn("elf: failed to get program header %d from '%s': %s\n", i, filename, + elf_errmsg(-1)); + return -ENOENT; + } + if (phdr.p_type != PT_LOAD || !(phdr.p_flags & PF_X)) + continue; + + seg_start = phdr.p_vaddr; + seg_end = seg_start + phdr.p_memsz; + seg_offset = phdr.p_offset; + if (addr >= seg_start && addr < seg_end) + return addr - seg_start + seg_offset; + } + pr_warn("elf: failed to find prog header containing 0x%lx in '%s'\n", addr, filename); + return -ENOENT; +} + +/* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */ +static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn) +{ + while ((scn = elf_nextscn(elf, scn)) != NULL) { + GElf_Shdr sh; + + if (!gelf_getshdr(scn, &sh)) + continue; + if (sh.sh_type == sh_type) + return scn; + } + return NULL; +} + +/* Find offset of function name in object specified by path. "name" matches + * symbol name or name@@LIB for library functions. 
+ */ +static long elf_find_func_offset(const char *binary_path, const char *name) +{ + int fd, i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; + bool is_shared_lib, is_name_qualified; + char errmsg[STRERR_BUFSIZE]; + long ret = -ENOENT; + size_t name_len; + GElf_Ehdr ehdr; + Elf *elf; + + fd = open(binary_path, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + ret = -errno; + pr_warn("failed to open %s: %s\n", binary_path, + libbpf_strerror_r(ret, errmsg, sizeof(errmsg))); + return ret; + } + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) { + pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1)); + close(fd); + return -LIBBPF_ERRNO__FORMAT; + } + if (!gelf_getehdr(elf, &ehdr)) { + pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1)); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } + /* for shared lib case, we do not need to calculate relative offset */ + is_shared_lib = ehdr.e_type == ET_DYN; + + name_len = strlen(name); + /* Does name specify "@@LIB"? */ + is_name_qualified = strstr(name, "@@") != NULL; + + /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if + * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically + * linked binary may not have SHT_DYNSYM, so absence of a section should not be + * reported as a warning/error. 
+ */ + for (i = 0; i < ARRAY_SIZE(sh_types); i++) { + size_t nr_syms, strtabidx, idx; + Elf_Data *symbols = NULL; + Elf_Scn *scn = NULL; + int last_bind = -1; + const char *sname; + GElf_Shdr sh; + + scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL); + if (!scn) { + pr_debug("elf: failed to find symbol table ELF sections in '%s'\n", + binary_path); + continue; + } + if (!gelf_getshdr(scn, &sh)) + continue; + strtabidx = sh.sh_link; + symbols = elf_getdata(scn, 0); + if (!symbols) { + pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n", + binary_path, elf_errmsg(-1)); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } + nr_syms = symbols->d_size / sh.sh_entsize; + + for (idx = 0; idx < nr_syms; idx++) { + int curr_bind; + GElf_Sym sym; + + if (!gelf_getsym(symbols, idx, &sym)) + continue; + + if (GELF_ST_TYPE(sym.st_info) != STT_FUNC) + continue; + + sname = elf_strptr(elf, strtabidx, sym.st_name); + if (!sname) + continue; + + curr_bind = GELF_ST_BIND(sym.st_info); + + /* User can specify func, func@@LIB or func@@LIB_VERSION. */ + if (strncmp(sname, name, name_len) != 0) + continue; + /* ...but we don't want a search for "foo" to match 'foo2" also, so any + * additional characters in sname should be of the form "@@LIB". + */ + if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@') + continue; + + if (ret >= 0) { + /* handle multiple matches */ + if (last_bind != STB_WEAK && curr_bind != STB_WEAK) { + /* Only accept one non-weak bind. */ + pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n", + sname, name, binary_path); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } else if (curr_bind == STB_WEAK) { + /* already have a non-weak bind, and + * this is a weak bind, so ignore. 
+ */ + continue; + } + } + ret = sym.st_value; + last_bind = curr_bind; + } + /* For binaries that are not shared libraries, we need relative offset */ + if (ret > 0 && !is_shared_lib) + ret = elf_find_relative_offset(binary_path, elf, ret); + if (ret > 0) + break; + } + + if (ret > 0) { + pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path, + ret); + } else { + if (ret == 0) { + pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path, + is_shared_lib ? "should not be 0 in a shared library" : + "try using shared library path instead"); + ret = -ENOENT; + } else { + pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path); + } + } +out: + elf_end(elf); + close(fd); + return ret; +} + /* Get full path to program/shared library. */ static int resolve_full_path(const char *file, char *result, size_t result_sz) { @@ -10569,6 +10758,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, size_t ref_ctr_off; int pfd, err; bool retprobe, legacy; + const char *func_name; if (!OPTS_VALID(opts, bpf_uprobe_opts)) return libbpf_err_ptr(-EINVAL); @@ -10587,6 +10777,20 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, } binary_path = full_binary_path; } + func_name = OPTS_GET(opts, func_name, NULL); + if (func_name) { + long sym_off; + + if (!binary_path) { + pr_warn("prog '%s': name-based attach requires binary_path\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + sym_off = elf_find_func_offset(binary_path, func_name); + if (sym_off < 0) + return libbpf_err_ptr(sym_off); + func_offset += sym_off; + } legacy = determine_uprobe_perf_type() < 0; if (!legacy) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 05dde85e19a6..28cd2062d0df 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -459,9 +459,17 @@ struct bpf_uprobe_opts { __u64 bpf_cookie; /* uprobe is return probe, invoked at function return time */ bool retprobe; + /* 
Function name to attach to. Could be an unqualified ("abc") or library-qualified + * "abc@LIBXYZ" name. To specify function entry, func_name should be set while + * func_offset argument to bpf_program__attach_uprobe_opts() should be 0. To trace an + * offset within a function, specify func_name and use func_offset argument to specify + * offset within the function. Shared library functions must specify the shared library + * binary_path. + */ + const char *func_name; size_t :0; }; -#define bpf_uprobe_opts__last_field retprobe +#define bpf_uprobe_opts__last_field func_name /** * @brief **bpf_program__attach_uprobe()** attaches a BPF program -- cgit v1.2.3 From 39f8dc43b7a05ab0e352655a14a9d613c2308b92 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 30 Mar 2022 16:26:38 +0100 Subject: libbpf: Add auto-attach for uprobes based on section name Now that u[ret]probes can use name-based specification, it makes sense to add support for auto-attach based on SEC() definition. The format proposed is SEC("u[ret]probe/binary:[raw_offset|function_name[+offset]]") For example, to trace malloc() in libc: SEC("uprobe/libc.so.6:malloc") ...or to trace function foo2 in /usr/bin/foo: SEC("uprobe//usr/bin/foo:foo2") Auto-attach is done for all tasks (pid -1). binary can be an absolute path or simply a program/library name; in the latter case, we use PATH/LD_LIBRARY_PATH to resolve the full path, falling back to standard locations (/usr/bin:/usr/sbin or /usr/lib64:/usr/lib) if the file is not found via environment-variable specified locations. 
Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1648654000-21758-4-git-send-email-alan.maguire@oracle.com --- tools/lib/bpf/libbpf.c | 74 +++++++++++++++++++++- .../testing/selftests/bpf/progs/test_bpf_cookie.c | 4 +- .../selftests/bpf/progs/test_task_pt_regs.c | 2 +- tools/testing/selftests/bpf/progs/trigger_bench.c | 2 +- 4 files changed, 76 insertions(+), 6 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index f6e5a0217841..6d2be53e4ba9 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8630,6 +8630,7 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log } static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link); @@ -8642,9 +8643,9 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), SEC_DEF("kprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), - SEC_DEF("uprobe/", KPROBE, 0, SEC_NONE), + SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), - SEC_DEF("uretprobe/", KPROBE, 0, SEC_NONE), + SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), SEC_DEF("kprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), SEC_DEF("kretprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), SEC_DEF("tc", SCHED_CLS, 0, 
SEC_NONE), @@ -10845,6 +10846,75 @@ err_out: } +/* Format of u[ret]probe section definition supporting auto-attach: + * u[ret]probe/binary:function[+offset] + * + * binary can be an absolute/relative path or a filename; the latter is resolved to a + * full binary path via bpf_program__attach_uprobe_opts. + * + * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be + * specified (and auto-attach is not possible) or the above format is specified for + * auto-attach. + */ +static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); + char *func, *probe_name, *func_end; + char *func_name, binary_path[512]; + unsigned long long raw_offset; + size_t offset = 0; + int n; + + *link = NULL; + + opts.retprobe = str_has_pfx(prog->sec_name, "uretprobe"); + if (opts.retprobe) + probe_name = prog->sec_name + sizeof("uretprobe") - 1; + else + probe_name = prog->sec_name + sizeof("uprobe") - 1; + if (probe_name[0] == '/') + probe_name++; + + /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ + if (strlen(probe_name) == 0) + return 0; + + snprintf(binary_path, sizeof(binary_path), "%s", probe_name); + /* ':' should be prior to function+offset */ + func_name = strrchr(binary_path, ':'); + if (!func_name) { + pr_warn("section '%s' missing ':function[+offset]' specification\n", + prog->sec_name); + return -EINVAL; + } + func_name[0] = '\0'; + func_name++; + n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); + if (n < 1) { + pr_warn("uprobe name '%s' is invalid\n", func_name); + return -EINVAL; + } + if (opts.retprobe && offset != 0) { + free(func); + pr_warn("uretprobes do not support offset specification\n"); + return -EINVAL; + } + + /* Is func a raw address? 
*/ + errno = 0; + raw_offset = strtoull(func, &func_end, 0); + if (!errno && !*func_end) { + free(func); + func = NULL; + offset = (size_t)raw_offset; + } + opts.func_name = func; + + *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts); + free(func); + return libbpf_get_error(*link); +} + struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c index 2d3a7710e2ce..0e2222968918 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c @@ -37,14 +37,14 @@ int handle_kretprobe(struct pt_regs *ctx) return 0; } -SEC("uprobe/trigger_func") +SEC("uprobe") int handle_uprobe(struct pt_regs *ctx) { update(ctx, &uprobe_res); return 0; } -SEC("uretprobe/trigger_func") +SEC("uretprobe") int handle_uretprobe(struct pt_regs *ctx) { update(ctx, &uretprobe_res); diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c index e6cb09259408..1926facba122 100644 --- a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c +++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c @@ -14,7 +14,7 @@ char current_regs[PT_REGS_SIZE] = {}; char ctx_regs[PT_REGS_SIZE] = {}; int uprobe_res = 0; -SEC("uprobe/trigger_func") +SEC("uprobe") int handle_uprobe(struct pt_regs *ctx) { struct task_struct *current; diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 2ab049b54d6c..694e7cec1823 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -54,7 +54,7 @@ int bench_trigger_fmodret(void *ctx) return -22; } -SEC("uprobe/self/uprobe_target") +SEC("uprobe") int bench_trigger_uprobe(void *ctx) { __sync_add_and_fetch(&hits, 1); -- 
cgit v1.2.3 From e93f39998d8f8ed456dfbb4ca68f9a159906cc6f Mon Sep 17 00:00:00 2001 From: Yuntao Wang Date: Mon, 4 Apr 2022 08:53:20 +0800 Subject: libbpf: Don't return -EINVAL if hdr_len < offsetofend(core_relo_len) Since core relos is an optional part of the .BTF.ext ELF section, we should skip parsing it instead of returning -EINVAL if header size is less than offsetofend(struct btf_ext_header, core_relo_len). Signed-off-by: Yuntao Wang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220404005320.1723055-1-ytcoode@gmail.com --- tools/lib/bpf/btf.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 1383e26c5d1f..d124e9e533f0 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -2826,10 +2826,8 @@ struct btf_ext *btf_ext__new(const __u8 *data, __u32 size) if (err) goto done; - if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) { - err = -EINVAL; - goto done; - } + if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) + goto done; /* skip core relos parsing */ err = btf_ext_setup_core_relos(btf_ext); if (err) -- cgit v1.2.3 From 568189310c2096e204674edd2f0da036cd50676a Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 5 Apr 2022 00:50:20 +0200 Subject: libbpf: Support Debian in resolve_full_path() attach_probe selftest fails on Debian-based distros with `failed to resolve full path for 'libc.so.6'`. The reason is that these distros embraced multiarch to the point where even for the "main" architecture they store libc in /lib/. This is configured in /etc/ld.so.conf and in theory it's possible to replicate the loader's parsing and processing logic in libbpf, however a much simpler solution is to just enumerate the known library paths. 
Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220404225020.51029-1-iii@linux.ibm.com --- tools/lib/bpf/libbpf.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6d2be53e4ba9..91ce94b61f7f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10707,15 +10707,53 @@ out: return ret; } +static const char *arch_specific_lib_paths(void) +{ + /* + * Based on https://packages.debian.org/sid/libc6. + * + * Assume that the traced program is built for the same architecture + * as libbpf, which should cover the vast majority of cases. + */ +#if defined(__x86_64__) + return "/lib/x86_64-linux-gnu"; +#elif defined(__i386__) + return "/lib/i386-linux-gnu"; +#elif defined(__s390x__) + return "/lib/s390x-linux-gnu"; +#elif defined(__s390__) + return "/lib/s390-linux-gnu"; +#elif defined(__arm__) && defined(__SOFTFP__) + return "/lib/arm-linux-gnueabi"; +#elif defined(__arm__) && !defined(__SOFTFP__) + return "/lib/arm-linux-gnueabihf"; +#elif defined(__aarch64__) + return "/lib/aarch64-linux-gnu"; +#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64 + return "/lib/mips64el-linux-gnuabi64"; +#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32 + return "/lib/mipsel-linux-gnu"; +#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return "/lib/powerpc64le-linux-gnu"; +#elif defined(__sparc__) && defined(__arch64__) + return "/lib/sparc64-linux-gnu"; +#elif defined(__riscv) && __riscv_xlen == 64 + return "/lib/riscv64-linux-gnu"; +#else + return NULL; +#endif +} + /* Get full path to program/shared library. 
*/ static int resolve_full_path(const char *file, char *result, size_t result_sz) { - const char *search_paths[2]; + const char *search_paths[3] = {}; int i; if (strstr(file, ".so")) { search_paths[0] = getenv("LD_LIBRARY_PATH"); search_paths[1] = "/usr/lib64:/usr/lib"; + search_paths[2] = arch_specific_lib_paths(); } else { search_paths[0] = getenv("PATH"); search_paths[1] = "/usr/bin:/usr/sbin"; -- cgit v1.2.3 From d72e2968fb2583460062e15d53760d44e2d09ae6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 4 Apr 2022 16:41:56 -0700 Subject: libbpf: Add BPF-side of USDT support Add BPF-side implementation of libbpf-provided USDT support. This consists of single header library, usdt.bpf.h, which is meant to be used from user's BPF-side source code. This header is added to the list of installed libbpf header, along bpf_helpers.h and others. BPF-side implementation consists of two BPF maps: - spec map, which contains "a USDT spec" which encodes information necessary to be able to fetch USDT arguments and other information (argument count, user-provided cookie value, etc) at runtime; - IP-to-spec-ID map, which is only used on kernels that don't support BPF cookie feature. It allows to lookup spec ID based on the place in user application that triggers USDT program. These maps have default sizes, 256 and 1024, which are chosen conservatively to not waste a lot of space, but handling a lot of common cases. But there could be cases when user application needs to either trace a lot of different USDTs, or USDTs are heavily inlined and their arguments are located in a lot of differing locations. For such cases it might be necessary to size those maps up, which libbpf allows to do by overriding BPF_USDT_MAX_SPEC_CNT and BPF_USDT_MAX_IP_CNT macros. It is an important aspect to keep in mind. Single USDT (user-space equivalent of kernel tracepoint) can have multiple USDT "call sites". That is, single logical USDT is triggered from multiple places in user application. 
This can happen due to function inlining. Each such inlined instance of USDT invocation can have its own unique USDT argument specification (instructions about the location of the value of each of USDT arguments). So while USDT looks very similar to usual uprobe or kernel tracepoint, under the hood it's actually a collection of uprobes, each potentially needing different spec to know how to fetch arguments. User-visible API consists of three helper functions: - bpf_usdt_arg_cnt(), which returns number of arguments of current USDT; - bpf_usdt_arg(), which reads value of specified USDT argument (by its zero-indexed position) and returns it as 64-bit value; - bpf_usdt_cookie(), which functions like BPF cookie for USDT programs; this is necessary as libbpf doesn't allow specifying actual BPF cookie and utilizes it internally for USDT support implementation. Each bpf_usdt_xxx() API expects struct pt_regs * context, passed into BPF program. On kernels that don't support BPF cookie it is used to fetch absolute IP address of the underlying uprobe. usdt.bpf.h also provides BPF_USDT() macro, which functions like BPF_PROG() and BPF_KPROBE() and allows much more user-friendly way to get access to USDT arguments, if USDT definition is static and known to the user. It is expected that majority of use cases won't have to use bpf_usdt_arg_cnt() and bpf_usdt_arg() directly and BPF_USDT() will cover all their needs. Last, usdt.bpf.h is utilizing BPF CO-RE for one single purpose: to detect kernel support for BPF cookie. If BPF CO-RE dependency is undesirable, user application can redefine BPF_USDT_HAS_BPF_COOKIE to either a boolean constant (or equivalently zero and non-zero), or even point it to its own .rodata variable that can be specified from user's application user-space code. 
It is important that BPF_USDT_HAS_BPF_COOKIE is known to BPF verifier as static value (thus .rodata and not just .data), as otherwise BPF code will still contain bpf_get_attach_cookie() BPF helper call and will fail validation at runtime, if not dead-code eliminated. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Reviewed-by: Alan Maguire Link: https://lore.kernel.org/bpf/20220404234202.331384-2-andrii@kernel.org --- tools/lib/bpf/Makefile | 2 +- tools/lib/bpf/usdt.bpf.h | 256 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 257 insertions(+), 1 deletion(-) create mode 100644 tools/lib/bpf/usdt.bpf.h (limited to 'tools/lib') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index b8b37fe76006..b4fbe8bed555 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -239,7 +239,7 @@ install_lib: all_cmd SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \ bpf_helpers.h bpf_tracing.h bpf_endian.h bpf_core_read.h \ - skel_internal.h libbpf_version.h + skel_internal.h libbpf_version.h usdt.bpf.h GEN_HDRS := $(BPF_GENERATED) INSTALL_PFX := $(DESTDIR)$(prefix)/include/bpf diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h new file mode 100644 index 000000000000..60237acf6b02 --- /dev/null +++ b/tools/lib/bpf/usdt.bpf.h @@ -0,0 +1,256 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#ifndef __USDT_BPF_H__ +#define __USDT_BPF_H__ + +#include <linux/errno.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +/* Below types and maps are internal implementation details of libbpf's USDT + * support and are subject to change. Also, bpf_usdt_xxx() API helpers should + * be considered an unstable API as well and might be adjusted based on user + * feedback from using libbpf's USDT support in production. + */ + +/* User can override BPF_USDT_MAX_SPEC_CNT to change default size of internal + * map that keeps track of USDT argument specifications. 
This might be + * necessary if there are a lot of USDT attachments. + */ +#ifndef BPF_USDT_MAX_SPEC_CNT +#define BPF_USDT_MAX_SPEC_CNT 256 +#endif +/* User can override BPF_USDT_MAX_IP_CNT to change default size of internal + * map that keeps track of IP (memory address) mapping to USDT argument + * specification. + * Note, if kernel supports BPF cookies, this map is not used and could be + * resized all the way to 1 to save a bit of memory. + */ +#ifndef BPF_USDT_MAX_IP_CNT +#define BPF_USDT_MAX_IP_CNT (4 * BPF_USDT_MAX_SPEC_CNT) +#endif +/* We use BPF CO-RE to detect support for BPF cookie from BPF side. This is + * the only dependency on CO-RE, so if it's undesirable, user can override + * BPF_USDT_HAS_BPF_COOKIE to specify whether BPF cookie is supported or not. + */ +#ifndef BPF_USDT_HAS_BPF_COOKIE +#define BPF_USDT_HAS_BPF_COOKIE \ + bpf_core_enum_value_exists(enum bpf_func_id___usdt, BPF_FUNC_get_attach_cookie___usdt) +#endif + +enum __bpf_usdt_arg_type { + BPF_USDT_ARG_CONST, + BPF_USDT_ARG_REG, + BPF_USDT_ARG_REG_DEREF, +}; + +struct __bpf_usdt_arg_spec { + /* u64 scalar interpreted depending on arg_type, see below */ + __u64 val_off; + /* arg location case, see bpf_usdt_arg() for details */ + enum __bpf_usdt_arg_type arg_type; + /* offset of referenced register within struct pt_regs */ + short reg_off; + /* whether arg should be interpreted as signed value */ + bool arg_signed; + /* number of bits that need to be cleared and, optionally, + * sign-extended to cast arguments that are 1, 2, or 4 bytes + * long into final 8-byte u64/s64 value returned to user + */ + char arg_bitshift; +}; + +/* should match USDT_MAX_ARG_CNT in usdt.c exactly */ +#define BPF_USDT_MAX_ARG_CNT 12 +struct __bpf_usdt_spec { + struct __bpf_usdt_arg_spec args[BPF_USDT_MAX_ARG_CNT]; + __u64 usdt_cookie; + short arg_cnt; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, BPF_USDT_MAX_SPEC_CNT); + __type(key, int); + __type(value, struct __bpf_usdt_spec); +} 
__bpf_usdt_specs SEC(".maps") __weak; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, BPF_USDT_MAX_IP_CNT); + __type(key, long); + __type(value, __u32); +} __bpf_usdt_ip_to_spec_id SEC(".maps") __weak; + +/* don't rely on user's BPF code to have latest definition of bpf_func_id */ +enum bpf_func_id___usdt { + BPF_FUNC_get_attach_cookie___usdt = 0xBAD, /* value doesn't matter */ +}; + +static __always_inline +int __bpf_usdt_spec_id(struct pt_regs *ctx) +{ + if (!BPF_USDT_HAS_BPF_COOKIE) { + long ip = PT_REGS_IP(ctx); + int *spec_id_ptr; + + spec_id_ptr = bpf_map_lookup_elem(&__bpf_usdt_ip_to_spec_id, &ip); + return spec_id_ptr ? *spec_id_ptr : -ESRCH; + } + + return bpf_get_attach_cookie(ctx); +} + +/* Return number of USDT arguments defined for currently traced USDT. */ +static inline __noinline +int bpf_usdt_arg_cnt(struct pt_regs *ctx) +{ + struct __bpf_usdt_spec *spec; + int spec_id; + + spec_id = __bpf_usdt_spec_id(ctx); + if (spec_id < 0) + return -ESRCH; + + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); + if (!spec) + return -ESRCH; + + return spec->arg_cnt; +} + +/* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res. + * Returns 0 on success; negative error, otherwise. + * On error *res is guaranteed to be set to zero. + */ +static inline __noinline +int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) +{ + struct __bpf_usdt_spec *spec; + struct __bpf_usdt_arg_spec *arg_spec; + unsigned long val; + int err, spec_id; + + *res = 0; + + spec_id = __bpf_usdt_spec_id(ctx); + if (spec_id < 0) + return -ESRCH; + + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); + if (!spec) + return -ESRCH; + + if (arg_num >= BPF_USDT_MAX_ARG_CNT || arg_num >= spec->arg_cnt) + return -ENOENT; + + arg_spec = &spec->args[arg_num]; + switch (arg_spec->arg_type) { + case BPF_USDT_ARG_CONST: + /* Arg is just a constant ("-4@$-9" in USDT arg spec). + * value is recorded in arg_spec->val_off directly. 
+ */ + val = arg_spec->val_off; + break; + case BPF_USDT_ARG_REG: + /* Arg is in a register (e.g, "8@%rax" in USDT arg spec), + * so we read the contents of that register directly from + * struct pt_regs. To keep things simple user-space parts + * record offsetof(struct pt_regs, ) in arg_spec->reg_off. + */ + err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off); + if (err) + return err; + break; + case BPF_USDT_ARG_REG_DEREF: + /* Arg is in memory addressed by register, plus some offset + * (e.g., "-4@-1204(%rbp)" in USDT arg spec). Register is + * identified lik with BPF_USDT_ARG_REG case, and the offset + * is in arg_spec->val_off. We first fetch register contents + * from pt_regs, then do another user-space probe read to + * fetch argument value itself. + */ + err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off); + if (err) + return err; + err = bpf_probe_read_user(&val, sizeof(val), (void *)val + arg_spec->val_off); + if (err) + return err; + break; + default: + return -EINVAL; + } + + /* cast arg from 1, 2, or 4 bytes to final 8 byte size clearing + * necessary upper arg_bitshift bits, with sign extension if argument + * is signed + */ + val <<= arg_spec->arg_bitshift; + if (arg_spec->arg_signed) + val = ((long)val) >> arg_spec->arg_bitshift; + else + val = val >> arg_spec->arg_bitshift; + *res = val; + return 0; +} + +/* Retrieve user-specified cookie value provided during attach as + * bpf_usdt_opts.usdt_cookie. This serves the same purpose as BPF cookie + * returned by bpf_get_attach_cookie(). Libbpf's support for USDT is itself + * utilizaing BPF cookies internally, so user can't use BPF cookie directly + * for USDT programs and has to use bpf_usdt_cookie() API instead. 
+ */ +static inline __noinline +long bpf_usdt_cookie(struct pt_regs *ctx) +{ + struct __bpf_usdt_spec *spec; + int spec_id; + + spec_id = __bpf_usdt_spec_id(ctx); + if (spec_id < 0) + return 0; + + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); + if (!spec) + return 0; + + return spec->usdt_cookie; +} + +/* we rely on ___bpf_apply() and ___bpf_narg() macros already defined in bpf_tracing.h */ +#define ___bpf_usdt_args0() ctx +#define ___bpf_usdt_args1(x) ___bpf_usdt_args0(), ({ long _x; bpf_usdt_arg(ctx, 0, &_x); (void *)_x; }) +#define ___bpf_usdt_args2(x, args...) ___bpf_usdt_args1(args), ({ long _x; bpf_usdt_arg(ctx, 1, &_x); (void *)_x; }) +#define ___bpf_usdt_args3(x, args...) ___bpf_usdt_args2(args), ({ long _x; bpf_usdt_arg(ctx, 2, &_x); (void *)_x; }) +#define ___bpf_usdt_args4(x, args...) ___bpf_usdt_args3(args), ({ long _x; bpf_usdt_arg(ctx, 3, &_x); (void *)_x; }) +#define ___bpf_usdt_args5(x, args...) ___bpf_usdt_args4(args), ({ long _x; bpf_usdt_arg(ctx, 4, &_x); (void *)_x; }) +#define ___bpf_usdt_args6(x, args...) ___bpf_usdt_args5(args), ({ long _x; bpf_usdt_arg(ctx, 5, &_x); (void *)_x; }) +#define ___bpf_usdt_args7(x, args...) ___bpf_usdt_args6(args), ({ long _x; bpf_usdt_arg(ctx, 6, &_x); (void *)_x; }) +#define ___bpf_usdt_args8(x, args...) ___bpf_usdt_args7(args), ({ long _x; bpf_usdt_arg(ctx, 7, &_x); (void *)_x; }) +#define ___bpf_usdt_args9(x, args...) ___bpf_usdt_args8(args), ({ long _x; bpf_usdt_arg(ctx, 8, &_x); (void *)_x; }) +#define ___bpf_usdt_args10(x, args...) ___bpf_usdt_args9(args), ({ long _x; bpf_usdt_arg(ctx, 9, &_x); (void *)_x; }) +#define ___bpf_usdt_args11(x, args...) ___bpf_usdt_args10(args), ({ long _x; bpf_usdt_arg(ctx, 10, &_x); (void *)_x; }) +#define ___bpf_usdt_args12(x, args...) ___bpf_usdt_args11(args), ({ long _x; bpf_usdt_arg(ctx, 11, &_x); (void *)_x; }) +#define ___bpf_usdt_args(args...) 
___bpf_apply(___bpf_usdt_args, ___bpf_narg(args))(args) + +/* + * BPF_USDT serves the same purpose for USDT handlers as BPF_PROG for + * tp_btf/fentry/fexit BPF programs and BPF_KPROBE for kprobes. + * Original struct pt_regs * context is preserved as 'ctx' argument. + */ +#define BPF_USDT(name, args...) \ +name(struct pt_regs *ctx); \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args); \ +typeof(name(0)) name(struct pt_regs *ctx) \ +{ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_usdt_args(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __attribute__((always_inline)) typeof(name(0)) \ +____##name(struct pt_regs *ctx, ##args) + +#endif /* __USDT_BPF_H__ */ -- cgit v1.2.3 From 2e4913e025fdef740972ac70277297436cccb27f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 4 Apr 2022 16:41:57 -0700 Subject: libbpf: Wire up USDT API and bpf_link integration Wire up libbpf USDT support APIs without yet implementing all the nitty-gritty details of USDT discovery, spec parsing, and BPF map initialization. User-visible user-space API is simple and is conceptually very similar to uprobe API. bpf_program__attach_usdt() API allows to programmatically attach given BPF program to a USDT, specified through binary path (executable or shared lib), USDT provider and name. Also, just like in uprobe case, PID filter is specified (0 - self, -1 - any process, or specific PID). Optionally, USDT cookie value can be specified. Such single API invocation will try to discover given USDT in specified binary and will use (potentially many) BPF uprobes to attach this program in correct locations. Just like any bpf_program__attach_xxx() APIs, bpf_link is returned that represents this attachment. It is a virtual BPF link that doesn't have direct kernel object, as it can consist of multiple underlying BPF uprobe links. 
As such, attachment is not an atomic operation and there can be a brief moment when some USDT call sites are attached while others are still in the process of attaching. This should be taken into consideration by user. But bpf_program__attach_usdt() guarantees that in the case of success all USDT call sites are successfully attached, or all the successful attachments will be detached as soon as some USDT call sites failed to be attached. So, in theory, there could be cases of failed bpf_program__attach_usdt() call which did trigger a few USDT program invocations. This is unavoidable due to multi-uprobe nature of USDT and has to be handled by user, if it's important to create an illusion of atomicity. USDT BPF programs themselves are marked in BPF source code as either SEC("usdt"), in which case they won't be auto-attached through skeleton's <skel>__attach() method, or it can have a full definition, which follows the spirit of fully-specified uprobes: SEC("usdt/<path>:<provider>:<name>"). In the latter case skeleton's attach method will attempt auto-attachment. Similarly, generic bpf_program__attach() will have enough information to go off of for parameterless attachment. USDT BPF programs are actually uprobes, and as such for kernel they are marked as BPF_PROG_TYPE_KPROBE. Another part of this patch is USDT-related feature probing: - BPF cookie support detection from user-space; - detection of kernel support for auto-refcounting of USDT semaphore. The latter is optional. If kernel doesn't support such feature and USDT doesn't rely on USDT semaphores, no error is returned. But if libbpf detects that USDT requires setting semaphores and kernel doesn't support this, libbpf errors out with explicit pr_warn() message. Libbpf doesn't support poking process's memory directly to increment semaphore value, like BCC does on legacy kernels, due to inherent raciness and danger of such process memory manipulation. Libbpf lets kernel take care of this properly or gives up.
Logistically, all the extra USDT-related infrastructure of libbpf is put into a separate usdt.c file and abstracted behind struct usdt_manager. Each bpf_object has lazily-initialized usdt_manager pointer, which is only instantiated if USDT programs are attempted to be attached. Closing BPF object frees up usdt_manager resources. usdt_manager keeps track of USDT spec ID assignment and few other small things. Subsequent patches will fill out remaining missing pieces of USDT initialization and setup logic. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Reviewed-by: Alan Maguire Link: https://lore.kernel.org/bpf/20220404234202.331384-3-andrii@kernel.org --- tools/lib/bpf/Build | 3 +- tools/lib/bpf/libbpf.c | 115 ++++++++++- tools/lib/bpf/libbpf.h | 31 +++ tools/lib/bpf/libbpf.map | 1 + tools/lib/bpf/libbpf_internal.h | 19 ++ tools/lib/bpf/usdt.c | 429 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 587 insertions(+), 11 deletions(-) create mode 100644 tools/lib/bpf/usdt.c (limited to 'tools/lib') diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 94f0a146bb7b..31a1a9015902 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,3 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \ - btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o + btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ + usdt.o diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 91ce94b61f7f..1111e9d16e01 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -483,6 +483,8 @@ struct elf_state { int st_ops_shndx; }; +struct usdt_manager; + struct bpf_object { char name[BPF_OBJ_NAME_LEN]; char license[64]; @@ -545,6 +547,8 @@ struct bpf_object { size_t fd_array_cap; size_t fd_array_cnt; + struct usdt_manager *usdt_man; + char path[]; }; @@ -4678,6 +4682,18 @@ static int probe_perf_link(void) return link_fd < 0 && err 
== -EBADF; } +static int probe_kern_bpf_cookie(void) +{ + struct bpf_insn insns[] = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), + BPF_EXIT_INSN(), + }; + int ret, insn_cnt = ARRAY_SIZE(insns); + + ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); + return probe_fd(ret); +} + enum kern_feature_result { FEAT_UNKNOWN = 0, FEAT_SUPPORTED = 1, @@ -4740,6 +4756,9 @@ static struct kern_feature_desc { [FEAT_MEMCG_ACCOUNT] = { "memcg-based memory accounting", probe_memcg_account, }, + [FEAT_BPF_COOKIE] = { + "BPF cookie support", probe_kern_bpf_cookie, + }, }; bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) @@ -8200,6 +8219,9 @@ void bpf_object__close(struct bpf_object *obj) if (obj->clear_priv) obj->clear_priv(obj, obj->priv); + usdt_manager_free(obj->usdt_man); + obj->usdt_man = NULL; + bpf_gen__free(obj->gen_loader); bpf_object__elf_finish(obj); bpf_object_unload(obj); @@ -8631,6 +8653,7 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link); @@ -8648,6 +8671,7 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), SEC_DEF("kprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), SEC_DEF("kretprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), + SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, 
attach_usdt), SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX | SEC_DEPRECATED), SEC_DEF("action", SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX), @@ -9693,14 +9717,6 @@ int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, return bpf_prog_load_xattr2(&attr, pobj, prog_fd); } -struct bpf_link { - int (*detach)(struct bpf_link *link); - void (*dealloc)(struct bpf_link *link); - char *pin_path; /* NULL, if not pinned */ - int fd; /* hook FD, -1 if not applicable */ - bool disconnected; -}; - /* Replace link's underlying BPF program with the new one */ int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog) { @@ -10810,8 +10826,8 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, err = resolve_full_path(binary_path, full_binary_path, sizeof(full_binary_path)); if (err) { - pr_warn("prog '%s': failed to resolve full path for '%s'\n", - prog->name, binary_path); + pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", + prog->name, binary_path, err); return libbpf_err_ptr(err); } binary_path = full_binary_path; @@ -10963,6 +10979,85 @@ struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts); } +struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, + pid_t pid, const char *binary_path, + const char *usdt_provider, const char *usdt_name, + const struct bpf_usdt_opts *opts) +{ + char resolved_path[512]; + struct bpf_object *obj = prog->obj; + struct bpf_link *link; + long usdt_cookie; + int err; + + if (!OPTS_VALID(opts, bpf_uprobe_opts)) + return libbpf_err_ptr(-EINVAL); + + if (bpf_program__fd(prog) < 0) { + pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + if (!strchr(binary_path, '/')) { + err = resolve_full_path(binary_path, resolved_path, 
sizeof(resolved_path)); + if (err) { + pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", + prog->name, binary_path, err); + return libbpf_err_ptr(err); + } + binary_path = resolved_path; + } + + /* USDT manager is instantiated lazily on first USDT attach. It will + * be destroyed together with BPF object in bpf_object__close(). + */ + if (IS_ERR(obj->usdt_man)) + return libbpf_ptr(obj->usdt_man); + if (!obj->usdt_man) { + obj->usdt_man = usdt_manager_new(obj); + if (IS_ERR(obj->usdt_man)) + return libbpf_ptr(obj->usdt_man); + } + + usdt_cookie = OPTS_GET(opts, usdt_cookie, 0); + link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path, + usdt_provider, usdt_name, usdt_cookie); + err = libbpf_get_error(link); + if (err) + return libbpf_err_ptr(err); + return link; +} + +static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + char *path = NULL, *provider = NULL, *name = NULL; + const char *sec_name; + int n, err; + + sec_name = bpf_program__section_name(prog); + if (strcmp(sec_name, "usdt") == 0) { + /* no auto-attach for just SEC("usdt") */ + *link = NULL; + return 0; + } + + n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name); + if (n != 3) { + pr_warn("invalid section '%s', expected SEC(\"usdt/::\")\n", + sec_name); + err = -EINVAL; + } else { + *link = bpf_program__attach_usdt(prog, -1 /* any process */, path, + provider, name, NULL); + err = libbpf_get_error(*link); + } + free(path); + free(provider); + free(name); + return err; +} + static int determine_tracepoint_id(const char *tp_category, const char *tp_name) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 28cd2062d0df..63d66f1adf1a 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -511,6 +511,37 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, const struct bpf_uprobe_opts *opts); +struct bpf_usdt_opts { + 
/* size of this struct, for forward/backward compatibility */ + size_t sz; + /* custom user-provided value accessible through usdt_cookie() */ + __u64 usdt_cookie; + size_t :0; +}; +#define bpf_usdt_opts__last_field usdt_cookie + +/** + * @brief **bpf_program__attach_usdt()** is just like + * bpf_program__attach_uprobe_opts() except it covers USDT (User-space + * Statically Defined Tracepoint) attachment, instead of attaching to + * user-space function entry or exit. + * + * @param prog BPF program to attach + * @param pid Process ID to attach the uprobe to, 0 for self (own process), + * -1 for all processes + * @param binary_path Path to binary that contains provided USDT probe + * @param usdt_provider USDT provider name + * @param usdt_name USDT probe name + * @param opts Options for altering program attachment + * @return Reference to the newly created BPF link; or NULL is returned on error, + * error code is stored in errno + */ +LIBBPF_API struct bpf_link * +bpf_program__attach_usdt(const struct bpf_program *prog, + pid_t pid, const char *binary_path, + const char *usdt_provider, const char *usdt_name, + const struct bpf_usdt_opts *opts); + struct bpf_tracepoint_opts { /* size of this struct, for forward/backward compatiblity */ size_t sz; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index dd35ee58bfaa..82f6d62176dd 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -444,6 +444,7 @@ LIBBPF_0.8.0 { global: bpf_object__destroy_subskeleton; bpf_object__open_subskeleton; + bpf_program__attach_usdt; libbpf_register_prog_handler; libbpf_unregister_prog_handler; bpf_program__attach_kprobe_multi_opts; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index b6247dc7f8eb..dd0d4ccfa649 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -148,6 +148,15 @@ do { \ #ifndef __has_builtin #define __has_builtin(x) 0 #endif + +struct bpf_link { + int (*detach)(struct bpf_link 
*link); + void (*dealloc)(struct bpf_link *link); + char *pin_path; /* NULL, if not pinned */ + int fd; /* hook FD, -1 if not applicable */ + bool disconnected; +}; + /* * Re-implement glibc's reallocarray() for libbpf internal-only use. * reallocarray(), unfortunately, is not available in all versions of glibc, @@ -329,6 +338,8 @@ enum kern_feature_id { FEAT_BTF_TYPE_TAG, /* memcg-based accounting for BPF maps and progs */ FEAT_MEMCG_ACCOUNT, + /* BPF cookie (bpf_get_attach_cookie() BPF helper) support */ + FEAT_BPF_COOKIE, __FEAT_CNT, }; @@ -543,4 +554,12 @@ int bpf_core_add_cands(struct bpf_core_cand *local_cand, struct bpf_core_cand_list *cands); void bpf_core_free_cands(struct bpf_core_cand_list *cands); +struct usdt_manager *usdt_manager_new(struct bpf_object *obj); +void usdt_manager_free(struct usdt_manager *man); +struct bpf_link * usdt_manager_attach_usdt(struct usdt_manager *man, + const struct bpf_program *prog, + pid_t pid, const char *path, + const char *usdt_provider, const char *usdt_name, + long usdt_cookie); + #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c new file mode 100644 index 000000000000..781aa1d128f1 --- /dev/null +++ b/tools/lib/bpf/usdt.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_common.h" +#include "libbpf_internal.h" +#include "hashmap.h" + +/* libbpf's USDT support consists of BPF-side state/code and user-space + * state/code working together in concert. BPF-side parts are defined in + * usdt.bpf.h header library. User-space state is encapsulated by struct + * usdt_manager and all the supporting code centered around usdt_manager. 
+ * + * usdt.bpf.h defines two BPF maps that usdt_manager expects: USDT spec map + * and IP-to-spec-ID map, which is auxiliary map necessary for kernels that + * don't support BPF cookie (see below). These two maps are implicitly + * embedded into user's end BPF object file when user's code included + * usdt.bpf.h. This means that libbpf doesn't do anything special to create + * these USDT support maps. They are created by normal libbpf logic of + * instantiating BPF maps when opening and loading BPF object. + * + * As such, libbpf is basically unaware of the need to do anything + * USDT-related until the very first call to bpf_program__attach_usdt(), which + * can be called by user explicitly or happen automatically during skeleton + * attach (or, equivalently, through generic bpf_program__attach() call). At + * this point, libbpf will instantiate and initialize struct usdt_manager and + * store it in bpf_object. USDT manager is per-BPF object construct, as each + * independent BPF object might or might not have USDT programs, and thus all + * the expected USDT-related state. There is no coordination between two + * bpf_object in parts of USDT attachment, they are oblivious of each other's + * existence and libbpf is just oblivious, dealing with bpf_object-specific + * USDT state. + * + * Quick crash course on USDTs. + * + * From user-space application's point of view, USDT is essentially just + * a slightly special function call that normally has zero overhead, unless it + * is being traced by some external entity (e.g., BPF-based tool). Here's how + * a typical application can trigger USDT probe: + * + * #include <sys/sdt.h> // provided by systemtap-sdt-devel package + * // folly also provides similar functionality in folly/tracing/StaticTracepoint.h + * + * STAP_PROBE3(my_usdt_provider, my_usdt_probe_name, 123, x, &y); + * + * USDT is identified by its <provider-name>:<probe-name> pair of names.
Each + * individual USDT has a fixed number of arguments (3 in the above example) + * and specifies values of each argument as if it was a function call. + * + * USDT call is actually not a function call, but is instead replaced by + * a single NOP instruction (thus zero overhead, effectively). But in addition + * to that, those USDT macros generate special SHT_NOTE ELF records in + * .note.stapsdt ELF section. Here's an example USDT definition as emitted by + * `readelf -n <binary>`: + * + * stapsdt 0x00000089 NT_STAPSDT (SystemTap probe descriptors) + * Provider: test + * Name: usdt12 + * Location: 0x0000000000549df3, Base: 0x00000000008effa4, Semaphore: 0x0000000000a4606e + * Arguments: -4@-1204(%rbp) -4@%edi -8@-1216(%rbp) -8@%r8 -4@$5 -8@%r9 8@%rdx 8@%r10 -4@$-9 -2@%cx -2@%ax -1@%sil + * + * In this case we have USDT test:usdt12 with 12 arguments. + * + * Location and base are offsets used to calculate absolute IP address of that + * NOP instruction that kernel can replace with an interrupt instruction to + * trigger instrumentation code (BPF program for all that we care about). + * + * Semaphore above is an optional feature. It records an address of a 2-byte + * refcount variable (normally in '.probes' ELF section) used for signaling if + * there is anything that is attached to USDT. This is useful for user + * applications if, for example, they need to prepare some arguments that are + * passed only to USDTs and preparation is expensive. By checking if USDT is + * "activated", an application can avoid paying those costs unnecessarily. + * Recent enough kernel has built-in support for automatically managing this + * refcount, which libbpf expects and relies on. If USDT is defined without + * associated semaphore, this value will be zero. See selftests for semaphore + * examples. + * + * Arguments is the most interesting part. This USDT specification string is + * providing information about all the USDT arguments and their locations.
The + * part before @ sign defines byte size of the argument (1, 2, 4, or 8) and + * whether the argument is signed or unsigned (negative size means signed). + * The part after @ sign is assembly-like definition of argument location + * (see [0] for more details). Technically, assembler can provide some pretty + * advanced definitions, but libbpf is currently supporting three most common + * cases: + * 1) immediate constant, see 5th and 9th args above (-4@$5 and -4@$-9); + * 2) register value, e.g., 8@%rdx, which means "unsigned 8-byte integer + * whose value is in register %rdx"; + * 3) memory dereference addressed by register, e.g., -4@-1204(%rbp), which + * specifies signed 32-bit integer stored at offset -1204 bytes from + * memory address stored in %rbp. + * + * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation + * + * During attachment, libbpf parses all the relevant USDT specifications and + * prepares `struct usdt_spec` (USDT spec), which is then provided to BPF-side + * code through spec map. This allows BPF applications to quickly fetch the + * actual value at runtime using a simple BPF-side code. + * + * With basics out of the way, let's go over less immediately obvious aspects + * of supporting USDTs. + * + * First, there is no special USDT BPF program type. It is actually just + * a uprobe BPF program (which for kernel, at least currently, is just a kprobe + * program, so BPF_PROG_TYPE_KPROBE program type). With the only difference + * that uprobe is usually attached at the function entry, while USDT will + * normally be somewhere inside the function. But it should always be + * pointing to NOP instruction, which makes such uprobes the fastest uprobe + * kind. + * + * Second, it's important to realize that such STAP_PROBEn(provider, name, ...) + * macro invocations can end up being inlined many-many times, depending on + * specifics of each individual user application.
So single conceptual USDT + * (identified by provider:name pair of identifiers) is, generally speaking, + * multiple uprobe locations (USDT call sites) in different places in user + * application. Further, again due to inlining, each USDT call site might end + * up having the same argument #N be located in a different place. In one call + * site it could be a constant, in another will end up in a register, and in + * yet another could be some other register or even somewhere on the stack. + * + * As such, "attaching to USDT" means (in general case) attaching the same + * uprobe BPF program to multiple target locations in user application, each + * potentially having a completely different USDT spec associated with it. + * To wire all this up together libbpf allocates a unique integer spec ID for + * each unique USDT spec. Spec IDs are allocated as sequential small integers + * so that they can be used as keys in array BPF map (for performance reasons). + * Spec ID allocation and accounting is a big part of what usdt_manager is + * about. This state has to be maintained per-BPF object and coordinated + * between different USDT attachments within the same BPF object. + * + * Spec ID is the key in spec BPF map, value is the actual USDT spec laid out + * as struct usdt_spec. Each invocation of BPF program at runtime needs to + * know its associated spec ID. It gets it either through BPF cookie, which + * libbpf sets to spec ID during attach time, or, if kernel is too old to + * support BPF cookie, through IP-to-spec-ID map that libbpf maintains in such + * case. The latter means that some modes of operation can't be supported + * without BPF cookie. Such mode is attaching to shared library "generically", + * without specifying target process. In such case, it's impossible to + * calculate absolute IP addresses for IP-to-spec-ID map, and thus such mode + * is not supported without BPF cookie support.
+ * + * Note that libbpf is using BPF cookie functionality for its own internal + * needs, so user itself can't rely on BPF cookie feature. To that end, libbpf + * provides conceptually equivalent USDT cookie support. It's still u64 + * user-provided value that can be associated with USDT attachment. Note that + * this will be the same value for all USDT call sites within the same single + * *logical* USDT attachment. This makes sense because to user attaching to + * USDT is a single BPF program triggered for singular USDT probe. The fact + * that this is done at multiple actual locations is a mostly hidden + * implementation detail. This USDT cookie value can be fetched with + * bpf_usdt_cookie(ctx) API provided by usdt.bpf.h. + * + * Lastly, while single USDT can have tons of USDT call sites, it doesn't + * necessarily have that many different USDT specs. It very well might be + * that 1000 USDT call sites only need 5 different USDT specs, because all the + * arguments are typically contained in a small set of registers or stack + * locations. As such, it's wasteful to allocate as many USDT spec IDs as + * there are USDT call sites. So libbpf tries to be frugal and performs + * on-the-fly deduplication during a single USDT attachment to only allocate + * the minimal required amount of unique USDT specs (and thus spec IDs). This + * is trivially achieved by using USDT spec string (Arguments string from USDT + * note) as a lookup key in a hashmap. USDT spec string uniquely defines + * everything about how to fetch USDT arguments, so two USDT call sites + * sharing USDT spec string can safely share the same USDT spec and spec ID. + * Note, this spec string deduplication is happening only during the same USDT + * attachment, so each USDT spec shares the same USDT cookie value. This is + * not generally true for other USDT attachments within the same BPF object, + * as even if USDT spec string is the same, USDT cookie value can be + * different.
It was deemed excessive to try to deduplicate across independent + * USDT attachments by taking into account USDT spec string *and* USDT cookie + * value, which would complicated spec ID accounting significantly for little + * gain. + */ + +struct usdt_target { + long abs_ip; + long rel_ip; + long sema_off; +}; + +struct usdt_manager { + struct bpf_map *specs_map; + struct bpf_map *ip_to_spec_id_map; + + bool has_bpf_cookie; + bool has_sema_refcnt; +}; + +struct usdt_manager *usdt_manager_new(struct bpf_object *obj) +{ + static const char *ref_ctr_sysfs_path = "/sys/bus/event_source/devices/uprobe/format/ref_ctr_offset"; + struct usdt_manager *man; + struct bpf_map *specs_map, *ip_to_spec_id_map; + + specs_map = bpf_object__find_map_by_name(obj, "__bpf_usdt_specs"); + ip_to_spec_id_map = bpf_object__find_map_by_name(obj, "__bpf_usdt_ip_to_spec_id"); + if (!specs_map || !ip_to_spec_id_map) { + pr_warn("usdt: failed to find USDT support BPF maps, did you forget to include bpf/usdt.bpf.h?\n"); + return ERR_PTR(-ESRCH); + } + + man = calloc(1, sizeof(*man)); + if (!man) + return ERR_PTR(-ENOMEM); + + man->specs_map = specs_map; + man->ip_to_spec_id_map = ip_to_spec_id_map; + + /* Detect if BPF cookie is supported for kprobes. + * We don't need IP-to-ID mapping if we can use BPF cookies. + * Added in: 7adfc6c9b315 ("bpf: Add bpf_get_attach_cookie() BPF helper to access bpf_cookie value") + */ + man->has_bpf_cookie = kernel_supports(obj, FEAT_BPF_COOKIE); + + /* Detect kernel support for automatic refcounting of USDT semaphore. + * If this is not supported, USDTs with semaphores will not be supported. 
+ * Added in: a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe") + */ + man->has_sema_refcnt = access(ref_ctr_sysfs_path, F_OK) == 0; + + return man; +} + +void usdt_manager_free(struct usdt_manager *man) +{ + if (IS_ERR_OR_NULL(man)) + return; + + free(man); +} + +static int sanity_check_usdt_elf(Elf *elf, const char *path) +{ + GElf_Ehdr ehdr; + int endianness; + + if (elf_kind(elf) != ELF_K_ELF) { + pr_warn("usdt: unrecognized ELF kind %d for '%s'\n", elf_kind(elf), path); + return -EBADF; + } + + switch (gelf_getclass(elf)) { + case ELFCLASS64: + if (sizeof(void *) != 8) { + pr_warn("usdt: attaching to 64-bit ELF binary '%s' is not supported\n", path); + return -EBADF; + } + break; + case ELFCLASS32: + if (sizeof(void *) != 4) { + pr_warn("usdt: attaching to 32-bit ELF binary '%s' is not supported\n", path); + return -EBADF; + } + break; + default: + pr_warn("usdt: unsupported ELF class for '%s'\n", path); + return -EBADF; + } + + if (!gelf_getehdr(elf, &ehdr)) + return -EINVAL; + + if (ehdr.e_type != ET_EXEC && ehdr.e_type != ET_DYN) { + pr_warn("usdt: unsupported type of ELF binary '%s' (%d), only ET_EXEC and ET_DYN are supported\n", + path, ehdr.e_type); + return -EBADF; + } + +#if __BYTE_ORDER == __LITTLE_ENDIAN + endianness = ELFDATA2LSB; +#elif __BYTE_ORDER == __BIG_ENDIAN + endianness = ELFDATA2MSB; +#else +# error "Unrecognized __BYTE_ORDER__" +#endif + if (endianness != ehdr.e_ident[EI_DATA]) { + pr_warn("usdt: ELF endianness mismatch for '%s'\n", path); + return -EBADF; + } + + return 0; +} + +static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *path, pid_t pid, + const char *usdt_provider, const char *usdt_name, long usdt_cookie, + struct usdt_target **out_targets, size_t *out_target_cnt) +{ + return -ENOTSUP; +} + +struct bpf_link_usdt { + struct bpf_link link; + + struct usdt_manager *usdt_man; + + size_t uprobe_cnt; + struct { + long abs_ip; + struct bpf_link *link; + } *uprobes; +}; + +static 
int bpf_link_usdt_detach(struct bpf_link *link) +{ + struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link); + int i; + + for (i = 0; i < usdt_link->uprobe_cnt; i++) { + /* detach underlying uprobe link */ + bpf_link__destroy(usdt_link->uprobes[i].link); + } + + return 0; +} + +static void bpf_link_usdt_dealloc(struct bpf_link *link) +{ + struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link); + + free(usdt_link->uprobes); + free(usdt_link); +} + +struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct bpf_program *prog, + pid_t pid, const char *path, + const char *usdt_provider, const char *usdt_name, + long usdt_cookie) +{ + LIBBPF_OPTS(bpf_uprobe_opts, opts); + struct bpf_link_usdt *link = NULL; + struct usdt_target *targets = NULL; + size_t target_cnt; + int i, fd, err; + Elf *elf; + + /* TODO: perform path resolution similar to uprobe's */ + fd = open(path, O_RDONLY); + if (fd < 0) { + err = -errno; + pr_warn("usdt: failed to open ELF binary '%s': %d\n", path, err); + return libbpf_err_ptr(err); + } + + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) { + err = -EBADF; + pr_warn("usdt: failed to parse ELF binary '%s': %s\n", path, elf_errmsg(-1)); + goto err_out; + } + + err = sanity_check_usdt_elf(elf, path); + if (err) + goto err_out; + + /* normalize PID filter */ + if (pid < 0) + pid = -1; + else if (pid == 0) + pid = getpid(); + + /* discover USDT in given binary, optionally limiting + * activations to a given PID, if pid > 0 + */ + err = collect_usdt_targets(man, elf, path, pid, usdt_provider, usdt_name, + usdt_cookie, &targets, &target_cnt); + if (err <= 0) { + err = (err == 0) ? 
-ENOENT : err; + goto err_out; + } + + link = calloc(1, sizeof(*link)); + if (!link) { + err = -ENOMEM; + goto err_out; + } + + link->usdt_man = man; + link->link.detach = &bpf_link_usdt_detach; + link->link.dealloc = &bpf_link_usdt_dealloc; + + link->uprobes = calloc(target_cnt, sizeof(*link->uprobes)); + if (!link->uprobes) { + err = -ENOMEM; + goto err_out; + } + + for (i = 0; i < target_cnt; i++) { + struct usdt_target *target = &targets[i]; + struct bpf_link *uprobe_link; + + opts.ref_ctr_offset = target->sema_off; + uprobe_link = bpf_program__attach_uprobe_opts(prog, pid, path, + target->rel_ip, &opts); + err = libbpf_get_error(uprobe_link); + if (err) { + pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %d\n", + i, usdt_provider, usdt_name, path, err); + goto err_out; + } + + link->uprobes[i].link = uprobe_link; + link->uprobes[i].abs_ip = target->abs_ip; + link->uprobe_cnt++; + } + + elf_end(elf); + close(fd); + + return &link->link; + +err_out: + bpf_link__destroy(&link->link); + + if (elf) + elf_end(elf); + close(fd); + return libbpf_err_ptr(err); +} -- cgit v1.2.3 From 74cc6311cec906daf1d64cefe4922dbf79c416c9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 4 Apr 2022 16:41:58 -0700 Subject: libbpf: Add USDT notes parsing and resolution logic Implement architecture-agnostic parts of USDT parsing logic. The code is the documentation in this case, it's futile to try to succinctly describe how USDT parsing is done in any sort of concreteness. But still, USDTs are recorded in special ELF notes section (.note.stapsdt), where each USDT call site is described separately. Along with USDT provider and USDT name, each such note contains USDT argument specification, which uses assembly-like syntax to describe how to fetch value of USDT argument. 
USDT arg spec could be just a constant, or a register, or a register dereference (most common cases in x86_64), but it technically can be much more complicated cases, like offset relative to global symbol and stuff like that. One of the later patches will implement most common subset of this for x86 and x86-64 architectures, which seems to handle a lot of real-world production application. USDT arg spec contains a compact encoding allowing usdt.bpf.h from previous patch to handle the above 3 cases. Instead of recording which register might be needed, we encode register's offset within struct pt_regs to simplify BPF-side implementation. USDT argument can be of different byte sizes (1, 2, 4, and 8) and signed or unsigned. To handle this, libbpf pre-calculates necessary bit shifts to do proper casting and sign-extension in a short sequences of left and right shifts. The rest is in the code with sometimes extensive comments and references to external "documentation" for USDTs. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Reviewed-by: Alan Maguire Reviewed-by: Dave Marchevsky Link: https://lore.kernel.org/bpf/20220404234202.331384-4-andrii@kernel.org --- tools/lib/bpf/usdt.c | 582 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 581 insertions(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 781aa1d128f1..f1670e3014ed 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -183,10 +183,56 @@ * gain. 
*/ +#define USDT_BASE_SEC ".stapsdt.base" +#define USDT_SEMA_SEC ".probes" +#define USDT_NOTE_SEC ".note.stapsdt" +#define USDT_NOTE_TYPE 3 +#define USDT_NOTE_NAME "stapsdt" + +/* should match exactly enum __bpf_usdt_arg_type from bpf_usdt.bpf.h */ +enum usdt_arg_type { + USDT_ARG_CONST, + USDT_ARG_REG, + USDT_ARG_REG_DEREF, +}; + +/* should match exactly struct __bpf_usdt_arg_spec from bpf_usdt.bpf.h */ +struct usdt_arg_spec { + __u64 val_off; + enum usdt_arg_type arg_type; + short reg_off; + bool arg_signed; + char arg_bitshift; +}; + +/* should match BPF_USDT_MAX_ARG_CNT in usdt.bpf.h */ +#define USDT_MAX_ARG_CNT 12 + +/* should match struct __bpf_usdt_spec from usdt.bpf.h */ +struct usdt_spec { + struct usdt_arg_spec args[USDT_MAX_ARG_CNT]; + __u64 usdt_cookie; + short arg_cnt; +}; + +struct usdt_note { + const char *provider; + const char *name; + /* USDT args specification string, e.g.: + * "-4@%esi -4@-24(%rbp) -4@%ecx 2@%ax 8@%rdx" + */ + const char *args; + long loc_addr; + long base_addr; + long sema_addr; +}; + struct usdt_target { long abs_ip; long rel_ip; long sema_off; + struct usdt_spec spec; + const char *spec_str; }; struct usdt_manager { @@ -292,11 +338,450 @@ static int sanity_check_usdt_elf(Elf *elf, const char *path) return 0; } +static int find_elf_sec_by_name(Elf *elf, const char *sec_name, GElf_Shdr *shdr, Elf_Scn **scn) +{ + Elf_Scn *sec = NULL; + size_t shstrndx; + + if (elf_getshdrstrndx(elf, &shstrndx)) + return -EINVAL; + + /* check if ELF is corrupted and avoid calling elf_strptr if yes */ + if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL)) + return -EINVAL; + + while ((sec = elf_nextscn(elf, sec)) != NULL) { + char *name; + + if (!gelf_getshdr(sec, shdr)) + return -EINVAL; + + name = elf_strptr(elf, shstrndx, shdr->sh_name); + if (name && strcmp(sec_name, name) == 0) { + *scn = sec; + return 0; + } + } + + return -ENOENT; +} + +struct elf_seg { + long start; + long end; + long offset; + bool is_exec; +}; + +static int 
cmp_elf_segs(const void *_a, const void *_b) +{ + const struct elf_seg *a = _a; + const struct elf_seg *b = _b; + + return a->start < b->start ? -1 : 1; +} + +static int parse_elf_segs(Elf *elf, const char *path, struct elf_seg **segs, size_t *seg_cnt) +{ + GElf_Phdr phdr; + size_t n; + int i, err; + struct elf_seg *seg; + void *tmp; + + *seg_cnt = 0; + + if (elf_getphdrnum(elf, &n)) { + err = -errno; + return err; + } + + for (i = 0; i < n; i++) { + if (!gelf_getphdr(elf, i, &phdr)) { + err = -errno; + return err; + } + + pr_debug("usdt: discovered PHDR #%d in '%s': vaddr 0x%lx memsz 0x%lx offset 0x%lx type 0x%lx flags 0x%lx\n", + i, path, (long)phdr.p_vaddr, (long)phdr.p_memsz, (long)phdr.p_offset, + (long)phdr.p_type, (long)phdr.p_flags); + if (phdr.p_type != PT_LOAD) + continue; + + tmp = libbpf_reallocarray(*segs, *seg_cnt + 1, sizeof(**segs)); + if (!tmp) + return -ENOMEM; + + *segs = tmp; + seg = *segs + *seg_cnt; + (*seg_cnt)++; + + seg->start = phdr.p_vaddr; + seg->end = phdr.p_vaddr + phdr.p_memsz; + seg->offset = phdr.p_offset; + seg->is_exec = phdr.p_flags & PF_X; + } + + if (*seg_cnt == 0) { + pr_warn("usdt: failed to find PT_LOAD program headers in '%s'\n", path); + return -ESRCH; + } + + qsort(*segs, *seg_cnt, sizeof(**segs), cmp_elf_segs); + return 0; +} + +static int parse_lib_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt) +{ + char path[PATH_MAX], line[PATH_MAX], mode[16]; + size_t seg_start, seg_end, seg_off; + struct elf_seg *seg; + int tmp_pid, i, err; + FILE *f; + + *seg_cnt = 0; + + /* Handle containerized binaries only accessible from + * /proc/<pid>/root/<path>. They will be reported as just /<path> in + * /proc/<pid>/maps. 
+ */ + if (sscanf(lib_path, "/proc/%d/root%s", &tmp_pid, path) == 2 && pid == tmp_pid) + goto proceed; + + if (!realpath(lib_path, path)) { + pr_warn("usdt: failed to get absolute path of '%s' (err %d), using path as is...\n", + lib_path, -errno); + strcpy(path, lib_path); + } + +proceed: + sprintf(line, "/proc/%d/maps", pid); + f = fopen(line, "r"); + if (!f) { + err = -errno; + pr_warn("usdt: failed to open '%s' to get base addr of '%s': %d\n", + line, lib_path, err); + return err; + } + + /* We need to handle lines with no path at the end: + * + * 7f5c6f5d1000-7f5c6f5d3000 rw-p 001c7000 08:04 21238613 /usr/lib64/libc-2.17.so + * 7f5c6f5d3000-7f5c6f5d8000 rw-p 00000000 00:00 0 + * 7f5c6f5d8000-7f5c6f5d9000 r-xp 00000000 103:01 362990598 /data/users/andriin/linux/tools/bpf/usdt/libhello_usdt.so + */ + while (fscanf(f, "%zx-%zx %s %zx %*s %*d%[^\n]\n", + &seg_start, &seg_end, mode, &seg_off, line) == 5) { + void *tmp; + + /* to handle no path case (see above) we need to capture line + * without skipping any whitespaces. 
So we need to strip + * leading whitespaces manually here + */ + i = 0; + while (isblank(line[i])) + i++; + if (strcmp(line + i, path) != 0) + continue; + + pr_debug("usdt: discovered segment for lib '%s': addrs %zx-%zx mode %s offset %zx\n", + path, seg_start, seg_end, mode, seg_off); + + /* ignore non-executable sections for shared libs */ + if (mode[2] != 'x') + continue; + + tmp = libbpf_reallocarray(*segs, *seg_cnt + 1, sizeof(**segs)); + if (!tmp) { + err = -ENOMEM; + goto err_out; + } + + *segs = tmp; + seg = *segs + *seg_cnt; + *seg_cnt += 1; + + seg->start = seg_start; + seg->end = seg_end; + seg->offset = seg_off; + seg->is_exec = true; + } + + if (*seg_cnt == 0) { + pr_warn("usdt: failed to find '%s' (resolved to '%s') within PID %d memory mappings\n", + lib_path, path, pid); + err = -ESRCH; + goto err_out; + } + + qsort(*segs, *seg_cnt, sizeof(**segs), cmp_elf_segs); + err = 0; +err_out: + fclose(f); + return err; +} + +static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long addr, bool relative) +{ + struct elf_seg *seg; + int i; + + if (relative) { + /* for shared libraries, address is relative offset and thus + * should be fall within logical offset-based range of + * [offset_start, offset_end) + */ + for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { + if (seg->offset <= addr && addr < seg->offset + (seg->end - seg->start)) + return seg; + } + } else { + /* for binaries, address is absolute and thus should be within + * absolute address range of [seg_start, seg_end) + */ + for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { + if (seg->start <= addr && addr < seg->end) + return seg; + } + } + + return NULL; +} + +static int parse_usdt_note(Elf *elf, const char *path, long base_addr, + GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off, + struct usdt_note *usdt_note); + +static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, long usdt_cookie); + static int collect_usdt_targets(struct 
usdt_manager *man, Elf *elf, const char *path, pid_t pid, const char *usdt_provider, const char *usdt_name, long usdt_cookie, struct usdt_target **out_targets, size_t *out_target_cnt) { - return -ENOTSUP; + size_t off, name_off, desc_off, seg_cnt = 0, lib_seg_cnt = 0, target_cnt = 0; + struct elf_seg *segs = NULL, *lib_segs = NULL; + struct usdt_target *targets = NULL, *target; + long base_addr = 0; + Elf_Scn *notes_scn, *base_scn; + GElf_Shdr base_shdr, notes_shdr; + GElf_Ehdr ehdr; + GElf_Nhdr nhdr; + Elf_Data *data; + int err; + + *out_targets = NULL; + *out_target_cnt = 0; + + err = find_elf_sec_by_name(elf, USDT_NOTE_SEC, &notes_shdr, &notes_scn); + if (err) { + pr_warn("usdt: no USDT notes section (%s) found in '%s'\n", USDT_NOTE_SEC, path); + return err; + } + + if (notes_shdr.sh_type != SHT_NOTE || !gelf_getehdr(elf, &ehdr)) { + pr_warn("usdt: invalid USDT notes section (%s) in '%s'\n", USDT_NOTE_SEC, path); + return -EINVAL; + } + + err = parse_elf_segs(elf, path, &segs, &seg_cnt); + if (err) { + pr_warn("usdt: failed to process ELF program segments for '%s': %d\n", path, err); + goto err_out; + } + + /* .stapsdt.base ELF section is optional, but is used for prelink + * offset compensation (see a big comment further below) + */ + if (find_elf_sec_by_name(elf, USDT_BASE_SEC, &base_shdr, &base_scn) == 0) + base_addr = base_shdr.sh_addr; + + data = elf_getdata(notes_scn, 0); + off = 0; + while ((off = gelf_getnote(data, off, &nhdr, &name_off, &desc_off)) > 0) { + long usdt_abs_ip, usdt_rel_ip, usdt_sema_off = 0; + struct usdt_note note; + struct elf_seg *seg = NULL; + void *tmp; + + err = parse_usdt_note(elf, path, base_addr, &nhdr, + data->d_buf, name_off, desc_off, &note); + if (err) + goto err_out; + + if (strcmp(note.provider, usdt_provider) != 0 || strcmp(note.name, usdt_name) != 0) + continue; + + /* We need to compensate "prelink effect". See [0] for details, + * relevant parts quoted here: + * + * Each SDT probe also expands into a non-allocated ELF note. 
You can + * find this by looking at SHT_NOTE sections and decoding the format; + * see below for details. Because the note is non-allocated, it means + * there is no runtime cost, and also preserved in both stripped files + * and .debug files. + * + * However, this means that prelink won't adjust the note's contents + * for address offsets. Instead, this is done via the .stapsdt.base + * section. This is a special section that is added to the text. We + * will only ever have one of these sections in a final link and it + * will only ever be one byte long. Nothing about this section itself + * matters, we just use it as a marker to detect prelink address + * adjustments. + * + * Each probe note records the link-time address of the .stapsdt.base + * section alongside the probe PC address. The decoder compares the + * base address stored in the note with the .stapsdt.base section's + * sh_addr. Initially these are the same, but the section header will + * be adjusted by prelink. So the decoder applies the difference to + * the probe PC address to get the correct prelinked PC address; the + * same adjustment is applied to the semaphore address, if any. 
+ * + * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation + */ + usdt_rel_ip = usdt_abs_ip = note.loc_addr; + if (base_addr) { + usdt_abs_ip += base_addr - note.base_addr; + usdt_rel_ip += base_addr - note.base_addr; + } + + if (ehdr.e_type == ET_EXEC) { + /* When attaching uprobes (which what USDTs basically + * are) kernel expects a relative IP to be specified, + * so if we are attaching to an executable ELF binary + * (i.e., not a shared library), we need to calculate + * proper relative IP based on ELF's load address + */ + seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip, false /* relative */); + if (!seg) { + err = -ESRCH; + pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n", + usdt_provider, usdt_name, path, usdt_abs_ip); + goto err_out; + } + if (!seg->is_exec) { + err = -ESRCH; + pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n", + path, seg->start, seg->end, usdt_provider, usdt_name, + usdt_abs_ip); + goto err_out; + } + + usdt_rel_ip = usdt_abs_ip - (seg->start - seg->offset); + } else if (!man->has_bpf_cookie) { /* ehdr.e_type == ET_DYN */ + /* If we don't have BPF cookie support but need to + * attach to a shared library, we'll need to know and + * record absolute addresses of attach points due to + * the need to lookup USDT spec by absolute IP of + * triggered uprobe. Doing this resolution is only + * possible when we have a specific PID of the process + * that's using specified shared library. BPF cookie + * removes the absolute address limitation as we don't + * need to do this lookup (we just use BPF cookie as + * an index of USDT spec), so for newer kernels with + * BPF cookie support libbpf supports USDT attachment + * to shared libraries with no PID filter. 
+ */ + if (pid < 0) { + pr_warn("usdt: attaching to shared libaries without specific PID is not supported on current kernel\n"); + err = -ENOTSUP; + goto err_out; + } + + /* lib_segs are lazily initialized only if necessary */ + if (lib_seg_cnt == 0) { + err = parse_lib_segs(pid, path, &lib_segs, &lib_seg_cnt); + if (err) { + pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %d\n", + pid, path, err); + goto err_out; + } + } + + seg = find_elf_seg(lib_segs, lib_seg_cnt, usdt_rel_ip, true /* relative */); + if (!seg) { + err = -ESRCH; + pr_warn("usdt: failed to find shared lib memory segment for '%s:%s' in '%s' at relative IP 0x%lx\n", + usdt_provider, usdt_name, path, usdt_rel_ip); + goto err_out; + } + + usdt_abs_ip = seg->start + (usdt_rel_ip - seg->offset); + } + + pr_debug("usdt: probe for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved abs_ip 0x%lx rel_ip 0x%lx) args '%s' in segment [0x%lx, 0x%lx) at offset 0x%lx\n", + usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ", path, + note.loc_addr, note.base_addr, usdt_abs_ip, usdt_rel_ip, note.args, + seg ? seg->start : 0, seg ? seg->end : 0, seg ? 
seg->offset : 0); + + /* Adjust semaphore address to be a relative offset */ + if (note.sema_addr) { + if (!man->has_sema_refcnt) { + pr_warn("usdt: kernel doesn't support USDT semaphore refcounting for '%s:%s' in '%s'\n", + usdt_provider, usdt_name, path); + err = -ENOTSUP; + goto err_out; + } + + seg = find_elf_seg(segs, seg_cnt, note.sema_addr, false /* relative */); + if (!seg) { + err = -ESRCH; + pr_warn("usdt: failed to find ELF loadable segment with semaphore of '%s:%s' in '%s' at 0x%lx\n", + usdt_provider, usdt_name, path, note.sema_addr); + goto err_out; + } + if (seg->is_exec) { + err = -ESRCH; + pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx] for semaphore of '%s:%s' at 0x%lx is executable\n", + path, seg->start, seg->end, usdt_provider, usdt_name, + note.sema_addr); + goto err_out; + } + + usdt_sema_off = note.sema_addr - (seg->start - seg->offset); + + pr_debug("usdt: sema for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved 0x%lx) in segment [0x%lx, 0x%lx] at offset 0x%lx\n", + usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? 
"exec" : "lib ", + path, note.sema_addr, note.base_addr, usdt_sema_off, + seg->start, seg->end, seg->offset); + } + + /* Record adjusted addresses and offsets and parse USDT spec */ + tmp = libbpf_reallocarray(targets, target_cnt + 1, sizeof(*targets)); + if (!tmp) { + err = -ENOMEM; + goto err_out; + } + targets = tmp; + + target = &targets[target_cnt]; + memset(target, 0, sizeof(*target)); + + target->abs_ip = usdt_abs_ip; + target->rel_ip = usdt_rel_ip; + target->sema_off = usdt_sema_off; + + /* notes->args references strings from Elf itself, so they can + * be referenced safely until elf_end() call + */ + target->spec_str = note.args; + + err = parse_usdt_spec(&target->spec, &note, usdt_cookie); + if (err) + goto err_out; + + target_cnt++; + } + + *out_targets = targets; + *out_target_cnt = target_cnt; + err = target_cnt; + +err_out: + free(segs); + free(lib_segs); + if (err < 0) + free(targets); + return err; } struct bpf_link_usdt { @@ -414,6 +899,7 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct link->uprobe_cnt++; } + free(targets); elf_end(elf); close(fd); @@ -422,8 +908,102 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct err_out: bpf_link__destroy(&link->link); + free(targets); if (elf) elf_end(elf); close(fd); return libbpf_err_ptr(err); } + +/* Parse out USDT ELF note from '.note.stapsdt' section. + * Logic inspired by perf's code. 
+ */ +static int parse_usdt_note(Elf *elf, const char *path, long base_addr, + GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off, + struct usdt_note *note) +{ + const char *provider, *name, *args; + long addrs[3]; + size_t len; + + /* sanity check USDT note name and type first */ + if (strncmp(data + name_off, USDT_NOTE_NAME, nhdr->n_namesz) != 0) + return -EINVAL; + if (nhdr->n_type != USDT_NOTE_TYPE) + return -EINVAL; + + /* sanity check USDT note contents ("description" in ELF terminology) */ + len = nhdr->n_descsz; + data = data + desc_off; + + /* +3 is the very minimum required to store three empty strings */ + if (len < sizeof(addrs) + 3) + return -EINVAL; + + /* get location, base, and semaphore addrs */ + memcpy(&addrs, data, sizeof(addrs)); + + /* parse string fields: provider, name, args */ + provider = data + sizeof(addrs); + + name = (const char *)memchr(provider, '\0', data + len - provider); + if (!name) /* non-zero-terminated provider */ + return -EINVAL; + name++; + if (name >= data + len || *name == '\0') /* missing or empty name */ + return -EINVAL; + + args = memchr(name, '\0', data + len - name); + if (!args) /* non-zero-terminated name */ + return -EINVAL; + ++args; + if (args >= data + len) /* missing arguments spec */ + return -EINVAL; + + note->provider = provider; + note->name = name; + if (*args == '\0' || *args == ':') + note->args = ""; + else + note->args = args; + note->loc_addr = addrs[0]; + note->base_addr = addrs[1]; + note->sema_addr = addrs[2]; + + return 0; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg); + +static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, long usdt_cookie) +{ + const char *s; + int len; + + spec->usdt_cookie = usdt_cookie; + spec->arg_cnt = 0; + + s = note->args; + while (s[0]) { + if (spec->arg_cnt >= USDT_MAX_ARG_CNT) { + pr_warn("usdt: too many USDT arguments (> %d) for '%s:%s' with args spec '%s'\n", + 
USDT_MAX_ARG_CNT, note->provider, note->name, note->args); + return -E2BIG; + } + + len = parse_usdt_arg(s, spec->arg_cnt, &spec->args[spec->arg_cnt]); + if (len < 0) + return len; + + s += len; + spec->arg_cnt++; + } + + return 0; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +{ + pr_warn("usdt: libbpf doesn't support USDTs on current architecture\n"); + return -ENOTSUP; +} -- cgit v1.2.3 From 999783c8bbda2e82390cb8c39ed9e3954cf51b82 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 4 Apr 2022 16:41:59 -0700 Subject: libbpf: Wire up spec management and other arch-independent USDT logic Last part of architecture-agnostic user-space USDT handling logic is to set up BPF spec and, optionally, IP-to-ID maps from user-space. usdt_manager performs a compact spec ID allocation to utilize fixed-sized BPF maps as efficiently as possible. We also use hashmap to deduplicate USDT arg spec strings and map identical strings to single USDT spec, minimizing the necessary BPF map size. usdt_manager supports arbitrary sequences of attachment and detachment, both of the same USDT and multiple different USDTs and internally maintains a free list of unused spec IDs. bpf_link_usdt's logic is extended with proper setup and teardown of this spec ID free list and supporting BPF maps. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Reviewed-by: Alan Maguire Reviewed-by: Dave Marchevsky Link: https://lore.kernel.org/bpf/20220404234202.331384-5-andrii@kernel.org --- tools/lib/bpf/usdt.c | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 167 insertions(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index f1670e3014ed..2799387c5465 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -239,6 +239,10 @@ struct usdt_manager { struct bpf_map *specs_map; struct bpf_map *ip_to_spec_id_map; + int *free_spec_ids; + size_t free_spec_cnt; + size_t next_free_spec_id; + bool has_bpf_cookie; bool has_sema_refcnt; }; @@ -283,6 +287,7 @@ void usdt_manager_free(struct usdt_manager *man) if (IS_ERR_OR_NULL(man)) return; + free(man->free_spec_ids); free(man); } @@ -789,6 +794,9 @@ struct bpf_link_usdt { struct usdt_manager *usdt_man; + size_t spec_cnt; + int *spec_ids; + size_t uprobe_cnt; struct { long abs_ip; @@ -799,11 +807,52 @@ struct bpf_link_usdt { static int bpf_link_usdt_detach(struct bpf_link *link) { struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link); + struct usdt_manager *man = usdt_link->usdt_man; int i; for (i = 0; i < usdt_link->uprobe_cnt; i++) { /* detach underlying uprobe link */ bpf_link__destroy(usdt_link->uprobes[i].link); + /* there is no need to update specs map because it will be + * unconditionally overwritten on subsequent USDT attaches, + * but if BPF cookies are not used we need to remove entry + * from ip_to_spec_id map, otherwise we'll run into false + * conflicting IP errors + */ + if (!man->has_bpf_cookie) { + /* not much we can do about errors here */ + (void)bpf_map_delete_elem(bpf_map__fd(man->ip_to_spec_id_map), + &usdt_link->uprobes[i].abs_ip); + } + } + + /* try to return the list of previously used spec IDs to usdt_manager + * for future reuse for subsequent USDT attaches + */ + if 
(!man->free_spec_ids) { + /* if there were no free spec IDs yet, just transfer our IDs */ + man->free_spec_ids = usdt_link->spec_ids; + man->free_spec_cnt = usdt_link->spec_cnt; + usdt_link->spec_ids = NULL; + } else { + /* otherwise concat IDs */ + size_t new_cnt = man->free_spec_cnt + usdt_link->spec_cnt; + int *new_free_ids; + + new_free_ids = libbpf_reallocarray(man->free_spec_ids, new_cnt, + sizeof(*new_free_ids)); + /* If we couldn't resize free_spec_ids, we'll just leak + * a bunch of free IDs; this is very unlikely to happen and if + * system is so exausted on memory, it's the least of user's + * concerns, probably. + * So just do our best here to return those IDs to usdt_manager. + */ + if (new_free_ids) { + memcpy(new_free_ids + man->free_spec_cnt, usdt_link->spec_ids, + usdt_link->spec_cnt * sizeof(*usdt_link->spec_ids)); + man->free_spec_ids = new_free_ids; + man->free_spec_cnt = new_cnt; + } } return 0; @@ -813,22 +862,96 @@ static void bpf_link_usdt_dealloc(struct bpf_link *link) { struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link); + free(usdt_link->spec_ids); free(usdt_link->uprobes); free(usdt_link); } +static size_t specs_hash_fn(const void *key, void *ctx) +{ + const char *s = key; + + return str_hash(s); +} + +static bool specs_equal_fn(const void *key1, const void *key2, void *ctx) +{ + const char *s1 = key1; + const char *s2 = key2; + + return strcmp(s1, s2) == 0; +} + +static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash, + struct bpf_link_usdt *link, struct usdt_target *target, + int *spec_id, bool *is_new) +{ + void *tmp; + int err; + + /* check if we already allocated spec ID for this spec string */ + if (hashmap__find(specs_hash, target->spec_str, &tmp)) { + *spec_id = (long)tmp; + *is_new = false; + return 0; + } + + /* otherwise it's a new ID that needs to be set up in specs map and + * returned back to usdt_manager when USDT link is detached + */ + tmp = 
libbpf_reallocarray(link->spec_ids, link->spec_cnt + 1, sizeof(*link->spec_ids)); + if (!tmp) + return -ENOMEM; + link->spec_ids = tmp; + + /* get next free spec ID, giving preference to free list, if not empty */ + if (man->free_spec_cnt) { + *spec_id = man->free_spec_ids[man->free_spec_cnt - 1]; + + /* cache spec ID for current spec string for future lookups */ + err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id); + if (err) + return err; + + man->free_spec_cnt--; + } else { + /* don't allocate spec ID bigger than what fits in specs map */ + if (man->next_free_spec_id >= bpf_map__max_entries(man->specs_map)) + return -E2BIG; + + *spec_id = man->next_free_spec_id; + + /* cache spec ID for current spec string for future lookups */ + err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id); + if (err) + return err; + + man->next_free_spec_id++; + } + + /* remember new spec ID in the link for later return back to free list on detach */ + link->spec_ids[link->spec_cnt] = *spec_id; + link->spec_cnt++; + *is_new = true; + return 0; +} + struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct bpf_program *prog, pid_t pid, const char *path, const char *usdt_provider, const char *usdt_name, long usdt_cookie) { + int i, fd, err, spec_map_fd, ip_map_fd; LIBBPF_OPTS(bpf_uprobe_opts, opts); + struct hashmap *specs_hash = NULL; struct bpf_link_usdt *link = NULL; struct usdt_target *targets = NULL; size_t target_cnt; - int i, fd, err; Elf *elf; + spec_map_fd = bpf_map__fd(man->specs_map); + ip_map_fd = bpf_map__fd(man->ip_to_spec_id_map); + /* TODO: perform path resolution similar to uprobe's */ fd = open(path, O_RDONLY); if (fd < 0) { @@ -864,6 +987,12 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct goto err_out; } + specs_hash = hashmap__new(specs_hash_fn, specs_equal_fn, NULL); + if (IS_ERR(specs_hash)) { + err = PTR_ERR(specs_hash); + goto err_out; + } + link = calloc(1, 
sizeof(*link)); if (!link) { err = -ENOMEM; @@ -883,8 +1012,43 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct for (i = 0; i < target_cnt; i++) { struct usdt_target *target = &targets[i]; struct bpf_link *uprobe_link; + bool is_new; + int spec_id; + + /* Spec ID can be either reused or newly allocated. If it is + * newly allocated, we'll need to fill out spec map, otherwise + * entire spec should be valid and can be just used by a new + * uprobe. We reuse spec when USDT arg spec is identical. We + * also never share specs between two different USDT + * attachments ("links"), so all the reused specs already + * share USDT cookie value implicitly. + */ + err = allocate_spec_id(man, specs_hash, link, target, &spec_id, &is_new); + if (err) + goto err_out; + + if (is_new && bpf_map_update_elem(spec_map_fd, &spec_id, &target->spec, BPF_ANY)) { + err = -errno; + pr_warn("usdt: failed to set USDT spec #%d for '%s:%s' in '%s': %d\n", + spec_id, usdt_provider, usdt_name, path, err); + goto err_out; + } + if (!man->has_bpf_cookie && + bpf_map_update_elem(ip_map_fd, &target->abs_ip, &spec_id, BPF_NOEXIST)) { + err = -errno; + if (err == -EEXIST) { + pr_warn("usdt: IP collision detected for spec #%d for '%s:%s' in '%s'\n", + spec_id, usdt_provider, usdt_name, path); + } else { + pr_warn("usdt: failed to map IP 0x%lx to spec #%d for '%s:%s' in '%s': %d\n", + target->abs_ip, spec_id, usdt_provider, usdt_name, + path, err); + } + goto err_out; + } opts.ref_ctr_offset = target->sema_off; + opts.bpf_cookie = man->has_bpf_cookie ? 
spec_id : 0; uprobe_link = bpf_program__attach_uprobe_opts(prog, pid, path, target->rel_ip, &opts); err = libbpf_get_error(uprobe_link); @@ -900,6 +1064,7 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct } free(targets); + hashmap__free(specs_hash); elf_end(elf); close(fd); @@ -909,6 +1074,7 @@ err_out: bpf_link__destroy(&link->link); free(targets); + hashmap__free(specs_hash); if (elf) elf_end(elf); close(fd); -- cgit v1.2.3 From 4c59e584d1581b1bca143dda83d5c3e5baddbf20 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 4 Apr 2022 16:42:00 -0700 Subject: libbpf: Add x86-specific USDT arg spec parsing logic Add x86/x86_64-specific USDT argument specification parsing. Each architecture will require their own logic, as all this is arch-specific assembly-based notation. Architectures that libbpf doesn't support for USDTs will pr_warn() with specific error and return -ENOTSUP. We use sscanf() as a very powerful and easy to use string parser. Those spaces in sscanf's format string mean "skip any whitespaces", which is pretty nifty (and somewhat little known) feature. All this was tested on little-endian architecture, so bit shifts are probably off on big-endian, which our CI will hopefully prove. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Reviewed-by: Alan Maguire Reviewed-by: Dave Marchevsky Link: https://lore.kernel.org/bpf/20220404234202.331384-6-andrii@kernel.org --- tools/lib/bpf/usdt.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 2799387c5465..1bce2eab5e89 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -1168,8 +1168,113 @@ static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, return 0; } +/* Architecture-specific logic for parsing USDT argument location specs */ + +#if defined(__x86_64__) || defined(__i386__) + +static int calc_pt_regs_off(const char *reg_name) +{ + static struct { + const char *names[4]; + size_t pt_regs_off; + } reg_map[] = { +#if __x86_64__ +#define reg_off(reg64, reg32) offsetof(struct pt_regs, reg64) +#else +#define reg_off(reg64, reg32) offsetof(struct pt_regs, reg32) +#endif + { {"rip", "eip", "", ""}, reg_off(rip, eip) }, + { {"rax", "eax", "ax", "al"}, reg_off(rax, eax) }, + { {"rbx", "ebx", "bx", "bl"}, reg_off(rbx, ebx) }, + { {"rcx", "ecx", "cx", "cl"}, reg_off(rcx, ecx) }, + { {"rdx", "edx", "dx", "dl"}, reg_off(rdx, edx) }, + { {"rsi", "esi", "si", "sil"}, reg_off(rsi, esi) }, + { {"rdi", "edi", "di", "dil"}, reg_off(rdi, edi) }, + { {"rbp", "ebp", "bp", "bpl"}, reg_off(rbp, ebp) }, + { {"rsp", "esp", "sp", "spl"}, reg_off(rsp, esp) }, +#undef reg_off +#if __x86_64__ + { {"r8", "r8d", "r8w", "r8b"}, offsetof(struct pt_regs, r8) }, + { {"r9", "r9d", "r9w", "r9b"}, offsetof(struct pt_regs, r9) }, + { {"r10", "r10d", "r10w", "r10b"}, offsetof(struct pt_regs, r10) }, + { {"r11", "r11d", "r11w", "r11b"}, offsetof(struct pt_regs, r11) }, + { {"r12", "r12d", "r12w", "r12b"}, offsetof(struct pt_regs, r12) }, + { {"r13", "r13d", "r13w", "r13b"}, offsetof(struct pt_regs, r13) }, + { {"r14", "r14d", "r14w", "r14b"}, offsetof(struct 
pt_regs, r14) }, + { {"r15", "r15d", "r15w", "r15b"}, offsetof(struct pt_regs, r15) }, +#endif + }; + int i, j; + + for (i = 0; i < ARRAY_SIZE(reg_map); i++) { + for (j = 0; j < ARRAY_SIZE(reg_map[i].names); j++) { + if (strcmp(reg_name, reg_map[i].names[j]) == 0) + return reg_map[i].pt_regs_off; + } + } + + pr_warn("usdt: unrecognized register '%s'\n", reg_name); + return -ENOENT; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +{ + char *reg_name = NULL; + int arg_sz, len, reg_off; + long off; + + if (sscanf(arg_str, " %d @ %ld ( %%%m[^)] ) %n", &arg_sz, &off, &reg_name, &len) == 3) { + /* Memory dereference case, e.g., -4@-20(%rbp) */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = off; + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ %%%ms %n", &arg_sz, &reg_name, &len) == 2) { + /* Register read case, e.g., -4@%eax */ + arg->arg_type = USDT_ARG_REG; + arg->val_off = 0; + + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ $%ld %n", &arg_sz, &off, &len) == 2) { + /* Constant value case, e.g., 4@$71 */ + arg->arg_type = USDT_ARG_CONST; + arg->val_off = off; + arg->reg_off = 0; + } else { + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); + return -EINVAL; + } + + arg->arg_signed = arg_sz < 0; + if (arg_sz < 0) + arg_sz = -arg_sz; + + switch (arg_sz) { + case 1: case 2: case 4: case 8: + arg->arg_bitshift = 64 - arg_sz * 8; + break; + default: + pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", + arg_num, arg_str, arg_sz); + return -EINVAL; + } + + return len; +} + +#else + static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) { pr_warn("usdt: libbpf doesn't support USDTs on current architecture\n"); return -ENOTSUP; } + +#endif -- cgit v1.2.3
From a8d600f6bcd453f1807703b5a016212f5484ffa1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 6 Apr 2022 09:08:35 +0100 Subject: libbpf: Fix spelling mistake "libaries" -> "libraries" There is a spelling mistake in a pr_warn message. Fix it. Signed-off-by: Colin Ian King Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220406080835.14879-1-colin.i.king@gmail.com --- tools/lib/bpf/usdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 1bce2eab5e89..c5acf2824fcc 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -687,7 +687,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * * to shared libraries with no PID filter. */ if (pid < 0) { - pr_warn("usdt: attaching to shared libaries without specific PID is not supported on current kernel\n"); + pr_warn("usdt: attaching to shared libraries without specific PID is not supported on current kernel\n"); err = -ENOTSUP; goto err_out; } -- cgit v1.2.3 From a1c9d61b19cbc0b9618c0a0400c304ecb63221d5 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 6 Apr 2022 12:43:49 +0100 Subject: libbpf: Improve library identification for uprobe binary path resolution In the process of doing path resolution for uprobe attach, libraries are identified by matching a ".so" substring in the binary_path. This matches a lot of patterns that do not conform to library.so[.version] format, so instead match a ".so" _suffix_, and if that fails match a ".so." substring for the versioned library case. 
Suggested-by: Andrii Nakryiko Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1649245431-29956-2-git-send-email-alan.maguire@oracle.com --- tools/lib/bpf/libbpf.c | 2 +- tools/lib/bpf/libbpf_internal.h | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1111e9d16e01..c92226a150d0 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10766,7 +10766,7 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz) const char *search_paths[3] = {}; int i; - if (strstr(file, ".so")) { + if (str_has_sfx(file, ".so") || strstr(file, ".so.")) { search_paths[0] = getenv("LD_LIBRARY_PATH"); search_paths[1] = "/usr/lib64:/usr/lib"; search_paths[2] = arch_specific_lib_paths(); diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index dd0d4ccfa649..080272421f6c 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -103,6 +103,17 @@ #define str_has_pfx(str, pfx) \ (strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0) +/* suffix check */ +static inline bool str_has_sfx(const char *str, const char *sfx) +{ + size_t str_len = strlen(str); + size_t sfx_len = strlen(sfx); + + if (sfx_len <= str_len) + return strcmp(str + str_len - sfx_len, sfx); + return false; +} + /* Symbol versioning is different between static and shared library. * Properly versioned symbols are needed for shared library, but * only the symbol of the new version is needed for static library. 
-- cgit v1.2.3 From 90db26e6be01cea519d380c59db3491e75b96b7f Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 6 Apr 2022 12:43:50 +0100 Subject: libbpf: Improve string parsing for uprobe auto-attach For uprobe auto-attach, the parsing can be simplified for the SEC() name to a single sscanf(); the return value of the sscanf can then be used to distinguish between sections that simply specify "u[ret]probe" (and thus cannot auto-attach), those that specify "u[ret]probe/binary_path:function+offset" etc. Suggested-by: Andrii Nakryiko Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1649245431-29956-3-git-send-email-alan.maguire@oracle.com --- tools/lib/bpf/libbpf.c | 81 ++++++++++++++++++++------------------------------ 1 file changed, 33 insertions(+), 48 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index c92226a150d0..016ecdd1c3e1 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10913,60 +10913,45 @@ err_out: static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); - char *func, *probe_name, *func_end; - char *func_name, binary_path[512]; - unsigned long long raw_offset; - size_t offset = 0; - int n; + char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; + int n, ret = -EINVAL; + long offset = 0; *link = NULL; - opts.retprobe = str_has_pfx(prog->sec_name, "uretprobe"); - if (opts.retprobe) - probe_name = prog->sec_name + sizeof("uretprobe") - 1; - else - probe_name = prog->sec_name + sizeof("uprobe") - 1; - if (probe_name[0] == '/') - probe_name++; - - /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. 
*/ - if (strlen(probe_name) == 0) - return 0; - - snprintf(binary_path, sizeof(binary_path), "%s", probe_name); - /* ':' should be prior to function+offset */ - func_name = strrchr(binary_path, ':'); - if (!func_name) { - pr_warn("section '%s' missing ':function[+offset]' specification\n", + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li", + &probe_type, &binary_path, &func_name, &offset); + switch (n) { + case 1: + /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ + ret = 0; + break; + case 2: + pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n", + prog->name, prog->sec_name); + break; + case 3: + case 4: + opts.retprobe = strcmp(probe_type, "uretprobe") == 0; + if (opts.retprobe && offset != 0) { + pr_warn("prog '%s': uretprobes do not support offset specification\n", + prog->name); + break; + } + opts.func_name = func_name; + *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts); + ret = libbpf_get_error(*link); + break; + default: + pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, prog->sec_name); - return -EINVAL; - } - func_name[0] = '\0'; - func_name++; - n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); - if (n < 1) { - pr_warn("uprobe name '%s' is invalid\n", func_name); - return -EINVAL; - } - if (opts.retprobe && offset != 0) { - free(func); - pr_warn("uretprobes do not support offset specification\n"); - return -EINVAL; - } - - /* Is func a raw address? 
*/ - errno = 0; - raw_offset = strtoull(func, &func_end, 0); - if (!errno && !*func_end) { - free(func); - func = NULL; - offset = (size_t)raw_offset; + break; } - opts.func_name = func; + free(probe_type); + free(binary_path); + free(func_name); - *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts); - free(func); - return libbpf_get_error(*link); + return ret; } struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, -- cgit v1.2.3 From e58c5c9717460851047f63b8615ea0760a6f3a2e Mon Sep 17 00:00:00 2001 From: Haowen Bai Date: Thu, 7 Apr 2022 10:38:17 +0800 Subject: libbpf: Potential NULL dereference in usdt_manager_attach_usdt() link could be null but still dereference bpf_link__destroy(&link->link) and it will lead to a null pointer access. Signed-off-by: Haowen Bai Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1649299098-2069-1-git-send-email-baihaowen@meizu.com --- tools/lib/bpf/usdt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index c5acf2824fcc..bb1e88613343 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -1071,8 +1071,8 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct return &link->link; err_out: - bpf_link__destroy(&link->link); - + if (link) + bpf_link__destroy(&link->link); free(targets); hashmap__free(specs_hash); if (elf) -- cgit v1.2.3 From ded6dffaed5edc68f1e64b523353da14db673460 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 7 Apr 2022 13:38:42 -0700 Subject: libbpf: Fix use #ifdef instead of #if to avoid compiler warning As reported by Naresh: perf build errors on i386 [1] on Linux next-20220407 [2] usdt.c:1181:5: error: "__x86_64__" is not defined, evaluates to 0 [-Werror=undef] 1181 | #if __x86_64__ | ^~~~~~~~~~ usdt.c:1196:5: error: "__x86_64__" is not defined, evaluates to 0 [-Werror=undef] 1196 | #if __x86_64__ | ^~~~~~~~~~ 
cc1: all warnings being treated as errors Use #ifdef instead of #if to avoid this. Fixes: 4c59e584d158 ("libbpf: Add x86-specific USDT arg spec parsing logic") Reported-by: Naresh Kamboju Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220407203842.3019904-1-andrii@kernel.org --- tools/lib/bpf/usdt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index bb1e88613343..b699e720136a 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -1178,7 +1178,7 @@ static int calc_pt_regs_off(const char *reg_name) const char *names[4]; size_t pt_regs_off; } reg_map[] = { -#if __x86_64__ +#ifdef __x86_64__ #define reg_off(reg64, reg32) offsetof(struct pt_regs, reg64) #else #define reg_off(reg64, reg32) offsetof(struct pt_regs, reg32) @@ -1193,7 +1193,7 @@ static int calc_pt_regs_off(const char *reg_name) { {"rbp", "ebp", "bp", "bpl"}, reg_off(rbp, ebp) }, { {"rsp", "esp", "sp", "spl"}, reg_off(rsp, esp) }, #undef reg_off -#if __x86_64__ +#ifdef __x86_64__ { {"r8", "r8d", "r8w", "r8b"}, offsetof(struct pt_regs, r8) }, { {"r9", "r9d", "r9w", "r9b"}, offsetof(struct pt_regs, r9) }, { {"r10", "r10d", "r10w", "r10b"}, offsetof(struct pt_regs, r10) }, -- cgit v1.2.3 From e1b6df598aa86e351437135c76ed38c9a5e3d397 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 7 Apr 2022 23:44:09 +0200 Subject: libbpf: Minor style improvements in USDT code Fix several typos and references to non-existing headers. Also use __BYTE_ORDER__ instead of __BYTE_ORDER for consistency with the rest of the bpf code - see commit 45f2bebc8079 ("libbpf: Fix endianness detection in BPF_CORE_READ_BITFIELD_PROBED()") for rationale). 
Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220407214411.257260-2-iii@linux.ibm.com --- tools/lib/bpf/usdt.bpf.h | 4 ++-- tools/lib/bpf/usdt.c | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h index 60237acf6b02..420d743734e1 100644 --- a/tools/lib/bpf/usdt.bpf.h +++ b/tools/lib/bpf/usdt.bpf.h @@ -166,7 +166,7 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) case BPF_USDT_ARG_REG_DEREF: /* Arg is in memory addressed by register, plus some offset * (e.g., "-4@-1204(%rbp)" in USDT arg spec). Register is - * identified lik with BPF_USDT_ARG_REG case, and the offset + * identified like with BPF_USDT_ARG_REG case, and the offset * is in arg_spec->val_off. We first fetch register contents * from pt_regs, then do another user-space probe read to * fetch argument value itself. @@ -198,7 +198,7 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) /* Retrieve user-specified cookie value provided during attach as * bpf_usdt_opts.usdt_cookie. This serves the same purpose as BPF cookie * returned by bpf_get_attach_cookie(). Libbpf's support for USDT is itself - * utilizaing BPF cookies internally, so user can't use BPF cookie directly + * utilizing BPF cookies internally, so user can't use BPF cookie directly * for USDT programs and has to use bpf_usdt_cookie() API instead. */ static inline __noinline diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index b699e720136a..3dcb79f1e3a7 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -108,7 +108,7 @@ * code through spec map. This allows BPF applications to quickly fetch the * actual value at runtime using a simple BPF-side code. * - * With basics out of the way, let's go over less immeditately obvious aspects + * With basics out of the way, let's go over less immediately obvious aspects * of supporting USDTs. 
* * First, there is no special USDT BPF program type. It is actually just @@ -189,14 +189,14 @@ #define USDT_NOTE_TYPE 3 #define USDT_NOTE_NAME "stapsdt" -/* should match exactly enum __bpf_usdt_arg_type from bpf_usdt.bpf.h */ +/* should match exactly enum __bpf_usdt_arg_type from usdt.bpf.h */ enum usdt_arg_type { USDT_ARG_CONST, USDT_ARG_REG, USDT_ARG_REG_DEREF, }; -/* should match exactly struct __bpf_usdt_arg_spec from bpf_usdt.bpf.h */ +/* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */ struct usdt_arg_spec { __u64 val_off; enum usdt_arg_type arg_type; @@ -328,9 +328,9 @@ static int sanity_check_usdt_elf(Elf *elf, const char *path) return -EBADF; } -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ endianness = ELFDATA2LSB; -#elif __BYTE_ORDER == __BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ endianness = ELFDATA2MSB; #else # error "Unrecognized __BYTE_ORDER__" @@ -843,7 +843,7 @@ static int bpf_link_usdt_detach(struct bpf_link *link) sizeof(*new_free_ids)); /* If we couldn't resize free_spec_ids, we'll just leak * a bunch of free IDs; this is very unlikely to happen and if - * system is so exausted on memory, it's the least of user's + * system is so exhausted on memory, it's the least of user's * concerns, probably. * So just do our best here to return those IDs to usdt_manager. */ -- cgit v1.2.3 From 6f403d9d530635f533577d37929c61474d6c5d7f Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 7 Apr 2022 23:44:10 +0200 Subject: libbpf: Make BPF-side of USDT support work on big-endian machines BPF_USDT_ARG_REG_DEREF handling always reads 8 bytes, regardless of the actual argument size. On little-endian the relevant argument bits end up in the lower bits of val, and later on the code that handles all the argument types expects them to be there. On big-endian they end up in the upper bits of val, breaking that expectation. Fix by right-shifting val on big-endian. 
Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220407214411.257260-3-iii@linux.ibm.com --- tools/lib/bpf/usdt.bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h index 420d743734e1..881a2422a8ef 100644 --- a/tools/lib/bpf/usdt.bpf.h +++ b/tools/lib/bpf/usdt.bpf.h @@ -177,6 +177,9 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) err = bpf_probe_read_user(&val, sizeof(val), (void *)val + arg_spec->val_off); if (err) return err; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + val >>= arg_spec->arg_bitshift; +#endif break; default: return -EINVAL; -- cgit v1.2.3 From bd022685bd441056365e9a44a6bf940f45054250 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 7 Apr 2022 23:44:11 +0200 Subject: libbpf: Add s390-specific USDT arg spec parsing logic The logic is superficially similar to that of x86, but the small differences (no need for register table and dynamic allocation of register names, no $ sign before constants) make maintaining a common implementation too burdensome. Therefore simply add a s390x-specific version of parse_usdt_arg(). Note that while bcc supports index registers, this patch does not. This should not be a problem in most cases, since s390 uses a default value "nor" for STAP_SDT_ARG_CONSTRAINT. 
Signed-off-by: Ilya Leoshkevich Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220407214411.257260-4-iii@linux.ibm.com --- tools/lib/bpf/usdt.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 3dcb79f1e3a7..30c495a6554c 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -1269,6 +1269,61 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec return len; } +#elif defined(__s390x__) + +/* Do not support __s390__ for now, since user_pt_regs is broken with -m31. */ + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +{ + unsigned int reg; + int arg_sz, len; + long off; + + if (sscanf(arg_str, " %d @ %ld ( %%r%u ) %n", &arg_sz, &off, &reg, &len) == 3) { + /* Memory dereference case, e.g., -2@-28(%r15) */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = off; + if (reg > 15) { + pr_warn("usdt: unrecognized register '%%r%u'\n", reg); + return -EINVAL; + } + arg->reg_off = offsetof(user_pt_regs, gprs[reg]); + } else if (sscanf(arg_str, " %d @ %%r%u %n", &arg_sz, &reg, &len) == 2) { + /* Register read case, e.g., -8@%r0 */ + arg->arg_type = USDT_ARG_REG; + arg->val_off = 0; + if (reg > 15) { + pr_warn("usdt: unrecognized register '%%r%u'\n", reg); + return -EINVAL; + } + arg->reg_off = offsetof(user_pt_regs, gprs[reg]); + } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) { + /* Constant value case, e.g., 4@71 */ + arg->arg_type = USDT_ARG_CONST; + arg->val_off = off; + arg->reg_off = 0; + } else { + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); + return -EINVAL; + } + + arg->arg_signed = arg_sz < 0; + if (arg_sz < 0) + arg_sz = -arg_sz; + + switch (arg_sz) { + case 1: case 2: case 4: case 8: + arg->arg_bitshift = 64 - arg_sz * 8; + break; + default: + pr_warn("usdt: unsupported arg #%d (spec '%s') size:
%d\n", + arg_num, arg_str, arg_sz); + return -EINVAL; + } + + return len; +} + #else static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) -- cgit v1.2.3 From 3c0dfe6e4c43ea0cf252ff4cb7a332423866d488 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 7 Apr 2022 16:04:45 -0700 Subject: libbpf: Use strlcpy() in path resolution fallback logic Coverity static analyzer complains that strcpy() can cause buffer overflow. Use libbpf_strlcpy() instead to be 100% sure this doesn't happen. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220407230446.3980075-1-andrii@kernel.org --- tools/lib/bpf/usdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 30c495a6554c..acf2d99a9e77 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -456,7 +456,7 @@ static int parse_lib_segs(int pid, const char *lib_path, struct elf_seg **segs, if (!realpath(lib_path, path)) { pr_warn("usdt: failed to get absolute path of '%s' (err %d), using path as is...\n", lib_path, -errno); - strcpy(path, lib_path); + libbpf_strlcpy(path, lib_path, sizeof(path)); } proceed: -- cgit v1.2.3 From 3a06ec0a996dc8c4bc518f0b6bedc3587dd15169 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 7 Apr 2022 16:04:46 -0700 Subject: libbpf: Allow WEAK and GLOBAL bindings during BTF fixup During BTF fix up for global variables, global variable can be global weak and will have STB_WEAK binding in ELF. Support such global variables in addition to non-weak ones. This is not the problem when using BPF static linking, as BPF static linker "fixes up" BTF during generation so that libbpf doesn't have to do it anymore during bpf_object__open(), which led to this not being noticed for a while, along with a pretty rare (currently) use of __weak variables and maps. 
Reported-by: Hengqi Chen Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220407230446.3980075-2-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 016ecdd1c3e1..9deb1fc67f19 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1401,8 +1401,11 @@ static int find_elf_var_offset(const struct bpf_object *obj, const char *name, _ for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) { Elf64_Sym *sym = elf_sym_by_idx(obj, si); - if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL || - ELF64_ST_TYPE(sym->st_info) != STT_OBJECT) + if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT) + continue; + + if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && + ELF64_ST_BIND(sym->st_info) != STB_WEAK) continue; sname = elf_sym_str(obj, sym->st_name); -- cgit v1.2.3 From e89d57d938c8fa80c457982154ed6110804814fe Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 8 Apr 2022 11:14:23 -0700 Subject: libbpf: Don't error out on CO-RE relos for overriden weak subprogs During BPF static linking, all the ELF relocations and .BTF.ext information (including CO-RE relocations) are preserved for __weak subprograms that were logically overriden by either previous weak subprogram instance or by corresponding "strong" (non-weak) subprogram. This is just how native user-space linkers work, nothing new. But libbpf is over-zealous when processing CO-RE relocation to error out when CO-RE relocation belonging to such eliminated weak subprogram is encountered. Instead of erroring out on this expected situation, log debug-level message and skip the relocation. 
Fixes: db2b8b06423c ("libbpf: Support CO-RE relocations for multi-prog sections") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220408181425.2287230-2-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 9deb1fc67f19..465b7c0996f1 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5687,10 +5687,17 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) insn_idx = rec->insn_off / BPF_INSN_SZ; prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); if (!prog) { - pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n", - sec_name, insn_idx, i); - err = -EINVAL; - goto out; + /* When __weak subprog is "overridden" by another instance + * of the subprog from a different object file, linker still + * appends all the .BTF.ext info that used to belong to that + * eliminated subprogram. + * This is similar to what x86-64 linker does for relocations. + * So just ignore such relocations just like we ignore + * subprog instructions when discovering subprograms. + */ + pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n", + sec_name, i, insn_idx); + continue; } /* no need to apply CO-RE relocation if the program is * not going to be loaded -- cgit v1.2.3 From 2fa5b0f290e19bb34393e1983be511aab18b683e Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 8 Apr 2022 11:14:24 -0700 Subject: libbpf: Use weak hidden modifier for USDT BPF-side API functions Use __weak __hidden for bpf_usdt_xxx() APIs instead of much more confusing `static inline __noinline`. This was previously impossible due to libbpf erroring out on CO-RE relocations pointing to eliminated weak subprogs. 
Now that previous patch fixed this issue, switch back to __weak __hidden as it's a more direct way of specifying the desired behavior. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220408181425.2287230-3-andrii@kernel.org --- tools/lib/bpf/usdt.bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h index 881a2422a8ef..4181fddb3687 100644 --- a/tools/lib/bpf/usdt.bpf.h +++ b/tools/lib/bpf/usdt.bpf.h @@ -103,7 +103,7 @@ int __bpf_usdt_spec_id(struct pt_regs *ctx) } /* Return number of USDT arguments defined for currently traced USDT. */ -static inline __noinline +__weak __hidden int bpf_usdt_arg_cnt(struct pt_regs *ctx) { struct __bpf_usdt_spec *spec; @@ -124,7 +124,7 @@ int bpf_usdt_arg_cnt(struct pt_regs *ctx) * Returns 0 on success; negative error, otherwise. * On error *res is guaranteed to be set to zero. */ -static inline __noinline +__weak __hidden int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) { struct __bpf_usdt_spec *spec; @@ -204,7 +204,7 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) * utilizing BPF cookies internally, so user can't use BPF cookie directly * for USDT programs and has to use bpf_usdt_cookie() API instead. */ -static inline __noinline +__weak __hidden long bpf_usdt_cookie(struct pt_regs *ctx) { struct __bpf_usdt_spec *spec; -- cgit v1.2.3 From 0738599856542bab0ebcd73cab9d8f15bddedcee Mon Sep 17 00:00:00 2001 From: Vladimir Isaev Date: Sat, 9 Apr 2022 01:44:42 +0300 Subject: libbpf: Add ARC support to bpf_tracing.h Add PT_REGS macros suitable for ARCompact and ARCv2. 
Signed-off-by: Vladimir Isaev Signed-off-by: Sergey Matyukevich Signed-off-by: Andrii Nakryiko Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20220408224442.599566-1-geomatsi@gmail.com --- tools/include/uapi/asm/bpf_perf_event.h | 2 ++ tools/lib/bpf/bpf_tracing.h | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'tools/lib') diff --git a/tools/include/uapi/asm/bpf_perf_event.h b/tools/include/uapi/asm/bpf_perf_event.h index 39acc149d843..d7dfeab0d71a 100644 --- a/tools/include/uapi/asm/bpf_perf_event.h +++ b/tools/include/uapi/asm/bpf_perf_event.h @@ -1,5 +1,7 @@ #if defined(__aarch64__) #include "../../arch/arm64/include/uapi/asm/bpf_perf_event.h" +#elif defined(__arc__) +#include "../../arch/arc/include/uapi/asm/bpf_perf_event.h" #elif defined(__s390__) #include "../../arch/s390/include/uapi/asm/bpf_perf_event.h" #elif defined(__riscv) diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index e3a8c947e89f..01ce121c302d 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -27,6 +27,9 @@ #elif defined(__TARGET_ARCH_riscv) #define bpf_target_riscv #define bpf_target_defined +#elif defined(__TARGET_ARCH_arc) + #define bpf_target_arc + #define bpf_target_defined #else /* Fall back to what the compiler says */ @@ -54,6 +57,9 @@ #elif defined(__riscv) && __riscv_xlen == 64 #define bpf_target_riscv #define bpf_target_defined +#elif defined(__arc__) + #define bpf_target_arc + #define bpf_target_defined #endif /* no compiler target */ #endif @@ -233,6 +239,23 @@ struct pt_regs___arm64 { /* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. 
*/ #define PT_REGS_SYSCALL_REGS(ctx) ctx +#elif defined(bpf_target_arc) + +/* arc provides struct user_pt_regs instead of struct pt_regs to userspace */ +#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x)) +#define __PT_PARM1_REG scratch.r0 +#define __PT_PARM2_REG scratch.r1 +#define __PT_PARM3_REG scratch.r2 +#define __PT_PARM4_REG scratch.r3 +#define __PT_PARM5_REG scratch.r4 +#define __PT_RET_REG scratch.blink +#define __PT_FP_REG __unsupported__ +#define __PT_RC_REG scratch.r0 +#define __PT_SP_REG scratch.sp +#define __PT_IP_REG scratch.ret +/* arc does not select ARCH_HAS_SYSCALL_WRAPPER. */ +#define PT_REGS_SYSCALL_REGS(ctx) ctx + #endif #if defined(bpf_target_defined) -- cgit v1.2.3 From d252a4a499a07bec21c65873f605c3a1ef52ffed Mon Sep 17 00:00:00 2001 From: Runqing Yang Date: Sat, 9 Apr 2022 22:49:28 +0800 Subject: libbpf: Fix a bug with checking bpf_probe_read_kernel() support in old kernels Background: Libbpf automatically replaces calls to BPF bpf_probe_read_{kernel,user}[_str]() helpers with bpf_probe_read[_str](), if libbpf detects that kernel doesn't support new APIs. Specifically, libbpf invokes the probe_kern_probe_read_kernel function to load a small eBPF program into the kernel in which bpf_probe_read_kernel API is invoked and lets the kernel check whether the new API is valid. If the loading fails, libbpf considers the new API invalid and replaces it with the old API.
static int probe_kern_probe_read_kernel(void) { struct bpf_insn insns[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }; int fd, insn_cnt = ARRAY_SIZE(insns); fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); return probe_fd(fd); } Bug: On older kernel versions [0], the kernel checks whether the version number provided in the bpf syscall matches the LINUX_VERSION_CODE. If not matched, the bpf syscall fails. However, the probe_kern_probe_read_kernel code does not set the kernel version number provided to the bpf syscall, which causes the loading process to always fail for old versions. It means that libbpf will replace the new API with the old one even if the kernel supports the new one. Solution: After a discussion in [1], the solution is using BPF_PROG_TYPE_TRACEPOINT program type instead of BPF_PROG_TYPE_KPROBE because the kernel does not enforce the version check for tracepoint programs. I tested the patch on old kernels (4.18 and 4.19) and it works well. 
[0] https://elixir.bootlin.com/linux/v4.19/source/kernel/bpf/syscall.c#L1360 [1] Closes: https://github.com/libbpf/libbpf/issues/473 Signed-off-by: Runqing Yang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220409144928.27499-1-rainkin1993@gmail.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 465b7c0996f1..bf4f7ac54ebf 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4594,7 +4594,7 @@ static int probe_kern_probe_read_kernel(void) }; int fd, insn_cnt = ARRAY_SIZE(insns); - fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); + fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); return probe_fd(fd); } -- cgit v1.2.3 From 0f8619929c572609f7cdfa366d0424c2c2552e60 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Mon, 11 Apr 2022 16:21:36 +0100 Subject: libbpf: Usdt aarch64 arg parsing support Parsing of USDT arguments is architecture-specific. On aarch64 it is relatively easy since registers used are x[0-31] and sp. Format is slightly different compared to x86_64. Possible forms are: - "size@[reg[,offset]]" for dereferences, e.g. "-8@[sp,76]" and "-4@[sp]"; - "size@reg" for register values, e.g. "-4@x0"; - "size@value" for raw values, e.g. "-8@1". 
Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1649690496-1902-2-git-send-email-alan.maguire@oracle.com --- tools/lib/bpf/usdt.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index acf2d99a9e77..934c25301ac1 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -1324,6 +1324,82 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec return len; } +#elif defined(__aarch64__) + +static int calc_pt_regs_off(const char *reg_name) +{ + int reg_num; + + if (sscanf(reg_name, "x%d", &reg_num) == 1) { + if (reg_num >= 0 && reg_num < 31) + return offsetof(struct user_pt_regs, regs[reg_num]); + } else if (strcmp(reg_name, "sp") == 0) { + return offsetof(struct user_pt_regs, sp); + } + pr_warn("usdt: unrecognized register '%s'\n", reg_name); + return -ENOENT; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +{ + char *reg_name = NULL; + int arg_sz, len, reg_off; + long off; + + if (sscanf(arg_str, " %d @ \[ %m[a-z0-9], %ld ] %n", &arg_sz, &reg_name, &off, &len) == 3) { + /* Memory dereference case, e.g., -4@[sp, 96] */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = off; + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ \[ %m[a-z0-9] ] %n", &arg_sz, &reg_name, &len) == 2) { + /* Memory dereference case, e.g., -4@[sp] */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = 0; + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) { + /* Constant value case, e.g., 4@5 */ + arg->arg_type = USDT_ARG_CONST; + arg->val_off = off; + arg->reg_off = 0; + } else if (sscanf(arg_str, " %d @ 
%m[a-z0-9] %n", &arg_sz, &reg_name, &len) == 2) { + /* Register read case, e.g., -8@x4 */ + arg->arg_type = USDT_ARG_REG; + arg->val_off = 0; + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else { + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); + return -EINVAL; + } + + arg->arg_signed = arg_sz < 0; + if (arg_sz < 0) + arg_sz = -arg_sz; + + switch (arg_sz) { + case 1: case 2: case 4: case 8: + arg->arg_bitshift = 64 - arg_sz * 8; + break; + default: + pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", + arg_num, arg_str, arg_sz); + return -EINVAL; + } + + return len; +} + #else static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) -- cgit v1.2.3 From a3820c48111247f4ec2ca2949597f8fa57d2c424 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 18 Apr 2022 17:24:50 -0700 Subject: libbpf: Support opting out from autoloading BPF programs declaratively Establish SEC("?abc") naming convention (i.e., adding question mark in front of otherwise normal section name) that allows to set corresponding program's autoload property to false. This is effectively just a declarative way to do bpf_program__set_autoload(prog, false). Having a way to do this declaratively in BPF code itself is useful and convenient for various scenarios. E.g., for testing, when BPF object consists of multiple independent BPF programs that each needs to be tested separately. Opting out all of them by default and then setting autoload to true for just one of them at a time simplifies testing code (see next patch for few conversions in BPF selftests taking advantage of this new feature). 
Another real-world use case is in libbpf-tools for cases when different BPF programs have to be picked depending on particulars of the host kernel due to various incompatible changes (like kernel function renames or signature change, or to pick kprobe vs fentry depending on corresponding kernel support for the latter). Marking all the different BPF program candidates as non-autoloaded declaratively makes this more obvious in BPF source code and allows simpler code in user-space code. When BPF program marked as SEC("?abc") it is otherwise treated just like SEC("abc") and bpf_program__section_name() reported will be "abc". Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220419002452.632125-1-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index bf4f7ac54ebf..68cc134d070d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -302,7 +302,7 @@ struct bpf_program { void *priv; bpf_program_clear_priv_t clear_priv; - bool load; + bool autoload; bool mark_btf_static; enum bpf_prog_type type; enum bpf_attach_type expected_attach_type; @@ -672,7 +672,18 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, prog->insns_cnt = prog->sec_insn_cnt; prog->type = BPF_PROG_TYPE_UNSPEC; - prog->load = true; + + /* libbpf's convention for SEC("?abc...") is that it's just like + * SEC("abc...") but the corresponding bpf_program starts out with + * autoload set to false. + */ + if (sec_name[0] == '?') { + prog->autoload = false; + /* from now on forget there was ? 
in section name */ + sec_name++; + } else { + prog->autoload = true; + } prog->instances.fds = NULL; prog->instances.nr = -1; @@ -2927,7 +2938,7 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) } bpf_object__for_each_program(prog, obj) { - if (!prog->load) + if (!prog->autoload) continue; if (prog_needs_vmlinux_btf(prog)) return true; @@ -5702,7 +5713,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) /* no need to apply CO-RE relocation if the program is * not going to be loaded */ - if (!prog->load) + if (!prog->autoload) continue; /* adjust insn_idx from section frame of reference to the local @@ -6363,7 +6374,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) */ if (prog_is_subprog(obj, prog)) continue; - if (!prog->load) + if (!prog->autoload) continue; err = bpf_object__relocate_calls(obj, prog); @@ -6378,7 +6389,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) prog = &obj->programs[i]; if (prog_is_subprog(obj, prog)) continue; - if (!prog->load) + if (!prog->autoload) continue; err = bpf_object__relocate_data(obj, prog); if (err) { @@ -6975,7 +6986,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) prog = &obj->programs[i]; if (prog_is_subprog(obj, prog)) continue; - if (!prog->load) { + if (!prog->autoload) { pr_debug("prog '%s': skipped loading\n", prog->name); continue; } @@ -8455,7 +8466,7 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) bool bpf_program__autoload(const struct bpf_program *prog) { - return prog->load; + return prog->autoload; } int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) @@ -8463,7 +8474,7 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) if (prog->obj->loaded) return libbpf_err(-EINVAL); - prog->load = autoload; + prog->autoload = autoload; return 0; } @@ -12665,7 +12676,7 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton 
*s) struct bpf_program *prog = *s->progs[i].prog; struct bpf_link **link = s->progs[i].link; - if (!prog->load) + if (!prog->autoload) continue; /* auto-attaching not supported for this program */ -- cgit v1.2.3 From 5af25a410acb8d34acb11024d752f0ea3491decf Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Tue, 19 Apr 2022 22:52:37 +0800 Subject: libbpf: Fix usdt_cookie being cast to 32 bits The usdt_cookie is defined as __u64, which should not be used as a long type because it will be cast to 32 bits in 32-bit platforms. Signed-off-by: Pu Lehui Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220419145238.482134-2-pulehui@huawei.com --- tools/lib/bpf/libbpf.c | 2 +- tools/lib/bpf/libbpf_internal.h | 2 +- tools/lib/bpf/usdt.c | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 68cc134d070d..8375021800f3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10993,7 +10993,7 @@ struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, char resolved_path[512]; struct bpf_object *obj = prog->obj; struct bpf_link *link; - long usdt_cookie; + __u64 usdt_cookie; int err; if (!OPTS_VALID(opts, bpf_uprobe_opts)) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 080272421f6c..054cd8e93d7c 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -571,6 +571,6 @@ struct bpf_link * usdt_manager_attach_usdt(struct usdt_manager *man, const struct bpf_program *prog, pid_t pid, const char *path, const char *usdt_provider, const char *usdt_name, - long usdt_cookie); + __u64 usdt_cookie); #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 934c25301ac1..8e77a7260113 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -557,10 +557,10 @@ static int parse_usdt_note(Elf *elf, const char *path, long base_addr, GElf_Nhdr 
*nhdr, const char *data, size_t name_off, size_t desc_off, struct usdt_note *usdt_note); -static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, long usdt_cookie); +static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie); static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *path, pid_t pid, - const char *usdt_provider, const char *usdt_name, long usdt_cookie, + const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie, struct usdt_target **out_targets, size_t *out_target_cnt) { size_t off, name_off, desc_off, seg_cnt = 0, lib_seg_cnt = 0, target_cnt = 0; @@ -939,7 +939,7 @@ static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct bpf_program *prog, pid_t pid, const char *path, const char *usdt_provider, const char *usdt_name, - long usdt_cookie) + __u64 usdt_cookie) { int i, fd, err, spec_map_fd, ip_map_fd; LIBBPF_OPTS(bpf_uprobe_opts, opts); @@ -1141,7 +1141,7 @@ static int parse_usdt_note(Elf *elf, const char *path, long base_addr, static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg); -static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, long usdt_cookie) +static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie) { const char *s; int len; -- cgit v1.2.3 From 58ca8b0572cd3bbaac60d14d2b1b4f38b389ad93 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Tue, 19 Apr 2022 22:52:38 +0800 Subject: libbpf: Support riscv USDT argument parsing logic Add riscv-specific USDT argument specification parsing logic. riscv USDT argument format is shown below: - Memory dereference case: "size@off(reg)", e.g. "-8@-88(s0)" - Constant value case: "size@val", e.g. "4@5" - Register read case: "size@reg", e.g. 
"-8@a1" s8 will be marked as poison while it's a reg of riscv, we need to alias it in advance. Both RV32 and RV64 have been tested. Signed-off-by: Pu Lehui Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220419145238.482134-3-pulehui@huawei.com --- tools/lib/bpf/usdt.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 8e77a7260113..f1c9339cfbbc 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -10,6 +10,11 @@ #include #include +/* s8 will be marked as poison while it's a reg of riscv */ +#if defined(__riscv) +#define rv_s8 s8 +#endif + #include "bpf.h" #include "libbpf.h" #include "libbpf_common.h" @@ -1400,6 +1405,108 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec return len; } +#elif defined(__riscv) + +static int calc_pt_regs_off(const char *reg_name) +{ + static struct { + const char *name; + size_t pt_regs_off; + } reg_map[] = { + { "ra", offsetof(struct user_regs_struct, ra) }, + { "sp", offsetof(struct user_regs_struct, sp) }, + { "gp", offsetof(struct user_regs_struct, gp) }, + { "tp", offsetof(struct user_regs_struct, tp) }, + { "a0", offsetof(struct user_regs_struct, a0) }, + { "a1", offsetof(struct user_regs_struct, a1) }, + { "a2", offsetof(struct user_regs_struct, a2) }, + { "a3", offsetof(struct user_regs_struct, a3) }, + { "a4", offsetof(struct user_regs_struct, a4) }, + { "a5", offsetof(struct user_regs_struct, a5) }, + { "a6", offsetof(struct user_regs_struct, a6) }, + { "a7", offsetof(struct user_regs_struct, a7) }, + { "s0", offsetof(struct user_regs_struct, s0) }, + { "s1", offsetof(struct user_regs_struct, s1) }, + { "s2", offsetof(struct user_regs_struct, s2) }, + { "s3", offsetof(struct user_regs_struct, s3) }, + { "s4", offsetof(struct user_regs_struct, s4) }, + { "s5", offsetof(struct user_regs_struct, s5) }, + { "s6", offsetof(struct 
user_regs_struct, s6) }, + { "s7", offsetof(struct user_regs_struct, s7) }, + { "s8", offsetof(struct user_regs_struct, rv_s8) }, + { "s9", offsetof(struct user_regs_struct, s9) }, + { "s10", offsetof(struct user_regs_struct, s10) }, + { "s11", offsetof(struct user_regs_struct, s11) }, + { "t0", offsetof(struct user_regs_struct, t0) }, + { "t1", offsetof(struct user_regs_struct, t1) }, + { "t2", offsetof(struct user_regs_struct, t2) }, + { "t3", offsetof(struct user_regs_struct, t3) }, + { "t4", offsetof(struct user_regs_struct, t4) }, + { "t5", offsetof(struct user_regs_struct, t5) }, + { "t6", offsetof(struct user_regs_struct, t6) }, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(reg_map); i++) { + if (strcmp(reg_name, reg_map[i].name) == 0) + return reg_map[i].pt_regs_off; + } + + pr_warn("usdt: unrecognized register '%s'\n", reg_name); + return -ENOENT; +} + +static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) +{ + char *reg_name = NULL; + int arg_sz, len, reg_off; + long off; + + if (sscanf(arg_str, " %d @ %ld ( %m[a-z0-9] ) %n", &arg_sz, &off, &reg_name, &len) == 3) { + /* Memory dereference case, e.g., -8@-88(s0) */ + arg->arg_type = USDT_ARG_REG_DEREF; + arg->val_off = off; + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) { + /* Constant value case, e.g., 4@5 */ + arg->arg_type = USDT_ARG_CONST; + arg->val_off = off; + arg->reg_off = 0; + } else if (sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, &reg_name, &len) == 2) { + /* Register read case, e.g., -8@a1 */ + arg->arg_type = USDT_ARG_REG; + arg->val_off = 0; + reg_off = calc_pt_regs_off(reg_name); + free(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; + } else { + pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str); + return -EINVAL; + } + + arg->arg_signed = arg_sz < 0; + if (arg_sz < 0) + 
arg_sz = -arg_sz; + + switch (arg_sz) { + case 1: case 2: case 4: case 8: + arg->arg_bitshift = 64 - arg_sz * 8; + break; + default: + pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n", + arg_num, arg_str, arg_sz); + return -EINVAL; + } + + return len; +} + #else static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg) -- cgit v1.2.3 From 93442f132b94721c7143ace7f43e51a9025f46fd Mon Sep 17 00:00:00 2001 From: Grant Seltzer Date: Wed, 20 Apr 2022 12:12:24 -0400 Subject: libbpf: Add error returns to two API functions This adds an error return to the following API functions: - bpf_program__set_expected_attach_type() - bpf_program__set_type() In both cases, the error occurs when the BPF object has already been loaded when the function is called. In this case -EBUSY is returned. Signed-off-by: Grant Seltzer Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220420161226.86803-1-grantseltzer@gmail.com --- tools/lib/bpf/libbpf.c | 12 ++++++++++-- tools/lib/bpf/libbpf.h | 6 +++--- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8375021800f3..342340aee948 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8562,9 +8562,13 @@ enum bpf_prog_type bpf_program__type(const struct bpf_program *prog) return prog->type; } -void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) +int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) { + if (prog->obj->loaded) + return libbpf_err(-EBUSY); + prog->type = type; + return 0; } static bool bpf_program__is_type(const struct bpf_program *prog, @@ -8609,10 +8613,14 @@ enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program return prog->expected_attach_type; } -void bpf_program__set_expected_attach_type(struct bpf_program *prog, +int bpf_program__set_expected_attach_type(struct 
bpf_program *prog, enum bpf_attach_type type) { + if (prog->obj->loaded) + return libbpf_err(-EBUSY); + prog->expected_attach_type = type; + return 0; } __u32 bpf_program__flags(const struct bpf_program *prog) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 63d66f1adf1a..66735623ca63 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -686,12 +686,12 @@ LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog); LIBBPF_API enum bpf_prog_type bpf_program__type(const struct bpf_program *prog); -LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, - enum bpf_prog_type type); +LIBBPF_API int bpf_program__set_type(struct bpf_program *prog, + enum bpf_prog_type type); LIBBPF_API enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog); -LIBBPF_API void +LIBBPF_API int bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type); -- cgit v1.2.3 From df286716321350d1ca370d5737acf5a10b20ee9e Mon Sep 17 00:00:00 2001 From: Grant Seltzer Date: Wed, 20 Apr 2022 12:12:25 -0400 Subject: libbpf: Update API functions usage to check error This updates usage of the following API functions within libbpf so their newly added error return is checked: - bpf_program__set_expected_attach_type() - bpf_program__set_type() Signed-off-by: Grant Seltzer Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220420161226.86803-2-grantseltzer@gmail.com --- tools/lib/bpf/libbpf.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 342340aee948..94940497354b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7016,8 +7016,8 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object continue; } - bpf_program__set_type(prog, 
prog->sec_def->prog_type); - bpf_program__set_expected_attach_type(prog, prog->sec_def->expected_attach_type); + prog->type = prog->sec_def->prog_type; + prog->expected_attach_type = prog->sec_def->expected_attach_type; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" @@ -8582,8 +8582,7 @@ int bpf_program__set_##NAME(struct bpf_program *prog) \ { \ if (!prog) \ return libbpf_err(-EINVAL); \ - bpf_program__set_type(prog, TYPE); \ - return 0; \ + return bpf_program__set_type(prog, TYPE); \ } \ \ bool bpf_program__is_##NAME(const struct bpf_program *prog) \ @@ -9690,9 +9689,8 @@ static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, * bpf_object__open guessed */ if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) { - bpf_program__set_type(prog, attr->prog_type); - bpf_program__set_expected_attach_type(prog, - attach_type); + prog->type = attr->prog_type; + prog->expected_attach_type = attach_type; } if (bpf_program__type(prog) == BPF_PROG_TYPE_UNSPEC) { /* -- cgit v1.2.3 From a66ab9a9e66ab868de7063622d91442181deba0f Mon Sep 17 00:00:00 2001 From: Grant Seltzer Date: Wed, 20 Apr 2022 12:12:26 -0400 Subject: libbpf: Add documentation to API functions This adds documentation for the following API functions: - bpf_program__set_expected_attach_type() - bpf_program__set_type() - bpf_program__set_attach_target() - bpf_program__attach() - bpf_program__pin() - bpf_program__unpin() Signed-off-by: Grant Seltzer Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220420161226.86803-3-grantseltzer@gmail.com --- tools/lib/bpf/libbpf.h | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 66735623ca63..cdbfee60ea3e 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -378,7 +378,31 @@ struct bpf_link; LIBBPF_API struct bpf_link *bpf_link__open(const 
char *path); LIBBPF_API int bpf_link__fd(const struct bpf_link *link); LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link); +/** + * @brief **bpf_link__pin()** pins the BPF link to a file + * in the BPF FS specified by a path. This increments the links + * reference count, allowing it to stay loaded after the process + * which loaded it has exited. + * + * @param link BPF link to pin, must already be loaded + * @param path file path in a BPF file system + * @return 0, on success; negative error code, otherwise + */ + LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path); + +/** + * @brief **bpf_link__unpin()** unpins the BPF link from a file + * in the BPFFS specified by a path. This decrements the links + * reference count. + * + * The file pinning the BPF link can also be unlinked by a different + * process in which case this function will return an error. + * + * @param prog BPF program to unpin + * @param path file path to the pin in a BPF file system + * @return 0, on success; negative error code, otherwise + */ LIBBPF_API int bpf_link__unpin(struct bpf_link *link); LIBBPF_API int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog); @@ -386,6 +410,22 @@ LIBBPF_API void bpf_link__disconnect(struct bpf_link *link); LIBBPF_API int bpf_link__detach(struct bpf_link *link); LIBBPF_API int bpf_link__destroy(struct bpf_link *link); +/** + * @brief **bpf_program__attach()** is a generic function for attaching + * a BPF program based on auto-detection of program type, attach type, + * and extra paremeters, where applicable. 
+ * + * @param prog BPF program to attach + * @return Reference to the newly created BPF link; or NULL is returned on error, + * error code is stored in errno + * + * This is supported for: + * - kprobe/kretprobe (depends on SEC() definition) + * - uprobe/uretprobe (depends on SEC() definition) + * - tracepoint + * - raw tracepoint + * - tracing programs (typed raw TP/fentry/fexit/fmod_ret) + */ LIBBPF_API struct bpf_link * bpf_program__attach(const struct bpf_program *prog); @@ -686,11 +726,36 @@ LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead") LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog); LIBBPF_API enum bpf_prog_type bpf_program__type(const struct bpf_program *prog); + +/** + * @brief **bpf_program__set_type()** sets the program + * type of the passed BPF program. + * @param prog BPF program to set the program type for + * @param type program type to set the BPF map to have + * @return error code; or 0 if no error. An error occurs + * if the object is already loaded. + * + * This must be called before the BPF object is loaded, + * otherwise it has no effect and an error is returned. + */ LIBBPF_API int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type); LIBBPF_API enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog); + +/** + * @brief **bpf_program__set_expected_attach_type()** sets the + * attach type of the passed BPF program. This is used for + * auto-detection of attachment when programs are loaded. + * @param prog BPF program to set the attach type for + * @param type attach type to set the BPF map to have + * @return error code; or 0 if no error. An error occurs + * if the object is already loaded. + * + * This must be called before the BPF object is loaded, + * otherwise it has no effect and an error is returned. 
+ */ LIBBPF_API int bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type); @@ -707,6 +772,17 @@ LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_le LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size); LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size); +/** + * @brief **bpf_program__set_attach_target()** sets BTF-based attach target + * for supported BPF program types: + * - BTF-aware raw tracepoints (tp_btf); + * - fentry/fexit/fmod_ret; + * - lsm; + * - freplace. + * @param prog BPF program to set the attach type for + * @param type attach type to set the BPF map to have + * @return error code; or 0 if no error occurred. + */ LIBBPF_API int bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, const char *attach_func_name); -- cgit v1.2.3 From b71a2ebf74ef509b6b6926c78549e183c3b63947 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Thu, 21 Apr 2022 11:18:03 +0800 Subject: libbpf: Remove redundant non-null checks on obj_elf Obj_elf is already non-null checked at the function entry, so remove redundant non-null checks on obj_elf. 
Signed-off-by: Gaosheng Cui Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220421031803.2283974-1-cuigaosheng1@huawei.com --- tools/lib/bpf/libbpf.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 94940497354b..873a29ce7781 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1233,10 +1233,8 @@ static void bpf_object__elf_finish(struct bpf_object *obj) if (!obj->efile.elf) return; - if (obj->efile.elf) { - elf_end(obj->efile.elf); - obj->efile.elf = NULL; - } + elf_end(obj->efile.elf); + obj->efile.elf = NULL; obj->efile.symbols = NULL; obj->efile.st_ops_data = NULL; -- cgit v1.2.3 From 8462e0b46fe2d4c56d0a7de705228e3bf1da03d9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 20 Apr 2022 20:39:44 -0700 Subject: libbpf: Teach bpf_link_create() to fallback to bpf_raw_tracepoint_open() Teach bpf_link_create() to fall back to bpf_raw_tracepoint_open() on older kernels for programs that are attachable through BPF_RAW_TRACEPOINT_OPEN. This makes bpf_link_create() a more unified and convenient interface for creating bpf_link-based attachments. With this approach end users can just use bpf_link_create() for tp_btf/fentry/fexit/fmod_ret/lsm program attachments without needing to care about kernel support, as libbpf will handle this transparently. On the other hand, as newer features (like BPF cookie) are added to LINK_CREATE interface, they will be readily usable through the same bpf_link_create() API without any major refactoring from user's standpoint. bpf_program__attach_btf_id() is now using bpf_link_create() internally as well and will take advantage of this unified interface when BPF cookie is added for fentry/fexit. Doing proactive feature detection of LINK_CREATE support for fentry/tp_btf/etc is quite involved. 
It requires parsing vmlinux BTF, determining some stable and guaranteed to be in all kernels versions target BTF type (either raw tracepoint or fentry target function), actually attaching this program and thus potentially affecting the performance of the host kernel briefly, etc. So instead we are taking much simpler "lazy" approach of falling back to bpf_raw_tracepoint_open() call only if initial LINK_CREATE command fails. For modern kernels this will mean zero added overhead, while older kernels will incur minimal overhead with a single fast-failing LINK_CREATE call. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Reviewed-by: Kui-Feng Lee Link: https://lore.kernel.org/bpf/20220421033945.3602803-3-andrii@kernel.org --- tools/lib/bpf/bpf.c | 34 ++++++++++++++++++++++++++++++++-- tools/lib/bpf/libbpf.c | 3 ++- 2 files changed, 34 insertions(+), 3 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index cf27251adb92..a9d292c106c2 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -817,7 +817,7 @@ int bpf_link_create(int prog_fd, int target_fd, { __u32 target_btf_id, iter_info_len; union bpf_attr attr; - int fd; + int fd, err; if (!OPTS_VALID(opts, bpf_link_create_opts)) return libbpf_err(-EINVAL); @@ -870,7 +870,37 @@ int bpf_link_create(int prog_fd, int target_fd, } proceed: fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, sizeof(attr)); - return libbpf_err_errno(fd); + if (fd >= 0) + return fd; + /* we'll get EINVAL if LINK_CREATE doesn't support attaching fentry + * and other similar programs + */ + err = -errno; + if (err != -EINVAL) + return libbpf_err(err); + + /* if user used features not supported by + * BPF_RAW_TRACEPOINT_OPEN command, then just give up immediately + */ + if (attr.link_create.target_fd || attr.link_create.target_btf_id) + return libbpf_err(err); + if (!OPTS_ZEROED(opts, sz)) + return libbpf_err(err); + + /* otherwise, for few select kinds of programs that can be + * attached using 
BPF_RAW_TRACEPOINT_OPEN command, try that as + * a fallback for older kernels + */ + switch (attach_type) { + case BPF_TRACE_RAW_TP: + case BPF_LSM_MAC: + case BPF_TRACE_FENTRY: + case BPF_TRACE_FEXIT: + case BPF_MODIFY_RETURN: + return bpf_raw_tracepoint_open(NULL, prog_fd); + default: + return libbpf_err(err); + } } int bpf_link_detach(int link_fd) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 873a29ce7781..9a213aaaac8a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11260,7 +11260,8 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro return libbpf_err_ptr(-ENOMEM); link->detach = &bpf_link__detach_fd; - pfd = bpf_raw_tracepoint_open(NULL, prog_fd); + /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */ + pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), NULL); if (pfd < 0) { pfd = -errno; free(link); -- cgit v1.2.3 From 4bbac9a1f58fb74b436fbef43ec16017a580019a Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 22 Apr 2022 19:23:42 +0300 Subject: libperf evsel: Factor out perf_evsel__ioctl() Factor out perf_evsel__ioctl() so it can be reused. 
Signed-off-by: Adrian Hunter Cc: Alexey Bayduraev Cc: Ian Rogers Cc: Jiri Olsa Cc: Leo Yan Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20220422162402.147958-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evsel.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 210ea7c06ce8..20ae9f5f8b30 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -328,6 +328,17 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, return 0; } +static int perf_evsel__ioctl(struct perf_evsel *evsel, int ioc, void *arg, + int cpu_map_idx, int thread) +{ + int *fd = FD(evsel, cpu_map_idx, thread); + + if (fd == NULL || *fd < 0) + return -1; + + return ioctl(*fd, ioc, arg); +} + static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ioc, void *arg, int cpu_map_idx) @@ -335,13 +346,7 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int err; - int *fd = FD(evsel, cpu_map_idx, thread); - - if (fd == NULL || *fd < 0) - return -1; - - err = ioctl(*fd, ioc, arg); + int err = perf_evsel__ioctl(evsel, ioc, arg, cpu_map_idx, thread); if (err) return err; -- cgit v1.2.3 From 003fed595c0f37d0ad112447f5f942654979426c Mon Sep 17 00:00:00 2001 From: Yuntao Wang Date: Sun, 24 Apr 2022 22:34:20 +0800 Subject: libbpf: Remove unnecessary type cast The link variable is already of type 'struct bpf_link *', casting it to 'struct bpf_link *' is redundant, drop it. 
Signed-off-by: Yuntao Wang Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220424143420.457082-1-ytcoode@gmail.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 9a213aaaac8a..cc1a8fc47f72 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11270,7 +11270,7 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro return libbpf_err_ptr(pfd); } link->fd = pfd; - return (struct bpf_link *)link; + return link; } struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog) -- cgit v1.2.3 From ef89654f2bc7459f45b40be80de6cd3765ef8539 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Mon, 25 Apr 2022 03:18:58 +0530 Subject: libbpf: Add kptr type tag macros to bpf_helpers.h Include convenience definitions: __kptr: Unreferenced kptr __kptr_ref: Referenced kptr Users can use them to tag the pointer type meant to be used with the new support directly in the map value definition. 
Signed-off-by: Kumar Kartikeya Dwivedi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220424214901.2743946-11-memxor@gmail.com --- tools/lib/bpf/bpf_helpers.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 44df982d2a5c..5de3eb267125 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -149,6 +149,13 @@ enum libbpf_tristate { #define __kconfig __attribute__((section(".kconfig"))) #define __ksym __attribute__((section(".ksyms"))) +#if __has_attribute(btf_type_tag) +#define __kptr __attribute__((btf_type_tag("kptr"))) +#define __kptr_ref __attribute__((btf_type_tag("kptr_ref"))) +#else +#define __kptr +#define __kptr_ref +#endif #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b -- cgit v1.2.3 From afe98d46ba22316acfd198eb5cd4db2ef2d427d7 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 25 Apr 2022 17:45:02 -0700 Subject: libbpf: Fix anonymous type check in CO-RE logic Use type name for checking whether CO-RE relocation is referring to anonymous type. Using spec string makes no sense. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220426004511.2691730-2-andrii@kernel.org --- tools/lib/bpf/relo_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index f946f23eab20..adaa22160692 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -1207,7 +1207,7 @@ int bpf_core_calc_relo_insn(const char *prog_name, } /* libbpf doesn't support candidate search for anonymous types */ - if (str_is_empty(spec_str)) { + if (str_is_empty(local_name)) { pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n", prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); return -EOPNOTSUPP; -- cgit v1.2.3 From 0994a54c5202114ad0e3b3a0f1326e810b23ad38 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 25 Apr 2022 17:45:03 -0700 Subject: libbpf: Drop unhelpful "program too large" guess libbpf pretends it knows actual limit of BPF program instructions based on UAPI headers it compiled with. There is neither any guarantee that UAPI headers match host kernel, nor BPF verifier actually uses BPF_MAXINSNS constant anymore. Just drop unhelpful "guess", BPF verifier will emit actual reason for failure in its logs anyways. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220426004511.2691730-3-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index cc1a8fc47f72..b8cc862687a2 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6828,10 +6828,6 @@ retry_load: pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", prog->name, log_buf); } - if (insns_cnt >= BPF_MAXINSNS) { - pr_warn("prog '%s': program too large (%d insns), at most %d insns\n", - prog->name, insns_cnt, BPF_MAXINSNS); - } out: if (own_log_buf) -- cgit v1.2.3 From 966a7509325395c51c5f6d89e7352b0585e4804b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 25 Apr 2022 17:45:04 -0700 Subject: libbpf: Fix logic for finding matching program for CO-RE relocation Fix the bug in bpf_object__relocate_core() which can lead to finding invalid matching BPF program when processing CO-RE relocation. If a matching program is not found, the last encountered program will be assumed to be the correct program and thus error detection won't detect the problem.
Fixes: 9c82a63cf370 ("libbpf: Fix CO-RE relocs against .text section") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220426004511.2691730-4-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index b8cc862687a2..946b4590c4d3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5677,9 +5677,10 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) */ prog = NULL; for (i = 0; i < obj->nr_programs; i++) { - prog = &obj->programs[i]; - if (strcmp(prog->sec_name, sec_name) == 0) + if (strcmp(obj->programs[i].sec_name, sec_name) == 0) { + prog = &obj->programs[i]; break; + } } if (!prog) { pr_warn("sec '%s': failed to find a BPF program\n", sec_name); -- cgit v1.2.3 From 11d5daa89254ba2233d422777d52dbf24666b280 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 25 Apr 2022 17:45:05 -0700 Subject: libbpf: Avoid joining .BTF.ext data with BPF programs by section name Instead of using ELF section names as a joining key between .BTF.ext and corresponding BPF programs, pre-build .BTF.ext section number to ELF section index mapping during bpf_object__open() and use it later for matching .BTF.ext information (func/line info or CO-RE relocations) to their respective BPF programs and subprograms. This simplifies corresponding joining logic and lets libbpf do manipulations with BPF program's ELF sections like dropping leading '?' character for non-autoloaded programs. Original joining logic in bpf_object__relocate_core() (see relevant comment that's now removed) was never elegant, so it's a good improvement regardless. But it also avoids unnecessary internal assumptions about preserving original ELF section name as BPF program's section name (which was broken when SEC("?abc") support was added).
Fixes: a3820c481112 ("libbpf: Support opting out from autoloading BPF programs declaratively") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220426004511.2691730-5-andrii@kernel.org --- tools/lib/bpf/btf.c | 9 +++-- tools/lib/bpf/libbpf.c | 78 +++++++++++++++++++++++++++-------------- tools/lib/bpf/libbpf_internal.h | 7 ++++ 3 files changed, 65 insertions(+), 29 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index d124e9e533f0..bb1e06eb1eca 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -2626,6 +2626,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, const struct btf_ext_info_sec *sinfo; struct btf_ext_info *ext_info; __u32 info_left, record_size; + size_t sec_cnt = 0; /* The start of the info sec (including the __u32 record_size). */ void *info; @@ -2689,8 +2690,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return -EINVAL; } - total_record_size = sec_hdrlen + - (__u64)num_records * record_size; + total_record_size = sec_hdrlen + (__u64)num_records * record_size; if (info_left < total_record_size) { pr_debug("%s section has incorrect num_records in .BTF.ext\n", ext_sec->desc); @@ -2699,12 +2699,14 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, info_left -= total_record_size; sinfo = (void *)sinfo + total_record_size; + sec_cnt++; } ext_info = ext_sec->ext_info; ext_info->len = ext_sec->len - sizeof(__u32); ext_info->rec_size = record_size; ext_info->info = info + sizeof(__u32); + ext_info->sec_cnt = sec_cnt; return 0; } @@ -2788,6 +2790,9 @@ void btf_ext__free(struct btf_ext *btf_ext) { if (IS_ERR_OR_NULL(btf_ext)) return; + free(btf_ext->func_info.sec_idxs); + free(btf_ext->line_info.sec_idxs); + free(btf_ext->core_relo_info.sec_idxs); free(btf_ext->data); free(btf_ext); } diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 946b4590c4d3..81f2b987fd77 100644 --- a/tools/lib/bpf/libbpf.c +++ 
b/tools/lib/bpf/libbpf.c @@ -2765,6 +2765,9 @@ static int bpf_object__init_btf(struct bpf_object *obj, btf__set_pointer_size(obj->btf, 8); } if (btf_ext_data) { + struct btf_ext_info *ext_segs[3]; + int seg_num, sec_num; + if (!obj->btf) { pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", BTF_EXT_ELF_SEC, BTF_ELF_SEC); @@ -2778,6 +2781,43 @@ static int bpf_object__init_btf(struct bpf_object *obj, obj->btf_ext = NULL; goto out; } + + /* setup .BTF.ext to ELF section mapping */ + ext_segs[0] = &obj->btf_ext->func_info; + ext_segs[1] = &obj->btf_ext->line_info; + ext_segs[2] = &obj->btf_ext->core_relo_info; + for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) { + struct btf_ext_info *seg = ext_segs[seg_num]; + const struct btf_ext_info_sec *sec; + const char *sec_name; + Elf_Scn *scn; + + if (seg->sec_cnt == 0) + continue; + + seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs)); + if (!seg->sec_idxs) { + err = -ENOMEM; + goto out; + } + + sec_num = 0; + for_each_btf_ext_sec(seg, sec) { + /* preventively increment index to avoid doing + * this before every continue below + */ + sec_num++; + + sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); + if (str_is_empty(sec_name)) + continue; + scn = elf_sec_by_name(obj, sec_name); + if (!scn) + continue; + + seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn); + } + } } out: if (err && libbpf_needs_btf(obj)) { @@ -5642,7 +5682,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) struct bpf_program *prog; struct bpf_insn *insn; const char *sec_name; - int i, err = 0, insn_idx, sec_idx; + int i, err = 0, insn_idx, sec_idx, sec_num; if (obj->btf_ext->core_relo_info.len == 0) return 0; @@ -5663,33 +5703,18 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) } seg = &obj->btf_ext->core_relo_info; + sec_num = 0; for_each_btf_ext_sec(seg, sec) { + sec_idx = seg->sec_idxs[sec_num]; + sec_num++; + sec_name = 
btf__name_by_offset(obj->btf, sec->sec_name_off); if (str_is_empty(sec_name)) { err = -EINVAL; goto out; } - /* bpf_object's ELF is gone by now so it's not easy to find - * section index by section name, but we can find *any* - * bpf_program within desired section name and use it's - * prog->sec_idx to do a proper search by section index and - * instruction offset - */ - prog = NULL; - for (i = 0; i < obj->nr_programs; i++) { - if (strcmp(obj->programs[i].sec_name, sec_name) == 0) { - prog = &obj->programs[i]; - break; - } - } - if (!prog) { - pr_warn("sec '%s': failed to find a BPF program\n", sec_name); - return -ENOENT; - } - sec_idx = prog->sec_idx; - pr_debug("sec '%s': found %d CO-RE relocations\n", - sec_name, sec->num_info); + pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info); for_each_btf_ext_rec(seg, sec, i, rec) { if (rec->insn_off % BPF_INSN_SZ) @@ -5873,14 +5898,13 @@ static int adjust_prog_btf_ext_info(const struct bpf_object *obj, void *rec, *rec_end, *new_prog_info; const struct btf_ext_info_sec *sec; size_t old_sz, new_sz; - const char *sec_name; - int i, off_adj; + int i, sec_num, sec_idx, off_adj; + sec_num = 0; for_each_btf_ext_sec(ext_info, sec) { - sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); - if (!sec_name) - return -EINVAL; - if (strcmp(sec_name, prog->sec_name) != 0) + sec_idx = ext_info->sec_idxs[sec_num]; + sec_num++; + if (prog->sec_idx != sec_idx) continue; for_each_btf_ext_rec(ext_info, sec, i, rec) { diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 054cd8e93d7c..4abdbe2fea9d 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -376,6 +376,13 @@ struct btf_ext_info { void *info; __u32 rec_size; __u32 len; + /* optional (maintained internally by libbpf) mapping between .BTF.ext + * section and corresponding ELF section. 
This is used to join + * information like CO-RE relocation records with corresponding BPF + * programs defined in ELF sections + */ + __u32 *sec_idxs; + int sec_cnt; }; #define for_each_btf_ext_sec(seg, sec) \ -- cgit v1.2.3 From 185cfe837fdbb1fcc0f6b8fbcf5fdd2d1fccd3ad Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 25 Apr 2022 17:45:07 -0700 Subject: libbpf: Record subprog-resolved CO-RE relocations unconditionally Previously, libbpf recorded CO-RE relocations with insns_idx resolved according to finalized subprog locations (which are appended at the end of entry BPF program) to simplify the job of light skeleton generator. This is necessary because once subprogs' instructions are appended to main entry BPF program all the subprog instruction indices are shifted and that shift is different for each entry (main) BPF program, so it's generally impossible to map final absolute insn_idx of the finalized BPF program to their original locations inside subprograms. This information is now going to be used not only during light skeleton generation, but also to map absolute instruction index to subprog's instruction and its corresponding CO-RE relocation. So start recording these relocations always, not just when obj->gen_loader is set. This information is going to be freed at the end of bpf_object__load() step, as before (but this can change in the future if there will be a need for this information post load step). 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220426004511.2691730-7-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 81f2b987fd77..109fc86335f6 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5749,16 +5749,16 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) return -EINVAL; insn = &prog->insns[insn_idx]; - if (prog->obj->gen_loader) { - err = record_relo_core(prog, rec, insn_idx); - if (err) { - pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n", - prog->name, i, err); - goto out; - } - continue; + err = record_relo_core(prog, rec, insn_idx); + if (err) { + pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n", + prog->name, i, err); + goto out; } + if (prog->obj->gen_loader) + continue; + err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res); if (err) { pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", @@ -6299,7 +6299,6 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) if (err) return err; - return 0; } @@ -6360,8 +6359,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) err); return err; } - if (obj->gen_loader) - bpf_object__sort_relos(obj); + bpf_object__sort_relos(obj); } /* Before relocating calls pre-process relocations and mark @@ -6421,8 +6419,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) return err; } } - if (!obj->gen_loader) - bpf_object__free_relocs(obj); + return 0; } @@ -7014,8 +7011,8 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) if (err) return err; } - if (obj->gen_loader) - bpf_object__free_relocs(obj); + + bpf_object__free_relocs(obj); return 0; } -- cgit v1.2.3 From b58af63aab11e4ae00fe96de9505759cfdde8ee9 Mon Sep 
17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 25 Apr 2022 17:45:08 -0700 Subject: libbpf: Refactor CO-RE relo human description formatting routine Refactor how CO-RE relocation is formatted. Now it dumps human-readable representation, currently used by libbpf in either debug or error message output during CO-RE relocation resolution process, into provided buffer. This approach allows for better reuse of this functionality outside of CO-RE relocation resolution, which we'll use in next patch for providing better error message for BPF verifier rejecting BPF program due to unguarded failed CO-RE relocation. It also gets rid of annoying "stitching" of libbpf_print() calls, which was the only place where we did this. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220426004511.2691730-8-andrii@kernel.org --- tools/lib/bpf/relo_core.c | 64 ++++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 26 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index adaa22160692..13d36a705464 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -1055,51 +1055,66 @@ poison: * [] () + => @, * where is a C-syntax view of recorded field access, e.g.: x.a[3].b */ -static void bpf_core_dump_spec(const char *prog_name, int level, const struct bpf_core_spec *spec) +static int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec) { const struct btf_type *t; const struct btf_enum *e; const char *s; __u32 type_id; - int i; + int i, len = 0; + +#define append_buf(fmt, args...) 
\ + ({ \ + int r; \ + r = snprintf(buf, buf_sz, fmt, ##args); \ + len += r; \ + if (r >= buf_sz) \ + r = buf_sz; \ + buf += r; \ + buf_sz -= r; \ + }) type_id = spec->root_type_id; t = btf_type_by_id(spec->btf, type_id); s = btf__name_by_offset(spec->btf, t->name_off); - libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "" : s); + append_buf("<%s> [%u] %s %s", + core_relo_kind_str(spec->relo_kind), + type_id, btf_kind_str(t), str_is_empty(s) ? "" : s); if (core_relo_is_type_based(spec->relo_kind)) - return; + return len; if (core_relo_is_enumval_based(spec->relo_kind)) { t = skip_mods_and_typedefs(spec->btf, type_id, NULL); e = btf_enum(t) + spec->raw_spec[0]; s = btf__name_by_offset(spec->btf, e->name_off); - libbpf_print(level, "::%s = %u", s, e->val); - return; + append_buf("::%s = %u", s, e->val); + return len; } if (core_relo_is_field_based(spec->relo_kind)) { for (i = 0; i < spec->len; i++) { if (spec->spec[i].name) - libbpf_print(level, ".%s", spec->spec[i].name); + append_buf(".%s", spec->spec[i].name); else if (i > 0 || spec->spec[i].idx > 0) - libbpf_print(level, "[%u]", spec->spec[i].idx); + append_buf("[%u]", spec->spec[i].idx); } - libbpf_print(level, " ("); + append_buf(" ("); for (i = 0; i < spec->raw_len; i++) - libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); + append_buf("%s%d", i == 0 ? 
"" : ":", spec->raw_spec[i]); if (spec->bit_offset % 8) - libbpf_print(level, " @ offset %u.%u)", - spec->bit_offset / 8, spec->bit_offset % 8); + append_buf(" @ offset %u.%u)", spec->bit_offset / 8, spec->bit_offset % 8); else - libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); - return; + append_buf(" @ offset %u)", spec->bit_offset / 8); + return len; } + + return len; +#undef append_buf } /* @@ -1168,6 +1183,7 @@ int bpf_core_calc_relo_insn(const char *prog_name, const char *local_name; __u32 local_id; const char *spec_str; + char spec_buf[256]; int i, j, err; local_id = relo->type_id; @@ -1190,10 +1206,8 @@ int bpf_core_calc_relo_insn(const char *prog_name, return -EINVAL; } - pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name, - relo_idx, core_relo_kind_str(relo->kind), relo->kind); - bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, local_spec); - libbpf_print(LIBBPF_DEBUG, "\n"); + bpf_core_format_spec(spec_buf, sizeof(spec_buf), local_spec); + pr_debug("prog '%s': relo #%d: %s\n", prog_name, relo_idx, spec_buf); /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) { @@ -1217,17 +1231,15 @@ int bpf_core_calc_relo_insn(const char *prog_name, err = bpf_core_spec_match(local_spec, cands->cands[i].btf, cands->cands[i].id, cand_spec); if (err < 0) { - pr_warn("prog '%s': relo #%d: error matching candidate #%d ", - prog_name, relo_idx, i); - bpf_core_dump_spec(prog_name, LIBBPF_WARN, cand_spec); - libbpf_print(LIBBPF_WARN, ": %d\n", err); + bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec); + pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n ", + prog_name, relo_idx, i, spec_buf, err); return err; } - pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name, - relo_idx, err == 0 ? 
"non-matching" : "matching", i); - bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, cand_spec); - libbpf_print(LIBBPF_DEBUG, "\n"); + bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec); + pr_debug("prog '%s': relo #%d: %s candidate #%d %s\n", prog_name, + relo_idx, err == 0 ? "non-matching" : "matching", i, spec_buf); if (err == 0) continue; -- cgit v1.2.3 From 14032f2644534ecd5693ebfeef44cbf0d989a7fe Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 25 Apr 2022 17:45:09 -0700 Subject: libbpf: Simplify bpf_core_parse_spec() signature Simplify bpf_core_parse_spec() signature to take struct bpf_core_relo as an input instead of requiring callers to decompose them into type_id, relo, spec_str, etc. This makes using and reusing this helper easier. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220426004511.2691730-9-andrii@kernel.org --- tools/lib/bpf/relo_core.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index 13d36a705464..4a9ad0cfb474 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -179,28 +179,27 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) * string to specify enumerator's value index that need to be relocated. 
*/ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, - __u32 type_id, - const char *spec_str, - enum bpf_core_relo_kind relo_kind, + const struct bpf_core_relo *relo, struct bpf_core_spec *spec) { int access_idx, parsed_len, i; struct bpf_core_accessor *acc; const struct btf_type *t; - const char *name; + const char *name, *spec_str; __u32 id; __s64 sz; + spec_str = btf__name_by_offset(btf, relo->access_str_off); if (str_is_empty(spec_str) || *spec_str == ':') return -EINVAL; memset(spec, 0, sizeof(*spec)); spec->btf = btf; - spec->root_type_id = type_id; - spec->relo_kind = relo_kind; + spec->root_type_id = relo->type_id; + spec->relo_kind = relo->kind; /* type-based relocations don't have a field access string */ - if (core_relo_is_type_based(relo_kind)) { + if (core_relo_is_type_based(relo->kind)) { if (strcmp(spec_str, "0")) return -EINVAL; return 0; @@ -221,7 +220,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, if (spec->raw_len == 0) return -EINVAL; - t = skip_mods_and_typedefs(btf, type_id, &id); + t = skip_mods_and_typedefs(btf, relo->type_id, &id); if (!t) return -EINVAL; @@ -231,7 +230,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, acc->idx = access_idx; spec->len++; - if (core_relo_is_enumval_based(relo_kind)) { + if (core_relo_is_enumval_based(relo->kind)) { if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) return -EINVAL; @@ -240,7 +239,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, return 0; } - if (!core_relo_is_field_based(relo_kind)) + if (!core_relo_is_field_based(relo->kind)) return -EINVAL; sz = btf__resolve_size(btf, id); @@ -301,7 +300,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, spec->bit_offset += access_idx * sz * 8; } else { pr_warn("prog '%s': relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", - prog_name, type_id, spec_str, i, 
id, btf_kind_str(t)); + prog_name, relo->type_id, spec_str, i, id, btf_kind_str(t)); return -EINVAL; } } @@ -1182,7 +1181,6 @@ int bpf_core_calc_relo_insn(const char *prog_name, const struct btf_type *local_type; const char *local_name; __u32 local_id; - const char *spec_str; char spec_buf[256]; int i, j, err; @@ -1192,17 +1190,15 @@ int bpf_core_calc_relo_insn(const char *prog_name, if (!local_name) return -EINVAL; - spec_str = btf__name_by_offset(local_btf, relo->access_str_off); - if (str_is_empty(spec_str)) - return -EINVAL; - - err = bpf_core_parse_spec(prog_name, local_btf, local_id, spec_str, - relo->kind, local_spec); + err = bpf_core_parse_spec(prog_name, local_btf, relo, local_spec); if (err) { + const char *spec_str; + + spec_str = btf__name_by_offset(local_btf, relo->access_str_off); pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", prog_name, relo_idx, local_id, btf_kind_str(local_type), str_is_empty(local_name) ? "" : local_name, - spec_str, err); + spec_str ?: "", err); return -EINVAL; } -- cgit v1.2.3 From 9fdc4273b8dad70dbf8f7fc1b94eadc1c1f6c934 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 25 Apr 2022 17:45:10 -0700 Subject: libbpf: Fix up verifier log for unguarded failed CO-RE relos Teach libbpf to post-process BPF verifier log on BPF program load failure and detect known error patterns to provide user with more context. Currently there is one such common situation: an "unguarded" failed BPF CO-RE relocation. While failing CO-RE relocation is expected, it is expected to be properly guarded in BPF code such that BPF verifier always eliminates BPF instructions corresponding to such failed CO-RE relos as dead code.
In cases when user failed to take such precautions, BPF verifier provides the best log it can: 123: (85) call unknown#195896080 invalid func unknown#195896080 Such incomprehensible log error is due to libbpf "poisoning" BPF instruction that corresponds to failed CO-RE relocation by replacing it with invalid `call 0xbad2310` instruction (195896080 == 0xbad2310 reads "bad relo" if you squint hard enough). Luckily, libbpf has all the necessary information to look up CO-RE relocation that failed and provide more human-readable description of what's going on: 5: failed to resolve CO-RE relocation [6] struct task_struct___bad.fake_field_subprog (0:2 @ offset 8) This hopefully makes it much easier to understand what's wrong with user's BPF program without googling magic constants. This BPF verifier log fixup is setup to be extensible and is going to be used for at least one other upcoming feature of libbpf in follow up patches. Libbpf is parsing lines of BPF verifier log starting from the very end. Currently it processes up to 10 lines of code looking for familiar patterns. This avoids wasting lots of CPU processing huge verifier logs (especially for log_level=2 verbosity level). Actual verification error should normally be found in last few lines, so this should work reliably. If libbpf needs to expand log beyond available log_buf_size, it truncates the end of the verifier log. Given verifier log normally ends with something like: processed 2 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0 ... truncating this on program load error isn't too bad (end user can always increase log size, if it needs to get complete log). 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220426004511.2691730-10-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/relo_core.c | 8 +-- tools/lib/bpf/relo_core.h | 6 ++ 3 files changed, 154 insertions(+), 4 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 109fc86335f6..73a5192defb3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5626,6 +5626,22 @@ static int record_relo_core(struct bpf_program *prog, return 0; } +static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx) +{ + struct reloc_desc *relo; + int i; + + for (i = 0; i < prog->nr_reloc; i++) { + relo = &prog->reloc_desc[i]; + if (relo->type != RELO_CORE || relo->insn_idx != insn_idx) + continue; + + return relo->core_relo; + } + + return NULL; +} + static int bpf_core_resolve_relo(struct bpf_program *prog, const struct bpf_core_relo *relo, int relo_idx, @@ -6696,6 +6712,8 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog, return 0; } +static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz); + static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, const char *license, __u32 kern_version, @@ -6842,6 +6860,10 @@ retry_load: goto retry_load; ret = -errno; + + /* post-process verifier log to improve error descriptions */ + fixup_verifier_log(prog, log_buf, log_buf_size); + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); pr_perm_msg(ret); @@ -6857,6 +6879,128 @@ out: return ret; } +static char *find_prev_line(char *buf, char *cur) +{ + char *p; + + if (cur == buf) /* end of a log buf */ + return NULL; + + p = cur - 1; + while (p - 1 >= buf && *(p - 1) != '\n') + p--; + + return p; +} + +static void 
patch_log(char *buf, size_t buf_sz, size_t log_sz, + char *orig, size_t orig_sz, const char *patch) +{ + /* size of the remaining log content to the right from the to-be-replaced part */ + size_t rem_sz = (buf + log_sz) - (orig + orig_sz); + size_t patch_sz = strlen(patch); + + if (patch_sz != orig_sz) { + /* If patch line(s) are longer than original piece of verifier log, + * shift log contents by (patch_sz - orig_sz) bytes to the right + * starting from after to-be-replaced part of the log. + * + * If patch line(s) are shorter than original piece of verifier log, + * shift log contents by (orig_sz - patch_sz) bytes to the left + * starting from after to-be-replaced part of the log + * + * We need to be careful about not overflowing available + * buf_sz capacity. If that's the case, we'll truncate the end + * of the original log, as necessary. + */ + if (patch_sz > orig_sz) { + if (orig + patch_sz >= buf + buf_sz) { + /* patch is big enough to cover remaining space completely */ + patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1; + rem_sz = 0; + } else if (patch_sz - orig_sz > buf_sz - log_sz) { + /* patch causes part of remaining log to be truncated */ + rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz); + } + } + /* shift remaining log to the right by calculated amount */ + memmove(orig + patch_sz, orig + orig_sz, rem_sz); + } + + memcpy(orig, patch, patch_sz); +} + +static void fixup_log_failed_core_relo(struct bpf_program *prog, + char *buf, size_t buf_sz, size_t log_sz, + char *line1, char *line2, char *line3) +{ + /* Expected log for failed and not properly guarded CO-RE relocation: + * line1 -> 123: (85) call unknown#195896080 + * line2 -> invalid func unknown#195896080 + * line3 -> + * + * "123" is the index of the instruction that was poisoned. We extract + * instruction index to find corresponding CO-RE relocation and + * replace this part of the log with more relevant information about + * failed CO-RE relocation. 
+ */ + const struct bpf_core_relo *relo; + struct bpf_core_spec spec; + char patch[512], spec_buf[256]; + int insn_idx, err; + + if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1) + return; + + relo = find_relo_core(prog, insn_idx); + if (!relo) + return; + + err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec); + if (err) + return; + + bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); + snprintf(patch, sizeof(patch), + "%d: \n" + "failed to resolve CO-RE relocation %s\n", + insn_idx, spec_buf); + + patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); +} + +static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz) +{ + /* look for familiar error patterns in last N lines of the log */ + const size_t max_last_line_cnt = 10; + char *prev_line, *cur_line, *next_line; + size_t log_sz; + int i; + + if (!buf) + return; + + log_sz = strlen(buf) + 1; + next_line = buf + log_sz - 1; + + for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) { + cur_line = find_prev_line(buf, next_line); + if (!cur_line) + return; + + /* failed CO-RE relocation case */ + if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) { + prev_line = find_prev_line(buf, cur_line); + if (!prev_line) + continue; + + fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz, + prev_line, cur_line, next_line); + return; + } + } +} + static int bpf_program_record_relos(struct bpf_program *prog) { struct bpf_object *obj = prog->obj; diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index 4a9ad0cfb474..ba4453dfd1ed 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -178,9 +178,9 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access * string to specify enumerator's value index that need to be relocated. 
*/ -static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, - const struct bpf_core_relo *relo, - struct bpf_core_spec *spec) +int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, + const struct bpf_core_relo *relo, + struct bpf_core_spec *spec) { int access_idx, parsed_len, i; struct bpf_core_accessor *acc; @@ -1054,7 +1054,7 @@ poison: * [] () + => @, * where is a C-syntax view of recorded field access, e.g.: x.a[3].b */ -static int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec) +int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec) { const struct btf_type *t; const struct btf_enum *e; diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h index a28bf3711ce2..073039d8ca4f 100644 --- a/tools/lib/bpf/relo_core.h +++ b/tools/lib/bpf/relo_core.h @@ -84,4 +84,10 @@ int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, int insn_idx, const struct bpf_core_relo *relo, int relo_idx, const struct bpf_core_relo_res *res); +int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, + const struct bpf_core_relo *relo, + struct bpf_core_spec *spec); + +int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec); + #endif -- cgit v1.2.3 From 9af8efc45eb12c3f24a7053f994985ad28b4f29b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 28 Apr 2022 11:53:47 -0700 Subject: libbpf: Allow "incomplete" basic tracing SEC() definitions In a lot of cases the target of kprobe/kretprobe, tracepoint, raw tracepoint, etc BPF program might not be known at the compilation time and will be discovered at runtime. This was always a supported case by libbpf, with APIs like bpf_program__attach_{kprobe,tracepoint,etc}() accepting full target definition, regardless of what was defined in SEC() definition in BPF source code. 
Unfortunately, up till now libbpf still enforced users to specify at least something for the fake target, e.g., SEC("kprobe/whatever"), which is cumbersome and somewhat misleading. This patch allows target-less SEC() definitions for basic tracing BPF program types: - kprobe/kretprobe; - multi-kprobe/multi-kretprobe; - tracepoints; - raw tracepoints. Such target-less SEC() definitions are meant to specify declaratively proper BPF program type only. Attachment of them will have to be handled programmatically using correct APIs. As such, skeleton's auto-attachment of such BPF programs is skipped and generic bpf_program__attach() will fail, if attempted, due to the lack of enough target information. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20220428185349.3799599-2-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 69 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 18 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 73a5192defb3..e840e48d3a76 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8853,22 +8853,22 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE | SEC_SLOPPY_PFX), SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), - SEC_DEF("kprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), + SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), - SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), + SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe), SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), - SEC_DEF("kprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), - 
SEC_DEF("kretprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), + SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), + SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt), SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX | SEC_DEPRECATED), SEC_DEF("action", SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX), - SEC_DEF("tracepoint/", TRACEPOINT, 0, SEC_NONE, attach_tp), - SEC_DEF("tp/", TRACEPOINT, 0, SEC_NONE, attach_tp), - SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("raw_tp/", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("raw_tracepoint.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("raw_tp.w/", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp), + SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp), + SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), + SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), SEC_DEF("tp_btf/", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), SEC_DEF("fentry/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), SEC_DEF("fmod_ret/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), @@ -10595,6 +10595,12 @@ static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf char *func; int n; + *link = NULL; + + /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */ + if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0) + return 0; + opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/"); if (opts.retprobe) func_name = prog->sec_name + 
sizeof("kretprobe/") - 1; @@ -10625,6 +10631,13 @@ static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, stru char *pattern; int n; + *link = NULL; + + /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */ + if (strcmp(prog->sec_name, "kprobe.multi") == 0 || + strcmp(prog->sec_name, "kretprobe.multi") == 0) + return 0; + opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/"); if (opts.retprobe) spec = prog->sec_name + sizeof("kretprobe.multi/") - 1; @@ -11329,6 +11342,12 @@ static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_lin if (!sec_name) return -ENOMEM; + *link = NULL; + + /* no auto-attach for SEC("tp") or SEC("tracepoint") */ + if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0) + return 0; + /* extract "tp//" or "tracepoint//" */ if (str_has_pfx(prog->sec_name, "tp/")) tp_cat = sec_name + sizeof("tp/") - 1; @@ -11380,20 +11399,34 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *pr static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link) { static const char *const prefixes[] = { - "raw_tp/", - "raw_tracepoint/", - "raw_tp.w/", - "raw_tracepoint.w/", + "raw_tp", + "raw_tracepoint", + "raw_tp.w", + "raw_tracepoint.w", }; size_t i; const char *tp_name = NULL; + *link = NULL; + for (i = 0; i < ARRAY_SIZE(prefixes); i++) { - if (str_has_pfx(prog->sec_name, prefixes[i])) { - tp_name = prog->sec_name + strlen(prefixes[i]); - break; - } + size_t pfx_len; + + if (!str_has_pfx(prog->sec_name, prefixes[i])) + continue; + + pfx_len = strlen(prefixes[i]); + /* no auto-attach case of, e.g., SEC("raw_tp") */ + if (prog->sec_name[pfx_len] == '\0') + return 0; + + if (prog->sec_name[pfx_len] != '/') + continue; + + tp_name = prog->sec_name + pfx_len + 1; + break; } + if (!tp_name) { pr_warn("prog '%s': invalid section name '%s'\n", prog->name, prog->sec_name); -- cgit v1.2.3 From 
cc7d8f2c8ecc003a67d4cf189d04124461524a16 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 28 Apr 2022 11:53:48 -0700 Subject: libbpf: Support target-less SEC() definitions for BTF-backed programs Similar to previous patch, support target-less definitions like SEC("fentry"), SEC("freplace"), etc. For such BTF-backed program types it is expected that user will specify BTF target programmatically at runtime using bpf_program__set_attach_target() *before* load phase. If not, libbpf will report this as an error. Also use SEC_ATTACH_BTF flag instead of explicitly listing a set of types that are expected to require attach_btf_id. This was an accidental omission during custom SEC() support refactoring. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20220428185349.3799599-3-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 49 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 17 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e840e48d3a76..c9aa5b1278fc 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6682,17 +6682,32 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog, if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; - if (def & SEC_DEPRECATED) + if (def & SEC_DEPRECATED) { pr_warn("SEC(\"%s\") is deprecated, please see https://github.com/libbpf/libbpf/wiki/Libbpf-1.0-migration-guide#bpf-program-sec-annotation-deprecations for details\n", prog->sec_name); + } - if ((prog->type == BPF_PROG_TYPE_TRACING || - prog->type == BPF_PROG_TYPE_LSM || - prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { + if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) { int btf_obj_fd = 0, btf_type_id = 0, err; const char *attach_name; - attach_name = strchr(prog->sec_name, '/') + 1; + attach_name = strchr(prog->sec_name, '/'); + if
(!attach_name) { + /* if BPF program is annotated with just SEC("fentry") + * (or similar) without declaratively specifying + * target, then it is expected that target will be + * specified with bpf_program__set_attach_target() at + * runtime before BPF object load step. If not, then + * there is nothing to load into the kernel as BPF + * verifier won't be able to validate BPF program + * correctness anyways. + */ + pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n", + prog->name); + return -EINVAL; + } + attach_name++; /* skip over / */ + err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id); if (err) return err; @@ -8869,18 +8884,18 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp), - SEC_DEF("tp_btf/", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fentry/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fmod_ret/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fexit/", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("fentry.s/", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("fmod_ret.s/", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("fexit.s/", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), - SEC_DEF("freplace/", EXT, 0, SEC_ATTACH_BTF, attach_trace), - SEC_DEF("lsm/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), - SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), - SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), - SEC_DEF("iter.s/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter), + SEC_DEF("tp_btf+", 
TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), + SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), + SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), + SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter), SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS), SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE), -- cgit v1.2.3 From b198881d4b4c22c499168421b44eff3913a22fb1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 27 Apr 2022 21:15:20 -0700 Subject: libbpf: Append "..." in fixed up log if CO-RE spec is truncated Detect CO-RE spec truncation and append "..." to make user aware that there was supposed to be more of the spec there. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220428041523.4089853-2-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index c9aa5b1278fc..e4d2df8dffad 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6962,7 +6962,7 @@ static void fixup_log_failed_core_relo(struct bpf_program *prog, const struct bpf_core_relo *relo; struct bpf_core_spec spec; char patch[512], spec_buf[256]; - int insn_idx, err; + int insn_idx, err, spec_len; if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1) return; @@ -6975,11 +6975,11 @@ static void fixup_log_failed_core_relo(struct bpf_program *prog, if (err) return; - bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); + spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec); snprintf(patch, sizeof(patch), "%d: \n" - "failed to resolve CO-RE relocation %s\n", - insn_idx, spec_buf); + "failed to resolve CO-RE relocation %s%s\n", + insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : ""); patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); } -- cgit v1.2.3 From 69721203b1f3f9d123ae0f81bbf41f9a85185859 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 27 Apr 2022 21:15:21 -0700 Subject: libbpf: Use libbpf_mem_ensure() when allocating new map Reuse libbpf_mem_ensure() when adding a new map to the list of maps inside bpf_object. It takes care of proper resizing and reallocating of map array and zeroing out newly allocated memory. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220428041523.4089853-3-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e4d2df8dffad..47284586836a 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1433,36 +1433,19 @@ static int find_elf_var_offset(const struct bpf_object *obj, const char *name, _ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) { - struct bpf_map *new_maps; - size_t new_cap; - int i; - - if (obj->nr_maps < obj->maps_cap) - return &obj->maps[obj->nr_maps++]; - - new_cap = max((size_t)4, obj->maps_cap * 3 / 2); - new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps)); - if (!new_maps) { - pr_warn("alloc maps for object failed\n"); - return ERR_PTR(-ENOMEM); - } + struct bpf_map *map; + int err; - obj->maps_cap = new_cap; - obj->maps = new_maps; + err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap, + sizeof(*obj->maps), obj->nr_maps + 1); + if (err) + return ERR_PTR(err); - /* zero out new maps */ - memset(obj->maps + obj->nr_maps, 0, - (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps)); - /* - * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin) - * when failure (zclose won't close negative fd)). 
- */ - for (i = obj->nr_maps; i < obj->maps_cap; i++) { - obj->maps[i].fd = -1; - obj->maps[i].inner_map_fd = -1; - } + map = &obj->maps[obj->nr_maps++]; + map->fd = -1; + map->inner_map_fd = -1; - return &obj->maps[obj->nr_maps++]; + return map; } static size_t bpf_map_mmap_sz(const struct bpf_map *map) -- cgit v1.2.3 From ec41817b4af5114825621fe9b31cb861480f6cd7 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 27 Apr 2022 21:15:22 -0700 Subject: libbpf: Allow to opt-out from creating BPF maps Add bpf_map__set_autocreate() API that allows user to opt-out from libbpf automatically creating BPF map during BPF object load. This is a useful feature when building CO-RE-enabled BPF application that takes advantage of some new-ish BPF map type (e.g., socket-local storage) if kernel supports it, but otherwise uses some alternative way (e.g., extra HASH map). In such case, being able to disable the creation of a map that kernel doesn't support allows to successfully create and load BPF object file with all its other maps and programs. It's still up to user to make sure that no "live" code in any of their BPF programs are referencing such map instance, which can be achieved by guarding such code with CO-RE relocation check or by using .rodata global variables. If user fails to properly guard such code to turn it into "dead code", libbpf will helpfully post-process BPF verifier log and will provide more meaningful error and map name that needs to be guarded properly. As such, instead of: ; value = bpf_map_lookup_elem(&missing_map, &zero); 4: (85) call unknown#2001000000 invalid func unknown#2001000000 ... 
user will see: ; value = bpf_map_lookup_elem(&missing_map, &zero); 4: BPF map 'missing_map' is referenced but wasn't created Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220428041523.4089853-4-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 124 +++++++++++++++++++++++++++++++++++++++++------ tools/lib/bpf/libbpf.h | 22 +++++++++ tools/lib/bpf/libbpf.map | 4 +- 3 files changed, 133 insertions(+), 17 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 47284586836a..63c0f412266c 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -357,6 +357,7 @@ enum libbpf_map_type { }; struct bpf_map { + struct bpf_object *obj; char *name; /* real_name is defined for special internal maps (.rodata*, * .data*, .bss, .kconfig) and preserves their original ELF section @@ -386,7 +387,7 @@ struct bpf_map { char *pin_path; bool pinned; bool reused; - bool skipped; + bool autocreate; __u64 map_extra; }; @@ -1442,8 +1443,10 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) return ERR_PTR(err); map = &obj->maps[obj->nr_maps++]; + map->obj = obj; map->fd = -1; map->inner_map_fd = -1; + map->autocreate = true; return map; } @@ -4307,6 +4310,20 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) return 0; } +bool bpf_map__autocreate(const struct bpf_map *map) +{ + return map->autocreate; +} + +int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) +{ + if (map->obj->loaded) + return libbpf_err(-EBUSY); + + map->autocreate = autocreate; + return 0; +} + int bpf_map__reuse_fd(struct bpf_map *map, int fd) { struct bpf_map_info info = {}; @@ -5163,9 +5180,11 @@ bpf_object__create_maps(struct bpf_object *obj) * bpf_object loading will succeed just fine even on old * kernels. 
*/ - if (bpf_map__is_internal(map) && - !kernel_supports(obj, FEAT_GLOBAL_DATA)) { - map->skipped = true; + if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA)) + map->autocreate = false; + + if (!map->autocreate) { + pr_debug("map '%s': skipped auto-creating...\n", map->name); continue; } @@ -5788,6 +5807,36 @@ out: return err; } +/* base map load ldimm64 special constant, used also for log fixup logic */ +#define MAP_LDIMM64_POISON_BASE 2001000000 +#define MAP_LDIMM64_POISON_PFX "200100" + +static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx, + int insn_idx, struct bpf_insn *insn, + int map_idx, const struct bpf_map *map) +{ + int i; + + pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n", + prog->name, relo_idx, insn_idx, map_idx, map->name); + + /* we turn single ldimm64 into two identical invalid calls */ + for (i = 0; i < 2; i++) { + insn->code = BPF_JMP | BPF_CALL; + insn->dst_reg = 0; + insn->src_reg = 0; + insn->off = 0; + /* if this instruction is reachable (not a dead code), + * verifier will complain with something like: + * invalid func unknown#2001000123 + * where lower 123 is map index into obj->maps[] array + */ + insn->imm = MAP_LDIMM64_POISON_BASE + map_idx; + + insn++; + } +} + /* Relocate data references within program code: * - map references; * - global variable references; @@ -5801,33 +5850,35 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) for (i = 0; i < prog->nr_reloc; i++) { struct reloc_desc *relo = &prog->reloc_desc[i]; struct bpf_insn *insn = &prog->insns[relo->insn_idx]; + const struct bpf_map *map; struct extern_desc *ext; switch (relo->type) { case RELO_LD64: + map = &obj->maps[relo->map_idx]; if (obj->gen_loader) { insn[0].src_reg = BPF_PSEUDO_MAP_IDX; insn[0].imm = relo->map_idx; - } else { + } else if (map->autocreate) { insn[0].src_reg = BPF_PSEUDO_MAP_FD; - insn[0].imm = obj->maps[relo->map_idx].fd; + insn[0].imm = map->fd; + } else { 
+ poison_map_ldimm64(prog, i, relo->insn_idx, insn, + relo->map_idx, map); } break; case RELO_DATA: + map = &obj->maps[relo->map_idx]; insn[1].imm = insn[0].imm + relo->sym_off; if (obj->gen_loader) { insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; insn[0].imm = relo->map_idx; - } else { - const struct bpf_map *map = &obj->maps[relo->map_idx]; - - if (map->skipped) { - pr_warn("prog '%s': relo #%d: kernel doesn't support global data\n", - prog->name, i); - return -ENOTSUP; - } + } else if (map->autocreate) { insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; - insn[0].imm = obj->maps[relo->map_idx].fd; + insn[0].imm = map->fd; + } else { + poison_map_ldimm64(prog, i, relo->insn_idx, insn, + relo->map_idx, map); } break; case RELO_EXTERN_VAR: @@ -6967,6 +7018,39 @@ static void fixup_log_failed_core_relo(struct bpf_program *prog, patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); } +static void fixup_log_missing_map_load(struct bpf_program *prog, + char *buf, size_t buf_sz, size_t log_sz, + char *line1, char *line2, char *line3) +{ + /* Expected log for failed and not properly guarded CO-RE relocation: + * line1 -> 123: (85) call unknown#2001000345 + * line2 -> invalid func unknown#2001000345 + * line3 -> + * + * "123" is the index of the instruction that was poisoned. + * "345" in "2001000345" are map index in obj->maps to fetch map name. 
+ */ + struct bpf_object *obj = prog->obj; + const struct bpf_map *map; + int insn_idx, map_idx; + char patch[128]; + + if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2) + return; + + map_idx -= MAP_LDIMM64_POISON_BASE; + if (map_idx < 0 || map_idx >= obj->nr_maps) + return; + map = &obj->maps[map_idx]; + + snprintf(patch, sizeof(patch), + "%d: \n" + "BPF map '%s' is referenced but wasn't created\n", + insn_idx, map->name); + + patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch); +} + static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz) { /* look for familiar error patterns in last N lines of the log */ @@ -6995,6 +7079,14 @@ static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_s fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz, prev_line, cur_line, next_line); return; + } else if (str_has_pfx(cur_line, "invalid func unknown#"MAP_LDIMM64_POISON_PFX)) { + prev_line = find_prev_line(buf, cur_line); + if (!prev_line) + continue; + + fixup_log_missing_map_load(prog, buf, buf_sz, log_sz, + prev_line, cur_line, next_line); + return; } } } @@ -8183,7 +8275,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) char *pin_path = NULL; char buf[PATH_MAX]; - if (map->skipped) + if (!map->autocreate) continue; if (path) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index cdbfee60ea3e..114b1f6f73a5 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -866,6 +866,28 @@ struct bpf_map *bpf_map__prev(const struct bpf_map *map, const struct bpf_object LIBBPF_API struct bpf_map * bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map); +/** + * @brief **bpf_map__set_autocreate()** sets whether libbpf has to auto-create + * BPF map during BPF object load phase. 
+ * @param map the BPF map instance + * @param autocreate whether to create BPF map during BPF object load + * @return 0 on success; -EBUSY if BPF object was already loaded + * + * **bpf_map__set_autocreate()** allows to opt-out from libbpf auto-creating + * BPF map. By default, libbpf will attempt to create every single BPF map + * defined in BPF object file using BPF_MAP_CREATE command of bpf() syscall + * and fill in map FD in BPF instructions. + * + * This API allows to opt-out of this process for specific map instance. This + * can be useful if host kernel doesn't support such BPF map type or used + * combination of flags and user application wants to avoid creating such + * a map in the first place. User is still responsible to make sure that their + * BPF-side code that expects to use such missing BPF map is recognized by BPF + * verifier as dead code, otherwise BPF verifier will reject such BPF program. + */ +LIBBPF_API int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate); +LIBBPF_API bool bpf_map__autocreate(const struct bpf_map *map); + /** * @brief **bpf_map__fd()** gets the file descriptor of the passed * BPF map diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 82f6d62176dd..b5bc84039407 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -442,10 +442,12 @@ LIBBPF_0.7.0 { LIBBPF_0.8.0 { global: + bpf_map__autocreate; + bpf_map__set_autocreate; bpf_object__destroy_subskeleton; bpf_object__open_subskeleton; + bpf_program__attach_kprobe_multi_opts; bpf_program__attach_usdt; libbpf_register_prog_handler; libbpf_unregister_prog_handler; - bpf_program__attach_kprobe_multi_opts; } LIBBPF_0.7.0; -- cgit v1.2.3 From 33cd6928039c6bf18cf0baec936924d908e6c89b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 2 May 2022 21:17:53 -0700 Subject: perf evlist: Clear all_cpus before propagating all_cpus is merged into during propagation. Initially all_cpus is set from PMU sysfs. 
perf_evlist__set_maps() will recompute it and change evsel->cpus to user_requested_cpus if they are given. If all_cpus isn't cleared then the union of the user_requested_cpus and PMU sysfs values is set to all_cpus, whereas just user_requested_cpus is necessary. To avoid this make all_cpus empty prior to propagation. Reviewed-by: Adrian Hunter Signed-off-by: Ian Rogers Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Alexey Bayduraev Cc: Andi Kleen Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: German Gomez Cc: James Clark Cc: Jiri Olsa Cc: John Fastabend Cc: John Garry Cc: KP Singh Cc: Kajol Jain Cc: Leo Yan Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Song Liu Cc: Stephane Eranian Cc: Suzuki Poulouse Cc: Will Deacon Cc: Yonghong Song Link: http://lore.kernel.org/lkml/20220503041757.2365696-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index a09315538a30..974b4585f93e 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -59,6 +59,10 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist) { struct perf_evsel *evsel; + /* Recomputing all_cpus, so start with a blank slate. */ + perf_cpu_map__put(evlist->all_cpus); + evlist->all_cpus = NULL; + perf_evlist__for_each_evsel(evlist, evsel) __perf_evlist__propagate_maps(evlist, evsel); } -- cgit v1.2.3 From 8e2f618e8be66f74e6e088281ca72deb5c87cf04 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 8 May 2022 17:41:41 -0700 Subject: libbpf: Make __kptr and __kptr_ref unconditionally use btf_type_tag() attr It will be annoying and surprising for users of __kptr and __kptr_ref if libbpf silently ignores them just because Clang used for compilation didn't support btf_type_tag(). 
It's much better to get a clear compiler error than to debug BPF verifier failures later on. Fixes: ef89654f2bc7 ("libbpf: Add kptr type tag macros to bpf_helpers.h") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220509004148.1801791-3-andrii@kernel.org --- tools/lib/bpf/bpf_helpers.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 5de3eb267125..bbae9a057bc8 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -149,13 +149,8 @@ enum libbpf_tristate { #define __kconfig __attribute__((section(".kconfig"))) #define __ksym __attribute__((section(".ksyms"))) -#if __has_attribute(btf_type_tag) #define __kptr __attribute__((btf_type_tag("kptr"))) #define __kptr_ref __attribute__((btf_type_tag("kptr_ref"))) -#else -#define __kptr -#define __kptr_ref -#endif #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b -- cgit v1.2.3 From 73d0280f6b79c936770698250549ba9f62682a45 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 8 May 2022 17:41:42 -0700 Subject: libbpf: Improve usability of field-based CO-RE helpers Allow to specify field reference in two ways: - if user has variable of necessary type, they can use variable-based reference (my_var.my_field or my_var_ptr->my_field). This was the only supported syntax up till now. - now, bpf_core_field_exists() and bpf_core_field_size() support also specifying field in a fashion similar to offsetof() macro, by specifying type of the containing struct/union separately and field name separately: bpf_core_field_exists(struct my_type, my_field). This form is quite often more convenient in practice and it matches type-based CO-RE helpers that support specifying type by its name without requiring any variables.
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220509004148.1801791-4-andrii@kernel.org --- tools/lib/bpf/bpf_core_read.h | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index e4aa9996a550..5ad415f9051f 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -110,21 +110,38 @@ enum bpf_enum_value_kind { val; \ }) +#define ___bpf_field_ref1(field) (field) +#define ___bpf_field_ref2(type, field) (((typeof(type) *)0)->field) +#define ___bpf_field_ref(args...) \ + ___bpf_apply(___bpf_field_ref, ___bpf_narg(args))(args) + /* * Convenience macro to check that field actually exists in target kernel's. * Returns: * 1, if matching field is present in target kernel; * 0, if no matching field found. + * + * Supports two forms: + * - field reference through variable access: + * bpf_core_field_exists(p->my_field); + * - field reference through type and field names: + * bpf_core_field_exists(struct my_type, my_field). */ -#define bpf_core_field_exists(field) \ - __builtin_preserve_field_info(field, BPF_FIELD_EXISTS) +#define bpf_core_field_exists(field...) \ + __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_EXISTS) /* * Convenience macro to get the byte size of a field. Works for integers, * struct/unions, pointers, arrays, and enums. + * + * Supports two forms: + * - field reference through variable access: + * bpf_core_field_size(p->my_field); + * - field reference through type and field names: + * bpf_core_field_size(struct my_type, my_field). */ -#define bpf_core_field_size(field) \ - __builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE) +#define bpf_core_field_size(field...) 
\ + __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_SIZE) /* * Convenience macro to get BTF type ID of a specified type, using a local BTF -- cgit v1.2.3 From 7715f549a9d80a82428a7925fa4a00518c53c35c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 8 May 2022 17:41:44 -0700 Subject: libbpf: Complete field-based CO-RE helpers with field offset helper Add bpf_core_field_offset() helper to complete field-based CO-RE helpers. This helper can be useful for feature-detection and for some more advanced cases of field reading (e.g., reading flexible array members). Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220509004148.1801791-6-andrii@kernel.org --- tools/lib/bpf/bpf_core_read.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 5ad415f9051f..fd48b1ff59ca 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -143,6 +143,18 @@ enum bpf_enum_value_kind { #define bpf_core_field_size(field...) \ __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_SIZE) +/* + * Convenience macro to get field's byte offset. + * + * Supports two forms: + * - field reference through variable access: + * bpf_core_field_offset(p->my_field); + * - field reference through type and field names: + * bpf_core_field_offset(struct my_type, my_field). + */ +#define bpf_core_field_offset(field...) \ + __builtin_preserve_field_info(___bpf_field_ref(field), BPF_FIELD_BYTE_OFFSET) + /* * Convenience macro to get BTF type ID of a specified type, using a local BTF * information. 
Return 32-bit unsigned integer with type ID from program's own -- cgit v1.2.3 From f760d0537925e2973ed3adc2e590aa2968d0e8dc Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 8 May 2022 17:41:46 -0700 Subject: libbpf: Provide barrier() and barrier_var() in bpf_helpers.h Add barrier() and barrier_var() macros into bpf_helpers.h to be used by end users. While a bit advanced and specialized instruments, they are sometimes indispensable. Instead of requiring each user to figure out exact asm volatile incantations for themselves, provide them from bpf_helpers.h. Also remove conflicting definitions from selftests. Some tests rely on barrier_var() definition being nothing, those will still work as libbpf does the #ifndef/#endif guarding for barrier() and barrier_var(), allowing users to redefine them, if necessary. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220509004148.1801791-8-andrii@kernel.org --- tools/lib/bpf/bpf_helpers.h | 24 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/exhandler_kern.c | 2 -- tools/testing/selftests/bpf/progs/loop5.c | 1 - tools/testing/selftests/bpf/progs/profiler1.c | 1 - tools/testing/selftests/bpf/progs/pyperf.h | 2 -- .../testing/selftests/bpf/progs/test_pkt_access.c | 2 -- 6 files changed, 24 insertions(+), 8 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index bbae9a057bc8..fb04eaf367f1 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -75,6 +75,30 @@ }) #endif +/* + * Compiler (optimization) barrier. + */ +#ifndef barrier +#define barrier() asm volatile("" ::: "memory") +#endif + +/* Variable-specific compiler (optimization) barrier. 
It's a no-op which makes + * compiler believe that there is some black box modification of a given + * variable and thus prevents compiler from making extra assumption about its + * value and potential simplifications and optimizations on this variable. + * + * E.g., compiler might often delay or even omit 32-bit to 64-bit casting of + * a variable, making some code patterns unverifiable. Putting barrier_var() + * in place will ensure that cast is performed before the barrier_var() + * invocation, because compiler has to pessimistically assume that embedded + * asm section might perform some extra operations on that variable. + * + * This is a variable-specific variant of more global barrier(). + */ +#ifndef barrier_var +#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) +#endif + /* * Helper macro to throw a compilation error if __bpf_unreachable() gets * built into the resulting code. This works given BPF back end does not diff --git a/tools/testing/selftests/bpf/progs/exhandler_kern.c b/tools/testing/selftests/bpf/progs/exhandler_kern.c index dd9b30a0f0fc..20d009e2d266 100644 --- a/tools/testing/selftests/bpf/progs/exhandler_kern.c +++ b/tools/testing/selftests/bpf/progs/exhandler_kern.c @@ -7,8 +7,6 @@ #include #include -#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) - char _license[] SEC("license") = "GPL"; unsigned int exception_triggered; diff --git a/tools/testing/selftests/bpf/progs/loop5.c b/tools/testing/selftests/bpf/progs/loop5.c index 913791923fa3..1b13f37f85ec 100644 --- a/tools/testing/selftests/bpf/progs/loop5.c +++ b/tools/testing/selftests/bpf/progs/loop5.c @@ -2,7 +2,6 @@ // Copyright (c) 2019 Facebook #include #include -#define barrier() __asm__ __volatile__("": : :"memory") char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/profiler1.c b/tools/testing/selftests/bpf/progs/profiler1.c index 4df9088bfc00..fb6b13522949 100644 --- a/tools/testing/selftests/bpf/progs/profiler1.c +++ 
b/tools/testing/selftests/bpf/progs/profiler1.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) #define UNROLL #define INLINE __always_inline #include "profiler.inc.h" diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index 5d3dc4d66d47..6c7b1fb268d6 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -171,8 +171,6 @@ struct process_frame_ctx { bool done; }; -#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) - static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx) { int zero = 0; diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c index 0558544e1ff0..5cd7c096f62d 100644 --- a/tools/testing/selftests/bpf/progs/test_pkt_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c @@ -14,8 +14,6 @@ #include #include -#define barrier() __asm__ __volatile__("": : :"memory") - /* llvm will optimize both subprograms into exactly the same BPF assembly * * Disassembly of section .text: -- cgit v1.2.3 From 0087a681fa8c22f719a567317e8f8f894d734b9c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 8 May 2022 17:41:47 -0700 Subject: libbpf: Automatically fix up BPF_MAP_TYPE_RINGBUF size, if necessary Kernel imposes a pretty particular restriction on ringbuf map size. It has to be a power-of-2 multiple of page size. While generally this isn't hard for user to satisfy, sometimes it's impossible to do this declaratively in BPF source code or just plain inconvenient to do at runtime. One such example might be BPF libraries that are supposed to work on different architectures, which might not agree on what the common page size is. Let libbpf find the right size for user instead, if it turns out to not satisfy kernel requirements. 
If user didn't set size at all, that's most probably a mistake so don't upsize such zero size to one full page, though. Also we need to be careful about not overflowing __u32 max_entries. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220509004148.1801791-9-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 63c0f412266c..15117b9a4d1e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4943,6 +4943,44 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) static void bpf_map__destroy(struct bpf_map *map); +static bool is_pow_of_2(size_t x) +{ + return x && (x & (x - 1)) == 0; +} + +static size_t adjust_ringbuf_sz(size_t sz) +{ + __u32 page_sz = sysconf(_SC_PAGE_SIZE); + __u32 i, mul; + + /* if user forgot to set any size, make sure they see error */ + if (sz == 0) + return 0; + /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be + * a power-of-2 multiple of kernel's page size. If user diligently + * satisfied these conditions, pass the size through. + */ + if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz)) + return sz; + + /* Otherwise find closest (page_sz * power_of_2) product bigger than + * user-set size to satisfy both user size request and kernel + * requirements and substitute correct max_entries for map creation.
+ */ + for (i = 0, mul = 1; ; i++, mul <<= 1) { + if (mul > UINT_MAX / page_sz) /* prevent __u32 overflow */ + break; + if (mul * page_sz > sz) + return mul * page_sz; + } + + /* if it's impossible to satisfy the conditions (i.e., user size is + * very close to UINT_MAX but is not a power-of-2 multiple of + * page_size) then just return original size and let kernel reject it + */ + return sz; +} + static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { LIBBPF_OPTS(bpf_map_create_opts, create_attr); @@ -4981,6 +5019,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b } switch (def->type) { + case BPF_MAP_TYPE_RINGBUF: + map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); + /* fallthrough */ case BPF_MAP_TYPE_PERF_EVENT_ARRAY: case BPF_MAP_TYPE_CGROUP_ARRAY: case BPF_MAP_TYPE_STACK_TRACE: @@ -4994,7 +5035,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_QUEUE: case BPF_MAP_TYPE_STACK: - case BPF_MAP_TYPE_RINGBUF: create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; -- cgit v1.2.3 From 00632610c2f0b732ae87b8c7e7e1375abaeb01a0 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 6 May 2022 15:25:40 +0300 Subject: libperf evsel: Add perf_evsel__enable_thread() Add perf_evsel__enable_thread() as a counterpart to perf_evsel__enable_cpu(), to enable all events for a thread. 
Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20220506122601.367589-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evsel.c | 15 +++++++++++++++ tools/lib/perf/include/perf/evsel.h | 1 + 2 files changed, 16 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 20ae9f5f8b30..c1d58673f6ef 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -360,6 +360,21 @@ int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx) return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu_map_idx); } +int perf_evsel__enable_thread(struct perf_evsel *evsel, int thread) +{ + struct perf_cpu cpu __maybe_unused; + int idx; + int err; + + perf_cpu_map__for_each_cpu(cpu, idx, evsel->cpus) { + err = perf_evsel__ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, idx, thread); + if (err) + return err; + } + + return 0; +} + int perf_evsel__enable(struct perf_evsel *evsel) { int i; diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h index 2a9516b42d15..699c0ed97d34 100644 --- a/tools/lib/perf/include/perf/evsel.h +++ b/tools/lib/perf/include/perf/evsel.h @@ -36,6 +36,7 @@ LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int struct perf_counts_values *count); LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel); LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx); +LIBPERF_API int perf_evsel__enable_thread(struct perf_evsel *evsel, int thread); LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel); LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx); LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel); -- cgit v1.2.3 From 6a7b8a5a30e60e27cd2489af3d0a441280b441e6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 6 
May 2022 15:25:44 +0300 Subject: libperf evlist: Remove ->idx() per_cpu parameter Remove ->idx() per_cpu parameter because it isn't needed. Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20220506122601.367589-7-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 4 ++-- tools/lib/perf/include/internal/evlist.h | 2 +- tools/perf/util/evlist.c | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 974b4585f93e..5e8ad854fa8a 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -521,7 +521,7 @@ mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, int output_overwrite = -1; if (ops->idx) - ops->idx(evlist, mp, thread, false); + ops->idx(evlist, mp, thread); if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread, &output, &output_overwrite)) @@ -548,7 +548,7 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, int output_overwrite = -1; if (ops->idx) - ops->idx(evlist, mp, cpu, true); + ops->idx(evlist, mp, cpu); for (thread = 0; thread < nr_threads; thread++) { if (mmap_per_evsel(evlist, ops, cpu, mp, cpu, diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index e3e64f37db7b..0d5c830431a7 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -38,7 +38,7 @@ struct perf_evlist { }; typedef void -(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int, bool); +(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int); typedef struct perf_mmap* (*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int); typedef int diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f1309b39afe4..09a1d3400fd9 100644 --- 
a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -748,10 +748,11 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist, static void perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist, struct perf_mmap_param *_mp, - int idx, bool per_cpu) + int idx) { struct evlist *evlist = container_of(_evlist, struct evlist, core); struct mmap_params *mp = container_of(_mp, struct mmap_params, core); + bool per_cpu = !perf_cpu_map__empty(_evlist->user_requested_cpus); auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, idx, per_cpu); } -- cgit v1.2.3 From d8fe2efb65acdc213eb180b7853fc1121c1bff37 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 6 May 2022 15:25:45 +0300 Subject: libperf evlist: Move ->idx() into mmap_per_evsel() Move ->idx() into mmap_per_evsel() in preparation for adding evsel as a parameter. Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20220506122601.367589-8-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 5e8ad854fa8a..4fce417432aa 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -478,6 +478,9 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, */ refcount_set(&map->refcnt, 2); + if (ops->idx) + ops->idx(evlist, mp, idx); + if (ops->mmap(map, mp, *output, evlist_cpu) < 0) return -1; @@ -520,9 +523,6 @@ mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, int output = -1; int output_overwrite = -1; - if (ops->idx) - ops->idx(evlist, mp, thread); - if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread, &output, &output_overwrite)) goto out_unmap; @@ -547,9 +547,6 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, int output = -1; 
int output_overwrite = -1; - if (ops->idx) - ops->idx(evlist, mp, cpu); - for (thread = 0; thread < nr_threads; thread++) { if (mmap_per_evsel(evlist, ops, cpu, mp, cpu, thread, &output, &output_overwrite)) -- cgit v1.2.3 From 8f111be6434de90c9743ea522c32b384d203a8de Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 6 May 2022 15:25:46 +0300 Subject: libperf evlist: Add evsel as a parameter to ->idx() Add evsel as a parameter to ->idx() in preparation for correctly determining whether an auxtrace mmap is needed. Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20220506122601.367589-9-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 2 +- tools/lib/perf/include/internal/evlist.h | 3 ++- tools/perf/util/evlist.c | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 4fce417432aa..ed66f2e38464 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -479,7 +479,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, refcount_set(&map->refcnt, 2); if (ops->idx) - ops->idx(evlist, mp, idx); + ops->idx(evlist, evsel, mp, idx); if (ops->mmap(map, mp, *output, evlist_cpu) < 0) return -1; diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index 0d5c830431a7..6f89aec3e608 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -38,7 +38,8 @@ struct perf_evlist { }; typedef void -(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int); +(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_evsel*, + struct perf_mmap_param*, int); typedef struct perf_mmap* (*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int); typedef int diff --git a/tools/perf/util/evlist.c 
b/tools/perf/util/evlist.c index 09a1d3400fd9..7ae56b062f44 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -747,6 +747,7 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist, static void perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist, + struct perf_evsel *_evsel __maybe_unused, struct perf_mmap_param *_mp, int idx) { -- cgit v1.2.3 From 129b9c5ee2c18c3e36ec289140b5149f301118d1 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Tue, 10 May 2022 13:59:22 -0700 Subject: libbpf: Assign cookies to links in libbpf. Add a cookie field to the attributes of bpf_link_create(). Add bpf_program__attach_trace_opts() to attach a cookie to a link. Signed-off-by: Kui-Feng Lee Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220510205923.3206889-5-kuifeng@fb.com --- tools/lib/bpf/bpf.c | 8 ++++++++ tools/lib/bpf/bpf.h | 3 +++ tools/lib/bpf/libbpf.c | 20 ++++++++++++++++---- tools/lib/bpf/libbpf.h | 12 ++++++++++++ tools/lib/bpf/libbpf.map | 1 + 5 files changed, 40 insertions(+), 4 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index a9d292c106c2..5660268e103f 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -863,6 +863,14 @@ int bpf_link_create(int prog_fd, int target_fd, if (!OPTS_ZEROED(opts, kprobe_multi)) return libbpf_err(-EINVAL); break; + case BPF_TRACE_FENTRY: + case BPF_TRACE_FEXIT: + case BPF_MODIFY_RETURN: + case BPF_LSM_MAC: + attr.link_create.tracing.cookie = OPTS_GET(opts, tracing.cookie, 0); + if (!OPTS_ZEROED(opts, tracing)) + return libbpf_err(-EINVAL); + break; default: if (!OPTS_ZEROED(opts, flags)) return libbpf_err(-EINVAL); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index f4b4afb6d4ba..34af2232928c 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -420,6 +420,9 @@ struct bpf_link_create_opts { const unsigned long *addrs; const __u64 *cookies; } kprobe_multi; + 
struct { + __u64 cookie; + } tracing; }; size_t :0; }; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 15117b9a4d1e..a5904c0ac794 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11568,12 +11568,17 @@ static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf } /* Common logic for all BPF program types that attach to a btf_id */ -static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog) +static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog, + const struct bpf_trace_opts *opts) { + LIBBPF_OPTS(bpf_link_create_opts, link_opts); char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, pfd; + if (!OPTS_VALID(opts, bpf_trace_opts)) + return libbpf_err_ptr(-EINVAL); + prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { pr_warn("prog '%s': can't attach before loaded\n", prog->name); @@ -11586,7 +11591,8 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro link->detach = &bpf_link__detach_fd; /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */ - pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), NULL); + link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0); + pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts); if (pfd < 0) { pfd = -errno; free(link); @@ -11600,12 +11606,18 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog) { - return bpf_program__attach_btf_id(prog); + return bpf_program__attach_btf_id(prog, NULL); +} + +struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog, + const struct bpf_trace_opts *opts) +{ + return bpf_program__attach_btf_id(prog, opts); } struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog) { - return bpf_program__attach_btf_id(prog); + return 
bpf_program__attach_btf_id(prog, NULL); } static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 114b1f6f73a5..a1fb91810378 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -603,8 +603,20 @@ bpf_program__attach_tracepoint_opts(const struct bpf_program *prog, LIBBPF_API struct bpf_link * bpf_program__attach_raw_tracepoint(const struct bpf_program *prog, const char *tp_name); + +struct bpf_trace_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + /* custom user-provided value fetchable through bpf_get_attach_cookie() */ + __u64 cookie; +}; +#define bpf_trace_opts__last_field cookie + LIBBPF_API struct bpf_link * bpf_program__attach_trace(const struct bpf_program *prog); +LIBBPF_API struct bpf_link * +bpf_program__attach_trace_opts(const struct bpf_program *prog, const struct bpf_trace_opts *opts); + LIBBPF_API struct bpf_link * bpf_program__attach_lsm(const struct bpf_program *prog); LIBBPF_API struct bpf_link * diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index b5bc84039407..80819e26a976 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -447,6 +447,7 @@ LIBBPF_0.8.0 { bpf_object__destroy_subskeleton; bpf_object__open_subskeleton; bpf_program__attach_kprobe_multi_opts; + bpf_program__attach_trace_opts; bpf_program__attach_usdt; libbpf_register_prog_handler; libbpf_unregister_prog_handler; -- cgit v1.2.3 From 5eefe17c7ae41bac4d2d281669e8357a10f4d5a4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 10 May 2022 11:51:59 -0700 Subject: libbpf: Clean up ringbuf size adjustment implementation Drop unused iteration variable, move overflow prevention check into the for loop. 
Fixes: 0087a681fa8c ("libbpf: Automatically fix up BPF_MAP_TYPE_RINGBUF size, if necessary") Reported-by: Nathan Chancellor Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220510185159.754299-1-andrii@kernel.org --- tools/lib/bpf/libbpf.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a5904c0ac794..0e74fae25896 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4951,7 +4951,7 @@ static bool is_pow_of_2(size_t x) static size_t adjust_ringbuf_sz(size_t sz) { __u32 page_sz = sysconf(_SC_PAGE_SIZE); - __u32 i, mul; + __u32 mul; /* if user forgot to set any size, make sure they see error */ if (sz == 0) @@ -4967,9 +4967,7 @@ static size_t adjust_ringbuf_sz(size_t sz) * user-set size to satisfy both user size request and kernel * requirements and substitute correct max_entries for map creation. */ - for (i = 0, mul = 1; ; i++, mul <<= 1) { - if (mul > UINT_MAX / page_sz) /* prevent __u32 overflow */ - break; + for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) { if (mul * page_sz > sz) return mul * page_sz; } -- cgit v1.2.3 From b63b3c490eeeedd324e194929bd0aa8ba553f875 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 10 May 2022 09:46:57 +0200 Subject: libbpf: Add bpf_program__set_insns function Adding bpf_program__set_insns that allows to set new instructions for a BPF program. This is a very advanced libbpf API and users need to know what they are doing. This should be used from prog_prepare_load_fn callback only. We can have changed instructions after calling prog_prepare_load_fn callback, reloading them. One of the users of this new API will be perf's internal BPF prologue generation. 
Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220510074659.2557731-2-jolsa@kernel.org --- tools/lib/bpf/libbpf.c | 22 ++++++++++++++++++++++ tools/lib/bpf/libbpf.h | 18 ++++++++++++++++++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 41 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 0e74fae25896..4867a930628b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6860,6 +6860,8 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog prog->name, err); return err; } + insns = prog->insns; + insns_cnt = prog->insns_cnt; } if (obj->gen_loader) { @@ -8788,6 +8790,26 @@ size_t bpf_program__insn_cnt(const struct bpf_program *prog) return prog->insns_cnt; } +int bpf_program__set_insns(struct bpf_program *prog, + struct bpf_insn *new_insns, size_t new_insn_cnt) +{ + struct bpf_insn *insns; + + if (prog->obj->loaded) + return -EBUSY; + + insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns)); + if (!insns) { + pr_warn("prog '%s': failed to realloc prog code\n", prog->name); + return -ENOMEM; + } + memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns)); + + prog->insns = insns; + prog->insns_cnt = new_insn_cnt; + return 0; +} + int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, bpf_program_prep_t prep) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index a1fb91810378..21984dcd6dbe 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -323,6 +323,24 @@ struct bpf_insn; * different. */ LIBBPF_API const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog); + +/** + * @brief **bpf_program__set_insns()** can set BPF program's underlying + * BPF instructions. + * + * WARNING: This is a very advanced libbpf API and users need to know + * what they are doing. This should be used from prog_prepare_load_fn + * callback only. 
+ * + * @param prog BPF program for which to set instructions + * @param new_insns a pointer to an array of BPF instructions + * @param new_insn_cnt number of `struct bpf_insn`'s that form + * specified BPF program + * @return 0, on success; negative error code, otherwise + */ +LIBBPF_API int bpf_program__set_insns(struct bpf_program *prog, + struct bpf_insn *new_insns, size_t new_insn_cnt); + /** * @brief **bpf_program__insn_cnt()** returns number of `struct bpf_insn`'s * that form specified BPF program. diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 80819e26a976..008da8db1d94 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -449,6 +449,7 @@ LIBBPF_0.8.0 { bpf_program__attach_kprobe_multi_opts; bpf_program__attach_trace_opts; bpf_program__attach_usdt; + bpf_program__set_insns; libbpf_register_prog_handler; libbpf_unregister_prog_handler; } LIBBPF_0.7.0; -- cgit v1.2.3 From 737d0646a83cdc65c070a9de61a1ef106cca5ff1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 12 May 2022 15:07:12 -0700 Subject: libbpf: Add safer high-level wrappers for map operations Add high-level API wrappers for most common and typical BPF map operations that work directly on instances of struct bpf_map * (so you don't have to call bpf_map__fd()) and validate key/value size expectations. These helpers require users to specify key (and value, where appropriate) sizes when performing lookup/update/delete/etc. This forces user to actually think and validate (for themselves) those. This is a good thing as user is expected by kernel to implicitly provide correct key/value buffer sizes and kernel will just read/write necessary amount of data. If it so happens that user doesn't set up buffers correctly (which bit people for per-CPU maps especially) kernel either randomly overwrites stack data or returns -EFAULT, depending on user's luck and circumstances. These high-level APIs are meant to prevent such unpleasant and hard to debug bugs.
This patch also adds bpf_map_delete_elem_flags() low-level API and requires passing flags to bpf_map__delete_elem() API for consistency across all similar APIs, even though currently kernel doesn't expect any extra flags for BPF_MAP_DELETE_ELEM operation. List of map operations that get these high-level APIs: - bpf_map_lookup_elem; - bpf_map_update_elem; - bpf_map_delete_elem; - bpf_map_lookup_and_delete_elem; - bpf_map_get_next_key. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20220512220713.2617964-1-andrii@kernel.org --- tools/lib/bpf/bpf.c | 14 +++++++ tools/lib/bpf/bpf.h | 1 + tools/lib/bpf/libbpf.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.h | 104 +++++++++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.map | 6 +++ 5 files changed, 229 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 5660268e103f..4677644d80f4 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -639,6 +639,20 @@ int bpf_map_delete_elem(int fd, const void *key) return libbpf_err_errno(ret); } +int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags) +{ + union bpf_attr attr; + int ret; + + memset(&attr, 0, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + attr.flags = flags; + + ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); + return libbpf_err_errno(ret); +} + int bpf_map_get_next_key(int fd, const void *key, void *next_key) { union bpf_attr attr; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 34af2232928c..2e0d3731e4c0 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -244,6 +244,7 @@ LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, LIBBPF_API int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags); LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); +LIBBPF_API int bpf_map_delete_elem_flags(int fd, const 
void *key, __u64 flags); LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); LIBBPF_API int bpf_map_freeze(int fd); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4867a930628b..9aae886cbabf 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9949,6 +9949,110 @@ bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset) return libbpf_err_ptr(-ENOTSUP); } +static int validate_map_op(const struct bpf_map *map, size_t key_sz, + size_t value_sz, bool check_value_sz) +{ + if (map->fd <= 0) + return -ENOENT; + + if (map->def.key_size != key_sz) { + pr_warn("map '%s': unexpected key size %zu provided, expected %u\n", + map->name, key_sz, map->def.key_size); + return -EINVAL; + } + + if (!check_value_sz) + return 0; + + switch (map->def.type) { + case BPF_MAP_TYPE_PERCPU_ARRAY: + case BPF_MAP_TYPE_PERCPU_HASH: + case BPF_MAP_TYPE_LRU_PERCPU_HASH: + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: { + int num_cpu = libbpf_num_possible_cpus(); + size_t elem_sz = roundup(map->def.value_size, 8); + + if (value_sz != num_cpu * elem_sz) { + pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n", + map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz); + return -EINVAL; + } + break; + } + default: + if (map->def.value_size != value_sz) { + pr_warn("map '%s': unexpected value size %zu provided, expected %u\n", + map->name, value_sz, map->def.value_size); + return -EINVAL; + } + break; + } + return 0; +} + +int bpf_map__lookup_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags) +{ + int err; + + err = validate_map_op(map, key_sz, value_sz, true); + if (err) + return libbpf_err(err); + + return bpf_map_lookup_elem_flags(map->fd, key, value, flags); +} + +int bpf_map__update_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + const void *value, size_t value_sz, __u64 flags) +{ + int err; + + err = 
validate_map_op(map, key_sz, value_sz, true); + if (err) + return libbpf_err(err); + + return bpf_map_update_elem(map->fd, key, value, flags); +} + +int bpf_map__delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, __u64 flags) +{ + int err; + + err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); + if (err) + return libbpf_err(err); + + return bpf_map_delete_elem_flags(map->fd, key, flags); +} + +int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags) +{ + int err; + + err = validate_map_op(map, key_sz, value_sz, true); + if (err) + return libbpf_err(err); + + return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags); +} + +int bpf_map__get_next_key(const struct bpf_map *map, + const void *cur_key, void *next_key, size_t key_sz) +{ + int err; + + err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); + if (err) + return libbpf_err(err); + + return bpf_map_get_next_key(map->fd, cur_key, next_key); +} + long libbpf_get_error(const void *ptr) { if (!IS_ERR_OR_NULL(ptr)) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 21984dcd6dbe..9e9a3fd3edd8 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -990,6 +990,110 @@ LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd); LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map); +/** + * @brief **bpf_map__lookup_elem()** allows to lookup BPF map value + * corresponding to provided key. 
+ * @param map BPF map to lookup element in + * @param key pointer to memory containing bytes of the key used for lookup + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @param value pointer to memory in which looked up value will be stored + * @param value_sz size in bytes of value data memory; it has to match BPF map + * definition's **value_size**. For per-CPU BPF maps value size has to be + * a product of BPF map value size and number of possible CPUs in the system + * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for + * per-CPU values value size has to be aligned up to closest 8 bytes for + * alignment reasons, so expected size is: `round_up(value_size, 8) + * * libbpf_num_possible_cpus()`. + * @param flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise + * + * **bpf_map__lookup_elem()** is high-level equivalent of + * **bpf_map_lookup_elem()** API with added check for key and value size. + */ +LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags); + +/** + * @brief **bpf_map__update_elem()** allows to insert or update value in BPF + * map that corresponds to provided key. + * @param map BPF map to insert to or update element in + * @param key pointer to memory containing bytes of the key + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @param value pointer to memory containing bytes of the value + * @param value_sz size in bytes of value data memory; it has to match BPF map + * definition's **value_size**. For per-CPU BPF maps value size has to be + * a product of BPF map value size and number of possible CPUs in the system + * (could be fetched with **libbpf_num_possible_cpus()**).
Note also that for + * per-CPU values value size has to be aligned up to closest 8 bytes for + * alignment reasons, so expected size is: `round_up(value_size, 8) + * * libbpf_num_possible_cpus()`. + * @param flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise + * + * **bpf_map__update_elem()** is high-level equivalent of + * **bpf_map_update_elem()** API with added check for key and value size. + */ +LIBBPF_API int bpf_map__update_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + const void *value, size_t value_sz, __u64 flags); + +/** + * @brief **bpf_map__delete_elem()** allows to delete element in BPF map that + * corresponds to provided key. + * @param map BPF map to delete element from + * @param key pointer to memory containing bytes of the key + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @param flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise + * + * **bpf_map__delete_elem()** is high-level equivalent of + * **bpf_map_delete_elem()** API with added check for key size. + */ +LIBBPF_API int bpf_map__delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, __u64 flags); + +/** + * @brief **bpf_map__lookup_and_delete_elem()** allows to lookup BPF map value + * corresponding to provided key and atomically delete it afterwards. + * @param map BPF map to lookup element in + * @param key pointer to memory containing bytes of the key used for lookup + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @param value pointer to memory in which looked up value will be stored + * @param value_sz size in bytes of value data memory; it has to match BPF map + * definition's **value_size**.
For per-CPU BPF maps value size has to be + * a product of BPF map value size and number of possible CPUs in the system + * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for + * per-CPU values value size has to be aligned up to closest 8 bytes for + * alignment reasons, so expected size is: `round_up(value_size, 8) + * * libbpf_num_possible_cpus()`. + * @param flags extra flags passed to kernel for this operation + * @return 0, on success; negative error, otherwise + * + * **bpf_map__lookup_and_delete_elem()** is high-level equivalent of + * **bpf_map_lookup_and_delete_elem()** API with added check for key and value size. + */ +LIBBPF_API int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, + const void *key, size_t key_sz, + void *value, size_t value_sz, __u64 flags); + +/** + * @brief **bpf_map__get_next_key()** allows to iterate BPF map keys by + * fetching next key that follows current key. + * @param map BPF map to fetch next key from + * @param cur_key pointer to memory containing bytes of current key or NULL to + * fetch the first key + * @param next_key pointer to memory to write next key into + * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** + * @return 0, on success; -ENOENT if **cur_key** is the last key in BPF map; + * negative error, otherwise + * + * **bpf_map__get_next_key()** is high-level equivalent of + * **bpf_map_get_next_key()** API with added check for key size.
+ */ +LIBBPF_API int bpf_map__get_next_key(const struct bpf_map *map, + const void *cur_key, void *next_key, size_t key_sz); + /** * @brief **libbpf_get_error()** extracts the error code from the passed * pointer diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 008da8db1d94..6b36f46ab5d8 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -443,7 +443,13 @@ LIBBPF_0.7.0 { LIBBPF_0.8.0 { global: bpf_map__autocreate; + bpf_map__get_next_key; + bpf_map__delete_elem; + bpf_map__lookup_and_delete_elem; + bpf_map__lookup_elem; bpf_map__set_autocreate; + bpf_map__update_elem; + bpf_map_delete_elem_flags; bpf_object__destroy_subskeleton; bpf_object__open_subskeleton; bpf_program__attach_kprobe_multi_opts; -- cgit v1.2.3 From ac6a65868a5a45db49d5ee8524df3b701110d844 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 16 May 2022 11:45:47 -0700 Subject: libbpf: fix memory leak in attach_tp for target-less tracepoint program Fix sec_name memory leak if user defines target-less SEC("tp"). 
Fixes: 9af8efc45eb1 ("libbpf: Allow "incomplete" basic tracing SEC() definitions") Signed-off-by: Andrii Nakryiko Acked-by: David Vernet Link: https://lore.kernel.org/r/20220516184547.3204674-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 9aae886cbabf..ef7f302e542f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11592,16 +11592,16 @@ static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_lin { char *sec_name, *tp_cat, *tp_name; - sec_name = strdup(prog->sec_name); - if (!sec_name) - return -ENOMEM; - *link = NULL; /* no auto-attach for SEC("tp") or SEC("tracepoint") */ if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0) return 0; + sec_name = strdup(prog->sec_name); + if (!sec_name) + return -ENOMEM; + /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */ if (str_has_pfx(prog->sec_name, "tp/")) tp_cat = sec_name + sizeof("tp/") - 1; -- cgit v1.2.3 From 056431ae4d790efd0372c1daf93569fdd2814190 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 18 May 2022 11:59:13 -0700 Subject: libbpf: fix up global symbol counting logic Add the same negative ABS filter that we use in VERSIONED_SYM_COUNT to filter out ABS symbols like LIBBPF_0.8.0.
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20220518185915.3529475-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 64741c55b8e3..a1265b152027 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -127,7 +127,7 @@ TAGS_PROG := $(if $(shell which etags 2>/dev/null),etags,ctags) GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ sed 's/\[.*\]//' | \ - awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}' | \ sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ sed 's/\[.*\]//' | \ -- cgit v1.2.3 From e2371b1632b1c61c1fa726a17b82e6833a9e4d85 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 18 May 2022 11:59:14 -0700 Subject: libbpf: start 1.0 development cycle Start libbpf 1.0 development cycle by adding LIBBPF_1.0.0 section to libbpf.map file and marking all current symbols as local. As we remove all the deprecated APIs we'll populate global list before the final 1.0 release. 
Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20220518185915.3529475-3-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.map | 4 ++++ tools/lib/bpf/libbpf_version.h | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 6b36f46ab5d8..52973cffc20c 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -459,3 +459,7 @@ LIBBPF_0.8.0 { libbpf_register_prog_handler; libbpf_unregister_prog_handler; } LIBBPF_0.7.0; + +LIBBPF_1.0.0 { + local: *; +}; diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index 61f2039404b6..2fb2f4290080 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -3,7 +3,7 @@ #ifndef __LIBBPF_VERSION_H #define __LIBBPF_VERSION_H -#define LIBBPF_MAJOR_VERSION 0 -#define LIBBPF_MINOR_VERSION 8 +#define LIBBPF_MAJOR_VERSION 1 +#define LIBBPF_MINOR_VERSION 0 #endif /* __LIBBPF_VERSION_H */ -- cgit v1.2.3 From d16495a982324f75e8e65de01475f9533de1db7a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 18 May 2022 11:59:15 -0700 Subject: libbpf: remove bpf_create_map*() APIs To test API removal, get rid of bpf_create_map*() APIs. Perf defines __weak implementation of bpf_map_create() that redirects to old bpf_create_map() and that seems to compile and run fine. 
Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20220518185915.3529475-4-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf.c | 80 ----------------------------------------------------- tools/lib/bpf/bpf.h | 42 ---------------------------- 2 files changed, 122 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 4677644d80f4..240186aac8e6 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -208,86 +208,6 @@ int bpf_map_create(enum bpf_map_type map_type, return libbpf_err_errno(fd); } -int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) -{ - LIBBPF_OPTS(bpf_map_create_opts, p); - - p.map_flags = create_attr->map_flags; - p.numa_node = create_attr->numa_node; - p.btf_fd = create_attr->btf_fd; - p.btf_key_type_id = create_attr->btf_key_type_id; - p.btf_value_type_id = create_attr->btf_value_type_id; - p.map_ifindex = create_attr->map_ifindex; - if (create_attr->map_type == BPF_MAP_TYPE_STRUCT_OPS) - p.btf_vmlinux_value_type_id = create_attr->btf_vmlinux_value_type_id; - else - p.inner_map_fd = create_attr->inner_map_fd; - - return bpf_map_create(create_attr->map_type, create_attr->name, - create_attr->key_size, create_attr->value_size, - create_attr->max_entries, &p); -} - -int bpf_create_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, int max_entries, - __u32 map_flags, int node) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts); - - opts.map_flags = map_flags; - if (node >= 0) { - opts.numa_node = node; - opts.map_flags |= BPF_F_NUMA_NODE; - } - - return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts); -} - -int bpf_create_map(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags); - - return bpf_map_create(map_type, NULL, key_size, value_size, 
max_entries, &opts); -} - -int bpf_create_map_name(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, int max_entries, - __u32 map_flags) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags); - - return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts); -} - -int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int inner_map_fd, int max_entries, - __u32 map_flags, int node) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts); - - opts.inner_map_fd = inner_map_fd; - opts.map_flags = map_flags; - if (node >= 0) { - opts.map_flags |= BPF_F_NUMA_NODE; - opts.numa_node = node; - } - - return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts); -} - -int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, - int key_size, int inner_map_fd, int max_entries, - __u32 map_flags) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, - .inner_map_fd = inner_map_fd, - .map_flags = map_flags, - ); - - return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts); -} - static void * alloc_zero_tailing_info(const void *orecord, __u32 cnt, __u32 actual_rec_size, __u32 expected_rec_size) diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 2e0d3731e4c0..cabc03703e29 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -61,48 +61,6 @@ LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, __u32 max_entries, const struct bpf_map_create_opts *opts); -struct bpf_create_map_attr { - const char *name; - enum bpf_map_type map_type; - __u32 map_flags; - __u32 key_size; - __u32 value_size; - __u32 max_entries; - __u32 numa_node; - __u32 btf_fd; - __u32 btf_key_type_id; - __u32 btf_value_type_id; - __u32 map_ifindex; - union { - __u32 inner_map_fd; - __u32 btf_vmlinux_value_type_id; - }; -}; - -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); 
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, - int max_entries, __u32 map_flags, int node); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name, - int key_size, int value_size, - int max_entries, __u32 map_flags); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type, - const char *name, int key_size, - int inner_map_fd, int max_entries, - __u32 map_flags, int node); -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") -LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type, - const char *name, int key_size, - int inner_map_fd, int max_entries, - __u32 map_flags); - struct bpf_prog_load_opts { size_t sz; /* size of this struct for forward/backward compatibility */ -- cgit v1.2.3 From e696f6dbbf9d5c88922a4e2c9ee2ed9b495285ca Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 18 May 2022 20:20:03 -0700 Subject: perf cpumap: Add perf_cpu_map__for_each_idx() A variant of perf_cpu_map__for_each_cpu() that just iterates index values without the corresponding load of the CPU. 
Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Dave Marchevsky Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Kan Liang Cc: Lv Ruyi Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Quentin Monnet Cc: Song Liu Cc: Stephane Eranian Cc: Xing Zhengjun Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Link: https://lore.kernel.org/r/20220519032005.1273691-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/perf/cpumap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 4a2edbdb5e2b..24de795b09bb 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -31,4 +31,7 @@ LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_c (idx) < perf_cpu_map__nr(cpus); \ (idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx)) +#define perf_cpu_map__for_each_idx(idx, cpus) \ + for ((idx) = 0; (idx) < perf_cpu_map__nr(cpus); (idx)++) + #endif /* __LIBPERF_CPUMAP_H */ -- cgit v1.2.3 From 618ee7838e409513635320ca9c4c8d52c44f2dd0 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 20 May 2022 18:56:04 +0300 Subject: libperf: Add preadn() Add preadn() to provide pread() and readn() semantics. 
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Namhyung Kim Link: https://lore.kernel.org/r/ab8918a4-7ac8-a37e-2e2c-28438c422d87@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/internal/lib.h | 2 ++ tools/lib/perf/lib.c | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/perf/include/internal/lib.h b/tools/lib/perf/include/internal/lib.h index 5175d491b2d4..85471a4b900f 100644 --- a/tools/lib/perf/include/internal/lib.h +++ b/tools/lib/perf/include/internal/lib.h @@ -9,4 +9,6 @@ extern unsigned int page_size; ssize_t readn(int fd, void *buf, size_t n); ssize_t writen(int fd, const void *buf, size_t n); +ssize_t preadn(int fd, void *buf, size_t n, off_t offs); + #endif /* __LIBPERF_INTERNAL_CPUMAP_H */ diff --git a/tools/lib/perf/lib.c b/tools/lib/perf/lib.c index 18658931fc71..696fb0ea67c6 100644 --- a/tools/lib/perf/lib.c +++ b/tools/lib/perf/lib.c @@ -38,6 +38,26 @@ ssize_t readn(int fd, void *buf, size_t n) return ion(true, fd, buf, n); } +ssize_t preadn(int fd, void *buf, size_t n, off_t offs) +{ + size_t left = n; + + while (left) { + ssize_t ret = pread(fd, buf, left, offs); + + if (ret < 0 && errno == EINTR) + continue; + if (ret <= 0) + return ret; + + left -= ret; + buf += ret; + offs += ret; + } + + return n; +} + /* * Write exactly 'n' bytes or return an error. */ -- cgit v1.2.3 From bb412cf1d712656f27b2a08c492ed9d7591485aa Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 21 May 2022 13:11:21 +0200 Subject: libbpf: Fix typo in comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spelling mistake (triple letters) in comment. Detected with the help of Coccinelle. 
Signed-off-by: Julia Lawall Signed-off-by: Andrii Nakryiko Acked-by: Daniel Müller Link: https://lore.kernel.org/bpf/20220521111145.81697-71-Julia.Lawall@inria.fr --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ef7f302e542f..e89cc9c885b3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6873,7 +6873,7 @@ static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_prog } retry_load: - /* if log_level is zero, we don't request logs initiallly even if + /* if log_level is zero, we don't request logs initially even if * custom log_buf is specified; if the program load fails, then we'll * bump log_level to 1 and use either custom log_buf or we'll allocate * our own and retry the load to get details on what failed -- cgit v1.2.3 From 7be1fedd2a0a5b8f20952a675c611815254b74b6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:30 +0300 Subject: perf tools: Allow all_cpus to be a superset of user_requested_cpus To support collection of system-wide events with user requested CPUs, all_cpus must be a superset of user_requested_cpus. In order to support all_cpus to be a superset of user_requested_cpus, all_cpus must be used instead of user_requested_cpus when dealing with CPUs of all events instead of CPUs of requested events. 
Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-10-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 12 ++++++------ tools/perf/builtin-record.c | 18 ++++++++++++------ tools/perf/util/auxtrace.c | 2 +- 3 files changed, 19 insertions(+), 13 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index ed66f2e38464..ec0e4b5da874 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -298,7 +298,7 @@ add: int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) { - int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus); + int nr_cpus = perf_cpu_map__nr(evlist->all_cpus); int nr_threads = perf_thread_map__nr(evlist->threads); int nfds = 0; struct perf_evsel *evsel; @@ -430,7 +430,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, int idx, struct perf_mmap_param *mp, int cpu_idx, int thread, int *_output, int *_output_overwrite) { - struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->user_requested_cpus, cpu_idx); + struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->all_cpus, cpu_idx); struct perf_evsel *evsel; int revent; @@ -540,7 +540,7 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, struct perf_mmap_param *mp) { int nr_threads = perf_thread_map__nr(evlist->threads); - int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus); + int nr_cpus = perf_cpu_map__nr(evlist->all_cpus); int cpu, thread; for (cpu = 0; cpu < nr_cpus; cpu++) { @@ -565,8 +565,8 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist) { int nr_mmaps; - nr_mmaps = perf_cpu_map__nr(evlist->user_requested_cpus); - if (perf_cpu_map__empty(evlist->user_requested_cpus)) + nr_mmaps = perf_cpu_map__nr(evlist->all_cpus); + if (perf_cpu_map__empty(evlist->all_cpus)) nr_mmaps = 
perf_thread_map__nr(evlist->threads); return nr_mmaps; @@ -577,7 +577,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, struct perf_mmap_param *mp) { struct perf_evsel *evsel; - const struct perf_cpu_map *cpus = evlist->user_requested_cpus; + const struct perf_cpu_map *cpus = evlist->all_cpus; if (!ops || !ops->get || !ops->mmap) return -EINVAL; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index c8a79f3a8dff..cf9a7ce429df 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -967,14 +967,20 @@ static void record__thread_data_close_pipes(struct record_thread *thread_data) } } +static bool evlist__per_thread(struct evlist *evlist) +{ + return cpu_map__is_dummy(evlist->core.user_requested_cpus); +} + static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist) { int m, tm, nr_mmaps = evlist->core.nr_mmaps; struct mmap *mmap = evlist->mmap; struct mmap *overwrite_mmap = evlist->overwrite_mmap; - struct perf_cpu_map *cpus = evlist->core.user_requested_cpus; + struct perf_cpu_map *cpus = evlist->core.all_cpus; + bool per_thread = evlist__per_thread(evlist); - if (cpu_map__is_dummy(cpus)) + if (per_thread) thread_data->nr_mmaps = nr_mmaps; else thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, @@ -995,7 +1001,7 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps); for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) { - if (cpu_map__is_dummy(cpus) || + if (per_thread || test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) { if (thread_data->maps) { thread_data->maps[tm] = &mmap[m]; @@ -1870,7 +1876,7 @@ static int record__synthesize(struct record *rec, bool tail) return err; } - err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.user_requested_cpus, + err = perf_event__synthesize_cpu_map(&rec->tool, 
rec->evlist->core.all_cpus, process_synthesized_event, NULL); if (err < 0) { pr_err("Couldn't synthesize cpu map.\n"); @@ -3668,12 +3674,12 @@ static int record__init_thread_default_masks(struct record *rec, struct perf_cpu static int record__init_thread_masks(struct record *rec) { int ret = 0; - struct perf_cpu_map *cpus = rec->evlist->core.user_requested_cpus; + struct perf_cpu_map *cpus = rec->evlist->core.all_cpus; if (!record__threads_enabled(rec)) return record__init_thread_default_masks(rec, cpus); - if (cpu_map__is_dummy(cpus)) { + if (evlist__per_thread(rec->evlist)) { pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); return -EINVAL; } diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index ac4e4660932d..511dd3caa1bc 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -181,7 +181,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, mp->idx = idx; if (per_cpu) { - mp->cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, idx); + mp->cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx); if (evlist->core.threads) mp->tid = perf_thread_map__pid(evlist->core.threads, 0); else -- cgit v1.2.3 From ae4f8ae16a07896403c90305d4b9be27f657c1fc Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:31 +0300 Subject: libperf evlist: Allow mixing per-thread and per-cpu mmaps mmap_per_evsel() will skip events that do not match the CPU, so all CPUs can be iterated in any case. 
Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-11-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 36 +++++++----------------------------- 1 file changed, 7 insertions(+), 29 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index ec0e4b5da874..eae1f6179dad 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -512,29 +512,6 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, return 0; } -static int -mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, - struct perf_mmap_param *mp) -{ - int thread; - int nr_threads = perf_thread_map__nr(evlist->threads); - - for (thread = 0; thread < nr_threads; thread++) { - int output = -1; - int output_overwrite = -1; - - if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread, - &output, &output_overwrite)) - goto out_unmap; - } - - return 0; - -out_unmap: - perf_evlist__munmap(evlist); - return -1; -} - static int mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, struct perf_mmap_param *mp) @@ -565,9 +542,14 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist) { int nr_mmaps; + /* One for each CPU */ nr_mmaps = perf_cpu_map__nr(evlist->all_cpus); - if (perf_cpu_map__empty(evlist->all_cpus)) - nr_mmaps = perf_thread_map__nr(evlist->threads); + if (perf_cpu_map__empty(evlist->all_cpus)) { + /* Plus one for each thread */ + nr_mmaps += perf_thread_map__nr(evlist->threads); + /* Minus the per-thread CPU (-1) */ + nr_mmaps -= 1; + } return nr_mmaps; } @@ -577,7 +559,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, struct perf_mmap_param *mp) { struct perf_evsel *evsel; - const struct perf_cpu_map *cpus = evlist->all_cpus; if (!ops || !ops->get || !ops->mmap) return -EINVAL; @@ -596,9 +577,6 @@ int 
perf_evlist__mmap_ops(struct perf_evlist *evlist, if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; - if (perf_cpu_map__empty(cpus)) - return mmap_per_thread(evlist, ops, mp); - return mmap_per_cpu(evlist, ops, mp); } -- cgit v1.2.3 From 4ce47d842d4c16c07b135b8a7975b8f0672bcc0e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:32 +0300 Subject: libperf evlist: Check nr_mmaps is correct Print an error message if the predetermined number of mmaps is incorrect. Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-12-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index eae1f6179dad..f51fdb899d19 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -23,6 +23,7 @@ #include #include #include +#include "internal.h" void perf_evlist__init(struct perf_evlist *evlist) { @@ -428,7 +429,7 @@ static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_ static int mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, int idx, struct perf_mmap_param *mp, int cpu_idx, - int thread, int *_output, int *_output_overwrite) + int thread, int *_output, int *_output_overwrite, int *nr_mmaps) { struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->all_cpus, cpu_idx); struct perf_evsel *evsel; @@ -484,6 +485,8 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, if (ops->mmap(map, mp, *output, evlist_cpu) < 0) return -1; + *nr_mmaps += 1; + if (!idx) perf_evlist__set_mmap_first(evlist, map, overwrite); } else { @@ -518,6 +521,7 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, { int nr_threads = 
perf_thread_map__nr(evlist->threads); int nr_cpus = perf_cpu_map__nr(evlist->all_cpus); + int nr_mmaps = 0; int cpu, thread; for (cpu = 0; cpu < nr_cpus; cpu++) { @@ -526,11 +530,14 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, for (thread = 0; thread < nr_threads; thread++) { if (mmap_per_evsel(evlist, ops, cpu, mp, cpu, - thread, &output, &output_overwrite)) + thread, &output, &output_overwrite, &nr_mmaps)) goto out_unmap; } } + if (nr_mmaps != evlist->nr_mmaps) + pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps); + return 0; out_unmap: -- cgit v1.2.3 From d3345fecf9e5f63be7946a1e5bf1f5695c67b445 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:33 +0300 Subject: perf stat: Add requires_cpu flag for uncore Uncore events require a CPU i.e. it cannot be -1. The evsel system_wide flag is intended for events that should be on every CPU, which does not make sense for uncore events because uncore events do not map one-to-one with CPUs. These 2 requirements are not exactly the same, so introduce a new flag 'requires_cpu' for the uncore case. 
Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-13-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 4 +++- tools/lib/perf/include/internal/evsel.h | 1 + tools/perf/builtin-stat.c | 5 +---- tools/perf/util/evsel.c | 1 + tools/perf/util/parse-events.c | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index f51fdb899d19..1c801f8da44f 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -43,7 +43,9 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, if (!evsel->own_cpus || evlist->has_user_cpus) { perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); - } else if (!evsel->system_wide && perf_cpu_map__empty(evlist->user_requested_cpus)) { + } else if (!evsel->system_wide && + !evsel->requires_cpu && + perf_cpu_map__empty(evlist->user_requested_cpus)) { perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); } else if (evsel->cpus != evsel->own_cpus) { diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index cfc9ebd7968e..77fbb8b97e5c 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -50,6 +50,7 @@ struct perf_evsel { /* parse modifier helper */ int nr_members; bool system_wide; + bool requires_cpu; int idx; }; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7e6cc8bdf061..4ce87a8eb7d7 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -382,9 +382,6 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_ if (!counter->supported) return -ENOENT; - if (counter->core.system_wide) - nthreads = 1; - for (thread = 0; thread < nthreads; 
thread++) { struct perf_counts_values *count; @@ -2261,7 +2258,7 @@ static void setup_system_wide(int forks) struct evsel *counter; evlist__for_each_entry(evsel_list, counter) { - if (!counter->core.system_wide && + if (!counter->core.requires_cpu && strcmp(counter->name, "duration_time")) { return; } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ef169ad15236..050b1c69a738 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -409,6 +409,7 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->core.threads = perf_thread_map__get(orig->core.threads); evsel->core.nr_members = orig->core.nr_members; evsel->core.system_wide = orig->core.system_wide; + evsel->core.requires_cpu = orig->core.requires_cpu; if (orig->name) { evsel->name = strdup(orig->name); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 30a9d915853d..7ed235740431 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -365,7 +365,7 @@ __add_event(struct list_head *list, int *idx, (*idx)++; evsel->core.cpus = cpus; evsel->core.own_cpus = perf_cpu_map__get(cpus); - evsel->core.system_wide = pmu ? pmu->is_uncore : false; + evsel->core.requires_cpu = pmu ? 
pmu->is_uncore : false; evsel->auto_merge_stats = auto_merge_stats; if (name) -- cgit v1.2.3 From f5fb6d4efe15a2f0d2c0c175c3827ac594023996 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:34 +0300 Subject: libperf evsel: Add comments for booleans Add comments for 'system_wide' and 'requires_cpu' booleans Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Ian Rogers Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-14-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/internal/evsel.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index 77fbb8b97e5c..2a912a1f1989 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -49,7 +49,17 @@ struct perf_evsel { /* parse modifier helper */ int nr_members; + /* + * system_wide is for events that need to be on every CPU, irrespective + * of user requested CPUs or threads. Map propagation will set cpus to + * this event's own_cpus, whereby they will contribute to evlist + * all_cpus. + */ bool system_wide; + /* + * Some events, for example uncore events, require a CPU. + * i.e. it cannot be the 'any CPU' value of -1. + */ bool requires_cpu; int idx; }; -- cgit v1.2.3 From 298613b8e3f68a1aef2370cd6a9dad462b6c0457 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:35 +0300 Subject: perf tools: Allow system-wide events to keep their own CPUs Currently, user_requested_cpus supplants system-wide CPUs when the evlist has_user_cpus. Change that so that system-wide events retain their own CPUs and they are added to all_cpus. 
Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-15-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 1c801f8da44f..9a6801b53274 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -40,12 +40,11 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, * We already have cpus for evsel (via PMU sysfs) so * keep it, if there's no target cpu list defined. */ - if (!evsel->own_cpus || evlist->has_user_cpus) { - perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); - } else if (!evsel->system_wide && - !evsel->requires_cpu && - perf_cpu_map__empty(evlist->user_requested_cpus)) { + if (!evsel->own_cpus || + (!evsel->system_wide && evlist->has_user_cpus) || + (!evsel->system_wide && + !evsel->requires_cpu && + perf_cpu_map__empty(evlist->user_requested_cpus))) { perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); } else if (evsel->cpus != evsel->own_cpus) { -- cgit v1.2.3 From a41e24f6c3ffdd001f976f9bd76634f2163715f5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 24 May 2022 10:54:36 +0300 Subject: perf tools: Allow system-wide events to keep their own threads System-wide events do not have threads, so do not propagate threads to them. 
Signed-off-by: Adrian Hunter Acked-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexey Bayduraev Cc: Jiri Olsa Cc: Leo Yan Link: https://lore.kernel.org/r/20220524075436.29144-16-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 9a6801b53274..e6c98a6e3908 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -52,8 +52,11 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, evsel->cpus = perf_cpu_map__get(evsel->own_cpus); } - perf_thread_map__put(evsel->threads); - evsel->threads = perf_thread_map__get(evlist->threads); + if (!evsel->system_wide) { + perf_thread_map__put(evsel->threads); + evsel->threads = perf_thread_map__get(evlist->threads); + } + evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus); } -- cgit v1.2.3 From 005f17007f47495dbbb659aa5db7e581065d16e7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 18 May 2022 13:52:22 -0700 Subject: bitmap: Fix return values to be unsigned Both nodemask and bitmap routines had mixed return values that provided potentially signed return values that could never happen. This was leading to the compiler getting confused about the range of possible return values (it was thinking things could be negative where they could not be). In preparation for fixing nodemask, fix all the bitmap routines that should be returning unsigned (or bool) values.
Cc: Yury Norov Cc: Rasmus Villemoes Cc: Christophe de Dinechin Cc: Alexey Dobriyan Cc: Andy Shevchenko Cc: Andrew Morton Cc: Zhen Lei Signed-off-by: Kees Cook Signed-off-by: Yury Norov --- include/linux/bitmap.h | 25 +++++++++++++------------ lib/bitmap.c | 30 +++++++++++++++--------------- tools/include/linux/bitmap.h | 17 +++++++++-------- tools/lib/bitmap.c | 20 ++++++++++---------- 4 files changed, 47 insertions(+), 45 deletions(-) (limited to 'tools/lib') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 71147b7d721b..2e6cd5681040 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -134,8 +134,8 @@ unsigned long *devm_bitmap_zalloc(struct device *dev, * lib/bitmap.c provides these functions: */ -int __bitmap_equal(const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int nbits); +bool __bitmap_equal(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); bool __pure __bitmap_or_equal(const unsigned long *src1, const unsigned long *src2, const unsigned long *src3, @@ -159,10 +159,10 @@ int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, void __bitmap_replace(unsigned long *dst, const unsigned long *old, const unsigned long *new, const unsigned long *mask, unsigned int nbits); -int __bitmap_intersects(const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int nbits); -int __bitmap_subset(const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int nbits); +bool __bitmap_intersects(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); +bool __bitmap_subset(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits); void __bitmap_set(unsigned long *map, unsigned int start, int len); void __bitmap_clear(unsigned long *map, unsigned int start, int len); @@ -353,8 +353,8 @@ static inline void bitmap_complement(unsigned long 
*dst, const unsigned long *sr #endif #define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1) -static inline int bitmap_equal(const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static inline bool bitmap_equal(const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); @@ -384,8 +384,9 @@ static inline bool bitmap_or_equal(const unsigned long *src1, return !(((*src1 | *src2) ^ *src3) & BITMAP_LAST_WORD_MASK(nbits)); } -static inline int bitmap_intersects(const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static inline bool bitmap_intersects(const unsigned long *src1, + const unsigned long *src2, + unsigned int nbits) { if (small_const_nbits(nbits)) return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; @@ -393,8 +394,8 @@ static inline int bitmap_intersects(const unsigned long *src1, return __bitmap_intersects(src1, src2, nbits); } -static inline int bitmap_subset(const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static inline bool bitmap_subset(const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); diff --git a/lib/bitmap.c b/lib/bitmap.c index 4061a5dd2bc6..b18e31ea6e66 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -45,19 +45,19 @@ * for the best explanations of this ordering. 
*/ -int __bitmap_equal(const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int bits) +bool __bitmap_equal(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits) { unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] != bitmap2[k]) - return 0; + return false; if (bits % BITS_PER_LONG) if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) - return 0; + return false; - return 1; + return true; } EXPORT_SYMBOL(__bitmap_equal); @@ -303,33 +303,33 @@ void __bitmap_replace(unsigned long *dst, } EXPORT_SYMBOL(__bitmap_replace); -int __bitmap_intersects(const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int bits) +bool __bitmap_intersects(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits) { unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] & bitmap2[k]) - return 1; + return true; if (bits % BITS_PER_LONG) if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) - return 1; - return 0; + return true; + return false; } EXPORT_SYMBOL(__bitmap_intersects); -int __bitmap_subset(const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int bits) +bool __bitmap_subset(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits) { unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] & ~bitmap2[k]) - return 0; + return false; if (bits % BITS_PER_LONG) if ((bitmap1[k] & ~bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) - return 0; - return 1; + return false; + return true; } EXPORT_SYMBOL(__bitmap_subset); diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index ea97804d04d4..afdf93bebaaf 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -16,11 +16,11 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); int __bitmap_and(unsigned long *dst, const unsigned 
long *bitmap1, const unsigned long *bitmap2, unsigned int bits); -int __bitmap_equal(const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int bits); +bool __bitmap_equal(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits); void bitmap_clear(unsigned long *map, unsigned int start, int len); -int __bitmap_intersects(const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int bits); +bool __bitmap_intersects(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits); #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) @@ -162,8 +162,8 @@ static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, #define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1) #define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) -static inline int bitmap_equal(const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static inline bool bitmap_equal(const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits)); @@ -173,8 +173,9 @@ static inline int bitmap_equal(const unsigned long *src1, return __bitmap_equal(src1, src2, nbits); } -static inline int bitmap_intersects(const unsigned long *src1, - const unsigned long *src2, unsigned int nbits) +static inline bool bitmap_intersects(const unsigned long *src1, + const unsigned long *src2, + unsigned int nbits) { if (small_const_nbits(nbits)) return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c index db466ef7be9d..354f8cdc0880 100644 --- a/tools/lib/bitmap.c +++ b/tools/lib/bitmap.c @@ -72,31 +72,31 @@ int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, return result != 0; } -int __bitmap_equal(const unsigned long *bitmap1, - const unsigned long 
*bitmap2, unsigned int bits) +bool __bitmap_equal(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits) { unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] != bitmap2[k]) - return 0; + return false; if (bits % BITS_PER_LONG) if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) - return 0; + return false; - return 1; + return true; } -int __bitmap_intersects(const unsigned long *bitmap1, - const unsigned long *bitmap2, unsigned int bits) +bool __bitmap_intersects(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits) { unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] & bitmap2[k]) - return 1; + return true; if (bits % BITS_PER_LONG) if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) - return 1; - return 0; + return true; + return false; } -- cgit v1.2.3 From 94725994cfd768b9ee1bd06f15c252694b1e9b89 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 8 Jun 2022 22:23:52 -0700 Subject: libperf evsel: Open shouldn't leak fd on failure If perf_event_open() fails the fd is opened but it is only freed by closing (not by delete). Typically when an open fails you don't call close and so this results in a memory leak. To avoid this, add a close when open fails. 
Signed-off-by: Ian Rogers Reviewed-By: Kajol Jain Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Anshuman Khandual Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Rob Herring Cc: Stephane Eranian Link: https://lore.kernel.org/r/20220609052355.1300162-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evsel.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index c1d58673f6ef..952f3520d5c2 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -149,23 +149,30 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, int fd, group_fd, *evsel_fd; evsel_fd = FD(evsel, idx, thread); - if (evsel_fd == NULL) - return -EINVAL; + if (evsel_fd == NULL) { + err = -EINVAL; + goto out; + } err = get_group_fd(evsel, idx, thread, &group_fd); if (err < 0) - return err; + goto out; fd = sys_perf_event_open(&evsel->attr, threads->map[thread].pid, cpu, group_fd, 0); - if (fd < 0) - return -errno; + if (fd < 0) { + err = -errno; + goto out; + } *evsel_fd = fd; } } +out: + if (err) + perf_evsel__close(evsel); return err; } -- cgit v1.2.3