From 1ce3a60e3c287479f15147ffc61c35b2e436a0d5 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 30 Mar 2022 16:26:36 +0100 Subject: libbpf: auto-resolve programs/libraries when necessary for uprobes bpf_program__attach_uprobe_opts() requires a binary_path argument specifying binary to instrument. Supporting simply specifying "libc.so.6" or "foo" should be possible too. Library search checks LD_LIBRARY_PATH, then /usr/lib64, /usr/lib. This allows users to run BPF programs prefixed with LD_LIBRARY_PATH=/path2/lib while still searching standard locations. Similarly for non .so files, we check PATH and /usr/bin, /usr/sbin. Path determination will be useful for auto-attach of BPF uprobe programs using SEC() definition. Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1648654000-21758-2-git-send-email-alan.maguire@oracle.com --- tools/lib/bpf/libbpf.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) (limited to 'tools/lib/bpf') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 809fe209cdcc..ba6b61336bc7 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10517,6 +10517,46 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, return pfd; } +/* Get full path to program/shared library. */ +static int resolve_full_path(const char *file, char *result, size_t result_sz) +{ + const char *search_paths[2]; + int i; + + if (strstr(file, ".so")) { + search_paths[0] = getenv("LD_LIBRARY_PATH"); + search_paths[1] = "/usr/lib64:/usr/lib"; + } else { + search_paths[0] = getenv("PATH"); + search_paths[1] = "/usr/bin:/usr/sbin"; + } + + for (i = 0; i < ARRAY_SIZE(search_paths); i++) { + const char *s; + + if (!search_paths[i]) + continue; + for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) { + char *next_path; + int seg_len; + + if (s[0] == ':') + s++; + next_path = strchr(s, ':'); + seg_len = next_path ? next_path - s : strlen(s); + if (!seg_len) + continue; + snprintf(result, result_sz, "%.*s/%s", seg_len, s, file); + /* ensure it is an executable file/link */ + if (access(result, R_OK | X_OK) < 0) + continue; + pr_debug("resolved '%s' to '%s'\n", file, result); + return 0; + } + } + return -ENOENT; +} + LIBBPF_API struct bpf_link * bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, @@ -10524,6 +10564,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; + char full_binary_path[PATH_MAX]; struct bpf_link *link; size_t ref_ctr_off; int pfd, err; @@ -10536,12 +10577,23 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0); pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0); + if (binary_path && !strchr(binary_path, '/')) { + err = resolve_full_path(binary_path, full_binary_path, + sizeof(full_binary_path)); + if (err) { + pr_warn("prog '%s': failed to resolve full path for '%s'\n", + prog->name, binary_path); + return libbpf_err_ptr(err); + } + binary_path = full_binary_path; + } + legacy = determine_uprobe_perf_type() < 0; if (!legacy) { pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, func_offset, pid, ref_ctr_off); } else { - char probe_name[512]; + char probe_name[PATH_MAX + 64]; if (ref_ctr_off) return libbpf_err_ptr(-EINVAL); -- cgit v1.2.3 From 433966e3ae04165811b116af492a684bad7a158c Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 30 Mar 2022 16:26:37 +0100 Subject: libbpf: Support function name-based attach uprobes kprobe attach is name-based, using lookups of kallsyms to translate a function name to an address. Currently uprobe attach is done via an offset value as described in [1]. Extend uprobe opts for attach to include a function name which can then be converted into a uprobe-friendly offset. The calcualation is done in several steps: 1. First, determine the symbol address using libelf; this gives us the offset as reported by objdump 2. If the function is a shared library function - and the binary provided is a shared library - no further work is required; the address found is the required address 3. Finally, if the function is local, subtract the base address associated with the object, retrieved from ELF program headers. The resultant value is then added to the func_offset value passed in to specify the uprobe attach address. So specifying a func_offset of 0 along with a function name "printf" will attach to printf entry. The modes of operation supported are then 1. to attach to a local function in a binary; function "foo1" in "/usr/bin/foo" 2. to attach to a shared library function in a shared library - function "malloc" in libc. [1] https://www.kernel.org/doc/html/latest/trace/uprobetracer.html Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1648654000-21758-3-git-send-email-alan.maguire@oracle.com --- tools/lib/bpf/libbpf.c | 204 +++++++++++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.h | 10 ++- 2 files changed, 213 insertions(+), 1 deletion(-) (limited to 'tools/lib/bpf') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ba6b61336bc7..f6e5a0217841 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -10517,6 +10517,195 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, return pfd; } +/* uprobes deal in relative offsets; subtract the base address associated with + * the mapped binary. See Documentation/trace/uprobetracer.rst for more + * details. + */ +static long elf_find_relative_offset(const char *filename, Elf *elf, long addr) +{ + size_t n; + int i; + + if (elf_getphdrnum(elf, &n)) { + pr_warn("elf: failed to find program headers for '%s': %s\n", filename, + elf_errmsg(-1)); + return -ENOENT; + } + + for (i = 0; i < n; i++) { + int seg_start, seg_end, seg_offset; + GElf_Phdr phdr; + + if (!gelf_getphdr(elf, i, &phdr)) { + pr_warn("elf: failed to get program header %d from '%s': %s\n", i, filename, + elf_errmsg(-1)); + return -ENOENT; + } + if (phdr.p_type != PT_LOAD || !(phdr.p_flags & PF_X)) + continue; + + seg_start = phdr.p_vaddr; + seg_end = seg_start + phdr.p_memsz; + seg_offset = phdr.p_offset; + if (addr >= seg_start && addr < seg_end) + return addr - seg_start + seg_offset; + } + pr_warn("elf: failed to find prog header containing 0x%lx in '%s'\n", addr, filename); + return -ENOENT; +} + +/* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */ +static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn) +{ + while ((scn = elf_nextscn(elf, scn)) != NULL) { + GElf_Shdr sh; + + if (!gelf_getshdr(scn, &sh)) + continue; + if (sh.sh_type == sh_type) + return scn; + } + return NULL; +} + +/* Find offset of function name in object specified by path. "name" matches + * symbol name or name@@LIB for library functions. + */ +static long elf_find_func_offset(const char *binary_path, const char *name) +{ + int fd, i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; + bool is_shared_lib, is_name_qualified; + char errmsg[STRERR_BUFSIZE]; + long ret = -ENOENT; + size_t name_len; + GElf_Ehdr ehdr; + Elf *elf; + + fd = open(binary_path, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + ret = -errno; + pr_warn("failed to open %s: %s\n", binary_path, + libbpf_strerror_r(ret, errmsg, sizeof(errmsg))); + return ret; + } + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (!elf) { + pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1)); + close(fd); + return -LIBBPF_ERRNO__FORMAT; + } + if (!gelf_getehdr(elf, &ehdr)) { + pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1)); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } + /* for shared lib case, we do not need to calculate relative offset */ + is_shared_lib = ehdr.e_type == ET_DYN; + + name_len = strlen(name); + /* Does name specify "@@LIB"? */ + is_name_qualified = strstr(name, "@@") != NULL; + + /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if + * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically + * linked binary may not have SHT_DYMSYM, so absence of a section should not be + * reported as a warning/error. + */ + for (i = 0; i < ARRAY_SIZE(sh_types); i++) { + size_t nr_syms, strtabidx, idx; + Elf_Data *symbols = NULL; + Elf_Scn *scn = NULL; + int last_bind = -1; + const char *sname; + GElf_Shdr sh; + + scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL); + if (!scn) { + pr_debug("elf: failed to find symbol table ELF sections in '%s'\n", + binary_path); + continue; + } + if (!gelf_getshdr(scn, &sh)) + continue; + strtabidx = sh.sh_link; + symbols = elf_getdata(scn, 0); + if (!symbols) { + pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n", + binary_path, elf_errmsg(-1)); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } + nr_syms = symbols->d_size / sh.sh_entsize; + + for (idx = 0; idx < nr_syms; idx++) { + int curr_bind; + GElf_Sym sym; + + if (!gelf_getsym(symbols, idx, &sym)) + continue; + + if (GELF_ST_TYPE(sym.st_info) != STT_FUNC) + continue; + + sname = elf_strptr(elf, strtabidx, sym.st_name); + if (!sname) + continue; + + curr_bind = GELF_ST_BIND(sym.st_info); + + /* User can specify func, func@@LIB or func@@LIB_VERSION. */ + if (strncmp(sname, name, name_len) != 0) + continue; + /* ...but we don't want a search for "foo" to match 'foo2" also, so any + * additional characters in sname should be of the form "@@LIB". + */ + if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@') + continue; + + if (ret >= 0) { + /* handle multiple matches */ + if (last_bind != STB_WEAK && curr_bind != STB_WEAK) { + /* Only accept one non-weak bind. */ + pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n", + sname, name, binary_path); + ret = -LIBBPF_ERRNO__FORMAT; + goto out; + } else if (curr_bind == STB_WEAK) { + /* already have a non-weak bind, and + * this is a weak bind, so ignore. + */ + continue; + } + } + ret = sym.st_value; + last_bind = curr_bind; + } + /* For binaries that are not shared libraries, we need relative offset */ + if (ret > 0 && !is_shared_lib) + ret = elf_find_relative_offset(binary_path, elf, ret); + if (ret > 0) + break; + } + + if (ret > 0) { + pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path, + ret); + } else { + if (ret == 0) { + pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path, + is_shared_lib ? "should not be 0 in a shared library" : + "try using shared library path instead"); + ret = -ENOENT; + } else { + pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path); + } + } +out: + elf_end(elf); + close(fd); + return ret; +} + /* Get full path to program/shared library. */ static int resolve_full_path(const char *file, char *result, size_t result_sz) { @@ -10569,6 +10758,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, size_t ref_ctr_off; int pfd, err; bool retprobe, legacy; + const char *func_name; if (!OPTS_VALID(opts, bpf_uprobe_opts)) return libbpf_err_ptr(-EINVAL); @@ -10587,6 +10777,20 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, } binary_path = full_binary_path; } + func_name = OPTS_GET(opts, func_name, NULL); + if (func_name) { + long sym_off; + + if (!binary_path) { + pr_warn("prog '%s': name-based attach requires binary_path\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + sym_off = elf_find_func_offset(binary_path, func_name); + if (sym_off < 0) + return libbpf_err_ptr(sym_off); + func_offset += sym_off; + } legacy = determine_uprobe_perf_type() < 0; if (!legacy) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 05dde85e19a6..28cd2062d0df 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -459,9 +459,17 @@ struct bpf_uprobe_opts { __u64 bpf_cookie; /* uprobe is return probe, invoked at function return time */ bool retprobe; + /* Function name to attach to. Could be an unqualified ("abc") or library-qualified + * "abc@LIBXYZ" name. To specify function entry, func_name should be set while + * func_offset argument to bpf_prog__attach_uprobe_opts() should be 0. To trace an + * offset within a function, specify func_name and use func_offset argument to specify + * offset within the function. Shared library functions must specify the shared library + * binary_path. + */ + const char *func_name; size_t :0; }; -#define bpf_uprobe_opts__last_field retprobe +#define bpf_uprobe_opts__last_field func_name /** * @brief **bpf_program__attach_uprobe()** attaches a BPF program -- cgit v1.2.3 From 39f8dc43b7a05ab0e352655a14a9d613c2308b92 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 30 Mar 2022 16:26:38 +0100 Subject: libbpf: Add auto-attach for uprobes based on section name Now that u[ret]probes can use name-based specification, it makes sense to add support for auto-attach based on SEC() definition. The format proposed is SEC("u[ret]probe/binary:[raw_offset|[function_name[+offset]]") For example, to trace malloc() in libc: SEC("uprobe/libc.so.6:malloc") ...or to trace function foo2 in /usr/bin/foo: SEC("uprobe//usr/bin/foo:foo2") Auto-attach is done for all tasks (pid -1). prog can be an absolute path or simply a program/library name; in the latter case, we use PATH/LD_LIBRARY_PATH to resolve the full path, falling back to standard locations (/usr/bin:/usr/sbin or /usr/lib64:/usr/lib) if the file is not found via environment-variable specified locations. Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/1648654000-21758-4-git-send-email-alan.maguire@oracle.com --- tools/lib/bpf/libbpf.c | 74 +++++++++++++++++++++- .../testing/selftests/bpf/progs/test_bpf_cookie.c | 4 +- .../selftests/bpf/progs/test_task_pt_regs.c | 2 +- tools/testing/selftests/bpf/progs/trigger_bench.c | 2 +- 4 files changed, 76 insertions(+), 6 deletions(-) (limited to 'tools/lib/bpf') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index f6e5a0217841..6d2be53e4ba9 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8630,6 +8630,7 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log } static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link); @@ -8642,9 +8643,9 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX), SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX), SEC_DEF("kprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), - SEC_DEF("uprobe/", KPROBE, 0, SEC_NONE), + SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe), - SEC_DEF("uretprobe/", KPROBE, 0, SEC_NONE), + SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe), SEC_DEF("kprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), SEC_DEF("kretprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), @@ -10845,6 +10846,75 @@ err_out: } +/* Format of u[ret]probe section definition supporting auto-attach: + * u[ret]probe/binary:function[+offset] + * + * binary can be an absolute/relative path or a filename; the latter is resolved to a + * full binary path via bpf_program__attach_uprobe_opts. + * + * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be + * specified (and auto-attach is not possible) or the above format is specified for + * auto-attach. + */ +static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); + char *func, *probe_name, *func_end; + char *func_name, binary_path[512]; + unsigned long long raw_offset; + size_t offset = 0; + int n; + + *link = NULL; + + opts.retprobe = str_has_pfx(prog->sec_name, "uretprobe"); + if (opts.retprobe) + probe_name = prog->sec_name + sizeof("uretprobe") - 1; + else + probe_name = prog->sec_name + sizeof("uprobe") - 1; + if (probe_name[0] == '/') + probe_name++; + + /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ + if (strlen(probe_name) == 0) + return 0; + + snprintf(binary_path, sizeof(binary_path), "%s", probe_name); + /* ':' should be prior to function+offset */ + func_name = strrchr(binary_path, ':'); + if (!func_name) { + pr_warn("section '%s' missing ':function[+offset]' specification\n", + prog->sec_name); + return -EINVAL; + } + func_name[0] = '\0'; + func_name++; + n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); + if (n < 1) { + pr_warn("uprobe name '%s' is invalid\n", func_name); + return -EINVAL; + } + if (opts.retprobe && offset != 0) { + free(func); + pr_warn("uretprobes do not support offset specification\n"); + return -EINVAL; + } + + /* Is func a raw address? */ + errno = 0; + raw_offset = strtoull(func, &func_end, 0); + if (!errno && !*func_end) { + free(func); + func = NULL; + offset = (size_t)raw_offset; + } + opts.func_name = func; + + *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts); + free(func); + return libbpf_get_error(*link); +} + struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c index 2d3a7710e2ce..0e2222968918 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c @@ -37,14 +37,14 @@ int handle_kretprobe(struct pt_regs *ctx) return 0; } -SEC("uprobe/trigger_func") +SEC("uprobe") int handle_uprobe(struct pt_regs *ctx) { update(ctx, &uprobe_res); return 0; } -SEC("uretprobe/trigger_func") +SEC("uretprobe") int handle_uretprobe(struct pt_regs *ctx) { update(ctx, &uretprobe_res); diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c index e6cb09259408..1926facba122 100644 --- a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c +++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c @@ -14,7 +14,7 @@ char current_regs[PT_REGS_SIZE] = {}; char ctx_regs[PT_REGS_SIZE] = {}; int uprobe_res = 0; -SEC("uprobe/trigger_func") +SEC("uprobe") int handle_uprobe(struct pt_regs *ctx) { struct task_struct *current; diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 2ab049b54d6c..694e7cec1823 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -54,7 +54,7 @@ int bench_trigger_fmodret(void *ctx) return -22; } -SEC("uprobe/self/uprobe_target") +SEC("uprobe") int bench_trigger_uprobe(void *ctx) { __sync_add_and_fetch(&hits, 1); -- cgit v1.2.3