diff options
author | Alexei Starovoitov <ast@kernel.org> | 2018-05-25 04:18:20 +0300 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2018-05-25 04:18:21 +0300 |
commit | f80acbd233382619f597f785f8c238084dc62e21 (patch) | |
tree | de94bbbf01500ee35347b4351c2934e88ae710e5 /samples | |
parent | 31ad39239f089ef93bb51f9918618f18f45c78d9 (diff) | |
parent | b04df400c30235fa347313c9e2a0695549bd2c8e (diff) | |
download | linux-f80acbd233382619f597f785f8c238084dc62e21.tar.xz |
Merge branch 'bpf-task-fd-query'
Yonghong Song says:
====================
Currently, suppose a userspace application has loaded a bpf program
and attached it to a tracepoint/kprobe/uprobe, and a bpf
introspection tool, e.g., bpftool, wants to show which bpf program
is attached to which tracepoint/kprobe/uprobe. Such attachment
information will be really useful to understand the overall bpf
deployment in the system.
There is a name field (16 bytes) for each program, which could
be used to encode the attachment point. There are some drawbacks
for this approaches. First, bpftool user (e.g., an admin) may not
really understand the association between the name and the
attachment point. Second, if one program is attached to multiple
places, encoding a proper name which can imply all these
attachments becomes difficult.
This patch introduces a new bpf subcommand BPF_TASK_FD_QUERY.
Given a pid and fd, this command will return bpf related information
to user space. Right now it only supports tracepoint/kprobe/uprobe
perf event fd's. For such a fd, BPF_TASK_FD_QUERY will return
. prog_id
. tracepoint name, or
. k[ret]probe funcname + offset or kernel addr, or
. u[ret]probe filename + offset
to the userspace.
The user can use "bpftool prog" to find more information about
bpf program itself with prog_id.
Patch #1 adds function perf_get_event() in kernel/events/core.c.
Patch #2 implements the bpf subcommand BPF_TASK_FD_QUERY.
Patch #3 syncs tools bpf.h header and also add bpf_task_fd_query()
in the libbpf library for samples/selftests/bpftool to use.
Patch #4 adds ksym_get_addr() utility function.
Patch #5 add a test in samples/bpf for querying k[ret]probes and
u[ret]probes.
Patch #6 add a test in tools/testing/selftests/bpf for querying
raw_tracepoint and tracepoint.
Patch #7 add a new subcommand "perf" to bpftool.
Changelogs:
v4 -> v5:
. return strlen(buf) instead of strlen(buf) + 1
in the attr.buf_len. As long as user provides
non-empty buffer, it will be filed with empty
string, truncated string, or full string
based on the buffer size and the length of
to-be-copied string.
v3 -> v4:
. made attr buf_len input/output. The length of
actual buffter is written to buf_len so user space knows
what is actually needed. If user provides a buffer
with length >= 1 but less than required, do partial
copy and return -ENOSPC.
. code simplification with put_user.
. changed query result attach_info to fd_type.
. add tests at selftests/bpf to test zero len, null buf and
insufficient buf.
v2 -> v3:
. made perf_get_event() return perf_event pointer const.
this was to ensure that event fields are not meddled.
. detect whether newly BPF_TASK_FD_QUERY is supported or
not in "bpftool perf" and warn users if it is not.
v1 -> v2:
. changed bpf subcommand name from BPF_PERF_EVENT_QUERY
to BPF_TASK_FD_QUERY.
. fixed various "bpftool perf" issues and added documentation
and auto-completion.
====================
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'samples')
-rw-r--r-- | samples/bpf/Makefile | 4 | ||||
-rw-r--r-- | samples/bpf/task_fd_query_kern.c | 19 | ||||
-rw-r--r-- | samples/bpf/task_fd_query_user.c | 382 |
3 files changed, 405 insertions, 0 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 62d1aa1a4cf3..7dc85ed0ce4b 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -51,6 +51,7 @@ hostprogs-y += cpustat hostprogs-y += xdp_adjust_tail hostprogs-y += xdpsock hostprogs-y += xdp_fwd +hostprogs-y += task_fd_query # Libbpf dependencies LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a @@ -105,6 +106,7 @@ cpustat-objs := bpf_load.o cpustat_user.o xdp_adjust_tail-objs := xdp_adjust_tail_user.o xdpsock-objs := bpf_load.o xdpsock_user.o xdp_fwd-objs := bpf_load.o xdp_fwd_user.o +task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -160,6 +162,7 @@ always += cpustat_kern.o always += xdp_adjust_tail_kern.o always += xdpsock_kern.o always += xdp_fwd_kern.o +always += task_fd_query_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -175,6 +178,7 @@ HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/ +HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/ HOST_LOADLIBES += $(LIBBPF) -lelf HOSTLOADLIBES_tracex4 += -lrt diff --git a/samples/bpf/task_fd_query_kern.c b/samples/bpf/task_fd_query_kern.c new file mode 100644 index 000000000000..f4b0a9ea674d --- /dev/null +++ b/samples/bpf/task_fd_query_kern.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/version.h> +#include <linux/ptrace.h> +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" + +SEC("kprobe/blk_start_request") +int bpf_prog1(struct pt_regs *ctx) +{ + return 0; +} + +SEC("kretprobe/blk_account_io_completion") +int bpf_prog2(struct pt_regs *ctx) +{ + return 0; +} +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c new file mode 100644 index 000000000000..8381d792f138 --- /dev/null +++ b/samples/bpf/task_fd_query_user.c @@ -0,0 +1,382 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <unistd.h> +#include <stdbool.h> +#include <string.h> +#include <stdint.h> +#include <fcntl.h> +#include <linux/bpf.h> +#include <sys/ioctl.h> +#include <sys/resource.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "libbpf.h" +#include "bpf_load.h" +#include "bpf_util.h" +#include "perf-sys.h" +#include "trace_helpers.h" + +#define CHECK_PERROR_RET(condition) ({ \ + int __ret = !!(condition); \ + if (__ret) { \ + printf("FAIL: %s:\n", __func__); \ + perror(" "); \ + return -1; \ + } \ +}) + +#define CHECK_AND_RET(condition) ({ \ + int __ret = !!(condition); \ + if (__ret) \ + return -1; \ +}) + +static __u64 ptr_to_u64(void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + +#define PMU_TYPE_FILE "/sys/bus/event_source/devices/%s/type" +static int bpf_find_probe_type(const char *event_type) +{ + char buf[256]; + int fd, ret; + + ret = snprintf(buf, sizeof(buf), PMU_TYPE_FILE, event_type); + CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf)); + + fd = open(buf, O_RDONLY); + CHECK_PERROR_RET(fd < 0); + + ret = read(fd, buf, sizeof(buf)); + close(fd); + CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf)); + + errno = 0; + ret = (int)strtol(buf, NULL, 10); + CHECK_PERROR_RET(errno); + return ret; +} + +#define PMU_RETPROBE_FILE "/sys/bus/event_source/devices/%s/format/retprobe" +static int bpf_get_retprobe_bit(const char *event_type) +{ + char buf[256]; + int fd, ret; + + ret = snprintf(buf, sizeof(buf), PMU_RETPROBE_FILE, event_type); + CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf)); + + fd = open(buf, O_RDONLY); + CHECK_PERROR_RET(fd < 0); + + ret = read(fd, buf, sizeof(buf)); + close(fd); + CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf)); + CHECK_PERROR_RET(strlen(buf) < strlen("config:")); + + errno = 0; + ret = (int)strtol(buf + strlen("config:"), NULL, 10); + CHECK_PERROR_RET(errno); + return ret; +} + +static int test_debug_fs_kprobe(int prog_fd_idx, const char *fn_name, + __u32 expected_fd_type) +{ + __u64 probe_offset, probe_addr; + __u32 len, prog_id, fd_type; + char buf[256]; + int err; + + len = sizeof(buf); + err = bpf_task_fd_query(getpid(), event_fd[prog_fd_idx], 0, buf, &len, + &prog_id, &fd_type, &probe_offset, + &probe_addr); + if (err < 0) { + printf("FAIL: %s, for event_fd idx %d, fn_name %s\n", + __func__, prog_fd_idx, fn_name); + perror(" :"); + return -1; + } + if (strcmp(buf, fn_name) != 0 || + fd_type != expected_fd_type || + probe_offset != 0x0 || probe_addr != 0x0) { + printf("FAIL: bpf_trace_event_query(event_fd[%d]):\n", + prog_fd_idx); + printf("buf: %s, fd_type: %u, probe_offset: 0x%llx," + " probe_addr: 0x%llx\n", + buf, fd_type, probe_offset, probe_addr); + return -1; + } + return 0; +} + +static int test_nondebug_fs_kuprobe_common(const char *event_type, + const char *name, __u64 offset, __u64 addr, bool is_return, + char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, + __u64 *probe_offset, __u64 *probe_addr) +{ + int is_return_bit = bpf_get_retprobe_bit(event_type); + int type = bpf_find_probe_type(event_type); + struct perf_event_attr attr = {}; + int fd; + + if (type < 0 || is_return_bit < 0) { + printf("FAIL: %s incorrect type (%d) or is_return_bit (%d)\n", + __func__, type, is_return_bit); + return -1; + } + + attr.sample_period = 1; + attr.wakeup_events = 1; + if (is_return) + attr.config |= 1 << is_return_bit; + + if (name) { + attr.config1 = ptr_to_u64((void *)name); + attr.config2 = offset; + } else { + attr.config1 = 0; + attr.config2 = addr; + } + attr.size = sizeof(attr); + attr.type = type; + + fd = sys_perf_event_open(&attr, -1, 0, -1, 0); + CHECK_PERROR_RET(fd < 0); + + CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0); + CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0); + CHECK_PERROR_RET(bpf_task_fd_query(getpid(), fd, 0, buf, buf_len, + prog_id, fd_type, probe_offset, probe_addr) < 0); + + return 0; +} + +static int test_nondebug_fs_probe(const char *event_type, const char *name, + __u64 offset, __u64 addr, bool is_return, + __u32 expected_fd_type, + __u32 expected_ret_fd_type, + char *buf, __u32 buf_len) +{ + __u64 probe_offset, probe_addr; + __u32 prog_id, fd_type; + int err; + + err = test_nondebug_fs_kuprobe_common(event_type, name, + offset, addr, is_return, + buf, &buf_len, &prog_id, + &fd_type, &probe_offset, + &probe_addr); + if (err < 0) { + printf("FAIL: %s, " + "for name %s, offset 0x%llx, addr 0x%llx, is_return %d\n", + __func__, name ? name : "", offset, addr, is_return); + perror(" :"); + return -1; + } + if ((is_return && fd_type != expected_ret_fd_type) || + (!is_return && fd_type != expected_fd_type)) { + printf("FAIL: %s, incorrect fd_type %u\n", + __func__, fd_type); + return -1; + } + if (name) { + if (strcmp(name, buf) != 0) { + printf("FAIL: %s, incorrect buf %s\n", __func__, buf); + return -1; + } + if (probe_offset != offset) { + printf("FAIL: %s, incorrect probe_offset 0x%llx\n", + __func__, probe_offset); + return -1; + } + } else { + if (buf_len != 0) { + printf("FAIL: %s, incorrect buf %p\n", + __func__, buf); + return -1; + } + + if (probe_addr != addr) { + printf("FAIL: %s, incorrect probe_addr 0x%llx\n", + __func__, probe_addr); + return -1; + } + } + return 0; +} + +static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return) +{ + const char *event_type = "uprobe"; + struct perf_event_attr attr = {}; + char buf[256], event_alias[256]; + __u64 probe_offset, probe_addr; + __u32 len, prog_id, fd_type; + int err, res, kfd, efd; + ssize_t bytes; + + snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", + event_type); + kfd = open(buf, O_WRONLY | O_APPEND, 0); + CHECK_PERROR_RET(kfd < 0); + + res = snprintf(event_alias, sizeof(event_alias), "test_%d", getpid()); + CHECK_PERROR_RET(res < 0 || res >= sizeof(event_alias)); + + res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx", + is_return ? 'r' : 'p', event_type, event_alias, + binary_path, offset); + CHECK_PERROR_RET(res < 0 || res >= sizeof(buf)); + CHECK_PERROR_RET(write(kfd, buf, strlen(buf)) < 0); + + close(kfd); + kfd = -1; + + snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s/id", + event_type, event_alias); + efd = open(buf, O_RDONLY, 0); + CHECK_PERROR_RET(efd < 0); + + bytes = read(efd, buf, sizeof(buf)); + CHECK_PERROR_RET(bytes <= 0 || bytes >= sizeof(buf)); + close(efd); + buf[bytes] = '\0'; + + attr.config = strtol(buf, NULL, 0); + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_period = 1; + attr.wakeup_events = 1; + kfd = sys_perf_event_open(&attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); + CHECK_PERROR_RET(kfd < 0); + CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0); + CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_ENABLE, 0) < 0); + + len = sizeof(buf); + err = bpf_task_fd_query(getpid(), kfd, 0, buf, &len, + &prog_id, &fd_type, &probe_offset, + &probe_addr); + if (err < 0) { + printf("FAIL: %s, binary_path %s\n", __func__, binary_path); + perror(" :"); + return -1; + } + if ((is_return && fd_type != BPF_FD_TYPE_URETPROBE) || + (!is_return && fd_type != BPF_FD_TYPE_UPROBE)) { + printf("FAIL: %s, incorrect fd_type %u\n", __func__, + fd_type); + return -1; + } + if (strcmp(binary_path, buf) != 0) { + printf("FAIL: %s, incorrect buf %s\n", __func__, buf); + return -1; + } + if (probe_offset != offset) { + printf("FAIL: %s, incorrect probe_offset 0x%llx\n", __func__, + probe_offset); + return -1; + } + + close(kfd); + return 0; +} + +int main(int argc, char **argv) +{ + struct rlimit r = {1024*1024, RLIM_INFINITY}; + extern char __executable_start; + char filename[256], buf[256]; + __u64 uprobe_file_offset; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } + + if (load_kallsyms()) { + printf("failed to process /proc/kallsyms\n"); + return 1; + } + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + /* test two functions in the corresponding *_kern.c file */ + CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_start_request", + BPF_FD_TYPE_KPROBE)); + CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion", + BPF_FD_TYPE_KRETPROBE)); + + /* test nondebug fs kprobe */ + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0, + false, BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + buf, sizeof(buf))); +#ifdef __x86_64__ + /* set a kprobe on "bpf_check + 0x5", which is x64 specific */ + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x5, 0x0, + false, BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + buf, sizeof(buf))); +#endif + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0, + true, BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + buf, sizeof(buf))); + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0, + ksym_get_addr("bpf_check"), false, + BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + buf, sizeof(buf))); + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0, + ksym_get_addr("bpf_check"), false, + BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + NULL, 0)); + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0, + ksym_get_addr("bpf_check"), true, + BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + buf, sizeof(buf))); + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0, + ksym_get_addr("bpf_check"), true, + BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + 0, 0)); + + /* test nondebug fs uprobe */ + /* the calculation of uprobe file offset is based on gcc 7.3.1 on x64 + * and the default linker script, which defines __executable_start as + * the start of the .text section. The calculation could be different + * on different systems with different compilers. The right way is + * to parse the ELF file. We took a shortcut here. + */ + uprobe_file_offset = (__u64)main - (__u64)&__executable_start; + CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0], + uprobe_file_offset, 0x0, false, + BPF_FD_TYPE_UPROBE, + BPF_FD_TYPE_URETPROBE, + buf, sizeof(buf))); + CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0], + uprobe_file_offset, 0x0, true, + BPF_FD_TYPE_UPROBE, + BPF_FD_TYPE_URETPROBE, + buf, sizeof(buf))); + + /* test debug fs uprobe */ + CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset, + false)); + CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset, + true)); + + return 0; +} |