diff options
Diffstat (limited to 'tools/testing/selftests/bpf')
-rw-r--r-- | tools/testing/selftests/bpf/DENYLIST.s390x | 1 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/cgroup_helpers.c | 202 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/cgroup_helpers.h | 19 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/btf_dump.c | 4 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c | 357 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/cgroup_iter.c | 224 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/bpf_iter.h | 7 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c | 226 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/cgroup_iter.c | 39 |
9 files changed, 1030 insertions, 49 deletions
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index 37bafcbf952a..736b65f61022 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -67,3 +67,4 @@ xdp_synproxy # JIT does not support calling kernel f unpriv_bpf_disabled # fentry setget_sockopt # attach unexpected error: -524 (trampoline) cb_refs # expected error message unexpected error: -524 (trampoline) +cgroup_hierarchical_stats # JIT does not support calling kernel function (kfunc) diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 9d59c3990ca8..e914cc45b766 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -33,49 +33,52 @@ #define CGROUP_MOUNT_DFLT "/sys/fs/cgroup" #define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls" #define CGROUP_WORK_DIR "/cgroup-test-work-dir" -#define format_cgroup_path(buf, path) \ + +#define format_cgroup_path_pid(buf, path, pid) \ snprintf(buf, sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH, \ - CGROUP_WORK_DIR, getpid(), path) + CGROUP_WORK_DIR, pid, path) + +#define format_cgroup_path(buf, path) \ + format_cgroup_path_pid(buf, path, getpid()) + +#define format_parent_cgroup_path(buf, path) \ + format_cgroup_path_pid(buf, path, getppid()) #define format_classid_path(buf) \ snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ CGROUP_WORK_DIR) -/** - * enable_all_controllers() - Enable all available cgroup v2 controllers - * - * Enable all available cgroup v2 controllers in order to increase - * the code coverage. - * - * If successful, 0 is returned. - */ -static int enable_all_controllers(char *cgroup_path) +static int __enable_controllers(const char *cgroup_path, const char *controllers) { char path[PATH_MAX + 1]; - char buf[PATH_MAX]; + char enable[PATH_MAX + 1]; char *c, *c2; int fd, cfd; ssize_t len; - snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path); - fd = open(path, O_RDONLY); - if (fd < 0) { - log_err("Opening cgroup.controllers: %s", path); - return 1; - } - - len = read(fd, buf, sizeof(buf) - 1); - if (len < 0) { + /* If not controllers are passed, enable all available controllers */ + if (!controllers) { + snprintf(path, sizeof(path), "%s/cgroup.controllers", + cgroup_path); + fd = open(path, O_RDONLY); + if (fd < 0) { + log_err("Opening cgroup.controllers: %s", path); + return 1; + } + len = read(fd, enable, sizeof(enable) - 1); + if (len < 0) { + close(fd); + log_err("Reading cgroup.controllers: %s", path); + return 1; + } else if (len == 0) { /* No controllers to enable */ + close(fd); + return 0; + } + enable[len] = 0; close(fd); - log_err("Reading cgroup.controllers: %s", path); - return 1; + } else { + strncpy(enable, controllers, sizeof(enable)); } - buf[len] = 0; - close(fd); - - /* No controllers available? We're probably on cgroup v1. */ - if (len == 0) - return 0; snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path); cfd = open(path, O_RDWR); @@ -84,7 +87,7 @@ static int enable_all_controllers(char *cgroup_path) return 1; } - for (c = strtok_r(buf, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) { + for (c = strtok_r(enable, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) { if (dprintf(cfd, "+%s\n", c) <= 0) { log_err("Enabling controller %s: %s", c, path); close(cfd); @@ -96,6 +99,87 @@ static int enable_all_controllers(char *cgroup_path) } /** + * enable_controllers() - Enable cgroup v2 controllers + * @relative_path: The cgroup path, relative to the workdir + * @controllers: List of controllers to enable in cgroup.controllers format + * + * + * Enable given cgroup v2 controllers, if @controllers is NULL, enable all + * available controllers. + * + * If successful, 0 is returned. + */ +int enable_controllers(const char *relative_path, const char *controllers) +{ + char cgroup_path[PATH_MAX + 1]; + + format_cgroup_path(cgroup_path, relative_path); + return __enable_controllers(cgroup_path, controllers); +} + +static int __write_cgroup_file(const char *cgroup_path, const char *file, + const char *buf) +{ + char file_path[PATH_MAX + 1]; + int fd; + + snprintf(file_path, sizeof(file_path), "%s/%s", cgroup_path, file); + fd = open(file_path, O_RDWR); + if (fd < 0) { + log_err("Opening %s", file_path); + return 1; + } + + if (dprintf(fd, "%s", buf) <= 0) { + log_err("Writing to %s", file_path); + close(fd); + return 1; + } + close(fd); + return 0; +} + +/** + * write_cgroup_file() - Write to a cgroup file + * @relative_path: The cgroup path, relative to the workdir + * @file: The name of the file in cgroupfs to write to + * @buf: Buffer to write to the file + * + * Write to a file in the given cgroup's directory. + * + * If successful, 0 is returned. + */ +int write_cgroup_file(const char *relative_path, const char *file, + const char *buf) +{ + char cgroup_path[PATH_MAX - 24]; + + format_cgroup_path(cgroup_path, relative_path); + return __write_cgroup_file(cgroup_path, file, buf); +} + +/** + * write_cgroup_file_parent() - Write to a cgroup file in the parent process + * workdir + * @relative_path: The cgroup path, relative to the parent process workdir + * @file: The name of the file in cgroupfs to write to + * @buf: Buffer to write to the file + * + * Write to a file in the given cgroup's directory under the parent process + * workdir. + * + * If successful, 0 is returned. + */ +int write_cgroup_file_parent(const char *relative_path, const char *file, + const char *buf) +{ + char cgroup_path[PATH_MAX - 24]; + + format_parent_cgroup_path(cgroup_path, relative_path); + return __write_cgroup_file(cgroup_path, file, buf); +} + +/** * setup_cgroup_environment() - Setup the cgroup environment * * After calling this function, cleanup_cgroup_environment should be called @@ -133,7 +217,9 @@ int setup_cgroup_environment(void) return 1; } - if (enable_all_controllers(cgroup_workdir)) + /* Enable all available controllers to increase test coverage */ + if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) || + __enable_controllers(cgroup_workdir, NULL)) return 1; return 0; @@ -173,7 +259,7 @@ static int join_cgroup_from_top(const char *cgroup_path) /** * join_cgroup() - Join a cgroup - * @path: The cgroup path, relative to the workdir, to join + * @relative_path: The cgroup path, relative to the workdir, to join * * This function expects a cgroup to already be created, relative to the cgroup * work dir, and it joins it. For example, passing "/my-cgroup" as the path @@ -182,11 +268,27 @@ static int join_cgroup_from_top(const char *cgroup_path) * * On success, it returns 0, otherwise on failure it returns 1. */ -int join_cgroup(const char *path) +int join_cgroup(const char *relative_path) +{ + char cgroup_path[PATH_MAX + 1]; + + format_cgroup_path(cgroup_path, relative_path); + return join_cgroup_from_top(cgroup_path); +} + +/** + * join_parent_cgroup() - Join a cgroup in the parent process workdir + * @relative_path: The cgroup path, relative to parent process workdir, to join + * + * See join_cgroup(). + * + * On success, it returns 0, otherwise on failure it returns 1. + */ +int join_parent_cgroup(const char *relative_path) { char cgroup_path[PATH_MAX + 1]; - format_cgroup_path(cgroup_path, path); + format_parent_cgroup_path(cgroup_path, relative_path); return join_cgroup_from_top(cgroup_path); } @@ -213,8 +315,26 @@ void cleanup_cgroup_environment(void) } /** + * get_root_cgroup() - Get the FD of the root cgroup + * + * On success, it returns the file descriptor. On failure, it returns -1. + * If there is a failure, it prints the error to stderr. + */ +int get_root_cgroup(void) +{ + int fd; + + fd = open(CGROUP_MOUNT_PATH, O_RDONLY); + if (fd < 0) { + log_err("Opening root cgroup"); + return -1; + } + return fd; +} + +/** * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD - * @path: The cgroup path, relative to the workdir, to join + * @relative_path: The cgroup path, relative to the workdir, to join * * This function creates a cgroup under the top level workdir and returns the * file descriptor. It is idempotent. @@ -222,14 +342,14 @@ void cleanup_cgroup_environment(void) * On success, it returns the file descriptor. On failure it returns -1. * If there is a failure, it prints the error to stderr. */ -int create_and_get_cgroup(const char *path) +int create_and_get_cgroup(const char *relative_path) { char cgroup_path[PATH_MAX + 1]; int fd; - format_cgroup_path(cgroup_path, path); + format_cgroup_path(cgroup_path, relative_path); if (mkdir(cgroup_path, 0777) && errno != EEXIST) { - log_err("mkdiring cgroup %s .. %s", path, cgroup_path); + log_err("mkdiring cgroup %s .. %s", relative_path, cgroup_path); return -1; } @@ -244,13 +364,13 @@ int create_and_get_cgroup(const char *path) /** * get_cgroup_id() - Get cgroup id for a particular cgroup path - * @path: The cgroup path, relative to the workdir, to join + * @relative_path: The cgroup path, relative to the workdir, to join * * On success, it returns the cgroup id. On failure it returns 0, * which is an invalid cgroup id. * If there is a failure, it prints the error to stderr. */ -unsigned long long get_cgroup_id(const char *path) +unsigned long long get_cgroup_id(const char *relative_path) { int dirfd, err, flags, mount_id, fhsize; union { @@ -261,7 +381,7 @@ unsigned long long get_cgroup_id(const char *path) struct file_handle *fhp, *fhp2; unsigned long long ret = 0; - format_cgroup_path(cgroup_workdir, path); + format_cgroup_path(cgroup_workdir, relative_path); dirfd = AT_FDCWD; flags = 0; diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index fcc9cb91b211..3358734356ab 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -10,11 +10,18 @@ __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__) /* cgroupv2 related */ -int cgroup_setup_and_join(const char *path); -int create_and_get_cgroup(const char *path); -unsigned long long get_cgroup_id(const char *path); - -int join_cgroup(const char *path); +int enable_controllers(const char *relative_path, const char *controllers); +int write_cgroup_file(const char *relative_path, const char *file, + const char *buf); +int write_cgroup_file_parent(const char *relative_path, const char *file, + const char *buf); +int cgroup_setup_and_join(const char *relative_path); +int get_root_cgroup(void); +int create_and_get_cgroup(const char *relative_path); +unsigned long long get_cgroup_id(const char *relative_path); + +int join_cgroup(const char *relative_path); +int join_parent_cgroup(const char *relative_path); int setup_cgroup_environment(void); void cleanup_cgroup_environment(void); @@ -26,4 +33,4 @@ int join_classid(void); int setup_classid_environment(void); void cleanup_classid_environment(void); -#endif /* __CGROUP_HELPERS_H */
\ No newline at end of file +#endif /* __CGROUP_HELPERS_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 5fce7008d1ff..a1bae92be1fc 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -764,8 +764,8 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d, /* union with nested struct */ TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT, - "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},}", - { .map = { .map_fd = 1 }}); + "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},.cgroup = (struct){.order = (enum bpf_cgroup_iter_order)BPF_ITER_SELF_ONLY,.cgroup_fd = (__u32)1,},}", + { .cgroup = { .order = 1, .cgroup_fd = 1, }}); /* struct skb with nested structs/unions; because type output is so * complex, we don't do a string comparison, just verify we return diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c new file mode 100644 index 000000000000..101a6d70b863 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c @@ -0,0 +1,357 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Functions to manage eBPF programs attached to cgroup subsystems + * + * Copyright 2022 Google LLC. + */ +#include <asm-generic/errno.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <unistd.h> + +#include <test_progs.h> +#include <bpf/libbpf.h> +#include <bpf/bpf.h> + +#include "cgroup_helpers.h" +#include "cgroup_hierarchical_stats.skel.h" + +#define PAGE_SIZE 4096 +#define MB(x) (x << 20) + +#define BPFFS_ROOT "/sys/fs/bpf/" +#define BPFFS_VMSCAN BPFFS_ROOT"vmscan/" + +#define CG_ROOT_NAME "root" +#define CG_ROOT_ID 1 + +#define CGROUP_PATH(p, n) {.path = p"/"n, .name = n} + +static struct { + const char *path, *name; + unsigned long long id; + int fd; +} cgroups[] = { + CGROUP_PATH("/", "test"), + CGROUP_PATH("/test", "child1"), + CGROUP_PATH("/test", "child2"), + CGROUP_PATH("/test/child1", "child1_1"), + CGROUP_PATH("/test/child1", "child1_2"), + CGROUP_PATH("/test/child2", "child2_1"), + CGROUP_PATH("/test/child2", "child2_2"), +}; + +#define N_CGROUPS ARRAY_SIZE(cgroups) +#define N_NON_LEAF_CGROUPS 3 + +static int root_cgroup_fd; +static bool mounted_bpffs; + +/* reads file at 'path' to 'buf', returns 0 on success. */ +static int read_from_file(const char *path, char *buf, size_t size) +{ + int fd, len; + + fd = open(path, O_RDONLY); + if (fd < 0) + return fd; + + len = read(fd, buf, size); + close(fd); + if (len < 0) + return len; + + buf[len] = 0; + return 0; +} + +/* mounts bpffs and mkdir for reading stats, returns 0 on success. */ +static int setup_bpffs(void) +{ + int err; + + /* Mount bpffs */ + err = mount("bpf", BPFFS_ROOT, "bpf", 0, NULL); + mounted_bpffs = !err; + if (ASSERT_FALSE(err && errno != EBUSY, "mount")) + return err; + + /* Create a directory to contain stat files in bpffs */ + err = mkdir(BPFFS_VMSCAN, 0755); + if (!ASSERT_OK(err, "mkdir")) + return err; + + return 0; +} + +static void cleanup_bpffs(void) +{ + /* Remove created directory in bpffs */ + ASSERT_OK(rmdir(BPFFS_VMSCAN), "rmdir "BPFFS_VMSCAN); + + /* Unmount bpffs, if it wasn't already mounted when we started */ + if (mounted_bpffs) + return; + + ASSERT_OK(umount(BPFFS_ROOT), "unmount bpffs"); +} + +/* sets up cgroups, returns 0 on success. */ +static int setup_cgroups(void) +{ + int i, fd, err; + + err = setup_cgroup_environment(); + if (!ASSERT_OK(err, "setup_cgroup_environment")) + return err; + + root_cgroup_fd = get_root_cgroup(); + if (!ASSERT_GE(root_cgroup_fd, 0, "get_root_cgroup")) + return root_cgroup_fd; + + for (i = 0; i < N_CGROUPS; i++) { + fd = create_and_get_cgroup(cgroups[i].path); + if (!ASSERT_GE(fd, 0, "create_and_get_cgroup")) + return fd; + + cgroups[i].fd = fd; + cgroups[i].id = get_cgroup_id(cgroups[i].path); + + /* + * Enable memcg controller for the entire hierarchy. + * Note that stats are collected for all cgroups in a hierarchy + * with memcg enabled anyway, but are only exposed for cgroups + * that have memcg enabled. + */ + if (i < N_NON_LEAF_CGROUPS) { + err = enable_controllers(cgroups[i].path, "memory"); + if (!ASSERT_OK(err, "enable_controllers")) + return err; + } + } + return 0; +} + +static void cleanup_cgroups(void) +{ + close(root_cgroup_fd); + for (int i = 0; i < N_CGROUPS; i++) + close(cgroups[i].fd); + cleanup_cgroup_environment(); +} + +/* Sets up cgroup hiearchary, returns 0 on success. */ +static int setup_hierarchy(void) +{ + return setup_bpffs() || setup_cgroups(); +} + +static void destroy_hierarchy(void) +{ + cleanup_cgroups(); + cleanup_bpffs(); +} + +static int reclaimer(const char *cgroup_path, size_t size) +{ + static char size_buf[128]; + char *buf, *ptr; + int err; + + /* Join cgroup in the parent process workdir */ + if (join_parent_cgroup(cgroup_path)) + return EACCES; + + /* Allocate memory */ + buf = malloc(size); + if (!buf) + return ENOMEM; + + /* Write to memory to make sure it's actually allocated */ + for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) + *ptr = 1; + + /* Try to reclaim memory */ + snprintf(size_buf, 128, "%lu", size); + err = write_cgroup_file_parent(cgroup_path, "memory.reclaim", size_buf); + + free(buf); + /* memory.reclaim returns EAGAIN if the amount is not fully reclaimed */ + if (err && errno != EAGAIN) + return errno; + + return 0; +} + +static int induce_vmscan(void) +{ + int i, status; + + /* + * In every leaf cgroup, run a child process that allocates some memory + * and attempts to reclaim some of it. + */ + for (i = N_NON_LEAF_CGROUPS; i < N_CGROUPS; i++) { + pid_t pid; + + /* Create reclaimer child */ + pid = fork(); + if (pid == 0) { + status = reclaimer(cgroups[i].path, MB(5)); + exit(status); + } + + /* Cleanup reclaimer child */ + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFEXITED(status), "reclaimer exited"); + ASSERT_EQ(WEXITSTATUS(status), 0, "reclaim exit code"); + } + return 0; +} + +static unsigned long long +get_cgroup_vmscan_delay(unsigned long long cgroup_id, const char *file_name) +{ + unsigned long long vmscan = 0, id = 0; + static char buf[128], path[128]; + + /* For every cgroup, read the file generated by cgroup_iter */ + snprintf(path, 128, "%s%s", BPFFS_VMSCAN, file_name); + if (!ASSERT_OK(read_from_file(path, buf, 128), "read cgroup_iter")) + return 0; + + /* Check the output file formatting */ + ASSERT_EQ(sscanf(buf, "cg_id: %llu, total_vmscan_delay: %llu\n", + &id, &vmscan), 2, "output format"); + + /* Check that the cgroup_id is displayed correctly */ + ASSERT_EQ(id, cgroup_id, "cgroup_id"); + /* Check that the vmscan reading is non-zero */ + ASSERT_GT(vmscan, 0, "vmscan_reading"); + return vmscan; +} + +static void check_vmscan_stats(void) +{ + unsigned long long vmscan_readings[N_CGROUPS], vmscan_root; + int i; + + for (i = 0; i < N_CGROUPS; i++) { + vmscan_readings[i] = get_cgroup_vmscan_delay(cgroups[i].id, + cgroups[i].name); + } + + /* Read stats for root too */ + vmscan_root = get_cgroup_vmscan_delay(CG_ROOT_ID, CG_ROOT_NAME); + + /* Check that child1 == child1_1 + child1_2 */ + ASSERT_EQ(vmscan_readings[1], vmscan_readings[3] + vmscan_readings[4], + "child1_vmscan"); + /* Check that child2 == child2_1 + child2_2 */ + ASSERT_EQ(vmscan_readings[2], vmscan_readings[5] + vmscan_readings[6], + "child2_vmscan"); + /* Check that test == child1 + child2 */ + ASSERT_EQ(vmscan_readings[0], vmscan_readings[1] + vmscan_readings[2], + "test_vmscan"); + /* Check that root >= test */ + ASSERT_GE(vmscan_root, vmscan_readings[1], "root_vmscan"); +} + +/* Creates iter link and pins in bpffs, returns 0 on success, -errno on failure. + */ +static int setup_cgroup_iter(struct cgroup_hierarchical_stats *obj, + int cgroup_fd, const char *file_name) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo = {}; + struct bpf_link *link; + static char path[128]; + int err; + + /* + * Create an iter link, parameterized by cgroup_fd. We only want to + * traverse one cgroup, so set the traversal order to "self". + */ + linfo.cgroup.cgroup_fd = cgroup_fd; + linfo.cgroup.order = BPF_ITER_SELF_ONLY; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + link = bpf_program__attach_iter(obj->progs.dump_vmscan, &opts); + if (!ASSERT_OK_PTR(link, "attach_iter")) + return -EFAULT; + + /* Pin the link to a bpffs file */ + snprintf(path, 128, "%s%s", BPFFS_VMSCAN, file_name); + err = bpf_link__pin(link, path); + ASSERT_OK(err, "pin cgroup_iter"); + + /* Remove the link, leaving only the ref held by the pinned file */ + bpf_link__destroy(link); + return err; +} + +/* Sets up programs for collecting stats, returns 0 on success. */ +static int setup_progs(struct cgroup_hierarchical_stats **skel) +{ + int i, err; + + *skel = cgroup_hierarchical_stats__open_and_load(); + if (!ASSERT_OK_PTR(*skel, "open_and_load")) + return 1; + + /* Attach cgroup_iter program that will dump the stats to cgroups */ + for (i = 0; i < N_CGROUPS; i++) { + err = setup_cgroup_iter(*skel, cgroups[i].fd, cgroups[i].name); + if (!ASSERT_OK(err, "setup_cgroup_iter")) + return err; + } + + /* Also dump stats for root */ + err = setup_cgroup_iter(*skel, root_cgroup_fd, CG_ROOT_NAME); + if (!ASSERT_OK(err, "setup_cgroup_iter")) + return err; + + bpf_program__set_autoattach((*skel)->progs.dump_vmscan, false); + err = cgroup_hierarchical_stats__attach(*skel); + if (!ASSERT_OK(err, "attach")) + return err; + + return 0; +} + +static void destroy_progs(struct cgroup_hierarchical_stats *skel) +{ + static char path[128]; + int i; + + for (i = 0; i < N_CGROUPS; i++) { + /* Delete files in bpffs that cgroup_iters are pinned in */ + snprintf(path, 128, "%s%s", BPFFS_VMSCAN, + cgroups[i].name); + ASSERT_OK(remove(path), "remove cgroup_iter pin"); + } + + /* Delete root file in bpffs */ + snprintf(path, 128, "%s%s", BPFFS_VMSCAN, CG_ROOT_NAME); + ASSERT_OK(remove(path), "remove cgroup_iter root pin"); + cgroup_hierarchical_stats__destroy(skel); +} + +void test_cgroup_hierarchical_stats(void) +{ + struct cgroup_hierarchical_stats *skel = NULL; + + if (setup_hierarchy()) + goto hierarchy_cleanup; + if (setup_progs(&skel)) + goto cleanup; + if (induce_vmscan()) + goto cleanup; + check_vmscan_stats(); +cleanup: + destroy_progs(skel); +hierarchy_cleanup: + destroy_hierarchy(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c new file mode 100644 index 000000000000..38958c37b9ce --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Google */ + +#include <test_progs.h> +#include <bpf/libbpf.h> +#include <bpf/btf.h> +#include "cgroup_iter.skel.h" +#include "cgroup_helpers.h" + +#define ROOT 0 +#define PARENT 1 +#define CHILD1 2 +#define CHILD2 3 +#define NUM_CGROUPS 4 + +#define PROLOGUE "prologue\n" +#define EPILOGUE "epilogue\n" + +static const char *cg_path[] = { + "/", "/parent", "/parent/child1", "/parent/child2" +}; + +static int cg_fd[] = {-1, -1, -1, -1}; +static unsigned long long cg_id[] = {0, 0, 0, 0}; +static char expected_output[64]; + +static int setup_cgroups(void) +{ + int fd, i = 0; + + for (i = 0; i < NUM_CGROUPS; i++) { + fd = create_and_get_cgroup(cg_path[i]); + if (fd < 0) + return fd; + + cg_fd[i] = fd; + cg_id[i] = get_cgroup_id(cg_path[i]); + } + return 0; +} + +static void cleanup_cgroups(void) +{ + int i; + + for (i = 0; i < NUM_CGROUPS; i++) + close(cg_fd[i]); +} + +static void read_from_cgroup_iter(struct bpf_program *prog, int cgroup_fd, + int order, const char *testname) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo; + struct bpf_link *link; + int len, iter_fd; + static char buf[128]; + size_t left; + char *p; + + memset(&linfo, 0, sizeof(linfo)); + linfo.cgroup.cgroup_fd = cgroup_fd; + linfo.cgroup.order = order; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + link = bpf_program__attach_iter(prog, &opts); + if (!ASSERT_OK_PTR(link, "attach_iter")) + return; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (iter_fd < 0) + goto free_link; + + memset(buf, 0, sizeof(buf)); + left = ARRAY_SIZE(buf); + p = buf; + while ((len = read(iter_fd, p, left)) > 0) { + p += len; + left -= len; + } + + ASSERT_STREQ(buf, expected_output, testname); + + /* read() after iter finishes should be ok. */ + if (len == 0) + ASSERT_OK(read(iter_fd, buf, sizeof(buf)), "second_read"); + + close(iter_fd); +free_link: + bpf_link__destroy(link); +} + +/* Invalid cgroup. */ +static void test_invalid_cgroup(struct cgroup_iter *skel) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo; + struct bpf_link *link; + + memset(&linfo, 0, sizeof(linfo)); + linfo.cgroup.cgroup_fd = (__u32)-1; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + link = bpf_program__attach_iter(skel->progs.cgroup_id_printer, &opts); + ASSERT_ERR_PTR(link, "attach_iter"); + bpf_link__destroy(link); +} + +/* Specifying both cgroup_fd and cgroup_id is invalid. */ +static void test_invalid_cgroup_spec(struct cgroup_iter *skel) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo; + struct bpf_link *link; + + memset(&linfo, 0, sizeof(linfo)); + linfo.cgroup.cgroup_fd = (__u32)cg_fd[PARENT]; + linfo.cgroup.cgroup_id = (__u64)cg_id[PARENT]; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + link = bpf_program__attach_iter(skel->progs.cgroup_id_printer, &opts); + ASSERT_ERR_PTR(link, "attach_iter"); + bpf_link__destroy(link); +} + +/* Preorder walk prints parent and child in order. */ +static void test_walk_preorder(struct cgroup_iter *skel) +{ + snprintf(expected_output, sizeof(expected_output), + PROLOGUE "%8llu\n%8llu\n%8llu\n" EPILOGUE, + cg_id[PARENT], cg_id[CHILD1], cg_id[CHILD2]); + + read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT], + BPF_ITER_DESCENDANTS_PRE, "preorder"); +} + +/* Postorder walk prints child and parent in order. */ +static void test_walk_postorder(struct cgroup_iter *skel) +{ + snprintf(expected_output, sizeof(expected_output), + PROLOGUE "%8llu\n%8llu\n%8llu\n" EPILOGUE, + cg_id[CHILD1], cg_id[CHILD2], cg_id[PARENT]); + + read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT], + BPF_ITER_DESCENDANTS_POST, "postorder"); +} + +/* Walking parents prints parent and then root. */ +static void test_walk_ancestors_up(struct cgroup_iter *skel) +{ + /* terminate the walk when ROOT is met. */ + skel->bss->terminal_cgroup = cg_id[ROOT]; + + snprintf(expected_output, sizeof(expected_output), + PROLOGUE "%8llu\n%8llu\n" EPILOGUE, + cg_id[PARENT], cg_id[ROOT]); + + read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT], + BPF_ITER_ANCESTORS_UP, "ancestors_up"); + + skel->bss->terminal_cgroup = 0; +} + +/* Early termination prints parent only. */ +static void test_early_termination(struct cgroup_iter *skel) +{ + /* terminate the walk after the first element is processed. */ + skel->bss->terminate_early = 1; + + snprintf(expected_output, sizeof(expected_output), + PROLOGUE "%8llu\n" EPILOGUE, cg_id[PARENT]); + + read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT], + BPF_ITER_DESCENDANTS_PRE, "early_termination"); + + skel->bss->terminate_early = 0; +} + +/* Waling self prints self only. */ +static void test_walk_self_only(struct cgroup_iter *skel) +{ + snprintf(expected_output, sizeof(expected_output), + PROLOGUE "%8llu\n" EPILOGUE, cg_id[PARENT]); + + read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT], + BPF_ITER_SELF_ONLY, "self_only"); +} + +void test_cgroup_iter(void) +{ + struct cgroup_iter *skel = NULL; + + if (setup_cgroup_environment()) + return; + + if (setup_cgroups()) + goto out; + + skel = cgroup_iter__open_and_load(); + if (!ASSERT_OK_PTR(skel, "cgroup_iter__open_and_load")) + goto out; + + if (test__start_subtest("cgroup_iter__invalid_cgroup")) + test_invalid_cgroup(skel); + if (test__start_subtest("cgroup_iter__invalid_cgroup_spec")) + test_invalid_cgroup_spec(skel); + if (test__start_subtest("cgroup_iter__preorder")) + test_walk_preorder(skel); + if (test__start_subtest("cgroup_iter__postorder")) + test_walk_postorder(skel); + if (test__start_subtest("cgroup_iter__ancestors_up_walk")) + test_walk_ancestors_up(skel); + if (test__start_subtest("cgroup_iter__early_termination")) + test_early_termination(skel); + if (test__start_subtest("cgroup_iter__self_only")) + test_walk_self_only(skel); +out: + cgroup_iter__destroy(skel); + cleanup_cgroups(); + cleanup_cgroup_environment(); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h index e9846606690d..c41ee80533ca 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h @@ -17,6 +17,7 @@ #define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used #define bpf_iter__sockmap bpf_iter__sockmap___not_used #define bpf_iter__bpf_link bpf_iter__bpf_link___not_used +#define bpf_iter__cgroup bpf_iter__cgroup___not_used #define btf_ptr btf_ptr___not_used #define BTF_F_COMPACT BTF_F_COMPACT___not_used #define BTF_F_NONAME BTF_F_NONAME___not_used @@ -40,6 +41,7 @@ #undef bpf_iter__bpf_sk_storage_map #undef bpf_iter__sockmap #undef bpf_iter__bpf_link +#undef bpf_iter__cgroup #undef btf_ptr #undef BTF_F_COMPACT #undef BTF_F_NONAME @@ -141,6 +143,11 @@ struct bpf_iter__bpf_link { struct bpf_link *link; }; +struct bpf_iter__cgroup { + struct bpf_iter_meta *meta; + struct cgroup *cgroup; +} __attribute__((preserve_access_index)); + struct btf_ptr { void *ptr; __u32 type_id; diff --git a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c new file mode 100644 index 000000000000..8ab4253a1592 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Functions to manage eBPF programs attached to cgroup subsystems + * + * Copyright 2022 Google LLC. + */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +/* + * Start times are stored per-task, not per-cgroup, as multiple tasks in one + * cgroup can perform reclaim concurrently. + */ +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, __u64); +} vmscan_start_time SEC(".maps"); + +struct vmscan_percpu { + /* Previous percpu state, to figure out if we have new updates */ + __u64 prev; + /* Current percpu state */ + __u64 state; +}; + +struct vmscan { + /* State propagated through children, pending aggregation */ + __u64 pending; + /* Total state, including all cpus and all children */ + __u64 state; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 100); + __type(key, __u64); + __type(value, struct vmscan_percpu); +} pcpu_cgroup_vmscan_elapsed SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 100); + __type(key, __u64); + __type(value, struct vmscan); +} cgroup_vmscan_elapsed SEC(".maps"); + +extern void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym; +extern void cgroup_rstat_flush(struct cgroup *cgrp) __ksym; + +static struct cgroup *task_memcg(struct task_struct *task) +{ + int cgrp_id; + +#if __has_builtin(__builtin_preserve_enum_value) + cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id, memory_cgrp_id); +#else + cgrp_id = memory_cgrp_id; +#endif + return task->cgroups->subsys[cgrp_id]->cgroup; +} + +static uint64_t cgroup_id(struct cgroup *cgrp) +{ + return cgrp->kn->id; +} + +static int create_vmscan_percpu_elem(__u64 cg_id, __u64 state) +{ + struct vmscan_percpu pcpu_init = {.state = state, .prev = 0}; + + return bpf_map_update_elem(&pcpu_cgroup_vmscan_elapsed, &cg_id, + &pcpu_init, BPF_NOEXIST); +} + +static int create_vmscan_elem(__u64 cg_id, __u64 state, __u64 pending) +{ + struct vmscan init = {.state = state, .pending = pending}; + + return bpf_map_update_elem(&cgroup_vmscan_elapsed, &cg_id, + &init, BPF_NOEXIST); +} + +SEC("tp_btf/mm_vmscan_memcg_reclaim_begin") +int BPF_PROG(vmscan_start, int order, gfp_t gfp_flags) +{ + struct task_struct *task = bpf_get_current_task_btf(); + __u64 *start_time_ptr; + + start_time_ptr = bpf_task_storage_get(&vmscan_start_time, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (start_time_ptr) + *start_time_ptr = bpf_ktime_get_ns(); + return 0; +} + +SEC("tp_btf/mm_vmscan_memcg_reclaim_end") +int BPF_PROG(vmscan_end, unsigned long nr_reclaimed) +{ + struct vmscan_percpu *pcpu_stat; + struct task_struct *current = bpf_get_current_task_btf(); + struct cgroup *cgrp; + __u64 *start_time_ptr; + __u64 current_elapsed, cg_id; + __u64 end_time = bpf_ktime_get_ns(); + + /* + * cgrp is the first parent cgroup of current that has memcg enabled in + * its subtree_control, or NULL if memcg is disabled in the entire tree. + * In a cgroup hierarchy like this: + * a + * / \ + * b c + * If "a" has memcg enabled, while "b" doesn't, then processes in "b" + * will accumulate their stats directly to "a". This makes sure that no + * stats are lost from processes in leaf cgroups that don't have memcg + * enabled, but only exposes stats for cgroups that have memcg enabled. + */ + cgrp = task_memcg(current); + if (!cgrp) + return 0; + + cg_id = cgroup_id(cgrp); + start_time_ptr = bpf_task_storage_get(&vmscan_start_time, current, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!start_time_ptr) + return 0; + + current_elapsed = end_time - *start_time_ptr; + pcpu_stat = bpf_map_lookup_elem(&pcpu_cgroup_vmscan_elapsed, + &cg_id); + if (pcpu_stat) + pcpu_stat->state += current_elapsed; + else if (create_vmscan_percpu_elem(cg_id, current_elapsed)) + return 0; + + cgroup_rstat_updated(cgrp, bpf_get_smp_processor_id()); + return 0; +} + +SEC("fentry/bpf_rstat_flush") +int BPF_PROG(vmscan_flush, struct cgroup *cgrp, struct cgroup *parent, int cpu) +{ + struct vmscan_percpu *pcpu_stat; + struct vmscan *total_stat, *parent_stat; + __u64 cg_id = cgroup_id(cgrp); + __u64 parent_cg_id = parent ? cgroup_id(parent) : 0; + __u64 *pcpu_vmscan; + __u64 state; + __u64 delta = 0; + + /* Add CPU changes on this level since the last flush */ + pcpu_stat = bpf_map_lookup_percpu_elem(&pcpu_cgroup_vmscan_elapsed, + &cg_id, cpu); + if (pcpu_stat) { + state = pcpu_stat->state; + delta += state - pcpu_stat->prev; + pcpu_stat->prev = state; + } + + total_stat = bpf_map_lookup_elem(&cgroup_vmscan_elapsed, &cg_id); + if (!total_stat) { + if (create_vmscan_elem(cg_id, delta, 0)) + return 0; + + goto update_parent; + } + + /* Collect pending stats from subtree */ + if (total_stat->pending) { + delta += total_stat->pending; + total_stat->pending = 0; + } + + /* Propagate changes to this cgroup's total */ + total_stat->state += delta; + +update_parent: + /* Skip if there are no changes to propagate, or no parent */ + if (!delta || !parent_cg_id) + return 0; + + /* Propagate changes to cgroup's parent */ + parent_stat = bpf_map_lookup_elem(&cgroup_vmscan_elapsed, + &parent_cg_id); + if (parent_stat) + parent_stat->pending += delta; + else + create_vmscan_elem(parent_cg_id, 0, delta); + return 0; +} + +SEC("iter.s/cgroup") +int BPF_PROG(dump_vmscan, struct bpf_iter_meta *meta, struct cgroup *cgrp) +{ + struct seq_file *seq = meta->seq; + struct vmscan *total_stat; + __u64 cg_id = cgrp ? cgroup_id(cgrp) : 0; + + /* Do nothing for the terminal call */ + if (!cg_id) + return 1; + + /* Flush the stats to make sure we get the most updated numbers */ + cgroup_rstat_flush(cgrp); + + total_stat = bpf_map_lookup_elem(&cgroup_vmscan_elapsed, &cg_id); + if (!total_stat) { + BPF_SEQ_PRINTF(seq, "cg_id: %llu, total_vmscan_delay: 0\n", + cg_id); + } else { + BPF_SEQ_PRINTF(seq, "cg_id: %llu, total_vmscan_delay: %llu\n", + cg_id, total_stat->state); + } + + /* + * We only dump stats for one cgroup here, so return 1 to stop + * iteration after the first cgroup. + */ + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/cgroup_iter.c b/tools/testing/selftests/bpf/progs/cgroup_iter.c new file mode 100644 index 000000000000..de03997322a7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_iter.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Google */ + +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; +int terminate_early = 0; +u64 terminal_cgroup = 0; + +static inline u64 cgroup_id(struct cgroup *cgrp) +{ + return cgrp->kn->id; +} + +SEC("iter/cgroup") +int cgroup_id_printer(struct bpf_iter__cgroup *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct cgroup *cgrp = ctx->cgroup; + + /* epilogue */ + if (cgrp == NULL) { + BPF_SEQ_PRINTF(seq, "epilogue\n"); + return 0; + } + + /* prologue */ + if (ctx->meta->seq_num == 0) + BPF_SEQ_PRINTF(seq, "prologue\n"); + + BPF_SEQ_PRINTF(seq, "%8llu\n", cgroup_id(cgrp)); + + if (terminal_cgroup == cgroup_id(cgrp)) + return 1; + + return terminate_early ? 1 : 0; +} |