From ae460c022453337850bdc36a36bf7596a6cfcf99 Mon Sep 17 00:00:00 2001
From: Yoshiki Komachi
Date: Tue, 21 Apr 2020 09:05:27 +0900
Subject: bpf_helpers.h: Add note for building with vmlinux.h or linux/types.h

The following error was shown when a bpf program was compiled without
vmlinux.h auto-generated from BTF:

  # clang -I./linux/tools/lib/ -I/lib/modules/$(uname -r)/build/include/ \
    -O2 -Wall -target bpf -emit-llvm -c bpf_prog.c -o bpf_prog.bc
  ...
  In file included from linux/tools/lib/bpf/bpf_helpers.h:5:
  linux/tools/lib/bpf/bpf_helper_defs.h:56:82: error: unknown type name '__u64'
  ...

It seems that bpf programs are intended to be built together with vmlinux.h
(which will have all the __u64 and other typedefs). But users may mistakenly
think "#include <linux/types.h>" is missing because vmlinux.h is not common
for non-bpf developers. IMO, an explicit comment should therefore be added
to bpf_helpers.h, as this patch shows.

Signed-off-by: Yoshiki Komachi
Signed-off-by: Alexei Starovoitov
Acked-by: Andrii Nakryiko
Link: https://lore.kernel.org/bpf/1587427527-29399-1-git-send-email-komachi.yoshiki@gmail.com
---
 tools/lib/bpf/bpf_helpers.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'tools/lib')

diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index f69cc208778a..60aad054eea1 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -2,6 +2,12 @@
 #ifndef __BPF_HELPERS__
 #define __BPF_HELPERS__

+/*
+ * Note that bpf programs need to include either
+ * vmlinux.h (auto-generated from BTF) or linux/types.h
+ * in advance since bpf_helper_defs.h uses such types
+ * as __u64.
+ */
 #include "bpf_helper_defs.h"

 #define __uint(name, val) int (*name)[val]
-- cgit v1.2.3

From e411eb257b331bf44cbe8845b5351260c8222c6c Mon Sep 17 00:00:00 2001
From: Mao Wenan
Date: Sun, 26 Apr 2020 14:36:35 +0800
Subject: libbpf: Return err if bpf_object__load failed

bpf_object__load() has various return codes; when it fails to load the
object, it must return err instead of -EINVAL.
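As a caller-side illustration (not part of this patch; the error value shown
is hypothetical), the visible effect for bpf_prog_load_xattr() users is that
the real failure cause now propagates:

  err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
  if (err) {
          /* err is now the actual error from bpf_object__load(),
           * e.g. -EPERM, instead of a blanket -EINVAL
           */
          fprintf(stderr, "failed to load object: %d\n", err);
  }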
Signed-off-by: Mao Wenan Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200426063635.130680-3-maowenan@huawei.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8f480e29a6b0..8e1dc6980fac 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7006,7 +7006,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, err = bpf_object__load(obj); if (err) { bpf_object__close(obj); - return -EINVAL; + return err; } *pobj = obj; -- cgit v1.2.3 From a6bbdf2e750f245d219d39f3c3d06ace2c5871e6 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Tue, 28 Apr 2020 17:07:09 +0800 Subject: libbpf: Remove unneeded semicolon in btf_dump_emit_type Fixes the following coccicheck warning: tools/lib/bpf/btf_dump.c:661:4-5: Unneeded semicolon Reported-by: Hulk Robot Signed-off-by: Zou Wei Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/1588064829-70613-1-git-send-email-zou_wei@huawei.com --- tools/lib/bpf/btf_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 0c28ee82834b..de07e559a11d 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -658,7 +658,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) if (!btf_dump_is_blacklisted(d, id)) { btf_dump_emit_typedef_def(d, id, t, 0); btf_dump_printf(d, ";\n\n"); - }; + } tstate->fwd_emitted = 1; break; default: -- cgit v1.2.3 From 0dbc866832a0fbf9f2b98d412da44c5cfd1b7756 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Apr 2020 17:16:09 -0700 Subject: libbpf: Add low-level APIs for new bpf_link commands Add low-level API calls for bpf_link_get_next_id() and bpf_link_get_fd_by_id(). 
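As an illustration (not part of this patch), a minimal sketch of walking all
BPF links in the system with the new APIs; error handling is mostly elided:

  __u32 id = 0;
  int fd;

  while (bpf_link_get_next_id(id, &id) == 0) {
          fd = bpf_link_get_fd_by_id(id);
          if (fd < 0)
                  continue; /* link may already be gone */
          /* inspect the link, e.g. with bpf_obj_get_info_by_fd(fd, ...) */
          close(fd);
  }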
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200429001614.1544-6-andriin@fb.com --- tools/lib/bpf/bpf.c | 19 +++++++++++++++++-- tools/lib/bpf/bpf.h | 4 +++- tools/lib/bpf/libbpf.map | 6 ++++++ 3 files changed, 26 insertions(+), 3 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 5cc1b0785d18..8f2f0958d446 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -721,6 +721,11 @@ int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id) return bpf_obj_get_next_id(start_id, next_id, BPF_BTF_GET_NEXT_ID); } +int bpf_link_get_next_id(__u32 start_id, __u32 *next_id) +{ + return bpf_obj_get_next_id(start_id, next_id, BPF_LINK_GET_NEXT_ID); +} + int bpf_prog_get_fd_by_id(__u32 id) { union bpf_attr attr; @@ -751,13 +756,23 @@ int bpf_btf_get_fd_by_id(__u32 id) return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); } -int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len) +int bpf_link_get_fd_by_id(__u32 id) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.link_id = id; + + return sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr)); +} + +int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len) { union bpf_attr attr; int err; memset(&attr, 0, sizeof(attr)); - attr.info.bpf_fd = prog_fd; + attr.info.bpf_fd = bpf_fd; attr.info.info_len = *info_len; attr.info.info = ptr_to_u64(info); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 46d47afdd887..335b457b3a25 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -216,10 +216,12 @@ LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data, LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id); LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id); +LIBBPF_API int bpf_link_get_next_id(__u32 start_id, __u32 *next_id); LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); LIBBPF_API int bpf_map_get_fd_by_id(__u32 id); LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id); -LIBBPF_API int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); +LIBBPF_API int bpf_link_get_fd_by_id(__u32 id); +LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len); LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index bb8831605b25..7cd49aa38005 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -254,3 +254,9 @@ LIBBPF_0.0.8 { bpf_program__set_lsm; bpf_set_link_xdp_fd_opts; } LIBBPF_0.0.7; + +LIBBPF_0.0.9 { + global: + bpf_link_get_fd_by_id; + bpf_link_get_next_id; +} LIBBPF_0.0.8; -- cgit v1.2.3 From 41017e56af6cf99122c86655f60fe4e1b75ecf48 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Apr 2020 17:27:37 -0700 Subject: libbpf: Refactor BTF-defined map definition parsing logic Factor out BTF map definition logic into stand-alone routine for easier reuse for map-in-map case. 
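For context, a hedged sketch of the kind of BTF-defined map definition that
the factored-out parse_btf_map_def() handles (a plain, non-nested map here;
the map-in-map case in a later patch reuses the same parsing for the inner
definition):

  struct {
          __uint(type, BPF_MAP_TYPE_HASH);
          __uint(max_entries, 1024);
          __type(key, int);
          __type(value, long);
  } my_map SEC(".maps");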
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200429002739.48006-2-andriin@fb.com --- tools/lib/bpf/libbpf.c | 195 ++++++++++++++++++++++++++----------------------- 1 file changed, 103 insertions(+), 92 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8e1dc6980fac..7d10436d7b58 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1914,109 +1914,54 @@ static int build_map_pin_path(struct bpf_map *map, const char *path) return 0; } -static int bpf_object__init_user_btf_map(struct bpf_object *obj, - const struct btf_type *sec, - int var_idx, int sec_idx, - const Elf_Data *data, bool strict, - const char *pin_root_path) + +static int parse_btf_map_def(struct bpf_object *obj, + struct bpf_map *map, + const struct btf_type *def, + bool strict, + const char *pin_root_path) { - const struct btf_type *var, *def, *t; - const struct btf_var_secinfo *vi; - const struct btf_var *var_extra; + const struct btf_type *t; const struct btf_member *m; - const char *map_name; - struct bpf_map *map; int vlen, i; - vi = btf_var_secinfos(sec) + var_idx; - var = btf__type_by_id(obj->btf, vi->type); - var_extra = btf_var(var); - map_name = btf__name_by_offset(obj->btf, var->name_off); - vlen = btf_vlen(var); - - if (map_name == NULL || map_name[0] == '\0') { - pr_warn("map #%d: empty name.\n", var_idx); - return -EINVAL; - } - if ((__u64)vi->offset + vi->size > data->d_size) { - pr_warn("map '%s' BTF data is corrupted.\n", map_name); - return -EINVAL; - } - if (!btf_is_var(var)) { - pr_warn("map '%s': unexpected var kind %u.\n", - map_name, btf_kind(var)); - return -EINVAL; - } - if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED && - var_extra->linkage != BTF_VAR_STATIC) { - pr_warn("map '%s': unsupported var linkage %u.\n", - map_name, var_extra->linkage); - return -EOPNOTSUPP; - } - - def = skip_mods_and_typedefs(obj->btf, var->type, NULL); - if (!btf_is_struct(def)) { - pr_warn("map '%s': unexpected def kind %u.\n", - map_name, btf_kind(var)); - return -EINVAL; - } - if (def->size > vi->size) { - pr_warn("map '%s': invalid def size.\n", map_name); - return -EINVAL; - } - - map = bpf_object__add_map(obj); - if (IS_ERR(map)) - return PTR_ERR(map); - map->name = strdup(map_name); - if (!map->name) { - pr_warn("map '%s': failed to alloc map name.\n", map_name); - return -ENOMEM; - } - map->libbpf_type = LIBBPF_MAP_UNSPEC; - map->def.type = BPF_MAP_TYPE_UNSPEC; - map->sec_idx = sec_idx; - map->sec_offset = vi->offset; - pr_debug("map '%s': at sec_idx %d, offset %zu.\n", - map_name, map->sec_idx, map->sec_offset); - vlen = btf_vlen(def); m = btf_members(def); for (i = 0; i < vlen; i++, m++) { const char *name = btf__name_by_offset(obj->btf, m->name_off); if (!name) { - pr_warn("map '%s': invalid field #%d.\n", map_name, i); + pr_warn("map '%s': invalid field #%d.\n", map->name, i); return -EINVAL; } if (strcmp(name, "type") == 0) { - if (!get_map_field_int(map_name, obj->btf, m, + if (!get_map_field_int(map->name, obj->btf, m, &map->def.type)) return -EINVAL; pr_debug("map '%s': found type = %u.\n", - map_name, map->def.type); + map->name, map->def.type); } else if (strcmp(name, "max_entries") == 0) { - if (!get_map_field_int(map_name, obj->btf, m, + if (!get_map_field_int(map->name, obj->btf, m, &map->def.max_entries)) return -EINVAL; pr_debug("map '%s': found max_entries = %u.\n", - map_name, map->def.max_entries); + map->name, map->def.max_entries); } else if (strcmp(name, "map_flags") == 
0) { - if (!get_map_field_int(map_name, obj->btf, m, + if (!get_map_field_int(map->name, obj->btf, m, &map->def.map_flags)) return -EINVAL; pr_debug("map '%s': found map_flags = %u.\n", - map_name, map->def.map_flags); + map->name, map->def.map_flags); } else if (strcmp(name, "key_size") == 0) { __u32 sz; - if (!get_map_field_int(map_name, obj->btf, m, &sz)) + if (!get_map_field_int(map->name, obj->btf, m, &sz)) return -EINVAL; pr_debug("map '%s': found key_size = %u.\n", - map_name, sz); + map->name, sz); if (map->def.key_size && map->def.key_size != sz) { pr_warn("map '%s': conflicting key size %u != %u.\n", - map_name, map->def.key_size, sz); + map->name, map->def.key_size, sz); return -EINVAL; } map->def.key_size = sz; @@ -2026,25 +1971,25 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, t = btf__type_by_id(obj->btf, m->type); if (!t) { pr_warn("map '%s': key type [%d] not found.\n", - map_name, m->type); + map->name, m->type); return -EINVAL; } if (!btf_is_ptr(t)) { pr_warn("map '%s': key spec is not PTR: %u.\n", - map_name, btf_kind(t)); + map->name, btf_kind(t)); return -EINVAL; } sz = btf__resolve_size(obj->btf, t->type); if (sz < 0) { pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n", - map_name, t->type, (ssize_t)sz); + map->name, t->type, (ssize_t)sz); return sz; } pr_debug("map '%s': found key [%u], sz = %zd.\n", - map_name, t->type, (ssize_t)sz); + map->name, t->type, (ssize_t)sz); if (map->def.key_size && map->def.key_size != sz) { pr_warn("map '%s': conflicting key size %u != %zd.\n", - map_name, map->def.key_size, (ssize_t)sz); + map->name, map->def.key_size, (ssize_t)sz); return -EINVAL; } map->def.key_size = sz; @@ -2052,13 +1997,13 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, } else if (strcmp(name, "value_size") == 0) { __u32 sz; - if (!get_map_field_int(map_name, obj->btf, m, &sz)) + if (!get_map_field_int(map->name, obj->btf, m, &sz)) return -EINVAL; pr_debug("map '%s': found value_size = %u.\n", - map_name, sz); + map->name, sz); if (map->def.value_size && map->def.value_size != sz) { pr_warn("map '%s': conflicting value size %u != %u.\n", - map_name, map->def.value_size, sz); + map->name, map->def.value_size, sz); return -EINVAL; } map->def.value_size = sz; @@ -2068,25 +2013,25 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, t = btf__type_by_id(obj->btf, m->type); if (!t) { pr_warn("map '%s': value type [%d] not found.\n", - map_name, m->type); + map->name, m->type); return -EINVAL; } if (!btf_is_ptr(t)) { pr_warn("map '%s': value spec is not PTR: %u.\n", - map_name, btf_kind(t)); + map->name, btf_kind(t)); return -EINVAL; } sz = btf__resolve_size(obj->btf, t->type); if (sz < 0) { pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n", - map_name, t->type, (ssize_t)sz); + map->name, t->type, (ssize_t)sz); return sz; } pr_debug("map '%s': found value [%u], sz = %zd.\n", - map_name, t->type, (ssize_t)sz); + map->name, t->type, (ssize_t)sz); if (map->def.value_size && map->def.value_size != sz) { pr_warn("map '%s': conflicting value size %u != %zd.\n", - map_name, map->def.value_size, (ssize_t)sz); + map->name, map->def.value_size, (ssize_t)sz); return -EINVAL; } map->def.value_size = sz; @@ -2095,44 +2040,110 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, __u32 val; int err; - if (!get_map_field_int(map_name, obj->btf, m, &val)) + if (!get_map_field_int(map->name, obj->btf, m, &val)) return -EINVAL; pr_debug("map '%s': found pinning = %u.\n", - map_name, 
val); + map->name, val); if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) { pr_warn("map '%s': invalid pinning value %u.\n", - map_name, val); + map->name, val); return -EINVAL; } if (val == LIBBPF_PIN_BY_NAME) { err = build_map_pin_path(map, pin_root_path); if (err) { pr_warn("map '%s': couldn't build pin path.\n", - map_name); + map->name); return err; } } } else { if (strict) { pr_warn("map '%s': unknown field '%s'.\n", - map_name, name); + map->name, name); return -ENOTSUP; } pr_debug("map '%s': ignoring unknown field '%s'.\n", - map_name, name); + map->name, name); } } if (map->def.type == BPF_MAP_TYPE_UNSPEC) { - pr_warn("map '%s': map type isn't specified.\n", map_name); + pr_warn("map '%s': map type isn't specified.\n", map->name); return -EINVAL; } return 0; } +static int bpf_object__init_user_btf_map(struct bpf_object *obj, + const struct btf_type *sec, + int var_idx, int sec_idx, + const Elf_Data *data, bool strict, + const char *pin_root_path) +{ + const struct btf_type *var, *def; + const struct btf_var_secinfo *vi; + const struct btf_var *var_extra; + const char *map_name; + struct bpf_map *map; + + vi = btf_var_secinfos(sec) + var_idx; + var = btf__type_by_id(obj->btf, vi->type); + var_extra = btf_var(var); + map_name = btf__name_by_offset(obj->btf, var->name_off); + + if (map_name == NULL || map_name[0] == '\0') { + pr_warn("map #%d: empty name.\n", var_idx); + return -EINVAL; + } + if ((__u64)vi->offset + vi->size > data->d_size) { + pr_warn("map '%s' BTF data is corrupted.\n", map_name); + return -EINVAL; + } + if (!btf_is_var(var)) { + pr_warn("map '%s': unexpected var kind %u.\n", + map_name, btf_kind(var)); + return -EINVAL; + } + if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED && + var_extra->linkage != BTF_VAR_STATIC) { + pr_warn("map '%s': unsupported var linkage %u.\n", + map_name, var_extra->linkage); + return -EOPNOTSUPP; + } + + def = skip_mods_and_typedefs(obj->btf, var->type, NULL); + if (!btf_is_struct(def)) { + pr_warn("map '%s': unexpected def kind %u.\n", + map_name, btf_kind(var)); + return -EINVAL; + } + if (def->size > vi->size) { + pr_warn("map '%s': invalid def size.\n", map_name); + return -EINVAL; + } + + map = bpf_object__add_map(obj); + if (IS_ERR(map)) + return PTR_ERR(map); + map->name = strdup(map_name); + if (!map->name) { + pr_warn("map '%s': failed to alloc map name.\n", map_name); + return -ENOMEM; + } + map->libbpf_type = LIBBPF_MAP_UNSPEC; + map->def.type = BPF_MAP_TYPE_UNSPEC; + map->sec_idx = sec_idx; + map->sec_offset = vi->offset; + pr_debug("map '%s': at sec_idx %d, offset %zu.\n", + map_name, map->sec_idx, map->sec_offset); + + return parse_btf_map_def(obj, map, def, strict, pin_root_path); +} + static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, const char *pin_root_path) { -- cgit v1.2.3 From 2d39d7c56f115148b05d1d8c6b8698a5730c8b53 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Apr 2020 17:27:38 -0700 Subject: libbpf: Refactor map creation logic and fix cleanup leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Factor out map creation and destruction logic to simplify code and especially error handling. Also fix map FD leak in case of partially successful map creation during bpf_object load operation. 
Fixes: 57a00f41644f ("libbpf: Add auto-pinning of maps when loading BPF objects") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20200429002739.48006-3-andriin@fb.com --- tools/lib/bpf/libbpf.c | 226 ++++++++++++++++++++++++++----------------------- 1 file changed, 121 insertions(+), 105 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7d10436d7b58..9c845cf4cfcf 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3493,107 +3493,111 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) return 0; } +static void bpf_map__destroy(struct bpf_map *map); + +static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) +{ + struct bpf_create_map_attr create_attr; + struct bpf_map_def *def = &map->def; + + memset(&create_attr, 0, sizeof(create_attr)); + + if (obj->caps.name) + create_attr.name = map->name; + create_attr.map_ifindex = map->map_ifindex; + create_attr.map_type = def->type; + create_attr.map_flags = def->map_flags; + create_attr.key_size = def->key_size; + create_attr.value_size = def->value_size; + + if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) { + int nr_cpus; + + nr_cpus = libbpf_num_possible_cpus(); + if (nr_cpus < 0) { + pr_warn("map '%s': failed to determine number of system CPUs: %d\n", + map->name, nr_cpus); + return nr_cpus; + } + pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); + create_attr.max_entries = nr_cpus; + } else { + create_attr.max_entries = def->max_entries; + } + + if (bpf_map__is_struct_ops(map)) + create_attr.btf_vmlinux_value_type_id = + map->btf_vmlinux_value_type_id; + + create_attr.btf_fd = 0; + create_attr.btf_key_type_id = 0; + create_attr.btf_value_type_id = 0; + if (obj->btf && !bpf_map_find_btf_info(obj, map)) { + create_attr.btf_fd = btf__fd(obj->btf); + create_attr.btf_key_type_id = map->btf_key_type_id; + create_attr.btf_value_type_id = map->btf_value_type_id; + } + + map->fd = bpf_create_map_xattr(&create_attr); + if (map->fd < 0 && (create_attr.btf_key_type_id || + create_attr.btf_value_type_id)) { + char *cp, errmsg[STRERR_BUFSIZE]; + int err = -errno; + + cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); + pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). 
Retrying without BTF.\n", + map->name, cp, err); + create_attr.btf_fd = 0; + create_attr.btf_key_type_id = 0; + create_attr.btf_value_type_id = 0; + map->btf_key_type_id = 0; + map->btf_value_type_id = 0; + map->fd = bpf_create_map_xattr(&create_attr); + } + + if (map->fd < 0) + return -errno; + + return 0; +} + static int bpf_object__create_maps(struct bpf_object *obj) { - struct bpf_create_map_attr create_attr = {}; - int nr_cpus = 0; - unsigned int i; + struct bpf_map *map; + char *cp, errmsg[STRERR_BUFSIZE]; + unsigned int i, j; int err; for (i = 0; i < obj->nr_maps; i++) { - struct bpf_map *map = &obj->maps[i]; - struct bpf_map_def *def = &map->def; - char *cp, errmsg[STRERR_BUFSIZE]; - int *pfd = &map->fd; + map = &obj->maps[i]; if (map->pin_path) { err = bpf_object__reuse_map(map); if (err) { - pr_warn("error reusing pinned map %s\n", + pr_warn("map '%s': error reusing pinned map\n", map->name); - return err; + goto err_out; } } if (map->fd >= 0) { - pr_debug("skip map create (preset) %s: fd=%d\n", + pr_debug("map '%s': skipping creation (preset fd=%d)\n", map->name, map->fd); continue; } - if (obj->caps.name) - create_attr.name = map->name; - create_attr.map_ifindex = map->map_ifindex; - create_attr.map_type = def->type; - create_attr.map_flags = def->map_flags; - create_attr.key_size = def->key_size; - create_attr.value_size = def->value_size; - if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && - !def->max_entries) { - if (!nr_cpus) - nr_cpus = libbpf_num_possible_cpus(); - if (nr_cpus < 0) { - pr_warn("failed to determine number of system CPUs: %d\n", - nr_cpus); - err = nr_cpus; - goto err_out; - } - pr_debug("map '%s': setting size to %d\n", - map->name, nr_cpus); - create_attr.max_entries = nr_cpus; - } else { - create_attr.max_entries = def->max_entries; - } - create_attr.btf_fd = 0; - create_attr.btf_key_type_id = 0; - create_attr.btf_value_type_id = 0; - if (bpf_map_type__is_map_in_map(def->type) && - map->inner_map_fd >= 0) - create_attr.inner_map_fd = map->inner_map_fd; - if (bpf_map__is_struct_ops(map)) - create_attr.btf_vmlinux_value_type_id = - map->btf_vmlinux_value_type_id; - - if (obj->btf && !bpf_map_find_btf_info(obj, map)) { - create_attr.btf_fd = btf__fd(obj->btf); - create_attr.btf_key_type_id = map->btf_key_type_id; - create_attr.btf_value_type_id = map->btf_value_type_id; - } - - *pfd = bpf_create_map_xattr(&create_attr); - if (*pfd < 0 && (create_attr.btf_key_type_id || - create_attr.btf_value_type_id)) { - err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). 
Retrying without BTF.\n", - map->name, cp, err); - create_attr.btf_fd = 0; - create_attr.btf_key_type_id = 0; - create_attr.btf_value_type_id = 0; - map->btf_key_type_id = 0; - map->btf_value_type_id = 0; - *pfd = bpf_create_map_xattr(&create_attr); - } - - if (*pfd < 0) { - size_t j; + err = bpf_object__create_map(obj, map); + if (err) + goto err_out; - err = -errno; -err_out: - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("failed to create map (name: '%s'): %s(%d)\n", - map->name, cp, err); - pr_perm_msg(err); - for (j = 0; j < i; j++) - zclose(obj->maps[j].fd); - return err; - } + pr_debug("map '%s': created successfully, fd=%d\n", map->name, + map->fd); if (bpf_map__is_internal(map)) { err = bpf_object__populate_internal_map(obj, map); if (err < 0) { - zclose(*pfd); + zclose(map->fd); goto err_out; } } @@ -3601,16 +3605,23 @@ err_out: if (map->pin_path && !map->pinned) { err = bpf_map__pin(map, NULL); if (err) { - pr_warn("failed to auto-pin map name '%s' at '%s'\n", - map->name, map->pin_path); - return err; + pr_warn("map '%s': failed to auto-pin at '%s': %d\n", + map->name, map->pin_path, err); + zclose(map->fd); + goto err_out; } } - - pr_debug("created map %s: fd=%d\n", map->name, *pfd); } return 0; + +err_out: + cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); + pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err); + pr_perm_msg(err); + for (j = 0; j < i; j++) + zclose(obj->maps[j].fd); + return err; } static int @@ -5966,6 +5977,32 @@ int bpf_object__pin(struct bpf_object *obj, const char *path) return 0; } +static void bpf_map__destroy(struct bpf_map *map) +{ + if (map->clear_priv) + map->clear_priv(map, map->priv); + map->priv = NULL; + map->clear_priv = NULL; + + if (map->mmaped) { + munmap(map->mmaped, bpf_map_mmap_sz(map)); + map->mmaped = NULL; + } + + if (map->st_ops) { + zfree(&map->st_ops->data); + zfree(&map->st_ops->progs); + zfree(&map->st_ops->kern_func_off); + zfree(&map->st_ops); + } + + zfree(&map->name); + zfree(&map->pin_path); + + if (map->fd >= 0) + zclose(map->fd); +} + void bpf_object__close(struct bpf_object *obj) { size_t i; @@ -5981,29 +6018,8 @@ void bpf_object__close(struct bpf_object *obj) btf__free(obj->btf); btf_ext__free(obj->btf_ext); - for (i = 0; i < obj->nr_maps; i++) { - struct bpf_map *map = &obj->maps[i]; - - if (map->clear_priv) - map->clear_priv(map, map->priv); - map->priv = NULL; - map->clear_priv = NULL; - - if (map->mmaped) { - munmap(map->mmaped, bpf_map_mmap_sz(map)); - map->mmaped = NULL; - } - - if (map->st_ops) { - zfree(&map->st_ops->data); - zfree(&map->st_ops->progs); - zfree(&map->st_ops->kern_func_off); - zfree(&map->st_ops); - } - - zfree(&map->name); - zfree(&map->pin_path); - } + for (i = 0; i < obj->nr_maps; i++) + bpf_map__destroy(&obj->maps[i]); zfree(&obj->kconfig); zfree(&obj->externs); -- cgit v1.2.3 From 646f02ffdd49c466cb81642c2b013beb80092d01 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Apr 2020 17:27:39 -0700 Subject: libbpf: Add BTF-defined map-in-map support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As discussed at LPC 2019 ([0]), this patch brings (a quite belated) support for declarative BTF-defined map-in-map support in libbpf. It allows to define ARRAY_OF_MAPS and HASH_OF_MAPS BPF maps without any user-space initialization code involved. Additionally, it allows to initialize outer map's slots with references to respective inner maps at load time, also completely declaratively. 
Despite C's weak type system, the way BTF-defined map-in-map definitions
work makes it actually quite hard to accidentally initialize an outer map
with incompatible inner maps. This being C, of course, it's still possible,
but even that would be caught at load time and an error returned, with a
helpful debug log pointing exactly to the slot that failed to be
initialized.

As an example, here's a rather advanced HASH_OF_MAPS declaration and
initialization example, filling slots #0 and #4 with two inner maps:

  #include <bpf/bpf_helpers.h>

  struct inner_map {
          __uint(type, BPF_MAP_TYPE_ARRAY);
          __uint(max_entries, 1);
          __type(key, int);
          __type(value, int);
  } inner_map1 SEC(".maps"),
    inner_map2 SEC(".maps");

  struct outer_hash {
          __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
          __uint(max_entries, 5);
          __uint(key_size, sizeof(int));
          __array(values, struct inner_map);
  } outer_hash SEC(".maps") = {
          .values = {
                  [0] = &inner_map2,
                  [4] = &inner_map1,
          },
  };

Here's the relevant part of the libbpf debug log, showing pretty clearly
what's going on with map-in-map initialization:

  libbpf: .maps relo #0: for 6 value 0 rel.r_offset 96 name 260 ('inner_map1')
  libbpf: .maps relo #0: map 'outer_arr' slot [0] points to map 'inner_map1'
  libbpf: .maps relo #1: for 7 value 32 rel.r_offset 112 name 249 ('inner_map2')
  libbpf: .maps relo #1: map 'outer_arr' slot [2] points to map 'inner_map2'
  libbpf: .maps relo #2: for 7 value 32 rel.r_offset 144 name 249 ('inner_map2')
  libbpf: .maps relo #2: map 'outer_hash' slot [0] points to map 'inner_map2'
  libbpf: .maps relo #3: for 6 value 0 rel.r_offset 176 name 260 ('inner_map1')
  libbpf: .maps relo #3: map 'outer_hash' slot [4] points to map 'inner_map1'
  libbpf: map 'inner_map1': created successfully, fd=4
  libbpf: map 'inner_map2': created successfully, fd=5
  libbpf: map 'outer_hash': created successfully, fd=7
  libbpf: map 'outer_hash': slot [0] set to map 'inner_map2' fd=5
  libbpf: map 'outer_hash': slot [4] set to map 'inner_map1' fd=4

Notice from the log above that fd=6 (not logged explicitly) is used for the
inner "prototype" map, which is necessary for creation of the outer map. It
is destroyed immediately after the outer map is created.

See also the included selftest, which has some extra comments explaining
the details of usage.

Additionally, similar initialization syntax and libbpf functionality can be
used to initialize BPF_PROG_ARRAY with references to BPF sub-programs. This
can be done in follow-up patches, if there is demand for it.

[0] https://linuxplumbersconf.org/event/4/contributions/448/

Signed-off-by: Andrii Nakryiko
Signed-off-by: Alexei Starovoitov
Acked-by: Toke Høiland-Jørgensen
Link: https://lore.kernel.org/bpf/20200429002739.48006-4-andriin@fb.com
---
 tools/lib/bpf/bpf_helpers.h | 1 +
 tools/lib/bpf/libbpf.c | 281 +++++++++++++++++++--
 .../selftests/bpf/prog_tests/btf_map_in_map.c | 49 ++++
 .../selftests/bpf/progs/test_btf_map_in_map.c | 76 ++++++
 4 files changed, 384 insertions(+), 23 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_btf_map_in_map.c

(limited to 'tools/lib')

diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 60aad054eea1..da00b87aa199 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -12,6 +12,7 @@

 #define __uint(name, val) int (*name)[val]
 #define __type(name, val) typeof(val) *name
+#define __array(name, val) typeof(val) *name[]

 /* Helper macro to print out debug messages */
 #define bpf_printk(fmt, ...)
\ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 9c845cf4cfcf..445ee903f9cd 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -310,6 +310,7 @@ struct bpf_map { int map_ifindex; int inner_map_fd; struct bpf_map_def def; + __u32 btf_var_idx; __u32 btf_key_type_id; __u32 btf_value_type_id; __u32 btf_vmlinux_value_type_id; @@ -318,6 +319,9 @@ struct bpf_map { enum libbpf_map_type libbpf_type; void *mmaped; struct bpf_struct_ops *st_ops; + struct bpf_map *inner_map; + void **init_slots; + int init_slots_sz; char *pin_path; bool pinned; bool reused; @@ -389,6 +393,7 @@ struct bpf_object { int nr_reloc_sects; int maps_shndx; int btf_maps_shndx; + __u32 btf_maps_sec_btf_id; int text_shndx; int symbols_shndx; int data_shndx; @@ -1918,7 +1923,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path) static int parse_btf_map_def(struct bpf_object *obj, struct bpf_map *map, const struct btf_type *def, - bool strict, + bool strict, bool is_inner, const char *pin_root_path) { const struct btf_type *t; @@ -2036,10 +2041,79 @@ static int parse_btf_map_def(struct bpf_object *obj, } map->def.value_size = sz; map->btf_value_type_id = t->type; + } + else if (strcmp(name, "values") == 0) { + int err; + + if (is_inner) { + pr_warn("map '%s': multi-level inner maps not supported.\n", + map->name); + return -ENOTSUP; + } + if (i != vlen - 1) { + pr_warn("map '%s': '%s' member should be last.\n", + map->name, name); + return -EINVAL; + } + if (!bpf_map_type__is_map_in_map(map->def.type)) { + pr_warn("map '%s': should be map-in-map.\n", + map->name); + return -ENOTSUP; + } + if (map->def.value_size && map->def.value_size != 4) { + pr_warn("map '%s': conflicting value size %u != 4.\n", + map->name, map->def.value_size); + return -EINVAL; + } + map->def.value_size = 4; + t = btf__type_by_id(obj->btf, m->type); + if (!t) { + pr_warn("map '%s': map-in-map inner type [%d] not found.\n", + map->name, m->type); + return -EINVAL; + } + if (!btf_is_array(t) || btf_array(t)->nelems) { + pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n", + map->name); + return -EINVAL; + } + t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type, + NULL); + if (!btf_is_ptr(t)) { + pr_warn("map '%s': map-in-map inner def is of unexpected kind %u.\n", + map->name, btf_kind(t)); + return -EINVAL; + } + t = skip_mods_and_typedefs(obj->btf, t->type, NULL); + if (!btf_is_struct(t)) { + pr_warn("map '%s': map-in-map inner def is of unexpected kind %u.\n", + map->name, btf_kind(t)); + return -EINVAL; + } + + map->inner_map = calloc(1, sizeof(*map->inner_map)); + if (!map->inner_map) + return -ENOMEM; + map->inner_map->sec_idx = obj->efile.btf_maps_shndx; + map->inner_map->name = malloc(strlen(map->name) + + sizeof(".inner") + 1); + if (!map->inner_map->name) + return -ENOMEM; + sprintf(map->inner_map->name, "%s.inner", map->name); + + err = parse_btf_map_def(obj, map->inner_map, t, strict, + true /* is_inner */, NULL); + if (err) + return err; } else if (strcmp(name, "pinning") == 0) { __u32 val; int err; + if (is_inner) { + pr_debug("map '%s': inner def can't be pinned.\n", + map->name); + return -EINVAL; + } if (!get_map_field_int(map->name, obj->btf, m, &val)) return -EINVAL; pr_debug("map '%s': found pinning = %u.\n", @@ -2138,10 +2212,11 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, map->def.type = BPF_MAP_TYPE_UNSPEC; map->sec_idx = sec_idx; map->sec_offset = vi->offset; + map->btf_var_idx = var_idx; pr_debug("map '%s': at sec_idx %d, offset 
%zu.\n", map_name, map->sec_idx, map->sec_offset); - return parse_btf_map_def(obj, map, def, strict, pin_root_path); + return parse_btf_map_def(obj, map, def, strict, false, pin_root_path); } static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, @@ -2174,6 +2249,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, name = btf__name_by_offset(obj->btf, t->name_off); if (strcmp(name, MAPS_ELF_SEC) == 0) { sec = t; + obj->efile.btf_maps_sec_btf_id = i; break; } } @@ -2560,7 +2636,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) /* Only do relo for section with exec instructions */ if (!section_have_execinstr(obj, sec) && - strcmp(name, ".rel" STRUCT_OPS_SEC)) { + strcmp(name, ".rel" STRUCT_OPS_SEC) && + strcmp(name, ".rel" MAPS_ELF_SEC)) { pr_debug("skip relo %s(%d) for section(%d)\n", name, idx, sec); continue; @@ -3538,6 +3615,22 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) create_attr.btf_value_type_id = map->btf_value_type_id; } + if (bpf_map_type__is_map_in_map(def->type)) { + if (map->inner_map) { + int err; + + err = bpf_object__create_map(obj, map->inner_map); + if (err) { + pr_warn("map '%s': failed to create inner map: %d\n", + map->name, err); + return err; + } + map->inner_map_fd = bpf_map__fd(map->inner_map); + } + if (map->inner_map_fd >= 0) + create_attr.inner_map_fd = map->inner_map_fd; + } + map->fd = bpf_create_map_xattr(&create_attr); if (map->fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { @@ -3558,6 +3651,11 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) if (map->fd < 0) return -errno; + if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) { + bpf_map__destroy(map->inner_map); + zfree(&map->inner_map); + } + return 0; } @@ -3602,6 +3700,31 @@ bpf_object__create_maps(struct bpf_object *obj) } } + if (map->init_slots_sz) { + for (j = 0; j < map->init_slots_sz; j++) { + const struct bpf_map *targ_map; + int fd; + + if (!map->init_slots[j]) + continue; + + targ_map = map->init_slots[j]; + fd = bpf_map__fd(targ_map); + err = bpf_map_update_elem(map->fd, &j, &fd, 0); + if (err) { + err = -errno; + pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", + map->name, j, targ_map->name, + fd, err); + goto err_out; + } + pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", + map->name, j, targ_map->name, fd); + } + zfree(&map->init_slots); + map->init_slots_sz = 0; + } + if (map->pin_path && !map->pinned) { err = bpf_map__pin(map, NULL); if (err) { @@ -4873,9 +4996,118 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) return 0; } -static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj, - GElf_Shdr *shdr, - Elf_Data *data); +static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, + GElf_Shdr *shdr, Elf_Data *data); + +static int bpf_object__collect_map_relos(struct bpf_object *obj, + GElf_Shdr *shdr, Elf_Data *data) +{ + int i, j, nrels, new_sz, ptr_sz = sizeof(void *); + const struct btf_type *sec, *var, *def; + const struct btf_var_secinfo *vi; + const struct btf_member *member; + struct bpf_map *map, *targ_map; + const char *name, *mname; + Elf_Data *symbols; + unsigned int moff; + GElf_Sym sym; + GElf_Rel rel; + void *tmp; + + if (!obj->efile.btf_maps_sec_btf_id || !obj->btf) + return -EINVAL; + sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id); + if (!sec) + return -EINVAL; + + symbols = obj->efile.symbols; + 
nrels = shdr->sh_size / shdr->sh_entsize; + for (i = 0; i < nrels; i++) { + if (!gelf_getrel(data, i, &rel)) { + pr_warn(".maps relo #%d: failed to get ELF relo\n", i); + return -LIBBPF_ERRNO__FORMAT; + } + if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { + pr_warn(".maps relo #%d: symbol %zx not found\n", + i, (size_t)GELF_R_SYM(rel.r_info)); + return -LIBBPF_ERRNO__FORMAT; + } + name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, + sym.st_name) ? : ""; + if (sym.st_shndx != obj->efile.btf_maps_shndx) { + pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", + i, name); + return -LIBBPF_ERRNO__RELOC; + } + + pr_debug(".maps relo #%d: for %zd value %zd rel.r_offset %zu name %d ('%s')\n", + i, (ssize_t)(rel.r_info >> 32), (size_t)sym.st_value, + (size_t)rel.r_offset, sym.st_name, name); + + for (j = 0; j < obj->nr_maps; j++) { + map = &obj->maps[j]; + if (map->sec_idx != obj->efile.btf_maps_shndx) + continue; + + vi = btf_var_secinfos(sec) + map->btf_var_idx; + if (vi->offset <= rel.r_offset && + rel.r_offset + sizeof(void *) <= vi->offset + vi->size) + break; + } + if (j == obj->nr_maps) { + pr_warn(".maps relo #%d: cannot find map '%s' at rel.r_offset %zu\n", + i, name, (size_t)rel.r_offset); + return -EINVAL; + } + + if (!bpf_map_type__is_map_in_map(map->def.type)) + return -EINVAL; + if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS && + map->def.key_size != sizeof(int)) { + pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n", + i, map->name, sizeof(int)); + return -EINVAL; + } + + targ_map = bpf_object__find_map_by_name(obj, name); + if (!targ_map) + return -ESRCH; + + var = btf__type_by_id(obj->btf, vi->type); + def = skip_mods_and_typedefs(obj->btf, var->type, NULL); + if (btf_vlen(def) == 0) + return -EINVAL; + member = btf_members(def) + btf_vlen(def) - 1; + mname = btf__name_by_offset(obj->btf, member->name_off); + if (strcmp(mname, "values")) + return -EINVAL; + + moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8; + if (rel.r_offset - vi->offset < moff) + return -EINVAL; + + moff = rel.r_offset - vi->offset - moff; + if (moff % ptr_sz) + return -EINVAL; + moff /= ptr_sz; + if (moff >= map->init_slots_sz) { + new_sz = moff + 1; + tmp = realloc(map->init_slots, new_sz * ptr_sz); + if (!tmp) + return -ENOMEM; + map->init_slots = tmp; + memset(map->init_slots + map->init_slots_sz, 0, + (new_sz - map->init_slots_sz) * ptr_sz); + map->init_slots_sz = new_sz; + } + map->init_slots[moff] = targ_map; + + pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n", + i, map->name, moff, name); + } + + return 0; +} static int bpf_object__collect_reloc(struct bpf_object *obj) { @@ -4898,21 +5130,17 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) } if (idx == obj->efile.st_ops_shndx) { - err = bpf_object__collect_struct_ops_map_reloc(obj, - shdr, - data); - if (err) - return err; - continue; - } - - prog = bpf_object__find_prog_by_idx(obj, idx); - if (!prog) { - pr_warn("relocation failed: no section(%d)\n", idx); - return -LIBBPF_ERRNO__RELOC; + err = bpf_object__collect_st_ops_relos(obj, shdr, data); + } else if (idx == obj->efile.btf_maps_shndx) { + err = bpf_object__collect_map_relos(obj, shdr, data); + } else { + prog = bpf_object__find_prog_by_idx(obj, idx); + if (!prog) { + pr_warn("relocation failed: no prog in section(%d)\n", idx); + return -LIBBPF_ERRNO__RELOC; + } + err = bpf_program__collect_reloc(prog, shdr, data, obj); } - - err = bpf_program__collect_reloc(prog, shdr, data, obj); if (err) return err; } @@ -5984,6 
+6212,14 @@ static void bpf_map__destroy(struct bpf_map *map) map->priv = NULL; map->clear_priv = NULL; + if (map->inner_map) { + bpf_map__destroy(map->inner_map); + zfree(&map->inner_map); + } + + zfree(&map->init_slots); + map->init_slots_sz = 0; + if (map->mmaped) { munmap(map->mmaped, bpf_map_mmap_sz(map)); map->mmaped = NULL; @@ -6543,9 +6779,8 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, } /* Collect the reloc from ELF and populate the st_ops->progs[] */ -static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj, - GElf_Shdr *shdr, - Elf_Data *data) +static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, + GElf_Shdr *shdr, Elf_Data *data) { const struct btf_member *member; struct bpf_struct_ops *st_ops; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c new file mode 100644 index 000000000000..f7ee8fa377ad --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include + +#include "test_btf_map_in_map.skel.h" + +void test_btf_map_in_map(void) +{ + int duration = 0, err, key = 0, val; + struct test_btf_map_in_map* skel; + + skel = test_btf_map_in_map__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n")) + return; + + err = test_btf_map_in_map__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* inner1 = input, inner2 = input + 1 */ + val = bpf_map__fd(skel->maps.inner_map1); + bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &key, &val, 0); + val = bpf_map__fd(skel->maps.inner_map2); + bpf_map_update_elem(bpf_map__fd(skel->maps.outer_hash), &key, &val, 0); + skel->bss->input = 1; + usleep(1); + + bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map1), &key, &val); + CHECK(val != 1, "inner1", "got %d != exp %d\n", val, 1); + bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map2), &key, &val); + CHECK(val != 2, "inner2", "got %d != exp %d\n", val, 2); + + /* inner1 = input + 1, inner2 = input */ + val = bpf_map__fd(skel->maps.inner_map2); + bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &key, &val, 0); + val = bpf_map__fd(skel->maps.inner_map1); + bpf_map_update_elem(bpf_map__fd(skel->maps.outer_hash), &key, &val, 0); + skel->bss->input = 3; + usleep(1); + + bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map1), &key, &val); + CHECK(val != 4, "inner1", "got %d != exp %d\n", val, 4); + bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map2), &key, &val); + CHECK(val != 3, "inner2", "got %d != exp %d\n", val, 3); + +cleanup: + test_btf_map_in_map__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c new file mode 100644 index 000000000000..e5093796be97 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020 Facebook */ +#include +#include + +struct inner_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} inner_map1 SEC(".maps"), + inner_map2 SEC(".maps"); + +struct outer_arr { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 3); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + /* it's possible to use anonymous struct as inner map definition here */ + 
__array(values, struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + /* changing max_entries to 2 will fail during load + * due to incompatibility with inner_map definition */ + __uint(max_entries, 1); + __type(key, int); + __type(value, int); + }); +} outer_arr SEC(".maps") = { + /* (void *) cast is necessary because we didn't use `struct inner_map` + * in __inner(values, ...) + * Actually, a conscious effort is required to screw up initialization + * of inner map slots, which is a great thing! + */ + .values = { (void *)&inner_map1, 0, (void *)&inner_map2 }, +}; + +struct outer_hash { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, 5); + __uint(key_size, sizeof(int)); + /* Here everything works flawlessly due to reuse of struct inner_map + * and compiler will complain at the attempt to use non-inner_map + * references below. This is great experience. + */ + __array(values, struct inner_map); +} outer_hash SEC(".maps") = { + .values = { + [0] = &inner_map2, + [4] = &inner_map1, + }, +}; + +int input = 0; + +SEC("raw_tp/sys_enter") +int handle__sys_enter(void *ctx) +{ + struct inner_map *inner_map; + int key = 0, val; + + inner_map = bpf_map_lookup_elem(&outer_arr, &key); + if (!inner_map) + return 1; + val = input; + bpf_map_update_elem(inner_map, &key, &val, 0); + + inner_map = bpf_map_lookup_elem(&outer_hash, &key); + if (!inner_map) + return 1; + val = input + 1; + bpf_map_update_elem(inner_map, &key, &val, 0); + + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 229bf8bf4d910510bc1a2fd0b89bd467cd71050d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Apr 2020 18:21:04 -0700 Subject: libbpf: Fix memory leak and possible double-free in hashmap__clear Fix memory leak in hashmap_clear() not freeing hashmap_entry structs for each of the remaining entries. Also NULL-out bucket list to prevent possible double-free between hashmap__clear() and hashmap__free(). Running test_progs-asan flavor clearly showed this problem. Reported-by: Alston Tang Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200429012111.277390-5-andriin@fb.com --- tools/lib/bpf/hashmap.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c index 54c30c802070..cffb96202e0d 100644 --- a/tools/lib/bpf/hashmap.c +++ b/tools/lib/bpf/hashmap.c @@ -59,7 +59,14 @@ struct hashmap *hashmap__new(hashmap_hash_fn hash_fn, void hashmap__clear(struct hashmap *map) { + struct hashmap_entry *cur, *tmp; + int bkt; + + hashmap__for_each_entry_safe(map, cur, tmp, bkt) { + free(cur); + } free(map->buckets); + map->buckets = NULL; map->cap = map->cap_bits = map->sz = 0; } -- cgit v1.2.3 From 3521ffa2ee9a48c3236c93f54ae11c074490ebce Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Apr 2020 18:21:08 -0700 Subject: libbpf: Fix huge memory leak in libbpf_find_vmlinux_btf_id() BTF object wasn't freed. 
Fixes: a6ed02cac690 ("libbpf: Load btf_vmlinux only once per object.")
Signed-off-by: Andrii Nakryiko
Signed-off-by: Alexei Starovoitov
Cc: KP Singh
Link: https://lore.kernel.org/bpf/20200429012111.277390-9-andriin@fb.com
---
 tools/lib/bpf/libbpf.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools/lib')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 445ee903f9cd..d86ff8214b96 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6934,6 +6934,7 @@ int libbpf_find_vmlinux_btf_id(const char *name,
 			       enum bpf_attach_type attach_type)
 {
 	struct btf *btf;
+	int err;

 	btf = libbpf_find_kernel_btf();
 	if (IS_ERR(btf)) {
@@ -6941,7 +6942,9 @@ int libbpf_find_vmlinux_btf_id(const char *name,
 		return -EINVAL;
 	}

-	return __find_vmlinux_btf_id(btf, name, attach_type);
+	err = __find_vmlinux_btf_id(btf, name, attach_type);
+	btf__free(btf);
+	return err;
 }

 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
-- cgit v1.2.3

From 063e688133914505ddb396cc33231f22f12e0685 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko
Date: Wed, 29 Apr 2020 19:14:36 -0700
Subject: libbpf: Fix false uninitialized variable warning

Some versions of GCC falsely detect that vi might not be initialized.
That's not true, but let's silence it with NULL initialization.

Signed-off-by: Andrii Nakryiko
Signed-off-by: Daniel Borkmann
Link: https://lore.kernel.org/bpf/20200430021436.1522502-1-andriin@fb.com
---
 tools/lib/bpf/libbpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/lib')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index d86ff8214b96..977add1b73e2 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -5003,8 +5003,8 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
 					 GElf_Shdr *shdr, Elf_Data *data)
 {
 	int i, j, nrels, new_sz, ptr_sz = sizeof(void *);
+	const struct btf_var_secinfo *vi = NULL;
 	const struct btf_type *sec, *var, *def;
-	const struct btf_var_secinfo *vi;
 	const struct btf_member *member;
 	struct bpf_map *map, *targ_map;
 	const char *name, *mname;
-- cgit v1.2.3

From 0bee106716cfb2c6da81916b968395db22bd7755 Mon Sep 17 00:00:00 2001
From: Song Liu
Date: Thu, 30 Apr 2020 00:15:05 -0700
Subject: libbpf: Add support for command BPF_ENABLE_STATS

bpf_enable_stats() is added to enable the given stats type.
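As a usage illustration (not part of this patch; it assumes
BPF_STATS_RUN_TIME, the only stats type at this point):

  int stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);

  if (stats_fd < 0)
          return stats_fd; /* e.g. kernel without BPF_ENABLE_STATS support */
  /* run_time_ns/run_cnt are accounted in prog info while stats_fd is open */
  ...
  close(stats_fd); /* stats are disabled again once the last fd is closed */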
Signed-off-by: Song Liu
Signed-off-by: Alexei Starovoitov
Link: https://lore.kernel.org/bpf/20200430071506.1408910-3-songliubraving@fb.com
---
 tools/lib/bpf/bpf.c      | 10 ++++++++++
 tools/lib/bpf/bpf.h      |  1 +
 tools/lib/bpf/libbpf.map |  1 +
 3 files changed, 12 insertions(+)

(limited to 'tools/lib')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 8f2f0958d446..43322f0d6c7f 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -841,3 +841,13 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,

 	return err;
 }
+
+int bpf_enable_stats(enum bpf_stats_type type)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.enable_stats.type = type;
+
+	return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 335b457b3a25..1901b2777854 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -231,6 +231,7 @@ LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf,
 LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
 				 __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
 				 __u64 *probe_offset, __u64 *probe_addr);
+LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);

 #ifdef __cplusplus
 } /* extern "C" */
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 7cd49aa38005..e03bd4db827e 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -257,6 +257,7 @@ LIBBPF_0.0.8 {

 LIBBPF_0.0.9 {
 	global:
+		bpf_enable_stats;
 		bpf_link_get_fd_by_id;
 		bpf_link_get_next_id;
 } LIBBPF_0.0.8;
-- cgit v1.2.3

From c09add2fbc5aece00a5b54a48ce39fd4e3284d87 Mon Sep 17 00:00:00 2001
From: Yonghong Song
Date: Sat, 9 May 2020 10:59:17 -0700
Subject: tools/libbpf: Add bpf_iter support

Two new libbpf APIs are added to support bpf_iter:

  - bpf_program__attach_iter
    Given a bpf program and additional parameters (of which there are
    none for now), returns a bpf_link.

  - bpf_iter_create
    syscall-level API to create a bpf iterator.

The macro BPF_SEQ_PRINTF is also introduced. The format looks like:

  BPF_SEQ_PRINTF(seq, "task id %d\n", pid);

This macro can help bpf program writers with nicer bpf_seq_printf syntax
similar to the kernel one.
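As an end-to-end illustration (not part of this patch; it assumes a task
iterator target, i.e. "iter/task" with struct bpf_iter__task, is available
in the kernel, and elides error checks):

  /* BPF program side */
  SEC("iter/task")
  int dump_task(struct bpf_iter__task *ctx)
  {
          struct task_struct *task = ctx->task;

          if (task)
                  BPF_SEQ_PRINTF(ctx->meta->seq, "task id %d\n", task->pid);
          return 0;
  }

  /* user-space side: attach, create the iterator, read its output */
  struct bpf_link *link;
  char buf[4096];
  int iter_fd, len;

  link = bpf_program__attach_iter(prog, NULL);
  iter_fd = bpf_iter_create(bpf_link__fd(link));
  while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
          write(STDOUT_FILENO, buf, len);
  close(iter_fd);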
Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200509175917.2476936-1-yhs@fb.com --- tools/lib/bpf/bpf.c | 10 +++++++++ tools/lib/bpf/bpf.h | 2 ++ tools/lib/bpf/bpf_tracing.h | 16 ++++++++++++++ tools/lib/bpf/libbpf.c | 52 +++++++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.h | 9 ++++++++ tools/lib/bpf/libbpf.map | 2 ++ 6 files changed, 91 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 43322f0d6c7f..a7329b671c41 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -619,6 +619,16 @@ int bpf_link_update(int link_fd, int new_prog_fd, return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr)); } +int bpf_iter_create(int link_fd) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.iter_create.link_fd = link_fd; + + return sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr)); +} + int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt) { diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 1901b2777854..1b6015b21ba8 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -187,6 +187,8 @@ struct bpf_link_update_opts { LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd, const struct bpf_link_update_opts *opts); +LIBBPF_API int bpf_iter_create(int link_fd); + struct bpf_prog_test_run_attr { int prog_fd; int repeat; diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index f3f3c3fb98cb..cf97d07692b4 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -413,4 +413,20 @@ typeof(name(0)) name(struct pt_regs *ctx) \ } \ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) +/* + * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values + * in a structure. + */ +#define BPF_SEQ_PRINTF(seq, fmt, args...) 
\ + ({ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + static const char ___fmt[] = fmt; \ + unsigned long long ___param[] = { args }; \ + _Pragma("GCC diagnostic pop") \ + int ___ret = bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \ + ___param, sizeof(___param)); \ + ___ret; \ + }) + #endif diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 977add1b73e2..6c2f46908f4d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6586,6 +6586,8 @@ static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, struct bpf_program *prog); static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec, struct bpf_program *prog); +static struct bpf_link *attach_iter(const struct bpf_sec_def *sec, + struct bpf_program *prog); static const struct bpf_sec_def section_defs[] = { BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), @@ -6629,6 +6631,10 @@ static const struct bpf_sec_def section_defs[] = { .is_attach_btf = true, .expected_attach_type = BPF_LSM_MAC, .attach_fn = attach_lsm), + SEC_DEF("iter/", TRACING, + .expected_attach_type = BPF_TRACE_ITER, + .is_attach_btf = true, + .attach_fn = attach_iter), BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), @@ -6891,6 +6897,7 @@ invalid_prog: #define BTF_TRACE_PREFIX "btf_trace_" #define BTF_LSM_PREFIX "bpf_lsm_" +#define BTF_ITER_PREFIX "__bpf_iter__" #define BTF_MAX_NAME_SIZE 128 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, @@ -6921,6 +6928,9 @@ static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name, else if (attach_type == BPF_LSM_MAC) err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name, BTF_KIND_FUNC); + else if (attach_type == BPF_TRACE_ITER) + err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name, + BTF_KIND_FUNC); else err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); @@ -7848,6 +7858,12 @@ static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec, return bpf_program__attach_lsm(prog); } +static struct bpf_link *attach_iter(const struct bpf_sec_def *sec, + struct bpf_program *prog) +{ + return bpf_program__attach_iter(prog, NULL); +} + struct bpf_link * bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd) { @@ -7882,6 +7898,42 @@ bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd) return link; } +struct bpf_link * +bpf_program__attach_iter(struct bpf_program *prog, + const struct bpf_iter_attach_opts *opts) +{ + char errmsg[STRERR_BUFSIZE]; + struct bpf_link *link; + int prog_fd, link_fd; + + if (!OPTS_VALID(opts, bpf_iter_attach_opts)) + return ERR_PTR(-EINVAL); + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warn("program '%s': can't attach before loaded\n", + bpf_program__title(prog, false)); + return ERR_PTR(-EINVAL); + } + + link = calloc(1, sizeof(*link)); + if (!link) + return ERR_PTR(-ENOMEM); + link->detach = &bpf_link__detach_fd; + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_ITER, NULL); + if (link_fd < 0) { + link_fd = -errno; + free(link); + pr_warn("program '%s': failed to attach to iterator: %s\n", + bpf_program__title(prog, false), + libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); + return ERR_PTR(link_fd); + } + link->fd = link_fd; + return link; +} + struct bpf_link *bpf_program__attach(struct bpf_program *prog) { const struct bpf_sec_def *sec_def; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 
f1dacecb1619..8ea69558f0a8 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -258,6 +258,15 @@ struct bpf_map; LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map); +struct bpf_iter_attach_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ +}; +#define bpf_iter_attach_opts__last_field sz + +LIBBPF_API struct bpf_link * +bpf_program__attach_iter(struct bpf_program *prog, + const struct bpf_iter_attach_opts *opts); + struct bpf_insn; /* diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index e03bd4db827e..0133d469d30b 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -258,6 +258,8 @@ LIBBPF_0.0.8 { LIBBPF_0.0.9 { global: bpf_enable_stats; + bpf_iter_create; bpf_link_get_fd_by_id; bpf_link_get_next_id; + bpf_program__attach_iter; } LIBBPF_0.0.8; -- cgit v1.2.3 From 5fbc220862fc7a53a0455ccd2d96c82141e222d4 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 9 May 2020 10:59:19 -0700 Subject: tools/libpf: Add offsetof/container_of macro in bpf_helpers.h These two helpers will be used later in bpf_iter bpf program bpf_iter_netlink.c. Put them in bpf_helpers.h since they could be useful in other cases. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200509175919.2477104-1-yhs@fb.com --- tools/lib/bpf/bpf_helpers.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index da00b87aa199..f67dce2af802 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -36,6 +36,20 @@ #define __weak __attribute__((weak)) #endif +/* + * Helper macro to manipulate data structures + */ +#ifndef offsetof +#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) +#endif +#ifndef container_of +#define container_of(ptr, type, member) \ + ({ \ + void *__mptr = (void *)(ptr); \ + ((type *)(__mptr - offsetof(type, member))); \ + }) +#endif + /* * Helper structure used by eBPF C program * to describe BPF map attributes to libbpf loader -- cgit v1.2.3 From 385bbf7b119a4feb6d6bcf3586f1bb1dd9c5b0a0 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 May 2020 13:50:57 -0500 Subject: bpf, libbpf: Replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. 
From 385bbf7b119a4feb6d6bcf3586f1bb1dd9c5b0a0 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 May 2020 13:50:57 -0500 Subject: bpf, libbpf: Replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kinds of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to completely eliminate those sorts of issues. This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200507185057.GA13981@embeddedor --- kernel/bpf/queue_stack_maps.c | 2 +- tools/lib/bpf/libbpf.c | 2 +- tools/lib/bpf/libbpf_internal.h | 2 +- tools/testing/selftests/bpf/progs/core_reloc_types.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/lib') diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index f697647ceb54..30e1373fd437 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -19,7 +19,7 @@ struct bpf_queue_stack { u32 head, tail; u32 size; /* max_entries + 1 */ - char elements[0] __aligned(8); + char elements[] __aligned(8); }; static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6c2f46908f4d..3da66540b54b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8352,7 +8352,7 @@ error: struct perf_sample_raw { struct perf_event_header header; uint32_t size; - char data[0]; + char data[]; }; struct perf_sample_lost { diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 8c3afbd97747..50d70e90d5f1 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -153,7 +153,7 @@ struct btf_ext_info_sec { __u32 sec_name_off; __u32 num_info; /* Followed by num_info * record_size number of bytes */ - __u8 data[0]; + __u8 data[]; }; /* The minimum bpf_func_info checked by the loader */ diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index 6d598cfbdb3e..34d84717c946 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -379,7 +379,7 @@ struct core_reloc_arrays___equiv_zero_sz_arr { struct core_reloc_arrays_substruct c[3]; struct core_reloc_arrays_substruct d[1][2]; /* equivalent to flexible array */ - struct core_reloc_arrays_substruct f[0][2]; + struct core_reloc_arrays_substruct f[][2]; }; struct core_reloc_arrays___fixed_arr { -- cgit v1.2.3 From fd9eef1a132d1974405c3ebf9d5688ec5c51da94 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Tue, 12 May 2020 11:04:40 +0200 Subject: libbpf: Fix probe code to return EPERM if encountered When the probe code failed for any reason, ENOTSUP was returned, even if the failure was due to not having enough locked-memory space. This patch fixes this by returning EPERM to the user application, so it can respond and increase the RLIMIT_MEMLOCK size. Signed-off-by: Eelco Chaudron Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/158927424896.2342.10402475603585742943.stgit@ebuild --- tools/lib/bpf/libbpf.c | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 3da66540b54b..fd882616ab52 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3237,7 +3237,7 @@ int bpf_map__resize(struct bpf_map *map, __u32 max_entries) } static int -bpf_object__probe_name(struct bpf_object *obj) +bpf_object__probe_loading(struct bpf_object *obj) { struct bpf_load_program_attr attr; char *cp, errmsg[STRERR_BUFSIZE]; @@ -3257,15 +3257,36 @@ bpf_object__probe_name(struct bpf_object *obj) ret = bpf_load_program_xattr(&attr, NULL, 0); if (ret < 0) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n", - __func__, cp, errno); - return -errno; + ret = errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF " + "program. Make sure your kernel supports BPF " + "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is " + "set to a big enough value.\n", __func__, cp, ret); + return -ret; } close(ret); - /* now try the same program, but with the name */ + return 0; +} + +static int +bpf_object__probe_name(struct bpf_object *obj) +{ + struct bpf_load_program_attr attr; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret; + + /* make sure loading with name works */ + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insns = insns; + attr.insns_cnt = ARRAY_SIZE(insns); + attr.license = "GPL"; attr.name = "test"; ret = bpf_load_program_xattr(&attr, NULL, 0); if (ret >= 0) { @@ -5636,7 +5657,8 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) obj->loaded = true; - err = bpf_object__probe_caps(obj); + err = bpf_object__probe_loading(obj); + err = err ? : bpf_object__probe_caps(obj); + err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? : bpf_object__sanitize_and_load_btf(obj); err = err ? : bpf_object__sanitize_maps(obj); -- cgit v1.2.3
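[Editor's note: with EPERM now distinguishable from ENOTSUP, a caller can react. Below is a sketch of the usual mitigation; RLIM_INFINITY is illustrative, and a real application may prefer a finite bound.]

	#include <errno.h>
	#include <sys/resource.h>

	/* Raise the locked-memory limit before loading BPF objects, so
	 * that map and program allocations are not rejected with EPERM.
	 */
	static int bump_memlock_rlimit(void)
	{
		struct rlimit rlim = {
			.rlim_cur = RLIM_INFINITY,
			.rlim_max = RLIM_INFINITY,
		};

		if (setrlimit(RLIMIT_MEMLOCK, &rlim))
			return -errno;
		return 0;
	}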
From 21aef70eade22a656297c28d5da93301915d2ac2 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 13 May 2020 11:02:16 -0700 Subject: bpf: Change btf_iter func proto prefix to "bpf_iter_" This is to be consistent with tracing and lsm programs which have prefix "btf_trace_" and "bpf_lsm_" respectively. Suggested-by: Alexei Starovoitov Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200513180216.2949387-1-yhs@fb.com --- include/linux/bpf.h | 6 +++--- tools/lib/bpf/libbpf.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/lib') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cf4b6e44f2bc..ab94dfd8826f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1131,10 +1131,10 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd); int bpf_obj_pin_user(u32 ufd, const char __user *pathname); int bpf_obj_get_user(const char __user *pathname, int flags); -#define BPF_ITER_FUNC_PREFIX "__bpf_iter__" +#define BPF_ITER_FUNC_PREFIX "bpf_iter_" #define DEFINE_BPF_ITER_FUNC(target, args...) \ - extern int __bpf_iter__ ## target(args); \ - int __init __bpf_iter__ ## target(args) { return 0; } + extern int bpf_iter_ ## target(args); \ + int __init bpf_iter_ ## target(args) { return 0; } typedef int (*bpf_iter_init_seq_priv_t)(void *private_data); typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index fd882616ab52..292257995487 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6919,7 +6919,7 @@ invalid_prog: #define BTF_TRACE_PREFIX "btf_trace_" #define BTF_LSM_PREFIX "bpf_lsm_" -#define BTF_ITER_PREFIX "__bpf_iter__" +#define BTF_ITER_PREFIX "bpf_iter_" #define BTF_MAX_NAME_SIZE 128 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, -- cgit v1.2.3 From f516acd5397fdbb77ef0aad0798d9ef7c3001d72 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 May 2020 09:50:02 -0700 Subject: libbpf, hashmap: Remove unused #include Remove #include of libbpf_internal.h that is unused. Discussed in this thread: https://lore.kernel.org/lkml/CAEf4BzZRmiEds_8R8g4vaAeWvJzPb4xYLnpF0X2VNY8oTzkphQ@mail.gmail.com/ Signed-off-by: Ian Rogers Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200515165007.217120-3-irogers@google.com --- tools/lib/bpf/hashmap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h index bae8879cdf58..e823b35e7371 100644 --- a/tools/lib/bpf/hashmap.h +++ b/tools/lib/bpf/hashmap.h @@ -15,7 +15,6 @@ #else #include #endif -#include "libbpf_internal.h" static inline size_t hash_bits(size_t h, int bits) { -- cgit v1.2.3 From 8d35d74f52ae07689e575ea21f7dc2e07dd1392f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 May 2020 09:50:03 -0700 Subject: libbpf, hashmap: Fix signedness warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following warnings: hashmap.c: In function ‘hashmap__clear’: hashmap.h:150:20: error: comparison of integer expressions of different signedness: ‘int’ and ‘size_t’ {aka ‘long unsigned int’} [-Werror=sign-compare] 150 | for (bkt = 0; bkt < map->cap; bkt++) \ hashmap.c: In function ‘hashmap_grow’: hashmap.h:150:20: error: comparison of integer expressions of different signedness: ‘int’ and ‘size_t’ {aka ‘long unsigned int’} [-Werror=sign-compare] 150 | for (bkt = 0; bkt < map->cap; bkt++) \ Signed-off-by: Ian Rogers Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200515165007.217120-4-irogers@google.com --- tools/lib/bpf/hashmap.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c index cffb96202e0d..a405dad068f5 100644 --- a/tools/lib/bpf/hashmap.c +++ b/tools/lib/bpf/hashmap.c @@ -60,7 +60,7 @@ struct hashmap *hashmap__new(hashmap_hash_fn hash_fn, void hashmap__clear(struct hashmap *map) { struct hashmap_entry *cur, *tmp; - int bkt; + size_t bkt; hashmap__for_each_entry_safe(map, cur, tmp, bkt) { free(cur); @@ -100,8 +100,7 @@ static int hashmap_grow(struct hashmap *map) struct hashmap_entry **new_buckets; struct hashmap_entry *cur, *tmp; size_t new_cap_bits, new_cap; - size_t h; - int bkt; + size_t h, bkt; new_cap_bits = map->cap_bits + 1; if (new_cap_bits < HASHMAP_MIN_CAP_BITS) -- cgit v1.2.3
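[Editor's note: for context, this is roughly how the internal hashmap API is consumed; the bucket variable must be size_t to match map->cap, which is exactly what the fix above enforces. Keys and values here are illustrative small integers smuggled through pointers.]

	#include "hashmap.h"

	static size_t id_hash(const void *key, void *ctx)
	{
		return (size_t)key;
	}

	static bool id_equal(const void *k1, const void *k2, void *ctx)
	{
		return k1 == k2;
	}

	static void iterate_ids(void)
	{
		struct hashmap *map = hashmap__new(id_hash, id_equal, NULL);
		struct hashmap_entry *cur;
		size_t bkt; /* an int here trips -Wsign-compare in the macro */

		hashmap__add(map, (void *)1, (void *)100);
		hashmap__add(map, (void *)2, (void *)200);

		hashmap__for_each_entry(map, cur, bkt) {
			/* cur->key and cur->value hold the stored pointers */
		}

		hashmap__free(map);
	}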
From f15ed0185de7d471e907783739dffbe397a93142 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 19 May 2020 00:45:46 +0200
Subject: bpf, libbpf: Enable get{peer, sock}name attach types Trivial patch to add the new get{peer,sock}name attach types to the section definitions in order to hook them up to the sock_addr cgroup program type. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Andrey Ignatov Link: https://lore.kernel.org/bpf/7fcd4b1e41a8ebb364754a5975c75a7795051bd2.1589841594.git.daniel@iogearbox.net --- tools/lib/bpf/libbpf.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 292257995487..fa04cbe547ed 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6705,6 +6705,14 @@ static const struct bpf_sec_def section_defs[] = { BPF_CGROUP_UDP4_RECVMSG), BPF_EAPROG_SEC("cgroup/recvmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG), + BPF_EAPROG_SEC("cgroup/getpeername4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET4_GETPEERNAME), + BPF_EAPROG_SEC("cgroup/getpeername6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET6_GETPEERNAME), + BPF_EAPROG_SEC("cgroup/getsockname4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET4_GETSOCKNAME), + BPF_EAPROG_SEC("cgroup/getsockname6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, + BPF_CGROUP_INET6_GETSOCKNAME), BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL), BPF_EAPROG_SEC("cgroup/getsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, -- cgit v1.2.3 From 272d51af32890632134845ddf35318c11da20c7b Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Tue, 26 May 2020 11:21:42 +0200 Subject: libbpf: Add API to consume the perf ring buffer content This new API, perf_buffer__consume, can be used as follows: - When you have a perf ring where wakeup_events is higher than 1, and you have remaining data in the rings you would like to pull out on exit (or maybe based on a timeout). - For low-latency cases where you burn a CPU that constantly polls the queues. Signed-off-by: Eelco Chaudron Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/159048487929.89441.7465713173442594608.stgit@ebuild Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 19 +++++++++++++++++++ tools/lib/bpf/libbpf.h | 1 + tools/lib/bpf/libbpf.map | 1 + 3 files changed, 21 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index fa04cbe547ed..5d60de6fd818 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8456,6 +8456,25 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) return cnt < 0 ? -errno : cnt; } +int perf_buffer__consume(struct perf_buffer *pb) +{ + int i, err; + + for (i = 0; i < pb->cpu_cnt; i++) { + struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i]; + + if (!cpu_buf) + continue; + + err = perf_buffer__process_records(pb, cpu_buf); + if (err) { + pr_warn("error while processing records: %d\n", err); + return err; + } + } + return 0; +} + struct bpf_prog_info_array_desc { int array_offset; /* e.g. offset of jited_prog_insns */ int count_offset; /* e.g. offset of jited_prog_len */ diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 8ea69558f0a8..1e2e399a5f2c 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -533,6 +533,7 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt, LIBBPF_API void perf_buffer__free(struct perf_buffer *pb); LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms); +LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb); typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 0133d469d30b..381a7342ecfc 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -262,4 +262,5 @@ LIBBPF_0.0.9 { bpf_link_get_fd_by_id; bpf_link_get_next_id; bpf_program__attach_iter; + perf_buffer__consume; } LIBBPF_0.0.8; -- cgit v1.2.3
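[Editor's note: a short usage sketch for the new call; the perf buffer setup itself is omitted, and pb is assumed to be a valid buffer obtained from perf_buffer__new().]

	#include <stdio.h>
	#include <bpf/libbpf.h>

	/* Drain whatever is already queued in all per-CPU rings without
	 * waiting for an epoll notification, e.g. right before exit.
	 */
	static void drain_before_exit(struct perf_buffer *pb)
	{
		int err = perf_buffer__consume(pb);

		if (err)
			fprintf(stderr, "draining perf buffer failed: %d\n", err);
	}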
From 93581359e7aeb11358018f2e3a737776d1e899ae Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 26 May 2020 20:46:12 +0300 Subject: libbpf: Install headers as part of make install Current 'make install' results in only pkg-config and library binaries being installed. For consistency, also install headers as part of "make install". Signed-off-by: Nikolay Borisov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200526174612.5447-1-nborisov@suse.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index aee7f1a83c77..d02c4d910aad 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -264,7 +264,7 @@ install_pkgconfig: $(PC_FILE) $(call QUIET_INSTALL, $(PC_FILE)) \ $(call do_install,$(PC_FILE),$(libdir_SQ)/pkgconfig,644) -install: install_lib install_pkgconfig +install: install_lib install_pkgconfig install_headers ### Cleaning rules -- cgit v1.2.3 From 55983299b7ea94d714c19cdfd8d969ba86e0d7e9 Mon Sep 17 00:00:00 2001 From: Yauheni Kaliuta Date: Mon, 25 May 2020 09:18:46 +0300 Subject: libbpf: Use .so dynamic symbols for abi check Since dynamic symbols are used for dynamic linking, it makes sense to use them (readelf --dyn-syms) for the ABI check. Found with some configurations on powerpc where the linker puts local *.plt_call.* symbols into the .so.
Signed-off-by: Yauheni Kaliuta Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200525061846.16524-1-yauheni.kaliuta@redhat.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index d02c4d910aad..bf8ed134cb8a 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -151,7 +151,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ sed 's/\[.*\]//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ sort -u | wc -l) -VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \ +VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) CMD_TARGETS = $(LIB_TARGET) $(PC_FILE) @@ -218,7 +218,7 @@ check_abi: $(OUTPUT)libbpf.so sed 's/\[.*\]//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ - readelf -s --wide $(OUTPUT)libbpf.so | \ + readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \ sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \ diff -u $(OUTPUT)libbpf_global_syms.tmp \ -- cgit v1.2.3 From 601b05ca6edb0422bf6ce313fbfd55ec7bbbc0fd Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Wed, 27 May 2020 10:42:00 +0200 Subject: libbpf: Fix perf_buffer__free() API for sparse allocs In case the cpu_bufs are sparsely allocated, they are not all freed. These changes fix this. Fixes: fb84b8224655 ("libbpf: add perf buffer API") Signed-off-by: Eelco Chaudron Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/159056888305.330763.9684536967379110349.stgit@ebuild Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5d60de6fd818..74d967619dcf 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8137,9 +8137,12 @@ void perf_buffer__free(struct perf_buffer *pb) if (!pb) return; if (pb->cpu_bufs) { - for (i = 0; i < pb->cpu_cnt && pb->cpu_bufs[i]; i++) { + for (i = 0; i < pb->cpu_cnt; i++) { struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i]; + if (!cpu_buf) + continue; + bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key); perf_buffer__free_cpu_buf(pb, cpu_buf); } -- cgit v1.2.3 From bf99c936f9478a05d51e9f101f90de70bee9a89c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 29 May 2020 00:54:21 -0700 Subject: libbpf: Add BPF ring buffer support Declaring and instantiating BPF ring buffer doesn't require any changes to libbpf, as it's just another type of map. So using the existing BTF-defined maps syntax with __uint(type, BPF_MAP_TYPE_RINGBUF) and __uint(max_entries, ) is all that's necessary to create and use a BPF ring buffer. This patch adds a BPF ring buffer consumer to libbpf. It is very similar to the perf_buffer implementation in terms of API, but also attempts to fix some minor problems and inconveniences with the existing perf_buffer API. ring_buffer supports both the single ring buffer use case (using just ring_buffer__new()) and adding more ring buffers, each with its own callback and context. This allows efficiently polling and consuming multiple, potentially completely independent, ring buffers using a single epoll instance.
The latter is actually a problem in practice for applications that are using multiple sets of perf buffers. They have to create multiple instances of struct perf_buffer and poll them independently or in a loop, each approach having its own problems (e.g., inability to use a common poll timeout). struct ring_buffer eliminates this problem by aggregating many independent ring buffer instances under a single "ring buffer manager". Second, perf_buffer's callback can't return an error, so applications that need to stop polling due to an error in the data, or due to data signalling the end, have to use extra mechanisms to signal that polling has to stop. ring_buffer's callback can return an error, which will be passed back to user code and can be acted upon appropriately. Two APIs allow consuming ring buffer data: - ring_buffer__poll(), which will wait for a data availability notification and will consume data only from the reported ring buffer(s); this API allows efficient use of resources by reading data only when it becomes available; - ring_buffer__consume(), which will attempt to read new records regardless of the data availability notification sub-system. This API is useful for cases when the lowest latency is required, at the expense of burning CPU resources. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200529075424.3139988-3-andriin@fb.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/Build | 2 +- tools/lib/bpf/libbpf.h | 21 ++++ tools/lib/bpf/libbpf.map | 5 + tools/lib/bpf/libbpf_probes.c | 5 + tools/lib/bpf/ringbuf.c | 285 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 317 insertions(+), 1 deletion(-) create mode 100644 tools/lib/bpf/ringbuf.c (limited to 'tools/lib') diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index e3962cfbc9a6..190366d05588 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,3 +1,3 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \ - btf_dump.o + btf_dump.o ringbuf.o diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 1e2e399a5f2c..8528a02d5af8 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -478,6 +478,27 @@ LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags); LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, size_t info_size, __u32 flags); +/* Ring buffer APIs */ +struct ring_buffer; + +typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size); + +struct ring_buffer_opts { + size_t sz; /* size of this struct, for forward/backward compatibility */ +}; + +#define ring_buffer_opts__last_field sz + +LIBBPF_API struct ring_buffer * +ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx, + const struct ring_buffer_opts *opts); +LIBBPF_API void ring_buffer__free(struct ring_buffer *rb); +LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd, + ring_buffer_sample_fn sample_cb, void *ctx); +LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms); +LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb); + +/* Perf buffer APIs */ struct perf_buffer; typedef void (*perf_buffer_sample_fn)(void *ctx, int cpu, diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 381a7342ecfc..c18860200abb 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -263,4 +263,9 @@ LIBBPF_0.0.9 { bpf_link_get_next_id; bpf_program__attach_iter; perf_buffer__consume; +
ring_buffer__add; + ring_buffer__consume; + ring_buffer__free; + ring_buffer__new; + ring_buffer__poll; } LIBBPF_0.0.8; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 2c92059c0c90..10cd8d1891f5 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -238,6 +238,11 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) if (btf_fd < 0) return false; break; + case BPF_MAP_TYPE_RINGBUF: + key_size = 0; + value_size = 0; + max_entries = 4096; + break; case BPF_MAP_TYPE_UNSPEC: case BPF_MAP_TYPE_HASH: case BPF_MAP_TYPE_ARRAY: diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c new file mode 100644 index 000000000000..bc10fa1d43c7 --- /dev/null +++ b/tools/lib/bpf/ringbuf.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* + * Ring buffer operations. + * + * Copyright (C) 2020 Facebook, Inc. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libbpf.h" +#include "libbpf_internal.h" +#include "bpf.h" + +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + +struct ring { + ring_buffer_sample_fn sample_cb; + void *ctx; + void *data; + unsigned long *consumer_pos; + unsigned long *producer_pos; + unsigned long mask; + int map_fd; +}; + +struct ring_buffer { + struct epoll_event *events; + struct ring *rings; + size_t page_size; + int epoll_fd; + int ring_cnt; +}; + +static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r) +{ + if (r->consumer_pos) { + munmap(r->consumer_pos, rb->page_size); + r->consumer_pos = NULL; + } + if (r->producer_pos) { + munmap(r->producer_pos, rb->page_size + 2 * (r->mask + 1)); + r->producer_pos = NULL; + } +} + +/* Add extra RINGBUF maps to this ring buffer manager */ +int ring_buffer__add(struct ring_buffer *rb, int map_fd, + ring_buffer_sample_fn sample_cb, void *ctx) +{ + struct bpf_map_info info; + __u32 len = sizeof(info); + struct epoll_event *e; + struct ring *r; + void *tmp; + int err; + + memset(&info, 0, sizeof(info)); + + err = bpf_obj_get_info_by_fd(map_fd, &info, &len); + if (err) { + err = -errno; + pr_warn("ringbuf: failed to get map info for fd=%d: %d\n", + map_fd, err); + return err; + } + + if (info.type != BPF_MAP_TYPE_RINGBUF) { + pr_warn("ringbuf: map fd=%d is not BPF_MAP_TYPE_RINGBUF\n", + map_fd); + return -EINVAL; + } + + tmp = reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings)); + if (!tmp) + return -ENOMEM; + rb->rings = tmp; + + tmp = reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events)); + if (!tmp) + return -ENOMEM; + rb->events = tmp; + + r = &rb->rings[rb->ring_cnt]; + memset(r, 0, sizeof(*r)); + + r->map_fd = map_fd; + r->sample_cb = sample_cb; + r->ctx = ctx; + r->mask = info.max_entries - 1; + + /* Map writable consumer page */ + tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, + map_fd, 0); + if (tmp == MAP_FAILED) { + err = -errno; + pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", + map_fd, err); + return err; + } + r->consumer_pos = tmp; + + /* Map read-only producer page and data pages. We map twice as big + * data size to allow simple reading of samples that wrap around the + * end of a ring buffer. See kernel implementation for details. 
+ */ + tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ, + MAP_SHARED, map_fd, rb->page_size); + if (tmp == MAP_FAILED) { + err = -errno; + ringbuf_unmap_ring(rb, r); + pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n", + map_fd, err); + return err; + } + r->producer_pos = tmp; + r->data = tmp + rb->page_size; + + e = &rb->events[rb->ring_cnt]; + memset(e, 0, sizeof(*e)); + + e->events = EPOLLIN; + e->data.fd = rb->ring_cnt; + if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) { + err = -errno; + ringbuf_unmap_ring(rb, r); + pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n", + map_fd, err); + return err; + } + + rb->ring_cnt++; + return 0; +} + +void ring_buffer__free(struct ring_buffer *rb) +{ + int i; + + if (!rb) + return; + + for (i = 0; i < rb->ring_cnt; ++i) + ringbuf_unmap_ring(rb, &rb->rings[i]); + if (rb->epoll_fd >= 0) + close(rb->epoll_fd); + + free(rb->events); + free(rb->rings); + free(rb); +} + +struct ring_buffer * +ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx, + const struct ring_buffer_opts *opts) +{ + struct ring_buffer *rb; + int err; + + if (!OPTS_VALID(opts, ring_buffer_opts)) + return NULL; + + rb = calloc(1, sizeof(*rb)); + if (!rb) + return NULL; + + rb->page_size = getpagesize(); + + rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); + if (rb->epoll_fd < 0) { + err = -errno; + pr_warn("ringbuf: failed to create epoll instance: %d\n", err); + goto err_out; + } + + err = ring_buffer__add(rb, map_fd, sample_cb, ctx); + if (err) + goto err_out; + + return rb; + +err_out: + ring_buffer__free(rb); + return NULL; +} + +static inline int roundup_len(__u32 len) +{ + /* clear out top 2 bits (discard and busy, if set) */ + len <<= 2; + len >>= 2; + /* add length prefix */ + len += BPF_RINGBUF_HDR_SZ; + /* round up to 8 byte alignment */ + return (len + 7) / 8 * 8; +} + +static int ringbuf_process_ring(struct ring *r) +{ + int *len_ptr, len, err, cnt = 0; + unsigned long cons_pos, prod_pos; + bool got_new_data; + void *sample; + + cons_pos = smp_load_acquire(r->consumer_pos); + do { + got_new_data = false; + prod_pos = smp_load_acquire(r->producer_pos); + while (cons_pos < prod_pos) { + len_ptr = r->data + (cons_pos & r->mask); + len = smp_load_acquire(len_ptr); + + /* sample not committed yet, bail out for now */ + if (len & BPF_RINGBUF_BUSY_BIT) + goto done; + + got_new_data = true; + cons_pos += roundup_len(len); + + if ((len & BPF_RINGBUF_DISCARD_BIT) == 0) { + sample = (void *)len_ptr + BPF_RINGBUF_HDR_SZ; + err = r->sample_cb(r->ctx, sample, len); + if (err) { + /* update consumer pos and bail out */ + smp_store_release(r->consumer_pos, + cons_pos); + return err; + } + cnt++; + } + + smp_store_release(r->consumer_pos, cons_pos); + } + } while (got_new_data); +done: + return cnt; +} + +/* Consume available ring buffer(s) data without event polling. + * Returns number of records consumed across all registered ring buffers, or + * negative number if any of the callbacks return error. + */ +int ring_buffer__consume(struct ring_buffer *rb) +{ + int i, err, res = 0; + + for (i = 0; i < rb->ring_cnt; i++) { + struct ring *ring = &rb->rings[i]; + + err = ringbuf_process_ring(ring); + if (err < 0) + return err; + res += err; + } + return res; +} + +/* Poll for available data and consume records, if any are available. + * Returns number of records consumed, or negative number, if any of the + * registered callbacks returned error. + */ +int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms) +{ + int i, cnt, err, res = 0; + + cnt = epoll_wait(rb->epoll_fd, rb->events, rb->ring_cnt, timeout_ms); + for (i = 0; i < cnt; i++) { + __u32 ring_id = rb->events[i].data.fd; + struct ring *ring = &rb->rings[ring_id]; + + err = ringbuf_process_ring(ring); + if (err < 0) + return err; + res += err; + } + return cnt < 0 ? -errno : res; +} -- cgit v1.2.3
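[Editor's note: to make the consumer side concrete, a hedged sketch of a typical user of the API just added. The event layout is invented for the example; the BPF side would produce records with bpf_ringbuf_reserve()/bpf_ringbuf_submit() into a BPF_MAP_TYPE_RINGBUF map whose fd is passed in here.]

	#include <stdio.h>
	#include <bpf/libbpf.h>

	struct event { int pid; }; /* illustrative record layout */

	static int handle_event(void *ctx, void *data, size_t size)
	{
		const struct event *e = data;

		if (size < sizeof(*e))
			return -1; /* a negative return aborts polling */
		printf("pid=%d\n", e->pid);
		return 0;
	}

	static int consume_forever(int map_fd)
	{
		struct ring_buffer *rb;
		int err;

		rb = ring_buffer__new(map_fd, handle_event, NULL, NULL);
		if (!rb)
			return -1;

		/* each successful poll returns the number of records consumed */
		while ((err = ring_buffer__poll(rb, 100 /* ms */)) >= 0)
			;

		ring_buffer__free(rb);
		return err;
	}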
From 2778797037a658be71a6c55b54700bf58ba21eb7 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 29 May 2020 16:07:15 -0600 Subject: libbpf: Add SEC name for xdp programs attached to device map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support SEC("xdp_devmap*") as a shortcut for loading the program with type BPF_PROG_TYPE_XDP and expected attach type BPF_XDP_DEVMAP. Signed-off-by: David Ahern Signed-off-by: Alexei Starovoitov Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20200529220716.75383-5-dsahern@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 74d967619dcf..85d4f1c5fc52 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6657,6 +6657,8 @@ static const struct bpf_sec_def section_defs[] = { .expected_attach_type = BPF_TRACE_ITER, .is_attach_btf = true, .attach_fn = attach_iter), + BPF_EAPROG_SEC("xdp_devmap", BPF_PROG_TYPE_XDP, + BPF_XDP_DEVMAP), BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), -- cgit v1.2.3
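[Editor's note: on the BPF side the new section name is used as below. This is a sketch: the program body is a placeholder, and with BPF_XDP_DEVMAP the program runs on the egress device chosen by the devmap entry.]

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	SEC("xdp_devmap")
	int xdp_devmap_prog(struct xdp_md *ctx)
	{
		/* inspect or modify the redirected frame here */
		return XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";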
From febeb6dff7beafcaf89521f6c8ff7b0adac08d54 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 1 Jun 2020 13:26:01 -0700 Subject: libbpf: Add _GNU_SOURCE for reallocarray to ringbuf.c On systems with a recent enough glibc, the reallocarray compat won't kick in, so reallocarray() itself has to come from the stdlib.h include. But _GNU_SOURCE is necessary to enable it. So add it. Fixes: bf99c936f947 ("libbpf: Add BPF ring buffer support") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200601202601.2139477-1-andriin@fb.com --- tools/lib/bpf/ringbuf.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/lib') diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index bc10fa1d43c7..4fc6c6cbb4eb 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -4,6 +4,9 @@ * * Copyright (C) 2020 Facebook, Inc. */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif #include #include #include -- cgit v1.2.3 From d60d81acc2c180e33244857e35ef60072573b000 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Sun, 31 May 2020 10:28:40 +0200 Subject: libbpf: Add support for bpf_link-based netns attachment Add bpf_program__attach_netns(), which uses the LINK_CREATE subcommand to create an FD-based kernel bpf_link, for attach types tied to a network namespace, that is BPF_FLOW_DISSECTOR for the moment. Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200531082846.2117903-7-jakub@cloudflare.com --- tools/lib/bpf/libbpf.c | 23 ++++++++++++++++++----- tools/lib/bpf/libbpf.h | 2 ++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 21 insertions(+), 5 deletions(-) (limited to 'tools/lib') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 85d4f1c5fc52..7f01be2b88b8 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -7896,8 +7896,9 @@ static struct bpf_link *attach_iter(const struct bpf_sec_def *sec, return bpf_program__attach_iter(prog, NULL); } -struct bpf_link * -bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd) +static struct bpf_link * +bpf_program__attach_fd(struct bpf_program *prog, int target_fd, + const char *target_name) { enum bpf_attach_type attach_type; char errmsg[STRERR_BUFSIZE]; @@ -7917,12 +7918,12 @@ bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd) link->detach = &bpf_link__detach_fd; attach_type = bpf_program__get_expected_attach_type(prog); - link_fd = bpf_link_create(prog_fd, cgroup_fd, attach_type, NULL); + link_fd = bpf_link_create(prog_fd, target_fd, attach_type, NULL); if (link_fd < 0) { link_fd = -errno; free(link); - pr_warn("program '%s': failed to attach to cgroup: %s\n", - bpf_program__title(prog, false), + pr_warn("program '%s': failed to attach to %s: %s\n", + bpf_program__title(prog, false), target_name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); return ERR_PTR(link_fd); } @@ -7930,6 +7931,18 @@ bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd) return link; } +struct bpf_link * +bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd) +{ + return bpf_program__attach_fd(prog, cgroup_fd, "cgroup"); +} + +struct bpf_link * +bpf_program__attach_netns(struct bpf_program *prog, int netns_fd) +{ + return bpf_program__attach_fd(prog, netns_fd, "netns"); +} + struct bpf_link * bpf_program__attach_iter(struct bpf_program *prog, const struct bpf_iter_attach_opts *opts) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 8528a02d5af8..334437af3014 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -253,6 +253,8 @@ LIBBPF_API struct bpf_link * bpf_program__attach_lsm(struct bpf_program *prog); LIBBPF_API struct bpf_link * bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd); +LIBBPF_API struct bpf_link * +bpf_program__attach_netns(struct bpf_program *prog, int netns_fd); struct bpf_map; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index c18860200abb..f732c77b7ed0 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -262,6 +262,7 @@ LIBBPF_0.0.9 { bpf_link_get_fd_by_id; bpf_link_get_next_id; bpf_program__attach_iter; + bpf_program__attach_netns; perf_buffer__consume; ring_buffer__add; ring_buffer__consume; -- cgit v1.2.3
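[Editor's note: a hedged usage sketch for the new attach type; prog is assumed to be a loaded flow dissector program, and the namespace path is illustrative.]

	#include <fcntl.h>
	#include <unistd.h>
	#include <bpf/libbpf.h>

	/* Attach a flow dissector program to the current network
	 * namespace. The returned link keeps the attachment alive, so
	 * the namespace fd can be closed right away.
	 */
	static struct bpf_link *attach_flow_dissector(struct bpf_program *prog)
	{
		struct bpf_link *link;
		int netns_fd;

		netns_fd = open("/proc/self/ns/net", O_RDONLY);
		if (netns_fd < 0)
			return NULL;

		link = bpf_program__attach_netns(prog, netns_fd);
		close(netns_fd);
		return libbpf_get_error(link) ? NULL : link;
	}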