diff options
Diffstat (limited to 'tools')
316 files changed, 14751 insertions, 3545 deletions
diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h index ce2ee8f1e361..9306726337fe 100644 --- a/tools/arch/arm64/include/uapi/asm/unistd.h +++ b/tools/arch/arm64/include/uapi/asm/unistd.h @@ -19,7 +19,6 @@ #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_TIME32_SYSCALLS -#define __ARCH_WANT_SYS_CLONE3 #define __ARCH_WANT_MEMFD_SECRET #include <asm-generic/unistd.h> diff --git a/tools/arch/loongarch/include/uapi/asm/unistd.h b/tools/arch/loongarch/include/uapi/asm/unistd.h index 0c743344e92d..8eeaac0087c3 100644 --- a/tools/arch/loongarch/include/uapi/asm/unistd.h +++ b/tools/arch/loongarch/include/uapi/asm/unistd.h @@ -4,6 +4,5 @@ */ #define __ARCH_WANT_SYS_CLONE -#define __ARCH_WANT_SYS_CLONE3 #include <asm-generic/unistd.h> diff --git a/tools/arch/x86/kcpuid/Makefile b/tools/arch/x86/kcpuid/Makefile index 87b554fab14b..d0b4b0ed10ff 100644 --- a/tools/arch/x86/kcpuid/Makefile +++ b/tools/arch/x86/kcpuid/Makefile @@ -19,6 +19,6 @@ clean : @rm -f kcpuid install : kcpuid - install -d $(DESTDIR)$(BINDIR) + install -d $(DESTDIR)$(BINDIR) $(DESTDIR)$(HWDATADIR) install -m 755 -p kcpuid $(DESTDIR)$(BINDIR)/kcpuid - install -m 444 -p cpuid.csv $(HWDATADIR)/cpuid.csv + install -m 444 -p cpuid.csv $(DESTDIR)$(HWDATADIR)/cpuid.csv diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index eaba24320fb2..3f6bca03ad2e 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -28,7 +28,7 @@ BTF COMMANDS | **bpftool** **btf help** | | *BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* } -| *FORMAT* := { **raw** | **c** } +| *FORMAT* := { **raw** | **c** [**unsorted**] } | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* } @@ -63,7 +63,9 @@ bpftool btf dump *BTF_SRC* pahole. **format** option can be used to override default (raw) output format. Raw - (**raw**) or C-syntax (**c**) output formats are supported. + (**raw**) or C-syntax (**c**) output formats are supported. With C-style + formatting, the output is sorted by default. Use the **unsorted** option + to avoid sorting the output. bpftool btf help Print short help message. diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index dfa4f1bebbb3..ba927379eb20 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -204,10 +204,11 @@ ifeq ($(feature-clang-bpf-co-re),1) BUILD_BPF_SKELS := 1 -$(OUTPUT)vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL_BOOTSTRAP) ifeq ($(VMLINUX_H),) +$(OUTPUT)vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL_BOOTSTRAP) $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) btf dump file $< format c > $@ else +$(OUTPUT)vmlinux.h: $(VMLINUX_H) $(Q)cp "$(VMLINUX_H)" $@ endif diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 04afe2ac2228..be99d49b8714 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -930,6 +930,9 @@ _bpftool() format) COMPREPLY=( $( compgen -W "c raw" -- "$cur" ) ) ;; + c) + COMPREPLY=( $( compgen -W "unsorted" -- "$cur" ) ) + ;; *) # emit extra options case ${words[3]} in diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 91fcb75babe3..6789c7a4d5ca 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -20,6 +20,8 @@ #include "json_writer.h" #include "main.h" +#define KFUNC_DECL_TAG "bpf_kfunc" + static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_UNKN] = "UNKNOWN", [BTF_KIND_INT] = "INT", @@ -43,6 +45,13 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_ENUM64] = "ENUM64", }; +struct sort_datum { + int index; + int type_rank; + const char *sort_name; + const char *own_name; +}; + static const char *btf_int_enc_str(__u8 encoding) { switch (encoding) { @@ -454,15 +463,171 @@ static int dump_btf_raw(const struct btf *btf, return 0; } +static int dump_btf_kfuncs(struct btf_dump *d, const struct btf *btf) +{ + LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts); + int cnt = btf__type_cnt(btf); + int i; + + printf("\n/* BPF kfuncs */\n"); + printf("#ifndef BPF_NO_KFUNC_PROTOTYPES\n"); + + for (i = 1; i < cnt; i++) { + const struct btf_type *t = btf__type_by_id(btf, i); + const char *name; + int err; + + if (!btf_is_decl_tag(t)) + continue; + + if (btf_decl_tag(t)->component_idx != -1) + continue; + + name = btf__name_by_offset(btf, t->name_off); + if (strncmp(name, KFUNC_DECL_TAG, sizeof(KFUNC_DECL_TAG))) + continue; + + t = btf__type_by_id(btf, t->type); + if (!btf_is_func(t)) + continue; + + printf("extern "); + + opts.field_name = btf__name_by_offset(btf, t->name_off); + err = btf_dump__emit_type_decl(d, t->type, &opts); + if (err) + return err; + + printf(" __weak __ksym;\n"); + } + + printf("#endif\n\n"); + + return 0; +} + static void __printf(2, 0) btf_dump_printf(void *ctx, const char *fmt, va_list args) { vfprintf(stdout, fmt, args); } +static int btf_type_rank(const struct btf *btf, __u32 index, bool has_name) +{ + const struct btf_type *t = btf__type_by_id(btf, index); + const int kind = btf_kind(t); + const int max_rank = 10; + + if (t->name_off) + has_name = true; + + switch (kind) { + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + return has_name ? 1 : 0; + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + return 2; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + return has_name ? 3 : max_rank; + case BTF_KIND_FUNC_PROTO: + return has_name ? 4 : max_rank; + case BTF_KIND_ARRAY: + if (has_name) + return btf_type_rank(btf, btf_array(t)->type, has_name); + return max_rank; + case BTF_KIND_TYPE_TAG: + case BTF_KIND_CONST: + case BTF_KIND_PTR: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_TYPEDEF: + case BTF_KIND_DECL_TAG: + if (has_name) + return btf_type_rank(btf, t->type, has_name); + return max_rank; + default: + return max_rank; + } +} + +static const char *btf_type_sort_name(const struct btf *btf, __u32 index, bool from_ref) +{ + const struct btf_type *t = btf__type_by_id(btf, index); + + switch (btf_kind(t)) { + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: { + int name_off = t->name_off; + + /* Use name of the first element for anonymous enums if allowed */ + if (!from_ref && !t->name_off && btf_vlen(t)) + name_off = btf_enum(t)->name_off; + + return btf__name_by_offset(btf, name_off); + } + case BTF_KIND_ARRAY: + return btf_type_sort_name(btf, btf_array(t)->type, true); + case BTF_KIND_TYPE_TAG: + case BTF_KIND_CONST: + case BTF_KIND_PTR: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_TYPEDEF: + case BTF_KIND_DECL_TAG: + return btf_type_sort_name(btf, t->type, true); + default: + return btf__name_by_offset(btf, t->name_off); + } + return NULL; +} + +static int btf_type_compare(const void *left, const void *right) +{ + const struct sort_datum *d1 = (const struct sort_datum *)left; + const struct sort_datum *d2 = (const struct sort_datum *)right; + int r; + + if (d1->type_rank != d2->type_rank) + return d1->type_rank < d2->type_rank ? -1 : 1; + + r = strcmp(d1->sort_name, d2->sort_name); + if (r) + return r; + + return strcmp(d1->own_name, d2->own_name); +} + +static struct sort_datum *sort_btf_c(const struct btf *btf) +{ + struct sort_datum *datums; + int n; + + n = btf__type_cnt(btf); + datums = malloc(sizeof(struct sort_datum) * n); + if (!datums) + return NULL; + + for (int i = 0; i < n; ++i) { + struct sort_datum *d = datums + i; + const struct btf_type *t = btf__type_by_id(btf, i); + + d->index = i; + d->type_rank = btf_type_rank(btf, i, false); + d->sort_name = btf_type_sort_name(btf, i, false); + d->own_name = btf__name_by_offset(btf, t->name_off); + } + + qsort(datums, n, sizeof(struct sort_datum), btf_type_compare); + + return datums; +} + static int dump_btf_c(const struct btf *btf, - __u32 *root_type_ids, int root_type_cnt) + __u32 *root_type_ids, int root_type_cnt, bool sort_dump) { + struct sort_datum *datums = NULL; struct btf_dump *d; int err = 0, i; @@ -476,6 +641,12 @@ static int dump_btf_c(const struct btf *btf, printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); printf("#pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)\n"); printf("#endif\n\n"); + printf("#ifndef __ksym\n"); + printf("#define __ksym __attribute__((section(\".ksyms\")))\n"); + printf("#endif\n\n"); + printf("#ifndef __weak\n"); + printf("#define __weak __attribute__((weak))\n"); + printf("#endif\n\n"); if (root_type_cnt) { for (i = 0; i < root_type_cnt; i++) { @@ -486,11 +657,19 @@ static int dump_btf_c(const struct btf *btf, } else { int cnt = btf__type_cnt(btf); + if (sort_dump) + datums = sort_btf_c(btf); for (i = 1; i < cnt; i++) { - err = btf_dump__dump_type(d, i); + int idx = datums ? datums[i].index : i; + + err = btf_dump__dump_type(d, idx); if (err) goto done; } + + err = dump_btf_kfuncs(d, btf); + if (err) + goto done; } printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); @@ -500,6 +679,7 @@ static int dump_btf_c(const struct btf *btf, printf("#endif /* __VMLINUX_H__ */\n"); done: + free(datums); btf_dump__free(d); return err; } @@ -549,10 +729,10 @@ static bool btf_is_kernel_module(__u32 btf_id) static int do_dump(int argc, char **argv) { + bool dump_c = false, sort_dump_c = true; struct btf *btf = NULL, *base = NULL; __u32 root_type_ids[2]; int root_type_cnt = 0; - bool dump_c = false; __u32 btf_id = -1; const char *src; int fd = -1; @@ -663,6 +843,9 @@ static int do_dump(int argc, char **argv) goto done; } NEXT_ARG(); + } else if (is_prefix(*argv, "unsorted")) { + sort_dump_c = false; + NEXT_ARG(); } else { p_err("unrecognized option: '%s'", *argv); err = -EINVAL; @@ -691,7 +874,7 @@ static int do_dump(int argc, char **argv) err = -ENOTSUP; goto done; } - err = dump_btf_c(btf, root_type_ids, root_type_cnt); + err = dump_btf_c(btf, root_type_ids, root_type_cnt, sort_dump_c); } else { err = dump_btf_raw(btf, root_type_ids, root_type_cnt); } @@ -1063,7 +1246,7 @@ static int do_help(int argc, char **argv) " %1$s %2$s help\n" "\n" " BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n" - " FORMAT := { raw | c }\n" + " FORMAT := { raw | c [unsorted] }\n" " " HELP_SPEC_MAP "\n" " " HELP_SPEC_PROGRAM "\n" " " HELP_SPEC_OPTIONS " |\n" diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index af6898c0f388..9af426d43299 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -19,6 +19,38 @@ #include "main.h" +static const int cgroup_attach_types[] = { + BPF_CGROUP_INET_INGRESS, + BPF_CGROUP_INET_EGRESS, + BPF_CGROUP_INET_SOCK_CREATE, + BPF_CGROUP_INET_SOCK_RELEASE, + BPF_CGROUP_INET4_BIND, + BPF_CGROUP_INET6_BIND, + BPF_CGROUP_INET4_POST_BIND, + BPF_CGROUP_INET6_POST_BIND, + BPF_CGROUP_INET4_CONNECT, + BPF_CGROUP_INET6_CONNECT, + BPF_CGROUP_UNIX_CONNECT, + BPF_CGROUP_INET4_GETPEERNAME, + BPF_CGROUP_INET6_GETPEERNAME, + BPF_CGROUP_UNIX_GETPEERNAME, + BPF_CGROUP_INET4_GETSOCKNAME, + BPF_CGROUP_INET6_GETSOCKNAME, + BPF_CGROUP_UNIX_GETSOCKNAME, + BPF_CGROUP_UDP4_SENDMSG, + BPF_CGROUP_UDP6_SENDMSG, + BPF_CGROUP_UNIX_SENDMSG, + BPF_CGROUP_UDP4_RECVMSG, + BPF_CGROUP_UDP6_RECVMSG, + BPF_CGROUP_UNIX_RECVMSG, + BPF_CGROUP_SOCK_OPS, + BPF_CGROUP_DEVICE, + BPF_CGROUP_SYSCTL, + BPF_CGROUP_GETSOCKOPT, + BPF_CGROUP_SETSOCKOPT, + BPF_LSM_CGROUP +}; + #define HELP_SPEC_ATTACH_FLAGS \ "ATTACH_FLAGS := { multi | override }" @@ -183,13 +215,13 @@ static int count_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type) static int cgroup_has_attached_progs(int cgroup_fd) { - enum bpf_attach_type type; + unsigned int i = 0; bool no_prog = true; - for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { - int count = count_attached_bpf_progs(cgroup_fd, type); + for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) { + int count = count_attached_bpf_progs(cgroup_fd, cgroup_attach_types[i]); - if (count < 0 && errno != EINVAL) + if (count < 0) return -1; if (count > 0) { diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 958e92acca8e..9b75639434b8 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -410,7 +410,7 @@ void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd, { const char *prog_name = prog_info->name; const struct btf_type *func_type; - const struct bpf_func_info finfo = {}; + struct bpf_func_info finfo = {}; struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); struct btf *prog_btf = NULL; diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index b3979ddc0189..5a4d3240689e 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -848,28 +848,45 @@ out: } static void -codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped) +codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped, bool populate_links) { struct bpf_map *map; char ident[256]; - size_t i; + size_t i, map_sz; if (!map_cnt) return; + /* for backward compatibility with old libbpf versions that don't + * handle new BPF skeleton with new struct bpf_map_skeleton definition + * that includes link field, avoid specifying new increased size, + * unless we absolutely have to (i.e., if there are struct_ops maps + * present) + */ + map_sz = offsetof(struct bpf_map_skeleton, link); + if (populate_links) { + bpf_object__for_each_map(map, obj) { + if (bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS) { + map_sz = sizeof(struct bpf_map_skeleton); + break; + } + } + } + codegen("\ \n\ - \n\ + \n\ /* maps */ \n\ s->map_cnt = %zu; \n\ - s->map_skel_sz = sizeof(*s->maps); \n\ - s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);\n\ + s->map_skel_sz = %zu; \n\ + s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt,\n\ + sizeof(*s->maps) > %zu ? sizeof(*s->maps) : %zu);\n\ if (!s->maps) { \n\ err = -ENOMEM; \n\ goto err; \n\ } \n\ ", - map_cnt + map_cnt, map_sz, map_sz, map_sz ); i = 0; bpf_object__for_each_map(map, obj) { @@ -878,15 +895,22 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped) codegen("\ \n\ - \n\ - s->maps[%zu].name = \"%s\"; \n\ - s->maps[%zu].map = &obj->maps.%s; \n\ + \n\ + map = (struct bpf_map_skeleton *)((char *)s->maps + %zu * s->map_skel_sz);\n\ + map->name = \"%s\"; \n\ + map->map = &obj->maps.%s; \n\ ", - i, bpf_map__name(map), i, ident); + i, bpf_map__name(map), ident); /* memory-mapped internal maps */ if (mmaped && is_mmapable_map(map, ident, sizeof(ident))) { - printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n", - i, ident); + printf("\tmap->mmaped = (void **)&obj->%s;\n", ident); + } + + if (populate_links && bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS) { + codegen("\ + \n\ + map->link = &obj->links.%s; \n\ + ", ident); } i++; } @@ -1141,7 +1165,7 @@ static void gen_st_ops_shadow_init(struct btf *btf, struct bpf_object *obj) static int do_skeleton(int argc, char **argv) { char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")]; - size_t map_cnt = 0, prog_cnt = 0, file_sz, mmap_sz; + size_t map_cnt = 0, prog_cnt = 0, attach_map_cnt = 0, file_sz, mmap_sz; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); char obj_name[MAX_OBJ_NAME_LEN] = "", *obj_data; struct bpf_object *obj = NULL; @@ -1225,6 +1249,10 @@ static int do_skeleton(int argc, char **argv) bpf_map__name(map)); continue; } + + if (bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS) + attach_map_cnt++; + map_cnt++; } bpf_object__for_each_program(prog, obj) { @@ -1260,6 +1288,8 @@ static int do_skeleton(int argc, char **argv) #include <stdlib.h> \n\ #include <bpf/libbpf.h> \n\ \n\ + #define BPF_SKEL_SUPPORTS_MAP_AUTO_ATTACH 1 \n\ + \n\ struct %1$s { \n\ struct bpf_object_skeleton *skeleton; \n\ struct bpf_object *obj; \n\ @@ -1297,6 +1327,9 @@ static int do_skeleton(int argc, char **argv) bpf_program__name(prog)); } printf("\t} progs;\n"); + } + + if (prog_cnt + attach_map_cnt) { printf("\tstruct {\n"); bpf_object__for_each_program(prog, obj) { if (use_loader) @@ -1306,6 +1339,19 @@ static int do_skeleton(int argc, char **argv) printf("\t\tstruct bpf_link *%s;\n", bpf_program__name(prog)); } + + bpf_object__for_each_map(map, obj) { + if (!get_map_ident(map, ident, sizeof(ident))) + continue; + if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS) + continue; + + if (use_loader) + printf("t\tint %s_fd;\n", ident); + else + printf("\t\tstruct bpf_link *%s;\n", ident); + } + printf("\t} links;\n"); } @@ -1433,6 +1479,7 @@ static int do_skeleton(int argc, char **argv) %1$s__create_skeleton(struct %1$s *obj) \n\ { \n\ struct bpf_object_skeleton *s; \n\ + struct bpf_map_skeleton *map __attribute__((unused));\n\ int err; \n\ \n\ s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\ @@ -1448,7 +1495,7 @@ static int do_skeleton(int argc, char **argv) obj_name ); - codegen_maps_skeleton(obj, map_cnt, true /*mmaped*/); + codegen_maps_skeleton(obj, map_cnt, true /*mmaped*/, true /*links*/); codegen_progs_skeleton(obj, prog_cnt, true /*populate_links*/); codegen("\ @@ -1723,6 +1770,7 @@ static int do_subskeleton(int argc, char **argv) { \n\ struct %1$s *obj; \n\ struct bpf_object_subskeleton *s; \n\ + struct bpf_map_skeleton *map __attribute__((unused));\n\ int err; \n\ \n\ obj = (struct %1$s *)calloc(1, sizeof(*obj)); \n\ @@ -1786,7 +1834,7 @@ static int do_subskeleton(int argc, char **argv) } } - codegen_maps_skeleton(obj, map_cnt, false /*mmaped*/); + codegen_maps_skeleton(obj, map_cnt, false /*mmaped*/, false /*links*/); codegen_progs_skeleton(obj, prog_cnt, false /*links*/); codegen("\ @@ -2379,15 +2427,6 @@ out: return err; } -static int btfgen_remap_id(__u32 *type_id, void *ctx) -{ - unsigned int *ids = ctx; - - *type_id = ids[*type_id]; - - return 0; -} - /* Generate BTF from relocation information previously recorded */ static struct btf *btfgen_get_btf(struct btfgen_info *info) { @@ -2467,10 +2506,15 @@ static struct btf *btfgen_get_btf(struct btfgen_info *info) /* second pass: fix up type ids */ for (i = 1; i < btf__type_cnt(btf_new); i++) { struct btf_type *btf_type = (struct btf_type *) btf__type_by_id(btf_new, i); + struct btf_field_iter it; + __u32 *type_id; - err = btf_type_visit_type_ids(btf_type, btfgen_remap_id, ids); + err = btf_field_iter_init(&it, btf_type, BTF_FIELD_ITER_IDS); if (err) goto err_out; + + while ((type_id = btf_field_iter_next(&it))) + *type_id = ids[*type_id]; } free(ids); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 1a501cf09e78..40ea743d139f 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1813,6 +1813,10 @@ offload_dev: } if (pinmaps) { + err = create_and_mount_bpffs_dir(pinmaps); + if (err) + goto err_unpin; + err = bpf_object__pin_maps(obj, pinmaps); if (err) { p_err("failed to pin all maps"); diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c index 7bdbcac3cf62..948dde25034e 100644 --- a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c +++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c @@ -29,6 +29,7 @@ enum bpf_link_type___local { }; extern const void bpf_link_fops __ksym; +extern const void bpf_link_fops_poll __ksym __weak; extern const void bpf_map_fops __ksym; extern const void bpf_prog_fops __ksym; extern const void btf_fops __ksym; @@ -84,7 +85,11 @@ int iter(struct bpf_iter__task_file *ctx) fops = &btf_fops; break; case BPF_OBJ_LINK: - fops = &bpf_link_fops; + if (&bpf_link_fops_poll && + file->f_op == &bpf_link_fops_poll) + fops = &bpf_link_fops_poll; + else + fops = &bpf_link_fops; break; default: return 0; diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c index 2f80edc682f1..f48c783cb9f7 100644 --- a/tools/bpf/bpftool/skeleton/profiler.bpf.c +++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c @@ -40,17 +40,17 @@ struct { const volatile __u32 num_cpu = 1; const volatile __u32 num_metric = 1; -#define MAX_NUM_MATRICS 4 +#define MAX_NUM_METRICS 4 SEC("fentry/XXX") int BPF_PROG(fentry_XXX) { - struct bpf_perf_event_value___local *ptrs[MAX_NUM_MATRICS]; + struct bpf_perf_event_value___local *ptrs[MAX_NUM_METRICS]; u32 key = bpf_get_smp_processor_id(); u32 i; /* look up before reading, to reduce error */ - for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + for (i = 0; i < num_metric && i < MAX_NUM_METRICS; i++) { u32 flag = i; ptrs[i] = bpf_map_lookup_elem(&fentry_readings, &flag); @@ -58,7 +58,7 @@ int BPF_PROG(fentry_XXX) return 0; } - for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + for (i = 0; i < num_metric && i < MAX_NUM_METRICS; i++) { struct bpf_perf_event_value___local reading; int err; @@ -99,14 +99,14 @@ fexit_update_maps(u32 id, struct bpf_perf_event_value___local *after) SEC("fexit/XXX") int BPF_PROG(fexit_XXX) { - struct bpf_perf_event_value___local readings[MAX_NUM_MATRICS]; + struct bpf_perf_event_value___local readings[MAX_NUM_METRICS]; u32 cpu = bpf_get_smp_processor_id(); u32 i, zero = 0; int err; u64 *count; /* read all events before updating the maps, to reduce error */ - for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) { + for (i = 0; i < num_metric && i < MAX_NUM_METRICS; i++) { err = bpf_perf_event_read_value(&events, cpu + i * num_cpu, (void *)(readings + i), sizeof(*readings)); @@ -116,7 +116,7 @@ int BPF_PROG(fexit_XXX) count = bpf_map_lookup_elem(&counts, &zero); if (count) { *count += 1; - for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) + for (i = 0; i < num_metric && i < MAX_NUM_METRICS; i++) fexit_update_maps(i, &readings[i]); } return 0; diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index af393c7dee1f..936ef95c3d32 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -409,6 +409,14 @@ static int elf_collect(struct object *obj) obj->efile.idlist = data; obj->efile.idlist_shndx = idx; obj->efile.idlist_addr = sh.sh_addr; + } else if (!strcmp(name, BTF_BASE_ELF_SEC)) { + /* If a .BTF.base section is found, do not resolve + * BTF ids relative to vmlinux; resolve relative + * to the .BTF.base section instead. btf__parse_split() + * will take care of this once the base BTF it is + * passed is NULL. + */ + obj->base_btf_path = NULL; } if (compressed_section_fix(elf, scn, &sh)) diff --git a/tools/gpio/gpio-sloppy-logic-analyzer.sh b/tools/gpio/gpio-sloppy-logic-analyzer.sh new file mode 100755 index 000000000000..ed21a110df5e --- /dev/null +++ b/tools/gpio/gpio-sloppy-logic-analyzer.sh @@ -0,0 +1,246 @@ +#!/bin/sh -eu +# SPDX-License-Identifier: GPL-2.0 +# +# Helper script for the Linux Kernel GPIO sloppy logic analyzer +# +# Copyright (C) Wolfram Sang <wsa@sang-engineering.com> +# Copyright (C) Renesas Electronics Corporation + +samplefreq=1000000 +numsamples=250000 +cpusetdefaultdir='/sys/fs/cgroup' +cpusetprefix='cpuset.' +debugdir='/sys/kernel/debug' +ladirname='gpio-sloppy-logic-analyzer' +outputdir="$PWD" +neededcmds='taskset zip' +max_chans=8 +duration= +initcpu= +listinstances=0 +lainstance= +lasysfsdir= +triggerdat= +trigger_bindat= +progname="${0##*/}" +print_help() +{ + cat << EOF +$progname - helper script for the Linux Kernel Sloppy GPIO Logic Analyzer +Available options: + -c|--cpu <n>: which CPU to isolate for sampling. Only needed once. Default <1>. + Remember that a more powerful CPU gives you higher sampling speeds. + Also CPU0 is not recommended as it usually does extra bookkeeping. + -d|--duration-us <SI-n>: number of microseconds to sample. Overrides -n, no default value. + -h|--help: print this help + -i|--instance <str>: name of the logic analyzer in case you have multiple instances. Default + to first instance found + -k|--kernel-debug-dir <str>: path to the kernel debugfs mountpoint. Default: <$debugdir> + -l|--list-instances: list all available instances + -n|--num_samples <SI-n>: number of samples to acquire. Default <$numsamples> + -o|--output-dir <str>: directory to put the result files. Default: current dir + -s|--sample_freq <SI-n>: desired sampling frequency. Might be capped if too large. + Default: <1000000> + -t|--trigger <str>: pattern to use as trigger. <str> consists of two-char pairs. First + char is channel number starting at "1". Second char is trigger level: + "L" - low; "H" - high; "R" - rising; "F" - falling + These pairs can be combined with "+", so "1H+2F" triggers when probe 1 + is high while probe 2 has a falling edge. You can have multiple triggers + combined with ",". So, "1H+2F,1H+2R" is like the example before but it + waits for a rising edge on probe 2 while probe 1 is still high after the + first trigger has been met. + Trigger data will only be used for the next capture and then be erased. + +<SI-n> is an integer value where SI units "T", "G", "M", "K" are recognized, e.g. '1M500K' is 1500000. + +Examples: +Samples $numsamples values at 1MHz with an already prepared CPU or automatically prepares CPU1 if needed, +use the first logic analyzer instance found: + '$progname' +Samples 50us at 2MHz waiting for a falling edge on channel 2. CPU and instance as above: + '$progname -d 50 -s 2M -t "2F"' + +Note that the process exits after checking all parameters but a sub-process still works in +the background. The result is only available once the sub-process finishes. + +Result is a .sr file to be consumed with PulseView from the free Sigrok project. It is +a zip file which also contains the binary sample data which may be consumed by others. +The filename is the logic analyzer instance name plus a since-epoch timestamp. +EOF +} + +fail() +{ + echo "$1" + exit 1 +} + +parse_si() +{ + conv_si="$(printf $1 | sed 's/[tT]+\?/*1000G+/g; s/[gG]+\?/*1000M+/g; s/[mM]+\?/*1000K+/g; s/[kK]+\?/*1000+/g; s/+$//')" + si_val="$((conv_si))" +} +set_newmask() +{ + for f in $(find "$1" -iname "$2"); do echo "$newmask" > "$f" 2>/dev/null || true; done +} + +init_cpu() +{ + isol_cpu="$1" + + [ -d "$lacpusetdir" ] || mkdir "$lacpusetdir" + + cur_cpu=$(cat "${lacpusetfile}cpus") + [ "$cur_cpu" = "$isol_cpu" ] && return + [ -z "$cur_cpu" ] || fail "CPU$isol_cpu requested but CPU$cur_cpu already isolated" + + echo "$isol_cpu" > "${lacpusetfile}cpus" || fail "Could not isolate CPU$isol_cpu. Does it exist?" + echo 1 > "${lacpusetfile}cpu_exclusive" + echo 0 > "${lacpusetfile}mems" + + oldmask=$(cat /proc/irq/default_smp_affinity) + newmask=$(printf "%x" $((0x$oldmask & ~(1 << isol_cpu)))) + + set_newmask '/proc/irq' '*smp_affinity' + set_newmask '/sys/devices/virtual/workqueue/' 'cpumask' + + # Move tasks away from isolated CPU + for p in $(ps -o pid | tail -n +2); do + mask=$(taskset -p "$p") || continue + # Ignore tasks with a custom mask, i.e. not equal $oldmask + [ "${mask##*: }" = "$oldmask" ] || continue + taskset -p "$newmask" "$p" || continue + done 2>/dev/null >/dev/null + + # Big hammer! Working with 'rcu_momentary_dyntick_idle()' for a more fine-grained solution + # still printed warnings. Same for re-enabling the stall detector after sampling. + echo 1 > /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress + + cpufreqgov="/sys/devices/system/cpu/cpu$isol_cpu/cpufreq/scaling_governor" + [ -w "$cpufreqgov" ] && echo 'performance' > "$cpufreqgov" || true +} + +parse_triggerdat() +{ + oldifs="$IFS" + IFS=','; for trig in $1; do + mask=0; val1=0; val2=0 + IFS='+'; for elem in $trig; do + chan=${elem%[lhfrLHFR]} + mode=${elem#$chan} + # Check if we could parse something and the channel number fits + [ "$chan" != "$elem" ] && [ "$chan" -le $max_chans ] || fail "Trigger syntax error: $elem" + bit=$((1 << (chan - 1))) + mask=$((mask | bit)) + case $mode in + [hH]) val1=$((val1 | bit)); val2=$((val2 | bit));; + [fF]) val1=$((val1 | bit));; + [rR]) val2=$((val2 | bit));; + esac + done + trigger_bindat="$trigger_bindat$(printf '\\%o\\%o' $mask $val1)" + [ $val1 -ne $val2 ] && trigger_bindat="$trigger_bindat$(printf '\\%o\\%o' $mask $val2)" + done + IFS="$oldifs" +} + +do_capture() +{ + taskset "$1" echo 1 > "$lasysfsdir"/capture || fail "Capture error! Check kernel log" + + srtmp=$(mktemp -d) + echo 1 > "$srtmp"/version + cp "$lasysfsdir"/sample_data "$srtmp"/logic-1-1 + cat > "$srtmp"/metadata << EOF +[global] +sigrok version=0.2.0 + +[device 1] +capturefile=logic-1 +total probes=$(wc -l < "$lasysfsdir"/meta_data) +samplerate=${samplefreq}Hz +unitsize=1 +EOF + cat "$lasysfsdir"/meta_data >> "$srtmp"/metadata + + zipname="$outputdir/${lasysfsdir##*/}-$(date +%s).sr" + zip -jq "$zipname" "$srtmp"/* + rm -rf "$srtmp" + delay_ack=$(cat "$lasysfsdir"/delay_ns_acquisition) + [ "$delay_ack" -eq 0 ] && delay_ack=1 + echo "Logic analyzer done. Saved '$zipname'" + echo "Max sample frequency this time: $((1000000000 / delay_ack))Hz." +} + +rep=$(getopt -a -l cpu:,duration-us:,help,instance:,list-instances,kernel-debug-dir:,num_samples:,output-dir:,sample_freq:,trigger: -o c:d:hi:k:ln:o:s:t: -- "$@") || exit 1 +eval set -- "$rep" +while true; do + case "$1" in + -c|--cpu) initcpu="$2"; shift;; + -d|--duration-us) parse_si $2; duration=$si_val; shift;; + -h|--help) print_help; exit 0;; + -i|--instance) lainstance="$2"; shift;; + -k|--kernel-debug-dir) debugdir="$2"; shift;; + -l|--list-instances) listinstances=1;; + -n|--num_samples) parse_si $2; numsamples=$si_val; shift;; + -o|--output-dir) outputdir="$2"; shift;; + -s|--sample_freq) parse_si $2; samplefreq=$si_val; shift;; + -t|--trigger) triggerdat="$2"; shift;; + --) break;; + *) fail "error parsing command line: $*";; + esac + shift +done + +for f in $neededcmds; do + command -v "$f" >/dev/null || fail "Command '$f' not found" +done + +# print cpuset mountpoint if any, errorcode > 0 if noprefix option was found +cpusetdir=$(awk '$3 == "cgroup" && $4 ~ /cpuset/ { print $2; exit (match($4, /noprefix/) > 0) }' /proc/self/mounts) || cpusetprefix='' +if [ -z "$cpusetdir" ]; then + cpusetdir="$cpusetdefaultdir" + [ -d $cpusetdir ] || mkdir $cpusetdir + mount -t cgroup -o cpuset none $cpusetdir || fail "Couldn't mount cpusets. Not in kernel or already in use?" +fi + +lacpusetdir="$cpusetdir/$ladirname" +lacpusetfile="$lacpusetdir/$cpusetprefix" +sysfsdir="$debugdir/$ladirname" + +[ "$samplefreq" -ne 0 ] || fail "Invalid sample frequency" + +[ -d "$sysfsdir" ] || fail "Could not find logic analyzer root dir '$sysfsdir'. Module loaded?" +[ -x "$sysfsdir" ] || fail "Could not access logic analyzer root dir '$sysfsdir'. Need root?" + +[ $listinstances -gt 0 ] && find "$sysfsdir" -mindepth 1 -type d | sed 's|.*/||' && exit 0 + +if [ -n "$lainstance" ]; then + lasysfsdir="$sysfsdir/$lainstance" +else + lasysfsdir=$(find "$sysfsdir" -mindepth 1 -type d -print -quit) +fi +[ -d "$lasysfsdir" ] || fail "Logic analyzer directory '$lasysfsdir' not found!" +[ -d "$outputdir" ] || fail "Output directory '$outputdir' not found!" + +[ -n "$initcpu" ] && init_cpu "$initcpu" +[ -d "$lacpusetdir" ] || { echo "Auto-Isolating CPU1"; init_cpu 1; } + +ndelay=$((1000000000 / samplefreq)) +echo "$ndelay" > "$lasysfsdir"/delay_ns + +[ -n "$duration" ] && numsamples=$((samplefreq * duration / 1000000)) +echo $numsamples > "$lasysfsdir"/buf_size + +if [ -n "$triggerdat" ]; then + parse_triggerdat "$triggerdat" + printf "$trigger_bindat" > "$lasysfsdir"/trigger 2>/dev/null || fail "Trigger data '$triggerdat' rejected" +fi + +workcpu=$(cat "${lacpusetfile}effective_cpus") +[ -n "$workcpu" ] || fail "No isolated CPU found" +cpumask=$(printf '%x' $((1 << workcpu))) +instance=${lasysfsdir##*/} +echo "Setting up '$instance': $numsamples samples at ${samplefreq}Hz with ${triggerdat:-no} trigger using CPU$workcpu" +do_capture "$cpumask" & diff --git a/tools/hv/Makefile b/tools/hv/Makefile index bb52871da341..2e60e2c212cd 100644 --- a/tools/hv/Makefile +++ b/tools/hv/Makefile @@ -17,6 +17,7 @@ endif MAKEFLAGS += -r override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include +override CFLAGS += -Wno-address-of-packed-member ALL_TARGETS := hv_kvp_daemon hv_vss_daemon ifneq ($(ARCH), aarch64) diff --git a/tools/include/nolibc/stdint.h b/tools/include/nolibc/stdint.h index 6665e272e213..cd79ddd6170e 100644 --- a/tools/include/nolibc/stdint.h +++ b/tools/include/nolibc/stdint.h @@ -96,6 +96,10 @@ typedef uint64_t uintmax_t; #define UINT_FAST32_MAX SIZE_MAX #define UINT_FAST64_MAX UINT64_MAX +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + #ifndef INT_MIN #define INT_MIN (-__INT_MAX__ - 1) #endif @@ -110,4 +114,19 @@ typedef uint64_t uintmax_t; #define LONG_MAX __LONG_MAX__ #endif +#ifndef ULONG_MAX +#define ULONG_MAX ((unsigned long)(__LONG_MAX__) * 2 + 1) +#endif + +#ifndef LLONG_MIN +#define LLONG_MIN (-__LONG_LONG_MAX__ - 1) +#endif +#ifndef LLONG_MAX +#define LLONG_MAX __LONG_LONG_MAX__ +#endif + +#ifndef ULLONG_MAX +#define ULLONG_MAX ((unsigned long long)(__LONG_LONG_MAX__) * 2 + 1) +#endif + #endif /* _NOLIBC_STDINT_H */ diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index 16cd4d807251..c968dbbc4ef8 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -376,6 +376,16 @@ int setvbuf(FILE *stream __attribute__((unused)), return 0; } +static __attribute__((unused)) +const char *strerror(int errno) +{ + static char buf[18] = "errno="; + + i64toa_r(errno, &buf[6]); + + return buf; +} + /* make sure to include all global symbols */ #include "nolibc.h" diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h index 5be9d3c7435a..75aa273c23a6 100644 --- a/tools/include/nolibc/stdlib.h +++ b/tools/include/nolibc/stdlib.h @@ -438,6 +438,115 @@ char *u64toa(uint64_t in) return itoa_buffer; } +static __attribute__((unused)) +uintmax_t __strtox(const char *nptr, char **endptr, int base, intmax_t lower_limit, uintmax_t upper_limit) +{ + const char signed_ = lower_limit != 0; + unsigned char neg = 0, overflow = 0; + uintmax_t val = 0, limit, old_val; + char c; + + if (base < 0 || base > 36) { + SET_ERRNO(EINVAL); + goto out; + } + + while (isspace(*nptr)) + nptr++; + + if (*nptr == '+') { + nptr++; + } else if (*nptr == '-') { + neg = 1; + nptr++; + } + + if (signed_ && neg) + limit = -(uintmax_t)lower_limit; + else + limit = upper_limit; + + if ((base == 0 || base == 16) && + (strncmp(nptr, "0x", 2) == 0 || strncmp(nptr, "0X", 2) == 0)) { + base = 16; + nptr += 2; + } else if (base == 0 && strncmp(nptr, "0", 1) == 0) { + base = 8; + nptr += 1; + } else if (base == 0) { + base = 10; + } + + while (*nptr) { + c = *nptr; + + if (c >= '0' && c <= '9') + c -= '0'; + else if (c >= 'a' && c <= 'z') + c = c - 'a' + 10; + else if (c >= 'A' && c <= 'Z') + c = c - 'A' + 10; + else + goto out; + + if (c >= base) + goto out; + + nptr++; + old_val = val; + val *= base; + val += c; + + if (val > limit || val < old_val) + overflow = 1; + } + +out: + if (overflow) { + SET_ERRNO(ERANGE); + val = limit; + } + if (endptr) + *endptr = (char *)nptr; + return neg ? -val : val; +} + +static __attribute__((unused)) +long strtol(const char *nptr, char **endptr, int base) +{ + return __strtox(nptr, endptr, base, LONG_MIN, LONG_MAX); +} + +static __attribute__((unused)) +unsigned long strtoul(const char *nptr, char **endptr, int base) +{ + return __strtox(nptr, endptr, base, 0, ULONG_MAX); +} + +static __attribute__((unused)) +long long strtoll(const char *nptr, char **endptr, int base) +{ + return __strtox(nptr, endptr, base, LLONG_MIN, LLONG_MAX); +} + +static __attribute__((unused)) +unsigned long long strtoull(const char *nptr, char **endptr, int base) +{ + return __strtox(nptr, endptr, base, 0, ULLONG_MAX); +} + +static __attribute__((unused)) +intmax_t strtoimax(const char *nptr, char **endptr, int base) +{ + return __strtox(nptr, endptr, base, INTMAX_MIN, INTMAX_MAX); +} + +static __attribute__((unused)) +uintmax_t strtoumax(const char *nptr, char **endptr, int base) +{ + return __strtox(nptr, endptr, base, 0, UINTMAX_MAX); +} + /* make sure to include all global symbols */ #include "nolibc.h" diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index d983c48a3b6a..a00d53d02723 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -776,12 +776,8 @@ __SYSCALL(__NR_fsmount, sys_fsmount) __SYSCALL(__NR_fspick, sys_fspick) #define __NR_pidfd_open 434 __SYSCALL(__NR_pidfd_open, sys_pidfd_open) - -#ifdef __ARCH_WANT_SYS_CLONE3 #define __NR_clone3 435 __SYSCALL(__NR_clone3, sys_clone3) -#endif - #define __NR_close_range 436 __SYSCALL(__NR_close_range, sys_close_range) #define __NR_openat2 437 diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 90706a47f6ff..35bcf52dbc65 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1425,6 +1425,8 @@ enum { #define BPF_F_TEST_RUN_ON_CPU (1U << 0) /* If set, XDP frames will be transmitted after processing */ #define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1) +/* If set, apply CHECKSUM_COMPLETE to skb and validate the checksum */ +#define BPF_F_TEST_SKB_CHECKSUM_COMPLETE (1U << 2) /* type for BPF_ENABLE_STATS */ enum bpf_stats_type { @@ -6207,12 +6209,17 @@ union { \ __u64 :64; \ } __attribute__((aligned(8))) +/* The enum used in skb->tstamp_type. It specifies the clock type + * of the time stored in the skb->tstamp. + */ enum { - BPF_SKB_TSTAMP_UNSPEC, - BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ - /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, - * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC - * and try to deduce it by ingress, egress or skb->sk->sk_clockid. + BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */ + BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */ + BPF_SKB_CLOCK_REALTIME = 0, + BPF_SKB_CLOCK_MONOTONIC = 1, + BPF_SKB_CLOCK_TAI = 2, + /* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle, + * the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid. */ }; diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index b6619199a706..e2cd558ca0b4 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,4 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ - usdt.o zip.o elf.o features.o + usdt.o zip.o elf.o features.o btf_iter.o btf_relocate.o diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 2d0840ef599a..32c00db3b91b 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -116,6 +116,9 @@ struct btf { /* whether strings are already deduplicated */ bool strs_deduped; + /* whether base_btf should be freed in btf_free for this instance */ + bool owns_base; + /* BTF object FD, if loaded into kernel */ int fd; @@ -598,7 +601,7 @@ static int btf_sanity_check(const struct btf *btf) __u32 i, n = btf__type_cnt(btf); int err; - for (i = 1; i < n; i++) { + for (i = btf->start_id; i < n; i++) { t = btf_type_by_id(btf, i); err = btf_validate_type(btf, t, i); if (err) @@ -969,6 +972,8 @@ void btf__free(struct btf *btf) free(btf->raw_data); free(btf->raw_data_swapped); free(btf->type_offs); + if (btf->owns_base) + btf__free(btf->base_btf); free(btf); } @@ -1084,53 +1089,38 @@ struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf) return libbpf_ptr(btf_new(data, size, base_btf)); } -static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, - struct btf_ext **btf_ext) +struct btf_elf_secs { + Elf_Data *btf_data; + Elf_Data *btf_ext_data; + Elf_Data *btf_base_data; +}; + +static int btf_find_elf_sections(Elf *elf, const char *path, struct btf_elf_secs *secs) { - Elf_Data *btf_data = NULL, *btf_ext_data = NULL; - int err = 0, fd = -1, idx = 0; - struct btf *btf = NULL; Elf_Scn *scn = NULL; - Elf *elf = NULL; + Elf_Data *data; GElf_Ehdr ehdr; size_t shstrndx; + int idx = 0; - if (elf_version(EV_CURRENT) == EV_NONE) { - pr_warn("failed to init libelf for %s\n", path); - return ERR_PTR(-LIBBPF_ERRNO__LIBELF); - } - - fd = open(path, O_RDONLY | O_CLOEXEC); - if (fd < 0) { - err = -errno; - pr_warn("failed to open %s: %s\n", path, strerror(errno)); - return ERR_PTR(err); - } - - err = -LIBBPF_ERRNO__FORMAT; - - elf = elf_begin(fd, ELF_C_READ, NULL); - if (!elf) { - pr_warn("failed to open %s as ELF file\n", path); - goto done; - } if (!gelf_getehdr(elf, &ehdr)) { pr_warn("failed to get EHDR from %s\n", path); - goto done; + goto err; } if (elf_getshdrstrndx(elf, &shstrndx)) { pr_warn("failed to get section names section index for %s\n", path); - goto done; + goto err; } if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL)) { pr_warn("failed to get e_shstrndx from %s\n", path); - goto done; + goto err; } while ((scn = elf_nextscn(elf, scn)) != NULL) { + Elf_Data **field; GElf_Shdr sh; char *name; @@ -1138,42 +1128,102 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, if (gelf_getshdr(scn, &sh) != &sh) { pr_warn("failed to get section(%d) header from %s\n", idx, path); - goto done; + goto err; } name = elf_strptr(elf, shstrndx, sh.sh_name); if (!name) { pr_warn("failed to get section(%d) name from %s\n", idx, path); - goto done; + goto err; } - if (strcmp(name, BTF_ELF_SEC) == 0) { - btf_data = elf_getdata(scn, 0); - if (!btf_data) { - pr_warn("failed to get section(%d, %s) data from %s\n", - idx, name, path); - goto done; - } - continue; - } else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) { - btf_ext_data = elf_getdata(scn, 0); - if (!btf_ext_data) { - pr_warn("failed to get section(%d, %s) data from %s\n", - idx, name, path); - goto done; - } + + if (strcmp(name, BTF_ELF_SEC) == 0) + field = &secs->btf_data; + else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) + field = &secs->btf_ext_data; + else if (strcmp(name, BTF_BASE_ELF_SEC) == 0) + field = &secs->btf_base_data; + else continue; + + data = elf_getdata(scn, 0); + if (!data) { + pr_warn("failed to get section(%d, %s) data from %s\n", + idx, name, path); + goto err; } + *field = data; } - if (!btf_data) { + return 0; + +err: + return -LIBBPF_ERRNO__FORMAT; +} + +static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, + struct btf_ext **btf_ext) +{ + struct btf_elf_secs secs = {}; + struct btf *dist_base_btf = NULL; + struct btf *btf = NULL; + int err = 0, fd = -1; + Elf *elf = NULL; + + if (elf_version(EV_CURRENT) == EV_NONE) { + pr_warn("failed to init libelf for %s\n", path); + return ERR_PTR(-LIBBPF_ERRNO__LIBELF); + } + + fd = open(path, O_RDONLY | O_CLOEXEC); + if (fd < 0) { + err = -errno; + pr_warn("failed to open %s: %s\n", path, strerror(errno)); + return ERR_PTR(err); + } + + elf = elf_begin(fd, ELF_C_READ, NULL); + if (!elf) { + pr_warn("failed to open %s as ELF file\n", path); + goto done; + } + + err = btf_find_elf_sections(elf, path, &secs); + if (err) + goto done; + + if (!secs.btf_data) { pr_warn("failed to find '%s' ELF section in %s\n", BTF_ELF_SEC, path); err = -ENODATA; goto done; } - btf = btf_new(btf_data->d_buf, btf_data->d_size, base_btf); - err = libbpf_get_error(btf); - if (err) + + if (secs.btf_base_data) { + dist_base_btf = btf_new(secs.btf_base_data->d_buf, secs.btf_base_data->d_size, + NULL); + if (IS_ERR(dist_base_btf)) { + err = PTR_ERR(dist_base_btf); + dist_base_btf = NULL; + goto done; + } + } + + btf = btf_new(secs.btf_data->d_buf, secs.btf_data->d_size, + dist_base_btf ?: base_btf); + if (IS_ERR(btf)) { + err = PTR_ERR(btf); goto done; + } + if (dist_base_btf && base_btf) { + err = btf__relocate(btf, base_btf); + if (err) + goto done; + btf__free(dist_base_btf); + dist_base_btf = NULL; + } + + if (dist_base_btf) + btf->owns_base = true; switch (gelf_getclass(elf)) { case ELFCLASS32: @@ -1187,11 +1237,12 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, break; } - if (btf_ext && btf_ext_data) { - *btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); - err = libbpf_get_error(*btf_ext); - if (err) + if (btf_ext && secs.btf_ext_data) { + *btf_ext = btf_ext__new(secs.btf_ext_data->d_buf, secs.btf_ext_data->d_size); + if (IS_ERR(*btf_ext)) { + err = PTR_ERR(*btf_ext); goto done; + } } else if (btf_ext) { *btf_ext = NULL; } @@ -1205,6 +1256,7 @@ done: if (btf_ext) btf_ext__free(*btf_ext); + btf__free(dist_base_btf); btf__free(btf); return ERR_PTR(err); @@ -1739,9 +1791,8 @@ struct btf_pipe { struct hashmap *str_off_map; /* map string offsets from src to dst */ }; -static int btf_rewrite_str(__u32 *str_off, void *ctx) +static int btf_rewrite_str(struct btf_pipe *p, __u32 *str_off) { - struct btf_pipe *p = ctx; long mapped_off; int off, err; @@ -1771,10 +1822,11 @@ static int btf_rewrite_str(__u32 *str_off, void *ctx) return 0; } -int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type) +static int btf_add_type(struct btf_pipe *p, const struct btf_type *src_type) { - struct btf_pipe p = { .src = src_btf, .dst = btf }; + struct btf_field_iter it; struct btf_type *t; + __u32 *str_off; int sz, err; sz = btf_type_size(src_type); @@ -1782,35 +1834,33 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t return libbpf_err(sz); /* deconstruct BTF, if necessary, and invalidate raw_data */ - if (btf_ensure_modifiable(btf)) + if (btf_ensure_modifiable(p->dst)) return libbpf_err(-ENOMEM); - t = btf_add_type_mem(btf, sz); + t = btf_add_type_mem(p->dst, sz); if (!t) return libbpf_err(-ENOMEM); memcpy(t, src_type, sz); - err = btf_type_visit_str_offs(t, btf_rewrite_str, &p); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); if (err) return libbpf_err(err); - return btf_commit_type(btf, sz); + while ((str_off = btf_field_iter_next(&it))) { + err = btf_rewrite_str(p, str_off); + if (err) + return libbpf_err(err); + } + + return btf_commit_type(p->dst, sz); } -static int btf_rewrite_type_ids(__u32 *type_id, void *ctx) +int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type) { - struct btf *btf = ctx; - - if (!*type_id) /* nothing to do for VOID references */ - return 0; + struct btf_pipe p = { .src = src_btf, .dst = btf }; - /* we haven't updated btf's type count yet, so - * btf->start_id + btf->nr_types - 1 is the type ID offset we should - * add to all newly added BTF types - */ - *type_id += btf->start_id + btf->nr_types - 1; - return 0; + return btf_add_type(&p, src_type); } static size_t btf_dedup_identity_hash_fn(long key, void *ctx); @@ -1858,6 +1908,9 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) memcpy(t, src_btf->types_data, data_sz); for (i = 0; i < cnt; i++) { + struct btf_field_iter it; + __u32 *type_id, *str_off; + sz = btf_type_size(t); if (sz < 0) { /* unlikely, has to be corrupted src_btf */ @@ -1869,15 +1922,31 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) *off = t - btf->types_data; /* add, dedup, and remap strings referenced by this BTF type */ - err = btf_type_visit_str_offs(t, btf_rewrite_str, &p); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); if (err) goto err_out; + while ((str_off = btf_field_iter_next(&it))) { + err = btf_rewrite_str(&p, str_off); + if (err) + goto err_out; + } /* remap all type IDs referenced from this BTF type */ - err = btf_type_visit_type_ids(t, btf_rewrite_type_ids, btf); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); if (err) goto err_out; + while ((type_id = btf_field_iter_next(&it))) { + if (!*type_id) /* nothing to do for VOID references */ + continue; + + /* we haven't updated btf's type count yet, so + * btf->start_id + btf->nr_types - 1 is the type ID offset we should + * add to all newly added BTF types + */ + *type_id += btf->start_id + btf->nr_types - 1; + } + /* go to next type data and type offset index entry */ t += sz; off++; @@ -3453,11 +3522,19 @@ static int btf_for_each_str_off(struct btf_dedup *d, str_off_visit_fn fn, void * int i, r; for (i = 0; i < d->btf->nr_types; i++) { + struct btf_field_iter it; struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i); + __u32 *str_off; - r = btf_type_visit_str_offs(t, fn, ctx); + r = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); if (r) return r; + + while ((str_off = btf_field_iter_next(&it))) { + r = fn(str_off, ctx); + if (r) + return r; + } } if (!d->btf_ext) @@ -4919,10 +4996,23 @@ static int btf_dedup_remap_types(struct btf_dedup *d) for (i = 0; i < d->btf->nr_types; i++) { struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i); + struct btf_field_iter it; + __u32 *type_id; - r = btf_type_visit_type_ids(t, btf_dedup_remap_type_id, d); + r = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); if (r) return r; + + while ((type_id = btf_field_iter_next(&it))) { + __u32 resolved_id, new_id; + + resolved_id = resolve_type_id(d, *type_id); + new_id = d->hypot_map[resolved_id]; + if (new_id > BTF_MAX_NR_TYPES) + return -EINVAL; + + *type_id = new_id; + } } if (!d->btf_ext) @@ -5003,136 +5093,6 @@ struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_bt return btf__parse_split(path, vmlinux_btf); } -int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx) -{ - int i, n, err; - - switch (btf_kind(t)) { - case BTF_KIND_INT: - case BTF_KIND_FLOAT: - case BTF_KIND_ENUM: - case BTF_KIND_ENUM64: - return 0; - - case BTF_KIND_FWD: - case BTF_KIND_CONST: - case BTF_KIND_VOLATILE: - case BTF_KIND_RESTRICT: - case BTF_KIND_PTR: - case BTF_KIND_TYPEDEF: - case BTF_KIND_FUNC: - case BTF_KIND_VAR: - case BTF_KIND_DECL_TAG: - case BTF_KIND_TYPE_TAG: - return visit(&t->type, ctx); - - case BTF_KIND_ARRAY: { - struct btf_array *a = btf_array(t); - - err = visit(&a->type, ctx); - err = err ?: visit(&a->index_type, ctx); - return err; - } - - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: { - struct btf_member *m = btf_members(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->type, ctx); - if (err) - return err; - } - return 0; - } - - case BTF_KIND_FUNC_PROTO: { - struct btf_param *m = btf_params(t); - - err = visit(&t->type, ctx); - if (err) - return err; - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->type, ctx); - if (err) - return err; - } - return 0; - } - - case BTF_KIND_DATASEC: { - struct btf_var_secinfo *m = btf_var_secinfos(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->type, ctx); - if (err) - return err; - } - return 0; - } - - default: - return -EINVAL; - } -} - -int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx) -{ - int i, n, err; - - err = visit(&t->name_off, ctx); - if (err) - return err; - - switch (btf_kind(t)) { - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: { - struct btf_member *m = btf_members(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->name_off, ctx); - if (err) - return err; - } - break; - } - case BTF_KIND_ENUM: { - struct btf_enum *m = btf_enum(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->name_off, ctx); - if (err) - return err; - } - break; - } - case BTF_KIND_ENUM64: { - struct btf_enum64 *m = btf_enum64(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->name_off, ctx); - if (err) - return err; - } - break; - } - case BTF_KIND_FUNC_PROTO: { - struct btf_param *m = btf_params(t); - - for (i = 0, n = btf_vlen(t); i < n; i++, m++) { - err = visit(&m->name_off, ctx); - if (err) - return err; - } - break; - } - default: - break; - } - - return 0; -} - int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx) { const struct btf_ext_info *seg; @@ -5212,3 +5172,325 @@ int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void return 0; } + +struct btf_distill { + struct btf_pipe pipe; + int *id_map; + unsigned int split_start_id; + unsigned int split_start_str; + int diff_id; +}; + +static int btf_add_distilled_type_ids(struct btf_distill *dist, __u32 i) +{ + struct btf_type *split_t = btf_type_by_id(dist->pipe.src, i); + struct btf_field_iter it; + __u32 *id; + int err; + + err = btf_field_iter_init(&it, split_t, BTF_FIELD_ITER_IDS); + if (err) + return err; + while ((id = btf_field_iter_next(&it))) { + struct btf_type *base_t; + + if (!*id) + continue; + /* split BTF id, not needed */ + if (*id >= dist->split_start_id) + continue; + /* already added ? */ + if (dist->id_map[*id] > 0) + continue; + + /* only a subset of base BTF types should be referenced from + * split BTF; ensure nothing unexpected is referenced. + */ + base_t = btf_type_by_id(dist->pipe.src, *id); + switch (btf_kind(base_t)) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_FWD: + case BTF_KIND_ARRAY: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_TYPEDEF: + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + case BTF_KIND_PTR: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_VOLATILE: + case BTF_KIND_FUNC_PROTO: + case BTF_KIND_TYPE_TAG: + dist->id_map[*id] = *id; + break; + default: + pr_warn("unexpected reference to base type[%u] of kind [%u] when creating distilled base BTF.\n", + *id, btf_kind(base_t)); + return -EINVAL; + } + /* If a base type is used, ensure types it refers to are + * marked as used also; so for example if we find a PTR to INT + * we need both the PTR and INT. + * + * The only exception is named struct/unions, since distilled + * base BTF composite types have no members. + */ + if (btf_is_composite(base_t) && base_t->name_off) + continue; + err = btf_add_distilled_type_ids(dist, *id); + if (err) + return err; + } + return 0; +} + +static int btf_add_distilled_types(struct btf_distill *dist) +{ + bool adding_to_base = dist->pipe.dst->start_id == 1; + int id = btf__type_cnt(dist->pipe.dst); + struct btf_type *t; + int i, err = 0; + + + /* Add types for each of the required references to either distilled + * base or split BTF, depending on type characteristics. + */ + for (i = 1; i < dist->split_start_id; i++) { + const char *name; + int kind; + + if (!dist->id_map[i]) + continue; + t = btf_type_by_id(dist->pipe.src, i); + kind = btf_kind(t); + name = btf__name_by_offset(dist->pipe.src, t->name_off); + + switch (kind) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_FWD: + /* Named int, float, fwd are added to base. */ + if (!adding_to_base) + continue; + err = btf_add_type(&dist->pipe, t); + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + /* Named struct/union are added to base as 0-vlen + * struct/union of same size. Anonymous struct/unions + * are added to split BTF as-is. + */ + if (adding_to_base) { + if (!t->name_off) + continue; + err = btf_add_composite(dist->pipe.dst, kind, name, t->size); + } else { + if (t->name_off) + continue; + err = btf_add_type(&dist->pipe, t); + } + break; + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + /* Named enum[64]s are added to base as a sized + * enum; relocation will match with appropriately-named + * and sized enum or enum64. + * + * Anonymous enums are added to split BTF as-is. + */ + if (adding_to_base) { + if (!t->name_off) + continue; + err = btf__add_enum(dist->pipe.dst, name, t->size); + } else { + if (t->name_off) + continue; + err = btf_add_type(&dist->pipe, t); + } + break; + case BTF_KIND_ARRAY: + case BTF_KIND_TYPEDEF: + case BTF_KIND_PTR: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_VOLATILE: + case BTF_KIND_FUNC_PROTO: + case BTF_KIND_TYPE_TAG: + /* All other types are added to split BTF. */ + if (adding_to_base) + continue; + err = btf_add_type(&dist->pipe, t); + break; + default: + pr_warn("unexpected kind when adding base type '%s'[%u] of kind [%u] to distilled base BTF.\n", + name, i, kind); + return -EINVAL; + + } + if (err < 0) + break; + dist->id_map[i] = id++; + } + return err; +} + +/* Split BTF ids without a mapping will be shifted downwards since distilled + * base BTF is smaller than the original base BTF. For those that have a + * mapping (either to base or updated split BTF), update the id based on + * that mapping. + */ +static int btf_update_distilled_type_ids(struct btf_distill *dist, __u32 i) +{ + struct btf_type *t = btf_type_by_id(dist->pipe.dst, i); + struct btf_field_iter it; + __u32 *id; + int err; + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); + if (err) + return err; + while ((id = btf_field_iter_next(&it))) { + if (dist->id_map[*id]) + *id = dist->id_map[*id]; + else if (*id >= dist->split_start_id) + *id -= dist->diff_id; + } + return 0; +} + +/* Create updated split BTF with distilled base BTF; distilled base BTF + * consists of BTF information required to clarify the types that split + * BTF refers to, omitting unneeded details. Specifically it will contain + * base types and memberless definitions of named structs, unions and enumerated + * types. Associated reference types like pointers, arrays and anonymous + * structs, unions and enumerated types will be added to split BTF. + * Size is recorded for named struct/unions to help guide matching to the + * target base BTF during later relocation. + * + * The only case where structs, unions or enumerated types are fully represented + * is when they are anonymous; in such cases, the anonymous type is added to + * split BTF in full. + * + * We return newly-created split BTF where the split BTF refers to a newly-created + * distilled base BTF. Both must be freed separately by the caller. + */ +int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf, + struct btf **new_split_btf) +{ + struct btf *new_base = NULL, *new_split = NULL; + const struct btf *old_base; + unsigned int n = btf__type_cnt(src_btf); + struct btf_distill dist = {}; + struct btf_type *t; + int i, err = 0; + + /* src BTF must be split BTF. */ + old_base = btf__base_btf(src_btf); + if (!new_base_btf || !new_split_btf || !old_base) + return libbpf_err(-EINVAL); + + new_base = btf__new_empty(); + if (!new_base) + return libbpf_err(-ENOMEM); + dist.id_map = calloc(n, sizeof(*dist.id_map)); + if (!dist.id_map) { + err = -ENOMEM; + goto done; + } + dist.pipe.src = src_btf; + dist.pipe.dst = new_base; + dist.pipe.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL); + if (IS_ERR(dist.pipe.str_off_map)) { + err = -ENOMEM; + goto done; + } + dist.split_start_id = btf__type_cnt(old_base); + dist.split_start_str = old_base->hdr->str_len; + + /* Pass over src split BTF; generate the list of base BTF type ids it + * references; these will constitute our distilled BTF set to be + * distributed over base and split BTF as appropriate. + */ + for (i = src_btf->start_id; i < n; i++) { + err = btf_add_distilled_type_ids(&dist, i); + if (err < 0) + goto done; + } + /* Next add types for each of the required references to base BTF and split BTF + * in turn. + */ + err = btf_add_distilled_types(&dist); + if (err < 0) + goto done; + + /* Create new split BTF with distilled base BTF as its base; the final + * state is split BTF with distilled base BTF that represents enough + * about its base references to allow it to be relocated with the base + * BTF available. + */ + new_split = btf__new_empty_split(new_base); + if (!new_split) { + err = -errno; + goto done; + } + dist.pipe.dst = new_split; + /* First add all split types */ + for (i = src_btf->start_id; i < n; i++) { + t = btf_type_by_id(src_btf, i); + err = btf_add_type(&dist.pipe, t); + if (err < 0) + goto done; + } + /* Now add distilled types to split BTF that are not added to base. */ + err = btf_add_distilled_types(&dist); + if (err < 0) + goto done; + + /* All split BTF ids will be shifted downwards since there are less base + * BTF ids in distilled base BTF. + */ + dist.diff_id = dist.split_start_id - btf__type_cnt(new_base); + + n = btf__type_cnt(new_split); + /* Now update base/split BTF ids. */ + for (i = 1; i < n; i++) { + err = btf_update_distilled_type_ids(&dist, i); + if (err < 0) + break; + } +done: + free(dist.id_map); + hashmap__free(dist.pipe.str_off_map); + if (err) { + btf__free(new_split); + btf__free(new_base); + return libbpf_err(err); + } + *new_base_btf = new_base; + *new_split_btf = new_split; + + return 0; +} + +const struct btf_header *btf_header(const struct btf *btf) +{ + return btf->hdr; +} + +void btf_set_base_btf(struct btf *btf, const struct btf *base_btf) +{ + btf->base_btf = (struct btf *)base_btf; + btf->start_id = btf__type_cnt(base_btf); + btf->start_str_off = base_btf->hdr->str_len; +} + +int btf__relocate(struct btf *btf, const struct btf *base_btf) +{ + int err = btf_relocate(btf, base_btf, NULL); + + if (!err) + btf->owns_base = false; + return libbpf_err(err); +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 8e6880d91c84..b68d216837a9 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -18,6 +18,7 @@ extern "C" { #define BTF_ELF_SEC ".BTF" #define BTF_EXT_ELF_SEC ".BTF.ext" +#define BTF_BASE_ELF_SEC ".BTF.base" #define MAPS_ELF_SEC ".maps" struct btf; @@ -107,6 +108,27 @@ LIBBPF_API struct btf *btf__new_empty(void); */ LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf); +/** + * @brief **btf__distill_base()** creates new versions of the split BTF + * *src_btf* and its base BTF. The new base BTF will only contain the types + * needed to improve robustness of the split BTF to small changes in base BTF. + * When that split BTF is loaded against a (possibly changed) base, this + * distilled base BTF will help update references to that (possibly changed) + * base BTF. + * + * Both the new split and its associated new base BTF must be freed by + * the caller. + * + * If successful, 0 is returned and **new_base_btf** and **new_split_btf** + * will point at new base/split BTF. Both the new split and its associated + * new base BTF must be freed by the caller. + * + * A negative value is returned on error and the thread-local `errno` variable + * is set to the error code as well. + */ +LIBBPF_API int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf, + struct btf **new_split_btf); + LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext); LIBBPF_API struct btf *btf__parse_split(const char *path, struct btf *base_btf); LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext); @@ -231,6 +253,20 @@ struct btf_dedup_opts { LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts); +/** + * @brief **btf__relocate()** will check the split BTF *btf* for references + * to base BTF kinds, and verify those references are compatible with + * *base_btf*; if they are, *btf* is adjusted such that is re-parented to + * *base_btf* and type ids and strings are adjusted to accommodate this. + * + * If successful, 0 is returned and **btf** now has **base_btf** as its + * base. + * + * A negative value is returned on error and the thread-local `errno` variable + * is set to the error code as well. + */ +LIBBPF_API int btf__relocate(struct btf *btf, const struct btf *base_btf); + struct btf_dump; struct btf_dump_opts { diff --git a/tools/lib/bpf/btf_iter.c b/tools/lib/bpf/btf_iter.c new file mode 100644 index 000000000000..9a6c822c2294 --- /dev/null +++ b/tools/lib/bpf/btf_iter.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2021 Facebook */ +/* Copyright (c) 2024, Oracle and/or its affiliates. */ + +#ifdef __KERNEL__ +#include <linux/bpf.h> +#include <linux/btf.h> + +#define btf_var_secinfos(t) (struct btf_var_secinfo *)btf_type_var_secinfo(t) + +#else +#include "btf.h" +#include "libbpf_internal.h" +#endif + +int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, + enum btf_field_iter_kind iter_kind) +{ + it->p = NULL; + it->m_idx = -1; + it->off_idx = 0; + it->vlen = 0; + + switch (iter_kind) { + case BTF_FIELD_ITER_IDS: + switch (btf_kind(t)) { + case BTF_KIND_UNKN: + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + it->desc = (struct btf_field_desc) {}; + break; + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: + it->desc = (struct btf_field_desc) { 1, {offsetof(struct btf_type, type)} }; + break; + case BTF_KIND_ARRAY: + it->desc = (struct btf_field_desc) { + 2, {sizeof(struct btf_type) + offsetof(struct btf_array, type), + sizeof(struct btf_type) + offsetof(struct btf_array, index_type)} + }; + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + it->desc = (struct btf_field_desc) { + 0, {}, + sizeof(struct btf_member), + 1, {offsetof(struct btf_member, type)} + }; + break; + case BTF_KIND_FUNC_PROTO: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, type)}, + sizeof(struct btf_param), + 1, {offsetof(struct btf_param, type)} + }; + break; + case BTF_KIND_DATASEC: + it->desc = (struct btf_field_desc) { + 0, {}, + sizeof(struct btf_var_secinfo), + 1, {offsetof(struct btf_var_secinfo, type)} + }; + break; + default: + return -EINVAL; + } + break; + case BTF_FIELD_ITER_STRS: + switch (btf_kind(t)) { + case BTF_KIND_UNKN: + it->desc = (struct btf_field_desc) {}; + break; + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_FWD: + case BTF_KIND_ARRAY: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: + case BTF_KIND_DATASEC: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)} + }; + break; + case BTF_KIND_ENUM: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_enum), + 1, {offsetof(struct btf_enum, name_off)} + }; + break; + case BTF_KIND_ENUM64: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_enum64), + 1, {offsetof(struct btf_enum64, name_off)} + }; + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_member), + 1, {offsetof(struct btf_member, name_off)} + }; + break; + case BTF_KIND_FUNC_PROTO: + it->desc = (struct btf_field_desc) { + 1, {offsetof(struct btf_type, name_off)}, + sizeof(struct btf_param), + 1, {offsetof(struct btf_param, name_off)} + }; + break; + default: + return -EINVAL; + } + break; + default: + return -EINVAL; + } + + if (it->desc.m_sz) + it->vlen = btf_vlen(t); + + it->p = t; + return 0; +} + +__u32 *btf_field_iter_next(struct btf_field_iter *it) +{ + if (!it->p) + return NULL; + + if (it->m_idx < 0) { + if (it->off_idx < it->desc.t_off_cnt) + return it->p + it->desc.t_offs[it->off_idx++]; + /* move to per-member iteration */ + it->m_idx = 0; + it->p += sizeof(struct btf_type); + it->off_idx = 0; + } + + /* if type doesn't have members, stop */ + if (it->desc.m_sz == 0) { + it->p = NULL; + return NULL; + } + + if (it->off_idx >= it->desc.m_off_cnt) { + /* exhausted this member's fields, go to the next member */ + it->m_idx++; + it->p += it->desc.m_sz; + it->off_idx = 0; + } + + if (it->m_idx < it->vlen) + return it->p + it->desc.m_offs[it->off_idx++]; + + it->p = NULL; + return NULL; +} diff --git a/tools/lib/bpf/btf_relocate.c b/tools/lib/bpf/btf_relocate.c new file mode 100644 index 000000000000..17f8b32f94a0 --- /dev/null +++ b/tools/lib/bpf/btf_relocate.c @@ -0,0 +1,519 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Oracle and/or its affiliates. */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#ifdef __KERNEL__ +#include <linux/bpf.h> +#include <linux/bsearch.h> +#include <linux/btf.h> +#include <linux/sort.h> +#include <linux/string.h> +#include <linux/bpf_verifier.h> + +#define btf_type_by_id (struct btf_type *)btf_type_by_id +#define btf__type_cnt btf_nr_types +#define btf__base_btf btf_base_btf +#define btf__name_by_offset btf_name_by_offset +#define btf__str_by_offset btf_str_by_offset +#define btf_kflag btf_type_kflag + +#define calloc(nmemb, sz) kvcalloc(nmemb, sz, GFP_KERNEL | __GFP_NOWARN) +#define free(ptr) kvfree(ptr) +#define qsort(base, num, sz, cmp) sort(base, num, sz, cmp, NULL) + +#else + +#include "btf.h" +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_internal.h" + +#endif /* __KERNEL__ */ + +struct btf; + +struct btf_relocate { + struct btf *btf; + const struct btf *base_btf; + const struct btf *dist_base_btf; + unsigned int nr_base_types; + unsigned int nr_split_types; + unsigned int nr_dist_base_types; + int dist_str_len; + int base_str_len; + __u32 *id_map; + __u32 *str_map; +}; + +/* Set temporarily in relocation id_map if distilled base struct/union is + * embedded in a split BTF struct/union; in such a case, size information must + * match between distilled base BTF and base BTF representation of type. + */ +#define BTF_IS_EMBEDDED ((__u32)-1) + +/* <name, size, id> triple used in sorting/searching distilled base BTF. */ +struct btf_name_info { + const char *name; + /* set when search requires a size match */ + bool needs_size: 1; + unsigned int size: 31; + __u32 id; +}; + +static int btf_relocate_rewrite_type_id(struct btf_relocate *r, __u32 i) +{ + struct btf_type *t = btf_type_by_id(r->btf, i); + struct btf_field_iter it; + __u32 *id; + int err; + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); + if (err) + return err; + + while ((id = btf_field_iter_next(&it))) + *id = r->id_map[*id]; + return 0; +} + +/* Simple string comparison used for sorting within BTF, since all distilled + * types are named. If strings match, and size is non-zero for both elements + * fall back to using size for ordering. + */ +static int cmp_btf_name_size(const void *n1, const void *n2) +{ + const struct btf_name_info *ni1 = n1; + const struct btf_name_info *ni2 = n2; + int name_diff = strcmp(ni1->name, ni2->name); + + if (!name_diff && ni1->needs_size && ni2->needs_size) + return ni2->size - ni1->size; + return name_diff; +} + +/* Binary search with a small twist; find leftmost element that matches + * so that we can then iterate through all exact matches. So for example + * searching { "a", "bb", "bb", "c" } we would always match on the + * leftmost "bb". + */ +static struct btf_name_info *search_btf_name_size(struct btf_name_info *key, + struct btf_name_info *vals, + int nelems) +{ + struct btf_name_info *ret = NULL; + int high = nelems - 1; + int low = 0; + + while (low <= high) { + int mid = (low + high)/2; + struct btf_name_info *val = &vals[mid]; + int diff = cmp_btf_name_size(key, val); + + if (diff == 0) + ret = val; + /* even if found, keep searching for leftmost match */ + if (diff <= 0) + high = mid - 1; + else + low = mid + 1; + } + return ret; +} + +/* If a member of a split BTF struct/union refers to a base BTF + * struct/union, mark that struct/union id temporarily in the id_map + * with BTF_IS_EMBEDDED. Members can be const/restrict/volatile/typedef + * reference types, but if a pointer is encountered, the type is no longer + * considered embedded. + */ +static int btf_mark_embedded_composite_type_ids(struct btf_relocate *r, __u32 i) +{ + struct btf_type *t = btf_type_by_id(r->btf, i); + struct btf_field_iter it; + __u32 *id; + int err; + + if (!btf_is_composite(t)) + return 0; + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); + if (err) + return err; + + while ((id = btf_field_iter_next(&it))) { + __u32 next_id = *id; + + while (next_id) { + t = btf_type_by_id(r->btf, next_id); + switch (btf_kind(t)) { + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_VOLATILE: + case BTF_KIND_TYPEDEF: + case BTF_KIND_TYPE_TAG: + next_id = t->type; + break; + case BTF_KIND_ARRAY: { + struct btf_array *a = btf_array(t); + + next_id = a->type; + break; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + if (next_id < r->nr_dist_base_types) + r->id_map[next_id] = BTF_IS_EMBEDDED; + next_id = 0; + break; + default: + next_id = 0; + break; + } + } + } + + return 0; +} + +/* Build a map from distilled base BTF ids to base BTF ids. To do so, iterate + * through base BTF looking up distilled type (using binary search) equivalents. + */ +static int btf_relocate_map_distilled_base(struct btf_relocate *r) +{ + struct btf_name_info *info, *info_end; + struct btf_type *base_t, *dist_t; + __u8 *base_name_cnt = NULL; + int err = 0; + __u32 id; + + /* generate a sort index array of name/type ids sorted by name for + * distilled base BTF to speed name-based lookups. + */ + info = calloc(r->nr_dist_base_types, sizeof(*info)); + if (!info) { + err = -ENOMEM; + goto done; + } + info_end = info + r->nr_dist_base_types; + for (id = 0; id < r->nr_dist_base_types; id++) { + dist_t = btf_type_by_id(r->dist_base_btf, id); + info[id].name = btf__name_by_offset(r->dist_base_btf, dist_t->name_off); + info[id].id = id; + info[id].size = dist_t->size; + info[id].needs_size = true; + } + qsort(info, r->nr_dist_base_types, sizeof(*info), cmp_btf_name_size); + + /* Mark distilled base struct/union members of split BTF structs/unions + * in id_map with BTF_IS_EMBEDDED; this signals that these types + * need to match both name and size, otherwise embedding the base + * struct/union in the split type is invalid. + */ + for (id = r->nr_dist_base_types; id < r->nr_split_types; id++) { + err = btf_mark_embedded_composite_type_ids(r, id); + if (err) + goto done; + } + + /* Collect name counts for composite types in base BTF. If multiple + * instances of a struct/union of the same name exist, we need to use + * size to determine which to map to since name alone is ambiguous. + */ + base_name_cnt = calloc(r->base_str_len, sizeof(*base_name_cnt)); + if (!base_name_cnt) { + err = -ENOMEM; + goto done; + } + for (id = 1; id < r->nr_base_types; id++) { + base_t = btf_type_by_id(r->base_btf, id); + if (!btf_is_composite(base_t) || !base_t->name_off) + continue; + if (base_name_cnt[base_t->name_off] < 255) + base_name_cnt[base_t->name_off]++; + } + + /* Now search base BTF for matching distilled base BTF types. */ + for (id = 1; id < r->nr_base_types; id++) { + struct btf_name_info *dist_info, base_info = {}; + int dist_kind, base_kind; + + base_t = btf_type_by_id(r->base_btf, id); + /* distilled base consists of named types only. */ + if (!base_t->name_off) + continue; + base_kind = btf_kind(base_t); + base_info.id = id; + base_info.name = btf__name_by_offset(r->base_btf, base_t->name_off); + switch (base_kind) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + /* These types should match both name and size */ + base_info.needs_size = true; + base_info.size = base_t->size; + break; + case BTF_KIND_FWD: + /* No size considerations for fwds. */ + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + /* Size only needs to be used for struct/union if there + * are multiple types in base BTF with the same name. + * If there are multiple _distilled_ types with the same + * name (a very unlikely scenario), that doesn't matter + * unless corresponding _base_ types to match them are + * missing. + */ + base_info.needs_size = base_name_cnt[base_t->name_off] > 1; + base_info.size = base_t->size; + break; + default: + continue; + } + /* iterate over all matching distilled base types */ + for (dist_info = search_btf_name_size(&base_info, info, r->nr_dist_base_types); + dist_info != NULL && dist_info < info_end && + cmp_btf_name_size(&base_info, dist_info) == 0; + dist_info++) { + if (!dist_info->id || dist_info->id >= r->nr_dist_base_types) { + pr_warn("base BTF id [%d] maps to invalid distilled base BTF id [%d]\n", + id, dist_info->id); + err = -EINVAL; + goto done; + } + dist_t = btf_type_by_id(r->dist_base_btf, dist_info->id); + dist_kind = btf_kind(dist_t); + + /* Validate that the found distilled type is compatible. + * Do not error out on mismatch as another match may + * occur for an identically-named type. + */ + switch (dist_kind) { + case BTF_KIND_FWD: + switch (base_kind) { + case BTF_KIND_FWD: + if (btf_kflag(dist_t) != btf_kflag(base_t)) + continue; + break; + case BTF_KIND_STRUCT: + if (btf_kflag(base_t)) + continue; + break; + case BTF_KIND_UNION: + if (!btf_kflag(base_t)) + continue; + break; + default: + continue; + } + break; + case BTF_KIND_INT: + if (dist_kind != base_kind || + btf_int_encoding(base_t) != btf_int_encoding(dist_t)) + continue; + break; + case BTF_KIND_FLOAT: + if (dist_kind != base_kind) + continue; + break; + case BTF_KIND_ENUM: + /* ENUM and ENUM64 are encoded as sized ENUM in + * distilled base BTF. + */ + if (base_kind != dist_kind && base_kind != BTF_KIND_ENUM64) + continue; + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + /* size verification is required for embedded + * struct/unions. + */ + if (r->id_map[dist_info->id] == BTF_IS_EMBEDDED && + base_t->size != dist_t->size) + continue; + break; + default: + continue; + } + if (r->id_map[dist_info->id] && + r->id_map[dist_info->id] != BTF_IS_EMBEDDED) { + /* we already have a match; this tells us that + * multiple base types of the same name + * have the same size, since for cases where + * multiple types have the same name we match + * on name and size. In this case, we have + * no way of determining which to relocate + * to in base BTF, so error out. + */ + pr_warn("distilled base BTF type '%s' [%u], size %u has multiple candidates of the same size (ids [%u, %u]) in base BTF\n", + base_info.name, dist_info->id, + base_t->size, id, r->id_map[dist_info->id]); + err = -EINVAL; + goto done; + } + /* map id and name */ + r->id_map[dist_info->id] = id; + r->str_map[dist_t->name_off] = base_t->name_off; + } + } + /* ensure all distilled BTF ids now have a mapping... */ + for (id = 1; id < r->nr_dist_base_types; id++) { + const char *name; + + if (r->id_map[id] && r->id_map[id] != BTF_IS_EMBEDDED) + continue; + dist_t = btf_type_by_id(r->dist_base_btf, id); + name = btf__name_by_offset(r->dist_base_btf, dist_t->name_off); + pr_warn("distilled base BTF type '%s' [%d] is not mapped to base BTF id\n", + name, id); + err = -EINVAL; + break; + } +done: + free(base_name_cnt); + free(info); + return err; +} + +/* distilled base should only have named int/float/enum/fwd/struct/union types. */ +static int btf_relocate_validate_distilled_base(struct btf_relocate *r) +{ + unsigned int i; + + for (i = 1; i < r->nr_dist_base_types; i++) { + struct btf_type *t = btf_type_by_id(r->dist_base_btf, i); + int kind = btf_kind(t); + + switch (kind) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_FWD: + if (t->name_off) + break; + pr_warn("type [%d], kind [%d] is invalid for distilled base BTF; it is anonymous\n", + i, kind); + return -EINVAL; + default: + pr_warn("type [%d] in distilled based BTF has unexpected kind [%d]\n", + i, kind); + return -EINVAL; + } + } + return 0; +} + +static int btf_relocate_rewrite_strs(struct btf_relocate *r, __u32 i) +{ + struct btf_type *t = btf_type_by_id(r->btf, i); + struct btf_field_iter it; + __u32 *str_off; + int off, err; + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); + if (err) + return err; + + while ((str_off = btf_field_iter_next(&it))) { + if (!*str_off) + continue; + if (*str_off >= r->dist_str_len) { + *str_off += r->base_str_len - r->dist_str_len; + } else { + off = r->str_map[*str_off]; + if (!off) { + pr_warn("string '%s' [offset %u] is not mapped to base BTF", + btf__str_by_offset(r->btf, off), *str_off); + return -ENOENT; + } + *str_off = off; + } + } + return 0; +} + +/* If successful, output of relocation is updated BTF with base BTF pointing + * at base_btf, and type ids, strings adjusted accordingly. + */ +int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **id_map) +{ + unsigned int nr_types = btf__type_cnt(btf); + const struct btf_header *dist_base_hdr; + const struct btf_header *base_hdr; + struct btf_relocate r = {}; + int err = 0; + __u32 id, i; + + r.dist_base_btf = btf__base_btf(btf); + if (!base_btf || r.dist_base_btf == base_btf) + return -EINVAL; + + r.nr_dist_base_types = btf__type_cnt(r.dist_base_btf); + r.nr_base_types = btf__type_cnt(base_btf); + r.nr_split_types = nr_types - r.nr_dist_base_types; + r.btf = btf; + r.base_btf = base_btf; + + r.id_map = calloc(nr_types, sizeof(*r.id_map)); + r.str_map = calloc(btf_header(r.dist_base_btf)->str_len, sizeof(*r.str_map)); + dist_base_hdr = btf_header(r.dist_base_btf); + base_hdr = btf_header(r.base_btf); + r.dist_str_len = dist_base_hdr->str_len; + r.base_str_len = base_hdr->str_len; + if (!r.id_map || !r.str_map) { + err = -ENOMEM; + goto err_out; + } + + err = btf_relocate_validate_distilled_base(&r); + if (err) + goto err_out; + + /* Split BTF ids need to be adjusted as base and distilled base + * have different numbers of types, changing the start id of split + * BTF. + */ + for (id = r.nr_dist_base_types; id < nr_types; id++) + r.id_map[id] = id + r.nr_base_types - r.nr_dist_base_types; + + /* Build a map from distilled base ids to actual base BTF ids; it is used + * to update split BTF id references. Also build a str_map mapping from + * distilled base BTF names to base BTF names. + */ + err = btf_relocate_map_distilled_base(&r); + if (err) + goto err_out; + + /* Next, rewrite type ids in split BTF, replacing split ids with updated + * ids based on number of types in base BTF, and base ids with + * relocated ids from base_btf. + */ + for (i = 0, id = r.nr_dist_base_types; i < r.nr_split_types; i++, id++) { + err = btf_relocate_rewrite_type_id(&r, id); + if (err) + goto err_out; + } + /* String offsets now need to be updated using the str_map. */ + for (i = 0; i < r.nr_split_types; i++) { + err = btf_relocate_rewrite_strs(&r, i + r.nr_dist_base_types); + if (err) + goto err_out; + } + /* Finally reset base BTF to be base_btf */ + btf_set_base_btf(btf, base_btf); + + if (id_map) { + *id_map = r.id_map; + r.id_map = NULL; + } +err_out: + free(r.id_map); + free(r.str_map); + return err; +} diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5401f2df463d..a3be6f8fac09 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -229,7 +229,30 @@ static const char * const prog_type_name[] = { static int __base_pr(enum libbpf_print_level level, const char *format, va_list args) { - if (level == LIBBPF_DEBUG) + const char *env_var = "LIBBPF_LOG_LEVEL"; + static enum libbpf_print_level min_level = LIBBPF_INFO; + static bool initialized; + + if (!initialized) { + char *verbosity; + + initialized = true; + verbosity = getenv(env_var); + if (verbosity) { + if (strcasecmp(verbosity, "warn") == 0) + min_level = LIBBPF_WARN; + else if (strcasecmp(verbosity, "debug") == 0) + min_level = LIBBPF_DEBUG; + else if (strcasecmp(verbosity, "info") == 0) + min_level = LIBBPF_INFO; + else + fprintf(stderr, "libbpf: unrecognized '%s' envvar value: '%s', should be one of 'warn', 'debug', or 'info'.\n", + env_var, verbosity); + } + } + + /* if too verbose, skip logging */ + if (level > min_level) return 0; return vfprintf(stderr, format, args); @@ -549,6 +572,7 @@ struct bpf_map { bool pinned; bool reused; bool autocreate; + bool autoattach; __u64 map_extra; }; @@ -1377,6 +1401,7 @@ static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name, map->def.value_size = type->size; map->def.max_entries = 1; map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0; + map->autoattach = true; map->st_ops = calloc(1, sizeof(*map->st_ops)); if (!map->st_ops) @@ -4796,6 +4821,20 @@ int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) return 0; } +int bpf_map__set_autoattach(struct bpf_map *map, bool autoattach) +{ + if (!bpf_map__is_struct_ops(map)) + return libbpf_err(-EINVAL); + + map->autoattach = autoattach; + return 0; +} + +bool bpf_map__autoattach(const struct bpf_map *map) +{ + return map->autoattach; +} + int bpf_map__reuse_fd(struct bpf_map *map, int fd) { struct bpf_map_info info; @@ -10336,7 +10375,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) struct bpf_map * bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) { - if (prev == NULL) + if (prev == NULL && obj != NULL) return obj->maps; return __bpf_map__iter(prev, obj, 1); @@ -10345,7 +10384,7 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) struct bpf_map * bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next) { - if (next == NULL) { + if (next == NULL && obj != NULL) { if (!obj->nr_maps) return NULL; return obj->maps + obj->nr_maps - 1; @@ -12877,8 +12916,10 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) __u32 zero = 0; int err, fd; - if (!bpf_map__is_struct_ops(map)) + if (!bpf_map__is_struct_ops(map)) { + pr_warn("map '%s': can't attach non-struct_ops map\n", map->name); return libbpf_err_ptr(-EINVAL); + } if (map->fd < 0) { pr_warn("map '%s': can't attach BPF map without FD (was it created?)\n", map->name); @@ -13671,14 +13712,15 @@ int libbpf_num_possible_cpus(void) static int populate_skeleton_maps(const struct bpf_object *obj, struct bpf_map_skeleton *maps, - size_t map_cnt) + size_t map_cnt, size_t map_skel_sz) { int i; for (i = 0; i < map_cnt; i++) { - struct bpf_map **map = maps[i].map; - const char *name = maps[i].name; - void **mmaped = maps[i].mmaped; + struct bpf_map_skeleton *map_skel = (void *)maps + i * map_skel_sz; + struct bpf_map **map = map_skel->map; + const char *name = map_skel->name; + void **mmaped = map_skel->mmaped; *map = bpf_object__find_map_by_name(obj, name); if (!*map) { @@ -13695,13 +13737,14 @@ static int populate_skeleton_maps(const struct bpf_object *obj, static int populate_skeleton_progs(const struct bpf_object *obj, struct bpf_prog_skeleton *progs, - size_t prog_cnt) + size_t prog_cnt, size_t prog_skel_sz) { int i; for (i = 0; i < prog_cnt; i++) { - struct bpf_program **prog = progs[i].prog; - const char *name = progs[i].name; + struct bpf_prog_skeleton *prog_skel = (void *)progs + i * prog_skel_sz; + struct bpf_program **prog = prog_skel->prog; + const char *name = prog_skel->name; *prog = bpf_object__find_program_by_name(obj, name); if (!*prog) { @@ -13742,13 +13785,13 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s, } *s->obj = obj; - err = populate_skeleton_maps(obj, s->maps, s->map_cnt); + err = populate_skeleton_maps(obj, s->maps, s->map_cnt, s->map_skel_sz); if (err) { pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err); return libbpf_err(err); } - err = populate_skeleton_progs(obj, s->progs, s->prog_cnt); + err = populate_skeleton_progs(obj, s->progs, s->prog_cnt, s->prog_skel_sz); if (err) { pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err); return libbpf_err(err); @@ -13778,20 +13821,20 @@ int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s) return libbpf_err(-errno); } - err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt); + err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt, s->map_skel_sz); if (err) { pr_warn("failed to populate subskeleton maps: %d\n", err); return libbpf_err(err); } - err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt); + err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt, s->prog_skel_sz); if (err) { pr_warn("failed to populate subskeleton maps: %d\n", err); return libbpf_err(err); } for (var_idx = 0; var_idx < s->var_cnt; var_idx++) { - var_skel = &s->vars[var_idx]; + var_skel = (void *)s->vars + var_idx * s->var_skel_sz; map = *var_skel->map; map_type_id = bpf_map__btf_value_type_id(map); map_type = btf__type_by_id(btf, map_type_id); @@ -13838,10 +13881,11 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) } for (i = 0; i < s->map_cnt; i++) { - struct bpf_map *map = *s->maps[i].map; + struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; + struct bpf_map *map = *map_skel->map; size_t mmap_sz = bpf_map_mmap_sz(map); int prot, map_fd = map->fd; - void **mmaped = s->maps[i].mmaped; + void **mmaped = map_skel->mmaped; if (!mmaped) continue; @@ -13889,8 +13933,9 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) int i, err; for (i = 0; i < s->prog_cnt; i++) { - struct bpf_program *prog = *s->progs[i].prog; - struct bpf_link **link = s->progs[i].link; + struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz; + struct bpf_program *prog = *prog_skel->prog; + struct bpf_link **link = prog_skel->link; if (!prog->autoload || !prog->autoattach) continue; @@ -13922,6 +13967,38 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) */ } + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; + struct bpf_map *map = *map_skel->map; + struct bpf_link **link; + + if (!map->autocreate || !map->autoattach) + continue; + + /* only struct_ops maps can be attached */ + if (!bpf_map__is_struct_ops(map)) + continue; + + /* skeleton is created with earlier version of bpftool, notify user */ + if (s->map_skel_sz < offsetofend(struct bpf_map_skeleton, link)) { + pr_warn("map '%s': BPF skeleton version is old, skipping map auto-attachment...\n", + bpf_map__name(map)); + continue; + } + + link = map_skel->link; + if (*link) + continue; + + *link = bpf_map__attach_struct_ops(map); + if (!*link) { + err = -errno; + pr_warn("map '%s': failed to auto-attach: %d\n", bpf_map__name(map), err); + return libbpf_err(err); + } + } + return 0; } @@ -13930,11 +14007,25 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) int i; for (i = 0; i < s->prog_cnt; i++) { - struct bpf_link **link = s->progs[i].link; + struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz; + struct bpf_link **link = prog_skel->link; bpf_link__destroy(*link); *link = NULL; } + + if (s->map_skel_sz < sizeof(struct bpf_map_skeleton)) + return; + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; + struct bpf_link **link = map_skel->link; + + if (link) { + bpf_link__destroy(*link); + *link = NULL; + } + } } void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) @@ -13942,8 +14033,7 @@ void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) if (!s) return; - if (s->progs) - bpf_object__detach_skeleton(s); + bpf_object__detach_skeleton(s); if (s->obj) bpf_object__close(*s->obj); free(s->maps); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index c3f77d9260fe..64a6a3d323e3 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -98,7 +98,10 @@ typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level, /** * @brief **libbpf_set_print()** sets user-provided log callback function to - * be used for libbpf warnings and informational messages. + * be used for libbpf warnings and informational messages. If the user callback + * is not set, messages are logged to stderr by default. The verbosity of these + * messages can be controlled by setting the environment variable + * LIBBPF_LOG_LEVEL to either warn, info, or debug. * @param fn The log print function. If NULL, libbpf won't print anything. * @return Pointer to old print function. * @@ -976,6 +979,23 @@ LIBBPF_API int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate); LIBBPF_API bool bpf_map__autocreate(const struct bpf_map *map); /** + * @brief **bpf_map__set_autoattach()** sets whether libbpf has to auto-attach + * map during BPF skeleton attach phase. + * @param map the BPF map instance + * @param autoattach whether to attach map during BPF skeleton attach phase + * @return 0 on success; negative error code, otherwise + */ +LIBBPF_API int bpf_map__set_autoattach(struct bpf_map *map, bool autoattach); + +/** + * @brief **bpf_map__autoattach()** returns whether BPF map is configured to + * auto-attach during BPF skeleton attach phase. + * @param map the BPF map instance + * @return true if map is set to auto-attach during skeleton attach phase; false, otherwise + */ +LIBBPF_API bool bpf_map__autoattach(const struct bpf_map *map); + +/** * @brief **bpf_map__fd()** gets the file descriptor of the passed * BPF map * @param map the BPF map instance @@ -1669,6 +1689,7 @@ struct bpf_map_skeleton { const char *name; struct bpf_map **map; void **mmaped; + struct bpf_link **link; }; struct bpf_prog_skeleton { diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index c1ce8aa3520b..8f0d9ea3b1b4 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -419,6 +419,10 @@ LIBBPF_1.4.0 { LIBBPF_1.5.0 { global: + btf__distill_base; + btf__relocate; + bpf_map__autoattach; + bpf_map__set_autoattach; bpf_program__attach_sockmap; ring__consume_n; ring_buffer__consume_n; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index a0dcfb82e455..408df59e0771 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -234,6 +234,9 @@ struct btf_type; struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id); const char *btf_kind_str(const struct btf_type *t); const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id); +const struct btf_header *btf_header(const struct btf *btf); +void btf_set_base_btf(struct btf *btf, const struct btf *base_btf); +int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **id_map); static inline enum btf_func_linkage btf_func_linkage(const struct btf_type *t) { @@ -508,11 +511,33 @@ struct bpf_line_info_min { __u32 line_col; }; +enum btf_field_iter_kind { + BTF_FIELD_ITER_IDS, + BTF_FIELD_ITER_STRS, +}; + +struct btf_field_desc { + /* once-per-type offsets */ + int t_off_cnt, t_offs[2]; + /* member struct size, or zero, if no members */ + int m_sz; + /* repeated per-member offsets */ + int m_off_cnt, m_offs[1]; +}; + +struct btf_field_iter { + struct btf_field_desc desc; + void *p; + int m_idx; + int off_idx; + int vlen; +}; + +int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, enum btf_field_iter_kind iter_kind); +__u32 *btf_field_iter_next(struct btf_field_iter *it); typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx); typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx); -int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx); -int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx); int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx); int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx); __s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name, @@ -597,13 +622,9 @@ static inline int ensure_good_fd(int fd) return fd; } -static inline int sys_dup2(int oldfd, int newfd) +static inline int sys_dup3(int oldfd, int newfd, int flags) { -#ifdef __NR_dup2 - return syscall(__NR_dup2, oldfd, newfd); -#else - return syscall(__NR_dup3, oldfd, newfd, 0); -#endif + return syscall(__NR_dup3, oldfd, newfd, flags); } /* Point *fixed_fd* to the same file that *tmp_fd* points to. @@ -614,7 +635,7 @@ static inline int reuse_fd(int fixed_fd, int tmp_fd) { int err; - err = sys_dup2(tmp_fd, fixed_fd); + err = sys_dup3(tmp_fd, fixed_fd, O_CLOEXEC); err = err < 0 ? -errno : 0; close(tmp_fd); /* clean up temporary FD */ return err; diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 0d4be829551b..9cd3d4109788 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -957,19 +957,33 @@ static int check_btf_str_off(__u32 *str_off, void *ctx) static int linker_sanity_check_btf(struct src_obj *obj) { struct btf_type *t; - int i, n, err = 0; + int i, n, err; if (!obj->btf) return 0; n = btf__type_cnt(obj->btf); for (i = 1; i < n; i++) { + struct btf_field_iter it; + __u32 *type_id, *str_off; + t = btf_type_by_id(obj->btf, i); - err = err ?: btf_type_visit_type_ids(t, check_btf_type_id, obj->btf); - err = err ?: btf_type_visit_str_offs(t, check_btf_str_off, obj->btf); + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_IDS); + if (err) + return err; + while ((type_id = btf_field_iter_next(&it))) { + if (*type_id >= n) + return -EINVAL; + } + + err = btf_field_iter_init(&it, t, BTF_FIELD_ITER_STRS); if (err) return err; + while ((str_off = btf_field_iter_next(&it))) { + if (!btf__str_by_offset(obj->btf, *str_off)) + return -EINVAL; + } } return 0; @@ -2213,10 +2227,17 @@ static int linker_fixup_btf(struct src_obj *obj) vi = btf_var_secinfos(t); for (j = 0, m = btf_vlen(t); j < m; j++, vi++) { const struct btf_type *vt = btf__type_by_id(obj->btf, vi->type); - const char *var_name = btf__str_by_offset(obj->btf, vt->name_off); - int var_linkage = btf_var(vt)->linkage; + const char *var_name; + int var_linkage; Elf64_Sym *sym; + /* could be a variable or function */ + if (!btf_is_var(vt)) + continue; + + var_name = btf__str_by_offset(obj->btf, vt->name_off); + var_linkage = btf_var(vt)->linkage; + /* no need to patch up static or extern vars */ if (var_linkage != BTF_VAR_GLOBAL_ALLOCATED) continue; @@ -2234,26 +2255,10 @@ static int linker_fixup_btf(struct src_obj *obj) return 0; } -static int remap_type_id(__u32 *type_id, void *ctx) -{ - int *id_map = ctx; - int new_id = id_map[*type_id]; - - /* Error out if the type wasn't remapped. Ignore VOID which stays VOID. */ - if (new_id == 0 && *type_id != 0) { - pr_warn("failed to find new ID mapping for original BTF type ID %u\n", *type_id); - return -EINVAL; - } - - *type_id = id_map[*type_id]; - - return 0; -} - static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) { const struct btf_type *t; - int i, j, n, start_id, id; + int i, j, n, start_id, id, err; const char *name; if (!obj->btf) @@ -2324,9 +2329,25 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) n = btf__type_cnt(linker->btf); for (i = start_id; i < n; i++) { struct btf_type *dst_t = btf_type_by_id(linker->btf, i); + struct btf_field_iter it; + __u32 *type_id; - if (btf_type_visit_type_ids(dst_t, remap_type_id, obj->btf_type_map)) - return -EINVAL; + err = btf_field_iter_init(&it, dst_t, BTF_FIELD_ITER_IDS); + if (err) + return err; + + while ((type_id = btf_field_iter_next(&it))) { + int new_id = obj->btf_type_map[*type_id]; + + /* Error out if the type wasn't remapped. Ignore VOID which stays VOID. */ + if (new_id == 0 && *type_id != 0) { + pr_warn("failed to find new ID mapping for original BTF type ID %u\n", + *type_id); + return -EINVAL; + } + + *type_id = obj->btf_type_map[*type_id]; + } } /* Rewrite VAR/FUNC underlying types (i.e., FUNC's FUNC_PROTO and VAR's diff --git a/tools/memory-model/Documentation/README b/tools/memory-model/Documentation/README index db90a26dbdf4..304162743a5b 100644 --- a/tools/memory-model/Documentation/README +++ b/tools/memory-model/Documentation/README @@ -47,6 +47,10 @@ DESCRIPTION OF FILES README This file. +access-marking.txt + Guidelines for marking intentionally concurrent accesses to + shared memory. + cheatsheet.txt Quick-reference guide to the Linux-kernel memory model. diff --git a/tools/memory-model/Documentation/access-marking.txt b/tools/memory-model/Documentation/access-marking.txt index 65778222183e..3fbe77fd564a 100644 --- a/tools/memory-model/Documentation/access-marking.txt +++ b/tools/memory-model/Documentation/access-marking.txt @@ -6,7 +6,8 @@ normal accesses to shared memory, that is "normal" as in accesses that do not use read-modify-write atomic operations. It also describes how to document these accesses, both with comments and with special assertions processed by the Kernel Concurrency Sanitizer (KCSAN). This discussion -builds on an earlier LWN article [1]. +builds on an earlier LWN article [1] and Linux Foundation mentorship +session [2]. ACCESS-MARKING OPTIONS @@ -24,6 +25,11 @@ The Linux kernel provides the following access-marking options: 4. WRITE_ONCE(), for example, "WRITE_ONCE(a, b);" The various forms of atomic_set() also fit in here. +5. __data_racy, for example "int __data_racy a;" + +6. KCSAN's negative-marking assertions, ASSERT_EXCLUSIVE_ACCESS() + and ASSERT_EXCLUSIVE_WRITER(), are described in the + "ACCESS-DOCUMENTATION OPTIONS" section below. These may be used in combination, as shown in this admittedly improbable example: @@ -31,7 +37,7 @@ example: WRITE_ONCE(a, b + data_race(c + d) + READ_ONCE(e)); Neither plain C-language accesses nor data_race() (#1 and #2 above) place -any sort of constraint on the compiler's choice of optimizations [2]. +any sort of constraint on the compiler's choice of optimizations [3]. In contrast, READ_ONCE() and WRITE_ONCE() (#3 and #4 above) restrict the compiler's use of code-motion and common-subexpression optimizations. Therefore, if a given access is involved in an intentional data race, @@ -205,6 +211,23 @@ because doing otherwise prevents KCSAN from detecting violations of your code's synchronization rules. +Use of __data_racy +------------------ + +Adding the __data_racy type qualifier to the declaration of a variable +causes KCSAN to treat all accesses to that variable as if they were +enclosed by data_race(). However, __data_racy does not affect the +compiler, though one could imagine hardened kernel builds treating the +__data_racy type qualifier as if it was the volatile keyword. + +Note well that __data_racy is subject to the same pointer-declaration +rules as are other type qualifiers such as const and volatile. +For example: + + int __data_racy *p; // Pointer to data-racy data. + int *__data_racy p; // Data-racy pointer to non-data-racy data. + + ACCESS-DOCUMENTATION OPTIONS ============================ @@ -342,7 +365,7 @@ as follows: Because foo is read locklessly, all accesses are marked. The purpose of the ASSERT_EXCLUSIVE_WRITER() is to allow KCSAN to check for a buggy -concurrent lockless write. +concurrent write, whether marked or not. Lock-Protected Writes With Heuristic Lockless Reads @@ -594,5 +617,8 @@ REFERENCES [1] "Concurrency bugs should fear the big bad data-race detector (part 2)" https://lwn.net/Articles/816854/ -[2] "Who's afraid of a big bad optimizing compiler?" +[2] "The Kernel Concurrency Sanitizer" + https://www.linuxfoundation.org/webinars/the-kernel-concurrency-sanitizer + +[3] "Who's afraid of a big bad optimizing compiler?" https://lwn.net/Articles/793253/ diff --git a/tools/memory-model/lock.cat b/tools/memory-model/lock.cat index 53b5a492739d..03c12efed66a 100644 --- a/tools/memory-model/lock.cat +++ b/tools/memory-model/lock.cat @@ -54,6 +54,12 @@ flag ~empty LKR \ domain(lk-rmw) as unpaired-LKR *) empty ([LKW] ; po-loc ; [LKR]) \ (po-loc ; [UL] ; po-loc) as lock-nest +(* + * In the same way, spin_is_locked() inside a critical section must always + * return True (no RU events can be in a critical section for the same lock). + *) +empty ([LKW] ; po-loc ; [RU]) \ (po-loc ; [UL] ; po-loc) as nested-is-locked + (* The final value of a spinlock should not be tested *) flag ~empty [FW] ; loc ; [ALL-LOCKS] as lock-final @@ -79,42 +85,50 @@ empty ([UNMATCHED-LKW] ; loc ; [UNMATCHED-LKW]) \ id as unmatched-locks (* rfi for LF events: link each LKW to the LF events in its critical section *) let rfi-lf = ([LKW] ; po-loc ; [LF]) \ ([LKW] ; po-loc ; [UL] ; po-loc) -(* rfe for LF events *) +(* Utility macro to convert a single pair to a single-edge relation *) +let pair-to-relation p = p ++ 0 + +(* + * If a given LF event e is outside a critical section, it cannot read + * internally but it may read from an LKW event in another thread. + * Compute the relation containing these possible edges. + *) +let possible-rfe-noncrit-lf e = (LKW * {e}) & loc & ext + +(* Compute set of sets of possible rfe edges for LF events *) let all-possible-rfe-lf = (* - * Given an LF event r, compute the possible rfe edges for that event - * (all those starting from LKW events in other threads), - * and then convert that relation to a set of single-edge relations. + * Convert the possible-rfe-noncrit-lf relation for e + * to a set of single edges *) - let possible-rfe-lf r = - let pair-to-relation p = p ++ 0 - in map pair-to-relation ((LKW * {r}) & loc & ext) - (* Do this for each LF event r that isn't in rfi-lf *) - in map possible-rfe-lf (LF \ range(rfi-lf)) + let set-of-singleton-rfe-lf e = + map pair-to-relation (possible-rfe-noncrit-lf e) + (* Do this for each LF event e that isn't in rfi-lf *) + in map set-of-singleton-rfe-lf (LF \ range(rfi-lf)) (* Generate all rf relations for LF events *) with rfe-lf from cross(all-possible-rfe-lf) let rf-lf = rfe-lf | rfi-lf (* - * RU, i.e., spin_is_locked() returning False, is slightly different. - * We rely on the memory model to rule out cases where spin_is_locked() - * within one of the lock's critical sections returns False. + * A given RU event e may read internally from the last po-previous UL, + * or it may read from a UL event in another thread or the initial write. + * Compute the relation containing these possible edges. *) - -(* rfi for RU events: an RU may read from the last po-previous UL *) -let rfi-ru = ([UL] ; po-loc ; [RU]) \ ([UL] ; po-loc ; [LKW] ; po-loc) - -(* rfe for RU events: an RU may read from an external UL or the initial write *) -let all-possible-rfe-ru = - let possible-rfe-ru r = - let pair-to-relation p = p ++ 0 - in map pair-to-relation (((UL | IW) * {r}) & loc & ext) - in map possible-rfe-ru RU +let possible-rf-ru e = (((UL * {e}) & po-loc) \ + ([UL] ; po-loc ; [UL] ; po-loc)) | + (((UL | IW) * {e}) & loc & ext) + +(* Compute set of sets of possible rf edges for RU events *) +let all-possible-rf-ru = + (* Convert the possible-rf-ru relation for e to a set of single edges *) + let set-of-singleton-rf-ru e = + map pair-to-relation (possible-rf-ru e) + (* Do this for each RU event e *) + in map set-of-singleton-rf-ru RU (* Generate all rf relations for RU events *) -with rfe-ru from cross(all-possible-rfe-ru) -let rf-ru = rfe-ru | rfi-ru +with rf-ru from cross(all-possible-rf-ru) (* Final rf relation *) let rf = rf | rf-lf | rf-ru diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile index 8e9e09d84e26..d1cdf2a8f826 100644 --- a/tools/net/ynl/Makefile +++ b/tools/net/ynl/Makefile @@ -2,9 +2,12 @@ SUBDIRS = lib generated samples -all: $(SUBDIRS) +all: $(SUBDIRS) libynl.a samples: | lib generated +libynl.a: | lib generated + @echo -e "\tAR $@" + @ar rcs $@ lib/ynl.o generated/*-user.o $(SUBDIRS): @if [ -f "$@/Makefile" ] ; then \ @@ -17,5 +20,6 @@ clean distclean: $(MAKE) -C $$dir $@; \ fi \ done + rm -f libynl.a .PHONY: all clean distclean $(SUBDIRS) diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index f4e8eb79c1b8..0712b5e82eb7 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -16,7 +16,8 @@ get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2) CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h) CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h) -CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) +CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \ + $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h) CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h) CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h) @@ -25,3 +26,4 @@ CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h) CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) CFLAGS_ovs_flow:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) CFLAGS_ovs_vport:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) +CFLAGS_tcp_metrics:=$(call get_hdr_inc,_LINUX_TCP_METRICS_H,tcp_metrics.h) diff --git a/tools/net/ynl/lib/Makefile b/tools/net/ynl/lib/Makefile index dfff3ecd1cba..2887cc5de530 100644 --- a/tools/net/ynl/lib/Makefile +++ b/tools/net/ynl/lib/Makefile @@ -14,7 +14,9 @@ include $(wildcard *.d) all: ynl.a ynl.a: $(OBJS) - ar rcs $@ $(OBJS) + @echo -e "\tAR $@" + @ar rcs $@ $(OBJS) + clean: rm -f *.o *.d *~ rm -rf __pycache__ diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index 6cf890080dc0..3c09a7bbfba5 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -45,17 +45,17 @@ struct ynl_policy_attr { enum ynl_policy_type type; unsigned int len; const char *name; - struct ynl_policy_nest *nest; + const struct ynl_policy_nest *nest; }; struct ynl_policy_nest { unsigned int max_attr; - struct ynl_policy_attr *table; + const struct ynl_policy_attr *table; }; struct ynl_parse_arg { struct ynl_sock *ys; - struct ynl_policy_nest *rsp_policy; + const struct ynl_policy_nest *rsp_policy; void *data; }; @@ -79,7 +79,7 @@ static inline void *ynl_dump_obj_next(void *obj) struct ynl_dump_list_type *list; uptr -= offsetof(struct ynl_dump_list_type, data); - list = (void *)uptr; + list = (struct ynl_dump_list_type *)uptr; uptr = (unsigned long)list->next; uptr += offsetof(struct ynl_dump_list_type, data); @@ -119,7 +119,7 @@ struct ynl_dump_state { }; struct ynl_ntf_info { - struct ynl_policy_nest *policy; + const struct ynl_policy_nest *policy; ynl_parse_cb_t cb; size_t alloc_sz; void (*free)(struct ynl_ntf_base_type *ntf); @@ -139,7 +139,7 @@ int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg); static inline struct nlmsghdr *ynl_nlmsg_put_header(void *buf) { - struct nlmsghdr *nlh = buf; + struct nlmsghdr *nlh = (struct nlmsghdr *)buf; memset(nlh, 0, sizeof(*nlh)); nlh->nlmsg_len = NLMSG_HDRLEN; @@ -196,7 +196,7 @@ static inline void *ynl_attr_data(const struct nlattr *attr) static inline void *ynl_attr_data_end(const struct nlattr *attr) { - return ynl_attr_data(attr) + ynl_attr_data_len(attr); + return (char *)ynl_attr_data(attr) + ynl_attr_data_len(attr); } #define ynl_attr_for_each(attr, nlh, fixed_hdr_sz) \ @@ -228,7 +228,7 @@ ynl_attr_next(const void *end, const struct nlattr *prev) { struct nlattr *attr; - attr = (void *)((char *)prev + NLA_ALIGN(prev->nla_len)); + attr = (struct nlattr *)((char *)prev + NLA_ALIGN(prev->nla_len)); return ynl_attr_if_good(end, attr); } @@ -237,8 +237,8 @@ ynl_attr_first(const void *start, size_t len, size_t skip) { struct nlattr *attr; - attr = (void *)((char *)start + NLMSG_ALIGN(skip)); - return ynl_attr_if_good(start + len, attr); + attr = (struct nlattr *)((char *)start + NLMSG_ALIGN(skip)); + return ynl_attr_if_good((char *)start + len, attr); } static inline bool @@ -262,9 +262,9 @@ ynl_attr_nest_start(struct nlmsghdr *nlh, unsigned int attr_type) struct nlattr *attr; if (__ynl_attr_put_overflow(nlh, 0)) - return ynl_nlmsg_end_addr(nlh) - NLA_HDRLEN; + return (struct nlattr *)ynl_nlmsg_end_addr(nlh) - 1; - attr = ynl_nlmsg_end_addr(nlh); + attr = (struct nlattr *)ynl_nlmsg_end_addr(nlh); attr->nla_type = attr_type | NLA_F_NESTED; nlh->nlmsg_len += NLA_HDRLEN; @@ -286,7 +286,7 @@ ynl_attr_put(struct nlmsghdr *nlh, unsigned int attr_type, if (__ynl_attr_put_overflow(nlh, size)) return; - attr = ynl_nlmsg_end_addr(nlh); + attr = (struct nlattr *)ynl_nlmsg_end_addr(nlh); attr->nla_type = attr_type; attr->nla_len = NLA_HDRLEN + size; @@ -305,10 +305,10 @@ ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str) if (__ynl_attr_put_overflow(nlh, len)) return; - attr = ynl_nlmsg_end_addr(nlh); + attr = (struct nlattr *)ynl_nlmsg_end_addr(nlh); attr->nla_type = attr_type; - strcpy(ynl_attr_data(attr), str); + strcpy((char *)ynl_attr_data(attr), str); attr->nla_len = NLA_HDRLEN + NLA_ALIGN(len); nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len); diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index 4b9c091fc86b..fcb18a5a6d70 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -46,7 +46,7 @@ /* -- Netlink boiler plate */ static int -ynl_err_walk_report_one(struct ynl_policy_nest *policy, unsigned int type, +ynl_err_walk_report_one(const struct ynl_policy_nest *policy, unsigned int type, char *str, int str_sz, int *n) { if (!policy) { @@ -75,8 +75,8 @@ ynl_err_walk_report_one(struct ynl_policy_nest *policy, unsigned int type, static int ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off, - struct ynl_policy_nest *policy, char *str, int str_sz, - struct ynl_policy_nest **nest_pol) + const struct ynl_policy_nest *policy, char *str, int str_sz, + const struct ynl_policy_nest **nest_pol) { unsigned int astart_off, aend_off; const struct nlattr *attr; @@ -206,7 +206,7 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh, bad_attr[n] = '\0'; } if (tb[NLMSGERR_ATTR_MISS_TYPE]) { - struct ynl_policy_nest *nest_pol = NULL; + const struct ynl_policy_nest *nest_pol = NULL; unsigned int n, off, type; void *start, *end; int n2; @@ -296,7 +296,7 @@ static int ynl_cb_done(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg) int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr) { - struct ynl_policy_attr *policy; + const struct ynl_policy_attr *policy; unsigned int type, len; unsigned char *data; diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h index eef7c6324ed4..6cd570b283ea 100644 --- a/tools/net/ynl/lib/ynl.h +++ b/tools/net/ynl/lib/ynl.h @@ -76,7 +76,7 @@ struct ynl_sock { struct ynl_ntf_base_type **ntf_last_next; struct nlmsghdr *nlh; - struct ynl_policy_nest *req_policy; + const struct ynl_policy_nest *req_policy; unsigned char *tx_buf; unsigned char *rx_buf; unsigned char raw_buf[]; diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 35e666928119..d42c1d605969 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -743,6 +743,8 @@ class YnlFamily(SpecFamily): decoded = attr.as_scalar(attr_spec['type'], attr_spec.byte_order) if 'enum' in attr_spec: decoded = self._decode_enum(decoded, attr_spec) + elif attr_spec.display_hint: + decoded = self._formatted_string(decoded, attr_spec.display_hint) elif attr_spec["type"] == 'indexed-array': decoded = self._decode_array_attr(attr, attr_spec) elif attr_spec["type"] == 'bitfield32': diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index a42d62b23ee0..51529fabd517 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -59,9 +59,9 @@ class Type(SpecAttr): if 'nested-attributes' in attr: self.nested_attrs = attr['nested-attributes'] if self.nested_attrs == family.name: - self.nested_render_name = c_lower(f"{family.name}") + self.nested_render_name = c_lower(f"{family.ident_name}") else: - self.nested_render_name = c_lower(f"{family.name}_{self.nested_attrs}") + self.nested_render_name = c_lower(f"{family.ident_name}_{self.nested_attrs}") if self.nested_attrs in self.family.consts: self.nested_struct_type = 'struct ' + self.nested_render_name + '_' @@ -693,9 +693,9 @@ class Struct: self.nested = type_list is None if family.name == c_lower(space_name): - self.render_name = c_lower(family.name) + self.render_name = c_lower(family.ident_name) else: - self.render_name = c_lower(family.name + '-' + space_name) + self.render_name = c_lower(family.ident_name + '-' + space_name) self.struct_name = 'struct ' + self.render_name if self.nested and space_name in family.consts: self.struct_name += '_' @@ -761,7 +761,7 @@ class EnumEntry(SpecEnumEntry): class EnumSet(SpecEnumSet): def __init__(self, family, yaml): - self.render_name = c_lower(family.name + '-' + yaml['name']) + self.render_name = c_lower(family.ident_name + '-' + yaml['name']) if 'enum-name' in yaml: if yaml['enum-name']: @@ -777,7 +777,7 @@ class EnumSet(SpecEnumSet): else: self.user_type = 'int' - self.value_pfx = yaml.get('name-prefix', f"{family.name}-{yaml['name']}-") + self.value_pfx = yaml.get('name-prefix', f"{family.ident_name}-{yaml['name']}-") super().__init__(family, yaml) @@ -802,9 +802,9 @@ class AttrSet(SpecAttrSet): if 'name-prefix' in yaml: pfx = yaml['name-prefix'] elif self.name == family.name: - pfx = family.name + '-a-' + pfx = family.ident_name + '-a-' else: - pfx = f"{family.name}-a-{self.name}-" + pfx = f"{family.ident_name}-a-{self.name}-" self.name_prefix = c_upper(pfx) self.max_name = c_upper(self.yaml.get('attr-max-name', f"{self.name_prefix}max")) self.cnt_name = c_upper(self.yaml.get('attr-cnt-name', f"__{self.name_prefix}max")) @@ -861,7 +861,7 @@ class Operation(SpecOperation): def __init__(self, family, yaml, req_value, rsp_value): super().__init__(family, yaml, req_value, rsp_value) - self.render_name = c_lower(family.name + '_' + self.name) + self.render_name = c_lower(family.ident_name + '_' + self.name) self.dual_policy = ('do' in yaml and 'request' in yaml['do']) and \ ('dump' in yaml and 'request' in yaml['dump']) @@ -911,11 +911,11 @@ class Family(SpecFamily): if 'uapi-header' in self.yaml: self.uapi_header = self.yaml['uapi-header'] else: - self.uapi_header = f"linux/{self.name}.h" + self.uapi_header = f"linux/{self.ident_name}.h" if self.uapi_header.startswith("linux/") and self.uapi_header.endswith('.h'): self.uapi_header_name = self.uapi_header[6:-2] else: - self.uapi_header_name = self.name + self.uapi_header_name = self.ident_name def resolve(self): self.resolve_up(super()) @@ -923,7 +923,7 @@ class Family(SpecFamily): if self.yaml.get('protocol', 'genetlink') not in {'genetlink', 'genetlink-c', 'genetlink-legacy'}: raise Exception("Codegen only supported for genetlink") - self.c_name = c_lower(self.name) + self.c_name = c_lower(self.ident_name) if 'name-prefix' in self.yaml['operations']: self.op_prefix = c_upper(self.yaml['operations']['name-prefix']) else: @@ -1507,12 +1507,12 @@ def print_dump_prototype(ri): def put_typol_fwd(cw, struct): - cw.p(f'extern struct ynl_policy_nest {struct.render_name}_nest;') + cw.p(f'extern const struct ynl_policy_nest {struct.render_name}_nest;') def put_typol(cw, struct): type_max = struct.attr_set.max_name - cw.block_start(line=f'struct ynl_policy_attr {struct.render_name}_policy[{type_max} + 1] =') + cw.block_start(line=f'const struct ynl_policy_attr {struct.render_name}_policy[{type_max} + 1] =') for _, arg in struct.member_list(): arg.attr_typol(cw) @@ -1520,7 +1520,7 @@ def put_typol(cw, struct): cw.block_end(line=';') cw.nl() - cw.block_start(line=f'struct ynl_policy_nest {struct.render_name}_nest =') + cw.block_start(line=f'const struct ynl_policy_nest {struct.render_name}_nest =') cw.p(f'.max_attr = {type_max},') cw.p(f'.table = {struct.render_name}_policy,') cw.block_end(line=';') @@ -2173,7 +2173,7 @@ def print_kernel_op_table_fwd(family, cw, terminate): exported = not kernel_can_gen_family_struct(family) if not terminate or exported: - cw.p(f"/* Ops table for {family.name} */") + cw.p(f"/* Ops table for {family.ident_name} */") pol_to_struct = {'global': 'genl_small_ops', 'per-op': 'genl_ops', @@ -2225,12 +2225,12 @@ def print_kernel_op_table_fwd(family, cw, terminate): continue if 'do' in op: - name = c_lower(f"{family.name}-nl-{op_name}-doit") + name = c_lower(f"{family.ident_name}-nl-{op_name}-doit") cw.write_func_prot('int', name, ['struct sk_buff *skb', 'struct genl_info *info'], suffix=';') if 'dump' in op: - name = c_lower(f"{family.name}-nl-{op_name}-dumpit") + name = c_lower(f"{family.ident_name}-nl-{op_name}-dumpit") cw.write_func_prot('int', name, ['struct sk_buff *skb', 'struct netlink_callback *cb'], suffix=';') cw.nl() @@ -2256,13 +2256,13 @@ def print_kernel_op_table(family, cw): for x in op['dont-validate']])), ) for op_mode in ['do', 'dump']: if op_mode in op: - name = c_lower(f"{family.name}-nl-{op_name}-{op_mode}it") + name = c_lower(f"{family.ident_name}-nl-{op_name}-{op_mode}it") members.append((op_mode + 'it', name)) if family.kernel_policy == 'per-op': struct = Struct(family, op['attribute-set'], type_list=op['do']['request']['attributes']) - name = c_lower(f"{family.name}-{op_name}-nl-policy") + name = c_lower(f"{family.ident_name}-{op_name}-nl-policy") members.append(('policy', name)) members.append(('maxattr', struct.attr_max_val.enum_name)) if 'flags' in op: @@ -2294,7 +2294,7 @@ def print_kernel_op_table(family, cw): members.append(('validate', ' | '.join([c_upper('genl-dont-validate-' + x) for x in dont_validate])), ) - name = c_lower(f"{family.name}-nl-{op_name}-{op_mode}it") + name = c_lower(f"{family.ident_name}-nl-{op_name}-{op_mode}it") if 'pre' in op[op_mode]: members.append((cb_names[op_mode]['pre'], c_lower(op[op_mode]['pre']))) members.append((op_mode + 'it', name)) @@ -2305,9 +2305,9 @@ def print_kernel_op_table(family, cw): type_list=op[op_mode]['request']['attributes']) if op.dual_policy: - name = c_lower(f"{family.name}-{op_name}-{op_mode}-nl-policy") + name = c_lower(f"{family.ident_name}-{op_name}-{op_mode}-nl-policy") else: - name = c_lower(f"{family.name}-{op_name}-nl-policy") + name = c_lower(f"{family.ident_name}-{op_name}-nl-policy") members.append(('policy', name)) members.append(('maxattr', struct.attr_max_val.enum_name)) flags = (op['flags'] if 'flags' in op else []) + ['cmd-cap-' + op_mode] @@ -2326,7 +2326,7 @@ def print_kernel_mcgrp_hdr(family, cw): cw.block_start('enum') for grp in family.mcgrps['list']: - grp_id = c_upper(f"{family.name}-nlgrp-{grp['name']},") + grp_id = c_upper(f"{family.ident_name}-nlgrp-{grp['name']},") cw.p(grp_id) cw.block_end(';') cw.nl() @@ -2339,7 +2339,7 @@ def print_kernel_mcgrp_src(family, cw): cw.block_start('static const struct genl_multicast_group ' + family.c_name + '_nl_mcgrps[] =') for grp in family.mcgrps['list']: name = grp['name'] - grp_id = c_upper(f"{family.name}-nlgrp-{name}") + grp_id = c_upper(f"{family.ident_name}-nlgrp-{name}") cw.p('[' + grp_id + '] = { "' + name + '", },') cw.block_end(';') cw.nl() @@ -2361,7 +2361,7 @@ def print_kernel_family_struct_src(family, cw): if not kernel_can_gen_family_struct(family): return - cw.block_start(f"struct genl_family {family.name}_nl_family __ro_after_init =") + cw.block_start(f"struct genl_family {family.ident_name}_nl_family __ro_after_init =") cw.p('.name\t\t= ' + family.fam_key + ',') cw.p('.version\t= ' + family.ver_key + ',') cw.p('.netnsok\t= true,') @@ -2429,7 +2429,7 @@ def render_uapi(family, cw): cw.p(' */') uapi_enum_start(family, cw, const, 'name') - name_pfx = const.get('name-prefix', f"{family.name}-{const['name']}-") + name_pfx = const.get('name-prefix', f"{family.ident_name}-{const['name']}-") for entry in enum.entries.values(): suffix = ',' if entry.value_change: @@ -2451,7 +2451,7 @@ def render_uapi(family, cw): cw.nl() elif const['type'] == 'const': defines.append([c_upper(family.get('c-define-name', - f"{family.name}-{const['name']}")), + f"{family.ident_name}-{const['name']}")), const['value']]) if defines: @@ -2529,7 +2529,7 @@ def render_uapi(family, cw): defines = [] for grp in family.mcgrps['list']: name = grp['name'] - defines.append([c_upper(grp.get('c-define-name', f"{family.name}-mcgrp-{name}")), + defines.append([c_upper(grp.get('c-define-name', f"{family.ident_name}-mcgrp-{name}")), f'{name}']) cw.nl() if defines: diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index 657e881d2ea4..6c56d0d726b4 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -49,7 +49,7 @@ def inline(text: str) -> str: def sanitize(text: str) -> str: """Remove newlines and multiple spaces""" # This is useful for some fields that are spread across multiple lines - return str(text).replace("\n", "").strip() + return str(text).replace("\n", " ").strip() def rst_fields(key: str, value: str, level: int = 0) -> str: @@ -156,7 +156,10 @@ def parse_do(do_dict: Dict[str, Any], level: int = 0) -> str: lines = [] for key in do_dict.keys(): lines.append(rst_paragraph(bold(key), level + 1)) - lines.append(parse_do_attributes(do_dict[key], level + 1) + "\n") + if key in ['request', 'reply']: + lines.append(parse_do_attributes(do_dict[key], level + 1) + "\n") + else: + lines.append(headroom(level + 2) + do_dict[key] + "\n") return "\n".join(lines) @@ -172,13 +175,13 @@ def parse_do_attributes(attrs: Dict[str, Any], level: int = 0) -> str: def parse_operations(operations: List[Dict[str, Any]], namespace: str) -> str: """Parse operations block""" - preprocessed = ["name", "doc", "title", "do", "dump"] + preprocessed = ["name", "doc", "title", "do", "dump", "flags"] linkable = ["fixed-header", "attribute-set"] lines = [] for operation in operations: lines.append(rst_section(namespace, 'operation', operation["name"])) - lines.append(rst_paragraph(sanitize(operation["doc"])) + "\n") + lines.append(rst_paragraph(operation["doc"]) + "\n") for key in operation.keys(): if key in preprocessed: @@ -188,6 +191,8 @@ def parse_operations(operations: List[Dict[str, Any]], namespace: str) -> str: if key in linkable: value = rst_ref(namespace, key, value) lines.append(rst_fields(key, value, 0)) + if 'flags' in operation: + lines.append(rst_fields('flags', rst_list_inline(operation['flags']))) if "do" in operation: lines.append(rst_paragraph(":do:", 0)) diff --git a/tools/objtool/Documentation/objtool.txt b/tools/objtool/Documentation/objtool.txt index fe39c2a8ef0d..7c3ee959b63c 100644 --- a/tools/objtool/Documentation/objtool.txt +++ b/tools/objtool/Documentation/objtool.txt @@ -284,6 +284,25 @@ the objtool maintainers. Otherwise the stack frame may not get created before the call. + objtool can help with pinpointing the exact function where it happens: + + $ OBJTOOL_ARGS="--verbose" make arch/x86/kvm/ + + arch/x86/kvm/kvm.o: warning: objtool: .altinstr_replacement+0xc5: call without frame pointer save/setup + arch/x86/kvm/kvm.o: warning: objtool: em_loop.part.0+0x29: (alt) + arch/x86/kvm/kvm.o: warning: objtool: em_loop.part.0+0x0: <=== (sym) + LD [M] arch/x86/kvm/kvm-intel.o + 0000 0000000000028220 <em_loop.part.0>: + 0000 28220: 0f b6 47 61 movzbl 0x61(%rdi),%eax + 0004 28224: 3c e2 cmp $0xe2,%al + 0006 28226: 74 2c je 28254 <em_loop.part.0+0x34> + 0008 28228: 48 8b 57 10 mov 0x10(%rdi),%rdx + 000c 2822c: 83 f0 05 xor $0x5,%eax + 000f 2822f: 48 c1 e0 04 shl $0x4,%rax + 0013 28233: 25 f0 00 00 00 and $0xf0,%eax + 0018 28238: 81 e2 d5 08 00 00 and $0x8d5,%edx + 001e 2823e: 80 ce 02 or $0x2,%dh + ... 2. file.o: warning: objtool: .text+0x53: unreachable instruction diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 3a1d80a7878d..ed6bff0e01dc 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -125,8 +125,14 @@ bool arch_pc_relative_reloc(struct reloc *reloc) #define is_RIP() ((modrm_rm & 7) == CFI_BP && modrm_mod == 0) #define have_SIB() ((modrm_rm & 7) == CFI_SP && mod_is_mem()) +/* + * Check the ModRM register. If there is a SIB byte then check with + * the SIB base register. But if the SIB base is 5 (i.e. CFI_BP) and + * ModRM mod is 0 then there is no base register. + */ #define rm_is(reg) (have_SIB() ? \ - sib_base == (reg) && sib_index == CFI_SP : \ + sib_base == (reg) && sib_index == CFI_SP && \ + (sib_base != CFI_BP || modrm_mod != 0) : \ modrm_rm == (reg)) #define rm_is_mem(reg) (mod_is_mem() && !is_RIP() && rm_is(reg)) diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index 4134d27c696b..4ea0f9815fda 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -9,6 +9,29 @@ void arch_handle_alternative(unsigned short feature, struct special_alt *alt) { + static struct special_alt *group, *prev; + + /* + * Recompute orig_len for nested ALTERNATIVE()s. + */ + if (group && group->orig_sec == alt->orig_sec && + group->orig_off == alt->orig_off) { + + struct special_alt *iter = group; + for (;;) { + unsigned int len = max(iter->orig_len, alt->orig_len); + iter->orig_len = alt->orig_len = len; + + if (iter == prev) + break; + + iter = list_next_entry(iter, list); + } + + } else group = alt; + + prev = alt; + switch (feature) { case X86_FEATURE_SMAP: /* diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 5e21cfb7661d..387d56a7f5fb 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -144,7 +144,7 @@ static bool opts_valid(void) opts.static_call || opts.uaccess) { if (opts.dump_orc) { - ERROR("--dump can't be combined with other options"); + ERROR("--dump can't be combined with other actions"); return false; } @@ -159,7 +159,7 @@ static bool opts_valid(void) if (opts.dump_orc) return true; - ERROR("At least one command required"); + ERROR("At least one action required"); return false; } diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index 7ebf29c91184..1e8141ef1b15 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -7,12 +7,16 @@ * Yes, this is unfortunate. A better solution is in the works. */ NORETURN(__fortify_panic) +NORETURN(__ia32_sys_exit) +NORETURN(__ia32_sys_exit_group) NORETURN(__kunit_abort) NORETURN(__module_put_and_kthread_exit) NORETURN(__reiserfs_panic) NORETURN(__stack_chk_fail) NORETURN(__tdx_hypercall_failed) NORETURN(__ubsan_handle_builtin_unreachable) +NORETURN(__x64_sys_exit) +NORETURN(__x64_sys_exit_group) NORETURN(arch_cpu_idle_dead) NORETURN(bch2_trans_in_restart_error) NORETURN(bch2_trans_restart_error) diff --git a/tools/objtool/special.c b/tools/objtool/special.c index 91b1950f5bd8..097a69db82a0 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -84,6 +84,14 @@ static int get_alt_entry(struct elf *elf, const struct special_entry *entry, entry->new_len); } + orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig); + if (!orig_reloc) { + WARN_FUNC("can't find orig reloc", sec, offset + entry->orig); + return -1; + } + + reloc_to_sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off); + if (entry->feature) { unsigned short feature; @@ -94,14 +102,6 @@ static int get_alt_entry(struct elf *elf, const struct special_entry *entry, arch_handle_alternative(feature, alt); } - orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig); - if (!orig_reloc) { - WARN_FUNC("can't find orig reloc", sec, offset + entry->orig); - return -1; - } - - reloc_to_sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off); - if (!entry->group || alt->new_len) { new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new); if (!new_reloc) { diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 233f2b6edf52..49b79cf0c5cc 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -86,14 +86,6 @@ static struct comm_str *comm_str__new(const char *str) return result; } -static int comm_str__cmp(const void *_lhs, const void *_rhs) -{ - const struct comm_str *lhs = *(const struct comm_str * const *)_lhs; - const struct comm_str *rhs = *(const struct comm_str * const *)_rhs; - - return strcmp(comm_str__str(lhs), comm_str__str(rhs)); -} - static int comm_str__search(const void *_key, const void *_member) { const char *key = _key; @@ -169,9 +161,24 @@ static struct comm_str *comm_strs__findnew(const char *str) } result = comm_str__new(str); if (result) { - comm_strs->strs[comm_strs->num_strs++] = result; - qsort(comm_strs->strs, comm_strs->num_strs, sizeof(struct comm_str *), - comm_str__cmp); + int low = 0, high = comm_strs->num_strs - 1; + int insert = comm_strs->num_strs; /* Default to inserting at the end. */ + + while (low <= high) { + int mid = low + (high - low) / 2; + int cmp = strcmp(comm_str__str(comm_strs->strs[mid]), str); + + if (cmp < 0) { + low = mid + 1; + } else { + high = mid - 1; + insert = mid; + } + } + memmove(&comm_strs->strs[insert + 1], &comm_strs->strs[insert], + (comm_strs->num_strs - insert) * sizeof(struct comm_str *)); + comm_strs->num_strs++; + comm_strs->strs[insert] = result; } } up_write(&comm_strs->lock); diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index ab3d0c01dd63..a69a9c661200 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -203,11 +203,27 @@ int __dsos__add(struct dsos *dsos, struct dso *dso) dsos->dsos = temp; dsos->allocated = to_allocate; } - dsos->dsos[dsos->cnt++] = dso__get(dso); - if (dsos->cnt >= 2 && dsos->sorted) { - dsos->sorted = dsos__cmp_long_name_id_short_name(&dsos->dsos[dsos->cnt - 2], - &dsos->dsos[dsos->cnt - 1]) - <= 0; + if (!dsos->sorted) { + dsos->dsos[dsos->cnt++] = dso__get(dso); + } else { + int low = 0, high = dsos->cnt - 1; + int insert = dsos->cnt; /* Default to inserting at the end. */ + + while (low <= high) { + int mid = low + (high - low) / 2; + int cmp = dsos__cmp_long_name_id_short_name(&dsos->dsos[mid], &dso); + + if (cmp < 0) { + low = mid + 1; + } else { + high = mid - 1; + insert = mid; + } + } + memmove(&dsos->dsos[insert + 1], &dsos->dsos[insert], + (dsos->cnt - insert) * sizeof(struct dso *)); + dsos->cnt++; + dsos->dsos[insert] = dso__get(dso); } dso__set_dsos(dso, dsos); return 0; diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index b53753dee02f..6c02f401069e 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -67,6 +67,7 @@ LANGUAGES = de fr it cs pt ka bindir ?= /usr/bin sbindir ?= /usr/sbin mandir ?= /usr/man +libdir ?= /usr/lib includedir ?= /usr/include localedir ?= /usr/share/locale docdir ?= /usr/share/doc/packages/cpupower @@ -94,15 +95,6 @@ RANLIB = $(CROSS)ranlib HOSTCC = gcc MKDIR = mkdir -# 64bit library detection -include ../../scripts/Makefile.arch - -ifeq ($(IS_64_BIT), 1) -libdir ?= /usr/lib64 -else -libdir ?= /usr/lib -endif - # Now we set up the build system # @@ -332,4 +324,39 @@ uninstall: rm -f $(DESTDIR)${localedir}/$$HLANG/LC_MESSAGES/cpupower.mo; \ done; -.PHONY: all utils libcpupower update-po create-gmo install-lib install-tools install-man install-gmo install uninstall clean +help: + @echo 'Building targets:' + @echo ' all - Default target. Could be omitted. Put build artifacts' + @echo ' to "O" cmdline option dir (default: current dir)' + @echo ' install - Install previously built project files from the output' + @echo ' dir defined by "O" cmdline option (default: current dir)' + @echo ' to the install dir defined by "DESTDIR" cmdline or' + @echo ' Makefile config block option (default: "")' + @echo ' install-lib - Install previously built library binary from the output' + @echo ' dir defined by "O" cmdline option (default: current dir)' + @echo ' and library headers from "lib/" for userspace to the install' + @echo ' dir defined by "DESTDIR" cmdline (default: "")' + @echo ' install-tools - Install previously built "cpupower" util from the output' + @echo ' dir defined by "O" cmdline option (default: current dir) and' + @echo ' "cpupower-completion.sh" script from the src dir to the' + @echo ' install dir defined by "DESTDIR" cmdline or Makefile' + @echo ' config block option (default: "")' + @echo ' install-man - Install man pages from the "man" src subdir to the' + @echo ' install dir defined by "DESTDIR" cmdline or Makefile' + @echo ' config block option (default: "")' + @echo ' install-gmo - Install previously built language files from the output' + @echo ' dir defined by "O" cmdline option (default: current dir)' + @echo ' to the install dir defined by "DESTDIR" cmdline or Makefile' + @echo ' config block option (default: "")' + @echo ' install-bench - Install previously built "cpufreq-bench" util files from the' + @echo ' output dir defined by "O" cmdline option (default: current dir)' + @echo ' to the install dir defined by "DESTDIR" cmdline or Makefile' + @echo ' config block option (default: "")' + @echo '' + @echo 'Cleaning targets:' + @echo ' clean - Clean build artifacts from the dir defined by "O" cmdline' + @echo ' option (default: current dir)' + @echo ' uninstall - Remove previously installed files from the dir defined by "DESTDIR"' + @echo ' cmdline or Makefile config block option (default: "")' + +.PHONY: all utils libcpupower update-po create-gmo install-lib install-tools install-man install-gmo install uninstall clean help diff --git a/tools/power/cpupower/README b/tools/power/cpupower/README index 1c68f47663b2..2678ed81d311 100644 --- a/tools/power/cpupower/README +++ b/tools/power/cpupower/README @@ -22,16 +22,156 @@ interfaces [depending on configuration, see below]. compilation and installation ---------------------------- -make -su -make install - -should suffice on most systems. It builds libcpupower to put in -/usr/lib; cpupower, cpufreq-bench_plot.sh to put in /usr/bin; and -cpufreq-bench to put in /usr/sbin. If you want to set up the paths -differently and/or want to configure the package to your specific -needs, you need to open "Makefile" with an editor of your choice and -edit the block marked CONFIGURATION. +There are 2 output directories - one for the build output and another for +the installation of the build results, that is the utility, library, +man pages, etc... + +default directory +----------------- + +In the case of default directory, build and install process requires no +additional parameters: + +build +----- + +$ make + +The output directory for the 'make' command is the current directory and +its subdirs in the kernel tree: +tools/power/cpupower + +install +------- + +$ sudo make install + +'make install' command puts targets to default system dirs: + +----------------------------------------------------------------------- +| Installing file | System dir | +----------------------------------------------------------------------- +| libcpupower | /usr/lib | +----------------------------------------------------------------------- +| cpupower | /usr/bin | +----------------------------------------------------------------------- +| cpufreq-bench_plot.sh | /usr/bin | +----------------------------------------------------------------------- +| man pages | /usr/man | +----------------------------------------------------------------------- + +To put it in other words it makes build results available system-wide, +enabling any user to simply start using it without any additional steps + +custom directory +---------------- + +There are 2 make's command-line variables 'O' and 'DESTDIR' that setup +appropriate dirs: +'O' - build directory +'DESTDIR' - installation directory. This variable could also be setup in +the 'CONFIGURATION' block of the "Makefile" + +build +----- + +$ make O=<your_custom_build_catalog> + +Example: +$ make O=/home/hedin/prj/cpupower/build + +install +------- + +$ make O=<your_custom_build_catalog> DESTDIR=<your_custom_install_catalog> + +Example: +$ make O=/home/hedin/prj/cpupower/build DESTDIR=/home/hedin/prj/cpupower \ +> install + +Notice that both variables 'O' and 'DESTDIR' have been provided. The reason +is that the build results are saved in the custom output dir defined by 'O' +variable. So, this dir is the source for the installation step. If only +'DESTDIR' were provided then the 'install' target would assume that the +build directory is the current one, build everything there and install +from the current dir. + +The files will be installed to the following dirs: + +----------------------------------------------------------------------- +| Installing file | System dir | +----------------------------------------------------------------------- +| libcpupower | ${DESTDIR}/usr/lib | +----------------------------------------------------------------------- +| cpupower | ${DESTDIR}/usr/bin | +----------------------------------------------------------------------- +| cpufreq-bench_plot.sh | ${DESTDIR}/usr/bin | +----------------------------------------------------------------------- +| man pages | ${DESTDIR}/usr/man | +----------------------------------------------------------------------- + +If you look at the table for the default 'make' output dirs you will +notice that the only difference with the non-default case is the +${DESTDIR} prefix. So, the structure of the output dirs remains the same +regardles of the root output directory. + + +clean and uninstall +------------------- + +'clean' target is intended for cleanup the build catalog from build results +'uninstall' target is intended for removing installed files from the +installation directory + +default directory +----------------- + +This case is a straightforward one: +$ make clean +$ make uninstall + +custom directory +---------------- + +Use 'O' command line variable to remove previously built files from the +build dir: +$ make O=<your_custom_build_catalog> clean + +Example: +$ make O=/home/hedin/prj/cpupower/build clean + +Use 'DESTDIR' command line variable to uninstall previously installed files +from the given dir: +$ make DESTDIR=<your_custom_install_catalog> + +Example: +make DESTDIR=/home/hedin/prj/cpupower uninstall + + +running the tool +---------------- + +default directory +----------------- + +$ sudo cpupower + +custom directory +---------------- + +When it comes to run the utility from the custom build catalog things +become a little bit complicated as 'just run' approach doesn't work. +Assuming that the current dir is '<your_custom_install_catalog>/usr', +issuing the following command: + +$ sudo ./bin/cpupower +will produce the following error output: +./bin/cpupower: error while loading shared libraries: libcpupower.so.1: +cannot open shared object file: No such file or directory + +The issue is that binary cannot find the 'libcpupower' library. So, we +shall point to the lib dir: +sudo LD_LIBRARY_PATH=lib64/ ./bin/cpupower THANKS diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile index a4b902f9e1c4..34e5894476eb 100644 --- a/tools/power/cpupower/bench/Makefile +++ b/tools/power/cpupower/bench/Makefile @@ -1,4 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 +ifeq ($(MAKELEVEL),0) +$(error This Makefile is not intended to be run standalone, but only as a part \ +of the main one in the parent dir) +endif + OUTPUT := ./ ifeq ("$(origin O)", "command line") ifneq ($(O),) diff --git a/tools/power/cpupower/man/cpupower-monitor.1 b/tools/power/cpupower/man/cpupower-monitor.1 index 8ee737eefa5c..89af019f8dc4 100644 --- a/tools/power/cpupower/man/cpupower-monitor.1 +++ b/tools/power/cpupower/man/cpupower-monitor.1 @@ -81,11 +81,6 @@ Measure idle and frequency characteristics of an arbitrary command/workload. The executable \fBcommand\fP is forked and upon its exit, statistics gathered since it was forked are displayed. .RE -.PP -\-v -.RS 4 -Increase verbosity if the binary was compiled with the DEBUG option set. -.RE .SH MONITOR DESCRIPTIONS .SS "Idle_Stats" @@ -172,9 +167,11 @@ displayed. "BIOS and Kernel Developer’s Guide (BKDG) for AMD Family 14h Processors" https://support.amd.com/us/Processor_TechDocs/43170.pdf -"Intel® Turbo Boost Technology -in Intel® Coreâ„¢ Microarchitecture (Nehalem) Based Processors" -http://download.intel.com/design/processor/applnots/320354.pdf +"What Is Intel® Turbo Boost Technology?" +https://www.intel.com/content/www/us/en/gaming/resources/turbo-boost.html + +"Power Management - Technology Overview" +https://cdrdv2.intel.com/v1/dl/getContent/637748 "Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide" diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index 075e766ff1f3..f746099b5dac 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -35,7 +35,7 @@ static unsigned int avail_monitors; static char *progname; enum operation_mode_e { list = 1, show, show_all }; -static int mode; +static enum operation_mode_e mode; static int interval = 1; static char *show_monitors_param; static struct cpupower_topology cpu_top; diff --git a/tools/power/pm-graph/bootgraph.py b/tools/power/pm-graph/bootgraph.py index f96f50e0c336..8a3ef94fe88f 100755 --- a/tools/power/pm-graph/bootgraph.py +++ b/tools/power/pm-graph/bootgraph.py @@ -77,12 +77,12 @@ class SystemValues(aslib.SystemValues): fp.close() self.testdir = datetime.now().strftime('boot-%y%m%d-%H%M%S') def kernelVersion(self, msg): - m = re.match('^[Ll]inux *[Vv]ersion *(?P<v>\S*) .*', msg) + m = re.match(r'^[Ll]inux *[Vv]ersion *(?P<v>\S*) .*', msg) if m: return m.group('v') return 'unknown' def checkFtraceKernelVersion(self): - m = re.match('^(?P<x>[0-9]*)\.(?P<y>[0-9]*)\.(?P<z>[0-9]*).*', self.kernel) + m = re.match(r'^(?P<x>[0-9]*)\.(?P<y>[0-9]*)\.(?P<z>[0-9]*).*', self.kernel) if m: val = tuple(map(int, m.groups())) if val >= (4, 10, 0): @@ -324,7 +324,7 @@ def parseKernelLog(): idx = line.find('[') if idx > 1: line = line[idx:] - m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) + m = re.match(r'[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) if(not m): continue ktime = float(m.group('ktime')) @@ -332,24 +332,24 @@ def parseKernelLog(): break msg = m.group('msg') data.dmesgtext.append(line) - if(ktime == 0.0 and re.match('^Linux version .*', msg)): + if(ktime == 0.0 and re.match(r'^Linux version .*', msg)): if(not sysvals.stamp['kernel']): sysvals.stamp['kernel'] = sysvals.kernelVersion(msg) continue - m = re.match('.* setting system clock to (?P<d>[0-9\-]*)[ A-Z](?P<t>[0-9:]*) UTC.*', msg) + m = re.match(r'.* setting system clock to (?P<d>[0-9\-]*)[ A-Z](?P<t>[0-9:]*) UTC.*', msg) if(m): bt = datetime.strptime(m.group('d')+' '+m.group('t'), '%Y-%m-%d %H:%M:%S') bt = bt - timedelta(seconds=int(ktime)) data.boottime = bt.strftime('%Y-%m-%d_%H:%M:%S') sysvals.stamp['time'] = bt.strftime('%B %d %Y, %I:%M:%S %p') continue - m = re.match('^calling *(?P<f>.*)\+.* @ (?P<p>[0-9]*)', msg) + m = re.match(r'^calling *(?P<f>.*)\+.* @ (?P<p>[0-9]*)', msg) if(m): func = m.group('f') pid = int(m.group('p')) devtemp[func] = (ktime, pid) continue - m = re.match('^initcall *(?P<f>.*)\+.* returned (?P<r>.*) after (?P<t>.*) usecs', msg) + m = re.match(r'^initcall *(?P<f>.*)\+.* returned (?P<r>.*) after (?P<t>.*) usecs', msg) if(m): data.valid = True data.end = ktime @@ -359,7 +359,7 @@ def parseKernelLog(): data.newAction(phase, f, pid, start, ktime, int(r), int(t)) del devtemp[f] continue - if(re.match('^Freeing unused kernel .*', msg)): + if(re.match(r'^Freeing unused kernel .*', msg)): data.tUserMode = ktime data.dmesg['kernel']['end'] = ktime data.dmesg['user']['start'] = ktime diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index 40ad221e8881..ef87e63c05c7 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -86,7 +86,7 @@ def ascii(text): # store system values and test parameters class SystemValues: title = 'SleepGraph' - version = '5.11' + version = '5.12' ansi = False rs = 0 display = '' @@ -420,11 +420,11 @@ class SystemValues: return value.format(**args) def setOutputFile(self): if self.dmesgfile != '': - m = re.match('(?P<name>.*)_dmesg\.txt.*', self.dmesgfile) + m = re.match(r'(?P<name>.*)_dmesg\.txt.*', self.dmesgfile) if(m): self.htmlfile = m.group('name')+'.html' if self.ftracefile != '': - m = re.match('(?P<name>.*)_ftrace\.txt.*', self.ftracefile) + m = re.match(r'(?P<name>.*)_ftrace\.txt.*', self.ftracefile) if(m): self.htmlfile = m.group('name')+'.html' def systemInfo(self, info): @@ -464,15 +464,15 @@ class SystemValues: if os.path.exists('/proc/cpuinfo'): with open('/proc/cpuinfo', 'r') as fp: for line in fp: - if re.match('^processor[ \t]*:[ \t]*[0-9]*', line): + if re.match(r'^processor[ \t]*:[ \t]*[0-9]*', line): self.cpucount += 1 if os.path.exists('/proc/meminfo'): with open('/proc/meminfo', 'r') as fp: for line in fp: - m = re.match('^MemTotal:[ \t]*(?P<sz>[0-9]*) *kB', line) + m = re.match(r'^MemTotal:[ \t]*(?P<sz>[0-9]*) *kB', line) if m: self.memtotal = int(m.group('sz')) - m = re.match('^MemFree:[ \t]*(?P<sz>[0-9]*) *kB', line) + m = re.match(r'^MemFree:[ \t]*(?P<sz>[0-9]*) *kB', line) if m: self.memfree = int(m.group('sz')) if os.path.exists('/etc/os-release'): @@ -539,7 +539,7 @@ class SystemValues: idx = line.find('[') if idx > 1: line = line[idx:] - m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) + m = re.match(r'[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) if(m): ktime = m.group('ktime') break @@ -553,7 +553,7 @@ class SystemValues: idx = line.find('[') if idx > 1: line = line[idx:] - m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) + m = re.match(r'[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) if(not m): continue ktime = float(m.group('ktime')) @@ -636,11 +636,11 @@ class SystemValues: # now process the args for arg in sorted(args): arglist[arg] = '' - m = re.match('.* '+arg+'=(?P<arg>.*) ', data); + m = re.match(r'.* '+arg+'=(?P<arg>.*) ', data); if m: arglist[arg] = m.group('arg') else: - m = re.match('.* '+arg+'=(?P<arg>.*)', data); + m = re.match(r'.* '+arg+'=(?P<arg>.*)', data); if m: arglist[arg] = m.group('arg') out = fmt.format(**arglist) @@ -989,7 +989,7 @@ class SystemValues: m = re.match(tp.ftrace_line_fmt, line) if(not m or 'device_pm_callback_start' not in line): continue - m = re.match('.*: (?P<drv>.*) (?P<d>.*), parent: *(?P<p>.*), .*', m.group('msg')); + m = re.match(r'.*: (?P<drv>.*) (?P<d>.*), parent: *(?P<p>.*), .*', m.group('msg')); if(not m): continue dev = m.group('d') @@ -999,7 +999,7 @@ class SystemValues: # now get the syspath for each target device for dirname, dirnames, filenames in os.walk('/sys/devices'): - if(re.match('.*/power', dirname) and 'async' in filenames): + if(re.match(r'.*/power', dirname) and 'async' in filenames): dev = dirname.split('/')[-2] if dev in props and (not props[dev].syspath or len(dirname) < len(props[dev].syspath)): props[dev].syspath = dirname[:-6] @@ -1143,12 +1143,12 @@ class SystemValues: elif value and os.path.exists(file): fp = open(file, 'r+') if fmt == 'radio': - m = re.match('.*\[(?P<v>.*)\].*', fp.read()) + m = re.match(r'.*\[(?P<v>.*)\].*', fp.read()) if m: self.cfgdef[file] = m.group('v') elif fmt == 'acpi': line = fp.read().strip().split('\n')[-1] - m = re.match('.* (?P<v>[0-9A-Fx]*) .*', line) + m = re.match(r'.* (?P<v>[0-9A-Fx]*) .*', line) if m: self.cfgdef[file] = m.group('v') else: @@ -1173,7 +1173,7 @@ class SystemValues: fp = Popen([cmd, '-v'], stdout=PIPE, stderr=PIPE).stderr out = ascii(fp.read()).strip() fp.close() - if re.match('turbostat version .*', out): + if re.match(r'turbostat version .*', out): self.vprint(out) return True return False @@ -1181,33 +1181,33 @@ class SystemValues: cmd = self.getExec('turbostat') rawout = keyline = valline = '' fullcmd = '%s -q -S echo freeze > %s' % (cmd, self.powerfile) - fp = Popen(['sh', '-c', fullcmd], stdout=PIPE, stderr=PIPE).stderr - for line in fp: + fp = Popen(['sh', '-c', fullcmd], stdout=PIPE, stderr=PIPE) + for line in fp.stderr: line = ascii(line) rawout += line if keyline and valline: continue - if re.match('(?i)Avg_MHz.*', line): + if re.match(r'(?i)Avg_MHz.*', line): keyline = line.strip().split() elif keyline: valline = line.strip().split() - fp.close() + fp.wait() if not keyline or not valline or len(keyline) != len(valline): errmsg = 'unrecognized turbostat output:\n'+rawout.strip() self.vprint(errmsg) if not self.verbose: pprint(errmsg) - return '' + return (fp.returncode, '') if self.verbose: pprint(rawout.strip()) out = [] for key in keyline: idx = keyline.index(key) val = valline[idx] - if key == 'SYS%LPI' and not s0ixready and re.match('^[0\.]*$', val): + if key == 'SYS%LPI' and not s0ixready and re.match(r'^[0\.]*$', val): continue out.append('%s=%s' % (key, val)) - return '|'.join(out) + return (fp.returncode, '|'.join(out)) def netfixon(self, net='both'): cmd = self.getExec('netfix') if not cmd: @@ -1232,7 +1232,7 @@ class SystemValues: except: return '' for line in reversed(w.split('\n')): - m = re.match(' *(?P<dev>.*): (?P<stat>[0-9a-f]*) .*', line) + m = re.match(r' *(?P<dev>.*): (?P<stat>[0-9a-f]*) .*', line) if not m or (dev and dev != m.group('dev')): continue return m.group('dev') @@ -1261,14 +1261,14 @@ class SystemValues: return arr = msg.split() for j in range(len(arr)): - if re.match('^[0-9,\-\.]*$', arr[j]): - arr[j] = '[0-9,\-\.]*' + if re.match(r'^[0-9,\-\.]*$', arr[j]): + arr[j] = r'[0-9,\-\.]*' else: arr[j] = arr[j]\ - .replace('\\', '\\\\').replace(']', '\]').replace('[', '\[')\ - .replace('.', '\.').replace('+', '\+').replace('*', '\*')\ - .replace('(', '\(').replace(')', '\)').replace('}', '\}')\ - .replace('{', '\{') + .replace('\\', r'\\\\').replace(']', r'\]').replace('[', r'\[')\ + .replace('.', r'\.').replace('+', r'\+').replace('*', r'\*')\ + .replace('(', r'\(').replace(')', r'\)').replace('}', r'\}')\ + .replace('{', r'\{') mstr = ' *'.join(arr) entry = { 'line': msg, @@ -1340,7 +1340,7 @@ class SystemValues: fp = Popen(xset.format('q').split(' '), stdout=PIPE).stdout ret = 'unknown' for line in fp: - m = re.match('[\s]*Monitor is (?P<m>.*)', ascii(line)) + m = re.match(r'[\s]*Monitor is (?P<m>.*)', ascii(line)) if(m and len(m.group('m')) >= 2): out = m.group('m').lower() ret = out[3:] if out[0:2] == 'in' else out @@ -1566,7 +1566,7 @@ class Data: i += 1 if tp.stampInfo(line, sysvals): continue - m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) + m = re.match(r'[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) if not m: continue t = float(m.group('ktime')) @@ -1574,7 +1574,7 @@ class Data: continue dir = 'suspend' if t < self.tSuspended else 'resume' msg = m.group('msg') - if re.match('capability: warning: .*', msg): + if re.match(r'capability: warning: .*', msg): continue for err in self.errlist: if re.match(self.errlist[err], msg): @@ -1679,8 +1679,8 @@ class Data: ubiquitous = False if kprobename in dtf and 'ub' in dtf[kprobename]: ubiquitous = True - mc = re.match('\(.*\) *(?P<args>.*)', cdata) - mr = re.match('\((?P<caller>\S*).* arg1=(?P<ret>.*)', rdata) + mc = re.match(r'\(.*\) *(?P<args>.*)', cdata) + mr = re.match(r'\((?P<caller>\S*).* arg1=(?P<ret>.*)', rdata) if mc and mr: c = mr.group('caller').split('+')[0] a = mc.group('args').strip() @@ -1997,7 +1997,7 @@ class Data: list = self.dmesg[phase]['list'] mydev = '' for devname in sorted(list): - if name == devname or re.match('^%s\[(?P<num>[0-9]*)\]$' % name, devname): + if name == devname or re.match(r'^%s\[(?P<num>[0-9]*)\]$' % name, devname): mydev = devname if mydev: return list[mydev] @@ -2099,7 +2099,7 @@ class Data: for dev in sorted(list): pdev = list[dev]['par'] pid = list[dev]['pid'] - if(pid < 0 or re.match('[0-9]*-[0-9]*\.[0-9]*[\.0-9]*\:[\.0-9]*$', pdev)): + if(pid < 0 or re.match(r'[0-9]*-[0-9]*\.[0-9]*[\.0-9]*\:[\.0-9]*$', pdev)): continue if pdev and pdev not in real and pdev not in rootlist: rootlist.append(pdev) @@ -2190,26 +2190,26 @@ class Data: if 'resume_complete' in dm: dm['resume_complete']['end'] = time def initcall_debug_call(self, line, quick=False): - m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: '+\ - 'PM: *calling .* @ (?P<n>.*), parent: (?P<p>.*)', line) + m = re.match(r'.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: '+\ + r'PM: *calling .* @ (?P<n>.*), parent: (?P<p>.*)', line) if not m: - m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: '+\ - 'calling .* @ (?P<n>.*), parent: (?P<p>.*)', line) + m = re.match(r'.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: '+\ + r'calling .* @ (?P<n>.*), parent: (?P<p>.*)', line) if not m: - m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) calling '+\ - '(?P<f>.*)\+ @ (?P<n>.*), parent: (?P<p>.*)', line) + m = re.match(r'.*(\[ *)(?P<t>[0-9\.]*)(\]) calling '+\ + r'(?P<f>.*)\+ @ (?P<n>.*), parent: (?P<p>.*)', line) if m: return True if quick else m.group('t', 'f', 'n', 'p') return False if quick else ('', '', '', '') def initcall_debug_return(self, line, quick=False): - m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: PM: '+\ - '.* returned (?P<r>[0-9]*) after (?P<dt>[0-9]*) usecs', line) + m = re.match(r'.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: PM: '+\ + r'.* returned (?P<r>[0-9]*) after (?P<dt>[0-9]*) usecs', line) if not m: - m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: '+\ - '.* returned (?P<r>[0-9]*) after (?P<dt>[0-9]*) usecs', line) + m = re.match(r'.*(\[ *)(?P<t>[0-9\.]*)(\]) .* (?P<f>.*)\: '+\ + r'.* returned (?P<r>[0-9]*) after (?P<dt>[0-9]*) usecs', line) if not m: - m = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) call '+\ - '(?P<f>.*)\+ returned .* after (?P<dt>.*) usecs', line) + m = re.match(r'.*(\[ *)(?P<t>[0-9\.]*)(\]) call '+\ + r'(?P<f>.*)\+ returned .* after (?P<dt>.*) usecs', line) if m: return True if quick else m.group('t', 'f', 'dt') return False if quick else ('', '', '') @@ -2294,28 +2294,28 @@ class FTraceLine: if not m and not d: return # is this a trace event - if(d == 'traceevent' or re.match('^ *\/\* *(?P<msg>.*) \*\/ *$', m)): + if(d == 'traceevent' or re.match(r'^ *\/\* *(?P<msg>.*) \*\/ *$', m)): if(d == 'traceevent'): # nop format trace event msg = m else: # function_graph format trace event - em = re.match('^ *\/\* *(?P<msg>.*) \*\/ *$', m) + em = re.match(r'^ *\/\* *(?P<msg>.*) \*\/ *$', m) msg = em.group('msg') - emm = re.match('^(?P<call>.*?): (?P<msg>.*)', msg) + emm = re.match(r'^(?P<call>.*?): (?P<msg>.*)', msg) if(emm): self.name = emm.group('msg') self.type = emm.group('call') else: self.name = msg - km = re.match('^(?P<n>.*)_cal$', self.type) + km = re.match(r'^(?P<n>.*)_cal$', self.type) if km: self.fcall = True self.fkprobe = True self.type = km.group('n') return - km = re.match('^(?P<n>.*)_ret$', self.type) + km = re.match(r'^(?P<n>.*)_ret$', self.type) if km: self.freturn = True self.fkprobe = True @@ -2327,7 +2327,7 @@ class FTraceLine: if(d): self.length = float(d)/1000000 # the indentation determines the depth - match = re.match('^(?P<d> *)(?P<o>.*)$', m) + match = re.match(r'^(?P<d> *)(?P<o>.*)$', m) if(not match): return self.depth = self.getDepth(match.group('d')) @@ -2337,7 +2337,7 @@ class FTraceLine: self.freturn = True if(len(m) > 1): # includes comment with function name - match = re.match('^} *\/\* *(?P<n>.*) *\*\/$', m) + match = re.match(r'^} *\/\* *(?P<n>.*) *\*\/$', m) if(match): self.name = match.group('n').strip() # function call @@ -2345,13 +2345,13 @@ class FTraceLine: self.fcall = True # function call with children if(m[-1] == '{'): - match = re.match('^(?P<n>.*) *\(.*', m) + match = re.match(r'^(?P<n>.*) *\(.*', m) if(match): self.name = match.group('n').strip() # function call with no children (leaf) elif(m[-1] == ';'): self.freturn = True - match = re.match('^(?P<n>.*) *\(.*', m) + match = re.match(r'^(?P<n>.*) *\(.*', m) if(match): self.name = match.group('n').strip() # something else (possibly a trace marker) @@ -2385,7 +2385,7 @@ class FTraceLine: return False else: if(self.type == 'suspend_resume' and - re.match('suspend_enter\[.*\] begin', self.name)): + re.match(r'suspend_enter\[.*\] begin', self.name)): return True return False def endMarker(self): @@ -2398,7 +2398,7 @@ class FTraceLine: return False else: if(self.type == 'suspend_resume' and - re.match('thaw_processes\[.*\] end', self.name)): + re.match(r'thaw_processes\[.*\] end', self.name)): return True return False @@ -2976,30 +2976,30 @@ class Timeline: # Description: # A list of values describing the properties of these test runs class TestProps: - stampfmt = '# [a-z]*-(?P<m>[0-9]{2})(?P<d>[0-9]{2})(?P<y>[0-9]{2})-'+\ - '(?P<H>[0-9]{2})(?P<M>[0-9]{2})(?P<S>[0-9]{2})'+\ - ' (?P<host>.*) (?P<mode>.*) (?P<kernel>.*)$' - wififmt = '^# wifi *(?P<d>\S*) *(?P<s>\S*) *(?P<t>[0-9\.]+).*' - tstatfmt = '^# turbostat (?P<t>\S*)' - testerrfmt = '^# enter_sleep_error (?P<e>.*)' - sysinfofmt = '^# sysinfo .*' - cmdlinefmt = '^# command \| (?P<cmd>.*)' - kparamsfmt = '^# kparams \| (?P<kp>.*)' - devpropfmt = '# Device Properties: .*' - pinfofmt = '# platform-(?P<val>[a-z,A-Z,0-9,_]*): (?P<info>.*)' - tracertypefmt = '# tracer: (?P<t>.*)' - firmwarefmt = '# fwsuspend (?P<s>[0-9]*) fwresume (?P<r>[0-9]*)$' - procexecfmt = 'ps - (?P<ps>.*)$' - procmultifmt = '@(?P<n>[0-9]*)\|(?P<ps>.*)$' + stampfmt = r'# [a-z]*-(?P<m>[0-9]{2})(?P<d>[0-9]{2})(?P<y>[0-9]{2})-'+\ + r'(?P<H>[0-9]{2})(?P<M>[0-9]{2})(?P<S>[0-9]{2})'+\ + r' (?P<host>.*) (?P<mode>.*) (?P<kernel>.*)$' + wififmt = r'^# wifi *(?P<d>\S*) *(?P<s>\S*) *(?P<t>[0-9\.]+).*' + tstatfmt = r'^# turbostat (?P<t>\S*)' + testerrfmt = r'^# enter_sleep_error (?P<e>.*)' + sysinfofmt = r'^# sysinfo .*' + cmdlinefmt = r'^# command \| (?P<cmd>.*)' + kparamsfmt = r'^# kparams \| (?P<kp>.*)' + devpropfmt = r'# Device Properties: .*' + pinfofmt = r'# platform-(?P<val>[a-z,A-Z,0-9,_]*): (?P<info>.*)' + tracertypefmt = r'# tracer: (?P<t>.*)' + firmwarefmt = r'# fwsuspend (?P<s>[0-9]*) fwresume (?P<r>[0-9]*)$' + procexecfmt = r'ps - (?P<ps>.*)$' + procmultifmt = r'@(?P<n>[0-9]*)\|(?P<ps>.*)$' ftrace_line_fmt_fg = \ - '^ *(?P<time>[0-9\.]*) *\| *(?P<cpu>[0-9]*)\)'+\ - ' *(?P<proc>.*)-(?P<pid>[0-9]*) *\|'+\ - '[ +!#\*@$]*(?P<dur>[0-9\.]*) .*\| (?P<msg>.*)' + r'^ *(?P<time>[0-9\.]*) *\| *(?P<cpu>[0-9]*)\)'+\ + r' *(?P<proc>.*)-(?P<pid>[0-9]*) *\|'+\ + r'[ +!#\*@$]*(?P<dur>[0-9\.]*) .*\| (?P<msg>.*)' ftrace_line_fmt_nop = \ - ' *(?P<proc>.*)-(?P<pid>[0-9]*) *\[(?P<cpu>[0-9]*)\] *'+\ - '(?P<flags>\S*) *(?P<time>[0-9\.]*): *'+\ - '(?P<msg>.*)' - machinesuspend = 'machine_suspend\[.*' + r' *(?P<proc>.*)-(?P<pid>[0-9]*) *\[(?P<cpu>[0-9]*)\] *'+\ + r'(?P<flags>\S*) *(?P<time>[0-9\.]*): *'+\ + r'(?P<msg>.*)' + machinesuspend = r'machine_suspend\[.*' multiproclist = dict() multiproctime = 0.0 multiproccnt = 0 @@ -3081,14 +3081,14 @@ class TestProps: sv.hostname = data.stamp['host'] sv.suspendmode = data.stamp['mode'] if sv.suspendmode == 'freeze': - self.machinesuspend = 'timekeeping_freeze\[.*' + self.machinesuspend = r'timekeeping_freeze\[.*' else: - self.machinesuspend = 'machine_suspend\[.*' + self.machinesuspend = r'machine_suspend\[.*' if sv.suspendmode == 'command' and sv.ftracefile != '': modes = ['on', 'freeze', 'standby', 'mem', 'disk'] fp = sv.openlog(sv.ftracefile, 'r') for line in fp: - m = re.match('.* machine_suspend\[(?P<mode>.*)\]', line) + m = re.match(r'.* machine_suspend\[(?P<mode>.*)\]', line) if m and m.group('mode') in ['1', '2', '3', '4']: sv.suspendmode = modes[int(m.group('mode'))] data.stamp['mode'] = sv.suspendmode @@ -3401,9 +3401,9 @@ def loadTraceLog(): for i in range(len(blk)): if 'SUSPEND START' in blk[i][3]: first.append(i) - elif re.match('.* timekeeping_freeze.*begin', blk[i][3]): + elif re.match(r'.* timekeeping_freeze.*begin', blk[i][3]): last.append(i) - elif re.match('.* timekeeping_freeze.*end', blk[i][3]): + elif re.match(r'.* timekeeping_freeze.*end', blk[i][3]): first.append(i) elif 'RESUME COMPLETE' in blk[i][3]: last.append(i) @@ -3514,28 +3514,28 @@ def parseTraceLog(live=False): if(t.fevent): if(t.type == 'suspend_resume'): # suspend_resume trace events have two types, begin and end - if(re.match('(?P<name>.*) begin$', t.name)): + if(re.match(r'(?P<name>.*) begin$', t.name)): isbegin = True - elif(re.match('(?P<name>.*) end$', t.name)): + elif(re.match(r'(?P<name>.*) end$', t.name)): isbegin = False else: continue if '[' in t.name: - m = re.match('(?P<name>.*)\[.*', t.name) + m = re.match(r'(?P<name>.*)\[.*', t.name) else: - m = re.match('(?P<name>.*) .*', t.name) + m = re.match(r'(?P<name>.*) .*', t.name) name = m.group('name') # ignore these events if(name.split('[')[0] in tracewatch): continue # -- phase changes -- # start of kernel suspend - if(re.match('suspend_enter\[.*', t.name)): + if(re.match(r'suspend_enter\[.*', t.name)): if(isbegin and data.tKernSus == 0): data.tKernSus = t.time continue # suspend_prepare start - elif(re.match('dpm_prepare\[.*', t.name)): + elif(re.match(r'dpm_prepare\[.*', t.name)): if isbegin and data.first_suspend_prepare: data.first_suspend_prepare = False if data.tKernSus == 0: @@ -3544,15 +3544,15 @@ def parseTraceLog(live=False): phase = data.setPhase('suspend_prepare', t.time, isbegin) continue # suspend start - elif(re.match('dpm_suspend\[.*', t.name)): + elif(re.match(r'dpm_suspend\[.*', t.name)): phase = data.setPhase('suspend', t.time, isbegin) continue # suspend_late start - elif(re.match('dpm_suspend_late\[.*', t.name)): + elif(re.match(r'dpm_suspend_late\[.*', t.name)): phase = data.setPhase('suspend_late', t.time, isbegin) continue # suspend_noirq start - elif(re.match('dpm_suspend_noirq\[.*', t.name)): + elif(re.match(r'dpm_suspend_noirq\[.*', t.name)): phase = data.setPhase('suspend_noirq', t.time, isbegin) continue # suspend_machine/resume_machine @@ -3589,19 +3589,19 @@ def parseTraceLog(live=False): data.tResumed = t.time continue # resume_noirq start - elif(re.match('dpm_resume_noirq\[.*', t.name)): + elif(re.match(r'dpm_resume_noirq\[.*', t.name)): phase = data.setPhase('resume_noirq', t.time, isbegin) continue # resume_early start - elif(re.match('dpm_resume_early\[.*', t.name)): + elif(re.match(r'dpm_resume_early\[.*', t.name)): phase = data.setPhase('resume_early', t.time, isbegin) continue # resume start - elif(re.match('dpm_resume\[.*', t.name)): + elif(re.match(r'dpm_resume\[.*', t.name)): phase = data.setPhase('resume', t.time, isbegin) continue # resume complete start - elif(re.match('dpm_complete\[.*', t.name)): + elif(re.match(r'dpm_complete\[.*', t.name)): phase = data.setPhase('resume_complete', t.time, isbegin) continue # skip trace events inside devices calls @@ -3635,7 +3635,7 @@ def parseTraceLog(live=False): elif(t.type == 'device_pm_callback_start'): if phase not in data.dmesg: continue - m = re.match('(?P<drv>.*) (?P<d>.*), parent: *(?P<p>.*), .*',\ + m = re.match(r'(?P<drv>.*) (?P<d>.*), parent: *(?P<p>.*), .*',\ t.name); if(not m): continue @@ -3650,7 +3650,7 @@ def parseTraceLog(live=False): elif(t.type == 'device_pm_callback_end'): if phase not in data.dmesg: continue - m = re.match('(?P<drv>.*) (?P<d>.*), err.*', t.name); + m = re.match(r'(?P<drv>.*) (?P<d>.*), err.*', t.name); if(not m): continue n = m.group('d') @@ -3904,24 +3904,24 @@ def loadKernelLog(): line = line[idx:] if tp.stampInfo(line, sysvals): continue - m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) + m = re.match(r'[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) if(not m): continue msg = m.group("msg") - if re.match('PM: Syncing filesystems.*', msg) or \ - re.match('PM: suspend entry.*', msg): + if re.match(r'PM: Syncing filesystems.*', msg) or \ + re.match(r'PM: suspend entry.*', msg): if(data): testruns.append(data) data = Data(len(testruns)) tp.parseStamp(data, sysvals) if(not data): continue - m = re.match('.* *(?P<k>[0-9]\.[0-9]{2}\.[0-9]-.*) .*', msg) + m = re.match(r'.* *(?P<k>[0-9]\.[0-9]{2}\.[0-9]-.*) .*', msg) if(m): sysvals.stamp['kernel'] = m.group('k') - m = re.match('PM: Preparing system for (?P<m>.*) sleep', msg) + m = re.match(r'PM: Preparing system for (?P<m>.*) sleep', msg) if not m: - m = re.match('PM: Preparing system for sleep \((?P<m>.*)\)', msg) + m = re.match(r'PM: Preparing system for sleep \((?P<m>.*)\)', msg) if m: sysvals.stamp['mode'] = sysvals.suspendmode = m.group('m') data.dmesgtext.append(line) @@ -3984,7 +3984,7 @@ def parseKernelLog(data): 'resume_machine': ['[PM: ]*Timekeeping suspended for.*', 'ACPI: Low-level resume complete.*', 'ACPI: resume from mwait', - 'Suspended for [0-9\.]* seconds'], + r'Suspended for [0-9\.]* seconds'], 'resume_noirq': ['PM: resume from suspend-to-idle', 'ACPI: Waking up from system sleep state.*'], 'resume_early': ['PM: noirq resume of devices complete after.*', @@ -3993,7 +3993,7 @@ def parseKernelLog(data): 'PM: early restore of devices complete after.*'], 'resume_complete': ['PM: resume of devices complete after.*', 'PM: restore of devices complete after.*'], - 'post_resume': ['.*Restarting tasks \.\.\..*'], + 'post_resume': [r'.*Restarting tasks \.\.\..*'], } # action table (expected events that occur and show up in dmesg) @@ -4021,7 +4021,7 @@ def parseKernelLog(data): actions = dict() for line in data.dmesgtext: # parse each dmesg line into the time and message - m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) + m = re.match(r'[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) if(m): val = m.group('ktime') try: @@ -4145,26 +4145,26 @@ def parseKernelLog(data): if(a in actions and actions[a][-1]['begin'] == actions[a][-1]['end']): actions[a][-1]['end'] = ktime # now look for CPU on/off events - if(re.match('Disabling non-boot CPUs .*', msg)): + if(re.match(r'Disabling non-boot CPUs .*', msg)): # start of first cpu suspend cpu_start = ktime - elif(re.match('Enabling non-boot CPUs .*', msg)): + elif(re.match(r'Enabling non-boot CPUs .*', msg)): # start of first cpu resume cpu_start = ktime - elif(re.match('smpboot: CPU (?P<cpu>[0-9]*) is now offline', msg) \ - or re.match('psci: CPU(?P<cpu>[0-9]*) killed.*', msg)): + elif(re.match(r'smpboot: CPU (?P<cpu>[0-9]*) is now offline', msg) \ + or re.match(r'psci: CPU(?P<cpu>[0-9]*) killed.*', msg)): # end of a cpu suspend, start of the next - m = re.match('smpboot: CPU (?P<cpu>[0-9]*) is now offline', msg) + m = re.match(r'smpboot: CPU (?P<cpu>[0-9]*) is now offline', msg) if(not m): - m = re.match('psci: CPU(?P<cpu>[0-9]*) killed.*', msg) + m = re.match(r'psci: CPU(?P<cpu>[0-9]*) killed.*', msg) cpu = 'CPU'+m.group('cpu') if(cpu not in actions): actions[cpu] = [] actions[cpu].append({'begin': cpu_start, 'end': ktime}) cpu_start = ktime - elif(re.match('CPU(?P<cpu>[0-9]*) is up', msg)): + elif(re.match(r'CPU(?P<cpu>[0-9]*) is up', msg)): # end of a cpu resume, start of the next - m = re.match('CPU(?P<cpu>[0-9]*) is up', msg) + m = re.match(r'CPU(?P<cpu>[0-9]*) is up', msg) cpu = 'CPU'+m.group('cpu') if(cpu not in actions): actions[cpu] = [] @@ -4343,7 +4343,8 @@ def createHTMLSummarySimple(testruns, htmlfile, title): list[mode]['data'].append([data['host'], data['kernel'], data['time'], tVal[0], tVal[1], data['url'], res, data['issues'], data['sus_worst'], data['sus_worsttime'], - data['res_worst'], data['res_worsttime'], pkgpc10, syslpi, wifi]) + data['res_worst'], data['res_worsttime'], pkgpc10, syslpi, wifi, + (data['fullmode'] if 'fullmode' in data else mode)]) idx = len(list[mode]['data']) - 1 if res.startswith('fail in'): res = 'fail' @@ -4449,7 +4450,7 @@ def createHTMLSummarySimple(testruns, htmlfile, title): elif idx == iMed[i]: tHigh[i] = ' id="%smed" class=medval title="Median"' % tag html += td.format("%d" % (list[mode]['data'].index(d) + 1)) # row - html += td.format(mode) # mode + html += td.format(d[15]) # mode html += td.format(d[0]) # host html += td.format(d[1]) # kernel html += td.format(d[2]) # time @@ -5061,6 +5062,7 @@ def addCSS(hf, sv, testcount=1, kerror=False, extra=''): def addScriptCode(hf, testruns): t0 = testruns[0].start * 1000 tMax = testruns[-1].end * 1000 + hf.write('<script type="text/javascript">\n'); # create an array in javascript memory with the device details detail = ' var devtable = [];\n' for data in testruns: @@ -5068,384 +5070,383 @@ def addScriptCode(hf, testruns): detail += ' devtable[%d] = "%s";\n' % (data.testnumber, topo) detail += ' var bounds = [%f,%f];\n' % (t0, tMax) # add the code which will manipulate the data in the browser - script_code = \ - '<script type="text/javascript">\n'+detail+\ - ' var resolution = -1;\n'\ - ' var dragval = [0, 0];\n'\ - ' function redrawTimescale(t0, tMax, tS) {\n'\ - ' var rline = \'<div class="t" style="left:0;border-left:1px solid black;border-right:0;">\';\n'\ - ' var tTotal = tMax - t0;\n'\ - ' var list = document.getElementsByClassName("tblock");\n'\ - ' for (var i = 0; i < list.length; i++) {\n'\ - ' var timescale = list[i].getElementsByClassName("timescale")[0];\n'\ - ' var m0 = t0 + (tTotal*parseFloat(list[i].style.left)/100);\n'\ - ' var mTotal = tTotal*parseFloat(list[i].style.width)/100;\n'\ - ' var mMax = m0 + mTotal;\n'\ - ' var html = "";\n'\ - ' var divTotal = Math.floor(mTotal/tS) + 1;\n'\ - ' if(divTotal > 1000) continue;\n'\ - ' var divEdge = (mTotal - tS*(divTotal-1))*100/mTotal;\n'\ - ' var pos = 0.0, val = 0.0;\n'\ - ' for (var j = 0; j < divTotal; j++) {\n'\ - ' var htmlline = "";\n'\ - ' var mode = list[i].id[5];\n'\ - ' if(mode == "s") {\n'\ - ' pos = 100 - (((j)*tS*100)/mTotal) - divEdge;\n'\ - ' val = (j-divTotal+1)*tS;\n'\ - ' if(j == divTotal - 1)\n'\ - ' htmlline = \'<div class="t" style="right:\'+pos+\'%"><cS>S→</cS></div>\';\n'\ - ' else\n'\ - ' htmlline = \'<div class="t" style="right:\'+pos+\'%">\'+val+\'ms</div>\';\n'\ - ' } else {\n'\ - ' pos = 100 - (((j)*tS*100)/mTotal);\n'\ - ' val = (j)*tS;\n'\ - ' htmlline = \'<div class="t" style="right:\'+pos+\'%">\'+val+\'ms</div>\';\n'\ - ' if(j == 0)\n'\ - ' if(mode == "r")\n'\ - ' htmlline = rline+"<cS>←R</cS></div>";\n'\ - ' else\n'\ - ' htmlline = rline+"<cS>0ms</div>";\n'\ - ' }\n'\ - ' html += htmlline;\n'\ - ' }\n'\ - ' timescale.innerHTML = html;\n'\ - ' }\n'\ - ' }\n'\ - ' function zoomTimeline() {\n'\ - ' var dmesg = document.getElementById("dmesg");\n'\ - ' var zoombox = document.getElementById("dmesgzoombox");\n'\ - ' var left = zoombox.scrollLeft;\n'\ - ' var val = parseFloat(dmesg.style.width);\n'\ - ' var newval = 100;\n'\ - ' var sh = window.outerWidth / 2;\n'\ - ' if(this.id == "zoomin") {\n'\ - ' newval = val * 1.2;\n'\ - ' if(newval > 910034) newval = 910034;\n'\ - ' dmesg.style.width = newval+"%";\n'\ - ' zoombox.scrollLeft = ((left + sh) * newval / val) - sh;\n'\ - ' } else if (this.id == "zoomout") {\n'\ - ' newval = val / 1.2;\n'\ - ' if(newval < 100) newval = 100;\n'\ - ' dmesg.style.width = newval+"%";\n'\ - ' zoombox.scrollLeft = ((left + sh) * newval / val) - sh;\n'\ - ' } else {\n'\ - ' zoombox.scrollLeft = 0;\n'\ - ' dmesg.style.width = "100%";\n'\ - ' }\n'\ - ' var tS = [10000, 5000, 2000, 1000, 500, 200, 100, 50, 20, 10, 5, 2, 1];\n'\ - ' var t0 = bounds[0];\n'\ - ' var tMax = bounds[1];\n'\ - ' var tTotal = tMax - t0;\n'\ - ' var wTotal = tTotal * 100.0 / newval;\n'\ - ' var idx = 7*window.innerWidth/1100;\n'\ - ' for(var i = 0; (i < tS.length)&&((wTotal / tS[i]) < idx); i++);\n'\ - ' if(i >= tS.length) i = tS.length - 1;\n'\ - ' if(tS[i] == resolution) return;\n'\ - ' resolution = tS[i];\n'\ - ' redrawTimescale(t0, tMax, tS[i]);\n'\ - ' }\n'\ - ' function deviceName(title) {\n'\ - ' var name = title.slice(0, title.indexOf(" ("));\n'\ - ' return name;\n'\ - ' }\n'\ - ' function deviceHover() {\n'\ - ' var name = deviceName(this.title);\n'\ - ' var dmesg = document.getElementById("dmesg");\n'\ - ' var dev = dmesg.getElementsByClassName("thread");\n'\ - ' var cpu = -1;\n'\ - ' if(name.match("CPU_ON\[[0-9]*\]"))\n'\ - ' cpu = parseInt(name.slice(7));\n'\ - ' else if(name.match("CPU_OFF\[[0-9]*\]"))\n'\ - ' cpu = parseInt(name.slice(8));\n'\ - ' for (var i = 0; i < dev.length; i++) {\n'\ - ' dname = deviceName(dev[i].title);\n'\ - ' var cname = dev[i].className.slice(dev[i].className.indexOf("thread"));\n'\ - ' if((cpu >= 0 && dname.match("CPU_O[NF]*\\\[*"+cpu+"\\\]")) ||\n'\ - ' (name == dname))\n'\ - ' {\n'\ - ' dev[i].className = "hover "+cname;\n'\ - ' } else {\n'\ - ' dev[i].className = cname;\n'\ - ' }\n'\ - ' }\n'\ - ' }\n'\ - ' function deviceUnhover() {\n'\ - ' var dmesg = document.getElementById("dmesg");\n'\ - ' var dev = dmesg.getElementsByClassName("thread");\n'\ - ' for (var i = 0; i < dev.length; i++) {\n'\ - ' dev[i].className = dev[i].className.slice(dev[i].className.indexOf("thread"));\n'\ - ' }\n'\ - ' }\n'\ - ' function deviceTitle(title, total, cpu) {\n'\ - ' var prefix = "Total";\n'\ - ' if(total.length > 3) {\n'\ - ' prefix = "Average";\n'\ - ' total[1] = (total[1]+total[3])/2;\n'\ - ' total[2] = (total[2]+total[4])/2;\n'\ - ' }\n'\ - ' var devtitle = document.getElementById("devicedetailtitle");\n'\ - ' var name = deviceName(title);\n'\ - ' if(cpu >= 0) name = "CPU"+cpu;\n'\ - ' var driver = "";\n'\ - ' var tS = "<t2>(</t2>";\n'\ - ' var tR = "<t2>)</t2>";\n'\ - ' if(total[1] > 0)\n'\ - ' tS = "<t2>("+prefix+" Suspend:</t2><t0> "+total[1].toFixed(3)+" ms</t0> ";\n'\ - ' if(total[2] > 0)\n'\ - ' tR = " <t2>"+prefix+" Resume:</t2><t0> "+total[2].toFixed(3)+" ms<t2>)</t2></t0>";\n'\ - ' var s = title.indexOf("{");\n'\ - ' var e = title.indexOf("}");\n'\ - ' if((s >= 0) && (e >= 0))\n'\ - ' driver = title.slice(s+1, e) + " <t1>@</t1> ";\n'\ - ' if(total[1] > 0 && total[2] > 0)\n'\ - ' devtitle.innerHTML = "<t0>"+driver+name+"</t0> "+tS+tR;\n'\ - ' else\n'\ - ' devtitle.innerHTML = "<t0>"+title+"</t0>";\n'\ - ' return name;\n'\ - ' }\n'\ - ' function deviceDetail() {\n'\ - ' var devinfo = document.getElementById("devicedetail");\n'\ - ' devinfo.style.display = "block";\n'\ - ' var name = deviceName(this.title);\n'\ - ' var cpu = -1;\n'\ - ' if(name.match("CPU_ON\[[0-9]*\]"))\n'\ - ' cpu = parseInt(name.slice(7));\n'\ - ' else if(name.match("CPU_OFF\[[0-9]*\]"))\n'\ - ' cpu = parseInt(name.slice(8));\n'\ - ' var dmesg = document.getElementById("dmesg");\n'\ - ' var dev = dmesg.getElementsByClassName("thread");\n'\ - ' var idlist = [];\n'\ - ' var pdata = [[]];\n'\ - ' if(document.getElementById("devicedetail1"))\n'\ - ' pdata = [[], []];\n'\ - ' var pd = pdata[0];\n'\ - ' var total = [0.0, 0.0, 0.0];\n'\ - ' for (var i = 0; i < dev.length; i++) {\n'\ - ' dname = deviceName(dev[i].title);\n'\ - ' if((cpu >= 0 && dname.match("CPU_O[NF]*\\\[*"+cpu+"\\\]")) ||\n'\ - ' (name == dname))\n'\ - ' {\n'\ - ' idlist[idlist.length] = dev[i].id;\n'\ - ' var tidx = 1;\n'\ - ' if(dev[i].id[0] == "a") {\n'\ - ' pd = pdata[0];\n'\ - ' } else {\n'\ - ' if(pdata.length == 1) pdata[1] = [];\n'\ - ' if(total.length == 3) total[3]=total[4]=0.0;\n'\ - ' pd = pdata[1];\n'\ - ' tidx = 3;\n'\ - ' }\n'\ - ' var info = dev[i].title.split(" ");\n'\ - ' var pname = info[info.length-1];\n'\ - ' pd[pname] = parseFloat(info[info.length-3].slice(1));\n'\ - ' total[0] += pd[pname];\n'\ - ' if(pname.indexOf("suspend") >= 0)\n'\ - ' total[tidx] += pd[pname];\n'\ - ' else\n'\ - ' total[tidx+1] += pd[pname];\n'\ - ' }\n'\ - ' }\n'\ - ' var devname = deviceTitle(this.title, total, cpu);\n'\ - ' var left = 0.0;\n'\ - ' for (var t = 0; t < pdata.length; t++) {\n'\ - ' pd = pdata[t];\n'\ - ' devinfo = document.getElementById("devicedetail"+t);\n'\ - ' var phases = devinfo.getElementsByClassName("phaselet");\n'\ - ' for (var i = 0; i < phases.length; i++) {\n'\ - ' if(phases[i].id in pd) {\n'\ - ' var w = 100.0*pd[phases[i].id]/total[0];\n'\ - ' var fs = 32;\n'\ - ' if(w < 8) fs = 4*w | 0;\n'\ - ' var fs2 = fs*3/4;\n'\ - ' phases[i].style.width = w+"%";\n'\ - ' phases[i].style.left = left+"%";\n'\ - ' phases[i].title = phases[i].id+" "+pd[phases[i].id]+" ms";\n'\ - ' left += w;\n'\ - ' var time = "<t4 style=\\"font-size:"+fs+"px\\">"+pd[phases[i].id]+" ms<br></t4>";\n'\ - ' var pname = "<t3 style=\\"font-size:"+fs2+"px\\">"+phases[i].id.replace(new RegExp("_", "g"), " ")+"</t3>";\n'\ - ' phases[i].innerHTML = time+pname;\n'\ - ' } else {\n'\ - ' phases[i].style.width = "0%";\n'\ - ' phases[i].style.left = left+"%";\n'\ - ' }\n'\ - ' }\n'\ - ' }\n'\ - ' if(typeof devstats !== \'undefined\')\n'\ - ' callDetail(this.id, this.title);\n'\ - ' var cglist = document.getElementById("callgraphs");\n'\ - ' if(!cglist) return;\n'\ - ' var cg = cglist.getElementsByClassName("atop");\n'\ - ' if(cg.length < 10) return;\n'\ - ' for (var i = 0; i < cg.length; i++) {\n'\ - ' cgid = cg[i].id.split("x")[0]\n'\ - ' if(idlist.indexOf(cgid) >= 0) {\n'\ - ' cg[i].style.display = "block";\n'\ - ' } else {\n'\ - ' cg[i].style.display = "none";\n'\ - ' }\n'\ - ' }\n'\ - ' }\n'\ - ' function callDetail(devid, devtitle) {\n'\ - ' if(!(devid in devstats) || devstats[devid].length < 1)\n'\ - ' return;\n'\ - ' var list = devstats[devid];\n'\ - ' var tmp = devtitle.split(" ");\n'\ - ' var name = tmp[0], phase = tmp[tmp.length-1];\n'\ - ' var dd = document.getElementById(phase);\n'\ - ' var total = parseFloat(tmp[1].slice(1));\n'\ - ' var mlist = [];\n'\ - ' var maxlen = 0;\n'\ - ' var info = []\n'\ - ' for(var i in list) {\n'\ - ' if(list[i][0] == "@") {\n'\ - ' info = list[i].split("|");\n'\ - ' continue;\n'\ - ' }\n'\ - ' var tmp = list[i].split("|");\n'\ - ' var t = parseFloat(tmp[0]), f = tmp[1], c = parseInt(tmp[2]);\n'\ - ' var p = (t*100.0/total).toFixed(2);\n'\ - ' mlist[mlist.length] = [f, c, t.toFixed(2), p+"%"];\n'\ - ' if(f.length > maxlen)\n'\ - ' maxlen = f.length;\n'\ - ' }\n'\ - ' var pad = 5;\n'\ - ' if(mlist.length == 0) pad = 30;\n'\ - ' var html = \'<div style="padding-top:\'+pad+\'px"><t3> <b>\'+name+\':</b>\';\n'\ - ' if(info.length > 2)\n'\ - ' html += " start=<b>"+info[1]+"</b>, end=<b>"+info[2]+"</b>";\n'\ - ' if(info.length > 3)\n'\ - ' html += ", length<i>(w/o overhead)</i>=<b>"+info[3]+" ms</b>";\n'\ - ' if(info.length > 4)\n'\ - ' html += ", return=<b>"+info[4]+"</b>";\n'\ - ' html += "</t3></div>";\n'\ - ' if(mlist.length > 0) {\n'\ - ' html += \'<table class=fstat style="padding-top:\'+(maxlen*5)+\'px;"><tr><th>Function</th>\';\n'\ - ' for(var i in mlist)\n'\ - ' html += "<td class=vt>"+mlist[i][0]+"</td>";\n'\ - ' html += "</tr><tr><th>Calls</th>";\n'\ - ' for(var i in mlist)\n'\ - ' html += "<td>"+mlist[i][1]+"</td>";\n'\ - ' html += "</tr><tr><th>Time(ms)</th>";\n'\ - ' for(var i in mlist)\n'\ - ' html += "<td>"+mlist[i][2]+"</td>";\n'\ - ' html += "</tr><tr><th>Percent</th>";\n'\ - ' for(var i in mlist)\n'\ - ' html += "<td>"+mlist[i][3]+"</td>";\n'\ - ' html += "</tr></table>";\n'\ - ' }\n'\ - ' dd.innerHTML = html;\n'\ - ' var height = (maxlen*5)+100;\n'\ - ' dd.style.height = height+"px";\n'\ - ' document.getElementById("devicedetail").style.height = height+"px";\n'\ - ' }\n'\ - ' function callSelect() {\n'\ - ' var cglist = document.getElementById("callgraphs");\n'\ - ' if(!cglist) return;\n'\ - ' var cg = cglist.getElementsByClassName("atop");\n'\ - ' for (var i = 0; i < cg.length; i++) {\n'\ - ' if(this.id == cg[i].id) {\n'\ - ' cg[i].style.display = "block";\n'\ - ' } else {\n'\ - ' cg[i].style.display = "none";\n'\ - ' }\n'\ - ' }\n'\ - ' }\n'\ - ' function devListWindow(e) {\n'\ - ' var win = window.open();\n'\ - ' var html = "<title>"+e.target.innerHTML+"</title>"+\n'\ - ' "<style type=\\"text/css\\">"+\n'\ - ' " ul {list-style-type:circle;padding-left:10px;margin-left:10px;}"+\n'\ - ' "</style>"\n'\ - ' var dt = devtable[0];\n'\ - ' if(e.target.id != "devlist1")\n'\ - ' dt = devtable[1];\n'\ - ' win.document.write(html+dt);\n'\ - ' }\n'\ - ' function errWindow() {\n'\ - ' var range = this.id.split("_");\n'\ - ' var idx1 = parseInt(range[0]);\n'\ - ' var idx2 = parseInt(range[1]);\n'\ - ' var win = window.open();\n'\ - ' var log = document.getElementById("dmesglog");\n'\ - ' var title = "<title>dmesg log</title>";\n'\ - ' var text = log.innerHTML.split("\\n");\n'\ - ' var html = "";\n'\ - ' for(var i = 0; i < text.length; i++) {\n'\ - ' if(i == idx1) {\n'\ - ' html += "<e id=target>"+text[i]+"</e>\\n";\n'\ - ' } else if(i > idx1 && i <= idx2) {\n'\ - ' html += "<e>"+text[i]+"</e>\\n";\n'\ - ' } else {\n'\ - ' html += text[i]+"\\n";\n'\ - ' }\n'\ - ' }\n'\ - ' win.document.write("<style>e{color:red}</style>"+title+"<pre>"+html+"</pre>");\n'\ - ' win.location.hash = "#target";\n'\ - ' win.document.close();\n'\ - ' }\n'\ - ' function logWindow(e) {\n'\ - ' var name = e.target.id.slice(4);\n'\ - ' var win = window.open();\n'\ - ' var log = document.getElementById(name+"log");\n'\ - ' var title = "<title>"+document.title.split(" ")[0]+" "+name+" log</title>";\n'\ - ' win.document.write(title+"<pre>"+log.innerHTML+"</pre>");\n'\ - ' win.document.close();\n'\ - ' }\n'\ - ' function onMouseDown(e) {\n'\ - ' dragval[0] = e.clientX;\n'\ - ' dragval[1] = document.getElementById("dmesgzoombox").scrollLeft;\n'\ - ' document.onmousemove = onMouseMove;\n'\ - ' }\n'\ - ' function onMouseMove(e) {\n'\ - ' var zoombox = document.getElementById("dmesgzoombox");\n'\ - ' zoombox.scrollLeft = dragval[1] + dragval[0] - e.clientX;\n'\ - ' }\n'\ - ' function onMouseUp(e) {\n'\ - ' document.onmousemove = null;\n'\ - ' }\n'\ - ' function onKeyPress(e) {\n'\ - ' var c = e.charCode;\n'\ - ' if(c != 42 && c != 43 && c != 45) return;\n'\ - ' var click = document.createEvent("Events");\n'\ - ' click.initEvent("click", true, false);\n'\ - ' if(c == 43) \n'\ - ' document.getElementById("zoomin").dispatchEvent(click);\n'\ - ' else if(c == 45)\n'\ - ' document.getElementById("zoomout").dispatchEvent(click);\n'\ - ' else if(c == 42)\n'\ - ' document.getElementById("zoomdef").dispatchEvent(click);\n'\ - ' }\n'\ - ' window.addEventListener("resize", function () {zoomTimeline();});\n'\ - ' window.addEventListener("load", function () {\n'\ - ' var dmesg = document.getElementById("dmesg");\n'\ - ' dmesg.style.width = "100%"\n'\ - ' dmesg.onmousedown = onMouseDown;\n'\ - ' document.onmouseup = onMouseUp;\n'\ - ' document.onkeypress = onKeyPress;\n'\ - ' document.getElementById("zoomin").onclick = zoomTimeline;\n'\ - ' document.getElementById("zoomout").onclick = zoomTimeline;\n'\ - ' document.getElementById("zoomdef").onclick = zoomTimeline;\n'\ - ' var list = document.getElementsByClassName("err");\n'\ - ' for (var i = 0; i < list.length; i++)\n'\ - ' list[i].onclick = errWindow;\n'\ - ' var list = document.getElementsByClassName("logbtn");\n'\ - ' for (var i = 0; i < list.length; i++)\n'\ - ' list[i].onclick = logWindow;\n'\ - ' list = document.getElementsByClassName("devlist");\n'\ - ' for (var i = 0; i < list.length; i++)\n'\ - ' list[i].onclick = devListWindow;\n'\ - ' var dev = dmesg.getElementsByClassName("thread");\n'\ - ' for (var i = 0; i < dev.length; i++) {\n'\ - ' dev[i].onclick = deviceDetail;\n'\ - ' dev[i].onmouseover = deviceHover;\n'\ - ' dev[i].onmouseout = deviceUnhover;\n'\ - ' }\n'\ - ' var dev = dmesg.getElementsByClassName("srccall");\n'\ - ' for (var i = 0; i < dev.length; i++)\n'\ - ' dev[i].onclick = callSelect;\n'\ - ' zoomTimeline();\n'\ - ' });\n'\ - '</script>\n' + hf.write(detail); + script_code = r""" var resolution = -1; + var dragval = [0, 0]; + function redrawTimescale(t0, tMax, tS) { + var rline = '<div class="t" style="left:0;border-left:1px solid black;border-right:0;">'; + var tTotal = tMax - t0; + var list = document.getElementsByClassName("tblock"); + for (var i = 0; i < list.length; i++) { + var timescale = list[i].getElementsByClassName("timescale")[0]; + var m0 = t0 + (tTotal*parseFloat(list[i].style.left)/100); + var mTotal = tTotal*parseFloat(list[i].style.width)/100; + var mMax = m0 + mTotal; + var html = ""; + var divTotal = Math.floor(mTotal/tS) + 1; + if(divTotal > 1000) continue; + var divEdge = (mTotal - tS*(divTotal-1))*100/mTotal; + var pos = 0.0, val = 0.0; + for (var j = 0; j < divTotal; j++) { + var htmlline = ""; + var mode = list[i].id[5]; + if(mode == "s") { + pos = 100 - (((j)*tS*100)/mTotal) - divEdge; + val = (j-divTotal+1)*tS; + if(j == divTotal - 1) + htmlline = '<div class="t" style="right:'+pos+'%"><cS>S→</cS></div>'; + else + htmlline = '<div class="t" style="right:'+pos+'%">'+val+'ms</div>'; + } else { + pos = 100 - (((j)*tS*100)/mTotal); + val = (j)*tS; + htmlline = '<div class="t" style="right:'+pos+'%">'+val+'ms</div>'; + if(j == 0) + if(mode == "r") + htmlline = rline+"<cS>←R</cS></div>"; + else + htmlline = rline+"<cS>0ms</div>"; + } + html += htmlline; + } + timescale.innerHTML = html; + } + } + function zoomTimeline() { + var dmesg = document.getElementById("dmesg"); + var zoombox = document.getElementById("dmesgzoombox"); + var left = zoombox.scrollLeft; + var val = parseFloat(dmesg.style.width); + var newval = 100; + var sh = window.outerWidth / 2; + if(this.id == "zoomin") { + newval = val * 1.2; + if(newval > 910034) newval = 910034; + dmesg.style.width = newval+"%"; + zoombox.scrollLeft = ((left + sh) * newval / val) - sh; + } else if (this.id == "zoomout") { + newval = val / 1.2; + if(newval < 100) newval = 100; + dmesg.style.width = newval+"%"; + zoombox.scrollLeft = ((left + sh) * newval / val) - sh; + } else { + zoombox.scrollLeft = 0; + dmesg.style.width = "100%"; + } + var tS = [10000, 5000, 2000, 1000, 500, 200, 100, 50, 20, 10, 5, 2, 1]; + var t0 = bounds[0]; + var tMax = bounds[1]; + var tTotal = tMax - t0; + var wTotal = tTotal * 100.0 / newval; + var idx = 7*window.innerWidth/1100; + for(var i = 0; (i < tS.length)&&((wTotal / tS[i]) < idx); i++); + if(i >= tS.length) i = tS.length - 1; + if(tS[i] == resolution) return; + resolution = tS[i]; + redrawTimescale(t0, tMax, tS[i]); + } + function deviceName(title) { + var name = title.slice(0, title.indexOf(" (")); + return name; + } + function deviceHover() { + var name = deviceName(this.title); + var dmesg = document.getElementById("dmesg"); + var dev = dmesg.getElementsByClassName("thread"); + var cpu = -1; + if(name.match("CPU_ON\[[0-9]*\]")) + cpu = parseInt(name.slice(7)); + else if(name.match("CPU_OFF\[[0-9]*\]")) + cpu = parseInt(name.slice(8)); + for (var i = 0; i < dev.length; i++) { + dname = deviceName(dev[i].title); + var cname = dev[i].className.slice(dev[i].className.indexOf("thread")); + if((cpu >= 0 && dname.match("CPU_O[NF]*\\[*"+cpu+"\\]")) || + (name == dname)) + { + dev[i].className = "hover "+cname; + } else { + dev[i].className = cname; + } + } + } + function deviceUnhover() { + var dmesg = document.getElementById("dmesg"); + var dev = dmesg.getElementsByClassName("thread"); + for (var i = 0; i < dev.length; i++) { + dev[i].className = dev[i].className.slice(dev[i].className.indexOf("thread")); + } + } + function deviceTitle(title, total, cpu) { + var prefix = "Total"; + if(total.length > 3) { + prefix = "Average"; + total[1] = (total[1]+total[3])/2; + total[2] = (total[2]+total[4])/2; + } + var devtitle = document.getElementById("devicedetailtitle"); + var name = deviceName(title); + if(cpu >= 0) name = "CPU"+cpu; + var driver = ""; + var tS = "<t2>(</t2>"; + var tR = "<t2>)</t2>"; + if(total[1] > 0) + tS = "<t2>("+prefix+" Suspend:</t2><t0> "+total[1].toFixed(3)+" ms</t0> "; + if(total[2] > 0) + tR = " <t2>"+prefix+" Resume:</t2><t0> "+total[2].toFixed(3)+" ms<t2>)</t2></t0>"; + var s = title.indexOf("{"); + var e = title.indexOf("}"); + if((s >= 0) && (e >= 0)) + driver = title.slice(s+1, e) + " <t1>@</t1> "; + if(total[1] > 0 && total[2] > 0) + devtitle.innerHTML = "<t0>"+driver+name+"</t0> "+tS+tR; + else + devtitle.innerHTML = "<t0>"+title+"</t0>"; + return name; + } + function deviceDetail() { + var devinfo = document.getElementById("devicedetail"); + devinfo.style.display = "block"; + var name = deviceName(this.title); + var cpu = -1; + if(name.match("CPU_ON\[[0-9]*\]")) + cpu = parseInt(name.slice(7)); + else if(name.match("CPU_OFF\[[0-9]*\]")) + cpu = parseInt(name.slice(8)); + var dmesg = document.getElementById("dmesg"); + var dev = dmesg.getElementsByClassName("thread"); + var idlist = []; + var pdata = [[]]; + if(document.getElementById("devicedetail1")) + pdata = [[], []]; + var pd = pdata[0]; + var total = [0.0, 0.0, 0.0]; + for (var i = 0; i < dev.length; i++) { + dname = deviceName(dev[i].title); + if((cpu >= 0 && dname.match("CPU_O[NF]*\\[*"+cpu+"\\]")) || + (name == dname)) + { + idlist[idlist.length] = dev[i].id; + var tidx = 1; + if(dev[i].id[0] == "a") { + pd = pdata[0]; + } else { + if(pdata.length == 1) pdata[1] = []; + if(total.length == 3) total[3]=total[4]=0.0; + pd = pdata[1]; + tidx = 3; + } + var info = dev[i].title.split(" "); + var pname = info[info.length-1]; + pd[pname] = parseFloat(info[info.length-3].slice(1)); + total[0] += pd[pname]; + if(pname.indexOf("suspend") >= 0) + total[tidx] += pd[pname]; + else + total[tidx+1] += pd[pname]; + } + } + var devname = deviceTitle(this.title, total, cpu); + var left = 0.0; + for (var t = 0; t < pdata.length; t++) { + pd = pdata[t]; + devinfo = document.getElementById("devicedetail"+t); + var phases = devinfo.getElementsByClassName("phaselet"); + for (var i = 0; i < phases.length; i++) { + if(phases[i].id in pd) { + var w = 100.0*pd[phases[i].id]/total[0]; + var fs = 32; + if(w < 8) fs = 4*w | 0; + var fs2 = fs*3/4; + phases[i].style.width = w+"%"; + phases[i].style.left = left+"%"; + phases[i].title = phases[i].id+" "+pd[phases[i].id]+" ms"; + left += w; + var time = "<t4 style=\"font-size:"+fs+"px\">"+pd[phases[i].id]+" ms<br></t4>"; + var pname = "<t3 style=\"font-size:"+fs2+"px\">"+phases[i].id.replace(new RegExp("_", "g"), " ")+"</t3>"; + phases[i].innerHTML = time+pname; + } else { + phases[i].style.width = "0%"; + phases[i].style.left = left+"%"; + } + } + } + if(typeof devstats !== 'undefined') + callDetail(this.id, this.title); + var cglist = document.getElementById("callgraphs"); + if(!cglist) return; + var cg = cglist.getElementsByClassName("atop"); + if(cg.length < 10) return; + for (var i = 0; i < cg.length; i++) { + cgid = cg[i].id.split("x")[0] + if(idlist.indexOf(cgid) >= 0) { + cg[i].style.display = "block"; + } else { + cg[i].style.display = "none"; + } + } + } + function callDetail(devid, devtitle) { + if(!(devid in devstats) || devstats[devid].length < 1) + return; + var list = devstats[devid]; + var tmp = devtitle.split(" "); + var name = tmp[0], phase = tmp[tmp.length-1]; + var dd = document.getElementById(phase); + var total = parseFloat(tmp[1].slice(1)); + var mlist = []; + var maxlen = 0; + var info = [] + for(var i in list) { + if(list[i][0] == "@") { + info = list[i].split("|"); + continue; + } + var tmp = list[i].split("|"); + var t = parseFloat(tmp[0]), f = tmp[1], c = parseInt(tmp[2]); + var p = (t*100.0/total).toFixed(2); + mlist[mlist.length] = [f, c, t.toFixed(2), p+"%"]; + if(f.length > maxlen) + maxlen = f.length; + } + var pad = 5; + if(mlist.length == 0) pad = 30; + var html = '<div style="padding-top:'+pad+'px"><t3> <b>'+name+':</b>'; + if(info.length > 2) + html += " start=<b>"+info[1]+"</b>, end=<b>"+info[2]+"</b>"; + if(info.length > 3) + html += ", length<i>(w/o overhead)</i>=<b>"+info[3]+" ms</b>"; + if(info.length > 4) + html += ", return=<b>"+info[4]+"</b>"; + html += "</t3></div>"; + if(mlist.length > 0) { + html += '<table class=fstat style="padding-top:'+(maxlen*5)+'px;"><tr><th>Function</th>'; + for(var i in mlist) + html += "<td class=vt>"+mlist[i][0]+"</td>"; + html += "</tr><tr><th>Calls</th>"; + for(var i in mlist) + html += "<td>"+mlist[i][1]+"</td>"; + html += "</tr><tr><th>Time(ms)</th>"; + for(var i in mlist) + html += "<td>"+mlist[i][2]+"</td>"; + html += "</tr><tr><th>Percent</th>"; + for(var i in mlist) + html += "<td>"+mlist[i][3]+"</td>"; + html += "</tr></table>"; + } + dd.innerHTML = html; + var height = (maxlen*5)+100; + dd.style.height = height+"px"; + document.getElementById("devicedetail").style.height = height+"px"; + } + function callSelect() { + var cglist = document.getElementById("callgraphs"); + if(!cglist) return; + var cg = cglist.getElementsByClassName("atop"); + for (var i = 0; i < cg.length; i++) { + if(this.id == cg[i].id) { + cg[i].style.display = "block"; + } else { + cg[i].style.display = "none"; + } + } + } + function devListWindow(e) { + var win = window.open(); + var html = "<title>"+e.target.innerHTML+"</title>"+ + "<style type=\"text/css\">"+ + " ul {list-style-type:circle;padding-left:10px;margin-left:10px;}"+ + "</style>" + var dt = devtable[0]; + if(e.target.id != "devlist1") + dt = devtable[1]; + win.document.write(html+dt); + } + function errWindow() { + var range = this.id.split("_"); + var idx1 = parseInt(range[0]); + var idx2 = parseInt(range[1]); + var win = window.open(); + var log = document.getElementById("dmesglog"); + var title = "<title>dmesg log</title>"; + var text = log.innerHTML.split("\n"); + var html = ""; + for(var i = 0; i < text.length; i++) { + if(i == idx1) { + html += "<e id=target>"+text[i]+"</e>\n"; + } else if(i > idx1 && i <= idx2) { + html += "<e>"+text[i]+"</e>\n"; + } else { + html += text[i]+"\n"; + } + } + win.document.write("<style>e{color:red}</style>"+title+"<pre>"+html+"</pre>"); + win.location.hash = "#target"; + win.document.close(); + } + function logWindow(e) { + var name = e.target.id.slice(4); + var win = window.open(); + var log = document.getElementById(name+"log"); + var title = "<title>"+document.title.split(" ")[0]+" "+name+" log</title>"; + win.document.write(title+"<pre>"+log.innerHTML+"</pre>"); + win.document.close(); + } + function onMouseDown(e) { + dragval[0] = e.clientX; + dragval[1] = document.getElementById("dmesgzoombox").scrollLeft; + document.onmousemove = onMouseMove; + } + function onMouseMove(e) { + var zoombox = document.getElementById("dmesgzoombox"); + zoombox.scrollLeft = dragval[1] + dragval[0] - e.clientX; + } + function onMouseUp(e) { + document.onmousemove = null; + } + function onKeyPress(e) { + var c = e.charCode; + if(c != 42 && c != 43 && c != 45) return; + var click = document.createEvent("Events"); + click.initEvent("click", true, false); + if(c == 43) + document.getElementById("zoomin").dispatchEvent(click); + else if(c == 45) + document.getElementById("zoomout").dispatchEvent(click); + else if(c == 42) + document.getElementById("zoomdef").dispatchEvent(click); + } + window.addEventListener("resize", function () {zoomTimeline();}); + window.addEventListener("load", function () { + var dmesg = document.getElementById("dmesg"); + dmesg.style.width = "100%" + dmesg.onmousedown = onMouseDown; + document.onmouseup = onMouseUp; + document.onkeypress = onKeyPress; + document.getElementById("zoomin").onclick = zoomTimeline; + document.getElementById("zoomout").onclick = zoomTimeline; + document.getElementById("zoomdef").onclick = zoomTimeline; + var list = document.getElementsByClassName("err"); + for (var i = 0; i < list.length; i++) + list[i].onclick = errWindow; + var list = document.getElementsByClassName("logbtn"); + for (var i = 0; i < list.length; i++) + list[i].onclick = logWindow; + list = document.getElementsByClassName("devlist"); + for (var i = 0; i < list.length; i++) + list[i].onclick = devListWindow; + var dev = dmesg.getElementsByClassName("thread"); + for (var i = 0; i < dev.length; i++) { + dev[i].onclick = deviceDetail; + dev[i].onmouseover = deviceHover; + dev[i].onmouseout = deviceUnhover; + } + var dev = dmesg.getElementsByClassName("srccall"); + for (var i = 0; i < dev.length; i++) + dev[i].onclick = callSelect; + zoomTimeline(); + }); +</script> """ hf.write(script_code); # Function: executeSuspend @@ -5524,7 +5525,9 @@ def executeSuspend(quiet=False): if ((mode == 'freeze') or (sv.memmode == 's2idle')) \ and sv.haveTurbostat(): # execution will pause here - turbo = sv.turbostat(s0ixready) + retval, turbo = sv.turbostat(s0ixready) + if retval != 0: + tdata['error'] ='turbostat returned %d' % retval if turbo: tdata['turbo'] = turbo else: @@ -5532,6 +5535,7 @@ def executeSuspend(quiet=False): pf.write(mode) # execution will pause here try: + pf.flush() pf.close() except Exception as e: tdata['error'] = str(e) @@ -5633,7 +5637,7 @@ def deviceInfo(output=''): tgtval = 'runtime_status' lines = dict() for dirname, dirnames, filenames in os.walk('/sys/devices'): - if(not re.match('.*/power', dirname) or + if(not re.match(r'.*/power', dirname) or 'control' not in filenames or tgtval not in filenames): continue @@ -5702,6 +5706,40 @@ def getModes(): fp.close() return modes +def dmidecode_backup(out, fatal=False): + cpath, spath, info = '/proc/cpuinfo', '/sys/class/dmi/id', { + 'bios-vendor': 'bios_vendor', + 'bios-version': 'bios_version', + 'bios-release-date': 'bios_date', + 'system-manufacturer': 'sys_vendor', + 'system-product-name': 'product_name', + 'system-version': 'product_version', + 'system-serial-number': 'product_serial', + 'baseboard-manufacturer': 'board_vendor', + 'baseboard-product-name': 'board_name', + 'baseboard-version': 'board_version', + 'baseboard-serial-number': 'board_serial', + 'chassis-manufacturer': 'chassis_vendor', + 'chassis-version': 'chassis_version', + 'chassis-serial-number': 'chassis_serial', + } + for key in info: + if key not in out: + val = sysvals.getVal(os.path.join(spath, info[key])).strip() + if val and val.lower() != 'to be filled by o.e.m.': + out[key] = val + if 'processor-version' not in out and os.path.exists(cpath): + with open(cpath, 'r') as fp: + for line in fp: + m = re.match(r'^model\s*name\s*\:\s*(?P<c>.*)', line) + if m: + out['processor-version'] = m.group('c').strip() + break + if fatal and len(out) < 1: + doError('dmidecode failed to get info from %s or %s' % \ + (sysvals.mempath, spath)) + return out + # Function: dmidecode # Description: # Read the bios tables and pull out system info @@ -5712,6 +5750,8 @@ def getModes(): # A dict object with all available key/values def dmidecode(mempath, fatal=False): out = dict() + if(not (os.path.exists(mempath) and os.access(mempath, os.R_OK))): + return dmidecode_backup(out, fatal) # the list of values to retrieve, with hardcoded (type, idx) info = { @@ -5727,24 +5767,14 @@ def dmidecode(mempath, fatal=False): 'baseboard-version': (2, 6), 'baseboard-serial-number': (2, 7), 'chassis-manufacturer': (3, 4), - 'chassis-type': (3, 5), 'chassis-version': (3, 6), 'chassis-serial-number': (3, 7), 'processor-manufacturer': (4, 7), 'processor-version': (4, 16), } - if(not os.path.exists(mempath)): - if(fatal): - doError('file does not exist: %s' % mempath) - return out - if(not os.access(mempath, os.R_OK)): - if(fatal): - doError('file is not readable: %s' % mempath) - return out # by default use legacy scan, but try to use EFI first - memaddr = 0xf0000 - memsize = 0x10000 + memaddr, memsize = 0xf0000, 0x10000 for ep in ['/sys/firmware/efi/systab', '/proc/efi/systab']: if not os.path.exists(ep) or not os.access(ep, os.R_OK): continue @@ -5765,11 +5795,7 @@ def dmidecode(mempath, fatal=False): fp.seek(memaddr) buf = fp.read(memsize) except: - if(fatal): - doError('DMI table is unreachable, sorry') - else: - pprint('WARNING: /dev/mem is not readable, ignoring DMI data') - return out + return dmidecode_backup(out, fatal) fp.close() # search for either an SM table or DMI table @@ -5785,10 +5811,7 @@ def dmidecode(mempath, fatal=False): break i += 16 if base == 0 and length == 0 and num == 0: - if(fatal): - doError('Neither SMBIOS nor DMI were found') - else: - return out + return dmidecode_backup(out, fatal) # read in the SM or DMI table try: @@ -5796,11 +5819,7 @@ def dmidecode(mempath, fatal=False): fp.seek(base) buf = fp.read(length) except: - if(fatal): - doError('DMI table is unreachable, sorry') - else: - pprint('WARNING: /dev/mem is not readable, ignoring DMI data') - return out + return dmidecode_backup(out, fatal) fp.close() # scan the table for the values we want @@ -6272,7 +6291,10 @@ def find_in_html(html, start, end, firstonly=True): return out def data_from_html(file, outpath, issues, fulldetail=False): - html = open(file, 'r').read() + try: + html = open(file, 'r').read() + except: + html = ascii(open(file, 'rb').read()) sysvals.htmlfile = os.path.relpath(file, outpath) # extract general info suspend = find_in_html(html, 'Kernel Suspend', 'ms') @@ -6290,7 +6312,7 @@ def data_from_html(file, outpath, issues, fulldetail=False): tstr = dt.strftime('%Y/%m/%d %H:%M:%S') error = find_in_html(html, '<table class="testfail"><tr><td>', '</td>') if error: - m = re.match('[a-z0-9]* failed in (?P<p>\S*).*', error) + m = re.match(r'[a-z0-9]* failed in (?P<p>\S*).*', error) if m: result = 'fail in %s' % m.group('p') else: @@ -6307,8 +6329,9 @@ def data_from_html(file, outpath, issues, fulldetail=False): d.end = 999999999 d.dmesgtext = log.split('\n') tp = d.extractErrorInfo() - for msg in tp.msglist: - sysvals.errorSummary(issues, msg) + if len(issues) < 100: + for msg in tp.msglist: + sysvals.errorSummary(issues, msg) if stmp[2] == 'freeze': extra = d.turbostatInfo() elist = dict() @@ -6325,6 +6348,11 @@ def data_from_html(file, outpath, issues, fulldetail=False): line = find_in_html(log, '# netfix ', '\n') if line: extra['netfix'] = line + line = find_in_html(log, '# command ', '\n') + if line: + m = re.match(r'.* -m (?P<m>\S*).*', line) + if m: + extra['fullmode'] = m.group('m') low = find_in_html(html, 'freeze time: <b>', ' ms</b>') for lowstr in ['waking', '+']: if not low: @@ -6334,7 +6362,7 @@ def data_from_html(file, outpath, issues, fulldetail=False): if lowstr == '+': issue = 'S2LOOPx%d' % len(low.split('+')) else: - m = re.match('.*waking *(?P<n>[0-9]*) *times.*', low) + m = re.match(r'.*waking *(?P<n>[0-9]*) *times.*', low) issue = 'S2WAKEx%s' % m.group('n') if m else 'S2WAKExNaN' match = [i for i in issues if i['match'] == issue] if len(match) > 0: @@ -6352,10 +6380,10 @@ def data_from_html(file, outpath, issues, fulldetail=False): # extract device info devices = dict() for line in html.split('\n'): - m = re.match(' *<div id=\"[a,0-9]*\" *title=\"(?P<title>.*)\" class=\"thread.*', line) + m = re.match(r' *<div id=\"[a,0-9]*\" *title=\"(?P<title>.*)\" class=\"thread.*', line) if not m or 'thread kth' in line or 'thread sec' in line: continue - m = re.match('(?P<n>.*) \((?P<t>[0-9,\.]*) ms\) (?P<p>.*)', m.group('title')) + m = re.match(r'(?P<n>.*) \((?P<t>[0-9,\.]*) ms\) (?P<p>.*)', m.group('title')) if not m: continue name, time, phase = m.group('n'), m.group('t'), m.group('p') @@ -6416,9 +6444,9 @@ def genHtml(subdir, force=False): for filename in filenames: file = os.path.join(dirname, filename) if sysvals.usable(file): - if(re.match('.*_dmesg.txt', filename)): + if(re.match(r'.*_dmesg.txt', filename)): sysvals.dmesgfile = file - elif(re.match('.*_ftrace.txt', filename)): + elif(re.match(r'.*_ftrace.txt', filename)): sysvals.ftracefile = file sysvals.setOutputFile() if (sysvals.dmesgfile or sysvals.ftracefile) and sysvals.htmlfile and \ @@ -6441,7 +6469,7 @@ def runSummary(subdir, local=True, genhtml=False): desc = {'host':[],'mode':[],'kernel':[]} for dirname, dirnames, filenames in os.walk(subdir): for filename in filenames: - if(not re.match('.*.html', filename)): + if(not re.match(r'.*.html', filename)): continue data = data_from_html(os.path.join(dirname, filename), outpath, issues) if(not data): diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index 5899c27c2e2e..5127be34869e 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -16,7 +16,7 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.19"; +static const char *version_str = "v1.20"; static const int supported_api_ver = 3; static struct isst_if_platform_info isst_platform_info; diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c index 05efffbca3b7..e05561d00458 100644 --- a/tools/power/x86/intel-speed-select/isst-core.c +++ b/tools/power/x86/intel-speed-select/isst-core.c @@ -283,6 +283,8 @@ int isst_set_trl(struct isst_id *id, unsigned long long trl) return 0; } +#define MSR_TRL_FREQ_MULTIPLIER 100 + int isst_set_trl_from_current_tdp(struct isst_id *id, unsigned long long trl) { unsigned long long msr_trl; @@ -310,6 +312,10 @@ int isst_set_trl_from_current_tdp(struct isst_id *id, unsigned long long trl) for (i = 0; i < 8; ++i) { unsigned long long _trl = trl[i]; + /* MSR is always in 100 MHz unit */ + if (isst_get_disp_freq_multiplier() == 1) + _trl /= MSR_TRL_FREQ_MULTIPLIER; + msr_trl |= (_trl << (i * 8)); } } diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index 2d6dce2c8f77..b1e6817f1e54 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -14,6 +14,7 @@ turbostat : turbostat.c override CFLAGS += -O2 -Wall -Wextra -I../../../include override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"' +override CFLAGS += -DBUILD_BUG_HEADER='"../../../../include/linux/build_bug.h"' override CFLAGS += -D_FILE_OFFSET_BITS=64 override CFLAGS += -D_FORTIFY_SOURCE=2 @@ -44,10 +45,13 @@ snapshot: turbostat @echo "#define GENMASK(h, l) (((~0UL) << (l)) & (~0UL >> (sizeof(long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h @echo "#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (sizeof(long long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h + @echo '#define BUILD_BUG_ON(cond) do { enum { compile_time_check ## __COUNTER__ = 1/(!(cond)) }; } while (0)' > $(SNAPSHOT)/build_bug.h + @echo PWD=. > $(SNAPSHOT)/Makefile @echo "CFLAGS += -DMSRHEADER='\"msr-index.h\"'" >> $(SNAPSHOT)/Makefile @echo "CFLAGS += -DINTEL_FAMILY_HEADER='\"intel-family.h\"'" >> $(SNAPSHOT)/Makefile - @sed -e's/.*MSRHEADER.*//' -e's/.*INTEL_FAMILY_HEADER.*//' Makefile >> $(SNAPSHOT)/Makefile + @echo "CFLAGS += -DBUILD_BUG_HEADER='\"build_bug.h\"'" >> $(SNAPSHOT)/Makefile + @sed -e's/.*MSRHEADER.*//' -e's/.*INTEL_FAMILY_HEADER.*//' -e's/.*BUILD_BUG_HEADER.*//' Makefile >> $(SNAPSHOT)/Makefile @rm -f $(SNAPSHOT).tar.gz tar cvzf $(SNAPSHOT).tar.gz $(SNAPSHOT) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 8cdf41906e98..9f5d053d4bc6 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -10,6 +10,7 @@ #define _GNU_SOURCE #include MSRHEADER #include INTEL_FAMILY_HEADER +#include BUILD_BUG_HEADER #include <stdarg.h> #include <stdio.h> #include <err.h> @@ -38,7 +39,6 @@ #include <stdbool.h> #include <assert.h> #include <linux/kernel.h> -#include <linux/build_bug.h> #define UNUSED(x) (void)(x) @@ -5695,9 +5695,6 @@ static void probe_intel_uncore_frequency_cluster(void) if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK)) return; - if (quiet) - return; - for (uncore_max_id = 0;; ++uncore_max_id) { sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", uncore_max_id); @@ -5727,6 +5724,14 @@ static void probe_intel_uncore_frequency_cluster(void) sprintf(path, "%s/fabric_cluster_id", path_base); cluster_id = read_sysfs_int(path); + sprintf(path, "%s/current_freq_khz", path_base); + sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id); + + add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); + + if (quiet) + continue; + sprintf(path, "%s/min_freq_khz", path_base); k = read_sysfs_int(path); sprintf(path, "%s/max_freq_khz", path_base); @@ -5743,11 +5748,6 @@ static void probe_intel_uncore_frequency_cluster(void) sprintf(path, "%s/current_freq_khz", path_base); k = read_sysfs_int(path); fprintf(outf, " %d MHz\n", k / 1000); - - sprintf(path, "%s/current_freq_khz", path_base); - sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id); - - add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); } } @@ -8424,7 +8424,7 @@ void cmdline(int argc, char **argv) * Parse some options early, because they may make other options invalid, * like adding the MSR counter with --add and at the same time using --no-msr. */ - while ((opt = getopt_long_only(argc, argv, "MP", long_options, &option_index)) != -1) { + while ((opt = getopt_long_only(argc, argv, "MPn:", long_options, &option_index)) != -1) { switch (opt) { case 'M': no_msr = 1; diff --git a/tools/rcu/rcu-updaters.sh b/tools/rcu/rcu-updaters.sh new file mode 100755 index 000000000000..4ef1397927bb --- /dev/null +++ b/tools/rcu/rcu-updaters.sh @@ -0,0 +1,52 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# +# Run bpftrace to obtain a histogram of the types of primitives used to +# initiate RCU grace periods. The count associated with rcu_gp_init() +# is the number of normal (non-expedited) grace periods. +# +# Usage: rcu-updaters.sh [ duration-in-seconds ] +# +# Note that not all kernel builds have all of these functions. In those +# that do not, this script will issue a diagnostic for each that is not +# found, but continue normally for the rest of the functions. + +duration=${1} +if test -n "${duration}" +then + exitclause='interval:s:'"${duration}"' { exit(); }' +else + echo 'Hit control-C to end sample and print results.' +fi +bpftrace -e 'kprobe:kvfree_call_rcu, + kprobe:call_rcu, + kprobe:call_rcu_tasks, + kprobe:call_rcu_tasks_rude, + kprobe:call_rcu_tasks_trace, + kprobe:call_srcu, + kprobe:rcu_barrier, + kprobe:rcu_barrier_tasks, + kprobe:rcu_barrier_tasks_rude, + kprobe:rcu_barrier_tasks_trace, + kprobe:srcu_barrier, + kprobe:synchronize_rcu, + kprobe:synchronize_rcu_expedited, + kprobe:synchronize_rcu_tasks, + kprobe:synchronize_rcu_tasks_rude, + kprobe:synchronize_rcu_tasks_trace, + kprobe:synchronize_srcu, + kprobe:synchronize_srcu_expedited, + kprobe:get_state_synchronize_rcu, + kprobe:get_state_synchronize_rcu_full, + kprobe:start_poll_synchronize_rcu, + kprobe:start_poll_synchronize_rcu_expedited, + kprobe:start_poll_synchronize_rcu_full, + kprobe:start_poll_synchronize_rcu_expedited_full, + kprobe:poll_state_synchronize_rcu, + kprobe:poll_state_synchronize_rcu_full, + kprobe:cond_synchronize_rcu, + kprobe:cond_synchronize_rcu_full, + kprobe:start_poll_synchronize_srcu, + kprobe:poll_state_synchronize_srcu, + kprobe:rcu_gp_init + { @counts[func] = count(); } '"${exitclause}" diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 3482248aa344..90d5afd52dd0 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -630,11 +630,15 @@ static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port, struct cxl_endpoint_dvsec_info *info) { struct cxl_hdm *cxlhdm = devm_kzalloc(&port->dev, sizeof(*cxlhdm), GFP_KERNEL); + struct device *dev = &port->dev; if (!cxlhdm) return ERR_PTR(-ENOMEM); cxlhdm->port = port; + cxlhdm->interleave_mask = ~0U; + cxlhdm->iw_cap_mask = ~0UL; + dev_set_drvdata(dev, cxlhdm); return cxlhdm; } diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 9039f3709aff..06eed383fdc0 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -21,6 +21,7 @@ TARGETS += drivers/net TARGETS += drivers/net/bonding TARGETS += drivers/net/team TARGETS += drivers/net/virtio_net +TARGETS += drivers/platform/x86/intel/ifs TARGETS += dt TARGETS += efivarfs TARGETS += exec diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c index abe4d58d731d..4c941270d8de 100644 --- a/tools/testing/selftests/arm64/abi/ptrace.c +++ b/tools/testing/selftests/arm64/abi/ptrace.c @@ -47,7 +47,7 @@ static void test_tpidr(pid_t child) /* ...write a new value.. */ write_iov.iov_len = sizeof(uint64_t); - write_val[0] = read_val[0]++; + write_val[0] = read_val[0] + 1; ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_TLS, &write_iov); ksft_test_result(ret == 0, "write_tpidr_one\n"); diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore index 00e52c966281..8362e7ec35ad 100644 --- a/tools/testing/selftests/arm64/fp/.gitignore +++ b/tools/testing/selftests/arm64/fp/.gitignore @@ -2,6 +2,7 @@ fp-pidbench fp-ptrace fp-stress fpsimd-test +kernel-test rdvl-sme rdvl-sve sve-probe-vls diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile index 55d4f00d9e8e..d171021e4cdd 100644 --- a/tools/testing/selftests/arm64/fp/Makefile +++ b/tools/testing/selftests/arm64/fp/Makefile @@ -12,6 +12,7 @@ TEST_GEN_PROGS := \ vec-syscfg \ za-fork za-ptrace TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \ + kernel-test \ rdvl-sme rdvl-sve \ sve-test \ ssve-test \ diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c index dd31647b00a2..faac24bdefeb 100644 --- a/tools/testing/selftests/arm64/fp/fp-stress.c +++ b/tools/testing/selftests/arm64/fp/fp-stress.c @@ -319,6 +319,19 @@ static void start_fpsimd(struct child_data *child, int cpu, int copy) ksft_print_msg("Started %s\n", child->name); } +static void start_kernel(struct child_data *child, int cpu, int copy) +{ + int ret; + + ret = asprintf(&child->name, "KERNEL-%d-%d", cpu, copy); + if (ret == -1) + ksft_exit_fail_msg("asprintf() failed\n"); + + child_start(child, "./kernel-test"); + + ksft_print_msg("Started %s\n", child->name); +} + static void start_sve(struct child_data *child, int vl, int cpu) { int ret; @@ -438,7 +451,7 @@ int main(int argc, char **argv) int ret; int timeout = 10; int cpus, i, j, c; - int sve_vl_count, sme_vl_count, fpsimd_per_cpu; + int sve_vl_count, sme_vl_count; bool all_children_started = false; int seen_children; int sve_vls[MAX_VLS], sme_vls[MAX_VLS]; @@ -482,12 +495,7 @@ int main(int argc, char **argv) have_sme2 = false; } - /* Force context switching if we only have FPSIMD */ - if (!sve_vl_count && !sme_vl_count) - fpsimd_per_cpu = 2; - else - fpsimd_per_cpu = 1; - tests += cpus * fpsimd_per_cpu; + tests += cpus * 2; ksft_print_header(); ksft_set_plan(tests); @@ -542,8 +550,8 @@ int main(int argc, char **argv) tests); for (i = 0; i < cpus; i++) { - for (j = 0; j < fpsimd_per_cpu; j++) - start_fpsimd(&children[num_children++], i, j); + start_fpsimd(&children[num_children++], i, 0); + start_kernel(&children[num_children++], i, 0); for (j = 0; j < sve_vl_count; j++) start_sve(&children[num_children++], sve_vls[j], i); diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c new file mode 100644 index 000000000000..e8da3b4cbd23 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/kernel-test.c @@ -0,0 +1,324 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 ARM Limited. + */ + +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <string.h> +#include <unistd.h> + +#include <sys/socket.h> + +#include <linux/kernel.h> +#include <linux/if_alg.h> + +#define DATA_SIZE (16 * 4096) + +static int base, sock; + +static int digest_len; +static char *ref; +static char *digest; +static char *alg_name; + +static struct iovec data_iov; +static int zerocopy[2]; +static int sigs; +static int iter; + +static void handle_exit_signal(int sig, siginfo_t *info, void *context) +{ + printf("Terminated by signal %d, iterations=%d, signals=%d\n", + sig, iter, sigs); + exit(0); +} + +static void handle_kick_signal(int sig, siginfo_t *info, void *context) +{ + sigs++; +} + +static char *drivers[] = { + "crct10dif-arm64-ce", + /* "crct10dif-arm64-neon", - Same priority as generic */ + "sha1-ce", + "sha224-arm64", + "sha224-arm64-neon", + "sha224-ce", + "sha256-arm64", + "sha256-arm64-neon", + "sha256-ce", + "sha384-ce", + "sha512-ce", + "sha3-224-ce", + "sha3-256-ce", + "sha3-384-ce", + "sha3-512-ce", + "sm3-ce", + "sm3-neon", +}; + +static bool create_socket(void) +{ + FILE *proc; + struct sockaddr_alg addr; + char buf[1024]; + char *c, *driver_name; + bool is_shash, match; + int ret, i; + + ret = socket(AF_ALG, SOCK_SEQPACKET, 0); + if (ret < 0) { + if (errno == EAFNOSUPPORT) { + printf("AF_ALG not supported\n"); + return false; + } + + printf("Failed to create AF_ALG socket: %s (%d)\n", + strerror(errno), errno); + return false; + } + base = ret; + + memset(&addr, 0, sizeof(addr)); + addr.salg_family = AF_ALG; + strncpy((char *)addr.salg_type, "hash", sizeof(addr.salg_type)); + + proc = fopen("/proc/crypto", "r"); + if (!proc) { + printf("Unable to open /proc/crypto\n"); + return false; + } + + driver_name = NULL; + is_shash = false; + match = false; + + /* Look through /proc/crypto for a driver with kernel mode FP usage */ + while (!match) { + c = fgets(buf, sizeof(buf), proc); + if (!c) { + if (feof(proc)) { + printf("Nothing found in /proc/crypto\n"); + return false; + } + continue; + } + + /* Algorithm descriptions are separated by a blank line */ + if (*c == '\n') { + if (is_shash && driver_name) { + for (i = 0; i < ARRAY_SIZE(drivers); i++) { + if (strcmp(drivers[i], + driver_name) == 0) { + match = true; + } + } + } + + if (!match) { + digest_len = 0; + + free(driver_name); + driver_name = NULL; + + free(alg_name); + alg_name = NULL; + + is_shash = false; + } + continue; + } + + /* Remove trailing newline */ + c = strchr(buf, '\n'); + if (c) + *c = '\0'; + + /* Find the field/value separator and start of the value */ + c = strchr(buf, ':'); + if (!c) + continue; + c += 2; + + if (strncmp(buf, "digestsize", strlen("digestsize")) == 0) + sscanf(c, "%d", &digest_len); + + if (strncmp(buf, "name", strlen("name")) == 0) + alg_name = strdup(c); + + if (strncmp(buf, "driver", strlen("driver")) == 0) + driver_name = strdup(c); + + if (strncmp(buf, "type", strlen("type")) == 0) + if (strncmp(c, "shash", strlen("shash")) == 0) + is_shash = true; + } + + strncpy((char *)addr.salg_name, alg_name, + sizeof(addr.salg_name) - 1); + + ret = bind(base, (struct sockaddr *)&addr, sizeof(addr)); + if (ret < 0) { + printf("Failed to bind %s: %s (%d)\n", + addr.salg_name, strerror(errno), errno); + return false; + } + + ret = accept(base, NULL, 0); + if (ret < 0) { + printf("Failed to accept %s: %s (%d)\n", + addr.salg_name, strerror(errno), errno); + return false; + } + + sock = ret; + + ret = pipe(zerocopy); + if (ret != 0) { + printf("Failed to create zerocopy pipe: %s (%d)\n", + strerror(errno), errno); + return false; + } + + ref = malloc(digest_len); + if (!ref) { + printf("Failed to allocated %d byte reference\n", digest_len); + return false; + } + + digest = malloc(digest_len); + if (!digest) { + printf("Failed to allocated %d byte digest\n", digest_len); + return false; + } + + return true; +} + +static bool compute_digest(void *buf) +{ + struct iovec iov; + int ret, wrote; + + iov = data_iov; + while (iov.iov_len) { + ret = vmsplice(zerocopy[1], &iov, 1, SPLICE_F_GIFT); + if (ret < 0) { + printf("Failed to send buffer: %s (%d)\n", + strerror(errno), errno); + return false; + } + + wrote = ret; + ret = splice(zerocopy[0], NULL, sock, NULL, wrote, 0); + if (ret < 0) { + printf("Failed to splice buffer: %s (%d)\n", + strerror(errno), errno); + } else if (ret != wrote) { + printf("Short splice: %d < %d\n", ret, wrote); + } + + iov.iov_len -= wrote; + iov.iov_base += wrote; + } + +reread: + ret = recv(sock, buf, digest_len, 0); + if (ret == 0) { + printf("No digest returned\n"); + return false; + } + if (ret != digest_len) { + if (errno == -EAGAIN) + goto reread; + printf("Failed to get digest: %s (%d)\n", + strerror(errno), errno); + return false; + } + + return true; +} + +int main(void) +{ + char *data; + struct sigaction sa; + int ret; + + /* Ensure we have unbuffered output */ + setvbuf(stdout, NULL, _IOLBF, 0); + + /* The parent will communicate with us via signals */ + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handle_exit_signal; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + sigemptyset(&sa.sa_mask); + ret = sigaction(SIGTERM, &sa, NULL); + if (ret < 0) + printf("Failed to install SIGTERM handler: %s (%d)\n", + strerror(errno), errno); + + sa.sa_sigaction = handle_kick_signal; + ret = sigaction(SIGUSR2, &sa, NULL); + if (ret < 0) + printf("Failed to install SIGUSR2 handler: %s (%d)\n", + strerror(errno), errno); + + data = malloc(DATA_SIZE); + if (!data) { + printf("Failed to allocate data buffer\n"); + return EXIT_FAILURE; + } + memset(data, 0, DATA_SIZE); + + data_iov.iov_base = data; + data_iov.iov_len = DATA_SIZE; + + /* + * If we can't create a socket assume it's a lack of system + * support and fall back to a basic FPSIMD test for the + * benefit of fp-stress. + */ + if (!create_socket()) { + execl("./fpsimd-test", "./fpsimd-test", NULL); + printf("Failed to fall back to fspimd-test: %d (%s)\n", + errno, strerror(errno)); + return EXIT_FAILURE; + } + + /* + * Compute a reference digest we hope is repeatable, we do + * this at runtime partly to make it easier to play with + * parameters. + */ + if (!compute_digest(ref)) { + printf("Failed to compute reference digest\n"); + return EXIT_FAILURE; + } + + printf("AF_ALG using %s\n", alg_name); + + while (true) { + if (!compute_digest(digest)) { + printf("Failed to compute digest, iter=%d\n", iter); + return EXIT_FAILURE; + } + + if (memcmp(ref, digest, digest_len) != 0) { + printf("Digest mismatch, iter=%d\n", iter); + return EXIT_FAILURE; + } + + iter++; + } + + return EXIT_FAILURE; +} diff --git a/tools/testing/selftests/arm64/tags/Makefile b/tools/testing/selftests/arm64/tags/Makefile index 6d29cfde43a2..0a77f35295fb 100644 --- a/tools/testing/selftests/arm64/tags/Makefile +++ b/tools/testing/selftests/arm64/tags/Makefile @@ -2,6 +2,5 @@ CFLAGS += $(KHDR_INCLUDES) TEST_GEN_PROGS := tags_test -TEST_PROGS := run_tags_test.sh include ../../lib.mk diff --git a/tools/testing/selftests/arm64/tags/run_tags_test.sh b/tools/testing/selftests/arm64/tags/run_tags_test.sh deleted file mode 100755 index 745f11379930..000000000000 --- a/tools/testing/selftests/arm64/tags/run_tags_test.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -echo "--------------------" -echo "running tags test" -echo "--------------------" -./tags_test -if [ $? -ne 0 ]; then - echo "[FAIL]" -else - echo "[PASS]" -fi diff --git a/tools/testing/selftests/arm64/tags/tags_test.c b/tools/testing/selftests/arm64/tags/tags_test.c index 955f87c1170d..8ae26e496c89 100644 --- a/tools/testing/selftests/arm64/tags/tags_test.c +++ b/tools/testing/selftests/arm64/tags/tags_test.c @@ -17,19 +17,21 @@ int main(void) static int tbi_enabled = 0; unsigned long tag = 0; struct utsname *ptr; - int err; + + ksft_print_header(); + ksft_set_plan(1); if (prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0) == 0) tbi_enabled = 1; ptr = (struct utsname *)malloc(sizeof(*ptr)); if (!ptr) - ksft_exit_fail_msg("Failed to allocate utsname buffer\n"); + ksft_exit_fail_perror("Failed to allocate utsname buffer"); if (tbi_enabled) tag = 0x42; ptr = (struct utsname *)SET_TAG(ptr, tag); - err = uname(ptr); + ksft_test_result(!uname(ptr), "Syscall successful with tagged address\n"); free(ptr); - return err; + ksft_finished(); } diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 0445ac38bc07..3c7c3e79aa93 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -6,6 +6,7 @@ kprobe_multi_test # needs CONFIG_FPROBE module_attach # prog 'kprobe_multi': failed to auto-attach: -95 fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524 fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524 +tracing_struct/struct_many_args # struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524 fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95 diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index c34adf39eeb2..3ebd77206f98 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -1,9 +1,5 @@ # TEMPORARY # Alphabetical order -exceptions # JIT does not support calling kfunc bpf_throw (exceptions) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) verifier_iterating_callbacks -verifier_arena # JIT does not support arena -arena_htab # JIT does not support arena -arena_atomics diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e0b3887b3d2d..dd49c1d23a60 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -457,7 +457,7 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \ trace_printk.c trace_vprintk.c map_ptr_kern.c \ core_kern.c core_kern_overflow.c test_ringbuf.c \ - test_ringbuf_n.c test_ringbuf_map_key.c + test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c # Generate both light skeleton and libbpf skeleton for these LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \ diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h index 567491f3e1b5..68a51dcc0669 100644 --- a/tools/testing/selftests/bpf/bpf_arena_common.h +++ b/tools/testing/selftests/bpf/bpf_arena_common.h @@ -34,10 +34,12 @@ #if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM) #define __arena __attribute__((address_space(1))) +#define __arena_global __attribute__((address_space(1))) #define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */ #define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */ #else #define __arena +#define __arena_global SEC(".addr_space.1") #define cast_kern(ptr) bpf_addr_space_cast(ptr, 0, 1) #define cast_user(ptr) bpf_addr_space_cast(ptr, 1, 0) #endif diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 3d9e4b8c6b81..828556cdc2f0 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -163,7 +163,7 @@ struct bpf_iter_task_vma; extern int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it, struct task_struct *task, - unsigned long addr) __ksym; + __u64 addr) __ksym; extern struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) __ksym; extern void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) __ksym; @@ -351,6 +351,7 @@ l_true: \ l_continue:; \ }) #else +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define can_loop \ ({ __label__ l_break, l_continue; \ bool ret = true; \ @@ -376,6 +377,33 @@ l_true: \ l_break: break; \ l_continue:; \ }) +#else +#define can_loop \ + ({ __label__ l_break, l_continue; \ + bool ret = true; \ + asm volatile goto("1:.byte 0xe5; \ + .byte 0; \ + .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \ + .short 0" \ + :::: l_break); \ + goto l_continue; \ + l_break: ret = false; \ + l_continue:; \ + ret; \ + }) + +#define cond_break \ + ({ __label__ l_break, l_continue; \ + asm volatile goto("1:.byte 0xe5; \ + .byte 0; \ + .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \ + .short 0" \ + :::: l_break); \ + goto l_continue; \ + l_break: break; \ + l_continue:; \ + }) +#endif #endif #ifndef bpf_nop_mov @@ -524,7 +552,7 @@ extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym; extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym; extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym; extern int bpf_wq_set_callback_impl(struct bpf_wq *wq, - int (callback_fn)(void *map, int *key, struct bpf_wq *wq), + int (callback_fn)(void *map, int *key, void *value), unsigned int flags__k, void *aux__ign) __ksym; #define bpf_wq_set_callback(timer, cb, flags) \ bpf_wq_set_callback_impl(timer, cb, flags, NULL) diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index be91a6919315..3b6675ab4086 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -77,5 +77,5 @@ extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, struct bpf_key *trusted_keyring) __ksym; extern bool bpf_session_is_return(void) __ksym __weak; -extern long *bpf_session_cookie(void) __ksym __weak; +extern __u64 *bpf_session_cookie(void) __ksym __weak; #endif diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c index b1dd889d5d7d..948eb3962732 100644 --- a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c +++ b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c @@ -22,12 +22,12 @@ static int dummy_init_member(const struct btf_type *t, return 0; } -static int dummy_reg(void *kdata) +static int dummy_reg(void *kdata, struct bpf_link *link) { return 0; } -static void dummy_unreg(void *kdata) +static void dummy_unreg(void *kdata, struct bpf_link *link) { } diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 2a18bd320e92..f8962a1dd397 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -53,6 +53,13 @@ struct bpf_testmod_struct_arg_4 { int b; }; +struct bpf_testmod_struct_arg_5 { + char a; + short b; + int c; + long d; +}; + __bpf_hook_start(); noinline int @@ -111,6 +118,15 @@ bpf_testmod_test_struct_arg_8(u64 a, void *b, short c, int d, void *e, } noinline int +bpf_testmod_test_struct_arg_9(u64 a, void *b, short c, int d, void *e, char f, + short g, struct bpf_testmod_struct_arg_5 h, long i) +{ + bpf_testmod_test_struct_arg_result = a + (long)b + c + d + (long)e + + f + g + h.a + h.b + h.c + h.d + i; + return bpf_testmod_test_struct_arg_result; +} + +noinline int bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) { bpf_testmod_test_struct_arg_result = a->a; return bpf_testmod_test_struct_arg_result; @@ -154,6 +170,42 @@ __bpf_kfunc void bpf_kfunc_common_test(void) { } +__bpf_kfunc void bpf_kfunc_dynptr_test(struct bpf_dynptr *ptr, + struct bpf_dynptr *ptr__nullable) +{ +} + +__bpf_kfunc struct bpf_testmod_ctx * +bpf_testmod_ctx_create(int *err) +{ + struct bpf_testmod_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC); + if (!ctx) { + *err = -ENOMEM; + return NULL; + } + refcount_set(&ctx->usage, 1); + + return ctx; +} + +static void testmod_free_cb(struct rcu_head *head) +{ + struct bpf_testmod_ctx *ctx; + + ctx = container_of(head, struct bpf_testmod_ctx, rcu); + kfree(ctx); +} + +__bpf_kfunc void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx) +{ + if (!ctx) + return; + if (refcount_dec_and_test(&ctx->usage)) + call_rcu(&ctx->rcu, testmod_free_cb); +} + struct bpf_testmod_btf_type_tag_1 { int a; }; @@ -269,6 +321,7 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, struct bpf_testmod_struct_arg_2 struct_arg2 = {2, 3}; struct bpf_testmod_struct_arg_3 *struct_arg3; struct bpf_testmod_struct_arg_4 struct_arg4 = {21, 22}; + struct bpf_testmod_struct_arg_5 struct_arg5 = {23, 24, 25, 26}; int i = 1; while (bpf_testmod_return_ptr(i)) @@ -283,6 +336,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, (void *)20, struct_arg4); (void)bpf_testmod_test_struct_arg_8(16, (void *)17, 18, 19, (void *)20, struct_arg4, 23); + (void)bpf_testmod_test_struct_arg_9(16, (void *)17, 18, 19, (void *)20, + 21, 22, struct_arg5, 27); (void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2); @@ -363,8 +418,15 @@ BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_kfunc_common_test) +BTF_ID_FLAGS(func, bpf_kfunc_dynptr_test) +BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE) BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids) +BTF_ID_LIST(bpf_testmod_dtor_ids) +BTF_ID(struct, bpf_testmod_ctx) +BTF_ID(func, bpf_testmod_ctx_release) + static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = { .owner = THIS_MODULE, .set = &bpf_testmod_common_kfunc_ids, @@ -820,7 +882,7 @@ static const struct bpf_verifier_ops bpf_testmod_verifier_ops = { .is_valid_access = bpf_testmod_ops_is_valid_access, }; -static int bpf_dummy_reg(void *kdata) +static int bpf_dummy_reg(void *kdata, struct bpf_link *link) { struct bpf_testmod_ops *ops = kdata; @@ -835,7 +897,7 @@ static int bpf_dummy_reg(void *kdata) return 0; } -static void bpf_dummy_unreg(void *kdata) +static void bpf_dummy_unreg(void *kdata, struct bpf_link *link) { } @@ -871,7 +933,7 @@ struct bpf_struct_ops bpf_bpf_testmod_ops = { .owner = THIS_MODULE, }; -static int bpf_dummy_reg2(void *kdata) +static int bpf_dummy_reg2(void *kdata, struct bpf_link *link) { struct bpf_testmod_ops2 *ops = kdata; @@ -898,6 +960,12 @@ extern int bpf_fentry_test1(int a); static int bpf_testmod_init(void) { + const struct btf_id_dtor_kfunc bpf_testmod_dtors[] = { + { + .btf_id = bpf_testmod_dtor_ids[0], + .kfunc_btf_id = bpf_testmod_dtor_ids[1] + }, + }; int ret; ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set); @@ -906,6 +974,9 @@ static int bpf_testmod_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set); ret = ret ?: register_bpf_struct_ops(&bpf_bpf_testmod_ops, bpf_testmod_ops); ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2); + ret = ret ?: register_btf_id_dtor_kfuncs(bpf_testmod_dtors, + ARRAY_SIZE(bpf_testmod_dtors), + THIS_MODULE); if (ret < 0) return ret; if (bpf_fentry_test1(0) < 0) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h index b0d586a6751f..e587a79f2239 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h @@ -80,6 +80,11 @@ struct sendmsg_args { int msglen; }; +struct bpf_testmod_ctx { + struct callback_head rcu; + refcount_t usage; +}; + struct prog_test_ref_kfunc * bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym; void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; @@ -134,4 +139,9 @@ int bpf_kfunc_call_sock_sendmsg(struct sendmsg_args *args) __ksym; int bpf_kfunc_call_kernel_getsockname(struct addr_args *args) __ksym; int bpf_kfunc_call_kernel_getpeername(struct addr_args *args) __ksym; +void bpf_kfunc_dynptr_test(struct bpf_dynptr *ptr, struct bpf_dynptr *ptr__nullable) __ksym; + +struct bpf_testmod_ctx *bpf_testmod_ctx_create(int *err) __ksym; +void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx) __ksym; + #endif /* _BPF_TESTMOD_KFUNC_H */ diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index eeabd798bc3a..4ca84c8d9116 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -58,9 +58,12 @@ CONFIG_MPLS=y CONFIG_MPLS_IPTUNNEL=y CONFIG_MPLS_ROUTING=y CONFIG_MPTCP=y +CONFIG_NET_ACT_SKBMOD=y +CONFIG_NET_CLS=y CONFIG_NET_CLS_ACT=y CONFIG_NET_CLS_BPF=y CONFIG_NET_CLS_FLOWER=y +CONFIG_NET_CLS_MATCHALL=y CONFIG_NET_FOU=y CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_NET_IPGRE=y @@ -80,8 +83,22 @@ CONFIG_NETFILTER_XT_TARGET_CT=y CONFIG_NETKIT=y CONFIG_NF_CONNTRACK=y CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_DEFRAG_IPV6=y +CONFIG_NF_TABLES=y +CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_NETDEV=y +CONFIG_NF_TABLES_IPV4=y +CONFIG_NF_TABLES_IPV6=y +CONFIG_NETFILTER_INGRESS=y +CONFIG_NF_FLOW_TABLE=y +CONFIG_NF_FLOW_TABLE_INET=y +CONFIG_NETFILTER_NETLINK=y +CONFIG_NFT_FLOW_OFFLOAD=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP6_NF_FILTER=y CONFIG_NF_NAT=y CONFIG_RC_CORE=y CONFIG_SECURITY=y diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 35250e6cde7f..e0cba4178e41 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -94,7 +94,8 @@ static int __start_server(int type, const struct sockaddr *addr, socklen_t addrl if (settimeo(fd, opts->timeout_ms)) goto error_close; - if (opts->post_socket_cb && opts->post_socket_cb(fd, NULL)) { + if (opts->post_socket_cb && + opts->post_socket_cb(fd, opts->cb_opts)) { log_err("Failed to call post_socket_cb"); goto error_close; } @@ -105,7 +106,7 @@ static int __start_server(int type, const struct sockaddr *addr, socklen_t addrl } if (type == SOCK_STREAM) { - if (listen(fd, 1) < 0) { + if (listen(fd, opts->backlog ? MAX(opts->backlog, 0) : 1) < 0) { log_err("Failed to listed on socket"); goto error_close; } @@ -118,22 +119,32 @@ error_close: return -1; } -int start_server(int family, int type, const char *addr_str, __u16 port, - int timeout_ms) +int start_server_str(int family, int type, const char *addr_str, __u16 port, + const struct network_helper_opts *opts) { - struct network_helper_opts opts = { - .timeout_ms = timeout_ms, - }; struct sockaddr_storage addr; socklen_t addrlen; + if (!opts) + opts = &default_opts; + if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) return -1; - return __start_server(type, (struct sockaddr *)&addr, addrlen, &opts); + return __start_server(type, (struct sockaddr *)&addr, addrlen, opts); } -static int reuseport_cb(int fd, const struct post_socket_opts *opts) +int start_server(int family, int type, const char *addr_str, __u16 port, + int timeout_ms) +{ + struct network_helper_opts opts = { + .timeout_ms = timeout_ms, + }; + + return start_server_str(family, type, addr_str, port, &opts); +} + +static int reuseport_cb(int fd, void *opts) { int on = 1; @@ -238,6 +249,34 @@ error_close: return -1; } +int client_socket(int family, int type, + const struct network_helper_opts *opts) +{ + int fd; + + if (!opts) + opts = &default_opts; + + fd = socket(family, type, opts->proto); + if (fd < 0) { + log_err("Failed to create client socket"); + return -1; + } + + if (settimeo(fd, opts->timeout_ms)) + goto error_close; + + if (opts->post_socket_cb && + opts->post_socket_cb(fd, opts->cb_opts)) + goto error_close; + + return fd; + +error_close: + save_errno_close(fd); + return -1; +} + static int connect_fd_to_addr(int fd, const struct sockaddr_storage *addr, socklen_t addrlen, const bool must_fail) @@ -273,15 +312,12 @@ int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t add if (!opts) opts = &default_opts; - fd = socket(addr->ss_family, type, opts->proto); + fd = client_socket(addr->ss_family, type, opts); if (fd < 0) { log_err("Failed to create client socket"); return -1; } - if (settimeo(fd, opts->timeout_ms)) - goto error_close; - if (connect_fd_to_addr(fd, addr, addrlen, opts->must_fail)) goto error_close; @@ -292,66 +328,21 @@ error_close: return -1; } -int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) +int connect_to_fd_opts(int server_fd, int type, const struct network_helper_opts *opts) { struct sockaddr_storage addr; - struct sockaddr_in *addr_in; - socklen_t addrlen, optlen; - int fd, type, protocol; + socklen_t addrlen; if (!opts) opts = &default_opts; - optlen = sizeof(type); - - if (opts->type) { - type = opts->type; - } else { - if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) { - log_err("getsockopt(SOL_TYPE)"); - return -1; - } - } - - if (opts->proto) { - protocol = opts->proto; - } else { - if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) { - log_err("getsockopt(SOL_PROTOCOL)"); - return -1; - } - } - addrlen = sizeof(addr); if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { log_err("Failed to get server addr"); return -1; } - addr_in = (struct sockaddr_in *)&addr; - fd = socket(addr_in->sin_family, type, protocol); - if (fd < 0) { - log_err("Failed to create client socket"); - return -1; - } - - if (settimeo(fd, opts->timeout_ms)) - goto error_close; - - if (opts->cc && opts->cc[0] && - setsockopt(fd, SOL_TCP, TCP_CONGESTION, opts->cc, - strlen(opts->cc) + 1)) - goto error_close; - - if (!opts->noconnect) - if (connect_fd_to_addr(fd, &addr, addrlen, opts->must_fail)) - goto error_close; - - return fd; - -error_close: - save_errno_close(fd); - return -1; + return connect_to_addr(type, &addr, addrlen, opts); } int connect_to_fd(int server_fd, int timeout_ms) @@ -359,8 +350,23 @@ int connect_to_fd(int server_fd, int timeout_ms) struct network_helper_opts opts = { .timeout_ms = timeout_ms, }; + int type, protocol; + socklen_t optlen; + + optlen = sizeof(type); + if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) { + log_err("getsockopt(SOL_TYPE)"); + return -1; + } + + optlen = sizeof(protocol); + if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) { + log_err("getsockopt(SOL_PROTOCOL)"); + return -1; + } + opts.proto = protocol; - return connect_to_fd_opts(server_fd, &opts); + return connect_to_fd_opts(server_fd, type, &opts); } int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms) diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 883c7ea9d8d5..aac5b94d6379 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -21,16 +21,22 @@ typedef __u16 __sum16; #define VIP_NUM 5 #define MAGIC_BYTES 123 -struct post_socket_opts {}; - struct network_helper_opts { - const char *cc; int timeout_ms; bool must_fail; - bool noconnect; - int type; int proto; - int (*post_socket_cb)(int fd, const struct post_socket_opts *opts); + /* +ve: Passed to listen() as-is. + * 0: Default when the test does not set + * a particular value during the struct init. + * It is changed to 1 before passing to listen(). + * Most tests only have one on-going connection. + * -ve: It is changed to 0 before passing to listen(). + * It is useful to force syncookie without + * changing the "tcp_syncookies" sysctl from 1 to 2. + */ + int backlog; + int (*post_socket_cb)(int fd, void *opts); + void *cb_opts; }; /* ipv4 test vector */ @@ -50,6 +56,8 @@ struct ipv6_packet { extern struct ipv6_packet pkt_v6; int settimeo(int fd, int timeout_ms); +int start_server_str(int family, int type, const char *addr_str, __u16 port, + const struct network_helper_opts *opts); int start_server(int family, int type, const char *addr, __u16 port, int timeout_ms); int *start_reuseport_server(int family, int type, const char *addr_str, @@ -58,10 +66,12 @@ int *start_reuseport_server(int family, int type, const char *addr_str, int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t len, const struct network_helper_opts *opts); void free_fds(int *fds, unsigned int nr_close_fds); +int client_socket(int family, int type, + const struct network_helper_opts *opts); int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t len, const struct network_helper_opts *opts); int connect_to_fd(int server_fd, int timeout_ms); -int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts); +int connect_to_fd_opts(int server_fd, int type, const struct network_helper_opts *opts); int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms); int fastopen_connect(int server_fd, const char *data, unsigned int data_len, int timeout_ms); diff --git a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c index 0807a48a58ee..26e7c06c6cb4 100644 --- a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c +++ b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c @@ -146,6 +146,22 @@ static void test_xchg(struct arena_atomics *skel) ASSERT_EQ(skel->arena->xchg32_result, 1, "xchg32_result"); } +static void test_uaf(struct arena_atomics *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + /* No need to attach it, just run it directly */ + prog_fd = bpf_program__fd(skel->progs.uaf); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + ASSERT_EQ(skel->arena->uaf_recovery_fails, 0, "uaf_recovery_fails"); +} + void test_arena_atomics(void) { struct arena_atomics *skel; @@ -180,6 +196,8 @@ void test_arena_atomics(void) test_cmpxchg(skel); if (test__start_subtest("xchg")) test_xchg(skel); + if (test__start_subtest("uaf")) + test_uaf(skel); cleanup: arena_atomics__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 4407ea428e77..070c52c312e5 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -451,7 +451,7 @@ static void pe_subtest(struct test_bpf_cookie *skel) attr.type = PERF_TYPE_SOFTWARE; attr.config = PERF_COUNT_SW_CPU_CLOCK; attr.freq = 1; - attr.sample_freq = 1000; + attr.sample_freq = 10000; pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); if (!ASSERT_GE(pfd, 0, "perf_fd")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index b30ff6b3b81a..a4a1f93878d4 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -104,6 +104,7 @@ static void test_bpf_nf_ct(int mode) ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple"); ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0"); + ASSERT_EQ(skel->bss->test_einval_reserved_new, -EINVAL, "Test EINVAL for reserved in new struct not set to 0"); ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1"); ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ"); ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP"); @@ -122,6 +123,12 @@ static void test_bpf_nf_ct(int mode) ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark"); ASSERT_EQ(skel->data->test_snat_addr, 0, "Test for source natting"); ASSERT_EQ(skel->data->test_dnat_addr, 0, "Test for destination natting"); + ASSERT_EQ(skel->data->test_ct_zone_id_alloc_entry, 0, "Test for alloc new entry in specified ct zone"); + ASSERT_EQ(skel->data->test_ct_zone_id_insert_entry, 0, "Test for insert new entry in specified ct zone"); + ASSERT_EQ(skel->data->test_ct_zone_id_succ_lookup, 0, "Test for successful lookup in specified ct_zone"); + ASSERT_EQ(skel->bss->test_ct_zone_dir_enoent_lookup, -ENOENT, "Test ENOENT for lookup with wrong ct zone dir"); + ASSERT_EQ(skel->bss->test_ct_zone_id_enoent_lookup, -ENOENT, "Test ENOENT for lookup in wrong ct zone"); + end: if (client_fd != -1) close(client_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 0aca02532794..63422f4f3896 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -23,6 +23,11 @@ static const unsigned int total_bytes = 10 * 1024 * 1024; static int expected_stg = 0xeB9F; +struct cb_opts { + const char *cc; + int map_fd; +}; + static int settcpca(int fd, const char *tcp_ca) { int err; @@ -34,55 +39,66 @@ static int settcpca(int fd, const char *tcp_ca) return 0; } -static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) +static bool start_test(char *addr_str, + const struct network_helper_opts *srv_opts, + const struct network_helper_opts *cli_opts, + int *srv_fd, int *cli_fd) { - int lfd = -1, fd = -1; - int err; + *srv_fd = start_server_str(AF_INET6, SOCK_STREAM, addr_str, 0, srv_opts); + if (!ASSERT_NEQ(*srv_fd, -1, "start_server_str")) + goto err; - lfd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); - if (!ASSERT_NEQ(lfd, -1, "socket")) - return; - - fd = socket(AF_INET6, SOCK_STREAM, 0); - if (!ASSERT_NEQ(fd, -1, "socket")) { - close(lfd); - return; - } + /* connect to server */ + *cli_fd = connect_to_fd_opts(*srv_fd, SOCK_STREAM, cli_opts); + if (!ASSERT_NEQ(*cli_fd, -1, "connect_to_fd_opts")) + goto err; - if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca)) - goto done; + return true; - if (sk_stg_map) { - err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd, - &expected_stg, BPF_NOEXIST); - if (!ASSERT_OK(err, "bpf_map_update_elem(sk_stg_map)")) - goto done; +err: + if (*srv_fd != -1) { + close(*srv_fd); + *srv_fd = -1; } + if (*cli_fd != -1) { + close(*cli_fd); + *cli_fd = -1; + } + return false; +} - /* connect to server */ - err = connect_fd_to_fd(fd, lfd, 0); - if (!ASSERT_NEQ(err, -1, "connect")) - goto done; - - if (sk_stg_map) { - int tmp_stg; +static void do_test(const struct network_helper_opts *opts) +{ + int lfd = -1, fd = -1; - err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd, - &tmp_stg); - if (!ASSERT_ERR(err, "bpf_map_lookup_elem(sk_stg_map)") || - !ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem(sk_stg_map)")) - goto done; - } + if (!start_test(NULL, opts, opts, &lfd, &fd)) + goto done; ASSERT_OK(send_recv_data(lfd, fd, total_bytes), "send_recv_data"); done: - close(lfd); - close(fd); + if (lfd != -1) + close(lfd); + if (fd != -1) + close(fd); +} + +static int cc_cb(int fd, void *opts) +{ + struct cb_opts *cb_opts = (struct cb_opts *)opts; + + return settcpca(fd, cb_opts->cc); } static void test_cubic(void) { + struct cb_opts cb_opts = { + .cc = "bpf_cubic", + }; + struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; struct bpf_cubic *cubic_skel; struct bpf_link *link; @@ -96,7 +112,7 @@ static void test_cubic(void) return; } - do_test("bpf_cubic", NULL); + do_test(&opts); ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called"); @@ -104,8 +120,37 @@ static void test_cubic(void) bpf_cubic__destroy(cubic_skel); } +static int stg_post_socket_cb(int fd, void *opts) +{ + struct cb_opts *cb_opts = (struct cb_opts *)opts; + int err; + + err = settcpca(fd, cb_opts->cc); + if (err) + return err; + + err = bpf_map_update_elem(cb_opts->map_fd, &fd, + &expected_stg, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(sk_stg_map)")) + return err; + + return 0; +} + static void test_dctcp(void) { + struct cb_opts cb_opts = { + .cc = "bpf_dctcp", + }; + struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; + struct network_helper_opts cli_opts = { + .post_socket_cb = stg_post_socket_cb, + .cb_opts = &cb_opts, + }; + int lfd = -1, fd = -1, tmp_stg, err; struct bpf_dctcp *dctcp_skel; struct bpf_link *link; @@ -119,11 +164,58 @@ static void test_dctcp(void) return; } - do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map); + cb_opts.map_fd = bpf_map__fd(dctcp_skel->maps.sk_stg_map); + if (!start_test(NULL, &opts, &cli_opts, &lfd, &fd)) + goto done; + + err = bpf_map_lookup_elem(cb_opts.map_fd, &fd, &tmp_stg); + if (!ASSERT_ERR(err, "bpf_map_lookup_elem(sk_stg_map)") || + !ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem(sk_stg_map)")) + goto done; + + ASSERT_OK(send_recv_data(lfd, fd, total_bytes), "send_recv_data"); ASSERT_EQ(dctcp_skel->bss->stg_result, expected_stg, "stg_result"); +done: bpf_link__destroy(link); bpf_dctcp__destroy(dctcp_skel); + if (lfd != -1) + close(lfd); + if (fd != -1) + close(fd); +} + +static void test_dctcp_autoattach_map(void) +{ + struct cb_opts cb_opts = { + .cc = "bpf_dctcp", + }; + struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; + struct bpf_dctcp *dctcp_skel; + struct bpf_link *link; + + dctcp_skel = bpf_dctcp__open_and_load(); + if (!ASSERT_OK_PTR(dctcp_skel, "bpf_dctcp__open_and_load")) + return; + + bpf_map__set_autoattach(dctcp_skel->maps.dctcp, true); + bpf_map__set_autoattach(dctcp_skel->maps.dctcp_nouse, false); + + if (!ASSERT_OK(bpf_dctcp__attach(dctcp_skel), "bpf_dctcp__attach")) + goto destroy; + + /* struct_ops is auto-attached */ + link = dctcp_skel->links.dctcp; + if (!ASSERT_OK_PTR(link, "link")) + goto destroy; + + do_test(&opts); + +destroy: + bpf_dctcp__destroy(dctcp_skel); } static char *err_str; @@ -171,11 +263,22 @@ static void test_invalid_license(void) static void test_dctcp_fallback(void) { int err, lfd = -1, cli_fd = -1, srv_fd = -1; - struct network_helper_opts opts = { - .cc = "cubic", - }; struct bpf_dctcp *dctcp_skel; struct bpf_link *link = NULL; + struct cb_opts dctcp = { + .cc = "bpf_dctcp", + }; + struct network_helper_opts srv_opts = { + .post_socket_cb = cc_cb, + .cb_opts = &dctcp, + }; + struct cb_opts cubic = { + .cc = "cubic", + }; + struct network_helper_opts cli_opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cubic, + }; char srv_cc[16]; socklen_t cc_len = sizeof(srv_cc); @@ -190,13 +293,7 @@ static void test_dctcp_fallback(void) if (!ASSERT_OK_PTR(link, "dctcp link")) goto done; - lfd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); - if (!ASSERT_GE(lfd, 0, "lfd") || - !ASSERT_OK(settcpca(lfd, "bpf_dctcp"), "lfd=>bpf_dctcp")) - goto done; - - cli_fd = connect_to_fd_opts(lfd, &opts); - if (!ASSERT_GE(cli_fd, 0, "cli_fd")) + if (!start_test("::1", &srv_opts, &cli_opts, &lfd, &cli_fd)) goto done; srv_fd = accept(lfd, NULL, 0); @@ -297,6 +394,13 @@ static void test_unsupp_cong_op(void) static void test_update_ca(void) { + struct cb_opts cb_opts = { + .cc = "tcp_ca_update", + }; + struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; struct tcp_ca_update *skel; struct bpf_link *link; int saved_ca1_cnt; @@ -307,25 +411,34 @@ static void test_update_ca(void) return; link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); - ASSERT_OK_PTR(link, "attach_struct_ops"); + if (!ASSERT_OK_PTR(link, "attach_struct_ops")) + goto out; - do_test("tcp_ca_update", NULL); + do_test(&opts); saved_ca1_cnt = skel->bss->ca1_cnt; ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt"); err = bpf_link__update_map(link, skel->maps.ca_update_2); ASSERT_OK(err, "update_map"); - do_test("tcp_ca_update", NULL); + do_test(&opts); ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt"); bpf_link__destroy(link); +out: tcp_ca_update__destroy(skel); } static void test_update_wrong(void) { + struct cb_opts cb_opts = { + .cc = "tcp_ca_update", + }; + struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; struct tcp_ca_update *skel; struct bpf_link *link; int saved_ca1_cnt; @@ -336,24 +449,33 @@ static void test_update_wrong(void) return; link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); - ASSERT_OK_PTR(link, "attach_struct_ops"); + if (!ASSERT_OK_PTR(link, "attach_struct_ops")) + goto out; - do_test("tcp_ca_update", NULL); + do_test(&opts); saved_ca1_cnt = skel->bss->ca1_cnt; ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt"); err = bpf_link__update_map(link, skel->maps.ca_wrong); ASSERT_ERR(err, "update_map"); - do_test("tcp_ca_update", NULL); + do_test(&opts); ASSERT_GT(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); bpf_link__destroy(link); +out: tcp_ca_update__destroy(skel); } static void test_mixed_links(void) { + struct cb_opts cb_opts = { + .cc = "tcp_ca_update", + }; + struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; struct tcp_ca_update *skel; struct bpf_link *link, *link_nl; int err; @@ -363,12 +485,13 @@ static void test_mixed_links(void) return; link_nl = bpf_map__attach_struct_ops(skel->maps.ca_no_link); - ASSERT_OK_PTR(link_nl, "attach_struct_ops_nl"); + if (!ASSERT_OK_PTR(link_nl, "attach_struct_ops_nl")) + goto out; link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); ASSERT_OK_PTR(link, "attach_struct_ops"); - do_test("tcp_ca_update", NULL); + do_test(&opts); ASSERT_GT(skel->bss->ca1_cnt, 0, "ca1_ca1_cnt"); err = bpf_link__update_map(link, skel->maps.ca_no_link); @@ -376,6 +499,7 @@ static void test_mixed_links(void) bpf_link__destroy(link); bpf_link__destroy(link_nl); +out: tcp_ca_update__destroy(skel); } @@ -418,7 +542,8 @@ static void test_link_replace(void) bpf_link__destroy(link); link = bpf_map__attach_struct_ops(skel->maps.ca_update_2); - ASSERT_OK_PTR(link, "attach_struct_ops_2nd"); + if (!ASSERT_OK_PTR(link, "attach_struct_ops_2nd")) + goto out; /* BPF_F_REPLACE with a wrong old map Fd. It should fail! * @@ -441,6 +566,7 @@ static void test_link_replace(void) bpf_link__destroy(link); +out: tcp_ca_update__destroy(skel); } @@ -455,6 +581,13 @@ static void test_tcp_ca_kfunc(void) static void test_cc_cubic(void) { + struct cb_opts cb_opts = { + .cc = "bpf_cc_cubic", + }; + struct network_helper_opts opts = { + .post_socket_cb = cc_cb, + .cb_opts = &cb_opts, + }; struct bpf_cc_cubic *cc_cubic_skel; struct bpf_link *link; @@ -468,7 +601,7 @@ static void test_cc_cubic(void) return; } - do_test("bpf_cc_cubic", NULL); + do_test(&opts); bpf_link__destroy(link); bpf_cc_cubic__destroy(cc_cubic_skel); @@ -506,4 +639,6 @@ void test_bpf_tcp_ca(void) test_tcp_ca_kfunc(); if (test__start_subtest("cc_cubic")) test_cc_cubic(); + if (test__start_subtest("dctcp_autoattach_map")) + test_dctcp_autoattach_map(); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 4c6ada5b270b..73f669014b69 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -45,12 +45,6 @@ err_out: return err; } -struct scale_test_def { - const char *file; - enum bpf_prog_type attach_type; - bool fails; -}; - static void scale_test(const char *file, enum bpf_prog_type attach_type, bool should_fail) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_distill.c b/tools/testing/selftests/bpf/prog_tests/btf_distill.c new file mode 100644 index 000000000000..bfbe795823a2 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_distill.c @@ -0,0 +1,552 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Oracle and/or its affiliates. */ + +#include <test_progs.h> +#include <bpf/btf.h> +#include "btf_helpers.h" + +/* Fabricate base, split BTF with references to base types needed; then create + * split BTF with distilled base BTF and ensure expectations are met: + * - only referenced base types from split BTF are present + * - struct/union/enum are represented as empty unless anonymous, when they + * are represented in full in split BTF + */ +static void test_distilled_base(void) +{ + struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_ptr(btf1, 1); /* [2] ptr to int */ + btf__add_struct(btf1, "s1", 8); /* [3] struct s1 { */ + btf__add_field(btf1, "f1", 2, 0, 0); /* int *f1; */ + /* } */ + btf__add_struct(btf1, "", 12); /* [4] struct { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + btf__add_field(btf1, "f2", 3, 32, 0); /* struct s1 f2; */ + /* } */ + btf__add_int(btf1, "unsigned int", 4, 0); /* [5] unsigned int */ + btf__add_union(btf1, "u1", 12); /* [6] union u1 { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + btf__add_field(btf1, "f2", 2, 0, 0); /* int *f2; */ + /* } */ + btf__add_union(btf1, "", 4); /* [7] union { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ + btf__add_enum(btf1, "e1", 4); /* [8] enum e1 { */ + btf__add_enum_value(btf1, "v1", 1); /* v1 = 1; */ + /* } */ + btf__add_enum(btf1, "", 4); /* [9] enum { */ + btf__add_enum_value(btf1, "av1", 2); /* av1 = 2; */ + /* } */ + btf__add_enum64(btf1, "e641", 8, true); /* [10] enum64 { */ + btf__add_enum64_value(btf1, "v1", 1024); /* v1 = 1024; */ + /* } */ + btf__add_enum64(btf1, "", 8, true); /* [11] enum64 { */ + btf__add_enum64_value(btf1, "v1", 1025); /* v1 = 1025; */ + /* } */ + btf__add_struct(btf1, "unneeded", 4); /* [12] struct unneeded { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ + btf__add_struct(btf1, "embedded", 4); /* [13] struct embedded { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ + btf__add_func_proto(btf1, 1); /* [14] int (*)(int *p1); */ + btf__add_func_param(btf1, "p1", 1); + + btf__add_array(btf1, 1, 1, 3); /* [15] int [3]; */ + + btf__add_struct(btf1, "from_proto", 4); /* [16] struct from_proto { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ + btf__add_union(btf1, "u1", 4); /* [17] union u1 { */ + btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */ + /* } */ + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] STRUCT 's1' size=8 vlen=1\n" + "\t'f1' type_id=2 bits_offset=0", + "[4] STRUCT '(anon)' size=12 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=3 bits_offset=32", + "[5] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)", + "[6] UNION 'u1' size=12 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=2 bits_offset=0", + "[7] UNION '(anon)' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[8] ENUM 'e1' encoding=UNSIGNED size=4 vlen=1\n" + "\t'v1' val=1", + "[9] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n" + "\t'av1' val=2", + "[10] ENUM64 'e641' encoding=SIGNED size=8 vlen=1\n" + "\t'v1' val=1024", + "[11] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n" + "\t'v1' val=1025", + "[12] STRUCT 'unneeded' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[13] STRUCT 'embedded' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[14] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n" + "\t'p1' type_id=1", + "[15] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3", + "[16] STRUCT 'from_proto' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[17] UNION 'u1' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0"); + + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + + btf__add_ptr(btf2, 3); /* [18] ptr to struct s1 */ + /* add ptr to struct anon */ + btf__add_ptr(btf2, 4); /* [19] ptr to struct (anon) */ + btf__add_const(btf2, 6); /* [20] const union u1 */ + btf__add_restrict(btf2, 7); /* [21] restrict union (anon) */ + btf__add_volatile(btf2, 8); /* [22] volatile enum e1 */ + btf__add_typedef(btf2, "et", 9); /* [23] typedef enum (anon) */ + btf__add_const(btf2, 10); /* [24] const enum64 e641 */ + btf__add_ptr(btf2, 11); /* [25] restrict enum64 (anon) */ + btf__add_struct(btf2, "with_embedded", 4); /* [26] struct with_embedded { */ + btf__add_field(btf2, "f1", 13, 0, 0); /* struct embedded f1; */ + /* } */ + btf__add_func(btf2, "fn", BTF_FUNC_STATIC, 14); /* [27] int fn(int p1); */ + btf__add_typedef(btf2, "arraytype", 15); /* [28] typedef int[3] foo; */ + btf__add_func_proto(btf2, 1); /* [29] int (*)(struct from proto p1); */ + btf__add_func_param(btf2, "p1", 16); + + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] STRUCT 's1' size=8 vlen=1\n" + "\t'f1' type_id=2 bits_offset=0", + "[4] STRUCT '(anon)' size=12 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=3 bits_offset=32", + "[5] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)", + "[6] UNION 'u1' size=12 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=2 bits_offset=0", + "[7] UNION '(anon)' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[8] ENUM 'e1' encoding=UNSIGNED size=4 vlen=1\n" + "\t'v1' val=1", + "[9] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n" + "\t'av1' val=2", + "[10] ENUM64 'e641' encoding=SIGNED size=8 vlen=1\n" + "\t'v1' val=1024", + "[11] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n" + "\t'v1' val=1025", + "[12] STRUCT 'unneeded' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[13] STRUCT 'embedded' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[14] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n" + "\t'p1' type_id=1", + "[15] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3", + "[16] STRUCT 'from_proto' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[17] UNION 'u1' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[18] PTR '(anon)' type_id=3", + "[19] PTR '(anon)' type_id=4", + "[20] CONST '(anon)' type_id=6", + "[21] RESTRICT '(anon)' type_id=7", + "[22] VOLATILE '(anon)' type_id=8", + "[23] TYPEDEF 'et' type_id=9", + "[24] CONST '(anon)' type_id=10", + "[25] PTR '(anon)' type_id=11", + "[26] STRUCT 'with_embedded' size=4 vlen=1\n" + "\t'f1' type_id=13 bits_offset=0", + "[27] FUNC 'fn' type_id=14 linkage=static", + "[28] TYPEDEF 'arraytype' type_id=15", + "[29] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n" + "\t'p1' type_id=16"); + + if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4), + "distilled_base") || + !ASSERT_OK_PTR(btf3, "distilled_base") || + !ASSERT_OK_PTR(btf4, "distilled_split") || + !ASSERT_EQ(8, btf__type_cnt(btf3), "distilled_base_type_cnt")) + goto cleanup; + + VALIDATE_RAW_BTF( + btf4, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] STRUCT 's1' size=8 vlen=0", + "[3] UNION 'u1' size=12 vlen=0", + "[4] ENUM 'e1' encoding=UNSIGNED size=4 vlen=0", + "[5] ENUM 'e641' encoding=UNSIGNED size=8 vlen=0", + "[6] STRUCT 'embedded' size=4 vlen=0", + "[7] STRUCT 'from_proto' size=4 vlen=0", + /* split BTF; these types should match split BTF above from 17-28, with + * updated type id references + */ + "[8] PTR '(anon)' type_id=2", + "[9] PTR '(anon)' type_id=20", + "[10] CONST '(anon)' type_id=3", + "[11] RESTRICT '(anon)' type_id=21", + "[12] VOLATILE '(anon)' type_id=4", + "[13] TYPEDEF 'et' type_id=22", + "[14] CONST '(anon)' type_id=5", + "[15] PTR '(anon)' type_id=23", + "[16] STRUCT 'with_embedded' size=4 vlen=1\n" + "\t'f1' type_id=6 bits_offset=0", + "[17] FUNC 'fn' type_id=24 linkage=static", + "[18] TYPEDEF 'arraytype' type_id=25", + "[19] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n" + "\t'p1' type_id=7", + /* split BTF types added from original base BTF below */ + "[20] STRUCT '(anon)' size=12 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=2 bits_offset=32", + "[21] UNION '(anon)' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[22] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n" + "\t'av1' val=2", + "[23] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n" + "\t'v1' val=1025", + "[24] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n" + "\t'p1' type_id=1", + "[25] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3"); + + if (!ASSERT_EQ(btf__relocate(btf4, btf1), 0, "relocate_split")) + goto cleanup; + + VALIDATE_RAW_BTF( + btf4, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1", + "[3] STRUCT 's1' size=8 vlen=1\n" + "\t'f1' type_id=2 bits_offset=0", + "[4] STRUCT '(anon)' size=12 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=3 bits_offset=32", + "[5] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)", + "[6] UNION 'u1' size=12 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=2 bits_offset=0", + "[7] UNION '(anon)' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[8] ENUM 'e1' encoding=UNSIGNED size=4 vlen=1\n" + "\t'v1' val=1", + "[9] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n" + "\t'av1' val=2", + "[10] ENUM64 'e641' encoding=SIGNED size=8 vlen=1\n" + "\t'v1' val=1024", + "[11] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n" + "\t'v1' val=1025", + "[12] STRUCT 'unneeded' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[13] STRUCT 'embedded' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[14] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n" + "\t'p1' type_id=1", + "[15] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3", + "[16] STRUCT 'from_proto' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[17] UNION 'u1' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[18] PTR '(anon)' type_id=3", + "[19] PTR '(anon)' type_id=30", + "[20] CONST '(anon)' type_id=6", + "[21] RESTRICT '(anon)' type_id=31", + "[22] VOLATILE '(anon)' type_id=8", + "[23] TYPEDEF 'et' type_id=32", + "[24] CONST '(anon)' type_id=10", + "[25] PTR '(anon)' type_id=33", + "[26] STRUCT 'with_embedded' size=4 vlen=1\n" + "\t'f1' type_id=13 bits_offset=0", + "[27] FUNC 'fn' type_id=34 linkage=static", + "[28] TYPEDEF 'arraytype' type_id=35", + "[29] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n" + "\t'p1' type_id=16", + /* below here are (duplicate) anon base types added by distill + * process to split BTF. + */ + "[30] STRUCT '(anon)' size=12 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=3 bits_offset=32", + "[31] UNION '(anon)' size=4 vlen=1\n" + "\t'f1' type_id=1 bits_offset=0", + "[32] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n" + "\t'av1' val=2", + "[33] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n" + "\t'v1' val=1025", + "[34] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n" + "\t'p1' type_id=1", + "[35] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3"); + +cleanup: + btf__free(btf4); + btf__free(btf3); + btf__free(btf2); + btf__free(btf1); +} + +/* ensure we can cope with multiple types with the same name in + * distilled base BTF. In this case because sizes are different, + * we can still disambiguate them. + */ +static void test_distilled_base_multi(void) +{ + struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_int(btf1, "int", 8, BTF_INT_SIGNED); /* [2] int */ + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED"); + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + btf__add_ptr(btf2, 1); + btf__add_const(btf2, 2); + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED", + "[3] PTR '(anon)' type_id=1", + "[4] CONST '(anon)' type_id=2"); + if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4), + "distilled_base") || + !ASSERT_OK_PTR(btf3, "distilled_base") || + !ASSERT_OK_PTR(btf4, "distilled_split") || + !ASSERT_EQ(3, btf__type_cnt(btf3), "distilled_base_type_cnt")) + goto cleanup; + VALIDATE_RAW_BTF( + btf3, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED"); + if (!ASSERT_EQ(btf__relocate(btf4, btf1), 0, "relocate_split")) + goto cleanup; + + VALIDATE_RAW_BTF( + btf4, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED", + "[3] PTR '(anon)' type_id=1", + "[4] CONST '(anon)' type_id=2"); + +cleanup: + btf__free(btf4); + btf__free(btf3); + btf__free(btf2); + btf__free(btf1); +} + +/* If a needed type is not present in the base BTF we wish to relocate + * with, btf__relocate() should error our. + */ +static void test_distilled_base_missing_err(void) +{ + struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL, *btf5 = NULL; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_int(btf1, "int", 8, BTF_INT_SIGNED); /* [2] int */ + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED"); + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + btf__add_ptr(btf2, 1); + btf__add_const(btf2, 2); + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED", + "[3] PTR '(anon)' type_id=1", + "[4] CONST '(anon)' type_id=2"); + if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4), + "distilled_base") || + !ASSERT_OK_PTR(btf3, "distilled_base") || + !ASSERT_OK_PTR(btf4, "distilled_split") || + !ASSERT_EQ(3, btf__type_cnt(btf3), "distilled_base_type_cnt")) + goto cleanup; + VALIDATE_RAW_BTF( + btf3, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED"); + btf5 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf")) + return; + btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [1] int */ + VALIDATE_RAW_BTF( + btf5, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED"); + ASSERT_EQ(btf__relocate(btf4, btf5), -EINVAL, "relocate_split"); + +cleanup: + btf__free(btf5); + btf__free(btf4); + btf__free(btf3); + btf__free(btf2); + btf__free(btf1); +} + +/* With 2 types of same size in distilled base BTF, relocation should + * fail as we have no means to choose between them. + */ +static void test_distilled_base_multi_err(void) +{ + struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [2] int */ + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED"); + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + btf__add_ptr(btf2, 1); + btf__add_const(btf2, 2); + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[3] PTR '(anon)' type_id=1", + "[4] CONST '(anon)' type_id=2"); + if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4), + "distilled_base") || + !ASSERT_OK_PTR(btf3, "distilled_base") || + !ASSERT_OK_PTR(btf4, "distilled_split") || + !ASSERT_EQ(3, btf__type_cnt(btf3), "distilled_base_type_cnt")) + goto cleanup; + VALIDATE_RAW_BTF( + btf3, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED"); + ASSERT_EQ(btf__relocate(btf4, btf1), -EINVAL, "relocate_split"); +cleanup: + btf__free(btf4); + btf__free(btf3); + btf__free(btf2); + btf__free(btf1); +} + +/* With 2 types of same size in base BTF, relocation should + * fail as we have no means to choose between them. + */ +static void test_distilled_base_multi_err2(void) +{ + struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL, *btf5 = NULL; + + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */ + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED"); + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + btf__add_ptr(btf2, 1); + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] PTR '(anon)' type_id=1"); + if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4), + "distilled_base") || + !ASSERT_OK_PTR(btf3, "distilled_base") || + !ASSERT_OK_PTR(btf4, "distilled_split") || + !ASSERT_EQ(2, btf__type_cnt(btf3), "distilled_base_type_cnt")) + goto cleanup; + VALIDATE_RAW_BTF( + btf3, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED"); + btf5 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf")) + return; + btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [2] int */ + VALIDATE_RAW_BTF( + btf5, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED"); + ASSERT_EQ(btf__relocate(btf4, btf5), -EINVAL, "relocate_split"); +cleanup: + btf__free(btf5); + btf__free(btf4); + btf__free(btf3); + btf__free(btf2); + btf__free(btf1); +} + +/* create split reference BTF from vmlinux + split BTF with a few type references; + * ensure the resultant split reference BTF is as expected, containing only types + * needed to disambiguate references from split BTF. + */ +static void test_distilled_base_vmlinux(void) +{ + struct btf *split_btf = NULL, *vmlinux_btf = btf__load_vmlinux_btf(); + struct btf *split_dist = NULL, *base_dist = NULL; + __s32 int_id, myint_id; + + if (!ASSERT_OK_PTR(vmlinux_btf, "load_vmlinux")) + return; + int_id = btf__find_by_name_kind(vmlinux_btf, "int", BTF_KIND_INT); + if (!ASSERT_GT(int_id, 0, "find_int")) + goto cleanup; + split_btf = btf__new_empty_split(vmlinux_btf); + if (!ASSERT_OK_PTR(split_btf, "new_split")) + goto cleanup; + myint_id = btf__add_typedef(split_btf, "myint", int_id); + btf__add_ptr(split_btf, myint_id); + + if (!ASSERT_EQ(btf__distill_base(split_btf, &base_dist, &split_dist), 0, + "distill_vmlinux_base")) + goto cleanup; + + if (!ASSERT_OK_PTR(split_dist, "split_distilled") || + !ASSERT_OK_PTR(base_dist, "base_dist")) + goto cleanup; + VALIDATE_RAW_BTF( + split_dist, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] TYPEDEF 'myint' type_id=1", + "[3] PTR '(anon)' type_id=2"); + +cleanup: + btf__free(split_dist); + btf__free(base_dist); + btf__free(split_btf); + btf__free(vmlinux_btf); +} + +void test_btf_distill(void) +{ + if (test__start_subtest("distilled_base")) + test_distilled_base(); + if (test__start_subtest("distilled_base_multi")) + test_distilled_base_multi(); + if (test__start_subtest("distilled_base_missing_err")) + test_distilled_base_missing_err(); + if (test__start_subtest("distilled_base_multi_err")) + test_distilled_base_multi_err(); + if (test__start_subtest("distilled_base_multi_err2")) + test_distilled_base_multi_err2(); + if (test__start_subtest("distilled_base_vmlinux")) + test_distilled_base_vmlinux(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf_field_iter.c b/tools/testing/selftests/bpf/prog_tests/btf_field_iter.c new file mode 100644 index 000000000000..32159d3eb281 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_field_iter.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Oracle and/or its affiliates. */ + +#include <test_progs.h> +#include <bpf/btf.h> +#include "btf_helpers.h" +#include "bpf/libbpf_internal.h" + +struct field_data { + __u32 ids[5]; + const char *strs[5]; +} fields[] = { + { .ids = {}, .strs = {} }, + { .ids = {}, .strs = { "int" } }, + { .ids = {}, .strs = { "int64" } }, + { .ids = { 1 }, .strs = { "" } }, + { .ids = { 2, 1 }, .strs = { "" } }, + { .ids = { 3, 1 }, .strs = { "s1", "f1", "f2" } }, + { .ids = { 1, 5 }, .strs = { "u1", "f1", "f2" } }, + { .ids = {}, .strs = { "e1", "v1", "v2" } }, + { .ids = {}, .strs = { "fw1" } }, + { .ids = { 1 }, .strs = { "t" } }, + { .ids = { 2 }, .strs = { "" } }, + { .ids = { 1 }, .strs = { "" } }, + { .ids = { 3 }, .strs = { "" } }, + { .ids = { 1, 1, 3 }, .strs = { "", "p1", "p2" } }, + { .ids = { 13 }, .strs = { "func" } }, + { .ids = { 1 }, .strs = { "var1" } }, + { .ids = { 3 }, .strs = { "var2" } }, + { .ids = {}, .strs = { "float" } }, + { .ids = { 11 }, .strs = { "decltag" } }, + { .ids = { 6 }, .strs = { "typetag" } }, + { .ids = {}, .strs = { "e64", "eval1", "eval2", "eval3" } }, + { .ids = { 15, 16 }, .strs = { "datasec1" } } + +}; + +/* Fabricate BTF with various types and check BTF field iteration finds types, + * strings expected. + */ +void test_btf_field_iter(void) +{ + struct btf *btf = NULL; + int id; + + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "empty_btf")) + return; + + btf__add_int(btf, "int", 4, BTF_INT_SIGNED); /* [1] int */ + btf__add_int(btf, "int64", 8, BTF_INT_SIGNED); /* [2] int64 */ + btf__add_ptr(btf, 1); /* [3] int * */ + btf__add_array(btf, 1, 2, 3); /* [4] int64[3] */ + btf__add_struct(btf, "s1", 12); /* [5] struct s1 { */ + btf__add_field(btf, "f1", 3, 0, 0); /* int *f1; */ + btf__add_field(btf, "f2", 1, 0, 0); /* int f2; */ + /* } */ + btf__add_union(btf, "u1", 12); /* [6] union u1 { */ + btf__add_field(btf, "f1", 1, 0, 0); /* int f1; */ + btf__add_field(btf, "f2", 5, 0, 0); /* struct s1 f2; */ + /* } */ + btf__add_enum(btf, "e1", 4); /* [7] enum e1 { */ + btf__add_enum_value(btf, "v1", 1); /* v1 = 1; */ + btf__add_enum_value(btf, "v2", 2); /* v2 = 2; */ + /* } */ + + btf__add_fwd(btf, "fw1", BTF_FWD_STRUCT); /* [8] struct fw1; */ + btf__add_typedef(btf, "t", 1); /* [9] typedef int t; */ + btf__add_volatile(btf, 2); /* [10] volatile int64; */ + btf__add_const(btf, 1); /* [11] const int; */ + btf__add_restrict(btf, 3); /* [12] restrict int *; */ + btf__add_func_proto(btf, 1); /* [13] int (*)(int p1, int *p2); */ + btf__add_func_param(btf, "p1", 1); + btf__add_func_param(btf, "p2", 3); + + btf__add_func(btf, "func", BTF_FUNC_GLOBAL, 13);/* [14] int func(int p1, int *p2); */ + btf__add_var(btf, "var1", BTF_VAR_STATIC, 1); /* [15] static int var1; */ + btf__add_var(btf, "var2", BTF_VAR_STATIC, 3); /* [16] static int *var2; */ + btf__add_float(btf, "float", 4); /* [17] float; */ + btf__add_decl_tag(btf, "decltag", 11, -1); /* [18] decltag const int; */ + btf__add_type_tag(btf, "typetag", 6); /* [19] typetag union u1; */ + btf__add_enum64(btf, "e64", 8, true); /* [20] enum { */ + btf__add_enum64_value(btf, "eval1", 1000); /* eval1 = 1000, */ + btf__add_enum64_value(btf, "eval2", 2000); /* eval2 = 2000, */ + btf__add_enum64_value(btf, "eval3", 3000); /* eval3 = 3000 */ + /* } */ + btf__add_datasec(btf, "datasec1", 12); /* [21] datasec datasec1 */ + btf__add_datasec_var_info(btf, 15, 0, 4); + btf__add_datasec_var_info(btf, 16, 4, 8); + + VALIDATE_RAW_BTF( + btf, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] INT 'int64' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED", + "[3] PTR '(anon)' type_id=1", + "[4] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=3", + "[5] STRUCT 's1' size=12 vlen=2\n" + "\t'f1' type_id=3 bits_offset=0\n" + "\t'f2' type_id=1 bits_offset=0", + "[6] UNION 'u1' size=12 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=5 bits_offset=0", + "[7] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n" + "\t'v1' val=1\n" + "\t'v2' val=2", + "[8] FWD 'fw1' fwd_kind=struct", + "[9] TYPEDEF 't' type_id=1", + "[10] VOLATILE '(anon)' type_id=2", + "[11] CONST '(anon)' type_id=1", + "[12] RESTRICT '(anon)' type_id=3", + "[13] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" + "\t'p1' type_id=1\n" + "\t'p2' type_id=3", + "[14] FUNC 'func' type_id=13 linkage=global", + "[15] VAR 'var1' type_id=1, linkage=static", + "[16] VAR 'var2' type_id=3, linkage=static", + "[17] FLOAT 'float' size=4", + "[18] DECL_TAG 'decltag' type_id=11 component_idx=-1", + "[19] TYPE_TAG 'typetag' type_id=6", + "[20] ENUM64 'e64' encoding=SIGNED size=8 vlen=3\n" + "\t'eval1' val=1000\n" + "\t'eval2' val=2000\n" + "\t'eval3' val=3000", + "[21] DATASEC 'datasec1' size=12 vlen=2\n" + "\ttype_id=15 offset=0 size=4\n" + "\ttype_id=16 offset=4 size=8"); + + for (id = 1; id < btf__type_cnt(btf); id++) { + struct btf_type *t = btf_type_by_id(btf, id); + struct btf_field_iter it_strs, it_ids; + int str_idx = 0, id_idx = 0; + __u32 *next_str, *next_id; + + if (!ASSERT_OK_PTR(t, "btf_type_by_id")) + break; + if (!ASSERT_OK(btf_field_iter_init(&it_strs, t, BTF_FIELD_ITER_STRS), + "iter_init_strs")) + break; + if (!ASSERT_OK(btf_field_iter_init(&it_ids, t, BTF_FIELD_ITER_IDS), + "iter_init_ids")) + break; + while ((next_str = btf_field_iter_next(&it_strs))) { + const char *str = btf__str_by_offset(btf, *next_str); + + if (!ASSERT_OK(strcmp(fields[id].strs[str_idx], str), "field_str_match")) + break; + str_idx++; + } + /* ensure no more strings are expected */ + ASSERT_EQ(fields[id].strs[str_idx], NULL, "field_str_cnt"); + + while ((next_id = btf_field_iter_next(&it_ids))) { + if (!ASSERT_EQ(*next_id, fields[id].ids[id_idx], "field_id_match")) + break; + id_idx++; + } + /* ensure no more ids are expected */ + ASSERT_EQ(fields[id].ids[id_idx], 0, "field_id_cnt"); + } + btf__free(btf); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c index addf720428f7..9709c8db7275 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c @@ -32,7 +32,7 @@ static int run_test(int cgroup_fd, int server_fd, bool classid) goto out; } - fd = connect_to_fd_opts(server_fd, &opts); + fd = connect_to_fd_opts(server_fd, SOCK_STREAM, &opts); if (fd < 0) err = -1; else @@ -52,7 +52,7 @@ void test_cgroup_v1v2(void) server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); if (!ASSERT_GE(server_fd, 0, "server_fd")) return; - client_fd = connect_to_fd_opts(server_fd, &opts); + client_fd = connect_to_fd_opts(server_fd, SOCK_STREAM, &opts); if (!ASSERT_GE(client_fd, 0, "client_fd")) { close(server_fd); return; diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index ecf89df78109..2570bd4b0cb2 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -18,6 +18,11 @@ static const char * const cpumask_success_testcases[] = { "test_insert_leave", "test_insert_remove_release", "test_global_mask_rcu", + "test_global_mask_array_one_rcu", + "test_global_mask_array_rcu", + "test_global_mask_array_l2_rcu", + "test_global_mask_nested_rcu", + "test_global_mask_nested_deep_rcu", "test_cpumask_weight", }; diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c index 3b7c57fe55a5..08b6391f2f56 100644 --- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c +++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c @@ -69,15 +69,17 @@ static struct test_case test_cases[] = { { N(SCHED_CLS, struct __sk_buff, tstamp), .read = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);" - "w11 &= 3;" - "if w11 != 0x3 goto pc+2;" + "if w11 & 0x4 goto pc+1;" + "goto pc+4;" + "if w11 & 0x3 goto pc+1;" + "goto pc+2;" "$dst = 0;" "goto pc+1;" "$dst = *(u64 *)($ctx + sk_buff::tstamp);", .write = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);" - "if w11 & 0x2 goto pc+1;" + "if w11 & 0x4 goto pc+1;" "goto pc+2;" - "w11 &= -2;" + "w11 &= -4;" "*(u8 *)($ctx + sk_buff::__mono_tc_offset) = r11;" "*(u64 *)($ctx + sk_buff::tstamp) = $src;", }, diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c index 596536def43d..49b1ffc9af1f 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -50,9 +50,9 @@ void serial_test_fexit_stress(void) out: for (i = 0; i < bpf_max_tramp_links; i++) { - if (link_fd[i]) + if (link_fd[i] > 0) close(link_fd[i]); - if (fexit_fd[i]) + if (fexit_fd[i] > 0) close(fexit_fd[i]); } free(fd); diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c index 5165b38f0e59..f7619e0ade10 100644 --- a/tools/testing/selftests/bpf/prog_tests/find_vma.c +++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c @@ -29,8 +29,8 @@ static int open_pe(void) /* create perf event */ attr.size = sizeof(attr); - attr.type = PERF_TYPE_HARDWARE; - attr.config = PERF_COUNT_HW_CPU_CYCLES; + attr.type = PERF_TYPE_SOFTWARE; + attr.config = PERF_COUNT_SW_CPU_CLOCK; attr.freq = 1; attr.sample_freq = 1000; pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); diff --git a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c index 284764e7179f..4ddb8a5fece8 100644 --- a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c +++ b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c @@ -158,15 +158,13 @@ static int send_frags6(int client) void test_bpf_ip_check_defrag_ok(bool ipv6) { + int family = ipv6 ? AF_INET6 : AF_INET; struct network_helper_opts rx_opts = { .timeout_ms = 1000, - .noconnect = true, }; struct network_helper_opts tx_ops = { .timeout_ms = 1000, - .type = SOCK_RAW, .proto = IPPROTO_RAW, - .noconnect = true, }; struct sockaddr_storage caddr; struct ip_check_defrag *skel; @@ -192,7 +190,7 @@ void test_bpf_ip_check_defrag_ok(bool ipv6) nstoken = open_netns(NS1); if (!ASSERT_OK_PTR(nstoken, "setns ns1")) goto out; - srv_fd = start_server(ipv6 ? AF_INET6 : AF_INET, SOCK_DGRAM, NULL, SERVER_PORT, 0); + srv_fd = start_server(family, SOCK_DGRAM, NULL, SERVER_PORT, 0); close_netns(nstoken); if (!ASSERT_GE(srv_fd, 0, "start_server")) goto out; @@ -201,18 +199,18 @@ void test_bpf_ip_check_defrag_ok(bool ipv6) nstoken = open_netns(NS0); if (!ASSERT_OK_PTR(nstoken, "setns ns0")) goto out; - client_tx_fd = connect_to_fd_opts(srv_fd, &tx_ops); + client_tx_fd = client_socket(family, SOCK_RAW, &tx_ops); close_netns(nstoken); - if (!ASSERT_GE(client_tx_fd, 0, "connect_to_fd_opts")) + if (!ASSERT_GE(client_tx_fd, 0, "client_socket")) goto out; /* Open rx socket in ns0 */ nstoken = open_netns(NS0); if (!ASSERT_OK_PTR(nstoken, "setns ns0")) goto out; - client_rx_fd = connect_to_fd_opts(srv_fd, &rx_opts); + client_rx_fd = client_socket(family, SOCK_DGRAM, &rx_opts); close_netns(nstoken); - if (!ASSERT_GE(client_rx_fd, 0, "connect_to_fd_opts")) + if (!ASSERT_GE(client_rx_fd, 0, "client_socket")) goto out; /* Bind rx socket to a premeditated port */ diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index 2eb71559713c..5b743212292f 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -78,6 +78,7 @@ static struct kfunc_test_params kfunc_tests[] = { SYSCALL_TEST(kfunc_syscall_test, 0), SYSCALL_NULL_CTX_TEST(kfunc_syscall_test_null, 0), TC_TEST(kfunc_call_test_static_unused_arg, 0), + TC_TEST(kfunc_call_ctx, 0), }; struct syscall_test_args { diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_param_nullable.c b/tools/testing/selftests/bpf/prog_tests/kfunc_param_nullable.c new file mode 100644 index 000000000000..c8f4dcaac7c7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_param_nullable.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Copyright (c) 2024 Meta Platforms, Inc */ + +#include <test_progs.h> +#include "test_kfunc_param_nullable.skel.h" + +void test_kfunc_param_nullable(void) +{ + RUN_TESTS(test_kfunc_param_nullable); +} diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index 2fb89de63bd2..77d07e0a4a55 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -183,6 +183,18 @@ static void test_linked_list_success(int mode, bool leave_in_map) if (!leave_in_map) clear_fields(skel->maps.bss_A); + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_nested), &opts); + ASSERT_OK(ret, "global_list_push_pop_nested"); + ASSERT_OK(opts.retval, "global_list_push_pop_nested retval"); + if (!leave_in_map) + clear_fields(skel->maps.bss_A); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_array_push_pop), &opts); + ASSERT_OK(ret, "global_list_array_push_pop"); + ASSERT_OK(opts.retval, "global_list_array_push_pop retval"); + if (!leave_in_map) + clear_fields(skel->maps.bss_A); + if (mode == PUSH_POP) goto end; diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index 274d2e033e39..d2ca32fa3b21 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -89,13 +89,8 @@ static int start_mptcp_server(int family, const char *addr_str, __u16 port, .timeout_ms = timeout_ms, .proto = IPPROTO_MPTCP, }; - struct sockaddr_storage addr; - socklen_t addrlen; - if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) - return -1; - - return start_server_addr(SOCK_STREAM, &addr, addrlen, &opts); + return start_server_str(family, SOCK_STREAM, addr_str, port, &opts); } static int verify_tsk(int map_fd, int client_fd) diff --git a/tools/testing/selftests/bpf/prog_tests/rbtree.c b/tools/testing/selftests/bpf/prog_tests/rbtree.c index e9300c96607d..9818f06c97c5 100644 --- a/tools/testing/selftests/bpf/prog_tests/rbtree.c +++ b/tools/testing/selftests/bpf/prog_tests/rbtree.c @@ -31,6 +31,28 @@ static void test_rbtree_add_nodes(void) rbtree__destroy(skel); } +static void test_rbtree_add_nodes_nested(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct rbtree *skel; + int ret; + + skel = rbtree__open_and_load(); + if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_nodes_nested), &opts); + ASSERT_OK(ret, "rbtree_add_nodes_nested run"); + ASSERT_OK(opts.retval, "rbtree_add_nodes_nested retval"); + ASSERT_EQ(skel->data->less_callback_ran, 1, "rbtree_add_nodes_nested less_callback_ran"); + + rbtree__destroy(skel); +} + static void test_rbtree_add_and_remove(void) { LIBBPF_OPTS(bpf_test_run_opts, opts, @@ -53,6 +75,27 @@ static void test_rbtree_add_and_remove(void) rbtree__destroy(skel); } +static void test_rbtree_add_and_remove_array(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct rbtree *skel; + int ret; + + skel = rbtree__open_and_load(); + if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_and_remove_array), &opts); + ASSERT_OK(ret, "rbtree_add_and_remove_array"); + ASSERT_OK(opts.retval, "rbtree_add_and_remove_array retval"); + + rbtree__destroy(skel); +} + static void test_rbtree_first_and_remove(void) { LIBBPF_OPTS(bpf_test_run_opts, opts, @@ -104,8 +147,12 @@ void test_rbtree_success(void) { if (test__start_subtest("rbtree_add_nodes")) test_rbtree_add_nodes(); + if (test__start_subtest("rbtree_add_nodes_nested")) + test_rbtree_add_nodes_nested(); if (test__start_subtest("rbtree_add_and_remove")) test_rbtree_add_and_remove(); + if (test__start_subtest("rbtree_add_and_remove_array")) + test_rbtree_add_and_remove_array(); if (test__start_subtest("rbtree_first_and_remove")) test_rbtree_first_and_remove(); if (test__start_subtest("rbtree_api_release_aliasing")) diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index 4c6f42dae409..da430df45aa4 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -12,9 +12,11 @@ #include <sys/sysinfo.h> #include <linux/perf_event.h> #include <linux/ring_buffer.h> + #include "test_ringbuf.lskel.h" #include "test_ringbuf_n.lskel.h" #include "test_ringbuf_map_key.lskel.h" +#include "test_ringbuf_write.lskel.h" #define EDONE 7777 @@ -84,6 +86,58 @@ static void *poll_thread(void *input) return (void *)(long)ring_buffer__poll(ringbuf, timeout); } +static void ringbuf_write_subtest(void) +{ + struct test_ringbuf_write_lskel *skel; + int page_size = getpagesize(); + size_t *mmap_ptr; + int err, rb_fd; + + skel = test_ringbuf_write_lskel__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->maps.ringbuf.max_entries = 0x4000; + + err = test_ringbuf_write_lskel__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + rb_fd = skel->maps.ringbuf.map_fd; + + mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0); + if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos")) + goto cleanup; + *mmap_ptr = 0x3000; + ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw"); + + skel->bss->pid = getpid(); + + ringbuf = ring_buffer__new(rb_fd, process_sample, NULL, NULL); + if (!ASSERT_OK_PTR(ringbuf, "ringbuf_new")) + goto cleanup; + + err = test_ringbuf_write_lskel__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup_ringbuf; + + skel->bss->discarded = 0; + skel->bss->passed = 0; + + /* trigger exactly two samples */ + syscall(__NR_getpgid); + syscall(__NR_getpgid); + + ASSERT_EQ(skel->bss->discarded, 2, "discarded"); + ASSERT_EQ(skel->bss->passed, 0, "passed"); + + test_ringbuf_write_lskel__detach(skel); +cleanup_ringbuf: + ring_buffer__free(ringbuf); +cleanup: + test_ringbuf_write_lskel__destroy(skel); +} + static void ringbuf_subtest(void) { const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample); @@ -451,4 +505,6 @@ void test_ringbuf(void) ringbuf_n_subtest(); if (test__start_subtest("ringbuf_map_key")) ringbuf_map_key_subtest(); + if (test__start_subtest("ringbuf_write")) + ringbuf_write_subtest(); } diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 920aee41bd58..6cc69900b310 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -156,7 +156,8 @@ static void test_send_signal_tracepoint(bool signal_thread) static void test_send_signal_perf(bool signal_thread) { struct perf_event_attr attr = { - .sample_period = 1, + .freq = 1, + .sample_freq = 1000, .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_CLOCK, }; diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c index 597d0467a926..ae87c00867ba 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -77,6 +77,12 @@ struct test { bool reuseport_has_conns; /* Add a connected socket to reuseport group */ }; +struct cb_opts { + int family; + int sotype; + bool reuseport; +}; + static __u32 duration; /* for CHECK macro */ static bool is_ipv6(const char *ip) @@ -142,19 +148,14 @@ static int make_socket(int sotype, const char *ip, int port, return fd; } -static int make_server(int sotype, const char *ip, int port, - struct bpf_program *reuseport_prog) +static int setsockopts(int fd, void *opts) { - struct sockaddr_storage addr = {0}; + struct cb_opts *co = (struct cb_opts *)opts; const int one = 1; - int err, fd = -1; - - fd = make_socket(sotype, ip, port, &addr); - if (fd < 0) - return -1; + int err = 0; /* Enabled for UDPv6 sockets for IPv4-mapped IPv6 to work. */ - if (sotype == SOCK_DGRAM) { + if (co->sotype == SOCK_DGRAM) { err = setsockopt(fd, SOL_IP, IP_RECVORIGDSTADDR, &one, sizeof(one)); if (CHECK(err, "setsockopt(IP_RECVORIGDSTADDR)", "failed\n")) { @@ -163,7 +164,7 @@ static int make_server(int sotype, const char *ip, int port, } } - if (sotype == SOCK_DGRAM && addr.ss_family == AF_INET6) { + if (co->sotype == SOCK_DGRAM && co->family == AF_INET6) { err = setsockopt(fd, SOL_IPV6, IPV6_RECVORIGDSTADDR, &one, sizeof(one)); if (CHECK(err, "setsockopt(IPV6_RECVORIGDSTADDR)", "failed\n")) { @@ -172,7 +173,7 @@ static int make_server(int sotype, const char *ip, int port, } } - if (sotype == SOCK_STREAM) { + if (co->sotype == SOCK_STREAM) { err = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); if (CHECK(err, "setsockopt(SO_REUSEADDR)", "failed\n")) { @@ -181,7 +182,7 @@ static int make_server(int sotype, const char *ip, int port, } } - if (reuseport_prog) { + if (co->reuseport) { err = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)); if (CHECK(err, "setsockopt(SO_REUSEPORT)", "failed\n")) { @@ -190,19 +191,28 @@ static int make_server(int sotype, const char *ip, int port, } } - err = bind(fd, (void *)&addr, inetaddr_len(&addr)); - if (CHECK(err, "bind", "failed\n")) { - log_err("failed to bind listen socket"); - goto fail; - } +fail: + return err; +} - if (sotype == SOCK_STREAM) { - err = listen(fd, SOMAXCONN); - if (CHECK(err, "make_server", "listen")) { - log_err("failed to listen on port %d", port); - goto fail; - } - } +static int make_server(int sotype, const char *ip, int port, + struct bpf_program *reuseport_prog) +{ + struct cb_opts cb_opts = { + .family = is_ipv6(ip) ? AF_INET6 : AF_INET, + .sotype = sotype, + .reuseport = reuseport_prog, + }; + struct network_helper_opts opts = { + .backlog = SOMAXCONN, + .post_socket_cb = setsockopts, + .cb_opts = &cb_opts, + }; + int err, fd; + + fd = start_server_str(cb_opts.family, sotype, ip, port, &opts); + if (!ASSERT_OK_FD(fd, "start_server_str")) + return -1; /* Late attach reuseport prog so we can have one init path */ if (reuseport_prog) { @@ -406,18 +416,12 @@ static int udp_recv_send(int server_fd) } /* Reply from original destination address. */ - fd = socket(dst_addr->ss_family, SOCK_DGRAM, 0); - if (CHECK(fd < 0, "socket", "failed\n")) { + fd = start_server_addr(SOCK_DGRAM, dst_addr, sizeof(*dst_addr), NULL); + if (!ASSERT_OK_FD(fd, "start_server_addr")) { log_err("failed to create tx socket"); return -1; } - ret = bind(fd, (struct sockaddr *)dst_addr, sizeof(*dst_addr)); - if (CHECK(ret, "bind", "failed\n")) { - log_err("failed to bind tx socket"); - goto out; - } - msg.msg_control = NULL; msg.msg_controllen = 0; n = sendmsg(fd, &msg, 0); @@ -629,9 +633,6 @@ static void run_lookup_prog(const struct test *t) * BPF socket lookup. */ if (t->reuseport_has_conns) { - struct sockaddr_storage addr = {}; - socklen_t len = sizeof(addr); - /* Add an extra socket to reuseport group */ reuse_conn_fd = make_server(t->sotype, t->listen_at.ip, t->listen_at.port, @@ -639,12 +640,9 @@ static void run_lookup_prog(const struct test *t) if (reuse_conn_fd < 0) goto close; - /* Connect the extra socket to itself */ - err = getsockname(reuse_conn_fd, (void *)&addr, &len); - if (CHECK(err, "getsockname", "errno %d\n", errno)) - goto close; - err = connect(reuse_conn_fd, (void *)&addr, len); - if (CHECK(err, "connect", "errno %d\n", errno)) + /* Connect the extra socket to itself */ + err = connect_fd_to_fd(reuse_conn_fd, reuse_conn_fd, 0); + if (!ASSERT_OK(err, "connect_fd_to_fd")) goto close; } @@ -994,7 +992,7 @@ static void drop_on_reuseport(const struct test *t) err = update_lookup_map(t->sock_map, SERVER_A, server1); if (err) - goto detach; + goto close_srv1; /* second server on destination address we should never reach */ server2 = make_server(t->sotype, t->connect_to.ip, t->connect_to.port, diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c index 1d3a20f01b60..7cd8be2780ca 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c @@ -70,7 +70,7 @@ static void *server_thread(void *arg) return (void *)(long)err; } -static int custom_cb(int fd, const struct post_socket_opts *opts) +static int custom_cb(int fd, void *opts) { char buf; int err; diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c index bc9841144685..1af9ec1149aa 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_links.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c @@ -9,6 +9,8 @@ #define ping_cmd "ping -q -c1 -w1 127.0.0.1 > /dev/null" #include "test_tc_link.skel.h" + +#include "netlink_helpers.h" #include "tc_helpers.h" void serial_test_tc_links_basic(void) @@ -1787,6 +1789,65 @@ void serial_test_tc_links_ingress(void) test_tc_links_ingress(BPF_TCX_INGRESS, false, false); } +struct qdisc_req { + struct nlmsghdr n; + struct tcmsg t; + char buf[1024]; +}; + +static int qdisc_replace(int ifindex, const char *kind, bool block) +{ + struct rtnl_handle rth = { .fd = -1 }; + struct qdisc_req req; + int err; + + err = rtnl_open(&rth, 0); + if (!ASSERT_OK(err, "open_rtnetlink")) + return err; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_REQUEST; + req.n.nlmsg_type = RTM_NEWQDISC; + req.t.tcm_family = AF_UNSPEC; + req.t.tcm_ifindex = ifindex; + req.t.tcm_parent = 0xfffffff1; + + addattr_l(&req.n, sizeof(req), TCA_KIND, kind, strlen(kind) + 1); + if (block) + addattr32(&req.n, sizeof(req), TCA_INGRESS_BLOCK, 1); + + err = rtnl_talk(&rth, &req.n, NULL); + ASSERT_OK(err, "talk_rtnetlink"); + rtnl_close(&rth); + return err; +} + +void serial_test_tc_links_dev_chain0(void) +{ + int err, ifindex; + + ASSERT_OK(system("ip link add dev foo type veth peer name bar"), "add veth"); + ifindex = if_nametoindex("foo"); + ASSERT_NEQ(ifindex, 0, "non_zero_ifindex"); + err = qdisc_replace(ifindex, "ingress", true); + if (!ASSERT_OK(err, "attaching ingress")) + goto cleanup; + ASSERT_OK(system("tc filter add block 1 matchall action skbmod swap mac"), "add block"); + err = qdisc_replace(ifindex, "clsact", false); + if (!ASSERT_OK(err, "attaching clsact")) + goto cleanup; + /* Heuristic: kern_sync_rcu() alone does not work; a wait-time of ~5s + * triggered the issue without the fix reliably 100% of the time. + */ + sleep(5); + ASSERT_OK(system("tc filter add dev foo ingress matchall action skbmod swap mac"), "add filter"); +cleanup: + ASSERT_OK(system("ip link del dev foo"), "del veth"); + ASSERT_EQ(if_nametoindex("foo"), 0, "foo removed"); + ASSERT_EQ(if_nametoindex("bar"), 0, "bar removed"); +} + static void test_tc_links_dev_mixed(int target) { LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index b1073d36d77a..327d51f59142 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -890,9 +890,6 @@ static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, dtime_cnt_str(t, INGRESS_FWDNS_P100)); - /* non mono delivery time is not forwarded */ - ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0, - dtime_cnt_str(t, INGRESS_FWDNS_P101)); for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++) ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i)); diff --git a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c index ae93411fd582..09ca13bdf6ca 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c +++ b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c @@ -11,6 +11,7 @@ static int sanity_run(struct bpf_program *prog) .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), .repeat = 1, + .flags = BPF_F_TEST_SKB_CHECKSUM_COMPLETE, ); prog_fd = bpf_program__fd(prog); diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c index 29e183a80f49..bbcf12696a6b 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c @@ -3,9 +3,12 @@ #include <test_progs.h> #include <time.h> +#include <sys/epoll.h> + #include "struct_ops_module.skel.h" #include "struct_ops_nulled_out_cb.skel.h" #include "struct_ops_forgotten_cb.skel.h" +#include "struct_ops_detach.skel.h" static void check_map_info(struct bpf_map_info *info) { @@ -242,6 +245,58 @@ cleanup: struct_ops_forgotten_cb__destroy(skel); } +/* Detach a link from a user space program */ +static void test_detach_link(void) +{ + struct epoll_event ev, events[2]; + struct struct_ops_detach *skel; + struct bpf_link *link = NULL; + int fd, epollfd = -1, nfds; + int err; + + skel = struct_ops_detach__open_and_load(); + if (!ASSERT_OK_PTR(skel, "struct_ops_detach__open_and_load")) + return; + + link = bpf_map__attach_struct_ops(skel->maps.testmod_do_detach); + if (!ASSERT_OK_PTR(link, "attach_struct_ops")) + goto cleanup; + + fd = bpf_link__fd(link); + if (!ASSERT_GE(fd, 0, "link_fd")) + goto cleanup; + + epollfd = epoll_create1(0); + if (!ASSERT_GE(epollfd, 0, "epoll_create1")) + goto cleanup; + + ev.events = EPOLLHUP; + ev.data.fd = fd; + err = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev); + if (!ASSERT_OK(err, "epoll_ctl")) + goto cleanup; + + err = bpf_link__detach(link); + if (!ASSERT_OK(err, "detach_link")) + goto cleanup; + + /* Wait for EPOLLHUP */ + nfds = epoll_wait(epollfd, events, 2, 500); + if (!ASSERT_EQ(nfds, 1, "epoll_wait")) + goto cleanup; + + if (!ASSERT_EQ(events[0].data.fd, fd, "epoll_wait_fd")) + goto cleanup; + if (!ASSERT_TRUE(events[0].events & EPOLLHUP, "events[0].events")) + goto cleanup; + +cleanup: + if (epollfd >= 0) + close(epollfd); + bpf_link__destroy(link); + struct_ops_detach__destroy(skel); +} + void serial_test_struct_ops_module(void) { if (test__start_subtest("struct_ops_load")) @@ -254,5 +309,7 @@ void serial_test_struct_ops_module(void) test_struct_ops_nulled_out_cb(); if (test__start_subtest("struct_ops_forgotten_cb")) test_struct_ops_forgotten_cb(); + if (test__start_subtest("test_detach_link")) + test_detach_link(); } diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c new file mode 100644 index 000000000000..871d16cb95cf --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <sched.h> +#include <test_progs.h> +#include <pthread.h> +#include <network_helpers.h> + +#include "timer_lockup.skel.h" + +static long cpu; +static int *timer1_err; +static int *timer2_err; +static bool skip; + +volatile int k = 0; + +static void *timer_lockup_thread(void *arg) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1000, + ); + int i, prog_fd = *(int *)arg; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset); + ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset), + &cpuset), + "cpu affinity"); + + for (i = 0; !READ_ONCE(*timer1_err) && !READ_ONCE(*timer2_err); i++) { + bpf_prog_test_run_opts(prog_fd, &opts); + /* Skip the test if we can't reproduce the race in a reasonable + * amount of time. + */ + if (i > 50) { + WRITE_ONCE(skip, true); + break; + } + } + + return NULL; +} + +void test_timer_lockup(void) +{ + int timer1_prog, timer2_prog; + struct timer_lockup *skel; + pthread_t thrds[2]; + void *ret; + + skel = timer_lockup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load")) + return; + + timer1_prog = bpf_program__fd(skel->progs.timer1_prog); + timer2_prog = bpf_program__fd(skel->progs.timer2_prog); + + timer1_err = &skel->bss->timer1_err; + timer2_err = &skel->bss->timer2_err; + + if (!ASSERT_OK(pthread_create(&thrds[0], NULL, timer_lockup_thread, + &timer1_prog), + "pthread_create thread1")) + goto out; + if (!ASSERT_OK(pthread_create(&thrds[1], NULL, timer_lockup_thread, + &timer2_prog), + "pthread_create thread2")) { + pthread_exit(&thrds[0]); + goto out; + } + + pthread_join(thrds[1], &ret); + pthread_join(thrds[0], &ret); + + if (skip) { + test__skip(); + goto out; + } + + if (*timer1_err != -EDEADLK && *timer1_err != 0) + ASSERT_FAIL("timer1_err bad value"); + if (*timer2_err != -EDEADLK && *timer2_err != 0) + ASSERT_FAIL("timer2_err bad value"); +out: + timer_lockup__destroy(skel); + return; +} diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c index fe0fb0c9849a..19e68d4b3532 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c @@ -3,8 +3,9 @@ #include <test_progs.h> #include "tracing_struct.skel.h" +#include "tracing_struct_many_args.skel.h" -static void test_fentry(void) +static void test_struct_args(void) { struct tracing_struct *skel; int err; @@ -55,6 +56,25 @@ static void test_fentry(void) ASSERT_EQ(skel->bss->t6, 1, "t6 ret"); +destroy_skel: + tracing_struct__destroy(skel); +} + +static void test_struct_many_args(void) +{ + struct tracing_struct_many_args *skel; + int err; + + skel = tracing_struct_many_args__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tracing_struct_many_args__open_and_load")) + return; + + err = tracing_struct_many_args__attach(skel); + if (!ASSERT_OK(err, "tracing_struct_many_args__attach")) + goto destroy_skel; + + ASSERT_OK(trigger_module_test_read(256), "trigger_read"); + ASSERT_EQ(skel->bss->t7_a, 16, "t7:a"); ASSERT_EQ(skel->bss->t7_b, 17, "t7:b"); ASSERT_EQ(skel->bss->t7_c, 18, "t7:c"); @@ -74,12 +94,28 @@ static void test_fentry(void) ASSERT_EQ(skel->bss->t8_g, 23, "t8:g"); ASSERT_EQ(skel->bss->t8_ret, 156, "t8 ret"); - tracing_struct__detach(skel); + ASSERT_EQ(skel->bss->t9_a, 16, "t9:a"); + ASSERT_EQ(skel->bss->t9_b, 17, "t9:b"); + ASSERT_EQ(skel->bss->t9_c, 18, "t9:c"); + ASSERT_EQ(skel->bss->t9_d, 19, "t9:d"); + ASSERT_EQ(skel->bss->t9_e, 20, "t9:e"); + ASSERT_EQ(skel->bss->t9_f, 21, "t9:f"); + ASSERT_EQ(skel->bss->t9_g, 22, "t9:f"); + ASSERT_EQ(skel->bss->t9_h_a, 23, "t9:h.a"); + ASSERT_EQ(skel->bss->t9_h_b, 24, "t9:h.b"); + ASSERT_EQ(skel->bss->t9_h_c, 25, "t9:h.c"); + ASSERT_EQ(skel->bss->t9_h_d, 26, "t9:h.d"); + ASSERT_EQ(skel->bss->t9_i, 27, "t9:i"); + ASSERT_EQ(skel->bss->t9_ret, 258, "t9 ret"); + destroy_skel: - tracing_struct__destroy(skel); + tracing_struct_many_args__destroy(skel); } void test_tracing_struct(void) { - test_fentry(); + if (test__start_subtest("struct_args")) + test_struct_args(); + if (test__start_subtest("struct_many_args")) + test_struct_many_args(); } diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 1c9c4ec1be11..9dc3687bc406 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -53,6 +53,7 @@ #include "verifier_movsx.skel.h" #include "verifier_netfilter_ctx.skel.h" #include "verifier_netfilter_retcode.skel.h" +#include "verifier_or_jmp32_k.skel.h" #include "verifier_precision.skel.h" #include "verifier_prevent_map_lookup.skel.h" #include "verifier_raw_stack.skel.h" @@ -86,6 +87,7 @@ #include "verifier_xadd.skel.h" #include "verifier_xdp.skel.h" #include "verifier_xdp_direct_packet_access.skel.h" +#include "verifier_bits_iter.skel.h" #define MAX_ENTRIES 11 @@ -170,6 +172,7 @@ void test_verifier_meta_access(void) { RUN(verifier_meta_access); } void test_verifier_movsx(void) { RUN(verifier_movsx); } void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); } void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); } +void test_verifier_or_jmp32_k(void) { RUN(verifier_or_jmp32_k); } void test_verifier_precision(void) { RUN(verifier_precision); } void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); } void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); } @@ -202,6 +205,7 @@ void test_verifier_var_off(void) { RUN(verifier_var_off); } void test_verifier_xadd(void) { RUN(verifier_xadd); } void test_verifier_xdp(void) { RUN(verifier_xdp); } void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); } +void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); } static int init_test_val_map(struct bpf_object *obj, char *map_name) { diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index f09505f8b038..53d6ad8c2257 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -222,7 +222,7 @@ static void test_xdp_adjust_frags_tail_grow(void) prog = bpf_object__next_program(obj, NULL); if (bpf_object__load(obj)) - return; + goto out; prog_fd = bpf_program__fd(prog); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c new file mode 100644 index 000000000000..e1bf141d3401 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> +#include <bpf/btf.h> +#include <linux/if_link.h> +#include <linux/udp.h> +#include <net/if.h> +#include <unistd.h> + +#include "xdp_flowtable.skel.h" + +#define TX_NETNS_NAME "ns0" +#define RX_NETNS_NAME "ns1" + +#define TX_NAME "v0" +#define FORWARD_NAME "v1" +#define RX_NAME "d0" + +#define TX_MAC "00:00:00:00:00:01" +#define FORWARD_MAC "00:00:00:00:00:02" +#define RX_MAC "00:00:00:00:00:03" +#define DST_MAC "00:00:00:00:00:04" + +#define TX_ADDR "10.0.0.1" +#define FORWARD_ADDR "10.0.0.2" +#define RX_ADDR "20.0.0.1" +#define DST_ADDR "20.0.0.2" + +#define PREFIX_LEN "8" +#define N_PACKETS 10 +#define UDP_PORT 12345 +#define UDP_PORT_STR "12345" + +static int send_udp_traffic(void) +{ + struct sockaddr_storage addr; + int i, sock; + + if (make_sockaddr(AF_INET, DST_ADDR, UDP_PORT, &addr, NULL)) + return -EINVAL; + + sock = socket(AF_INET, SOCK_DGRAM, 0); + if (sock < 0) + return sock; + + for (i = 0; i < N_PACKETS; i++) { + unsigned char buf[] = { 0xaa, 0xbb, 0xcc }; + int n; + + n = sendto(sock, buf, sizeof(buf), MSG_NOSIGNAL | MSG_CONFIRM, + (struct sockaddr *)&addr, sizeof(addr)); + if (n != sizeof(buf)) { + close(sock); + return -EINVAL; + } + + usleep(50000); /* 50ms */ + } + close(sock); + + return 0; +} + +void test_xdp_flowtable(void) +{ + struct xdp_flowtable *skel = NULL; + struct nstoken *tok = NULL; + int iifindex, stats_fd; + __u32 value, key = 0; + struct bpf_link *link; + + if (SYS_NOFAIL("nft -v")) { + fprintf(stdout, "Missing required nft tool\n"); + test__skip(); + return; + } + + SYS(out, "ip netns add " TX_NETNS_NAME); + SYS(out, "ip netns add " RX_NETNS_NAME); + + tok = open_netns(RX_NETNS_NAME); + if (!ASSERT_OK_PTR(tok, "setns")) + goto out; + + SYS(out, "sysctl -qw net.ipv4.conf.all.forwarding=1"); + + SYS(out, "ip link add " TX_NAME " type veth peer " FORWARD_NAME); + SYS(out, "ip link set " TX_NAME " netns " TX_NETNS_NAME); + SYS(out, "ip link set dev " FORWARD_NAME " address " FORWARD_MAC); + SYS(out, + "ip addr add " FORWARD_ADDR "/" PREFIX_LEN " dev " FORWARD_NAME); + SYS(out, "ip link set dev " FORWARD_NAME " up"); + + SYS(out, "ip link add " RX_NAME " type dummy"); + SYS(out, "ip link set dev " RX_NAME " address " RX_MAC); + SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME); + SYS(out, "ip link set dev " RX_NAME " up"); + + /* configure the flowtable */ + SYS(out, "nft add table ip filter"); + SYS(out, + "nft add flowtable ip filter f { hook ingress priority 0\\; " + "devices = { " FORWARD_NAME ", " RX_NAME " }\\; }"); + SYS(out, + "nft add chain ip filter forward " + "{ type filter hook forward priority 0\\; }"); + SYS(out, + "nft add rule ip filter forward ip protocol udp th dport " + UDP_PORT_STR " flow add @f"); + + /* Avoid ARP calls */ + SYS(out, + "ip -4 neigh add " DST_ADDR " lladdr " DST_MAC " dev " RX_NAME); + + close_netns(tok); + tok = open_netns(TX_NETNS_NAME); + if (!ASSERT_OK_PTR(tok, "setns")) + goto out; + + SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME); + SYS(out, "ip link set dev " TX_NAME " address " TX_MAC); + SYS(out, "ip link set dev " TX_NAME " up"); + SYS(out, "ip route add default via " FORWARD_ADDR); + + close_netns(tok); + tok = open_netns(RX_NETNS_NAME); + if (!ASSERT_OK_PTR(tok, "setns")) + goto out; + + iifindex = if_nametoindex(FORWARD_NAME); + if (!ASSERT_NEQ(iifindex, 0, "iifindex")) + goto out; + + skel = xdp_flowtable__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel")) + goto out; + + link = bpf_program__attach_xdp(skel->progs.xdp_flowtable_do_lookup, + iifindex); + if (!ASSERT_OK_PTR(link, "prog_attach")) + goto out; + + close_netns(tok); + tok = open_netns(TX_NETNS_NAME); + if (!ASSERT_OK_PTR(tok, "setns")) + goto out; + + if (!ASSERT_OK(send_udp_traffic(), "send udp")) + goto out; + + close_netns(tok); + tok = open_netns(RX_NETNS_NAME); + if (!ASSERT_OK_PTR(tok, "setns")) + goto out; + + stats_fd = bpf_map__fd(skel->maps.stats); + if (!ASSERT_OK(bpf_map_lookup_elem(stats_fd, &key, &value), + "bpf_map_update_elem stats")) + goto out; + + ASSERT_GE(value, N_PACKETS - 2, "bpf_xdp_flow_lookup failed"); +out: + xdp_flowtable__destroy(skel); + if (tok) + close_netns(tok); + SYS_NOFAIL("ip netns del " TX_NETNS_NAME); + SYS_NOFAIL("ip netns del " RX_NETNS_NAME); +} diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c index 55f10563208d..bb0acd79d28a 100644 --- a/tools/testing/selftests/bpf/progs/arena_atomics.c +++ b/tools/testing/selftests/bpf/progs/arena_atomics.c @@ -25,20 +25,13 @@ bool skip_tests = true; __u32 pid = 0; -#undef __arena -#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) -#define __arena __attribute__((address_space(1))) -#else -#define __arena SEC(".addr_space.1") -#endif - -__u64 __arena add64_value = 1; -__u64 __arena add64_result = 0; -__u32 __arena add32_value = 1; -__u32 __arena add32_result = 0; -__u64 __arena add_stack_value_copy = 0; -__u64 __arena add_stack_result = 0; -__u64 __arena add_noreturn_value = 1; +__u64 __arena_global add64_value = 1; +__u64 __arena_global add64_result = 0; +__u32 __arena_global add32_value = 1; +__u32 __arena_global add32_result = 0; +__u64 __arena_global add_stack_value_copy = 0; +__u64 __arena_global add_stack_result = 0; +__u64 __arena_global add_noreturn_value = 1; SEC("raw_tp/sys_enter") int add(const void *ctx) @@ -58,13 +51,13 @@ int add(const void *ctx) return 0; } -__s64 __arena sub64_value = 1; -__s64 __arena sub64_result = 0; -__s32 __arena sub32_value = 1; -__s32 __arena sub32_result = 0; -__s64 __arena sub_stack_value_copy = 0; -__s64 __arena sub_stack_result = 0; -__s64 __arena sub_noreturn_value = 1; +__s64 __arena_global sub64_value = 1; +__s64 __arena_global sub64_result = 0; +__s32 __arena_global sub32_value = 1; +__s32 __arena_global sub32_result = 0; +__s64 __arena_global sub_stack_value_copy = 0; +__s64 __arena_global sub_stack_result = 0; +__s64 __arena_global sub_noreturn_value = 1; SEC("raw_tp/sys_enter") int sub(const void *ctx) @@ -84,8 +77,8 @@ int sub(const void *ctx) return 0; } -__u64 __arena and64_value = (0x110ull << 32); -__u32 __arena and32_value = 0x110; +__u64 __arena_global and64_value = (0x110ull << 32); +__u32 __arena_global and32_value = 0x110; SEC("raw_tp/sys_enter") int and(const void *ctx) @@ -101,8 +94,8 @@ int and(const void *ctx) return 0; } -__u32 __arena or32_value = 0x110; -__u64 __arena or64_value = (0x110ull << 32); +__u32 __arena_global or32_value = 0x110; +__u64 __arena_global or64_value = (0x110ull << 32); SEC("raw_tp/sys_enter") int or(const void *ctx) @@ -117,8 +110,8 @@ int or(const void *ctx) return 0; } -__u64 __arena xor64_value = (0x110ull << 32); -__u32 __arena xor32_value = 0x110; +__u64 __arena_global xor64_value = (0x110ull << 32); +__u32 __arena_global xor32_value = 0x110; SEC("raw_tp/sys_enter") int xor(const void *ctx) @@ -133,12 +126,12 @@ int xor(const void *ctx) return 0; } -__u32 __arena cmpxchg32_value = 1; -__u32 __arena cmpxchg32_result_fail = 0; -__u32 __arena cmpxchg32_result_succeed = 0; -__u64 __arena cmpxchg64_value = 1; -__u64 __arena cmpxchg64_result_fail = 0; -__u64 __arena cmpxchg64_result_succeed = 0; +__u32 __arena_global cmpxchg32_value = 1; +__u32 __arena_global cmpxchg32_result_fail = 0; +__u32 __arena_global cmpxchg32_result_succeed = 0; +__u64 __arena_global cmpxchg64_value = 1; +__u64 __arena_global cmpxchg64_result_fail = 0; +__u64 __arena_global cmpxchg64_result_succeed = 0; SEC("raw_tp/sys_enter") int cmpxchg(const void *ctx) @@ -156,10 +149,10 @@ int cmpxchg(const void *ctx) return 0; } -__u64 __arena xchg64_value = 1; -__u64 __arena xchg64_result = 0; -__u32 __arena xchg32_value = 1; -__u32 __arena xchg32_result = 0; +__u64 __arena_global xchg64_value = 1; +__u64 __arena_global xchg64_result = 0; +__u32 __arena_global xchg32_value = 1; +__u32 __arena_global xchg32_result = 0; SEC("raw_tp/sys_enter") int xchg(const void *ctx) @@ -176,3 +169,79 @@ int xchg(const void *ctx) return 0; } + +__u64 __arena_global uaf_sink; +volatile __u64 __arena_global uaf_recovery_fails; + +SEC("syscall") +int uaf(const void *ctx) +{ + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; +#if defined(ENABLE_ATOMICS_TESTS) && !defined(__TARGET_ARCH_arm64) && \ + !defined(__TARGET_ARCH_x86) + __u32 __arena *page32; + __u64 __arena *page64; + void __arena *page; + + page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + bpf_arena_free_pages(&arena, page, 1); + uaf_recovery_fails = 24; + + page32 = (__u32 __arena *)page; + uaf_sink += __sync_fetch_and_add(page32, 1); + uaf_recovery_fails -= 1; + __sync_add_and_fetch(page32, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_fetch_and_sub(page32, 1); + uaf_recovery_fails -= 1; + __sync_sub_and_fetch(page32, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_fetch_and_and(page32, 1); + uaf_recovery_fails -= 1; + __sync_and_and_fetch(page32, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_fetch_and_or(page32, 1); + uaf_recovery_fails -= 1; + __sync_or_and_fetch(page32, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_fetch_and_xor(page32, 1); + uaf_recovery_fails -= 1; + __sync_xor_and_fetch(page32, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_val_compare_and_swap(page32, 0, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_lock_test_and_set(page32, 1); + uaf_recovery_fails -= 1; + + page64 = (__u64 __arena *)page; + uaf_sink += __sync_fetch_and_add(page64, 1); + uaf_recovery_fails -= 1; + __sync_add_and_fetch(page64, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_fetch_and_sub(page64, 1); + uaf_recovery_fails -= 1; + __sync_sub_and_fetch(page64, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_fetch_and_and(page64, 1); + uaf_recovery_fails -= 1; + __sync_and_and_fetch(page64, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_fetch_and_or(page64, 1); + uaf_recovery_fails -= 1; + __sync_or_and_fetch(page64, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_fetch_and_xor(page64, 1); + uaf_recovery_fails -= 1; + __sync_xor_and_fetch(page64, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_val_compare_and_swap(page64, 0, 1); + uaf_recovery_fails -= 1; + uaf_sink += __sync_lock_test_and_set(page64, 1); + uaf_recovery_fails -= 1; +#endif + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/arena_htab.c b/tools/testing/selftests/bpf/progs/arena_htab.c index 1e6ac187a6a0..81eaa94afeb0 100644 --- a/tools/testing/selftests/bpf/progs/arena_htab.c +++ b/tools/testing/selftests/bpf/progs/arena_htab.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#define BPF_NO_KFUNC_PROTOTYPES #include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> @@ -18,25 +19,35 @@ void __arena *htab_for_user; bool skip = false; int zero = 0; +char __arena arr1[100000]; +char arr2[1000]; SEC("syscall") int arena_htab_llvm(void *ctx) { #if defined(__BPF_FEATURE_ADDR_SPACE_CAST) || defined(BPF_ARENA_FORCE_ASM) struct htab __arena *htab; + char __arena *arr = arr1; __u64 i; htab = bpf_alloc(sizeof(*htab)); cast_kern(htab); htab_init(htab); + cast_kern(arr); + /* first run. No old elems in the table */ - for (i = zero; i < 1000; i++) + for (i = zero; i < 100000 && can_loop; i++) { htab_update_elem(htab, i, i); + arr[i] = i; + } - /* should replace all elems with new ones */ - for (i = zero; i < 1000; i++) + /* should replace some elems with new ones */ + for (i = zero; i < 1000 && can_loop; i++) { htab_update_elem(htab, i, i); + /* Access mem to make the verifier use bounded loop logic */ + arr2[i] = i; + } cast_user(htab); htab_for_user = htab; #else diff --git a/tools/testing/selftests/bpf/progs/arena_list.c b/tools/testing/selftests/bpf/progs/arena_list.c index 93bd0600eba0..3a2ddcacbea6 100644 --- a/tools/testing/selftests/bpf/progs/arena_list.c +++ b/tools/testing/selftests/bpf/progs/arena_list.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#define BPF_NO_KFUNC_PROTOTYPES #include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c index 3c9ffe340312..02f552e7fd4d 100644 --- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c +++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c @@ -65,7 +65,7 @@ static void dctcp_reset(const struct tcp_sock *tp, struct bpf_dctcp *ca) } SEC("struct_ops") -void BPF_PROG(dctcp_init, struct sock *sk) +void BPF_PROG(bpf_dctcp_init, struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct bpf_dctcp *ca = inet_csk_ca(sk); @@ -77,7 +77,7 @@ void BPF_PROG(dctcp_init, struct sock *sk) (void *)fallback, sizeof(fallback)) == -EBUSY) ebusy_cnt++; - /* Switch back to myself and the recurred dctcp_init() + /* Switch back to myself and the recurred bpf_dctcp_init() * will get -EBUSY for all bpf_setsockopt(TCP_CONGESTION), * except the last "cdg" one. */ @@ -112,7 +112,7 @@ void BPF_PROG(dctcp_init, struct sock *sk) } SEC("struct_ops") -__u32 BPF_PROG(dctcp_ssthresh, struct sock *sk) +__u32 BPF_PROG(bpf_dctcp_ssthresh, struct sock *sk) { struct bpf_dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -122,7 +122,7 @@ __u32 BPF_PROG(dctcp_ssthresh, struct sock *sk) } SEC("struct_ops") -void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags) +void BPF_PROG(bpf_dctcp_update_alpha, struct sock *sk, __u32 flags) { const struct tcp_sock *tp = tcp_sk(sk); struct bpf_dctcp *ca = inet_csk_ca(sk); @@ -161,12 +161,12 @@ static void dctcp_react_to_loss(struct sock *sk) } SEC("struct_ops") -void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state) +void BPF_PROG(bpf_dctcp_state, struct sock *sk, __u8 new_state) { if (new_state == TCP_CA_Recovery && new_state != BPF_CORE_READ_BITFIELD(inet_csk(sk), icsk_ca_state)) dctcp_react_to_loss(sk); - /* We handle RTO in dctcp_cwnd_event to ensure that we perform only + /* We handle RTO in bpf_dctcp_cwnd_event to ensure that we perform only * one loss-adjustment per RTT. */ } @@ -208,7 +208,7 @@ static void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt, } SEC("struct_ops") -void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev) +void BPF_PROG(bpf_dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev) { struct bpf_dctcp *ca = inet_csk_ca(sk); @@ -227,7 +227,7 @@ void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev) } SEC("struct_ops") -__u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk) +__u32 BPF_PROG(bpf_dctcp_cwnd_undo, struct sock *sk) { const struct bpf_dctcp *ca = inet_csk_ca(sk); @@ -237,28 +237,28 @@ __u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk) extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym; SEC("struct_ops") -void BPF_PROG(dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) +void BPF_PROG(bpf_dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) { tcp_reno_cong_avoid(sk, ack, acked); } SEC(".struct_ops") struct tcp_congestion_ops dctcp_nouse = { - .init = (void *)dctcp_init, - .set_state = (void *)dctcp_state, + .init = (void *)bpf_dctcp_init, + .set_state = (void *)bpf_dctcp_state, .flags = TCP_CONG_NEEDS_ECN, .name = "bpf_dctcp_nouse", }; SEC(".struct_ops") struct tcp_congestion_ops dctcp = { - .init = (void *)dctcp_init, - .in_ack_event = (void *)dctcp_update_alpha, - .cwnd_event = (void *)dctcp_cwnd_event, - .ssthresh = (void *)dctcp_ssthresh, - .cong_avoid = (void *)dctcp_cong_avoid, - .undo_cwnd = (void *)dctcp_cwnd_undo, - .set_state = (void *)dctcp_state, + .init = (void *)bpf_dctcp_init, + .in_ack_event = (void *)bpf_dctcp_update_alpha, + .cwnd_event = (void *)bpf_dctcp_cwnd_event, + .ssthresh = (void *)bpf_dctcp_ssthresh, + .cong_avoid = (void *)bpf_dctcp_cong_avoid, + .undo_cwnd = (void *)bpf_dctcp_cwnd_undo, + .set_state = (void *)bpf_dctcp_state, .flags = TCP_CONG_NEEDS_ECN, .name = "bpf_dctcp", }; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c index c5969ca6f26b..564835ba7d51 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c @@ -6,12 +6,6 @@ char _license[] SEC("license") = "GPL"; -struct key_t { - int a; - int b; - int c; -}; - struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 3); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c index 85fa710fad90..9f0e0705b2bf 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c @@ -6,12 +6,6 @@ char _license[] SEC("license") = "GPL"; -struct key_t { - int a; - int b; - int c; -}; - struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 3); diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index fb2f5513e29e..81097a3f15eb 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -7,9 +7,9 @@ * * The test_loader sequentially loads each program in a skeleton. * Programs could be loaded in privileged and unprivileged modes. - * - __success, __failure, __msg imply privileged mode; - * - __success_unpriv, __failure_unpriv, __msg_unpriv imply - * unprivileged mode. + * - __success, __failure, __msg, __regex imply privileged mode; + * - __success_unpriv, __failure_unpriv, __msg_unpriv, __regex_unpriv + * imply unprivileged mode. * If combination of privileged and unprivileged attributes is present * both modes are used. If none are present privileged mode is implied. * @@ -24,6 +24,9 @@ * Multiple __msg attributes could be specified. * __msg_unpriv Same as __msg but for unprivileged mode. * + * __regex Same as __msg, but using a regular expression. + * __regex_unpriv Same as __msg_unpriv but using a regular expression. + * * __success Expect program load success in privileged mode. * __success_unpriv Expect program load success in unprivileged mode. * @@ -59,10 +62,12 @@ * __auxiliary_unpriv Same, but load program in unprivileged mode. */ #define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" msg))) +#define __regex(regex) __attribute__((btf_decl_tag("comment:test_expect_regex=" regex))) #define __failure __attribute__((btf_decl_tag("comment:test_expect_failure"))) #define __success __attribute__((btf_decl_tag("comment:test_expect_success"))) #define __description(desc) __attribute__((btf_decl_tag("comment:test_description=" desc))) #define __msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_msg_unpriv=" msg))) +#define __regex_unpriv(regex) __attribute__((btf_decl_tag("comment:test_expect_regex_unpriv=" regex))) #define __failure_unpriv __attribute__((btf_decl_tag("comment:test_expect_failure_unpriv"))) #define __success_unpriv __attribute__((btf_decl_tag("comment:test_expect_success_unpriv"))) #define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl))) @@ -135,4 +140,8 @@ /* make it look to compiler like value is read and written */ #define __sink(expr) asm volatile("" : "+g"(expr)) +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + #endif diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index 7a1e64c6c065..fd8106831c32 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -12,6 +12,31 @@ char _license[] SEC("license") = "GPL"; int pid, nr_cpus; +struct kptr_nested { + struct bpf_cpumask __kptr * mask; +}; + +struct kptr_nested_pair { + struct bpf_cpumask __kptr * mask_1; + struct bpf_cpumask __kptr * mask_2; +}; + +struct kptr_nested_mid { + int dummy; + struct kptr_nested m; +}; + +struct kptr_nested_deep { + struct kptr_nested_mid ptrs[2]; + struct kptr_nested_pair ptr_pairs[3]; +}; + +private(MASK) static struct bpf_cpumask __kptr * global_mask_array[2]; +private(MASK) static struct bpf_cpumask __kptr * global_mask_array_l2[2][1]; +private(MASK) static struct bpf_cpumask __kptr * global_mask_array_one[1]; +private(MASK) static struct kptr_nested global_mask_nested[2]; +private(MASK_DEEP) static struct kptr_nested_deep global_mask_nested_deep; + static bool is_test_task(void) { int cur_pid = bpf_get_current_pid_tgid() >> 32; @@ -461,6 +486,152 @@ int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags) } SEC("tp_btf/task_newtask") +int BPF_PROG(test_global_mask_array_one_rcu, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *local, *prev; + + if (!is_test_task()) + return 0; + + /* Kptr arrays with one element are special cased, being treated + * just like a single pointer. + */ + + local = create_cpumask(); + if (!local) + return 0; + + prev = bpf_kptr_xchg(&global_mask_array_one[0], local); + if (prev) { + bpf_cpumask_release(prev); + err = 3; + return 0; + } + + bpf_rcu_read_lock(); + local = global_mask_array_one[0]; + if (!local) { + err = 4; + bpf_rcu_read_unlock(); + return 0; + } + + bpf_rcu_read_unlock(); + + return 0; +} + +static int _global_mask_array_rcu(struct bpf_cpumask **mask0, + struct bpf_cpumask **mask1) +{ + struct bpf_cpumask *local; + + if (!is_test_task()) + return 0; + + /* Check if two kptrs in the array work and independently */ + + local = create_cpumask(); + if (!local) + return 0; + + bpf_rcu_read_lock(); + + local = bpf_kptr_xchg(mask0, local); + if (local) { + err = 1; + goto err_exit; + } + + /* [<mask 0>, NULL] */ + if (!*mask0 || *mask1) { + err = 2; + goto err_exit; + } + + local = create_cpumask(); + if (!local) { + err = 9; + goto err_exit; + } + + local = bpf_kptr_xchg(mask1, local); + if (local) { + err = 10; + goto err_exit; + } + + /* [<mask 0>, <mask 1>] */ + if (!*mask0 || !*mask1 || *mask0 == *mask1) { + err = 11; + goto err_exit; + } + +err_exit: + if (local) + bpf_cpumask_release(local); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_global_mask_array_rcu, struct task_struct *task, u64 clone_flags) +{ + return _global_mask_array_rcu(&global_mask_array[0], &global_mask_array[1]); +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_global_mask_array_l2_rcu, struct task_struct *task, u64 clone_flags) +{ + return _global_mask_array_rcu(&global_mask_array_l2[0][0], &global_mask_array_l2[1][0]); +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_global_mask_nested_rcu, struct task_struct *task, u64 clone_flags) +{ + return _global_mask_array_rcu(&global_mask_nested[0].mask, &global_mask_nested[1].mask); +} + +/* Ensure that the field->offset has been correctly advanced from one + * nested struct or array sub-tree to another. In the case of + * kptr_nested_deep, it comprises two sub-trees: ktpr_1 and kptr_2. By + * calling bpf_kptr_xchg() on every single kptr in both nested sub-trees, + * the verifier should reject the program if the field->offset of any kptr + * is incorrect. + * + * For instance, if we have 10 kptrs in a nested struct and a program that + * accesses each kptr individually with bpf_kptr_xchg(), the compiler + * should emit instructions to access 10 different offsets if it works + * correctly. If the field->offset values of any pair of them are + * incorrectly the same, the number of unique offsets in btf_record for + * this nested struct should be less than 10. The verifier should fail to + * discover some of the offsets emitted by the compiler. + * + * Even if the field->offset values of kptrs are not duplicated, the + * verifier should fail to find a btf_field for the instruction accessing a + * kptr if the corresponding field->offset is pointing to a random + * incorrect offset. + */ +SEC("tp_btf/task_newtask") +int BPF_PROG(test_global_mask_nested_deep_rcu, struct task_struct *task, u64 clone_flags) +{ + int r, i; + + r = _global_mask_array_rcu(&global_mask_nested_deep.ptrs[0].m.mask, + &global_mask_nested_deep.ptrs[1].m.mask); + if (r) + return r; + + for (i = 0; i < 3; i++) { + r = _global_mask_array_rcu(&global_mask_nested_deep.ptr_pairs[i].mask_1, + &global_mask_nested_deep.ptr_pairs[i].mask_2); + if (r) + return r; + } + return 0; +} + +SEC("tp_btf/task_newtask") int BPF_PROG(test_cpumask_weight, struct task_struct *task, u64 clone_flags) { struct bpf_cpumask *local; diff --git a/tools/testing/selftests/bpf/progs/crypto_bench.c b/tools/testing/selftests/bpf/progs/crypto_bench.c index e61fe0882293..4ac956b26240 100644 --- a/tools/testing/selftests/bpf/progs/crypto_bench.c +++ b/tools/testing/selftests/bpf/progs/crypto_bench.c @@ -57,7 +57,7 @@ int crypto_encrypt(struct __sk_buff *skb) { struct __crypto_ctx_value *v; struct bpf_crypto_ctx *ctx; - struct bpf_dynptr psrc, pdst, iv; + struct bpf_dynptr psrc, pdst; v = crypto_ctx_value_lookup(); if (!v) { @@ -73,9 +73,8 @@ int crypto_encrypt(struct __sk_buff *skb) bpf_dynptr_from_skb(skb, 0, &psrc); bpf_dynptr_from_mem(dst, len, 0, &pdst); - bpf_dynptr_from_mem(dst, 0, 0, &iv); - status = bpf_crypto_encrypt(ctx, &psrc, &pdst, &iv); + status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL); __sync_add_and_fetch(&hits, 1); return 0; @@ -84,7 +83,7 @@ int crypto_encrypt(struct __sk_buff *skb) SEC("tc") int crypto_decrypt(struct __sk_buff *skb) { - struct bpf_dynptr psrc, pdst, iv; + struct bpf_dynptr psrc, pdst; struct __crypto_ctx_value *v; struct bpf_crypto_ctx *ctx; @@ -98,9 +97,8 @@ int crypto_decrypt(struct __sk_buff *skb) bpf_dynptr_from_skb(skb, 0, &psrc); bpf_dynptr_from_mem(dst, len, 0, &pdst); - bpf_dynptr_from_mem(dst, 0, 0, &iv); - status = bpf_crypto_decrypt(ctx, &psrc, &pdst, &iv); + status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL); __sync_add_and_fetch(&hits, 1); return 0; diff --git a/tools/testing/selftests/bpf/progs/crypto_sanity.c b/tools/testing/selftests/bpf/progs/crypto_sanity.c index 1be0a3fa5efd..645be6cddf36 100644 --- a/tools/testing/selftests/bpf/progs/crypto_sanity.c +++ b/tools/testing/selftests/bpf/progs/crypto_sanity.c @@ -89,7 +89,7 @@ int decrypt_sanity(struct __sk_buff *skb) { struct __crypto_ctx_value *v; struct bpf_crypto_ctx *ctx; - struct bpf_dynptr psrc, pdst, iv; + struct bpf_dynptr psrc, pdst; int err; err = skb_dynptr_validate(skb, &psrc); @@ -114,12 +114,8 @@ int decrypt_sanity(struct __sk_buff *skb) * production code, a percpu map should be used to store the result. */ bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst); - /* iv dynptr has to be initialized with 0 size, but proper memory region - * has to be provided anyway - */ - bpf_dynptr_from_mem(dst, 0, 0, &iv); - status = bpf_crypto_decrypt(ctx, &psrc, &pdst, &iv); + status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL); return TC_ACT_SHOT; } @@ -129,7 +125,7 @@ int encrypt_sanity(struct __sk_buff *skb) { struct __crypto_ctx_value *v; struct bpf_crypto_ctx *ctx; - struct bpf_dynptr psrc, pdst, iv; + struct bpf_dynptr psrc, pdst; int err; status = 0; @@ -156,12 +152,8 @@ int encrypt_sanity(struct __sk_buff *skb) * production code, a percpu map should be used to store the result. */ bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst); - /* iv dynptr has to be initialized with 0 size, but proper memory region - * has to be provided anyway - */ - bpf_dynptr_from_mem(dst, 0, 0, &iv); - status = bpf_crypto_encrypt(ctx, &psrc, &pdst, &iv); + status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL); return TC_ACT_SHOT; } diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index 66a60bfb5867..e35bc1eac52a 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -964,7 +964,7 @@ int dynptr_invalidate_slice_reinit(void *ctx) * mem_or_null pointers. */ SEC("?raw_tp") -__failure __msg("R1 type=scalar expected=percpu_ptr_") +__failure __regex("R[0-9]+ type=scalar expected=percpu_ptr_") int dynptr_invalidate_slice_or_null(void *ctx) { struct bpf_dynptr ptr; @@ -982,7 +982,7 @@ int dynptr_invalidate_slice_or_null(void *ctx) /* Destruction of dynptr should also any slices obtained from it */ SEC("?raw_tp") -__failure __msg("R7 invalid mem access 'scalar'") +__failure __regex("R[0-9]+ invalid mem access 'scalar'") int dynptr_invalidate_slice_failure(void *ctx) { struct bpf_dynptr ptr1; @@ -1069,7 +1069,7 @@ int dynptr_read_into_slot(void *ctx) /* bpf_dynptr_slice()s are read-only and cannot be written to */ SEC("?tc") -__failure __msg("R0 cannot write into rdonly_mem") +__failure __regex("R[0-9]+ cannot write into rdonly_mem") int skb_invalid_slice_write(struct __sk_buff *skb) { struct bpf_dynptr ptr; @@ -1686,3 +1686,27 @@ int test_dynptr_skb_small_buff(struct __sk_buff *skb) return !!data; } + +__noinline long global_call_bpf_dynptr(const struct bpf_dynptr *dynptr) +{ + long ret = 0; + /* Avoid leaving this global function empty to avoid having the compiler + * optimize away the call to this global function. + */ + __sink(ret); + return ret; +} + +SEC("?raw_tp") +__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr") +int test_dynptr_reg_type(void *ctx) +{ + struct task_struct *current = NULL; + /* R1 should be holding a PTR_TO_BTF_ID, so this shouldn't be a + * reg->type that can be passed to a function accepting a + * ARG_PTR_TO_DYNPTR | MEM_RDONLY. process_dynptr_func() should catch + * this. + */ + global_call_bpf_dynptr((const struct bpf_dynptr *)current); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c index 8956eb78a226..2011cacdeb18 100644 --- a/tools/testing/selftests/bpf/progs/get_func_ip_test.c +++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c @@ -5,13 +5,12 @@ char _license[] SEC("license") = "GPL"; -extern const void bpf_fentry_test1 __ksym; +extern int bpf_fentry_test1(int a) __ksym; +extern int bpf_modify_return_test(int a, int *b) __ksym; + extern const void bpf_fentry_test2 __ksym; extern const void bpf_fentry_test3 __ksym; extern const void bpf_fentry_test4 __ksym; -extern const void bpf_modify_return_test __ksym; -extern const void bpf_fentry_test6 __ksym; -extern const void bpf_fentry_test7 __ksym; extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak; diff --git a/tools/testing/selftests/bpf/progs/ip_check_defrag.c b/tools/testing/selftests/bpf/progs/ip_check_defrag.c index 1c2b6c1616b0..645b2c9f7867 100644 --- a/tools/testing/selftests/bpf/progs/ip_check_defrag.c +++ b/tools/testing/selftests/bpf/progs/ip_check_defrag.c @@ -12,7 +12,7 @@ #define IP_OFFSET 0x1FFF #define NEXTHDR_FRAGMENT 44 -extern int bpf_dynptr_from_skb(struct sk_buff *skb, __u64 flags, +extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, struct bpf_dynptr *ptr__uninit) __ksym; extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, void *buffer, uint32_t buffer__sz) __ksym; @@ -42,7 +42,7 @@ static bool is_frag_v6(struct ipv6hdr *ip6h) return ip6h->nexthdr == NEXTHDR_FRAGMENT; } -static int handle_v4(struct sk_buff *skb) +static int handle_v4(struct __sk_buff *skb) { struct bpf_dynptr ptr; u8 iph_buf[20] = {}; @@ -64,7 +64,7 @@ static int handle_v4(struct sk_buff *skb) return NF_ACCEPT; } -static int handle_v6(struct sk_buff *skb) +static int handle_v6(struct __sk_buff *skb) { struct bpf_dynptr ptr; struct ipv6hdr *ip6h; @@ -89,9 +89,9 @@ static int handle_v6(struct sk_buff *skb) SEC("netfilter") int defrag(struct bpf_nf_ctx *ctx) { - struct sk_buff *skb = ctx->skb; + struct __sk_buff *skb = (struct __sk_buff *)ctx->skb; - switch (bpf_ntohs(skb->protocol)) { + switch (bpf_ntohs(ctx->skb->protocol)) { case ETH_P_IP: return handle_v4(skb); case ETH_P_IPV6: diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index fe65e0952a1e..16bdc3e25591 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -7,8 +7,6 @@ #include "bpf_misc.h" #include "bpf_compiler.h" -#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0])) - static volatile int zero = 0; int my_pid; diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c index cf68d1e48a0f..f502f755f567 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c @@ -177,4 +177,41 @@ int kfunc_call_test_static_unused_arg(struct __sk_buff *skb) return actual != expected ? -1 : 0; } +struct ctx_val { + struct bpf_testmod_ctx __kptr *ctx; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct ctx_val); +} ctx_map SEC(".maps"); + +SEC("tc") +int kfunc_call_ctx(struct __sk_buff *skb) +{ + struct bpf_testmod_ctx *ctx; + int err = 0; + + ctx = bpf_testmod_ctx_create(&err); + if (!ctx && !err) + err = -1; + if (ctx) { + int key = 0; + struct ctx_val *ctx_val = bpf_map_lookup_elem(&ctx_map, &key); + + /* Transfer ctx to map to be freed via implicit dtor call + * on cleanup. + */ + if (ctx_val) + ctx = bpf_kptr_xchg(&ctx_val->ctx, ctx); + if (ctx) { + bpf_testmod_ctx_release(ctx); + err = -1; + } + } + return err; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_session.c b/tools/testing/selftests/bpf/progs/kprobe_multi_session.c index bbba9eb46551..bd8b7fb7061e 100644 --- a/tools/testing/selftests/bpf/progs/kprobe_multi_session.c +++ b/tools/testing/selftests/bpf/progs/kprobe_multi_session.c @@ -4,8 +4,7 @@ #include <bpf/bpf_tracing.h> #include <stdbool.h> #include "bpf_kfuncs.h" - -#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0])) +#include "bpf_misc.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c index d49070803e22..0835b5edf685 100644 --- a/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c +++ b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c @@ -25,7 +25,7 @@ int BPF_PROG(trigger) static int check_cookie(__u64 val, __u64 *result) { - long *cookie; + __u64 *cookie; if (bpf_get_current_pid_tgid() >> 32 != pid) return 1; diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c index 26205ca80679..421f40835acd 100644 --- a/tools/testing/selftests/bpf/progs/linked_list.c +++ b/tools/testing/selftests/bpf/progs/linked_list.c @@ -4,13 +4,26 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_core_read.h> #include "bpf_experimental.h" - -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0])) -#endif +#include "bpf_misc.h" #include "linked_list.h" +struct head_nested_inner { + struct bpf_spin_lock lock; + struct bpf_list_head head __contains(foo, node2); +}; + +struct head_nested { + int dummy; + struct head_nested_inner inner; +}; + +private(C) struct bpf_spin_lock glock_c; +private(C) struct bpf_list_head ghead_array[2] __contains(foo, node2); +private(C) struct bpf_list_head ghead_array_one[1] __contains(foo, node2); + +private(D) struct head_nested ghead_nested; + static __always_inline int list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map) { @@ -310,6 +323,32 @@ int global_list_push_pop(void *ctx) } SEC("tc") +int global_list_push_pop_nested(void *ctx) +{ + return test_list_push_pop(&ghead_nested.inner.lock, &ghead_nested.inner.head); +} + +SEC("tc") +int global_list_array_push_pop(void *ctx) +{ + int r; + + r = test_list_push_pop(&glock_c, &ghead_array[0]); + if (r) + return r; + + r = test_list_push_pop(&glock_c, &ghead_array[1]); + if (r) + return r; + + /* Arrays with only one element is a special case, being treated + * just like a bpf_list_head variable by the verifier, not an + * array. + */ + return test_list_push_pop(&glock_c, &ghead_array_one[0]); +} + +SEC("tc") int map_list_push_pop_multiple(void *ctx) { struct map_value *v; diff --git a/tools/testing/selftests/bpf/progs/map_percpu_stats.c b/tools/testing/selftests/bpf/progs/map_percpu_stats.c index 10b2325c1720..63245785eb69 100644 --- a/tools/testing/selftests/bpf/progs/map_percpu_stats.c +++ b/tools/testing/selftests/bpf/progs/map_percpu_stats.c @@ -7,7 +7,7 @@ __u32 target_id; -__s64 bpf_map_sum_elem_count(struct bpf_map *map) __ksym; +__s64 bpf_map_sum_elem_count(const struct bpf_map *map) __ksym; SEC("iter/bpf_map") int dump_bpf_map(struct bpf_iter__bpf_map *ctx) diff --git a/tools/testing/selftests/bpf/progs/nested_trust_common.h b/tools/testing/selftests/bpf/progs/nested_trust_common.h index 83d33931136e..1784b496be2e 100644 --- a/tools/testing/selftests/bpf/progs/nested_trust_common.h +++ b/tools/testing/selftests/bpf/progs/nested_trust_common.h @@ -7,6 +7,6 @@ #include <stdbool.h> bool bpf_cpumask_test_cpu(unsigned int cpu, const struct cpumask *cpumask) __ksym; -bool bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym; +__u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym; #endif /* _NESTED_TRUST_COMMON_H */ diff --git a/tools/testing/selftests/bpf/progs/nested_trust_failure.c b/tools/testing/selftests/bpf/progs/nested_trust_failure.c index ea39497f11ed..3568ec450100 100644 --- a/tools/testing/selftests/bpf/progs/nested_trust_failure.c +++ b/tools/testing/selftests/bpf/progs/nested_trust_failure.c @@ -31,14 +31,6 @@ int BPF_PROG(test_invalid_nested_user_cpus, struct task_struct *task, u64 clone_ return 0; } -SEC("tp_btf/task_newtask") -__failure __msg("R1 must have zero offset when passed to release func or trusted arg to kfunc") -int BPF_PROG(test_invalid_nested_offset, struct task_struct *task, u64 clone_flags) -{ - bpf_cpumask_first_zero(&task->cpus_mask); - return 0; -} - /* Although R2 is of type sk_buff but sock_common is expected, we will hit untrusted ptr first. */ SEC("tp_btf/tcp_probe") __failure __msg("R2 type=untrusted_ptr_ expected=ptr_, trusted_ptr_, rcu_ptr_") diff --git a/tools/testing/selftests/bpf/progs/nested_trust_success.c b/tools/testing/selftests/bpf/progs/nested_trust_success.c index 833840bffd3b..2b66953ca82e 100644 --- a/tools/testing/selftests/bpf/progs/nested_trust_success.c +++ b/tools/testing/selftests/bpf/progs/nested_trust_success.c @@ -32,3 +32,11 @@ int BPF_PROG(test_skb_field, struct sock *sk, struct sk_buff *skb) bpf_sk_storage_get(&sk_storage_map, skb->sk, 0, 0); return 0; } + +SEC("tp_btf/task_newtask") +__success +int BPF_PROG(test_nested_offset, struct task_struct *task, u64 clone_flags) +{ + bpf_cpumask_first_zero(&task->cpus_mask); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c index c0062645fc68..9e067dcbf607 100644 --- a/tools/testing/selftests/bpf/progs/netif_receive_skb.c +++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c @@ -5,6 +5,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> +#include "bpf_misc.h" #include <errno.h> @@ -23,10 +24,6 @@ bool skip = false; #define BADPTR 0 #endif -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 1); diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 6957d9f2805e..8bd1ebd7d6af 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -9,6 +9,7 @@ #include "err.h" #include "bpf_experimental.h" #include "bpf_compiler.h" +#include "bpf_misc.h" #ifndef NULL #define NULL 0 @@ -133,10 +134,6 @@ struct { __uint(max_entries, 16); } disallowed_exec_inodes SEC(".maps"); -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(arr) (int)(sizeof(arr) / sizeof(arr[0])) -#endif - static INLINE bool IS_ERR(const void* ptr) { return IS_ERR_VALUE((unsigned long)ptr); diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c index b09f4fffe57c..a3620c15c136 100644 --- a/tools/testing/selftests/bpf/progs/rbtree.c +++ b/tools/testing/selftests/bpf/progs/rbtree.c @@ -13,6 +13,15 @@ struct node_data { struct bpf_rb_node node; }; +struct root_nested_inner { + struct bpf_spin_lock glock; + struct bpf_rb_root root __contains(node_data, node); +}; + +struct root_nested { + struct root_nested_inner inner; +}; + long less_callback_ran = -1; long removed_key = -1; long first_data[2] = {-1, -1}; @@ -20,6 +29,9 @@ long first_data[2] = {-1, -1}; #define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) private(A) struct bpf_spin_lock glock; private(A) struct bpf_rb_root groot __contains(node_data, node); +private(A) struct bpf_rb_root groot_array[2] __contains(node_data, node); +private(A) struct bpf_rb_root groot_array_one[1] __contains(node_data, node); +private(B) struct root_nested groot_nested; static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b) { @@ -72,6 +84,12 @@ long rbtree_add_nodes(void *ctx) } SEC("tc") +long rbtree_add_nodes_nested(void *ctx) +{ + return __add_three(&groot_nested.inner.root, &groot_nested.inner.glock); +} + +SEC("tc") long rbtree_add_and_remove(void *ctx) { struct bpf_rb_node *res = NULL; @@ -110,6 +128,65 @@ err_out: } SEC("tc") +long rbtree_add_and_remove_array(void *ctx) +{ + struct bpf_rb_node *res1 = NULL, *res2 = NULL, *res3 = NULL; + struct node_data *nodes[3][2] = {{NULL, NULL}, {NULL, NULL}, {NULL, NULL}}; + struct node_data *n; + long k1 = -1, k2 = -1, k3 = -1; + int i, j; + + for (i = 0; i < 3; i++) { + for (j = 0; j < 2; j++) { + nodes[i][j] = bpf_obj_new(typeof(*nodes[i][j])); + if (!nodes[i][j]) + goto err_out; + nodes[i][j]->key = i * 2 + j; + } + } + + bpf_spin_lock(&glock); + for (i = 0; i < 2; i++) + for (j = 0; j < 2; j++) + bpf_rbtree_add(&groot_array[i], &nodes[i][j]->node, less); + for (j = 0; j < 2; j++) + bpf_rbtree_add(&groot_array_one[0], &nodes[2][j]->node, less); + res1 = bpf_rbtree_remove(&groot_array[0], &nodes[0][0]->node); + res2 = bpf_rbtree_remove(&groot_array[1], &nodes[1][0]->node); + res3 = bpf_rbtree_remove(&groot_array_one[0], &nodes[2][0]->node); + bpf_spin_unlock(&glock); + + if (res1) { + n = container_of(res1, struct node_data, node); + k1 = n->key; + bpf_obj_drop(n); + } + if (res2) { + n = container_of(res2, struct node_data, node); + k2 = n->key; + bpf_obj_drop(n); + } + if (res3) { + n = container_of(res3, struct node_data, node); + k3 = n->key; + bpf_obj_drop(n); + } + if (k1 != 0 || k2 != 2 || k3 != 4) + return 2; + + return 0; + +err_out: + for (i = 0; i < 3; i++) { + for (j = 0; j < 2; j++) { + if (nodes[i][j]) + bpf_obj_drop(nodes[i][j]); + } + } + return 1; +} + +SEC("tc") long rbtree_first_and_remove(void *ctx) { struct bpf_rb_node *res = NULL; diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c index 3fecf1c6dfe5..b722a1e1ddef 100644 --- a/tools/testing/selftests/bpf/progs/rbtree_fail.c +++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c @@ -105,7 +105,7 @@ long rbtree_api_remove_unadded_node(void *ctx) } SEC("?tc") -__failure __msg("Unreleased reference id=3 alloc_insn=10") +__failure __regex("Unreleased reference id=3 alloc_insn=[0-9]+") long rbtree_api_remove_no_drop(void *ctx) { struct bpf_rb_node *res; diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c index 1553b9c16aa7..f8d4b7cfcd68 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c @@ -32,7 +32,7 @@ static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b) } SEC("?tc") -__failure __msg("Unreleased reference id=4 alloc_insn=21") +__failure __regex("Unreleased reference id=4 alloc_insn=[0-9]+") long rbtree_refcounted_node_ref_escapes(void *ctx) { struct node_acquire *n, *m; @@ -73,7 +73,7 @@ long refcount_acquire_maybe_null(void *ctx) } SEC("?tc") -__failure __msg("Unreleased reference id=3 alloc_insn=9") +__failure __regex("Unreleased reference id=3 alloc_insn=[0-9]+") long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx) { struct node_acquire *n, *m; diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c index 7a438600ae98..60518aed1ffc 100644 --- a/tools/testing/selftests/bpf/progs/setget_sockopt.c +++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c @@ -6,10 +6,7 @@ #include <bpf/bpf_core_read.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> - -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif +#include "bpf_misc.h" extern unsigned long CONFIG_HZ __kconfig; diff --git a/tools/testing/selftests/bpf/progs/skb_pkt_end.c b/tools/testing/selftests/bpf/progs/skb_pkt_end.c index db4abd2682fc..3bb4451524a1 100644 --- a/tools/testing/selftests/bpf/progs/skb_pkt_end.c +++ b/tools/testing/selftests/bpf/progs/skb_pkt_end.c @@ -33,6 +33,8 @@ int main_prog(struct __sk_buff *skb) struct iphdr *ip = NULL; struct tcphdr *tcp; __u8 proto = 0; + int urg_ptr; + u32 offset; if (!(ip = get_iphdr(skb))) goto out; @@ -48,7 +50,14 @@ int main_prog(struct __sk_buff *skb) if (!tcp) goto out; - return tcp->urg_ptr; + urg_ptr = tcp->urg_ptr; + + /* Checksum validation part */ + proto++; + offset = sizeof(struct ethhdr) + offsetof(struct iphdr, protocol); + bpf_skb_store_bytes(skb, offset, &proto, sizeof(proto), BPF_F_RECOMPUTE_CSUM); + + return urg_ptr; out: return -1; } diff --git a/tools/testing/selftests/bpf/progs/struct_ops_detach.c b/tools/testing/selftests/bpf/progs/struct_ops_detach.c new file mode 100644 index 000000000000..56b787a89876 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_detach.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_do_detach; diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c index 3494ca30fa7f..4a4e0b8d9b72 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_ma.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c @@ -7,10 +7,6 @@ #include "bpf_experimental.h" #include "bpf_misc.h" -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - struct generic_map_value { void *data; }; diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c index 77ad8adf68da..f7b330ddd007 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define BPF_NO_KFUNC_PROTOTYPES #include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> @@ -9,10 +10,14 @@ #define EINVAL 22 #define ENOENT 2 +#define NF_CT_ZONE_DIR_ORIG (1 << IP_CT_DIR_ORIGINAL) +#define NF_CT_ZONE_DIR_REPL (1 << IP_CT_DIR_REPLY) + extern unsigned long CONFIG_HZ __kconfig; int test_einval_bpf_tuple = 0; int test_einval_reserved = 0; +int test_einval_reserved_new = 0; int test_einval_netns_id = 0; int test_einval_len_opts = 0; int test_eproto_l4proto = 0; @@ -22,6 +27,11 @@ int test_eafnosupport = 0; int test_alloc_entry = -EINVAL; int test_insert_entry = -EAFNOSUPPORT; int test_succ_lookup = -ENOENT; +int test_ct_zone_id_alloc_entry = -EINVAL; +int test_ct_zone_id_insert_entry = -EAFNOSUPPORT; +int test_ct_zone_id_succ_lookup = -ENOENT; +int test_ct_zone_dir_enoent_lookup = 0; +int test_ct_zone_id_enoent_lookup = 0; u32 test_delta_timeout = 0; u32 test_status = 0; u32 test_insert_lookup_mark = 0; @@ -45,6 +55,17 @@ struct bpf_ct_opts___local { s32 netns_id; s32 error; u8 l4proto; + u8 dir; + u8 reserved[2]; +}; + +struct bpf_ct_opts___new { + s32 netns_id; + s32 error; + u8 l4proto; + u8 dir; + u16 ct_zone_id; + u8 ct_zone_dir; u8 reserved[3]; } __attribute__((preserve_access_index)); @@ -220,10 +241,97 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, } } +static __always_inline void +nf_ct_opts_new_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___new *, u32), + struct nf_conn *(*alloc_fn)(void *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___new *, u32), + void *ctx) +{ + struct bpf_ct_opts___new opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 }; + struct bpf_sock_tuple bpf_tuple; + struct nf_conn *ct; + + __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4)); + + opts_def.reserved[0] = 1; + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); + opts_def.reserved[0] = 0; + if (ct) + bpf_ct_release(ct); + else + test_einval_reserved_new = opts_def.error; + + bpf_tuple.ipv4.saddr = bpf_get_prandom_u32(); /* src IP */ + bpf_tuple.ipv4.daddr = bpf_get_prandom_u32(); /* dst IP */ + bpf_tuple.ipv4.sport = bpf_get_prandom_u32(); /* src port */ + bpf_tuple.ipv4.dport = bpf_get_prandom_u32(); /* dst port */ + + /* use non-default ct zone */ + opts_def.ct_zone_id = 10; + opts_def.ct_zone_dir = NF_CT_ZONE_DIR_ORIG; + ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); + if (ct) { + __u16 sport = bpf_get_prandom_u32(); + __u16 dport = bpf_get_prandom_u32(); + union nf_inet_addr saddr = {}; + union nf_inet_addr daddr = {}; + struct nf_conn *ct_ins; + + bpf_ct_set_timeout(ct, 10000); + + /* snat */ + saddr.ip = bpf_get_prandom_u32(); + bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC___local); + /* dnat */ + daddr.ip = bpf_get_prandom_u32(); + bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST___local); + + ct_ins = bpf_ct_insert_entry(ct); + if (ct_ins) { + struct nf_conn *ct_lk; + + /* entry should exist in same ct zone we inserted it */ + ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), + &opts_def, sizeof(opts_def)); + if (ct_lk) { + bpf_ct_release(ct_lk); + test_ct_zone_id_succ_lookup = 0; + } + + /* entry should not exist with wrong direction */ + opts_def.ct_zone_dir = NF_CT_ZONE_DIR_REPL; + ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), + &opts_def, sizeof(opts_def)); + opts_def.ct_zone_dir = NF_CT_ZONE_DIR_ORIG; + if (ct_lk) + bpf_ct_release(ct_lk); + else + test_ct_zone_dir_enoent_lookup = opts_def.error; + + /* entry should not exist in default ct zone */ + opts_def.ct_zone_id = 0; + ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), + &opts_def, sizeof(opts_def)); + if (ct_lk) + bpf_ct_release(ct_lk); + else + test_ct_zone_id_enoent_lookup = opts_def.error; + + bpf_ct_release(ct_ins); + test_ct_zone_id_insert_entry = 0; + } + test_ct_zone_id_alloc_entry = 0; + } +} + SEC("xdp") int nf_xdp_ct_test(struct xdp_md *ctx) { nf_ct_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx); + nf_ct_opts_new_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx); return 0; } @@ -231,6 +339,7 @@ SEC("tc") int nf_skb_ct_test(struct __sk_buff *ctx) { nf_ct_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx); + nf_ct_opts_new_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c index 0e4759ab38ff..a586f087ffeb 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define BPF_NO_KFUNC_PROTOTYPES #include <vmlinux.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c index 2dde8e3fe4c9..e68667aec6a6 100644 --- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c @@ -45,7 +45,7 @@ int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size) } SEC("?lsm.s/bpf") -__failure __msg("arg#0 expected pointer to stack or dynptr_ptr") +__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr") int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size) { unsigned long val = 0; diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c new file mode 100644 index 000000000000..7ac7e1de34d8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "bpf_kfuncs.h" +#include "../bpf_testmod/bpf_testmod_kfunc.h" + +SEC("tc") +int kfunc_dynptr_nullable_test1(struct __sk_buff *skb) +{ + struct bpf_dynptr data; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_kfunc_dynptr_test(&data, NULL); + + return 0; +} + +SEC("tc") +int kfunc_dynptr_nullable_test2(struct __sk_buff *skb) +{ + struct bpf_dynptr data; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_kfunc_dynptr_test(&data, &data); + + return 0; +} + +SEC("tc") +__failure __msg("expected pointer to stack or const struct bpf_dynptr") +int kfunc_dynptr_nullable_test3(struct __sk_buff *skb) +{ + struct bpf_dynptr data; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_kfunc_dynptr_test(NULL, &data); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c new file mode 100644 index 000000000000..350513c0e4c9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); +} ringbuf SEC(".maps"); + +/* inputs */ +int pid = 0; + +/* outputs */ +long passed = 0; +long discarded = 0; + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int test_ringbuf_write(void *ctx) +{ + int *foo, cur_pid = bpf_get_current_pid_tgid() >> 32; + void *sample1, *sample2; + + if (cur_pid != pid) + return 0; + + sample1 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + if (!sample1) + return 0; + /* first one can pass */ + sample2 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + if (!sample2) { + bpf_ringbuf_discard(sample1, 0); + __sync_fetch_and_add(&discarded, 1); + return 0; + } + /* second one must not */ + __sync_fetch_and_add(&passed, 1); + foo = sample2 + 4084; + *foo = 256; + bpf_ringbuf_discard(sample1, 0); + bpf_ringbuf_discard(sample2, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h index 99d2ea9fb658..f48f85f1bd70 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h @@ -92,7 +92,7 @@ struct { __uint(value_size, sizeof(int)); } tls_sock_map SEC(".maps"); -SEC("sk_skb1") +SEC("sk_skb/stream_parser") int bpf_prog1(struct __sk_buff *skb) { int *f, two = 2; @@ -104,7 +104,7 @@ int bpf_prog1(struct __sk_buff *skb) return skb->len; } -SEC("sk_skb2") +SEC("sk_skb/stream_verdict") int bpf_prog2(struct __sk_buff *skb) { __u32 lport = skb->local_port; @@ -151,7 +151,7 @@ static inline void bpf_write_pass(struct __sk_buff *skb, int offset) memcpy(c + offset, "PASS", 4); } -SEC("sk_skb3") +SEC("sk_skb/stream_verdict") int bpf_prog3(struct __sk_buff *skb) { int err, *f, ret = SK_PASS; @@ -177,9 +177,6 @@ int bpf_prog3(struct __sk_buff *skb) return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags); #endif } - f = bpf_map_lookup_elem(&sock_skb_opts, &one); - if (f && *f) - ret = SK_DROP; err = bpf_skb_adjust_room(skb, 4, 0, 0); if (err) return SK_DROP; @@ -233,7 +230,7 @@ int bpf_sockmap(struct bpf_sock_ops *skops) return 0; } -SEC("sk_msg1") +SEC("sk_msg") int bpf_prog4(struct sk_msg_md *msg) { int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; @@ -263,7 +260,7 @@ int bpf_prog4(struct sk_msg_md *msg) return SK_PASS; } -SEC("sk_msg2") +SEC("sk_msg") int bpf_prog6(struct sk_msg_md *msg) { int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0; @@ -308,7 +305,7 @@ int bpf_prog6(struct sk_msg_md *msg) #endif } -SEC("sk_msg3") +SEC("sk_msg") int bpf_prog8(struct sk_msg_md *msg) { void *data_end = (void *)(long) msg->data_end; @@ -329,7 +326,8 @@ int bpf_prog8(struct sk_msg_md *msg) return SK_PASS; } -SEC("sk_msg4") + +SEC("sk_msg") int bpf_prog9(struct sk_msg_md *msg) { void *data_end = (void *)(long) msg->data_end; @@ -347,7 +345,7 @@ int bpf_prog9(struct sk_msg_md *msg) return SK_PASS; } -SEC("sk_msg5") +SEC("sk_msg") int bpf_prog10(struct sk_msg_md *msg) { int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop; diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c index 7f74077d6622..548660e299a5 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c @@ -10,10 +10,7 @@ #include <bpf/bpf_helpers.h> #include "bpf_compiler.h" - -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif +#include "bpf_misc.h" /* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */ #define TCP_MEM_LOOPS 28 /* because 30 doesn't fit into 512 bytes of stack */ diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c index 68a75436e8af..81249d119a8b 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c @@ -10,10 +10,7 @@ #include <bpf/bpf_helpers.h> #include "bpf_compiler.h" - -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif +#include "bpf_misc.h" /* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */ #define TCP_MEM_LOOPS 20 /* because 30 doesn't fit into 512 bytes of stack */ diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c index efc3c61f7852..bbdd08764789 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c @@ -10,6 +10,7 @@ #include <bpf/bpf_helpers.h> #include "bpf_compiler.h" +#include "bpf_misc.h" /* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */ #define MAX_ULONG_STR_LEN 0xF @@ -17,10 +18,6 @@ /* Max supported length of sysctl value string (pow2). */ #define MAX_VALUE_STR_LEN 0x40 -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - const char tcp_mem_name[] = "net/ipv4/tcp_mem"; static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx) { diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c index 74ec09f040b7..ca8e8734d901 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_dtime.c +++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c @@ -222,17 +222,21 @@ int egress_host(struct __sk_buff *skb) return TC_ACT_OK; if (skb_proto(skb_type) == IPPROTO_TCP) { - if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO && + if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC && skb->tstamp) inc_dtimes(EGRESS_ENDHOST); else inc_errs(EGRESS_ENDHOST); - } else { - if (skb->tstamp_type == BPF_SKB_TSTAMP_UNSPEC && + } else if (skb_proto(skb_type) == IPPROTO_UDP) { + if (skb->tstamp_type == BPF_SKB_CLOCK_TAI && skb->tstamp) inc_dtimes(EGRESS_ENDHOST); else inc_errs(EGRESS_ENDHOST); + } else { + if (skb->tstamp_type == BPF_SKB_CLOCK_REALTIME && + skb->tstamp) + inc_errs(EGRESS_ENDHOST); } skb->tstamp = EGRESS_ENDHOST_MAGIC; @@ -252,7 +256,7 @@ int ingress_host(struct __sk_buff *skb) if (!skb_type) return TC_ACT_OK; - if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO && + if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC && skb->tstamp == EGRESS_FWDNS_MAGIC) inc_dtimes(INGRESS_ENDHOST); else @@ -315,7 +319,6 @@ int egress_fwdns_prio100(struct __sk_buff *skb) SEC("tc") int ingress_fwdns_prio101(struct __sk_buff *skb) { - __u64 expected_dtime = EGRESS_ENDHOST_MAGIC; int skb_type; skb_type = skb_get_type(skb); @@ -323,29 +326,24 @@ int ingress_fwdns_prio101(struct __sk_buff *skb) /* Should have handled in prio100 */ return TC_ACT_SHOT; - if (skb_proto(skb_type) == IPPROTO_UDP) - expected_dtime = 0; - if (skb->tstamp_type) { if (fwdns_clear_dtime() || - skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO || - skb->tstamp != expected_dtime) + (skb->tstamp_type != BPF_SKB_CLOCK_MONOTONIC && + skb->tstamp_type != BPF_SKB_CLOCK_TAI) || + skb->tstamp != EGRESS_ENDHOST_MAGIC) inc_errs(INGRESS_FWDNS_P101); else inc_dtimes(INGRESS_FWDNS_P101); } else { - if (!fwdns_clear_dtime() && expected_dtime) + if (!fwdns_clear_dtime()) inc_errs(INGRESS_FWDNS_P101); } - if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) { + if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC) { skb->tstamp = INGRESS_FWDNS_MAGIC; } else { if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, - BPF_SKB_TSTAMP_DELIVERY_MONO)) - inc_errs(SET_DTIME); - if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, - BPF_SKB_TSTAMP_UNSPEC)) + BPF_SKB_CLOCK_MONOTONIC)) inc_errs(SET_DTIME); } @@ -370,7 +368,7 @@ int egress_fwdns_prio101(struct __sk_buff *skb) if (skb->tstamp_type) { if (fwdns_clear_dtime() || - skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO || + skb->tstamp_type != BPF_SKB_CLOCK_MONOTONIC || skb->tstamp != INGRESS_FWDNS_MAGIC) inc_errs(EGRESS_FWDNS_P101); else @@ -380,14 +378,11 @@ int egress_fwdns_prio101(struct __sk_buff *skb) inc_errs(EGRESS_FWDNS_P101); } - if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) { + if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC) { skb->tstamp = EGRESS_FWDNS_MAGIC; } else { if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC, - BPF_SKB_TSTAMP_DELIVERY_MONO)) - inc_errs(SET_DTIME); - if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, - BPF_SKB_TSTAMP_UNSPEC)) + BPF_SKB_CLOCK_MONOTONIC)) inc_errs(SET_DTIME); } diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c index c8e4553648bf..44ee0d037f95 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c @@ -9,6 +9,7 @@ #include "bpf_kfuncs.h" #include "test_siphash.h" #include "test_tcp_custom_syncookie.h" +#include "bpf_misc.h" #define MAX_PACKET_OFF 0xffff diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h index 29a6a53cf229..f8b1b7e68d2e 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h +++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h @@ -7,8 +7,6 @@ #define __packed __attribute__((__packed__)) #define __force -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - #define swap(a, b) \ do { \ typeof(a) __tmp = (a); \ diff --git a/tools/testing/selftests/bpf/progs/timer_lockup.c b/tools/testing/selftests/bpf/progs/timer_lockup.c new file mode 100644 index 000000000000..3e520133281e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer_lockup.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <time.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct elem { + struct bpf_timer t; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} timer1_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} timer2_map SEC(".maps"); + +int timer1_err; +int timer2_err; + +static int timer_cb1(void *map, int *k, struct elem *v) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer2_map, &key); + if (timer) + timer2_err = bpf_timer_cancel(timer); + + return 0; +} + +static int timer_cb2(void *map, int *k, struct elem *v) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer1_map, &key); + if (timer) + timer1_err = bpf_timer_cancel(timer); + + return 0; +} + +SEC("tc") +int timer1_prog(void *ctx) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer1_map, &key); + if (timer) { + bpf_timer_init(timer, &timer1_map, CLOCK_BOOTTIME); + bpf_timer_set_callback(timer, timer_cb1); + bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN); + } + + return 0; +} + +SEC("tc") +int timer2_prog(void *ctx) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer2_map, &key); + if (timer) { + bpf_timer_init(timer, &timer2_map, CLOCK_BOOTTIME); + bpf_timer_set_callback(timer, timer_cb2); + bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN); + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/tracing_struct.c b/tools/testing/selftests/bpf/progs/tracing_struct.c index 515daef3c84b..c435a3a8328a 100644 --- a/tools/testing/selftests/bpf/progs/tracing_struct.c +++ b/tools/testing/selftests/bpf/progs/tracing_struct.c @@ -18,11 +18,6 @@ struct bpf_testmod_struct_arg_3 { int b[]; }; -struct bpf_testmod_struct_arg_4 { - u64 a; - int b; -}; - long t1_a_a, t1_a_b, t1_b, t1_c, t1_ret, t1_nregs; __u64 t1_reg0, t1_reg1, t1_reg2, t1_reg3; long t2_a, t2_b_a, t2_b_b, t2_c, t2_ret; @@ -30,9 +25,6 @@ long t3_a, t3_b, t3_c_a, t3_c_b, t3_ret; long t4_a_a, t4_b, t4_c, t4_d, t4_e_a, t4_e_b, t4_ret; long t5_ret; int t6; -long t7_a, t7_b, t7_c, t7_d, t7_e, t7_f_a, t7_f_b, t7_ret; -long t8_a, t8_b, t8_c, t8_d, t8_e, t8_f_a, t8_f_b, t8_g, t8_ret; - SEC("fentry/bpf_testmod_test_struct_arg_1") int BPF_PROG2(test_struct_arg_1, struct bpf_testmod_struct_arg_2, a, int, b, int, c) @@ -138,50 +130,4 @@ int BPF_PROG2(test_struct_arg_11, struct bpf_testmod_struct_arg_3 *, a) return 0; } -SEC("fentry/bpf_testmod_test_struct_arg_7") -int BPF_PROG2(test_struct_arg_12, __u64, a, void *, b, short, c, int, d, - void *, e, struct bpf_testmod_struct_arg_4, f) -{ - t7_a = a; - t7_b = (long)b; - t7_c = c; - t7_d = d; - t7_e = (long)e; - t7_f_a = f.a; - t7_f_b = f.b; - return 0; -} - -SEC("fexit/bpf_testmod_test_struct_arg_7") -int BPF_PROG2(test_struct_arg_13, __u64, a, void *, b, short, c, int, d, - void *, e, struct bpf_testmod_struct_arg_4, f, int, ret) -{ - t7_ret = ret; - return 0; -} - -SEC("fentry/bpf_testmod_test_struct_arg_8") -int BPF_PROG2(test_struct_arg_14, __u64, a, void *, b, short, c, int, d, - void *, e, struct bpf_testmod_struct_arg_4, f, int, g) -{ - t8_a = a; - t8_b = (long)b; - t8_c = c; - t8_d = d; - t8_e = (long)e; - t8_f_a = f.a; - t8_f_b = f.b; - t8_g = g; - return 0; -} - -SEC("fexit/bpf_testmod_test_struct_arg_8") -int BPF_PROG2(test_struct_arg_15, __u64, a, void *, b, short, c, int, d, - void *, e, struct bpf_testmod_struct_arg_4, f, int, g, - int, ret) -{ - t8_ret = ret; - return 0; -} - char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/tracing_struct_many_args.c b/tools/testing/selftests/bpf/progs/tracing_struct_many_args.c new file mode 100644 index 000000000000..4742012ace06 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tracing_struct_many_args.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +struct bpf_testmod_struct_arg_4 { + u64 a; + int b; +}; + +struct bpf_testmod_struct_arg_5 { + char a; + short b; + int c; + long d; +}; + +long t7_a, t7_b, t7_c, t7_d, t7_e, t7_f_a, t7_f_b, t7_ret; +long t8_a, t8_b, t8_c, t8_d, t8_e, t8_f_a, t8_f_b, t8_g, t8_ret; +long t9_a, t9_b, t9_c, t9_d, t9_e, t9_f, t9_g, t9_h_a, t9_h_b, t9_h_c, t9_h_d, t9_i, t9_ret; + +SEC("fentry/bpf_testmod_test_struct_arg_7") +int BPF_PROG2(test_struct_many_args_1, __u64, a, void *, b, short, c, int, d, + void *, e, struct bpf_testmod_struct_arg_4, f) +{ + t7_a = a; + t7_b = (long)b; + t7_c = c; + t7_d = d; + t7_e = (long)e; + t7_f_a = f.a; + t7_f_b = f.b; + return 0; +} + +SEC("fexit/bpf_testmod_test_struct_arg_7") +int BPF_PROG2(test_struct_many_args_2, __u64, a, void *, b, short, c, int, d, + void *, e, struct bpf_testmod_struct_arg_4, f, int, ret) +{ + t7_ret = ret; + return 0; +} + +SEC("fentry/bpf_testmod_test_struct_arg_8") +int BPF_PROG2(test_struct_many_args_3, __u64, a, void *, b, short, c, int, d, + void *, e, struct bpf_testmod_struct_arg_4, f, int, g) +{ + t8_a = a; + t8_b = (long)b; + t8_c = c; + t8_d = d; + t8_e = (long)e; + t8_f_a = f.a; + t8_f_b = f.b; + t8_g = g; + return 0; +} + +SEC("fexit/bpf_testmod_test_struct_arg_8") +int BPF_PROG2(test_struct_many_args_4, __u64, a, void *, b, short, c, int, d, + void *, e, struct bpf_testmod_struct_arg_4, f, int, g, + int, ret) +{ + t8_ret = ret; + return 0; +} + +SEC("fentry/bpf_testmod_test_struct_arg_9") +int BPF_PROG2(test_struct_many_args_5, __u64, a, void *, b, short, c, int, d, void *, e, + char, f, short, g, struct bpf_testmod_struct_arg_5, h, long, i) +{ + t9_a = a; + t9_b = (long)b; + t9_c = c; + t9_d = d; + t9_e = (long)e; + t9_f = f; + t9_g = g; + t9_h_a = h.a; + t9_h_b = h.b; + t9_h_c = h.c; + t9_h_d = h.d; + t9_i = i; + return 0; +} + +SEC("fexit/bpf_testmod_test_struct_arg_9") +int BPF_PROG2(test_struct_many_args_6, __u64, a, void *, b, short, c, int, d, void *, e, + char, f, short, g, struct bpf_testmod_struct_arg_5, h, long, i, int, ret) +{ + t9_ret = ret; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c index 11ab25c42c36..54de0389f878 100644 --- a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c +++ b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c @@ -221,3 +221,25 @@ int user_ringbuf_callback_reinit_dynptr_ringbuf(void *ctx) bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_ringbuf, NULL, 0); return 0; } + +__noinline long global_call_bpf_dynptr_data(struct bpf_dynptr *dynptr) +{ + bpf_dynptr_data(dynptr, 0xA, 0xA); + return 0; +} + +static long callback_adjust_bpf_dynptr_reg_off(struct bpf_dynptr *dynptr, + void *ctx) +{ + global_call_bpf_dynptr_data(dynptr += 1024); + return 0; +} + +SEC("?raw_tp") +__failure __msg("dereference of modified dynptr_ptr ptr R1 off=16384 disallowed") +int user_ringbuf_callback_const_ptr_to_dynptr_reg_off(void *ctx) +{ + bpf_user_ringbuf_drain(&user_ringbuf, + callback_adjust_bpf_dynptr_reg_off, NULL, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c index 93144ae6df74..67509c5d3982 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#define BPF_NO_KFUNC_PROTOTYPES #include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c index ef66ea460264..6065f862d964 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#define BPF_NO_KFUNC_PROTOTYPES #include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c new file mode 100644 index 000000000000..716113c2bce2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2024 Yafang Shao <laoar.shao@gmail.com> */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#include "bpf_misc.h" +#include "task_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +int bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, + u32 nr_bits) __ksym __weak; +int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym __weak; +void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym __weak; + +SEC("iter.s/cgroup") +__description("bits iter without destroy") +__failure __msg("Unreleased reference") +int BPF_PROG(no_destroy, struct bpf_iter_meta *meta, struct cgroup *cgrp) +{ + struct bpf_iter_bits it; + u64 data = 1; + + bpf_iter_bits_new(&it, &data, 1); + bpf_iter_bits_next(&it); + return 0; +} + +SEC("iter/cgroup") +__description("uninitialized iter in ->next()") +__failure __msg("expected an initialized iter_bits as arg #1") +int BPF_PROG(next_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp) +{ + struct bpf_iter_bits *it = NULL; + + bpf_iter_bits_next(it); + return 0; +} + +SEC("iter/cgroup") +__description("uninitialized iter in ->destroy()") +__failure __msg("expected an initialized iter_bits as arg #1") +int BPF_PROG(destroy_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp) +{ + struct bpf_iter_bits it = {}; + + bpf_iter_bits_destroy(&it); + return 0; +} + +SEC("syscall") +__description("null pointer") +__success __retval(0) +int null_pointer(void) +{ + int nr = 0; + int *bit; + + bpf_for_each(bits, bit, NULL, 1) + nr++; + return nr; +} + +SEC("syscall") +__description("bits copy") +__success __retval(10) +int bits_copy(void) +{ + u64 data = 0xf7310UL; /* 4 + 3 + 2 + 1 + 0*/ + int nr = 0; + int *bit; + + bpf_for_each(bits, bit, &data, 1) + nr++; + return nr; +} + +SEC("syscall") +__description("bits memalloc") +__success __retval(64) +int bits_memalloc(void) +{ + u64 data[2]; + int nr = 0; + int *bit; + + __builtin_memset(&data, 0xf0, sizeof(data)); /* 4 * 16 */ + bpf_for_each(bits, bit, &data[0], sizeof(data) / sizeof(u64)) + nr++; + return nr; +} + +SEC("syscall") +__description("bit index") +__success __retval(8) +int bit_index(void) +{ + u64 data = 0x100; + int bit_idx = 0; + int *bit; + + bpf_for_each(bits, bit, &data, 1) { + if (*bit == 0) + continue; + bit_idx = *bit; + } + return bit_idx; +} + +SEC("syscall") +__description("bits nomem") +__success __retval(0) +int bits_nomem(void) +{ + u64 data[4]; + int nr = 0; + int *bit; + + __builtin_memset(&data, 0xff, sizeof(data)); + bpf_for_each(bits, bit, &data[0], 513) /* Be greater than 512 */ + nr++; + return nr; +} + +SEC("syscall") +__description("fewer words") +__success __retval(1) +int fewer_words(void) +{ + u64 data[2] = {0x1, 0xff}; + int nr = 0; + int *bit; + + bpf_for_each(bits, bit, &data[0], 1) + nr++; + return nr; +} + +SEC("syscall") +__description("zero words") +__success __retval(0) +int zero_words(void) +{ + u64 data[2] = {0x1, 0xff}; + int nr = 0; + int *bit; + + bpf_for_each(bits, bit, &data[0], 0) + nr++; + return nr; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c index bd676d7e615f..e54bb5385bc1 100644 --- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c +++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c @@ -274,6 +274,58 @@ static __naked void iter_limit_bug_cb(void) ); } +int tmp_var; +SEC("socket") +__failure __msg("infinite loop detected at insn 2") +__naked void jgt_imm64_and_may_goto(void) +{ + asm volatile (" \ + r0 = %[tmp_var] ll; \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short -3; /* off -3 */ \ + .long 0; /* imm */ \ + if r0 > 10 goto l0_%=; \ + r0 = 0; \ + exit; \ +" :: __imm_addr(tmp_var) + : __clobber_all); +} + +SEC("socket") +__failure __msg("infinite loop detected at insn 1") +__naked void may_goto_self(void) +{ + asm volatile (" \ + r0 = *(u32 *)(r10 - 4); \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short -1; /* off -1 */ \ + .long 0; /* imm */ \ + if r0 > 10 goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__success __retval(0) +__naked void may_goto_neg_off(void) +{ + asm volatile (" \ + r0 = *(u32 *)(r10 - 4); \ + goto l0_%=; \ + goto l1_%=; \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short -2; /* off -2 */ \ + .long 0; /* imm */ \ + if r0 > 10 goto l0_%=; \ +l1_%=: r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + SEC("tc") __failure __flag(BPF_F_TEST_STATE_FREQ) @@ -307,6 +359,100 @@ int iter_limit_bug(struct __sk_buff *skb) return 0; } +SEC("socket") +__success __retval(0) +__naked void ja_and_may_goto(void) +{ + asm volatile (" \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_common); +} + +SEC("socket") +__success __retval(0) +__naked void ja_and_may_goto2(void) +{ + asm volatile (" \ +l0_%=: r0 = 0; \ + .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_common); +} + +SEC("socket") +__success __retval(0) +__naked void jlt_and_may_goto(void) +{ + asm volatile (" \ +l0_%=: call %[bpf_jiffies64]; \ + .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + if r0 < 10 goto l0_%=; \ + r0 = 0; \ + exit; \ +" :: __imm(bpf_jiffies64) + : __clobber_all); +} + +#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ + defined(__TARGET_ARCH_loongarch)) && \ + __clang_major__ >= 18 +SEC("socket") +__success __retval(0) +__naked void gotol_and_may_goto(void) +{ + asm volatile (" \ +l0_%=: r0 = 0; \ + .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + gotol l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_common); +} +#endif + +SEC("socket") +__success __retval(0) +__naked void ja_and_may_goto_subprog(void) +{ + asm volatile (" \ + call subprog_with_may_goto; \ + exit; \ +" ::: __clobber_all); +} + +static __naked __noinline __used +void subprog_with_may_goto(void) +{ + asm volatile (" \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + #define ARR_SZ 1000000 int zero; char arr[ARR_SZ]; @@ -405,4 +551,240 @@ int cond_break5(const void *ctx) return cnt1 > 1 && cnt2 > 1 ? 1 : 0; } +#define ARR2_SZ 1000 +SEC(".data.arr2") +char arr2[ARR2_SZ]; + +SEC("socket") +__success __flag(BPF_F_TEST_STATE_FREQ) +int loop_inside_iter(const void *ctx) +{ + struct bpf_iter_num it; + int *v, sum = 0; + __u64 i = 0; + + bpf_iter_num_new(&it, 0, ARR2_SZ); + while ((v = bpf_iter_num_next(&it))) { + if (i < ARR2_SZ) + sum += arr2[i++]; + } + bpf_iter_num_destroy(&it); + return sum; +} + +SEC("socket") +__success __flag(BPF_F_TEST_STATE_FREQ) +int loop_inside_iter_signed(const void *ctx) +{ + struct bpf_iter_num it; + int *v, sum = 0; + long i = 0; + + bpf_iter_num_new(&it, 0, ARR2_SZ); + while ((v = bpf_iter_num_next(&it))) { + if (i < ARR2_SZ && i >= 0) + sum += arr2[i++]; + } + bpf_iter_num_destroy(&it); + return sum; +} + +volatile const int limit = ARR2_SZ; + +SEC("socket") +__success __flag(BPF_F_TEST_STATE_FREQ) +int loop_inside_iter_volatile_limit(const void *ctx) +{ + struct bpf_iter_num it; + int *v, sum = 0; + __u64 i = 0; + + bpf_iter_num_new(&it, 0, ARR2_SZ); + while ((v = bpf_iter_num_next(&it))) { + if (i < limit) + sum += arr2[i++]; + } + bpf_iter_num_destroy(&it); + return sum; +} + +#define ARR_LONG_SZ 1000 + +SEC(".data.arr_long") +long arr_long[ARR_LONG_SZ]; + +SEC("socket") +__success +int test1(const void *ctx) +{ + long i; + + for (i = 0; i < ARR_LONG_SZ && can_loop; i++) + arr_long[i] = i; + return 0; +} + +SEC("socket") +__success +int test2(const void *ctx) +{ + __u64 i; + + for (i = zero; i < ARR_LONG_SZ && can_loop; i++) { + barrier_var(i); + arr_long[i] = i; + } + return 0; +} + +SEC(".data.arr_foo") +struct { + int a; + int b; +} arr_foo[ARR_LONG_SZ]; + +SEC("socket") +__success +int test3(const void *ctx) +{ + __u64 i; + + for (i = zero; i < ARR_LONG_SZ && can_loop; i++) { + barrier_var(i); + arr_foo[i].a = i; + arr_foo[i].b = i; + } + return 0; +} + +SEC("socket") +__success +int test4(const void *ctx) +{ + long i; + + for (i = zero + ARR_LONG_SZ - 1; i < ARR_LONG_SZ && i >= 0 && can_loop; i--) { + barrier_var(i); + arr_foo[i].a = i; + arr_foo[i].b = i; + } + return 0; +} + +char buf[10] SEC(".data.buf"); + +SEC("socket") +__description("check add const") +__success +__naked void check_add_const(void) +{ + /* typical LLVM generated loop with may_goto */ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if r0 > 9 goto l1_%=; \ +l0_%=: r1 = %[buf]; \ + r2 = r0; \ + r1 += r2; \ + r3 = *(u8 *)(r1 +0); \ + .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 4; /* off of l1_%=: */ \ + .long 0; /* imm */ \ + r0 = r2; \ + r0 += 1; \ + if r2 < 9 goto l0_%=; \ + exit; \ +l1_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns), + __imm_ptr(buf) + : __clobber_common); +} + +SEC("socket") +__failure +__msg("*(u8 *)(r7 +0) = r0") +__msg("invalid access to map value, value_size=10 off=10 size=1") +__naked void check_add_const_3regs(void) +{ + asm volatile ( + "r6 = %[buf];" + "r7 = %[buf];" + "call %[bpf_ktime_get_ns];" + "r1 = r0;" /* link r0.id == r1.id == r2.id */ + "r2 = r0;" + "r1 += 1;" /* r1 == r0+1 */ + "r2 += 2;" /* r2 == r0+2 */ + "if r0 > 8 goto 1f;" /* r0 range [0, 8] */ + "r6 += r1;" /* r1 range [1, 9] */ + "r7 += r2;" /* r2 range [2, 10] */ + "*(u8 *)(r6 +0) = r0;" /* safe, within bounds */ + "*(u8 *)(r7 +0) = r0;" /* unsafe, out of bounds */ + "1: exit;" + : + : __imm(bpf_ktime_get_ns), + __imm_ptr(buf) + : __clobber_common); +} + +SEC("socket") +__failure +__msg("*(u8 *)(r8 -1) = r0") +__msg("invalid access to map value, value_size=10 off=10 size=1") +__naked void check_add_const_3regs_2if(void) +{ + asm volatile ( + "r6 = %[buf];" + "r7 = %[buf];" + "r8 = %[buf];" + "call %[bpf_ktime_get_ns];" + "if r0 < 2 goto 1f;" + "r1 = r0;" /* link r0.id == r1.id == r2.id */ + "r2 = r0;" + "r1 += 1;" /* r1 == r0+1 */ + "r2 += 2;" /* r2 == r0+2 */ + "if r2 > 11 goto 1f;" /* r2 range [0, 11] -> r0 range [-2, 9]; r1 range [-1, 10] */ + "if r0 s< 0 goto 1f;" /* r0 range [0, 9] -> r1 range [1, 10]; r2 range [2, 11]; */ + "r6 += r0;" /* r0 range [0, 9] */ + "r7 += r1;" /* r1 range [1, 10] */ + "r8 += r2;" /* r2 range [2, 11] */ + "*(u8 *)(r6 +0) = r0;" /* safe, within bounds */ + "*(u8 *)(r7 -1) = r0;" /* safe */ + "*(u8 *)(r8 -1) = r0;" /* unsafe */ + "1: exit;" + : + : __imm(bpf_ktime_get_ns), + __imm_ptr(buf) + : __clobber_common); +} + +SEC("socket") +__failure +__flag(BPF_F_TEST_STATE_FREQ) +__naked void check_add_const_regsafe_off(void) +{ + asm volatile ( + "r8 = %[buf];" + "call %[bpf_ktime_get_ns];" + "r6 = r0;" + "call %[bpf_ktime_get_ns];" + "r7 = r0;" + "call %[bpf_ktime_get_ns];" + "r1 = r0;" /* same ids for r1 and r0 */ + "if r6 > r7 goto 1f;" /* this jump can't be predicted */ + "r1 += 1;" /* r1.off == +1 */ + "goto 2f;" + "1: r1 += 100;" /* r1.off == +100 */ + "goto +0;" /* verify r1.off in regsafe() after this insn */ + "2: if r0 > 8 goto 3f;" /* r0 range [0,8], r1 range either [1,9] or [100,108]*/ + "r8 += r1;" + "*(u8 *)(r8 +0) = r0;" /* potentially unsafe, buf size is 10 */ + "3: exit;" + : + : __imm(bpf_ktime_get_ns), + __imm_ptr(buf) + : __clobber_common); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c index cbb9d6714f53..028ec855587b 100644 --- a/tools/testing/selftests/bpf/progs/verifier_movsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c @@ -224,6 +224,69 @@ l0_%=: \ : __clobber_all); } +SEC("socket") +__description("MOV32SX, S8, var_off u32_max") +__failure __msg("infinite loop detected") +__failure_unpriv __msg_unpriv("back-edge from insn 2 to 0") +__naked void mov64sx_s32_varoff_1(void) +{ + asm volatile (" \ +l0_%=: \ + r3 = *(u8 *)(r10 -387); \ + w7 = (s8)w3; \ + if w7 >= 0x2533823b goto l0_%=; \ + w0 = 0; \ + exit; \ +" : + : + : __clobber_all); +} + +SEC("socket") +__description("MOV32SX, S8, var_off not u32_max, positive after s8 extension") +__success __retval(0) +__failure_unpriv __msg_unpriv("frame pointer is read only") +__naked void mov64sx_s32_varoff_2(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r3 = r0; \ + r3 &= 0xf; \ + w7 = (s8)w3; \ + if w7 s>= 16 goto l0_%=; \ + w0 = 0; \ + exit; \ +l0_%=: \ + r10 = 1; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("MOV32SX, S8, var_off not u32_max, negative after s8 extension") +__success __retval(0) +__failure_unpriv __msg_unpriv("frame pointer is read only") +__naked void mov64sx_s32_varoff_3(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r3 = r0; \ + r3 &= 0xf; \ + r3 |= 0x80; \ + w7 = (s8)w3; \ + if w7 s>= -5 goto l0_%=; \ + w0 = 0; \ + exit; \ +l0_%=: \ + r10 = 1; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + #else SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c index 65bba330e7e5..ab9f9f2620ed 100644 --- a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c +++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c @@ -79,7 +79,7 @@ int with_invalid_ctx_access_test5(struct bpf_nf_ctx *ctx) return NF_ACCEPT; } -extern int bpf_dynptr_from_skb(struct sk_buff *skb, __u64 flags, +extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, struct bpf_dynptr *ptr__uninit) __ksym; extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, void *buffer, uint32_t buffer__sz) __ksym; @@ -90,8 +90,8 @@ __success __failure_unpriv __retval(0) int with_valid_ctx_access_test6(struct bpf_nf_ctx *ctx) { + struct __sk_buff *skb = (struct __sk_buff *)ctx->skb; const struct nf_hook_state *state = ctx->state; - struct sk_buff *skb = ctx->skb; const struct iphdr *iph; const struct tcphdr *th; u8 buffer_iph[20] = {}; @@ -99,7 +99,7 @@ int with_valid_ctx_access_test6(struct bpf_nf_ctx *ctx) struct bpf_dynptr ptr; uint8_t ihl; - if (skb->len <= 20 || bpf_dynptr_from_skb(skb, 0, &ptr)) + if (ctx->skb->len <= 20 || bpf_dynptr_from_skb(skb, 0, &ptr)) return NF_ACCEPT; iph = bpf_dynptr_slice(&ptr, 0, buffer_iph, sizeof(buffer_iph)); diff --git a/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c b/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c new file mode 100644 index 000000000000..f37713a265ac --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC("socket") +__description("or_jmp32_k: bit ops + branch on unknown value") +__failure +__msg("R0 invalid mem access 'scalar'") +__naked void or_jmp32_k(void) +{ + asm volatile (" \ + r0 = 0xffffffff; \ + r0 /= 1; \ + r1 = 0; \ + w1 = -1; \ + w1 >>= 1; \ + w0 &= w1; \ + w0 |= 2; \ + if w0 != 0x7ffffffd goto l1; \ + r0 = 1; \ + exit; \ +l3: \ + r0 = 5; \ + *(u64*)(r0 - 8) = r0; \ + exit; \ +l2: \ + w0 -= 0xe; \ + if w0 == 1 goto l3; \ + r0 = 4; \ + exit; \ +l1: \ + w0 -= 0x7ffffff0; \ + if w0 s>= 0xe goto l2; \ + r0 = 3; \ + exit; \ +" ::: __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index 4a58e0398e72..6a6fad625f7e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -8,8 +8,6 @@ #include "bpf_misc.h" #include <../../../tools/include/linux/filter.h> -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) - int vals[] SEC(".data.vals") = {1, 2, 3, 4}; __naked __noinline __used diff --git a/tools/testing/selftests/bpf/progs/wq.c b/tools/testing/selftests/bpf/progs/wq.c index 49e712acbf60..f8d3ae0c29ae 100644 --- a/tools/testing/selftests/bpf/progs/wq.c +++ b/tools/testing/selftests/bpf/progs/wq.c @@ -32,6 +32,7 @@ struct { } hmap_malloc SEC(".maps"); struct elem { + int ok_offset; struct bpf_wq w; }; @@ -53,7 +54,7 @@ __u32 ok; __u32 ok_sleepable; static int test_elem_callback(void *map, int *key, - int (callback_fn)(void *map, int *key, struct bpf_wq *wq)) + int (callback_fn)(void *map, int *key, void *value)) { struct elem init = {}, *val; struct bpf_wq *wq; @@ -70,6 +71,8 @@ static int test_elem_callback(void *map, int *key, if (!val) return -2; + val->ok_offset = *key; + wq = &val->w; if (bpf_wq_init(wq, map, 0) != 0) return -3; @@ -84,7 +87,7 @@ static int test_elem_callback(void *map, int *key, } static int test_hmap_elem_callback(void *map, int *key, - int (callback_fn)(void *map, int *key, struct bpf_wq *wq)) + int (callback_fn)(void *map, int *key, void *value)) { struct hmap_elem init = {}, *val; struct bpf_wq *wq; @@ -114,7 +117,7 @@ static int test_hmap_elem_callback(void *map, int *key, } /* callback for non sleepable workqueue */ -static int wq_callback(void *map, int *key, struct bpf_wq *work) +static int wq_callback(void *map, int *key, void *value) { bpf_kfunc_common_test(); ok |= (1 << *key); @@ -122,10 +125,16 @@ static int wq_callback(void *map, int *key, struct bpf_wq *work) } /* callback for sleepable workqueue */ -static int wq_cb_sleepable(void *map, int *key, struct bpf_wq *work) +static int wq_cb_sleepable(void *map, int *key, void *value) { + struct elem *data = (struct elem *)value; + int offset = data->ok_offset; + + if (*key != offset) + return 0; + bpf_kfunc_call_test_sleepable(); - ok_sleepable |= (1 << *key); + ok_sleepable |= (1 << offset); return 0; } diff --git a/tools/testing/selftests/bpf/progs/wq_failures.c b/tools/testing/selftests/bpf/progs/wq_failures.c index 4cbdb425f223..25b51a72fe0f 100644 --- a/tools/testing/selftests/bpf/progs/wq_failures.c +++ b/tools/testing/selftests/bpf/progs/wq_failures.c @@ -28,14 +28,14 @@ struct { } lru SEC(".maps"); /* callback for non sleepable workqueue */ -static int wq_callback(void *map, int *key, struct bpf_wq *work) +static int wq_callback(void *map, int *key, void *value) { bpf_kfunc_common_test(); return 0; } /* callback for sleepable workqueue */ -static int wq_cb_sleepable(void *map, int *key, struct bpf_wq *work) +static int wq_cb_sleepable(void *map, int *key, void *value) { bpf_kfunc_call_test_sleepable(); return 0; diff --git a/tools/testing/selftests/bpf/progs/xdp_flowtable.c b/tools/testing/selftests/bpf/progs/xdp_flowtable.c new file mode 100644 index 000000000000..7fdc7b23ee74 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xdp_flowtable.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 +#define BPF_NO_KFUNC_PROTOTYPES +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#define ETH_P_IP 0x0800 +#define ETH_P_IPV6 0x86dd +#define IP_MF 0x2000 /* "More Fragments" */ +#define IP_OFFSET 0x1fff /* "Fragment Offset" */ +#define AF_INET 2 +#define AF_INET6 10 + +struct bpf_flowtable_opts___local { + s32 error; +}; + +struct flow_offload_tuple_rhash * +bpf_xdp_flow_lookup(struct xdp_md *, struct bpf_fib_lookup *, + struct bpf_flowtable_opts___local *, u32) __ksym; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 1); +} stats SEC(".maps"); + +static bool xdp_flowtable_offload_check_iphdr(struct iphdr *iph) +{ + /* ip fragmented traffic */ + if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) + return false; + + /* ip options */ + if (iph->ihl * 4 != sizeof(*iph)) + return false; + + if (iph->ttl <= 1) + return false; + + return true; +} + +static bool xdp_flowtable_offload_check_tcp_state(void *ports, void *data_end, + u8 proto) +{ + if (proto == IPPROTO_TCP) { + struct tcphdr *tcph = ports; + + if (tcph + 1 > data_end) + return false; + + if (tcph->fin || tcph->rst) + return false; + } + + return true; +} + +struct flow_ports___local { + __be16 source, dest; +} __attribute__((preserve_access_index)); + +SEC("xdp.frags") +int xdp_flowtable_do_lookup(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + struct bpf_flowtable_opts___local opts = {}; + struct flow_offload_tuple_rhash *tuplehash; + struct bpf_fib_lookup tuple = { + .ifindex = ctx->ingress_ifindex, + }; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + struct flow_ports___local *ports; + __u32 *val, key = 0; + + if (eth + 1 > data_end) + return XDP_DROP; + + switch (eth->h_proto) { + case bpf_htons(ETH_P_IP): { + struct iphdr *iph = data + sizeof(*eth); + + ports = (struct flow_ports___local *)(iph + 1); + if (ports + 1 > data_end) + return XDP_PASS; + + /* sanity check on ip header */ + if (!xdp_flowtable_offload_check_iphdr(iph)) + return XDP_PASS; + + if (!xdp_flowtable_offload_check_tcp_state(ports, data_end, + iph->protocol)) + return XDP_PASS; + + tuple.family = AF_INET; + tuple.tos = iph->tos; + tuple.l4_protocol = iph->protocol; + tuple.tot_len = bpf_ntohs(iph->tot_len); + tuple.ipv4_src = iph->saddr; + tuple.ipv4_dst = iph->daddr; + tuple.sport = ports->source; + tuple.dport = ports->dest; + break; + } + case bpf_htons(ETH_P_IPV6): { + struct in6_addr *src = (struct in6_addr *)tuple.ipv6_src; + struct in6_addr *dst = (struct in6_addr *)tuple.ipv6_dst; + struct ipv6hdr *ip6h = data + sizeof(*eth); + + ports = (struct flow_ports___local *)(ip6h + 1); + if (ports + 1 > data_end) + return XDP_PASS; + + if (ip6h->hop_limit <= 1) + return XDP_PASS; + + if (!xdp_flowtable_offload_check_tcp_state(ports, data_end, + ip6h->nexthdr)) + return XDP_PASS; + + tuple.family = AF_INET6; + tuple.l4_protocol = ip6h->nexthdr; + tuple.tot_len = bpf_ntohs(ip6h->payload_len); + *src = ip6h->saddr; + *dst = ip6h->daddr; + tuple.sport = ports->source; + tuple.dport = ports->dest; + break; + } + default: + return XDP_PASS; + } + + tuplehash = bpf_xdp_flow_lookup(ctx, &tuple, &opts, sizeof(opts)); + if (!tuplehash) + return XDP_PASS; + + val = bpf_map_lookup_elem(&stats, &key); + if (val) + __sync_add_and_fetch(val, 1); + + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c index 7ea9785738b5..f8f5dc9f72b8 100644 --- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause /* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ +#define BPF_NO_KFUNC_PROTOTYPES #include "vmlinux.h" #include <bpf/bpf_helpers.h> diff --git a/tools/testing/selftests/bpf/progs/xfrm_info.c b/tools/testing/selftests/bpf/progs/xfrm_info.c index f6a501fbba2b..a1d9f106c3f0 100644 --- a/tools/testing/selftests/bpf/progs/xfrm_info.c +++ b/tools/testing/selftests/bpf/progs/xfrm_info.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define BPF_NO_KFUNC_PROTOTYPES #include "vmlinux.h" #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 524c38e9cde4..f14e10b0de96 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -2,6 +2,7 @@ /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ #include <linux/capability.h> #include <stdlib.h> +#include <regex.h> #include <test_progs.h> #include <bpf/btf.h> @@ -17,9 +18,11 @@ #define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure" #define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success" #define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg=" +#define TEST_TAG_EXPECT_REGEX_PFX "comment:test_expect_regex=" #define TEST_TAG_EXPECT_FAILURE_UNPRIV "comment:test_expect_failure_unpriv" #define TEST_TAG_EXPECT_SUCCESS_UNPRIV "comment:test_expect_success_unpriv" #define TEST_TAG_EXPECT_MSG_PFX_UNPRIV "comment:test_expect_msg_unpriv=" +#define TEST_TAG_EXPECT_REGEX_PFX_UNPRIV "comment:test_expect_regex_unpriv=" #define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level=" #define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags=" #define TEST_TAG_DESCRIPTION_PFX "comment:test_description=" @@ -46,10 +49,16 @@ enum mode { UNPRIV = 2 }; +struct expect_msg { + const char *substr; /* substring match */ + const char *regex_str; /* regex-based match */ + regex_t regex; +}; + struct test_subspec { char *name; bool expect_failure; - const char **expect_msgs; + struct expect_msg *expect_msgs; size_t expect_msg_cnt; int retval; bool execute; @@ -89,6 +98,16 @@ void test_loader_fini(struct test_loader *tester) static void free_test_spec(struct test_spec *spec) { + int i; + + /* Deallocate expect_msgs arrays. */ + for (i = 0; i < spec->priv.expect_msg_cnt; i++) + if (spec->priv.expect_msgs[i].regex_str) + regfree(&spec->priv.expect_msgs[i].regex); + for (i = 0; i < spec->unpriv.expect_msg_cnt; i++) + if (spec->unpriv.expect_msgs[i].regex_str) + regfree(&spec->unpriv.expect_msgs[i].regex); + free(spec->priv.name); free(spec->unpriv.name); free(spec->priv.expect_msgs); @@ -100,18 +119,38 @@ static void free_test_spec(struct test_spec *spec) spec->unpriv.expect_msgs = NULL; } -static int push_msg(const char *msg, struct test_subspec *subspec) +static int push_msg(const char *substr, const char *regex_str, struct test_subspec *subspec) { void *tmp; + int regcomp_res; + char error_msg[100]; + struct expect_msg *msg; - tmp = realloc(subspec->expect_msgs, (1 + subspec->expect_msg_cnt) * sizeof(void *)); + tmp = realloc(subspec->expect_msgs, + (1 + subspec->expect_msg_cnt) * sizeof(struct expect_msg)); if (!tmp) { ASSERT_FAIL("failed to realloc memory for messages\n"); return -ENOMEM; } subspec->expect_msgs = tmp; - subspec->expect_msgs[subspec->expect_msg_cnt++] = msg; + msg = &subspec->expect_msgs[subspec->expect_msg_cnt]; + + if (substr) { + msg->substr = substr; + msg->regex_str = NULL; + } else { + msg->regex_str = regex_str; + msg->substr = NULL; + regcomp_res = regcomp(&msg->regex, regex_str, REG_EXTENDED|REG_NEWLINE); + if (regcomp_res != 0) { + regerror(regcomp_res, &msg->regex, error_msg, sizeof(error_msg)); + PRINT_FAIL("Regexp compilation error in '%s': '%s'\n", + regex_str, error_msg); + return -EINVAL; + } + } + subspec->expect_msg_cnt += 1; return 0; } @@ -233,13 +272,25 @@ static int parse_test_spec(struct test_loader *tester, spec->mode_mask |= UNPRIV; } else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX)) { msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1; - err = push_msg(msg, &spec->priv); + err = push_msg(msg, NULL, &spec->priv); if (err) goto cleanup; spec->mode_mask |= PRIV; } else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX_UNPRIV)) { msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX_UNPRIV) - 1; - err = push_msg(msg, &spec->unpriv); + err = push_msg(msg, NULL, &spec->unpriv); + if (err) + goto cleanup; + spec->mode_mask |= UNPRIV; + } else if (str_has_pfx(s, TEST_TAG_EXPECT_REGEX_PFX)) { + msg = s + sizeof(TEST_TAG_EXPECT_REGEX_PFX) - 1; + err = push_msg(NULL, msg, &spec->priv); + if (err) + goto cleanup; + spec->mode_mask |= PRIV; + } else if (str_has_pfx(s, TEST_TAG_EXPECT_REGEX_PFX_UNPRIV)) { + msg = s + sizeof(TEST_TAG_EXPECT_REGEX_PFX_UNPRIV) - 1; + err = push_msg(NULL, msg, &spec->unpriv); if (err) goto cleanup; spec->mode_mask |= UNPRIV; @@ -337,16 +388,13 @@ static int parse_test_spec(struct test_loader *tester, } if (!spec->unpriv.expect_msgs) { - size_t sz = spec->priv.expect_msg_cnt * sizeof(void *); + for (i = 0; i < spec->priv.expect_msg_cnt; i++) { + struct expect_msg *msg = &spec->priv.expect_msgs[i]; - spec->unpriv.expect_msgs = malloc(sz); - if (!spec->unpriv.expect_msgs) { - PRINT_FAIL("failed to allocate memory for unpriv.expect_msgs\n"); - err = -ENOMEM; - goto cleanup; + err = push_msg(msg->substr, msg->regex_str, &spec->unpriv); + if (err) + goto cleanup; } - memcpy(spec->unpriv.expect_msgs, spec->priv.expect_msgs, sz); - spec->unpriv.expect_msg_cnt = spec->priv.expect_msg_cnt; } } @@ -402,27 +450,40 @@ static void validate_case(struct test_loader *tester, struct bpf_program *prog, int load_err) { - int i, j; + int i, j, err; + char *match; + regmatch_t reg_match[1]; for (i = 0; i < subspec->expect_msg_cnt; i++) { - char *match; - const char *expect_msg; - - expect_msg = subspec->expect_msgs[i]; + struct expect_msg *msg = &subspec->expect_msgs[i]; + + if (msg->substr) { + match = strstr(tester->log_buf + tester->next_match_pos, msg->substr); + if (match) + tester->next_match_pos = match - tester->log_buf + strlen(msg->substr); + } else { + err = regexec(&msg->regex, + tester->log_buf + tester->next_match_pos, 1, reg_match, 0); + if (err == 0) { + match = tester->log_buf + tester->next_match_pos + reg_match[0].rm_so; + tester->next_match_pos += reg_match[0].rm_eo; + } else { + match = NULL; + } + } - match = strstr(tester->log_buf + tester->next_match_pos, expect_msg); if (!ASSERT_OK_PTR(match, "expect_msg")) { - /* if we are in verbose mode, we've already emitted log */ if (env.verbosity == VERBOSE_NONE) emit_verifier_log(tester->log_buf, true /*force*/); - for (j = 0; j < i; j++) - fprintf(stderr, - "MATCHED MSG: '%s'\n", subspec->expect_msgs[j]); - fprintf(stderr, "EXPECTED MSG: '%s'\n", expect_msg); + for (j = 0; j <= i; j++) { + msg = &subspec->expect_msgs[j]; + fprintf(stderr, "%s %s: '%s'\n", + j < i ? "MATCHED " : "EXPECTED", + msg->substr ? "SUBSTR" : " REGEX", + msg->substr ?: msg->regex_str); + } return; } - - tester->next_match_pos = match - tester->log_buf + strlen(expect_msg); } } diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 0ba5a20b19ba..51341d50213b 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -377,6 +377,15 @@ int test__join_cgroup(const char *path); ___ok; \ }) +#define ASSERT_OK_FD(fd, name) ({ \ + static int duration = 0; \ + int ___fd = (fd); \ + bool ___ok = ___fd >= 0; \ + CHECK(!___ok, (name), "unexpected fd: %d (errno %d)\n", \ + ___fd, errno); \ + ___ok; \ +}) + #define SYS(goto_label, fmt, ...) \ ({ \ char cmd[1024]; \ diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 92752f5eeded..3e02d7267de8 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -63,7 +63,8 @@ int passed; int failed; int map_fd[9]; struct bpf_map *maps[9]; -int prog_fd[11]; +struct bpf_program *progs[9]; +struct bpf_link *links[9]; int txmsg_pass; int txmsg_redir; @@ -680,7 +681,8 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, } } - s->bytes_recvd += recv; + if (recv > 0) + s->bytes_recvd += recv; if (opt->check_recved_len && s->bytes_recvd > total_bytes) { errno = EMSGSIZE; @@ -952,7 +954,8 @@ enum { static int run_options(struct sockmap_options *options, int cg_fd, int test) { - int i, key, next_key, err, tx_prog_fd = -1, zero = 0; + int i, key, next_key, err, zero = 0; + struct bpf_program *tx_prog; /* If base test skip BPF setup */ if (test == BASE || test == BASE_SENDPAGE) @@ -960,48 +963,44 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test) /* Attach programs to sockmap */ if (!txmsg_omit_skb_parser) { - err = bpf_prog_attach(prog_fd[0], map_fd[0], - BPF_SK_SKB_STREAM_PARSER, 0); - if (err) { + links[0] = bpf_program__attach_sockmap(progs[0], map_fd[0]); + if (!links[0]) { fprintf(stderr, - "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n", - prog_fd[0], map_fd[0], err, strerror(errno)); - return err; + "ERROR: bpf_program__attach_sockmap (sockmap %i->%i): (%s)\n", + bpf_program__fd(progs[0]), map_fd[0], strerror(errno)); + return -1; } } - err = bpf_prog_attach(prog_fd[1], map_fd[0], - BPF_SK_SKB_STREAM_VERDICT, 0); - if (err) { - fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n", - err, strerror(errno)); - return err; + links[1] = bpf_program__attach_sockmap(progs[1], map_fd[0]); + if (!links[1]) { + fprintf(stderr, "ERROR: bpf_program__attach_sockmap (sockmap): (%s)\n", + strerror(errno)); + return -1; } /* Attach programs to TLS sockmap */ if (txmsg_ktls_skb) { if (!txmsg_omit_skb_parser) { - err = bpf_prog_attach(prog_fd[0], map_fd[8], - BPF_SK_SKB_STREAM_PARSER, 0); - if (err) { + links[2] = bpf_program__attach_sockmap(progs[0], map_fd[8]); + if (!links[2]) { fprintf(stderr, - "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n", - prog_fd[0], map_fd[8], err, strerror(errno)); - return err; + "ERROR: bpf_program__attach_sockmap (TLS sockmap %i->%i): (%s)\n", + bpf_program__fd(progs[0]), map_fd[8], strerror(errno)); + return -1; } } - err = bpf_prog_attach(prog_fd[2], map_fd[8], - BPF_SK_SKB_STREAM_VERDICT, 0); - if (err) { - fprintf(stderr, "ERROR: bpf_prog_attach (TLS sockmap): %d (%s)\n", - err, strerror(errno)); - return err; + links[3] = bpf_program__attach_sockmap(progs[2], map_fd[8]); + if (!links[3]) { + fprintf(stderr, "ERROR: bpf_program__attach_sockmap (TLS sockmap): (%s)\n", + strerror(errno)); + return -1; } } /* Attach to cgroups */ - err = bpf_prog_attach(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS, 0); + err = bpf_prog_attach(bpf_program__fd(progs[3]), cg_fd, BPF_CGROUP_SOCK_OPS, 0); if (err) { fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n", err, strerror(errno)); @@ -1017,30 +1016,31 @@ run: /* Attach txmsg program to sockmap */ if (txmsg_pass) - tx_prog_fd = prog_fd[4]; + tx_prog = progs[4]; else if (txmsg_redir) - tx_prog_fd = prog_fd[5]; + tx_prog = progs[5]; else if (txmsg_apply) - tx_prog_fd = prog_fd[6]; + tx_prog = progs[6]; else if (txmsg_cork) - tx_prog_fd = prog_fd[7]; + tx_prog = progs[7]; else if (txmsg_drop) - tx_prog_fd = prog_fd[8]; + tx_prog = progs[8]; else - tx_prog_fd = 0; + tx_prog = NULL; - if (tx_prog_fd) { - int redir_fd, i = 0; + if (tx_prog) { + int redir_fd; - err = bpf_prog_attach(tx_prog_fd, - map_fd[1], BPF_SK_MSG_VERDICT, 0); - if (err) { + links[4] = bpf_program__attach_sockmap(tx_prog, map_fd[1]); + if (!links[4]) { fprintf(stderr, - "ERROR: bpf_prog_attach (txmsg): %d (%s)\n", - err, strerror(errno)); + "ERROR: bpf_program__attach_sockmap (txmsg): (%s)\n", + strerror(errno)); + err = -1; goto out; } + i = 0; err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY); if (err) { fprintf(stderr, @@ -1279,16 +1279,14 @@ run: fprintf(stderr, "unknown test\n"); out: /* Detatch and zero all the maps */ - bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS); - bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER); - bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT); - bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER); - bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT); + bpf_prog_detach2(bpf_program__fd(progs[3]), cg_fd, BPF_CGROUP_SOCK_OPS); - if (tx_prog_fd >= 0) - bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT); + for (i = 0; i < ARRAY_SIZE(links); i++) { + if (links[i]) + bpf_link__detach(links[i]); + } - for (i = 0; i < 8; i++) { + for (i = 0; i < ARRAY_SIZE(map_fd); i++) { key = next_key = 0; bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY); while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) { @@ -1783,34 +1781,6 @@ char *map_names[] = { "tls_sock_map", }; -int prog_attach_type[] = { - BPF_SK_SKB_STREAM_PARSER, - BPF_SK_SKB_STREAM_VERDICT, - BPF_SK_SKB_STREAM_VERDICT, - BPF_CGROUP_SOCK_OPS, - BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, -}; - -int prog_type[] = { - BPF_PROG_TYPE_SK_SKB, - BPF_PROG_TYPE_SK_SKB, - BPF_PROG_TYPE_SK_SKB, - BPF_PROG_TYPE_SOCK_OPS, - BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, -}; - static int populate_progs(char *bpf_file) { struct bpf_program *prog; @@ -1829,17 +1799,10 @@ static int populate_progs(char *bpf_file) return -1; } - bpf_object__for_each_program(prog, obj) { - bpf_program__set_type(prog, prog_type[i]); - bpf_program__set_expected_attach_type(prog, - prog_attach_type[i]); - i++; - } - i = bpf_object__load(obj); i = 0; bpf_object__for_each_program(prog, obj) { - prog_fd[i] = bpf_program__fd(prog); + progs[i] = prog; i++; } @@ -1853,6 +1816,9 @@ static int populate_progs(char *bpf_file) } } + for (i = 0; i < ARRAY_SIZE(links); i++) + links[i] = NULL; + return 0; } @@ -1970,7 +1936,6 @@ static void test_selftests_ktls(int cg_fd, struct sockmap_options *opt) static int test_selftest(int cg_fd, struct sockmap_options *opt) { - test_selftests_sockmap(cg_fd, opt); test_selftests_sockhash(cg_fd, opt); test_selftests_ktls(cg_fd, opt); diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c index 7b5fc98838cd..3844f9b8232a 100644 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c @@ -139,14 +139,14 @@ out: return ret; } -static int v6only_true(int fd, const struct post_socket_opts *opts) +static int v6only_true(int fd, void *opts) { int mode = true; return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &mode, sizeof(mode)); } -static int v6only_false(int fd, const struct post_socket_opts *opts) +static int v6only_false(int fd, void *opts) { int mode = false; @@ -156,10 +156,6 @@ static int v6only_false(int fd, const struct post_socket_opts *opts) int main(int argc, char **argv) { struct network_helper_opts opts = { 0 }; - struct sockaddr_in addr4; - struct sockaddr_in6 addr6; - struct sockaddr_in addr4dual; - struct sockaddr_in6 addr6dual; int server = -1; int server_v6 = -1; int server_dual = -1; @@ -181,36 +177,17 @@ int main(int argc, char **argv) goto err; } - memset(&addr4, 0, sizeof(addr4)); - addr4.sin_family = AF_INET; - addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - addr4.sin_port = 0; - memcpy(&addr4dual, &addr4, sizeof(addr4dual)); - - memset(&addr6, 0, sizeof(addr6)); - addr6.sin6_family = AF_INET6; - addr6.sin6_addr = in6addr_loopback; - addr6.sin6_port = 0; - - memset(&addr6dual, 0, sizeof(addr6dual)); - addr6dual.sin6_family = AF_INET6; - addr6dual.sin6_addr = in6addr_any; - addr6dual.sin6_port = 0; - - server = start_server_addr(SOCK_STREAM, (struct sockaddr_storage *)&addr4, - sizeof(addr4), NULL); + server = start_server_str(AF_INET, SOCK_STREAM, "127.0.0.1", 0, NULL); if (server == -1) goto err; opts.post_socket_cb = v6only_true; - server_v6 = start_server_addr(SOCK_STREAM, (struct sockaddr_storage *)&addr6, - sizeof(addr6), &opts); + server_v6 = start_server_str(AF_INET6, SOCK_STREAM, "::1", 0, &opts); if (server_v6 == -1) goto err; opts.post_socket_cb = v6only_false; - server_dual = start_server_addr(SOCK_STREAM, (struct sockaddr_storage *)&addr6dual, - sizeof(addr6dual), &opts); + server_dual = start_server_str(AF_INET6, SOCK_STREAM, "::0", 0, &opts); if (server_dual == -1) goto err; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index df04bda1c927..610392dfc4fb 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1237,11 +1237,6 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_prog_kfuncs(prog, fd_array, test->fixup_kfunc_btf_id); } -struct libcap { - struct __user_cap_header_struct hdr; - struct __user_cap_data_struct data[2]; -}; - static int set_admin(bool admin) { int err; diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 70e29f316fe7..465d196c7165 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -211,7 +211,7 @@ long ksym_get_addr(const char *name) */ int kallsyms_find(const char *sym, unsigned long long *addr) { - char type, name[500]; + char type, name[500], *match; unsigned long long value; int err = 0; FILE *f; @@ -221,6 +221,17 @@ int kallsyms_find(const char *sym, unsigned long long *addr) return -EINVAL; while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) { + /* If CONFIG_LTO_CLANG_THIN is enabled, static variable/function + * symbols could be promoted to global due to cross-file inlining. + * For such cases, clang compiler will add .llvm.<hash> suffix + * to those symbols to avoid potential naming conflict. + * Let us ignore .llvm.<hash> suffix during symbol comparison. + */ + if (type == 'd') { + match = strstr(name, ".llvm."); + if (match) + *match = '\0'; + } if (strcmp(name, sym) == 0) { *addr = value; goto out; diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index ab25a81fd3a1..d0cdd156cd55 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -76,7 +76,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "R1 must have zero offset when passed to release func or trusted arg to kfunc", + .errstr = "arg#0 expected pointer to ctx, but got PTR", .fixup_kfunc_btf_id = { { "bpf_kfunc_call_test_pass_ctx", 2 }, }, @@ -276,6 +276,19 @@ .result = ACCEPT, }, { + "calls: invalid kfunc call: must provide (attach_prog_fd, btf_id) pair when freplace", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_EXT, + .result = REJECT, + .errstr = "Tracing programs must provide btf_id", + .fixup_kfunc_btf_id = { + { "bpf_dynptr_from_skb", 0 }, + }, +}, +{ "calls: basic sanity", .insns = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index 0a9293a57211..90643ccc221d 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -39,12 +39,12 @@ .result = VERBOSE_ACCEPT, .errstr = "mark_precise: frame0: last_idx 26 first_idx 20\ - mark_precise: frame0: regs=r2 stack= before 25\ - mark_precise: frame0: regs=r2 stack= before 24\ - mark_precise: frame0: regs=r2 stack= before 23\ - mark_precise: frame0: regs=r2 stack= before 22\ - mark_precise: frame0: regs=r2 stack= before 20\ - mark_precise: frame0: parent state regs=r2 stack=:\ + mark_precise: frame0: regs=r2,r9 stack= before 25\ + mark_precise: frame0: regs=r2,r9 stack= before 24\ + mark_precise: frame0: regs=r2,r9 stack= before 23\ + mark_precise: frame0: regs=r2,r9 stack= before 22\ + mark_precise: frame0: regs=r2,r9 stack= before 20\ + mark_precise: frame0: parent state regs=r2,r9 stack=:\ mark_precise: frame0: last_idx 19 first_idx 10\ mark_precise: frame0: regs=r2,r9 stack= before 19\ mark_precise: frame0: regs=r9 stack= before 18\ @@ -100,11 +100,11 @@ .errstr = "26: (85) call bpf_probe_read_kernel#113\ mark_precise: frame0: last_idx 26 first_idx 22\ - mark_precise: frame0: regs=r2 stack= before 25\ - mark_precise: frame0: regs=r2 stack= before 24\ - mark_precise: frame0: regs=r2 stack= before 23\ - mark_precise: frame0: regs=r2 stack= before 22\ - mark_precise: frame0: parent state regs=r2 stack=:\ + mark_precise: frame0: regs=r2,r9 stack= before 25\ + mark_precise: frame0: regs=r2,r9 stack= before 24\ + mark_precise: frame0: regs=r2,r9 stack= before 23\ + mark_precise: frame0: regs=r2,r9 stack= before 22\ + mark_precise: frame0: parent state regs=r2,r9 stack=:\ mark_precise: frame0: last_idx 20 first_idx 20\ mark_precise: frame0: regs=r2,r9 stack= before 20\ mark_precise: frame0: parent state regs=r2,r9 stack=:\ diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 2eac0895b0a1..8144fd145237 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -196,6 +196,12 @@ static int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem }; int ret; + if (umem->fill_size) + cfg.fill_size = umem->fill_size; + + if (umem->comp_size) + cfg.comp_size = umem->comp_size; + if (umem->unaligned_mode) cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG; @@ -265,6 +271,10 @@ static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_i cfg.bind_flags |= XDP_SHARED_UMEM; if (ifobject->mtu > MAX_ETH_PKT_SIZE) cfg.bind_flags |= XDP_USE_SG; + if (umem->comp_size) + cfg.tx_size = umem->comp_size; + if (umem->fill_size) + cfg.rx_size = umem->fill_size; txr = ifobject->tx_on ? &xsk->tx : NULL; rxr = ifobject->rx_on ? &xsk->rx : NULL; @@ -1616,7 +1626,7 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) buffers_to_fill = umem->num_frames; else - buffers_to_fill = XSK_RING_PROD__DEFAULT_NUM_DESCS; + buffers_to_fill = umem->fill_size; ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx); if (ret != buffers_to_fill) @@ -1899,11 +1909,15 @@ static int testapp_validate_traffic(struct test_spec *test) } if (test->set_ring) { - if (ifobj_tx->hw_ring_size_supp) - return set_ring_size(ifobj_tx); - - ksft_test_result_skip("Changing HW ring size not supported.\n"); - return TEST_SKIP; + if (ifobj_tx->hw_ring_size_supp) { + if (set_ring_size(ifobj_tx)) { + ksft_test_result_skip("Failed to change HW ring size.\n"); + return TEST_FAILURE; + } + } else { + ksft_test_result_skip("Changing HW ring size not supported.\n"); + return TEST_SKIP; + } } xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx); @@ -2441,7 +2455,7 @@ static int testapp_hw_sw_min_ring_size(struct test_spec *test) static int testapp_hw_sw_max_ring_size(struct test_spec *test) { - u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2; + u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 4; int ret; test->set_ring = true; @@ -2449,7 +2463,8 @@ static int testapp_hw_sw_max_ring_size(struct test_spec *test) test->ifobj_tx->ring.tx_pending = test->ifobj_tx->ring.tx_max_pending; test->ifobj_tx->ring.rx_pending = test->ifobj_tx->ring.rx_max_pending; test->ifobj_rx->umem->num_frames = max_descs; - test->ifobj_rx->xsk->rxqsize = max_descs; + test->ifobj_rx->umem->fill_size = max_descs; + test->ifobj_rx->umem->comp_size = max_descs; test->ifobj_tx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; test->ifobj_rx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; @@ -2457,9 +2472,12 @@ static int testapp_hw_sw_max_ring_size(struct test_spec *test) if (ret) return ret; - /* Set batch_size to 4095 */ - test->ifobj_tx->xsk->batch_size = max_descs - 1; - test->ifobj_rx->xsk->batch_size = max_descs - 1; + /* Set batch_size to 8152 for testing, as the ice HW ignores the 3 lowest bits when + * updating the Rx HW tail register. + */ + test->ifobj_tx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8; + test->ifobj_rx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8; + pkt_stream_replace(test, max_descs, MIN_PKT_SIZE); return testapp_validate_traffic(test); } diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index 906de5fab7a3..885c948c5d83 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -80,6 +80,8 @@ struct xsk_umem_info { void *buffer; u32 frame_size; u32 base_addr; + u32 fill_size; + u32 comp_size; bool unaligned_mode; }; diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c index b8703c499d28..dfec31fb9b30 100644 --- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c +++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c @@ -130,7 +130,6 @@ int run_test(int cpu) void suspend(void) { int power_state_fd; - struct sigevent event = {}; int timerfd; int err; struct itimerspec spec = {}; diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore index 2732e0b29271..952e4448bf07 100644 --- a/tools/testing/selftests/cgroup/.gitignore +++ b/tools/testing/selftests/cgroup/.gitignore @@ -1,11 +1,12 @@ # SPDX-License-Identifier: GPL-2.0-only -test_memcontrol test_core -test_freezer -test_kmem -test_kill test_cpu test_cpuset -test_zswap +test_freezer test_hugetlb_memcg +test_kill +test_kmem +test_memcontrol +test_pids +test_zswap wait_inotify diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 16461dc0ffdf..1b897152bab6 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -6,26 +6,29 @@ all: ${HELPER_PROGS} TEST_FILES := with_stress.sh TEST_PROGS := test_stress.sh test_cpuset_prs.sh test_cpuset_v1_hp.sh TEST_GEN_FILES := wait_inotify -TEST_GEN_PROGS = test_memcontrol -TEST_GEN_PROGS += test_kmem -TEST_GEN_PROGS += test_core -TEST_GEN_PROGS += test_freezer -TEST_GEN_PROGS += test_kill +# Keep the lists lexicographically sorted +TEST_GEN_PROGS = test_core TEST_GEN_PROGS += test_cpu TEST_GEN_PROGS += test_cpuset -TEST_GEN_PROGS += test_zswap +TEST_GEN_PROGS += test_freezer TEST_GEN_PROGS += test_hugetlb_memcg +TEST_GEN_PROGS += test_kill +TEST_GEN_PROGS += test_kmem +TEST_GEN_PROGS += test_memcontrol +TEST_GEN_PROGS += test_pids +TEST_GEN_PROGS += test_zswap LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h include ../lib.mk -$(OUTPUT)/test_memcontrol: cgroup_util.c -$(OUTPUT)/test_kmem: cgroup_util.c $(OUTPUT)/test_core: cgroup_util.c -$(OUTPUT)/test_freezer: cgroup_util.c -$(OUTPUT)/test_kill: cgroup_util.c $(OUTPUT)/test_cpu: cgroup_util.c $(OUTPUT)/test_cpuset: cgroup_util.c -$(OUTPUT)/test_zswap: cgroup_util.c +$(OUTPUT)/test_freezer: cgroup_util.c $(OUTPUT)/test_hugetlb_memcg: cgroup_util.c +$(OUTPUT)/test_kill: cgroup_util.c +$(OUTPUT)/test_kmem: cgroup_util.c +$(OUTPUT)/test_memcontrol: cgroup_util.c +$(OUTPUT)/test_pids: cgroup_util.c +$(OUTPUT)/test_zswap: cgroup_util.c diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index b5eb1be2248c..7c08cc153367 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -28,6 +28,14 @@ CPULIST=$(cat $CGROUP2/cpuset.cpus.effective) NR_CPUS=$(lscpu | grep "^CPU(s):" | sed -e "s/.*:[[:space:]]*//") [[ $NR_CPUS -lt 8 ]] && skip_test "Test needs at least 8 cpus available!" +# Check to see if /dev/console exists and is writable +if [[ -c /dev/console && -w /dev/console ]] +then + CONSOLE=/dev/console +else + CONSOLE=/dev/null +fi + # Set verbose flag and delay factor PROG=$1 VERBOSE=0 @@ -103,8 +111,8 @@ console_msg() { MSG=$1 echo "$MSG" - echo "" > /dev/console - echo "$MSG" > /dev/console + echo "" > $CONSOLE + echo "$MSG" > $CONSOLE pause 0.01 } @@ -161,6 +169,14 @@ test_add_proc() # T = put a task into cgroup # O<c>=<v> = Write <v> to CPU online file of <c> # +# ECPUs - effective CPUs of cpusets +# Pstate - partition root state +# ISOLCPUS - isolated CPUs (<icpus>[,<icpus2>]) +# +# Note that if there are 2 fields in ISOLCPUS, the first one is for +# sched-debug matching which includes offline CPUs and single-CPU partitions +# while the second one is for matching cpuset.cpus.isolated. +# SETUP_A123_PARTITIONS="C1-3:P1:S+ C2-3:P1:S+ C3:P1" TEST_MATRIX=( # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS @@ -220,23 +236,29 @@ TEST_MATRIX=( " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1,A2:2-3,A3:2-3 A1:P0,A2:P2 2-3" " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2,A2:3,A3:3 A1:P0,A2:P2 3" " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-2,A2:1-2,A3:3 A1:P0,A3:P2 3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3,A2:1-3,A3:2-3,B1:2-3 A1:P0,A3:P0,B1:P-2" " C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5" " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4" " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4" " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1,A3:2-3 A2:P2,A3:P2 1-3" " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3,B1:4-5 A3:P2,B1:P2 2-5" + " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4,A2:1-3,A3:1-3 A2:P2 1-3" + " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4,A2:4,A3:2-3 A3:P2 2-3" # Nested remote/local partition tests " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4-5 \ A1:P0,A2:P1,A3:P2,B1:P1 2-3" " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4 \ A1:P0,A2:P1,A3:P2,B1:P1 2-4,2-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1,A2:2-3,A3:2-3,B1:4 \ + A1:P0,A2:P1,A3:P0,B1:P1" " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:2,A3:3,B1:4 \ A1:P0,A2:P1,A3:P2,B1:P1 2-4,3" " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1,A2:2-3,A3:4 \ A1:P0,A2:P2,A3:P1 2-4,2-3" + " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1,A2:2,A3:3-4 \ + A1:P0,A2:P2,A3:P1 2" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \ A1:P0,A2:P-2,A3:P-1" @@ -262,8 +284,8 @@ TEST_MATRIX=( . . X2-3 P2 . . 0 A1:0-2,A2:3,XA2:3 A2:P2 3" # Invalid to valid local partition direct transition tests - " C1-3:S+:P2 C2-3:X1:P2 . . . . . . 0 A1:1-3,XA1:1-3,A2:2-3:XA2: A1:P2,A2:P-2 1-3" - " C1-3:S+:P2 C2-3:X1:P2 . . . X3:P2 . . 0 A1:1-2,XA1:1-3,A2:3:XA2:3 A1:P2,A2:P2 1-3" + " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3,XA1:1-3,A2:1-3:XA2: A1:P2,A2:P-2 1-3" + " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2,XA1:1-3,A2:3:XA2:3 A1:P2,A2:P2 1-3" " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4,B1:4-6 A1:P-2,B1:P0" " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3,B1:4-6 A1:P2,B1:P0 0-3" " C0-3:P2 . . C3-5:C4-5 . . . . 0 A1:0-3,B1:4-5 A1:P2,B1:P0 0-3" @@ -274,32 +296,26 @@ TEST_MATRIX=( " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ . . X4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . C4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" + . . C4:X . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" # Local partition CPU change tests " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2,A2:3-5 A1:P2,A2:P1 0-2" " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3,A2:4-5 A1:P2,A2:P1 1-3" # cpus_allowed/exclusive_cpus update tests " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \ + . X:C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \ A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . X1 . P2 . 0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \ A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . . C3 P2 . 0 A1:0-2,A2:0-2,XA2:3,XA3:3,A3:3 \ - A1:P0,A3:P2 3" - " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . . X3 P2 . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3 \ A1:P0,A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ . . X3 . . 0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \ A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ - . . C3 . . 0 A1:0-3,A2:3,XA2:3,XA3:3,A3:3 \ - A1:P0,A3:P-2" - " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ - . C4 . . . 0 A1:4,A2:4,A3:4,XA1:,XA2:,XA3 \ + . X4 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:4,XA2:,XA3 \ A1:P0,A3:P-2" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS @@ -346,6 +362,9 @@ TEST_MATRIX=( " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1" " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1" + # cpuset.cpus can overlap with sibling cpuset.cpus.exclusive but not subsumed by it + " C0-3 . . C4-5 X5 . . . 0 A1:0-3,B1:4-5" + # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # Failure cases: @@ -355,6 +374,9 @@ TEST_MATRIX=( # Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3,B1:4-5" + + # cpuset.cpus cannot be a subset of sibling cpuset.cpus.exclusive + " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3,B1:4-5" ) # @@ -556,14 +578,15 @@ check_cgroup_states() do set -- $(echo $CHK | sed -e "s/:/ /g") CGRP=$1 + CGRP_DIR=$CGRP STATE=$2 FILE= EVAL=$(expr substr $STATE 2 2) - [[ $CGRP = A2 ]] && CGRP=A1/A2 - [[ $CGRP = A3 ]] && CGRP=A1/A2/A3 + [[ $CGRP = A2 ]] && CGRP_DIR=A1/A2 + [[ $CGRP = A3 ]] && CGRP_DIR=A1/A2/A3 case $STATE in - P*) FILE=$CGRP/cpuset.cpus.partition + P*) FILE=$CGRP_DIR/cpuset.cpus.partition ;; *) echo "Unknown state: $STATE!" exit 1 @@ -587,6 +610,16 @@ check_cgroup_states() ;; esac [[ $EVAL != $VAL ]] && return 1 + + # + # For root partition, dump sched-domains info to console if + # verbose mode set for manual comparison with sched debug info. + # + [[ $VAL -eq 1 && $VERBOSE -gt 0 ]] && { + DOMS=$(cat $CGRP_DIR/cpuset.cpus.effective) + [[ -n "$DOMS" ]] && + echo " [$CGRP] sched-domain: $DOMS" > $CONSOLE + } done return 0 } @@ -694,9 +727,9 @@ null_isolcpus_check() [[ $VERBOSE -gt 0 ]] || return 0 # Retry a few times before printing error RETRY=0 - while [[ $RETRY -lt 5 ]] + while [[ $RETRY -lt 8 ]] do - pause 0.01 + pause 0.02 check_isolcpus "." [[ $? -eq 0 ]] && return 0 ((RETRY++)) @@ -726,7 +759,7 @@ run_state_test() while [[ $I -lt $CNT ]] do - echo "Running test $I ..." > /dev/console + echo "Running test $I ..." > $CONSOLE [[ $VERBOSE -gt 1 ]] && { echo "" eval echo \${$TEST[$I]} @@ -783,7 +816,7 @@ run_state_test() while [[ $NEWLIST != $CPULIST && $RETRY -lt 8 ]] do # Wait a bit longer & recheck a few times - pause 0.01 + pause 0.02 ((RETRY++)) NEWLIST=$(cat cpuset.cpus.effective) done diff --git a/tools/testing/selftests/cgroup/test_pids.c b/tools/testing/selftests/cgroup/test_pids.c new file mode 100644 index 000000000000..9ecb83c6cc5c --- /dev/null +++ b/tools/testing/selftests/cgroup/test_pids.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <errno.h> +#include <linux/limits.h> +#include <signal.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "../kselftest.h" +#include "cgroup_util.h" + +static int run_success(const char *cgroup, void *arg) +{ + return 0; +} + +static int run_pause(const char *cgroup, void *arg) +{ + return pause(); +} + +/* + * This test checks that pids.max prevents forking new children above the + * specified limit in the cgroup. + */ +static int test_pids_max(const char *root) +{ + int ret = KSFT_FAIL; + char *cg_pids; + int pid; + + cg_pids = cg_name(root, "pids_test"); + if (!cg_pids) + goto cleanup; + + if (cg_create(cg_pids)) + goto cleanup; + + if (cg_read_strcmp(cg_pids, "pids.max", "max\n")) + goto cleanup; + + if (cg_write(cg_pids, "pids.max", "2")) + goto cleanup; + + if (cg_enter_current(cg_pids)) + goto cleanup; + + pid = cg_run_nowait(cg_pids, run_pause, NULL); + if (pid < 0) + goto cleanup; + + if (cg_run_nowait(cg_pids, run_success, NULL) != -1 || errno != EAGAIN) + goto cleanup; + + if (kill(pid, SIGINT)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_enter_current(root); + cg_destroy(cg_pids); + free(cg_pids); + + return ret; +} + +/* + * This test checks that pids.events are counted in cgroup associated with pids.max + */ +static int test_pids_events(const char *root) +{ + int ret = KSFT_FAIL; + char *cg_parent = NULL, *cg_child = NULL; + int pid; + + cg_parent = cg_name(root, "pids_parent"); + cg_child = cg_name(cg_parent, "pids_child"); + if (!cg_parent || !cg_child) + goto cleanup; + + if (cg_create(cg_parent)) + goto cleanup; + if (cg_write(cg_parent, "cgroup.subtree_control", "+pids")) + goto cleanup; + if (cg_create(cg_child)) + goto cleanup; + + if (cg_write(cg_parent, "pids.max", "2")) + goto cleanup; + + if (cg_read_strcmp(cg_child, "pids.max", "max\n")) + goto cleanup; + + if (cg_enter_current(cg_child)) + goto cleanup; + + pid = cg_run_nowait(cg_child, run_pause, NULL); + if (pid < 0) + goto cleanup; + + if (cg_run_nowait(cg_child, run_success, NULL) != -1 || errno != EAGAIN) + goto cleanup; + + if (kill(pid, SIGINT)) + goto cleanup; + + if (cg_read_key_long(cg_child, "pids.events", "max ") != 0) + goto cleanup; + if (cg_read_key_long(cg_parent, "pids.events", "max ") != 1) + goto cleanup; + + + ret = KSFT_PASS; + +cleanup: + cg_enter_current(root); + if (cg_child) + cg_destroy(cg_child); + if (cg_parent) + cg_destroy(cg_parent); + free(cg_child); + free(cg_parent); + + return ret; +} + + + +#define T(x) { x, #x } +struct pids_test { + int (*fn)(const char *root); + const char *name; +} tests[] = { + T(test_pids_max), + T(test_pids_events), +}; +#undef T + +int main(int argc, char **argv) +{ + char root[PATH_MAX]; + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(tests)); + if (cg_find_unified_root(root, sizeof(root), NULL)) + ksft_exit_skip("cgroup v2 isn't mounted\n"); + + /* + * Check that pids controller is available: + * pids is listed in cgroup.controllers + */ + if (cg_read_strstr(root, "cgroup.controllers", "pids")) + ksft_exit_skip("pids controller isn't available\n"); + + if (cg_read_strstr(root, "cgroup.subtree_control", "pids")) + if (cg_write(root, "cgroup.subtree_control", "+pids")) + ksft_exit_skip("Failed to set pids controller\n"); + + for (int i = 0; i < ARRAY_SIZE(tests); i++) { + switch (tests[i].fn(root)) { + case KSFT_PASS: + ksft_test_result_pass("%s\n", tests[i].name); + break; + case KSFT_SKIP: + ksft_test_result_skip("%s\n", tests[i].name); + break; + default: + ksft_test_result_fail("%s\n", tests[i].name); + break; + } + } + + ksft_finished(); +} diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c index 5c997f17fcbd..b12f1f9babf8 100644 --- a/tools/testing/selftests/dma/dma_map_benchmark.c +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -33,7 +33,6 @@ int main(int argc, char **argv) int granule = 1; int cmd = DMA_MAP_BENCHMARK; - char *p; while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:")) != -1) { switch (opt) { diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 4933d045ab66..c9f2f48fc30f 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -11,6 +11,7 @@ TEST_PROGS = \ hw_stats_l3_gre.sh \ loopback.sh \ pp_alloc_fail.py \ + rss_ctx.py \ # TEST_FILES := \ diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py new file mode 100755 index 000000000000..931dbc36ca43 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -0,0 +1,522 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import datetime +import random +from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ge, ksft_lt +from lib.py import NetDrvEpEnv +from lib.py import EthtoolFamily, NetdevFamily +from lib.py import KsftSkipEx +from lib.py import rand_port +from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure + + +def _rss_key_str(key): + return ":".join(["{:02x}".format(x) for x in key]) + + +def _rss_key_rand(length): + return [random.randint(0, 255) for _ in range(length)] + + +def get_rss(cfg, context=0): + return ethtool(f"-x {cfg.ifname} context {context}", json=True)[0] + + +def get_drop_err_sum(cfg): + stats = ip("-s -s link show dev " + cfg.ifname, json=True)[0] + cnt = 0 + for key in ['errors', 'dropped', 'over_errors', 'fifo_errors', + 'length_errors', 'crc_errors', 'missed_errors', + 'frame_errors']: + cnt += stats["stats64"]["rx"][key] + return cnt, stats["stats64"]["tx"]["carrier_changes"] + + +def ethtool_create(cfg, act, opts): + output = ethtool(f"{act} {cfg.ifname} {opts}").stdout + # Output will be something like: "New RSS context is 1" or + # "Added rule with ID 7", we want the integer from the end + return int(output.split()[-1]) + + +def require_ntuple(cfg): + features = ethtool(f"-k {cfg.ifname}", json=True)[0] + if not features["ntuple-filters"]["active"]: + # ntuple is more of a capability than a config knob, don't bother + # trying to enable it (until some driver actually needs it). + raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(features["ntuple-filters"])) + + +# Get Rx packet counts for all queues, as a simple list of integers +# if @prev is specified the prev counts will be subtracted +def _get_rx_cnts(cfg, prev=None): + cfg.wait_hw_stats_settle() + data = cfg.netdevnl.qstats_get({"ifindex": cfg.ifindex, "scope": ["queue"]}, dump=True) + data = [x for x in data if x['queue-type'] == "rx"] + max_q = max([x["queue-id"] for x in data]) + queue_stats = [0] * (max_q + 1) + for q in data: + queue_stats[q["queue-id"]] = q["rx-packets"] + if prev and q["queue-id"] < len(prev): + queue_stats[q["queue-id"]] -= prev[q["queue-id"]] + return queue_stats + + +def _send_traffic_check(cfg, port, name, params): + # params is a dict with 3 possible keys: + # - "target": required, which queues we expect to get iperf traffic + # - "empty": optional, which queues should see no traffic at all + # - "noise": optional, which queues we expect to see low traffic; + # used for queues of the main context, since some background + # OS activity may use those queues while we're testing + # the value for each is a list, or some other iterable containing queue ids. + + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + + directed = sum(cnts[i] for i in params['target']) + + ksft_ge(directed, 20000, f"traffic on {name}: " + str(cnts)) + if params.get('noise'): + ksft_lt(sum(cnts[i] for i in params['noise']), directed / 2, + "traffic on other queues:" + str(cnts)) + if params.get('empty'): + ksft_eq(sum(cnts[i] for i in params['empty']), 0, + "traffic on inactive queues: " + str(cnts)) + + +def test_rss_key_indir(cfg): + """Test basics like updating the main RSS key and indirection table.""" + + if len(_get_rx_cnts(cfg)) < 2: + KsftSkipEx("Device has only one queue (or doesn't support queue stats)") + + data = get_rss(cfg) + want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table'] + for k in want_keys: + if k not in data: + raise KsftFailEx("ethtool results missing key: " + k) + if not data[k]: + raise KsftFailEx(f"ethtool results empty for '{k}': {data[k]}") + + key_len = len(data['rss-hash-key']) + + # Set the key + key = _rss_key_rand(key_len) + ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key)) + + data = get_rss(cfg) + ksft_eq(key, data['rss-hash-key']) + + # Set the indirection table + ethtool(f"-X {cfg.ifname} equal 2") + reset_indir = defer(ethtool, f"-X {cfg.ifname} default") + data = get_rss(cfg) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(1, max(data['rss-indirection-table'])) + + # Check we only get traffic on the first 2 queues + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + # 2 queues, 20k packets, must be at least 5k per queue + ksft_ge(cnts[0], 5000, "traffic on main context (1/2): " + str(cnts)) + ksft_ge(cnts[1], 5000, "traffic on main context (2/2): " + str(cnts)) + # The other queues should be unused + ksft_eq(sum(cnts[2:]), 0, "traffic on unused queues: " + str(cnts)) + + # Restore, and check traffic gets spread again + reset_indir.exec() + + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + # First two queues get less traffic than all the rest + ksft_lt(sum(cnts[:2]), sum(cnts[2:]), "traffic distributed: " + str(cnts)) + + +def test_rss_queue_reconfigure(cfg, main_ctx=True): + """Make sure queue changes can't override requested RSS config. + + By default main RSS table should change to include all queues. + When user sets a specific RSS config the driver should preserve it, + even when queue count changes. Driver should refuse to deactivate + queues used in the user-set RSS config. + """ + + if not main_ctx: + require_ntuple(cfg) + + # Start with 4 queues, an arbitrary known number. + try: + qcnt = len(_get_rx_cnts(cfg)) + ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + except: + raise KsftSkipEx("Not enough queues for the test or qstat not supported") + + if main_ctx: + ctx_id = 0 + ctx_ref = "" + else: + ctx_id = ethtool_create(cfg, "-X", "context new") + ctx_ref = f"context {ctx_id}" + defer(ethtool, f"-X {cfg.ifname} {ctx_ref} delete") + + # Indirection table should be distributing to all queues. + data = get_rss(cfg, context=ctx_id) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(3, max(data['rss-indirection-table'])) + + # Increase queues, indirection table should be distributing to all queues. + # It's unclear whether tables of additional contexts should be reset, too. + if main_ctx: + ethtool(f"-L {cfg.ifname} combined 5") + data = get_rss(cfg) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(4, max(data['rss-indirection-table'])) + ethtool(f"-L {cfg.ifname} combined 4") + + # Configure the table explicitly + port = rand_port() + ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 0 1") + if main_ctx: + other_key = 'empty' + defer(ethtool, f"-X {cfg.ifname} default") + else: + other_key = 'noise' + flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}" + ntuple = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple}") + + _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3), + other_key: (1, 2) }) + + # We should be able to increase queues, but table should be left untouched + ethtool(f"-L {cfg.ifname} combined 5") + data = get_rss(cfg, context=ctx_id) + ksft_eq({0, 3}, set(data['rss-indirection-table'])) + + _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3), + other_key: (1, 2, 4) }) + + # Setting queue count to 3 should fail, queue 3 is used + try: + ethtool(f"-L {cfg.ifname} combined 3") + except CmdExitFailure: + pass + else: + raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})") + + +def test_rss_resize(cfg): + """Test resizing of the RSS table. + + Some devices dynamically increase and decrease the size of the RSS + indirection table based on the number of enabled queues. + When that happens driver must maintain the balance of entries + (preferably duplicating the smaller table). + """ + + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ch_max = channels['combined-max'] + qcnt = channels['combined-count'] + + if ch_max < 2: + raise KsftSkipEx(f"Not enough queues for the test: {ch_max}") + + ethtool(f"-L {cfg.ifname} combined 2") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + + ethtool(f"-X {cfg.ifname} weight 1 7") + defer(ethtool, f"-X {cfg.ifname} default") + + ethtool(f"-L {cfg.ifname} combined {ch_max}") + data = get_rss(cfg) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(1, max(data['rss-indirection-table'])) + + ksft_eq(7, + data['rss-indirection-table'].count(1) / + data['rss-indirection-table'].count(0), + f"Table imbalance after resize: {data['rss-indirection-table']}") + + +def test_hitless_key_update(cfg): + """Test that flows may be rehashed without impacting traffic. + + Some workloads may want to rehash the flows in response to an imbalance. + Most effective way to do that is changing the RSS key. Check that changing + the key does not cause link flaps or traffic disruption. + + Disrupting traffic for key update is not a bug, but makes the key + update unusable for rehashing under load. + """ + data = get_rss(cfg) + key_len = len(data['rss-hash-key']) + + key = _rss_key_rand(key_len) + + tgen = GenerateTraffic(cfg) + try: + errors0, carrier0 = get_drop_err_sum(cfg) + t0 = datetime.datetime.now() + ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key)) + t1 = datetime.datetime.now() + errors1, carrier1 = get_drop_err_sum(cfg) + finally: + tgen.wait_pkts_and_stop(5000) + + ksft_lt((t1 - t0).total_seconds(), 0.2) + ksft_eq(errors1 - errors1, 0) + ksft_eq(carrier1 - carrier0, 0) + + +def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): + """ + Test separating traffic into RSS contexts. + The queues will be allocated 2 for each context: + ctx0 ctx1 ctx2 ctx3 + [0 1] [2 3] [4 5] [6 7] ... + """ + + require_ntuple(cfg) + + requested_ctx_cnt = ctx_cnt + + # Try to allocate more queues when necessary + qcnt = len(_get_rx_cnts(cfg)) + if qcnt < 2 + 2 * ctx_cnt: + try: + ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}") + ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + except: + raise KsftSkipEx("Not enough queues for the test") + + ports = [] + + # Use queues 0 and 1 for normal traffic + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") + + for i in range(ctx_cnt): + want_cfg = f"start {2 + i * 2} equal 2" + create_cfg = want_cfg if create_with_cfg else "" + + try: + ctx_id = ethtool_create(cfg, "-X", f"context new {create_cfg}") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + except CmdExitFailure: + # try to carry on and skip at the end + if i == 0: + raise + ksft_pr(f"Failed to create context {i + 1}, trying to test what we got") + ctx_cnt = i + break + + if not create_with_cfg: + ethtool(f"-X {cfg.ifname} context {ctx_id} {want_cfg}") + + # Sanity check the context we just created + data = get_rss(cfg, ctx_id) + ksft_eq(min(data['rss-indirection-table']), 2 + i * 2, "Unexpected context cfg: " + str(data)) + ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data)) + + ports.append(rand_port()) + flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}" + ntuple = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple}") + + for i in range(ctx_cnt): + _send_traffic_check(cfg, ports[i], f"context {i}", + { 'target': (2+i*2, 3+i*2), + 'noise': (0, 1), + 'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt)) }) + + if requested_ctx_cnt != ctx_cnt: + raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}") + + +def test_rss_context4(cfg): + test_rss_context(cfg, 4) + + +def test_rss_context32(cfg): + test_rss_context(cfg, 32) + + +def test_rss_context4_create_with_cfg(cfg): + test_rss_context(cfg, 4, create_with_cfg=True) + + +def test_rss_context_queue_reconfigure(cfg): + test_rss_queue_reconfigure(cfg, main_ctx=False) + + +def test_rss_context_out_of_order(cfg, ctx_cnt=4): + """ + Test separating traffic into RSS contexts. + Contexts are removed in semi-random order, and steering re-tested + to make sure removal doesn't break steering to surviving contexts. + Test requires 3 contexts to work. + """ + + require_ntuple(cfg) + + requested_ctx_cnt = ctx_cnt + + # Try to allocate more queues when necessary + qcnt = len(_get_rx_cnts(cfg)) + if qcnt < 2 + 2 * ctx_cnt: + try: + ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}") + ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + except: + raise KsftSkipEx("Not enough queues for the test") + + ntuple = [] + ctx = [] + ports = [] + + def remove_ctx(idx): + ntuple[idx].exec() + ntuple[idx] = None + ctx[idx].exec() + ctx[idx] = None + + def check_traffic(): + for i in range(ctx_cnt): + if ctx[i]: + expected = { + 'target': (2+i*2, 3+i*2), + 'noise': (0, 1), + 'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt)) + } + else: + expected = { + 'target': (0, 1), + 'empty': range(2, 2+2*ctx_cnt) + } + + _send_traffic_check(cfg, ports[i], f"context {i}", expected) + + # Use queues 0 and 1 for normal traffic + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") + + for i in range(ctx_cnt): + ctx_id = ethtool_create(cfg, "-X", f"context new start {2 + i * 2} equal 2") + ctx.append(defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")) + + ports.append(rand_port()) + flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + ntuple.append(defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")) + + check_traffic() + + # Remove middle context + remove_ctx(ctx_cnt // 2) + check_traffic() + + # Remove first context + remove_ctx(0) + check_traffic() + + # Remove last context + remove_ctx(-1) + check_traffic() + + if requested_ctx_cnt != ctx_cnt: + raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}") + + +def test_rss_context_overlap(cfg, other_ctx=0): + """ + Test contexts overlapping with each other. + Use 4 queues for the main context, but only queues 2 and 3 for context 1. + """ + + require_ntuple(cfg) + + queue_cnt = len(_get_rx_cnts(cfg)) + if queue_cnt < 4: + try: + ksft_pr(f"Increasing queue count {queue_cnt} -> 4") + ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}") + except: + raise KsftSkipEx("Not enough queues for the test") + + if other_ctx == 0: + ethtool(f"-X {cfg.ifname} equal 4") + defer(ethtool, f"-X {cfg.ifname} default") + else: + other_ctx = ethtool_create(cfg, "-X", "context new") + ethtool(f"-X {cfg.ifname} context {other_ctx} equal 4") + defer(ethtool, f"-X {cfg.ifname} context {other_ctx} delete") + + ctx_id = ethtool_create(cfg, "-X", "context new") + ethtool(f"-X {cfg.ifname} context {ctx_id} start 2 equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + port = rand_port() + if other_ctx: + flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {other_ctx}" + ntuple_id = ethtool_create(cfg, "-N", flow) + ntuple = defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + # Test the main context + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + + ksft_ge(sum(cnts[ :4]), 20000, "traffic on main context: " + str(cnts)) + ksft_ge(sum(cnts[ :2]), 7000, "traffic on main context (1/2): " + str(cnts)) + ksft_ge(sum(cnts[2:4]), 7000, "traffic on main context (2/2): " + str(cnts)) + if other_ctx == 0: + ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts)) + + # Now create a rule for context 1 and make sure traffic goes to a subset + if other_ctx: + ntuple.exec() + flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + + directed = sum(cnts[2:4]) + ksft_lt(sum(cnts[ :2]), directed / 2, "traffic on main context: " + str(cnts)) + ksft_ge(directed, 20000, "traffic on extra context: " + str(cnts)) + if other_ctx == 0: + ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts)) + + +def test_rss_context_overlap2(cfg): + test_rss_context_overlap(cfg, True) + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + cfg.netdevnl = NetdevFamily() + + ksft_run([test_rss_key_indir, test_rss_queue_reconfigure, + test_rss_resize, test_hitless_key_update, + test_rss_context, test_rss_context4, test_rss_context32, + test_rss_context_queue_reconfigure, + test_rss_context_overlap, test_rss_context_overlap2, + test_rss_context_out_of_order, test_rss_context4_create_with_cfg], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index edcedd7bffab..a5e800b8f103 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -1,9 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 import os +import time from pathlib import Path from lib.py import KsftSkipEx, KsftXfailEx -from lib.py import cmd, ip +from lib.py import cmd, ethtool, ip from lib.py import NetNS, NetdevSimDev from .remote import Remote @@ -82,6 +83,8 @@ class NetDrvEpEnv: self.env = _load_env_file(src_path) + self._stats_settle_time = None + # Things we try to destroy self.remote = None # These are for local testing state @@ -222,3 +225,17 @@ class NetDrvEpEnv: if remote: if not self._require_cmd(comm, "remote"): raise KsftSkipEx("Test requires (remote) command: " + comm) + + def wait_hw_stats_settle(self): + """ + Wait for HW stats to become consistent, some devices DMA HW stats + periodically so events won't be reflected until next sync. + Good drivers will tell us via ethtool what their sync period is. + """ + if self._stats_settle_time is None: + data = ethtool("-c " + self.ifname, json=True)[0] + + self._stats_settle_time = 0.025 + \ + data.get('stats-block-usecs', 0) / 1000 / 1000 + + time.sleep(self._stats_settle_time) diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py index abdb677bdb1c..d9c10613ae67 100644 --- a/tools/testing/selftests/drivers/net/lib/py/load.py +++ b/tools/testing/selftests/drivers/net/lib/py/load.py @@ -5,28 +5,45 @@ import time from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen class GenerateTraffic: - def __init__(self, env): + def __init__(self, env, port=None): env.require_cmd("iperf3", remote=True) self.env = env - port = rand_port() - self._iperf_server = cmd(f"iperf3 -s -p {port}", background=True) + if port is None: + port = rand_port() + self._iperf_server = cmd(f"iperf3 -s -1 -p {port}", background=True) wait_port_listen(port) time.sleep(0.1) self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {port} -t 86400", background=True, host=env.remote) # Wait for traffic to ramp up - pkt = ip("-s link show dev " + env.ifname, json=True)[0]["stats64"]["rx"]["packets"] + if not self._wait_pkts(pps=1000): + self.stop(verbose=True) + raise Exception("iperf3 traffic did not ramp up") + + def _wait_pkts(self, pkt_cnt=None, pps=None): + """ + Wait until we've seen pkt_cnt or until traffic ramps up to pps. + Only one of pkt_cnt or pss can be specified. + """ + pkt_start = ip("-s link show dev " + self.env.ifname, json=True)[0]["stats64"]["rx"]["packets"] for _ in range(50): time.sleep(0.1) - now = ip("-s link show dev " + env.ifname, json=True)[0]["stats64"]["rx"]["packets"] - if now - pkt > 1000: - return - pkt = now - self.stop(verbose=True) - raise Exception("iperf3 traffic did not ramp up") + pkt_now = ip("-s link show dev " + self.env.ifname, json=True)[0]["stats64"]["rx"]["packets"] + if pps: + if pkt_now - pkt_start > pps / 10: + return True + pkt_start = pkt_now + elif pkt_cnt: + if pkt_now - pkt_start > pkt_cnt: + return True + return False + + def wait_pkts_and_stop(self, pkt_cnt): + failed = not self._wait_pkts(pkt_cnt=pkt_cnt) + self.stop(verbose=failed) def stop(self, verbose=None): self._iperf_client.process(terminate=True) diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh index 76f1ab4898d9..e1ad623146d7 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh @@ -15,6 +15,13 @@ source $lib_dir/mirror_lib.sh source $lib_dir/mirror_gre_lib.sh source $lib_dir/mirror_gre_topo_lib.sh +ALL_TESTS=" + test_keyful + test_soft + test_tos_fixed + test_ttl_inherit +" + setup_keyful() { tunnel_create gt6-key ip6gretap 2001:db8:3::1 2001:db8:3::2 \ @@ -118,15 +125,15 @@ test_span_gre_ttl_inherit() RET=0 ip link set dev $tundev type $type ttl inherit - mirror_install $swp1 ingress $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev ip link set dev $tundev type $type ttl 100 - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: no offload on TTL of inherit ($tcflags)" + log_test "$what: no offload on TTL of inherit" } test_span_gre_tos_fixed() @@ -138,61 +145,49 @@ test_span_gre_tos_fixed() RET=0 ip link set dev $tundev type $type tos 0x10 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev ip link set dev $tundev type $type tos inherit - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: no offload on a fixed TOS ($tcflags)" + log_test "$what: no offload on a fixed TOS" } test_span_failable() { - local should_fail=$1; shift local tundev=$1; shift local what=$1; shift RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - if ((should_fail)); then - fail_test_span_gre_dir $tundev ingress - else - quick_test_span_gre_dir $tundev ingress - fi + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: should_fail=$should_fail ($tcflags)" + log_test "fail $what" } -test_failable() +test_keyful() { - local should_fail=$1; shift - - test_span_failable $should_fail gt6-key "mirror to keyful gretap" - test_span_failable $should_fail gt6-soft "mirror to gretap w/ soft underlay" + test_span_failable gt6-key "mirror to keyful gretap" } -test_sw() +test_soft() { - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - test_failable 0 - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress + test_span_failable gt6-soft "mirror to gretap w/ soft underlay" } -test_hw() +test_tos_fixed() { - test_failable 1 - test_span_gre_tos_fixed gt4 gretap "mirror to gretap" test_span_gre_tos_fixed gt6 ip6gretap "mirror to ip6gretap" +} + +test_ttl_inherit() +{ test_span_gre_ttl_inherit gt4 gretap "mirror to gretap" test_span_gre_ttl_inherit gt6 ip6gretap "mirror to ip6gretap" } @@ -202,16 +197,6 @@ trap cleanup EXIT setup_prepare setup_wait -if ! tc_offload_check; then - check_err 1 "Could not test offloaded functionality" - log_test "mlxsw-specific tests for mirror to gretap" - exit -fi - -tcflags="skip_hw" -test_sw - -tcflags="skip_sw" -test_hw +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh index e5589e2fca85..d43093310e23 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh @@ -79,7 +79,7 @@ mirror_gre_tunnels_create() cat >> $MIRROR_GRE_BATCH_FILE <<-EOF filter add dev $swp1 ingress pref 1000 \ protocol ipv6 \ - flower $tcflags dst_ip $match_dip \ + flower skip_sw dst_ip $match_dip \ action mirred egress mirror dev $tun EOF done @@ -107,7 +107,7 @@ mirror_gre_tunnels_destroy() done } -__mirror_gre_test() +mirror_gre_test() { local count=$1; shift local should_fail=$1; shift @@ -131,20 +131,6 @@ __mirror_gre_test() done } -mirror_gre_test() -{ - local count=$1; shift - local should_fail=$1; shift - - if ! tc_offload_check $TC_FLOWER_NUM_NETIFS; then - check_err 1 "Could not test offloaded functionality" - return - fi - - tcflags="skip_sw" - __mirror_gre_test $count $should_fail -} - mirror_gre_setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index 31252bc8775e..4994bea5daf8 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -11,7 +11,7 @@ ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ multiple_masks_test ctcam_edge_cases_test delta_simple_test \ delta_two_masks_one_key_test delta_simple_rehash_test \ bloom_simple_test bloom_complex_test bloom_delta_test \ - max_erp_entries_test max_group_size_test" + max_erp_entries_test max_group_size_test collision_test" NUM_NETIFS=2 source $lib_dir/lib.sh source $lib_dir/tc_common.sh @@ -457,7 +457,7 @@ delta_two_masks_one_key_test() { # If 2 keys are the same and only differ in mask in a way that # they belong under the same ERP (second is delta of the first), - # there should be no C-TCAM spill. + # there should be C-TCAM spill. RET=0 @@ -474,8 +474,8 @@ delta_two_masks_one_key_test() tp_record "mlxsw:*" "tc filter add dev $h2 ingress protocol ip \ pref 2 handle 102 flower $tcflags dst_ip 192.0.2.2 \ action drop" - tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 0 - check_err $? "incorrect C-TCAM spill while inserting the second rule" + tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 1 + check_err $? "C-TCAM spill did not happen while inserting the second rule" $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ -t ip -q @@ -1087,6 +1087,53 @@ max_group_size_test() log_test "max ACL group size test ($tcflags). max size $max_size" } +collision_test() +{ + # Filters cannot share an eRP if in the common unmasked part (i.e., + # without the delta bits) they have the same values. If the driver does + # not prevent such configuration (by spilling into the C-TCAM), then + # multiple entries will be present in the device with the same key, + # leading to collisions and a reduced scale. + # + # Create such a scenario and make sure all the filters are successfully + # added. + + RET=0 + + local ret + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + # Add a single dst_ip/24 filter and multiple dst_ip/32 filters that all + # have the same values in the common unmasked part (dst_ip/24). + + tc filter add dev $h2 ingress pref 1 proto ipv4 handle 101 \ + flower $tcflags dst_ip 198.51.100.0/24 \ + action drop + + for i in {0..255}; do + tc filter add dev $h2 ingress pref 2 proto ipv4 \ + handle $((102 + i)) \ + flower $tcflags dst_ip 198.51.100.${i}/32 \ + action drop + ret=$? + [[ $ret -ne 0 ]] && break + done + + check_err $ret "failed to add all the filters" + + for i in {255..0}; do + tc filter del dev $h2 ingress pref 2 proto ipv4 \ + handle $((102 + i)) flower + done + + tc filter del dev $h2 ingress pref 1 proto ipv4 handle 101 flower + + log_test "collision test ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/drivers/net/virtio_net/config b/tools/testing/selftests/drivers/net/virtio_net/config index f35de0542b60..bcf7555eaffe 100644 --- a/tools/testing/selftests/drivers/net/virtio_net/config +++ b/tools/testing/selftests/drivers/net/virtio_net/config @@ -1,2 +1,8 @@ -CONFIG_VIRTIO_NET=y +CONFIG_BPF_SYSCALL=y +CONFIG_CGROUP_BPF=y +CONFIG_IPV6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_VRF=m CONFIG_VIRTIO_DEBUG=y +CONFIG_VIRTIO_NET=y diff --git a/tools/testing/selftests/drivers/platform/x86/intel/ifs/Makefile b/tools/testing/selftests/drivers/platform/x86/intel/ifs/Makefile new file mode 100644 index 000000000000..03d0449d307c --- /dev/null +++ b/tools/testing/selftests/drivers/platform/x86/intel/ifs/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for ifs(In Field Scan) selftests + +TEST_PROGS := test_ifs.sh + +include ../../../../../lib.mk diff --git a/tools/testing/selftests/drivers/platform/x86/intel/ifs/test_ifs.sh b/tools/testing/selftests/drivers/platform/x86/intel/ifs/test_ifs.sh new file mode 100755 index 000000000000..8b68964b29f4 --- /dev/null +++ b/tools/testing/selftests/drivers/platform/x86/intel/ifs/test_ifs.sh @@ -0,0 +1,494 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test the functionality of the Intel IFS(In Field Scan) driver. +# + +# Matched with kselftest framework: tools/testing/selftests/kselftest.h +readonly KSFT_PASS=0 +readonly KSFT_FAIL=1 +readonly KSFT_XFAIL=2 +readonly KSFT_SKIP=4 + +readonly CPU_SYSFS="/sys/devices/system/cpu" +readonly CPU_OFFLINE_SYSFS="${CPU_SYSFS}/offline" +readonly IMG_PATH="/lib/firmware/intel/ifs_0" +readonly IFS_SCAN_MODE="0" +readonly IFS_ARRAY_BIST_SCAN_MODE="1" +readonly IFS_PATH="/sys/devices/virtual/misc/intel_ifs" +readonly IFS_SCAN_SYSFS_PATH="${IFS_PATH}_${IFS_SCAN_MODE}" +readonly IFS_ARRAY_BIST_SYSFS_PATH="${IFS_PATH}_${IFS_ARRAY_BIST_SCAN_MODE}" +readonly RUN_TEST="run_test" +readonly STATUS="status" +readonly DETAILS="details" +readonly STATUS_PASS="pass" +readonly PASS="PASS" +readonly FAIL="FAIL" +readonly INFO="INFO" +readonly XFAIL="XFAIL" +readonly SKIP="SKIP" +readonly IFS_NAME="intel_ifs" +readonly ALL="all" +readonly SIBLINGS="siblings" + +# Matches arch/x86/include/asm/intel-family.h and +# drivers/platform/x86/intel/ifs/core.c requirement as follows +readonly SAPPHIRERAPIDS_X="8f" +readonly EMERALDRAPIDS_X="cf" + +readonly INTEL_FAM6="06" + +LOOP_TIMES=3 +FML="" +MODEL="" +STEPPING="" +CPU_FMS="" +TRUE="true" +FALSE="false" +RESULT=$KSFT_PASS +IMAGE_NAME="" +INTERVAL_TIME=1 +OFFLINE_CPUS="" +# For IFS cleanup tags +ORIGIN_IFS_LOADED="" +IFS_IMAGE_NEED_RESTORE=$FALSE +IFS_LOG="/tmp/ifs_logs.$$" +RANDOM_CPU="" +DEFAULT_IMG_ID="" + +append_log() +{ + echo -e "$1" | tee -a "$IFS_LOG" +} + +online_offline_cpu_list() +{ + local on_off=$1 + local target_cpus=$2 + local cpu="" + local cpu_start="" + local cpu_end="" + local i="" + + if [[ -n "$target_cpus" ]]; then + for cpu in $(echo "$target_cpus" | tr ',' ' '); do + if [[ "$cpu" == *"-"* ]]; then + cpu_start="" + cpu_end="" + i="" + cpu_start=$(echo "$cpu" | cut -d "-" -f 1) + cpu_end=$(echo "$cpu" | cut -d "-" -f 2) + for((i=cpu_start;i<=cpu_end;i++)); do + append_log "[$INFO] echo $on_off > \ +${CPU_SYSFS}/cpu${i}/online" + echo "$on_off" > "$CPU_SYSFS"/cpu"$i"/online + done + else + set_target_cpu "$on_off" "$cpu" + fi + done + fi +} + +ifs_scan_result_summary() +{ + local failed_info pass_num skip_num fail_num + + if [[ -e "$IFS_LOG" ]]; then + failed_info=$(grep ^"\[${FAIL}\]" "$IFS_LOG") + fail_num=$(grep -c ^"\[${FAIL}\]" "$IFS_LOG") + skip_num=$(grep -c ^"\[${SKIP}\]" "$IFS_LOG") + pass_num=$(grep -c ^"\[${PASS}\]" "$IFS_LOG") + + if [[ "$fail_num" -ne 0 ]]; then + RESULT=$KSFT_FAIL + echo "[$INFO] IFS test failure summary:" + echo "$failed_info" + elif [[ "$skip_num" -ne 0 ]]; then + RESULT=$KSFT_SKIP + fi + echo "[$INFO] IFS test pass:$pass_num, skip:$skip_num, fail:$fail_num" + else + echo "[$INFO] No file $IFS_LOG for IFS scan summary" + fi +} + +ifs_cleanup() +{ + echo "[$INFO] Restore environment after IFS test" + + # Restore ifs origin image if origin image backup step is needed + [[ "$IFS_IMAGE_NEED_RESTORE" == "$TRUE" ]] && { + mv -f "$IMG_PATH"/"$IMAGE_NAME"_origin "$IMG_PATH"/"$IMAGE_NAME" + } + + # Restore the CPUs to the state before testing + [[ -z "$OFFLINE_CPUS" ]] || online_offline_cpu_list "0" "$OFFLINE_CPUS" + + lsmod | grep -q "$IFS_NAME" && [[ "$ORIGIN_IFS_LOADED" == "$FALSE" ]] && { + echo "[$INFO] modprobe -r $IFS_NAME" + modprobe -r "$IFS_NAME" + } + + ifs_scan_result_summary + [[ -e "$IFS_LOG" ]] && rm -rf "$IFS_LOG" + + echo "[RESULT] IFS test exit with $RESULT" + exit "$RESULT" +} + +do_cmd() +{ + local cmd=$* + local ret="" + + append_log "[$INFO] $cmd" + eval "$cmd" + ret=$? + if [[ $ret -ne 0 ]]; then + append_log "[$FAIL] $cmd failed. Return code is $ret" + RESULT=$KSFT_XFAIL + ifs_cleanup + fi +} + +test_exit() +{ + local info=$1 + RESULT=$2 + + declare -A EXIT_MAP + EXIT_MAP[$KSFT_PASS]=$PASS + EXIT_MAP[$KSFT_FAIL]=$FAIL + EXIT_MAP[$KSFT_XFAIL]=$XFAIL + EXIT_MAP[$KSFT_SKIP]=$SKIP + + append_log "[${EXIT_MAP[$RESULT]}] $info" + ifs_cleanup +} + +online_all_cpus() +{ + local off_cpus="" + + OFFLINE_CPUS=$(cat "$CPU_OFFLINE_SYSFS") + online_offline_cpu_list "1" "$OFFLINE_CPUS" + + off_cpus=$(cat "$CPU_OFFLINE_SYSFS") + if [[ -z "$off_cpus" ]]; then + append_log "[$INFO] All CPUs are online." + else + append_log "[$XFAIL] There is offline cpu:$off_cpus after online all cpu!" + RESULT=$KSFT_XFAIL + ifs_cleanup + fi +} + +get_cpu_fms() +{ + FML=$(grep -m 1 "family" /proc/cpuinfo | awk -F ":" '{printf "%02x",$2;}') + MODEL=$(grep -m 1 "model" /proc/cpuinfo | awk -F ":" '{printf "%02x",$2;}') + STEPPING=$(grep -m 1 "stepping" /proc/cpuinfo | awk -F ":" '{printf "%02x",$2;}') + CPU_FMS="${FML}-${MODEL}-${STEPPING}" +} + +check_cpu_ifs_support_interval_time() +{ + get_cpu_fms + + if [[ "$FML" != "$INTEL_FAM6" ]]; then + test_exit "CPU family:$FML does not support IFS" "$KSFT_SKIP" + fi + + # Ucode has time interval requirement for IFS scan on same CPU as follows: + case $MODEL in + "$SAPPHIRERAPIDS_X") + INTERVAL_TIME=180; + ;; + "$EMERALDRAPIDS_X") + INTERVAL_TIME=30; + ;; + *) + # Set default interval time for other platforms + INTERVAL_TIME=1; + append_log "[$INFO] CPU FML:$FML model:0x$MODEL, default: 1s interval time" + ;; + esac +} + +check_ifs_loaded() +{ + local ifs_info="" + + ifs_info=$(lsmod | grep "$IFS_NAME") + if [[ -z "$ifs_info" ]]; then + append_log "[$INFO] modprobe $IFS_NAME" + modprobe "$IFS_NAME" || { + test_exit "Check if CONFIG_INTEL_IFS is set to m or \ +platform doesn't support ifs" "$KSFT_SKIP" + } + ifs_info=$(lsmod | grep "$IFS_NAME") + [[ -n "$ifs_info" ]] || test_exit "No ifs module listed by lsmod" "$KSFT_FAIL" + fi +} + +test_ifs_scan_entry() +{ + local ifs_info="" + + ifs_info=$(lsmod | grep "$IFS_NAME") + + if [[ -z "$ifs_info" ]]; then + ORIGIN_IFS_LOADED="$FALSE" + check_ifs_loaded + else + ORIGIN_IFS_LOADED="$TRUE" + append_log "[$INFO] Module $IFS_NAME is already loaded" + fi + + if [[ -d "$IFS_SCAN_SYSFS_PATH" ]]; then + append_log "[$PASS] IFS sysfs $IFS_SCAN_SYSFS_PATH entry is created\n" + else + test_exit "No sysfs entry in $IFS_SCAN_SYSFS_PATH" "$KSFT_FAIL" + fi +} + +load_image() +{ + local image_id=$1 + local image_info="" + local ret="" + + check_ifs_loaded + if [[ -e "${IMG_PATH}/${IMAGE_NAME}" ]]; then + append_log "[$INFO] echo 0x$image_id > ${IFS_SCAN_SYSFS_PATH}/current_batch" + echo "0x$image_id" > "$IFS_SCAN_SYSFS_PATH"/current_batch 2>/dev/null + ret=$? + [[ "$ret" -eq 0 ]] || { + append_log "[$FAIL] Load ifs image $image_id failed with ret:$ret\n" + return "$ret" + } + image_info=$(cat ${IFS_SCAN_SYSFS_PATH}/current_batch) + if [[ "$image_info" == 0x"$image_id" ]]; then + append_log "[$PASS] load IFS current_batch:$image_info" + else + append_log "[$FAIL] current_batch:$image_info is not expected:$image_id" + return "$KSFT_FAIL" + fi + else + append_log "[$FAIL] No IFS image file ${IMG_PATH}/${IMAGE_NAME}"\ + return "$KSFT_FAIL" + fi + return 0 +} + +test_load_origin_ifs_image() +{ + local image_id=$1 + + IMAGE_NAME="${CPU_FMS}-${image_id}.scan" + + load_image "$image_id" || return $? + return 0 +} + +test_load_bad_ifs_image() +{ + local image_id=$1 + + IMAGE_NAME="${CPU_FMS}-${image_id}.scan" + + do_cmd "mv -f ${IMG_PATH}/${IMAGE_NAME} ${IMG_PATH}/${IMAGE_NAME}_origin" + + # Set IFS_IMAGE_NEED_RESTORE to true before corrupt the origin ifs image file + IFS_IMAGE_NEED_RESTORE=$TRUE + do_cmd "dd if=/dev/urandom of=${IMG_PATH}/${IMAGE_NAME} bs=1K count=6 2>/dev/null" + + # Use the specified judgment for negative testing + append_log "[$INFO] echo 0x$image_id > ${IFS_SCAN_SYSFS_PATH}/current_batch" + echo "0x$image_id" > "$IFS_SCAN_SYSFS_PATH"/current_batch 2>/dev/null + ret=$? + if [[ "$ret" -ne 0 ]]; then + append_log "[$PASS] Load invalid ifs image failed with ret:$ret not 0 as expected" + else + append_log "[$FAIL] Load invalid ifs image ret:$ret unexpectedly" + fi + + do_cmd "mv -f ${IMG_PATH}/${IMAGE_NAME}_origin ${IMG_PATH}/${IMAGE_NAME}" + IFS_IMAGE_NEED_RESTORE=$FALSE +} + +test_bad_and_origin_ifs_image() +{ + local image_id=$1 + + append_log "[$INFO] Test loading bad and then loading original IFS image:" + test_load_origin_ifs_image "$image_id" || return $? + test_load_bad_ifs_image "$image_id" + # Load origin image again and make sure it's worked + test_load_origin_ifs_image "$image_id" || return $? + append_log "[$INFO] Loading invalid IFS image and then loading initial image passed.\n" +} + +ifs_test_cpu() +{ + local ifs_mode=$1 + local cpu_num=$2 + local image_id status details ret result result_info + + echo "$cpu_num" > "$IFS_PATH"_"$ifs_mode"/"$RUN_TEST" + ret=$? + + status=$(cat "${IFS_PATH}_${ifs_mode}/${STATUS}") + details=$(cat "${IFS_PATH}_${ifs_mode}/${DETAILS}") + + if [[ "$ret" -eq 0 && "$status" == "$STATUS_PASS" ]]; then + result="$PASS" + else + result="$FAIL" + fi + + cpu_num=$(cat "${CPU_SYSFS}/cpu${cpu_num}/topology/thread_siblings_list") + + # There is no image file for IFS ARRAY BIST scan + if [[ -e "${IFS_PATH}_${ifs_mode}/current_batch" ]]; then + image_id=$(cat "${IFS_PATH}_${ifs_mode}/current_batch") + result_info=$(printf "[%s] ifs_%1d cpu(s):%s, current_batch:0x%02x, \ +ret:%2d, status:%s, details:0x%016x" \ + "$result" "$ifs_mode" "$cpu_num" "$image_id" "$ret" \ + "$status" "$details") + else + result_info=$(printf "[%s] ifs_%1d cpu(s):%s, ret:%2d, status:%s, details:0x%016x" \ + "$result" "$ifs_mode" "$cpu_num" "$ret" "$status" "$details") + fi + + append_log "$result_info" +} + +ifs_test_cpus() +{ + local cpus_type=$1 + local ifs_mode=$2 + local image_id=$3 + local cpu_max_num="" + local cpu_num="" + + case "$cpus_type" in + "$ALL") + cpu_max_num=$(($(nproc) - 1)) + cpus=$(seq 0 $cpu_max_num) + ;; + "$SIBLINGS") + cpus=$(cat ${CPU_SYSFS}/cpu*/topology/thread_siblings_list \ + | sed -e 's/,.*//' \ + | sed -e 's/-.*//' \ + | sort -n \ + | uniq) + ;; + *) + test_exit "Invalid cpus_type:$cpus_type" "$KSFT_XFAIL" + ;; + esac + + for cpu_num in $cpus; do + ifs_test_cpu "$ifs_mode" "$cpu_num" + done + + if [[ -z "$image_id" ]]; then + append_log "[$INFO] ifs_$ifs_mode test $cpus_type cpus completed\n" + else + append_log "[$INFO] ifs_$ifs_mode $cpus_type cpus with $CPU_FMS-$image_id.scan \ +completed\n" + fi +} + +test_ifs_same_cpu_loop() +{ + local ifs_mode=$1 + local cpu_num=$2 + local loop_times=$3 + + append_log "[$INFO] Test ifs mode $ifs_mode on CPU:$cpu_num for $loop_times rounds:" + [[ "$ifs_mode" == "$IFS_SCAN_MODE" ]] && { + load_image "$DEFAULT_IMG_ID" || return $? + } + for (( i=1; i<=loop_times; i++ )); do + append_log "[$INFO] Loop iteration: $i in total of $loop_times" + # Only IFS scan needs the interval time + if [[ "$ifs_mode" == "$IFS_SCAN_MODE" ]]; then + do_cmd "sleep $INTERVAL_TIME" + elif [[ "$ifs_mode" == "$IFS_ARRAY_BIST_SCAN_MODE" ]]; then + true + else + test_exit "Invalid ifs_mode:$ifs_mode" "$KSFT_XFAIL" + fi + + ifs_test_cpu "$ifs_mode" "$cpu_num" + done + append_log "[$INFO] $loop_times rounds of ifs_$ifs_mode test on CPU:$cpu_num completed.\n" +} + +test_ifs_scan_available_imgs() +{ + local image_ids="" + local image_id="" + + append_log "[$INFO] Test ifs scan with available images:" + image_ids=$(find "$IMG_PATH" -maxdepth 1 -name "${CPU_FMS}-[0-9a-fA-F][0-9a-fA-F].scan" \ + 2>/dev/null \ + | sort \ + | awk -F "-" '{print $NF}' \ + | cut -d "." -f 1) + + for image_id in $image_ids; do + load_image "$image_id" || return $? + + ifs_test_cpus "$SIBLINGS" "$IFS_SCAN_MODE" "$image_id" + # IFS scan requires time interval for the scan on the same CPU + do_cmd "sleep $INTERVAL_TIME" + done +} + +prepare_ifs_test_env() +{ + local max_cpu="" + + check_cpu_ifs_support_interval_time + + online_all_cpus + max_cpu=$(($(nproc) - 1)) + RANDOM_CPU=$(shuf -i 0-$max_cpu -n 1) + + DEFAULT_IMG_ID=$(find $IMG_PATH -maxdepth 1 -name "${CPU_FMS}-[0-9a-fA-F][0-9a-fA-F].scan" \ + 2>/dev/null \ + | sort \ + | head -n 1 \ + | awk -F "-" '{print $NF}' \ + | cut -d "." -f 1) +} + +test_ifs() +{ + prepare_ifs_test_env + + test_ifs_scan_entry + + if [[ -z "$DEFAULT_IMG_ID" ]]; then + append_log "[$SKIP] No proper ${IMG_PATH}/${CPU_FMS}-*.scan, skip ifs_0 scan" + else + test_bad_and_origin_ifs_image "$DEFAULT_IMG_ID" + test_ifs_scan_available_imgs + test_ifs_same_cpu_loop "$IFS_SCAN_MODE" "$RANDOM_CPU" "$LOOP_TIMES" + fi + + if [[ -d "$IFS_ARRAY_BIST_SYSFS_PATH" ]]; then + ifs_test_cpus "$SIBLINGS" "$IFS_ARRAY_BIST_SCAN_MODE" + test_ifs_same_cpu_loop "$IFS_ARRAY_BIST_SCAN_MODE" "$RANDOM_CPU" "$LOOP_TIMES" + else + append_log "[$SKIP] No $IFS_ARRAY_BIST_SYSFS_PATH, skip IFS ARRAY BIST scan" + fi +} + +trap ifs_cleanup SIGTERM SIGINT +test_ifs +ifs_cleanup diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index fb4472ddffd8..ab67d58cfab7 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -3,8 +3,13 @@ CFLAGS = -Wall CFLAGS += -Wno-nonnull CFLAGS += -D_GNU_SOURCE +ALIGNS := 0x1000 0x200000 0x1000000 +ALIGN_PIES := $(patsubst %,load_address.%,$(ALIGNS)) +ALIGN_STATIC_PIES := $(patsubst %,load_address.static.%,$(ALIGNS)) +ALIGNMENT_TESTS := $(ALIGN_PIES) $(ALIGN_STATIC_PIES) + TEST_PROGS := binfmt_script.py -TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 non-regular +TEST_GEN_PROGS := execveat non-regular $(ALIGNMENT_TESTS) TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir # Makefile is a run-time dependency, since it's accessed by the execveat test TEST_FILES := Makefile @@ -28,9 +33,9 @@ $(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat $(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat cp $< $@ chmod -x $@ -$(OUTPUT)/load_address_4096: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie -static $< -o $@ -$(OUTPUT)/load_address_2097152: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie -static $< -o $@ -$(OUTPUT)/load_address_16777216: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie -static $< -o $@ +$(OUTPUT)/load_address.0x%: load_address.c + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=$(lastword $(subst ., ,$@)) \ + -fPIE -pie $< -o $@ +$(OUTPUT)/load_address.static.0x%: load_address.c + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=$(lastword $(subst ., ,$@)) \ + -fPIE -static-pie $< -o $@ diff --git a/tools/testing/selftests/exec/load_address.c b/tools/testing/selftests/exec/load_address.c index 17e3207d34ae..8257fddba8c8 100644 --- a/tools/testing/selftests/exec/load_address.c +++ b/tools/testing/selftests/exec/load_address.c @@ -5,11 +5,13 @@ #include <link.h> #include <stdio.h> #include <stdlib.h> +#include <stdbool.h> #include "../kselftest.h" struct Statistics { unsigned long long load_address; unsigned long long alignment; + bool interp; }; int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data) @@ -26,11 +28,20 @@ int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data) stats->alignment = 0; for (i = 0; i < info->dlpi_phnum; i++) { + unsigned long long align; + + if (info->dlpi_phdr[i].p_type == PT_INTERP) { + stats->interp = true; + continue; + } + if (info->dlpi_phdr[i].p_type != PT_LOAD) continue; - if (info->dlpi_phdr[i].p_align > stats->alignment) - stats->alignment = info->dlpi_phdr[i].p_align; + align = info->dlpi_phdr[i].p_align; + + if (align > stats->alignment) + stats->alignment = align; } return 1; // Terminate dl_iterate_phdr. @@ -38,27 +49,57 @@ int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data) int main(int argc, char **argv) { - struct Statistics extracted; - unsigned long long misalign; + struct Statistics extracted = { }; + unsigned long long misalign, pow2; + bool interp_needed; + char buf[1024]; + FILE *maps; int ret; ksft_print_header(); - ksft_set_plan(1); + ksft_set_plan(4); + + /* Dump maps file for debugging reference. */ + maps = fopen("/proc/self/maps", "r"); + if (!maps) + ksft_exit_fail_msg("FAILED: /proc/self/maps: %s\n", strerror(errno)); + while (fgets(buf, sizeof(buf), maps)) { + ksft_print_msg("%s", buf); + } + fclose(maps); + /* Walk the program headers. */ ret = dl_iterate_phdr(ExtractStatistics, &extracted); if (ret != 1) ksft_exit_fail_msg("FAILED: dl_iterate_phdr\n"); - if (extracted.alignment == 0) - ksft_exit_fail_msg("FAILED: No alignment found\n"); - else if (extracted.alignment & (extracted.alignment - 1)) - ksft_exit_fail_msg("FAILED: Alignment is not a power of 2\n"); + /* Report our findings. */ + ksft_print_msg("load_address=%#llx alignment=%#llx\n", + extracted.load_address, extracted.alignment); + + /* If we're named with ".static." we expect no INTERP. */ + interp_needed = strstr(argv[0], ".static.") == NULL; + + /* Were we built as expected? */ + ksft_test_result(interp_needed == extracted.interp, + "%s INTERP program header %s\n", + interp_needed ? "Wanted" : "Unwanted", + extracted.interp ? "seen" : "missing"); + + /* Did we find an alignment? */ + ksft_test_result(extracted.alignment != 0, + "Alignment%s found\n", extracted.alignment ? "" : " NOT"); + + /* Is the alignment sane? */ + pow2 = extracted.alignment & (extracted.alignment - 1); + ksft_test_result(pow2 == 0, + "Alignment is%s a power of 2: %#llx\n", + pow2 == 0 ? "" : " NOT", extracted.alignment); + /* Is the load address aligned? */ misalign = extracted.load_address & (extracted.alignment - 1); - if (misalign) - ksft_exit_fail_msg("FAILED: alignment = %llu, load_address = %llu\n", - extracted.alignment, extracted.load_address); + ksft_test_result(misalign == 0, "Load Address is %saligned (%#llx)\n", + misalign ? "MIS" : "", misalign); - ksft_test_result_pass("Completed\n"); ksft_finished(); } diff --git a/tools/testing/selftests/fchmodat2/Makefile b/tools/testing/selftests/fchmodat2/Makefile index 71ec34bf1501..4373cea79b79 100644 --- a/tools/testing/selftests/fchmodat2/Makefile +++ b/tools/testing/selftests/fchmodat2/Makefile @@ -1,6 +1,15 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan $(KHDR_INCLUDES) +CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined $(KHDR_INCLUDES) + +# gcc requires -static-libasan in order to ensure that Address Sanitizer's +# library is the first one loaded. However, clang already statically links the +# Address Sanitizer if -fsanitize is specified. Therefore, simply omit +# -static-libasan for clang builds. +ifeq ($(LLVM),) + CFLAGS += -static-libasan +endif + TEST_GEN_PROGS := fchmodat2_test include ../lib.mk diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile index 07a0d5b545ca..3af3136e35a4 100644 --- a/tools/testing/selftests/filesystems/statmount/Makefile +++ b/tools/testing/selftests/filesystems/statmount/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-or-later CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) -TEST_GEN_PROGS := statmount_test +TEST_GEN_PROGS := statmount_test statmount_test_ns include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h new file mode 100644 index 000000000000..f4294bab9d73 --- /dev/null +++ b/tools/testing/selftests/filesystems/statmount/statmount.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __STATMOUNT_H +#define __STATMOUNT_H + +#include <stdint.h> +#include <linux/mount.h> +#include <asm/unistd.h> + +static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask, + struct statmount *buf, size_t bufsize, + unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER0, + .mnt_id = mnt_id, + .param = mask, + }; + + if (mnt_ns_id) { + req.size = MNT_ID_REQ_SIZE_VER1; + req.mnt_ns_id = mnt_ns_id; + } + + return syscall(__NR_statmount, &req, buf, bufsize, flags); +} + +static ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id, + uint64_t last_mnt_id, uint64_t list[], size_t num, + unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER0, + .mnt_id = mnt_id, + .param = last_mnt_id, + }; + + if (mnt_ns_id) { + req.size = MNT_ID_REQ_SIZE_VER1; + req.mnt_ns_id = mnt_ns_id; + } + + return syscall(__NR_listmount, &req, list, num, flags); +} + +#endif /* __STATMOUNT_H */ diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c index e6d7c4f1c85b..c773334bbcc9 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -4,17 +4,15 @@ #include <assert.h> #include <stddef.h> -#include <stdint.h> #include <sched.h> #include <fcntl.h> #include <sys/param.h> #include <sys/mount.h> #include <sys/stat.h> #include <sys/statfs.h> -#include <linux/mount.h> #include <linux/stat.h> -#include <asm/unistd.h> +#include "statmount.h" #include "../../kselftest.h" static const char *const known_fs[] = { @@ -36,18 +34,6 @@ static const char *const known_fs[] = { "ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs", "zonefs", NULL }; -static int statmount(uint64_t mnt_id, uint64_t mask, struct statmount *buf, - size_t bufsize, unsigned int flags) -{ - struct mnt_id_req req = { - .size = MNT_ID_REQ_SIZE_VER0, - .mnt_id = mnt_id, - .param = mask, - }; - - return syscall(__NR_statmount, &req, buf, bufsize, flags); -} - static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags) { size_t bufsize = 1 << 15; @@ -56,7 +42,7 @@ static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigne int ret; for (;;) { - ret = statmount(mnt_id, mask, tmp, bufsize, flags); + ret = statmount(mnt_id, 0, mask, tmp, bufsize, flags); if (ret != -1) break; if (tofree) @@ -121,12 +107,20 @@ static char root_mntpoint[] = "/tmp/statmount_test_root.XXXXXX"; static int orig_root; static uint64_t root_id, parent_id; static uint32_t old_root_id, old_parent_id; - +static FILE *f_mountinfo; static void cleanup_namespace(void) { - fchdir(orig_root); - chroot("."); + int ret; + + ret = fchdir(orig_root); + if (ret == -1) + ksft_perror("fchdir to original root"); + + ret = chroot("."); + if (ret == -1) + ksft_perror("chroot to original root"); + umount2(root_mntpoint, MNT_DETACH); rmdir(root_mntpoint); } @@ -138,7 +132,7 @@ static void setup_namespace(void) uid_t uid = getuid(); gid_t gid = getgid(); - ret = unshare(CLONE_NEWNS|CLONE_NEWUSER); + ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID); if (ret == -1) ksft_exit_fail_msg("unsharing mountns and userns: %s\n", strerror(errno)); @@ -149,6 +143,11 @@ static void setup_namespace(void) sprintf(buf, "0 %d 1", gid); write_file("/proc/self/gid_map", buf); + f_mountinfo = fopen("/proc/self/mountinfo", "re"); + if (!f_mountinfo) + ksft_exit_fail_msg("failed to open mountinfo: %s\n", + strerror(errno)); + ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); if (ret == -1) ksft_exit_fail_msg("making mount tree private: %s\n", @@ -208,25 +207,13 @@ static int setup_mount_tree(int log2_num) return 0; } -static ssize_t listmount(uint64_t mnt_id, uint64_t last_mnt_id, - uint64_t list[], size_t num, unsigned int flags) -{ - struct mnt_id_req req = { - .size = MNT_ID_REQ_SIZE_VER0, - .mnt_id = mnt_id, - .param = last_mnt_id, - }; - - return syscall(__NR_listmount, &req, list, num, flags); -} - static void test_listmount_empty_root(void) { ssize_t res; const unsigned int size = 32; uint64_t list[size]; - res = listmount(LSMT_ROOT, 0, list, size, 0); + res = listmount(LSMT_ROOT, 0, 0, list, size, 0); if (res == -1) { ksft_test_result_fail("listmount: %s\n", strerror(errno)); return; @@ -251,7 +238,7 @@ static void test_statmount_zero_mask(void) struct statmount sm; int ret; - ret = statmount(root_id, 0, &sm, sizeof(sm), 0); + ret = statmount(root_id, 0, 0, &sm, sizeof(sm), 0); if (ret == -1) { ksft_test_result_fail("statmount zero mask: %s\n", strerror(errno)); @@ -277,7 +264,7 @@ static void test_statmount_mnt_basic(void) int ret; uint64_t mask = STATMOUNT_MNT_BASIC; - ret = statmount(root_id, mask, &sm, sizeof(sm), 0); + ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0); if (ret == -1) { ksft_test_result_fail("statmount mnt basic: %s\n", strerror(errno)); @@ -337,7 +324,7 @@ static void test_statmount_sb_basic(void) struct statx sx; struct statfs sf; - ret = statmount(root_id, mask, &sm, sizeof(sm), 0); + ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0); if (ret == -1) { ksft_test_result_fail("statmount sb basic: %s\n", strerror(errno)); @@ -462,6 +449,88 @@ static void test_statmount_fs_type(void) free(sm); } +static void test_statmount_mnt_opts(void) +{ + struct statmount *sm; + const char *statmount_opts; + char *line = NULL; + size_t len = 0; + + sm = statmount_alloc(root_id, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_OPTS, + 0); + if (!sm) { + ksft_test_result_fail("statmount mnt opts: %s\n", + strerror(errno)); + return; + } + + while (getline(&line, &len, f_mountinfo) != -1) { + int i; + char *p, *p2; + unsigned int old_mnt_id; + + old_mnt_id = atoi(line); + if (old_mnt_id != sm->mnt_id_old) + continue; + + for (p = line, i = 0; p && i < 5; i++) + p = strchr(p + 1, ' '); + if (!p) + continue; + + p2 = strchr(p + 1, ' '); + if (!p2) + continue; + *p2 = '\0'; + p = strchr(p2 + 1, '-'); + if (!p) + continue; + for (p++, i = 0; p && i < 2; i++) + p = strchr(p + 1, ' '); + if (!p) + continue; + p++; + + /* skip generic superblock options */ + if (strncmp(p, "ro", 2) == 0) + p += 2; + else if (strncmp(p, "rw", 2) == 0) + p += 2; + if (*p == ',') + p++; + if (strncmp(p, "sync", 4) == 0) + p += 4; + if (*p == ',') + p++; + if (strncmp(p, "dirsync", 7) == 0) + p += 7; + if (*p == ',') + p++; + if (strncmp(p, "lazytime", 8) == 0) + p += 8; + if (*p == ',') + p++; + p2 = strrchr(p, '\n'); + if (p2) + *p2 = '\0'; + + statmount_opts = sm->str + sm->mnt_opts; + if (strcmp(statmount_opts, p) != 0) + ksft_test_result_fail( + "unexpected mount options: '%s' != '%s'\n", + statmount_opts, p); + else + ksft_test_result_pass("statmount mount options\n"); + free(sm); + free(line); + return; + } + + ksft_test_result_fail("didnt't find mount entry\n"); + free(sm); + free(line); +} + static void test_statmount_string(uint64_t mask, size_t off, const char *name) { struct statmount *sm; @@ -498,14 +567,14 @@ static void test_statmount_string(uint64_t mask, size_t off, const char *name) exactsize = sm->size; shortsize = sizeof(*sm) + i; - ret = statmount(root_id, mask, sm, exactsize, 0); + ret = statmount(root_id, 0, mask, sm, exactsize, 0); if (ret == -1) { ksft_test_result_fail("statmount exact size: %s\n", strerror(errno)); goto out; } errno = 0; - ret = statmount(root_id, mask, sm, shortsize, 0); + ret = statmount(root_id, 0, mask, sm, shortsize, 0); if (ret != -1 || errno != EOVERFLOW) { ksft_test_result_fail("should have failed with EOVERFLOW: %s\n", strerror(errno)); @@ -533,7 +602,7 @@ static void test_listmount_tree(void) if (res == -1) return; - num = res = listmount(LSMT_ROOT, 0, list, size, 0); + num = res = listmount(LSMT_ROOT, 0, 0, list, size, 0); if (res == -1) { ksft_test_result_fail("listmount: %s\n", strerror(errno)); return; @@ -545,7 +614,7 @@ static void test_listmount_tree(void) } for (i = 0; i < size - step;) { - res = listmount(LSMT_ROOT, i ? list2[i - 1] : 0, list2 + i, step, 0); + res = listmount(LSMT_ROOT, 0, i ? list2[i - 1] : 0, list2 + i, step, 0); if (res == -1) ksft_test_result_fail("short listmount: %s\n", strerror(errno)); @@ -577,18 +646,18 @@ int main(void) int ret; uint64_t all_mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC | STATMOUNT_PROPAGATE_FROM | STATMOUNT_MNT_ROOT | - STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE; + STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE | STATMOUNT_MNT_NS_ID; ksft_print_header(); - ret = statmount(0, 0, NULL, 0, 0); + ret = statmount(0, 0, 0, NULL, 0, 0); assert(ret == -1); if (errno == ENOSYS) ksft_exit_skip("statmount() syscall not supported\n"); setup_namespace(); - ksft_set_plan(14); + ksft_set_plan(15); test_listmount_empty_root(); test_statmount_zero_mask(); test_statmount_mnt_basic(); @@ -596,6 +665,7 @@ int main(void) test_statmount_mnt_root(); test_statmount_mnt_point(); test_statmount_fs_type(); + test_statmount_mnt_opts(); test_statmount_string(STATMOUNT_MNT_ROOT, str_off(mnt_root), "mount root"); test_statmount_string(STATMOUNT_MNT_POINT, str_off(mnt_point), "mount point"); test_statmount_string(STATMOUNT_FS_TYPE, str_off(fs_type), "fs type"); diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c new file mode 100644 index 000000000000..e044f5fc57fd --- /dev/null +++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c @@ -0,0 +1,364 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#define _GNU_SOURCE + +#include <assert.h> +#include <fcntl.h> +#include <limits.h> +#include <sched.h> +#include <stdlib.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <linux/nsfs.h> +#include <linux/stat.h> + +#include "statmount.h" +#include "../../kselftest.h" + +#define NSID_PASS 0 +#define NSID_FAIL 1 +#define NSID_SKIP 2 +#define NSID_ERROR 3 + +static void handle_result(int ret, const char *testname) +{ + if (ret == NSID_PASS) + ksft_test_result_pass("%s\n", testname); + else if (ret == NSID_FAIL) + ksft_test_result_fail("%s\n", testname); + else if (ret == NSID_ERROR) + ksft_exit_fail_msg("%s\n", testname); + else + ksft_test_result_skip("%s\n", testname); +} + +static inline int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + ksft_print_msg("waitpid returned -1, errno=%d\n", errno); + return -1; + } + + if (!WIFEXITED(status)) { + ksft_print_msg( + "waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n", + WIFSIGNALED(status), WTERMSIG(status)); + return -1; + } + + ret = WEXITSTATUS(status); + return ret; +} + +static int get_mnt_ns_id(const char *mnt_ns, uint64_t *mnt_ns_id) +{ + int fd = open(mnt_ns, O_RDONLY); + + if (fd < 0) { + ksft_print_msg("failed to open for ns %s: %s\n", + mnt_ns, strerror(errno)); + sleep(60); + return NSID_ERROR; + } + + if (ioctl(fd, NS_GET_MNTNS_ID, mnt_ns_id) < 0) { + ksft_print_msg("failed to get the nsid for ns %s: %s\n", + mnt_ns, strerror(errno)); + return NSID_ERROR; + } + close(fd); + return NSID_PASS; +} + +static int get_mnt_id(const char *path, uint64_t *mnt_id) +{ + struct statx sx; + int ret; + + ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx); + if (ret == -1) { + ksft_print_msg("retrieving unique mount ID for %s: %s\n", path, + strerror(errno)); + return NSID_ERROR; + } + + if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) { + ksft_print_msg("no unique mount ID available for %s\n", path); + return NSID_ERROR; + } + + *mnt_id = sx.stx_mnt_id; + return NSID_PASS; +} + +static int write_file(const char *path, const char *val) +{ + int fd = open(path, O_WRONLY); + size_t len = strlen(val); + int ret; + + if (fd == -1) { + ksft_print_msg("opening %s for write: %s\n", path, strerror(errno)); + return NSID_ERROR; + } + + ret = write(fd, val, len); + if (ret == -1) { + ksft_print_msg("writing to %s: %s\n", path, strerror(errno)); + return NSID_ERROR; + } + if (ret != len) { + ksft_print_msg("short write to %s\n", path); + return NSID_ERROR; + } + + ret = close(fd); + if (ret == -1) { + ksft_print_msg("closing %s\n", path); + return NSID_ERROR; + } + + return NSID_PASS; +} + +static int setup_namespace(void) +{ + int ret; + char buf[32]; + uid_t uid = getuid(); + gid_t gid = getgid(); + + ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID); + if (ret == -1) + ksft_exit_fail_msg("unsharing mountns and userns: %s\n", + strerror(errno)); + + sprintf(buf, "0 %d 1", uid); + ret = write_file("/proc/self/uid_map", buf); + if (ret != NSID_PASS) + return ret; + ret = write_file("/proc/self/setgroups", "deny"); + if (ret != NSID_PASS) + return ret; + sprintf(buf, "0 %d 1", gid); + ret = write_file("/proc/self/gid_map", buf); + if (ret != NSID_PASS) + return ret; + + ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); + if (ret == -1) { + ksft_print_msg("making mount tree private: %s\n", + strerror(errno)); + return NSID_ERROR; + } + + return NSID_PASS; +} + +static int _test_statmount_mnt_ns_id(void) +{ + struct statmount sm; + uint64_t mnt_ns_id; + uint64_t root_id; + int ret; + + ret = get_mnt_ns_id("/proc/self/ns/mnt", &mnt_ns_id); + if (ret != NSID_PASS) + return ret; + + ret = get_mnt_id("/", &root_id); + if (ret != NSID_PASS) + return ret; + + ret = statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0); + if (ret == -1) { + ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno)); + return NSID_ERROR; + } + + if (sm.size != sizeof(sm)) { + ksft_print_msg("unexpected size: %u != %u\n", sm.size, + (uint32_t)sizeof(sm)); + return NSID_FAIL; + } + if (sm.mask != STATMOUNT_MNT_NS_ID) { + ksft_print_msg("statmount mnt ns id unavailable\n"); + return NSID_SKIP; + } + + if (sm.mnt_ns_id != mnt_ns_id) { + ksft_print_msg("unexpected mnt ns ID: 0x%llx != 0x%llx\n", + (unsigned long long)sm.mnt_ns_id, + (unsigned long long)mnt_ns_id); + return NSID_FAIL; + } + + return NSID_PASS; +} + +static void test_statmount_mnt_ns_id(void) +{ + pid_t pid; + int ret; + + pid = fork(); + if (pid < 0) + ksft_exit_fail_msg("failed to fork: %s\n", strerror(errno)); + + /* We're the original pid, wait for the result. */ + if (pid != 0) { + ret = wait_for_pid(pid); + handle_result(ret, "test statmount ns id"); + return; + } + + ret = setup_namespace(); + if (ret != NSID_PASS) + exit(ret); + ret = _test_statmount_mnt_ns_id(); + exit(ret); +} + +static int validate_external_listmount(pid_t pid, uint64_t child_nr_mounts) +{ + uint64_t list[256]; + uint64_t mnt_ns_id; + uint64_t nr_mounts; + char buf[256]; + int ret; + + /* Get the mount ns id for our child. */ + snprintf(buf, sizeof(buf), "/proc/%lu/ns/mnt", (unsigned long)pid); + ret = get_mnt_ns_id(buf, &mnt_ns_id); + + nr_mounts = listmount(LSMT_ROOT, mnt_ns_id, 0, list, 256, 0); + if (nr_mounts == (uint64_t)-1) { + ksft_print_msg("listmount: %s\n", strerror(errno)); + return NSID_ERROR; + } + + if (nr_mounts != child_nr_mounts) { + ksft_print_msg("listmount results is %zi != %zi\n", nr_mounts, + child_nr_mounts); + return NSID_FAIL; + } + + /* Validate that all of our entries match our mnt_ns_id. */ + for (int i = 0; i < nr_mounts; i++) { + struct statmount sm; + + ret = statmount(list[i], mnt_ns_id, STATMOUNT_MNT_NS_ID, &sm, + sizeof(sm), 0); + if (ret < 0) { + ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno)); + return NSID_ERROR; + } + + if (sm.mask != STATMOUNT_MNT_NS_ID) { + ksft_print_msg("statmount mnt ns id unavailable\n"); + return NSID_SKIP; + } + + if (sm.mnt_ns_id != mnt_ns_id) { + ksft_print_msg("listmount gave us the wrong ns id: 0x%llx != 0x%llx\n", + (unsigned long long)sm.mnt_ns_id, + (unsigned long long)mnt_ns_id); + return NSID_FAIL; + } + } + + return NSID_PASS; +} + +static void test_listmount_ns(void) +{ + uint64_t nr_mounts; + char pval; + int child_ready_pipe[2]; + int parent_ready_pipe[2]; + pid_t pid; + int ret, child_ret; + + if (pipe(child_ready_pipe) < 0) + ksft_exit_fail_msg("failed to create the child pipe: %s\n", + strerror(errno)); + if (pipe(parent_ready_pipe) < 0) + ksft_exit_fail_msg("failed to create the parent pipe: %s\n", + strerror(errno)); + + pid = fork(); + if (pid < 0) + ksft_exit_fail_msg("failed to fork: %s\n", strerror(errno)); + + if (pid == 0) { + char cval; + uint64_t list[256]; + + close(child_ready_pipe[0]); + close(parent_ready_pipe[1]); + + ret = setup_namespace(); + if (ret != NSID_PASS) + exit(ret); + + nr_mounts = listmount(LSMT_ROOT, 0, 0, list, 256, 0); + if (nr_mounts == (uint64_t)-1) { + ksft_print_msg("listmount: %s\n", strerror(errno)); + exit(NSID_FAIL); + } + + /* + * Tell our parent how many mounts we have, and then wait for it + * to tell us we're done. + */ + write(child_ready_pipe[1], &nr_mounts, sizeof(nr_mounts)); + read(parent_ready_pipe[0], &cval, sizeof(cval)); + exit(NSID_PASS); + } + + close(child_ready_pipe[1]); + close(parent_ready_pipe[0]); + + /* Wait until the child has created everything. */ + if (read(child_ready_pipe[0], &nr_mounts, sizeof(nr_mounts)) != + sizeof(nr_mounts)) + ret = NSID_ERROR; + + ret = validate_external_listmount(pid, nr_mounts); + + if (write(parent_ready_pipe[1], &pval, sizeof(pval)) != sizeof(pval)) + ret = NSID_ERROR; + + child_ret = wait_for_pid(pid); + if (child_ret != NSID_PASS) + ret = child_ret; + handle_result(ret, "test listmount ns id"); +} + +int main(void) +{ + int ret; + + ksft_print_header(); + ret = statmount(0, 0, 0, NULL, 0, 0); + assert(ret == -1); + if (errno == ENOSYS) + ksft_exit_skip("statmount() syscall not supported\n"); + + ksft_set_plan(2); + test_statmount_mnt_ns_id(); + test_listmount_ns(); + + if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0) + ksft_exit_fail(); + else + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/hid/hid_bpf.c b/tools/testing/selftests/hid/hid_bpf.c index f825623e3edc..dc0408a831d0 100644 --- a/tools/testing/selftests/hid/hid_bpf.c +++ b/tools/testing/selftests/hid/hid_bpf.c @@ -460,7 +460,7 @@ FIXTURE(hid_bpf) { int hid_id; pthread_t tid; struct hid *skel; - int hid_links[3]; /* max number of programs loaded in a single test */ + struct bpf_link *hid_links[3]; /* max number of programs loaded in a single test */ }; static void detach_bpf(FIXTURE_DATA(hid_bpf) * self) { @@ -470,9 +470,14 @@ static void detach_bpf(FIXTURE_DATA(hid_bpf) * self) close(self->hidraw_fd); self->hidraw_fd = 0; + if (!self->skel) + return; + + hid__detach(self->skel); + for (i = 0; i < ARRAY_SIZE(self->hid_links); i++) { if (self->hid_links[i]) - close(self->hid_links[i]); + bpf_link__destroy(self->hid_links[i]); } hid__destroy(self->skel); @@ -527,14 +532,7 @@ static void load_programs(const struct test_program programs[], FIXTURE_DATA(hid_bpf) * self, const FIXTURE_VARIANT(hid_bpf) * variant) { - int attach_fd, err = -EINVAL; - struct attach_prog_args args = { - .retval = -1, - }; - DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattr, - .ctx_in = &args, - .ctx_size_in = sizeof(args), - ); + int err = -EINVAL; ASSERT_LE(progs_count, ARRAY_SIZE(self->hid_links)) TH_LOG("too many programs are to be loaded"); @@ -545,37 +543,45 @@ static void load_programs(const struct test_program programs[], for (int i = 0; i < progs_count; i++) { struct bpf_program *prog; + struct bpf_map *map; + int *ops_hid_id; prog = bpf_object__find_program_by_name(*self->skel->skeleton->obj, programs[i].name); ASSERT_OK_PTR(prog) TH_LOG("can not find program by name '%s'", programs[i].name); bpf_program__set_autoload(prog, true); + + map = bpf_object__find_map_by_name(*self->skel->skeleton->obj, + programs[i].name + 4); + ASSERT_OK_PTR(map) TH_LOG("can not find struct_ops by name '%s'", + programs[i].name + 4); + + /* hid_id is the first field of struct hid_bpf_ops */ + ops_hid_id = bpf_map__initial_value(map, NULL); + ASSERT_OK_PTR(ops_hid_id) TH_LOG("unable to retrieve struct_ops data"); + + *ops_hid_id = self->hid_id; } err = hid__load(self->skel); ASSERT_OK(err) TH_LOG("hid_skel_load failed: %d", err); - attach_fd = bpf_program__fd(self->skel->progs.attach_prog); - ASSERT_GE(attach_fd, 0) TH_LOG("locate attach_prog: %d", attach_fd); - for (int i = 0; i < progs_count; i++) { - struct bpf_program *prog; + struct bpf_map *map; - prog = bpf_object__find_program_by_name(*self->skel->skeleton->obj, - programs[i].name); - ASSERT_OK_PTR(prog) TH_LOG("can not find program by name '%s'", programs[i].name); - - args.prog_fd = bpf_program__fd(prog); - args.hid = self->hid_id; - args.insert_head = programs[i].insert_head; - err = bpf_prog_test_run_opts(attach_fd, &tattr); - ASSERT_GE(args.retval, 0) - TH_LOG("attach_hid(%s): %d", programs[i].name, args.retval); + map = bpf_object__find_map_by_name(*self->skel->skeleton->obj, + programs[i].name + 4); + ASSERT_OK_PTR(map) TH_LOG("can not find struct_ops by name '%s'", + programs[i].name + 4); - self->hid_links[i] = args.retval; + self->hid_links[i] = bpf_map__attach_struct_ops(map); + ASSERT_OK_PTR(self->hid_links[i]) TH_LOG("failed to attach struct ops '%s'", + programs[i].name + 4); } + hid__attach(self->skel); + self->hidraw_fd = open_hidraw(self->dev_id); ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw"); } @@ -640,6 +646,47 @@ TEST_F(hid_bpf, raw_event) } /* + * Attach hid_first_event to the given uhid device, + * retrieve and open the matching hidraw node, + * inject one event in the uhid device, + * check that the program sees it and can change the data + */ +TEST_F(hid_bpf, subprog_raw_event) +{ + const struct test_program progs[] = { + { .name = "hid_subprog_first_event" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* inject one event */ + buf[0] = 1; + buf[1] = 42; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[2], 47); + + /* inject another event */ + memset(buf, 0, sizeof(buf)); + buf[0] = 1; + buf[1] = 47; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[2], 52); +} + +/* * Ensures that we can attach/detach programs */ TEST_F(hid_bpf, test_attach_detach) @@ -648,13 +695,17 @@ TEST_F(hid_bpf, test_attach_detach) { .name = "hid_first_event" }, { .name = "hid_second_event" }, }; + struct bpf_link *link; __u8 buf[10] = {0}; - int err, link; + int err, link_fd; LOAD_PROGRAMS(progs); link = self->hid_links[0]; - ASSERT_GT(link, 0) TH_LOG("HID-BPF link not created"); + ASSERT_OK_PTR(link) TH_LOG("HID-BPF link not created"); + + link_fd = bpf_link__fd(link); + ASSERT_GE(link_fd, 0) TH_LOG("HID-BPF link FD not valid"); /* inject one event */ buf[0] = 1; @@ -673,7 +724,7 @@ TEST_F(hid_bpf, test_attach_detach) /* pin the first program and immediately unpin it */ #define PIN_PATH "/sys/fs/bpf/hid_first_event" - err = bpf_obj_pin(link, PIN_PATH); + err = bpf_obj_pin(link_fd, PIN_PATH); ASSERT_OK(err) TH_LOG("error while calling bpf_obj_pin"); remove(PIN_PATH); #undef PIN_PATH @@ -876,6 +927,325 @@ TEST_F(hid_bpf, test_hid_user_raw_request_call) } /* + * Call hid_hw_raw_request against the given uhid device, + * check that the program is called and prevents the + * call to uhid. + */ +TEST_F(hid_bpf, test_hid_filter_raw_request_call) +{ + const struct test_program progs[] = { + { .name = "hid_test_filter_raw_request" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* first check that we did not attach to device_event */ + + /* inject one event */ + buf[0] = 1; + buf[1] = 42; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[1], 42); + ASSERT_EQ(buf[2], 0) TH_LOG("leftovers_from_previous_test"); + + /* now check that our program is preventing hid_hw_raw_request() */ + + /* emit hid_hw_raw_request from hidraw */ + /* Get Feature */ + memset(buf, 0, sizeof(buf)); + buf[0] = 0x1; /* Report Number */ + err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf); + ASSERT_LT(err, 0) TH_LOG("unexpected success while reading HIDIOCGFEATURE: %d", err); + ASSERT_EQ(errno, 20) TH_LOG("unexpected error code while reading HIDIOCGFEATURE: %d", + errno); + + /* remove our bpf program and check that we can now emit commands */ + + /* detach the program */ + detach_bpf(self); + + self->hidraw_fd = open_hidraw(self->dev_id); + ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw"); + + err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf); + ASSERT_GE(err, 0) TH_LOG("error while reading HIDIOCGFEATURE: %d", err); +} + +/* + * Call hid_hw_raw_request against the given uhid device, + * check that the program is called and can issue the call + * to uhid and transform the answer. + */ +TEST_F(hid_bpf, test_hid_change_raw_request_call) +{ + const struct test_program progs[] = { + { .name = "hid_test_hidraw_raw_request" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* emit hid_hw_raw_request from hidraw */ + /* Get Feature */ + memset(buf, 0, sizeof(buf)); + buf[0] = 0x1; /* Report Number */ + err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf); + ASSERT_EQ(err, 3) TH_LOG("unexpected returned size while reading HIDIOCGFEATURE: %d", err); + + ASSERT_EQ(buf[0], 2); + ASSERT_EQ(buf[1], 3); + ASSERT_EQ(buf[2], 4); +} + +/* + * Call hid_hw_raw_request against the given uhid device, + * check that the program is not making infinite loops. + */ +TEST_F(hid_bpf, test_hid_infinite_loop_raw_request_call) +{ + const struct test_program progs[] = { + { .name = "hid_test_infinite_loop_raw_request" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* emit hid_hw_raw_request from hidraw */ + /* Get Feature */ + memset(buf, 0, sizeof(buf)); + buf[0] = 0x1; /* Report Number */ + err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf); + ASSERT_EQ(err, 3) TH_LOG("unexpected returned size while reading HIDIOCGFEATURE: %d", err); +} + +/* + * Call hid_hw_output_report against the given uhid device, + * check that the program is called and prevents the + * call to uhid. + */ +TEST_F(hid_bpf, test_hid_filter_output_report_call) +{ + const struct test_program progs[] = { + { .name = "hid_test_filter_output_report" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* first check that we did not attach to device_event */ + + /* inject one event */ + buf[0] = 1; + buf[1] = 42; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[1], 42); + ASSERT_EQ(buf[2], 0) TH_LOG("leftovers_from_previous_test"); + + /* now check that our program is preventing hid_hw_output_report() */ + + buf[0] = 1; /* report ID */ + buf[1] = 2; + buf[2] = 42; + + err = write(self->hidraw_fd, buf, 3); + ASSERT_LT(err, 0) TH_LOG("unexpected success while sending hid_hw_output_report: %d", err); + ASSERT_EQ(errno, 25) TH_LOG("unexpected error code while sending hid_hw_output_report: %d", + errno); + + /* remove our bpf program and check that we can now emit commands */ + + /* detach the program */ + detach_bpf(self); + + self->hidraw_fd = open_hidraw(self->dev_id); + ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw"); + + err = write(self->hidraw_fd, buf, 3); + ASSERT_GE(err, 0) TH_LOG("error while sending hid_hw_output_report: %d", err); +} + +/* + * Call hid_hw_output_report against the given uhid device, + * check that the program is called and can issue the call + * to uhid and transform the answer. + */ +TEST_F(hid_bpf, test_hid_change_output_report_call) +{ + const struct test_program progs[] = { + { .name = "hid_test_hidraw_output_report" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* emit hid_hw_output_report from hidraw */ + buf[0] = 1; /* report ID */ + buf[1] = 2; + buf[2] = 42; + + err = write(self->hidraw_fd, buf, 10); + ASSERT_EQ(err, 2) TH_LOG("unexpected returned size while sending hid_hw_output_report: %d", + err); +} + +/* + * Call hid_hw_output_report against the given uhid device, + * check that the program is not making infinite loops. + */ +TEST_F(hid_bpf, test_hid_infinite_loop_output_report_call) +{ + const struct test_program progs[] = { + { .name = "hid_test_infinite_loop_output_report" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* emit hid_hw_output_report from hidraw */ + buf[0] = 1; /* report ID */ + buf[1] = 2; + buf[2] = 42; + + err = write(self->hidraw_fd, buf, 8); + ASSERT_EQ(err, 2) TH_LOG("unexpected returned size while sending hid_hw_output_report: %d", + err); +} + +/* + * Attach hid_multiply_event_wq to the given uhid device, + * retrieve and open the matching hidraw node, + * inject one event in the uhid device, + * check that the program sees it and can add extra data + */ +TEST_F(hid_bpf, test_multiply_events_wq) +{ + const struct test_program progs[] = { + { .name = "hid_test_multiply_events_wq" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* inject one event */ + buf[0] = 1; + buf[1] = 42; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[1], 47); + + usleep(100000); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 9) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 2); + ASSERT_EQ(buf[1], 3); +} + +/* + * Attach hid_multiply_event to the given uhid device, + * retrieve and open the matching hidraw node, + * inject one event in the uhid device, + * check that the program sees it and can add extra data + */ +TEST_F(hid_bpf, test_multiply_events) +{ + const struct test_program progs[] = { + { .name = "hid_test_multiply_events" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* inject one event */ + buf[0] = 1; + buf[1] = 42; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 9) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 2); + ASSERT_EQ(buf[1], 47); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 9) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 2); + ASSERT_EQ(buf[1], 52); +} + +/* + * Call hid_bpf_input_report against the given uhid device, + * check that the program is not making infinite loops. + */ +TEST_F(hid_bpf, test_hid_infinite_loop_input_report_call) +{ + const struct test_program progs[] = { + { .name = "hid_test_infinite_loop_input_report" }, + }; + __u8 buf[10] = {0}; + int err; + + LOAD_PROGRAMS(progs); + + /* emit hid_hw_output_report from hidraw */ + buf[0] = 1; /* report ID */ + buf[1] = 2; + buf[2] = 42; + + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[1], 3); + + /* read the data from hidraw: hid_bpf_try_input_report should work exactly one time */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[1], 4); + + /* read the data from hidraw: there should be none */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, -1) TH_LOG("read_hidraw"); +} + +/* * Attach hid_insert{0,1,2} to the given uhid device, * retrieve and open the matching hidraw node, * inject one event in the uhid device, diff --git a/tools/testing/selftests/hid/progs/hid.c b/tools/testing/selftests/hid/progs/hid.c index f67d35def142..ee9bbbcf751b 100644 --- a/tools/testing/selftests/hid/progs/hid.c +++ b/tools/testing/selftests/hid/progs/hid.c @@ -14,8 +14,8 @@ struct attach_prog_args { __u64 callback_check = 52; __u64 callback2_check = 52; -SEC("?fmod_ret/hid_bpf_device_event") -int BPF_PROG(hid_first_event, struct hid_bpf_ctx *hid_ctx) +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_first_event, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) { __u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 3 /* size */); @@ -29,8 +29,38 @@ int BPF_PROG(hid_first_event, struct hid_bpf_ctx *hid_ctx) return hid_ctx->size; } -SEC("?fmod_ret/hid_bpf_device_event") -int BPF_PROG(hid_second_event, struct hid_bpf_ctx *hid_ctx) +SEC(".struct_ops.link") +struct hid_bpf_ops first_event = { + .hid_device_event = (void *)hid_first_event, + .hid_id = 2, +}; + +int __hid_subprog_first_event(struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) +{ + __u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 3 /* size */); + + if (!rw_data) + return 0; /* EPERM check */ + + rw_data[2] = rw_data[1] + 5; + + return hid_ctx->size; +} + +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_subprog_first_event, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) +{ + return __hid_subprog_first_event(hid_ctx, type); +} + +SEC(".struct_ops.link") +struct hid_bpf_ops subprog_first_event = { + .hid_device_event = (void *)hid_subprog_first_event, + .hid_id = 2, +}; + +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_second_event, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) { __u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */); @@ -42,8 +72,13 @@ int BPF_PROG(hid_second_event, struct hid_bpf_ctx *hid_ctx) return hid_ctx->size; } -SEC("?fmod_ret/hid_bpf_device_event") -int BPF_PROG(hid_change_report_id, struct hid_bpf_ctx *hid_ctx) +SEC(".struct_ops.link") +struct hid_bpf_ops second_event = { + .hid_device_event = (void *)hid_second_event, +}; + +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_change_report_id, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) { __u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 3 /* size */); @@ -55,15 +90,10 @@ int BPF_PROG(hid_change_report_id, struct hid_bpf_ctx *hid_ctx) return 9; } -SEC("syscall") -int attach_prog(struct attach_prog_args *ctx) -{ - ctx->retval = hid_bpf_attach_prog(ctx->hid, - ctx->prog_fd, - ctx->insert_head ? HID_BPF_FLAG_INSERT_HEAD : - HID_BPF_FLAG_NONE); - return 0; -} +SEC(".struct_ops.link") +struct hid_bpf_ops change_report_id = { + .hid_device_event = (void *)hid_change_report_id, +}; struct hid_hw_request_syscall_args { /* data needs to come at offset 0 so we can use it in calls */ @@ -181,7 +211,12 @@ static const __u8 rdesc[] = { 0xc0, /* END_COLLECTION */ }; -SEC("?fmod_ret/hid_bpf_rdesc_fixup") +/* + * the following program is marked as sleepable (struct_ops.s). + * This is not strictly mandatory but is a nice test for + * sleepable struct_ops + */ +SEC("?struct_ops.s/hid_rdesc_fixup") int BPF_PROG(hid_rdesc_fixup, struct hid_bpf_ctx *hid_ctx) { __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4096 /* size */); @@ -200,8 +235,13 @@ int BPF_PROG(hid_rdesc_fixup, struct hid_bpf_ctx *hid_ctx) return sizeof(rdesc) + 73; } -SEC("?fmod_ret/hid_bpf_device_event") -int BPF_PROG(hid_test_insert1, struct hid_bpf_ctx *hid_ctx) +SEC(".struct_ops.link") +struct hid_bpf_ops rdesc_fixup = { + .hid_rdesc_fixup = (void *)hid_rdesc_fixup, +}; + +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_test_insert1, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) { __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */); @@ -217,8 +257,14 @@ int BPF_PROG(hid_test_insert1, struct hid_bpf_ctx *hid_ctx) return 0; } -SEC("?fmod_ret/hid_bpf_device_event") -int BPF_PROG(hid_test_insert2, struct hid_bpf_ctx *hid_ctx) +SEC(".struct_ops.link") +struct hid_bpf_ops test_insert1 = { + .hid_device_event = (void *)hid_test_insert1, + .flags = BPF_F_BEFORE, +}; + +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_test_insert2, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) { __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */); @@ -234,8 +280,13 @@ int BPF_PROG(hid_test_insert2, struct hid_bpf_ctx *hid_ctx) return 0; } -SEC("?fmod_ret/hid_bpf_device_event") -int BPF_PROG(hid_test_insert3, struct hid_bpf_ctx *hid_ctx) +SEC(".struct_ops.link") +struct hid_bpf_ops test_insert2 = { + .hid_device_event = (void *)hid_test_insert2, +}; + +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_test_insert3, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) { __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */); @@ -250,3 +301,300 @@ int BPF_PROG(hid_test_insert3, struct hid_bpf_ctx *hid_ctx) return 0; } + +SEC(".struct_ops.link") +struct hid_bpf_ops test_insert3 = { + .hid_device_event = (void *)hid_test_insert3, +}; + +SEC("?struct_ops/hid_hw_request") +int BPF_PROG(hid_test_filter_raw_request, struct hid_bpf_ctx *hctx, unsigned char reportnum, + enum hid_report_type rtype, enum hid_class_request reqtype, __u64 source) +{ + return -20; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops test_filter_raw_request = { + .hid_hw_request = (void *)hid_test_filter_raw_request, +}; + +static struct file *current_file; + +SEC("fentry/hidraw_open") +int BPF_PROG(hidraw_open, struct inode *inode, struct file *file) +{ + current_file = file; + return 0; +} + +SEC("?struct_ops.s/hid_hw_request") +int BPF_PROG(hid_test_hidraw_raw_request, struct hid_bpf_ctx *hctx, unsigned char reportnum, + enum hid_report_type rtype, enum hid_class_request reqtype, __u64 source) +{ + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 3 /* size */); + int ret; + + if (!data) + return 0; /* EPERM check */ + + /* check if the incoming request comes from our hidraw operation */ + if (source == (__u64)current_file) { + data[0] = reportnum; + + ret = hid_bpf_hw_request(hctx, data, 2, rtype, reqtype); + if (ret != 2) + return -1; + data[0] = reportnum + 1; + data[1] = reportnum + 2; + data[2] = reportnum + 3; + return 3; + } + + return 0; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops test_hidraw_raw_request = { + .hid_hw_request = (void *)hid_test_hidraw_raw_request, +}; + +SEC("?struct_ops.s/hid_hw_request") +int BPF_PROG(hid_test_infinite_loop_raw_request, struct hid_bpf_ctx *hctx, unsigned char reportnum, + enum hid_report_type rtype, enum hid_class_request reqtype, __u64 source) +{ + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 3 /* size */); + int ret; + + if (!data) + return 0; /* EPERM check */ + + /* always forward the request as-is to the device, hid-bpf should prevent + * infinite loops. + */ + data[0] = reportnum; + + ret = hid_bpf_hw_request(hctx, data, 2, rtype, reqtype); + if (ret == 2) + return 3; + + return 0; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops test_infinite_loop_raw_request = { + .hid_hw_request = (void *)hid_test_infinite_loop_raw_request, +}; + +SEC("?struct_ops/hid_hw_output_report") +int BPF_PROG(hid_test_filter_output_report, struct hid_bpf_ctx *hctx, unsigned char reportnum, + enum hid_report_type rtype, enum hid_class_request reqtype, __u64 source) +{ + return -25; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops test_filter_output_report = { + .hid_hw_output_report = (void *)hid_test_filter_output_report, +}; + +SEC("?struct_ops.s/hid_hw_output_report") +int BPF_PROG(hid_test_hidraw_output_report, struct hid_bpf_ctx *hctx, __u64 source) +{ + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 3 /* size */); + int ret; + + if (!data) + return 0; /* EPERM check */ + + /* check if the incoming request comes from our hidraw operation */ + if (source == (__u64)current_file) + return hid_bpf_hw_output_report(hctx, data, 2); + + return 0; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops test_hidraw_output_report = { + .hid_hw_output_report = (void *)hid_test_hidraw_output_report, +}; + +SEC("?struct_ops.s/hid_hw_output_report") +int BPF_PROG(hid_test_infinite_loop_output_report, struct hid_bpf_ctx *hctx, __u64 source) +{ + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 3 /* size */); + int ret; + + if (!data) + return 0; /* EPERM check */ + + /* always forward the request as-is to the device, hid-bpf should prevent + * infinite loops. + */ + + ret = hid_bpf_hw_output_report(hctx, data, 2); + if (ret == 2) + return 2; + + return 0; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops test_infinite_loop_output_report = { + .hid_hw_output_report = (void *)hid_test_infinite_loop_output_report, +}; + +struct elem { + struct bpf_wq work; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} hmap SEC(".maps"); + +static int wq_cb_sleepable(void *map, int *key, struct bpf_wq *work) +{ + __u8 buf[9] = {2, 3, 4, 5, 6, 7, 8, 9, 10}; + struct hid_bpf_ctx *hid_ctx; + + hid_ctx = hid_bpf_allocate_context(*key); + if (!hid_ctx) + return 0; /* EPERM check */ + + hid_bpf_input_report(hid_ctx, HID_INPUT_REPORT, buf, sizeof(buf)); + + hid_bpf_release_context(hid_ctx); + + return 0; +} + +static int test_inject_input_report_callback(int *key) +{ + struct elem init = {}, *val; + struct bpf_wq *wq; + + if (bpf_map_update_elem(&hmap, key, &init, 0)) + return -1; + + val = bpf_map_lookup_elem(&hmap, key); + if (!val) + return -2; + + wq = &val->work; + if (bpf_wq_init(wq, &hmap, 0) != 0) + return -3; + + if (bpf_wq_set_callback(wq, wq_cb_sleepable, 0)) + return -4; + + if (bpf_wq_start(wq, 0)) + return -5; + + return 0; +} + +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_test_multiply_events_wq, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) +{ + __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 9 /* size */); + int hid = hid_ctx->hid->id; + int ret; + + if (!data) + return 0; /* EPERM check */ + + if (data[0] != 1) + return 0; + + ret = test_inject_input_report_callback(&hid); + if (ret) + return ret; + + data[1] += 5; + + return 0; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops test_multiply_events_wq = { + .hid_device_event = (void *)hid_test_multiply_events_wq, +}; + +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_test_multiply_events, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type) +{ + __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 9 /* size */); + __u8 buf[9]; + int ret; + + if (!data) + return 0; /* EPERM check */ + + if (data[0] != 1) + return 0; + + /* + * we have to use an intermediate buffer as hid_bpf_input_report + * will memset data to \0 + */ + __builtin_memcpy(buf, data, sizeof(buf)); + + buf[0] = 2; + buf[1] += 5; + ret = hid_bpf_try_input_report(hid_ctx, HID_INPUT_REPORT, buf, sizeof(buf)); + if (ret < 0) + return ret; + + /* + * In real world we should reset the original buffer as data might be garbage now, + * but it actually now has the content of 'buf' + */ + data[1] += 5; + + return 9; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops test_multiply_events = { + .hid_device_event = (void *)hid_test_multiply_events, +}; + +SEC("?struct_ops/hid_device_event") +int BPF_PROG(hid_test_infinite_loop_input_report, struct hid_bpf_ctx *hctx, + enum hid_report_type report_type, __u64 source) +{ + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 6 /* size */); + __u8 buf[6]; + + if (!data) + return 0; /* EPERM check */ + + /* + * we have to use an intermediate buffer as hid_bpf_input_report + * will memset data to \0 + */ + __builtin_memcpy(buf, data, sizeof(buf)); + + /* always forward the request as-is to the device, hid-bpf should prevent + * infinite loops. + * the return value is ignored so the event is passing to userspace. + */ + + hid_bpf_try_input_report(hctx, report_type, buf, sizeof(buf)); + + /* each time we process the event, we increment by one data[1]: + * after each successful call to hid_bpf_try_input_report, buf + * has been memcopied into data by the kernel. + */ + data[1] += 1; + + return 0; +} + +SEC(".struct_ops.link") +struct hid_bpf_ops test_infinite_loop_input_report = { + .hid_device_event = (void *)hid_test_infinite_loop_input_report, +}; diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h index 9cd56821d0f1..cfe37f491906 100644 --- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h +++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h @@ -7,6 +7,7 @@ /* "undefine" structs and enums in vmlinux.h, because we "override" them below */ #define hid_bpf_ctx hid_bpf_ctx___not_used +#define hid_bpf_ops hid_bpf_ops___not_used #define hid_report_type hid_report_type___not_used #define hid_class_request hid_class_request___not_used #define hid_bpf_attach_flags hid_bpf_attach_flags___not_used @@ -20,13 +21,11 @@ #define HID_REQ_SET_REPORT HID_REQ_SET_REPORT___not_used #define HID_REQ_SET_IDLE HID_REQ_SET_IDLE___not_used #define HID_REQ_SET_PROTOCOL HID_REQ_SET_PROTOCOL___not_used -#define HID_BPF_FLAG_NONE HID_BPF_FLAG_NONE___not_used -#define HID_BPF_FLAG_INSERT_HEAD HID_BPF_FLAG_INSERT_HEAD___not_used -#define HID_BPF_FLAG_MAX HID_BPF_FLAG_MAX___not_used #include "vmlinux.h" #undef hid_bpf_ctx +#undef hid_bpf_ops #undef hid_report_type #undef hid_class_request #undef hid_bpf_attach_flags @@ -40,9 +39,6 @@ #undef HID_REQ_SET_REPORT #undef HID_REQ_SET_IDLE #undef HID_REQ_SET_PROTOCOL -#undef HID_BPF_FLAG_NONE -#undef HID_BPF_FLAG_INSERT_HEAD -#undef HID_BPF_FLAG_MAX #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> @@ -57,10 +53,8 @@ enum hid_report_type { }; struct hid_bpf_ctx { - __u32 index; - const struct hid_device *hid; + struct hid_device *hid; __u32 allocated_size; - enum hid_report_type report_type; union { __s32 retval; __s32 size; @@ -76,17 +70,28 @@ enum hid_class_request { HID_REQ_SET_PROTOCOL = 0x0B, }; -enum hid_bpf_attach_flags { - HID_BPF_FLAG_NONE = 0, - HID_BPF_FLAG_INSERT_HEAD = _BITUL(0), - HID_BPF_FLAG_MAX, +struct hid_bpf_ops { + int hid_id; + u32 flags; + struct list_head list; + int (*hid_device_event)(struct hid_bpf_ctx *ctx, enum hid_report_type report_type, + u64 source); + int (*hid_rdesc_fixup)(struct hid_bpf_ctx *ctx); + int (*hid_hw_request)(struct hid_bpf_ctx *ctx, unsigned char reportnum, + enum hid_report_type rtype, enum hid_class_request reqtype, + u64 source); + int (*hid_hw_output_report)(struct hid_bpf_ctx *ctx, u64 source); + struct hid_device *hdev; }; +#ifndef BPF_F_BEFORE +#define BPF_F_BEFORE (1U << 3) +#endif + /* following are kfuncs exported by HID for HID-BPF */ extern __u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t __sz) __ksym; -extern int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, u32 flags) __ksym; extern struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id) __ksym; extern void hid_bpf_release_context(struct hid_bpf_ctx *ctx) __ksym; extern int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, @@ -100,5 +105,18 @@ extern int hid_bpf_input_report(struct hid_bpf_ctx *ctx, enum hid_report_type type, __u8 *data, size_t buf__sz) __ksym; +extern int hid_bpf_try_input_report(struct hid_bpf_ctx *ctx, + enum hid_report_type type, + __u8 *data, + size_t buf__sz) __ksym; + +/* bpf_wq implementation */ +extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym; +extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym; +extern int bpf_wq_set_callback_impl(struct bpf_wq *wq, + int (callback_fn)(void *map, int *key, struct bpf_wq *wq), + unsigned int flags__k, void *aux__ign) __ksym; +#define bpf_wq_set_callback(timer, cb, flags) \ + bpf_wq_set_callback_impl(timer, cb, flags, NULL) #endif /* __HID_BPF_HELPERS_H */ diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 76c2a6945d3e..b8967b6e29d5 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -168,15 +168,7 @@ static inline __printf(1, 2) void ksft_print_msg(const char *msg, ...) static inline void ksft_perror(const char *msg) { -#ifndef NOLIBC ksft_print_msg("%s: %s (%d)\n", msg, strerror(errno), errno); -#else - /* - * nolibc doesn't provide strerror() and it seems - * inappropriate to add one, just print the errno. - */ - ksft_print_msg("%s: %d)\n", msg, errno); -#endif } static inline __printf(1, 2) void ksft_test_result_pass(const char *msg, ...) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index b634969cbb6f..40723a6a083f 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -66,8 +66,6 @@ #include <sys/wait.h> #include <unistd.h> #include <setjmp.h> -#include <syscall.h> -#include <linux/sched.h> #include "kselftest.h" @@ -82,17 +80,6 @@ # define TH_LOG_ENABLED 1 #endif -/* Wait for the child process to end but without sharing memory mapping. */ -static inline pid_t clone3_vfork(void) -{ - struct clone_args args = { - .flags = CLONE_VFORK, - .exit_signal = SIGCHLD, - }; - - return syscall(__NR_clone3, &args, sizeof(args)); -} - /** * TH_LOG() * @@ -437,7 +424,7 @@ static inline pid_t clone3_vfork(void) } \ if (setjmp(_metadata->env) == 0) { \ /* _metadata and potentially self are shared with all forks. */ \ - child = clone3_vfork(); \ + child = fork(); \ if (child == 0) { \ fixture_name##_setup(_metadata, self, variant->data); \ /* Let setup failure terminate early. */ \ @@ -1016,7 +1003,14 @@ void __wait_for_test(struct __test_metadata *t) .sa_flags = SA_SIGINFO, }; struct sigaction saved_action; - int status; + /* + * Sets status so that WIFEXITED(status) returns true and + * WEXITSTATUS(status) returns KSFT_FAIL. This safe default value + * should never be evaluated because of the waitpid(2) check and + * SIGALRM handling. + */ + int status = KSFT_FAIL << 8; + int child; if (sigaction(SIGALRM, &action, &saved_action)) { t->exit_code = KSFT_FAIL; @@ -1028,7 +1022,15 @@ void __wait_for_test(struct __test_metadata *t) __active_test = t; t->timed_out = false; alarm(t->timeout); - waitpid(t->pid, &status, 0); + child = waitpid(t->pid, &status, 0); + if (child == -1 && errno != EINTR) { + t->exit_code = KSFT_FAIL; + fprintf(TH_LOG_STREAM, + "# %s: Failed to wait for PID %d (errno: %d)\n", + t->name, t->pid, errno); + return; + } + alarm(0); if (sigaction(SIGALRM, &saved_action, NULL)) { t->exit_code = KSFT_FAIL; @@ -1083,6 +1085,7 @@ void __wait_for_test(struct __test_metadata *t) WTERMSIG(status)); } } else { + t->exit_code = KSFT_FAIL; fprintf(TH_LOG_STREAM, "# %s: Test ended in some other way [%u]\n", t->name, @@ -1218,6 +1221,7 @@ void __run_test(struct __fixture_metadata *f, struct __test_xfail *xfail; char test_name[1024]; const char *diagnostic; + int child; /* reset test struct */ t->exit_code = KSFT_PASS; @@ -1236,15 +1240,16 @@ void __run_test(struct __fixture_metadata *f, fflush(stdout); fflush(stderr); - t->pid = clone3_vfork(); - if (t->pid < 0) { + child = fork(); + if (child < 0) { ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); t->exit_code = KSFT_FAIL; - } else if (t->pid == 0) { + } else if (child == 0) { setpgrp(); t->fn(t, variant); _exit(t->exit_code); } else { + t->pid = child; __wait_for_test(t); } ksft_print_msg(" %4s %s\n", diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 8eb57de0b587..c0c7c1fe93f9 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -277,6 +277,7 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31) #define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7) #define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15) +#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23) #define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5) #define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11) diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c index 14ee17151a59..b5035c63d516 100644 --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -9,6 +9,7 @@ #include "kvm_util.h" #include "processor.h" +#include "sbi.h" void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index c664e446136b..594b061aef52 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1247,9 +1247,20 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ unsigned long ht_gfn, max_gfn, max_pfn; - uint8_t maxphyaddr; + uint8_t maxphyaddr, guest_maxphyaddr; - max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1; + /* + * Use "guest MAXPHYADDR" from KVM if it's available. Guest MAXPHYADDR + * enumerates the max _mappable_ GPA, which can be less than the raw + * MAXPHYADDR, e.g. if MAXPHYADDR=52, KVM is using TDP, and the CPU + * doesn't support 5-level TDP. + */ + guest_maxphyaddr = kvm_cpu_property(X86_PROPERTY_GUEST_MAX_PHY_ADDR); + guest_maxphyaddr = guest_maxphyaddr ?: vm->pa_bits; + TEST_ASSERT(guest_maxphyaddr <= vm->pa_bits, + "Guest MAXPHYADDR should never be greater than raw MAXPHYADDR"); + + max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1; /* Avoid reserved HyperTransport region on AMD processors. */ if (!host_cpu_is_amd) diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c index 823c132069b4..0e0712854953 100644 --- a/tools/testing/selftests/kvm/riscv/ebreak_test.c +++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c @@ -6,6 +6,7 @@ * */ #include "kvm_util.h" +#include "ucall_common.h" #define LABEL_ADDRESS(v) ((uint64_t)&(v)) diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c index 69bb94e6b227..f299cbfd23ca 100644 --- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c +++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c @@ -15,6 +15,7 @@ #include "processor.h" #include "sbi.h" #include "arch_timer.h" +#include "ucall_common.h" /* Maximum counters(firmware + hardware) */ #define RISCV_MAX_PMU_COUNTERS 64 diff --git a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c index 7a4a61be119b..3fb967f40c6a 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c +++ b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c @@ -105,11 +105,11 @@ void test_features(uint32_t vm_type, uint64_t supported_features) int i; for (i = 0; i < 64; i++) { - if (!(supported_features & (1u << i))) + if (!(supported_features & BIT_ULL(i))) test_init2_invalid(vm_type, &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }, "unknown feature"); - else if (KNOWN_FEATURES & (1u << i)) + else if (KNOWN_FEATURES & BIT_ULL(i)) test_init2(vm_type, &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }); } diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 429535816dbd..7b299ed5ff45 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -38,6 +38,14 @@ else CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%)) endif # CROSS_COMPILE +# gcc defaults to silence (off) for the following warnings, but clang defaults +# to the opposite. The warnings are not useful for the kernel itself, which is +# why they have remained disabled in gcc for the main kernel build. And it is +# only due to including kernel data structures in the selftests, that we get the +# warnings from clang. Therefore, disable the warnings for clang builds. +CFLAGS += -Wno-address-of-packed-member +CFLAGS += -Wno-gnu-variable-sized-type-not-at-end + CC := $(CLANG) $(CLANG_FLAGS) -fintegrated-as else CC := $(CROSS_COMPILE)gcc diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 368973f05250..cff124c1eddd 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -31,6 +31,7 @@ SLAB_FREE_CROSS SLAB_FREE_PAGE #SOFTLOCKUP Hangs the system #HARDLOCKUP Hangs the system +#SMP_CALL_LOCKUP Hangs the system #SPINLOCKUP Hangs the system #HUNG_TASK Hangs the system EXEC_DATA diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index 37de82da9be7..b61803e36d1c 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -656,12 +656,33 @@ unmap: munmap(map, size); } +static void init_global_file_handles(void) +{ + mem_fd = open("/proc/self/mem", O_RDWR); + if (mem_fd < 0) + ksft_exit_fail_msg("opening /proc/self/mem failed\n"); + ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); + if (ksm_fd < 0) + ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n"); + ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY); + if (ksm_full_scans_fd < 0) + ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n"); + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd < 0) + ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); + proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY); + proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", + O_RDONLY); + ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); +} + int main(int argc, char **argv) { unsigned int tests = 8; int err; if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) { + init_global_file_handles(); exit(test_child_ksm()); } @@ -674,22 +695,7 @@ int main(int argc, char **argv) pagesize = getpagesize(); - mem_fd = open("/proc/self/mem", O_RDWR); - if (mem_fd < 0) - ksft_exit_fail_msg("opening /proc/self/mem failed\n"); - ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); - if (ksm_fd < 0) - ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n"); - ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY); - if (ksm_full_scans_fd < 0) - ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n"); - pagemap_fd = open("/proc/self/pagemap", O_RDONLY); - if (pagemap_fd < 0) - ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); - proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY); - proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", - O_RDONLY); - ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); + init_global_file_handles(); test_unmerge(); test_unmerge_zero_pages(); diff --git a/tools/testing/selftests/mm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c index b74813fdc951..d53de2486080 100644 --- a/tools/testing/selftests/mm/map_fixed_noreplace.c +++ b/tools/testing/selftests/mm/map_fixed_noreplace.c @@ -67,7 +67,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error: munmap failed!?\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n"); addr = base_addr + page_size; size = 3 * page_size; @@ -76,7 +77,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error: first mmap() failed unexpectedly\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 3*PAGE_SIZE at base+PAGE_SIZE\n"); /* * Exact same mapping again: @@ -93,7 +95,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:1: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n"); /* * Second mapping contained within first: @@ -111,7 +114,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:2: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+PAGE_SIZE\n"); /* * Overlap end of existing mapping: @@ -128,7 +132,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:3: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+(3*PAGE_SIZE)\n"); /* * Overlap start of existing mapping: @@ -145,7 +150,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:4: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 2*PAGE_SIZE bytes at base\n"); /* * Adjacent to start of existing mapping: @@ -162,7 +168,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:5: mmap() failed when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() PAGE_SIZE at base\n"); /* * Adjacent to end of existing mapping: @@ -179,7 +186,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:6: mmap() failed when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() PAGE_SIZE at base+(4*PAGE_SIZE)\n"); addr = base_addr; size = 5 * page_size; diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 49a56eb5d036..666ab7d9390b 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -43,7 +43,6 @@ tap tcp_fastopen_backup_key tcp_inq tcp_mmap -test_unix_oob timestamping tls toeplitz diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index bd01e4a0be2c..bc3925200637 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -43,6 +43,8 @@ TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh TEST_PROGS += srv6_end_flavors_test.sh +TEST_PROGS += srv6_end_dx4_netfilter_test.sh +TEST_PROGS += srv6_end_dx6_netfilter_test.sh TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS += arp_ndisc_evict_nocarrier.sh TEST_PROGS += ndisc_unsolicited_na_test.sh @@ -53,6 +55,7 @@ TEST_PROGS += bind_bhash.sh TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh +TEST_PROGS += netns-sysctl.sh TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 3b83c797650d..50584479540b 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,4 @@ CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd scm_rights +TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/config b/tools/testing/selftests/net/af_unix/config new file mode 100644 index 000000000000..37368567768c --- /dev/null +++ b/tools/testing/selftests/net/af_unix/config @@ -0,0 +1,3 @@ +CONFIG_UNIX=y +CONFIG_AF_UNIX_OOB=y +CONFIG_UNIX_DIAG=m diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c new file mode 100644 index 000000000000..16d0c172eaeb --- /dev/null +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -0,0 +1,734 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#include <fcntl.h> +#include <string.h> +#include <unistd.h> + +#include <netinet/in.h> +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/signalfd.h> +#include <sys/socket.h> + +#include "../../kselftest_harness.h" + +#define BUF_SZ 32 + +FIXTURE(msg_oob) +{ + int fd[4]; /* 0: AF_UNIX sender + * 1: AF_UNIX receiver + * 2: TCP sender + * 3: TCP receiver + */ + int signal_fd; + int epoll_fd[2]; /* 0: AF_UNIX receiver + * 1: TCP receiver + */ + bool tcp_compliant; +}; + +FIXTURE_VARIANT(msg_oob) +{ + bool peek; +}; + +FIXTURE_VARIANT_ADD(msg_oob, no_peek) +{ + .peek = false, +}; + +FIXTURE_VARIANT_ADD(msg_oob, peek) +{ + .peek = true +}; + +static void create_unix_socketpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + int ret; + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, self->fd); + ASSERT_EQ(ret, 0); +} + +static void create_tcp_socketpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + struct sockaddr_in addr; + socklen_t addrlen; + int listen_fd; + int ret; + + listen_fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GE(listen_fd, 0); + + ret = listen(listen_fd, -1); + ASSERT_EQ(ret, 0); + + addrlen = sizeof(addr); + ret = getsockname(listen_fd, (struct sockaddr *)&addr, &addrlen); + ASSERT_EQ(ret, 0); + + self->fd[2] = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GE(self->fd[2], 0); + + ret = connect(self->fd[2], (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(ret, 0); + + self->fd[3] = accept(listen_fd, (struct sockaddr *)&addr, &addrlen); + ASSERT_GE(self->fd[3], 0); + + ret = fcntl(self->fd[3], F_SETFL, O_NONBLOCK); + ASSERT_EQ(ret, 0); +} + +static void setup_sigurg(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + struct signalfd_siginfo siginfo; + int pid = getpid(); + sigset_t mask; + int i, ret; + + for (i = 0; i < 2; i++) { + ret = ioctl(self->fd[i * 2 + 1], FIOSETOWN, &pid); + ASSERT_EQ(ret, 0); + } + + ret = sigemptyset(&mask); + ASSERT_EQ(ret, 0); + + ret = sigaddset(&mask, SIGURG); + ASSERT_EQ(ret, 0); + + ret = sigprocmask(SIG_BLOCK, &mask, NULL); + ASSERT_EQ(ret, 0); + + self->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK); + ASSERT_GE(self->signal_fd, 0); + + ret = read(self->signal_fd, &siginfo, sizeof(siginfo)); + ASSERT_EQ(ret, -1); +} + +static void setup_epollpri(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + struct epoll_event event = { + .events = EPOLLPRI, + }; + int i; + + for (i = 0; i < 2; i++) { + int ret; + + self->epoll_fd[i] = epoll_create1(0); + ASSERT_GE(self->epoll_fd[i], 0); + + ret = epoll_ctl(self->epoll_fd[i], EPOLL_CTL_ADD, self->fd[i * 2 + 1], &event); + ASSERT_EQ(ret, 0); + } +} + +static void close_sockets(FIXTURE_DATA(msg_oob) *self) +{ + int i; + + for (i = 0; i < 4; i++) + close(self->fd[i]); +} + +FIXTURE_SETUP(msg_oob) +{ + create_unix_socketpair(_metadata, self); + create_tcp_socketpair(_metadata, self); + + setup_sigurg(_metadata, self); + setup_epollpri(_metadata, self); + + self->tcp_compliant = true; +} + +FIXTURE_TEARDOWN(msg_oob) +{ + close_sockets(self); +} + +static void __epollpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + bool oob_remaining) +{ + struct epoll_event event[2] = {}; + int i, ret[2]; + + for (i = 0; i < 2; i++) + ret[i] = epoll_wait(self->epoll_fd[i], &event[i], 1, 0); + + ASSERT_EQ(ret[0], oob_remaining); + + if (self->tcp_compliant) + ASSERT_EQ(ret[0], ret[1]); + + if (oob_remaining) { + ASSERT_EQ(event[0].events, EPOLLPRI); + + if (self->tcp_compliant) + ASSERT_EQ(event[0].events, event[1].events); + } +} + +static void __sendpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + const void *buf, size_t len, int flags) +{ + int i, ret[2]; + + for (i = 0; i < 2; i++) { + struct signalfd_siginfo siginfo = {}; + int bytes; + + ret[i] = send(self->fd[i * 2], buf, len, flags); + + bytes = read(self->signal_fd, &siginfo, sizeof(siginfo)); + + if (flags & MSG_OOB) { + ASSERT_EQ(bytes, sizeof(siginfo)); + ASSERT_EQ(siginfo.ssi_signo, SIGURG); + + bytes = read(self->signal_fd, &siginfo, sizeof(siginfo)); + } + + ASSERT_EQ(bytes, -1); + } + + ASSERT_EQ(ret[0], len); + ASSERT_EQ(ret[0], ret[1]); +} + +static void __recvpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + const void *expected_buf, int expected_len, + int buf_len, int flags) +{ + int i, ret[2], recv_errno[2], expected_errno = 0; + char recv_buf[2][BUF_SZ] = {}; + bool printed = false; + + ASSERT_GE(BUF_SZ, buf_len); + + errno = 0; + + for (i = 0; i < 2; i++) { + ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags); + recv_errno[i] = errno; + } + + if (expected_len < 0) { + expected_errno = -expected_len; + expected_len = -1; + } + + if (ret[0] != expected_len || recv_errno[0] != expected_errno) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf); + + ASSERT_EQ(ret[0], expected_len); + ASSERT_EQ(recv_errno[0], expected_errno); + } + + if (ret[0] != ret[1] || recv_errno[0] != recv_errno[1]) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]); + + printed = true; + + if (self->tcp_compliant) { + ASSERT_EQ(ret[0], ret[1]); + ASSERT_EQ(recv_errno[0], recv_errno[1]); + } + } + + if (expected_len >= 0) { + int cmp; + + cmp = strncmp(expected_buf, recv_buf[0], expected_len); + if (cmp) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf); + + ASSERT_EQ(cmp, 0); + } + + cmp = strncmp(recv_buf[0], recv_buf[1], expected_len); + if (cmp) { + if (!printed) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]); + } + + if (self->tcp_compliant) + ASSERT_EQ(cmp, 0); + } + } +} + +static void __setinlinepair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + int i, oob_inline = 1; + + for (i = 0; i < 2; i++) { + int ret; + + ret = setsockopt(self->fd[i * 2 + 1], SOL_SOCKET, SO_OOBINLINE, + &oob_inline, sizeof(oob_inline)); + ASSERT_EQ(ret, 0); + } +} + +static void __siocatmarkpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + bool oob_head) +{ + int answ[2] = {}; + int i; + + for (i = 0; i < 2; i++) { + int ret; + + ret = ioctl(self->fd[i * 2 + 1], SIOCATMARK, &answ[i]); + ASSERT_EQ(ret, 0); + } + + ASSERT_EQ(answ[0], oob_head); + + if (self->tcp_compliant) + ASSERT_EQ(answ[0], answ[1]); +} + +#define sendpair(buf, len, flags) \ + __sendpair(_metadata, self, buf, len, flags) + +#define recvpair(expected_buf, expected_len, buf_len, flags) \ + do { \ + if (variant->peek) \ + __recvpair(_metadata, self, \ + expected_buf, expected_len, \ + buf_len, (flags) | MSG_PEEK); \ + __recvpair(_metadata, self, \ + expected_buf, expected_len, buf_len, flags); \ + } while (0) + +#define epollpair(oob_remaining) \ + __epollpair(_metadata, self, oob_remaining) + +#define siocatmarkpair(oob_head) \ + __siocatmarkpair(_metadata, self, oob_head) + +#define setinlinepair() \ + __setinlinepair(_metadata, self) + +#define tcp_incompliant \ + for (self->tcp_compliant = false; \ + self->tcp_compliant == false; \ + self->tcp_compliant = true) + +TEST_F(msg_oob, non_oob) +{ + sendpair("x", 1, 0); + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); +} + +TEST_F(msg_oob, oob_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("", -EAGAIN, 1, 0); /* Drop OOB. */ + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob_ahead) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + recvpair("hell", 4, 4, 0); + epollpair(false); + siocatmarkpair(true); +} + +TEST_F(msg_oob, oob_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 5, 0); /* Break at OOB even with enough buffer. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + recvpair("", -EAGAIN, 1, 0); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob_ahead_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("world", 5, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + recvpair("hell", 4, 9, 0); /* Break at OOB even after it's recv()ed. */ + epollpair(false); + siocatmarkpair(true); + + recvpair("world", 5, 5, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob_break_drop) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("world", 5, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 10, 0); /* Break at OOB even with enough buffer. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("world", 5, 10, 0); /* Drop OOB and recv() the next skb. */ + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, ex_oob_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("wor", 3, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("ld", 2, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("r", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + recvpair("ld", 2, 2, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, ex_oob_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + epollpair(true); + + tcp_incompliant { + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); /* TCP drops "y" by passing through it. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, MSG_OOB); /* TCP returns -EINVAL. */ + epollpair(false); + siocatmarkpair(true); + } +} + +TEST_F(msg_oob, ex_oob_drop_2) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + epollpair(true); + + tcp_incompliant { + siocatmarkpair(false); + } + + recvpair("y", 1, 1, MSG_OOB); + epollpair(false); + + tcp_incompliant { + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); /* TCP returns -EAGAIN. */ + epollpair(false); + siocatmarkpair(true); + } +} + +TEST_F(msg_oob, ex_oob_ahead_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("wor", 3, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("r", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + sendpair("ld", 2, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + tcp_incompliant { + recvpair("hellowol", 8, 10, 0); /* TCP recv()s "helloworl", why "r" ?? */ + } + + epollpair(true); + siocatmarkpair(true); + + recvpair("d", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); +} + +TEST_F(msg_oob, ex_oob_siocatmark) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + sendpair("world", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ + epollpair(true); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_oob) +{ + setinlinepair(); + + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_oob_break) +{ + setinlinepair(); + + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 5, 0); /* Break at OOB but not at ex-OOB. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("o", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_oob_ahead_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("world", 5, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + setinlinepair(); + + recvpair("hell", 4, 9, 0); /* Break at OOB even with enough buffer. */ + epollpair(false); + siocatmarkpair(true); + + tcp_incompliant { + recvpair("world", 5, 6, 0); /* TCP recv()s "oworld", ... "o" ??? */ + } + + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_ex_oob_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("wor", 3, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("ld", 2, 0); + epollpair(true); + siocatmarkpair(false); + + setinlinepair(); + + recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("rld", 3, 3, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_ex_oob_no_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + setinlinepair(); + + sendpair("y", 1, MSG_OOB); /* TCP does NOT drops "x" at this moment. */ + epollpair(true); + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_ex_oob_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + epollpair(true); + + setinlinepair(); + + tcp_incompliant { + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); /* TCP recv()s "y". */ + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, 0); /* TCP returns -EAGAIN. */ + epollpair(false); + siocatmarkpair(false); + } +} + +TEST_F(msg_oob, inline_ex_oob_siocatmark) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + setinlinepair(); + + sendpair("world", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ + epollpair(true); + siocatmarkpair(false); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c index 2bfed46e0b19..d66336256580 100644 --- a/tools/testing/selftests/net/af_unix/scm_rights.c +++ b/tools/testing/selftests/net/af_unix/scm_rights.c @@ -14,12 +14,12 @@ FIXTURE(scm_rights) { - int fd[16]; + int fd[32]; }; FIXTURE_VARIANT(scm_rights) { - char name[16]; + char name[32]; int type; int flags; bool test_listener; @@ -172,6 +172,8 @@ static void __create_sockets(struct __test_metadata *_metadata, const FIXTURE_VARIANT(scm_rights) *variant, int n) { + ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0])); + if (variant->test_listener) create_listeners(_metadata, self, n); else @@ -283,4 +285,23 @@ TEST_F(scm_rights, cross_edge) close_sockets(8); } +TEST_F(scm_rights, backtrack_from_scc) +{ + create_sockets(10); + + send_fd(0, 1); + send_fd(0, 4); + send_fd(1, 2); + send_fd(2, 3); + send_fd(3, 1); + + send_fd(5, 6); + send_fd(5, 9); + send_fd(6, 7); + send_fd(7, 8); + send_fd(8, 6); + + close_sockets(10); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c deleted file mode 100644 index a7c51889acd5..000000000000 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ /dev/null @@ -1,436 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -#include <stdio.h> -#include <stdlib.h> -#include <sys/socket.h> -#include <arpa/inet.h> -#include <unistd.h> -#include <string.h> -#include <fcntl.h> -#include <sys/ioctl.h> -#include <errno.h> -#include <netinet/tcp.h> -#include <sys/un.h> -#include <sys/signal.h> -#include <sys/poll.h> - -static int pipefd[2]; -static int signal_recvd; -static pid_t producer_id; -static char sock_name[32]; - -static void sig_hand(int sn, siginfo_t *si, void *p) -{ - signal_recvd = sn; -} - -static int set_sig_handler(int signal) -{ - struct sigaction sa; - - sa.sa_sigaction = sig_hand; - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_SIGINFO | SA_RESTART; - - return sigaction(signal, &sa, NULL); -} - -static void set_filemode(int fd, int set) -{ - int flags = fcntl(fd, F_GETFL, 0); - - if (set) - flags &= ~O_NONBLOCK; - else - flags |= O_NONBLOCK; - fcntl(fd, F_SETFL, flags); -} - -static void signal_producer(int fd) -{ - char cmd; - - cmd = 'S'; - write(fd, &cmd, sizeof(cmd)); -} - -static void wait_for_signal(int fd) -{ - char buf[5]; - - read(fd, buf, 5); -} - -static void die(int status) -{ - fflush(NULL); - unlink(sock_name); - kill(producer_id, SIGTERM); - exit(status); -} - -int is_sioctatmark(int fd) -{ - int ans = -1; - - if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) { -#ifdef DEBUG - perror("SIOCATMARK Failed"); -#endif - } - return ans; -} - -void read_oob(int fd, char *c) -{ - - *c = ' '; - if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) { -#ifdef DEBUG - perror("Reading MSG_OOB Failed"); -#endif - } -} - -int read_data(int pfd, char *buf, int size) -{ - int len = 0; - - memset(buf, size, '0'); - len = read(pfd, buf, size); -#ifdef DEBUG - if (len < 0) - perror("read failed"); -#endif - return len; -} - -static void wait_for_data(int pfd, int event) -{ - struct pollfd pfds[1]; - - pfds[0].fd = pfd; - pfds[0].events = event; - poll(pfds, 1, -1); -} - -void producer(struct sockaddr_un *consumer_addr) -{ - int cfd; - char buf[64]; - int i; - - memset(buf, 'x', sizeof(buf)); - cfd = socket(AF_UNIX, SOCK_STREAM, 0); - - wait_for_signal(pipefd[0]); - if (connect(cfd, (struct sockaddr *)consumer_addr, - sizeof(*consumer_addr)) != 0) { - perror("Connect failed"); - kill(0, SIGTERM); - exit(1); - } - - for (i = 0; i < 2; i++) { - /* Test 1: Test for SIGURG and OOB */ - wait_for_signal(pipefd[0]); - memset(buf, 'x', sizeof(buf)); - buf[63] = '@'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - wait_for_signal(pipefd[0]); - - /* Test 2: Test for OOB being overwitten */ - memset(buf, 'x', sizeof(buf)); - buf[63] = '%'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - memset(buf, 'x', sizeof(buf)); - buf[63] = '#'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - wait_for_signal(pipefd[0]); - - /* Test 3: Test for SIOCATMARK */ - memset(buf, 'x', sizeof(buf)); - buf[63] = '@'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - memset(buf, 'x', sizeof(buf)); - buf[63] = '%'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - memset(buf, 'x', sizeof(buf)); - send(cfd, buf, sizeof(buf), 0); - - wait_for_signal(pipefd[0]); - - /* Test 4: Test for 1byte OOB msg */ - memset(buf, 'x', sizeof(buf)); - buf[0] = '@'; - send(cfd, buf, 1, MSG_OOB); - } -} - -int -main(int argc, char **argv) -{ - int lfd, pfd; - struct sockaddr_un consumer_addr, paddr; - socklen_t len = sizeof(consumer_addr); - char buf[1024]; - int on = 0; - char oob; - int atmark; - - lfd = socket(AF_UNIX, SOCK_STREAM, 0); - memset(&consumer_addr, 0, sizeof(consumer_addr)); - consumer_addr.sun_family = AF_UNIX; - sprintf(sock_name, "unix_oob_%d", getpid()); - unlink(sock_name); - strcpy(consumer_addr.sun_path, sock_name); - - if ((bind(lfd, (struct sockaddr *)&consumer_addr, - sizeof(consumer_addr))) != 0) { - perror("socket bind failed"); - exit(1); - } - - pipe(pipefd); - - listen(lfd, 1); - - producer_id = fork(); - if (producer_id == 0) { - producer(&consumer_addr); - exit(0); - } - - set_sig_handler(SIGURG); - signal_producer(pipefd[1]); - - pfd = accept(lfd, (struct sockaddr *) &paddr, &len); - fcntl(pfd, F_SETOWN, getpid()); - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 1: - * veriyf that SIGURG is - * delivered, 63 bytes are - * read, oob is '@', and POLLPRI works. - */ - wait_for_data(pfd, POLLPRI); - read_oob(pfd, &oob); - len = read_data(pfd, buf, 1024); - if (!signal_recvd || len != 63 || oob != '@') { - fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 2: - * Verify that the first OOB is over written by - * the 2nd one and the first OOB is returned as - * part of the read, and sigurg is received. - */ - wait_for_data(pfd, POLLIN | POLLPRI); - len = 0; - while (len < 70) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - read_oob(pfd, &oob); - if (!signal_recvd || len != 127 || oob != '#') { - fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 3: - * verify that 2nd oob over writes - * the first one and read breaks at - * oob boundary returning 127 bytes - * and sigurg is received and atmark - * is set. - * oob is '%' and second read returns - * 64 bytes. - */ - len = 0; - wait_for_data(pfd, POLLIN | POLLPRI); - while (len < 150) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - read_oob(pfd, &oob); - - if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) { - fprintf(stderr, - "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n", - signal_recvd, len, oob, atmark); - die(1); - } - - signal_recvd = 0; - - len = read_data(pfd, buf, 1024); - if (len != 64) { - fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 4: - * verify that a single byte - * oob message is delivered. - * set non blocking mode and - * check proper error is - * returned and sigurg is - * received and correct - * oob is read. - */ - - set_filemode(pfd, 0); - - wait_for_data(pfd, POLLIN | POLLPRI); - len = read_data(pfd, buf, 1024); - if ((len == -1) && (errno == 11)) - len = 0; - - read_oob(pfd, &oob); - - if (!signal_recvd || len != 0 || oob != '@') { - fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n", - signal_recvd, len, oob); - die(1); - } - - set_filemode(pfd, 1); - - /* Inline Testing */ - - on = 1; - if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) { - perror("SO_OOBINLINE"); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 1 -- Inline: - * Check that SIGURG is - * delivered and 63 bytes are - * read and oob is '@' - */ - - wait_for_data(pfd, POLLIN | POLLPRI); - len = read_data(pfd, buf, 1024); - - if (!signal_recvd || len != 63) { - fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n", - signal_recvd, len); - die(1); - } - - len = read_data(pfd, buf, 1024); - - if (len != 1) { - fprintf(stderr, - "Test 1.1 Inline failed, sigurg %d len %d oob %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 2 -- Inline: - * Verify that the first OOB is over written by - * the 2nd one and read breaks correctly on - * 2nd OOB boundary with the first OOB returned as - * part of the read, and sigurg is delivered and - * siocatmark returns true. - * next read returns one byte, the oob byte - * and siocatmark returns false. - */ - len = 0; - wait_for_data(pfd, POLLIN | POLLPRI); - while (len < 70) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (len != 127 || atmark != 1 || !signal_recvd) { - fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n", - len, atmark); - die(1); - } - - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (len != 1 || buf[0] != '#' || atmark == 1) { - fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n", - len, buf[0], atmark); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 3 -- Inline: - * verify that 2nd oob over writes - * the first one and read breaks at - * oob boundary returning 127 bytes - * and sigurg is received and siocatmark - * is true after the read. - * subsequent read returns 65 bytes - * because of oob which should be '%'. - */ - len = 0; - wait_for_data(pfd, POLLIN | POLLPRI); - while (len < 126) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (!signal_recvd || len != 127 || !atmark) { - fprintf(stderr, - "Test 3 Inline failed, sigurg %d len %d data %c\n", - signal_recvd, len, buf[0]); - die(1); - } - - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (len != 65 || buf[0] != '%' || atmark != 0) { - fprintf(stderr, - "Test 3.1 Inline failed, len %d oob %c atmark %d\n", - len, buf[0], atmark); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 4 -- Inline: - * verify that a single - * byte oob message is delivered - * and read returns one byte, the oob - * byte and sigurg is received - */ - wait_for_data(pfd, POLLIN | POLLPRI); - len = read_data(pfd, buf, 1024); - if (!signal_recvd || len != 1 || buf[0] != '@') { - fprintf(stderr, - "Test 4 Inline failed, signal %d len %d data %c\n", - signal_recvd, len, buf[0]); - die(1); - } - die(0); -} diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh index 7e7ed6c558da..d458b45c775b 100755 --- a/tools/testing/selftests/net/amt.sh +++ b/tools/testing/selftests/net/amt.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Author: Taehee Yoo <ap420073@gmail.com> diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 04de7a6ba6f3..5b9baf708950 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -26,7 +26,6 @@ CONFIG_INET_ESP=y CONFIG_INET_ESP_OFFLOAD=y CONFIG_NET_FOU=y CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_IP_GRE=m CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=m @@ -75,7 +74,12 @@ CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y CONFIG_NET_SCH_PRIO=m CONFIG_NFT_COMPAT=m +CONFIG_NF_CONNTRACK_OVS=y CONFIG_NF_FLOW_TABLE=m +CONFIG_OPENVSWITCH=m +CONFIG_OPENVSWITCH_GENEVE=m +CONFIG_OPENVSWITCH_GRE=m +CONFIG_OPENVSWITCH_VXLAN=m CONFIG_PSAMPLE=m CONFIG_TCP_MD5SIG=y CONFIG_TEST_BLACKHOLE_DEV=m @@ -101,3 +105,5 @@ CONFIG_NETFILTER_XT_MATCH_POLICY=m CONFIG_CRYPTO_ARIA=y CONFIG_XFRM_INTERFACE=m CONFIG_XFRM_USER=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_MATCH_RPFILTER=m diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index fa7b59ff4029..224346426ef2 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -39,6 +39,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ ipip_hier_gre.sh \ lib_sh_test.sh \ local_termination.sh \ + min_max_mtu.sh \ mirror_gre_bound.sh \ mirror_gre_bridge_1d.sh \ mirror_gre_bridge_1d_vlan.sh \ @@ -70,6 +71,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ router_broadcast.sh \ router_mpath_nh_res.sh \ router_mpath_nh.sh \ + router_mpath_seed.sh \ router_multicast.sh \ router_multipath.sh \ router_nh.sh \ diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index f1de525cfa55..62a05bca1e82 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -122,6 +122,8 @@ devlink_reload() still_pending=$(devlink resource show "$DEVLINK_DEV" | \ grep -c "size_new") check_err $still_pending "Failed reload - There are still unset sizes" + + udevadm settle } declare -A DEVLINK_ORIG diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index eabbdf00d8ca..ff96bb7535ff 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1134,12 +1134,19 @@ bridge_ageing_time_get() } declare -A SYSCTL_ORIG +sysctl_save() +{ + local key=$1; shift + + SYSCTL_ORIG[$key]=$(sysctl -n $key) +} + sysctl_set() { local key=$1; shift local value=$1; shift - SYSCTL_ORIG[$key]=$(sysctl -n $key) + sysctl_save "$key" sysctl -qw $key="$value" } @@ -1218,22 +1225,6 @@ trap_uninstall() tc filter del dev $dev $direction pref 1 flower } -slow_path_trap_install() -{ - # For slow-path testing, we need to install a trap to get to - # slow path the packets that would otherwise be switched in HW. - if [ "${tcflags/skip_hw}" != "$tcflags" ]; then - trap_install "$@" - fi -} - -slow_path_trap_uninstall() -{ - if [ "${tcflags/skip_hw}" != "$tcflags" ]; then - trap_uninstall "$@" - fi -} - __icmp_capture_add_del() { local add_del=$1; shift @@ -1250,22 +1241,34 @@ __icmp_capture_add_del() icmp_capture_install() { - __icmp_capture_add_del add 100 "" "$@" + local tundev=$1; shift + local filter=$1; shift + + __icmp_capture_add_del add 100 "" "$tundev" "$filter" } icmp_capture_uninstall() { - __icmp_capture_add_del del 100 "" "$@" + local tundev=$1; shift + local filter=$1; shift + + __icmp_capture_add_del del 100 "" "$tundev" "$filter" } icmp6_capture_install() { - __icmp_capture_add_del add 100 v6 "$@" + local tundev=$1; shift + local filter=$1; shift + + __icmp_capture_add_del add 100 v6 "$tundev" "$filter" } icmp6_capture_uninstall() { - __icmp_capture_add_del del 100 v6 "$@" + local tundev=$1; shift + local filter=$1; shift + + __icmp_capture_add_del del 100 v6 "$tundev" "$filter" } __vlan_capture_add_del() @@ -1283,12 +1286,18 @@ __vlan_capture_add_del() vlan_capture_install() { - __vlan_capture_add_del add 100 "$@" + local dev=$1; shift + local filter=$1; shift + + __vlan_capture_add_del add 100 "$dev" "$filter" } vlan_capture_uninstall() { - __vlan_capture_add_del del 100 "$@" + local dev=$1; shift + local filter=$1; shift + + __vlan_capture_add_del del 100 "$dev" "$filter" } __dscp_capture_add_del() @@ -1648,34 +1657,61 @@ __start_traffic() local sip=$1; shift local dip=$1; shift local dmac=$1; shift + local -a mz_args=("$@") $MZ $h_in -p $pktsize -A $sip -B $dip -c 0 \ - -a own -b $dmac -t "$proto" -q "$@" & + -a own -b $dmac -t "$proto" -q "${mz_args[@]}" & sleep 1 } start_traffic_pktsize() { local pktsize=$1; shift + local h_in=$1; shift + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + local -a mz_args=("$@") - __start_traffic $pktsize udp "$@" + __start_traffic $pktsize udp "$h_in" "$sip" "$dip" "$dmac" \ + "${mz_args[@]}" } start_tcp_traffic_pktsize() { local pktsize=$1; shift + local h_in=$1; shift + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + local -a mz_args=("$@") - __start_traffic $pktsize tcp "$@" + __start_traffic $pktsize tcp "$h_in" "$sip" "$dip" "$dmac" \ + "${mz_args[@]}" } start_traffic() { - start_traffic_pktsize 8000 "$@" + local h_in=$1; shift + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + local -a mz_args=("$@") + + start_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" \ + "${mz_args[@]}" } start_tcp_traffic() { - start_tcp_traffic_pktsize 8000 "$@" + local h_in=$1; shift + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + local -a mz_args=("$@") + + start_tcp_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" \ + "${mz_args[@]}" } stop_traffic() diff --git a/tools/testing/selftests/net/forwarding/min_max_mtu.sh b/tools/testing/selftests/net/forwarding/min_max_mtu.sh new file mode 100755 index 000000000000..97bb8b221bed --- /dev/null +++ b/tools/testing/selftests/net/forwarding/min_max_mtu.sh @@ -0,0 +1,283 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +# | H1 | +# | | +# | $h1.10 + | +# | 192.0.2.2/24 | | +# | 2001:db8:1::2/64 | | +# | | | +# | $h1 + | +# | | | +# +------------------|-+ +# | +# +------------------|-+ +# | SW | | +# | $swp1 + | +# | | | +# | $swp1.10 + | +# | 192.0.2.1/24 | +# | 2001:db8:1::1/64 | +# | | +# +--------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + max_mtu_config_test + max_mtu_traffic_test + min_mtu_config_test + min_mtu_traffic_test +" + +NUM_NETIFS=2 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.2/24 2001:db8:1::2/64 +} + +h1_destroy() +{ + vlan_destroy $h1 10 192.0.2.2/24 2001:db8:1::2/64 + simple_if_fini $h1 +} + +switch_create() +{ + ip li set dev $swp1 up + vlan_create $swp1 10 "" 192.0.2.1/24 2001:db8:1::1/64 +} + +switch_destroy() +{ + ip li set dev $swp1 down + vlan_destroy $swp1 10 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + h1_create + + switch_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + switch_destroy + + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1.10 192.0.2.1 +} + +ping_ipv6() +{ + ping6_test $h1.10 2001:db8:1::1 +} + +min_max_mtu_get_if() +{ + local dev=$1; shift + local min_max=$1; shift + + ip -d -j link show $dev | jq ".[].$min_max" +} + +ensure_compatible_min_max_mtu() +{ + local min_max=$1; shift + + local mtu=$(min_max_mtu_get_if ${NETIFS[p1]} $min_max) + local i + + for ((i = 2; i <= NUM_NETIFS; ++i)); do + local current_mtu=$(min_max_mtu_get_if ${NETIFS[p$i]} $min_max) + + if [ $current_mtu -ne $mtu ]; then + return 1 + fi + done +} + +mtu_set_if() +{ + local dev=$1; shift + local mtu=$1; shift + local should_fail=${1:-0}; shift + + mtu_set $dev $mtu 2>/dev/null + check_err_fail $should_fail $? "Set MTU $mtu for $dev" +} + +mtu_set_all_if() +{ + local mtu=$1; shift + local i + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + mtu_set_if ${NETIFS[p$i]} $mtu + mtu_set_if ${NETIFS[p$i]}.10 $mtu + done +} + +mtu_restore_all_if() +{ + local i + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + mtu_restore ${NETIFS[p$i]}.10 + mtu_restore ${NETIFS[p$i]} + done +} + +mtu_test_ping4() +{ + local mtu=$1; shift + local should_fail=$1; shift + + # Ping adds 8 bytes for ICMP header and 20 bytes for IP header + local ping_headers_len=$((20 + 8)) + local pkt_size=$((mtu - ping_headers_len)) + + ping_do $h1.10 192.0.2.1 "-s $pkt_size -M do" + check_err_fail $should_fail $? "Ping, packet size: $pkt_size" +} + +mtu_test_ping6() +{ + local mtu=$1; shift + local should_fail=$1; shift + + # Ping adds 8 bytes for ICMP header and 40 bytes for IPv6 header + local ping6_headers_len=$((40 + 8)) + local pkt_size=$((mtu - ping6_headers_len)) + + ping6_do $h1.10 2001:db8:1::1 "-s $pkt_size -M do" + check_err_fail $should_fail $? "Ping6, packet size: $pkt_size" +} + +max_mtu_config_test() +{ + local i + + RET=0 + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + local dev=${NETIFS[p$i]} + local max_mtu=$(min_max_mtu_get_if $dev "max_mtu") + local should_fail + + should_fail=0 + mtu_set_if $dev $max_mtu $should_fail + mtu_restore $dev + + should_fail=1 + mtu_set_if $dev $((max_mtu + 1)) $should_fail + mtu_restore $dev + done + + log_test "Test maximum MTU configuration" +} + +max_mtu_traffic_test() +{ + local should_fail + local max_mtu + + RET=0 + + if ! ensure_compatible_min_max_mtu "max_mtu"; then + log_test_xfail "Topology has incompatible maximum MTU values" + return + fi + + max_mtu=$(min_max_mtu_get_if ${NETIFS[p1]} "max_mtu") + + should_fail=0 + mtu_set_all_if $max_mtu + mtu_test_ping4 $max_mtu $should_fail + mtu_test_ping6 $max_mtu $should_fail + mtu_restore_all_if + + should_fail=1 + mtu_set_all_if $((max_mtu - 1)) + mtu_test_ping4 $max_mtu $should_fail + mtu_test_ping6 $max_mtu $should_fail + mtu_restore_all_if + + log_test "Test traffic, packet size is maximum MTU" +} + +min_mtu_config_test() +{ + local i + + RET=0 + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + local dev=${NETIFS[p$i]} + local min_mtu=$(min_max_mtu_get_if $dev "min_mtu") + local should_fail + + should_fail=0 + mtu_set_if $dev $min_mtu $should_fail + mtu_restore $dev + + should_fail=1 + mtu_set_if $dev $((min_mtu - 1)) $should_fail + mtu_restore $dev + done + + log_test "Test minimum MTU configuration" +} + +min_mtu_traffic_test() +{ + local should_fail=0 + local min_mtu + + RET=0 + + if ! ensure_compatible_min_max_mtu "min_mtu"; then + log_test_xfail "Topology has incompatible minimum MTU values" + return + fi + + min_mtu=$(min_max_mtu_get_if ${NETIFS[p1]} "min_mtu") + mtu_set_all_if $min_mtu + mtu_test_ping4 $min_mtu $should_fail + # Do not test minimum MTU with IPv6, as IPv6 requires higher MTU. + + mtu_restore_all_if + + log_test "Test traffic, packet size is minimum MTU" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh index 0266443601bc..921c733ee04f 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh @@ -74,7 +74,7 @@ test_span_gre_mac() RET=0 - mirror_install $swp1 $direction $tundev "matchall $tcflags" + mirror_install $swp1 $direction $tundev "matchall" icmp_capture_install h3-${tundev} "src_mac $src_mac dst_mac $dst_mac" mirror_test v$h1 192.0.2.1 192.0.2.2 h3-${tundev} 100 10 @@ -82,29 +82,29 @@ test_span_gre_mac() icmp_capture_uninstall h3-${tundev} mirror_uninstall $swp1 $direction - log_test "$direction $what: envelope MAC ($tcflags)" + log_test "$direction $what: envelope MAC" } test_two_spans() { RET=0 - mirror_install $swp1 ingress gt4 "matchall $tcflags" - mirror_install $swp1 egress gt6 "matchall $tcflags" - quick_test_span_gre_dir gt4 ingress - quick_test_span_gre_dir gt6 egress + mirror_install $swp1 ingress gt4 "matchall" + mirror_install $swp1 egress gt6 "matchall" + quick_test_span_gre_dir gt4 8 0 + quick_test_span_gre_dir gt6 0 8 mirror_uninstall $swp1 ingress - fail_test_span_gre_dir gt4 ingress - quick_test_span_gre_dir gt6 egress + fail_test_span_gre_dir gt4 8 0 + quick_test_span_gre_dir gt6 0 8 - mirror_install $swp1 ingress gt4 "matchall $tcflags" + mirror_install $swp1 ingress gt4 "matchall" mirror_uninstall $swp1 egress - quick_test_span_gre_dir gt4 ingress - fail_test_span_gre_dir gt6 egress + quick_test_span_gre_dir gt4 8 0 + fail_test_span_gre_dir gt6 0 8 mirror_uninstall $swp1 ingress - log_test "two simultaneously configured mirrors ($tcflags)" + log_test "two simultaneously configured mirrors" } test_gretap() @@ -131,30 +131,11 @@ test_ip6gretap_mac() test_span_gre_mac gt6 egress "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh index 6c257ec03756..e3cd48e18eeb 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh @@ -196,32 +196,11 @@ test_ip6gretap() full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap w/ UL" } -test_all() -{ - RET=0 - - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh index 04fd14b0a9b7..6c7bd33332c2 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh @@ -108,30 +108,11 @@ test_ip6gretap() full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh index f35313c76fac..909ec956a5e5 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh @@ -104,30 +104,11 @@ test_ip6gretap_stp() full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh index 0cf4c47a46f9..40ac9dd3aff1 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh @@ -104,30 +104,11 @@ test_ip6gretap() full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap" } -tests() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -tests - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - tests -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh index c53148b1dc63..fe4d7c906a70 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh @@ -227,10 +227,10 @@ test_lag_slave() RET=0 tc filter add dev $swp1 ingress pref 999 \ - proto 802.1q flower vlan_ethtype arp $tcflags \ + proto 802.1q flower vlan_ethtype arp \ action pass mirror_install $swp1 ingress gt4 \ - "proto 802.1q flower vlan_id 333 $tcflags" + "proto 802.1q flower vlan_id 333" # Test connectivity through $up_dev when $down_dev is set down. ip link set dev $down_dev down @@ -239,7 +239,7 @@ test_lag_slave() setup_wait_dev $host_dev $ARPING -I br1 192.0.2.130 -qfc 1 sleep 2 - mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 10 + mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 ">= 10" # Test lack of connectivity when both slaves are down. ip link set dev $up_dev down @@ -252,7 +252,7 @@ test_lag_slave() mirror_uninstall $swp1 ingress tc filter del dev $swp1 ingress pref 999 - log_test "$what ($tcflags)" + log_test "$what" } test_mirror_gretap_first() @@ -265,30 +265,11 @@ test_mirror_gretap_second() test_lag_slave $h4 $swp4 $swp3 "mirror to gretap: LAG second slave" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh index 5ea9d63915f7..65ae9d960c18 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh @@ -73,7 +73,7 @@ test_span_gre_ttl() RET=0 mirror_install $swp1 ingress $tundev \ - "prot ip flower $tcflags ip_prot icmp" + "prot ip flower ip_prot icmp" tc filter add dev $h3 ingress pref 77 prot $prot \ flower skip_hw ip_ttl 50 action pass @@ -81,13 +81,13 @@ test_span_gre_ttl() ip link set dev $tundev type $type ttl 50 sleep 2 - mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10 + mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 ">= 10" ip link set dev $tundev type $type ttl 100 tc filter del dev $h3 ingress pref 77 mirror_uninstall $swp1 ingress - log_test "$what: TTL change ($tcflags)" + log_test "$what: TTL change" } test_span_gre_tun_up() @@ -98,15 +98,15 @@ test_span_gre_tun_up() RET=0 ip link set dev $tundev down - mirror_install $swp1 ingress $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev ip link set dev $tundev up - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: tunnel down/up ($tcflags)" + log_test "$what: tunnel down/up" } test_span_gre_egress_up() @@ -118,8 +118,8 @@ test_span_gre_egress_up() RET=0 ip link set dev $swp3 down - mirror_install $swp1 ingress $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev # After setting the device up, wait for neighbor to get resolved so that # we can expect mirroring to work. @@ -127,10 +127,10 @@ test_span_gre_egress_up() setup_wait_dev $swp3 ping -c 1 -I $swp3 $remote_ip &>/dev/null - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: egress down/up ($tcflags)" + log_test "$what: egress down/up" } test_span_gre_remote_ip() @@ -144,14 +144,14 @@ test_span_gre_remote_ip() RET=0 ip link set dev $tundev type $type remote $wrong_ip - mirror_install $swp1 ingress $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev ip link set dev $tundev type $type remote $correct_ip - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: remote address change ($tcflags)" + log_test "$what: remote address change" } test_span_gre_tun_del() @@ -165,10 +165,10 @@ test_span_gre_tun_del() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev ip link del dev $tundev - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev tunnel_create $tundev $type $local_ip $remote_ip \ ttl 100 tos inherit $flags @@ -176,11 +176,11 @@ test_span_gre_tun_del() # Recreating the tunnel doesn't reestablish mirroring, so reinstall it # and verify it works for the follow-up tests. mirror_uninstall $swp1 ingress - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: tunnel deleted ($tcflags)" + log_test "$what: tunnel deleted" } test_span_gre_route_del() @@ -192,18 +192,18 @@ test_span_gre_route_del() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev ip route del $route dev $edev - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev ip route add $route dev $edev - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: underlay route removal ($tcflags)" + log_test "$what: underlay route removal" } test_ttl() @@ -244,30 +244,11 @@ test_route_del() test_span_gre_route_del gt6 $swp3 2001:db8:2::/64 "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh index 09389f3b9369..3a84f3ab5856 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh @@ -64,12 +64,19 @@ cleanup() test_span_gre_dir_acl() { - test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4 + local tundev=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + + test_span_gre_dir_ips "$tundev" "$forward_type" \ + "$backward_type" 192.0.2.3 192.0.2.4 } fail_test_span_gre_dir_acl() { - fail_test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4 + local tundev=$1; shift + + fail_test_span_gre_dir_ips "$tundev" 192.0.2.3 192.0.2.4 } full_test_span_gre_dir_acl() @@ -84,16 +91,15 @@ full_test_span_gre_dir_acl() RET=0 mirror_install $swp1 $direction $tundev \ - "protocol ip flower $tcflags dst_ip $match_dip" - fail_test_span_gre_dir $tundev $direction - test_span_gre_dir_acl "$tundev" "$direction" \ - "$forward_type" "$backward_type" + "protocol ip flower dst_ip $match_dip" + fail_test_span_gre_dir $tundev + test_span_gre_dir_acl "$tundev" "$forward_type" "$backward_type" mirror_uninstall $swp1 $direction # Test lack of mirroring after ACL mirror is uninstalled. - fail_test_span_gre_dir_acl "$tundev" "$direction" + fail_test_span_gre_dir_acl "$tundev" - log_test "$direction $what ($tcflags)" + log_test "$direction $what" } test_gretap() @@ -108,30 +114,11 @@ test_ip6gretap() full_test_span_gre_dir_acl gt6 egress 0 8 192.0.2.3 "ACL mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh index 9edf4cb104a8..1261e6f46e34 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh @@ -37,8 +37,14 @@ # | \ / | # | \____________________________________________/ | # | | | -# | + lag2 (team) | -# | 192.0.2.130/28 | +# | + lag2 (team) ------> + gt4-dst (gretap) | +# | 192.0.2.130/28 loc=192.0.2.130 | +# | rem=192.0.2.129 | +# | ttl=100 | +# | tos=inherit | +# | | +# | | +# | | # | | # +---------------------------------------------------------------------------+ @@ -50,9 +56,6 @@ ALL_TESTS=" NUM_NETIFS=6 source lib.sh source mirror_lib.sh -source mirror_gre_lib.sh - -require_command $ARPING vlan_host_create() { @@ -122,16 +125,21 @@ h3_create() { vrf_create vrf-h3 ip link set dev vrf-h3 up - tc qdisc add dev $h3 clsact - tc qdisc add dev $h4 clsact h3_create_team + + tunnel_create gt4-dst gretap 192.0.2.130 192.0.2.129 \ + ttl 100 tos inherit + ip link set dev gt4-dst master vrf-h3 + tc qdisc add dev gt4-dst clsact } h3_destroy() { + tc qdisc del dev gt4-dst clsact + ip link set dev gt4-dst nomaster + tunnel_destroy gt4-dst + h3_destroy_team - tc qdisc del dev $h4 clsact - tc qdisc del dev $h3 clsact ip link set dev vrf-h3 down vrf_destroy vrf-h3 } @@ -188,18 +196,12 @@ setup_prepare() h2_create h3_create switch_create - - trap_install $h3 ingress - trap_install $h4 ingress } cleanup() { pre_cleanup - trap_uninstall $h4 ingress - trap_uninstall $h3 ingress - switch_destroy h3_destroy h2_destroy @@ -218,7 +220,8 @@ test_lag_slave() RET=0 mirror_install $swp1 ingress gt4 \ - "proto 802.1q flower vlan_id 333 $tcflags" + "proto 802.1q flower vlan_id 333" + vlan_capture_install gt4-dst "vlan_ethtype ipv4 ip_proto icmp type 8" # Move $down_dev away from the team. That will prompt change in # txability of the connected device, without changing its upness. The @@ -226,13 +229,14 @@ test_lag_slave() # other slave. ip link set dev $down_dev nomaster sleep 2 - mirror_test vrf-h1 192.0.2.1 192.0.2.18 $up_dev 1 10 + mirror_test vrf-h1 192.0.2.1 192.0.2.18 gt4-dst 100 10 # Test lack of connectivity when neither slave is txable. ip link set dev $up_dev nomaster sleep 2 - mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h3 1 0 - mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h4 1 0 + mirror_test vrf-h1 192.0.2.1 192.0.2.18 gt4-dst 100 0 + + vlan_capture_uninstall gt4-dst mirror_uninstall $swp1 ingress # Recreate H3's team device, because mlxsw, which this test is @@ -243,7 +247,7 @@ test_lag_slave() # Wait for ${h,swp}{3,4}. setup_wait - log_test "$what ($tcflags)" + log_test "$what" } test_mirror_gretap_first() @@ -256,30 +260,11 @@ test_mirror_gretap_second() test_lag_slave $h4 $h3 "mirror to gretap: LAG second slave" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh index 0c36546e131e..20078cc55f24 100644 --- a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh @@ -5,22 +5,34 @@ source "$net_forwarding_dir/mirror_lib.sh" quick_test_span_gre_dir_ips() { local tundev=$1; shift + local ip1=$1; shift + local ip2=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift - do_test_span_dir_ips 10 h3-$tundev "$@" + do_test_span_dir_ips 10 h3-$tundev "$ip1" "$ip2" \ + "$forward_type" "$backward_type" } fail_test_span_gre_dir_ips() { local tundev=$1; shift + local ip1=$1; shift + local ip2=$1; shift - do_test_span_dir_ips 0 h3-$tundev "$@" + do_test_span_dir_ips 0 h3-$tundev "$ip1" "$ip2" } test_span_gre_dir_ips() { local tundev=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local ip1=$1; shift + local ip2=$1; shift - test_span_dir_ips h3-$tundev "$@" + test_span_dir_ips h3-$tundev "$forward_type" \ + "$backward_type" "$ip1" "$ip2" } full_test_span_gre_dir_ips() @@ -35,12 +47,12 @@ full_test_span_gre_dir_ips() RET=0 - mirror_install $swp1 $direction $tundev "matchall $tcflags" - test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \ + mirror_install $swp1 $direction $tundev "matchall" + test_span_dir_ips "h3-$tundev" "$forward_type" \ "$backward_type" "$ip1" "$ip2" mirror_uninstall $swp1 $direction - log_test "$direction $what ($tcflags)" + log_test "$direction $what" } full_test_span_gre_dir_vlan_ips() @@ -56,45 +68,63 @@ full_test_span_gre_dir_vlan_ips() RET=0 - mirror_install $swp1 $direction $tundev "matchall $tcflags" + mirror_install $swp1 $direction $tundev "matchall" - test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \ + test_span_dir_ips "h3-$tundev" "$forward_type" \ "$backward_type" "$ip1" "$ip2" tc filter add dev $h3 ingress pref 77 prot 802.1q \ flower $vlan_match \ action pass - mirror_test v$h1 $ip1 $ip2 $h3 77 10 + mirror_test v$h1 $ip1 $ip2 $h3 77 '>= 10' tc filter del dev $h3 ingress pref 77 mirror_uninstall $swp1 $direction - log_test "$direction $what ($tcflags)" + log_test "$direction $what" } quick_test_span_gre_dir() { - quick_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 + local tundev=$1; shift + local forward_type=${1-8}; shift + local backward_type=${1-0}; shift + + quick_test_span_gre_dir_ips "$tundev" 192.0.2.1 192.0.2.2 \ + "$forward_type" "$backward_type" } fail_test_span_gre_dir() { - fail_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 -} + local tundev=$1; shift -test_span_gre_dir() -{ - test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 + fail_test_span_gre_dir_ips "$tundev" 192.0.2.1 192.0.2.2 } full_test_span_gre_dir() { - full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 + local tundev=$1; shift + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local what=$1; shift + + full_test_span_gre_dir_ips "$tundev" "$direction" "$forward_type" \ + "$backward_type" "$what" 192.0.2.1 192.0.2.2 } full_test_span_gre_dir_vlan() { - full_test_span_gre_dir_vlan_ips "$@" 192.0.2.1 192.0.2.2 + local tundev=$1; shift + local direction=$1; shift + local vlan_match=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local what=$1; shift + + full_test_span_gre_dir_vlan_ips "$tundev" "$direction" "$vlan_match" \ + "$forward_type" "$backward_type" \ + "$what" 192.0.2.1 192.0.2.2 } full_test_span_gre_stp_ips() @@ -104,27 +134,39 @@ full_test_span_gre_stp_ips() local what=$1; shift local ip1=$1; shift local ip2=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift local h3mac=$(mac_get $h3) RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir_ips $tundev $ip1 $ip2 \ + "$forward_type" "$backward_type" bridge link set dev $nbpdev state disabled sleep 1 - fail_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + fail_test_span_gre_dir_ips $tundev $ip1 $ip2 bridge link set dev $nbpdev state forwarding sleep 1 - quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + quick_test_span_gre_dir_ips $tundev $ip1 $ip2 \ + "$forward_type" "$backward_type" mirror_uninstall $swp1 ingress - log_test "$what: STP state ($tcflags)" + log_test "$what: STP state" } full_test_span_gre_stp() { - full_test_span_gre_stp_ips "$@" 192.0.2.1 192.0.2.2 + local tundev=$1; shift + local nbpdev=$1; shift + local what=$1; shift + local forward_type=${1-8}; shift + local backward_type=${1-0}; shift + + full_test_span_gre_stp_ips "$tundev" "$nbpdev" "$what" \ + 192.0.2.1 192.0.2.2 \ + "$forward_type" "$backward_type" } diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh index fc0508e40fca..2cbfbecf25c8 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh @@ -60,41 +60,32 @@ test_span_gre_neigh() local addr=$1; shift local tundev=$1; shift local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift local what=$1; shift RET=0 ip neigh replace dev $swp3 $addr lladdr 00:11:22:33:44:55 - mirror_install $swp1 $direction $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 $direction $tundev "matchall" + fail_test_span_gre_dir $tundev "$forward_type" "$backward_type" ip neigh del dev $swp3 $addr - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev "$forward_type" "$backward_type" mirror_uninstall $swp1 $direction - log_test "$direction $what: neighbor change ($tcflags)" + log_test "$direction $what: neighbor change" } test_gretap() { - test_span_gre_neigh 192.0.2.130 gt4 ingress "mirror to gretap" - test_span_gre_neigh 192.0.2.130 gt4 egress "mirror to gretap" + test_span_gre_neigh 192.0.2.130 gt4 ingress 8 0 "mirror to gretap" + test_span_gre_neigh 192.0.2.130 gt4 egress 0 8 "mirror to gretap" } test_ip6gretap() { - test_span_gre_neigh 2001:db8:2::2 gt6 ingress "mirror to ip6gretap" - test_span_gre_neigh 2001:db8:2::2 gt6 egress "mirror to ip6gretap" -} - -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress + test_span_gre_neigh 2001:db8:2::2 gt6 ingress 8 0 "mirror to ip6gretap" + test_span_gre_neigh 2001:db8:2::2 gt6 egress 0 8 "mirror to ip6gretap" } trap cleanup EXIT @@ -102,14 +93,6 @@ trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh index 6f9ef1820e93..34bc646938e3 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh @@ -75,42 +75,31 @@ cleanup() test_gretap() { RET=0 - mirror_install $swp1 ingress gt4 "matchall $tcflags" + mirror_install $swp1 ingress gt4 "matchall" # For IPv4, test that there's no mirroring without the route directing # the traffic to tunnel remote address. Then add it and test that # mirroring starts. For IPv6 we can't test this due to the limitation # that routes for locally-specified IPv6 addresses can't be added. - fail_test_span_gre_dir gt4 ingress + fail_test_span_gre_dir gt4 ip route add 192.0.2.130/32 via 192.0.2.162 - quick_test_span_gre_dir gt4 ingress + quick_test_span_gre_dir gt4 ip route del 192.0.2.130/32 via 192.0.2.162 mirror_uninstall $swp1 ingress - log_test "mirror to gre with next-hop remote ($tcflags)" + log_test "mirror to gre with next-hop remote" } test_ip6gretap() { RET=0 - mirror_install $swp1 ingress gt6 "matchall $tcflags" - quick_test_span_gre_dir gt6 ingress + mirror_install $swp1 ingress gt6 "matchall" + quick_test_span_gre_dir gt6 mirror_uninstall $swp1 ingress - log_test "mirror to ip6gre with next-hop remote ($tcflags)" -} - -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress + log_test "mirror to ip6gre with next-hop remote" } trap cleanup EXIT @@ -118,14 +107,6 @@ trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh index 88cecdb9a861..63689928cb51 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh @@ -63,30 +63,11 @@ test_gretap() full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh index c8a9b5bd841f..1b902cc579f6 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh @@ -153,21 +153,21 @@ test_span_gre_forbidden_cpu() RET=0 # Run the pass-test first, to prime neighbor table. - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev # Now forbid the VLAN at the bridge and see it fail. bridge vlan del dev br1 vid 555 self sleep 1 - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev bridge vlan add dev br1 vid 555 self sleep 1 - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: vlan forbidden at a bridge ($tcflags)" + log_test "$what: vlan forbidden at a bridge" } test_gretap_forbidden_cpu() @@ -187,22 +187,22 @@ test_span_gre_forbidden_egress() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev bridge vlan del dev $swp3 vid 555 sleep 1 - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev bridge vlan add dev $swp3 vid 555 # Re-prime FDB $ARPING -I br1.555 192.0.2.130 -fqc 1 sleep 1 - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: vlan forbidden at a bridge egress ($tcflags)" + log_test "$what: vlan forbidden at a bridge egress" } test_gretap_forbidden_egress() @@ -223,30 +223,30 @@ test_span_gre_untagged_egress() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" + mirror_install $swp1 ingress $tundev "matchall" - quick_test_span_gre_dir $tundev ingress - quick_test_span_vlan_dir $h3 555 ingress "$ul_proto" + quick_test_span_gre_dir $tundev + quick_test_span_vlan_dir $h3 555 "$ul_proto" h3_addr_add_del del $h3.555 bridge vlan add dev $swp3 vid 555 pvid untagged h3_addr_add_del add $h3 sleep 5 - quick_test_span_gre_dir $tundev ingress - fail_test_span_vlan_dir $h3 555 ingress "$ul_proto" + quick_test_span_gre_dir $tundev + fail_test_span_vlan_dir $h3 555 "$ul_proto" h3_addr_add_del del $h3 bridge vlan add dev $swp3 vid 555 h3_addr_add_del add $h3.555 sleep 5 - quick_test_span_gre_dir $tundev ingress - quick_test_span_vlan_dir $h3 555 ingress "$ul_proto" + quick_test_span_gre_dir $tundev + quick_test_span_vlan_dir $h3 555 "$ul_proto" mirror_uninstall $swp1 ingress - log_test "$what: vlan untagged at a bridge egress ($tcflags)" + log_test "$what: vlan untagged at a bridge egress" } test_gretap_untagged_egress() @@ -267,19 +267,19 @@ test_span_gre_fdb_roaming() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev while ((RET == 0)); do bridge fdb del dev $swp3 $h3mac vlan 555 master 2>/dev/null bridge fdb add dev $swp2 $h3mac vlan 555 master static sleep 1 - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev if ! bridge fdb sh dev $swp2 vlan 555 master \ | grep -q $h3mac; then printf "TEST: %-60s [RETRY]\n" \ - "$what: MAC roaming ($tcflags)" + "$what: MAC roaming" # ARP or ND probably reprimed the FDB while the test # was running. We would get a spurious failure. RET=0 @@ -292,11 +292,11 @@ test_span_gre_fdb_roaming() # Re-prime FDB $ARPING -I br1.555 192.0.2.130 -fqc 1 sleep 1 - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: MAC roaming ($tcflags)" + log_test "$what: MAC roaming" } test_gretap_fdb_roaming() @@ -319,30 +319,11 @@ test_ip6gretap_stp() full_test_span_gre_stp gt6 $swp3 "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh index 3e8ebeff3019..6bf9d5ae933c 100644 --- a/tools/testing/selftests/net/forwarding/mirror_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh @@ -44,14 +44,17 @@ mirror_test() local type="icmp echoreq" fi + if [[ -z ${expect//[[:digit:]]/} ]]; then + expect="== $expect" + fi + local t0=$(tc_rule_stats_get $dev $pref) $MZ $proto $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \ -c 10 -d 100msec -t $type sleep 0.5 local t1=$(tc_rule_stats_get $dev $pref) local delta=$((t1 - t0)) - # Tolerate a couple stray extra packets. - ((expect <= delta && delta <= expect + 2)) + ((delta $expect)) check_err $? "Expected to capture $expect packets, got $delta." } @@ -59,36 +62,42 @@ do_test_span_dir_ips() { local expect=$1; shift local dev=$1; shift - local direction=$1; shift local ip1=$1; shift local ip2=$1; shift + local forward_type=${1-8}; shift + local backward_type=${1-0}; shift - icmp_capture_install $dev + icmp_capture_install $dev "type $forward_type" mirror_test v$h1 $ip1 $ip2 $dev 100 $expect + icmp_capture_uninstall $dev + + icmp_capture_install $dev "type $backward_type" mirror_test v$h2 $ip2 $ip1 $dev 100 $expect icmp_capture_uninstall $dev } quick_test_span_dir_ips() { - do_test_span_dir_ips 10 "$@" -} + local dev=$1; shift + local ip1=$1; shift + local ip2=$1; shift + local forward_type=${1-8}; shift + local backward_type=${1-0}; shift -fail_test_span_dir_ips() -{ - do_test_span_dir_ips 0 "$@" + do_test_span_dir_ips 10 "$dev" "$ip1" "$ip2" \ + "$forward_type" "$backward_type" } test_span_dir_ips() { local dev=$1; shift - local direction=$1; shift local forward_type=$1; shift local backward_type=$1; shift local ip1=$1; shift local ip2=$1; shift - quick_test_span_dir_ips "$dev" "$direction" "$ip1" "$ip2" + quick_test_span_dir_ips "$dev" "$ip1" "$ip2" \ + "$forward_type" "$backward_type" icmp_capture_install $dev "type $forward_type" mirror_test v$h1 $ip1 $ip2 $dev 100 10 @@ -99,14 +108,14 @@ test_span_dir_ips() icmp_capture_uninstall $dev } -fail_test_span_dir() -{ - fail_test_span_dir_ips "$@" 192.0.2.1 192.0.2.2 -} - test_span_dir() { - test_span_dir_ips "$@" 192.0.2.1 192.0.2.2 + local dev=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + + test_span_dir_ips "$dev" "$forward_type" "$backward_type" \ + 192.0.2.1 192.0.2.2 } do_test_span_vlan_dir_ips() @@ -114,7 +123,6 @@ do_test_span_vlan_dir_ips() local expect=$1; shift local dev=$1; shift local vid=$1; shift - local direction=$1; shift local ul_proto=$1; shift local ip1=$1; shift local ip2=$1; shift @@ -123,27 +131,50 @@ do_test_span_vlan_dir_ips() # The traffic is meant for local box anyway, so will be trapped to # kernel. vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype $ul_proto" - mirror_test v$h1 $ip1 $ip2 $dev 100 $expect - mirror_test v$h2 $ip2 $ip1 $dev 100 $expect + mirror_test v$h1 $ip1 $ip2 $dev 100 "$expect" + mirror_test v$h2 $ip2 $ip1 $dev 100 "$expect" vlan_capture_uninstall $dev } quick_test_span_vlan_dir_ips() { - do_test_span_vlan_dir_ips 10 "$@" + local dev=$1; shift + local vid=$1; shift + local ul_proto=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + do_test_span_vlan_dir_ips '>= 10' "$dev" "$vid" "$ul_proto" \ + "$ip1" "$ip2" } fail_test_span_vlan_dir_ips() { - do_test_span_vlan_dir_ips 0 "$@" + local dev=$1; shift + local vid=$1; shift + local ul_proto=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + do_test_span_vlan_dir_ips 0 "$dev" "$vid" "$ul_proto" "$ip1" "$ip2" } quick_test_span_vlan_dir() { - quick_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2 + local dev=$1; shift + local vid=$1; shift + local ul_proto=$1; shift + + quick_test_span_vlan_dir_ips "$dev" "$vid" "$ul_proto" \ + 192.0.2.1 192.0.2.2 } fail_test_span_vlan_dir() { - fail_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2 + local dev=$1; shift + local vid=$1; shift + local ul_proto=$1; shift + + fail_test_span_vlan_dir_ips "$dev" "$vid" "$ul_proto" \ + 192.0.2.1 192.0.2.2 } diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh index 0b44e148235e..2f150a414d38 100755 --- a/tools/testing/selftests/net/forwarding/mirror_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh @@ -40,12 +40,16 @@ setup_prepare() vlan_create $h2 111 v$h2 192.0.2.18/28 bridge vlan add dev $swp2 vid 111 + + trap_install $h3 ingress } cleanup() { pre_cleanup + trap_uninstall $h3 ingress + vlan_destroy $h2 111 vlan_destroy $h1 111 vlan_destroy $h3 555 @@ -63,11 +67,11 @@ test_vlan_dir() RET=0 - mirror_install $swp1 $direction $swp3.555 "matchall $tcflags" - test_span_dir "$h3.555" "$direction" "$forward_type" "$backward_type" + mirror_install $swp1 $direction $swp3.555 "matchall" + test_span_dir "$h3.555" "$forward_type" "$backward_type" mirror_uninstall $swp1 $direction - log_test "$direction mirror to vlan ($tcflags)" + log_test "$direction mirror to vlan" } test_vlan() @@ -84,14 +88,12 @@ test_tagged_vlan_dir() RET=0 - mirror_install $swp1 $direction $swp3.555 "matchall $tcflags" - do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" ip \ - 192.0.2.17 192.0.2.18 - do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" ip \ - 192.0.2.17 192.0.2.18 + mirror_install $swp1 $direction $swp3.555 "matchall" + do_test_span_vlan_dir_ips '>= 10' "$h3.555" 111 ip 192.0.2.17 192.0.2.18 + do_test_span_vlan_dir_ips 0 "$h3.555" 555 ip 192.0.2.17 192.0.2.18 mirror_uninstall $swp1 $direction - log_test "$direction mirror tagged to vlan ($tcflags)" + log_test "$direction mirror tagged to vlan" } test_tagged_vlan() @@ -100,32 +102,11 @@ test_tagged_vlan() test_tagged_vlan_dir egress 0 8 } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - trap_install $h3 ingress - - tests_run - - trap_uninstall $h3 ingress - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_mpath_seed.sh b/tools/testing/selftests/net/forwarding/router_mpath_seed.sh new file mode 100755 index 000000000000..314cb906c1eb --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_mpath_seed.sh @@ -0,0 +1,333 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-------------------------+ +-------------------------+ +# | H1 | | H2 | +# | $h1 + | | + $h2 | +# | 192.0.2.1/28 | | | | 192.0.2.34/28 | +# | 2001:db8:1::1/64 | | | | 2001:db8:3::2/64 | +# +-------------------|-----+ +-|-----------------------+ +# | | +# +-------------------|-----+ +-|-----------------------+ +# | R1 | | | | R2 | +# | $rp11 + | | + $rp21 | +# | 192.0.2.2/28 | | 192.0.2.33/28 | +# | 2001:db8:1::2/64 | | 2001:db8:3::1/64 | +# | | | | +# | $rp12 + | | + $rp22 | +# | 192.0.2.17/28 | | | | 192.0.2.18..27/28 | +# | 2001:db8:2::17/64 | | | | 2001:db8:2::18..27/64 | +# +-------------------|-----+ +-|-----------------------+ +# | | +# `----------' + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + test_mpath_seed_stability_ipv4 + test_mpath_seed_stability_ipv6 + test_mpath_seed_get + test_mpath_seed_ipv4 + test_mpath_seed_ipv6 +" +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + ip -4 route add 192.0.2.32/28 vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add 2001:db8:3::/64 vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del 2001:db8:3::/64 vrf v$h1 nexthop via 2001:db8:1::2 + ip -4 route del 192.0.2.32/28 vrf v$h1 nexthop via 192.0.2.2 + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.34/28 2001:db8:3::2/64 + ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.33 + ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:3::1 +} + +h2_destroy() +{ + ip -6 route del 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:3::1 + ip -4 route del 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.33 + simple_if_fini $h2 192.0.2.34/28 2001:db8:3::2/64 +} + +router1_create() +{ + simple_if_init $rp11 192.0.2.2/28 2001:db8:1::2/64 + __simple_if_init $rp12 v$rp11 192.0.2.17/28 2001:db8:2::17/64 +} + +router1_destroy() +{ + __simple_if_fini $rp12 192.0.2.17/28 2001:db8:2::17/64 + simple_if_fini $rp11 192.0.2.2/28 2001:db8:1::2/64 +} + +router2_create() +{ + simple_if_init $rp21 192.0.2.33/28 2001:db8:3::1/64 + __simple_if_init $rp22 v$rp21 192.0.2.18/28 2001:db8:2::18/64 + ip -4 route add 192.0.2.0/28 vrf v$rp21 nexthop via 192.0.2.17 + ip -6 route add 2001:db8:1::/64 vrf v$rp21 nexthop via 2001:db8:2::17 +} + +router2_destroy() +{ + ip -6 route del 2001:db8:1::/64 vrf v$rp21 nexthop via 2001:db8:2::17 + ip -4 route del 192.0.2.0/28 vrf v$rp21 nexthop via 192.0.2.17 + __simple_if_fini $rp22 192.0.2.18/28 2001:db8:2::18/64 + simple_if_fini $rp21 192.0.2.33/28 2001:db8:3::1/64 +} + +nexthops_create() +{ + local i + for i in $(seq 10); do + ip nexthop add id $((1000 + i)) via 192.0.2.18 dev $rp12 + ip nexthop add id $((2000 + i)) via 2001:db8:2::18 dev $rp12 + done + + ip nexthop add id 1000 group $(seq -s / 1001 1010) hw_stats on + ip nexthop add id 2000 group $(seq -s / 2001 2010) hw_stats on + ip -4 route add 192.0.2.32/28 vrf v$rp11 nhid 1000 + ip -6 route add 2001:db8:3::/64 vrf v$rp11 nhid 2000 +} + +nexthops_destroy() +{ + local i + + ip -6 route del 2001:db8:3::/64 vrf v$rp11 nhid 2000 + ip -4 route del 192.0.2.32/28 vrf v$rp11 nhid 1000 + ip nexthop del id 2000 + ip nexthop del id 1000 + + for i in $(seq 10 -1 1); do + ip nexthop del id $((2000 + i)) + ip nexthop del id $((1000 + i)) + done +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp11=${NETIFS[p2]} + + rp12=${NETIFS[p3]} + rp22=${NETIFS[p4]} + + rp21=${NETIFS[p5]} + h2=${NETIFS[p6]} + + sysctl_save net.ipv4.fib_multipath_hash_seed + + vrf_prepare + + h1_create + h2_create + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + nexthops_destroy + router2_destroy + router1_destroy + h2_destroy + h1_destroy + + vrf_cleanup + + sysctl_restore net.ipv4.fib_multipath_hash_seed +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.34 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:3::2 +} + +test_mpath_seed_get() +{ + RET=0 + + local i + for ((i = 0; i < 100; i++)); do + local seed_w=$((999331 * i)) + sysctl -qw net.ipv4.fib_multipath_hash_seed=$seed_w + local seed_r=$(sysctl -n net.ipv4.fib_multipath_hash_seed) + ((seed_r == seed_w)) + check_err $? "mpath seed written as $seed_w, but read as $seed_r" + done + + log_test "mpath seed set/get" +} + +nh_stats_snapshot() +{ + local group_id=$1; shift + + ip -j -s -s nexthop show id $group_id | + jq -c '[.[].group_stats | sort_by(.id) | .[].packets]' +} + +get_active_nh() +{ + local s0=$1; shift + local s1=$1; shift + + jq -n --argjson s0 "$s0" --argjson s1 "$s1" -f /dev/stdin <<-"EOF" + [range($s0 | length)] | + map($s1[.] - $s0[.]) | + map(if . > 8 then 1 else 0 end) | + index(1) + EOF +} + +probe_nh() +{ + local group_id=$1; shift + local -a mz=("$@") + + local s0=$(nh_stats_snapshot $group_id) + "${mz[@]}" + local s1=$(nh_stats_snapshot $group_id) + + get_active_nh "$s0" "$s1" +} + +probe_seed() +{ + local group_id=$1; shift + local seed=$1; shift + local -a mz=("$@") + + sysctl -qw net.ipv4.fib_multipath_hash_seed=$seed + probe_nh "$group_id" "${mz[@]}" +} + +test_mpath_seed() +{ + local group_id=$1; shift + local what=$1; shift + local -a mz=("$@") + local ii + + RET=0 + + local -a tally=(0 0 0 0 0 0 0 0 0 0) + for ((ii = 0; ii < 100; ii++)); do + local act=$(probe_seed $group_id $((999331 * ii)) "${mz[@]}") + ((tally[act]++)) + done + + local tally_str="${tally[@]}" + for ((ii = 0; ii < ${#tally[@]}; ii++)); do + ((tally[ii] > 0)) + check_err $? "NH #$ii not hit, tally='$tally_str'" + done + + log_test "mpath seed $what" + sysctl -qw net.ipv4.fib_multipath_hash_seed=0 +} + +test_mpath_seed_ipv4() +{ + test_mpath_seed 1000 IPv4 \ + $MZ $h1 -A 192.0.2.1 -B 192.0.2.34 -q \ + -p 64 -d 0 -c 10 -t udp +} + +test_mpath_seed_ipv6() +{ + test_mpath_seed 2000 IPv6 \ + $MZ -6 $h1 -A 2001:db8:1::1 -B 2001:db8:3::2 -q \ + -p 64 -d 0 -c 10 -t udp +} + +check_mpath_seed_stability() +{ + local seed=$1; shift + local act_0=$1; shift + local act_1=$1; shift + + ((act_0 == act_1)) + check_err $? "seed $seed: active NH moved from $act_0 to $act_1 after seed change" +} + +test_mpath_seed_stability() +{ + local group_id=$1; shift + local what=$1; shift + local -a mz=("$@") + + RET=0 + + local seed_0=0 + local seed_1=3221338814 + local seed_2=3735928559 + + # Initial active NH before touching the seed at all. + local act_ini=$(probe_nh $group_id "${mz[@]}") + + local act_0_0=$(probe_seed $group_id $seed_0 "${mz[@]}") + local act_1_0=$(probe_seed $group_id $seed_1 "${mz[@]}") + local act_2_0=$(probe_seed $group_id $seed_2 "${mz[@]}") + + local act_0_1=$(probe_seed $group_id $seed_0 "${mz[@]}") + local act_1_1=$(probe_seed $group_id $seed_1 "${mz[@]}") + local act_2_1=$(probe_seed $group_id $seed_2 "${mz[@]}") + + check_mpath_seed_stability initial $act_ini $act_0_0 + check_mpath_seed_stability $seed_0 $act_0_0 $act_0_1 + check_mpath_seed_stability $seed_1 $act_1_0 $act_1_1 + check_mpath_seed_stability $seed_2 $act_2_0 $act_2_1 + + log_test "mpath seed stability $what" + sysctl -qw net.ipv4.fib_multipath_hash_seed=0 +} + +test_mpath_seed_stability_ipv4() +{ + test_mpath_seed_stability 1000 IPv4 \ + $MZ $h1 -A 192.0.2.1 -B 192.0.2.34 -q \ + -p 64 -d 0 -c 10 -t udp +} + +test_mpath_seed_stability_ipv6() +{ + test_mpath_seed_stability 2000 IPv6 \ + $MZ -6 $h1 -A 2001:db8:1::1 -B 2001:db8:3::2 -q \ + -p 64 -d 0 -c 10 -t udp +} + +trap cleanup EXIT + +setup_prepare +setup_wait +nexthops_create + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index 6f0a2e452ba1..3f9d50f1ef9e 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -680,9 +680,9 @@ test_learning() local mac=de:ad:be:ef:13:37 local dst=192.0.2.100 - # Enable learning on the VxLAN device and set ageing time to 10 seconds - ip link set dev br1 type bridge ageing_time 1000 - ip link set dev vx1 type vxlan ageing 10 + # Enable learning on the VxLAN device and set ageing time to 30 seconds + ip link set dev br1 type bridge ageing_time 3000 + ip link set dev vx1 type vxlan ageing 30 ip link set dev vx1 type vxlan learning reapply_config @@ -740,7 +740,7 @@ test_learning() vxlan_flood_test $mac $dst 0 10 0 - sleep 20 + sleep 60 bridge fdb show brport vx1 | grep $mac | grep -q self check_fail $? diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index 3684b813b0f6..f5d207fc770a 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -152,6 +152,15 @@ setup_hsr_interfaces() ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad + ip -net "$ns1" link set address 00:11:22:00:01:01 dev ns1eth1 + ip -net "$ns1" link set address 00:11:22:00:01:02 dev ns1eth2 + + ip -net "$ns2" link set address 00:11:22:00:02:01 dev ns2eth1 + ip -net "$ns2" link set address 00:11:22:00:02:02 dev ns2eth2 + + ip -net "$ns3" link set address 00:11:22:00:03:01 dev ns3eth1 + ip -net "$ns3" link set address 00:11:22:00:03:02 dev ns3eth2 + # All Links up ip -net "$ns1" link set ns1eth1 up ip -net "$ns1" link set ns1eth2 up diff --git a/tools/testing/selftests/net/hsr/hsr_redbox.sh b/tools/testing/selftests/net/hsr/hsr_redbox.sh index 1f36785347c0..998103502d5d 100755 --- a/tools/testing/selftests/net/hsr/hsr_redbox.sh +++ b/tools/testing/selftests/net/hsr/hsr_redbox.sh @@ -96,6 +96,21 @@ setup_hsr_interfaces() ip -n "${ns4}" link set ns4eth1 up ip -n "${ns5}" link set ns5eth1 up + ip -net "$ns1" link set address 00:11:22:00:01:01 dev ns1eth1 + ip -net "$ns1" link set address 00:11:22:00:01:02 dev ns1eth2 + + ip -net "$ns2" link set address 00:11:22:00:02:01 dev ns2eth1 + ip -net "$ns2" link set address 00:11:22:00:02:02 dev ns2eth2 + ip -net "$ns2" link set address 00:11:22:00:02:03 dev ns2eth3 + + ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3eth1 + ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3eth2 + ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3eth3 + ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3br1 + + ip -net "$ns4" link set address 00:11:22:00:04:01 dev ns4eth1 + ip -net "$ns5" link set address 00:11:22:00:05:01 dev ns5eth1 + ip -net "${ns1}" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version ${HSRv} proto 0 ip -net "${ns2}" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 interlink ns2eth3 supervision 45 version ${HSRv} proto 0 diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 9155c914c064..d0219032f773 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -125,28 +125,36 @@ slowwait_for_counter() slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@" } +remove_ns_list() +{ + local item=$1 + local ns + local ns_list=("${NS_LIST[@]}") + NS_LIST=() + + for ns in "${ns_list[@]}"; do + if [ "${ns}" != "${item}" ]; then + NS_LIST+=("${ns}") + fi + done +} + cleanup_ns() { local ns="" - local errexit=0 local ret=0 - # disable errexit temporary - if [[ $- =~ "e" ]]; then - errexit=1 - set +e - fi - for ns in "$@"; do [ -z "${ns}" ] && continue - ip netns delete "${ns}" &> /dev/null + ip netns delete "${ns}" &> /dev/null || true if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then echo "Warn: Failed to remove namespace $ns" ret=1 + else + remove_ns_list "${ns}" fi done - [ $errexit -eq 1 ] && set -e return $ret } @@ -159,29 +167,30 @@ cleanup_all_ns() # setup_ns local remote setup_ns() { - local ns="" local ns_name="" local ns_list=() - local ns_exist= for ns_name in "$@"; do + # avoid conflicts with local var: internal error + if [ "${ns_name}" = "ns_name" ]; then + echo "Failed to setup namespace '${ns_name}': invalid name" + cleanup_ns "${ns_list[@]}" + exit $ksft_fail + fi + # Some test may setup/remove same netns multi times - if unset ${ns_name} 2> /dev/null; then - ns="${ns_name,,}-$(mktemp -u XXXXXX)" - eval readonly ${ns_name}="$ns" - ns_exist=false + if [ -z "${!ns_name}" ]; then + eval "${ns_name}=${ns_name,,}-$(mktemp -u XXXXXX)" else - eval ns='$'${ns_name} - cleanup_ns "$ns" - ns_exist=true + cleanup_ns "${!ns_name}" fi - if ! ip netns add "$ns"; then + if ! ip netns add "${!ns_name}"; then echo "Failed to create namespace $ns_name" cleanup_ns "${ns_list[@]}" return $ksft_skip fi - ip -n "$ns" link set lo up - ! $ns_exist && ns_list+=("$ns") + ip -n "${!ns_name}" link set lo up + ns_list+=("${!ns_name}") done NS_LIST+=("${ns_list[@]}") } @@ -190,10 +199,10 @@ tc_rule_stats_get() { local dev=$1; shift local pref=$1; shift - local dir=$1; shift + local dir=${1:-ingress}; shift local selector=${1:-.packets}; shift - tc -j -s filter show dev $dev ${dir:-ingress} pref $pref \ + tc -j -s filter show dev $dev $dir pref $pref \ | jq ".[1].options.actions[].stats$selector" } diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 4769b4eb1ea1..f26c20df9db4 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -6,6 +6,7 @@ import sys import time import traceback from .consts import KSFT_MAIN_NAME +from .utils import global_defer_queue KSFT_RESULT = None KSFT_RESULT_ALL = True @@ -57,6 +58,11 @@ def ksft_ge(a, b, comment=""): _fail("Check failed", a, "<", b, comment) +def ksft_lt(a, b, comment=""): + if a >= b: + _fail("Check failed", a, ">=", b, comment) + + class ksft_raises: def __init__(self, expected_type): self.exception = None @@ -103,6 +109,24 @@ def ktap_result(ok, cnt=1, case="", comment=""): print(res) +def ksft_flush_defer(): + global KSFT_RESULT + + i = 0 + qlen_start = len(global_defer_queue) + while global_defer_queue: + i += 1 + entry = global_defer_queue.pop() + try: + entry.exec_only() + except: + ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!") + tb = traceback.format_exc() + for line in tb.strip().split('\n'): + ksft_pr("Defer Exception|", line) + KSFT_RESULT = False + + def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cases = cases or [] @@ -122,32 +146,41 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): global KSFT_RESULT cnt = 0 + stop = False for case in cases: KSFT_RESULT = True cnt += 1 + comment = "" + cnt_key = "" + try: case(*args) except KsftSkipEx as e: - ktap_result(True, cnt, case, comment="SKIP " + str(e)) - totals['skip'] += 1 - continue + comment = "SKIP " + str(e) + cnt_key = 'skip' except KsftXfailEx as e: - ktap_result(True, cnt, case, comment="XFAIL " + str(e)) - totals['xfail'] += 1 - continue - except Exception as e: + comment = "XFAIL " + str(e) + cnt_key = 'xfail' + except BaseException as e: + stop |= isinstance(e, KeyboardInterrupt) tb = traceback.format_exc() for line in tb.strip().split('\n'): ksft_pr("Exception|", line) - ktap_result(False, cnt, case) - totals['fail'] += 1 - continue - - ktap_result(KSFT_RESULT, cnt, case) - if KSFT_RESULT: - totals['pass'] += 1 - else: - totals['fail'] += 1 + if stop: + ksft_pr("Stopping tests due to KeyboardInterrupt.") + KSFT_RESULT = False + cnt_key = 'fail' + + ksft_flush_defer() + + if not cnt_key: + cnt_key = 'pass' if KSFT_RESULT else 'fail' + + ktap_result(KSFT_RESULT, cnt, case, comment=comment) + totals[cnt_key] += 1 + + if stop: + break print( f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0" diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 0540ea24921d..72590c3f90f1 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -1,12 +1,18 @@ # SPDX-License-Identifier: GPL-2.0 +import errno import json as _json import random import re +import socket import subprocess import time +class CmdExitFailure(Exception): + pass + + class cmd: def __init__(self, comm, shell=True, fail=True, ns=None, background=False, host=None, timeout=5): if ns: @@ -41,8 +47,8 @@ class cmd: if self.proc.returncode != 0 and fail: if len(stderr) > 0 and stderr[-1] == "\n": stderr = stderr[:-1] - raise Exception("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" % - (self.proc.args, stdout, stderr)) + raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" % + (self.proc.args, stdout, stderr)) class bkg(cmd): @@ -60,6 +66,40 @@ class bkg(cmd): return self.process(terminate=self.terminate, fail=self.check_fail) +global_defer_queue = [] + + +class defer: + def __init__(self, func, *args, **kwargs): + global global_defer_queue + + if not callable(func): + raise Exception("defer created with un-callable object, did you call the function instead of passing its name?") + + self.func = func + self.args = args + self.kwargs = kwargs + + self._queue = global_defer_queue + self._queue.append(self) + + def __enter__(self): + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + return self.exec() + + def exec_only(self): + self.func(*self.args, **self.kwargs) + + def cancel(self): + self._queue.remove(self) + + def exec(self): + self.cancel() + self.exec_only() + + def tool(name, args, json=None, ns=None, host=None): cmd_str = name + ' ' if json: @@ -77,11 +117,24 @@ def ip(args, json=None, ns=None, host=None): return tool('ip', args, json=json, host=host) +def ethtool(args, json=None, ns=None, host=None): + return tool('ethtool', args, json=json, ns=ns, host=host) + + def rand_port(): """ - Get unprivileged port, for now just random, one day we may decide to check if used. + Get a random unprivileged port, try to make sure it's not already used. """ - return random.randint(10000, 65535) + for _ in range(1000): + port = random.randint(10000, 65535) + try: + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + s.bind(("", port)) + return port + except OSError as e: + if e.errno != errno.EADDRINUSE: + raise + raise Exception("Can't find any free unprivileged port") def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5): diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 2b66c5fa71eb..108aeeb84ef1 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2249,9 +2249,10 @@ remove_tests() if reset "remove invalid addresses"; then pm_nl_set_limits $ns1 3 3 pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 flags signal pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.14.1 flags signal - pm_nl_set_limits $ns2 3 3 + pm_nl_set_limits $ns2 2 2 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 6ffa9b7a3260..438280e68434 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -1,6 +1,9 @@ #! /bin/bash # SPDX-License-Identifier: GPL-2.0 +. "$(dirname "${0}")/../lib.sh" +. "$(dirname "${0}")/../net_helper.sh" + readonly KSFT_PASS=0 readonly KSFT_FAIL=1 readonly KSFT_SKIP=4 @@ -361,20 +364,7 @@ mptcp_lib_check_transfer() { # $1: ns, $2: port mptcp_lib_wait_local_port_listen() { - local listener_ns="${1}" - local port="${2}" - - local port_hex - port_hex="$(printf "%04X" "${port}")" - - local _ - for _ in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) \ - {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done + wait_local_port_listen "${@}" "tcp" } mptcp_lib_check_output() { @@ -438,17 +428,13 @@ mptcp_lib_check_tools() { } mptcp_lib_ns_init() { - local sec rndh - - sec=$(date +%s) - rndh=$(printf %x "${sec}")-$(mktemp -u XXXXXX) + if ! setup_ns "${@}"; then + mptcp_lib_pr_fail "Failed to setup namespaces ${*}" + exit ${KSFT_FAIL} + fi local netns for netns in "${@}"; do - eval "${netns}=${netns}-${rndh}" - - ip netns add "${!netns}" || exit ${KSFT_SKIP} - ip -net "${!netns}" link set lo up ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1 ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0 ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0 @@ -456,9 +442,10 @@ mptcp_lib_ns_init() { } mptcp_lib_ns_exit() { + cleanup_ns "${@}" + local netns for netns in "${@}"; do - ip netns del "${netns}" rm -f /tmp/"${netns}".{nstat,out} done } diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 9e2981f2d7f5..9cb05978269d 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -160,10 +160,12 @@ make_connection() local is_v6=$1 local app_port=$app4_port local connect_addr="10.0.1.1" + local client_addr="10.0.1.2" local listen_addr="0.0.0.0" if [ "$is_v6" = "v6" ] then connect_addr="dead:beef:1::1" + client_addr="dead:beef:1::2" listen_addr="::" app_port=$app6_port else @@ -206,6 +208,7 @@ make_connection() [ "$server_serverside" = 1 ] then test_pass + print_title "Connection info: ${client_addr}:${client_port} -> ${connect_addr}:${app_port}" else test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})" mptcp_lib_result_print_all_tap @@ -297,7 +300,7 @@ test_announce() ip netns exec "$ns2"\ ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\ ns2eth1 - print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, reuse port" + print_test "ADD_ADDR id:client 10.0.2.2 (ns2) => ns1, reuse port" sleep 0.5 verify_announce_event $server_evts $ANNOUNCED $server4_token "10.0.2.2" $client_addr_id \ "$client4_port" @@ -306,7 +309,7 @@ test_announce() :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl ann\ dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1 - print_test "ADD_ADDR6 id:${client_addr_id} dead:beef:2::2 (ns2) => ns1, reuse port" + print_test "ADD_ADDR6 id:client dead:beef:2::2 (ns2) => ns1, reuse port" sleep 0.5 verify_announce_event "$server_evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\ "$client_addr_id" "$client6_port" "v6" @@ -316,7 +319,7 @@ test_announce() client_addr_id=$((client_addr_id+1)) ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\ $client_addr_id dev ns2eth1 port $new4_port - print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, new port" + print_test "ADD_ADDR id:client+1 10.0.2.2 (ns2) => ns1, new port" sleep 0.5 verify_announce_event "$server_evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\ "$client_addr_id" "$new4_port" @@ -327,7 +330,7 @@ test_announce() # ADD_ADDR from the server to client machine reusing the subflow port ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id dev ns1eth2 - print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port" + print_test "ADD_ADDR id:server 10.0.2.1 (ns1) => ns2, reuse port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$app4_port" @@ -336,7 +339,7 @@ test_announce() :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\ $server_addr_id dev ns1eth2 - print_test "ADD_ADDR6 id:${server_addr_id} dead:beef:2::1 (ns1) => ns2, reuse port" + print_test "ADD_ADDR6 id:server dead:beef:2::1 (ns1) => ns2, reuse port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\ "$server_addr_id" "$app6_port" "v6" @@ -346,7 +349,7 @@ test_announce() server_addr_id=$((server_addr_id+1)) ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ $server_addr_id dev ns1eth2 port $new4_port - print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, new port" + print_test "ADD_ADDR id:server+1 10.0.2.1 (ns1) => ns2, new port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$new4_port" @@ -380,7 +383,7 @@ test_remove() local invalid_token=$(( client4_token - 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token $invalid_token id\ $client_addr_id > /dev/null 2>&1 - print_test "RM_ADDR id:${client_addr_id} ns2 => ns1, invalid token" + print_test "RM_ADDR id:client ns2 => ns1, invalid token" local type type=$(mptcp_lib_evts_get_info type "$server_evts") if [ "$type" = "" ] @@ -394,7 +397,7 @@ test_remove() local invalid_id=$(( client_addr_id + 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $invalid_id > /dev/null 2>&1 - print_test "RM_ADDR id:${invalid_id} ns2 => ns1, invalid id" + print_test "RM_ADDR id:client+1 ns2 => ns1, invalid id" type=$(mptcp_lib_evts_get_info type "$server_evts") if [ "$type" = "" ] then @@ -407,7 +410,7 @@ test_remove() :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $client_addr_id - print_test "RM_ADDR id:${client_addr_id} ns2 => ns1" + print_test "RM_ADDR id:client ns2 => ns1" sleep 0.5 verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" @@ -416,7 +419,7 @@ test_remove() client_addr_id=$(( client_addr_id - 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $client_addr_id - print_test "RM_ADDR id:${client_addr_id} ns2 => ns1" + print_test "RM_ADDR id:client-1 ns2 => ns1" sleep 0.5 verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" @@ -424,7 +427,7 @@ test_remove() :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\ $client_addr_id - print_test "RM_ADDR6 id:${client_addr_id} ns2 => ns1" + print_test "RM_ADDR6 id:client-1 ns2 => ns1" sleep 0.5 verify_remove_event "$server_evts" "$REMOVED" "$server6_token" "$client_addr_id" @@ -434,7 +437,7 @@ test_remove() # RM_ADDR from the server to client machine ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ $server_addr_id - print_test "RM_ADDR id:${server_addr_id} ns1 => ns2" + print_test "RM_ADDR id:server ns1 => ns2" sleep 0.5 verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" @@ -443,7 +446,7 @@ test_remove() server_addr_id=$(( server_addr_id - 1 )) ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ $server_addr_id - print_test "RM_ADDR id:${server_addr_id} ns1 => ns2" + print_test "RM_ADDR id:server-1 ns1 => ns2" sleep 0.5 verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" @@ -451,7 +454,7 @@ test_remove() :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\ $server_addr_id - print_test "RM_ADDR6 id:${server_addr_id} ns1 => ns2" + print_test "RM_ADDR6 id:server-1 ns1 => ns2" sleep 0.5 verify_remove_event "$client_evts" "$REMOVED" "$client6_token" "$server_addr_id" } @@ -479,8 +482,14 @@ verify_subflow_events() local locid local remid local info + local e_dport_txt - info="${e_saddr} (${e_from}) => ${e_daddr}:${e_dport} (${e_to})" + # only display the fixed ports + if [ "${e_dport}" -ge "${app4_port}" ] && [ "${e_dport}" -le "${app6_port}" ]; then + e_dport_txt=":${e_dport}" + fi + + info="${e_saddr} (${e_from}) => ${e_daddr}${e_dport_txt} (${e_to})" if [ "$e_type" = "$SUB_ESTABLISHED" ] then @@ -766,7 +775,7 @@ test_subflows_v4_v6_mix() :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server6_token" id\ $server_addr_id dev ns1eth2 - print_test "ADD_ADDR4 id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port" + print_test "ADD_ADDR4 id:server 10.0.2.1 (ns1) => ns2, reuse port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "10.0.2.1"\ "$server_addr_id" "$app6_port" @@ -861,7 +870,7 @@ test_listener() local listener_pid=$! sleep 0.5 - print_test "CREATE_LISTENER 10.0.2.2:$client4_port" + print_test "CREATE_LISTENER 10.0.2.2 (client port)" verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port # ADD_ADDR from client to server machine reusing the subflow port @@ -878,13 +887,14 @@ test_listener() mptcp_lib_kill_wait $listener_pid sleep 0.5 - print_test "CLOSE_LISTENER 10.0.2.2:$client4_port" + print_test "CLOSE_LISTENER 10.0.2.2 (client port)" verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port } print_title "Make connections" make_connection make_connection "v6" +print_title "Will be using address IDs ${client_addr_id} (client) and ${server_addr_id} (server)" test_announce test_remove diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index bdc03a2097e8..7ea5fb28c93d 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -85,6 +85,7 @@ static bool cfg_rx; static int cfg_runtime_ms = 4200; static int cfg_verbose; static int cfg_waittime_ms = 500; +static int cfg_notification_limit = 32; static bool cfg_zerocopy; static socklen_t cfg_alen; @@ -95,6 +96,7 @@ static char payload[IP_MAXPACKET]; static long packets, bytes, completions, expected_completions; static int zerocopied = -1; static uint32_t next_completion; +static uint32_t sends_since_notify; static unsigned long gettimeofday_ms(void) { @@ -208,6 +210,7 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain) error(1, errno, "send"); if (cfg_verbose && ret != len) fprintf(stderr, "send: ret=%u != %u\n", ret, len); + sends_since_notify++; if (len) { packets++; @@ -435,7 +438,7 @@ static bool do_recv_completion(int fd, int domain) /* Detect notification gaps. These should not happen often, if at all. * Gaps can occur due to drops, reordering and retransmissions. */ - if (lo != next_completion) + if (cfg_verbose && lo != next_completion) fprintf(stderr, "gap: %u..%u does not append to %u\n", lo, hi, next_completion); next_completion = hi + 1; @@ -460,6 +463,7 @@ static bool do_recv_completion(int fd, int domain) static void do_recv_completions(int fd, int domain) { while (do_recv_completion(fd, domain)) {} + sends_since_notify = 0; } /* Wait for all remaining completions on the errqueue */ @@ -549,6 +553,9 @@ static void do_tx(int domain, int type, int protocol) else do_sendmsg(fd, &msg, cfg_zerocopy, domain); + if (cfg_zerocopy && sends_since_notify >= cfg_notification_limit) + do_recv_completions(fd, domain); + while (!do_poll(fd, POLLOUT)) { if (cfg_zerocopy) do_recv_completions(fd, domain); @@ -708,7 +715,7 @@ static void parse_opts(int argc, char **argv) cfg_payload_len = max_payload_len; - while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) { + while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) { switch (c) { case '4': if (cfg_family != PF_UNSPEC) @@ -736,6 +743,9 @@ static void parse_opts(int argc, char **argv) if (cfg_ifindex == 0) error(1, errno, "invalid iface: %s", optarg); break; + case 'l': + cfg_notification_limit = strtoul(optarg, NULL, 0); + break; case 'm': cfg_cork_mixed = true; break; diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index 8538f08c64c2..c61d23a8c88d 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -375,6 +375,42 @@ EOF wait 2>/dev/null } +test_queue_removal() +{ + read tainted_then < /proc/sys/kernel/tainted + + ip netns exec "$ns1" nft -f - <<EOF +flush ruleset +table ip filter { + chain output { + type filter hook output priority 0; policy accept; + ip protocol icmp queue num 0 + } +} +EOF + ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 -t "$timeout" & + local nfqpid=$! + + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 0 + + ip netns exec "$ns1" ping -w 2 -f -c 10 127.0.0.1 -q >/dev/null + kill $nfqpid + + ip netns exec "$ns1" nft flush ruleset + + if [ "$tainted_then" -ne 0 ];then + return + fi + + read tainted_now < /proc/sys/kernel/tainted + if [ "$tainted_now" -eq 0 ];then + echo "PASS: queue program exiting while packets queued" + else + echo "TAINT: queue program exiting while packets queued" + ret=1 + fi +} + ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null @@ -413,5 +449,6 @@ test_tcp_localhost test_tcp_localhost_connectclose test_tcp_localhost_requeue test_icmp_vrf +test_queue_removal exit $ret diff --git a/tools/testing/selftests/net/netns-sysctl.sh b/tools/testing/selftests/net/netns-sysctl.sh new file mode 100755 index 000000000000..45c34a3b9aae --- /dev/null +++ b/tools/testing/selftests/net/netns-sysctl.sh @@ -0,0 +1,40 @@ +#!/bin/bash -e +# SPDX-License-Identifier: GPL-2.0 +# +# This test checks that the network buffer sysctls are present +# in a network namespaces, and that they are readonly. + +source lib.sh + +cleanup() { + cleanup_ns $test_ns +} + +trap cleanup EXIT + +fail() { + echo "ERROR: $*" >&2 + exit 1 +} + +setup_ns test_ns + +for sc in {r,w}mem_{default,max}; do + # check that this is writable in a netns + [ -w "/proc/sys/net/core/$sc" ] || + fail "$sc isn't writable in the init netns!" + + # change the value in the host netns + sysctl -qw "net.core.$sc=300000" || + fail "Can't write $sc in init netns!" + + # check that the value is read from the init netns + [ "$(ip netns exec $test_ns sysctl -n "net.core.$sc")" -eq 300000 ] || + fail "Value for $sc mismatch!" + + # check that this isn't writable in a netns + ip netns exec $test_ns [ -w "/proc/sys/net/core/$sc" ] && + fail "$sc is writable in a netns!" +done + +echo 'Test passed OK' diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 5cae53543849..cc0bfae2bafa 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # OVS kernel module self tests @@ -11,6 +11,11 @@ ksft_skip=4 PAUSE_ON_FAIL=no VERBOSE=0 TRACING=0 +WAIT_TIMEOUT=5 + +if test "X$KSFT_MACHINE_SLOW" == "Xyes"; then + WAIT_TIMEOUT=10 +fi tests=" arp_ping eth-arp: Basic arp ping between two NS @@ -20,10 +25,37 @@ tests=" nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT netlink_checks ovsnl: validate netlink attrs and settings upcall_interfaces ovs: test the upcall interfaces - drop_reason drop: test drop reasons are emitted" + drop_reason drop: test drop reasons are emitted + psample psample: Sampling packets with psample" info() { - [ $VERBOSE = 0 ] || echo $* + [ "${ovs_dir}" != "" ] && + echo "`date +"[%m-%d %H:%M:%S]"` $*" >> ${ovs_dir}/debug.log + [ $VERBOSE = 0 ] || echo $* +} + +ovs_wait() { + info "waiting $WAIT_TIMEOUT s for: $@" + + if "$@" ; then + info "wait succeeded immediately" + return 0 + fi + + # A quick re-check helps speed up small races in fast systems. + # However, fractional sleeps might not necessarily work. + local start=0 + sleep 0.1 || { sleep 1; start=1; } + + for (( i=start; i<WAIT_TIMEOUT; i++ )); do + if "$@" ; then + info "wait succeeded after $i seconds" + return 0 + fi + sleep 1 + done + info "wait failed after $i seconds" + return 1 } ovs_base=`pwd` @@ -65,7 +97,8 @@ ovs_setenv() { ovs_sbx() { if test "X$2" != X; then - (ovs_setenv $1; shift; "$@" >> ${ovs_dir}/debug.log) + (ovs_setenv $1; shift; + info "run cmd: $@"; "$@" >> ${ovs_dir}/debug.log) else ovs_setenv $1 fi @@ -102,12 +135,21 @@ ovs_netns_spawn_daemon() { shift netns=$1 shift - info "spawning cmd: $*" - ip netns exec $netns $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr & + if [ "$netns" == "_default" ]; then + $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr & + else + ip netns exec $netns $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr & + fi pid=$! ovs_sbx "$sbx" on_exit "kill -TERM $pid 2>/dev/null" } +ovs_spawn_daemon() { + sbx=$1 + shift + ovs_netns_spawn_daemon $sbx "_default" $* +} + ovs_add_netns_and_veths () { info "Adding netns attached: sbx:$1 dp:$2 {$3, $4, $5}" ovs_sbx "$1" ip netns add "$3" || return 1 @@ -139,7 +181,7 @@ ovs_add_flow () { info "Adding flow to DP: sbx:$1 br:$2 flow:$3 act:$4" ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-flow "$2" "$3" "$4" if [ $? -ne 0 ]; then - echo "Flow [ $3 : $4 ] failed" >> ${ovs_dir}/debug.log + info "Flow [ $3 : $4 ] failed" return 1 fi return 0 @@ -170,6 +212,19 @@ ovs_drop_reason_count() return `echo "$perf_output" | grep "$pattern" | wc -l` } +ovs_test_flow_fails () { + ERR_MSG="Flow actions may not be safe on all matching packets" + + PRE_TEST=$(dmesg | grep -c "${ERR_MSG}") + ovs_add_flow $@ &> /dev/null $@ && return 1 + POST_TEST=$(dmesg | grep -c "${ERR_MSG}") + + if [ "$PRE_TEST" == "$POST_TEST" ]; then + return 1 + fi + return 0 +} + usage() { echo echo "$0 [OPTIONS] [TEST]..." @@ -184,6 +239,91 @@ usage() { exit 1 } + +# psample test +# - use psample to observe packets +test_psample() { + sbx_add "test_psample" || return $? + + # Add a datapath with per-vport dispatching. + ovs_add_dp "test_psample" psample -V 2:1 || return 1 + + info "create namespaces" + ovs_add_netns_and_veths "test_psample" "psample" \ + client c0 c1 172.31.110.10/24 -u || return 1 + ovs_add_netns_and_veths "test_psample" "psample" \ + server s0 s1 172.31.110.20/24 -u || return 1 + + # Check if psample actions can be configured. + ovs_add_flow "test_psample" psample \ + 'in_port(1),eth(),eth_type(0x0806),arp()' 'psample(group=1)' &> /dev/null + if [ $? == 1 ]; then + info "no support for psample - skipping" + ovs_exit_sig + return $ksft_skip + fi + + ovs_del_flows "test_psample" psample + + # Test action verification. + OLDIFS=$IFS + IFS='*' + min_key='in_port(1),eth(),eth_type(0x0800),ipv4()' + for testcase in \ + "cookie to large"*"psample(group=1,cookie=1615141312111009080706050403020100)" \ + "no group with cookie"*"psample(cookie=abcd)" \ + "no group"*"psample()"; + do + set -- $testcase; + ovs_test_flow_fails "test_psample" psample $min_key $2 + if [ $? == 1 ]; then + info "failed - $1" + return 1 + fi + done + IFS=$OLDIFS + + ovs_del_flows "test_psample" psample + # Allow ARP + ovs_add_flow "test_psample" psample \ + 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 + ovs_add_flow "test_psample" psample \ + 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1 + + # Sample first 14 bytes of all traffic. + ovs_add_flow "test_psample" psample \ + "in_port(1),eth(),eth_type(0x0800),ipv4()" \ + "trunc(14),psample(group=1,cookie=c0ffee),2" + + # Sample all traffic. In this case, use a sample() action with both + # psample and an upcall emulating simultaneous local sampling and + # sFlow / IPFIX. + nlpid=$(grep -E "listening on upcall packet handler" \ + $ovs_dir/s0.out | cut -d ":" -f 2 | tr -d ' ') + + ovs_add_flow "test_psample" psample \ + "in_port(2),eth(),eth_type(0x0800),ipv4()" \ + "sample(sample=100%,actions(psample(group=2,cookie=eeff0c),userspace(pid=${nlpid},userdata=eeff0c))),1" + + # Record psample data. + ovs_spawn_daemon "test_psample" python3 $ovs_base/ovs-dpctl.py psample-events + ovs_wait grep -q "listening for psample events" ${ovs_dir}/stdout + + # Send a single ping. + ovs_sbx "test_psample" ip netns exec client ping -I c1 172.31.110.20 -c 1 || return 1 + + # We should have received one userspace action upcall and 2 psample packets. + ovs_wait grep -q "userspace action command" $ovs_dir/s0.out || return 1 + + # client -> server samples should only contain the first 14 bytes of the packet. + ovs_wait grep -qE "rate:4294967295,group:1,cookie:c0ffee data:[0-9a-f]{28}$" \ + $ovs_dir/stdout || return 1 + + ovs_wait grep -q "rate:4294967295,group:2,cookie:eeff0c" $ovs_dir/stdout || return 1 + + return 0 +} + # drop_reason test # - drop packets and verify the right drop reason is reported test_drop_reason() { @@ -599,7 +739,8 @@ test_upcall_interfaces() { ovs_add_netns_and_veths "test_upcall_interfaces" ui0 upc left0 l0 \ 172.31.110.1/24 -u || return 1 - sleep 1 + ovs_wait grep -q "listening on upcall packet handler" ${ovs_dir}/left0.out + info "sending arping" ip netns exec upc arping -I l0 172.31.110.20 -c 1 \ >$ovs_dir/arping.stdout 2>$ovs_dir/arping.stderr @@ -613,16 +754,20 @@ run_test() { tname="$1" tdesc="$2" - if ! lsmod | grep openvswitch >/dev/null 2>&1; then - stdbuf -o0 printf "TEST: %-60s [NOMOD]\n" "${tdesc}" - return $ksft_skip - fi - if python3 ovs-dpctl.py -h 2>&1 | \ grep -E "Need to (install|upgrade) the python" >/dev/null 2>&1; then stdbuf -o0 printf "TEST: %-60s [PYLIB]\n" "${tdesc}" return $ksft_skip fi + + python3 ovs-dpctl.py show >/dev/null 2>&1 || \ + echo "[DPCTL] show exception." + + if ! lsmod | grep openvswitch >/dev/null 2>&1; then + stdbuf -o0 printf "TEST: %-60s [NOMOD]\n" "${tdesc}" + return $ksft_skip + fi + printf "TEST: %-60s [START]\n" "${tname}" unset IFS diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 1dd057afd3fb..8a0396bfaf99 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -8,8 +8,10 @@ import argparse import errno import ipaddress import logging +import math import multiprocessing import re +import socket import struct import sys import time @@ -26,13 +28,16 @@ try: from pyroute2.netlink import genlmsg from pyroute2.netlink import nla from pyroute2.netlink import nlmsg_atoms + from pyroute2.netlink.event import EventSocket from pyroute2.netlink.exceptions import NetlinkError from pyroute2.netlink.generic import GenericNetlinkSocket + from pyroute2.netlink.nlsocket import Marshal import pyroute2 + import pyroute2.iproute except ModuleNotFoundError: print("Need to install the python pyroute2 package >= 0.6.") - sys.exit(0) + sys.exit(1) OVS_DATAPATH_FAMILY = "ovs_datapath" @@ -58,6 +63,7 @@ OVS_FLOW_CMD_DEL = 2 OVS_FLOW_CMD_GET = 3 OVS_FLOW_CMD_SET = 4 +UINT32_MAX = 0xFFFFFFFF def macstr(mac): outstr = ":".join(["%02X" % i for i in mac]) @@ -198,6 +204,18 @@ def convert_ipv4(data): return int(ipaddress.IPv4Address(ip)), int(ipaddress.IPv4Address(mask)) +def convert_ipv6(data): + ip, _, mask = data.partition('/') + + if not ip: + ip = mask = 0 + elif not mask: + mask = 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' + elif mask.isdigit(): + mask = ipaddress.IPv6Network("::/" + mask).hostmask + + return ipaddress.IPv6Address(ip).packed, ipaddress.IPv6Address(mask).packed + def convert_int(size): def convert_int_sized(data): value, _, mask = data.partition('/') @@ -267,6 +285,75 @@ def parse_extract_field( return str_skipped, data +def parse_attrs(actstr, attr_desc): + """Parses the given action string and returns a list of netlink + attributes based on a list of attribute descriptions. + + Each element in the attribute description list is a tuple such as: + (name, attr_name, parse_func) + where: + name: is the string representing the attribute + attr_name: is the name of the attribute as defined in the uAPI. + parse_func: is a callable accepting a string and returning either + a single object (the parsed attribute value) or a tuple of + two values (the parsed attribute value and the remaining string) + + Returns a list of attributes and the remaining string. + """ + def parse_attr(actstr, key, func): + actstr = actstr[len(key) :] + + if not func: + return None, actstr + + delim = actstr[0] + actstr = actstr[1:] + + if delim == "=": + pos = strcspn(actstr, ",)") + ret = func(actstr[:pos]) + else: + ret = func(actstr) + + if isinstance(ret, tuple): + (datum, actstr) = ret + else: + datum = ret + actstr = actstr[strcspn(actstr, ",)"):] + + if delim == "(": + if not actstr or actstr[0] != ")": + raise ValueError("Action contains unbalanced parentheses") + + actstr = actstr[1:] + + actstr = actstr[strspn(actstr, ", ") :] + + return datum, actstr + + attrs = [] + attr_desc = list(attr_desc) + while actstr and actstr[0] != ")" and attr_desc: + found = False + for i, (key, attr, func) in enumerate(attr_desc): + if actstr.startswith(key): + datum, actstr = parse_attr(actstr, key, func) + attrs.append([attr, datum]) + found = True + del attr_desc[i] + + if not found: + raise ValueError("Unknown attribute: '%s'" % actstr) + + actstr = actstr[strspn(actstr, ", ") :] + + if actstr[0] != ")": + raise ValueError("Action string contains extra garbage or has " + "unbalanced parenthesis: '%s'" % actstr) + + return attrs, actstr[1:] + + class ovs_dp_msg(genlmsg): # include the OVS version # We need a custom header rather than just being able to rely on @@ -282,15 +369,15 @@ class ovsactions(nla): ("OVS_ACTION_ATTR_UNSPEC", "none"), ("OVS_ACTION_ATTR_OUTPUT", "uint32"), ("OVS_ACTION_ATTR_USERSPACE", "userspace"), - ("OVS_ACTION_ATTR_SET", "none"), + ("OVS_ACTION_ATTR_SET", "ovskey"), ("OVS_ACTION_ATTR_PUSH_VLAN", "none"), ("OVS_ACTION_ATTR_POP_VLAN", "flag"), - ("OVS_ACTION_ATTR_SAMPLE", "none"), + ("OVS_ACTION_ATTR_SAMPLE", "sample"), ("OVS_ACTION_ATTR_RECIRC", "uint32"), ("OVS_ACTION_ATTR_HASH", "none"), ("OVS_ACTION_ATTR_PUSH_MPLS", "none"), ("OVS_ACTION_ATTR_POP_MPLS", "flag"), - ("OVS_ACTION_ATTR_SET_MASKED", "none"), + ("OVS_ACTION_ATTR_SET_MASKED", "ovskey"), ("OVS_ACTION_ATTR_CT", "ctact"), ("OVS_ACTION_ATTR_TRUNC", "uint32"), ("OVS_ACTION_ATTR_PUSH_ETH", "none"), @@ -304,8 +391,85 @@ class ovsactions(nla): ("OVS_ACTION_ATTR_ADD_MPLS", "none"), ("OVS_ACTION_ATTR_DEC_TTL", "none"), ("OVS_ACTION_ATTR_DROP", "uint32"), + ("OVS_ACTION_ATTR_PSAMPLE", "psample"), ) + class psample(nla): + nla_flags = NLA_F_NESTED + + nla_map = ( + ("OVS_PSAMPLE_ATTR_UNSPEC", "none"), + ("OVS_PSAMPLE_ATTR_GROUP", "uint32"), + ("OVS_PSAMPLE_ATTR_COOKIE", "array(uint8)"), + ) + + def dpstr(self, more=False): + args = "group=%d" % self.get_attr("OVS_PSAMPLE_ATTR_GROUP") + + cookie = self.get_attr("OVS_PSAMPLE_ATTR_COOKIE") + if cookie: + args += ",cookie(%s)" % \ + "".join(format(x, "02x") for x in cookie) + + return "psample(%s)" % args + + def parse(self, actstr): + desc = ( + ("group", "OVS_PSAMPLE_ATTR_GROUP", int), + ("cookie", "OVS_PSAMPLE_ATTR_COOKIE", + lambda x: list(bytearray.fromhex(x))) + ) + + attrs, actstr = parse_attrs(actstr, desc) + + for attr in attrs: + self["attrs"].append(attr) + + return actstr + + class sample(nla): + nla_flags = NLA_F_NESTED + + nla_map = ( + ("OVS_SAMPLE_ATTR_UNSPEC", "none"), + ("OVS_SAMPLE_ATTR_PROBABILITY", "uint32"), + ("OVS_SAMPLE_ATTR_ACTIONS", "ovsactions"), + ) + + def dpstr(self, more=False): + args = [] + + args.append("sample={:.2f}%".format( + 100 * self.get_attr("OVS_SAMPLE_ATTR_PROBABILITY") / + UINT32_MAX)) + + actions = self.get_attr("OVS_SAMPLE_ATTR_ACTIONS") + if actions: + args.append("actions(%s)" % actions.dpstr(more)) + + return "sample(%s)" % ",".join(args) + + def parse(self, actstr): + def parse_nested_actions(actstr): + subacts = ovsactions() + parsed_len = subacts.parse(actstr) + return subacts, actstr[parsed_len :] + + def percent_to_rate(percent): + percent = float(percent.strip('%')) + return int(math.floor(UINT32_MAX * (percent / 100.0) + .5)) + + desc = ( + ("sample", "OVS_SAMPLE_ATTR_PROBABILITY", percent_to_rate), + ("actions", "OVS_SAMPLE_ATTR_ACTIONS", parse_nested_actions), + ) + attrs, actstr = parse_attrs(actstr, desc) + + for attr in attrs: + self["attrs"].append(attr) + + return actstr + class ctact(nla): nla_flags = NLA_F_NESTED @@ -427,50 +591,77 @@ class ovsactions(nla): print_str += "userdata=" for f in self.get_attr("OVS_USERSPACE_ATTR_USERDATA"): print_str += "%x." % f - if self.get_attr("OVS_USERSPACE_ATTR_TUN_PORT") is not None: + if self.get_attr("OVS_USERSPACE_ATTR_EGRESS_TUN_PORT") is not None: print_str += "egress_tun_port=%d" % self.get_attr( - "OVS_USERSPACE_ATTR_TUN_PORT" + "OVS_USERSPACE_ATTR_EGRESS_TUN_PORT" ) print_str += ")" return print_str + def parse(self, actstr): + attrs_desc = ( + ("pid", "OVS_USERSPACE_ATTR_PID", int), + ("userdata", "OVS_USERSPACE_ATTR_USERDATA", + lambda x: list(bytearray.fromhex(x))), + ("egress_tun_port", "OVS_USERSPACE_ATTR_EGRESS_TUN_PORT", int) + ) + + attrs, actstr = parse_attrs(actstr, attrs_desc) + for attr in attrs: + self["attrs"].append(attr) + + return actstr + def dpstr(self, more=False): print_str = "" - for field in self.nla_map: + for field in self["attrs"]: if field[1] == "none" or self.get_attr(field[0]) is None: continue if print_str != "": print_str += "," - if field[1] == "uint32": - if field[0] == "OVS_ACTION_ATTR_OUTPUT": - print_str += "%d" % int(self.get_attr(field[0])) - elif field[0] == "OVS_ACTION_ATTR_RECIRC": - print_str += "recirc(0x%x)" % int(self.get_attr(field[0])) - elif field[0] == "OVS_ACTION_ATTR_TRUNC": - print_str += "trunc(%d)" % int(self.get_attr(field[0])) - elif field[0] == "OVS_ACTION_ATTR_DROP": - print_str += "drop(%d)" % int(self.get_attr(field[0])) - elif field[1] == "flag": - if field[0] == "OVS_ACTION_ATTR_CT_CLEAR": - print_str += "ct_clear" - elif field[0] == "OVS_ACTION_ATTR_POP_VLAN": - print_str += "pop_vlan" - elif field[0] == "OVS_ACTION_ATTR_POP_ETH": - print_str += "pop_eth" - elif field[0] == "OVS_ACTION_ATTR_POP_NSH": - print_str += "pop_nsh" - elif field[0] == "OVS_ACTION_ATTR_POP_MPLS": - print_str += "pop_mpls" + if field[0] == "OVS_ACTION_ATTR_OUTPUT": + print_str += "%d" % int(self.get_attr(field[0])) + elif field[0] == "OVS_ACTION_ATTR_RECIRC": + print_str += "recirc(0x%x)" % int(self.get_attr(field[0])) + elif field[0] == "OVS_ACTION_ATTR_TRUNC": + print_str += "trunc(%d)" % int(self.get_attr(field[0])) + elif field[0] == "OVS_ACTION_ATTR_DROP": + print_str += "drop(%d)" % int(self.get_attr(field[0])) + elif field[0] == "OVS_ACTION_ATTR_CT_CLEAR": + print_str += "ct_clear" + elif field[0] == "OVS_ACTION_ATTR_POP_VLAN": + print_str += "pop_vlan" + elif field[0] == "OVS_ACTION_ATTR_POP_ETH": + print_str += "pop_eth" + elif field[0] == "OVS_ACTION_ATTR_POP_NSH": + print_str += "pop_nsh" + elif field[0] == "OVS_ACTION_ATTR_POP_MPLS": + print_str += "pop_mpls" else: datum = self.get_attr(field[0]) if field[0] == "OVS_ACTION_ATTR_CLONE": print_str += "clone(" print_str += datum.dpstr(more) print_str += ")" + elif field[0] == "OVS_ACTION_ATTR_SET" or \ + field[0] == "OVS_ACTION_ATTR_SET_MASKED": + print_str += "set" + field = datum + mask = None + if field[0] == "OVS_ACTION_ATTR_SET_MASKED": + print_str += "_masked" + field = datum[0] + mask = datum[1] + print_str += "(" + print_str += field.dpstr(mask, more) + print_str += ")" else: - print_str += datum.dpstr(more) + try: + print_str += datum.dpstr(more) + except: + print_str += "{ATTR: %s not decoded}" % field[0] return print_str @@ -531,7 +722,7 @@ class ovsactions(nla): for flat_act in parse_flat_map: if parse_starts_block(actstr, flat_act[0], False): actstr = actstr[len(flat_act[0]):] - self["attrs"].append([flat_act[1]]) + self["attrs"].append([flat_act[1], True]) actstr = actstr[strspn(actstr, ", ") :] parsed = True @@ -544,6 +735,25 @@ class ovsactions(nla): self["attrs"].append(("OVS_ACTION_ATTR_CLONE", subacts)) actstr = actstr[parsedLen:] parsed = True + elif parse_starts_block(actstr, "set(", False): + parencount += 1 + k = ovskey() + actstr = actstr[len("set("):] + actstr = k.parse(actstr, None) + self["attrs"].append(("OVS_ACTION_ATTR_SET", k)) + if not actstr.startswith(")"): + actstr = ")" + actstr + parsed = True + elif parse_starts_block(actstr, "set_masked(", False): + parencount += 1 + k = ovskey() + m = ovskey() + actstr = actstr[len("set_masked("):] + actstr = k.parse(actstr, m) + self["attrs"].append(("OVS_ACTION_ATTR_SET_MASKED", [k, m])) + if not actstr.startswith(")"): + actstr = ")" + actstr + parsed = True elif parse_starts_block(actstr, "ct(", False): parencount += 1 actstr = actstr[len("ct(") :] @@ -637,6 +847,37 @@ class ovsactions(nla): self["attrs"].append(["OVS_ACTION_ATTR_CT", ctact]) parsed = True + elif parse_starts_block(actstr, "sample(", False): + sampleact = self.sample() + actstr = sampleact.parse(actstr[len("sample(") : ]) + self["attrs"].append(["OVS_ACTION_ATTR_SAMPLE", sampleact]) + parsed = True + + elif parse_starts_block(actstr, "psample(", False): + psampleact = self.psample() + actstr = psampleact.parse(actstr[len("psample(") : ]) + self["attrs"].append(["OVS_ACTION_ATTR_PSAMPLE", psampleact]) + parsed = True + + elif parse_starts_block(actstr, "userspace(", False): + uact = self.userspace() + actstr = uact.parse(actstr[len("userspace(") : ]) + self["attrs"].append(["OVS_ACTION_ATTR_USERSPACE", uact]) + parsed = True + + elif parse_starts_block(actstr, "trunc(", False): + parencount += 1 + actstr, val = parse_extract_field( + actstr, + "trunc(", + r"([0-9]+)", + int, + False, + None, + ) + self["attrs"].append(["OVS_ACTION_ATTR_TRUNC", val]) + parsed = True + actstr = actstr[strspn(actstr, ", ") :] while parencount > 0: parencount -= 1 @@ -675,7 +916,7 @@ class ovskey(nla): ("OVS_KEY_ATTR_ARP", "ovs_key_arp"), ("OVS_KEY_ATTR_ND", "ovs_key_nd"), ("OVS_KEY_ATTR_SKB_MARK", "uint32"), - ("OVS_KEY_ATTR_TUNNEL", "none"), + ("OVS_KEY_ATTR_TUNNEL", "ovs_key_tunnel"), ("OVS_KEY_ATTR_SCTP", "ovs_key_sctp"), ("OVS_KEY_ATTR_TCP_FLAGS", "be16"), ("OVS_KEY_ATTR_DP_HASH", "uint32"), @@ -907,21 +1148,21 @@ class ovskey(nla): "src", "src", lambda x: str(ipaddress.IPv6Address(x)), - lambda x: int.from_bytes(x, "big"), - lambda x: ipaddress.IPv6Address(x), + lambda x: ipaddress.IPv6Address(x).packed if x else 0, + convert_ipv6, ), ( "dst", "dst", lambda x: str(ipaddress.IPv6Address(x)), - lambda x: int.from_bytes(x, "big"), - lambda x: ipaddress.IPv6Address(x), + lambda x: ipaddress.IPv6Address(x).packed if x else 0, + convert_ipv6, ), - ("label", "label", "%d", int), - ("proto", "proto", "%d", int), - ("tclass", "tclass", "%d", int), - ("hlimit", "hlimit", "%d", int), - ("frag", "frag", "%d", int), + ("label", "label", "%d", lambda x: int(x) if x else 0), + ("proto", "proto", "%d", lambda x: int(x) if x else 0), + ("tclass", "tclass", "%d", lambda x: int(x) if x else 0), + ("hlimit", "hlimit", "%d", lambda x: int(x) if x else 0), + ("frag", "frag", "%d", lambda x: int(x) if x else 0), ) def __init__( @@ -1119,7 +1360,7 @@ class ovskey(nla): "target", "target", lambda x: str(ipaddress.IPv6Address(x)), - lambda x: int.from_bytes(x, "big"), + convert_ipv6, ), ("sll", "sll", macstr, lambda x: int.from_bytes(x, "big")), ("tll", "tll", macstr, lambda x: int.from_bytes(x, "big")), @@ -1204,13 +1445,13 @@ class ovskey(nla): "src", "src", lambda x: str(ipaddress.IPv6Address(x)), - lambda x: int.from_bytes(x, "big", convertmac), + convert_ipv6, ), ( "dst", "dst", lambda x: str(ipaddress.IPv6Address(x)), - lambda x: int.from_bytes(x, "big"), + convert_ipv6, ), ("tp_src", "tp_src", "%d", int), ("tp_dst", "tp_dst", "%d", int), @@ -1235,6 +1476,163 @@ class ovskey(nla): init=init, ) + class ovs_key_tunnel(nla): + nla_flags = NLA_F_NESTED + + nla_map = ( + ("OVS_TUNNEL_KEY_ATTR_ID", "be64"), + ("OVS_TUNNEL_KEY_ATTR_IPV4_SRC", "ipaddr"), + ("OVS_TUNNEL_KEY_ATTR_IPV4_DST", "ipaddr"), + ("OVS_TUNNEL_KEY_ATTR_TOS", "uint8"), + ("OVS_TUNNEL_KEY_ATTR_TTL", "uint8"), + ("OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT", "flag"), + ("OVS_TUNNEL_KEY_ATTR_CSUM", "flag"), + ("OVS_TUNNEL_KEY_ATTR_OAM", "flag"), + ("OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS", "array(uint32)"), + ("OVS_TUNNEL_KEY_ATTR_TP_SRC", "be16"), + ("OVS_TUNNEL_KEY_ATTR_TP_DST", "be16"), + ("OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS", "none"), + ("OVS_TUNNEL_KEY_ATTR_IPV6_SRC", "ipaddr"), + ("OVS_TUNNEL_KEY_ATTR_IPV6_DST", "ipaddr"), + ("OVS_TUNNEL_KEY_ATTR_PAD", "none"), + ("OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS", "none"), + ("OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE", "flag"), + ) + + def parse(self, flowstr, mask=None): + if not flowstr.startswith("tunnel("): + return None, None + + k = ovskey.ovs_key_tunnel() + if mask is not None: + mask = ovskey.ovs_key_tunnel() + + flowstr = flowstr[len("tunnel("):] + + v6_address = None + + fields = [ + ("tun_id=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_ID", + 0xffffffffffffffff, None, None), + + ("src=", r"([0-9a-fA-F\.]+)", str, + "OVS_TUNNEL_KEY_ATTR_IPV4_SRC", "255.255.255.255", "0.0.0.0", + False), + ("dst=", r"([0-9a-fA-F\.]+)", str, + "OVS_TUNNEL_KEY_ATTR_IPV4_DST", "255.255.255.255", "0.0.0.0", + False), + + ("ipv6_src=", r"([0-9a-fA-F:]+)", str, + "OVS_TUNNEL_KEY_ATTR_IPV6_SRC", + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", "::", True), + ("ipv6_dst=", r"([0-9a-fA-F:]+)", str, + "OVS_TUNNEL_KEY_ATTR_IPV6_DST", + "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", "::", True), + + ("tos=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TOS", 255, 0, + None), + ("ttl=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TTL", 255, 0, + None), + + ("tp_src=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TP_SRC", + 65535, 0, None), + ("tp_dst=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TP_DST", + 65535, 0, None), + ] + + forced_include = ["OVS_TUNNEL_KEY_ATTR_TTL"] + + for prefix, regex, typ, attr_name, mask_val, default_val, v46_flag in fields: + flowstr, value = parse_extract_field(flowstr, prefix, regex, typ, False) + if not attr_name: + raise Exception("Bad list value in tunnel fields") + + if value is None and attr_name in forced_include: + value = default_val + mask_val = default_val + + if value is not None: + if v46_flag is not None: + if v6_address is None: + v6_address = v46_flag + if v46_flag != v6_address: + raise ValueError("Cannot mix v6 and v4 addresses") + k["attrs"].append([attr_name, value]) + if mask is not None: + mask["attrs"].append([attr_name, mask_val]) + else: + if v46_flag is not None: + if v6_address is None or v46_flag != v6_address: + continue + if mask is not None: + mask["attrs"].append([attr_name, default_val]) + + if k["attrs"][0][0] != "OVS_TUNNEL_KEY_ATTR_ID": + raise ValueError("Needs a tunid set") + + if flowstr.startswith("flags("): + flowstr = flowstr[len("flags("):] + flagspos = flowstr.find(")") + flags = flowstr[:flagspos] + flowstr = flowstr[flagspos + 1:] + + flag_attrs = { + "df": "OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT", + "csum": "OVS_TUNNEL_KEY_ATTR_CSUM", + "oam": "OVS_TUNNEL_KEY_ATTR_OAM" + } + + for flag in flags.split("|"): + if flag in flag_attrs: + k["attrs"].append([flag_attrs[flag], True]) + if mask is not None: + mask["attrs"].append([flag_attrs[flag], True]) + + flowstr = flowstr[strspn(flowstr, ", ") :] + return flowstr, k, mask + + def dpstr(self, mask=None, more=False): + print_str = "tunnel(" + + flagsattrs = [] + for k in self["attrs"]: + noprint = False + if k[0] == "OVS_TUNNEL_KEY_ATTR_ID": + print_str += "tun_id=%d" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_IPV4_SRC": + print_str += "src=%s" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_IPV4_DST": + print_str += "dst=%s" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_IPV6_SRC": + print_str += "ipv6_src=%s" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_IPV6_DST": + print_str += "ipv6_dst=%s" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_TOS": + print_str += "tos=%d" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_TTL": + print_str += "ttl=%d" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_TP_SRC": + print_str += "tp_src=%d" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_TP_DST": + print_str += "tp_dst=%d" % k[1] + elif k[0] == "OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT": + noprint = True + flagsattrs.append("df") + elif k[0] == "OVS_TUNNEL_KEY_ATTR_CSUM": + noprint = True + flagsattrs.append("csum") + elif k[0] == "OVS_TUNNEL_KEY_ATTR_OAM": + noprint = True + flagsattrs.append("oam") + + if not noprint: + print_str += "," + + if len(flagsattrs): + print_str += "flags(" + "|".join(flagsattrs) + ")" + print_str += ")" + return print_str + class ovs_key_mpls(nla): fields = (("lse", ">I"),) @@ -1243,6 +1641,7 @@ class ovskey(nla): ("OVS_KEY_ATTR_PRIORITY", "skb_priority", intparse), ("OVS_KEY_ATTR_SKB_MARK", "skb_mark", intparse), ("OVS_KEY_ATTR_RECIRC_ID", "recirc_id", intparse), + ("OVS_KEY_ATTR_TUNNEL", "tunnel", ovskey.ovs_key_tunnel), ("OVS_KEY_ATTR_DP_HASH", "dp_hash", intparse), ("OVS_KEY_ATTR_CT_STATE", "ct_state", parse_ct_state), ("OVS_KEY_ATTR_CT_ZONE", "ct_zone", intparse), @@ -1309,7 +1708,7 @@ class ovskey(nla): mask["attrs"].append([field[0], m]) self["attrs"].append([field[0], k]) - flowstr = flowstr[strspn(flowstr, "),") :] + flowstr = flowstr[strspn(flowstr, "), ") :] return flowstr @@ -1346,6 +1745,13 @@ class ovskey(nla): True, ), ( + "OVS_KEY_ATTR_TUNNEL", + "tunnel", + None, + False, + False, + ), + ( "OVS_KEY_ATTR_CT_STATE", "ct_state", "0x%04x", @@ -1617,7 +2023,7 @@ class OvsVport(GenericNetlinkSocket): ("OVS_VPORT_ATTR_PORT_NO", "uint32"), ("OVS_VPORT_ATTR_TYPE", "uint32"), ("OVS_VPORT_ATTR_NAME", "asciiz"), - ("OVS_VPORT_ATTR_OPTIONS", "none"), + ("OVS_VPORT_ATTR_OPTIONS", "vportopts"), ("OVS_VPORT_ATTR_UPCALL_PID", "array(uint32)"), ("OVS_VPORT_ATTR_STATS", "vportstats"), ("OVS_VPORT_ATTR_PAD", "none"), @@ -1625,6 +2031,13 @@ class OvsVport(GenericNetlinkSocket): ("OVS_VPORT_ATTR_NETNSID", "uint32"), ) + class vportopts(nla): + nla_map = ( + ("OVS_TUNNEL_ATTR_UNSPEC", "none"), + ("OVS_TUNNEL_ATTR_DST_PORT", "uint16"), + ("OVS_TUNNEL_ATTR_EXTENSION", "none"), + ) + class vportstats(nla): fields = ( ("rx_packets", "=Q"), @@ -1693,7 +2106,7 @@ class OvsVport(GenericNetlinkSocket): raise ne return reply - def attach(self, dpindex, vport_ifname, ptype): + def attach(self, dpindex, vport_ifname, ptype, dport, lwt): msg = OvsVport.ovs_vport_msg() msg["cmd"] = OVS_VPORT_CMD_NEW @@ -1702,12 +2115,43 @@ class OvsVport(GenericNetlinkSocket): msg["dpifindex"] = dpindex port_type = OvsVport.str_to_type(ptype) - msg["attrs"].append(["OVS_VPORT_ATTR_TYPE", port_type]) msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_ifname]) msg["attrs"].append( ["OVS_VPORT_ATTR_UPCALL_PID", [self.upcall_packet.epid]] ) + TUNNEL_DEFAULTS = [("geneve", 6081), + ("vxlan", 4789)] + + for tnl in TUNNEL_DEFAULTS: + if ptype == tnl[0]: + if not dport: + dport = tnl[1] + + if not lwt: + vportopt = OvsVport.ovs_vport_msg.vportopts() + vportopt["attrs"].append( + ["OVS_TUNNEL_ATTR_DST_PORT", socket.htons(dport)] + ) + msg["attrs"].append( + ["OVS_VPORT_ATTR_OPTIONS", vportopt] + ) + else: + port_type = OvsVport.OVS_VPORT_TYPE_NETDEV + ipr = pyroute2.iproute.IPRoute() + + if tnl[0] == "geneve": + ipr.link("add", ifname=vport_ifname, kind=tnl[0], + geneve_port=dport, + geneve_collect_metadata=True, + geneve_udp_zero_csum6_rx=1) + elif tnl[0] == "vxlan": + ipr.link("add", ifname=vport_ifname, kind=tnl[0], + vxlan_learning=0, vxlan_collect_metadata=1, + vxlan_udp_zero_csum6_rx=1, vxlan_port=dport) + break + msg["attrs"].append(["OVS_VPORT_ATTR_TYPE", port_type]) + try: reply = self.nlm_request( msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK @@ -2018,10 +2462,71 @@ class OvsFlow(GenericNetlinkSocket): print("MISS upcall[%d/%s]: %s" % (seq, pktpres, keystr), flush=True) def execute(self, packetmsg): - print("userspace execute command") + print("userspace execute command", flush=True) def action(self, packetmsg): - print("userspace action command") + print("userspace action command", flush=True) + + +class psample_sample(genlmsg): + nla_map = ( + ("PSAMPLE_ATTR_IIFINDEX", "none"), + ("PSAMPLE_ATTR_OIFINDEX", "none"), + ("PSAMPLE_ATTR_ORIGSIZE", "none"), + ("PSAMPLE_ATTR_SAMPLE_GROUP", "uint32"), + ("PSAMPLE_ATTR_GROUP_SEQ", "none"), + ("PSAMPLE_ATTR_SAMPLE_RATE", "uint32"), + ("PSAMPLE_ATTR_DATA", "array(uint8)"), + ("PSAMPLE_ATTR_GROUP_REFCOUNT", "none"), + ("PSAMPLE_ATTR_TUNNEL", "none"), + ("PSAMPLE_ATTR_PAD", "none"), + ("PSAMPLE_ATTR_OUT_TC", "none"), + ("PSAMPLE_ATTR_OUT_TC_OCC", "none"), + ("PSAMPLE_ATTR_LATENCY", "none"), + ("PSAMPLE_ATTR_TIMESTAMP", "none"), + ("PSAMPLE_ATTR_PROTO", "none"), + ("PSAMPLE_ATTR_USER_COOKIE", "array(uint8)"), + ) + + def dpstr(self): + fields = [] + data = "" + for (attr, value) in self["attrs"]: + if attr == "PSAMPLE_ATTR_SAMPLE_GROUP": + fields.append("group:%d" % value) + if attr == "PSAMPLE_ATTR_SAMPLE_RATE": + fields.append("rate:%d" % value) + if attr == "PSAMPLE_ATTR_USER_COOKIE": + value = "".join(format(x, "02x") for x in value) + fields.append("cookie:%s" % value) + if attr == "PSAMPLE_ATTR_DATA" and len(value) > 0: + data = "data:%s" % "".join(format(x, "02x") for x in value) + + return ("%s %s" % (",".join(fields), data)).strip() + + +class psample_msg(Marshal): + PSAMPLE_CMD_SAMPLE = 0 + PSAMPLE_CMD_GET_GROUP = 1 + PSAMPLE_CMD_NEW_GROUP = 2 + PSAMPLE_CMD_DEL_GROUP = 3 + PSAMPLE_CMD_SET_FILTER = 4 + msg_map = {PSAMPLE_CMD_SAMPLE: psample_sample} + + +class PsampleEvent(EventSocket): + genl_family = "psample" + mcast_groups = ["packets"] + marshal_class = psample_msg + + def read_samples(self): + print("listening for psample events", flush=True) + while True: + try: + for msg in self.get(): + print(msg.dpstr(), flush=True) + except NetlinkError as ne: + raise ne def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()): @@ -2053,12 +2558,19 @@ def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()): for iface in ndb.interfaces: rep = vpl.info(iface.ifname, ifindex) if rep is not None: + opts = "" + vpo = rep.get_attr("OVS_VPORT_ATTR_OPTIONS") + if vpo: + dpo = vpo.get_attr("OVS_TUNNEL_ATTR_DST_PORT") + if dpo: + opts += " tnl-dport:%s" % socket.ntohs(dpo) print( - " port %d: %s (%s)" + " port %d: %s (%s%s)" % ( rep.get_attr("OVS_VPORT_ATTR_PORT_NO"), rep.get_attr("OVS_VPORT_ATTR_NAME"), OvsVport.type_to_str(rep.get_attr("OVS_VPORT_ATTR_TYPE")), + opts, ) ) @@ -2081,7 +2593,7 @@ def main(argv): help="Increment 'verbose' output counter.", default=0, ) - subparsers = parser.add_subparsers() + subparsers = parser.add_subparsers(dest="subcommand") showdpcmd = subparsers.add_parser("show") showdpcmd.add_argument( @@ -2120,12 +2632,30 @@ def main(argv): "--ptype", type=str, default="netdev", - choices=["netdev", "internal"], + choices=["netdev", "internal", "geneve", "vxlan"], help="Interface type (default netdev)", ) + addifcmd.add_argument( + "-p", + "--dport", + type=int, + default=0, + help="Destination port (0 for default)" + ) + addifcmd.add_argument( + "-l", + "--lwt", + type=bool, + default=True, + help="Use LWT infrastructure instead of vport (default true)." + ) delifcmd = subparsers.add_parser("del-if") delifcmd.add_argument("dpname", help="Datapath Name") delifcmd.add_argument("delif", help="Interface name for adding") + delifcmd.add_argument("-d", + "--dellink", + type=bool, default=False, + help="Delete the link as well.") dumpflcmd = subparsers.add_parser("dump-flows") dumpflcmd.add_argument("dumpdp", help="Datapath Name") @@ -2138,6 +2668,8 @@ def main(argv): delfscmd = subparsers.add_parser("del-flows") delfscmd.add_argument("flsbr", help="Datapath name") + subparsers.add_parser("psample-events") + args = parser.parse_args() if args.verbose > 0: @@ -2152,6 +2684,9 @@ def main(argv): sys.setrecursionlimit(100000) + if args.subcommand == "psample-events": + PsampleEvent().read_samples() + if hasattr(args, "showdp"): found = False for iface in ndb.interfaces: @@ -2186,7 +2721,8 @@ def main(argv): print("DP '%s' not found." % args.dpname) return 1 dpindex = rep["dpifindex"] - rep = ovsvp.attach(rep["dpifindex"], args.addif, args.ptype) + rep = ovsvp.attach(rep["dpifindex"], args.addif, args.ptype, + args.dport, args.lwt) msg = "vport '%s'" % args.addif if rep and rep["header"]["error"] is None: msg += " added." @@ -2207,6 +2743,9 @@ def main(argv): msg += " removed." else: msg += " failed to remove." + if args.dellink: + ipr = pyroute2.iproute.IPRoute() + ipr.link("del", index=ipr.link_lookup(ifname=args.delif)[0]) elif hasattr(args, "dumpdp"): rep = ovsdp.info(args.dumpdp, 0) if rep is None: diff --git a/tools/testing/selftests/net/openvswitch/settings b/tools/testing/selftests/net/openvswitch/settings new file mode 100644 index 000000000000..e2206265f67c --- /dev/null +++ b/tools/testing/selftests/net/openvswitch/settings @@ -0,0 +1 @@ +timeout=900 diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index cfc84958025a..5175c0c83a23 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -842,25 +842,97 @@ setup_bridge() { run_cmd ${ns_a} ip link set veth_A-C master br0 } +setup_ovs_via_internal_utility() { + type="${1}" + a_addr="${2}" + b_addr="${3}" + dport="${4}" + + run_cmd python3 ./openvswitch/ovs-dpctl.py add-if ovs_br0 ${type}_a -t ${type} || return 1 + + ports=$(python3 ./openvswitch/ovs-dpctl.py show) + br0_port=$(echo "$ports" | grep -E "\sovs_br0" | sed -e 's@port @@' | cut -d: -f1 | xargs) + type_a_port=$(echo "$ports" | grep ${type}_a | sed -e 's@port @@' | cut -d: -f1 | xargs) + veth_a_port=$(echo "$ports" | grep veth_A | sed -e 's@port @@' | cut -d: -f1 | xargs) + + v4_a_tun="${prefix4}.${a_r1}.1" + v4_b_tun="${prefix4}.${b_r1}.1" + + v6_a_tun="${prefix6}:${a_r1}::1" + v6_b_tun="${prefix6}:${b_r1}::1" + + if [ "${v4_a_tun}" = "${a_addr}" ]; then + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0800),ipv4()" \ + "set(tunnel(tun_id=1,dst=${v4_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x86dd),ipv6()" \ + "set(tunnel(tun_id=1,dst=${v4_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),tunnel(tun_id=1,src=${v4_b_tun},dst=${v4_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x0800),ipv4()" \ + "${veth_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),tunnel(tun_id=1,src=${v4_b_tun},dst=${v4_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x86dd),ipv6()" \ + "${veth_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),tunnel(tun_id=1,src=${v4_b_tun},dst=${v4_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x0806),arp()" \ + "${veth_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0806),arp(sip=${veth4_c_addr},tip=${tunnel4_b_addr})" \ + "set(tunnel(tun_id=1,dst=${v4_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}" + else + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0800),ipv4()" \ + "set(tunnel(tun_id=1,ipv6_dst=${v6_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x86dd),ipv6()" \ + "set(tunnel(tun_id=1,ipv6_dst=${v6_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),tunnel(tun_id=1,ipv6_src=${v6_b_tun},ipv6_dst=${v6_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x0800),ipv4()" \ + "${veth_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),tunnel(tun_id=1,ipv6_src=${v6_b_tun},ipv6_dst=${v6_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x86dd),ipv6()" \ + "${veth_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),tunnel(tun_id=1,ipv6_src=${v6_b_tun},ipv6_dst=${v6_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x0806),arp()" \ + "${veth_a_port}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \ + "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0806),arp(sip=${veth4_c_addr},tip=${tunnel4_b_addr})" \ + "set(tunnel(tun_id=1,ipv6_dst=${v6_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}" + fi +} + +setup_ovs_via_vswitchd() { + type="${1}" + b_addr="${2}" + + run_cmd ovs-vsctl add-port ovs_br0 ${type}_a -- \ + set interface ${type}_a type=${type} \ + options:remote_ip=${b_addr} options:key=1 options:csum=true || return 1 +} + setup_ovs_vxlan_or_geneve() { type="${1}" a_addr="${2}" b_addr="${3}" + dport="6081" if [ "${type}" = "vxlan" ]; then + dport="4789" opts="${opts} ttl 64 dstport 4789" opts_b="local ${b_addr}" fi - run_cmd ovs-vsctl add-port ovs_br0 ${type}_a -- \ - set interface ${type}_a type=${type} \ - options:remote_ip=${b_addr} options:key=1 options:csum=true || return 1 + setup_ovs_via_internal_utility "${type}" "${a_addr}" "${b_addr}" \ + "${dport}" || \ + setup_ovs_via_vswitchd "${type}" "${b_addr}" || return 1 run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} || return 1 run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b + run_cmd ip link set ${type}_a up run_cmd ${ns_b} ip link set ${type}_b up } @@ -880,8 +952,24 @@ setup_ovs_vxlan6() { setup_ovs_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 } +setup_ovs_br_internal() { + run_cmd python3 ./openvswitch/ovs-dpctl.py add-dp ovs_br0 || \ + return 1 +} + +setup_ovs_br_vswitchd() { + run_cmd ovs-vsctl add-br ovs_br0 || return 1 +} + +setup_ovs_add_if() { + ifname="${1}" + run_cmd python3 ./openvswitch/ovs-dpctl.py add-if ovs_br0 \ + "${ifname}" || \ + run_cmd ovs-vsctl add-port ovs_br0 "${ifname}" +} + setup_ovs_bridge() { - run_cmd ovs-vsctl add-br ovs_br0 || return $ksft_skip + setup_ovs_br_internal || setup_ovs_br_vswitchd || return $ksft_skip run_cmd ip link set ovs_br0 up run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C @@ -891,7 +979,7 @@ setup_ovs_bridge() { run_cmd ${ns_c} ip link set veth_C-A up run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A - run_cmd ovs-vsctl add-port ovs_br0 veth_A-C + setup_ovs_add_if veth_A-C # Move veth_A-R1 to init run_cmd ${ns_a} ip link set veth_A-R1 netns 1 @@ -922,6 +1010,18 @@ trace() { sleep 1 } +cleanup_del_ovs_internal() { + # squelch the output of the del-if commands since it can be wordy + python3 ./openvswitch/ovs-dpctl.py del-if ovs_br0 -d true vxlan_a >/dev/null 2>&1 + python3 ./openvswitch/ovs-dpctl.py del-if ovs_br0 -d true geneve_a >/dev/null 2>&1 + python3 ./openvswitch/ovs-dpctl.py del-dp ovs_br0 >/dev/null 2>&1 +} + +cleanup_del_ovs_vswitchd() { + ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null + ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null +} + cleanup() { for pid in ${tcpdump_pids}; do kill ${pid} @@ -940,10 +1040,10 @@ cleanup() { cleanup_all_ns - ip link del veth_A-C 2>/dev/null - ip link del veth_A-R1 2>/dev/null - ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null - ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null + ip link del veth_A-C 2>/dev/null + ip link del veth_A-R1 2>/dev/null + cleanup_del_ovs_internal + cleanup_del_ovs_vswitchd rm -f "$tmpoutfile" } @@ -1397,6 +1497,12 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() { outer_family=${3} ll_mtu=4000 + if [ "${type}" = "vxlan" ]; then + tun_a="vxlan_sys_4789" + elif [ "${type}" = "geneve" ]; then + tun_a="genev_sys_6081" + fi + if [ ${outer_family} -eq 4 ]; then setup namespaces routing ovs_bridge ovs_${type}4 || return $ksft_skip # IPv4 header UDP header VXLAN/GENEVE header Ethernet header @@ -1407,17 +1513,11 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() { exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) fi - if [ "${type}" = "vxlan" ]; then - tun_a="vxlan_sys_4789" - elif [ "${type}" = "geneve" ]; then - tun_a="genev_sys_6081" - fi - - trace "" "${tun_a}" "${ns_b}" ${type}_b \ - "" veth_A-R1 "${ns_r1}" veth_R1-A \ - "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \ - "" ovs_br0 "" veth-A-C \ - "${ns_c}" veth_C-A + trace "" ${type}_a "${ns_b}" ${type}_b \ + "" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \ + "" ovs_br0 "" veth-A_C \ + "${ns_c}" veth_C-A "" "${tun_a}" if [ ${family} -eq 4 ]; then ping=ping @@ -1436,8 +1536,9 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() { mtu "${ns_b}" veth_B-R1 ${ll_mtu} mtu "${ns_r1}" veth_R1-B ${ll_mtu} - mtu "" ${tun_a} $((${ll_mtu} + 1000)) - mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000)) + mtu "" ${tun_a} $((${ll_mtu} + 1000)) 2>/dev/null || \ + mtu "" ${type}_a $((${ll_mtu} + 1000)) 2>/dev/null + mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000)) run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 20 -s $((${ll_mtu} + 500)) ${dst} || return 1 diff --git a/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh new file mode 100755 index 000000000000..e23210aa547f --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh @@ -0,0 +1,335 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Jianguo Wu <wujianguo@chinatelecom.cn> +# +# Mostly copied from tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh. +# +# This script is designed for testing the support of netfilter hooks for +# SRv6 End.DX4 behavior. +# +# Hereafter a network diagram is shown, where one tenants (named 100) offer +# IPv4 L3 VPN services allowing hosts to communicate with each other across +# an IPv6 network. +# +# Routers rt-1 and rt-2 implement IPv4 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DX4 behavior. +# +# To explain how an IPv4 L3 VPN based on SRv6 works, let us briefly consider an +# example where, within the same domain of tenant 100, the host hs-1 pings +# the host hs-2. +# +# First of all, L2 reachability of the host hs-2 is taken into account by +# the router rt-1 which acts as an arp proxy. +# +# When the host hs-1 sends an IPv4 packet destined to hs-2, the router rt-1 +# receives the packet on the internal veth-t100 interface, rt-1 contains the +# SRv6 Encap route for encapsulating the IPv4 packet in a IPv6 plus the Segment +# Routing Header (SRH) packet. This packet is sent through the (IPv6) core +# network up to the router rt-2 that receives it on veth0 interface. +# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these +# packets, the SRv6 End.DX4 behavior removes the outer IPv6+SRH headers and +# routs the packet to the specified nexthop. Afterwards, the packet is sent to +# the host hs-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the role of rt-1 +# and rt-2 are swapped. +# +# And when net.netfilter.nf_hooks_lwtunnel is set to 1 in rt-1 or rt-2, and a +# rpfilter iptables rule is added, SRv6 packets will go through netfilter PREROUTING +# hooks. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-1 netns | | hs-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . | +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | 10.0.0.11/24 | +----------+ | | +----------+ | 10.0.0.22/24 | | +# | +-------+-------+ | route | | | | route | +-------+-------- | +# | | table | | | | table | | +# | +----------+ | | +----------+ | +# | +--------------+ | | +--------------+ | +# | | veth0 | | | | veth0 | | +# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | | +# | +--------------+ | | +--------------+ | +# | | | | +# | rt-1 netns | | rt-2 netns | +# | | | | +# +-----------------------------------+ +-----------------------------------+ +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table +# +----------------------------------------------------------------+ +# |SID |Action | +# +----------------------------------------------------------------+ +# |fc00:21:100::6004|apply SRv6 End.DX4 nh4 10.0.0.1 dev veth-t100 | +# +----------------------------------------------------------------+ +# +# rt-1: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# +# rt-2: localsid table +# +---------------------------------------------------------------+ +# |SID |Action | +# +---------------------------------------------------------------+ +# |fc00:12:100::6004|apply SRv6 End.DX4 nh4 10.0.0.2 dev veth-t100| +# +---------------------------------------------------------------+ +# +# rt-2: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth_t100 | +# +---------------------------------------------------+ +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +readonly IPv6_RT_NETWORK=2001:11 +readonly IPv4_HS_NETWORK=10.0.0 +readonly SID_LOCATOR=fc00 + +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + # destroy routers rt-* and hosts hs-* + for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do + ip netns del ${ns} || true + done +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns add ${nsname} + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip link set veth-rt-${rt} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${rt} name veth0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +setup_rt_netfilter() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1 + ip netns exec ${nsname} iptables -t raw -A PREROUTING -m rpfilter --invert -j DROP +} + +setup_hs() +{ + local hs=$1 + local rt=$2 + local tid=$3 + local hsname=hs-${hs} + local rtname=rt-${rt} + local rtveth=veth-t${tid} + + # set the networking for the host + ip netns add ${hsname} + + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0 + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.${rt}${hs}/24 dev ${rtveth} + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1 +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + local hssrc_name=hs-t${tid}-${hssrc} + local hsdst_name=hs-t${tid}-${hsdst} + local rtsrc_name=rt-${rtsrc} + local rtdst_name=rt-${rtdst} + local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004 + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. + ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \ + via 2001:11::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. + ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \ + encap seg6local action End.DX4 nh4 ${IPv4_HS_NETWORK}.${hsdst} dev veth-t${tid} +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_hs 1 1 100 + setup_hs 2 2 100 + + # setup the IPv4 L3 VPN which connects the host hs-1 and host hs-2. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 +} + +check_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})" +} + +host_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +router_netfilter_tests() +{ + log_section "SRv6 VPN connectivity test with netfilter enabled in routers" + setup_rt_netfilter 1 + setup_rt_netfilter 2 + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup &>/dev/null + +setup + +host_tests +router_netfilter_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} diff --git a/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh new file mode 100755 index 000000000000..9e69a2ed5bc3 --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh @@ -0,0 +1,340 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Jianguo Wu <wujianguo@chinatelecom.cn> +# +# Mostly copied from tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh. +# +# This script is designed for testing the support of netfilter hooks for +# SRv6 End.DX4 behavior. +# +# Hereafter a network diagram is shown, where one tenants (named 100) offer +# IPv6 L3 VPN services allowing hosts to communicate with each other across +# an IPv6 network. +# +# Routers rt-1 and rt-2 implement IPv6 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DX4 behavior. +# +# To explain how an IPv6 L3 VPN based on SRv6 works, let us briefly consider an +# example where, within the same domain of tenant 100, the host hs-1 pings +# the host hs-2. +# +# First of all, L2 reachability of the host hs-2 is taken into account by +# the router rt-1 which acts as an arp proxy. +# +# When the host hs-1 sends an IPv6 packet destined to hs-2, the router rt-1 +# receives the packet on the internal veth-t100 interface, rt-1 contains the +# SRv6 Encap route for encapsulating the IPv6 packet in a IPv6 plus the Segment +# Routing Header (SRH) packet. This packet is sent through the (IPv6) core +# network up to the router rt-2 that receives it on veth0 interface. +# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these +# packets, the SRv6 End.DX4 behavior removes the outer IPv6+SRH headers and +# routs the packet to the specified nexthop. Afterwards, the packet is sent to +# the host hs-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the role of rt-1 +# and rt-2 are swapped. +# +# And when net.netfilter.nf_hooks_lwtunnel is set to 1 in rt-1 or rt-2, and a +# rpfilter iptables rule is added, SRv6 packets will go through netfilter PREROUTING +# hooks. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-1 netns | | hs-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | cafe::1/64 | | | | cafe::2/64 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . | +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | cafe::11/64 | +----------+ | | +----------+ | cafe::22/64 | | +# | +-------+-------+ | route | | | | route | +-------+-------- | +# | | table | | | | table | | +# | +----------+ | | +----------+ | +# | +--------------+ | | +--------------+ | +# | | veth0 | | | | veth0 | | +# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | | +# | +--------------+ | | +--------------+ | +# | | | | +# | rt-1 netns | | rt-2 netns | +# | | | | +# +-----------------------------------+ +-----------------------------------+ +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table +# +----------------------------------------------------------------+ +# |SID |Action | +# +----------------------------------------------------------------+ +# |fc00:21:100::6004|apply SRv6 End.DX6 nh6 cafe::1 dev veth-t100 | +# +----------------------------------------------------------------+ +# +# rt-1: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::2 |apply seg6 encap segs fc00:12:100::6004| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# +# rt-2: localsid table +# +---------------------------------------------------------------+ +# |SID |Action | +# +---------------------------------------------------------------+ +# |fc00:12:100::6004|apply SRv6 End.DX6 nh6 cafe::2 dev veth-t100 | +# +---------------------------------------------------------------+ +# +# rt-2: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::1 |apply seg6 encap segs fc00:21:100::6004| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t100 | +# +---------------------------------------------------+ +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +readonly IPv6_RT_NETWORK=2001:11 +readonly IPv6_HS_NETWORK=cafe +readonly SID_LOCATOR=fc00 + +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + # destroy routers rt-* and hosts hs-* + for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do + ip netns del ${ns} || true + done +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns add ${nsname} + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip link set veth-rt-${rt} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${rt} name veth0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +setup_rt_netfilter() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1 + ip netns exec ${nsname} ip6tables -t raw -A PREROUTING -m rpfilter --invert -j DROP +} + +setup_hs() +{ + local hs=$1 + local rt=$2 + local tid=$3 + local hsname=hs-${hs} + local rtname=rt-${rt} + local rtveth=veth-t${tid} + + # set the networking for the host + ip netns add ${hsname} + + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::${rt}${hs}/64 dev ${rtveth} + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1 +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + local hssrc_name=hs-t${tid}-${hssrc} + local hsdst_name=hs-t${tid}-${hsdst} + local rtsrc_name=rt-${rtsrc} + local rtdst_name=rt-${rtdst} + local rtveth=veth-t${tid} + local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004 + + ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth} + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. + ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \ + via 2001:11::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. + ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \ + encap seg6local action End.DX6 nh6 ${IPv6_HS_NETWORK}::${hsdst} dev veth-t${tid} +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_hs 1 1 100 + setup_hs 2 2 100 + + # setup the IPv4 L3 VPN which connects the host hs-1 and host hs-2. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 +} + +check_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-${hssrc} ping -6 -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})" +} + +host_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +router_netfilter_tests() +{ + log_section "SRv6 VPN connectivity test with netfilter enabled in routers" + setup_rt_netfilter 1 + setup_rt_netfilter 2 + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup &>/dev/null + +setup + +host_tests +router_netfilter_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c index e154d9e198a9..a5698b0a3718 100644 --- a/tools/testing/selftests/net/tcp_ao/self-connect.c +++ b/tools/testing/selftests/net/tcp_ao/self-connect.c @@ -30,8 +30,6 @@ static void setup_lo_intf(const char *lo_intf) static void tcp_self_connect(const char *tst, unsigned int port, bool different_keyids, bool check_restore) { - uint64_t before_challenge_ack, after_challenge_ack; - uint64_t before_syn_challenge, after_syn_challenge; struct tcp_ao_counters before_ao, after_ao; uint64_t before_aogood, after_aogood; struct netstat *ns_before, *ns_after; @@ -62,8 +60,6 @@ static void tcp_self_connect(const char *tst, unsigned int port, ns_before = netstat_read(); before_aogood = netstat_get(ns_before, "TCPAOGood", NULL); - before_challenge_ack = netstat_get(ns_before, "TCPChallengeACK", NULL); - before_syn_challenge = netstat_get(ns_before, "TCPSYNChallenge", NULL); if (test_get_tcp_ao_counters(sk, &before_ao)) test_error("test_get_tcp_ao_counters()"); @@ -82,8 +78,6 @@ static void tcp_self_connect(const char *tst, unsigned int port, ns_after = netstat_read(); after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); - after_challenge_ack = netstat_get(ns_after, "TCPChallengeACK", NULL); - after_syn_challenge = netstat_get(ns_after, "TCPSYNChallenge", NULL); if (test_get_tcp_ao_counters(sk, &after_ao)) test_error("test_get_tcp_ao_counters()"); if (!check_restore) { @@ -98,18 +92,6 @@ static void tcp_self_connect(const char *tst, unsigned int port, close(sk); return; } - if (after_challenge_ack <= before_challenge_ack || - after_syn_challenge <= before_syn_challenge) { - /* - * It's also meant to test simultaneous open, so check - * these counters as well. - */ - test_fail("%s: Didn't challenge SYN or ACK: %zu <= %zu OR %zu <= %zu", - tst, after_challenge_ack, before_challenge_ack, - after_syn_challenge, before_syn_challenge); - close(sk); - return; - } if (test_tcp_ao_counters_cmp(tst, &before_ao, &after_ao, TEST_CNT_GOOD)) { close(sk); diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 85b3baa3f7f3..3e74cfa1a2bf 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -53,6 +53,7 @@ static bool cfg_do_ipv6; static bool cfg_do_connected; static bool cfg_do_connectionless; static bool cfg_do_msgmore; +static bool cfg_do_recv = true; static bool cfg_do_setsockopt; static int cfg_specific_test_id = -1; @@ -414,6 +415,9 @@ static void run_one(struct testcase *test, int fdt, int fdr, if (!sent) return; + if (!cfg_do_recv) + return; + if (test->gso_len) mss = test->gso_len; else @@ -464,8 +468,10 @@ static void run_test(struct sockaddr *addr, socklen_t alen) if (fdr == -1) error(1, errno, "socket r"); - if (bind(fdr, addr, alen)) - error(1, errno, "bind"); + if (cfg_do_recv) { + if (bind(fdr, addr, alen)) + error(1, errno, "bind"); + } /* Have tests fail quickly instead of hang */ if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) @@ -524,7 +530,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "46cCmst:")) != -1) { + while ((c = getopt(argc, argv, "46cCmRst:")) != -1) { switch (c) { case '4': cfg_do_ipv4 = true; @@ -541,6 +547,9 @@ static void parse_opts(int argc, char **argv) case 'm': cfg_do_msgmore = true; break; + case 'R': + cfg_do_recv = false; + break; case 's': cfg_do_setsockopt = true; break; diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh index 6c63178086b0..85d1fa3c1ff7 100755 --- a/tools/testing/selftests/net/udpgso.sh +++ b/tools/testing/selftests/net/udpgso.sh @@ -27,6 +27,31 @@ test_route_mtu() { ip route add local fd00::1/128 table local dev lo mtu 1500 } +setup_dummy_sink() { + ip link add name sink mtu 1500 type dummy + ip addr add dev sink 10.0.0.0/24 + ip addr add dev sink fd00::2/64 nodad + ip link set dev sink up +} + +test_hw_gso_hw_csum() { + setup_dummy_sink + ethtool -K sink tx-checksum-ip-generic on >/dev/null + ethtool -K sink tx-udp-segmentation on >/dev/null +} + +test_sw_gso_hw_csum() { + setup_dummy_sink + ethtool -K sink tx-checksum-ip-generic on >/dev/null + ethtool -K sink tx-udp-segmentation off >/dev/null +} + +test_sw_gso_sw_csum() { + setup_dummy_sink + ethtool -K sink tx-checksum-ip-generic off >/dev/null + ethtool -K sink tx-udp-segmentation off >/dev/null +} + if [ "$#" -gt 0 ]; then "$1" shift 2 # pop "test_*" arg and "--" delimiter @@ -56,3 +81,21 @@ echo "ipv4 msg_more" echo "ipv6 msg_more" ./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -m + +echo "ipv4 hw-gso hw-csum" +./in_netns.sh "$0" test_hw_gso_hw_csum -- ./udpgso -4 -C -R + +echo "ipv6 hw-gso hw-csum" +./in_netns.sh "$0" test_hw_gso_hw_csum -- ./udpgso -6 -C -R + +echo "ipv4 sw-gso hw-csum" +./in_netns.sh "$0" test_sw_gso_hw_csum -- ./udpgso -4 -C -R + +echo "ipv6 sw-gso hw-csum" +./in_netns.sh "$0" test_sw_gso_hw_csum -- ./udpgso -6 -C -R + +echo "ipv4 sw-gso sw-csum" +./in_netns.sh "$0" test_sw_gso_sw_csum -- ./udpgso -4 -C -R + +echo "ipv6 sw-gso sw-csum" +./in_netns.sh "$0" test_sw_gso_sw_csum -- ./udpgso -6 -C -R diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh index 2da32f4c479b..152171fb1fc8 100755 --- a/tools/testing/selftests/net/vrf_route_leaking.sh +++ b/tools/testing/selftests/net/vrf_route_leaking.sh @@ -59,6 +59,7 @@ # while it is forwarded between different vrfs. source lib.sh +PATH=$PWD:$PWD/tools/testing/selftests/net:$PATH VERBOSE=0 PAUSE_ON_FAIL=no DEFAULT_TTYPE=sym @@ -533,6 +534,86 @@ ipv6_ping_frag_asym() ipv6_ping_frag asym } +ipv4_ping_local() +{ + log_section "IPv4 (sym route): VRF ICMP local error route lookup ping" + + setup_sym + + check_connectivity || return + + run_cmd ip netns exec $r1 ip vrf exec blue ping -c1 -w1 ${H2_N2_IP} + log_test $? 0 "VRF ICMP local IPv4" +} + +ipv4_tcp_local() +{ + log_section "IPv4 (sym route): VRF tcp local connection" + + setup_sym + + check_connectivity || return + + run_cmd nettest -s -O "$h2" -l ${H2_N2_IP} -I eth0 -3 eth0 & + sleep 1 + run_cmd nettest -N "$r1" -d blue -r ${H2_N2_IP} + log_test $? 0 "VRF tcp local connection IPv4" +} + +ipv4_udp_local() +{ + log_section "IPv4 (sym route): VRF udp local connection" + + setup_sym + + check_connectivity || return + + run_cmd nettest -s -D -O "$h2" -l ${H2_N2_IP} -I eth0 -3 eth0 & + sleep 1 + run_cmd nettest -D -N "$r1" -d blue -r ${H2_N2_IP} + log_test $? 0 "VRF udp local connection IPv4" +} + +ipv6_ping_local() +{ + log_section "IPv6 (sym route): VRF ICMP local error route lookup ping" + + setup_sym + + check_connectivity6 || return + + run_cmd ip netns exec $r1 ip vrf exec blue ${ping6} -c1 -w1 ${H2_N2_IP6} + log_test $? 0 "VRF ICMP local IPv6" +} + +ipv6_tcp_local() +{ + log_section "IPv6 (sym route): VRF tcp local connection" + + setup_sym + + check_connectivity6 || return + + run_cmd nettest -s -6 -O "$h2" -l ${H2_N2_IP6} -I eth0 -3 eth0 & + sleep 1 + run_cmd nettest -6 -N "$r1" -d blue -r ${H2_N2_IP6} + log_test $? 0 "VRF tcp local connection IPv6" +} + +ipv6_udp_local() +{ + log_section "IPv6 (sym route): VRF udp local connection" + + setup_sym + + check_connectivity6 || return + + run_cmd nettest -s -6 -D -O "$h2" -l ${H2_N2_IP6} -I eth0 -3 eth0 & + sleep 1 + run_cmd nettest -6 -D -N "$r1" -d blue -r ${H2_N2_IP6} + log_test $? 0 "VRF udp local connection IPv6" +} + ################################################################################ # usage @@ -555,8 +636,10 @@ EOF # Some systems don't have a ping6 binary anymore command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping) -TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym" -TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_ttl_asym ipv6_traceroute_asym" +TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_local ipv4_tcp_local +ipv4_udp_local ipv4_ping_ttl_asym ipv4_traceroute_asym" +TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_local ipv6_tcp_local ipv6_udp_local +ipv6_ping_ttl_asym ipv6_traceroute_asym" ret=0 nsuccess=0 @@ -594,12 +677,18 @@ do ipv4_traceroute|traceroute) ipv4_traceroute;;& ipv4_traceroute_asym|traceroute) ipv4_traceroute_asym;;& ipv4_ping_frag|ping) ipv4_ping_frag;;& + ipv4_ping_local|ping) ipv4_ping_local;;& + ipv4_tcp_local) ipv4_tcp_local;;& + ipv4_udp_local) ipv4_udp_local;;& ipv6_ping_ttl|ping) ipv6_ping_ttl;;& ipv6_ping_ttl_asym|ping) ipv6_ping_ttl_asym;;& ipv6_traceroute|traceroute) ipv6_traceroute;;& ipv6_traceroute_asym|traceroute) ipv6_traceroute_asym;;& ipv6_ping_frag|ping) ipv6_ping_frag;;& + ipv6_ping_local|ping) ipv6_ping_local;;& + ipv6_tcp_local) ipv6_tcp_local;;& + ipv6_udp_local) ipv6_udp_local;;& # setup namespaces and config, but do not run any tests setup_sym|setup) setup_sym; exit 0;; diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk new file mode 100644 index 000000000000..59cb26cf3f73 --- /dev/null +++ b/tools/testing/selftests/net/ynl.mk @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 + +# YNL selftest build snippet + +# Inputs: +# +# YNL_GENS: families we need in the selftests +# YNL_PROGS: TEST_PROGS which need YNL (TODO, none exist, yet) +# YNL_GEN_FILES: TEST_GEN_FILES which need YNL + +YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES)) + +$(YNL_OUTPUTS): $(OUTPUT)/libynl.a +$(YNL_OUTPUTS): CFLAGS += \ + -I$(top_srcdir)/usr/include/ $(KHDR_INCLUDES) \ + -I$(top_srcdir)/tools/net/ynl/lib/ \ + -I$(top_srcdir)/tools/net/ynl/generated/ + +$(OUTPUT)/libynl.a: + $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a + $(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 40dd95228051..3fbabab46958 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -152,7 +152,7 @@ CFLAGS_mips32be = -EB -mabi=32 CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all)) CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \ $(call cc-option,-fno-stack-protector) \ - $(CFLAGS_$(XARCH)) $(CFLAGS_STACKPROTECTOR) + $(CFLAGS_$(XARCH)) $(CFLAGS_STACKPROTECTOR) $(CFLAGS_EXTRA) LDFLAGS := REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \ diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 94bb6e11c16f..093d0512f4c5 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -64,6 +64,14 @@ static const char *argv0; /* will be used by constructor tests */ static int constructor_test_value; +static const int is_nolibc = +#ifdef NOLIBC + 1 +#else + 0 +#endif +; + /* definition of a series of tests */ struct test { const char *name; /* test name */ @@ -607,7 +615,7 @@ int expect_strne(const char *expr, int llen, const char *cmp) static __attribute__((unused)) int expect_str_buf_eq(size_t expr, const char *buf, size_t val, int llen, const char *cmp) { - llen += printf(" = %lu <%s> ", expr, buf); + llen += printf(" = %lu <%s> ", (unsigned long)expr, buf); if (strcmp(buf, cmp) != 0) { result(llen, FAIL); return 1; @@ -621,6 +629,51 @@ int expect_str_buf_eq(size_t expr, const char *buf, size_t val, int llen, const return 0; } +#define EXPECT_STRTOX(cond, func, input, base, expected, chars, expected_errno) \ + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_strtox(llen, func, input, base, expected, chars, expected_errno); } while (0) + +static __attribute__((unused)) +int expect_strtox(int llen, void *func, const char *input, int base, intmax_t expected, int expected_chars, int expected_errno) +{ + char *endptr; + int actual_errno, actual_chars; + intmax_t r; + + errno = 0; + if (func == strtol) { + r = strtol(input, &endptr, base); + } else if (func == strtoul) { + r = strtoul(input, &endptr, base); + } else { + result(llen, FAIL); + return 1; + } + actual_errno = errno; + actual_chars = endptr - input; + + llen += printf(" %lld = %lld", (long long)expected, (long long)r); + if (r != expected) { + result(llen, FAIL); + return 1; + } + if (expected_chars == -1) { + if (*endptr != '\0') { + result(llen, FAIL); + return 1; + } + } else if (expected_chars != actual_chars) { + result(llen, FAIL); + return 1; + } + if (actual_errno != expected_errno) { + result(llen, FAIL); + return 1; + } + + result(llen, OK); + return 0; +} + /* declare tests based on line numbers. There must be exactly one test per line. */ #define CASE_TEST(name) \ case __LINE__: llen += printf("%d %s", test, #name); @@ -942,6 +995,7 @@ int run_syscall(int min, int max) int ret = 0; void *p1, *p2; int has_gettid = 1; + int has_brk; /* <proc> indicates whether or not /proc is mounted */ proc = stat("/proc", &stat_buf) == 0; @@ -954,6 +1008,9 @@ int run_syscall(int min, int max) has_gettid = __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 30); #endif + /* on musl setting brk()/sbrk() always fails */ + has_brk = brk(0) == 0; + for (test = min; test >= 0 && test <= max; test++) { int llen = 0; /* line length */ @@ -969,9 +1026,9 @@ int run_syscall(int min, int max) CASE_TEST(kill_0); EXPECT_SYSZR(1, kill(getpid(), 0)); break; CASE_TEST(kill_CONT); EXPECT_SYSZR(1, kill(getpid(), 0)); break; CASE_TEST(kill_BADPID); EXPECT_SYSER(1, kill(INT_MAX, 0), -1, ESRCH); break; - CASE_TEST(sbrk_0); EXPECT_PTRNE(1, sbrk(0), (void *)-1); break; - CASE_TEST(sbrk); if ((p1 = p2 = sbrk(4096)) != (void *)-1) p2 = sbrk(-4096); EXPECT_SYSZR(1, (p2 == (void *)-1) || p2 == p1); break; - CASE_TEST(brk); EXPECT_SYSZR(1, brk(sbrk(0))); break; + CASE_TEST(sbrk_0); EXPECT_PTRNE(has_brk, sbrk(0), (void *)-1); break; + CASE_TEST(sbrk); if ((p1 = p2 = sbrk(4096)) != (void *)-1) p2 = sbrk(-4096); EXPECT_SYSZR(has_brk, (p2 == (void *)-1) || p2 == p1); break; + CASE_TEST(brk); EXPECT_SYSZR(has_brk, brk(sbrk(0))); break; CASE_TEST(chdir_root); EXPECT_SYSZR(1, chdir("/")); chdir(getenv("PWD")); break; CASE_TEST(chdir_dot); EXPECT_SYSZR(1, chdir(".")); break; CASE_TEST(chdir_blah); EXPECT_SYSER(1, chdir("/blah"), -1, ENOENT); break; @@ -1076,19 +1133,17 @@ int run_stdlib(int min, int max) CASE_TEST(strchr_foobar_z); EXPECT_STRZR(1, strchr("foobar", 'z')); break; CASE_TEST(strrchr_foobar_o); EXPECT_STREQ(1, strrchr("foobar", 'o'), "obar"); break; CASE_TEST(strrchr_foobar_z); EXPECT_STRZR(1, strrchr("foobar", 'z')); break; -#ifdef NOLIBC - CASE_TEST(strlcat_0); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 0), buf, 3, "test"); break; - CASE_TEST(strlcat_1); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 1), buf, 4, "test"); break; - CASE_TEST(strlcat_5); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 5), buf, 7, "test"); break; - CASE_TEST(strlcat_6); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 6), buf, 7, "testb"); break; - CASE_TEST(strlcat_7); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 7), buf, 7, "testba"); break; - CASE_TEST(strlcat_8); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 8), buf, 7, "testbar"); break; - CASE_TEST(strlcpy_0); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 0), buf, 3, "test"); break; - CASE_TEST(strlcpy_1); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 1), buf, 3, ""); break; - CASE_TEST(strlcpy_2); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 2), buf, 3, "b"); break; - CASE_TEST(strlcpy_3); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 3), buf, 3, "ba"); break; - CASE_TEST(strlcpy_4); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 4), buf, 3, "bar"); break; -#endif + CASE_TEST(strlcat_0); EXPECT_STRBUFEQ(is_nolibc, strlcat(buf, "bar", 0), buf, 3, "test"); break; + CASE_TEST(strlcat_1); EXPECT_STRBUFEQ(is_nolibc, strlcat(buf, "bar", 1), buf, 4, "test"); break; + CASE_TEST(strlcat_5); EXPECT_STRBUFEQ(is_nolibc, strlcat(buf, "bar", 5), buf, 7, "test"); break; + CASE_TEST(strlcat_6); EXPECT_STRBUFEQ(is_nolibc, strlcat(buf, "bar", 6), buf, 7, "testb"); break; + CASE_TEST(strlcat_7); EXPECT_STRBUFEQ(is_nolibc, strlcat(buf, "bar", 7), buf, 7, "testba"); break; + CASE_TEST(strlcat_8); EXPECT_STRBUFEQ(is_nolibc, strlcat(buf, "bar", 8), buf, 7, "testbar"); break; + CASE_TEST(strlcpy_0); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 0), buf, 3, "test"); break; + CASE_TEST(strlcpy_1); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 1), buf, 3, ""); break; + CASE_TEST(strlcpy_2); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 2), buf, 3, "b"); break; + CASE_TEST(strlcpy_3); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 3), buf, 3, "ba"); break; + CASE_TEST(strlcpy_4); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 4), buf, 3, "bar"); break; CASE_TEST(memcmp_20_20); EXPECT_EQ(1, memcmp("aaa\x20", "aaa\x20", 4), 0); break; CASE_TEST(memcmp_20_60); EXPECT_LT(1, memcmp("aaa\x20", "aaa\x60", 4), 0); break; CASE_TEST(memcmp_60_20); EXPECT_GT(1, memcmp("aaa\x60", "aaa\x20", 4), 0); break; @@ -1139,6 +1194,26 @@ int run_stdlib(int min, int max) CASE_TEST(limit_ptrdiff_min); EXPECT_EQ(1, PTRDIFF_MIN, sizeof(long) == 8 ? (ptrdiff_t) 0x8000000000000000LL : (ptrdiff_t) 0x80000000); break; CASE_TEST(limit_ptrdiff_max); EXPECT_EQ(1, PTRDIFF_MAX, sizeof(long) == 8 ? (ptrdiff_t) 0x7fffffffffffffffLL : (ptrdiff_t) 0x7fffffff); break; CASE_TEST(limit_size_max); EXPECT_EQ(1, SIZE_MAX, sizeof(long) == 8 ? (size_t) 0xffffffffffffffffULL : (size_t) 0xffffffffU); break; + CASE_TEST(strtol_simple); EXPECT_STRTOX(1, strtol, "35", 10, 35, -1, 0); break; + CASE_TEST(strtol_positive); EXPECT_STRTOX(1, strtol, "+35", 10, 35, -1, 0); break; + CASE_TEST(strtol_negative); EXPECT_STRTOX(1, strtol, "-35", 10, -35, -1, 0); break; + CASE_TEST(strtol_hex_auto); EXPECT_STRTOX(1, strtol, "0xFF", 0, 255, -1, 0); break; + CASE_TEST(strtol_base36); EXPECT_STRTOX(1, strtol, "12yZ", 36, 50507, -1, 0); break; + CASE_TEST(strtol_cutoff); EXPECT_STRTOX(1, strtol, "1234567890", 8, 342391, 7, 0); break; + CASE_TEST(strtol_octal_auto); EXPECT_STRTOX(1, strtol, "011", 0, 9, -1, 0); break; + CASE_TEST(strtol_hex_00); EXPECT_STRTOX(1, strtol, "0x00", 16, 0, -1, 0); break; + CASE_TEST(strtol_hex_FF); EXPECT_STRTOX(1, strtol, "FF", 16, 255, -1, 0); break; + CASE_TEST(strtol_hex_ff); EXPECT_STRTOX(1, strtol, "ff", 16, 255, -1, 0); break; + CASE_TEST(strtol_hex_prefix); EXPECT_STRTOX(1, strtol, "0xFF", 16, 255, -1, 0); break; + CASE_TEST(strtol_trailer); EXPECT_STRTOX(1, strtol, "35foo", 10, 35, 2, 0); break; + CASE_TEST(strtol_overflow); EXPECT_STRTOX(1, strtol, "0x8000000000000000", 16, LONG_MAX, -1, ERANGE); break; + CASE_TEST(strtol_underflow); EXPECT_STRTOX(1, strtol, "-0x8000000000000001", 16, LONG_MIN, -1, ERANGE); break; + CASE_TEST(strtoul_negative); EXPECT_STRTOX(1, strtoul, "-0x1", 16, ULONG_MAX, 4, 0); break; + CASE_TEST(strtoul_overflow); EXPECT_STRTOX(1, strtoul, "0x10000000000000000", 16, ULONG_MAX, -1, ERANGE); break; + CASE_TEST(strerror_success); EXPECT_STREQ(is_nolibc, strerror(0), "errno=0"); break; + CASE_TEST(strerror_EINVAL); EXPECT_STREQ(is_nolibc, strerror(EINVAL), "errno=22"); break; + CASE_TEST(strerror_int_max); EXPECT_STREQ(is_nolibc, strerror(INT_MAX), "errno=2147483647"); break; + CASE_TEST(strerror_int_min); EXPECT_STREQ(is_nolibc, strerror(INT_MIN), "errno=-2147483648"); break; case __LINE__: return ret; /* must be last */ diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index c0a5a7cea9fa..0446e6326a40 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -15,9 +15,10 @@ download_location="${cache_dir}/crosstools/" build_location="$(realpath "${cache_dir}"/nolibc-tests/)" perform_download=0 test_mode=system +CFLAGS_EXTRA="-Werror" archs="i386 x86_64 arm64 arm mips32le mips32be ppc ppc64 ppc64le riscv s390 loongarch" -TEMP=$(getopt -o 'j:d:c:b:a:m:ph' -n "$0" -- "$@") +TEMP=$(getopt -o 'j:d:c:b:a:m:peh' -n "$0" -- "$@") eval set -- "$TEMP" unset TEMP @@ -40,6 +41,7 @@ Options: -a [ARCH] Host architecture of toolchains to use (default: ${hostarch}) -b [DIR] Build location (default: ${build_location}) -m [MODE] Test mode user/system (default: ${test_mode}) + -e Disable -Werror EOF } @@ -66,6 +68,9 @@ while true; do '-m') test_mode="$2" shift 2; continue ;; + '-e') + CFLAGS_EXTRA="" + shift; continue ;; '-h') print_usage exit 0 @@ -153,7 +158,7 @@ test_arch() { exit 1 esac printf '%-15s' "$arch:" - swallow_output "${MAKE[@]}" "$test_target" V=1 + swallow_output "${MAKE[@]}" CFLAGS_EXTRA="$CFLAGS_EXTRA" "$test_target" V=1 cp run.out run.out."${arch}" "${MAKE[@]}" report | grep passed } diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile index 254d676a2689..185dc76ebb5f 100644 --- a/tools/testing/selftests/openat2/Makefile +++ b/tools/testing/selftests/openat2/Makefile @@ -1,8 +1,18 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan +CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test +# gcc requires -static-libasan in order to ensure that Address Sanitizer's +# library is the first one loaded. However, clang already statically links the +# Address Sanitizer if -fsanitize is specified. Therefore, simply omit +# -static-libasan for clang builds. +ifeq ($(LLVM),) + CFLAGS += -static-libasan +endif + +LOCAL_HDRS += helpers.h + include ../lib.mk -$(TEST_GEN_PROGS): helpers.c helpers.h +$(TEST_GEN_PROGS): helpers.c diff --git a/tools/testing/selftests/powerpc/flags.mk b/tools/testing/selftests/powerpc/flags.mk index b909bee3cb2a..abb9e58d95c4 100644 --- a/tools/testing/selftests/powerpc/flags.mk +++ b/tools/testing/selftests/powerpc/flags.mk @@ -5,8 +5,5 @@ GIT_VERSION := $(shell git describe --always --long --dirty || echo "unknown") export GIT_VERSION endif -ifeq ($(CFLAGS),) -CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(CFLAGS) +CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(USERCFLAGS) export CFLAGS -endif - diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c index 1b339d6bbff1..1ff1104e6575 100644 --- a/tools/testing/selftests/resctrl/cache.c +++ b/tools/testing/selftests/resctrl/cache.c @@ -101,12 +101,12 @@ static int get_llc_occu_resctrl(unsigned long *llc_occupancy) * * Return: 0 on success, < 0 on error. */ -static int print_results_cache(const char *filename, int bm_pid, __u64 llc_value) +static int print_results_cache(const char *filename, pid_t bm_pid, __u64 llc_value) { FILE *fp; if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) { - printf("Pid: %d \t LLC_value: %llu\n", bm_pid, llc_value); + printf("Pid: %d \t LLC_value: %llu\n", (int)bm_pid, llc_value); } else { fp = fopen(filename, "a"); if (!fp) { @@ -114,7 +114,7 @@ static int print_results_cache(const char *filename, int bm_pid, __u64 llc_value return -1; } - fprintf(fp, "Pid: %d \t llc_value: %llu\n", bm_pid, llc_value); + fprintf(fp, "Pid: %d \t llc_value: %llu\n", (int)bm_pid, llc_value); fclose(fp); } @@ -133,7 +133,7 @@ static int print_results_cache(const char *filename, int bm_pid, __u64 llc_value * Return: =0 on success. <0 on failure. */ int perf_event_measure(int pe_fd, struct perf_event_read *pe_read, - const char *filename, int bm_pid) + const char *filename, pid_t bm_pid) { int ret; @@ -161,7 +161,7 @@ int perf_event_measure(int pe_fd, struct perf_event_read *pe_read, * * Return: =0 on success. <0 on failure. */ -int measure_llc_resctrl(const char *filename, int bm_pid) +int measure_llc_resctrl(const char *filename, pid_t bm_pid) { unsigned long llc_occu_resc = 0; int ret; diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c index c7686fb6641a..742782438ca3 100644 --- a/tools/testing/selftests/resctrl/cat_test.c +++ b/tools/testing/selftests/resctrl/cat_test.c @@ -158,7 +158,6 @@ static int cat_test(const struct resctrl_test *test, struct resctrl_val_param *param, size_t span, unsigned long current_mask) { - char *resctrl_val = param->resctrl_val; struct perf_event_read pe_read; struct perf_event_attr pea; cpu_set_t old_affinity; @@ -178,8 +177,7 @@ static int cat_test(const struct resctrl_test *test, return ret; /* Write benchmark to specified con_mon grp, mon_grp in resctrl FS*/ - ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp, - resctrl_val); + ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp); if (ret) goto reset_affinity; @@ -272,7 +270,6 @@ static int cat_run_test(const struct resctrl_test *test, const struct user_param start_mask = create_bit_mask(start, n); struct resctrl_val_param param = { - .resctrl_val = CAT_STR, .ctrlgrp = "c1", .filename = RESULT_FILE_NAME, .num_of_runs = 0, @@ -291,11 +288,30 @@ static int cat_run_test(const struct resctrl_test *test, const struct user_param return ret; } +static bool arch_supports_noncont_cat(const struct resctrl_test *test) +{ + unsigned int eax, ebx, ecx, edx; + + /* AMD always supports non-contiguous CBM. */ + if (get_vendor() == ARCH_AMD) + return true; + + /* Intel support for non-contiguous CBM needs to be discovered. */ + if (!strcmp(test->resource, "L3")) + __cpuid_count(0x10, 1, eax, ebx, ecx, edx); + else if (!strcmp(test->resource, "L2")) + __cpuid_count(0x10, 2, eax, ebx, ecx, edx); + else + return false; + + return ((ecx >> 3) & 1); +} + static int noncont_cat_run_test(const struct resctrl_test *test, const struct user_params *uparams) { unsigned long full_cache_mask, cont_mask, noncont_mask; - unsigned int eax, ebx, ecx, edx, sparse_masks; + unsigned int sparse_masks; int bit_center, ret; char schemata[64]; @@ -304,15 +320,8 @@ static int noncont_cat_run_test(const struct resctrl_test *test, if (ret) return ret; - if (!strcmp(test->resource, "L3")) - __cpuid_count(0x10, 1, eax, ebx, ecx, edx); - else if (!strcmp(test->resource, "L2")) - __cpuid_count(0x10, 2, eax, ebx, ecx, edx); - else - return -EINVAL; - - if (sparse_masks != ((ecx >> 3) & 1)) { - ksft_print_msg("CPUID output doesn't match 'sparse_masks' file content!\n"); + if (arch_supports_noncont_cat(test) != sparse_masks) { + ksft_print_msg("Hardware and kernel differ on non-contiguous CBM support!\n"); return 1; } diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c index 0105afec6188..0c045080d808 100644 --- a/tools/testing/selftests/resctrl/cmt_test.c +++ b/tools/testing/selftests/resctrl/cmt_test.c @@ -16,6 +16,17 @@ #define MAX_DIFF 2000000 #define MAX_DIFF_PERCENT 15 +#define CON_MON_LCC_OCCUP_PATH \ + "%s/%s/mon_data/mon_L3_%02d/llc_occupancy" + +static int cmt_init(const struct resctrl_val_param *param, int domain_id) +{ + sprintf(llc_occup_path, CON_MON_LCC_OCCUP_PATH, RESCTRL_PATH, + param->ctrlgrp, domain_id); + + return 0; +} + static int cmt_setup(const struct resctrl_test *test, const struct user_params *uparams, struct resctrl_val_param *p) @@ -29,6 +40,13 @@ static int cmt_setup(const struct resctrl_test *test, return 0; } +static int cmt_measure(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid) +{ + sleep(1); + return measure_llc_resctrl(param->filename, bm_pid); +} + static int show_results_info(unsigned long sum_llc_val, int no_of_bits, unsigned long cache_span, unsigned long max_diff, unsigned long max_diff_percent, unsigned long num_of_runs, @@ -126,13 +144,13 @@ static int cmt_run_test(const struct resctrl_test *test, const struct user_param } struct resctrl_val_param param = { - .resctrl_val = CMT_STR, .ctrlgrp = "c1", - .mongrp = "m1", .filename = RESULT_FILE_NAME, .mask = ~(long_mask << n) & long_mask, .num_of_runs = 0, + .init = cmt_init, .setup = cmt_setup, + .measure = cmt_measure, }; span = cache_portion_size(cache_total_size, param.mask, long_mask); diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c index a6ad39aae162..ab8496a4925b 100644 --- a/tools/testing/selftests/resctrl/mba_test.c +++ b/tools/testing/selftests/resctrl/mba_test.c @@ -17,6 +17,19 @@ #define ALLOCATION_MIN 10 #define ALLOCATION_STEP 10 +static int mba_init(const struct resctrl_val_param *param, int domain_id) +{ + int ret; + + ret = initialize_mem_bw_imc(); + if (ret) + return ret; + + initialize_mem_bw_resctrl(param, domain_id); + + return 0; +} + /* * Change schemata percentage from 100 to 10%. Write schemata to specified * con_mon grp, mon_grp in resctrl FS. @@ -51,6 +64,12 @@ static int mba_setup(const struct resctrl_test *test, return 0; } +static int mba_measure(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid) +{ + return measure_mem_bw(uparams, param, bm_pid, "reads"); +} + static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) { int allocation, runs; @@ -145,12 +164,11 @@ static void mba_test_cleanup(void) static int mba_run_test(const struct resctrl_test *test, const struct user_params *uparams) { struct resctrl_val_param param = { - .resctrl_val = MBA_STR, .ctrlgrp = "c1", - .mongrp = "m1", .filename = RESULT_FILE_NAME, - .bw_report = "reads", - .setup = mba_setup + .init = mba_init, + .setup = mba_setup, + .measure = mba_measure, }; int ret; diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c index 6fec51e1ff46..6b5a3b52d861 100644 --- a/tools/testing/selftests/resctrl/mbm_test.c +++ b/tools/testing/selftests/resctrl/mbm_test.c @@ -86,6 +86,19 @@ static int check_results(size_t span) return ret; } +static int mbm_init(const struct resctrl_val_param *param, int domain_id) +{ + int ret; + + ret = initialize_mem_bw_imc(); + if (ret) + return ret; + + initialize_mem_bw_resctrl(param, domain_id); + + return 0; +} + static int mbm_setup(const struct resctrl_test *test, const struct user_params *uparams, struct resctrl_val_param *p) @@ -105,6 +118,12 @@ static int mbm_setup(const struct resctrl_test *test, return ret; } +static int mbm_measure(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid) +{ + return measure_mem_bw(uparams, param, bm_pid, "reads"); +} + static void mbm_test_cleanup(void) { remove(RESULT_FILE_NAME); @@ -113,12 +132,11 @@ static void mbm_test_cleanup(void) static int mbm_run_test(const struct resctrl_test *test, const struct user_params *uparams) { struct resctrl_val_param param = { - .resctrl_val = MBM_STR, .ctrlgrp = "c1", - .mongrp = "m1", .filename = RESULT_FILE_NAME, - .bw_report = "reads", - .setup = mbm_setup + .init = mbm_init, + .setup = mbm_setup, + .measure = mbm_measure, }; int ret; diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index 00d51fa7531c..2dda56084588 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -43,13 +43,6 @@ #define DEFAULT_SPAN (250 * MB) -#define PARENT_EXIT() \ - do { \ - kill(ppid, SIGKILL); \ - umount_resctrlfs(); \ - exit(EXIT_FAILURE); \ - } while (0) - /* * user_params: User supplied parameters * @cpu: CPU number to which the benchmark will be bound to @@ -88,24 +81,27 @@ struct resctrl_test { /* * resctrl_val_param: resctrl test parameters - * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc) * @ctrlgrp: Name of the control monitor group (con_mon grp) * @mongrp: Name of the monitor group (mon grp) * @filename: Name of file to which the o/p should be written - * @bw_report: Bandwidth report type (reads vs writes) - * @setup: Call back function to setup test environment + * @init: Callback function to initialize test environment + * @setup: Callback function to setup per test run environment + * @measure: Callback that performs the measurement (a single test) */ struct resctrl_val_param { - char *resctrl_val; - char ctrlgrp[64]; - char mongrp[64]; + const char *ctrlgrp; + const char *mongrp; char filename[64]; - char *bw_report; unsigned long mask; int num_of_runs; + int (*init)(const struct resctrl_val_param *param, + int domain_id); int (*setup)(const struct resctrl_test *test, const struct user_params *uparams, struct resctrl_val_param *param); + int (*measure)(const struct user_params *uparams, + struct resctrl_val_param *param, + pid_t bm_pid); }; struct perf_event_read { @@ -115,11 +111,6 @@ struct perf_event_read { } values[2]; }; -#define MBM_STR "mbm" -#define MBA_STR "mba" -#define CMT_STR "cmt" -#define CAT_STR "cat" - /* * Memory location that consumes values compiler must not optimize away. * Volatile ensures writes to this location cannot be optimized away by @@ -127,8 +118,6 @@ struct perf_event_read { */ extern volatile int *value_sink; -extern pid_t bm_pid, ppid; - extern char llc_occup_path[1024]; int get_vendor(void); @@ -137,7 +126,7 @@ int filter_dmesg(void); int get_domain_id(const char *resource, int cpu_no, int *domain_id); int mount_resctrlfs(void); int umount_resctrlfs(void); -int validate_bw_report_request(char *bw_report); +const char *get_bw_report_type(const char *bw_report); bool resctrl_resource_exists(const char *resource); bool resctrl_mon_feature_exists(const char *resource, const char *feature); bool resource_info_file_exists(const char *resource, const char *file); @@ -145,15 +134,21 @@ bool test_resource_feature_check(const struct resctrl_test *test); char *fgrep(FILE *inf, const char *str); int taskset_benchmark(pid_t bm_pid, int cpu_no, cpu_set_t *old_affinity); int taskset_restore(pid_t bm_pid, cpu_set_t *old_affinity); -int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, const char *resource); -int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, - char *resctrl_val); +int write_schemata(const char *ctrlgrp, char *schemata, int cpu_no, + const char *resource); +int write_bm_pid_to_resctrl(pid_t bm_pid, const char *ctrlgrp, const char *mongrp); int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags); unsigned char *alloc_buffer(size_t buf_size, int memflush); void mem_flush(unsigned char *buf, size_t buf_size); void fill_cache_read(unsigned char *buf, size_t buf_size, bool once); int run_fill_buf(size_t buf_size, int memflush, int op, bool once); +int initialize_mem_bw_imc(void); +int measure_mem_bw(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid, + const char *bw_report); +void initialize_mem_bw_resctrl(const struct resctrl_val_param *param, + int domain_id); int resctrl_val(const struct resctrl_test *test, const struct user_params *uparams, const char * const *benchmark_cmd, @@ -174,8 +169,8 @@ void perf_event_initialize_read_format(struct perf_event_read *pe_read); int perf_open(struct perf_event_attr *pea, pid_t pid, int cpu_no); int perf_event_reset_enable(int pe_fd); int perf_event_measure(int pe_fd, struct perf_event_read *pe_read, - const char *filename, int bm_pid); -int measure_llc_resctrl(const char *filename, int bm_pid); + const char *filename, pid_t bm_pid); +int measure_llc_resctrl(const char *filename, pid_t bm_pid); void show_cache_info(int no_of_bits, __u64 avg_llc_val, size_t cache_span, bool lines); /* diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c index 445f306d4c2f..8c275f6b4dd7 100644 --- a/tools/testing/selftests/resctrl/resctrl_val.c +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -19,30 +19,10 @@ #define MAX_TOKENS 5 #define READ 0 #define WRITE 1 -#define CON_MON_MBM_LOCAL_BYTES_PATH \ - "%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes" #define CON_MBM_LOCAL_BYTES_PATH \ "%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes" -#define MON_MBM_LOCAL_BYTES_PATH \ - "%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes" - -#define MBM_LOCAL_BYTES_PATH \ - "%s/mon_data/mon_L3_%02d/mbm_local_bytes" - -#define CON_MON_LCC_OCCUP_PATH \ - "%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy" - -#define CON_LCC_OCCUP_PATH \ - "%s/%s/mon_data/mon_L3_%02d/llc_occupancy" - -#define MON_LCC_OCCUP_PATH \ - "%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy" - -#define LCC_OCCUP_PATH \ - "%s/mon_data/mon_L3_%02d/llc_occupancy" - struct membw_read_format { __u64 value; /* The value of the event */ __u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */ @@ -276,7 +256,7 @@ static int num_of_imcs(void) return count; } -static int initialize_mem_bw_imc(void) +int initialize_mem_bw_imc(void) { int imc, j; @@ -293,44 +273,93 @@ static int initialize_mem_bw_imc(void) return 0; } +static void perf_close_imc_mem_bw(void) +{ + int mc; + + for (mc = 0; mc < imcs; mc++) { + if (imc_counters_config[mc][READ].fd != -1) + close(imc_counters_config[mc][READ].fd); + if (imc_counters_config[mc][WRITE].fd != -1) + close(imc_counters_config[mc][WRITE].fd); + } +} + /* - * get_mem_bw_imc: Memory band width as reported by iMC counters - * @cpu_no: CPU number that the benchmark PID is binded to - * @bw_report: Bandwidth report type (reads, writes) - * - * Memory B/W utilized by a process on a socket can be calculated using - * iMC counters. Perf events are used to read these counters. + * perf_open_imc_mem_bw - Open perf fds for IMCs + * @cpu_no: CPU number that the benchmark PID is bound to * * Return: = 0 on success. < 0 on failure. */ -static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc) +static int perf_open_imc_mem_bw(int cpu_no) { - float reads, writes, of_mul_read, of_mul_write; - int imc, j, ret; + int imc, ret; - /* Start all iMC counters to log values (both read and write) */ - reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1; for (imc = 0; imc < imcs; imc++) { - for (j = 0; j < 2; j++) { - ret = open_perf_event(imc, cpu_no, j); - if (ret) - return -1; - } - for (j = 0; j < 2; j++) - membw_ioctl_perf_event_ioc_reset_enable(imc, j); + imc_counters_config[imc][READ].fd = -1; + imc_counters_config[imc][WRITE].fd = -1; + } + + for (imc = 0; imc < imcs; imc++) { + ret = open_perf_event(imc, cpu_no, READ); + if (ret) + goto close_fds; + ret = open_perf_event(imc, cpu_no, WRITE); + if (ret) + goto close_fds; + } + + return 0; + +close_fds: + perf_close_imc_mem_bw(); + return -1; +} + +/* + * do_mem_bw_test - Perform memory bandwidth test + * + * Runs memory bandwidth test over one second period. Also, handles starting + * and stopping of the IMC perf counters around the test. + */ +static void do_imc_mem_bw_test(void) +{ + int imc; + + for (imc = 0; imc < imcs; imc++) { + membw_ioctl_perf_event_ioc_reset_enable(imc, READ); + membw_ioctl_perf_event_ioc_reset_enable(imc, WRITE); } sleep(1); /* Stop counters after a second to get results (both read and write) */ for (imc = 0; imc < imcs; imc++) { - for (j = 0; j < 2; j++) - membw_ioctl_perf_event_ioc_disable(imc, j); + membw_ioctl_perf_event_ioc_disable(imc, READ); + membw_ioctl_perf_event_ioc_disable(imc, WRITE); } +} + +/* + * get_mem_bw_imc - Memory bandwidth as reported by iMC counters + * @bw_report: Bandwidth report type (reads, writes) + * + * Memory bandwidth utilized by a process on a socket can be calculated + * using iMC counters. Perf events are used to read these counters. + * + * Return: = 0 on success. < 0 on failure. + */ +static int get_mem_bw_imc(const char *bw_report, float *bw_imc) +{ + float reads, writes, of_mul_read, of_mul_write; + int imc; + + /* Start all iMC counters to log values (both read and write) */ + reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1; /* * Get results which are stored in struct type imc_counter_config - * Take over flow into consideration before calculating total b/w + * Take overflow into consideration before calculating total bandwidth. */ for (imc = 0; imc < imcs; imc++) { struct imc_counter_config *r = @@ -340,15 +369,13 @@ static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc) if (read(r->fd, &r->return_value, sizeof(struct membw_read_format)) == -1) { - ksft_perror("Couldn't get read b/w through iMC"); - + ksft_perror("Couldn't get read bandwidth through iMC"); return -1; } if (read(w->fd, &w->return_value, sizeof(struct membw_read_format)) == -1) { - ksft_perror("Couldn't get write bw through iMC"); - + ksft_perror("Couldn't get write bandwidth through iMC"); return -1; } @@ -369,11 +396,6 @@ static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc) writes += w->return_value.value * of_mul_write * SCALE; } - for (imc = 0; imc < imcs; imc++) { - close(imc_counters_config[imc][READ].fd); - close(imc_counters_config[imc][WRITE].fd); - } - if (strcmp(bw_report, "reads") == 0) { *bw_imc = reads; return 0; @@ -388,84 +410,45 @@ static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc) return 0; } -void set_mbm_path(const char *ctrlgrp, const char *mongrp, int domain_id) +/* + * initialize_mem_bw_resctrl: Appropriately populate "mbm_total_path" + * @param: Parameters passed to resctrl_val() + * @domain_id: Domain ID (cache ID; for MB, L3 cache ID) + */ +void initialize_mem_bw_resctrl(const struct resctrl_val_param *param, + int domain_id) { - if (ctrlgrp && mongrp) - sprintf(mbm_total_path, CON_MON_MBM_LOCAL_BYTES_PATH, - RESCTRL_PATH, ctrlgrp, mongrp, domain_id); - else if (!ctrlgrp && mongrp) - sprintf(mbm_total_path, MON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, - mongrp, domain_id); - else if (ctrlgrp && !mongrp) - sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, - ctrlgrp, domain_id); - else if (!ctrlgrp && !mongrp) - sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, - domain_id); + sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH, + param->ctrlgrp, domain_id); } /* - * initialize_mem_bw_resctrl: Appropriately populate "mbm_total_path" - * @ctrlgrp: Name of the control monitor group (con_mon grp) - * @mongrp: Name of the monitor group (mon grp) - * @cpu_no: CPU number that the benchmark PID is binded to - * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc) + * Open file to read MBM local bytes from resctrl FS */ -static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp, - int cpu_no, char *resctrl_val) +static FILE *open_mem_bw_resctrl(const char *mbm_bw_file) { - int domain_id; - - if (get_domain_id("MB", cpu_no, &domain_id) < 0) { - ksft_print_msg("Could not get domain ID\n"); - return; - } + FILE *fp; - if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) - set_mbm_path(ctrlgrp, mongrp, domain_id); + fp = fopen(mbm_bw_file, "r"); + if (!fp) + ksft_perror("Failed to open total memory bandwidth file"); - if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { - if (ctrlgrp) - sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, - RESCTRL_PATH, ctrlgrp, domain_id); - else - sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH, - RESCTRL_PATH, domain_id); - } + return fp; } /* * Get MBM Local bytes as reported by resctrl FS - * For MBM, - * 1. If con_mon grp and mon grp are given, then read from con_mon grp's mon grp - * 2. If only con_mon grp is given, then read from con_mon grp - * 3. If both are not given, then read from root con_mon grp - * For MBA, - * 1. If con_mon grp is given, then read from it - * 2. If con_mon grp is not given, then read from root con_mon grp */ -static int get_mem_bw_resctrl(unsigned long *mbm_total) +static int get_mem_bw_resctrl(FILE *fp, unsigned long *mbm_total) { - FILE *fp; - - fp = fopen(mbm_total_path, "r"); - if (!fp) { - ksft_perror("Failed to open total bw file"); - + if (fscanf(fp, "%lu\n", mbm_total) <= 0) { + ksft_perror("Could not get MBM local bytes"); return -1; } - if (fscanf(fp, "%lu", mbm_total) <= 0) { - ksft_perror("Could not get mbm local bytes"); - fclose(fp); - - return -1; - } - fclose(fp); - return 0; } -pid_t bm_pid, ppid; +static pid_t bm_pid, ppid; void ctrlc_handler(int signum, siginfo_t *info, void *ptr) { @@ -523,6 +506,13 @@ void signal_handler_unregister(void) } } +static void parent_exit(pid_t ppid) +{ + kill(ppid, SIGKILL); + umount_resctrlfs(); + exit(EXIT_FAILURE); +} + /* * print_results_bw: the memory bandwidth results are stored in a file * @filename: file that stores the results @@ -532,14 +522,14 @@ void signal_handler_unregister(void) * * Return: 0 on success, < 0 on error. */ -static int print_results_bw(char *filename, int bm_pid, float bw_imc, +static int print_results_bw(char *filename, pid_t bm_pid, float bw_imc, unsigned long bw_resc) { unsigned long diff = fabs(bw_imc - bw_resc); FILE *fp; if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) { - printf("Pid: %d \t Mem_BW_iMC: %f \t ", bm_pid, bw_imc); + printf("Pid: %d \t Mem_BW_iMC: %f \t ", (int)bm_pid, bw_imc); printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc, diff); } else { fp = fopen(filename, "a"); @@ -549,7 +539,7 @@ static int print_results_bw(char *filename, int bm_pid, float bw_imc, return -1; } if (fprintf(fp, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n", - bm_pid, bw_imc, bw_resc, diff) <= 0) { + (int)bm_pid, bw_imc, bw_resc, diff) <= 0) { ksft_print_msg("Could not log results\n"); fclose(fp); @@ -561,73 +551,67 @@ static int print_results_bw(char *filename, int bm_pid, float bw_imc, return 0; } -static void set_cmt_path(const char *ctrlgrp, const char *mongrp, char sock_num) -{ - if (strlen(ctrlgrp) && strlen(mongrp)) - sprintf(llc_occup_path, CON_MON_LCC_OCCUP_PATH, RESCTRL_PATH, - ctrlgrp, mongrp, sock_num); - else if (!strlen(ctrlgrp) && strlen(mongrp)) - sprintf(llc_occup_path, MON_LCC_OCCUP_PATH, RESCTRL_PATH, - mongrp, sock_num); - else if (strlen(ctrlgrp) && !strlen(mongrp)) - sprintf(llc_occup_path, CON_LCC_OCCUP_PATH, RESCTRL_PATH, - ctrlgrp, sock_num); - else if (!strlen(ctrlgrp) && !strlen(mongrp)) - sprintf(llc_occup_path, LCC_OCCUP_PATH, RESCTRL_PATH, sock_num); -} - /* - * initialize_llc_occu_resctrl: Appropriately populate "llc_occup_path" - * @ctrlgrp: Name of the control monitor group (con_mon grp) - * @mongrp: Name of the monitor group (mon grp) - * @cpu_no: CPU number that the benchmark PID is binded to - * @resctrl_val: Resctrl feature (Eg: cat, cmt.. etc) + * measure_mem_bw - Measures memory bandwidth numbers while benchmark runs + * @uparams: User supplied parameters + * @param: Parameters passed to resctrl_val() + * @bm_pid: PID that runs the benchmark + * @bw_report: Bandwidth report type (reads, writes) + * + * Measure memory bandwidth from resctrl and from another source which is + * perf imc value or could be something else if perf imc event is not + * available. Compare the two values to validate resctrl value. It takes + * 1 sec to measure the data. */ -static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp, - int cpu_no, char *resctrl_val) +int measure_mem_bw(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid, + const char *bw_report) { - int domain_id; + unsigned long bw_resc, bw_resc_start, bw_resc_end; + FILE *mem_bw_fp; + float bw_imc; + int ret; - if (get_domain_id("L3", cpu_no, &domain_id) < 0) { - ksft_print_msg("Could not get domain ID\n"); - return; - } + bw_report = get_bw_report_type(bw_report); + if (!bw_report) + return -1; - if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) - set_cmt_path(ctrlgrp, mongrp, domain_id); -} + mem_bw_fp = open_mem_bw_resctrl(mbm_total_path); + if (!mem_bw_fp) + return -1; -static int measure_vals(const struct user_params *uparams, - struct resctrl_val_param *param, - unsigned long *bw_resc_start) -{ - unsigned long bw_resc, bw_resc_end; - float bw_imc; - int ret; + ret = perf_open_imc_mem_bw(uparams->cpu); + if (ret < 0) + goto close_fp; - /* - * Measure memory bandwidth from resctrl and from - * another source which is perf imc value or could - * be something else if perf imc event is not available. - * Compare the two values to validate resctrl value. - * It takes 1sec to measure the data. - */ - ret = get_mem_bw_imc(uparams->cpu, param->bw_report, &bw_imc); + ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_start); if (ret < 0) - return ret; + goto close_imc; + + rewind(mem_bw_fp); + + do_imc_mem_bw_test(); - ret = get_mem_bw_resctrl(&bw_resc_end); + ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_end); if (ret < 0) - return ret; + goto close_imc; - bw_resc = (bw_resc_end - *bw_resc_start) / MB; - ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc); - if (ret) - return ret; + ret = get_mem_bw_imc(bw_report, &bw_imc); + if (ret < 0) + goto close_imc; - *bw_resc_start = bw_resc_end; + perf_close_imc_mem_bw(); + fclose(mem_bw_fp); - return 0; + bw_resc = (bw_resc_end - bw_resc_start) / MB; + + return print_results_bw(param->filename, bm_pid, bw_imc, bw_resc); + +close_imc: + perf_close_imc_mem_bw(); +close_fp: + fclose(mem_bw_fp); + return ret; } /* @@ -654,7 +638,7 @@ static void run_benchmark(int signum, siginfo_t *info, void *ucontext) fp = freopen("/dev/null", "w", stdout); if (!fp) { ksft_perror("Unable to direct benchmark status to /dev/null"); - PARENT_EXIT(); + parent_exit(ppid); } if (strcmp(benchmark_cmd[0], "fill_buf") == 0) { @@ -668,7 +652,7 @@ static void run_benchmark(int signum, siginfo_t *info, void *ucontext) once = false; } else { ksft_print_msg("Invalid once parameter\n"); - PARENT_EXIT(); + parent_exit(ppid); } if (run_fill_buf(span, memflush, operation, once)) @@ -682,7 +666,7 @@ static void run_benchmark(int signum, siginfo_t *info, void *ucontext) fclose(stdout); ksft_print_msg("Unable to run specified benchmark\n"); - PARENT_EXIT(); + parent_exit(ppid); } /* @@ -700,21 +684,19 @@ int resctrl_val(const struct resctrl_test *test, const char * const *benchmark_cmd, struct resctrl_val_param *param) { - char *resctrl_val = param->resctrl_val; - unsigned long bw_resc_start = 0; struct sigaction sigact; int ret = 0, pipefd[2]; char pipe_message = 0; union sigval value; + int domain_id; if (strcmp(param->filename, "") == 0) sprintf(param->filename, "stdio"); - if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) || - !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) { - ret = validate_bw_report_request(param->bw_report); - if (ret) - return ret; + ret = get_domain_id(test->resource, uparams->cpu, &domain_id); + if (ret < 0) { + ksft_print_msg("Could not get domain ID\n"); + return ret; } /* @@ -755,7 +737,7 @@ int resctrl_val(const struct resctrl_test *test, /* Register for "SIGUSR1" signal from parent */ if (sigaction(SIGUSR1, &sigact, NULL)) { ksft_perror("Can't register child for signal"); - PARENT_EXIT(); + parent_exit(ppid); } /* Tell parent that child is ready */ @@ -773,10 +755,10 @@ int resctrl_val(const struct resctrl_test *test, sigsuspend(&sigact.sa_mask); ksft_perror("Child is done"); - PARENT_EXIT(); + parent_exit(ppid); } - ksft_print_msg("Benchmark PID: %d\n", bm_pid); + ksft_print_msg("Benchmark PID: %d\n", (int)bm_pid); /* * The cast removes constness but nothing mutates benchmark_cmd within @@ -792,22 +774,15 @@ int resctrl_val(const struct resctrl_test *test, goto out; /* Write benchmark to specified control&monitoring grp in resctrl FS */ - ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp, - resctrl_val); + ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp); if (ret) goto out; - if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) || - !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { - ret = initialize_mem_bw_imc(); + if (param->init) { + ret = param->init(param, domain_id); if (ret) goto out; - - initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp, - uparams->cpu, resctrl_val); - } else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) - initialize_llc_occu_resctrl(param->ctrlgrp, param->mongrp, - uparams->cpu, resctrl_val); + } /* Parent waits for child to be ready. */ close(pipefd[1]); @@ -841,17 +816,9 @@ int resctrl_val(const struct resctrl_test *test, if (ret < 0) break; - if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) || - !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { - ret = measure_vals(uparams, param, &bw_resc_start); - if (ret) - break; - } else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) { - sleep(1); - ret = measure_llc_resctrl(param->filename, bm_pid); - if (ret) - break; - } + ret = param->measure(uparams, param, bm_pid); + if (ret) + break; } out: diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c index 1cade75176eb..250c320349a7 100644 --- a/tools/testing/selftests/resctrl/resctrlfs.c +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -456,6 +456,9 @@ int taskset_restore(pid_t bm_pid, cpu_set_t *old_affinity) * @grp: Full path and name of the group * @parent_grp: Full path and name of the parent group * + * Creates a group @grp_name if it does not exist yet. If @grp_name is NULL, + * it is interpreted as the root group which always results in success. + * * Return: 0 on success, < 0 on error. */ static int create_grp(const char *grp_name, char *grp, const char *parent_grp) @@ -464,12 +467,7 @@ static int create_grp(const char *grp_name, char *grp, const char *parent_grp) struct dirent *ep; DIR *dp; - /* - * At this point, we are guaranteed to have resctrl FS mounted and if - * length of grp_name == 0, it means, user wants to use root con_mon - * grp, so do nothing - */ - if (strlen(grp_name) == 0) + if (!grp_name) return 0; /* Check if requested grp exists or not */ @@ -508,7 +506,7 @@ static int write_pid_to_tasks(char *tasks, pid_t pid) return -1; } - if (fprintf(fp, "%d\n", pid) < 0) { + if (fprintf(fp, "%d\n", (int)pid) < 0) { ksft_print_msg("Failed to write pid to tasks file\n"); fclose(fp); @@ -524,7 +522,6 @@ static int write_pid_to_tasks(char *tasks, pid_t pid) * @bm_pid: PID that should be written * @ctrlgrp: Name of the control monitor group (con_mon grp) * @mongrp: Name of the monitor group (mon grp) - * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc) * * If a con_mon grp is requested, create it and write pid to it, otherwise * write pid to root con_mon grp. @@ -534,14 +531,13 @@ static int write_pid_to_tasks(char *tasks, pid_t pid) * * Return: 0 on success, < 0 on error. */ -int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, - char *resctrl_val) +int write_bm_pid_to_resctrl(pid_t bm_pid, const char *ctrlgrp, const char *mongrp) { char controlgroup[128], monitorgroup[512], monitorgroup_p[256]; char tasks[1024]; int ret = 0; - if (strlen(ctrlgrp)) + if (ctrlgrp) sprintf(controlgroup, "%s/%s", RESCTRL_PATH, ctrlgrp); else sprintf(controlgroup, "%s", RESCTRL_PATH); @@ -555,22 +551,19 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, if (ret) goto out; - /* Create mon grp and write pid into it for "mbm" and "cmt" test */ - if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)) || - !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) { - if (strlen(mongrp)) { - sprintf(monitorgroup_p, "%s/mon_groups", controlgroup); - sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp); - ret = create_grp(mongrp, monitorgroup, monitorgroup_p); - if (ret) - goto out; - - sprintf(tasks, "%s/mon_groups/%s/tasks", - controlgroup, mongrp); - ret = write_pid_to_tasks(tasks, bm_pid); - if (ret) - goto out; - } + /* Create monitor group and write pid into if it is used */ + if (mongrp) { + sprintf(monitorgroup_p, "%s/mon_groups", controlgroup); + sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp); + ret = create_grp(mongrp, monitorgroup, monitorgroup_p); + if (ret) + goto out; + + sprintf(tasks, "%s/mon_groups/%s/tasks", + controlgroup, mongrp); + ret = write_pid_to_tasks(tasks, bm_pid); + if (ret) + goto out; } out: @@ -593,7 +586,8 @@ out: * * Return: 0 on success, < 0 on error. */ -int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, const char *resource) +int write_schemata(const char *ctrlgrp, char *schemata, int cpu_no, + const char *resource) { char controlgroup[1024], reason[128], schema[1024] = {}; int domain_id, fd, schema_len, ret = 0; @@ -611,7 +605,7 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, const char *resour goto out; } - if (strlen(ctrlgrp) != 0) + if (ctrlgrp) sprintf(controlgroup, "%s/%s/schemata", RESCTRL_PATH, ctrlgrp); else sprintf(controlgroup, "%s/schemata", RESCTRL_PATH); @@ -837,22 +831,21 @@ int filter_dmesg(void) return 0; } -int validate_bw_report_request(char *bw_report) +const char *get_bw_report_type(const char *bw_report) { if (strcmp(bw_report, "reads") == 0) - return 0; + return bw_report; if (strcmp(bw_report, "writes") == 0) - return 0; + return bw_report; if (strcmp(bw_report, "nt-writes") == 0) { - strcpy(bw_report, "writes"); - return 0; + return "writes"; } if (strcmp(bw_report, "total") == 0) - return 0; + return bw_report; - fprintf(stderr, "Requested iMC B/W report type unavailable\n"); + fprintf(stderr, "Requested iMC bandwidth report type unavailable\n"); - return -1; + return NULL; } int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, diff --git a/tools/testing/selftests/riscv/sigreturn/sigreturn.c b/tools/testing/selftests/riscv/sigreturn/sigreturn.c index 62397d5934f1..ed351a1cb917 100644 --- a/tools/testing/selftests/riscv/sigreturn/sigreturn.c +++ b/tools/testing/selftests/riscv/sigreturn/sigreturn.c @@ -51,7 +51,7 @@ static int vector_sigreturn(int data, void (*handler)(int, siginfo_t *, void *)) asm(".option push \n\ .option arch, +v \n\ - vsetivli x0, 1, e32, ta, ma \n\ + vsetivli x0, 1, e32, m1, ta, ma \n\ vmv.s.x v0, %1 \n\ # Generate SIGSEGV \n\ lw a0, 0(x0) \n\ diff --git a/tools/testing/selftests/sched/cs_prctl_test.c b/tools/testing/selftests/sched/cs_prctl_test.c index 62fba7356af2..52d97fae4dbd 100644 --- a/tools/testing/selftests/sched/cs_prctl_test.c +++ b/tools/testing/selftests/sched/cs_prctl_test.c @@ -42,11 +42,11 @@ static pid_t gettid(void) #ifndef PR_SCHED_CORE #define PR_SCHED_CORE 62 -# define PR_SCHED_CORE_GET 0 -# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */ -# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ -# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */ -# define PR_SCHED_CORE_MAX 4 +#define PR_SCHED_CORE_GET 0 +#define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */ +#define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ +#define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */ +#define PR_SCHED_CORE_MAX 4 #endif #define MAX_PROCESSES 128 diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index b83099160fbc..94886c82ae60 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -194,14 +194,14 @@ int main(int argc, char *argv[]) ksft_set_plan(7); ksft_print_msg("Running on:\n"); - ksft_print_msg(""); + ksft_print_msg("%s", ""); system("uname -a"); ksft_print_msg("Current BPF sysctl settings:\n"); /* Avoid using "sysctl" which may not be installed. */ - ksft_print_msg(""); + ksft_print_msg("%s", ""); system("grep -H . /proc/sys/net/core/bpf_jit_enable"); - ksft_print_msg(""); + ksft_print_msg("%s", ""); system("grep -H . /proc/sys/net/core/bpf_jit_harden"); affinity(); diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 783ebce8c4de..e3f97f90d8db 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -3954,6 +3954,60 @@ TEST(user_notification_filter_empty) EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); } +TEST(user_ioctl_notification_filter_empty) +{ + pid_t pid; + long ret; + int status, p[2]; + struct __clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + struct seccomp_notif req = {}; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + if (__NR_clone3 < 0) + SKIP(return, "Test not built with clone3 support"); + + ASSERT_EQ(0, pipe(p)); + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + int listener; + + listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER); + if (listener < 0) + _exit(EXIT_FAILURE); + + if (dup2(listener, 200) != 200) + _exit(EXIT_FAILURE); + close(p[1]); + close(listener); + sleep(1); + + _exit(EXIT_SUCCESS); + } + if (read(p[0], &status, 1) != 0) + _exit(EXIT_SUCCESS); + close(p[0]); + /* + * The seccomp filter has become unused so we should be notified once + * the kernel gets around to cleaning up task struct. + */ + EXPECT_EQ(ioctl(200, SECCOMP_IOCTL_NOTIF_RECV, &req), -1); + EXPECT_EQ(errno, ENOENT); + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + static void *do_thread(void *data) { return NULL; @@ -4755,6 +4809,83 @@ TEST(user_notification_wait_killable_fatal) EXPECT_EQ(SIGTERM, WTERMSIG(status)); } +struct tsync_vs_thread_leader_args { + pthread_t leader; +}; + +static void *tsync_vs_dead_thread_leader_sibling(void *_args) +{ + struct sock_filter allow_filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog allow_prog = { + .len = (unsigned short)ARRAY_SIZE(allow_filter), + .filter = allow_filter, + }; + struct tsync_vs_thread_leader_args *args = _args; + void *retval; + long ret; + + ret = pthread_join(args->leader, &retval); + if (ret) + exit(1); + if (retval != _args) + exit(2); + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &allow_prog); + if (ret) + exit(3); + + exit(0); +} + +/* + * Ensure that a dead thread leader doesn't prevent installing new filters with + * SECCOMP_FILTER_FLAG_TSYNC from other threads. + */ +TEST(tsync_vs_dead_thread_leader) +{ + int status; + pid_t pid; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + struct sock_filter allow_filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog allow_prog = { + .len = (unsigned short)ARRAY_SIZE(allow_filter), + .filter = allow_filter, + }; + struct tsync_vs_thread_leader_args *args; + pthread_t sibling; + + args = malloc(sizeof(*args)); + ASSERT_NE(NULL, args); + args->leader = pthread_self(); + + ret = pthread_create(&sibling, NULL, + tsync_vs_dead_thread_leader_sibling, args); + ASSERT_EQ(0, ret); + + /* Install a new filter just to the leader thread. */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); + ASSERT_EQ(0, ret); + pthread_exit(args); + exit(1); + } + + EXPECT_EQ(pid, waitpid(pid, &status, 0)); + EXPECT_EQ(0, status); +} + /* * TODO: * - expand NNP testing diff --git a/tools/testing/selftests/timens/exec.c b/tools/testing/selftests/timens/exec.c index e40dc5be2f66..d12ff955de0d 100644 --- a/tools/testing/selftests/timens/exec.c +++ b/tools/testing/selftests/timens/exec.c @@ -30,7 +30,7 @@ int main(int argc, char *argv[]) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now.tv_sec) > 5) + if (labs(tst.tv_sec - now.tv_sec) > 5) return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec); } return 0; @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now.tv_sec) > 5) + if (labs(tst.tv_sec - now.tv_sec) > 5) return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec); } @@ -70,7 +70,7 @@ int main(int argc, char *argv[]) /* Check that a child process is in the new timens. */ for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now.tv_sec - OFFSET) > 5) + if (labs(tst.tv_sec - now.tv_sec - OFFSET) > 5) return pr_fail("%ld %ld\n", now.tv_sec + OFFSET, tst.tv_sec); } diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c index 5e7f0051bd7b..5b939f59dfa4 100644 --- a/tools/testing/selftests/timens/timer.c +++ b/tools/testing/selftests/timens/timer.c @@ -56,7 +56,7 @@ int run_test(int clockid, struct timespec now) return pr_perror("timerfd_gettime"); elapsed = new_value.it_value.tv_sec; - if (abs(elapsed - 3600) > 60) { + if (llabs(elapsed - 3600) > 60) { ksft_test_result_fail("clockid: %d elapsed: %lld\n", clockid, elapsed); return 1; diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c index 9edd43d6b2c1..a4196bbd6e33 100644 --- a/tools/testing/selftests/timens/timerfd.c +++ b/tools/testing/selftests/timens/timerfd.c @@ -61,7 +61,7 @@ int run_test(int clockid, struct timespec now) return pr_perror("timerfd_gettime(%d)", clockid); elapsed = new_value.it_value.tv_sec; - if (abs(elapsed - 3600) > 60) { + if (llabs(elapsed - 3600) > 60) { ksft_test_result_fail("clockid: %d elapsed: %lld\n", clockid, elapsed); return 1; diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c index beb7614941fb..5b8907bf451d 100644 --- a/tools/testing/selftests/timens/vfork_exec.c +++ b/tools/testing/selftests/timens/vfork_exec.c @@ -32,7 +32,7 @@ static void *tcheck(void *_args) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now->tv_sec) > 5) { + if (labs(tst.tv_sec - now->tv_sec) > 5) { pr_fail("%s: in-thread: unexpected value: %ld (%ld)\n", args->tst_name, tst.tv_sec, now->tv_sec); return (void *)1UL; @@ -64,7 +64,7 @@ static int check(char *tst_name, struct timespec *now) for (i = 0; i < 2; i++) { _gettime(CLOCK_MONOTONIC, &tst, i); - if (abs(tst.tv_sec - now->tv_sec) > 5) + if (labs(tst.tv_sec - now->tv_sec) > 5) return pr_fail("%s: unexpected value: %ld (%ld)\n", tst_name, tst.tv_sec, now->tv_sec); } diff --git a/tools/testing/selftests/timers/rtcpie.c b/tools/testing/selftests/timers/rtcpie.c index 4ef2184f1558..7c07edd0d450 100644 --- a/tools/testing/selftests/timers/rtcpie.c +++ b/tools/testing/selftests/timers/rtcpie.c @@ -29,7 +29,7 @@ static const char default_rtc[] = "/dev/rtc0"; int main(int argc, char **argv) { - int i, fd, retval, irqcount = 0; + int i, fd, retval; unsigned long tmp, data, old_pie_rate; const char *rtc = default_rtc; struct timeval start, end, diff; @@ -120,7 +120,6 @@ int main(int argc, char **argv) fprintf(stderr, " %d",i); fflush(stderr); - irqcount++; } /* Disable periodic interrupts */ diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index d53a4d8008f9..98d8ba2afa00 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -1,35 +1,30 @@ # SPDX-License-Identifier: GPL-2.0 -include ../lib.mk - uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) -TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu -TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi -TEST_GEN_PROGS += $(OUTPUT)/vdso_test_clock_getres +TEST_GEN_PROGS := vdso_test_gettimeofday +TEST_GEN_PROGS += vdso_test_getcpu +TEST_GEN_PROGS += vdso_test_abi +TEST_GEN_PROGS += vdso_test_clock_getres ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) -TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86 +TEST_GEN_PROGS += vdso_standalone_test_x86 endif -TEST_GEN_PROGS += $(OUTPUT)/vdso_test_correctness +TEST_GEN_PROGS += vdso_test_correctness CFLAGS := -std=gnu99 -CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector -LDFLAGS_vdso_test_correctness := -ldl + ifeq ($(CONFIG_X86_32),y) LDLIBS += -lgcc_s endif -all: $(TEST_GEN_PROGS) +include ../lib.mk $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c $(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c $(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c + $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c - $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \ - vdso_standalone_test_x86.c parse_vdso.c \ - -o $@ +$(OUTPUT)/vdso_standalone_test_x86: CFLAGS +=-nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector + $(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c - $(CC) $(CFLAGS) \ - vdso_test_correctness.c \ - -o $@ \ - $(LDFLAGS_vdso_test_correctness) +$(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 413f75620a35..4ae417372e9e 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -55,14 +55,20 @@ static struct vdso_info ELF(Verdef) *verdef; } vdso_info; -/* Straight from the ELF specification. */ -static unsigned long elf_hash(const unsigned char *name) +/* + * Straight from the ELF specification...and then tweaked slightly, in order to + * avoid a few clang warnings. + */ +static unsigned long elf_hash(const char *name) { unsigned long h = 0, g; - while (*name) + const unsigned char *uch_name = (const unsigned char *)name; + + while (*uch_name) { - h = (h << 4) + *name++; - if (g = h & 0xf0000000) + h = (h << 4) + *uch_name++; + g = h & 0xf0000000; + if (g) h ^= g >> 24; h &= ~g; } diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c index 8a44ff973ee1..27f6fdf11969 100644 --- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c +++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c @@ -18,7 +18,7 @@ #include "parse_vdso.h" -/* We need a libc functions... */ +/* We need some libc functions... */ int strcmp(const char *a, const char *b) { /* This implementation is buggy: it never returns -1. */ @@ -34,6 +34,20 @@ int strcmp(const char *a, const char *b) return 0; } +/* + * The clang build needs this, although gcc does not. + * Stolen from lib/string.c. + */ +void *memcpy(void *dest, const void *src, size_t count) +{ + char *tmp = dest; + const char *s = src; + + while (count--) + *tmp++ = *s++; + return dest; +} + /* ...and two syscalls. This is x86-specific. */ static inline long x86_syscall3(long nr, long a0, long a1, long a2) { @@ -70,7 +84,7 @@ void to_base10(char *lastdig, time_t n) } } -__attribute__((externally_visible)) void c_main(void **stack) +void c_main(void **stack) { /* Parse the stack */ long argc = (long)*stack; diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index e95bd56b332f..35856b11c143 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -109,9 +109,9 @@ KERNEL_ARCH := x86_64 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(HOST_ARCH),$(ARCH)) -QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi +QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off else -QEMU_MACHINE := -cpu max -machine microvm -no-acpi +QEMU_MACHINE := -cpu max -machine microvm,acpi=off endif else ifeq ($(ARCH),i686) CHOST := i686-linux-musl @@ -120,9 +120,9 @@ KERNEL_ARCH := x86 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH)) -QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi +QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off else -QEMU_MACHINE := -cpu coreduo -machine microvm -no-acpi +QEMU_MACHINE := -cpu coreduo -machine microvm,acpi=off endif else ifeq ($(ARCH),mips64) CHOST := mips64-linux-musl diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 0b872c0a42d2..5c8757a25998 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -40,6 +40,13 @@ CFLAGS := -O2 -g -std=gnu99 -pthread -Wall $(KHDR_INCLUDES) # call32_from_64 in thunks.S uses absolute addresses. ifeq ($(CAN_BUILD_WITH_NOPIE),1) CFLAGS += -no-pie + +ifneq ($(LLVM),) +# clang only wants to see -no-pie during linking. Here, we don't have a separate +# linking stage, so a compiler warning is unavoidable without (wastefully) +# restructuring the Makefile. Avoid this by simply disabling that warning. +CFLAGS += -Wno-unused-command-line-argument +endif endif define gen-target-rule-32 @@ -73,10 +80,10 @@ all_64: $(BINARIES_64) EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64) $(BINARIES_32): $(OUTPUT)/%_32: %.c helpers.h - $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm + $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $< $(EXTRA_FILES) -lrt -ldl -lm $(BINARIES_64): $(OUTPUT)/%_64: %.c helpers.h - $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl + $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $< $(EXTRA_FILES) -lrt -ldl # x86_64 users should be encouraged to install 32-bit libraries ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01) @@ -100,10 +107,22 @@ warn_32bit_failure: exit 0; endif -# Some tests have additional dependencies. -$(OUTPUT)/sysret_ss_attrs_64: thunks.S -$(OUTPUT)/ptrace_syscall_32: raw_syscall_helper_32.S -$(OUTPUT)/test_syscall_vdso_32: thunks_32.S +# Add an additional file to the source file list for a given target, and also +# add a Makefile dependency on that same file. However, do these separately, so +# that the compiler invocation ("$(CC) file1.c file2.S") is not combined with +# the dependencies ("header3.h"), because clang, unlike gcc, will not accept +# header files as an input to the compiler invocation. +define extra-files +$(OUTPUT)/$(1): EXTRA_FILES := $(2) +$(OUTPUT)/$(1): $(2) +endef + +$(eval $(call extra-files,sysret_ss_attrs_64,thunks.S)) +$(eval $(call extra-files,ptrace_syscall_32,raw_syscall_helper_32.S)) +$(eval $(call extra-files,test_syscall_vdso_32,thunks_32.S)) +$(eval $(call extra-files,fsgsbase_restore_64,clang_helpers_64.S)) +$(eval $(call extra-files,fsgsbase_restore_32,clang_helpers_32.S)) +$(eval $(call extra-files,sysret_rip_64,clang_helpers_64.S)) # check_initial_reg_state is special: it needs a custom entry, and it # needs to be static so that its interpreter doesn't destroy its initial diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c index 95aad6d8849b..1fdf35a4d7f6 100644 --- a/tools/testing/selftests/x86/amx.c +++ b/tools/testing/selftests/x86/amx.c @@ -39,16 +39,6 @@ struct xsave_buffer { }; }; -static inline uint64_t xgetbv(uint32_t index) -{ - uint32_t eax, edx; - - asm volatile("xgetbv;" - : "=a" (eax), "=d" (edx) - : "c" (index)); - return eax + ((uint64_t)edx << 32); -} - static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm) { uint32_t rfbm_lo = rfbm; @@ -164,12 +154,6 @@ static inline void clear_xstate_header(struct xsave_buffer *buffer) memset(&buffer->header, 0, sizeof(buffer->header)); } -static inline uint64_t get_xstatebv(struct xsave_buffer *buffer) -{ - /* XSTATE_BV is at the beginning of the header: */ - return *(uint64_t *)&buffer->header; -} - static inline void set_xstatebv(struct xsave_buffer *buffer, uint64_t bv) { /* XSTATE_BV is at the beginning of the header: */ diff --git a/tools/testing/selftests/x86/clang_helpers_32.S b/tools/testing/selftests/x86/clang_helpers_32.S new file mode 100644 index 000000000000..dc16271bac70 --- /dev/null +++ b/tools/testing/selftests/x86/clang_helpers_32.S @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * 32-bit assembly helpers for asm operations that lack support in both gcc and + * clang. For example, clang asm does not support segment prefixes. + */ +.global dereference_seg_base +dereference_seg_base: + mov %fs:(0), %eax + ret + +.section .note.GNU-stack,"",%progbits diff --git a/tools/testing/selftests/x86/clang_helpers_64.S b/tools/testing/selftests/x86/clang_helpers_64.S new file mode 100644 index 000000000000..185a69dbf39c --- /dev/null +++ b/tools/testing/selftests/x86/clang_helpers_64.S @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * 64-bit assembly helpers for asm operations that lack support in both gcc and + * clang. For example, clang asm does not support segment prefixes. + */ +.global dereference_seg_base + +dereference_seg_base: + mov %gs:(0), %rax + ret + +.global test_page +.global test_syscall_insn + +.pushsection ".text", "ax" +.balign 4096 +test_page: .globl test_page + .fill 4094,1,0xcc + +test_syscall_insn: + syscall + +.ifne . - test_page - 4096 + .error "test page is not one page long" +.endif +.popsection + +.section .note.GNU-stack,"",%progbits diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 8c780cce941d..50cf32de6313 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -109,11 +109,6 @@ static inline void wrgsbase(unsigned long gsbase) asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory"); } -static inline void wrfsbase(unsigned long fsbase) -{ - asm volatile("wrfsbase %0" :: "r" (fsbase) : "memory"); -} - enum which_base { FS, GS }; static unsigned long read_base(enum which_base which) @@ -212,7 +207,6 @@ static void mov_0_gs(unsigned long initial_base, bool schedule) } static volatile unsigned long remote_base; -static volatile bool remote_hard_zero; static volatile unsigned int ftx; /* diff --git a/tools/testing/selftests/x86/fsgsbase_restore.c b/tools/testing/selftests/x86/fsgsbase_restore.c index 6fffadc51579..224058c1e4b2 100644 --- a/tools/testing/selftests/x86/fsgsbase_restore.c +++ b/tools/testing/selftests/x86/fsgsbase_restore.c @@ -39,12 +39,11 @@ # define SEG "%fs" #endif -static unsigned int dereference_seg_base(void) -{ - int ret; - asm volatile ("mov %" SEG ":(0), %0" : "=rm" (ret)); - return ret; -} +/* + * Defined in clang_helpers_[32|64].S, because unlike gcc, clang inline asm does + * not support segmentation prefixes. + */ +unsigned int dereference_seg_base(void); static void init_seg(void) { diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c index 5d7961a5f7f6..0b75b29f794b 100644 --- a/tools/testing/selftests/x86/sigreturn.c +++ b/tools/testing/selftests/x86/sigreturn.c @@ -487,7 +487,7 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void) greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX]; if (asm_ss != sig_ss && sig == SIGTRAP) { /* Sanity check failure. */ - printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n", + printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %x, ax = %llx\n", ss, *ssptr(ctx), (unsigned long long)asm_ss); nerrs++; } diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c index 461fa41a4d02..48ab065a76f9 100644 --- a/tools/testing/selftests/x86/syscall_arg_fault.c +++ b/tools/testing/selftests/x86/syscall_arg_fault.c @@ -29,7 +29,6 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), err(1, "sigaction"); } -static volatile sig_atomic_t sig_traps; static sigjmp_buf jmpbuf; static volatile sig_atomic_t n_errs; diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c index 84d74be1d902..b30de9aaa6d4 100644 --- a/tools/testing/selftests/x86/sysret_rip.c +++ b/tools/testing/selftests/x86/sysret_rip.c @@ -22,21 +22,13 @@ #include <sys/mman.h> #include <assert.h> - -asm ( - ".pushsection \".text\", \"ax\"\n\t" - ".balign 4096\n\t" - "test_page: .globl test_page\n\t" - ".fill 4094,1,0xcc\n\t" - "test_syscall_insn:\n\t" - "syscall\n\t" - ".ifne . - test_page - 4096\n\t" - ".error \"test page is not one page long\"\n\t" - ".endif\n\t" - ".popsection" - ); - +/* + * These items are in clang_helpers_64.S, in order to avoid clang inline asm + * limitations: + */ +void test_syscall_ins(void); extern const char test_page[]; + static void const *current_test_page_addr = test_page; static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), diff --git a/tools/testing/selftests/x86/test_FISTTP.c b/tools/testing/selftests/x86/test_FISTTP.c index 09789c0ce3e9..b9ae9d8cebcb 100644 --- a/tools/testing/selftests/x86/test_FISTTP.c +++ b/tools/testing/selftests/x86/test_FISTTP.c @@ -25,7 +25,7 @@ int test(void) feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); asm volatile ("\n" " fld1""\n" - " fisttp res16""\n" + " fisttps res16""\n" " fld1""\n" " fisttpl res32""\n" " fld1""\n" @@ -45,7 +45,7 @@ int test(void) feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); asm volatile ("\n" " fldpi""\n" - " fisttp res16""\n" + " fisttps res16""\n" " fldpi""\n" " fisttpl res32""\n" " fldpi""\n" @@ -66,7 +66,7 @@ int test(void) asm volatile ("\n" " fldpi""\n" " fchs""\n" - " fisttp res16""\n" + " fisttps res16""\n" " fldpi""\n" " fchs""\n" " fisttpl res32""\n" @@ -88,7 +88,7 @@ int test(void) feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); asm volatile ("\n" " fldln2""\n" - " fisttp res16""\n" + " fisttps res16""\n" " fldln2""\n" " fisttpl res32""\n" " fldln2""\n" diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index d4c8e8d79d38..6de11b4df458 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -97,11 +97,6 @@ static inline long sys_gtod(struct timeval *tv, struct timezone *tz) return syscall(SYS_gettimeofday, tv, tz); } -static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) -{ - return syscall(SYS_clock_gettime, id, ts); -} - static inline long sys_time(time_t *t) { return syscall(SYS_time, t); @@ -252,7 +247,7 @@ static void test_getcpu(int cpu) if (ret_sys == 0) { if (cpu_sys != cpu) - ksft_print_msg("syscall reported CPU %hu but should be %d\n", + ksft_print_msg("syscall reported CPU %u but should be %d\n", cpu_sys, cpu); have_node = true; @@ -270,10 +265,10 @@ static void test_getcpu(int cpu) if (cpu_vdso != cpu || node_vdso != node) { if (cpu_vdso != cpu) - ksft_print_msg("vDSO reported CPU %hu but should be %d\n", + ksft_print_msg("vDSO reported CPU %u but should be %d\n", cpu_vdso, cpu); if (node_vdso != node) - ksft_print_msg("vDSO reported node %hu but should be %hu\n", + ksft_print_msg("vDSO reported node %u but should be %u\n", node_vdso, node); ksft_test_result_fail("Wrong values\n"); } else { @@ -295,10 +290,10 @@ static void test_getcpu(int cpu) if (cpu_vsys != cpu || node_vsys != node) { if (cpu_vsys != cpu) - ksft_print_msg("vsyscall reported CPU %hu but should be %d\n", + ksft_print_msg("vsyscall reported CPU %u but should be %d\n", cpu_vsys, cpu); if (node_vsys != node) - ksft_print_msg("vsyscall reported node %hu but should be %hu\n", + ksft_print_msg("vsyscall reported node %u but should be %u\n", node_vsys, node); ksft_test_result_fail("Wrong values\n"); } else { diff --git a/tools/testing/selftests/x86/vdso_restorer.c b/tools/testing/selftests/x86/vdso_restorer.c index fe99f2434155..ac8d8e1e9805 100644 --- a/tools/testing/selftests/x86/vdso_restorer.c +++ b/tools/testing/selftests/x86/vdso_restorer.c @@ -92,4 +92,6 @@ int main() printf("[FAIL]\t!SA_SIGINFO handler was not called\n"); nerrs++; } + + return nerrs; } diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile index a7f56a09ca9f..6e0b4e95e230 100644 --- a/tools/testing/vsock/Makefile +++ b/tools/testing/vsock/Makefile @@ -13,3 +13,16 @@ CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-p clean: ${RM} *.o *.d vsock_test vsock_diag_test vsock_perf vsock_uring_test -include *.d + +VSOCK_INSTALL_PATH ?= + +install: all +ifdef VSOCK_INSTALL_PATH + mkdir -p $(VSOCK_INSTALL_PATH) + install -m 744 vsock_test $(VSOCK_INSTALL_PATH) + install -m 744 vsock_perf $(VSOCK_INSTALL_PATH) + install -m 744 vsock_diag_test $(VSOCK_INSTALL_PATH) + install -m 744 vsock_uring_test $(VSOCK_INSTALL_PATH) +else + $(error Error: set VSOCK_INSTALL_PATH to use install) +endif |