diff options
Diffstat (limited to 'tools')
260 files changed, 4417 insertions, 1308 deletions
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 3ae84c3b8e6d..3deb6c11f134 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -612,6 +612,7 @@ #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 #define MSR_AMD_PPIN 0xc00102f1 +#define MSR_AMD64_CPUID_FN_7 0xc0011002 #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index 1b25c0a95d3f..40a9e59c2fd5 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -1,11 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE -#include <stdio.h> +#include <err.h> +#include <getopt.h> #include <stdbool.h> +#include <stdio.h> #include <stdlib.h> #include <string.h> -#include <getopt.h> #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define min(a, b) (((a) < (b)) ? (a) : (b)) @@ -145,14 +146,14 @@ static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf, if (!func->leafs) { func->leafs = malloc(sizeof(struct subleaf)); if (!func->leafs) - perror("malloc func leaf"); + err(EXIT_FAILURE, NULL); func->nr = 1; } else { s = func->nr; func->leafs = realloc(func->leafs, (s + 1) * sizeof(*leaf)); if (!func->leafs) - perror("realloc f->leafs"); + err(EXIT_FAILURE, NULL); func->nr++; } @@ -211,7 +212,7 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) range = malloc(sizeof(struct cpuid_range)); if (!range) - perror("malloc range"); + err(EXIT_FAILURE, NULL); if (input_eax & 0x80000000) range->is_ext = true; @@ -220,7 +221,7 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) range->funcs = malloc(sizeof(struct cpuid_func) * idx_func); if (!range->funcs) - perror("malloc range->funcs"); + err(EXIT_FAILURE, NULL); range->nr = idx_func; memset(range->funcs, 0, sizeof(struct cpuid_func) * idx_func); @@ -395,8 +396,8 @@ static int parse_line(char *line) return 0; err_exit: - printf("Warning: wrong line format:\n"); - printf("\tline[%d]: %s\n", flines, line); + warnx("Wrong line format:\n" + "\tline[%d]: %s", flines, line); return -1; } @@ -418,10 +419,8 @@ static void parse_text(void) file = fopen("./cpuid.csv", "r"); } - if (!file) { - printf("Fail to open '%s'\n", filename); - return; - } + if (!file) + err(EXIT_FAILURE, "%s", filename); while (1) { ret = getline(&line, &len, file); @@ -530,7 +529,7 @@ static inline struct cpuid_func *index_to_func(u32 index) func_idx = index & 0xffff; if ((func_idx + 1) > (u32)range->nr) { - printf("ERR: invalid input index (0x%x)\n", index); + warnx("Invalid input index (0x%x)", index); return NULL; } return &range->funcs[func_idx]; @@ -562,7 +561,7 @@ static void show_info(void) return; } - printf("ERR: invalid input subleaf (0x%x)\n", user_sub); + warnx("Invalid input subleaf (0x%x)", user_sub); } show_func(func); @@ -593,15 +592,15 @@ static void setup_platform_cpuid(void) static void usage(void) { - printf("kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n" - "\t-a|--all Show both bit flags and complex bit fields info\n" - "\t-b|--bitflags Show boolean flags only\n" - "\t-d|--detail Show details of the flag/fields (default)\n" - "\t-f|--flags Specify the cpuid csv file\n" - "\t-h|--help Show usage info\n" - "\t-l|--leaf=index Specify the leaf you want to check\n" - "\t-r|--raw Show raw cpuid data\n" - "\t-s|--subleaf=sub Specify the subleaf you want to check\n" + warnx("kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n" + "\t-a|--all Show both bit flags and complex bit fields info\n" + "\t-b|--bitflags Show boolean flags only\n" + "\t-d|--detail Show details of the flag/fields (default)\n" + "\t-f|--flags Specify the CPUID CSV file\n" + "\t-h|--help Show usage info\n" + "\t-l|--leaf=index Specify the leaf you want to check\n" + "\t-r|--raw Show raw CPUID data\n" + "\t-s|--subleaf=sub Specify the subleaf you want to check" ); } @@ -652,7 +651,7 @@ static int parse_options(int argc, char *argv[]) user_sub = strtoul(optarg, NULL, 0); break; default: - printf("%s: Invalid option '%c'\n", argv[0], optopt); + warnx("Invalid option '%c'", optopt); return -1; } diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c index ab5cdc3337da..e91d4c4e1c16 100644 --- a/tools/arch/x86/lib/insn.c +++ b/tools/arch/x86/lib/insn.c @@ -13,7 +13,7 @@ #endif #include "../include/asm/inat.h" /* __ignore_sync_check__ */ #include "../include/asm/insn.h" /* __ignore_sync_check__ */ -#include "../include/linux/unaligned.h" /* __ignore_sync_check__ */ +#include <linux/unaligned.h> /* __ignore_sync_check__ */ #include <linux/errno.h> #include <linux/kconfig.h> diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index caedb3ef6688..cd3fd5155f6e 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -35,7 +35,7 @@ # - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) # - (66&F2): Both 0x66 and 0xF2 prefixes are specified. # -# REX2 Prefix +# REX2 Prefix Superscripts # - (!REX2): REX2 is not allowed # - (REX2): REX2 variant e.g. JMPABS @@ -286,10 +286,10 @@ df: ESC # Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix # in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation # to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. -e0: LOOPNE/LOOPNZ Jb (f64) (!REX2) -e1: LOOPE/LOOPZ Jb (f64) (!REX2) -e2: LOOP Jb (f64) (!REX2) -e3: JrCXZ Jb (f64) (!REX2) +e0: LOOPNE/LOOPNZ Jb (f64),(!REX2) +e1: LOOPE/LOOPZ Jb (f64),(!REX2) +e2: LOOP Jb (f64),(!REX2) +e3: JrCXZ Jb (f64),(!REX2) e4: IN AL,Ib (!REX2) e5: IN eAX,Ib (!REX2) e6: OUT Ib,AL (!REX2) @@ -298,10 +298,10 @@ e7: OUT Ib,eAX (!REX2) # in "near" jumps and calls is 16-bit. For CALL, # push of return address is 16-bit wide, RSP is decremented by 2 # but is not truncated to 16 bits, unlike RIP. -e8: CALL Jz (f64) (!REX2) -e9: JMP-near Jz (f64) (!REX2) -ea: JMP-far Ap (i64) (!REX2) -eb: JMP-short Jb (f64) (!REX2) +e8: CALL Jz (f64),(!REX2) +e9: JMP-near Jz (f64),(!REX2) +ea: JMP-far Ap (i64),(!REX2) +eb: JMP-short Jb (f64),(!REX2) ec: IN AL,DX (!REX2) ed: IN eAX,DX (!REX2) ee: OUT DX,AL (!REX2) @@ -478,22 +478,22 @@ AVXcode: 1 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev) # 0x0f 0x80-0x8f # Note: "forced64" is Intel CPU behavior (see comment about CALL insn). -80: JO Jz (f64) (!REX2) -81: JNO Jz (f64) (!REX2) -82: JB/JC/JNAE Jz (f64) (!REX2) -83: JAE/JNB/JNC Jz (f64) (!REX2) -84: JE/JZ Jz (f64) (!REX2) -85: JNE/JNZ Jz (f64) (!REX2) -86: JBE/JNA Jz (f64) (!REX2) -87: JA/JNBE Jz (f64) (!REX2) -88: JS Jz (f64) (!REX2) -89: JNS Jz (f64) (!REX2) -8a: JP/JPE Jz (f64) (!REX2) -8b: JNP/JPO Jz (f64) (!REX2) -8c: JL/JNGE Jz (f64) (!REX2) -8d: JNL/JGE Jz (f64) (!REX2) -8e: JLE/JNG Jz (f64) (!REX2) -8f: JNLE/JG Jz (f64) (!REX2) +80: JO Jz (f64),(!REX2) +81: JNO Jz (f64),(!REX2) +82: JB/JC/JNAE Jz (f64),(!REX2) +83: JAE/JNB/JNC Jz (f64),(!REX2) +84: JE/JZ Jz (f64),(!REX2) +85: JNE/JNZ Jz (f64),(!REX2) +86: JBE/JNA Jz (f64),(!REX2) +87: JA/JNBE Jz (f64),(!REX2) +88: JS Jz (f64),(!REX2) +89: JNS Jz (f64),(!REX2) +8a: JP/JPE Jz (f64),(!REX2) +8b: JNP/JPO Jz (f64),(!REX2) +8c: JL/JNGE Jz (f64),(!REX2) +8d: JNL/JGE Jz (f64),(!REX2) +8e: JLE/JNG Jz (f64),(!REX2) +8f: JNLE/JG Jz (f64),(!REX2) # 0x0f 0x90-0x9f 90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66) 91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66) @@ -996,8 +996,8 @@ AVXcode: 4 83: Grp1 Ev,Ib (1A),(es) # CTESTSCC instructions are: CTESTB, CTESTBE, CTESTF, CTESTL, CTESTLE, CTESTNB, CTESTNBE, CTESTNL, # CTESTNLE, CTESTNO, CTESTNS, CTESTNZ, CTESTO, CTESTS, CTESTT, CTESTZ -84: CTESTSCC (ev) -85: CTESTSCC (es) | CTESTSCC (66),(es) +84: CTESTSCC Eb,Gb (ev) +85: CTESTSCC Ev,Gv (es) | CTESTSCC Ev,Gv (66),(es) 88: POPCNT Gv,Ev (es) | POPCNT Gv,Ev (66),(es) 8f: POP2 Bq,Rq (000),(11B),(ev) a5: SHLD Ev,Gv,CL (es) | SHLD Ev,Gv,CL (66),(es) diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 156b62a163c5..8a48cc2536f5 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -226,7 +226,7 @@ static int load_xbc_from_initrd(int fd, char **buf) /* Wrong Checksum */ rcsum = xbc_calc_checksum(*buf, size); if (csum != rcsum) { - pr_err("checksum error: %d != %d\n", csum, rcsum); + pr_err("checksum error: %u != %u\n", csum, rcsum); return -EINVAL; } @@ -395,7 +395,7 @@ static int apply_xbc(const char *path, const char *xbc_path) xbc_get_info(&ret, NULL); printf("\tNumber of nodes: %d\n", ret); printf("\tSize: %u bytes\n", (unsigned int)size); - printf("\tChecksum: %d\n", (unsigned int)csum); + printf("\tChecksum: %u\n", (unsigned int)csum); /* TODO: Check the options by schema */ xbc_exit(); diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index 9af426d43299..4189c9d74fb0 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -221,7 +221,7 @@ static int cgroup_has_attached_progs(int cgroup_fd) for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) { int count = count_attached_bpf_progs(cgroup_fd, cgroup_attach_types[i]); - if (count < 0) + if (count < 0 && errno != EINVAL) return -1; if (count > 0) { @@ -318,11 +318,11 @@ static int show_bpf_progs(int cgroup_fd, enum bpf_attach_type type, static int do_show(int argc, char **argv) { - enum bpf_attach_type type; int has_attached_progs; const char *path; int cgroup_fd; int ret = -1; + unsigned int i; query_flags = 0; @@ -370,14 +370,14 @@ static int do_show(int argc, char **argv) "AttachFlags", "Name"); btf_vmlinux = libbpf_find_kernel_btf(); - for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { + for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) { /* * Not all attach types may be supported, so it's expected, * that some requests will fail. * If we were able to get the show for at least one * attach type, let's return 0. */ - if (show_bpf_progs(cgroup_fd, type, 0) == 0) + if (show_bpf_progs(cgroup_fd, cgroup_attach_types[i], 0) == 0) ret = 0; } @@ -400,9 +400,9 @@ exit: static int do_show_tree_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftw) { - enum bpf_attach_type type; int has_attached_progs; int cgroup_fd; + unsigned int i; if (typeflag != FTW_D) return 0; @@ -434,8 +434,8 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb, } btf_vmlinux = libbpf_find_kernel_btf(); - for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) - show_bpf_progs(cgroup_fd, type, ftw->level); + for (i = 0; i < ARRAY_SIZE(cgroup_attach_types); i++) + show_bpf_progs(cgroup_fd, cgroup_attach_types[i], ftw->level); if (errno == EINVAL) /* Last attach type does not support query. diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 9b75639434b8..0a764426d935 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -461,10 +461,11 @@ int get_fd_type(int fd) p_err("can't read link type: %s", strerror(errno)); return -1; } - if (n == sizeof(path)) { + if (n == sizeof(buf)) { p_err("can't read link type: path too long!"); return -1; } + buf[n] = '\0'; if (strstr(buf, "bpf-map")) return BPF_OBJ_MAP; diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 08d0ac543c67..a0536528dfde 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -534,9 +534,9 @@ int main(int argc, char **argv) usage(); if (version_requested) - return do_version(argc, argv); - - ret = cmd_select(commands, argc, argv, do_help); + ret = do_version(argc, argv); + else + ret = cmd_select(commands, argc, argv, do_help); if (json_output) jsonw_destroy(&json_wtr); diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index d2242d9f8441..39f208928cdb 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -366,17 +366,18 @@ static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb) { struct bpf_netdev_t *netinfo = cookie; struct ifinfomsg *ifinfo = msg; + struct ip_devname_ifindex *tmp; if (netinfo->filter_idx > 0 && netinfo->filter_idx != ifinfo->ifi_index) return 0; if (netinfo->used_len == netinfo->array_len) { - netinfo->devices = realloc(netinfo->devices, - (netinfo->array_len + 16) * - sizeof(struct ip_devname_ifindex)); - if (!netinfo->devices) + tmp = realloc(netinfo->devices, + (netinfo->array_len + 16) * sizeof(struct ip_devname_ifindex)); + if (!tmp) return -ENOMEM; + netinfo->devices = tmp; netinfo->array_len += 16; } netinfo->devices[netinfo->used_len].ifindex = ifinfo->ifi_index; @@ -395,6 +396,7 @@ static int dump_class_qdisc_nlmsg(void *cookie, void *msg, struct nlattr **tb) { struct bpf_tcinfo_t *tcinfo = cookie; struct tcmsg *info = msg; + struct tc_kind_handle *tmp; if (tcinfo->is_qdisc) { /* skip clsact qdisc */ @@ -406,11 +408,12 @@ static int dump_class_qdisc_nlmsg(void *cookie, void *msg, struct nlattr **tb) } if (tcinfo->used_len == tcinfo->array_len) { - tcinfo->handle_array = realloc(tcinfo->handle_array, + tmp = realloc(tcinfo->handle_array, (tcinfo->array_len + 16) * sizeof(struct tc_kind_handle)); - if (!tcinfo->handle_array) + if (!tmp) return -ENOMEM; + tcinfo->handle_array = tmp; tcinfo->array_len += 16; } tcinfo->handle_array[tcinfo->used_len].handle = info->tcm_handle; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index e71be67f1d86..52ffb74ae4e8 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1928,6 +1928,7 @@ static int do_loader(int argc, char **argv) obj = bpf_object__open_file(file, &open_opts); if (!obj) { + err = -1; p_err("failed to open object file"); goto err_close_obj; } diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 4b8079f294f6..b0072e64b010 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -19,7 +19,7 @@ endif # Overrides for the prepare step libraries. HOST_OVERRIDES := AR="$(HOSTAR)" CC="$(HOSTCC)" LD="$(HOSTLD)" ARCH="$(HOSTARCH)" \ - CROSS_COMPILE="" EXTRA_CFLAGS="$(HOSTCFLAGS)" + CROSS_COMPILE="" CLANG_CROSS_FLAGS="" EXTRA_CFLAGS="$(HOSTCFLAGS)" RM ?= rm HOSTCC ?= gcc diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index 5fb3fb3d97e0..ffe988867703 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -149,6 +149,10 @@ objprefix := $(subst ./,,$(OUTPUT)$(dir)/) obj-y := $(addprefix $(objprefix),$(obj-y)) subdir-obj-y := $(addprefix $(objprefix),$(subdir-obj-y)) +# Separate out test log files from real build objects. +test-y := $(filter %_log, $(obj-y)) +obj-y := $(filter-out %_log, $(obj-y)) + # Final '$(obj)-in.o' object in-target := $(objprefix)$(obj)-in.o @@ -159,7 +163,7 @@ $(subdir-y): $(sort $(subdir-obj-y)): $(subdir-y) ; -$(in-target): $(obj-y) FORCE +$(in-target): $(obj-y) $(test-y) FORCE $(call rule_mkdir) $(call if_changed,$(host)ld_multi) diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py index 270c28a0d098..6bf4bde77903 100644 --- a/tools/cgroup/memcg_slabinfo.py +++ b/tools/cgroup/memcg_slabinfo.py @@ -146,11 +146,11 @@ def detect_kernel_config(): def for_each_slab(prog): - PGSlab = ~prog.constant('PG_slab') + slabtype = prog.constant('PGTY_slab') for page in for_each_page(prog): try: - if page.page_type.value_() == PGSlab: + if (page.page_type.value_() >> 24) == slabtype: yield cast('struct slab *', page) except FaultError: pass diff --git a/tools/hv/hv_fcopy_uio_daemon.c b/tools/hv/hv_fcopy_uio_daemon.c index 12743d7f164f..e68a824d67b2 100644 --- a/tools/hv/hv_fcopy_uio_daemon.c +++ b/tools/hv/hv_fcopy_uio_daemon.c @@ -35,7 +35,10 @@ #define WIN8_SRV_MINOR 1 #define WIN8_SRV_VERSION (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR) -#define FCOPY_UIO "/sys/bus/vmbus/devices/eb765408-105f-49b6-b4aa-c123b64d17d4/uio" +#define FCOPY_DEVICE_PATH(subdir) \ + "/sys/bus/vmbus/devices/eb765408-105f-49b6-b4aa-c123b64d17d4/" #subdir +#define FCOPY_UIO_PATH FCOPY_DEVICE_PATH(uio) +#define FCOPY_CHANNELS_PATH FCOPY_DEVICE_PATH(channels) #define FCOPY_VER_COUNT 1 static const int fcopy_versions[] = { @@ -47,9 +50,62 @@ static const int fw_versions[] = { UTIL_FW_VERSION }; -#define HV_RING_SIZE 0x4000 /* 16KB ring buffer size */ +static uint32_t get_ring_buffer_size(void) +{ + char ring_path[PATH_MAX]; + DIR *dir; + struct dirent *entry; + struct stat st; + uint32_t ring_size = 0; + int retry_count = 0; + + /* Find the channel directory */ + dir = opendir(FCOPY_CHANNELS_PATH); + if (!dir) { + usleep(100 * 1000); /* Avoid race with kernel, wait 100ms and retry once */ + dir = opendir(FCOPY_CHANNELS_PATH); + if (!dir) { + syslog(LOG_ERR, "Failed to open channels directory: %s", strerror(errno)); + return 0; + } + } + +retry_once: + while ((entry = readdir(dir)) != NULL) { + if (entry->d_type == DT_DIR && strcmp(entry->d_name, ".") != 0 && + strcmp(entry->d_name, "..") != 0) { + snprintf(ring_path, sizeof(ring_path), "%s/%s/ring", + FCOPY_CHANNELS_PATH, entry->d_name); + + if (stat(ring_path, &st) == 0) { + /* + * stat returns size of Tx, Rx rings combined, + * so take half of it for individual ring size. + */ + ring_size = (uint32_t)st.st_size / 2; + syslog(LOG_INFO, "Ring buffer size from %s: %u bytes", + ring_path, ring_size); + break; + } + } + } + + if (!ring_size && retry_count == 0) { + retry_count = 1; + rewinddir(dir); + usleep(100 * 1000); /* Wait 100ms and retry once */ + goto retry_once; + } + + closedir(dir); -static unsigned char desc[HV_RING_SIZE]; + if (!ring_size) + syslog(LOG_ERR, "Could not determine ring size"); + + return ring_size; +} + +static unsigned char *desc; static int target_fd; static char target_fname[PATH_MAX]; @@ -62,8 +118,11 @@ static int hv_fcopy_create_file(char *file_name, char *path_name, __u32 flags) filesize = 0; p = path_name; - snprintf(target_fname, sizeof(target_fname), "%s/%s", - path_name, file_name); + if (snprintf(target_fname, sizeof(target_fname), "%s/%s", + path_name, file_name) >= sizeof(target_fname)) { + syslog(LOG_ERR, "target file name is too long: %s/%s", path_name, file_name); + goto done; + } /* * Check to see if the path is already in place; if not, @@ -270,7 +329,7 @@ static void wcstoutf8(char *dest, const __u16 *src, size_t dest_size) { size_t len = 0; - while (len < dest_size) { + while (len < dest_size && *src) { if (src[len] < 0x80) dest[len++] = (char)(*src++); else @@ -282,27 +341,15 @@ static void wcstoutf8(char *dest, const __u16 *src, size_t dest_size) static int hv_fcopy_start(struct hv_start_fcopy *smsg_in) { - setlocale(LC_ALL, "en_US.utf8"); - size_t file_size, path_size; - char *file_name, *path_name; - char *in_file_name = (char *)smsg_in->file_name; - char *in_path_name = (char *)smsg_in->path_name; - - file_size = wcstombs(NULL, (const wchar_t *restrict)in_file_name, 0) + 1; - path_size = wcstombs(NULL, (const wchar_t *restrict)in_path_name, 0) + 1; - - file_name = (char *)malloc(file_size * sizeof(char)); - path_name = (char *)malloc(path_size * sizeof(char)); - - if (!file_name || !path_name) { - free(file_name); - free(path_name); - syslog(LOG_ERR, "Can't allocate memory for file name and/or path name"); - return HV_E_FAIL; - } + /* + * file_name and path_name should have same length with appropriate + * member of hv_start_fcopy. + */ + char file_name[W_MAX_PATH], path_name[W_MAX_PATH]; - wcstoutf8(file_name, (__u16 *)in_file_name, file_size); - wcstoutf8(path_name, (__u16 *)in_path_name, path_size); + setlocale(LC_ALL, "en_US.utf8"); + wcstoutf8(file_name, smsg_in->file_name, W_MAX_PATH - 1); + wcstoutf8(path_name, smsg_in->path_name, W_MAX_PATH - 1); return hv_fcopy_create_file(file_name, path_name, smsg_in->copy_flags); } @@ -406,7 +453,7 @@ int main(int argc, char *argv[]) int daemonize = 1, long_index = 0, opt, ret = -EINVAL; struct vmbus_br txbr, rxbr; void *ring; - uint32_t len = HV_RING_SIZE; + uint32_t ring_size, len; char uio_name[NAME_MAX] = {0}; char uio_dev_path[PATH_MAX] = {0}; @@ -437,7 +484,20 @@ int main(int argc, char *argv[]) openlog("HV_UIO_FCOPY", 0, LOG_USER); syslog(LOG_INFO, "starting; pid is:%d", getpid()); - fcopy_get_first_folder(FCOPY_UIO, uio_name); + ring_size = get_ring_buffer_size(); + if (!ring_size) { + ret = -ENODEV; + goto exit; + } + + desc = malloc(ring_size * sizeof(unsigned char)); + if (!desc) { + syslog(LOG_ERR, "malloc failed for desc buffer"); + ret = -ENOMEM; + goto exit; + } + + fcopy_get_first_folder(FCOPY_UIO_PATH, uio_name); snprintf(uio_dev_path, sizeof(uio_dev_path), "/dev/%s", uio_name); fcopy_fd = open(uio_dev_path, O_RDWR); @@ -445,17 +505,17 @@ int main(int argc, char *argv[]) syslog(LOG_ERR, "open %s failed; error: %d %s", uio_dev_path, errno, strerror(errno)); ret = fcopy_fd; - goto exit; + goto free_desc; } - ring = vmbus_uio_map(&fcopy_fd, HV_RING_SIZE); + ring = vmbus_uio_map(&fcopy_fd, ring_size); if (!ring) { ret = errno; syslog(LOG_ERR, "mmap ringbuffer failed; error: %d %s", ret, strerror(ret)); goto close; } - vmbus_br_setup(&txbr, ring, HV_RING_SIZE); - vmbus_br_setup(&rxbr, (char *)ring + HV_RING_SIZE, HV_RING_SIZE); + vmbus_br_setup(&txbr, ring, ring_size); + vmbus_br_setup(&rxbr, (char *)ring + ring_size, ring_size); rxbr.vbr->imask = 0; @@ -470,7 +530,7 @@ int main(int argc, char *argv[]) continue; } - len = HV_RING_SIZE; + len = ring_size; ret = rte_vmbus_chan_recv_raw(&rxbr, desc, &len); if (unlikely(ret <= 0)) { /* This indicates a failure to communicate (or worse) */ @@ -490,6 +550,8 @@ int main(int argc, char *argv[]) } close: close(fcopy_fd); +free_desc: + free(desc); exit: return ret; } diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h index 5a37ccbec54f..f61a01dd7eb7 100644 --- a/tools/include/linux/kallsyms.h +++ b/tools/include/linux/kallsyms.h @@ -18,6 +18,7 @@ static inline const char *kallsyms_lookup(unsigned long addr, return NULL; } +#ifdef HAVE_BACKTRACE_SUPPORT #include <execinfo.h> #include <stdlib.h> static inline void print_ip_sym(const char *loglvl, unsigned long ip) @@ -30,5 +31,8 @@ static inline void print_ip_sym(const char *loglvl, unsigned long ip) free(name); } +#else +static inline void print_ip_sym(const char *loglvl, unsigned long ip) {} +#endif #endif diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h index 933bc0be7e1c..a9d8b5b51f37 100644 --- a/tools/include/nolibc/std.h +++ b/tools/include/nolibc/std.h @@ -20,6 +20,8 @@ #include "stdint.h" +#include <linux/types.h> + /* those are commonly provided by sys/types.h */ typedef unsigned int dev_t; typedef unsigned long ino_t; @@ -31,6 +33,6 @@ typedef unsigned long nlink_t; typedef signed long off_t; typedef signed long blksize_t; typedef signed long blkcnt_t; -typedef signed long time_t; +typedef __kernel_old_time_t time_t; #endif /* _NOLIBC_STD_H */ diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index b26a5d0c417c..9d606c7138a8 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -127,7 +127,7 @@ typedef struct { int __fd = (fd); \ if (__fd >= 0) \ __set->fds[__fd / FD_SETIDXMASK] &= \ - ~(1U << (__fd & FX_SETBITMASK)); \ + ~(1U << (__fd & FD_SETBITMASK)); \ } while (0) #define FD_SET(fd, set) do { \ @@ -144,7 +144,7 @@ typedef struct { int __r = 0; \ if (__fd >= 0) \ __r = !!(__set->fds[__fd / FD_SETIDXMASK] & \ -1U << (__fd & FD_SET_BITMASK)); \ +1U << (__fd & FD_SETBITMASK)); \ __r; \ }) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4a939c90dc2e..5a5cdb453935 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1206,6 +1206,7 @@ enum bpf_perf_event_type { #define BPF_F_BEFORE (1U << 3) #define BPF_F_AFTER (1U << 4) #define BPF_F_ID (1U << 5) +#define BPF_F_PREORDER (1U << 6) #define BPF_F_LINK BPF_F_LINK /* 1 << 13 */ /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the @@ -2034,6 +2035,7 @@ union bpf_attr { * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates * the checksum is to be computed against a pseudo-header. + * Flag **BPF_F_IPV6** should be set for IPv6 packets. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more @@ -6048,6 +6050,7 @@ enum { BPF_F_PSEUDO_HDR = (1ULL << 4), BPF_F_MARK_MANGLED_0 = (1ULL << 5), BPF_F_MARK_ENFORCE = (1ULL << 6), + BPF_F_IPV6 = (1ULL << 7), }; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index 2f082b01ff22..42ec5ddaab8d 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -117,12 +117,12 @@ struct xdp_options { ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1) /* Request transmit timestamp. Upon completion, put it into tx_timestamp - * field of union xsk_tx_metadata. + * field of struct xsk_tx_metadata. */ #define XDP_TXMD_FLAGS_TIMESTAMP (1 << 0) /* Request transmit checksum offload. Checksum start position and offset - * are communicated via csum_start and csum_offset fields of union + * are communicated via csum_start and csum_offset fields of struct * xsk_tx_metadata. */ #define XDP_TXMD_FLAGS_CHECKSUM (1 << 1) diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index c0e13cdf9660..b997c68bd945 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -388,7 +388,13 @@ extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak; #define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j #define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__) +#if defined(__clang__) && (__clang_major__ >= 19) +#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__)) +#elif defined(__GNUC__) && (__GNUC__ >= 14) +#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__)) +#else #define ___type(...) typeof(___arrow(__VA_ARGS__)) +#endif #define ___read(read_fn, dst, src_type, src, accessor) \ read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 3c131039c523..b770702dab37 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -995,7 +995,7 @@ static struct btf *btf_new_empty(struct btf *base_btf) if (base_btf) { btf->base_btf = base_btf; btf->start_id = btf__type_cnt(base_btf); - btf->start_str_off = base_btf->hdr->str_len; + btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off; btf->swapped_endian = base_btf->swapped_endian; } @@ -1185,6 +1185,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, elf = elf_begin(fd, ELF_C_READ, NULL); if (!elf) { + err = -LIBBPF_ERRNO__FORMAT; pr_warn("failed to open %s as ELF file\n", path); goto done; } @@ -4175,6 +4176,19 @@ static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id return true; } +static bool btf_dedup_identical_ptrs(struct btf_dedup *d, __u32 id1, __u32 id2) +{ + struct btf_type *t1, *t2; + + t1 = btf_type_by_id(d->btf, id1); + t2 = btf_type_by_id(d->btf, id2); + + if (!btf_is_ptr(t1) || !btf_is_ptr(t2)) + return false; + + return t1->type == t2->type; +} + /* * Check equivalence of BTF type graph formed by candidate struct/union (we'll * call it "candidate graph" in this description for brevity) to a type graph @@ -4307,6 +4321,9 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, */ if (btf_dedup_identical_structs(d, hypot_type_id, cand_id)) return 1; + /* A similar case is again observed for PTRs. */ + if (btf_dedup_identical_ptrs(d, hypot_type_id, cand_id)) + return 1; return 0; } diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 0a7327541c17..12306b5de3ef 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -225,6 +225,9 @@ static void btf_dump_free_names(struct hashmap *map) size_t bkt; struct hashmap_entry *cur; + if (!map) + return; + hashmap__for_each_entry(map, cur, bkt) free((void *)cur->pkey); @@ -867,8 +870,8 @@ static void btf_dump_emit_bit_padding(const struct btf_dump *d, } pads[] = { {"long", d->ptr_sz * 8}, {"int", 32}, {"short", 16}, {"char", 8} }; - int new_off, pad_bits, bits, i; - const char *pad_type; + int new_off = 0, pad_bits = 0, bits, i; + const char *pad_type = NULL; if (cur_off >= next_off) return; /* no gap */ diff --git a/tools/lib/bpf/btf_relocate.c b/tools/lib/bpf/btf_relocate.c index 4f7399d85eab..8ef8003480da 100644 --- a/tools/lib/bpf/btf_relocate.c +++ b/tools/lib/bpf/btf_relocate.c @@ -212,7 +212,7 @@ static int btf_relocate_map_distilled_base(struct btf_relocate *r) * need to match both name and size, otherwise embedding the base * struct/union in the split type is invalid. */ - for (id = r->nr_dist_base_types; id < r->nr_split_types; id++) { + for (id = r->nr_dist_base_types; id < r->nr_dist_base_types + r->nr_split_types; id++) { err = btf_mark_embedded_composite_type_ids(r, id); if (err) goto done; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5ff643e60d09..e33cf3caf8b6 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -60,6 +60,8 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +#define MAX_EVENT_NAME_LEN 64 + #define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" #define BPF_INSN_SZ (sizeof(struct bpf_insn)) @@ -283,7 +285,7 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...) old_errno = errno; va_start(args, format); - __libbpf_pr(level, format, args); + print_fn(level, format, args); va_end(args); errno = old_errno; @@ -594,7 +596,7 @@ struct extern_desc { int sym_idx; int btf_id; int sec_btf_id; - const char *name; + char *name; char *essent_name; bool is_set; bool is_weak; @@ -724,7 +726,7 @@ struct bpf_object { struct usdt_manager *usdt_man; - struct bpf_map *arena_map; + int arena_map_idx; void *arena_data; size_t arena_data_sz; @@ -887,7 +889,7 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return -LIBBPF_ERRNO__FORMAT; } - if (sec_off + prog_sz > sec_sz) { + if (sec_off + prog_sz > sec_sz || sec_off + prog_sz < sec_off) { pr_warn("sec '%s': program at offset %zu crosses section boundary\n", sec_name, sec_off); return -LIBBPF_ERRNO__FORMAT; @@ -1492,6 +1494,7 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.obj_buf_sz = obj_buf_sz; obj->efile.btf_maps_shndx = -1; obj->kconfig_map_idx = -1; + obj->arena_map_idx = -1; obj->kern_version = get_kernel_version(); obj->loaded = false; @@ -2074,7 +2077,7 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, } len = strlen(value); - if (value[len - 1] != '"') { + if (len < 2 || value[len - 1] != '"') { pr_warn("extern (kcfg) '%s': invalid string config '%s'\n", ext->name, value); return -EINVAL; @@ -2933,7 +2936,7 @@ static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map, const long page_sz = sysconf(_SC_PAGE_SIZE); size_t mmap_sz; - mmap_sz = bpf_map_mmap_sz(obj->arena_map); + mmap_sz = bpf_map_mmap_sz(map); if (roundup(data_sz, page_sz) > mmap_sz) { pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n", sec_name, mmap_sz, data_sz); @@ -3007,12 +3010,12 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, if (map->def.type != BPF_MAP_TYPE_ARENA) continue; - if (obj->arena_map) { + if (obj->arena_map_idx >= 0) { pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n", - map->name, obj->arena_map->name); + map->name, obj->maps[obj->arena_map_idx].name); return -EINVAL; } - obj->arena_map = map; + obj->arena_map_idx = i; if (obj->efile.arena_data) { err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx, @@ -3022,7 +3025,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, return err; } } - if (obj->efile.arena_data && !obj->arena_map) { + if (obj->efile.arena_data && obj->arena_map_idx < 0) { pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n", ARENA_SEC); return -ENOENT; @@ -4221,7 +4224,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return ext->btf_id; } t = btf__type_by_id(obj->btf, ext->btf_id); - ext->name = btf__name_by_offset(obj->btf, t->name_off); + ext->name = strdup(btf__name_by_offset(obj->btf, t->name_off)); + if (!ext->name) + return -ENOMEM; ext->sym_idx = i; ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; @@ -4541,10 +4546,20 @@ static int bpf_program__record_reloc(struct bpf_program *prog, /* arena data relocation */ if (shdr_idx == obj->efile.arena_data_shndx) { + if (obj->arena_map_idx < 0) { + pr_warn("prog '%s': bad arena data relocation at insn %u, no arena maps defined\n", + prog->name, insn_idx); + return -LIBBPF_ERRNO__RELOC; + } reloc_desc->type = RELO_DATA; reloc_desc->insn_idx = insn_idx; - reloc_desc->map_idx = obj->arena_map - obj->maps; + reloc_desc->map_idx = obj->arena_map_idx; reloc_desc->sym_off = sym->st_value; + + map = &obj->maps[obj->arena_map_idx]; + pr_debug("prog '%s': found arena map %d (%s, sec %d, off %zu) for insn %u\n", + prog->name, obj->arena_map_idx, map->name, map->sec_idx, + map->sec_offset, insn_idx); return 0; } @@ -9060,8 +9075,10 @@ void bpf_object__close(struct bpf_object *obj) zfree(&obj->btf_custom_path); zfree(&obj->kconfig); - for (i = 0; i < obj->nr_extern; i++) + for (i = 0; i < obj->nr_extern; i++) { + zfree(&obj->externs[i].name); zfree(&obj->externs[i].essent_name); + } zfree(&obj->externs); obj->nr_extern = 0; @@ -11039,16 +11056,16 @@ static const char *tracefs_available_filter_functions_addrs(void) : TRACEFS"/available_filter_functions_addrs"; } -static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, - const char *kfunc_name, size_t offset) +static void gen_probe_legacy_event_name(char *buf, size_t buf_sz, + const char *name, size_t offset) { static int index = 0; int i; - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, - __sync_fetch_and_add(&index, 1)); + snprintf(buf, buf_sz, "libbpf_%u_%d_%s_0x%zx", getpid(), + __sync_fetch_and_add(&index, 1), name, offset); - /* sanitize binary_path in the probe name */ + /* sanitize name in the probe name */ for (i = 0; buf[i]; i++) { if (!isalnum(buf[i])) buf[i] = '_'; @@ -11174,9 +11191,9 @@ int probe_kern_syscall_wrapper(int token_fd) return pfd >= 0 ? 1 : 0; } else { /* legacy mode */ - char probe_name[128]; + char probe_name[MAX_EVENT_NAME_LEN]; - gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0) return 0; @@ -11233,10 +11250,10 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, func_name, offset, -1 /* pid */, 0 /* ref_ctr_off */); } else { - char probe_name[256]; + char probe_name[MAX_EVENT_NAME_LEN]; - gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), - func_name, offset); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), + func_name, offset); legacy_probe = strdup(probe_name); if (!legacy_probe) @@ -11744,20 +11761,6 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru return ret; } -static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, - const char *binary_path, uint64_t offset) -{ - int i; - - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset); - - /* sanitize binary_path in the probe name */ - for (i = 0; buf[i]; i++) { - if (!isalnum(buf[i])) - buf[i] = '_'; - } -} - static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, const char *binary_path, size_t offset) { @@ -12173,13 +12176,14 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, func_offset, pid, ref_ctr_off); } else { - char probe_name[PATH_MAX + 64]; + char probe_name[MAX_EVENT_NAME_LEN]; if (ref_ctr_off) return libbpf_err_ptr(-EINVAL); - gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name), - binary_path, func_offset); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), + strrchr(binary_path, '/') ? : binary_path, + func_offset); legacy_probe = strdup(probe_name); if (!legacy_probe) @@ -13256,7 +13260,6 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, attr.config = PERF_COUNT_SW_BPF_OUTPUT; attr.type = PERF_TYPE_SOFTWARE; attr.sample_type = PERF_SAMPLE_RAW; - attr.sample_period = sample_period; attr.wakeup_events = sample_period; p.attr = &attr; @@ -13983,6 +13986,12 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) } link = map_skel->link; + if (!link) { + pr_warn("map '%s': BPF map skeleton link is uninitialized\n", + bpf_map__name(map)); + continue; + } + if (*link) continue; diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 6985ab0f1ca9..d4ab9315afe7 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -567,17 +567,15 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, } obj->elf = elf_begin(obj->fd, ELF_C_READ_MMAP, NULL); if (!obj->elf) { - err = -errno; pr_warn_elf("failed to parse ELF file '%s'", filename); - return err; + return -EINVAL; } /* Sanity check ELF file high-level properties */ ehdr = elf64_getehdr(obj->elf); if (!ehdr) { - err = -errno; pr_warn_elf("failed to get ELF header for %s", filename); - return err; + return -EINVAL; } if (ehdr->e_ident[EI_DATA] != host_endianness) { err = -EOPNOTSUPP; @@ -593,9 +591,8 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, } if (elf_getshdrstrndx(obj->elf, &obj->shstrs_sec_idx)) { - err = -errno; pr_warn_elf("failed to get SHSTRTAB section index for %s", filename); - return err; + return -EINVAL; } scn = NULL; @@ -605,26 +602,23 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, shdr = elf64_getshdr(scn); if (!shdr) { - err = -errno; pr_warn_elf("failed to get section #%zu header for %s", sec_idx, filename); - return err; + return -EINVAL; } sec_name = elf_strptr(obj->elf, obj->shstrs_sec_idx, shdr->sh_name); if (!sec_name) { - err = -errno; pr_warn_elf("failed to get section #%zu name for %s", sec_idx, filename); - return err; + return -EINVAL; } data = elf_getdata(scn, 0); if (!data) { - err = -errno; pr_warn_elf("failed to get section #%zu (%s) data from %s", sec_idx, sec_name, filename); - return err; + return -EINVAL; } sec = add_src_sec(obj, sec_name); @@ -1226,7 +1220,7 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj } else { if (!secs_match(dst_sec, src_sec)) { pr_warn("ELF sections %s are incompatible\n", src_sec->sec_name); - return -1; + return -EINVAL; } /* "license" and "version" sections are deduped */ @@ -2013,7 +2007,7 @@ add_sym: obj->sym_map[src_sym_idx] = dst_sym_idx; - if (sym_type == STT_SECTION && dst_sym) { + if (sym_type == STT_SECTION && dst_sec) { dst_sec->sec_sym_idx = dst_sym_idx; dst_sym->st_value = 0; } @@ -2073,7 +2067,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob } } else if (!secs_match(dst_sec, src_sec)) { pr_warn("sections %s are not compatible\n", src_sec->sec_name); - return -1; + return -EINVAL; } /* shdr->sh_link points to SYMTAB */ @@ -2635,14 +2629,14 @@ int bpf_linker__finalize(struct bpf_linker *linker) /* Finalize ELF layout */ if (elf_update(linker->elf, ELF_C_NULL) < 0) { - err = -errno; + err = -EINVAL; pr_warn_elf("failed to finalize ELF layout"); return libbpf_err(err); } /* Write out final ELF contents */ if (elf_update(linker->elf, ELF_C_WRITE) < 0) { - err = -errno; + err = -EINVAL; pr_warn_elf("failed to write ELF contents"); return libbpf_err(err); } diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index 975e265eab3b..06663f9ea581 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -63,16 +63,16 @@ static int validate_nla(struct nlattr *nla, int maxtype, minlen = nla_attr_minlen[pt->type]; if (libbpf_nla_len(nla) < minlen) - return -1; + return -EINVAL; if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen) - return -1; + return -EINVAL; if (pt->type == LIBBPF_NLA_STRING) { char *data = libbpf_nla_data(nla); if (data[libbpf_nla_len(nla) - 1] != '\0') - return -1; + return -EINVAL; } return 0; @@ -118,19 +118,18 @@ int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, if (policy) { err = validate_nla(nla, maxtype, policy); if (err < 0) - goto errout; + return err; } - if (tb[type]) + if (tb[type]) { pr_warn("Attribute of type %#x found multiple times in message, " "previous attribute is being ignored.\n", type); + } tb[type] = nla; } - err = 0; -errout: - return err; + return 0; } /** diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 93794f01bb67..6ff28e7bf5e3 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -659,7 +659,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation */ usdt_abs_ip = note.loc_addr; - if (base_addr) + if (base_addr && note.base_addr) usdt_abs_ip += base_addr - note.base_addr; /* When attaching uprobes (which is what USDTs basically are) diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c index 8561b0f01a24..9ef569492560 100644 --- a/tools/lib/subcmd/help.c +++ b/tools/lib/subcmd/help.c @@ -9,6 +9,7 @@ #include <sys/stat.h> #include <unistd.h> #include <dirent.h> +#include <assert.h> #include "subcmd-util.h" #include "help.h" #include "exec-cmd.h" @@ -82,10 +83,11 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) ci++; cj++; } else { - zfree(&cmds->names[cj]); - cmds->names[cj++] = cmds->names[ci++]; + cmds->names[cj++] = cmds->names[ci]; + cmds->names[ci++] = NULL; } } else if (cmp == 0) { + zfree(&cmds->names[ci]); ci++; ei++; } else if (cmp > 0) { @@ -94,12 +96,12 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) } if (ci != cj) { while (ci < cmds->cnt) { - zfree(&cmds->names[cj]); - cmds->names[cj++] = cmds->names[ci++]; + cmds->names[cj++] = cmds->names[ci]; + cmds->names[ci++] = NULL; } } for (ci = cj; ci < cmds->cnt; ci++) - zfree(&cmds->names[ci]); + assert(cmds->names[ci] == NULL); cmds->cnt = cj; } diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c index eb896d30545b..555d617c1f50 100644 --- a/tools/lib/subcmd/parse-options.c +++ b/tools/lib/subcmd/parse-options.c @@ -807,7 +807,7 @@ static int option__cmp(const void *va, const void *vb) static struct option *options__order(const struct option *opts) { int nr_opts = 0, nr_group = 0, nr_parent = 0, len; - const struct option *o, *p = opts; + const struct option *o = NULL, *p = opts; struct option *opt, *ordered = NULL, *group; /* flatten the options that have parents */ diff --git a/tools/net/ynl/ethtool.py b/tools/net/ynl/ethtool.py index 63c471f075ab..7e8342f91481 100755 --- a/tools/net/ynl/ethtool.py +++ b/tools/net/ynl/ethtool.py @@ -337,16 +337,24 @@ def main(): print('Capabilities:') [print(f'\t{v}') for v in bits_to_dict(tsinfo['timestamping'])] - print(f'PTP Hardware Clock: {tsinfo["phc-index"]}') + print(f'PTP Hardware Clock: {tsinfo.get("phc-index", "none")}') - print('Hardware Transmit Timestamp Modes:') - [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])] + if 'tx-types' in tsinfo: + print('Hardware Transmit Timestamp Modes:') + [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])] + else: + print('Hardware Transmit Timestamp Modes: none') + + if 'rx-filters' in tsinfo: + print('Hardware Receive Filter Modes:') + [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])] + else: + print('Hardware Receive Filter Modes: none') - print('Hardware Receive Filter Modes:') - [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])] + if 'stats' in tsinfo and tsinfo['stats']: + print('Statistics:') + [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()] - print('Statistics:') - [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()] return print(f'Settings for {args.device}:') diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index e16cef160bc2..c4da34048ef8 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -95,7 +95,7 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off, ynl_attr_for_each_payload(start, data_len, attr) { astart_off = (char *)attr - (char *)start; - aend_off = astart_off + ynl_attr_data_len(attr); + aend_off = (char *)ynl_attr_data_end(attr) - (char *)start; if (aend_off <= off) continue; @@ -364,7 +364,7 @@ int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr) "Invalid attribute (binary %s)", policy->name); return -1; case YNL_PT_NUL_STR: - if ((!policy->len || len <= policy->len) && !data[len - 1]) + if (len && (!policy->len || len <= policy->len) && !data[len - 1]) break; yerr(yarg->ys, YNL_ERROR_ATTR_INVALID, "Invalid attribute (string %s)", policy->name); diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 717530bc9c52..c78f1c1bca75 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2361,6 +2361,17 @@ def print_kernel_family_struct_src(family, cw): if not kernel_can_gen_family_struct(family): return + if 'sock-priv' in family.kernel_family: + # Generate "trampolines" to make CFI happy + cw.write_func("static void", f"__{family.c_name}_nl_sock_priv_init", + [f"{family.c_name}_nl_sock_priv_init(priv);"], + ["void *priv"]) + cw.nl() + cw.write_func("static void", f"__{family.c_name}_nl_sock_priv_destroy", + [f"{family.c_name}_nl_sock_priv_destroy(priv);"], + ["void *priv"]) + cw.nl() + cw.block_start(f"struct genl_family {family.ident_name}_nl_family __ro_after_init =") cw.p('.name\t\t= ' + family.fam_key + ',') cw.p('.version\t= ' + family.ver_key + ',') @@ -2378,9 +2389,8 @@ def print_kernel_family_struct_src(family, cw): cw.p(f'.n_mcgrps\t= ARRAY_SIZE({family.c_name}_nl_mcgrps),') if 'sock-priv' in family.kernel_family: cw.p(f'.sock_priv_size\t= sizeof({family.kernel_family["sock-priv"]}),') - # Force cast here, actual helpers take pointer to the real type. - cw.p(f'.sock_priv_init\t= (void *){family.c_name}_nl_sock_priv_init,') - cw.p(f'.sock_priv_destroy = (void *){family.c_name}_nl_sock_priv_destroy,') + cw.p(f'.sock_priv_init\t= __{family.c_name}_nl_sock_priv_init,') + cw.p(f'.sock_priv_destroy = __{family.c_name}_nl_sock_priv_destroy,') cw.block_end(';') @@ -2407,6 +2417,9 @@ def render_uapi(family, cw): defines = [] for const in family['definitions']: + if const.get('header'): + continue + if const['type'] != 'const': cw.writes_defines(defines) defines = [] diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f0d8796b984a..d4d82bb9b551 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -216,10 +216,15 @@ static bool is_rust_noreturn(const struct symbol *func) str_ends_with(func->name, "_4core9panicking14panic_explicit") || str_ends_with(func->name, "_4core9panicking14panic_nounwind") || str_ends_with(func->name, "_4core9panicking18panic_bounds_check") || + str_ends_with(func->name, "_4core9panicking18panic_nounwind_fmt") || str_ends_with(func->name, "_4core9panicking19assert_failed_inner") || + str_ends_with(func->name, "_4core9panicking30panic_null_pointer_dereference") || str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") || + str_ends_with(func->name, "_7___rustc17rust_begin_unwind") || + strstr(func->name, "_4core9panicking13assert_failed") || strstr(func->name, "_4core9panicking11panic_const24panic_const_") || - (strstr(func->name, "_4core5slice5index24slice_") && + (strstr(func->name, "_4core5slice5index") && + strstr(func->name, "slice_") && str_ends_with(func->name, "_fail")); } @@ -638,47 +643,8 @@ static int add_dead_ends(struct objtool_file *file) uint64_t offset; /* - * Check for manually annotated dead ends. - */ - rsec = find_section_by_name(file->elf, ".rela.discard.unreachable"); - if (!rsec) - goto reachable; - - for_each_reloc(rsec, reloc) { - if (reloc->sym->type == STT_SECTION) { - offset = reloc_addend(reloc); - } else if (reloc->sym->local_label) { - offset = reloc->sym->offset; - } else { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, offset); - if (insn) - insn = prev_insn_same_sec(file, insn); - else if (offset == reloc->sym->sec->sh.sh_size) { - insn = find_last_insn(file, reloc->sym->sec); - if (!insn) { - WARN("can't find unreachable insn at %s+0x%" PRIx64, - reloc->sym->sec->name, offset); - return -1; - } - } else { - WARN("can't find unreachable insn at %s+0x%" PRIx64, - reloc->sym->sec->name, offset); - return -1; - } - - insn->dead_end = true; - } - -reachable: - /* - * These manually annotated reachable checks are needed for GCC 4.4, - * where the Linux unreachable() macro isn't supported. In that case - * GCC doesn't know the "ud2" is fatal, so it generates code as if it's - * not a dead end. + * UD2 defaults to being a dead-end, allow them to be annotated for + * non-fatal, eg WARN. */ rsec = find_section_by_name(file->elf, ".rela.discard.reachable"); if (!rsec) @@ -1280,12 +1246,15 @@ static const char *uaccess_safe_builtin[] = { "__ubsan_handle_load_invalid_value", /* STACKLEAK */ "stackleak_track_stack", + /* TRACE_BRANCH_PROFILING */ + "ftrace_likely_update", + /* STACKPROTECTOR */ + "__stack_chk_fail", /* misc */ "csum_partial_copy_generic", "copy_mc_fragile", "copy_mc_fragile_handle_tail", "copy_mc_enhanced_fast_string", - "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */ "rep_stos_alternative", "rep_movs_alternative", "__copy_user_nocache", @@ -1604,6 +1573,8 @@ static int add_jump_destinations(struct objtool_file *file) unsigned long dest_off; for_each_insn(file, insn) { + struct symbol *func = insn_func(insn); + if (insn->jump_dest) { /* * handle_group_alt() may have previously set @@ -1627,7 +1598,7 @@ static int add_jump_destinations(struct objtool_file *file) } else if (reloc->sym->return_thunk) { add_return_call(file, insn, true); continue; - } else if (insn_func(insn)) { + } else if (func) { /* * External sibling call or internal sibling call with * STT_FUNC reloc. @@ -1660,6 +1631,15 @@ static int add_jump_destinations(struct objtool_file *file) continue; } + /* + * GCOV/KCOV dead code can jump to the end of the + * function/section. + */ + if (file->ignore_unreachables && func && + dest_sec == insn->sec && + dest_off == func->offset + func->len) + continue; + WARN_INSN(insn, "can't find jump dest instruction at %s+0x%lx", dest_sec->name, dest_off); return -1; @@ -1684,8 +1664,7 @@ static int add_jump_destinations(struct objtool_file *file) /* * Cross-function jump. */ - if (insn_func(insn) && insn_func(jump_dest) && - insn_func(insn) != insn_func(jump_dest)) { + if (func && insn_func(jump_dest) && func != insn_func(jump_dest)) { /* * For GCC 8+, create parent/child links for any cold @@ -1702,10 +1681,10 @@ static int add_jump_destinations(struct objtool_file *file) * case where the parent function's only reference to a * subfunction is through a jump table. */ - if (!strstr(insn_func(insn)->name, ".cold") && + if (!strstr(func->name, ".cold") && strstr(insn_func(jump_dest)->name, ".cold")) { - insn_func(insn)->cfunc = insn_func(jump_dest); - insn_func(jump_dest)->pfunc = insn_func(insn); + func->cfunc = insn_func(jump_dest); + insn_func(jump_dest)->pfunc = func; } } @@ -2099,6 +2078,14 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, reloc_addend(reloc) == pfunc->offset) break; + /* + * Clang sometimes leaves dangling unused jump table entries + * which point to the end of the function. Ignore them. + */ + if (reloc->sym->sec == pfunc->sec && + reloc_addend(reloc) == pfunc->offset + pfunc->len) + goto next; + dest_insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); if (!dest_insn) break; @@ -2116,6 +2103,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, alt->insn = dest_insn; alt->next = insn->alts; insn->alts = alt; +next: prev_offset = reloc_offset(reloc); } @@ -2627,13 +2615,14 @@ static void mark_rodata(struct objtool_file *file) * * - .rodata: can contain GCC switch tables * - .rodata.<func>: same, if -fdata-sections is being used - * - .rodata..c_jump_table: contains C annotated jump tables + * - .data.rel.ro.c_jump_table: contains C annotated jump tables * * .rodata.str1.* sections are ignored; they don't contain jump tables. */ for_each_sec(file, sec) { - if (!strncmp(sec->name, ".rodata", 7) && - !strstr(sec->name, ".str1.")) { + if ((!strncmp(sec->name, ".rodata", 7) && + !strstr(sec->name, ".str1.")) || + !strncmp(sec->name, ".data.rel.ro", 12)) { sec->rodata = true; found = true; } @@ -3366,7 +3355,7 @@ static int handle_insn_ops(struct instruction *insn, if (update_cfi_state(insn, next_insn, &state->cfi, op)) return 1; - if (!insn->alt_group) + if (!opts.uaccess || !insn->alt_group) continue; if (op->dest.type == OP_DEST_PUSHF) { @@ -3661,6 +3650,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, !strncmp(func->name, "__pfx_", 6)) return 0; + if (file->ignore_unreachables) + return 0; + WARN("%s() falls through to next function %s()", func->name, insn_func(insn)->name); return 1; @@ -3830,6 +3822,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 0; case INSN_STAC: + if (!opts.uaccess) + break; + if (state.uaccess) { WARN_INSN(insn, "recursive UACCESS enable"); return 1; @@ -3839,6 +3834,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_CLAC: + if (!opts.uaccess) + break; + if (!state.uaccess && func) { WARN_INSN(insn, "redundant UACCESS disable"); return 1; @@ -3880,6 +3878,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (!next_insn) { if (state.cfi.cfa.base == CFI_UNDEFINED) return 0; + if (file->ignore_unreachables) + return 0; + WARN("%s: unexpected end of section", sec->name); return 1; } @@ -4018,6 +4019,11 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn) WARN_INSN(insn, "RET before UNTRAIN"); return 1; + case INSN_CONTEXT_SWITCH: + if (insn_func(insn)) + break; + return 0; + case INSN_NOP: if (insn->retpoline_safe) return 0; @@ -4027,6 +4033,9 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn) break; } + if (insn->dead_end) + return 0; + if (!next) { WARN_INSN(insn, "teh end!"); return -1; @@ -4181,7 +4190,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio * It may also insert a UD2 after calling a __noreturn function. */ prev_insn = prev_insn_same_sec(file, insn); - if (prev_insn->dead_end && + if (prev_insn && prev_insn->dead_end && (insn->type == INSN_BUG || (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest && insn->jump_dest->type == INSN_BUG))) @@ -4303,7 +4312,8 @@ static int validate_symbol(struct objtool_file *file, struct section *sec, if (!insn || insn->ignore || insn->visited) return 0; - state->uaccess = sym->uaccess_safe; + if (opts.uaccess) + state->uaccess = sym->uaccess_safe; ret = validate_branch(file, insn_func(insn), insn, *state); if (ret) @@ -4603,35 +4613,6 @@ static int validate_sls(struct objtool_file *file) return warnings; } -static bool ignore_noreturn_call(struct instruction *insn) -{ - struct symbol *call_dest = insn_call_dest(insn); - - /* - * FIXME: hack, we need a real noreturn solution - * - * Problem is, exc_double_fault() may or may not return, depending on - * whether CONFIG_X86_ESPFIX64 is set. But objtool has no visibility - * to the kernel config. - * - * Other potential ways to fix it: - * - * - have compiler communicate __noreturn functions somehow - * - remove CONFIG_X86_ESPFIX64 - * - read the .config file - * - add a cmdline option - * - create a generic objtool annotation format (vs a bunch of custom - * formats) and annotate it - */ - if (!strcmp(call_dest->name, "exc_double_fault")) { - /* prevent further unreachable warnings for the caller */ - insn->sym->warned = 1; - return true; - } - - return false; -} - static int validate_reachable_instructions(struct objtool_file *file) { struct instruction *insn, *prev_insn; @@ -4648,7 +4629,7 @@ static int validate_reachable_instructions(struct objtool_file *file) prev_insn = prev_insn_same_sec(file, insn); if (prev_insn && prev_insn->dead_end) { call_dest = insn_call_dest(prev_insn); - if (call_dest && !ignore_noreturn_call(prev_insn)) { + if (call_dest) { WARN_INSN(insn, "%s() is missing a __noreturn annotation", call_dest->name); warnings++; @@ -4671,6 +4652,8 @@ static int disas_funcs(const char *funcs) char *cmd; cross_compile = getenv("CROSS_COMPILE"); + if (!cross_compile) + cross_compile = ""; objdump_str = "%sobjdump -wdr %s | gawk -M -v _funcs='%s' '" "BEGIN { split(_funcs, funcs); }" @@ -4777,8 +4760,10 @@ int check(struct objtool_file *file) init_cfi_state(&force_undefined_cfi); force_undefined_cfi.force_undefined = true; - if (!cfi_hash_alloc(1UL << (file->elf->symbol_bits - 3))) + if (!cfi_hash_alloc(1UL << (file->elf->symbol_bits - 3))) { + ret = -1; goto out; + } cfi_hash_add(&init_cfi); cfi_hash_add(&func_cfi); @@ -4795,7 +4780,7 @@ int check(struct objtool_file *file) if (opts.retpoline) { ret = validate_retpoline(file); if (ret < 0) - return ret; + goto out; warnings += ret; } @@ -4831,7 +4816,7 @@ int check(struct objtool_file *file) */ ret = validate_unrets(file); if (ret < 0) - return ret; + goto out; warnings += ret; } @@ -4894,7 +4879,7 @@ int check(struct objtool_file *file) if (opts.prefix) { ret = add_prefix_symbols(file); if (ret < 0) - return ret; + goto out; warnings += ret; } diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h index 86d4af9c5aa9..89ee12b1a138 100644 --- a/tools/objtool/include/objtool/special.h +++ b/tools/objtool/include/objtool/special.h @@ -10,7 +10,7 @@ #include <objtool/check.h> #include <objtool/elf.h> -#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table" +#define C_JUMP_TABLE_SECTION ".data.rel.ro.c_jump_table" struct special_alt { struct list_head list; diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index f5b81d439387..1ef45d803024 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -48,8 +48,6 @@ libbpf/ libperf/ libsubcmd/ libsymbol/ -libtraceevent/ -libtraceevent_plugins/ fixdep Documentation/doc.dep python_ext_build/ diff --git a/tools/perf/Documentation/intel-hybrid.txt b/tools/perf/Documentation/intel-hybrid.txt index e7a776ad25d7..0379903673a4 100644 --- a/tools/perf/Documentation/intel-hybrid.txt +++ b/tools/perf/Documentation/intel-hybrid.txt @@ -8,15 +8,15 @@ Part of events are available on core cpu, part of events are available on atom cpu and even part of events are available on both. Kernel exports two new cpu pmus via sysfs: -/sys/devices/cpu_core -/sys/devices/cpu_atom +/sys/bus/event_source/devices/cpu_core +/sys/bus/event_source/devices/cpu_atom The 'cpus' files are created under the directories. For example, -cat /sys/devices/cpu_core/cpus +cat /sys/bus/event_source/devices/cpu_core/cpus 0-15 -cat /sys/devices/cpu_atom/cpus +cat /sys/bus/event_source/devices/cpu_atom/cpus 16-23 It indicates cpu0-cpu15 are core cpus and cpu16-cpu23 are atom cpus. @@ -60,8 +60,8 @@ can't carry pmu information. So now this type is extended to be PMU aware type. The PMU type ID is stored at attr.config[63:32]. PMU type ID is retrieved from sysfs. -/sys/devices/cpu_atom/type -/sys/devices/cpu_core/type +/sys/bus/event_source/devices/cpu_atom/type +/sys/bus/event_source/devices/cpu_core/type The new attr.config layout for PERF_TYPE_HARDWARE: diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index dea005410ec0..ee5d333e2ca9 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -188,7 +188,7 @@ in the CPU vendor specific documentation. The available PMUs and their raw parameters can be listed with - ls /sys/devices/*/format + ls /sys/bus/event_source/devices/*/format For example the raw event "LSD.UOPS" core pmu event above could be specified as diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index dc42de1785ce..908165fcec7d 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -1,5 +1,6 @@ arch/arm64/tools/gen-sysreg.awk arch/arm64/tools/sysreg +arch/*/include/uapi/asm/bpf_perf_event.h tools/perf tools/arch tools/scripts diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 2ce71d2e5fae..a2034fa18325 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -513,13 +513,14 @@ ifeq ($(feature-setns), 1) $(call detected,CONFIG_SETNS) endif +ifeq ($(feature-reallocarray), 0) + CFLAGS += -DCOMPAT_NEED_REALLOCARRAY +endif + ifdef CORESIGHT $(call feature_check,libopencsd) ifeq ($(feature-libopencsd), 1) CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS) - ifeq ($(feature-reallocarray), 0) - CFLAGS += -DCOMPAT_NEED_REALLOCARRAY - endif LDFLAGS += $(LIBOPENCSD_LDFLAGS) EXTLIBS += $(OPENCSDLIBS) $(call detected,CONFIG_LIBOPENCSD) @@ -568,6 +569,8 @@ ifndef NO_LIBELF ifeq ($(feature-libdebuginfod), 1) CFLAGS += -DHAVE_DEBUGINFOD_SUPPORT EXTLIBS += -ldebuginfod + else + $(warning No elfutils/debuginfod.h found, no debuginfo server support, please install libdebuginfod-dev/elfutils-debuginfod-client-devel or equivalent) endif endif @@ -1135,9 +1138,6 @@ ifndef NO_AUXTRACE ifndef NO_AUXTRACE $(call detected,CONFIG_AUXTRACE) CFLAGS += -DHAVE_AUXTRACE_SUPPORT - ifeq ($(feature-reallocarray), 0) - CFLAGS += -DCOMPAT_NEED_REALLOCARRAY - endif endif endif diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 9dd2e8d3f3c9..b61c355fbdee 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -164,7 +164,7 @@ ifneq ($(OUTPUT),) VPATH += $(OUTPUT) export VPATH # create symlink to the original source -SOURCE := $(shell ln -sf $(srctree)/tools/perf $(OUTPUT)/source) +SOURCE := $(shell ln -sfn $(srctree)/tools/perf $(OUTPUT)/source) endif ifeq ($(V),1) @@ -1143,7 +1143,8 @@ install-tests: all install-gtk $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_probe'; \ $(INSTALL) tests/shell/base_probe/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_probe'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_report'; \ - $(INSTALL) tests/shell/base_probe/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_report'; \ + $(INSTALL) tests/shell/base_report/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_report'; \ + $(INSTALL) tests/shell/base_report/*.txt '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/base_report'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/coresight' ; \ $(INSTALL) tests/shell/coresight/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/coresight' $(Q)$(MAKE) -C tests/shell/coresight install-tests diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c index df7b5dfcc26a..7ea882ef293a 100644 --- a/tools/perf/arch/x86/util/iostat.c +++ b/tools/perf/arch/x86/util/iostat.c @@ -32,7 +32,7 @@ #define MAX_PATH 1024 #endif -#define UNCORE_IIO_PMU_PATH "devices/uncore_iio_%d" +#define UNCORE_IIO_PMU_PATH "bus/event_source/devices/uncore_iio_%d" #define SYSFS_UNCORE_PMU_PATH "%s/"UNCORE_IIO_PMU_PATH #define PLATFORM_MAPPING_PATH UNCORE_IIO_PMU_PATH"/die%d" diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index ef5c4257844d..20fe4f72b4af 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -420,7 +420,12 @@ static int cmpworker(const void *p1, const void *p2) struct worker *w1 = (struct worker *) p1; struct worker *w2 = (struct worker *) p2; - return w1->tid > w2->tid; + + if (w1->tid > w2->tid) + return 1; + if (w1->tid < w2->tid) + return -1; + return 0; } int bench_epoll_wait(int argc, const char **argv) diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c index ea4dfc07cbd6..e7dc216f717f 100644 --- a/tools/perf/bench/syscall.c +++ b/tools/perf/bench/syscall.c @@ -22,8 +22,7 @@ #define __NR_fork -1 #endif -#define LOOPS_DEFAULT 10000000 -static int loops = LOOPS_DEFAULT; +static int loops; static const struct option options[] = { OPT_INTEGER('l', "loop", &loops, "Specify number of loops"), @@ -80,6 +79,18 @@ static int bench_syscall_common(int argc, const char **argv, int syscall) const char *name = NULL; int i; + switch (syscall) { + case __NR_fork: + case __NR_execve: + /* Limit default loop to 10000 times to save time */ + loops = 10000; + break; + default: + loops = 10000000; + break; + } + + /* Options -l and --loops override default above */ argc = parse_options(argc, argv, options, bench_syscall_usage, 0); gettimeofday(&start, NULL); @@ -94,16 +105,9 @@ static int bench_syscall_common(int argc, const char **argv, int syscall) break; case __NR_fork: test_fork(); - /* Only loop 10000 times to save time */ - if (i == 10000) - loops = 10000; break; case __NR_execve: test_execve(); - /* Only loop 10000 times to save time */ - if (i == 10000) - loops = 10000; - break; default: break; } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index d6989195a061..11e49cafa3af 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -2367,10 +2367,10 @@ int cmd_inject(int argc, const char **argv) }; int ret; const char *known_build_ids = NULL; - bool build_ids; - bool build_id_all; - bool mmap2_build_ids; - bool mmap2_build_id_all; + bool build_ids = false; + bool build_id_all = false; + bool mmap2_build_ids = false; + bool mmap2_build_id_all = false; struct option options[] = { OPT_BOOLEAN('b', "build-ids", &build_ids, diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 062e2b56a2ab..33a456980664 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1591,8 +1591,8 @@ static const struct { { LCB_F_PERCPU | LCB_F_WRITE, "pcpu-sem:W", "percpu-rwsem" }, { LCB_F_MUTEX, "mutex", "mutex" }, { LCB_F_MUTEX | LCB_F_SPIN, "mutex", "mutex" }, - /* alias for get_type_flag() */ - { LCB_F_MUTEX | LCB_F_SPIN, "mutex-spin", "mutex" }, + /* alias for optimistic spinning only */ + { LCB_F_MUTEX | LCB_F_SPIN, "mutex:spin", "mutex-spin" }, }; static const char *get_type_str(unsigned int flags) @@ -1617,19 +1617,6 @@ static const char *get_type_name(unsigned int flags) return "unknown"; } -static unsigned int get_type_flag(const char *str) -{ - for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) { - if (!strcmp(lock_type_table[i].name, str)) - return lock_type_table[i].flags; - } - for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) { - if (!strcmp(lock_type_table[i].str, str)) - return lock_type_table[i].flags; - } - return UINT_MAX; -} - static void lock_filter_finish(void) { zfree(&filters.types); @@ -2350,29 +2337,58 @@ static int parse_lock_type(const struct option *opt __maybe_unused, const char * int unset __maybe_unused) { char *s, *tmp, *tok; - int ret = 0; s = strdup(str); if (s == NULL) return -1; for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { - unsigned int flags = get_type_flag(tok); + bool found = false; - if (flags == -1U) { - pr_err("Unknown lock flags: %s\n", tok); - ret = -1; - break; + /* `tok` is `str` in `lock_type_table` if it contains ':'. */ + if (strchr(tok, ':')) { + for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) { + if (!strcmp(lock_type_table[i].str, tok) && + add_lock_type(lock_type_table[i].flags)) { + found = true; + break; + } + } + + if (!found) { + pr_err("Unknown lock flags name: %s\n", tok); + free(s); + return -1; + } + + continue; } - if (!add_lock_type(flags)) { - ret = -1; - break; + /* + * Otherwise `tok` is `name` in `lock_type_table`. + * Single lock name could contain multiple flags. + */ + for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) { + if (!strcmp(lock_type_table[i].name, tok)) { + if (add_lock_type(lock_type_table[i].flags)) { + found = true; + } else { + free(s); + return -1; + } + } } + + if (!found) { + pr_err("Unknown lock name: %s\n", tok); + free(s); + return -1; + } + } free(s); - return ret; + return 0; } static bool add_lock_addr(unsigned long addr) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index adbaf80b398c..ab9035573a15 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -3471,7 +3471,7 @@ static struct option __record_options[] = { "sample selected machine registers on interrupt," " use '-I?' to list register names", parse_intr_regs), OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", - "sample selected machine registers on interrupt," + "sample selected machine registers in user space," " use '--user-regs=?' to list register names", parse_user_regs), OPT_BOOLEAN(0, "running-time", &record.opts.running_time, "Record running/enabled time of read (:S) events"), diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5dc17ffee27a..8700c3968066 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1418,7 +1418,7 @@ int cmd_report(int argc, const char **argv) OPT_STRING(0, "addr2line", &addr2line_path, "path", "addr2line binary to use for line numbers"), OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, - "Disable symbol demangling"), + "Symbol demangling. Enabled by default, use --no-demangle to disable."), OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, "Enable kernel symbol demangling"), OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"), @@ -1551,12 +1551,12 @@ int cmd_report(int argc, const char **argv) input_name = "perf.data"; } +repeat: data.path = input_name; data.force = symbol_conf.force; symbol_conf.skip_empty = report.skip_empty; -repeat: perf_tool__init(&report.tool, ordered_events); report.tool.sample = process_sample_event; report.tool.mmap = perf_event__process_mmap; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 5981cc51abc8..64bf3ac237f2 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -999,7 +999,7 @@ thread_atoms_search(struct rb_root_cached *root, struct thread *thread, else if (cmp < 0) node = node->rb_right; else { - BUG_ON(thread != atoms->thread); + BUG_ON(!RC_CHK_EQUAL(thread, atoms->thread)); return atoms; } } @@ -1116,6 +1116,21 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp) atoms->nb_atoms++; } +static void free_work_atoms(struct work_atoms *atoms) +{ + struct work_atom *atom, *tmp; + + if (atoms == NULL) + return; + + list_for_each_entry_safe(atom, tmp, &atoms->work_list, list) { + list_del(&atom->list); + free(atom); + } + thread__zput(atoms->thread); + free(atoms); +} + static int latency_switch_event(struct perf_sched *sched, struct evsel *evsel, struct perf_sample *sample, @@ -1639,6 +1654,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, const char *color = PERF_COLOR_NORMAL; char stimestamp[32]; const char *str; + int ret = -1; BUG_ON(this_cpu.cpu >= MAX_CPUS || this_cpu.cpu < 0); @@ -1669,17 +1685,20 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, sched_in = map__findnew_thread(sched, machine, -1, next_pid); sched_out = map__findnew_thread(sched, machine, -1, prev_pid); if (sched_in == NULL || sched_out == NULL) - return -1; + goto out; tr = thread__get_runtime(sched_in); - if (tr == NULL) { - thread__put(sched_in); - return -1; - } + if (tr == NULL) + goto out; + + thread__put(sched->curr_thread[this_cpu.cpu]); + thread__put(sched->curr_out_thread[this_cpu.cpu]); sched->curr_thread[this_cpu.cpu] = thread__get(sched_in); sched->curr_out_thread[this_cpu.cpu] = thread__get(sched_out); + ret = 0; + str = thread__comm_str(sched_in); new_shortname = 0; if (!tr->shortname[0]) { @@ -1774,12 +1793,10 @@ sched_out: color_fprintf(stdout, color, "\n"); out: - if (sched->map.task_name) - thread__put(sched_out); - + thread__put(sched_out); thread__put(sched_in); - return 0; + return ret; } static int process_sched_switch_event(const struct perf_tool *tool, @@ -2023,6 +2040,16 @@ static u64 evsel__get_time(struct evsel *evsel, u32 cpu) return r->last_time[cpu]; } +static void timehist__evsel_priv_destructor(void *priv) +{ + struct evsel_runtime *r = priv; + + if (r) { + free(r->last_time); + free(r); + } +} + static int comm_width = 30; static char *timehist_get_commstr(struct thread *thread) @@ -3276,6 +3303,8 @@ static int perf_sched__timehist(struct perf_sched *sched) setup_pager(); + evsel__set_priv_destructor(timehist__evsel_priv_destructor); + /* prefer sched_waking if it is captured */ if (evlist__find_tracepoint_by_name(session->evlist, "sched:sched_waking")) handlers[1].handler = timehist_sched_wakeup_ignore; @@ -3376,13 +3405,13 @@ static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *d this->total_runtime += data->total_runtime; this->nb_atoms += data->nb_atoms; this->total_lat += data->total_lat; - list_splice(&data->work_list, &this->work_list); + list_splice_init(&data->work_list, &this->work_list); if (this->max_lat < data->max_lat) { this->max_lat = data->max_lat; this->max_lat_start = data->max_lat_start; this->max_lat_end = data->max_lat_end; } - zfree(&data); + free_work_atoms(data); return; } } @@ -3461,7 +3490,6 @@ static int perf_sched__lat(struct perf_sched *sched) work_list = rb_entry(next, struct work_atoms, node); output_lat_thread(sched, work_list); next = rb_next(next); - thread__zput(work_list->thread); } printf(" -----------------------------------------------------------------------------------------------------------------\n"); @@ -3475,6 +3503,13 @@ static int perf_sched__lat(struct perf_sched *sched) rc = 0; + while ((next = rb_first_cached(&sched->sorted_atom_root))) { + struct work_atoms *data; + + data = rb_entry(next, struct work_atoms, node); + rb_erase_cached(next, &sched->sorted_atom_root); + free_work_atoms(data); + } out_free_cpus_switch_event: free_cpus_switch_event(sched); return rc; @@ -3546,10 +3581,10 @@ static int perf_sched__map(struct perf_sched *sched) sched->curr_out_thread = calloc(MAX_CPUS, sizeof(*(sched->curr_out_thread))); if (!sched->curr_out_thread) - return rc; + goto out_free_curr_thread; if (setup_cpus_switch_event(sched)) - goto out_free_curr_thread; + goto out_free_curr_out_thread; if (setup_map_cpus(sched)) goto out_free_cpus_switch_event; @@ -3580,7 +3615,14 @@ out_put_map_cpus: out_free_cpus_switch_event: free_cpus_switch_event(sched); +out_free_curr_out_thread: + for (int i = 0; i < MAX_CPUS; i++) + thread__put(sched->curr_out_thread[i]); + zfree(&sched->curr_out_thread); + out_free_curr_thread: + for (int i = 0; i < MAX_CPUS; i++) + thread__put(sched->curr_thread[i]); zfree(&sched->curr_thread); return rc; } @@ -3887,13 +3929,15 @@ int cmd_sched(int argc, const char **argv) if (!argc) usage_with_options(sched_usage, sched_options); + thread__set_priv_destructor(free); + /* * Aliased to 'perf script' for now: */ if (!strcmp(argv[0], "script")) { - return cmd_script(argc, argv); + ret = cmd_script(argc, argv); } else if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) { - return __cmd_record(argc, argv); + ret = __cmd_record(argc, argv); } else if (strlen(argv[0]) > 2 && strstarts("latency", argv[0])) { sched.tp_handler = &lat_ops; if (argc > 1) { @@ -3902,7 +3946,7 @@ int cmd_sched(int argc, const char **argv) usage_with_options(latency_usage, latency_options); } setup_sorting(&sched, latency_options, latency_usage); - return perf_sched__lat(&sched); + ret = perf_sched__lat(&sched); } else if (!strcmp(argv[0], "map")) { if (argc) { argc = parse_options(argc, argv, map_options, map_usage, 0); @@ -3913,13 +3957,14 @@ int cmd_sched(int argc, const char **argv) sched.map.task_names = strlist__new(sched.map.task_name, NULL); if (sched.map.task_names == NULL) { fprintf(stderr, "Failed to parse task names\n"); - return -1; + ret = -1; + goto out; } } } sched.tp_handler = &map_ops; setup_sorting(&sched, latency_options, latency_usage); - return perf_sched__map(&sched); + ret = perf_sched__map(&sched); } else if (strlen(argv[0]) > 2 && strstarts("replay", argv[0])) { sched.tp_handler = &replay_ops; if (argc) { @@ -3927,7 +3972,7 @@ int cmd_sched(int argc, const char **argv) if (argc) usage_with_options(replay_usage, replay_options); } - return perf_sched__replay(&sched); + ret = perf_sched__replay(&sched); } else if (!strcmp(argv[0], "timehist")) { if (argc) { argc = parse_options(argc, argv, timehist_options, @@ -3943,19 +3988,19 @@ int cmd_sched(int argc, const char **argv) parse_options_usage(NULL, timehist_options, "w", true); if (sched.show_next) parse_options_usage(NULL, timehist_options, "n", true); - return -EINVAL; + ret = -EINVAL; + goto out; } ret = symbol__validate_sym_arguments(); - if (ret) - return ret; - - return perf_sched__timehist(&sched); + if (!ret) + ret = perf_sched__timehist(&sched); } else { usage_with_options(sched_usage, sched_options); } +out: /* free usage string allocated by parse_options_subcommand */ free((void *)sched_usage[0]); - return 0; + return ret; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 4933efdfee76..628c61397d2d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -96,7 +96,7 @@ #include <internal/threadmap.h> #define DEFAULT_SEPARATOR " " -#define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" +#define FREEZE_ON_SMI_PATH "bus/event_source/devices/cpu/freeze_on_smi" static void print_counters(struct timespec *ts, int argc, const char **argv); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 724a79386321..ca3e8eca6610 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -809,7 +809,7 @@ static void perf_event__process_sample(const struct perf_tool *tool, * invalid --vmlinux ;-) */ if (!machine->kptr_restrict_warned && !top->vmlinux_warned && - __map__is_kernel(al.map) && map__has_symbols(al.map)) { + __map__is_kernel(al.map) && !map__has_symbols(al.map)) { if (symbol_conf.vmlinux_name) { char serr[256]; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ffa129527309..f77e4f4b6f03 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1327,7 +1327,7 @@ static const struct syscall_fmt syscall_fmts[] = { .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ }, [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ }, [4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, /* flags */ }, }, }, - { .name = "rseq", .errpid = true, + { .name = "rseq", .arg = { [0] = { .from_user = true /* rseq */, }, }, }, { .name = "rt_sigaction", .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, @@ -1351,7 +1351,7 @@ static const struct syscall_fmt syscall_fmts[] = { { .name = "sendto", .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, [4] = SCA_SOCKADDR_FROM_USER(addr), }, }, - { .name = "set_robust_list", .errpid = true, + { .name = "set_robust_list", .arg = { [0] = { .from_user = true /* head */, }, }, }, { .name = "set_tid_address", .errpid = true, }, { .name = "setitimer", @@ -2122,8 +2122,12 @@ static int trace__read_syscall_info(struct trace *trace, int id) return PTR_ERR(sc->tp_format); } + /* + * The tracepoint format contains __syscall_nr field, so it's one more + * than the actual number of syscall arguments. + */ if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? - RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields)) + RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields - 1)) return -ENOMEM; sc->args = sc->tp_format->format.fields; @@ -2869,8 +2873,8 @@ errno_print: { else if (sc->fmt->errpid) { struct thread *child = machine__find_thread(trace->host, ret, ret); + fprintf(trace->output, "%ld", ret); if (child != NULL) { - fprintf(trace->output, "%ld", ret); if (thread__comm_set(child)) fprintf(trace->output, " (%s)", thread__comm_str(child)); thread__put(child); @@ -3982,10 +3986,13 @@ static int trace__set_filter_loop_pids(struct trace *trace) if (!strcmp(thread__comm_str(parent), "sshd") || strstarts(thread__comm_str(parent), "gnome-terminal")) { pids[nr++] = thread__tid(parent); + thread__put(parent); break; } + thread__put(thread); thread = parent; } + thread__put(thread); err = evlist__append_tp_filter_pids(trace->evlist, nr, pids); if (!err && trace->filter_pids.map) diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json index c5d1d22bd034..5228f94a793f 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json @@ -229,19 +229,19 @@ }, { "MetricName": "slots_lost_misspeculation_fraction", - "MetricExpr": "(OP_SPEC - OP_RETIRED) / (CPU_CYCLES * #slots)", + "MetricExpr": "100 * (OP_SPEC - OP_RETIRED) / (CPU_CYCLES * #slots)", "BriefDescription": "Fraction of slots lost due to misspeculation", "DefaultMetricgroupName": "TopdownL1", "MetricGroup": "Default;TopdownL1", - "ScaleUnit": "100percent of slots" + "ScaleUnit": "1percent of slots" }, { "MetricName": "retired_fraction", - "MetricExpr": "OP_RETIRED / (CPU_CYCLES * #slots)", + "MetricExpr": "100 * OP_RETIRED / (CPU_CYCLES * #slots)", "BriefDescription": "Fraction of slots retiring, useful work", "DefaultMetricgroupName": "TopdownL1", "MetricGroup": "Default;TopdownL1", - "ScaleUnit": "100percent of slots" + "ScaleUnit": "1percent of slots" }, { "MetricName": "backend_core", @@ -266,7 +266,7 @@ }, { "MetricName": "frontend_bandwidth", - "MetricExpr": "frontend_bound - frontend_latency", + "MetricExpr": "frontend_bound - 100 * frontend_latency", "BriefDescription": "Fraction of slots the CPU did not dispatch at full bandwidth - able to dispatch partial slots only (1, 2, or 3 uops)", "MetricGroup": "TopdownL2", "ScaleUnit": "1percent of slots" diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 121cf61ba1b3..e0b2e7268ef6 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -680,7 +680,10 @@ class CallGraphModelBase(TreeModel): s = value.replace("%", "\\%") s = s.replace("_", "\\_") # Translate * and ? into SQL LIKE pattern characters % and _ - trans = string.maketrans("*?", "%_") + if sys.version_info[0] == 3: + trans = str.maketrans("*?", "%_") + else: + trans = string.maketrans("*?", "%_") match = " LIKE '" + str(s).translate(trans) + "'" else: match = " GLOB '" + str(value) + "'" diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c index 4cb7d486b5c1..047433c977bc 100644 --- a/tools/perf/tests/bp_account.c +++ b/tools/perf/tests/bp_account.c @@ -104,6 +104,7 @@ static int bp_accounting(int wp_cnt, int share) fd_wp = wp_event((void *)&the_var, &attr_new); TEST_ASSERT_VAL("failed to create max wp\n", fd_wp != -1); pr_debug("wp max created\n"); + close(fd_wp); } for (i = 0; i < wp_cnt; i++) diff --git a/tools/perf/tests/shell/coresight/asm_pure_loop/asm_pure_loop.S b/tools/perf/tests/shell/coresight/asm_pure_loop/asm_pure_loop.S index 75cf084a927d..577760046772 100644 --- a/tools/perf/tests/shell/coresight/asm_pure_loop/asm_pure_loop.S +++ b/tools/perf/tests/shell/coresight/asm_pure_loop/asm_pure_loop.S @@ -26,3 +26,5 @@ skip: mov x0, #0 mov x8, #93 // __NR_exit syscall svc #0 + +.section .note.GNU-stack, "", @progbits diff --git a/tools/perf/tests/shell/record_bpf_filter.sh b/tools/perf/tests/shell/record_bpf_filter.sh index 1b58ccc1fd88..4d6c3c1b7fb9 100755 --- a/tools/perf/tests/shell/record_bpf_filter.sh +++ b/tools/perf/tests/shell/record_bpf_filter.sh @@ -89,7 +89,7 @@ test_bpf_filter_fail() { test_bpf_filter_group() { echo "Group bpf-filter test" - if ! perf record -e task-clock --filter 'period > 1000 || ip > 0' \ + if ! perf record -e task-clock --filter 'period > 1000, ip > 0' \ -o /dev/null true 2>/dev/null then echo "Group bpf-filter test [Failed should succeed]" @@ -97,7 +97,7 @@ test_bpf_filter_group() { return fi - if ! perf record -e task-clock --filter 'cpu > 0 || ip > 0' \ + if ! perf record -e task-clock --filter 'period > 1000 , cpu > 0 || ip > 0' \ -o /dev/null true 2>&1 | grep -q PERF_SAMPLE_CPU then echo "Group bpf-filter test [Failed forbidden CPU]" diff --git a/tools/perf/tests/shell/trace_btf_enum.sh b/tools/perf/tests/shell/trace_btf_enum.sh index 5a3b8a5a9b5c..8d1e6bbeac90 100755 --- a/tools/perf/tests/shell/trace_btf_enum.sh +++ b/tools/perf/tests/shell/trace_btf_enum.sh @@ -26,8 +26,12 @@ check_vmlinux() { trace_landlock() { echo "Tracing syscall ${syscall}" - # test flight just to see if landlock_add_rule and libbpf are available - $TESTPROG + # test flight just to see if landlock_add_rule is available + if ! perf trace $TESTPROG 2>&1 | grep -q landlock + then + echo "No landlock system call found, skipping to non-syscall tracing." + return + fi if perf trace -e $syscall $TESTPROG 2>&1 | \ grep -q -E ".*landlock_add_rule\(ruleset_fd: 11, rule_type: (LANDLOCK_RULE_PATH_BENEATH|LANDLOCK_RULE_NET_PORT), rule_attr: 0x[a-f0-9]+, flags: 45\) = -1.*" diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 5cab17a1942e..ee43d8fa2ed6 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -258,7 +258,7 @@ static int compar(const void *a, const void *b) const struct event_node *nodeb = b; s64 cmp = nodea->event_time - nodeb->event_time; - return cmp; + return cmp < 0 ? -1 : (cmp > 0 ? 1 : 0); } static int process_events(struct evlist *evlist, diff --git a/tools/perf/tests/tests-scripts.c b/tools/perf/tests/tests-scripts.c index ed114b044293..b6986d50dde6 100644 --- a/tools/perf/tests/tests-scripts.c +++ b/tools/perf/tests/tests-scripts.c @@ -255,6 +255,7 @@ static void append_scripts_in_dir(int dir_fd, continue; /* Skip scripts that have a separate driver. */ fd = openat(dir_fd, ent->d_name, O_PATH); append_scripts_in_dir(fd, result, result_sz); + close(fd); } for (i = 0; i < n_dirs; i++) /* Clean up */ zfree(&entlist[i]); diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 49ba82bf3391..3283b6313bab 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3267,10 +3267,10 @@ do_hotkey: // key came straight from options ui__popup_menu() /* * No need to set actions->dso here since * it's just to remove the current filter. - * Ditto for thread below. */ do_zoom_dso(browser, actions); } else if (top == &browser->hists->thread_filter) { + actions->thread = thread; do_zoom_thread(browser, actions); } else if (top == &browser->hists->socket_filter) { do_zoom_socket(browser, actions); diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 138ffc71b32d..2c06f2a85400 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -37,6 +37,8 @@ #include "../../arch/arm64/include/asm/cputype.h" #define MAX_TIMESTAMP (~0ULL) +#define is_ldst_op(op) (!!((op) & ARM_SPE_OP_LDST)) + struct arm_spe { struct auxtrace auxtrace; struct auxtrace_queues queues; @@ -520,6 +522,10 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe); + /* Only synthesize data source for LDST operations */ + if (!is_ldst_op(record->op)) + return 0; + if (record->op & ARM_SPE_OP_LD) data_src.mem_op = PERF_MEM_OP_LOAD; else if (record->op & ARM_SPE_OP_ST) @@ -619,7 +625,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq) * When data_src is zero it means the record is not a memory operation, * skip to synthesize memory sample for this case. */ - if (spe->sample_memory && data_src) { + if (spe->sample_memory && is_ldst_op(record->op)) { err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src); if (err) return err; diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 13608237c50e..c81444059ad0 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -289,7 +289,10 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, } info_node->info_linear = info_linear; - perf_env__insert_bpf_prog_info(env, info_node); + if (!perf_env__insert_bpf_prog_info(env, info_node)) { + free(info_linear); + free(info_node); + } info_linear = NULL; /* @@ -480,7 +483,10 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) info_node = malloc(sizeof(struct bpf_prog_info_node)); if (info_node) { info_node->info_linear = info_linear; - perf_env__insert_bpf_prog_info(env, info_node); + if (!perf_env__insert_bpf_prog_info(env, info_node)) { + free(info_linear); + free(info_node); + } } else free(info_linear); diff --git a/tools/perf/util/bpf-filter.l b/tools/perf/util/bpf-filter.l index f313404f95a9..6aa65ade3385 100644 --- a/tools/perf/util/bpf-filter.l +++ b/tools/perf/util/bpf-filter.l @@ -76,7 +76,7 @@ static int path_or_error(void) num_dec [0-9]+ num_hex 0[Xx][0-9a-fA-F]+ space [ \t]+ -path [^ \t\n]+ +path [^ \t\n,]+ ident [_a-zA-Z][_a-zA-Z0-9]+ %% diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c index 4a62ed593e84..e4352881e3fa 100644 --- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c @@ -431,9 +431,9 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid) static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) { bool augmented, do_output = false; - int zero = 0, size, aug_size, index, - value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value); + int zero = 0, index, value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value); u64 output = 0; /* has to be u64, otherwise it won't pass the verifier */ + s64 aug_size, size; unsigned int nr, *beauty_map; struct beauty_payload_enter *payload; void *arg, *payload_offset; @@ -484,14 +484,11 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) } else if (size > 0 && size <= value_size) { /* struct */ if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, size, arg)) augmented = true; - } else if (size < 0 && size >= -6) { /* buffer */ + } else if ((int)size < 0 && size >= -6) { /* buffer */ index = -(size + 1); barrier_var(index); // Prevent clang (noticed with v18) from removing the &= 7 trick. index &= 7; // Satisfy the bounds checking with the verifier in some kernels. - aug_size = args->args[index]; - - if (aug_size > TRACE_AUG_MAX_BUF) - aug_size = TRACE_AUG_MAX_BUF; + aug_size = args->args[index] > TRACE_AUG_MAX_BUF ? TRACE_AUG_MAX_BUF : args->args[index]; if (aug_size > 0) { if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, aug_size, arg)) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index e763e8d99a43..ee00313d5d7e 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -864,7 +864,7 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine, char *allocated_name = NULL; int ret = 0; - if (!dso__has_build_id(dso)) + if (!dso__has_build_id(dso) || !dso__hit(dso)) return 0; if (dso__is_kcore(dso)) { diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 49b79cf0c5cc..8aa456d7c2cd 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -5,6 +5,8 @@ #include <internal/rc_check.h> #include <linux/refcount.h> #include <linux/zalloc.h> +#include <tools/libc_compat.h> // reallocarray + #include "rwsem.h" DECLARE_RC_STRUCT(comm_str) { diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index d633d15329fa..e56330c85fe7 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -46,8 +46,8 @@ int debug_type_profile; FILE *debug_file(void) { if (!_debug_file) { - pr_warning_once("debug_file not set"); debug_set_file(stderr); + pr_warning_once("debug_file not set"); } return _debug_file; } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index bb8e8f444054..c0472a41147c 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -808,7 +808,9 @@ static inline bool dso__is_kcore(const struct dso *dso) static inline bool dso__is_kallsyms(const struct dso *dso) { - return RC_CHK_ACCESS(dso)->kernel && RC_CHK_ACCESS(dso)->long_name[0] != '/'; + enum dso_binary_type bt = dso__binary_type(dso); + + return bt == DSO_BINARY_TYPE__KALLSYMS || bt == DSO_BINARY_TYPE__GUEST_KALLSYMS; } bool dso__is_object_file(const struct dso *dso); diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 1edbccfc3281..d981b6f4bc5e 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -22,15 +22,19 @@ struct perf_env perf_env; #include "bpf-utils.h" #include <bpf/libbpf.h> -void perf_env__insert_bpf_prog_info(struct perf_env *env, +bool perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) { + bool ret; + down_write(&env->bpf_progs.lock); - __perf_env__insert_bpf_prog_info(env, info_node); + ret = __perf_env__insert_bpf_prog_info(env, info_node); up_write(&env->bpf_progs.lock); + + return ret; } -void __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) +bool __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) { __u32 prog_id = info_node->info_linear->info.id; struct bpf_prog_info_node *node; @@ -48,13 +52,14 @@ void __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info p = &(*p)->rb_right; } else { pr_debug("duplicated bpf prog info %u\n", prog_id); - return; + return false; } } rb_link_node(&info_node->rb_node, parent, p); rb_insert_color(&info_node->rb_node, &env->bpf_progs.infos); env->bpf_progs.infos_cnt++; + return true; } struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 51b36c36019b..38de0af2a680 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -176,9 +176,9 @@ const char *perf_env__raw_arch(struct perf_env *env); int perf_env__nr_cpus_avail(struct perf_env *env); void perf_env__init(struct perf_env *env); -void __perf_env__insert_bpf_prog_info(struct perf_env *env, +bool __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node); -void perf_env__insert_bpf_prog_info(struct perf_env *env, +bool perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node); struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, __u32 prog_id); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a9df84692d4a..dac87dccaaaa 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1434,19 +1434,18 @@ static int evlist__create_syswide_maps(struct evlist *evlist) */ cpus = perf_cpu_map__new_online_cpus(); if (!cpus) - goto out; + return -ENOMEM; threads = perf_thread_map__new_dummy(); - if (!threads) - goto out_put; + if (!threads) { + perf_cpu_map__put(cpus); + return -ENOMEM; + } perf_evlist__set_maps(&evlist->core, cpus, threads); - perf_thread_map__put(threads); -out_put: perf_cpu_map__put(cpus); -out: - return -ENOMEM; + return 0; } int evlist__open(struct evlist *evlist) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index dbf9c8cee3c5..6d7249cc1a99 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1477,6 +1477,15 @@ static void evsel__free_config_terms(struct evsel *evsel) free_config_terms(&evsel->config_terms); } +static void (*evsel__priv_destructor)(void *priv); + +void evsel__set_priv_destructor(void (*destructor)(void *priv)) +{ + assert(evsel__priv_destructor == NULL); + + evsel__priv_destructor = destructor; +} + void evsel__exit(struct evsel *evsel) { assert(list_empty(&evsel->core.node)); @@ -1502,6 +1511,8 @@ void evsel__exit(struct evsel *evsel) hashmap__free(evsel->per_pkg_mask); evsel->per_pkg_mask = NULL; zfree(&evsel->metric_events); + if (evsel__priv_destructor) + evsel__priv_destructor(evsel->priv); perf_evsel__object.fini(evsel); if (evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME || evsel__tool_event(evsel) == PERF_TOOL_USER_TIME) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 15e745a9a798..26574a33a725 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -282,6 +282,8 @@ void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx); void evsel__exit(struct evsel *evsel); void evsel__delete(struct evsel *evsel); +void evsel__set_priv_destructor(void (*destructor)(void *priv)); + struct callchain_param; void evsel__config(struct evsel *evsel, struct record_opts *opts, diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index b2536a59c44e..90c6ce2212e4 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -288,7 +288,7 @@ struct expr_parse_ctx *expr__ctx_new(void) { struct expr_parse_ctx *ctx; - ctx = malloc(sizeof(struct expr_parse_ctx)); + ctx = calloc(1, sizeof(struct expr_parse_ctx)); if (!ctx) return NULL; @@ -297,9 +297,6 @@ struct expr_parse_ctx *expr__ctx_new(void) free(ctx); return NULL; } - ctx->sctx.user_requested_cpu_list = NULL; - ctx->sctx.runtime = 0; - ctx->sctx.system_wide = false; return ctx; } diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index a6386d12afd7..7b99f58f7bf2 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3188,7 +3188,10 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) /* after reading from file, translate offset to address */ bpil_offs_to_addr(info_linear); info_node->info_linear = info_linear; - __perf_env__insert_bpf_prog_info(env, info_node); + if (!__perf_env__insert_bpf_prog_info(env, info_node)) { + free(info_linear); + free(info_node); + } } up_write(&env->bpf_progs.lock); @@ -3235,7 +3238,8 @@ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) if (__do_read(ff, node->data, data_size)) goto out; - __perf_env__insert_btf(env, node); + if (!__perf_env__insert_btf(env, node)) + free(node); node = NULL; } diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index fd2597613f3d..61f10578e121 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -127,6 +127,7 @@ struct intel_pt { bool single_pebs; bool sample_pebs; + int pebs_data_src_fmt; struct evsel *pebs_evsel; u64 evt_sample_type; @@ -175,6 +176,7 @@ enum switch_state { struct intel_pt_pebs_event { struct evsel *evsel; u64 id; + int data_src_fmt; }; struct intel_pt_queue { @@ -2232,7 +2234,146 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack, } } -static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id) +#define P(a, b) PERF_MEM_S(a, b) +#define OP_LH (P(OP, LOAD) | P(LVL, HIT)) +#define LEVEL(x) P(LVLNUM, x) +#define REM P(REMOTE, REMOTE) +#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) + +#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10 +#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1) + +/* Based on kernel __intel_pmu_pebs_data_source_grt() and pebs_data_source */ +static const u64 pebs_data_source_grt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP Hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP Fwd */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* RAM hit|SNP Hit */ + OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* Remote L3 hit|SNP Hit */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* RAM hit|SNP None or Miss */ + OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* Remote RAM hit|SNP None or Miss */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */ +}; + +/* Based on kernel __intel_pmu_pebs_data_source_cmt() and pebs_data_source */ +static const u64 pebs_data_source_cmt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* L3 hit|SNP Hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP Fwd */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* RAM hit|SNP Hit */ + OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE), /* Remote L3 hit|SNP Hit */ + OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD), /* RAM hit|SNP None or Miss */ + OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM), /* Remote RAM hit|SNP None or Miss */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */ +}; + +/* Based on kernel pebs_set_tlb_lock() */ +static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock) +{ + /* + * TLB access + * 0 = did not miss 2nd level TLB + * 1 = missed 2nd level TLB + */ + if (tlb) + *val |= P(TLB, MISS) | P(TLB, L2); + else + *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + + /* locked prefix */ + if (lock) + *val |= P(LOCK, LOCKED); +} + +/* Based on kernel __grt_latency_data() */ +static u64 intel_pt_grt_latency_data(u8 dse, bool tlb, bool lock, bool blk, + const u64 *pebs_data_source) +{ + u64 val; + + dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK; + val = pebs_data_source[dse]; + + pebs_set_tlb_lock(&val, tlb, lock); + + if (blk) + val |= P(BLK, DATA); + else + val |= P(BLK, NA); + + return val; +} + +/* Default value for data source */ +#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\ + PERF_MEM_S(LVL, NA) |\ + PERF_MEM_S(SNOOP, NA) |\ + PERF_MEM_S(LOCK, NA) |\ + PERF_MEM_S(TLB, NA) |\ + PERF_MEM_S(LVLNUM, NA)) + +enum DATA_SRC_FORMAT { + DATA_SRC_FORMAT_ERR = -1, + DATA_SRC_FORMAT_NA = 0, + DATA_SRC_FORMAT_GRT = 1, + DATA_SRC_FORMAT_CMT = 2, +}; + +/* Based on kernel grt_latency_data() and cmt_latency_data */ +static u64 intel_pt_get_data_src(u64 mem_aux_info, int data_src_fmt) +{ + switch (data_src_fmt) { + case DATA_SRC_FORMAT_GRT: { + union { + u64 val; + struct { + unsigned int dse:4; + unsigned int locked:1; + unsigned int stlb_miss:1; + unsigned int fwd_blk:1; + unsigned int reserved:25; + }; + } x = {.val = mem_aux_info}; + return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk, + pebs_data_source_grt); + } + case DATA_SRC_FORMAT_CMT: { + union { + u64 val; + struct { + unsigned int dse:5; + unsigned int locked:1; + unsigned int stlb_miss:1; + unsigned int fwd_blk:1; + unsigned int reserved:24; + }; + } x = {.val = mem_aux_info}; + return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk, + pebs_data_source_cmt); + } + default: + return PERF_MEM_NA; + } +} + +static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, + u64 id, int data_src_fmt) { const struct intel_pt_blk_items *items = &ptq->state->items; struct perf_sample sample = { .ip = 0, }; @@ -2350,6 +2491,18 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse } } + if (sample_type & PERF_SAMPLE_DATA_SRC) { + if (items->has_mem_aux_info && data_src_fmt) { + if (data_src_fmt < 0) { + pr_err("Intel PT missing data_src info\n"); + return -1; + } + sample.data_src = intel_pt_get_data_src(items->mem_aux_info, data_src_fmt); + } else { + sample.data_src = PERF_MEM_NA; + } + } + if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) { u64 ax = items->has_rax ? items->rax : 0; /* Refer kernel's intel_hsw_transaction() */ @@ -2368,9 +2521,10 @@ static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; struct evsel *evsel = pt->pebs_evsel; + int data_src_fmt = pt->pebs_data_src_fmt; u64 id = evsel->core.id[0]; - return intel_pt_do_synth_pebs_sample(ptq, evsel, id); + return intel_pt_do_synth_pebs_sample(ptq, evsel, id, data_src_fmt); } static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) @@ -2395,7 +2549,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) hw_id); return intel_pt_synth_single_pebs_sample(ptq); } - err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id); + err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id, pe->data_src_fmt); if (err) return err; } @@ -3355,6 +3509,49 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt, event->itrace_start.tid); } +/* + * Events with data_src are identified by L1_Hit_Indication + * refer https://github.com/intel/perfmon + */ +static int intel_pt_data_src_fmt(struct intel_pt *pt, struct evsel *evsel) +{ + struct perf_env *env = pt->machine->env; + int fmt = DATA_SRC_FORMAT_NA; + + if (!env->cpuid) + return DATA_SRC_FORMAT_ERR; + + /* + * PEBS-via-PT is only supported on E-core non-hybrid. Of those only + * Gracemont and Crestmont have data_src. Check for: + * Alderlake N (Gracemont) + * Sierra Forest (Crestmont) + * Grand Ridge (Crestmont) + */ + + if (!strncmp(env->cpuid, "GenuineIntel,6,190,", 19)) + fmt = DATA_SRC_FORMAT_GRT; + + if (!strncmp(env->cpuid, "GenuineIntel,6,175,", 19) || + !strncmp(env->cpuid, "GenuineIntel,6,182,", 19)) + fmt = DATA_SRC_FORMAT_CMT; + + if (fmt == DATA_SRC_FORMAT_NA) + return fmt; + + /* + * Only data_src events are: + * mem-loads event=0xd0,umask=0x5 + * mem-stores event=0xd0,umask=0x6 + */ + if (evsel->core.attr.type == PERF_TYPE_RAW && + ((evsel->core.attr.config & 0xffff) == 0x5d0 || + (evsel->core.attr.config & 0xffff) == 0x6d0)) + return fmt; + + return DATA_SRC_FORMAT_NA; +} + static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, union perf_event *event, struct perf_sample *sample) @@ -3375,6 +3572,7 @@ static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, ptq->pebs[hw_id].evsel = evsel; ptq->pebs[hw_id].id = sample->id; + ptq->pebs[hw_id].data_src_fmt = intel_pt_data_src_fmt(pt, evsel); return 0; } @@ -3924,6 +4122,7 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt) } pt->single_pebs = true; pt->sample_pebs = true; + pt->pebs_data_src_fmt = intel_pt_data_src_fmt(pt, evsel); pt->pebs_evsel = evsel; } } diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c index 50a3c3e07160..2c421b475b3b 100644 --- a/tools/perf/util/intel-tpebs.c +++ b/tools/perf/util/intel-tpebs.c @@ -254,7 +254,7 @@ int tpebs_start(struct evlist *evsel_list) new = zalloc(sizeof(*new)); if (!new) { ret = -1; - zfree(name); + zfree(&name); goto err; } new->name = name; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 27d5345d2b30..20fd742984e3 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1003,7 +1003,7 @@ static int machine__get_running_kernel_start(struct machine *machine, err = kallsyms__get_symbol_start(filename, "_edata", &addr); if (err) - err = kallsyms__get_function_start(filename, "_etext", &addr); + err = kallsyms__get_symbol_start(filename, "_etext", &addr); if (!err) *end = addr; @@ -1974,7 +1974,7 @@ static void ip__resolve_ams(struct thread *thread, * Thus, we have to try consecutively until we find a match * or else, the symbol is unknown */ - thread__find_cpumode_addr_location(thread, ip, &al); + thread__find_cpumode_addr_location(thread, ip, /*symbols=*/true, &al); ams->addr = ip; ams->al_addr = al.addr; @@ -2076,7 +2076,7 @@ static int add_callchain_ip(struct thread *thread, al.sym = NULL; al.srcline = NULL; if (!cpumode) { - thread__find_cpumode_addr_location(thread, ip, &al); + thread__find_cpumode_addr_location(thread, ip, symbols, &al); } else { if (ip >= PERF_CONTEXT_MAX) { switch (ip) { @@ -2104,6 +2104,8 @@ static int add_callchain_ip(struct thread *thread, } if (symbols) thread__find_symbol(thread, *cpumode, ip, &al); + else + thread__find_map(thread, *cpumode, ip, &al); } if (al.sym != NULL) { diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 432399cbe5dd..09c9cc326c08 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -1136,8 +1136,13 @@ struct map *maps__find_next_entry(struct maps *maps, struct map *map) struct map *result = NULL; down_read(maps__lock(maps)); + while (!maps__maps_by_address_sorted(maps)) { + up_read(maps__lock(maps)); + maps__sort_by_address(maps); + down_read(maps__lock(maps)); + } i = maps__by_address_index(maps, map); - if (i < maps__nr_maps(maps)) + if (++i < maps__nr_maps(maps)) result = map__get(maps__maps_by_address(maps)[i]); up_read(maps__lock(maps)); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index bf5090f5220b..9c4adfb45f62 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -189,7 +189,7 @@ static bool perf_pmu__mem_events_supported(const char *mnt, struct perf_pmu *pmu if (!e->event_name) return true; - scnprintf(path, PATH_MAX, "%s/devices/%s/events/%s", mnt, pmu->name, e->event_name); + scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/events/%s", mnt, pmu->name, e->event_name); return !stat(path, &st); } diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index cb185c5659d6..68f5de2d79c7 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -266,11 +266,16 @@ pid_t nsinfo__pid(const struct nsinfo *nsi) return RC_CHK_ACCESS(nsi)->pid; } -pid_t nsinfo__in_pidns(const struct nsinfo *nsi) +bool nsinfo__in_pidns(const struct nsinfo *nsi) { return RC_CHK_ACCESS(nsi)->in_pidns; } +void nsinfo__set_in_pidns(struct nsinfo *nsi) +{ + RC_CHK_ACCESS(nsi)->in_pidns = true; +} + void nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc) { diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index 8c0731c6cbb7..e95c79b80e27 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -58,7 +58,8 @@ void nsinfo__clear_need_setns(struct nsinfo *nsi); pid_t nsinfo__tgid(const struct nsinfo *nsi); pid_t nsinfo__nstgid(const struct nsinfo *nsi); pid_t nsinfo__pid(const struct nsinfo *nsi); -pid_t nsinfo__in_pidns(const struct nsinfo *nsi); +bool nsinfo__in_pidns(const struct nsinfo *nsi); +void nsinfo__set_in_pidns(struct nsinfo *nsi); void nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc); void nsinfo__mountns_exit(struct nscookie *nc); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 61bdda01a05a..8b4e346808b4 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -33,12 +33,12 @@ #define UNIT_MAX_LEN 31 /* max length for event unit name */ enum event_source { - /* An event loaded from /sys/devices/<pmu>/events. */ + /* An event loaded from /sys/bus/event_source/devices/<pmu>/events. */ EVENT_SRC_SYSFS, /* An event loaded from a CPUID matched json file. */ EVENT_SRC_CPU_JSON, /* - * An event loaded from a /sys/devices/<pmu>/identifier matched json + * An event loaded from a /sys/bus/event_source/devices/<pmu>/identifier matched json * file. */ EVENT_SRC_SYS_JSON, @@ -593,7 +593,7 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, }; if (pmu_events_table__find_event(pmu->events_table, pmu, name, update_alias, &data) == 0) - pmu->cpu_json_aliases++; + pmu->cpu_common_json_aliases++; } pmu->sysfs_aliases++; break; @@ -1807,9 +1807,10 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu) if (pmu->cpu_aliases_added) nr += pmu->cpu_json_aliases; else if (pmu->events_table) - nr += pmu_events_table__num_events(pmu->events_table, pmu) - pmu->cpu_json_aliases; + nr += pmu_events_table__num_events(pmu->events_table, pmu) - + pmu->cpu_common_json_aliases; else - assert(pmu->cpu_json_aliases == 0); + assert(pmu->cpu_json_aliases == 0 && pmu->cpu_common_json_aliases == 0); return pmu->selectable ? nr + 1 : nr; } diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 4397c48ad569..bcd278b9b546 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -131,6 +131,11 @@ struct perf_pmu { uint32_t cpu_json_aliases; /** @sys_json_aliases: Number of json event aliases loaded matching the PMU's identifier. */ uint32_t sys_json_aliases; + /** + * @cpu_common_json_aliases: Number of json events that overlapped with sysfs when + * loading all sysfs events. + */ + uint32_t cpu_common_json_aliases; /** @sysfs_aliases_loaded: Are sysfs aliases loaded from disk? */ bool sysfs_aliases_loaded; /** diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c index d7d67e09d759..362596ed2729 100644 --- a/tools/perf/util/pmus.c +++ b/tools/perf/util/pmus.c @@ -701,11 +701,25 @@ char *perf_pmus__default_pmu_name(void) struct perf_pmu *evsel__find_pmu(const struct evsel *evsel) { struct perf_pmu *pmu = evsel->pmu; + bool legacy_core_type; - if (!pmu) { - pmu = perf_pmus__find_by_type(evsel->core.attr.type); - ((struct evsel *)evsel)->pmu = pmu; + if (pmu) + return pmu; + + pmu = perf_pmus__find_by_type(evsel->core.attr.type); + legacy_core_type = + evsel->core.attr.type == PERF_TYPE_HARDWARE || + evsel->core.attr.type == PERF_TYPE_HW_CACHE; + if (!pmu && legacy_core_type) { + if (perf_pmus__supports_extended_type()) { + u32 type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT; + + pmu = perf_pmus__find_by_type(type); + } else { + pmu = perf_pmus__find_core_pmu(); + } } + ((struct evsel *)evsel)->pmu = pmu; return pmu; } diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index 81e0135cddf0..a1c71d9793bd 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -282,6 +282,7 @@ bool is_event_supported(u8 type, u64 config) ret = evsel__open(evsel, NULL, tmap) >= 0; } + evsel__close(evsel); evsel__delete(evsel); } diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index ee3d43a7ba45..e7f36ea9e2fa 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -79,7 +79,7 @@ struct pyrf_event { }; #define sample_members \ - sample_member_def(sample_ip, ip, T_ULONGLONG, "event type"), \ + sample_member_def(sample_ip, ip, T_ULONGLONG, "event ip"), \ sample_member_def(sample_pid, pid, T_INT, "event pid"), \ sample_member_def(sample_tid, tid, T_INT, "event tid"), \ sample_member_def(sample_time, time, T_ULONGLONG, "event timestamp"), \ @@ -512,6 +512,11 @@ static PyObject *pyrf_event__new(union perf_event *event) event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)) return NULL; + // FIXME this better be dynamic or we need to parse everything + // before calling perf_mmap__consume(), including tracepoint fields. + if (sizeof(pevent->event) < event->header.size) + return NULL; + ptype = pyrf_event__type[event->header.type]; pevent = PyObject_New(struct pyrf_event, ptype); if (pevent != NULL) @@ -1011,20 +1016,22 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, evsel = evlist__event2evsel(evlist, event); if (!evsel) { + Py_DECREF(pyevent); Py_INCREF(Py_None); return Py_None; } pevent->evsel = evsel; - err = evsel__parse_sample(evsel, event, &pevent->sample); - - /* Consume the even only after we parsed it out. */ perf_mmap__consume(&md->core); - if (err) + err = evsel__parse_sample(evsel, &pevent->event, &pevent->sample); + if (err) { + Py_DECREF(pyevent); return PyErr_Format(PyExc_OSError, "perf: can't parse sample, err=%d", err); + } + return pyevent; } end: diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 99376c12dd8e..7c49997fab3a 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -154,6 +154,7 @@ static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type { const struct evsel *cur; int evsel_ctx = evsel_context(evsel); + struct perf_pmu *evsel_pmu = evsel__find_pmu(evsel); evlist__for_each_entry(evsel->evlist, cur) { struct perf_stat_aggr *aggr; @@ -180,7 +181,7 @@ static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type * Except the SW CLOCK events, * ignore if not the PMU we're looking for. */ - if ((type != STAT_NSECS) && (evsel->pmu != cur->pmu)) + if ((type != STAT_NSECS) && (evsel_pmu != evsel__find_pmu(cur))) continue; aggr = &cur->stats->aggr[aggr_idx]; diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index c6f369b5d893..36c1d3090689 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -90,11 +90,23 @@ int filename__read_build_id(const char *filename, struct build_id *bid) { FILE *fp; int ret = -1; - bool need_swap = false; + bool need_swap = false, elf32; u8 e_ident[EI_NIDENT]; - size_t buf_size; - void *buf; int i; + union { + struct { + Elf32_Ehdr ehdr32; + Elf32_Phdr *phdr32; + }; + struct { + Elf64_Ehdr ehdr64; + Elf64_Phdr *phdr64; + }; + } hdrs; + void *phdr; + size_t phdr_size; + void *buf = NULL; + size_t buf_size = 0; fp = fopen(filename, "r"); if (fp == NULL) @@ -108,117 +120,79 @@ int filename__read_build_id(const char *filename, struct build_id *bid) goto out; need_swap = check_need_swap(e_ident[EI_DATA]); + elf32 = e_ident[EI_CLASS] == ELFCLASS32; - /* for simplicity */ - fseek(fp, 0, SEEK_SET); - - if (e_ident[EI_CLASS] == ELFCLASS32) { - Elf32_Ehdr ehdr; - Elf32_Phdr *phdr; - - if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) - goto out; + if (fread(elf32 ? (void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64, + elf32 ? sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64), + 1, fp) != 1) + goto out; - if (need_swap) { - ehdr.e_phoff = bswap_32(ehdr.e_phoff); - ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); - ehdr.e_phnum = bswap_16(ehdr.e_phnum); + if (need_swap) { + if (elf32) { + hdrs.ehdr32.e_phoff = bswap_32(hdrs.ehdr32.e_phoff); + hdrs.ehdr32.e_phentsize = bswap_16(hdrs.ehdr32.e_phentsize); + hdrs.ehdr32.e_phnum = bswap_16(hdrs.ehdr32.e_phnum); + } else { + hdrs.ehdr64.e_phoff = bswap_64(hdrs.ehdr64.e_phoff); + hdrs.ehdr64.e_phentsize = bswap_16(hdrs.ehdr64.e_phentsize); + hdrs.ehdr64.e_phnum = bswap_16(hdrs.ehdr64.e_phnum); } + } + phdr_size = elf32 ? hdrs.ehdr32.e_phentsize * hdrs.ehdr32.e_phnum + : hdrs.ehdr64.e_phentsize * hdrs.ehdr64.e_phnum; + phdr = malloc(phdr_size); + if (phdr == NULL) + goto out; - buf_size = ehdr.e_phentsize * ehdr.e_phnum; - buf = malloc(buf_size); - if (buf == NULL) - goto out; - - fseek(fp, ehdr.e_phoff, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; - - for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { - void *tmp; - long offset; - - if (need_swap) { - phdr->p_type = bswap_32(phdr->p_type); - phdr->p_offset = bswap_32(phdr->p_offset); - phdr->p_filesz = bswap_32(phdr->p_filesz); - } - - if (phdr->p_type != PT_NOTE) - continue; - - buf_size = phdr->p_filesz; - offset = phdr->p_offset; - tmp = realloc(buf, buf_size); - if (tmp == NULL) - goto out_free; - - buf = tmp; - fseek(fp, offset, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; + fseek(fp, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET); + if (fread(phdr, phdr_size, 1, fp) != 1) + goto out_free; - ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) { - ret = bid->size; - break; - } - } - } else { - Elf64_Ehdr ehdr; - Elf64_Phdr *phdr; + if (elf32) + hdrs.phdr32 = phdr; + else + hdrs.phdr64 = phdr; - if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) - goto out; + for (i = 0; i < elf32 ? hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum; i++) { + size_t p_filesz; if (need_swap) { - ehdr.e_phoff = bswap_64(ehdr.e_phoff); - ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); - ehdr.e_phnum = bswap_16(ehdr.e_phnum); + if (elf32) { + hdrs.phdr32[i].p_type = bswap_32(hdrs.phdr32[i].p_type); + hdrs.phdr32[i].p_offset = bswap_32(hdrs.phdr32[i].p_offset); + hdrs.phdr32[i].p_filesz = bswap_32(hdrs.phdr32[i].p_offset); + } else { + hdrs.phdr64[i].p_type = bswap_32(hdrs.phdr64[i].p_type); + hdrs.phdr64[i].p_offset = bswap_64(hdrs.phdr64[i].p_offset); + hdrs.phdr64[i].p_filesz = bswap_64(hdrs.phdr64[i].p_filesz); + } } + if ((elf32 ? hdrs.phdr32[i].p_type : hdrs.phdr64[i].p_type) != PT_NOTE) + continue; - buf_size = ehdr.e_phentsize * ehdr.e_phnum; - buf = malloc(buf_size); - if (buf == NULL) - goto out; - - fseek(fp, ehdr.e_phoff, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; - - for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { + p_filesz = elf32 ? hdrs.phdr32[i].p_filesz : hdrs.phdr64[i].p_filesz; + if (p_filesz > buf_size) { void *tmp; - long offset; - - if (need_swap) { - phdr->p_type = bswap_32(phdr->p_type); - phdr->p_offset = bswap_64(phdr->p_offset); - phdr->p_filesz = bswap_64(phdr->p_filesz); - } - if (phdr->p_type != PT_NOTE) - continue; - - buf_size = phdr->p_filesz; - offset = phdr->p_offset; + buf_size = p_filesz; tmp = realloc(buf, buf_size); if (tmp == NULL) goto out_free; - buf = tmp; - fseek(fp, offset, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; + } + fseek(fp, elf32 ? hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET); + if (fread(buf, p_filesz, 1, fp) != 1) + goto out_free; - ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) { - ret = bid->size; - break; - } + ret = read_build_id(buf, p_filesz, bid, need_swap); + if (ret == 0) { + ret = bid->size; + break; } } out_free: free(buf); + free(phdr); out: fclose(fp); return ret; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 3bbf173ad822..c0ec5ed4f1aa 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1405,6 +1405,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, goto out_err; } } + map__zput(new_node->map); free(new_node); } diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 0ffdd52d86d7..309d573eac9a 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -406,7 +406,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bo } void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, - struct addr_location *al) + bool symbols, struct addr_location *al) { size_t i; const u8 cpumodes[] = { @@ -417,7 +417,11 @@ void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, }; for (i = 0; i < ARRAY_SIZE(cpumodes); i++) { - thread__find_symbol(thread, cpumodes[i], addr, al); + if (symbols) + thread__find_symbol(thread, cpumodes[i], addr, al); + else + thread__find_map(thread, cpumodes[i], addr, al); + if (al->map) break; } diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 6cbf6eb2812e..1fb32e7d62a4 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -122,7 +122,7 @@ struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al); void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, - struct addr_location *al); + bool symbols, struct addr_location *al); int thread__memcpy(struct thread *thread, struct machine *machine, void *buf, u64 ip, int len, bool *is64bit); diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c index 32c39cfe209b..4c6a86e1cb54 100644 --- a/tools/perf/util/units.c +++ b/tools/perf/util/units.c @@ -64,7 +64,7 @@ unsigned long convert_unit(unsigned long value, char *unit) int unit_number__scnprintf(char *buf, size_t size, u64 n) { - char unit[4] = "BKMG"; + char unit[] = "BKMG"; int i = 0; while (((n / 1024) > 1) && (i < 3)) { diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c index e63dc11fa3a5..48e25be6e163 100644 --- a/tools/power/cpupower/bench/parse.c +++ b/tools/power/cpupower/bench/parse.c @@ -120,6 +120,10 @@ out_dir: struct config *prepare_default_config() { struct config *config = malloc(sizeof(struct config)); + if (!config) { + perror("malloc"); + return NULL; + } dprintf("loading defaults\n"); diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c index ae6af354a81d..6ab9139f16af 100644 --- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c @@ -33,7 +33,7 @@ static int mperf_get_count_percent(unsigned int self_id, double *percent, unsigned int cpu); static int mperf_get_count_freq(unsigned int id, unsigned long long *count, unsigned int cpu); -static struct timespec time_start, time_end; +static struct timespec *time_start, *time_end; static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = { { @@ -174,7 +174,7 @@ static int mperf_get_count_percent(unsigned int id, double *percent, dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n", mperf_cstates[id].name, mperf_diff, tsc_diff); } else if (max_freq_mode == MAX_FREQ_SYSFS) { - timediff = max_frequency * timespec_diff_us(time_start, time_end); + timediff = max_frequency * timespec_diff_us(time_start[cpu], time_end[cpu]); *percent = 100.0 * mperf_diff / timediff; dprint("%s: MAXFREQ - mperf_diff: %llu, time_diff: %llu\n", mperf_cstates[id].name, mperf_diff, timediff); @@ -207,7 +207,7 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count, if (max_freq_mode == MAX_FREQ_TSC_REF) { /* Calculate max_freq from TSC count */ tsc_diff = tsc_at_measure_end[cpu] - tsc_at_measure_start[cpu]; - time_diff = timespec_diff_us(time_start, time_end); + time_diff = timespec_diff_us(time_start[cpu], time_end[cpu]); max_frequency = tsc_diff / time_diff; } @@ -226,9 +226,8 @@ static int mperf_start(void) { int cpu; - clock_gettime(CLOCK_REALTIME, &time_start); - for (cpu = 0; cpu < cpu_count; cpu++) { + clock_gettime(CLOCK_REALTIME, &time_start[cpu]); mperf_get_tsc(&tsc_at_measure_start[cpu]); mperf_init_stats(cpu); } @@ -241,11 +240,11 @@ static int mperf_stop(void) int cpu; for (cpu = 0; cpu < cpu_count; cpu++) { - mperf_measure_stats(cpu); + clock_gettime(CLOCK_REALTIME, &time_end[cpu]); mperf_get_tsc(&tsc_at_measure_end[cpu]); + mperf_measure_stats(cpu); } - clock_gettime(CLOCK_REALTIME, &time_end); return 0; } @@ -349,6 +348,8 @@ struct cpuidle_monitor *mperf_register(void) aperf_current_count = calloc(cpu_count, sizeof(unsigned long long)); tsc_at_measure_start = calloc(cpu_count, sizeof(unsigned long long)); tsc_at_measure_end = calloc(cpu_count, sizeof(unsigned long long)); + time_start = calloc(cpu_count, sizeof(struct timespec)); + time_end = calloc(cpu_count, sizeof(struct timespec)); mperf_monitor.name_len = strlen(mperf_monitor.name); return &mperf_monitor; } @@ -361,6 +362,8 @@ void mperf_unregister(void) free(aperf_current_count); free(tsc_at_measure_start); free(tsc_at_measure_end); + free(time_start); + free(time_end); free(is_valid); } diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 067717bce1d4..e4b00e13302b 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -33,6 +33,9 @@ name as necessary to disambiguate it from others is necessary. Note that option msr0xXXX is a hex offset, eg. msr0x10 /sys/path... is an absolute path to a sysfs attribute <device> is a perf device from /sys/bus/event_source/devices/<device> eg. cstate_core + On Intel hybrid platforms, instead of one "cpu" perf device there are two, "cpu_core" and "cpu_atom" devices for P and E cores respectively. + Turbostat, in this case, allow user to use "cpu" device and will automatically detect the type of a CPU and translate it to "cpu_core" and "cpu_atom" accordingly. + For a complete example see "ADD PERF COUNTER EXAMPLE #2 (using virtual "cpu" device)". <event> is a perf event for given device from /sys/bus/event_source/devices/<device>/events/<event> eg. c1-residency perf/cstate_core/c1-residency would then use /sys/bus/event_source/devices/cstate_core/events/c1-residency @@ -165,6 +168,8 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. .PP +\fBCoreThr\fP Core Thermal Throttling events during the measurement interval. Note that events since boot can be find in /sys/devices/system/cpu/cpu*/thermal_throttle/* +.PP \fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms or /sys/class/drm/card0/gt/gt0/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used. .PP \fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used. @@ -194,6 +199,7 @@ The system configuration dump (if --quiet is not used) is followed by statistics \fBUncMHz\fP per-package uncore MHz, instantaneous sample. .PP \fBUMHz1.0\fP per-package uncore MHz for domain=1 and fabric_cluster=0, instantaneous sample. System summary is the average of all packages. +For the "--show" and "--hide" options, use "UncMHz" to operate on all UMHz*.* as a group. .SH TOO MUCH INFORMATION EXAMPLE By default, turbostat dumps all possible information -- a system configuration header, followed by columns for all counters. This is ideal for remote debugging, use the "--out" option to save everything to a text file, and get that file to the expert helping you debug. @@ -387,6 +393,28 @@ CPU pCPU%c1 CPU%c1 .fi +.SH ADD PERF COUNTER EXAMPLE #2 (using virtual cpu device) +Here we run on hybrid, Raptor Lake platform. +We limit turbostat to show output for just cpu0 (pcore) and cpu12 (ecore). +We add a counter showing number of L3 cache misses, using virtual "cpu" device, +labeling it with the column header, "VCMISS". +We add a counter showing number of L3 cache misses, using virtual "cpu_core" device, +labeling it with the column header, "PCMISS". This will fail on ecore cpu12. +We add a counter showing number of L3 cache misses, using virtual "cpu_atom" device, +labeling it with the column header, "ECMISS". This will fail on pcore cpu0. +We display it only once, after the conclusion of 0.1 second sleep. +.nf +sudo ./turbostat --quiet --cpu 0,12 --show CPU --add perf/cpu/cache-misses,cpu,delta,raw,VCMISS --add perf/cpu_core/cache-misses,cpu,delta,raw,PCMISS --add perf/cpu_atom/cache-misses,cpu,delta,raw,ECMISS sleep .1 +turbostat: added_perf_counters_init_: perf/cpu_atom/cache-misses: failed to open counter on cpu0 +turbostat: added_perf_counters_init_: perf/cpu_core/cache-misses: failed to open counter on cpu12 +0.104630 sec +CPU ECMISS PCMISS VCMISS +- 0x0000000000000000 0x0000000000000000 0x0000000000000000 +0 0x0000000000000000 0x0000000000007951 0x0000000000007796 +12 0x000000000001137a 0x0000000000000000 0x0000000000011392 + +.fi + .SH ADD PMT COUNTER EXAMPLE Here we limit turbostat to showing just the CPU number 0. We add two counters, showing crystal clock count and the DC6 residency. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index a5ebee8b23bb..b663a76d31f1 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -31,6 +31,9 @@ ) // end copied section +#define CPUID_LEAF_MODEL_ID 0x1A +#define CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT 24 + #define X86_VENDOR_INTEL 0 #include INTEL_FAMILY_HEADER @@ -64,6 +67,7 @@ #include <stdbool.h> #include <assert.h> #include <linux/kernel.h> +#include <limits.h> #define UNUSED(x) (void)(x) @@ -89,6 +93,11 @@ #define PERF_DEV_NAME_BYTES 32 #define PERF_EVT_NAME_BYTES 32 +#define INTEL_ECORE_TYPE 0x20 +#define INTEL_PCORE_TYPE 0x40 + +#define ROUND_UP_TO_PAGE_SIZE(n) (((n) + 0x1000UL-1UL) & ~(0x1000UL-1UL)) + enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M }; enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE }; @@ -1079,8 +1088,8 @@ int backwards_count; char *progname; #define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */ -cpu_set_t *cpu_present_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset; -size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size; +cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset; +size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size; #define MAX_ADDED_THREAD_COUNTERS 24 #define MAX_ADDED_CORE_COUNTERS 8 #define MAX_ADDED_PACKAGE_COUNTERS 16 @@ -1848,6 +1857,7 @@ struct cpu_topology { int logical_node_id; /* 0-based count within the package */ int physical_core_id; int thread_id; + int type; cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ } *cpus; @@ -3233,7 +3243,7 @@ void delta_core(struct core_data *new, struct core_data *old) old->c6 = new->c6 - old->c6; old->c7 = new->c7 - old->c7; old->core_temp_c = new->core_temp_c; - old->core_throt_cnt = new->core_throt_cnt; + old->core_throt_cnt = new->core_throt_cnt - old->core_throt_cnt; old->mc6_us = new->mc6_us - old->mc6_us; DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value); @@ -4482,6 +4492,38 @@ unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id) return (value & value_mask) >> value_shift; } + +/* Rapl domain enumeration helpers */ +static inline int get_rapl_num_domains(void) +{ + int num_packages = topo.max_package_id + 1; + int num_cores_per_package; + int num_cores; + + if (!platform->has_per_core_rapl) + return num_packages; + + num_cores_per_package = topo.max_core_id + 1; + num_cores = num_cores_per_package * num_packages; + + return num_cores; +} + +static inline int get_rapl_domain_id(int cpu) +{ + int nr_cores_per_package = topo.max_core_id + 1; + int rapl_core_id; + + if (!platform->has_per_core_rapl) + return cpus[cpu].physical_package_id; + + /* Compute the system-wide unique core-id for @cpu */ + rapl_core_id = cpus[cpu].physical_core_id; + rapl_core_id += cpus[cpu].physical_package_id * nr_cores_per_package; + + return rapl_core_id; +} + /* * get_counters(...) * migrate to cpu @@ -4535,7 +4577,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) goto done; if (platform->has_per_core_rapl) { - status = get_rapl_counters(cpu, c->core_id, c, p); + status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p); if (status != 0) return status; } @@ -4601,7 +4643,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) p->sys_lpi = cpuidle_cur_sys_lpi_us; if (!platform->has_per_core_rapl) { - status = get_rapl_counters(cpu, p->package_id, c, p); + status = get_rapl_counters(cpu, get_rapl_domain_id(cpu), c, p); if (status != 0) return status; } @@ -5659,6 +5701,32 @@ int init_thread_id(int cpu) return 0; } +int set_my_cpu_type(void) +{ + unsigned int eax, ebx, ecx, edx; + unsigned int max_level; + + __cpuid(0, max_level, ebx, ecx, edx); + + if (max_level < CPUID_LEAF_MODEL_ID) + return 0; + + __cpuid(CPUID_LEAF_MODEL_ID, eax, ebx, ecx, edx); + + return (eax >> CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT); +} + +int set_cpu_hybrid_type(int cpu) +{ + if (cpu_migrate(cpu)) + return -1; + + int type = set_my_cpu_type(); + + cpus[cpu].type = type; + return 0; +} + /* * snapshot_proc_interrupts() * @@ -6178,8 +6246,16 @@ int check_for_cap_sys_rawio(void) int ret = 0; caps = cap_get_proc(); - if (caps == NULL) + if (caps == NULL) { + /* + * CONFIG_MULTIUSER=n kernels have no cap_get_proc() + * Allow them to continue and attempt to access MSRs + */ + if (errno == ENOSYS) + return 0; + return 1; + } if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) { ret = 1; @@ -6342,7 +6418,8 @@ static void probe_intel_uncore_frequency_legacy(void) sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i, j); - if (access(path_base, R_OK)) + sprintf(path, "%s/current_freq_khz", path_base); + if (access(path, R_OK)) continue; BIC_PRESENT(BIC_UNCORE_MHZ); @@ -6410,7 +6487,18 @@ static void probe_intel_uncore_frequency_cluster(void) sprintf(path, "%s/current_freq_khz", path_base); sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id); - add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); + /* + * Once add_couter() is called, that counter is always read + * and reported -- So it is effectively (enabled & present). + * Only call add_counter() here if legacy BIC_UNCORE_MHZ (UncMHz) + * is (enabled). Since we are in this routine, we + * know we will not probe and set (present) the legacy counter. + * + * This allows "--show/--hide UncMHz" to be effective for + * the clustered MHz counters, as a group. + */ + if BIC_IS_ENABLED(BIC_UNCORE_MHZ) + add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); if (quiet) continue; @@ -7524,7 +7612,7 @@ void linux_perf_init(void) void rapl_perf_init(void) { - const unsigned int num_domains = (platform->has_per_core_rapl ? topo.max_core_id : topo.max_package_id) + 1; + const unsigned int num_domains = get_rapl_num_domains(); bool *domain_visited = calloc(num_domains, sizeof(bool)); rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain)); @@ -7565,8 +7653,7 @@ void rapl_perf_init(void) continue; /* Skip already seen and handled RAPL domains */ - next_domain = - platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id; + next_domain = get_rapl_domain_id(cpu); assert(next_domain < num_domains); @@ -8188,6 +8275,33 @@ int dir_filter(const struct dirent *dirp) return 0; } +char *possible_file = "/sys/devices/system/cpu/possible"; +char possible_buf[1024]; + +int initialize_cpu_possible_set(void) +{ + FILE *fp; + + fp = fopen(possible_file, "r"); + if (!fp) { + warn("open %s", possible_file); + return -1; + } + if (fread(possible_buf, sizeof(char), 1024, fp) == 0) { + warn("read %s", possible_file); + goto err; + } + if (parse_cpu_str(possible_buf, cpu_possible_set, cpu_possible_setsize)) { + warnx("%s: cpu str malformat %s\n", possible_file, cpu_effective_str); + goto err; + } + return 0; + +err: + fclose(fp); + return -1; +} + void topology_probe(bool startup) { int i; @@ -8220,6 +8334,16 @@ void topology_probe(bool startup) for_all_proc_cpus(mark_cpu_present); /* + * Allocate and initialize cpu_possible_set + */ + cpu_possible_set = CPU_ALLOC((topo.max_cpu_num + 1)); + if (cpu_possible_set == NULL) + err(3, "CPU_ALLOC"); + cpu_possible_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); + CPU_ZERO_S(cpu_possible_setsize, cpu_possible_set); + initialize_cpu_possible_set(); + + /* * Allocate and initialize cpu_effective_set */ cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1)); @@ -8287,6 +8411,8 @@ void topology_probe(bool startup) for_all_proc_cpus(init_thread_id); + for_all_proc_cpus(set_cpu_hybrid_type); + /* * For online cpus * find max_core_id, max_package_id @@ -8551,6 +8677,35 @@ void check_perf_access(void) bic_enabled &= ~BIC_IPC; } +bool perf_has_hybrid_devices(void) +{ + /* + * 0: unknown + * 1: has separate perf device for p and e core + * -1: doesn't have separate perf device for p and e core + */ + static int cached; + + if (cached > 0) + return true; + + if (cached < 0) + return false; + + if (access("/sys/bus/event_source/devices/cpu_core", F_OK)) { + cached = -1; + return false; + } + + if (access("/sys/bus/event_source/devices/cpu_atom", F_OK)) { + cached = -1; + return false; + } + + cached = 1; + return true; +} + int added_perf_counters_init_(struct perf_counter_info *pinfo) { size_t num_domains = 0; @@ -8607,29 +8762,56 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) if (domain_visited[next_domain]) continue; - perf_type = read_perf_type(pinfo->device); + /* + * Intel hybrid platforms expose different perf devices for P and E cores. + * Instead of one, "/sys/bus/event_source/devices/cpu" device, there are + * "/sys/bus/event_source/devices/{cpu_core,cpu_atom}". + * + * This makes it more complicated to the user, because most of the counters + * are available on both and have to be handled manually, otherwise. + * + * Code below, allow user to use the old "cpu" name, which is translated accordingly. + */ + const char *perf_device = pinfo->device; + + if (strcmp(perf_device, "cpu") == 0 && perf_has_hybrid_devices()) { + switch (cpus[cpu].type) { + case INTEL_PCORE_TYPE: + perf_device = "cpu_core"; + break; + + case INTEL_ECORE_TYPE: + perf_device = "cpu_atom"; + break; + + default: /* Don't change, we will probably fail and report a problem soon. */ + break; + } + } + + perf_type = read_perf_type(perf_device); if (perf_type == (unsigned int)-1) { warnx("%s: perf/%s/%s: failed to read %s", - __func__, pinfo->device, pinfo->event, "type"); + __func__, perf_device, pinfo->event, "type"); continue; } - perf_config = read_perf_config(pinfo->device, pinfo->event); + perf_config = read_perf_config(perf_device, pinfo->event); if (perf_config == (unsigned int)-1) { warnx("%s: perf/%s/%s: failed to read %s", - __func__, pinfo->device, pinfo->event, "config"); + __func__, perf_device, pinfo->event, "config"); continue; } /* Scale is not required, some counters just don't have it. */ - perf_scale = read_perf_scale(pinfo->device, pinfo->event); + perf_scale = read_perf_scale(perf_device, pinfo->event); if (perf_scale == 0.0) perf_scale = 1.0; fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0); if (fd_perf == -1) { warnx("%s: perf/%s/%s: failed to open counter on cpu%d", - __func__, pinfo->device, pinfo->event, cpu); + __func__, perf_device, pinfo->event, cpu); continue; } @@ -8639,7 +8821,7 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) if (debug) fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", - pinfo->device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); + perf_device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); } pinfo = pinfo->next; @@ -8762,7 +8944,7 @@ struct pmt_mmio *pmt_mmio_open(unsigned int target_guid) if (fd_pmt == -1) goto loop_cleanup_and_break; - mmap_size = (size + 0x1000UL) & (~0x1000UL); + mmap_size = ROUND_UP_TO_PAGE_SIZE(size); mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0); if (mmio != MAP_FAILED) { @@ -9001,6 +9183,18 @@ void turbostat_init() } } +void affinitize_child(void) +{ + /* Prefer cpu_possible_set, if available */ + if (sched_setaffinity(0, cpu_possible_setsize, cpu_possible_set)) { + warn("sched_setaffinity cpu_possible_set"); + + /* Otherwise, allow child to run on same cpu set as turbostat */ + if (sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set)) + warn("sched_setaffinity cpu_allowed_set"); + } +} + int fork_it(char **argv) { pid_t child_pid; @@ -9016,6 +9210,7 @@ int fork_it(char **argv) child_pid = fork(); if (!child_pid) { /* child */ + affinitize_child(); execvp(argv[0], argv); err(errno, "exec %s", argv[0]); } else { diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index 248ab790d143..75d6d61279d6 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -251,8 +251,16 @@ void bpf_obj_drop_impl(void *kptr, void *meta) __ksym; #define bpf_obj_new(type) ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL)) #define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL) -void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; -void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; +int bpf_list_push_front_impl(struct bpf_list_head *head, + struct bpf_list_node *node, + void *meta, __u64 off) __ksym; +#define bpf_list_push_front(head, node) bpf_list_push_front_impl(head, node, NULL, 0) + +int bpf_list_push_back_impl(struct bpf_list_head *head, + struct bpf_list_node *node, + void *meta, __u64 off) __ksym; +#define bpf_list_push_back(head, node) bpf_list_push_back_impl(head, node, NULL, 0) + struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ksym; struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym; struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root, @@ -325,6 +333,17 @@ static __always_inline const struct cpumask *cast_mask(struct bpf_cpumask *mask) return (const struct cpumask *)mask; } +/* + * Return true if task @p cannot migrate to a different CPU, false + * otherwise. + */ +static inline bool is_migration_disabled(const struct task_struct *p) +{ + if (bpf_core_field_exists(p->migration_disabled)) + return p->migration_disabled; + return false; +} + /* rcu */ void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 0aa4005017c7..5f2afd95de43 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -98,7 +98,9 @@ else ifneq ($(CROSS_COMPILE),) # Allow userspace to override CLANG_CROSS_FLAGS to specify their own # sysroots and flags or to avoid the GCC call in pure Clang builds. ifeq ($(CLANG_CROSS_FLAGS),) -CLANG_CROSS_FLAGS := --target=$(notdir $(CROSS_COMPILE:%-=%)) +CLANG_TARGET := $(notdir $(CROSS_COMPILE:%-=%)) +CLANG_TARGET := $(subst s390-linux,s390x-linux,$(CLANG_TARGET)) +CLANG_CROSS_FLAGS := --target=$(CLANG_TARGET) GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)gcc 2>/dev/null)) ifneq ($(GCC_TOOLCHAIN_DIR),) CLANG_CROSS_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) diff --git a/tools/sound/dapm-graph b/tools/sound/dapm-graph index f14bdfedee8f..b6196ee5065a 100755 --- a/tools/sound/dapm-graph +++ b/tools/sound/dapm-graph @@ -10,7 +10,7 @@ set -eu STYLE_COMPONENT_ON="color=dodgerblue;style=bold" STYLE_COMPONENT_OFF="color=gray40;style=filled;fillcolor=gray90" -STYLE_NODE_ON="shape=box,style=bold,color=green4" +STYLE_NODE_ON="shape=box,style=bold,color=green4,fillcolor=white" STYLE_NODE_OFF="shape=box,style=filled,color=gray30,fillcolor=gray95" # Print usage and exit diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index dacad94e2be4..bad227ee1b5b 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1371,7 +1371,10 @@ sub __eval_option { # If a variable contains itself, use the default var if (($var eq $name) && defined($opt{$var})) { $o = $opt{$var}; - $retval = "$retval$o"; + # Only append if the default doesn't contain itself + if ($o !~ m/\$\{$var\}/) { + $retval = "$retval$o"; + } } elsif (defined($opt{$o})) { $o = $opt{$o}; $retval = "$retval$o"; @@ -2419,6 +2422,11 @@ sub get_version { return if ($have_version); doprint "$make kernelrelease ... "; $version = `$make -s kernelrelease | tail -1`; + if (!length($version)) { + run_command "$make allnoconfig" or return 0; + doprint "$make kernelrelease ... "; + $version = `$make -s kernelrelease | tail -1`; + } chomp($version); doprint "$version\n"; $have_version = 1; @@ -2960,8 +2968,6 @@ sub run_bisect_test { my $failed = 0; my $result; - my $output; - my $ret; $in_bisect = 1; @@ -4300,6 +4306,14 @@ if (defined($opt{"LOG_FILE"})) { if ($opt{"CLEAR_LOG"}) { unlink $opt{"LOG_FILE"}; } + + if (! -e $opt{"LOG_FILE"} && $opt{"LOG_FILE"} =~ m,^(.*/),) { + my $dir = $1; + if (! -d $dir) { + mkpath($dir) or die "Failed to create directories '$dir': $!"; + print "\nThe log directory $dir did not exist, so it was created.\n"; + } + } open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}"; LOG->autoflush(1); } diff --git a/tools/testing/kunit/qemu_configs/sh.py b/tools/testing/kunit/qemu_configs/sh.py index 78a474a5b95f..f00cb89fdef6 100644 --- a/tools/testing/kunit/qemu_configs/sh.py +++ b/tools/testing/kunit/qemu_configs/sh.py @@ -7,7 +7,9 @@ CONFIG_CPU_SUBTYPE_SH7751R=y CONFIG_MEMORY_START=0x0c000000 CONFIG_SH_RTS7751R2D=y CONFIG_RTS7751R2D_PLUS=y -CONFIG_SERIAL_SH_SCI=y''', +CONFIG_SERIAL_SH_SCI=y +CONFIG_CMDLINE_EXTEND=y +''', qemu_arch='sh4', kernel_path='arch/sh/boot/zImage', kernel_command_line='console=ttySC1', diff --git a/tools/testing/kunit/qemu_configs/sparc.py b/tools/testing/kunit/qemu_configs/sparc.py index e975c4331a7c..2019550a1b69 100644 --- a/tools/testing/kunit/qemu_configs/sparc.py +++ b/tools/testing/kunit/qemu_configs/sparc.py @@ -2,8 +2,11 @@ from ..qemu_config import QemuArchParams QEMU_ARCH = QemuArchParams(linux_arch='sparc', kconfig=''' -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y''', +CONFIG_KUNIT_FAULT_TEST=n +CONFIG_SPARC32=y +CONFIG_SERIAL_SUNZILOG=y +CONFIG_SERIAL_SUNZILOG_CONSOLE=y +''', qemu_arch='sparc', kernel_path='arch/sparc/boot/zImage', kernel_command_line='console=ttyS0 mem=256M', diff --git a/tools/testing/kunit/qemu_configs/x86_64.py b/tools/testing/kunit/qemu_configs/x86_64.py index dc7949076863..4a6bf4e048f5 100644 --- a/tools/testing/kunit/qemu_configs/x86_64.py +++ b/tools/testing/kunit/qemu_configs/x86_64.py @@ -7,4 +7,6 @@ CONFIG_SERIAL_8250_CONSOLE=y''', qemu_arch='x86_64', kernel_path='arch/x86/boot/bzImage', kernel_command_line='console=ttyS0', - extra_qemu_params=[]) + # qboot is faster than SeaBIOS and doesn't mess up + # the terminal. + extra_qemu_params=['-bios', 'qboot.rom']) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 363d031a16f7..85c5f39131d3 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -115,6 +115,7 @@ TARGETS += user_events TARGETS += vDSO TARGETS += mm TARGETS += x86 +TARGETS += x86/bugs TARGETS += zram #Please keep the TARGETS list alphabetically sorted # Run "make quicktest=1 run_tests" or @@ -195,7 +196,7 @@ export KHDR_INCLUDES all: @ret=1; \ - for TARGET in $(TARGETS); do \ + for TARGET in $(TARGETS) $(INSTALL_DEP_TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ mkdir $$BUILD_TARGET -p; \ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET \ diff --git a/tools/testing/selftests/alsa/utimer-test.c b/tools/testing/selftests/alsa/utimer-test.c index 32ee3ce57721..37964f311a33 100644 --- a/tools/testing/selftests/alsa/utimer-test.c +++ b/tools/testing/selftests/alsa/utimer-test.c @@ -135,6 +135,7 @@ TEST_F(timer_f, utimer) { pthread_join(ticking_thread, NULL); ASSERT_EQ(total_ticks, TICKS_COUNT); pclose(rfp); + free(buf); } TEST(wrong_timers_test) { diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 6d61992fe8a0..408fb1c5c2f8 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -168,7 +168,7 @@ static void ptrace_set_get_inherit(pid_t child, const struct vec_type *type) memset(&sve, 0, sizeof(sve)); sve.size = sizeof(sve); sve.vl = sve_vl_from_vq(SVE_VQ_MIN); - sve.flags = SVE_PT_VL_INHERIT; + sve.flags = SVE_PT_VL_INHERIT | SVE_PT_REGS_SVE; ret = set_sve(child, type, &sve); if (ret != 0) { ksft_test_result_fail("Failed to set %s SVE_PT_VL_INHERIT\n", @@ -233,6 +233,7 @@ static void ptrace_set_get_vl(pid_t child, const struct vec_type *type, /* Set the VL by doing a set with no register payload */ memset(&sve, 0, sizeof(sve)); sve.size = sizeof(sve); + sve.flags = SVE_PT_REGS_SVE; sve.vl = vl; ret = set_sve(child, type, &sve); if (ret != 0) { @@ -251,7 +252,7 @@ static void ptrace_set_get_vl(pid_t child, const struct vec_type *type, return; } - ksft_test_result(new_sve->vl = prctl_vl, "Set %s VL %u\n", + ksft_test_result(new_sve->vl == prctl_vl, "Set %s VL %u\n", type->name, vl); free(new_sve); diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 43a029318478..6fc29996ae29 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -193,9 +193,9 @@ ifeq ($(shell expr $(MAKE_VERSION) \>= 4.4), 1) $(let OUTPUT,$(OUTPUT)/,\ $(eval include ../../../build/Makefile.feature)) else -OUTPUT := $(OUTPUT)/ +override OUTPUT := $(OUTPUT)/ $(eval include ../../../build/Makefile.feature) -OUTPUT := $(patsubst %/,%,$(OUTPUT)) +override OUTPUT := $(patsubst %/,%,$(OUTPUT)) endif endif diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 2ed0ef6f21ee..32e9f194d449 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -4,6 +4,7 @@ #include <argp.h> #include <unistd.h> #include <stdint.h> +#include "bpf_util.h" #include "bench.h" #include "trigger_bench.skel.h" #include "trace_helpers.h" @@ -72,7 +73,7 @@ static __always_inline void inc_counter(struct counter *counters) unsigned slot; if (unlikely(tid == 0)) - tid = syscall(SYS_gettid); + tid = sys_gettid(); /* multiplicative hashing, it's fast */ slot = 2654435769U * tid; diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h index 6c3b4d4f173a..aeef86b3da74 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h @@ -40,6 +40,14 @@ DECLARE_TRACE(bpf_testmod_test_nullable_bare, TP_ARGS(ctx__nullable) ); +struct sk_buff; + +DECLARE_TRACE(bpf_testmod_test_raw_tp_null, + TP_PROTO(struct sk_buff *skb), + TP_ARGS(skb) +); + + #undef BPF_TESTMOD_DECLARE_TRACE #ifdef DECLARE_TRACE_WRITABLE #define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \ diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 8835761d9a12..4e6a9e9c0368 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -380,6 +380,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, (void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2); + (void)trace_bpf_testmod_test_raw_tp_null(NULL); + struct_arg3 = kmalloc((sizeof(struct bpf_testmod_struct_arg_3) + sizeof(int)), GFP_KERNEL); if (struct_arg3 != NULL) { diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index 10587a29b967..feff92219e21 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -6,6 +6,7 @@ #include <stdlib.h> #include <string.h> #include <errno.h> +#include <syscall.h> #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */ static inline unsigned int bpf_num_possible_cpus(void) @@ -59,4 +60,12 @@ static inline void bpf_strlcpy(char *dst, const char *src, size_t sz) (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) #endif +/* Availability of gettid across glibc versions is hit-and-miss, therefore + * fallback to syscall in this macro and use it everywhere. + */ +#ifndef sys_gettid +#define sys_gettid() syscall(SYS_gettid) +#endif + + #endif /* __BPF_UTIL__ */ diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c index 7d050364efca..62971dbf2996 100644 --- a/tools/testing/selftests/bpf/map_tests/task_storage_map.c +++ b/tools/testing/selftests/bpf/map_tests/task_storage_map.c @@ -12,6 +12,7 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> +#include "bpf_util.h" #include "test_maps.h" #include "task_local_storage_helpers.h" #include "read_bpf_task_storage_busy.skel.h" @@ -115,7 +116,7 @@ void test_task_storage_map_stress_lookup(void) CHECK(err, "attach", "error %d\n", err); /* Trigger program */ - syscall(SYS_gettid); + sys_gettid(); skel->bss->pid = 0; CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy); diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 27784946b01b..af0ee70a53f9 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -771,12 +771,13 @@ static const char *pkt_type_str(u16 pkt_type) return "Unknown"; } +#define MAX_FLAGS_STRLEN 21 /* Show the information of the transport layer in the packet */ static void show_transport(const u_char *packet, u16 len, u32 ifindex, const char *src_addr, const char *dst_addr, u16 proto, bool ipv6, u8 pkt_type) { - char *ifname, _ifname[IF_NAMESIZE]; + char *ifname, _ifname[IF_NAMESIZE], flags[MAX_FLAGS_STRLEN] = ""; const char *transport_str; u16 src_port, dst_port; struct udphdr *udp; @@ -817,29 +818,21 @@ static void show_transport(const u_char *packet, u16 len, u32 ifindex, /* TCP or UDP*/ - flockfile(stdout); + if (proto == IPPROTO_TCP) + snprintf(flags, MAX_FLAGS_STRLEN, "%s%s%s%s", + tcp->fin ? ", FIN" : "", + tcp->syn ? ", SYN" : "", + tcp->rst ? ", RST" : "", + tcp->ack ? ", ACK" : ""); + if (ipv6) - printf("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d", + printf("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d%s\n", ifname, pkt_type_str(pkt_type), src_addr, src_port, - dst_addr, dst_port, transport_str, len); + dst_addr, dst_port, transport_str, len, flags); else - printf("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d", + printf("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d%s\n", ifname, pkt_type_str(pkt_type), src_addr, src_port, - dst_addr, dst_port, transport_str, len); - - if (proto == IPPROTO_TCP) { - if (tcp->fin) - printf(", FIN"); - if (tcp->syn) - printf(", SYN"); - if (tcp->rst) - printf(", RST"); - if (tcp->ack) - printf(", ACK"); - } - - printf("\n"); - funlockfile(stdout); + dst_addr, dst_port, transport_str, len, flags); } static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type) diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c index cc184e4420f6..67557cda2208 100644 --- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c +++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c @@ -6,6 +6,10 @@ #include <test_progs.h> #include "bloom_filter_map.skel.h" +#ifndef NUMA_NO_NODE +#define NUMA_NO_NODE (-1) +#endif + static void test_fail_cases(void) { LIBBPF_OPTS(bpf_map_create_opts, opts); @@ -69,6 +73,7 @@ static void test_success_cases(void) /* Create a map */ opts.map_flags = BPF_F_ZERO_SEED | BPF_F_NUMA_NODE; + opts.numa_node = NUMA_NO_NODE; fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, &opts); if (!ASSERT_GE(fd, 0, "bpf_map_create bloom filter success case")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 070c52c312e5..6befa870434b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -690,7 +690,7 @@ void test_bpf_cookie(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - skel->bss->my_tid = syscall(SYS_gettid); + skel->bss->my_tid = sys_gettid(); if (test__start_subtest("kprobe")) kprobe_subtest(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index f0a3a9c18e9e..b8e1224cfd19 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -226,7 +226,7 @@ static void test_task_common_nocheck(struct bpf_iter_attach_opts *opts, ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing_wait, NULL), "pthread_create"); - skel->bss->tid = gettid(); + skel->bss->tid = sys_gettid(); do_dummy_read_opts(skel->progs.dump_task, opts); @@ -255,10 +255,10 @@ static void *run_test_task_tid(void *arg) union bpf_iter_link_info linfo; int num_unknown_tid, num_known_tid; - ASSERT_NEQ(getpid(), gettid(), "check_new_thread_id"); + ASSERT_NEQ(getpid(), sys_gettid(), "check_new_thread_id"); memset(&linfo, 0, sizeof(linfo)); - linfo.task.tid = gettid(); + linfo.task.tid = sys_gettid(); opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); test_task_common(&opts, 0, 1); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index a4a1f93878d4..fad98f01e2c0 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -63,6 +63,12 @@ static void test_bpf_nf_ct(int mode) .repeat = 1, ); + if (SYS_NOFAIL("iptables-legacy --version")) { + fprintf(stdout, "Missing required iptables-legacy tool\n"); + test__skip(); + return; + } + skel = test_bpf_nf__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_distill.c b/tools/testing/selftests/bpf/prog_tests/btf_distill.c index ca84726d5ac1..b72b966df77b 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_distill.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_distill.c @@ -385,7 +385,7 @@ static void test_distilled_base_missing_err(void) "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED"); btf5 = btf__new_empty(); if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf")) - return; + goto cleanup; btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [1] int */ VALIDATE_RAW_BTF( btf5, @@ -478,7 +478,7 @@ static void test_distilled_base_multi_err2(void) "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED"); btf5 = btf__new_empty(); if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf")) - return; + goto cleanup; btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [1] int */ btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [2] int */ VALIDATE_RAW_BTF( diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c index 747761572098..9015e2c2ab12 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c @@ -63,14 +63,14 @@ static void test_tp_btf(int cgroup_fd) if (!ASSERT_OK(err, "map_delete_elem")) goto out; - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); err = cgrp_ls_tp_btf__attach(skel); if (!ASSERT_OK(err, "skel_attach")) goto out; - syscall(SYS_gettid); - syscall(SYS_gettid); + sys_gettid(); + sys_gettid(); skel->bss->target_pid = 0; @@ -154,7 +154,7 @@ static void test_recursion(int cgroup_fd) goto out; /* trigger sys_enter, make sure it does not cause deadlock */ - syscall(SYS_gettid); + sys_gettid(); out: cgrp_ls_recursion__destroy(skel); @@ -224,7 +224,7 @@ static void test_yes_rcu_lock(__u64 cgroup_id) return; CGROUP_MODE_SET(skel); - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); bpf_program__set_autoload(skel->progs.yes_rcu_lock, true); err = cgrp_ls_sleepable__load(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c new file mode 100644 index 000000000000..7526de379081 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/changes_pkt_data.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "bpf/libbpf.h" +#include "changes_pkt_data_freplace.skel.h" +#include "changes_pkt_data.skel.h" +#include <test_progs.h> + +static void print_verifier_log(const char *log) +{ + if (env.verbosity >= VERBOSE_VERY) + fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log); +} + +static void test_aux(const char *main_prog_name, + const char *to_be_replaced, + const char *replacement, + bool expect_load) +{ + struct changes_pkt_data_freplace *freplace = NULL; + struct bpf_program *freplace_prog = NULL; + struct bpf_program *main_prog = NULL; + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct changes_pkt_data *main = NULL; + char log[16*1024]; + int err; + + opts.kernel_log_buf = log; + opts.kernel_log_size = sizeof(log); + if (env.verbosity >= VERBOSE_SUPER) + opts.kernel_log_level = 1 | 2 | 4; + main = changes_pkt_data__open_opts(&opts); + if (!ASSERT_OK_PTR(main, "changes_pkt_data__open")) + goto out; + main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name); + if (!ASSERT_OK_PTR(main_prog, "main_prog")) + goto out; + bpf_program__set_autoload(main_prog, true); + err = changes_pkt_data__load(main); + print_verifier_log(log); + if (!ASSERT_OK(err, "changes_pkt_data__load")) + goto out; + freplace = changes_pkt_data_freplace__open_opts(&opts); + if (!ASSERT_OK_PTR(freplace, "changes_pkt_data_freplace__open")) + goto out; + freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement); + if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog")) + goto out; + bpf_program__set_autoload(freplace_prog, true); + bpf_program__set_autoattach(freplace_prog, true); + bpf_program__set_attach_target(freplace_prog, + bpf_program__fd(main_prog), + to_be_replaced); + err = changes_pkt_data_freplace__load(freplace); + print_verifier_log(log); + if (expect_load) { + ASSERT_OK(err, "changes_pkt_data_freplace__load"); + } else { + ASSERT_ERR(err, "changes_pkt_data_freplace__load"); + ASSERT_HAS_SUBSTR(log, "Extension program changes packet data", "error log"); + } + +out: + changes_pkt_data_freplace__destroy(freplace); + changes_pkt_data__destroy(main); +} + +/* There are two global subprograms in both changes_pkt_data.skel.h: + * - one changes packet data; + * - another does not. + * It is ok to freplace subprograms that change packet data with those + * that either do or do not. It is only ok to freplace subprograms + * that do not change packet data with those that do not as well. + * The below tests check outcomes for each combination of such freplace. + * Also test a case when main subprogram itself is replaced and is a single + * subprogram in a program. + */ +void test_changes_pkt_data_freplace(void) +{ + struct { + const char *main; + const char *to_be_replaced; + bool changes; + } mains[] = { + { "main_with_subprogs", "changes_pkt_data", true }, + { "main_with_subprogs", "does_not_change_pkt_data", false }, + { "main_changes", "main_changes", true }, + { "main_does_not_change", "main_does_not_change", false }, + }; + struct { + const char *func; + bool changes; + } replacements[] = { + { "changes_pkt_data", true }, + { "does_not_change_pkt_data", false } + }; + char buf[64]; + + for (int i = 0; i < ARRAY_SIZE(mains); ++i) { + for (int j = 0; j < ARRAY_SIZE(replacements); ++j) { + snprintf(buf, sizeof(buf), "%s_with_%s", + mains[i].to_be_replaced, replacements[j].func); + if (!test__start_subtest(buf)) + continue; + test_aux(mains[i].main, mains[i].to_be_replaced, replacements[j].func, + mains[i].changes || !replacements[j].changes); + } + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 26019313e1fc..1c682550e0e7 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -1010,7 +1010,7 @@ static void run_core_reloc_tests(bool use_btfgen) struct data *data; void *mmap_data = NULL; - my_pid_tgid = getpid() | ((uint64_t)syscall(SYS_gettid) << 32); + my_pid_tgid = getpid() | ((uint64_t)sys_gettid() << 32); for (i = 0; i < ARRAY_SIZE(test_cases); i++) { char btf_file[] = "/tmp/core_reloc.btf.XXXXXX"; diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index d50cbd8040d4..e59af2aa6601 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -171,6 +171,10 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel, /* See also arch_adjust_kprobe_addr(). */ if (skel->kconfig->CONFIG_X86_KERNEL_IBT) entry_offset = 4; + if (skel->kconfig->CONFIG_PPC64 && + skel->kconfig->CONFIG_KPROBES_ON_FTRACE && + !skel->kconfig->CONFIG_PPC_FTRACE_OUT_OF_LINE) + entry_offset = 4; err = verify_perf_link_info(link_fd, type, kprobe_addr, 0, entry_offset); ASSERT_OK(err, "verify_perf_link_info"); } else { diff --git a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c index cad664546912..fa639b021f7e 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c @@ -20,7 +20,7 @@ void test_linked_funcs(void) bpf_program__set_autoload(skel->progs.handler1, true); bpf_program__set_autoload(skel->progs.handler2, true); - skel->rodata->my_tid = syscall(SYS_gettid); + skel->rodata->my_tid = sys_gettid(); skel->bss->syscall_id = SYS_getpgid; err = linked_funcs__load(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index c29787e092d6..761ce24bce38 100644 --- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -23,7 +23,7 @@ static int get_pid_tgid(pid_t *pid, pid_t *tgid, struct stat st; int err; - *pid = syscall(SYS_gettid); + *pid = sys_gettid(); *tgid = getpid(); err = stat("/proc/self/ns/pid", &st); diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c new file mode 100644 index 000000000000..6fa19449297e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <test_progs.h> +#include "raw_tp_null.skel.h" + +void test_raw_tp_null(void) +{ + struct raw_tp_null *skel; + + skel = raw_tp_null__open_and_load(); + if (!ASSERT_OK_PTR(skel, "raw_tp_null__open_and_load")) + return; + + skel->bss->tid = sys_gettid(); + + if (!ASSERT_OK(raw_tp_null__attach(skel), "raw_tp_null__attach")) + goto end; + + ASSERT_OK(trigger_module_test_read(2), "trigger testmod read"); + ASSERT_EQ(skel->bss->i, 3, "invocations"); + +end: + raw_tp_null__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c index a1f7e7378a64..ebe0c12b5536 100644 --- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c @@ -21,7 +21,7 @@ static void test_success(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); bpf_program__set_autoload(skel->progs.get_cgroup_id, true); bpf_program__set_autoload(skel->progs.task_succ, true); @@ -58,7 +58,7 @@ static void test_rcuptr_acquire(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); bpf_program__set_autoload(skel->progs.task_acquire, true); err = rcu_read_lock__load(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index da430df45aa4..d1e4cb28a72c 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -97,7 +97,7 @@ static void ringbuf_write_subtest(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - skel->maps.ringbuf.max_entries = 0x4000; + skel->maps.ringbuf.max_entries = 0x40000; err = test_ringbuf_write_lskel__load(skel); if (!ASSERT_OK(err, "skel_load")) @@ -108,7 +108,7 @@ static void ringbuf_write_subtest(void) mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0); if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos")) goto cleanup; - *mmap_ptr = 0x3000; + *mmap_ptr = 0x30000; ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw"); skel->bss->pid = getpid(); diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 82bfb266741c..fb08c565d6aa 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -492,8 +492,8 @@ static void test_sockmap_skb_verdict_shutdown(void) if (!ASSERT_EQ(err, 1, "epoll_wait(fd)")) goto out_close; - n = recv(c1, &b, 1, SOCK_NONBLOCK); - ASSERT_EQ(n, 0, "recv_timeout(fin)"); + n = recv(c1, &b, 1, MSG_DONTWAIT); + ASSERT_EQ(n, 0, "recv(fin)"); out_close: close(c1); close(p1); @@ -546,7 +546,7 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog) ASSERT_EQ(avail, expected, "ioctl(FIONREAD)"); /* On DROP test there will be no data to read */ if (pass_prog) { - recvd = recv_timeout(c1, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC); + recvd = recv_timeout(c1, &buf, sizeof(buf), MSG_DONTWAIT, IO_TIMEOUT_SEC); ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)"); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index 2d0796314862..0a99fd404f6d 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -68,7 +68,6 @@ static void test_sockmap_ktls_disconnect_after_delete(int family, int map) goto close_cli; err = disconnect(cli); - ASSERT_OK(err, "disconnect"); close_cli: close(cli); diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 4ee1148d22be..1cfed83156b0 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -924,6 +924,8 @@ static void redir_partial(int family, int sotype, int sock_map, int parser_map) goto close; n = xsend(c1, buf, sizeof(buf), 0); + if (n == -1) + goto close; if (n < sizeof(buf)) FAIL("incomplete write"); diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index 21c5a37846ad..1f0977742741 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -1496,8 +1496,8 @@ static void test_tailcall_bpf2bpf_hierarchy_3(void) RUN_TESTS(tailcall_bpf2bpf_hierarchy3); } -/* test_tailcall_freplace checks that the attached freplace prog is OK to - * update the prog_array map. +/* test_tailcall_freplace checks that the freplace prog fails to update the + * prog_array map, no matter whether the freplace prog attaches to its target. */ static void test_tailcall_freplace(void) { @@ -1505,7 +1505,7 @@ static void test_tailcall_freplace(void) struct bpf_link *freplace_link = NULL; struct bpf_program *freplace_prog; struct tc_bpf2bpf *tc_skel = NULL; - int prog_fd, map_fd; + int prog_fd, tc_prog_fd, map_fd; char buff[128] = {}; int err, key; @@ -1523,9 +1523,10 @@ static void test_tailcall_freplace(void) if (!ASSERT_OK_PTR(tc_skel, "tc_bpf2bpf__open_and_load")) goto out; - prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); + tc_prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); freplace_prog = freplace_skel->progs.entry_freplace; - err = bpf_program__set_attach_target(freplace_prog, prog_fd, "subprog"); + err = bpf_program__set_attach_target(freplace_prog, tc_prog_fd, + "subprog_tc"); if (!ASSERT_OK(err, "set_attach_target")) goto out; @@ -1533,27 +1534,117 @@ static void test_tailcall_freplace(void) if (!ASSERT_OK(err, "tailcall_freplace__load")) goto out; - freplace_link = bpf_program__attach_freplace(freplace_prog, prog_fd, - "subprog"); + map_fd = bpf_map__fd(freplace_skel->maps.jmp_table); + prog_fd = bpf_program__fd(freplace_prog); + key = 0; + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + ASSERT_ERR(err, "update jmp_table failure"); + + freplace_link = bpf_program__attach_freplace(freplace_prog, tc_prog_fd, + "subprog_tc"); if (!ASSERT_OK_PTR(freplace_link, "attach_freplace")) goto out; - map_fd = bpf_map__fd(freplace_skel->maps.jmp_table); - prog_fd = bpf_program__fd(freplace_prog); + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + ASSERT_ERR(err, "update jmp_table failure"); + +out: + bpf_link__destroy(freplace_link); + tailcall_freplace__destroy(freplace_skel); + tc_bpf2bpf__destroy(tc_skel); +} + +/* test_tailcall_bpf2bpf_freplace checks the failure that fails to attach a tail + * callee prog with freplace prog or fails to update an extended prog to + * prog_array map. + */ +static void test_tailcall_bpf2bpf_freplace(void) +{ + struct tailcall_freplace *freplace_skel = NULL; + struct bpf_link *freplace_link = NULL; + struct tc_bpf2bpf *tc_skel = NULL; + char buff[128] = {}; + int prog_fd, map_fd; + int err, key; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = buff, + .data_size_in = sizeof(buff), + .repeat = 1, + ); + + tc_skel = tc_bpf2bpf__open_and_load(); + if (!ASSERT_OK_PTR(tc_skel, "tc_bpf2bpf__open_and_load")) + goto out; + + prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); + freplace_skel = tailcall_freplace__open(); + if (!ASSERT_OK_PTR(freplace_skel, "tailcall_freplace__open")) + goto out; + + err = bpf_program__set_attach_target(freplace_skel->progs.entry_freplace, + prog_fd, "subprog_tc"); + if (!ASSERT_OK(err, "set_attach_target")) + goto out; + + err = tailcall_freplace__load(freplace_skel); + if (!ASSERT_OK(err, "tailcall_freplace__load")) + goto out; + + /* OK to attach then detach freplace prog. */ + + freplace_link = bpf_program__attach_freplace(freplace_skel->progs.entry_freplace, + prog_fd, "subprog_tc"); + if (!ASSERT_OK_PTR(freplace_link, "attach_freplace")) + goto out; + + err = bpf_link__destroy(freplace_link); + freplace_link = NULL; + if (!ASSERT_OK(err, "destroy link")) + goto out; + + /* OK to update prog_array map then delete element from the map. */ + key = 0; + map_fd = bpf_map__fd(freplace_skel->maps.jmp_table); err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); if (!ASSERT_OK(err, "update jmp_table")) goto out; - prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); - err = bpf_prog_test_run_opts(prog_fd, &topts); - ASSERT_OK(err, "test_run"); - ASSERT_EQ(topts.retval, 34, "test_run retval"); + err = bpf_map_delete_elem(map_fd, &key); + if (!ASSERT_OK(err, "delete_elem from jmp_table")) + goto out; + + /* Fail to attach a tail callee prog with freplace prog. */ + + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + if (!ASSERT_OK(err, "update jmp_table")) + goto out; + + freplace_link = bpf_program__attach_freplace(freplace_skel->progs.entry_freplace, + prog_fd, "subprog_tc"); + if (!ASSERT_ERR_PTR(freplace_link, "attach_freplace failure")) + goto out; + + err = bpf_map_delete_elem(map_fd, &key); + if (!ASSERT_OK(err, "delete_elem from jmp_table")) + goto out; + + /* Fail to update an extended prog to prog_array map. */ + + freplace_link = bpf_program__attach_freplace(freplace_skel->progs.entry_freplace, + prog_fd, "subprog_tc"); + if (!ASSERT_OK_PTR(freplace_link, "attach_freplace")) + goto out; + + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + if (!ASSERT_ERR(err, "update jmp_table failure")) + goto out; out: bpf_link__destroy(freplace_link); - tc_bpf2bpf__destroy(tc_skel); tailcall_freplace__destroy(freplace_skel); + tc_bpf2bpf__destroy(tc_skel); } void test_tailcalls(void) @@ -1606,4 +1697,6 @@ void test_tailcalls(void) test_tailcall_bpf2bpf_hierarchy_3(); if (test__start_subtest("tailcall_freplace")) test_tailcall_freplace(); + if (test__start_subtest("tailcall_bpf2bpf_freplace")) + test_tailcall_bpf2bpf_freplace(); } diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c index c33c05161a9e..0d42ce00166f 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c @@ -23,14 +23,14 @@ static void test_sys_enter_exit(void) if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) return; - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); err = task_local_storage__attach(skel); if (!ASSERT_OK(err, "skel_attach")) goto out; - syscall(SYS_gettid); - syscall(SYS_gettid); + sys_gettid(); + sys_gettid(); /* 3x syscalls: 1x attach and 2x gettid */ ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt"); @@ -99,7 +99,7 @@ static void test_recursion(void) /* trigger sys_enter, make sure it does not cause deadlock */ skel->bss->test_pid = getpid(); - syscall(SYS_gettid); + sys_gettid(); skel->bss->test_pid = 0; task_ls_recursion__detach(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index fe86e4fdb89c..c3ab9b6fb069 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -828,8 +828,12 @@ static int userns_obj_priv_btf_success(int mnt_fd, struct token_lsm *lsm_skel) return validate_struct_ops_load(mnt_fd, true /* should succeed */); } +static const char *token_bpffs_custom_dir() +{ + return getenv("BPF_SELFTESTS_BPF_TOKEN_DIR") ?: "/tmp/bpf-token-fs"; +} + #define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH" -#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs" static int userns_obj_priv_implicit_token(int mnt_fd, struct token_lsm *lsm_skel) { @@ -892,6 +896,7 @@ static int userns_obj_priv_implicit_token(int mnt_fd, struct token_lsm *lsm_skel static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *lsm_skel) { + const char *custom_dir = token_bpffs_custom_dir(); LIBBPF_OPTS(bpf_object_open_opts, opts); struct dummy_st_ops_success *skel; int err; @@ -909,10 +914,10 @@ static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *l * BPF token implicitly, unless pointed to it through * LIBBPF_BPF_TOKEN_PATH envvar */ - rmdir(TOKEN_BPFFS_CUSTOM); - if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom")) + rmdir(custom_dir); + if (!ASSERT_OK(mkdir(custom_dir, 0777), "mkdir_bpffs_custom")) goto err_out; - err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH); + err = sys_move_mount(mnt_fd, "", AT_FDCWD, custom_dir, MOVE_MOUNT_F_EMPTY_PATH); if (!ASSERT_OK(err, "move_mount_bpffs")) goto err_out; @@ -925,7 +930,7 @@ static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *l goto err_out; } - err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/); + err = setenv(TOKEN_ENVVAR, custom_dir, 1 /*overwrite*/); if (!ASSERT_OK(err, "setenv_token_path")) goto err_out; @@ -951,11 +956,11 @@ static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *l if (!ASSERT_ERR(err, "obj_empty_token_path_load")) goto err_out; - rmdir(TOKEN_BPFFS_CUSTOM); + rmdir(custom_dir); unsetenv(TOKEN_ENVVAR); return 0; err_out: - rmdir(TOKEN_BPFFS_CUSTOM); + rmdir(custom_dir); unsetenv(TOKEN_ENVVAR); return -EINVAL; } diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c index 844f6fc8487b..02a484b22aa6 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c @@ -125,7 +125,7 @@ static void *child_thread(void *ctx) struct child *child = ctx; int c = 0, err; - child->tid = syscall(SYS_gettid); + child->tid = sys_gettid(); /* let parent know we are ready */ err = write(child->c2p[1], &c, 1); @@ -869,21 +869,14 @@ static void consumer_test(struct uprobe_multi_consumers *skel, fmt = "prog 0/1: uprobe"; } else { /* - * uprobe return is tricky ;-) - * * to trigger uretprobe consumer, the uretprobe needs to be installed, * which means one of the 'return' uprobes was alive when probe was hit: * * idxs: 2/3 uprobe return in 'installed' mask - * - * in addition if 'after' state removes everything that was installed in - * 'before' state, then uprobe kernel object goes away and return uprobe - * is not installed and we won't hit it even if it's in 'after' state. */ unsigned long had_uretprobes = before & 0b1100; /* is uretprobe installed */ - unsigned long probe_preserved = before & after; /* did uprobe go away */ - if (had_uretprobes && probe_preserved && test_bit(idx, after)) + if (had_uretprobes && test_bit(idx, after)) val++; fmt = "idx 2/3: uretprobe"; } diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c index d424e7ecbd12..9fd3ae987321 100644 --- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c @@ -21,8 +21,7 @@ #include "../progs/test_user_ringbuf.h" static const long c_sample_size = sizeof(struct sample) + BPF_RINGBUF_HDR_SZ; -static const long c_ringbuf_size = 1 << 12; /* 1 small page */ -static const long c_max_entries = c_ringbuf_size / c_sample_size; +static long c_ringbuf_size, c_max_entries; static void drain_current_samples(void) { @@ -424,7 +423,9 @@ static void test_user_ringbuf_loop(void) uint32_t remaining_samples = total_samples; int err; - BUILD_BUG_ON(total_samples <= c_max_entries); + if (!ASSERT_LT(c_max_entries, total_samples, "compare_c_max_entries")) + return; + err = load_skel_create_user_ringbuf(&skel, &ringbuf); if (err) return; @@ -686,6 +687,9 @@ void test_user_ringbuf(void) { int i; + c_ringbuf_size = getpagesize(); /* 1 page */ + c_max_entries = c_ringbuf_size / c_sample_size; + for (i = 0; i < ARRAY_SIZE(success_tests); i++) { if (!test__start_subtest(success_tests[i].test_name)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c index 481626a875d1..df27535995af 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c @@ -2,35 +2,41 @@ #include <uapi/linux/bpf.h> #include <linux/if_link.h> #include <test_progs.h> +#include <network_helpers.h> #include "test_xdp_with_cpumap_frags_helpers.skel.h" #include "test_xdp_with_cpumap_helpers.skel.h" #define IFINDEX_LO 1 +#define TEST_NS "cpu_attach_ns" static void test_xdp_with_cpumap_helpers(void) { - struct test_xdp_with_cpumap_helpers *skel; + struct test_xdp_with_cpumap_helpers *skel = NULL; struct bpf_prog_info info = {}; __u32 len = sizeof(info); struct bpf_cpumap_val val = { .qsize = 192, }; - int err, prog_fd, map_fd; + int err, prog_fd, prog_redir_fd, map_fd; + struct nstoken *nstoken = NULL; __u32 idx = 0; + SYS(out_close, "ip netns add %s", TEST_NS); + nstoken = open_netns(TEST_NS); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto out_close; + SYS(out_close, "ip link set dev lo up"); + skel = test_xdp_with_cpumap_helpers__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load")) return; - prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog); - err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL); + prog_redir_fd = bpf_program__fd(skel->progs.xdp_redir_prog); + err = bpf_xdp_attach(IFINDEX_LO, prog_redir_fd, XDP_FLAGS_SKB_MODE, NULL); if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP")) goto out_close; - err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); - ASSERT_OK(err, "XDP program detach"); - prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm); map_fd = bpf_map__fd(skel->maps.cpu_map); err = bpf_prog_get_info_by_fd(prog_fd, &info, &len); @@ -45,6 +51,26 @@ static void test_xdp_with_cpumap_helpers(void) ASSERT_OK(err, "Read cpumap entry"); ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id"); + /* send a packet to trigger any potential bugs in there */ + char data[ETH_HLEN] = {}; + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &data, + .data_size_in = sizeof(data), + .flags = BPF_F_TEST_XDP_LIVE_FRAMES, + .repeat = 1, + ); + err = bpf_prog_test_run_opts(prog_redir_fd, &opts); + ASSERT_OK(err, "XDP test run"); + + /* wait for the packets to be flushed, then check that redirect has been + * performed + */ + kern_sync_rcu(); + ASSERT_NEQ(skel->bss->redirect_count, 0, "redirected packets"); + + err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); + ASSERT_OK(err, "XDP program detach"); + /* can not attach BPF_XDP_CPUMAP program to a device */ err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL); if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_CPUMAP program")) @@ -65,6 +91,8 @@ static void test_xdp_with_cpumap_helpers(void) ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to cpumap entry"); out_close: + close_netns(nstoken); + SYS_NOFAIL("ip netns del %s", TEST_NS); test_xdp_with_cpumap_helpers__destroy(skel); } @@ -111,7 +139,7 @@ out_close: test_xdp_with_cpumap_frags_helpers__destroy(skel); } -void serial_test_xdp_cpumap_attach(void) +void test_xdp_cpumap_attach(void) { if (test__start_subtest("CPUMAP with programs in entries")) test_xdp_with_cpumap_helpers(); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index 27ffed17d4be..461ab18705d5 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -23,7 +23,7 @@ static void test_xdp_with_devmap_helpers(void) __u32 len = sizeof(info); int err, dm_fd, dm_fd_redir, map_fd; struct nstoken *nstoken = NULL; - char data[10] = {}; + char data[ETH_HLEN] = {}; __u32 idx = 0; SYS(out_close, "ip netns add %s", TEST_NS); @@ -58,7 +58,7 @@ static void test_xdp_with_devmap_helpers(void) /* send a packet to trigger any potential bugs in there */ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, - .data_size_in = 10, + .data_size_in = sizeof(data), .flags = BPF_F_TEST_XDP_LIVE_FRAMES, .repeat = 1, ); @@ -158,7 +158,7 @@ static void test_xdp_with_devmap_helpers_veth(void) struct nstoken *nstoken = NULL; __u32 len = sizeof(info); int err, dm_fd, dm_fd_redir, map_fd, ifindex_dst; - char data[10] = {}; + char data[ETH_HLEN] = {}; __u32 idx = 0; SYS(out_close, "ip netns add %s", TEST_NS); @@ -208,7 +208,7 @@ static void test_xdp_with_devmap_helpers_veth(void) /* send a packet to trigger any potential bugs in there */ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, - .data_size_in = 10, + .data_size_in = sizeof(data), .flags = BPF_F_TEST_XDP_LIVE_FRAMES, .repeat = 1, ); diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data.c b/tools/testing/selftests/bpf/progs/changes_pkt_data.c new file mode 100644 index 000000000000..43cada48b28a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/changes_pkt_data.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__noinline +long changes_pkt_data(struct __sk_buff *sk) +{ + return bpf_skb_pull_data(sk, 0); +} + +__noinline __weak +long does_not_change_pkt_data(struct __sk_buff *sk) +{ + return 0; +} + +SEC("?tc") +int main_with_subprogs(struct __sk_buff *sk) +{ + changes_pkt_data(sk); + does_not_change_pkt_data(sk); + return 0; +} + +SEC("?tc") +int main_changes(struct __sk_buff *sk) +{ + bpf_skb_pull_data(sk, 0); + return 0; +} + +SEC("?tc") +int main_does_not_change(struct __sk_buff *sk) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c new file mode 100644 index 000000000000..f9a622705f1b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/changes_pkt_data_freplace.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +SEC("?freplace") +long changes_pkt_data(struct __sk_buff *sk) +{ + return bpf_skb_pull_data(sk, 0); +} + +SEC("?freplace") +long does_not_change_pkt_data(struct __sk_buff *sk) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null.c b/tools/testing/selftests/bpf/progs/raw_tp_null.c new file mode 100644 index 000000000000..5927054b6dd9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/raw_tp_null.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +int tid; +int i; + +SEC("tp_btf/bpf_testmod_test_raw_tp_null") +int BPF_PROG(test_raw_tp_null, struct sk_buff *skb) +{ + struct task_struct *task = bpf_get_current_task_btf(); + + if (task->pid != tid) + return 0; + + /* If dead code elimination kicks in, the increment +=2 will be + * removed. For raw_tp programs attaching to tracepoints in kernel + * modules, we mark input arguments as PTR_MAYBE_NULL, so branch + * prediction should never kick in. + */ + asm volatile ("%[i] += 1; if %[ctx] != 0 goto +1; %[i] += 2;" + : [i]"+r"(i) + : [ctx]"r"(skb) + : "memory"); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/strncmp_bench.c b/tools/testing/selftests/bpf/progs/strncmp_bench.c index 18373a7df76e..f47bf88f8d2a 100644 --- a/tools/testing/selftests/bpf/progs/strncmp_bench.c +++ b/tools/testing/selftests/bpf/progs/strncmp_bench.c @@ -35,7 +35,10 @@ static __always_inline int local_strncmp(const char *s1, unsigned int sz, SEC("tp/syscalls/sys_enter_getpgid") int strncmp_no_helper(void *ctx) { - if (local_strncmp(str, cmp_str_len + 1, target) < 0) + const char *target_str = target; + + barrier_var(target_str); + if (local_strncmp(str, cmp_str_len + 1, target_str) < 0) __sync_add_and_fetch(&hits, 1); return 0; } diff --git a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c index 79f5087dade2..fe6249d99b31 100644 --- a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c @@ -5,10 +5,11 @@ #include "bpf_misc.h" __noinline -int subprog(struct __sk_buff *skb) +int subprog_tc(struct __sk_buff *skb) { int ret = 1; + __sink(skb); __sink(ret); /* let verifier know that 'subprog_tc' can change pointers to skb->data */ bpf_skb_change_proto(skb, 0, 0); @@ -18,7 +19,7 @@ int subprog(struct __sk_buff *skb) SEC("tc") int entry_tc(struct __sk_buff *skb) { - return subprog(skb); + return subprog_tc(skb); } char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_fill_link_info.c b/tools/testing/selftests/bpf/progs/test_fill_link_info.c index 6afa834756e9..fac33a14f200 100644 --- a/tools/testing/selftests/bpf/progs/test_fill_link_info.c +++ b/tools/testing/selftests/bpf/progs/test_fill_link_info.c @@ -6,13 +6,20 @@ #include <stdbool.h> extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak; +extern bool CONFIG_PPC_FTRACE_OUT_OF_LINE __kconfig __weak; +extern bool CONFIG_KPROBES_ON_FTRACE __kconfig __weak; +extern bool CONFIG_PPC64 __kconfig __weak; -/* This function is here to have CONFIG_X86_KERNEL_IBT - * used and added to object BTF. +/* This function is here to have CONFIG_X86_KERNEL_IBT, + * CONFIG_PPC_FTRACE_OUT_OF_LINE, CONFIG_KPROBES_ON_FTRACE, + * CONFIG_PPC6 used and added to object BTF. */ int unused(void) { - return CONFIG_X86_KERNEL_IBT ? 0 : 1; + return CONFIG_X86_KERNEL_IBT || + CONFIG_PPC_FTRACE_OUT_OF_LINE || + CONFIG_KPROBES_ON_FTRACE || + CONFIG_PPC64 ? 0 : 1; } SEC("kprobe") diff --git a/tools/testing/selftests/bpf/progs/test_global_map_resize.c b/tools/testing/selftests/bpf/progs/test_global_map_resize.c index a3f220ba7025..ee65bad0436d 100644 --- a/tools/testing/selftests/bpf/progs/test_global_map_resize.c +++ b/tools/testing/selftests/bpf/progs/test_global_map_resize.c @@ -32,6 +32,16 @@ int my_int_last SEC(".data.array_not_last"); int percpu_arr[1] SEC(".data.percpu_arr"); +/* at least one extern is included, to ensure that a specific + * regression is tested whereby resizing resulted in a free-after-use + * bug after type information is invalidated by the resize operation. + * + * There isn't a particularly good API to test for this specific condition, + * but by having externs for the resizing tests it will cover this path. + */ +extern int LINUX_KERNEL_VERSION __kconfig; +long version_sink; + SEC("tp/syscalls/sys_enter_getpid") int bss_array_sum(void *ctx) { @@ -44,6 +54,9 @@ int bss_array_sum(void *ctx) for (size_t i = 0; i < bss_array_len; ++i) sum += array[i]; + /* see above; ensure this is not optimized out */ + version_sink = LINUX_KERNEL_VERSION; + return 0; } @@ -59,6 +72,9 @@ int data_array_sum(void *ctx) for (size_t i = 0; i < data_array_len; ++i) sum += my_array[i]; + /* see above; ensure this is not optimized out */ + version_sink = LINUX_KERNEL_VERSION; + return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c index 350513c0e4c9..f063a0013f85 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c @@ -26,11 +26,11 @@ int test_ringbuf_write(void *ctx) if (cur_pid != pid) return 0; - sample1 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + sample1 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0); if (!sample1) return 0; /* first one can pass */ - sample2 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + sample2 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0); if (!sample2) { bpf_ringbuf_discard(sample1, 0); __sync_fetch_and_add(&discarded, 1); diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c index 20ec6723df18..3619239b01b7 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c @@ -12,10 +12,12 @@ struct { __uint(max_entries, 4); } cpu_map SEC(".maps"); +__u32 redirect_count = 0; + SEC("xdp") int xdp_redir_prog(struct xdp_md *ctx) { - return bpf_redirect_map(&cpu_map, 1, 0); + return bpf_redirect_map(&cpu_map, 0, 0); } SEC("xdp") @@ -27,6 +29,9 @@ int xdp_dummy_prog(struct xdp_md *ctx) SEC("xdp/cpumap") int xdp_dummy_cm(struct xdp_md *ctx) { + if (bpf_get_smp_processor_id() == 0) + redirect_count++; + if (ctx->ingress_ifindex == IFINDEX_LO) return XDP_DROP; diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c index 6065f862d964..758b09a5eb88 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c @@ -29,12 +29,12 @@ int big_alloc1(void *ctx) if (!page1) return 1; *page1 = 1; - page2 = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE, + page2 = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE * 2, 1, NUMA_NO_NODE, 0); if (!page2) return 2; *page2 = 2; - no_page = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE, + no_page = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE, 1, NUMA_NO_NODE, 0); if (no_page) return 3; @@ -66,4 +66,110 @@ int big_alloc1(void *ctx) #endif return 0; } + +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) +#define PAGE_CNT 100 +__u8 __arena * __arena page[PAGE_CNT]; /* occupies the first page */ +__u8 __arena *base; + +/* + * Check that arena's range_tree algorithm allocates pages sequentially + * on the first pass and then fills in all gaps on the second pass. + */ +__noinline int alloc_pages(int page_cnt, int pages_atonce, bool first_pass, + int max_idx, int step) +{ + __u8 __arena *pg; + int i, pg_idx; + + for (i = 0; i < page_cnt; i++) { + pg = bpf_arena_alloc_pages(&arena, NULL, pages_atonce, + NUMA_NO_NODE, 0); + if (!pg) + return step; + pg_idx = (unsigned long) (pg - base) / PAGE_SIZE; + if (first_pass) { + /* Pages must be allocated sequentially */ + if (pg_idx != i) + return step + 100; + } else { + /* Allocator must fill into gaps */ + if (pg_idx >= max_idx || (pg_idx & 1)) + return step + 200; + } + *pg = pg_idx; + page[pg_idx] = pg; + cond_break; + } + return 0; +} + +//SEC("syscall") +//__success __retval(0) +int big_alloc2(void *ctx) +{ + __u8 __arena *pg; + int i, err; + + base = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (!base) + return 1; + bpf_arena_free_pages(&arena, (void __arena *)base, 1); + + err = alloc_pages(PAGE_CNT, 1, true, PAGE_CNT, 2); + if (err) + return err; + + /* Clear all even pages */ + for (i = 0; i < PAGE_CNT; i += 2) { + pg = page[i]; + if (*pg != i) + return 3; + bpf_arena_free_pages(&arena, (void __arena *)pg, 1); + page[i] = NULL; + cond_break; + } + + /* Allocate into freed gaps */ + err = alloc_pages(PAGE_CNT / 2, 1, false, PAGE_CNT, 4); + if (err) + return err; + + /* Free pairs of pages */ + for (i = 0; i < PAGE_CNT; i += 4) { + pg = page[i]; + if (*pg != i) + return 5; + bpf_arena_free_pages(&arena, (void __arena *)pg, 2); + page[i] = NULL; + page[i + 1] = NULL; + cond_break; + } + + /* Allocate 2 pages at a time into freed gaps */ + err = alloc_pages(PAGE_CNT / 4, 2, false, PAGE_CNT, 6); + if (err) + return err; + + /* Check pages without freeing */ + for (i = 0; i < PAGE_CNT; i += 2) { + pg = page[i]; + if (*pg != i) + return 7; + cond_break; + } + + pg = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + + if (!pg) + return 8; + /* + * The first PAGE_CNT pages are occupied. The new page + * must be above. + */ + if ((pg - base) / PAGE_SIZE < PAGE_CNT) + return 9; + return 0; +} +#endif char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c index 6b564d4c0986..051d1962a4c7 100644 --- a/tools/testing/selftests/bpf/progs/verifier_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_precision.c @@ -130,4 +130,57 @@ __naked int state_loop_first_last_equal(void) ); } +__used __naked static void __bpf_cond_op_r10(void) +{ + asm volatile ( + "r2 = 2314885393468386424 ll;" + "goto +0;" + "if r2 <= r10 goto +3;" + "if r1 >= -1835016 goto +0;" + "if r2 <= 8 goto +0;" + "if r3 <= 0 goto +0;" + "exit;" + ::: __clobber_all); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("8: (bd) if r2 <= r10 goto pc+3") +__msg("9: (35) if r1 >= 0xffe3fff8 goto pc+0") +__msg("10: (b5) if r2 <= 0x8 goto pc+0") +__msg("mark_precise: frame1: last_idx 10 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame1: regs=r2 stack= before 9: (35) if r1 >= 0xffe3fff8 goto pc+0") +__msg("mark_precise: frame1: regs=r2 stack= before 8: (bd) if r2 <= r10 goto pc+3") +__msg("mark_precise: frame1: regs=r2 stack= before 7: (05) goto pc+0") +__naked void bpf_cond_op_r10(void) +{ + asm volatile ( + "r3 = 0 ll;" + "call __bpf_cond_op_r10;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("3: (bf) r3 = r10") +__msg("4: (bd) if r3 <= r2 goto pc+1") +__msg("5: (b5) if r2 <= 0x8 goto pc+2") +__msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 4: (bd) if r3 <= r2 goto pc+1") +__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r3 = r10") +__naked void bpf_cond_op_not_r10(void) +{ + asm volatile ( + "r0 = 0;" + "r2 = 2314885393468386424 ll;" + "r3 = r10;" + "if r3 <= r2 goto +1;" + "if r2 <= 8 goto +2;" + "r0 = 2 ll;" + "exit;" + ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c index ee76b51005ab..3c8f6646e33d 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sock.c +++ b/tools/testing/selftests/bpf/progs/verifier_sock.c @@ -50,6 +50,13 @@ struct { __uint(map_flags, BPF_F_NO_PREALLOC); } sk_storage_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + SEC("cgroup/skb") __description("skb->sk: no NULL check") __failure __msg("invalid mem access 'sock_common_or_null'") @@ -977,4 +984,53 @@ l1_%=: r0 = *(u8*)(r7 + 0); \ : __clobber_all); } +__noinline +long skb_pull_data2(struct __sk_buff *sk, __u32 len) +{ + return bpf_skb_pull_data(sk, len); +} + +__noinline +long skb_pull_data1(struct __sk_buff *sk, __u32 len) +{ + return skb_pull_data2(sk, len); +} + +/* global function calls bpf_skb_pull_data(), which invalidates packet + * pointers established before global function call. + */ +SEC("tc") +__failure __msg("invalid mem access") +int invalidate_pkt_pointers_from_global_func(struct __sk_buff *sk) +{ + int *p = (void *)(long)sk->data; + + if ((void *)(p + 1) > (void *)(long)sk->data_end) + return TCX_DROP; + skb_pull_data1(sk, 0); + *p = 42; /* this is unsafe */ + return TCX_PASS; +} + +__noinline +int tail_call(struct __sk_buff *sk) +{ + bpf_tail_call_static(sk, &jmp_table, 0); + return 0; +} + +/* Tail calls invalidate packet pointers. */ +SEC("tc") +__failure __msg("invalid mem access") +int invalidate_pkt_pointers_by_tail_call(struct __sk_buff *sk) +{ + int *p = (void *)(long)sk->data; + + if ((void *)(p + 1) > (void *)(long)sk->data_end) + return TCX_DROP; + tail_call(sk); + *p = 42; /* this is unsafe */ + return TCX_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c index 7ea535bfbacd..e4ef82a6ee38 100644 --- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c +++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c @@ -619,7 +619,7 @@ __naked void pass_pointer_to_tail_call(void) SEC("socket") __description("unpriv: cmp map pointer with zero") -__success __failure_unpriv __msg_unpriv("R1 pointer comparison") +__success __success_unpriv __retval(0) __naked void cmp_map_pointer_with_zero(void) { diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 3e9b009580d4..7f69d7b5bd4d 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -970,6 +970,14 @@ void run_subtest(struct test_loader *tester, emit_verifier_log(tester->log_buf, false /*force*/); validate_msgs(tester->log_buf, &subspec->expect_msgs, emit_verifier_log); + /* Restore capabilities because the kernel will silently ignore requests + * for program info (such as xlated program text) if we are not + * bpf-capable. Also, for some reason test_verifier executes programs + * with all capabilities restored. Do the same here. + */ + if (restore_capabilities(&caps)) + goto tobj_cleanup; + if (subspec->expect_xlated.cnt) { err = get_xlated_program_text(bpf_program__fd(tprog), tester->log_buf, tester->log_buf_sz); @@ -995,12 +1003,6 @@ void run_subtest(struct test_loader *tester, } if (should_do_test_run(spec, subspec)) { - /* For some reason test_verifier executes programs - * with all capabilities restored. Do the same here. - */ - if (restore_capabilities(&caps)) - goto tobj_cleanup; - /* Do bpf_map__attach_struct_ops() for each struct_ops map. * This should trigger bpf_struct_ops->reg callback on kernel side. */ diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index fda7589c5023..0921939532c6 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -138,6 +138,18 @@ static int sched_next_online(int pid, int *next_to_try) return ret; } +/* Derive target_free from map_size, same as bpf_common_lru_populate */ +static unsigned int __tgt_size(unsigned int map_size) +{ + return (map_size / nr_cpus) / 2; +} + +/* Inverse of how bpf_common_lru_populate derives target_free from map_size. */ +static unsigned int __map_size(unsigned int tgt_free) +{ + return tgt_free * nr_cpus * 2; +} + /* Size of the LRU map is 2 * Add key=1 (+1 key) * Add key=2 (+1 key) @@ -231,11 +243,11 @@ static void test_lru_sanity0(int map_type, int map_flags) printf("Pass\n"); } -/* Size of the LRU map is 1.5*tgt_free - * Insert 1 to tgt_free (+tgt_free keys) - * Lookup 1 to tgt_free/2 - * Insert 1+tgt_free to 2*tgt_free (+tgt_free keys) - * => 1+tgt_free/2 to LOCALFREE_TARGET will be removed by LRU +/* Verify that unreferenced elements are recycled before referenced ones. + * Insert elements. + * Reference a subset of these. + * Insert more, enough to trigger recycling. + * Verify that unreferenced are recycled. */ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) { @@ -257,7 +269,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) batch_size = tgt_free / 2; assert(batch_size * 2 == tgt_free); - map_size = tgt_free + batch_size; + map_size = __map_size(tgt_free) + batch_size; lru_map_fd = create_map(map_type, map_flags, map_size); assert(lru_map_fd != -1); @@ -266,13 +278,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1 to tgt_free (+tgt_free keys) */ - end_key = 1 + tgt_free; + /* Insert map_size - batch_size keys */ + end_key = 1 + __map_size(tgt_free); for (key = 1; key < end_key; key++) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - /* Lookup 1 to tgt_free/2 */ + /* Lookup 1 to batch_size */ end_key = 1 + batch_size; for (key = 1; key < end_key; key++) { assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); @@ -280,12 +292,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) BPF_NOEXIST)); } - /* Insert 1+tgt_free to 2*tgt_free - * => 1+tgt_free/2 to LOCALFREE_TARGET will be + /* Insert another map_size - batch_size keys + * Map will contain 1 to batch_size plus these latest, i.e., + * => previous 1+batch_size to map_size - batch_size will have been * removed by LRU */ - key = 1 + tgt_free; - end_key = key + tgt_free; + key = 1 + __map_size(tgt_free); + end_key = key + __map_size(tgt_free); for (; key < end_key; key++) { assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -301,17 +314,8 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) printf("Pass\n"); } -/* Size of the LRU map 1.5 * tgt_free - * Insert 1 to tgt_free (+tgt_free keys) - * Update 1 to tgt_free/2 - * => The original 1 to tgt_free/2 will be removed due to - * the LRU shrink process - * Re-insert 1 to tgt_free/2 again and do a lookup immeidately - * Insert 1+tgt_free to tgt_free*3/2 - * Insert 1+tgt_free*3/2 to tgt_free*5/2 - * => Key 1+tgt_free to tgt_free*3/2 - * will be removed from LRU because it has never - * been lookup and ref bit is not set +/* Verify that insertions exceeding map size will recycle the oldest. + * Verify that unreferenced elements are recycled before referenced. */ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) { @@ -334,7 +338,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) batch_size = tgt_free / 2; assert(batch_size * 2 == tgt_free); - map_size = tgt_free + batch_size; + map_size = __map_size(tgt_free) + batch_size; lru_map_fd = create_map(map_type, map_flags, map_size); assert(lru_map_fd != -1); @@ -343,8 +347,8 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1 to tgt_free (+tgt_free keys) */ - end_key = 1 + tgt_free; + /* Insert map_size - batch_size keys */ + end_key = 1 + __map_size(tgt_free); for (key = 1; key < end_key; key++) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -357,8 +361,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) * shrink the inactive list to get tgt_free * number of free nodes. * - * Hence, the oldest key 1 to tgt_free/2 - * are removed from the LRU list. + * Hence, the oldest key is removed from the LRU list. */ key = 1; if (map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { @@ -370,8 +373,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) BPF_EXIST)); } - /* Re-insert 1 to tgt_free/2 again and do a lookup - * immeidately. + /* Re-insert 1 to batch_size again and do a lookup immediately. */ end_key = 1 + batch_size; value[0] = 4321; @@ -387,17 +389,18 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1+tgt_free to tgt_free*3/2 */ - end_key = 1 + tgt_free + batch_size; - for (key = 1 + tgt_free; key < end_key; key++) + /* Insert batch_size new elements */ + key = 1 + __map_size(tgt_free); + end_key = key + batch_size; + for (; key < end_key; key++) /* These newly added but not referenced keys will be * gone during the next LRU shrink. */ assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - /* Insert 1+tgt_free*3/2 to tgt_free*5/2 */ - end_key = key + tgt_free; + /* Insert map_size - batch_size elements */ + end_key += __map_size(tgt_free); for (; key < end_key; key++) { assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -413,12 +416,12 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) printf("Pass\n"); } -/* Size of the LRU map is 2*tgt_free - * It is to test the active/inactive list rotation - * Insert 1 to 2*tgt_free (+2*tgt_free keys) - * Lookup key 1 to tgt_free*3/2 - * Add 1+2*tgt_free to tgt_free*5/2 (+tgt_free/2 keys) - * => key 1+tgt_free*3/2 to 2*tgt_free are removed from LRU +/* Test the active/inactive list rotation + * + * Fill the whole map, deplete the free list. + * Reference all except the last lru->target_free elements. + * Insert lru->target_free new elements. This triggers one shrink. + * Verify that the non-referenced elements are replaced. */ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) { @@ -437,8 +440,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) assert(sched_next_online(0, &next_cpu) != -1); - batch_size = tgt_free / 2; - assert(batch_size * 2 == tgt_free); + batch_size = __tgt_size(tgt_free); map_size = tgt_free * 2; lru_map_fd = create_map(map_type, map_flags, map_size); @@ -449,23 +451,21 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1 to 2*tgt_free (+2*tgt_free keys) */ - end_key = 1 + (2 * tgt_free); + /* Fill the map */ + end_key = 1 + map_size; for (key = 1; key < end_key; key++) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - /* Lookup key 1 to tgt_free*3/2 */ - end_key = tgt_free + batch_size; + /* Reference all but the last batch_size */ + end_key = 1 + map_size - batch_size; for (key = 1; key < end_key; key++) { assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, BPF_NOEXIST)); } - /* Add 1+2*tgt_free to tgt_free*5/2 - * (+tgt_free/2 keys) - */ + /* Insert new batch_size: replaces the non-referenced elements */ key = 2 * tgt_free + 1; end_key = key + batch_size; for (; key < end_key; key++) { @@ -500,7 +500,8 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free) lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free * nr_cpus); else - lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free); + lru_map_fd = create_map(map_type, map_flags, + 3 * __map_size(tgt_free)); assert(lru_map_fd != -1); expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh index 7989ec608454..cb55a908bb0d 100755 --- a/tools/testing/selftests/bpf/test_tc_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh @@ -305,6 +305,7 @@ else client_connect verify_data server_listen + wait_for_port ${port} ${netcat_opt} fi # serverside, use BPF for decap diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 1ec5c4c47235..7b6b9c4cadb5 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -309,6 +309,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) fprintf(stderr, "invalid top N specifier: %s\n", arg); argp_usage(state); } + break; case 'C': env.comparison_mode = true; break; diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 6f9956eed797..ad6c08dfd6c8 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -79,7 +79,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, - .flags = XSK_UMEM__DEFAULT_FLAGS, + .flags = XDP_UMEM_TX_METADATA_LEN, .tx_metadata_len = sizeof(struct xsk_tx_metadata), }; __u32 idx = 0; diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c index 8d275f03e977..8d233ac95696 100644 --- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c +++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c @@ -127,22 +127,42 @@ int run_test(int cpu) return KSFT_PASS; } +/* + * Reads the suspend success count from sysfs. + * Returns the count on success or exits on failure. + */ +static int get_suspend_success_count_or_fail(void) +{ + FILE *fp; + int val; + + fp = fopen("/sys/power/suspend_stats/success", "r"); + if (!fp) + ksft_exit_fail_msg( + "Failed to open suspend_stats/success: %s\n", + strerror(errno)); + + if (fscanf(fp, "%d", &val) != 1) { + fclose(fp); + ksft_exit_fail_msg( + "Failed to read suspend success count\n"); + } + + fclose(fp); + return val; +} + void suspend(void) { - int power_state_fd; int timerfd; int err; + int count_before; + int count_after; struct itimerspec spec = {}; if (getuid() != 0) ksft_exit_skip("Please run the test as root - Exiting.\n"); - power_state_fd = open("/sys/power/state", O_RDWR); - if (power_state_fd < 0) - ksft_exit_fail_msg( - "open(\"/sys/power/state\") failed %s)\n", - strerror(errno)); - timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0); if (timerfd < 0) ksft_exit_fail_msg("timerfd_create() failed\n"); @@ -152,14 +172,15 @@ void suspend(void) if (err < 0) ksft_exit_fail_msg("timerfd_settime() failed\n"); + count_before = get_suspend_success_count_or_fail(); + system("(echo mem > /sys/power/state) 2> /dev/null"); - timerfd_gettime(timerfd, &spec); - if (spec.it_value.tv_sec != 0 || spec.it_value.tv_nsec != 0) + count_after = get_suspend_success_count_or_fail(); + if (count_after <= count_before) ksft_exit_fail_msg("Failed to enter Suspend state\n"); close(timerfd); - close(power_state_fd); } int main(int argc, char **argv) diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh index 3f45512fb512..7406c24be1ac 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # Test the special cpuset v1 hotplug case where a cpuset become empty of diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh index e350c521b467..3aad9db921b5 100755 --- a/tools/testing/selftests/cpufreq/cpufreq.sh +++ b/tools/testing/selftests/cpufreq/cpufreq.sh @@ -244,9 +244,10 @@ do_suspend() printf "Failed to suspend using RTC wake alarm\n" return 1 fi + else + echo $filename > $SYSFS/power/state fi - echo $filename > $SYSFS/power/state printf "Came out of $1\n" printf "Do basic tests after finishing $1 to verify cpufreq state\n\n" diff --git a/tools/testing/selftests/damon/damon_nr_regions.py b/tools/testing/selftests/damon/damon_nr_regions.py index 2e8a74aff543..58f3291fed12 100755 --- a/tools/testing/selftests/damon/damon_nr_regions.py +++ b/tools/testing/selftests/damon/damon_nr_regions.py @@ -65,6 +65,7 @@ def test_nr_regions(real_nr_regions, min_nr_regions, max_nr_regions): test_name = 'nr_regions test with %d/%d/%d real/min/max nr_regions' % ( real_nr_regions, min_nr_regions, max_nr_regions) + collected_nr_regions.sort() if (collected_nr_regions[0] < min_nr_regions or collected_nr_regions[-1] > max_nr_regions): print('fail %s' % test_name) @@ -109,6 +110,7 @@ def main(): attrs = kdamonds.kdamonds[0].contexts[0].monitoring_attrs attrs.min_nr_regions = 3 attrs.max_nr_regions = 7 + attrs.update_us = 100000 err = kdamonds.kdamonds[0].commit() if err is not None: proc.terminate() diff --git a/tools/testing/selftests/damon/damos_quota.py b/tools/testing/selftests/damon/damos_quota.py index 7d4c6bb2e3cd..57c4937aaed2 100755 --- a/tools/testing/selftests/damon/damos_quota.py +++ b/tools/testing/selftests/damon/damos_quota.py @@ -51,16 +51,19 @@ def main(): nr_quota_exceeds = scheme.stats.qt_exceeds wss_collected.sort() + nr_expected_quota_exceeds = 0 for wss in wss_collected: if wss > sz_quota: print('quota is not kept: %s > %s' % (wss, sz_quota)) print('collected samples are as below') print('\n'.join(['%d' % wss for wss in wss_collected])) exit(1) + if wss == sz_quota: + nr_expected_quota_exceeds += 1 - if nr_quota_exceeds < len(wss_collected): - print('quota is not always exceeded: %d > %d' % - (len(wss_collected), nr_quota_exceeds)) + if nr_quota_exceeds < nr_expected_quota_exceeds: + print('quota is exceeded less than expected: %d < %d' % + (nr_quota_exceeds, nr_expected_quota_exceeds)) exit(1) if __name__ == '__main__': diff --git a/tools/testing/selftests/damon/damos_quota_goal.py b/tools/testing/selftests/damon/damos_quota_goal.py index 18246f3b62f7..f76e0412b564 100755 --- a/tools/testing/selftests/damon/damos_quota_goal.py +++ b/tools/testing/selftests/damon/damos_quota_goal.py @@ -63,6 +63,9 @@ def main(): if last_effective_bytes != 0 else -1.0)) if last_effective_bytes == goal.effective_bytes: + # effective quota was already minimum that cannot be more reduced + if expect_increase is False and last_effective_bytes == 1: + continue print('efective bytes not changed: %d' % goal.effective_bytes) exit(1) diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh index edc56e2cc606..7bc148889ca7 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -11,8 +11,8 @@ ALL_TESTS=" lib_dir=$(dirname "$0") source ${lib_dir}/bond_topo_3d1c.sh -c_maddr="33:33:00:00:00:10" -g_maddr="33:33:00:00:02:54" +c_maddr="33:33:ff:00:00:10" +g_maddr="33:33:ff:00:02:54" skip_prio() { diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 1ea9bb695e94..3f35faca6559 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -224,7 +224,7 @@ class NetDrvEpEnv: if not self._require_cmd(comm, "local"): raise KsftSkipEx("Test requires command: " + comm) if remote: - if not self._require_cmd(comm, "remote"): + if not self._require_cmd(comm, "remote", host=self.remote): raise KsftSkipEx("Test requires (remote) command: " + comm) def wait_hw_stats_settle(self): diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py index d9c10613ae67..44151b7b1a24 100644 --- a/tools/testing/selftests/drivers/net/lib/py/load.py +++ b/tools/testing/selftests/drivers/net/lib/py/load.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +import re import time from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen @@ -10,12 +11,11 @@ class GenerateTraffic: self.env = env - if port is None: - port = rand_port() - self._iperf_server = cmd(f"iperf3 -s -1 -p {port}", background=True) - wait_port_listen(port) + self.port = rand_port() if port is None else port + self._iperf_server = cmd(f"iperf3 -s -1 -p {self.port}", background=True) + wait_port_listen(self.port) time.sleep(0.1) - self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {port} -t 86400", + self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {self.port} -t 86400", background=True, host=env.remote) # Wait for traffic to ramp up @@ -56,3 +56,16 @@ class GenerateTraffic: ksft_pr(">> Server:") ksft_pr(self._iperf_server.stdout) ksft_pr(self._iperf_server.stderr) + self._wait_client_stopped() + + def _wait_client_stopped(self, sleep=0.005, timeout=5): + end = time.monotonic() + timeout + + live_port_pattern = re.compile(fr":{self.port:04X} 0[^6] ") + + while time.monotonic() < end: + data = cmd("cat /proc/net/tcp*", host=self.env.remote).stdout + if not live_port_pattern.search(data): + return + time.sleep(sleep) + raise Exception(f"Waiting for client to stop timed out after {timeout}s") diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index 384cfa3d38a6..92c2f0376c08 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -142,7 +142,7 @@ function pre_ethtool { } function check_table { - local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1 + local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1 local -n expected=$2 local last=$3 @@ -212,7 +212,7 @@ function check_tables { } function print_table { - local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1 + local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1 read -a have < $path tree $NSIM_DEV_DFS/ @@ -641,7 +641,7 @@ for port in 0 1; do NSIM_NETDEV=`get_netdev_name old_netdevs` ip link set dev $NSIM_NETDEV up - echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error + echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error msg="1 - create VxLANs v6" exp0=( 0 0 0 0 ) @@ -663,7 +663,7 @@ for port in 0 1; do new_geneve gnv0 20000 msg="2 - destroy GENEVE" - echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error + echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error exp1=( `mke 20000 2` 0 0 0 ) del_dev gnv0 @@ -764,7 +764,7 @@ for port in 0 1; do msg="create VxLANs v4" new_vxlan vxlan0 10000 $NSIM_NETDEV - echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="NIC device goes down" @@ -775,7 +775,7 @@ for port in 0 1; do fi check_tables - echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="NIC device goes up again" @@ -789,7 +789,7 @@ for port in 0 1; do del_dev vxlan0 check_tables - echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="destroy NIC" @@ -896,7 +896,7 @@ msg="vacate VxLAN in overflow table" exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` ) del_dev vxlan2 -echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset +echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset check_tables msg="tunnels destroyed 2" diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index 30f29096e27c..4868b514ae78 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -40,10 +40,9 @@ def addremove_queues(cfg, nl) -> None: netnl = EthtoolFamily() channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}}) - if channels['combined-count'] == 0: - rx_type = 'rx' - else: - rx_type = 'combined' + rx_type = 'rx' + if channels.get('combined-count', 0) > 0: + rx_type = 'combined' expected = curr_queues - 1 cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10) diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c index c773334bbcc9..550e5d762c23 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -383,6 +383,10 @@ static void test_statmount_mnt_point(void) return; } + if (!(sm->mask & STATMOUNT_MNT_POINT)) { + ksft_test_result_fail("missing STATMOUNT_MNT_POINT in mask\n"); + return; + } if (strcmp(sm->str + sm->mnt_point, "/") != 0) { ksft_test_result_fail("unexpected mount point: '%s' != '/'\n", sm->str + sm->mnt_point); @@ -408,6 +412,10 @@ static void test_statmount_mnt_root(void) strerror(errno)); return; } + if (!(sm->mask & STATMOUNT_MNT_ROOT)) { + ksft_test_result_fail("missing STATMOUNT_MNT_ROOT in mask\n"); + return; + } mnt_root = sm->str + sm->mnt_root; last_root = strrchr(mnt_root, '/'); if (last_root) @@ -437,6 +445,10 @@ static void test_statmount_fs_type(void) strerror(errno)); return; } + if (!(sm->mask & STATMOUNT_FS_TYPE)) { + ksft_test_result_fail("missing STATMOUNT_FS_TYPE in mask\n"); + return; + } fs_type = sm->str + sm->fs_type; for (s = known_fs; s != NULL; s++) { if (strcmp(fs_type, *s) == 0) @@ -464,6 +476,11 @@ static void test_statmount_mnt_opts(void) return; } + if (!(sm->mask & STATMOUNT_MNT_BASIC)) { + ksft_test_result_fail("missing STATMOUNT_MNT_BASIC in mask\n"); + return; + } + while (getline(&line, &len, f_mountinfo) != -1) { int i; char *p, *p2; @@ -514,7 +531,10 @@ static void test_statmount_mnt_opts(void) if (p2) *p2 = '\0'; - statmount_opts = sm->str + sm->mnt_opts; + if (sm->mask & STATMOUNT_MNT_OPTS) + statmount_opts = sm->str + sm->mnt_opts; + else + statmount_opts = ""; if (strcmp(statmount_opts, p) != 0) ksft_test_result_fail( "unexpected mount options: '%s' != '%s'\n", diff --git a/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc index 35e8d47d6072..8a7ce647a60d 100644 --- a/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc +++ b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc @@ -15,11 +15,11 @@ find_alternate_gid() { tac /etc/group | grep -v ":$original_gid:" | head -1 | cut -d: -f3 } -mount_tracefs_with_options() { +remount_tracefs_with_options() { local mount_point="$1" local options="$2" - mount -t tracefs -o "$options" nodev "$mount_point" + mount -t tracefs -o "remount,$options" nodev "$mount_point" setup } @@ -81,7 +81,7 @@ test_gid_mount_option() { # Unmount existing tracefs instance and mount with new GID unmount_tracefs "$mount_point" - mount_tracefs_with_options "$mount_point" "$new_options" + remount_tracefs_with_options "$mount_point" "$new_options" check_gid "$mount_point" "$other_group" @@ -92,7 +92,7 @@ test_gid_mount_option() { # Unmount and remount with the original GID unmount_tracefs "$mount_point" - mount_tracefs_with_options "$mount_point" "$mount_options" + remount_tracefs_with_options "$mount_point" "$mount_options" check_gid "$mount_point" "$original_group" } diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc index b7c8f29c09a9..65916bb55dfb 100644 --- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc @@ -14,11 +14,35 @@ fail() { #msg exit_fail } +# As reading trace can last forever, simply look for 3 different +# events then exit out of reading the file. If there's not 3 different +# events, then the test has failed. +check_unique() { + cat trace | grep -v '^#' | awk ' + BEGIN { cnt = 0; } + { + for (i = 0; i < cnt; i++) { + if (event[i] == $5) { + break; + } + } + if (i == cnt) { + event[cnt++] = $5; + if (cnt > 2) { + exit; + } + } + } + END { + printf "%d", cnt; + }' +} + echo 'sched:*' > set_event yield -count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` +count=`check_unique` if [ $count -lt 3 ]; then fail "at least fork, exec and exit events should be recorded" fi @@ -29,7 +53,7 @@ echo 1 > events/sched/enable yield -count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` +count=`check_unique` if [ $count -lt 3 ]; then fail "at least fork, exec and exit events should be recorded" fi diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc index 4b994b6df5ac..ed81eaf2afd6 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc @@ -29,7 +29,7 @@ ftrace_filter_check 'schedule*' '^schedule.*$' ftrace_filter_check '*pin*lock' '.*pin.*lock$' # filter by start*mid* -ftrace_filter_check 'mutex*try*' '^mutex.*try.*' +ftrace_filter_check 'mutex*unl*' '^mutex.*unl.*' # Advanced full-glob matching feature is recently supported. # Skip the tests if we are sure the kernel does not support it. diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c index 7d7a6a06cdb7..2d8230da9064 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c @@ -98,7 +98,7 @@ int main(int argc, char *argv[]) info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC); if (!res || errno != EWOULDBLOCK) { - ksft_test_result_pass("futex_waitv returned: %d %s\n", + ksft_test_result_fail("futex_waitv returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); ret = RET_FAIL; diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h index ddbcfc9b7bac..7a5fd1d5355e 100644 --- a/tools/testing/selftests/futex/include/futextest.h +++ b/tools/testing/selftests/futex/include/futextest.h @@ -47,6 +47,17 @@ typedef volatile u_int32_t futex_t; FUTEX_PRIVATE_FLAG) #endif +/* + * SYS_futex is expected from system C library, in glibc some 32-bit + * architectures (e.g. RV32) are using 64-bit time_t, therefore it doesn't have + * SYS_futex defined but just SYS_futex_time64. Define SYS_futex as + * SYS_futex_time64 in this situation to ensure the compilation and the + * compatibility. + */ +#if !defined(SYS_futex) && defined(SYS_futex_time64) +#define SYS_futex SYS_futex_time64 +#endif + /** * futex() - SYS_futex syscall wrapper * @uaddr: address of first futex diff --git a/tools/testing/selftests/gpio/gpio-sim.sh b/tools/testing/selftests/gpio/gpio-sim.sh index 6fb66a687f17..bbc29ed9c60a 100755 --- a/tools/testing/selftests/gpio/gpio-sim.sh +++ b/tools/testing/selftests/gpio/gpio-sim.sh @@ -46,12 +46,6 @@ remove_chip() { rmdir $CONFIGFS_DIR/$CHIP || fail "Unable to remove the chip" } -configfs_cleanup() { - for CHIP in `ls $CONFIGFS_DIR/`; do - remove_chip $CHIP - done -} - create_chip() { local CHIP=$1 @@ -105,6 +99,13 @@ disable_chip() { echo 0 > $CONFIGFS_DIR/$CHIP/live || fail "Unable to disable the chip" } +configfs_cleanup() { + for CHIP in `ls $CONFIGFS_DIR/`; do + disable_chip $CHIP + remove_chip $CHIP + done +} + configfs_chip_name() { local CHIP=$1 local BANK=$2 @@ -181,6 +182,7 @@ create_chip chip create_bank chip bank enable_chip chip test -n `cat $CONFIGFS_DIR/chip/bank/chip_name` || fail "chip_name doesn't work" +disable_chip chip remove_chip chip echo "1.2. chip_name returns 'none' if the chip is still pending" @@ -195,6 +197,7 @@ create_chip chip create_bank chip bank enable_chip chip test -n `cat $CONFIGFS_DIR/chip/dev_name` || fail "dev_name doesn't work" +disable_chip chip remove_chip chip echo "2. Creating and configuring simulated chips" @@ -204,6 +207,7 @@ create_chip chip create_bank chip bank enable_chip chip test "`get_chip_num_lines chip bank`" = "1" || fail "default number of lines is not 1" +disable_chip chip remove_chip chip echo "2.2. Number of lines can be specified" @@ -212,6 +216,7 @@ create_bank chip bank set_num_lines chip bank 16 enable_chip chip test "`get_chip_num_lines chip bank`" = "16" || fail "number of lines is not 16" +disable_chip chip remove_chip chip echo "2.3. Label can be set" @@ -220,6 +225,7 @@ create_bank chip bank set_label chip bank foobar enable_chip chip test "`get_chip_label chip bank`" = "foobar" || fail "label is incorrect" +disable_chip chip remove_chip chip echo "2.4. Label can be left empty" @@ -227,6 +233,7 @@ create_chip chip create_bank chip bank enable_chip chip test -z "`cat $CONFIGFS_DIR/chip/bank/label`" || fail "label is not empty" +disable_chip chip remove_chip chip echo "2.5. Line names can be configured" @@ -238,6 +245,7 @@ set_line_name chip bank 2 bar enable_chip chip test "`get_line_name chip bank 0`" = "foo" || fail "line name is incorrect" test "`get_line_name chip bank 2`" = "bar" || fail "line name is incorrect" +disable_chip chip remove_chip chip echo "2.6. Line config can remain unused if offset is greater than number of lines" @@ -248,6 +256,7 @@ set_line_name chip bank 5 foobar enable_chip chip test "`get_line_name chip bank 0`" = "" || fail "line name is incorrect" test "`get_line_name chip bank 1`" = "" || fail "line name is incorrect" +disable_chip chip remove_chip chip echo "2.7. Line configfs directory names are sanitized" @@ -267,6 +276,7 @@ for CHIP in $CHIPS; do enable_chip $CHIP done for CHIP in $CHIPS; do + disable_chip $CHIP remove_chip $CHIP done @@ -278,6 +288,7 @@ echo foobar > $CONFIGFS_DIR/chip/bank/label 2> /dev/null && \ fail "Setting label of a live chip should fail" echo 8 > $CONFIGFS_DIR/chip/bank/num_lines 2> /dev/null && \ fail "Setting number of lines of a live chip should fail" +disable_chip chip remove_chip chip echo "2.10. Can't create line items when chip is live" @@ -285,6 +296,7 @@ create_chip chip create_bank chip bank enable_chip chip mkdir $CONFIGFS_DIR/chip/bank/line0 2> /dev/null && fail "Creating line item should fail" +disable_chip chip remove_chip chip echo "2.11. Probe errors are propagated to user-space" @@ -316,6 +328,7 @@ mkdir -p $CONFIGFS_DIR/chip/bank/line4/hog enable_chip chip $BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 4 2> /dev/null && \ fail "Setting the value of a hogged line shouldn't succeed" +disable_chip chip remove_chip chip echo "3. Controlling simulated chips" @@ -331,6 +344,7 @@ test "$?" = "1" || fail "pull set incorrectly" sysfs_set_pull chip bank 0 pull-down $BASE_DIR/gpio-mockup-cdev /dev/`configfs_chip_name chip bank` 1 test "$?" = "0" || fail "pull set incorrectly" +disable_chip chip remove_chip chip echo "3.2. Pull can be read from sysfs" @@ -344,6 +358,7 @@ SYSFS_PATH=/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull test `cat $SYSFS_PATH` = "pull-down" || fail "reading the pull failed" sysfs_set_pull chip bank 0 pull-up test `cat $SYSFS_PATH` = "pull-up" || fail "reading the pull failed" +disable_chip chip remove_chip chip echo "3.3. Incorrect input in sysfs is rejected" @@ -355,6 +370,7 @@ DEVNAME=`configfs_dev_name chip` CHIPNAME=`configfs_chip_name chip bank` SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull" echo foobar > $SYSFS_PATH 2> /dev/null && fail "invalid input not detected" +disable_chip chip remove_chip chip echo "3.4. Can't write to value" @@ -365,6 +381,7 @@ DEVNAME=`configfs_dev_name chip` CHIPNAME=`configfs_chip_name chip bank` SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value" echo 1 > $SYSFS_PATH 2> /dev/null && fail "writing to 'value' succeeded unexpectedly" +disable_chip chip remove_chip chip echo "4. Simulated GPIO chips are functional" @@ -382,6 +399,7 @@ $BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 0 & sleep 0.1 # FIXME Any better way? test `cat $SYSFS_PATH` = "1" || fail "incorrect value read from sysfs" kill $! +disable_chip chip remove_chip chip echo "4.2. Bias settings work correctly" @@ -394,6 +412,7 @@ CHIPNAME=`configfs_chip_name chip bank` SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value" $BASE_DIR/gpio-mockup-cdev -b pull-up /dev/`configfs_chip_name chip bank` 0 test `cat $SYSFS_PATH` = "1" || fail "bias setting does not work" +disable_chip chip remove_chip chip echo "GPIO $MODULE test PASS" diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 4927b9add5ad..a81c22d52007 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -289,6 +289,10 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) &test_hwpt_id); test_err_hwpt_alloc(EINVAL, self->device_id, self->device_id, 0, &test_hwpt_id); + test_err_hwpt_alloc(EOPNOTSUPP, self->device_id, self->ioas_id, + IOMMU_HWPT_ALLOC_NEST_PARENT | + IOMMU_HWPT_FAULT_ID_VALID, + &test_hwpt_id); test_cmd_hwpt_alloc(self->device_id, self->ioas_id, IOMMU_HWPT_ALLOC_NEST_PARENT, @@ -1744,6 +1748,7 @@ FIXTURE_VARIANT(iommufd_dirty_tracking) FIXTURE_SETUP(iommufd_dirty_tracking) { + size_t mmap_buffer_size; unsigned long size; int mmap_flags; void *vrc; @@ -1758,22 +1763,33 @@ FIXTURE_SETUP(iommufd_dirty_tracking) self->fd = open("/dev/iommu", O_RDWR); ASSERT_NE(-1, self->fd); - rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, variant->buffer_size); - if (rc || !self->buffer) { - SKIP(return, "Skipping buffer_size=%lu due to errno=%d", - variant->buffer_size, rc); - } - mmap_flags = MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED; + mmap_buffer_size = variant->buffer_size; if (variant->hugepages) { /* * MAP_POPULATE will cause the kernel to fail mmap if THPs are * not available. */ mmap_flags |= MAP_HUGETLB | MAP_POPULATE; + + /* + * Allocation must be aligned to the HUGEPAGE_SIZE, because the + * following mmap() will automatically align the length to be a + * multiple of the underlying huge page size. Failing to do the + * same at this allocation will result in a memory overwrite by + * the mmap(). + */ + if (mmap_buffer_size < HUGEPAGE_SIZE) + mmap_buffer_size = HUGEPAGE_SIZE; + } + + rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, mmap_buffer_size); + if (rc || !self->buffer) { + SKIP(return, "Skipping buffer_size=%lu due to errno=%d", + mmap_buffer_size, rc); } assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0); - vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE, + vrc = mmap(self->buffer, mmap_buffer_size, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); assert(vrc == self->buffer); @@ -1802,8 +1818,8 @@ FIXTURE_SETUP(iommufd_dirty_tracking) FIXTURE_TEARDOWN(iommufd_dirty_tracking) { - munmap(self->buffer, variant->buffer_size); - munmap(self->bitmap, DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE)); + free(self->buffer); + free(self->bitmap); teardown_iommufd(self->fd, _metadata); } diff --git a/tools/testing/selftests/kselftest/ktap_helpers.sh b/tools/testing/selftests/kselftest/ktap_helpers.sh index 79a125eb24c2..14e7f3ec3f84 100644 --- a/tools/testing/selftests/kselftest/ktap_helpers.sh +++ b/tools/testing/selftests/kselftest/ktap_helpers.sh @@ -40,7 +40,7 @@ ktap_skip_all() { __ktap_test() { result="$1" description="$2" - directive="$3" # optional + directive="${3:-}" # optional local directive_str= [ ! -z "$directive" ] && directive_str="# $directive" diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index a5a72415e37b..666c9fde76da 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -760,33 +760,33 @@ /* Report with actual signedness to avoid weird output. */ \ switch (is_signed_type(__exp) * 2 + is_signed_type(__seen)) { \ case 0: { \ - unsigned long long __exp_print = (uintptr_t)__exp; \ - unsigned long long __seen_print = (uintptr_t)__seen; \ - __TH_LOG("Expected %s (%llu) %s %s (%llu)", \ + uintmax_t __exp_print = (uintmax_t)__exp; \ + uintmax_t __seen_print = (uintmax_t)__seen; \ + __TH_LOG("Expected %s (%ju) %s %s (%ju)", \ _expected_str, __exp_print, #_t, \ _seen_str, __seen_print); \ break; \ } \ case 1: { \ - unsigned long long __exp_print = (uintptr_t)__exp; \ - long long __seen_print = (intptr_t)__seen; \ - __TH_LOG("Expected %s (%llu) %s %s (%lld)", \ + uintmax_t __exp_print = (uintmax_t)__exp; \ + intmax_t __seen_print = (intmax_t)__seen; \ + __TH_LOG("Expected %s (%ju) %s %s (%jd)", \ _expected_str, __exp_print, #_t, \ _seen_str, __seen_print); \ break; \ } \ case 2: { \ - long long __exp_print = (intptr_t)__exp; \ - unsigned long long __seen_print = (uintptr_t)__seen; \ - __TH_LOG("Expected %s (%lld) %s %s (%llu)", \ + intmax_t __exp_print = (intmax_t)__exp; \ + uintmax_t __seen_print = (uintmax_t)__seen; \ + __TH_LOG("Expected %s (%jd) %s %s (%ju)", \ _expected_str, __exp_print, #_t, \ _seen_str, __seen_print); \ break; \ } \ case 3: { \ - long long __exp_print = (intptr_t)__exp; \ - long long __seen_print = (intptr_t)__seen; \ - __TH_LOG("Expected %s (%lld) %s %s (%lld)", \ + intmax_t __exp_print = (intmax_t)__exp; \ + intmax_t __seen_print = (intmax_t)__seen; \ + __TH_LOG("Expected %s (%jd) %s %s (%jd)", \ _expected_str, __exp_print, #_t, \ _seen_str, __seen_print); \ break; \ diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile index 348e2dbdb4e0..480f13e77fcc 100644 --- a/tools/testing/selftests/landlock/Makefile +++ b/tools/testing/selftests/landlock/Makefile @@ -13,11 +13,11 @@ TEST_GEN_PROGS := $(src_test:.c=) TEST_GEN_PROGS_EXTENDED := true # Short targets: -$(TEST_GEN_PROGS): LDLIBS += -lcap +$(TEST_GEN_PROGS): LDLIBS += -lcap -lpthread $(TEST_GEN_PROGS_EXTENDED): LDFLAGS += -static include ../lib.mk # Targets with $(OUTPUT)/ prefix: -$(TEST_GEN_PROGS): LDLIBS += -lcap +$(TEST_GEN_PROGS): LDLIBS += -lcap -lpthread $(TEST_GEN_PROGS_EXTENDED): LDFLAGS += -static diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c index 1bc16fde2e8a..4766f8fec9f6 100644 --- a/tools/testing/selftests/landlock/base_test.c +++ b/tools/testing/selftests/landlock/base_test.c @@ -98,10 +98,54 @@ TEST(abi_version) ASSERT_EQ(EINVAL, errno); } +/* + * Old source trees might not have the set of Kselftest fixes related to kernel + * UAPI headers. + */ +#ifndef LANDLOCK_CREATE_RULESET_ERRATA +#define LANDLOCK_CREATE_RULESET_ERRATA (1U << 1) +#endif + +TEST(errata) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, + }; + int errata; + + errata = landlock_create_ruleset(NULL, 0, + LANDLOCK_CREATE_RULESET_ERRATA); + /* The errata bitmask will not be backported to tests. */ + ASSERT_LE(0, errata); + TH_LOG("errata: 0x%x", errata); + + ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, + LANDLOCK_CREATE_RULESET_ERRATA)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr), + LANDLOCK_CREATE_RULESET_ERRATA)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), + LANDLOCK_CREATE_RULESET_ERRATA)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, landlock_create_ruleset( + NULL, 0, + LANDLOCK_CREATE_RULESET_VERSION | + LANDLOCK_CREATE_RULESET_ERRATA)); + ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0, + LANDLOCK_CREATE_RULESET_ERRATA | + 1 << 31)); + ASSERT_EQ(EINVAL, errno); +} + /* Tests ordering of syscall argument checks. */ TEST(create_ruleset_checks_ordering) { - const int last_flag = LANDLOCK_CREATE_RULESET_VERSION; + const int last_flag = LANDLOCK_CREATE_RULESET_ERRATA; const int invalid_flag = last_flag << 1; int ruleset_fd; const struct landlock_ruleset_attr ruleset_attr = { diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index 61056fa074bb..60afc1ce11bc 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -68,6 +68,7 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all) CAP_MKNOD, CAP_NET_ADMIN, CAP_NET_BIND_SERVICE, + CAP_SETUID, CAP_SYS_ADMIN, CAP_SYS_CHROOT, /* clang-format on */ @@ -234,6 +235,7 @@ enforce_ruleset(struct __test_metadata *const _metadata, const int ruleset_fd) struct protocol_variant { int domain; int type; + int protocol; }; struct service_fixture { diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config index 29af19c4e9f9..a8982da4acbd 100644 --- a/tools/testing/selftests/landlock/config +++ b/tools/testing/selftests/landlock/config @@ -3,6 +3,8 @@ CONFIG_CGROUP_SCHED=y CONFIG_INET=y CONFIG_IPV6=y CONFIG_KEYS=y +CONFIG_MPTCP=y +CONFIG_MPTCP_IPV6=y CONFIG_NET=y CONFIG_NET_NS=y CONFIG_OVERLAY_FS=y diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 6788762188fe..97d360eae4f6 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -2003,8 +2003,7 @@ static void test_execute(struct __test_metadata *const _metadata, const int err, ASSERT_EQ(1, WIFEXITED(status)); ASSERT_EQ(err ? 2 : 0, WEXITSTATUS(status)) { - TH_LOG("Unexpected return code for \"%s\": %s", path, - strerror(errno)); + TH_LOG("Unexpected return code for \"%s\"", path); }; } diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c index 4e0aeb53b225..376079d70d3f 100644 --- a/tools/testing/selftests/landlock/net_test.c +++ b/tools/testing/selftests/landlock/net_test.c @@ -85,18 +85,18 @@ static void setup_loopback(struct __test_metadata *const _metadata) clear_ambient_cap(_metadata, CAP_NET_ADMIN); } +static bool prot_is_tcp(const struct protocol_variant *const prot) +{ + return (prot->domain == AF_INET || prot->domain == AF_INET6) && + prot->type == SOCK_STREAM && + (prot->protocol == IPPROTO_TCP || prot->protocol == IPPROTO_IP); +} + static bool is_restricted(const struct protocol_variant *const prot, const enum sandbox_type sandbox) { - switch (prot->domain) { - case AF_INET: - case AF_INET6: - switch (prot->type) { - case SOCK_STREAM: - return sandbox == TCP_SANDBOX; - } - break; - } + if (sandbox == TCP_SANDBOX) + return prot_is_tcp(prot); return false; } @@ -105,7 +105,7 @@ static int socket_variant(const struct service_fixture *const srv) int ret; ret = socket(srv->protocol.domain, srv->protocol.type | SOCK_CLOEXEC, - 0); + srv->protocol.protocol); if (ret < 0) return -errno; return ret; @@ -290,22 +290,59 @@ FIXTURE_TEARDOWN(protocol) } /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp) { +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp1) { /* clang-format on */ .sandbox = NO_SANDBOX, .prot = { .domain = AF_INET, .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, }, }; /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp) { +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp2) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp1) { /* clang-format on */ .sandbox = NO_SANDBOX, .prot = { .domain = AF_INET6, .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp2) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_mptcp) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, }, }; @@ -330,6 +367,17 @@ FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_udp) { }; /* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_mptcp) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, + }, +}; + +/* clang-format off */ FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_stream) { /* clang-format on */ .sandbox = NO_SANDBOX, @@ -350,22 +398,48 @@ FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_datagram) { }; /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp) { +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp1) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp2) { /* clang-format on */ .sandbox = TCP_SANDBOX, .prot = { .domain = AF_INET, .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp1) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, }, }; /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp) { +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp2) { /* clang-format on */ .sandbox = TCP_SANDBOX, .prot = { .domain = AF_INET6, .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, }, }; @@ -390,6 +464,17 @@ FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_udp) { }; /* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_mptcp) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, + }, +}; + +/* clang-format off */ FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_unix_stream) { /* clang-format on */ .sandbox = TCP_SANDBOX, @@ -400,6 +485,17 @@ FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_unix_stream) { }; /* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_mptcp) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, + }, +}; + +/* clang-format off */ FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_unix_datagram) { /* clang-format on */ .sandbox = TCP_SANDBOX, diff --git a/tools/testing/selftests/landlock/scoped_signal_test.c b/tools/testing/selftests/landlock/scoped_signal_test.c index 475ee62a832d..d8bf33417619 100644 --- a/tools/testing/selftests/landlock/scoped_signal_test.c +++ b/tools/testing/selftests/landlock/scoped_signal_test.c @@ -249,47 +249,67 @@ TEST_F(scoped_domains, check_access_signal) _metadata->exit_code = KSFT_FAIL; } -static int thread_pipe[2]; - enum thread_return { THREAD_INVALID = 0, THREAD_SUCCESS = 1, THREAD_ERROR = 2, + THREAD_TEST_FAILED = 3, }; -void *thread_func(void *arg) +static void *thread_sync(void *arg) { + const int pipe_read = *(int *)arg; char buf; - if (read(thread_pipe[0], &buf, 1) != 1) + if (read(pipe_read, &buf, 1) != 1) return (void *)THREAD_ERROR; return (void *)THREAD_SUCCESS; } -TEST(signal_scoping_threads) +TEST(signal_scoping_thread_before) { - pthread_t no_sandbox_thread, scoped_thread; + pthread_t no_sandbox_thread; enum thread_return ret = THREAD_INVALID; + int thread_pipe[2]; drop_caps(_metadata); ASSERT_EQ(0, pipe2(thread_pipe, O_CLOEXEC)); - ASSERT_EQ(0, - pthread_create(&no_sandbox_thread, NULL, thread_func, NULL)); + ASSERT_EQ(0, pthread_create(&no_sandbox_thread, NULL, thread_sync, + &thread_pipe[0])); - /* Restricts the domain after creating the first thread. */ + /* Enforces restriction after creating the thread. */ create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL); - ASSERT_EQ(EPERM, pthread_kill(no_sandbox_thread, 0)); - ASSERT_EQ(1, write(thread_pipe[1], ".", 1)); - - ASSERT_EQ(0, pthread_create(&scoped_thread, NULL, thread_func, NULL)); - ASSERT_EQ(0, pthread_kill(scoped_thread, 0)); - ASSERT_EQ(1, write(thread_pipe[1], ".", 1)); + EXPECT_EQ(0, pthread_kill(no_sandbox_thread, 0)); + EXPECT_EQ(1, write(thread_pipe[1], ".", 1)); EXPECT_EQ(0, pthread_join(no_sandbox_thread, (void **)&ret)); EXPECT_EQ(THREAD_SUCCESS, ret); + + EXPECT_EQ(0, close(thread_pipe[0])); + EXPECT_EQ(0, close(thread_pipe[1])); +} + +TEST(signal_scoping_thread_after) +{ + pthread_t scoped_thread; + enum thread_return ret = THREAD_INVALID; + int thread_pipe[2]; + + drop_caps(_metadata); + ASSERT_EQ(0, pipe2(thread_pipe, O_CLOEXEC)); + + /* Enforces restriction before creating the thread. */ + create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL); + + ASSERT_EQ(0, pthread_create(&scoped_thread, NULL, thread_sync, + &thread_pipe[0])); + + EXPECT_EQ(0, pthread_kill(scoped_thread, 0)); + EXPECT_EQ(1, write(thread_pipe[1], ".", 1)); + EXPECT_EQ(0, pthread_join(scoped_thread, (void **)&ret)); EXPECT_EQ(THREAD_SUCCESS, ret); @@ -297,6 +317,64 @@ TEST(signal_scoping_threads) EXPECT_EQ(0, close(thread_pipe[1])); } +struct thread_setuid_args { + int pipe_read, new_uid; +}; + +void *thread_setuid(void *ptr) +{ + const struct thread_setuid_args *arg = ptr; + char buf; + + if (read(arg->pipe_read, &buf, 1) != 1) + return (void *)THREAD_ERROR; + + /* libc's setuid() should update all thread's credentials. */ + if (getuid() != arg->new_uid) + return (void *)THREAD_TEST_FAILED; + + return (void *)THREAD_SUCCESS; +} + +TEST(signal_scoping_thread_setuid) +{ + struct thread_setuid_args arg; + pthread_t no_sandbox_thread; + enum thread_return ret = THREAD_INVALID; + int pipe_parent[2]; + int prev_uid; + + disable_caps(_metadata); + + /* This test does not need to be run as root. */ + prev_uid = getuid(); + arg.new_uid = prev_uid + 1; + EXPECT_LT(0, arg.new_uid); + + ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); + arg.pipe_read = pipe_parent[0]; + + /* Capabilities must be set before creating a new thread. */ + set_cap(_metadata, CAP_SETUID); + ASSERT_EQ(0, pthread_create(&no_sandbox_thread, NULL, thread_setuid, + &arg)); + + /* Enforces restriction after creating the thread. */ + create_scoped_domain(_metadata, LANDLOCK_SCOPE_SIGNAL); + + EXPECT_NE(arg.new_uid, getuid()); + EXPECT_EQ(0, setuid(arg.new_uid)); + EXPECT_EQ(arg.new_uid, getuid()); + EXPECT_EQ(1, write(pipe_parent[1], ".", 1)); + + EXPECT_EQ(0, pthread_join(no_sandbox_thread, (void **)&ret)); + EXPECT_EQ(THREAD_SUCCESS, ret); + + clear_cap(_metadata, CAP_SETUID); + EXPECT_EQ(0, close(pipe_parent[0])); + EXPECT_EQ(0, close(pipe_parent[1])); +} + const short backlog = 10; static volatile sig_atomic_t signal_received; diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c index e949a43a6145..efabfcbe0b49 100644 --- a/tools/testing/selftests/mincore/mincore_selftest.c +++ b/tools/testing/selftests/mincore/mincore_selftest.c @@ -261,9 +261,6 @@ TEST(check_file_mmap) TH_LOG("No read-ahead pages found in memory"); } - EXPECT_LT(i, vec_size) { - TH_LOG("Read-ahead pages reached the end of the file"); - } /* * End of the readahead window. The rest of the pages shouldn't * be in memory. diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 02e1204971b0..c0138cb19705 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -33,9 +33,16 @@ endif # LDLIBS. MAKEFLAGS += --no-builtin-rules -CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES) +CFLAGS = -Wall -O2 -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES) LDLIBS = -lrt -lpthread -lm +# Some distributions (such as Ubuntu) configure GCC so that _FORTIFY_SOURCE is +# automatically enabled at -O1 or above. This triggers various unused-result +# warnings where functions such as read() or write() are called and their +# return value is not checked. Disable _FORTIFY_SOURCE to silence those +# warnings. +CFLAGS += -U_FORTIFY_SOURCE + TEST_GEN_FILES = cow TEST_GEN_FILES += compaction_test TEST_GEN_FILES += gup_longterm diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh index 67df7b47087f..e1fe16bcbbe8 100755 --- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh @@ -29,7 +29,7 @@ fi if [[ $cgroup2 ]]; then cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then - cgroup_path=/dev/cgroup/memory + cgroup_path=$(mktemp -d) mount -t cgroup2 none $cgroup_path do_umount=1 fi @@ -37,7 +37,7 @@ if [[ $cgroup2 ]]; then else cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then - cgroup_path=/dev/cgroup/memory + cgroup_path=$(mktemp -d) mount -t cgroup memory,hugetlb $cgroup_path do_umount=1 fi diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c index 2c3a0eb6b22d..9bc4591c7b16 100644 --- a/tools/testing/selftests/mm/compaction_test.c +++ b/tools/testing/selftests/mm/compaction_test.c @@ -90,6 +90,8 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size, int compaction_index = 0; char nr_hugepages[20] = {0}; char init_nr_hugepages[24] = {0}; + char target_nr_hugepages[24] = {0}; + int slen; snprintf(init_nr_hugepages, sizeof(init_nr_hugepages), "%lu", initial_nr_hugepages); @@ -106,11 +108,18 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size, goto out; } - /* Request a large number of huge pages. The Kernel will allocate - as much as it can */ - if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) { - ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n", - strerror(errno)); + /* + * Request huge pages for about half of the free memory. The Kernel + * will allocate as much as it can, and we expect it will get at least 1/3 + */ + nr_hugepages_ul = mem_free / hugepage_size / 2; + snprintf(target_nr_hugepages, sizeof(target_nr_hugepages), + "%lu", nr_hugepages_ul); + + slen = strlen(target_nr_hugepages); + if (write(fd, target_nr_hugepages, slen) != slen) { + ksft_print_msg("Failed to write %lu to /proc/sys/vm/nr_hugepages: %s\n", + nr_hugepages_ul, strerror(errno)); goto close_fd; } diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 32c6ccc2a6be..d87c5b1763ff 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -758,7 +758,7 @@ static void do_run_with_base_page(test_fn fn, bool swapout) } /* Populate a base page. */ - memset(mem, 0, pagesize); + memset(mem, 1, pagesize); if (swapout) { madvise(mem, pagesize, MADV_PAGEOUT); @@ -824,12 +824,12 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) * Try to populate a THP. Touch the first sub-page and test if * we get the last sub-page populated automatically. */ - mem[0] = 0; + mem[0] = 1; if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) { ksft_test_result_skip("Did not get a THP populated\n"); goto munmap; } - memset(mem, 0, thpsize); + memset(mem, 1, thpsize); size = thpsize; switch (thp_run) { @@ -876,7 +876,7 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) mremap_size = thpsize / 2; mremap_mem = mmap(NULL, mremap_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (mem == MAP_FAILED) { + if (mremap_mem == MAP_FAILED) { ksft_test_result_fail("mmap() failed\n"); goto munmap; } @@ -1012,7 +1012,7 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) } /* Populate an huge page. */ - memset(mem, 0, hugetlbsize); + memset(mem, 1, hugetlbsize); /* * We need a total of two hugetlb pages to handle COW/unsharing diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c index ada9156cc497..c463d1c09c9b 100644 --- a/tools/testing/selftests/mm/hugepage-mremap.c +++ b/tools/testing/selftests/mm/hugepage-mremap.c @@ -15,7 +15,7 @@ #define _GNU_SOURCE #include <stdlib.h> #include <stdio.h> -#include <asm-generic/unistd.h> +#include <unistd.h> #include <sys/mman.h> #include <errno.h> #include <fcntl.h> /* Definition of O_* constants */ diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh index 11f9bbe7dc22..0b0d4ba1af27 100755 --- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh +++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh @@ -23,7 +23,7 @@ fi if [[ $cgroup2 ]]; then CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$CGROUP_ROOT" ]]; then - CGROUP_ROOT=/dev/cgroup/memory + CGROUP_ROOT=$(mktemp -d) mount -t cgroup2 none $CGROUP_ROOT do_umount=1 fi diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index 66b4e111b5a2..b61803e36d1c 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -11,7 +11,7 @@ #include <string.h> #include <stdbool.h> #include <stdint.h> -#include <asm-generic/unistd.h> +#include <unistd.h> #include <errno.h> #include <fcntl.h> #include <sys/mman.h> @@ -369,6 +369,7 @@ unmap: munmap(map, size); } +#ifdef __NR_userfaultfd static void test_unmerge_uffd_wp(void) { struct uffdio_writeprotect uffd_writeprotect; @@ -429,6 +430,7 @@ close_uffd: unmap: munmap(map, size); } +#endif /* Verify that KSM can be enabled / queried with prctl. */ static void test_prctl(void) @@ -684,7 +686,9 @@ int main(int argc, char **argv) exit(test_child_ksm()); } +#ifdef __NR_userfaultfd tests++; +#endif ksft_print_header(); ksft_set_plan(tests); @@ -696,7 +700,9 @@ int main(int argc, char **argv) test_unmerge(); test_unmerge_zero_pages(); test_unmerge_discarded(); +#ifdef __NR_userfaultfd test_unmerge_uffd_wp(); +#endif test_prot_none(); diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c index 74c911aa3aea..9a0597310a76 100644 --- a/tools/testing/selftests/mm/memfd_secret.c +++ b/tools/testing/selftests/mm/memfd_secret.c @@ -17,7 +17,7 @@ #include <stdlib.h> #include <string.h> -#include <asm-generic/unistd.h> +#include <unistd.h> #include <errno.h> #include <stdio.h> #include <fcntl.h> @@ -28,6 +28,8 @@ #define pass(fmt, ...) ksft_test_result_pass(fmt, ##__VA_ARGS__) #define skip(fmt, ...) ksft_test_result_skip(fmt, ##__VA_ARGS__) +#ifdef __NR_memfd_secret + #define PATTERN 0x55 static const int prot = PROT_READ | PROT_WRITE; @@ -332,3 +334,13 @@ int main(int argc, char *argv[]) ksft_finished(); } + +#else /* __NR_memfd_secret */ + +int main(int argc, char *argv[]) +{ + printf("skip: skipping memfd_secret test (missing __NR_memfd_secret)\n"); + return KSFT_SKIP; +} + +#endif /* __NR_memfd_secret */ diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c index 1db134063c38..b8a7efe9204e 100644 --- a/tools/testing/selftests/mm/mkdirty.c +++ b/tools/testing/selftests/mm/mkdirty.c @@ -9,7 +9,7 @@ */ #include <fcntl.h> #include <signal.h> -#include <asm-generic/unistd.h> +#include <unistd.h> #include <string.h> #include <errno.h> #include <stdlib.h> @@ -265,6 +265,7 @@ munmap: munmap(mmap_mem, mmap_size); } +#ifdef __NR_userfaultfd static void test_uffdio_copy(void) { struct uffdio_register uffdio_register; @@ -321,6 +322,7 @@ munmap: munmap(dst, pagesize); free(src); } +#endif /* __NR_userfaultfd */ int main(void) { @@ -333,7 +335,9 @@ int main(void) thpsize / 1024); tests += 3; } +#ifdef __NR_userfaultfd tests += 1; +#endif /* __NR_userfaultfd */ ksft_print_header(); ksft_set_plan(tests); @@ -363,7 +367,9 @@ int main(void) if (thpsize) test_pte_mapped_thp(); /* Placing a fresh page via userfaultfd may set the PTE dirty. */ +#ifdef __NR_userfaultfd test_uffdio_copy(); +#endif /* __NR_userfaultfd */ err = ksft_get_fail_cnt(); if (err) diff --git a/tools/testing/selftests/mm/mlock2.h b/tools/testing/selftests/mm/mlock2.h index 1e5731bab499..4417eaa5cfb7 100644 --- a/tools/testing/selftests/mm/mlock2.h +++ b/tools/testing/selftests/mm/mlock2.h @@ -3,7 +3,6 @@ #include <errno.h> #include <stdio.h> #include <stdlib.h> -#include <asm-generic/unistd.h> static int mlock2_(void *start, size_t len, int flags) { diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h index 3d0c0bdae5bc..e90af82f8688 100644 --- a/tools/testing/selftests/mm/pkey-powerpc.h +++ b/tools/testing/selftests/mm/pkey-powerpc.h @@ -102,8 +102,18 @@ void expect_fault_on_read_execonly_key(void *p1, int pkey) return; } +#define REPEAT_8(s) s s s s s s s s +#define REPEAT_64(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) \ + REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) +#define REPEAT_512(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) \ + REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) +#define REPEAT_4096(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) \ + REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) +#define REPEAT_16384(s) REPEAT_4096(s) REPEAT_4096(s) \ + REPEAT_4096(s) REPEAT_4096(s) + /* 4-byte instructions * 16384 = 64K page */ -#define __page_o_noops() asm(".rept 16384 ; nop; .endr") +#define __page_o_noops() asm(REPEAT_16384("nop\n")) void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) { diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index 4990f7ab4cb7..4fcecfb7b189 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -42,7 +42,7 @@ #include <sys/wait.h> #include <sys/stat.h> #include <fcntl.h> -#include <asm-generic/unistd.h> +#include <unistd.h> #include <sys/ptrace.h> #include <setjmp.h> diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index c5797ad1d37b..d86ca1554d6d 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -300,7 +300,9 @@ uffd_stress_bin=./uffd-stress CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16 # Hugetlb tests require source and destination huge pages. Pass in half # the size of the free pages we have, which is used for *each*. -half_ufd_size_MB=$((freepgs / 2)) +# uffd-stress expects a region expressed in MiB, so we adjust +# half_ufd_size_MB accordingly. +half_ufd_size_MB=$(((freepgs * hpgsize_KB) / 1024 / 2)) CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16 diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index 717539eddf98..7ad6ba660c7d 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -673,7 +673,11 @@ int uffd_open_dev(unsigned int flags) int uffd_open_sys(unsigned int flags) { +#ifdef __NR_userfaultfd return syscall(__NR_userfaultfd, flags); +#else + return -1; +#endif } int uffd_open(unsigned int flags) diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index a4b83280998a..944d559ade21 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -33,10 +33,11 @@ * pthread_mutex_lock will also verify the atomicity of the memory * transfer (UFFDIO_COPY). */ -#include <asm-generic/unistd.h> + #include "uffd-common.h" uint64_t features; +#ifdef __NR_userfaultfd #define BOUNCE_RANDOM (1<<0) #define BOUNCE_RACINGFAULTS (1<<1) @@ -471,3 +472,15 @@ int main(int argc, char **argv) nr_pages, nr_pages_per_cpu); return userfaultfd_stress(); } + +#else /* __NR_userfaultfd */ + +#warning "missing __NR_userfaultfd definition" + +int main(void) +{ + printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n"); + return KSFT_SKIP; +} + +#endif /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index a2e71b1636e7..3ddbb0a71b9c 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -5,11 +5,12 @@ * Copyright (C) 2015-2023 Red Hat, Inc. */ -#include <asm-generic/unistd.h> #include "uffd-common.h" #include "../../../../mm/gup_test.h" +#ifdef __NR_userfaultfd + /* The unit test doesn't need a large or random size, make it 32MB for now */ #define UFFD_TEST_MEM_SIZE (32UL << 20) @@ -1558,3 +1559,14 @@ int main(int argc, char *argv[]) return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS; } +#else /* __NR_userfaultfd */ + +#warning "missing __NR_userfaultfd definition" + +int main(void) +{ + printf("Skipping %s (missing __NR_userfaultfd)\n", __file__); + return KSFT_SKIP; +} + +#endif /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index d9d587454d20..8c1597ebc2d3 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -149,7 +149,7 @@ cfg_test_host_common() check_err $? "Failed to add $name host entry" bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null - check_fail $? "Managed to replace $name host entry" + check_err $? "Failed to replace $name host entry" bridge mdb del dev br0 port br0 grp $grp $state vid 10 bridge mdb get dev br0 grp $grp vid 10 &> /dev/null diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index c992e385159c..195360082d94 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -48,7 +48,6 @@ declare -A NETIFS=( : "${WAIT_TIME:=5}" # Whether to pause on, respectively, after a failure and before cleanup. -: "${PAUSE_ON_FAIL:=no}" : "${PAUSE_ON_CLEANUP:=no}" # Whether to create virtual interfaces, and what netdevice type they should be. @@ -446,22 +445,6 @@ done ############################################################################## # Helpers -# Exit status to return at the end. Set in case one of the tests fails. -EXIT_STATUS=0 -# Per-test return value. Clear at the beginning of each test. -RET=0 - -ret_set_ksft_status() -{ - local ksft_status=$1; shift - local msg=$1; shift - - RET=$(ksft_status_merge $RET $ksft_status) - if (( $? )); then - retmsg=$msg - fi -} - # Whether FAILs should be interpreted as XFAILs. Internal. FAIL_TO_XFAIL= @@ -535,102 +518,6 @@ xfail_on_veth() fi } -log_test_result() -{ - local test_name=$1; shift - local opt_str=$1; shift - local result=$1; shift - local retmsg=$1; shift - - printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result" - if [[ $retmsg ]]; then - printf "\t%s\n" "$retmsg" - fi -} - -pause_on_fail() -{ - if [[ $PAUSE_ON_FAIL == yes ]]; then - echo "Hit enter to continue, 'q' to quit" - read a - [[ $a == q ]] && exit 1 - fi -} - -handle_test_result_pass() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" " OK " -} - -handle_test_result_fail() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" FAIL "$retmsg" - pause_on_fail -} - -handle_test_result_xfail() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" XFAIL "$retmsg" - pause_on_fail -} - -handle_test_result_skip() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" SKIP "$retmsg" -} - -log_test() -{ - local test_name=$1 - local opt_str=$2 - - if [[ $# -eq 2 ]]; then - opt_str="($opt_str)" - fi - - if ((RET == ksft_pass)); then - handle_test_result_pass "$test_name" "$opt_str" - elif ((RET == ksft_xfail)); then - handle_test_result_xfail "$test_name" "$opt_str" - elif ((RET == ksft_skip)); then - handle_test_result_skip "$test_name" "$opt_str" - else - handle_test_result_fail "$test_name" "$opt_str" - fi - - EXIT_STATUS=$(ksft_exit_status_merge $EXIT_STATUS $RET) - return $RET -} - -log_test_skip() -{ - RET=$ksft_skip retmsg= log_test "$@" -} - -log_test_xfail() -{ - RET=$ksft_xfail retmsg= log_test "$@" -} - -log_info() -{ - local msg=$1 - - echo "INFO: $msg" -} - not() { "$@" diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh index 02c21ff4ca81..aabd6e5480b8 100755 --- a/tools/testing/selftests/net/gro.sh +++ b/tools/testing/selftests/net/gro.sh @@ -100,5 +100,6 @@ trap cleanup EXIT if [[ "${test}" == "all" ]]; then run_all_tests else - run_test "${proto}" "${test}" + exit_code=$(run_test "${proto}" "${test}") + exit $exit_code fi; diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index be4a30a0d02a..9b44a091802c 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -227,7 +227,8 @@ static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz, attr->rta_len = RTA_LENGTH(size); attr->rta_type = rta_type; - memcpy(RTA_DATA(attr), payload, size); + if (payload) + memcpy(RTA_DATA(attr), payload, size); return 0; } diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index be8707bfb46e..bb4d2f8d50d6 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -6,6 +6,9 @@ : "${WAIT_TIMEOUT:=20}" +# Whether to pause on after a failure. +: "${PAUSE_ON_FAIL:=no}" + BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms # Kselftest framework constants. @@ -17,6 +20,11 @@ ksft_skip=4 # namespace list created by setup_ns NS_LIST=() +# Exit status to return at the end. Set in case one of the tests fails. +EXIT_STATUS=0 +# Per-test return value. Clear at the beginning of each test. +RET=0 + ############################################################################## # Helpers @@ -233,3 +241,110 @@ tc_rule_handle_stats_get() | jq ".[] | select(.options.handle == $handle) | \ .options.actions[0].stats$selector" } + +ret_set_ksft_status() +{ + local ksft_status=$1; shift + local msg=$1; shift + + RET=$(ksft_status_merge $RET $ksft_status) + if (( $? )); then + retmsg=$msg + fi +} + +log_test_result() +{ + local test_name=$1; shift + local opt_str=$1; shift + local result=$1; shift + local retmsg=$1 + + printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result" + if [[ $retmsg ]]; then + printf "\t%s\n" "$retmsg" + fi +} + +pause_on_fail() +{ + if [[ $PAUSE_ON_FAIL == yes ]]; then + echo "Hit enter to continue, 'q' to quit" + read a + [[ $a == q ]] && exit 1 + fi +} + +handle_test_result_pass() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" " OK " +} + +handle_test_result_fail() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" FAIL "$retmsg" + pause_on_fail +} + +handle_test_result_xfail() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" XFAIL "$retmsg" + pause_on_fail +} + +handle_test_result_skip() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" SKIP "$retmsg" +} + +log_test() +{ + local test_name=$1 + local opt_str=$2 + + if [[ $# -eq 2 ]]; then + opt_str="($opt_str)" + fi + + if ((RET == ksft_pass)); then + handle_test_result_pass "$test_name" "$opt_str" + elif ((RET == ksft_xfail)); then + handle_test_result_xfail "$test_name" "$opt_str" + elif ((RET == ksft_skip)); then + handle_test_result_skip "$test_name" "$opt_str" + else + handle_test_result_fail "$test_name" "$opt_str" + fi + + EXIT_STATUS=$(ksft_exit_status_merge $EXIT_STATUS $RET) + return $RET +} + +log_test_skip() +{ + RET=$ksft_skip retmsg= log_test "$@" +} + +log_test_xfail() +{ + RET=$ksft_xfail retmsg= log_test "$@" +} + +log_info() +{ + local msg=$1 + + echo "INFO: $msg" +} diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile index 82c3264b115e..704b88b6a8d2 100644 --- a/tools/testing/selftests/net/lib/Makefile +++ b/tools/testing/selftests/net/lib/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES) # Additional include paths needed by kselftest.h CFLAGS += -I../../ diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 5d796622e730..4524085784df 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -2,9 +2,10 @@ top_srcdir = ../../../../.. -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) -TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ +TEST_PROGS := mptcp_connect.sh mptcp_connect_mmap.sh mptcp_connect_sendfile.sh \ + mptcp_connect_checksum.sh pm_netlink.sh mptcp_join.sh diag.sh \ simult_flows.sh mptcp_sockopt.sh userspace_pm.sh TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 4209b9569039..c83a8b47bbdf 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -25,6 +25,8 @@ #include <sys/types.h> #include <sys/mman.h> +#include <arpa/inet.h> + #include <netdb.h> #include <netinet/in.h> @@ -1211,23 +1213,42 @@ static void parse_setsock_options(const char *name) exit(1); } -void xdisconnect(int fd, int addrlen) +void xdisconnect(int fd) { - struct sockaddr_storage empty; + socklen_t addrlen = sizeof(struct sockaddr_storage); + struct sockaddr_storage addr, empty; int msec_sleep = 10; - int queued = 1; - int i; + void *raw_addr; + int i, cmdlen; + char cmd[128]; + + /* get the local address and convert it to string */ + if (getsockname(fd, (struct sockaddr *)&addr, &addrlen) < 0) + xerror("getsockname"); + + if (addr.ss_family == AF_INET) + raw_addr = &(((struct sockaddr_in *)&addr)->sin_addr); + else if (addr.ss_family == AF_INET6) + raw_addr = &(((struct sockaddr_in6 *)&addr)->sin6_addr); + else + xerror("bad family"); + + strcpy(cmd, "ss -M | grep -q "); + cmdlen = strlen(cmd); + if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen], + sizeof(cmd) - cmdlen)) + xerror("inet_ntop"); shutdown(fd, SHUT_WR); - /* while until the pending data is completely flushed, the later + /* + * wait until the pending data is completely flushed and all + * the MPTCP sockets reached the closed status. * disconnect will bypass/ignore/drop any pending data. */ for (i = 0; ; i += msec_sleep) { - if (ioctl(fd, SIOCOUTQ, &queued) < 0) - xerror("can't query out socket queue: %d", errno); - - if (!queued) + /* closed socket are not listed by 'ss' */ + if (system(cmd) != 0) break; if (i > poll_timeout) @@ -1249,7 +1270,7 @@ int main_loop(void) if (cfg_input && cfg_sockopt_types.mptfo) { fd_in = open(cfg_input, O_RDONLY); - if (fd < 0) + if (fd_in < 0) xerror("can't open %s:%d", cfg_input, errno); } @@ -1272,18 +1293,18 @@ again: if (cfg_input && !cfg_sockopt_types.mptfo) { fd_in = open(cfg_input, O_RDONLY); - if (fd < 0) + if (fd_in < 0) xerror("can't open %s:%d", cfg_input, errno); } ret = copyfd_io(fd_in, fd, 1, 0, &winfo); if (ret) - return ret; + goto out; if (cfg_truncate > 0) { - xdisconnect(fd, peer->ai_addrlen); + shutdown(fd, SHUT_WR); } else if (--cfg_repeat > 0) { - xdisconnect(fd, peer->ai_addrlen); + xdisconnect(fd); /* the socket could be unblocking at this point, we need the * connect to be blocking @@ -1299,7 +1320,10 @@ again: close(fd); } - return 0; +out: + if (cfg_input) + close(fd_in); + return ret; } int parse_proto(const char *proto) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh new file mode 100644 index 000000000000..ce93ec2f107f --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -C "${@}" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh new file mode 100644 index 000000000000..5dd30f9394af --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -m mmap "${@}" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh new file mode 100644 index 000000000000..1d16fb1cc9bb --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -m sendfile "${@}" diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c index 64d6805381c5..8617e6d7698d 100644 --- a/tools/testing/selftests/net/ncdevmem.c +++ b/tools/testing/selftests/net/ncdevmem.c @@ -62,7 +62,7 @@ */ static char *server_ip = "192.168.1.4"; -static char *client_ip = "192.168.1.2"; +static char *client_ip; static char *port = "5201"; static size_t do_validation; static int start_queue = 8; @@ -71,24 +71,107 @@ static char *ifname = "eth1"; static unsigned int ifindex; static unsigned int dmabuf_id; -void print_bytes(void *ptr, size_t size) +struct memory_buffer { + int fd; + size_t size; + + int devfd; + int memfd; + char *buf_mem; +}; + +struct memory_provider { + struct memory_buffer *(*alloc)(size_t size); + void (*free)(struct memory_buffer *ctx); + void (*memcpy_from_device)(void *dst, struct memory_buffer *src, + size_t off, int n); +}; + +static struct memory_buffer *udmabuf_alloc(size_t size) { - unsigned char *p = ptr; - int i; + struct udmabuf_create create; + struct memory_buffer *ctx; + int ret; - for (i = 0; i < size; i++) - printf("%02hhX ", p[i]); - printf("\n"); + ctx = malloc(sizeof(*ctx)); + if (!ctx) + error(1, ENOMEM, "malloc failed"); + + ctx->size = size; + + ctx->devfd = open("/dev/udmabuf", O_RDWR); + if (ctx->devfd < 0) + error(1, errno, + "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", + TEST_PREFIX); + + ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); + if (ctx->memfd < 0) + error(1, errno, "%s: [skip,no-memfd]\n", TEST_PREFIX); + + ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK); + if (ret < 0) + error(1, errno, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); + + ret = ftruncate(ctx->memfd, size); + if (ret == -1) + error(1, errno, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); + + memset(&create, 0, sizeof(create)); + + create.memfd = ctx->memfd; + create.offset = 0; + create.size = size; + ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create); + if (ctx->fd < 0) + error(1, errno, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX); + + ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + ctx->fd, 0); + if (ctx->buf_mem == MAP_FAILED) + error(1, errno, "%s: [FAIL, map udmabuf]\n", TEST_PREFIX); + + return ctx; +} + +static void udmabuf_free(struct memory_buffer *ctx) +{ + munmap(ctx->buf_mem, ctx->size); + close(ctx->fd); + close(ctx->memfd); + close(ctx->devfd); + free(ctx); } -void print_nonzero_bytes(void *ptr, size_t size) +static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src, + size_t off, int n) +{ + struct dma_buf_sync sync = {}; + + sync.flags = DMA_BUF_SYNC_START; + ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync); + + memcpy(dst, src->buf_mem + off, n); + + sync.flags = DMA_BUF_SYNC_END; + ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync); +} + +static struct memory_provider udmabuf_memory_provider = { + .alloc = udmabuf_alloc, + .free = udmabuf_free, + .memcpy_from_device = udmabuf_memcpy_from_device, +}; + +static struct memory_provider *provider = &udmabuf_memory_provider; + +static void print_nonzero_bytes(void *ptr, size_t size) { unsigned char *p = ptr; unsigned int i; for (i = 0; i < size; i++) putchar(p[i]); - printf("\n"); } void validate_buffer(void *line, size_t size) @@ -120,7 +203,7 @@ void validate_buffer(void *line, size_t size) char command[256]; \ memset(command, 0, sizeof(command)); \ snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \ - printf("Running: %s\n", command); \ + fprintf(stderr, "Running: %s\n", command); \ system(command); \ }) @@ -128,22 +211,22 @@ static int reset_flow_steering(void) { int ret = 0; - ret = run_command("sudo ethtool -K %s ntuple off", ifname); + ret = run_command("sudo ethtool -K %s ntuple off >&2", ifname); if (ret) return ret; - return run_command("sudo ethtool -K %s ntuple on", ifname); + return run_command("sudo ethtool -K %s ntuple on >&2", ifname); } static int configure_headersplit(bool on) { - return run_command("sudo ethtool -G %s tcp-data-split %s", ifname, + return run_command("sudo ethtool -G %s tcp-data-split %s >&2", ifname, on ? "on" : "off"); } static int configure_rss(void) { - return run_command("sudo ethtool -X %s equal %d", ifname, start_queue); + return run_command("sudo ethtool -X %s equal %d >&2", ifname, start_queue); } static int configure_channels(unsigned int rx, unsigned int tx) @@ -151,10 +234,29 @@ static int configure_channels(unsigned int rx, unsigned int tx) return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx); } -static int configure_flow_steering(void) +static int configure_flow_steering(struct sockaddr_in6 *server_sin) { - return run_command("sudo ethtool -N %s flow-type tcp4 src-ip %s dst-ip %s src-port %s dst-port %s queue %d", - ifname, client_ip, server_ip, port, port, start_queue); + const char *type = "tcp6"; + const char *server_addr; + char buf[40]; + + inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf)); + server_addr = buf; + + if (IN6_IS_ADDR_V4MAPPED(&server_sin->sin6_addr)) { + type = "tcp4"; + server_addr = strrchr(server_addr, ':') + 1; + } + + return run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2", + ifname, + type, + client_ip ? "src-ip" : "", + client_ip ?: "", + server_addr, + client_ip ? "src-port" : "", + client_ip ? port : "", + port, start_queue); } static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, @@ -187,7 +289,7 @@ static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, goto err_close; } - printf("got dmabuf id=%d\n", rsp->id); + fprintf(stderr, "got dmabuf id=%d\n", rsp->id); dmabuf_id = rsp->id; netdev_bind_rx_req_free(req); @@ -202,66 +304,82 @@ err_close: return -1; } -static void create_udmabuf(int *devfd, int *memfd, int *buf, size_t dmabuf_size) +static void enable_reuseaddr(int fd) { - struct udmabuf_create create; + int opt = 1; int ret; - *devfd = open("/dev/udmabuf", O_RDWR); - if (*devfd < 0) { - error(70, 0, - "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", - TEST_PREFIX); - } + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); + if (ret) + error(1, errno, "%s: [FAIL, SO_REUSEPORT]\n", TEST_PREFIX); - *memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); - if (*memfd < 0) - error(70, 0, "%s: [skip,no-memfd]\n", TEST_PREFIX); + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + if (ret) + error(1, errno, "%s: [FAIL, SO_REUSEADDR]\n", TEST_PREFIX); +} - /* Required for udmabuf */ - ret = fcntl(*memfd, F_ADD_SEALS, F_SEAL_SHRINK); - if (ret < 0) - error(73, 0, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); +static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) +{ + int ret; - ret = ftruncate(*memfd, dmabuf_size); - if (ret == -1) - error(74, 0, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = htons(port); + + ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr); + if (ret != 1) { + /* fallback to plain IPv4 */ + ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]); + if (ret != 1) + return -1; + + /* add ::ffff prefix */ + sin6->sin6_addr.s6_addr32[0] = 0; + sin6->sin6_addr.s6_addr32[1] = 0; + sin6->sin6_addr.s6_addr16[4] = 0; + sin6->sin6_addr.s6_addr16[5] = 0xffff; + } - memset(&create, 0, sizeof(create)); + return 0; +} - create.memfd = *memfd; - create.offset = 0; - create.size = dmabuf_size; - *buf = ioctl(*devfd, UDMABUF_CREATE, &create); - if (*buf < 0) - error(75, 0, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX); +static struct netdev_queue_id *create_queues(void) +{ + struct netdev_queue_id *queues; + size_t i = 0; + + queues = calloc(num_queues, sizeof(*queues)); + for (i = 0; i < num_queues; i++) { + queues[i]._present.type = 1; + queues[i]._present.id = 1; + queues[i].type = NETDEV_QUEUE_TYPE_RX; + queues[i].id = start_queue + i; + } + + return queues; } -int do_server(void) +int do_server(struct memory_buffer *mem) { char ctrl_data[sizeof(int) * 20000]; struct netdev_queue_id *queues; size_t non_page_aligned_frags = 0; - struct sockaddr_in client_addr; - struct sockaddr_in server_sin; + struct sockaddr_in6 client_addr; + struct sockaddr_in6 server_sin; size_t page_aligned_frags = 0; - int devfd, memfd, buf, ret; size_t total_received = 0; socklen_t client_addr_len; bool is_devmem = false; - char *buf_mem = NULL; + char *tmp_mem = NULL; struct ynl_sock *ys; - size_t dmabuf_size; char iobuf[819200]; char buffer[256]; int socket_fd; int client_fd; - size_t i = 0; - int opt = 1; - - dmabuf_size = getpagesize() * NUM_PAGES; + int ret; - create_udmabuf(&devfd, &memfd, &buf, dmabuf_size); + ret = parse_address(server_ip, atoi(port), &server_sin); + if (ret < 0) + error(1, 0, "parse server address"); if (reset_flow_steering()) error(1, 0, "Failed to reset flow steering\n"); @@ -271,92 +389,65 @@ int do_server(void) error(1, 0, "Failed to configure rss\n"); /* Flow steer our devmem flows to start_queue */ - if (configure_flow_steering()) + if (configure_flow_steering(&server_sin)) error(1, 0, "Failed to configure flow steering\n"); sleep(1); - queues = malloc(sizeof(*queues) * num_queues); - - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - - if (bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) + if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) error(1, 0, "Failed to bind\n"); - buf_mem = mmap(NULL, dmabuf_size, PROT_READ | PROT_WRITE, MAP_SHARED, - buf, 0); - if (buf_mem == MAP_FAILED) - error(1, 0, "mmap()"); - - server_sin.sin_family = AF_INET; - server_sin.sin_port = htons(atoi(port)); + tmp_mem = malloc(mem->size); + if (!tmp_mem) + error(1, ENOMEM, "malloc failed"); - ret = inet_pton(server_sin.sin_family, server_ip, &server_sin.sin_addr); - if (socket < 0) - error(79, 0, "%s: [FAIL, create socket]\n", TEST_PREFIX); + socket_fd = socket(AF_INET6, SOCK_STREAM, 0); + if (socket_fd < 0) + error(1, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX); - socket_fd = socket(server_sin.sin_family, SOCK_STREAM, 0); - if (socket < 0) - error(errno, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX); + enable_reuseaddr(socket_fd); - ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEPORT, &opt, - sizeof(opt)); - if (ret) - error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX); - - ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &opt, - sizeof(opt)); - if (ret) - error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX); - - printf("binding to address %s:%d\n", server_ip, - ntohs(server_sin.sin_port)); + fprintf(stderr, "binding to address %s:%d\n", server_ip, + ntohs(server_sin.sin6_port)); ret = bind(socket_fd, &server_sin, sizeof(server_sin)); if (ret) - error(errno, errno, "%s: [FAIL, bind]\n", TEST_PREFIX); + error(1, errno, "%s: [FAIL, bind]\n", TEST_PREFIX); ret = listen(socket_fd, 1); if (ret) - error(errno, errno, "%s: [FAIL, listen]\n", TEST_PREFIX); + error(1, errno, "%s: [FAIL, listen]\n", TEST_PREFIX); client_addr_len = sizeof(client_addr); - inet_ntop(server_sin.sin_family, &server_sin.sin_addr, buffer, + inet_ntop(AF_INET6, &server_sin.sin6_addr, buffer, sizeof(buffer)); - printf("Waiting or connection on %s:%d\n", buffer, - ntohs(server_sin.sin_port)); + fprintf(stderr, "Waiting or connection on %s:%d\n", buffer, + ntohs(server_sin.sin6_port)); client_fd = accept(socket_fd, &client_addr, &client_addr_len); - inet_ntop(client_addr.sin_family, &client_addr.sin_addr, buffer, + inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer, sizeof(buffer)); - printf("Got connection from %s:%d\n", buffer, - ntohs(client_addr.sin_port)); + fprintf(stderr, "Got connection from %s:%d\n", buffer, + ntohs(client_addr.sin6_port)); while (1) { struct iovec iov = { .iov_base = iobuf, .iov_len = sizeof(iobuf) }; struct dmabuf_cmsg *dmabuf_cmsg = NULL; - struct dma_buf_sync sync = { 0 }; struct cmsghdr *cm = NULL; struct msghdr msg = { 0 }; struct dmabuf_token token; ssize_t ret; is_devmem = false; - printf("\n\n"); msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_control = ctrl_data; msg.msg_controllen = sizeof(ctrl_data); ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM); - printf("recvmsg ret=%ld\n", ret); + fprintf(stderr, "recvmsg ret=%ld\n", ret); if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) continue; if (ret < 0) { @@ -364,16 +455,15 @@ int do_server(void) continue; } if (ret == 0) { - printf("client exited\n"); + fprintf(stderr, "client exited\n"); goto cleanup; } - i++; for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { if (cm->cmsg_level != SOL_SOCKET || (cm->cmsg_type != SCM_DEVMEM_DMABUF && cm->cmsg_type != SCM_DEVMEM_LINEAR)) { - fprintf(stdout, "skipping non-devmem cmsg\n"); + fprintf(stderr, "skipping non-devmem cmsg\n"); continue; } @@ -384,7 +474,7 @@ int do_server(void) /* TODO: process data copied from skb's linear * buffer. */ - fprintf(stdout, + fprintf(stderr, "SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n", dmabuf_cmsg->frag_size); @@ -395,12 +485,13 @@ int do_server(void) token.token_count = 1; total_received += dmabuf_cmsg->frag_size; - printf("received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n", - dmabuf_cmsg->frag_offset >> PAGE_SHIFT, - dmabuf_cmsg->frag_offset % getpagesize(), - dmabuf_cmsg->frag_offset, dmabuf_cmsg->frag_size, - dmabuf_cmsg->frag_token, total_received, - dmabuf_cmsg->dmabuf_id); + fprintf(stderr, + "received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n", + dmabuf_cmsg->frag_offset >> PAGE_SHIFT, + dmabuf_cmsg->frag_offset % getpagesize(), + dmabuf_cmsg->frag_offset, + dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token, + total_received, dmabuf_cmsg->dmabuf_id); if (dmabuf_cmsg->dmabuf_id != dmabuf_id) error(1, 0, @@ -411,22 +502,16 @@ int do_server(void) else page_aligned_frags++; - sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_START; - ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync); + provider->memcpy_from_device(tmp_mem, mem, + dmabuf_cmsg->frag_offset, + dmabuf_cmsg->frag_size); if (do_validation) - validate_buffer( - ((unsigned char *)buf_mem) + - dmabuf_cmsg->frag_offset, - dmabuf_cmsg->frag_size); + validate_buffer(tmp_mem, + dmabuf_cmsg->frag_size); else - print_nonzero_bytes( - ((unsigned char *)buf_mem) + - dmabuf_cmsg->frag_offset, - dmabuf_cmsg->frag_size); - - sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_END; - ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync); + print_nonzero_bytes(tmp_mem, + dmabuf_cmsg->frag_size); ret = setsockopt(client_fd, SOL_SOCKET, SO_DEVMEM_DONTNEED, &token, @@ -438,25 +523,22 @@ int do_server(void) if (!is_devmem) error(1, 0, "flow steering error\n"); - printf("total_received=%lu\n", total_received); + fprintf(stderr, "total_received=%lu\n", total_received); } - fprintf(stdout, "%s: ok\n", TEST_PREFIX); + fprintf(stderr, "%s: ok\n", TEST_PREFIX); - fprintf(stdout, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", + fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", page_aligned_frags, non_page_aligned_frags); - fprintf(stdout, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", + fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", page_aligned_frags, non_page_aligned_frags); cleanup: - munmap(buf_mem, dmabuf_size); + free(tmp_mem); close(client_fd); close(socket_fd); - close(buf); - close(memfd); - close(devfd); ynl_sock_destroy(ys); return 0; @@ -464,52 +546,33 @@ cleanup: void run_devmem_tests(void) { - struct netdev_queue_id *queues; - int devfd, memfd, buf; + struct memory_buffer *mem; struct ynl_sock *ys; - size_t dmabuf_size; - size_t i = 0; - dmabuf_size = getpagesize() * NUM_PAGES; - - create_udmabuf(&devfd, &memfd, &buf, dmabuf_size); + mem = provider->alloc(getpagesize() * NUM_PAGES); /* Configure RSS to divert all traffic from our devmem queues */ if (configure_rss()) error(1, 0, "rss error\n"); - queues = calloc(num_queues, sizeof(*queues)); - if (configure_headersplit(1)) error(1, 0, "Failed to configure header split\n"); - if (!bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) + if (!bind_rx_queue(ifindex, mem->fd, + calloc(num_queues, sizeof(struct netdev_queue_id)), + num_queues, &ys)) error(1, 0, "Binding empty queues array should have failed\n"); - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - if (configure_headersplit(0)) error(1, 0, "Failed to configure header split\n"); - if (!bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) + if (!bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) error(1, 0, "Configure dmabuf with header split off should have failed\n"); if (configure_headersplit(1)) error(1, 0, "Failed to configure header split\n"); - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - - if (bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) + if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) error(1, 0, "Failed to bind\n"); /* Deactivating a bound queue should not be legal */ @@ -518,11 +581,15 @@ void run_devmem_tests(void) /* Closing the netlink socket does an implicit unbind */ ynl_sock_destroy(ys); + + provider->free(mem); } int main(int argc, char *argv[]) { + struct memory_buffer *mem; int is_server = 0, opt; + int ret; while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) { switch (opt) { @@ -551,7 +618,7 @@ int main(int argc, char *argv[]) ifname = optarg; break; case '?': - printf("unknown option: %c\n", optopt); + fprintf(stderr, "unknown option: %c\n", optopt); break; } } @@ -559,12 +626,13 @@ int main(int argc, char *argv[]) ifindex = if_nametoindex(ifname); for (; optind < argc; optind++) - printf("extra arguments: %s\n", argv[optind]); + fprintf(stderr, "extra arguments: %s\n", argv[optind]); run_devmem_tests(); - if (is_server) - return do_server(); + mem = provider->alloc(getpagesize() * NUM_PAGES); + ret = is_server ? do_server(mem) : 1; + provider->free(mem); - return 0; + return ret; } diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh index c28379a965d8..1559ba275105 100755 --- a/tools/testing/selftests/net/netfilter/br_netfilter.sh +++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh @@ -13,6 +13,12 @@ source lib.sh checktool "nft --version" "run test without nft tool" +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + cleanup() { cleanup_all_ns } @@ -165,6 +171,7 @@ if [ "$t" -eq 0 ];then echo PASS: kernel not tainted else echo ERROR: kernel is tainted + dmesg ret=1 fi diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh index 6a764d70ab06..4788641717d9 100755 --- a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh +++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh @@ -4,6 +4,12 @@ source lib.sh checktool "nft --version" "run test without nft tool" +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + cleanup() { cleanup_all_ns } @@ -72,6 +78,7 @@ if [ "$t" -eq 0 ];then echo PASS: kernel not tainted else echo ERROR: kernel is tainted + dmesg exit 1 fi diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index 43d8b500d391..8cc6036f97dc 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -91,4 +91,4 @@ CONFIG_XFRM_STATISTICS=y CONFIG_NET_PKTGEN=m CONFIG_TUN=m CONFIG_INET_DIAG=m -CONFIG_SCTP_DIAG=m +CONFIG_INET_SCTP_DIAG=m diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index a9d109fcc15c..00fe1a6c1f30 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -593,6 +593,7 @@ EOF echo "PASS: queue program exiting while packets queued" else echo "TAINT: queue program exiting while packets queued" + dmesg ret=1 fi } diff --git a/tools/testing/selftests/net/openvswitch/Makefile b/tools/testing/selftests/net/openvswitch/Makefile index 2f1508abc826..3fd1da2ec07d 100644 --- a/tools/testing/selftests/net/openvswitch/Makefile +++ b/tools/testing/selftests/net/openvswitch/Makefile @@ -2,7 +2,7 @@ top_srcdir = ../../../../.. -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) TEST_PROGS := openvswitch.sh diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 6c651c880fe8..66be7699c72c 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -197,6 +197,12 @@ # # - pmtu_ipv6_route_change # Same as above but with IPv6 +# +# - pmtu_ipv4_mp_exceptions +# Use the same topology as in pmtu_ipv4, but add routeable addresses +# on host A and B on lo reachable via both routers. Host A and B +# addresses have multipath routes to each other, b_r1 mtu = 1500. +# Check that PMTU exceptions are created for both paths. source lib.sh source net_helper.sh @@ -266,7 +272,8 @@ tests=" list_flush_ipv4_exception ipv4: list and flush cached exceptions 1 list_flush_ipv6_exception ipv6: list and flush cached exceptions 1 pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1 - pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1" + pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1 + pmtu_ipv4_mp_exceptions ipv4: PMTU multipath nh exceptions 1" # Addressing and routing for tests with routers: four network segments, with # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an @@ -343,6 +350,9 @@ tunnel6_a_addr="fd00:2::a" tunnel6_b_addr="fd00:2::b" tunnel6_mask="64" +host4_a_addr="192.168.99.99" +host4_b_addr="192.168.88.88" + dummy6_0_prefix="fc00:1000::" dummy6_1_prefix="fc00:1001::" dummy6_mask="64" @@ -984,6 +994,52 @@ setup_ovs_bridge() { run_cmd ip route add ${prefix6}:${b_r1}::1 via ${prefix6}:${a_r1}::2 } +setup_multipath_new() { + # Set up host A with multipath routes to host B host4_b_addr + run_cmd ${ns_a} ip addr add ${host4_a_addr} dev lo + run_cmd ${ns_a} ip nexthop add id 401 via ${prefix4}.${a_r1}.2 dev veth_A-R1 + run_cmd ${ns_a} ip nexthop add id 402 via ${prefix4}.${a_r2}.2 dev veth_A-R2 + run_cmd ${ns_a} ip nexthop add id 403 group 401/402 + run_cmd ${ns_a} ip route add ${host4_b_addr} src ${host4_a_addr} nhid 403 + + # Set up host B with multipath routes to host A host4_a_addr + run_cmd ${ns_b} ip addr add ${host4_b_addr} dev lo + run_cmd ${ns_b} ip nexthop add id 401 via ${prefix4}.${b_r1}.2 dev veth_B-R1 + run_cmd ${ns_b} ip nexthop add id 402 via ${prefix4}.${b_r2}.2 dev veth_B-R2 + run_cmd ${ns_b} ip nexthop add id 403 group 401/402 + run_cmd ${ns_b} ip route add ${host4_a_addr} src ${host4_b_addr} nhid 403 +} + +setup_multipath_old() { + # Set up host A with multipath routes to host B host4_b_addr + run_cmd ${ns_a} ip addr add ${host4_a_addr} dev lo + run_cmd ${ns_a} ip route add ${host4_b_addr} \ + src ${host4_a_addr} \ + nexthop via ${prefix4}.${a_r1}.2 weight 1 \ + nexthop via ${prefix4}.${a_r2}.2 weight 1 + + # Set up host B with multipath routes to host A host4_a_addr + run_cmd ${ns_b} ip addr add ${host4_b_addr} dev lo + run_cmd ${ns_b} ip route add ${host4_a_addr} \ + src ${host4_b_addr} \ + nexthop via ${prefix4}.${b_r1}.2 weight 1 \ + nexthop via ${prefix4}.${b_r2}.2 weight 1 +} + +setup_multipath() { + if [ "$USE_NH" = "yes" ]; then + setup_multipath_new + else + setup_multipath_old + fi + + # Set up routers with routes to dummies + run_cmd ${ns_r1} ip route add ${host4_a_addr} via ${prefix4}.${a_r1}.1 + run_cmd ${ns_r2} ip route add ${host4_a_addr} via ${prefix4}.${a_r2}.1 + run_cmd ${ns_r1} ip route add ${host4_b_addr} via ${prefix4}.${b_r1}.1 + run_cmd ${ns_r2} ip route add ${host4_b_addr} via ${prefix4}.${b_r2}.1 +} + setup() { [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip @@ -1076,23 +1132,15 @@ link_get_mtu() { } route_get_dst_exception() { - ns_cmd="${1}" - dst="${2}" - dsfield="${3}" + ns_cmd="${1}"; shift - if [ -z "${dsfield}" ]; then - dsfield=0 - fi - - ${ns_cmd} ip route get "${dst}" dsfield "${dsfield}" + ${ns_cmd} ip route get "$@" } route_get_dst_pmtu_from_exception() { - ns_cmd="${1}" - dst="${2}" - dsfield="${3}" + ns_cmd="${1}"; shift - mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")" + mtu_parse "$(route_get_dst_exception "${ns_cmd}" "$@")" } check_pmtu_value() { @@ -1235,10 +1283,10 @@ test_pmtu_ipv4_dscp_icmp_exception() { run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}" # Check that exceptions have been created with the correct PMTU - pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")" + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" dsfield "${policy_mark}")" check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 - pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")" + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" dsfield "${policy_mark}")" check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 } @@ -1285,9 +1333,9 @@ test_pmtu_ipv4_dscp_udp_exception() { UDP:"${dst2}":50000,tos="${dsfield}" # Check that exceptions have been created with the correct PMTU - pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")" + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" dsfield "${policy_mark}")" check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 - pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")" + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" dsfield "${policy_mark}")" check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 } @@ -2329,6 +2377,36 @@ test_pmtu_ipv6_route_change() { test_pmtu_ipvX_route_change 6 } +test_pmtu_ipv4_mp_exceptions() { + setup namespaces routing multipath || return $ksft_skip + + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 + + # Set up initial MTU values + mtu "${ns_a}" veth_A-R1 2000 + mtu "${ns_r1}" veth_R1-A 2000 + mtu "${ns_r1}" veth_R1-B 1500 + mtu "${ns_b}" veth_B-R1 1500 + + mtu "${ns_a}" veth_A-R2 2000 + mtu "${ns_r2}" veth_R2-A 2000 + mtu "${ns_r2}" veth_R2-B 1500 + mtu "${ns_b}" veth_B-R2 1500 + + # Ping and expect two nexthop exceptions for two routes + run_cmd ${ns_a} ping -q -M want -i 0.1 -c 1 -s 1800 "${host4_b_addr}" + + # Check that exceptions have been created with the correct PMTU + pmtu_a_R1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${host4_b_addr}" oif veth_A-R1)" + pmtu_a_R2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${host4_b_addr}" oif veth_A-R2)" + + check_pmtu_value "1500" "${pmtu_a_R1}" "exceeding MTU (veth_A-R1)" || return 1 + check_pmtu_value "1500" "${pmtu_a_R2}" "exceeding MTU (veth_A-R2)" || return 1 +} + usage() { echo echo "$0 [OPTIONS] [TEST]..." diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index bdf6f10d0558..8a92432177d3 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -738,6 +738,11 @@ kci_test_ipsec_offload() sysfsf=$sysfsd/ipsec sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/ probed=false + esp4_offload_probed_default=false + + if lsmod | grep -q esp4_offload; then + esp4_offload_probed_default=true + fi if ! mount | grep -q debugfs; then mount -t debugfs none /sys/kernel/debug/ &> /dev/null @@ -809,10 +814,10 @@ kci_test_ipsec_offload() # does driver have correct offload info run_cmd diff $sysfsf - << EOF SA count=2 tx=3 -sa[0] tx ipaddr=0x00000000 00000000 00000000 00000000 +sa[0] tx ipaddr=$dstip sa[0] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1 sa[0] key=0x34333231 38373635 32313039 36353433 -sa[1] rx ipaddr=0x00000000 00000000 00000000 037ba8c0 +sa[1] rx ipaddr=$srcip sa[1] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1 sa[1] key=0x34333231 38373635 32313039 36353433 EOF @@ -831,6 +836,7 @@ EOF fi # clean up any leftovers + ! "$esp4_offload_probed_default" && lsmod | grep -q esp4_offload && rmmod esp4_offload echo 0 > /sys/bus/netdevsim/del_device $probed && rmmod netdevsim diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index d5ffd8c9172e..799dbc2b4b01 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -48,7 +48,7 @@ run_one() { cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} & local PID1=$! wait_local_port_listen ${PEER_NS} 8000 udp @@ -95,7 +95,7 @@ run_one_nat() { # will land on the 'plain' one ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 & local PID1=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${family} -b ${addr2%/*} ${rx_args} & local PID2=$! wait_local_port_listen "${PEER_NS}" 8000 udp @@ -117,9 +117,9 @@ run_one_2sock() { cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} -p 12345 & local PID1=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 100 ${rx_args} & local PID2=$! wait_local_port_listen "${PEER_NS}" 12345 udp diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 3f2fca02fec5..36ff28af4b19 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -103,6 +103,19 @@ struct testcase testcases_v4[] = { .r_num_mss = 1, }, { + /* datalen <= MSS < gso_len: will fall back to no GSO */ + .tlen = CONST_MSS_V4, + .gso_len = CONST_MSS_V4 + 1, + .r_num_mss = 0, + .r_len_last = CONST_MSS_V4, + }, + { + /* MSS < datalen < gso_len: fail */ + .tlen = CONST_MSS_V4 + 1, + .gso_len = CONST_MSS_V4 + 2, + .tfail = true, + }, + { /* send a single MSS + 1B */ .tlen = CONST_MSS_V4 + 1, .gso_len = CONST_MSS_V4, @@ -206,6 +219,19 @@ struct testcase testcases_v6[] = { .r_num_mss = 1, }, { + /* datalen <= MSS < gso_len: will fall back to no GSO */ + .tlen = CONST_MSS_V6, + .gso_len = CONST_MSS_V6 + 1, + .r_num_mss = 0, + .r_len_last = CONST_MSS_V6, + }, + { + /* MSS < datalen < gso_len: fail */ + .tlen = CONST_MSS_V6 + 1, + .gso_len = CONST_MSS_V6 + 2, + .tfail = true + }, + { /* send a single MSS + 1B */ .tlen = CONST_MSS_V6 + 1, .gso_len = CONST_MSS_V6, diff --git a/tools/testing/selftests/perf_events/.gitignore b/tools/testing/selftests/perf_events/.gitignore index ee93dc4969b8..4931b3b6bbd3 100644 --- a/tools/testing/selftests/perf_events/.gitignore +++ b/tools/testing/selftests/perf_events/.gitignore @@ -2,3 +2,4 @@ sigtrap_threads remove_on_exec watermark_signal +mmap diff --git a/tools/testing/selftests/perf_events/Makefile b/tools/testing/selftests/perf_events/Makefile index 70e3ff211278..2e5d85770dfe 100644 --- a/tools/testing/selftests/perf_events/Makefile +++ b/tools/testing/selftests/perf_events/Makefile @@ -2,5 +2,5 @@ CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES) LDFLAGS += -lpthread -TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal +TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal mmap include ../lib.mk diff --git a/tools/testing/selftests/perf_events/mmap.c b/tools/testing/selftests/perf_events/mmap.c new file mode 100644 index 000000000000..ea0427aac1f9 --- /dev/null +++ b/tools/testing/selftests/perf_events/mmap.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define _GNU_SOURCE + +#include <dirent.h> +#include <sched.h> +#include <stdbool.h> +#include <stdio.h> +#include <unistd.h> + +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/types.h> + +#include <linux/perf_event.h> + +#include "../kselftest_harness.h" + +#define RB_SIZE 0x3000 +#define AUX_SIZE 0x10000 +#define AUX_OFFS 0x4000 + +#define HOLE_SIZE 0x1000 + +/* Reserve space for rb, aux with space for shrink-beyond-vma testing. */ +#define REGION_SIZE (2 * RB_SIZE + 2 * AUX_SIZE) +#define REGION_AUX_OFFS (2 * RB_SIZE) + +#define MAP_BASE 1 +#define MAP_AUX 2 + +#define EVENT_SRC_DIR "/sys/bus/event_source/devices" + +FIXTURE(perf_mmap) +{ + int fd; + void *ptr; + void *region; +}; + +FIXTURE_VARIANT(perf_mmap) +{ + bool aux; + unsigned long ptr_size; +}; + +FIXTURE_VARIANT_ADD(perf_mmap, rb) +{ + .aux = false, + .ptr_size = RB_SIZE, +}; + +FIXTURE_VARIANT_ADD(perf_mmap, aux) +{ + .aux = true, + .ptr_size = AUX_SIZE, +}; + +static bool read_event_type(struct dirent *dent, __u32 *type) +{ + char typefn[512]; + FILE *fp; + int res; + + snprintf(typefn, sizeof(typefn), "%s/%s/type", EVENT_SRC_DIR, dent->d_name); + fp = fopen(typefn, "r"); + if (!fp) + return false; + + res = fscanf(fp, "%u", type); + fclose(fp); + return res > 0; +} + +FIXTURE_SETUP(perf_mmap) +{ + struct perf_event_attr attr = { + .size = sizeof(attr), + .disabled = 1, + .exclude_kernel = 1, + .exclude_hv = 1, + }; + struct perf_event_attr attr_ok = {}; + unsigned int eacces = 0, map = 0; + struct perf_event_mmap_page *rb; + struct dirent *dent; + void *aux, *region; + DIR *dir; + + self->ptr = NULL; + + dir = opendir(EVENT_SRC_DIR); + if (!dir) + SKIP(return, "perf not available."); + + region = mmap(NULL, REGION_SIZE, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(region, MAP_FAILED); + self->region = region; + + // Try to find a suitable event on this system + while ((dent = readdir(dir))) { + int fd; + + if (!read_event_type(dent, &attr.type)) + continue; + + fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0); + if (fd < 0) { + if (errno == EACCES) + eacces++; + continue; + } + + // Check whether the event supports mmap() + rb = mmap(region, RB_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0); + if (rb == MAP_FAILED) { + close(fd); + continue; + } + + if (!map) { + // Save the event in case that no AUX capable event is found + attr_ok = attr; + map = MAP_BASE; + } + + if (!variant->aux) + continue; + + rb->aux_offset = AUX_OFFS; + rb->aux_size = AUX_SIZE; + + // Check whether it supports a AUX buffer + aux = mmap(region + REGION_AUX_OFFS, AUX_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, fd, AUX_OFFS); + if (aux == MAP_FAILED) { + munmap(rb, RB_SIZE); + close(fd); + continue; + } + + attr_ok = attr; + map = MAP_AUX; + munmap(aux, AUX_SIZE); + munmap(rb, RB_SIZE); + close(fd); + break; + } + closedir(dir); + + if (!map) { + if (!eacces) + SKIP(return, "No mappable perf event found."); + else + SKIP(return, "No permissions for perf_event_open()"); + } + + self->fd = syscall(SYS_perf_event_open, &attr_ok, 0, -1, -1, 0); + ASSERT_NE(self->fd, -1); + + rb = mmap(region, RB_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, self->fd, 0); + ASSERT_NE(rb, MAP_FAILED); + + if (!variant->aux) { + self->ptr = rb; + return; + } + + if (map != MAP_AUX) + SKIP(return, "No AUX event found."); + + rb->aux_offset = AUX_OFFS; + rb->aux_size = AUX_SIZE; + aux = mmap(region + REGION_AUX_OFFS, AUX_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, self->fd, AUX_OFFS); + ASSERT_NE(aux, MAP_FAILED); + self->ptr = aux; +} + +FIXTURE_TEARDOWN(perf_mmap) +{ + ASSERT_EQ(munmap(self->region, REGION_SIZE), 0); + if (self->fd != -1) + ASSERT_EQ(close(self->fd), 0); +} + +TEST_F(perf_mmap, remap) +{ + void *tmp, *ptr = self->ptr; + unsigned long size = variant->ptr_size; + + // Test the invalid remaps + ASSERT_EQ(mremap(ptr, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED); + ASSERT_EQ(mremap(ptr + HOLE_SIZE, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED); + ASSERT_EQ(mremap(ptr + size - HOLE_SIZE, HOLE_SIZE, size, MREMAP_MAYMOVE), MAP_FAILED); + // Shrink the end of the mapping such that we only unmap past end of the VMA, + // which should succeed and poke a hole into the PROT_NONE region + ASSERT_NE(mremap(ptr + size - HOLE_SIZE, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED); + + // Remap the whole buffer to a new address + tmp = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(tmp, MAP_FAILED); + + // Try splitting offset 1 hole size into VMA, this should fail + ASSERT_EQ(mremap(ptr + HOLE_SIZE, size - HOLE_SIZE, size - HOLE_SIZE, + MREMAP_MAYMOVE | MREMAP_FIXED, tmp), MAP_FAILED); + // Remapping the whole thing should succeed fine + ptr = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tmp); + ASSERT_EQ(ptr, tmp); + ASSERT_EQ(munmap(tmp, size), 0); +} + +TEST_F(perf_mmap, unmap) +{ + unsigned long size = variant->ptr_size; + + // Try to poke holes into the mappings + ASSERT_NE(munmap(self->ptr, HOLE_SIZE), 0); + ASSERT_NE(munmap(self->ptr + HOLE_SIZE, HOLE_SIZE), 0); + ASSERT_NE(munmap(self->ptr + size - HOLE_SIZE, HOLE_SIZE), 0); +} + +TEST_F(perf_mmap, map) +{ + unsigned long size = variant->ptr_size; + + // Try to poke holes into the mappings by mapping anonymous memory over it + ASSERT_EQ(mmap(self->ptr, HOLE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED); + ASSERT_EQ(mmap(self->ptr + HOLE_SIZE, HOLE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED); + ASSERT_EQ(mmap(self->ptr + size - HOLE_SIZE, HOLE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c b/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c index 580fcac0a09f..b71ef8a493ed 100644 --- a/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c +++ b/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c @@ -20,7 +20,7 @@ static int test_gettimeofday(void) gettimeofday(&tv_end, NULL); } - timersub(&tv_start, &tv_end, &tv_diff); + timersub(&tv_end, &tv_start, &tv_diff); printf("time = %.6f\n", tv_diff.tv_sec + (tv_diff.tv_usec) * 1e-6); diff --git a/tools/testing/selftests/rseq/rseq-riscv-bits.h b/tools/testing/selftests/rseq/rseq-riscv-bits.h index de31a0143139..f02f411d550d 100644 --- a/tools/testing/selftests/rseq/rseq-riscv-bits.h +++ b/tools/testing/selftests/rseq/rseq-riscv-bits.h @@ -243,7 +243,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, off_t off, i #ifdef RSEQ_COMPARE_TWICE RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") #endif - RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, 3) + RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, 3) RSEQ_INJECT_ASM(4) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ @@ -251,8 +251,8 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, off_t off, i [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [ptr] "r" (ptr), - [off] "er" (off), - [inc] "er" (inc) + [off] "r" (off), + [inc] "r" (inc) RSEQ_INJECT_INPUT : "memory", RSEQ_ASM_TMP_REG_1 RSEQ_INJECT_CLOBBER diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h index 37e598d0a365..67d544aaa9a3 100644 --- a/tools/testing/selftests/rseq/rseq-riscv.h +++ b/tools/testing/selftests/rseq/rseq-riscv.h @@ -158,7 +158,7 @@ do { \ "bnez " RSEQ_ASM_TMP_REG_1 ", 222b\n" \ "333:\n" -#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, post_commit_label) \ +#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, post_commit_label) \ "mv " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(ptr) "]\n" \ RSEQ_ASM_OP_R_ADD(off) \ REG_L RSEQ_ASM_TMP_REG_1 ", 0(" RSEQ_ASM_TMP_REG_1 ")\n" \ diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 5b9772cdf265..f6156790c3b4 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -61,7 +61,6 @@ unsigned int rseq_size = -1U; unsigned int rseq_flags; static int rseq_ownership; -static int rseq_reg_success; /* At least one rseq registration has succeded. */ /* Allocate a large area for the TLS. */ #define RSEQ_THREAD_AREA_ALLOC_SIZE 1024 @@ -152,14 +151,27 @@ int rseq_register_current_thread(void) } rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); if (rc) { - if (RSEQ_READ_ONCE(rseq_reg_success)) { + /* + * After at least one thread has registered successfully + * (rseq_size > 0), the registration of other threads should + * never fail. + */ + if (RSEQ_READ_ONCE(rseq_size) > 0) { /* Incoherent success/failure within process. */ abort(); } return -1; } assert(rseq_current_cpu_raw() >= 0); - RSEQ_WRITE_ONCE(rseq_reg_success, 1); + + /* + * The first thread to register sets the rseq_size to mimic the libc + * behavior. + */ + if (RSEQ_READ_ONCE(rseq_size) == 0) { + RSEQ_WRITE_ONCE(rseq_size, get_rseq_kernel_feature_size()); + } + return 0; } @@ -235,12 +247,18 @@ void rseq_init(void) return; } rseq_ownership = 1; - if (!rseq_available()) { - rseq_size = 0; - return; - } + + /* Calculate the offset of the rseq area from the thread pointer. */ rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); + + /* rseq flags are deprecated, always set to 0. */ rseq_flags = 0; + + /* + * Set the size to 0 until at least one thread registers to mimic the + * libc behavior. + */ + rseq_size = 0; } static __attribute__((destructor)) diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index 4e217b620e0c..062d10925a10 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -60,7 +60,14 @@ extern ptrdiff_t rseq_offset; /* - * Size of the registered rseq area. 0 if the registration was + * The rseq ABI is composed of extensible feature fields. The extensions + * are done by appending additional fields at the end of the structure. + * The rseq_size defines the size of the active feature set which can be + * used by the application for the current rseq registration. Features + * starting at offset >= rseq_size are inactive and should not be used. + * + * The rseq_size is the intersection between the available allocation + * size for the rseq area and the feature size supported by the kernel. * unsuccessful. */ extern unsigned int rseq_size; diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c index 6a7db1502c29..6dcf7e6104af 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c @@ -43,7 +43,10 @@ void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev) if (!p) return; - target = bpf_get_prandom_u32() % nr_cpus; + if (p->nr_cpus_allowed == nr_cpus && !is_migration_disabled(p)) + target = bpf_get_prandom_u32() % nr_cpus; + else + target = scx_bpf_task_cpu(p); scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | target, SCX_SLICE_DFL, 0); bpf_task_release(p); diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.c b/tools/testing/selftests/sched_ext/dsp_local_on.c index 472851b56854..0ff27e57fe43 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.c @@ -34,9 +34,10 @@ static enum scx_test_status run(void *ctx) /* Just sleeping is fine, plenty of scheduling events happening */ sleep(1); - SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_ERROR)); bpf_link__destroy(link); + SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_UNREG)); + return SCX_TEST_PASS; } @@ -50,7 +51,7 @@ static void cleanup(void *ctx) struct scx_test dsp_local_on = { .name = "dsp_local_on", .description = "Verify we can directly dispatch tasks to a local DSQs " - "from osp.dispatch()", + "from ops.dispatch()", .setup = setup, .run = run, .cleanup = cleanup, diff --git a/tools/testing/selftests/sched_ext/exit.c b/tools/testing/selftests/sched_ext/exit.c index 31bcd06e21cd..2c084ded2968 100644 --- a/tools/testing/selftests/sched_ext/exit.c +++ b/tools/testing/selftests/sched_ext/exit.c @@ -22,6 +22,14 @@ static enum scx_test_status run(void *ctx) struct bpf_link *link; char buf[16]; + /* + * On single-CPU systems, ops.select_cpu() is never + * invoked, so skip this test to avoid getting stuck + * indefinitely. + */ + if (tc == EXIT_SELECT_CPU && libbpf_num_possible_cpus() == 1) + continue; + skel = exit__open(); skel->rodata->exit_point = tc; exit__load(skel); diff --git a/tools/testing/selftests/sched_ext/maximal.bpf.c b/tools/testing/selftests/sched_ext/maximal.bpf.c index 4d4cd8d966db..361797e10ed5 100644 --- a/tools/testing/selftests/sched_ext/maximal.bpf.c +++ b/tools/testing/selftests/sched_ext/maximal.bpf.c @@ -12,6 +12,8 @@ char _license[] SEC("license") = "GPL"; +#define DSQ_ID 0 + s32 BPF_STRUCT_OPS(maximal_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags) { @@ -20,7 +22,7 @@ s32 BPF_STRUCT_OPS(maximal_select_cpu, struct task_struct *p, s32 prev_cpu, void BPF_STRUCT_OPS(maximal_enqueue, struct task_struct *p, u64 enq_flags) { - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); + scx_bpf_dispatch(p, DSQ_ID, SCX_SLICE_DFL, enq_flags); } void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags) @@ -28,7 +30,7 @@ void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags) void BPF_STRUCT_OPS(maximal_dispatch, s32 cpu, struct task_struct *prev) { - scx_bpf_consume(SCX_DSQ_GLOBAL); + scx_bpf_consume(DSQ_ID); } void BPF_STRUCT_OPS(maximal_runnable, struct task_struct *p, u64 enq_flags) @@ -123,7 +125,7 @@ void BPF_STRUCT_OPS(maximal_cgroup_set_weight, struct cgroup *cgrp, u32 weight) s32 BPF_STRUCT_OPS_SLEEPABLE(maximal_init) { - return 0; + return scx_bpf_create_dsq(DSQ_ID, -1); } void BPF_STRUCT_OPS(maximal_exit, struct scx_exit_info *info) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 8c3a73461475..60c84d935a2b 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1618,14 +1618,8 @@ void teardown_trace_fixture(struct __test_metadata *_metadata, { if (tracer) { int status; - /* - * Extract the exit code from the other process and - * adopt it for ourselves in case its asserts failed. - */ ASSERT_EQ(0, kill(tracer, SIGUSR1)); ASSERT_EQ(tracer, waitpid(tracer, &status, 0)); - if (WEXITSTATUS(status)) - _metadata->exit_code = KSFT_FAIL; } } @@ -3155,12 +3149,15 @@ TEST(syscall_restart) ret = get_syscall(_metadata, child_pid); #if defined(__arm__) /* - * FIXME: * - native ARM registers do NOT expose true syscall. * - compat ARM registers on ARM64 DO expose true syscall. + * - values of utsbuf.machine include 'armv8l' or 'armb8b' + * for ARM64 running in compat mode. */ ASSERT_EQ(0, uname(&utsbuf)); - if (strncmp(utsbuf.machine, "arm", 3) == 0) { + if ((strncmp(utsbuf.machine, "arm", 3) == 0) && + (strncmp(utsbuf.machine, "armv8l", 6) != 0) && + (strncmp(utsbuf.machine, "armv8b", 6) != 0)) { EXPECT_EQ(__NR_nanosleep, ret); } else #endif diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c index d975a6767329..48cf01aeec3e 100644 --- a/tools/testing/selftests/syscall_user_dispatch/sud_test.c +++ b/tools/testing/selftests/syscall_user_dispatch/sud_test.c @@ -79,6 +79,21 @@ TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS) } } +static void prctl_valid(struct __test_metadata *_metadata, + unsigned long op, unsigned long off, + unsigned long size, void *sel) +{ + EXPECT_EQ(0, prctl(PR_SET_SYSCALL_USER_DISPATCH, op, off, size, sel)); +} + +static void prctl_invalid(struct __test_metadata *_metadata, + unsigned long op, unsigned long off, + unsigned long size, void *sel, int err) +{ + EXPECT_EQ(-1, prctl(PR_SET_SYSCALL_USER_DISPATCH, op, off, size, sel)); + EXPECT_EQ(err, errno); +} + TEST(bad_prctl_param) { char sel = SYSCALL_DISPATCH_FILTER_ALLOW; @@ -86,57 +101,42 @@ TEST(bad_prctl_param) /* Invalid op */ op = -1; - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0, 0, &sel); - ASSERT_EQ(EINVAL, errno); + prctl_invalid(_metadata, op, 0, 0, &sel, EINVAL); /* PR_SYS_DISPATCH_OFF */ op = PR_SYS_DISPATCH_OFF; /* offset != 0 */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, 0); - EXPECT_EQ(EINVAL, errno); + prctl_invalid(_metadata, op, 0x1, 0x0, 0, EINVAL); /* len != 0 */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0xff, 0); - EXPECT_EQ(EINVAL, errno); + prctl_invalid(_metadata, op, 0x0, 0xff, 0, EINVAL); /* sel != NULL */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, &sel); - EXPECT_EQ(EINVAL, errno); + prctl_invalid(_metadata, op, 0x0, 0x0, &sel, EINVAL); /* Valid parameter */ - errno = 0; - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, 0x0); - EXPECT_EQ(0, errno); + prctl_valid(_metadata, op, 0x0, 0x0, 0x0); /* PR_SYS_DISPATCH_ON */ op = PR_SYS_DISPATCH_ON; /* Dispatcher region is bad (offset > 0 && len == 0) */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, &sel); - EXPECT_EQ(EINVAL, errno); - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, -1L, 0x0, &sel); - EXPECT_EQ(EINVAL, errno); + prctl_invalid(_metadata, op, 0x1, 0x0, &sel, EINVAL); + prctl_invalid(_metadata, op, -1L, 0x0, &sel, EINVAL); /* Invalid selector */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x1, (void *) -1); - ASSERT_EQ(EFAULT, errno); + prctl_invalid(_metadata, op, 0x0, 0x1, (void *) -1, EFAULT); /* * Dispatcher range overflows unsigned long */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 1, -1L, &sel); - ASSERT_EQ(EINVAL, errno) { - TH_LOG("Should reject bad syscall range"); - } + prctl_invalid(_metadata, PR_SYS_DISPATCH_ON, 1, -1L, &sel, EINVAL); /* * Allowed range overflows usigned long */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, -1L, 0x1, &sel); - ASSERT_EQ(EINVAL, errno) { - TH_LOG("Should reject bad syscall range"); - } + prctl_invalid(_metadata, PR_SYS_DISPATCH_ON, -1L, 0x1, &sel, EINVAL); } /* diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json index 58189327f644..383fbda07245 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json @@ -78,10 +78,10 @@ "setup": [ "$TC qdisc add dev $DEV1 ingress" ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst rshift 0xff", + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst rshift 0x1f", "expExitCode": "0", "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow", - "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst rshift 255 baseclass", + "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst rshift 31 baseclass", "matchCount": "1", "teardown": [ "$TC qdisc del dev $DEV1 ingress" diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c index c5264594064c..83faa4e354e3 100644 --- a/tools/testing/selftests/timers/clocksource-switch.c +++ b/tools/testing/selftests/timers/clocksource-switch.c @@ -156,8 +156,8 @@ int main(int argc, char **argv) /* Check everything is sane before we start switching asynchronously */ if (do_sanity_check) { for (i = 0; i < count; i++) { - printf("Validating clocksource %s\n", - clocksource_list[i]); + ksft_print_msg("Validating clocksource %s\n", + clocksource_list[i]); if (change_clocksource(clocksource_list[i])) { status = -1; goto out; @@ -169,7 +169,7 @@ int main(int argc, char **argv) } } - printf("Running Asynchronous Switching Tests...\n"); + ksft_print_msg("Running Asynchronous Switching Tests...\n"); pid = fork(); if (!pid) return run_tests(runtime); diff --git a/tools/testing/selftests/ublk/test_stripe_04.sh b/tools/testing/selftests/ublk/test_stripe_04.sh new file mode 100755 index 000000000000..1f2b642381d1 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stripe_04.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="stripe_04" +ERR_CODE=0 + +_prep_test "stripe" "mkfs & mount & umount on zero copy" + +backfile_0=$(_create_backfile 256M) +backfile_1=$(_create_backfile 256M) +dev_id=$(_add_ublk_dev -t stripe -z -q 2 "$backfile_0" "$backfile_1") +_check_add_dev $TID $? "$backfile_0" "$backfile_1" + +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test "stripe" + +_remove_backfile "$backfile_0" +_remove_backfile "$backfile_1" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c index 8757f738b0b1..0aad682b12c8 100644 --- a/tools/testing/selftests/vDSO/vdso_test_chacha.c +++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c @@ -76,7 +76,8 @@ static void reference_chacha20_blocks(uint8_t *dst_bytes, const uint32_t *key, u void __weak __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, const uint32_t *key, uint32_t *counter, size_t nblocks) { - ksft_exit_skip("Not implemented on architecture\n"); + ksft_test_result_skip("Not implemented on architecture\n"); + ksft_finished(); } int main(int argc, char *argv[]) diff --git a/tools/testing/selftests/vDSO/vdso_test_getrandom.c b/tools/testing/selftests/vDSO/vdso_test_getrandom.c index 95057f7567db..ff8d5675da2b 100644 --- a/tools/testing/selftests/vDSO/vdso_test_getrandom.c +++ b/tools/testing/selftests/vDSO/vdso_test_getrandom.c @@ -242,6 +242,7 @@ static void kselftest(void) pid_t child; ksft_print_header(); + vgetrandom_init(); ksft_set_plan(2); for (size_t i = 0; i < 1000; ++i) { @@ -295,8 +296,6 @@ static void usage(const char *argv0) int main(int argc, char *argv[]) { - vgetrandom_init(); - if (argc == 1) { kselftest(); return 0; @@ -306,6 +305,9 @@ int main(int argc, char *argv[]) usage(argv[0]); return 1; } + + vgetrandom_init(); + if (!strcmp(argv[1], "bench-single")) bench_single(); else if (!strcmp(argv[1], "bench-multi")) diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index d51249f14e2f..5656e58a5380 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -12,7 +12,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl ioperm \ - test_vsyscall mov_ss_trap \ + test_vsyscall mov_ss_trap sigtrap_loop \ syscall_arg_fault fsgsbase_restore sigaltstack TARGETS_C_BOTHBITS += nx_stack TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ diff --git a/tools/testing/selftests/x86/bugs/Makefile b/tools/testing/selftests/x86/bugs/Makefile new file mode 100644 index 000000000000..8ff2d7226c7f --- /dev/null +++ b/tools/testing/selftests/x86/bugs/Makefile @@ -0,0 +1,3 @@ +TEST_PROGS := its_sysfs.py its_permutations.py its_indirect_alignment.py its_ret_alignment.py +TEST_FILES := common.py +include ../../lib.mk diff --git a/tools/testing/selftests/x86/bugs/common.py b/tools/testing/selftests/x86/bugs/common.py new file mode 100644 index 000000000000..2f9664a80617 --- /dev/null +++ b/tools/testing/selftests/x86/bugs/common.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# This contains kselftest framework adapted common functions for testing +# mitigation for x86 bugs. + +import os, sys, re, shutil + +sys.path.insert(0, '../../kselftest') +import ksft + +def read_file(path): + if not os.path.exists(path): + return None + with open(path, 'r') as file: + return file.read().strip() + +def cpuinfo_has(arg): + cpuinfo = read_file('/proc/cpuinfo') + if arg in cpuinfo: + return True + return False + +def cmdline_has(arg): + cmdline = read_file('/proc/cmdline') + if arg in cmdline: + return True + return False + +def cmdline_has_either(args): + cmdline = read_file('/proc/cmdline') + for arg in args: + if arg in cmdline: + return True + return False + +def cmdline_has_none(args): + return not cmdline_has_either(args) + +def cmdline_has_all(args): + cmdline = read_file('/proc/cmdline') + for arg in args: + if arg not in cmdline: + return False + return True + +def get_sysfs(bug): + return read_file("/sys/devices/system/cpu/vulnerabilities/" + bug) + +def sysfs_has(bug, mitigation): + status = get_sysfs(bug) + if mitigation in status: + return True + return False + +def sysfs_has_either(bugs, mitigations): + for bug in bugs: + for mitigation in mitigations: + if sysfs_has(bug, mitigation): + return True + return False + +def sysfs_has_none(bugs, mitigations): + return not sysfs_has_either(bugs, mitigations) + +def sysfs_has_all(bugs, mitigations): + for bug in bugs: + for mitigation in mitigations: + if not sysfs_has(bug, mitigation): + return False + return True + +def bug_check_pass(bug, found): + ksft.print_msg(f"\nFound: {found}") + # ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_pass(f'{bug}: {found}') + +def bug_check_fail(bug, found, expected): + ksft.print_msg(f'\nFound:\t {found}') + ksft.print_msg(f'Expected:\t {expected}') + ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_fail(f'{bug}: {found}') + +def bug_status_unknown(bug, found): + ksft.print_msg(f'\nUnknown status: {found}') + ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_fail(f'{bug}: {found}') + +def basic_checks_sufficient(bug, mitigation): + if not mitigation: + bug_status_unknown(bug, "None") + return True + elif mitigation == "Not affected": + ksft.test_result_pass(bug) + return True + elif mitigation == "Vulnerable": + if cmdline_has_either([f'{bug}=off', 'mitigations=off']): + bug_check_pass(bug, mitigation) + return True + return False + +def get_section_info(vmlinux, section_name): + from elftools.elf.elffile import ELFFile + with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + section = elffile.get_section_by_name(section_name) + if section is None: + ksft.print_msg("Available sections in vmlinux:") + for sec in elffile.iter_sections(): + ksft.print_msg(sec.name) + raise ValueError(f"Section {section_name} not found in {vmlinux}") + return section['sh_addr'], section['sh_offset'], section['sh_size'] + +def get_patch_sites(vmlinux, offset, size): + import struct + output = [] + with open(vmlinux, 'rb') as f: + f.seek(offset) + i = 0 + while i < size: + data = f.read(4) # s32 + if not data: + break + sym_offset = struct.unpack('<i', data)[0] + i + i += 4 + output.append(sym_offset) + return output + +def get_instruction_from_vmlinux(elffile, section, virtual_address, target_address): + from capstone import Cs, CS_ARCH_X86, CS_MODE_64 + section_start = section['sh_addr'] + section_end = section_start + section['sh_size'] + + if not (section_start <= target_address < section_end): + return None + + offset = target_address - section_start + code = section.data()[offset:offset + 16] + + cap = init_capstone() + for instruction in cap.disasm(code, target_address): + if instruction.address == target_address: + return instruction + return None + +def init_capstone(): + from capstone import Cs, CS_ARCH_X86, CS_MODE_64, CS_OPT_SYNTAX_ATT + cap = Cs(CS_ARCH_X86, CS_MODE_64) + cap.syntax = CS_OPT_SYNTAX_ATT + return cap + +def get_runtime_kernel(): + import drgn + return drgn.program_from_kernel() + +def check_dependencies_or_skip(modules, script_name="unknown test"): + for mod in modules: + try: + __import__(mod) + except ImportError: + ksft.test_result_skip(f"Skipping {script_name}: missing module '{mod}'") + ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_indirect_alignment.py b/tools/testing/selftests/x86/bugs/its_indirect_alignment.py new file mode 100644 index 000000000000..cdc33ae6a91c --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_indirect_alignment.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for indirect target selection (ITS) mitigation. +# +# Test if indirect CALL/JMP are correctly patched by evaluating +# the vmlinux .retpoline_sites in /proc/kcore. + +# Install dependencies +# add-apt-repository ppa:michel-slm/kernel-utils +# apt update +# apt install -y python3-drgn python3-pyelftools python3-capstone +# +# Best to copy the vmlinux at a standard location: +# mkdir -p /usr/lib/debug/lib/modules/$(uname -r) +# cp $VMLINUX /usr/lib/debug/lib/modules/$(uname -r)/vmlinux +# +# Usage: ./its_indirect_alignment.py [vmlinux] + +import os, sys, argparse +from pathlib import Path + +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, this_dir + '/../../kselftest') +import ksft +import common as c + +bug = "indirect_target_selection" + +mitigation = c.get_sysfs(bug) +if not mitigation or "Aligned branch/return thunks" not in mitigation: + ksft.test_result_skip("Skipping its_indirect_alignment.py: Aligned branch/return thunks not enabled") + ksft.finished() + +if c.sysfs_has("spectre_v2", "Retpolines"): + ksft.test_result_skip("Skipping its_indirect_alignment.py: Retpolines deployed") + ksft.finished() + +c.check_dependencies_or_skip(['drgn', 'elftools', 'capstone'], script_name="its_indirect_alignment.py") + +from elftools.elf.elffile import ELFFile +from drgn.helpers.common.memory import identify_address + +cap = c.init_capstone() + +if len(os.sys.argv) > 1: + arg_vmlinux = os.sys.argv[1] + if not os.path.exists(arg_vmlinux): + ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at argument path: {arg_vmlinux}") + ksft.exit_fail() + os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True) + os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux') + +vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux" +if not os.path.exists(vmlinux): + ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at {vmlinux}") + ksft.exit_fail() + +ksft.print_msg(f"Using vmlinux: {vmlinux}") + +retpolines_start_vmlinux, retpolines_sec_offset, size = c.get_section_info(vmlinux, '.retpoline_sites') +ksft.print_msg(f"vmlinux: Section .retpoline_sites (0x{retpolines_start_vmlinux:x}) found at 0x{retpolines_sec_offset:x} with size 0x{size:x}") + +sites_offset = c.get_patch_sites(vmlinux, retpolines_sec_offset, size) +total_retpoline_tests = len(sites_offset) +ksft.print_msg(f"Found {total_retpoline_tests} retpoline sites") + +prog = c.get_runtime_kernel() +retpolines_start_kcore = prog.symbol('__retpoline_sites').address +ksft.print_msg(f'kcore: __retpoline_sites: 0x{retpolines_start_kcore:x}') + +x86_indirect_its_thunk_r15 = prog.symbol('__x86_indirect_its_thunk_r15').address +ksft.print_msg(f'kcore: __x86_indirect_its_thunk_r15: 0x{x86_indirect_its_thunk_r15:x}') + +tests_passed = 0 +tests_failed = 0 +tests_unknown = 0 + +with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + text_section = elffile.get_section_by_name('.text') + + for i in range(0, len(sites_offset)): + site = retpolines_start_kcore + sites_offset[i] + vmlinux_site = retpolines_start_vmlinux + sites_offset[i] + passed = unknown = failed = False + try: + vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site) + kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0] + operand = kcore_insn.op_str + insn_end = site + kcore_insn.size - 1 # TODO handle Jcc.32 __x86_indirect_thunk_\reg + safe_site = insn_end & 0x20 + site_status = "" if safe_site else "(unsafe)" + + ksft.print_msg(f"\nSite {i}: {identify_address(prog, site)} <0x{site:x}> {site_status}") + ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}") + ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}") + + if (site & 0x20) ^ (insn_end & 0x20): + ksft.print_msg(f"\tSite at safe/unsafe boundary: {str(kcore_insn.bytes)} {kcore_insn.mnemonic} {operand}") + if safe_site: + tests_passed += 1 + passed = True + ksft.print_msg(f"\tPASSED: At safe address") + continue + + if operand.startswith('0xffffffff'): + thunk = int(operand, 16) + if thunk > x86_indirect_its_thunk_r15: + insn_at_thunk = list(cap.disasm(prog.read(thunk, 16), thunk))[0] + operand += ' -> ' + insn_at_thunk.mnemonic + ' ' + insn_at_thunk.op_str + ' <dynamic-thunk?>' + if 'jmp' in insn_at_thunk.mnemonic and thunk & 0x20: + ksft.print_msg(f"\tPASSED: Found {operand} at safe address") + passed = True + if not passed: + if kcore_insn.operands[0].type == capstone.CS_OP_IMM: + operand += ' <' + prog.symbol(int(operand, 16)) + '>' + if '__x86_indirect_its_thunk_' in operand: + ksft.print_msg(f"\tPASSED: Found {operand}") + else: + ksft.print_msg(f"\tPASSED: Found direct branch: {kcore_insn}, ITS thunk not required.") + passed = True + else: + unknown = True + if passed: + tests_passed += 1 + elif unknown: + ksft.print_msg(f"UNKNOWN: unexpected operand: {kcore_insn}") + tests_unknown += 1 + else: + ksft.print_msg(f'\t************* FAILED *************') + ksft.print_msg(f"\tFound {kcore_insn.bytes} {kcore_insn.mnemonic} {operand}") + ksft.print_msg(f'\t**********************************') + tests_failed += 1 + except Exception as e: + ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}") + tests_unknown += 1 + +ksft.print_msg(f"\n\nSummary:") +ksft.print_msg(f"PASS: \t{tests_passed} \t/ {total_retpoline_tests}") +ksft.print_msg(f"FAIL: \t{tests_failed} \t/ {total_retpoline_tests}") +ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_retpoline_tests}") + +if tests_failed == 0: + ksft.test_result_pass("All ITS return thunk sites passed") +else: + ksft.test_result_fail(f"{tests_failed} ITS return thunk sites failed") +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_permutations.py b/tools/testing/selftests/x86/bugs/its_permutations.py new file mode 100644 index 000000000000..3204f4728c62 --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_permutations.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for indirect target selection (ITS) cmdline permutations with other bugs +# like spectre_v2 and retbleed. + +import os, sys, subprocess, itertools, re, shutil + +test_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, test_dir + '/../../kselftest') +import ksft +import common as c + +bug = "indirect_target_selection" +mitigation = c.get_sysfs(bug) + +if not mitigation or "Not affected" in mitigation: + ksft.test_result_skip("Skipping its_permutations.py: not applicable") + ksft.finished() + +if shutil.which('vng') is None: + ksft.test_result_skip("Skipping its_permutations.py: virtme-ng ('vng') not found in PATH.") + ksft.finished() + +TEST = f"{test_dir}/its_sysfs.py" +default_kparam = ['clearcpuid=hypervisor', 'panic=5', 'panic_on_warn=1', 'oops=panic', 'nmi_watchdog=1', 'hung_task_panic=1'] + +DEBUG = " -v " + +# Install dependencies +# https://github.com/arighi/virtme-ng +# apt install virtme-ng +BOOT_CMD = f"vng --run {test_dir}/../../../../../arch/x86/boot/bzImage " +#BOOT_CMD += DEBUG + +bug = "indirect_target_selection" + +input_options = { + 'indirect_target_selection' : ['off', 'on', 'stuff', 'vmexit'], + 'retbleed' : ['off', 'stuff', 'auto'], + 'spectre_v2' : ['off', 'on', 'eibrs', 'retpoline', 'ibrs', 'eibrs,retpoline'], +} + +def pretty_print(output): + OKBLUE = '\033[94m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + + # Define patterns and their corresponding colors + patterns = { + r"^ok \d+": OKGREEN, + r"^not ok \d+": FAIL, + r"^# Testing .*": OKBLUE, + r"^# Found: .*": WARNING, + r"^# Totals: .*": BOLD, + r"pass:([1-9]\d*)": OKGREEN, + r"fail:([1-9]\d*)": FAIL, + r"skip:([1-9]\d*)": WARNING, + } + + # Apply colors based on patterns + for pattern, color in patterns.items(): + output = re.sub(pattern, lambda match: f"{color}{match.group(0)}{ENDC}", output, flags=re.MULTILINE) + + print(output) + +combinations = list(itertools.product(*input_options.values())) +ksft.print_header() +ksft.set_plan(len(combinations)) + +logs = "" + +for combination in combinations: + append = "" + log = "" + for p in default_kparam: + append += f' --append={p}' + command = BOOT_CMD + append + test_params = "" + for i, key in enumerate(input_options.keys()): + param = f'{key}={combination[i]}' + test_params += f' {param}' + command += f" --append={param}" + command += f" -- {TEST}" + test_name = f"{bug} {test_params}" + pretty_print(f'# Testing {test_name}') + t = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + t.wait() + output, _ = t.communicate() + if t.returncode == 0: + ksft.test_result_pass(test_name) + else: + ksft.test_result_fail(test_name) + output = output.decode() + log += f" {output}" + pretty_print(log) + logs += output + "\n" + +# Optionally use tappy to parse the output +# apt install python3-tappy +with open("logs.txt", "w") as f: + f.write(logs) + +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_ret_alignment.py b/tools/testing/selftests/x86/bugs/its_ret_alignment.py new file mode 100644 index 000000000000..f40078d9f6ff --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_ret_alignment.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for indirect target selection (ITS) mitigation. +# +# Tests if the RETs are correctly patched by evaluating the +# vmlinux .return_sites in /proc/kcore. +# +# Install dependencies +# add-apt-repository ppa:michel-slm/kernel-utils +# apt update +# apt install -y python3-drgn python3-pyelftools python3-capstone +# +# Run on target machine +# mkdir -p /usr/lib/debug/lib/modules/$(uname -r) +# cp $VMLINUX /usr/lib/debug/lib/modules/$(uname -r)/vmlinux +# +# Usage: ./its_ret_alignment.py + +import os, sys, argparse +from pathlib import Path + +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, this_dir + '/../../kselftest') +import ksft +import common as c + +bug = "indirect_target_selection" +mitigation = c.get_sysfs(bug) +if not mitigation or "Aligned branch/return thunks" not in mitigation: + ksft.test_result_skip("Skipping its_ret_alignment.py: Aligned branch/return thunks not enabled") + ksft.finished() + +c.check_dependencies_or_skip(['drgn', 'elftools', 'capstone'], script_name="its_ret_alignment.py") + +from elftools.elf.elffile import ELFFile +from drgn.helpers.common.memory import identify_address + +cap = c.init_capstone() + +if len(os.sys.argv) > 1: + arg_vmlinux = os.sys.argv[1] + if not os.path.exists(arg_vmlinux): + ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at user-supplied path: {arg_vmlinux}") + ksft.exit_fail() + os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True) + os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux') + +vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux" +if not os.path.exists(vmlinux): + ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at {vmlinux}") + ksft.exit_fail() + +ksft.print_msg(f"Using vmlinux: {vmlinux}") + +rethunks_start_vmlinux, rethunks_sec_offset, size = c.get_section_info(vmlinux, '.return_sites') +ksft.print_msg(f"vmlinux: Section .return_sites (0x{rethunks_start_vmlinux:x}) found at 0x{rethunks_sec_offset:x} with size 0x{size:x}") + +sites_offset = c.get_patch_sites(vmlinux, rethunks_sec_offset, size) +total_rethunk_tests = len(sites_offset) +ksft.print_msg(f"Found {total_rethunk_tests} rethunk sites") + +prog = c.get_runtime_kernel() +rethunks_start_kcore = prog.symbol('__return_sites').address +ksft.print_msg(f'kcore: __rethunk_sites: 0x{rethunks_start_kcore:x}') + +its_return_thunk = prog.symbol('its_return_thunk').address +ksft.print_msg(f'kcore: its_return_thunk: 0x{its_return_thunk:x}') + +tests_passed = 0 +tests_failed = 0 +tests_unknown = 0 +tests_skipped = 0 + +with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + text_section = elffile.get_section_by_name('.text') + + for i in range(len(sites_offset)): + site = rethunks_start_kcore + sites_offset[i] + vmlinux_site = rethunks_start_vmlinux + sites_offset[i] + try: + passed = unknown = failed = skipped = False + + symbol = identify_address(prog, site) + vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site) + kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0] + + insn_end = site + kcore_insn.size - 1 + + safe_site = insn_end & 0x20 + site_status = "" if safe_site else "(unsafe)" + + ksft.print_msg(f"\nSite {i}: {symbol} <0x{site:x}> {site_status}") + ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}") + ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}") + + if safe_site: + tests_passed += 1 + passed = True + ksft.print_msg(f"\tPASSED: At safe address") + continue + + if "jmp" in kcore_insn.mnemonic: + passed = True + elif "ret" not in kcore_insn.mnemonic: + skipped = True + + if passed: + ksft.print_msg(f"\tPASSED: Found {kcore_insn.mnemonic} {kcore_insn.op_str}") + tests_passed += 1 + elif skipped: + ksft.print_msg(f"\tSKIPPED: Found '{kcore_insn.mnemonic}'") + tests_skipped += 1 + elif unknown: + ksft.print_msg(f"UNKNOWN: An unknown instruction: {kcore_insn}") + tests_unknown += 1 + else: + ksft.print_msg(f'\t************* FAILED *************') + ksft.print_msg(f"\tFound {kcore_insn.mnemonic} {kcore_insn.op_str}") + ksft.print_msg(f'\t**********************************') + tests_failed += 1 + except Exception as e: + ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}") + tests_unknown += 1 + +ksft.print_msg(f"\n\nSummary:") +ksft.print_msg(f"PASSED: \t{tests_passed} \t/ {total_rethunk_tests}") +ksft.print_msg(f"FAILED: \t{tests_failed} \t/ {total_rethunk_tests}") +ksft.print_msg(f"SKIPPED: \t{tests_skipped} \t/ {total_rethunk_tests}") +ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_rethunk_tests}") + +if tests_failed == 0: + ksft.test_result_pass("All ITS return thunk sites passed.") +else: + ksft.test_result_fail(f"{tests_failed} failed sites need ITS return thunks.") +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_sysfs.py b/tools/testing/selftests/x86/bugs/its_sysfs.py new file mode 100644 index 000000000000..7bca81f2f606 --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_sysfs.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for Indirect Target Selection(ITS) mitigation sysfs status. + +import sys, os, re +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, this_dir + '/../../kselftest') +import ksft + +from common import * + +bug = "indirect_target_selection" +mitigation = get_sysfs(bug) + +ITS_MITIGATION_ALIGNED_THUNKS = "Mitigation: Aligned branch/return thunks" +ITS_MITIGATION_RETPOLINE_STUFF = "Mitigation: Retpolines, Stuffing RSB" +ITS_MITIGATION_VMEXIT_ONLY = "Mitigation: Vulnerable, KVM: Not affected" +ITS_MITIGATION_VULNERABLE = "Vulnerable" + +def check_mitigation(): + if mitigation == ITS_MITIGATION_ALIGNED_THUNKS: + if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"): + bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_RETPOLINE_STUFF) + return + if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'): + bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_VMEXIT_ONLY) + return + bug_check_pass(bug, ITS_MITIGATION_ALIGNED_THUNKS) + return + + if mitigation == ITS_MITIGATION_RETPOLINE_STUFF: + if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"): + bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF) + return + if sysfs_has('retbleed', 'Stuffing'): + bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF) + return + bug_check_fail(bug, ITS_MITIGATION_RETPOLINE_STUFF, ITS_MITIGATION_ALIGNED_THUNKS) + + if mitigation == ITS_MITIGATION_VMEXIT_ONLY: + if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'): + bug_check_pass(bug, ITS_MITIGATION_VMEXIT_ONLY) + return + bug_check_fail(bug, ITS_MITIGATION_VMEXIT_ONLY, ITS_MITIGATION_ALIGNED_THUNKS) + + if mitigation == ITS_MITIGATION_VULNERABLE: + if sysfs_has("spectre_v2", "Vulnerable"): + bug_check_pass(bug, ITS_MITIGATION_VULNERABLE) + else: + bug_check_fail(bug, "Mitigation", ITS_MITIGATION_VULNERABLE) + + bug_status_unknown(bug, mitigation) + return + +ksft.print_header() +ksft.set_plan(1) +ksft.print_msg(f'{bug}: {mitigation} ...') + +if not basic_checks_sufficient(bug, mitigation): + check_mitigation() + +ksft.finished() diff --git a/tools/testing/selftests/x86/sigtrap_loop.c b/tools/testing/selftests/x86/sigtrap_loop.c new file mode 100644 index 000000000000..9d065479e89f --- /dev/null +++ b/tools/testing/selftests/x86/sigtrap_loop.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025 Intel Corporation + */ +#define _GNU_SOURCE + +#include <err.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ucontext.h> + +#ifdef __x86_64__ +# define REG_IP REG_RIP +#else +# define REG_IP REG_EIP +#endif + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); + + return; +} + +static void sigtrap(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t *)ctx_void; + static unsigned int loop_count_on_same_ip; + static unsigned long last_trap_ip; + + if (last_trap_ip == ctx->uc_mcontext.gregs[REG_IP]) { + printf("\tTrapped at %016lx\n", last_trap_ip); + + /* + * If the same IP is hit more than 10 times in a row, it is + * _considered_ an infinite loop. + */ + if (++loop_count_on_same_ip > 10) { + printf("[FAIL]\tDetected SIGTRAP infinite loop\n"); + exit(1); + } + + return; + } + + loop_count_on_same_ip = 0; + last_trap_ip = ctx->uc_mcontext.gregs[REG_IP]; + printf("\tTrapped at %016lx\n", last_trap_ip); +} + +int main(int argc, char *argv[]) +{ + sethandler(SIGTRAP, sigtrap, 0); + + /* + * Set the Trap Flag (TF) to single-step the test code, therefore to + * trigger a SIGTRAP signal after each instruction until the TF is + * cleared. + * + * Because the arithmetic flags are not significant here, the TF is + * set by pushing 0x302 onto the stack and then popping it into the + * flags register. + * + * Four instructions in the following asm code are executed with the + * TF set, thus the SIGTRAP handler is expected to run four times. + */ + printf("[RUN]\tSIGTRAP infinite loop detection\n"); + asm volatile( +#ifdef __x86_64__ + /* + * Avoid clobbering the redzone + * + * Equivalent to "sub $128, %rsp", however -128 can be encoded + * in a single byte immediate while 128 uses 4 bytes. + */ + "add $-128, %rsp\n\t" +#endif + "push $0x302\n\t" + "popf\n\t" + "nop\n\t" + "nop\n\t" + "push $0x202\n\t" + "popf\n\t" +#ifdef __x86_64__ + "sub $-128, %rsp\n\t" +#endif + ); + + printf("[OK]\tNo SIGTRAP infinite loop detected\n"); + return 0; +} diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c index 17263696b5d8..61b3f571f7a7 100644 --- a/tools/testing/shared/linux.c +++ b/tools/testing/shared/linux.c @@ -147,7 +147,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list) { if (kmalloc_verbose) - pr_debug("Bulk free %p[0-%lu]\n", list, size - 1); + pr_debug("Bulk free %p[0-%zu]\n", list, size - 1); pthread_mutex_lock(&cachep->lock); for (int i = 0; i < size; i++) @@ -165,7 +165,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, size_t i; if (kmalloc_verbose) - pr_debug("Bulk alloc %lu\n", size); + pr_debug("Bulk alloc %zu\n", size); pthread_mutex_lock(&cachep->lock); if (cachep->nr_objs >= size) { diff --git a/tools/testing/shared/linux/maple_tree.h b/tools/testing/shared/linux/maple_tree.h index 06c89bdcc515..f67d47d32857 100644 --- a/tools/testing/shared/linux/maple_tree.h +++ b/tools/testing/shared/linux/maple_tree.h @@ -2,6 +2,6 @@ #define atomic_t int32_t #define atomic_inc(x) uatomic_inc(x) #define atomic_read(x) uatomic_read(x) -#define atomic_set(x, y) do {} while (0) +#define atomic_set(x, y) uatomic_set(x, y) #define U8_MAX UCHAR_MAX #include "../../../../include/linux/maple_tree.h" diff --git a/tools/testing/vma/linux/atomic.h b/tools/testing/vma/linux/atomic.h index e01f66f98982..3e1b6adc027b 100644 --- a/tools/testing/vma/linux/atomic.h +++ b/tools/testing/vma/linux/atomic.h @@ -6,7 +6,7 @@ #define atomic_t int32_t #define atomic_inc(x) uatomic_inc(x) #define atomic_read(x) uatomic_read(x) -#define atomic_set(x, y) do {} while (0) +#define atomic_set(x, y) uatomic_set(x, y) #define U8_MAX UCHAR_MAX #endif /* _LINUX_ATOMIC_H */ diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index c5b9da034511..1d5bbc8464f1 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -735,6 +735,8 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, (void)adjust_next; } +static inline void hugetlb_split(struct vm_area_struct *, unsigned long) {} + static inline void vma_iter_free(struct vma_iterator *vmi) { mas_destroy(&vmi->mas); diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 0b7f5bf546da..0c22ff7a8de2 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -1283,21 +1283,25 @@ static void test_unsent_bytes_client(const struct test_opts *opts, int type) send_buf(fd, buf, sizeof(buf), 0, sizeof(buf)); control_expectln("RECEIVED"); - ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent); - if (ret < 0) { - if (errno == EOPNOTSUPP) { - fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); - } else { + /* SIOCOUTQ isn't guaranteed to instantly track sent data. Even though + * the "RECEIVED" message means that the other side has received the + * data, there can be a delay in our kernel before updating the "unsent + * bytes" counter. Repeat SIOCOUTQ until it returns 0. + */ + timeout_begin(TIMEOUT); + do { + ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent); + if (ret < 0) { + if (errno == EOPNOTSUPP) { + fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); + break; + } perror("ioctl"); exit(EXIT_FAILURE); } - } else if (ret == 0 && sock_bytes_unsent != 0) { - fprintf(stderr, - "Unexpected 'SIOCOUTQ' value, expected 0, got %i\n", - sock_bytes_unsent); - exit(EXIT_FAILURE); - } - + timeout_check("SIOCOUTQ"); + } while (sock_bytes_unsent != 0); + timeout_end(); close(fd); } diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c index 245e9344932b..699a83f538a8 100644 --- a/tools/tracing/rtla/src/osnoise.c +++ b/tools/tracing/rtla/src/osnoise.c @@ -867,7 +867,7 @@ int osnoise_set_workload(struct osnoise_context *context, bool onoff) retval = osnoise_options_set_option("OSNOISE_WORKLOAD", onoff); if (retval < 0) - return -1; + return -2; context->opt_workload = onoff; diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 2cc3ffcbc983..397dc962f5e2 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -1091,12 +1091,15 @@ timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_param } } - if (params->user_hist) { - retval = osnoise_set_workload(tool->context, 0); - if (retval) { - err_msg("Failed to set OSNOISE_WORKLOAD option\n"); - goto out_err; - } + /* + * Set workload according to type of thread if the kernel supports it. + * On kernels without support, user threads will have already failed + * on missing timerlat_fd, and kernel threads do not need it. + */ + retval = osnoise_set_workload(tool->context, params->kernel_workload); + if (retval < -1) { + err_msg("Failed to set OSNOISE_WORKLOAD option\n"); + goto out_err; } return 0; @@ -1137,9 +1140,20 @@ out_err: } static int stop_tracing; +static struct trace_instance *hist_inst = NULL; static void stop_hist(int sig) { + if (stop_tracing) { + /* + * Stop requested twice in a row; abort event processing and + * exit immediately + */ + tracefs_iterate_stop(hist_inst->inst); + return; + } stop_tracing = 1; + if (hist_inst) + trace_instance_stop(hist_inst); } /* @@ -1185,6 +1199,12 @@ int timerlat_hist_main(int argc, char *argv[]) } trace = &tool->trace; + /* + * Save trace instance into global variable so that SIGINT can stop + * the timerlat tracer. + * Otherwise, rtla could loop indefinitely when overloaded. + */ + hist_inst = trace; retval = enable_timerlat(trace); if (retval) { @@ -1331,7 +1351,7 @@ int timerlat_hist_main(int argc, char *argv[]) return_value = 0; - if (trace_is_off(&tool->trace, &record->trace)) { + if (trace_is_off(&tool->trace, &record->trace) && !stop_tracing) { printf("rtla timerlat hit stop tracing\n"); if (!params->no_aa) diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index ac2ff38a57ee..0def5fec51ed 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -842,12 +842,15 @@ timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params * } } - if (params->user_top) { - retval = osnoise_set_workload(top->context, 0); - if (retval) { - err_msg("Failed to set OSNOISE_WORKLOAD option\n"); - goto out_err; - } + /* + * Set workload according to type of thread if the kernel supports it. + * On kernels without support, user threads will have already failed + * on missing timerlat_fd, and kernel threads do not need it. + */ + retval = osnoise_set_workload(top->context, params->kernel_workload); + if (retval < -1) { + err_msg("Failed to set OSNOISE_WORKLOAD option\n"); + goto out_err; } if (isatty(1) && !params->quiet) @@ -891,9 +894,20 @@ out_err: } static int stop_tracing; +static struct trace_instance *top_inst = NULL; static void stop_top(int sig) { + if (stop_tracing) { + /* + * Stop requested twice in a row; abort event processing and + * exit immediately + */ + tracefs_iterate_stop(top_inst->inst); + return; + } stop_tracing = 1; + if (top_inst) + trace_instance_stop(top_inst); } /* @@ -940,6 +954,13 @@ int timerlat_top_main(int argc, char *argv[]) } trace = &top->trace; + /* + * Save trace instance into global variable so that SIGINT can stop + * the timerlat tracer. + * Otherwise, rtla could loop indefinitely when overloaded. + */ + top_inst = trace; + retval = enable_timerlat(trace); if (retval) { @@ -1099,7 +1120,7 @@ int timerlat_top_main(int argc, char *argv[]) return_value = 0; - if (trace_is_off(&top->trace, &record->trace)) { + if (trace_is_off(&top->trace, &record->trace) && !stop_tracing) { printf("rtla timerlat hit stop tracing\n"); if (!params->no_aa) diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c index 170a706248ab..440323a997c6 100644 --- a/tools/tracing/rtla/src/trace.c +++ b/tools/tracing/rtla/src/trace.c @@ -197,6 +197,14 @@ int trace_instance_start(struct trace_instance *trace) } /* + * trace_instance_stop - stop tracing a given rtla instance + */ +int trace_instance_stop(struct trace_instance *trace) +{ + return tracefs_trace_off(trace->inst); +} + +/* * trace_events_free - free a list of trace events */ static void trace_events_free(struct trace_events *events) diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h index c7c92dc9a18a..76e1b77291ba 100644 --- a/tools/tracing/rtla/src/trace.h +++ b/tools/tracing/rtla/src/trace.h @@ -21,6 +21,7 @@ struct trace_instance { int trace_instance_init(struct trace_instance *trace, char *tool_name); int trace_instance_start(struct trace_instance *trace); +int trace_instance_stop(struct trace_instance *trace); void trace_instance_destroy(struct trace_instance *trace); struct trace_seq *get_trace_seq(void); diff --git a/tools/verification/rv/src/in_kernel.c b/tools/verification/rv/src/in_kernel.c index f04479ecc96c..ced72950cb1e 100644 --- a/tools/verification/rv/src/in_kernel.c +++ b/tools/verification/rv/src/in_kernel.c @@ -353,7 +353,7 @@ ikm_event_handler(struct trace_seq *s, struct tep_record *record, if (config_has_id && (config_my_pid == id)) return 0; - else if (config_my_pid && (config_my_pid == pid)) + else if (config_my_pid == pid) return 0; tep_print_event(trace_event->tep, s, record, "%16s-%-8d ", TEP_PRINT_COMM, TEP_PRINT_PID); @@ -595,7 +595,7 @@ static int parse_arguments(char *monitor_name, int argc, char **argv) config_reactor = optarg; break; case 's': - config_my_pid = 0; + config_my_pid = -1; break; case 't': config_trace = 1; |