Diffstat (limited to 'tools')
77 files changed, 2763 insertions, 480 deletions
diff --git a/tools/arch/arm64/include/uapi/asm/perf_regs.h b/tools/arch/arm64/include/uapi/asm/perf_regs.h index d54daafa89e3..fd157f46727e 100644 --- a/tools/arch/arm64/include/uapi/asm/perf_regs.h +++ b/tools/arch/arm64/include/uapi/asm/perf_regs.h @@ -36,6 +36,11 @@ enum perf_event_arm_regs { PERF_REG_ARM64_LR, PERF_REG_ARM64_SP, PERF_REG_ARM64_PC, - PERF_REG_ARM64_MAX, + + /* Extended/pseudo registers */ + PERF_REG_ARM64_VG = 46, // SVE Vector Granule + + PERF_REG_ARM64_MAX = PERF_REG_ARM64_PC + 1, + PERF_REG_ARM64_EXTENDED_MAX = PERF_REG_ARM64_VG + 1 }; #endif /* _ASM_ARM64_PERF_REGS_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index ee15311b6be1..403e83b4adc8 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -76,6 +76,8 @@ /* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */ #define MSR_IA32_CORE_CAPS 0x000000cf +#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT 2 +#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS BIT(MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT) #define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT 5 #define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT) @@ -154,6 +156,11 @@ #define MSR_IA32_POWER_CTL 0x000001fc #define MSR_IA32_POWER_CTL_BIT_EE 19 +/* Abbreviated from Intel SDM name IA32_INTEGRITY_CAPABILITIES */ +#define MSR_INTEGRITY_CAPS 0x000002d9 +#define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4 +#define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT) + #define MSR_LBR_NHM_FROM 0x00000680 #define MSR_LBR_NHM_TO 0x000006c0 #define MSR_LBR_CORE_FROM 0x00000040 @@ -312,6 +319,7 @@ /* Run Time Average Power Limiting (RAPL) Interface */ +#define MSR_VR_CURRENT_CONFIG 0x00000601 #define MSR_RAPL_POWER_UNIT 0x00000606 #define MSR_PKG_POWER_LIMIT 0x00000610 @@ -502,8 +510,10 @@ #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 #define MSR_AMD64_SEV_ES_ENABLED_BIT 1 +#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2 #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) #define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT) +#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT) #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f @@ -524,6 +534,11 @@ #define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16) #define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24) +/* AMD Performance Counter Global Status and Control MSRs */ +#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 +#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 +#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 + /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 @@ -688,6 +703,10 @@ #define MSR_IA32_PERF_CTL 0x00000199 #define INTEL_PERF_CTL_MASK 0xffff +/* AMD Branch Sampling configuration */ +#define MSR_AMD_DBG_EXTN_CFG 0xc000010f +#define MSR_AMD_SAMP_BR_FROM 0xc0010300 + #define MSR_IA32_MPERF 0x000000e7 #define MSR_IA32_APERF 0x000000e8 diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index c6a48d0ef9ff..888a0421d43b 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -99,6 +99,10 @@ FEATURE_TESTS_EXTRA := \ clang \ libbpf \ libbpf-btf__load_from_kernel_by_id \ + libbpf-bpf_prog_load \ + libbpf-bpf_object__next_program \ + libbpf-bpf_object__next_map \ + libbpf-bpf_create_map \ libpfm4 \ libdebuginfod \ clang-bpf-co-re diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index cb4a2a4fa2e4..7c2a17e23c30 100644 --- 
a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -58,6 +58,11 @@ FILES= \ test-bpf.bin \ test-libbpf.bin \ test-libbpf-btf__load_from_kernel_by_id.bin \ + test-libbpf-bpf_prog_load.bin \ + test-libbpf-bpf_map_create.bin \ + test-libbpf-bpf_object__next_program.bin \ + test-libbpf-bpf_object__next_map.bin \ + test-libbpf-btf__raw_data.bin \ test-get_cpuid.bin \ test-sdt.bin \ test-cxx.bin \ @@ -291,6 +296,21 @@ $(OUTPUT)test-libbpf.bin: $(OUTPUT)test-libbpf-btf__load_from_kernel_by_id.bin: $(BUILD) -lbpf +$(OUTPUT)test-libbpf-bpf_prog_load.bin: + $(BUILD) -lbpf + +$(OUTPUT)test-libbpf-bpf_map_create.bin: + $(BUILD) -lbpf + +$(OUTPUT)test-libbpf-bpf_object__next_program.bin: + $(BUILD) -lbpf + +$(OUTPUT)test-libbpf-bpf_object__next_map.bin: + $(BUILD) -lbpf + +$(OUTPUT)test-libbpf-btf__raw_data.bin: + $(BUILD) -lbpf + $(OUTPUT)test-sdt.bin: $(BUILD)
diff --git a/tools/build/feature/test-libbpf-bpf_map_create.c b/tools/build/feature/test-libbpf-bpf_map_create.c new file mode 100644 index 000000000000..b9f550e332c8 --- /dev/null +++ b/tools/build/feature/test-libbpf-bpf_map_create.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <bpf/bpf.h> + +int main(void) +{ + return bpf_map_create(0 /* map_type */, NULL /* map_name */, 0 /* key_size */, + 0 /* value_size */, 0 /* max_entries */, NULL /* opts */); +}
diff --git a/tools/build/feature/test-libbpf-bpf_object__next_map.c b/tools/build/feature/test-libbpf-bpf_object__next_map.c new file mode 100644 index 000000000000..64adb519e97e --- /dev/null +++ b/tools/build/feature/test-libbpf-bpf_object__next_map.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <bpf/libbpf.h> + +int main(void) +{ + bpf_object__next_map(NULL /* obj */, NULL /* prev */); + return 0; +}
diff --git a/tools/build/feature/test-libbpf-bpf_object__next_program.c b/tools/build/feature/test-libbpf-bpf_object__next_program.c new file mode 100644 index 000000000000..8bf4fd26b545 --- /dev/null +++ b/tools/build/feature/test-libbpf-bpf_object__next_program.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <bpf/libbpf.h> + +int main(void) +{ + bpf_object__next_program(NULL /* obj */, NULL /* prev */); + return 0; +}
diff --git a/tools/build/feature/test-libbpf-bpf_prog_load.c b/tools/build/feature/test-libbpf-bpf_prog_load.c new file mode 100644 index 000000000000..47f516d63ebc --- /dev/null +++ b/tools/build/feature/test-libbpf-bpf_prog_load.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <bpf/bpf.h> + +int main(void) +{ + return bpf_prog_load(0 /* prog_type */, NULL /* prog_name */, + NULL /* license */, NULL /* insns */, + 0 /* insn_cnt */, NULL /* opts */); +}
diff --git a/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c index f7c084428735..a17647f7d5a4 100644 --- a/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c +++ b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -#include <bpf/libbpf.h> +#include <bpf/btf.h> int main(void) { - return btf__load_from_kernel_by_id(20151128, NULL); + btf__load_from_kernel_by_id(20151128); + return 0; }
diff --git a/tools/build/feature/test-libbpf-btf__raw_data.c b/tools/build/feature/test-libbpf-btf__raw_data.c new file mode 100644 index 000000000000..57da31dd7581 --- /dev/null +++ b/tools/build/feature/test-libbpf-btf__raw_data.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <bpf/btf.h> + +int
main(void) +{ + btf__raw_data(NULL /* btf_ro */, NULL /* size */); + return 0; +} diff --git a/tools/include/uapi/asm-generic/fcntl.h b/tools/include/uapi/asm-generic/fcntl.h index ac190958c981..0197042b7dfb 100644 --- a/tools/include/uapi/asm-generic/fcntl.h +++ b/tools/include/uapi/asm-generic/fcntl.h @@ -115,13 +115,11 @@ #define F_GETSIG 11 /* for sockets. */ #endif -#ifndef CONFIG_64BIT #ifndef F_GETLK64 #define F_GETLK64 12 /* using 'struct flock64' */ #define F_SETLK64 13 #define F_SETLKW64 14 #endif -#endif #ifndef F_SETOWN_EX #define F_SETOWN_EX 15 @@ -187,25 +185,19 @@ struct f_owner_ex { #define F_LINUX_SPECIFIC_BASE 1024 -#ifndef HAVE_ARCH_STRUCT_FLOCK -#ifndef __ARCH_FLOCK_PAD -#define __ARCH_FLOCK_PAD -#endif - struct flock { short l_type; short l_whence; __kernel_off_t l_start; __kernel_off_t l_len; __kernel_pid_t l_pid; - __ARCH_FLOCK_PAD -}; +#ifdef __ARCH_FLOCK_EXTRA_SYSID + __ARCH_FLOCK_EXTRA_SYSID #endif - -#ifndef HAVE_ARCH_STRUCT_FLOCK64 -#ifndef __ARCH_FLOCK64_PAD -#define __ARCH_FLOCK64_PAD +#ifdef __ARCH_FLOCK_PAD + __ARCH_FLOCK_PAD #endif +}; struct flock64 { short l_type; @@ -213,8 +205,9 @@ struct flock64 { __kernel_loff_t l_start; __kernel_loff_t l_len; __kernel_pid_t l_pid; +#ifdef __ARCH_FLOCK64_PAD __ARCH_FLOCK64_PAD -}; #endif +}; #endif /* _ASM_GENERIC_FCNTL_H */ diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 1c48b0ae3ba3..45fa180cc56a 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -383,7 +383,7 @@ __SYSCALL(__NR_syslog, sys_syslog) /* kernel/ptrace.c */ #define __NR_ptrace 117 -__SYSCALL(__NR_ptrace, sys_ptrace) +__SC_COMP(__NR_ptrace, sys_ptrace, compat_sys_ptrace) /* kernel/sched/core.c */ #define __NR_sched_setparam 118 @@ -779,7 +779,7 @@ __SYSCALL(__NR_rseq, sys_rseq) #define __NR_kexec_file_load 294 __SYSCALL(__NR_kexec_file_load, sys_kexec_file_load) /* 295 through 402 are unassigned to sync up with generic numbers, don't use */ -#if __BITS_PER_LONG == 32 +#if defined(__SYSCALL_COMPAT) || __BITS_PER_LONG == 32 #define __NR_clock_gettime64 403 __SYSCALL(__NR_clock_gettime64, sys_clock_gettime) #define __NR_clock_settime64 404 diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index ed66f2e38464..e6c98a6e3908 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -23,6 +23,7 @@ #include <perf/cpumap.h> #include <perf/threadmap.h> #include <api/fd/array.h> +#include "internal.h" void perf_evlist__init(struct perf_evlist *evlist) { @@ -39,10 +40,11 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, * We already have cpus for evsel (via PMU sysfs) so * keep it, if there's no target cpu list defined. 
*/ - if (!evsel->own_cpus || evlist->has_user_cpus) { - perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); - } else if (!evsel->system_wide && perf_cpu_map__empty(evlist->user_requested_cpus)) { + if (!evsel->own_cpus || + (!evsel->system_wide && evlist->has_user_cpus) || + (!evsel->system_wide && + !evsel->requires_cpu && + perf_cpu_map__empty(evlist->user_requested_cpus))) { perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); } else if (evsel->cpus != evsel->own_cpus) { @@ -50,8 +52,11 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, evsel->cpus = perf_cpu_map__get(evsel->own_cpus); } - perf_thread_map__put(evsel->threads); - evsel->threads = perf_thread_map__get(evlist->threads); + if (!evsel->system_wide) { + perf_thread_map__put(evsel->threads); + evsel->threads = perf_thread_map__get(evlist->threads); + } + evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus); } @@ -298,7 +303,7 @@ add: int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) { - int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus); + int nr_cpus = perf_cpu_map__nr(evlist->all_cpus); int nr_threads = perf_thread_map__nr(evlist->threads); int nfds = 0; struct perf_evsel *evsel; @@ -428,9 +433,9 @@ static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_ static int mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, int idx, struct perf_mmap_param *mp, int cpu_idx, - int thread, int *_output, int *_output_overwrite) + int thread, int *_output, int *_output_overwrite, int *nr_mmaps) { - struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->user_requested_cpus, cpu_idx); + struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->all_cpus, cpu_idx); struct perf_evsel *evsel; int revent; @@ -484,6 +489,8 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, if (ops->mmap(map, mp, *output, evlist_cpu) < 0) return -1; + *nr_mmaps += 1; + if (!idx) perf_evlist__set_mmap_first(evlist, map, overwrite); } else { @@ -513,34 +520,12 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, } static int -mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, - struct perf_mmap_param *mp) -{ - int thread; - int nr_threads = perf_thread_map__nr(evlist->threads); - - for (thread = 0; thread < nr_threads; thread++) { - int output = -1; - int output_overwrite = -1; - - if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread, - &output, &output_overwrite)) - goto out_unmap; - } - - return 0; - -out_unmap: - perf_evlist__munmap(evlist); - return -1; -} - -static int mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, struct perf_mmap_param *mp) { int nr_threads = perf_thread_map__nr(evlist->threads); - int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus); + int nr_cpus = perf_cpu_map__nr(evlist->all_cpus); + int nr_mmaps = 0; int cpu, thread; for (cpu = 0; cpu < nr_cpus; cpu++) { @@ -549,11 +534,14 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, for (thread = 0; thread < nr_threads; thread++) { if (mmap_per_evsel(evlist, ops, cpu, mp, cpu, - thread, &output, &output_overwrite)) + thread, &output, &output_overwrite, &nr_mmaps)) goto out_unmap; } } + if (nr_mmaps != evlist->nr_mmaps) + pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps); + return 0; out_unmap: @@ -565,9 +553,14 @@ static int 
perf_evlist__nr_mmaps(struct perf_evlist *evlist) { int nr_mmaps; - nr_mmaps = perf_cpu_map__nr(evlist->user_requested_cpus); - if (perf_cpu_map__empty(evlist->user_requested_cpus)) - nr_mmaps = perf_thread_map__nr(evlist->threads); + /* One for each CPU */ + nr_mmaps = perf_cpu_map__nr(evlist->all_cpus); + if (perf_cpu_map__empty(evlist->all_cpus)) { + /* Plus one for each thread */ + nr_mmaps += perf_thread_map__nr(evlist->threads); + /* Minus the per-thread CPU (-1) */ + nr_mmaps -= 1; + } return nr_mmaps; } @@ -577,7 +570,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, struct perf_mmap_param *mp) { struct perf_evsel *evsel; - const struct perf_cpu_map *cpus = evlist->user_requested_cpus; if (!ops || !ops->get || !ops->mmap) return -EINVAL; @@ -596,9 +588,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; - if (perf_cpu_map__empty(cpus)) - return mmap_per_thread(evlist, ops, mp); - return mmap_per_cpu(evlist, ops, mp); } diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index cfc9ebd7968e..2a912a1f1989 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -49,7 +49,18 @@ struct perf_evsel { /* parse modifier helper */ int nr_members; + /* + * system_wide is for events that need to be on every CPU, irrespective + * of user requested CPUs or threads. Map propagation will set cpus to + * this event's own_cpus, whereby they will contribute to evlist + * all_cpus. + */ bool system_wide; + /* + * Some events, for example uncore events, require a CPU. + * i.e. it cannot be the 'any CPU' value of -1. + */ + bool requires_cpu; int idx; }; diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 465be4e62a17..b4e9ef7edfef 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -758,6 +758,16 @@ include::intel-hybrid.txt[] If the URLs is not specified, the value of DEBUGINFOD_URLS system environment variable is used. +--off-cpu:: + Enable off-cpu profiling with BPF. The BPF program will collect + task scheduling information with (user) stacktrace and save them + as sample data of a software event named "offcpu-time". The + sample period will have the time the task slept in nanoseconds. + + Note that BPF can collect stack traces using frame pointer ("fp") + only, as of now. So the applications built without the frame + pointer might see bogus addresses. 
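For illustration, the "offcpu-time" event itself is ordinary software-event plumbing. Below is a minimal sketch of the attribute setup, modeled on off_cpu_config() in tools/perf/util/bpf_off_cpu.c further down this diff; the helper name is invented here, and the evlist wiring and error handling are omitted:

	#include <linux/perf_event.h>
	#include <string.h>

	/* Illustrative only: mirrors the attr fields off_cpu_config() fills in. */
	static void offcpu_time_attr_init(struct perf_event_attr *attr)
	{
		memset(attr, 0, sizeof(*attr));
		attr->type = PERF_TYPE_SOFTWARE;
		attr->config = PERF_COUNT_SW_BPF_OUTPUT;
		attr->size = sizeof(*attr);	/* to capture the ABI version */
		attr->freq = 1;
		attr->sample_period = 1;
		/* off-cpu analysis depends on the stack trace */
		attr->sample_type = PERF_SAMPLE_CALLCHAIN;
	}

The BPF program accumulates sleep time per stack while recording; the samples themselves are synthesized when the session ends (see off_cpu_write() later in this diff), with the accumulated nanoseconds written as the sample period.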
+ SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1] diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index e0304e70f182..73e0762092fe 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -573,11 +573,36 @@ ifndef NO_LIBELF ifeq ($(feature-libbpf-btf__load_from_kernel_by_id), 1) CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID endif + $(call feature_check,libbpf-bpf_prog_load) + ifeq ($(feature-libbpf-bpf_prog_load), 1) + CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD + endif + $(call feature_check,libbpf-bpf_object__next_program) + ifeq ($(feature-libbpf-bpf_object__next_program), 1) + CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM + endif + $(call feature_check,libbpf-bpf_object__next_map) + ifeq ($(feature-libbpf-bpf_object__next_map), 1) + CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_MAP + endif + $(call feature_check,libbpf-btf__raw_data) + ifeq ($(feature-libbpf-btf__raw_data), 1) + CFLAGS += -DHAVE_LIBBPF_BTF__RAW_DATA + endif + $(call feature_check,libbpf-bpf_map_create) + ifeq ($(feature-libbpf-bpf_map_create), 1) + CFLAGS += -DHAVE_LIBBPF_BPF_MAP_CREATE + endif else dummy := $(error Error: No libbpf devel library found, please install libbpf-devel); endif else CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID + CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD + CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM + CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_MAP + CFLAGS += -DHAVE_LIBBPF_BTF__RAW_DATA + CFLAGS += -DHAVE_LIBBPF_BPF_MAP_CREATE endif endif diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 6e5aded855cc..8f738e11356d 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1038,6 +1038,7 @@ SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp) SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h +SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_TMP_OUT) $(LIBBPF_OUTPUT): $(Q)$(MKDIR) -p $@ diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c index 476b037eea1c..006692c9b040 100644 --- a/tools/perf/arch/arm64/util/perf_regs.c +++ b/tools/perf/arch/arm64/util/perf_regs.c @@ -2,13 +2,19 @@ #include <errno.h> #include <regex.h> #include <string.h> +#include <sys/auxv.h> #include <linux/kernel.h> #include <linux/zalloc.h> +#include "../../../perf-sys.h" #include "../../../util/debug.h" #include "../../../util/event.h" #include "../../../util/perf_regs.h" +#ifndef HWCAP_SVE +#define HWCAP_SVE (1 << 22) +#endif + const struct sample_reg sample_reg_masks[] = { SMPL_REG(x0, PERF_REG_ARM64_X0), SMPL_REG(x1, PERF_REG_ARM64_X1), @@ -43,6 +49,7 @@ const struct sample_reg sample_reg_masks[] = { SMPL_REG(lr, PERF_REG_ARM64_LR), SMPL_REG(sp, PERF_REG_ARM64_SP), SMPL_REG(pc, PERF_REG_ARM64_PC), + SMPL_REG(vg, PERF_REG_ARM64_VG), SMPL_REG_END }; @@ -131,3 +138,34 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op) return SDT_ARG_VALID; } + +uint64_t arch__user_reg_mask(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .sample_type = PERF_SAMPLE_REGS_USER, + .disabled = 1, + .exclude_kernel = 1, + .sample_period = 1, + .sample_regs_user = PERF_REGS_MASK + }; + int fd; + + if (getauxval(AT_HWCAP) & HWCAP_SVE) + attr.sample_regs_user |= SMPL_REG_MASK(PERF_REG_ARM64_VG); + + /* + * Check if the pmu supports perf extended regs, before + * returning the 
register mask to sample. + */ + if (attr.sample_regs_user != PERF_REGS_MASK) { + event_attr_init(&attr); + fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + if (fd != -1) { + close(fd); + return attr.sample_regs_user; + } + } + return PERF_REGS_MASK; +} diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c index 5aecf88e3de6..871af5992298 100644 --- a/tools/perf/arch/arm64/util/unwind-libunwind.c +++ b/tools/perf/arch/arm64/util/unwind-libunwind.c @@ -10,77 +10,8 @@ int LIBUNWIND__ARCH_REG_ID(int regnum) { - switch (regnum) { - case UNW_AARCH64_X0: - return PERF_REG_ARM64_X0; - case UNW_AARCH64_X1: - return PERF_REG_ARM64_X1; - case UNW_AARCH64_X2: - return PERF_REG_ARM64_X2; - case UNW_AARCH64_X3: - return PERF_REG_ARM64_X3; - case UNW_AARCH64_X4: - return PERF_REG_ARM64_X4; - case UNW_AARCH64_X5: - return PERF_REG_ARM64_X5; - case UNW_AARCH64_X6: - return PERF_REG_ARM64_X6; - case UNW_AARCH64_X7: - return PERF_REG_ARM64_X7; - case UNW_AARCH64_X8: - return PERF_REG_ARM64_X8; - case UNW_AARCH64_X9: - return PERF_REG_ARM64_X9; - case UNW_AARCH64_X10: - return PERF_REG_ARM64_X10; - case UNW_AARCH64_X11: - return PERF_REG_ARM64_X11; - case UNW_AARCH64_X12: - return PERF_REG_ARM64_X12; - case UNW_AARCH64_X13: - return PERF_REG_ARM64_X13; - case UNW_AARCH64_X14: - return PERF_REG_ARM64_X14; - case UNW_AARCH64_X15: - return PERF_REG_ARM64_X15; - case UNW_AARCH64_X16: - return PERF_REG_ARM64_X16; - case UNW_AARCH64_X17: - return PERF_REG_ARM64_X17; - case UNW_AARCH64_X18: - return PERF_REG_ARM64_X18; - case UNW_AARCH64_X19: - return PERF_REG_ARM64_X19; - case UNW_AARCH64_X20: - return PERF_REG_ARM64_X20; - case UNW_AARCH64_X21: - return PERF_REG_ARM64_X21; - case UNW_AARCH64_X22: - return PERF_REG_ARM64_X22; - case UNW_AARCH64_X23: - return PERF_REG_ARM64_X23; - case UNW_AARCH64_X24: - return PERF_REG_ARM64_X24; - case UNW_AARCH64_X25: - return PERF_REG_ARM64_X25; - case UNW_AARCH64_X26: - return PERF_REG_ARM64_X26; - case UNW_AARCH64_X27: - return PERF_REG_ARM64_X27; - case UNW_AARCH64_X28: - return PERF_REG_ARM64_X28; - case UNW_AARCH64_X29: - return PERF_REG_ARM64_X29; - case UNW_AARCH64_X30: - return PERF_REG_ARM64_LR; - case UNW_AARCH64_SP: - return PERF_REG_ARM64_SP; - case UNW_AARCH64_PC: - return PERF_REG_ARM64_PC; - default: - pr_err("unwind: invalid reg id %d\n", regnum); + if (regnum < 0 || regnum >= PERF_REG_ARM64_EXTENDED_MAX) return -EINVAL; - } - return -EINVAL; + return regnum; } diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 2eaac4638aab..06c2cdfd8f2f 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -811,18 +811,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, if (!cpu_wide && perf_can_record_cpu_wide()) { struct evsel *switch_evsel; - err = parse_events(evlist, "dummy:u", NULL); - if (err) - return err; + switch_evsel = evlist__add_dummy_on_all_cpus(evlist); + if (!switch_evsel) + return -ENOMEM; - switch_evsel = evlist__last(evlist); - - switch_evsel->core.attr.freq = 0; - switch_evsel->core.attr.sample_period = 1; switch_evsel->core.attr.context_switch = 1; - - switch_evsel->core.system_wide = true; - switch_evsel->no_aux_samples = true; switch_evsel->immediate = true; evsel__set_sample_bit(switch_evsel, TID); @@ -871,20 +864,22 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, /* Add dummy event to keep tracking */ if (opts->full_auxtrace) { + bool need_system_wide_tracking; struct evsel 
*tracking_evsel; - err = parse_events(evlist, "dummy:u", NULL); - if (err) - return err; + /* + * User space tasks can migrate between CPUs, so when tracing + * selected CPUs, sideband for all CPUs is still needed. + */ + need_system_wide_tracking = evlist->core.has_user_cpus && + !intel_pt_evsel->core.attr.exclude_user; - tracking_evsel = evlist__last(evlist); + tracking_evsel = evlist__add_aux_dummy(evlist, need_system_wide_tracking); + if (!tracking_evsel) + return -ENOMEM; evlist__set_tracking_event(evlist, tracking_evsel); - tracking_evsel->core.attr.freq = 0; - tracking_evsel->core.attr.sample_period = 1; - - tracking_evsel->no_aux_samples = true; if (need_immediate) tracking_evsel->immediate = true; diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index c8230c48125f..80b525c065ed 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2801,9 +2801,7 @@ static int perf_c2c__report(int argc, const char **argv) "the input file to process"), OPT_INCR('N', "node-info", &c2c.node_info, "show extra node info in report (repeat for more info)"), -#ifdef HAVE_SLANG_SUPPORT OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"), -#endif OPT_BOOLEAN(0, "stats", &c2c.stats_only, "Display only statistic tables (implies --stdio)"), OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full, @@ -2833,6 +2831,10 @@ static int perf_c2c__report(int argc, const char **argv) if (argc) usage_with_options(report_c2c_usage, options); +#ifndef HAVE_SLANG_SUPPORT + c2c.use_stdio = true; +#endif + if (c2c.stats_only) c2c.use_stdio = true; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a5cf6a99d67f..9a71f0330137 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -49,6 +49,7 @@ #include "util/clockid.h" #include "util/pmu-hybrid.h" #include "util/evlist-hybrid.h" +#include "util/off_cpu.h" #include "asm/bug.h" #include "perf.h" #include "cputopo.h" @@ -162,6 +163,7 @@ struct record { bool buildid_mmap; bool timestamp_filename; bool timestamp_boundary; + bool off_cpu; struct switch_output switch_output; unsigned long long samples; unsigned long output_max_size; /* = 0: unlimited */ @@ -869,7 +871,6 @@ static int record__auxtrace_init(struct record *rec __maybe_unused) static int record__config_text_poke(struct evlist *evlist) { struct evsel *evsel; - int err; /* Nothing to do if text poke is already configured */ evlist__for_each_entry(evlist, evsel) { @@ -877,32 +878,23 @@ static int record__config_text_poke(struct evlist *evlist) return 0; } - err = parse_events(evlist, "dummy:u", NULL); - if (err) - return err; - - evsel = evlist__last(evlist); + evsel = evlist__add_dummy_on_all_cpus(evlist); + if (!evsel) + return -ENOMEM; - evsel->core.attr.freq = 0; - evsel->core.attr.sample_period = 1; evsel->core.attr.text_poke = 1; evsel->core.attr.ksymbol = 1; - - evsel->core.system_wide = true; - evsel->no_aux_samples = true; evsel->immediate = true; - - /* Text poke must be collected on all CPUs */ - perf_cpu_map__put(evsel->core.own_cpus); - evsel->core.own_cpus = perf_cpu_map__new(NULL); - perf_cpu_map__put(evsel->core.cpus); - evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus); - evsel__set_sample_bit(evsel, TIME); return 0; } +static int record__config_off_cpu(struct record *rec) +{ + return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts); +} + static bool record__kcore_readable(struct machine *machine) { char kcore[PATH_MAX]; @@ -982,14 +974,20 @@ static void record__thread_data_close_pipes(struct 
record_thread *thread_data) } } +static bool evlist__per_thread(struct evlist *evlist) +{ + return cpu_map__is_dummy(evlist->core.user_requested_cpus); +} + static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist) { int m, tm, nr_mmaps = evlist->core.nr_mmaps; struct mmap *mmap = evlist->mmap; struct mmap *overwrite_mmap = evlist->overwrite_mmap; - struct perf_cpu_map *cpus = evlist->core.user_requested_cpus; + struct perf_cpu_map *cpus = evlist->core.all_cpus; + bool per_thread = evlist__per_thread(evlist); - if (cpu_map__is_dummy(cpus)) + if (per_thread) thread_data->nr_mmaps = nr_mmaps; else thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits, @@ -1010,7 +1008,7 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps); for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) { - if (cpu_map__is_dummy(cpus) || + if (per_thread || test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) { if (thread_data->maps) { thread_data->maps[tm] = &mmap[m]; @@ -1885,7 +1883,7 @@ static int record__synthesize(struct record *rec, bool tail) return err; } - err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.user_requested_cpus, + err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus, process_synthesized_event, NULL); if (err < 0) { pr_err("Couldn't synthesize cpu map.\n"); @@ -2600,6 +2598,9 @@ out_free_threads: } else status = err; + if (rec->off_cpu) + rec->bytes_written += off_cpu_write(rec->session); + record__synthesize(rec, true); /* this will be recalculated during process_buildids() */ rec->samples = 0; @@ -3324,6 +3325,7 @@ static struct option __record_options[] = { OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec", "write collected trace data into several data files using parallel threads", record__parse_threads), + OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"), OPT_END() }; @@ -3683,12 +3685,12 @@ static int record__init_thread_default_masks(struct record *rec, struct perf_cpu static int record__init_thread_masks(struct record *rec) { int ret = 0; - struct perf_cpu_map *cpus = rec->evlist->core.user_requested_cpus; + struct perf_cpu_map *cpus = rec->evlist->core.all_cpus; if (!record__threads_enabled(rec)) return record__init_thread_default_masks(rec, cpus); - if (cpu_map__is_dummy(cpus)) { + if (evlist__per_thread(rec->evlist)) { pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n"); return -EINVAL; } @@ -3745,6 +3747,12 @@ int cmd_record(int argc, const char **argv) # undef REASON #endif +#ifndef HAVE_BPF_SKEL +# define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c) + set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true); +# undef set_nobuild +#endif + rec->opts.affinity = PERF_AFFINITY_SYS; rec->evlist = evlist__new(); @@ -3981,6 +3989,14 @@ int cmd_record(int argc, const char **argv) } } + if (rec->off_cpu) { + err = record__config_off_cpu(rec); + if (err) { + pr_err("record__config_off_cpu failed, error %d\n", err); + goto out; + } + } + if (record_opts__config(&rec->opts)) { err = -EINVAL; goto out; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7e6cc8bdf061..4ce87a8eb7d7 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -382,9 +382,6 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_ if 
(!counter->supported) return -ENOENT; - if (counter->core.system_wide) - nthreads = 1; - for (thread = 0; thread < nthreads; thread++) { struct perf_counts_values *count; @@ -2261,7 +2258,7 @@ static void setup_system_wide(int forks) struct evsel *counter; evlist__for_each_entry(evsel_list, counter) { - if (!counter->core.system_wide && + if (!counter->core.requires_cpu && strcmp(counter->name, "duration_time")) { return; } diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index cee61c4ed59e..e597e4bac90f 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -605,7 +605,7 @@ static int json_events(const char *fn, } else if (json_streq(map, field, "ExtSel")) { char *code = NULL; addfield(map, &code, "", "", val); - eventcode |= strtoul(code, NULL, 0) << 21; + eventcode |= strtoul(code, NULL, 0) << 8; free(code); } else if (json_streq(map, field, "EventName")) { addfield(map, &je.name, "", "", val); diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py new file mode 100755 index 000000000000..5f57d9829956 --- /dev/null +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py @@ -0,0 +1,272 @@ +# SPDX-License-Identifier: GPL-2.0 +# arm-cs-trace-disasm.py: ARM CoreSight Trace Dump With Disassember +# +# Author: Tor Jeremiassen <tor@ti.com> +# Mathieu Poirier <mathieu.poirier@linaro.org> +# Leo Yan <leo.yan@linaro.org> +# Al Grant <Al.Grant@arm.com> + +from __future__ import print_function +import os +from os import path +import sys +import re +from subprocess import * +from optparse import OptionParser, make_option + +from perf_trace_context import perf_set_itrace_options, \ + perf_sample_insn, perf_sample_srccode + +# Below are some example commands for using this script. +# +# Output disassembly with objdump: +# perf script -s scripts/python/arm-cs-trace-disasm.py \ +# -- -d objdump -k path/to/vmlinux +# Output disassembly with llvm-objdump: +# perf script -s scripts/python/arm-cs-trace-disasm.py \ +# -- -d llvm-objdump-11 -k path/to/vmlinux +# Output only source line and symbols: +# perf script -s scripts/python/arm-cs-trace-disasm.py + +# Command line parsing. 
+option_list = [ + # formatting options for the bottom entry of the stack + make_option("-k", "--vmlinux", dest="vmlinux_name", + help="Set path to vmlinux file"), + make_option("-d", "--objdump", dest="objdump_name", + help="Set path to objdump executable file"), + make_option("-v", "--verbose", dest="verbose", + action="store_true", default=False, + help="Enable debugging log") +] + +parser = OptionParser(option_list=option_list) +(options, args) = parser.parse_args() + +# Initialize global dicts and regular expression +disasm_cache = dict() +cpu_data = dict() +disasm_re = re.compile("^\s*([0-9a-fA-F]+):") +disasm_func_re = re.compile("^\s*([0-9a-fA-F]+)\s.*:") +cache_size = 64*1024 + +glb_source_file_name = None +glb_line_number = None +glb_dso = None + +def get_optional(perf_dict, field): + if field in perf_dict: + return perf_dict[field] + return "[unknown]" + +def get_offset(perf_dict, field): + if field in perf_dict: + return f"+0x{perf_dict[field]:x}" + return "" + +def get_dso_file_path(dso_name, dso_build_id): + if (dso_name == "[kernel.kallsyms]" or dso_name == "vmlinux"): + if (options.vmlinux_name): + return options.vmlinux_name; + else: + return dso_name + + if (dso_name == "[vdso]") : + append = "/vdso" + else: + append = "/elf" + + dso_path = f"{os.environ['PERF_BUILDID_DIR']}/{dso_name}/{dso_build_id}{append}" + # Replace duplicate slash chars to single slash char + dso_path = dso_path.replace('//', '/', 1) + return dso_path + +def read_disam(dso_fname, dso_start, start_addr, stop_addr): + addr_range = str(start_addr) + ":" + str(stop_addr) + ":" + dso_fname + + # Don't let the cache get too big, clear it when it hits max size + if (len(disasm_cache) > cache_size): + disasm_cache.clear(); + + if addr_range in disasm_cache: + disasm_output = disasm_cache[addr_range]; + else: + start_addr = start_addr - dso_start; + stop_addr = stop_addr - dso_start; + disasm = [ options.objdump_name, "-d", "-z", + f"--start-address=0x{start_addr:x}", + f"--stop-address=0x{stop_addr:x}" ] + disasm += [ dso_fname ] + disasm_output = check_output(disasm).decode('utf-8').split('\n') + disasm_cache[addr_range] = disasm_output + + return disasm_output + +def print_disam(dso_fname, dso_start, start_addr, stop_addr): + for line in read_disam(dso_fname, dso_start, start_addr, stop_addr): + m = disasm_func_re.search(line) + if m is None: + m = disasm_re.search(line) + if m is None: + continue + print(f"\t{line}") + +def print_sample(sample): + print(f"Sample = {{ cpu: {sample['cpu']:04} addr: 0x{sample['addr']:016x} " \ + f"phys_addr: 0x{sample['phys_addr']:016x} ip: 0x{sample['ip']:016x} " \ + f"pid: {sample['pid']} tid: {sample['tid']} period: {sample['period']} time: {sample['time']} }}") + +def trace_begin(): + print('ARM CoreSight Trace Data Assembler Dump') + +def trace_end(): + print('End') + +def trace_unhandled(event_name, context, event_fields_dict): + print(' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())])) + +def common_start_str(comm, sample): + sec = int(sample["time"] / 1000000000) + ns = sample["time"] % 1000000000 + cpu = sample["cpu"] + pid = sample["pid"] + tid = sample["tid"] + return f"{comm:>16} {pid:>5}/{tid:<5} [{cpu:04}] {sec:9}.{ns:09} " + +# This code is copied from intel-pt-events.py for printing source code +# line and symbols. 
+def print_srccode(comm, param_dict, sample, symbol, dso): + ip = sample["ip"] + if symbol == "[unknown]": + start_str = common_start_str(comm, sample) + ("%x" % ip).rjust(16).ljust(40) + else: + offs = get_offset(param_dict, "symoff") + start_str = common_start_str(comm, sample) + (symbol + offs).ljust(40) + + global glb_source_file_name + global glb_line_number + global glb_dso + + source_file_name, line_number, source_line = perf_sample_srccode(perf_script_context) + if source_file_name: + if glb_line_number == line_number and glb_source_file_name == source_file_name: + src_str = "" + else: + if len(source_file_name) > 40: + src_file = ("..." + source_file_name[-37:]) + " " + else: + src_file = source_file_name.ljust(41) + + if source_line is None: + src_str = src_file + str(line_number).rjust(4) + " <source not found>" + else: + src_str = src_file + str(line_number).rjust(4) + " " + source_line + glb_dso = None + elif dso == glb_dso: + src_str = "" + else: + src_str = dso + glb_dso = dso + + glb_line_number = line_number + glb_source_file_name = source_file_name + + print(f"{start_str}{src_str}")
+ +def process_event(param_dict): + global cache_size + global options + + sample = param_dict["sample"] + comm = param_dict["comm"] + + name = param_dict["ev_name"] + dso = get_optional(param_dict, "dso") + dso_bid = get_optional(param_dict, "dso_bid") + dso_start = get_optional(param_dict, "dso_map_start") + dso_end = get_optional(param_dict, "dso_map_end") + symbol = get_optional(param_dict, "symbol") + + if (options.verbose == True): + print(f"Event type: {name}") + print_sample(sample) + + # If we cannot find the dso, we cannot dump assembler; bail out + if (dso == '[unknown]'): + return + + # Validate dso start and end addresses + if ((dso_start == '[unknown]') or (dso_end == '[unknown]')): + print(f"Failed to find valid dso map for dso {dso}") + return + + if (name[0:12] == "instructions"): + print_srccode(comm, param_dict, sample, symbol, dso) + return + + # Don't proceed if this event is not a branch sample. + if (name[0:8] != "branches"): + return + + cpu = sample["cpu"] + ip = sample["ip"] + addr = sample["addr"] + + # Initialize CPU data if it's empty, and return directly + # if this is the first tracing event for this CPU. + if (cpu_data.get(str(cpu) + 'addr') == None): + cpu_data[str(cpu) + 'addr'] = addr + return + + # The packet format is: + # + # +------------+------------+------------+ + # sample_prev: | addr | ip | cpu | + # +------------+------------+------------+ + # sample_next: | addr | ip | cpu | + # +------------+------------+------------+ + # + # We need to combine the two consecutive packets to get the instruction + # range for sample_prev::cpu: + # + # [ sample_prev::addr .. sample_next::ip ] + # + # For this purpose, sample_prev::addr is stored into the cpu_data structure + # and read back as 'start_addr' when the new packet comes, and we use + # sample_next::ip plus an extra 4 bytes to calculate 'stop_addr'; the extra + # 4 is for the sake of objdump so the final assembler dump can include the + # last instruction for sample_next::ip.
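	# Worked example with invented addresses: if the previous packet on
	# this CPU stored addr=0x4005c0 and the next packet arrives with
	# ip=0x4005f4, the range handed to objdump is start_addr=0x4005c0 and
	# stop_addr=0x4005f8, so the dump still includes the instruction at
	# sample_next::ip itself.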
+ start_addr = cpu_data[str(cpu) + 'addr'] + stop_addr = ip + 4 + + # Record for previous sample packet + cpu_data[str(cpu) + 'addr'] = addr + + # Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4 + if (start_addr == 0 and stop_addr == 4): + print(f"CPU{cpu}: CS_ETM_TRACE_ON packet is inserted") + return + + if (start_addr < int(dso_start) or start_addr > int(dso_end)): + print(f"Start address 0x{start_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}") + return + + if (stop_addr < int(dso_start) or stop_addr > int(dso_end)): + print(f"Stop address 0x{stop_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}") + return + + if (options.objdump_name != None): + # It doesn't need to decrease virtual memory offset for disassembly + # for kernel dso, so in this case we set vm_start to zero. + if (dso == "[kernel.kallsyms]"): + dso_vm_start = 0 + else: + dso_vm_start = int(dso_start) + + dso_fname = get_dso_file_path(dso, dso_bid) + if path.exists(dso_fname): + print_disam(dso_fname, dso_vm_start, start_addr, stop_addr) + else: + print(f"Failed to find dso {dso} for address range [ 0x{start_addr:x} .. 0x{stop_addr:x} ]") + + print_srccode(comm, param_dict, sample, symbol, dso) diff --git a/tools/perf/tests/shell/lib/perf_csv_output_lint.py b/tools/perf/tests/shell/lib/perf_csv_output_lint.py new file mode 100644 index 000000000000..714f283cfb1b --- /dev/null +++ b/tools/perf/tests/shell/lib/perf_csv_output_lint.py @@ -0,0 +1,48 @@ +#!/usr/bin/python +# SPDX-License-Identifier: GPL-2.0 + +import argparse +import sys + +# Basic sanity check of perf CSV output as specified in the man page. +# Currently just checks the number of fields per line in output. + +ap = argparse.ArgumentParser() +ap.add_argument('--no-args', action='store_true') +ap.add_argument('--interval', action='store_true') +ap.add_argument('--system-wide-no-aggr', action='store_true') +ap.add_argument('--system-wide', action='store_true') +ap.add_argument('--event', action='store_true') +ap.add_argument('--per-core', action='store_true') +ap.add_argument('--per-thread', action='store_true') +ap.add_argument('--per-die', action='store_true') +ap.add_argument('--per-node', action='store_true') +ap.add_argument('--per-socket', action='store_true') +ap.add_argument('--separator', default=',', nargs='?') +args = ap.parse_args() + +Lines = sys.stdin.readlines() + +def check_csv_output(exp): + for line in Lines: + if 'failed' not in line: + count = line.count(args.separator) + if count != exp: + sys.stdout.write(''.join(Lines)) + raise RuntimeError(f'wrong number of fields. 
expected {exp} in {line}') + +try: + if args.no_args or args.system_wide or args.event: + expected_items = 6 + elif args.interval or args.per_thread or args.system_wide_no_aggr: + expected_items = 7 + elif args.per_core or args.per_socket or args.per_node or args.per_die: + expected_items = 8 + else: + ap.print_help() + raise RuntimeError('No checking option specified') + check_csv_output(expected_items) + +except: + sys.stdout.write('Test failed for input: ' + ''.join(Lines)) + raise diff --git a/tools/perf/tests/shell/record_offcpu.sh b/tools/perf/tests/shell/record_offcpu.sh new file mode 100755 index 000000000000..96e0739f7478 --- /dev/null +++ b/tools/perf/tests/shell/record_offcpu.sh @@ -0,0 +1,60 @@ +#!/bin/sh +# perf record offcpu profiling tests +# SPDX-License-Identifier: GPL-2.0 + +set -e + +err=0 +perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) + +cleanup() { + rm -f ${perfdata} + rm -f ${perfdata}.old + trap - exit term int +} + +trap_cleanup() { + cleanup + exit 1 +} +trap trap_cleanup exit term int + +test_offcpu() { + echo "Basic off-cpu test" + if [ `id -u` != 0 ] + then + echo "Basic off-cpu test [Skipped permission]" + err=2 + return + fi + if perf record --off-cpu -o ${perfdata} --quiet true 2>&1 | grep BUILD_BPF_SKEL + then + echo "Basic off-cpu test [Skipped missing BPF support]" + err=2 + return + fi + if ! perf record --off-cpu -e dummy -o ${perfdata} sleep 1 2> /dev/null + then + echo "Basic off-cpu test [Failed record]" + err=1 + return + fi + if ! perf evlist -i ${perfdata} | grep -q "offcpu-time" + then + echo "Basic off-cpu test [Failed record]" + err=1 + return + fi + if ! perf report -i ${perfdata} -q --percent-limit=90 | egrep -q sleep + then + echo "Basic off-cpu test [Failed missing output]" + err=1 + return + fi + echo "Basic off-cpu test [Success]" +} + +test_offcpu + +cleanup +exit $err diff --git a/tools/perf/tests/shell/stat+csv_output.sh b/tools/perf/tests/shell/stat+csv_output.sh new file mode 100755 index 000000000000..983220ef3cb4 --- /dev/null +++ b/tools/perf/tests/shell/stat+csv_output.sh @@ -0,0 +1,147 @@ +#!/bin/bash +# perf stat CSV output linter +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +# Tests various perf stat CSV output commands for the +# correct number of fields and the CSV separator set to ','. + +set -e + +pythonchecker=$(dirname $0)/lib/perf_csv_output_lint.py +if [ "x$PYTHON" == "x" ] +then + if which python3 > /dev/null + then + PYTHON=python3 + elif which python > /dev/null + then + PYTHON=python + else + echo Skipping test, python not detected please set environment variable PYTHON. + exit 2 + fi +fi + +# Return true if perf_event_paranoid is > $1 and not running as root. 
+function ParanoidAndNotRoot() +{ + [ $(id -u) != 0 ] && [ $(cat /proc/sys/kernel/perf_event_paranoid) -gt $1 ] +} + +check_no_args() +{ + echo -n "Checking CSV output: no args " + perf stat -x, true 2>&1 | $PYTHON $pythonchecker --no-args + echo "[Success]" +} + +check_system_wide() +{ + echo -n "Checking CSV output: system wide " + if ParanoidAndNotRoot 0 + then + echo "[Skip] paranoid and not root" + return + fi + perf stat -x, -a true 2>&1 | $PYTHON $pythonchecker --system-wide + echo "[Success]" +} + +check_system_wide_no_aggr() +{ + echo -n "Checking CSV output: system wide " + if ParanoidAndNotRoot 0 + then + echo "[Skip] paranoid and not root" + return + fi + echo -n "Checking CSV output: system wide no aggregation " + perf stat -x, -A -a --no-merge true 2>&1 | $PYTHON $pythonchecker --system-wide-no-aggr + echo "[Success]" +} + +check_interval() +{ + echo -n "Checking CSV output: interval " + perf stat -x, -I 1000 true 2>&1 | $PYTHON $pythonchecker --interval + echo "[Success]" +} + + +check_event() +{ + echo -n "Checking CSV output: event " + perf stat -x, -e cpu-clock true 2>&1 | $PYTHON $pythonchecker --event + echo "[Success]" +} + +check_per_core() +{ + echo -n "Checking CSV output: per core " + if ParanoidAndNotRoot 0 + then + echo "[Skip] paranoid and not root" + return + fi + perf stat -x, --per-core -a true 2>&1 | $PYTHON $pythonchecker --per-core + echo "[Success]" +} + +check_per_thread() +{ + echo -n "Checking CSV output: per thread " + if ParanoidAndNotRoot 0 + then + echo "[Skip] paranoid and not root" + return + fi + perf stat -x, --per-thread -a true 2>&1 | $PYTHON $pythonchecker --per-thread + echo "[Success]" +} + +check_per_die() +{ + echo -n "Checking CSV output: per die " + if ParanoidAndNotRoot 0 + then + echo "[Skip] paranoid and not root" + return + fi + perf stat -x, --per-die -a true 2>&1 | $PYTHON $pythonchecker --per-die + echo "[Success]" +} + +check_per_node() +{ + echo -n "Checking CSV output: per node " + if ParanoidAndNotRoot 0 + then + echo "[Skip] paranoid and not root" + return + fi + perf stat -x, --per-node -a true 2>&1 | $PYTHON $pythonchecker --per-node + echo "[Success]" +} + +check_per_socket() +{ + echo -n "Checking CSV output: per socket " + if ParanoidAndNotRoot 0 + then + echo "[Skip] paranoid and not root" + return + fi + perf stat -x, --per-socket -a true 2>&1 | $PYTHON $pythonchecker --per-socket + echo "[Success]" +} + +check_no_args +check_system_wide +check_system_wide_no_aggr +check_interval +check_event +check_per_core +check_per_thread +check_per_die +check_per_node +check_per_socket +exit 0 diff --git a/tools/perf/tests/shell/test_intel_pt.sh b/tools/perf/tests/shell/test_intel_pt.sh new file mode 100755 index 000000000000..a3298643884d --- /dev/null +++ b/tools/perf/tests/shell/test_intel_pt.sh @@ -0,0 +1,71 @@ +#!/bin/sh +# Miscellaneous Intel PT testing +# SPDX-License-Identifier: GPL-2.0 + +set -e + +# Skip if no Intel PT +perf list | grep -q 'intel_pt//' || exit 2 + +skip_cnt=0 +ok_cnt=0 +err_cnt=0 + +tmpfile=`mktemp` +perfdatafile=`mktemp` + +can_cpu_wide() +{ + perf record -o ${tmpfile} -B -N --no-bpf-event -e dummy:u -C $1 true 2>&1 >/dev/null || return 2 + return 0 +} + +test_system_wide_side_band() +{ + # Need CPU 0 and CPU 1 + can_cpu_wide 0 || return $? + can_cpu_wide 1 || return $? 
+ + # Record on CPU 0 a task running on CPU 1 + perf record -B -N --no-bpf-event -o ${perfdatafile} -e intel_pt//u -C 0 -- taskset --cpu-list 1 uname + + # Should get MMAP events from CPU 1 because they can be needed to decode + mmap_cnt=`perf script -i ${perfdatafile} --no-itrace --show-mmap-events -C 1 2>/dev/null | grep MMAP | wc -l` + + if [ ${mmap_cnt} -gt 0 ] ; then + return 0 + fi + + echo "Failed to record MMAP events on CPU 1 when tracing CPU 0" + return 1 +} + +count_result() +{ + if [ $1 -eq 2 ] ; then + skip_cnt=`expr ${skip_cnt} \+ 1` + return + fi + if [ $1 -eq 0 ] ; then + ok_cnt=`expr ${ok_cnt} \+ 1` + return + fi + err_cnt=`expr ${err_cnt} \+ 1` +} + +test_system_wide_side_band + +count_result $? + +rm -f ${tmpfile} +rm -f ${perfdatafile} + +if [ ${err_cnt} -gt 0 ] ; then + exit 1 +fi + +if [ ${ok_cnt} -gt 0 ] ; then + exit 0 +fi + +exit 2 diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 9a7209a99e16..a51267d88ca9 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -147,6 +147,7 @@ perf-$(CONFIG_LIBBPF) += bpf_map.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o +perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o perf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += probe-file.o diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index b11549ae39df..511dd3caa1bc 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -125,7 +125,7 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, mm->tid = mp->tid; mm->cpu = mp->cpu.cpu; - if (!mp->len) { + if (!mp->len || !mp->mmap_needed) { mm->base = NULL; return 0; } @@ -168,13 +168,20 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp, } void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, - struct evlist *evlist, int idx, - bool per_cpu) + struct evlist *evlist, + struct evsel *evsel, int idx) { + bool per_cpu = !perf_cpu_map__empty(evlist->core.user_requested_cpus); + + mp->mmap_needed = evsel->needs_auxtrace_mmap; + + if (!mp->mmap_needed) + return; + mp->idx = idx; if (per_cpu) { - mp->cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, idx); + mp->cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx); if (evlist->core.threads) mp->tid = perf_thread_map__pid(evlist->core.threads, 0); else diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index dc38b6f57232..cd0d25c2751c 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -344,6 +344,10 @@ struct auxtrace_mmap { * @idx: index of this mmap * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu * mmap) otherwise %0 + * @mmap_needed: set to %false for non-auxtrace events. This is needed because + * auxtrace mmapping is done in the same code path as non-auxtrace + * mmapping but not every evsel that needs non-auxtrace mmapping + * also needs auxtrace mmapping. 
* @cpu: cpu number for a per-cpu mmap otherwise %-1 */ struct auxtrace_mmap_params { @@ -353,6 +357,7 @@ struct auxtrace_mmap_params { int prot; int idx; pid_t tid; + bool mmap_needed; struct perf_cpu cpu; }; @@ -490,8 +495,8 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp, unsigned int auxtrace_pages, bool auxtrace_overwrite); void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, - struct evlist *evlist, int idx, - bool per_cpu); + struct evlist *evlist, + struct evsel *evsel, int idx); typedef int (*process_auxtrace_t)(struct perf_tool *tool, struct mmap *map, @@ -863,8 +868,8 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp, unsigned int auxtrace_pages, bool auxtrace_overwrite); void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, - struct evlist *evlist, int idx, - bool per_cpu); + struct evlist *evlist, + struct evsel *evsel, int idx); #define ITRACE_HELP "" diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 8271ab764eb5..eee64ddb766d 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -35,11 +35,12 @@ struct btf *btf__load_from_kernel_by_id(__u32 id) } #endif -int __weak bpf_prog_load(enum bpf_prog_type prog_type, - const char *prog_name __maybe_unused, - const char *license, - const struct bpf_insn *insns, size_t insn_cnt, - const struct bpf_prog_load_opts *opts) +#ifndef HAVE_LIBBPF_BPF_PROG_LOAD +int bpf_prog_load(enum bpf_prog_type prog_type, + const char *prog_name __maybe_unused, + const char *license, + const struct bpf_insn *insns, size_t insn_cnt, + const struct bpf_prog_load_opts *opts) { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" @@ -47,8 +48,10 @@ int __weak bpf_prog_load(enum bpf_prog_type prog_type, opts->kern_version, opts->log_buf, opts->log_size); #pragma GCC diagnostic pop } +#endif -struct bpf_program * __weak +#ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM +struct bpf_program * bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) { #pragma GCC diagnostic push @@ -56,8 +59,10 @@ bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev) return bpf_program__next(prev, obj); #pragma GCC diagnostic pop } +#endif -struct bpf_map * __weak +#ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_MAP +struct bpf_map * bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) { #pragma GCC diagnostic push @@ -65,8 +70,10 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) return bpf_map__next(prev, obj); #pragma GCC diagnostic pop } +#endif -const void * __weak +#ifndef HAVE_LIBBPF_BTF__RAW_DATA +const void * btf__raw_data(const struct btf *btf_ro, __u32 *size) { #pragma GCC diagnostic push @@ -74,6 +81,7 @@ btf__raw_data(const struct btf *btf_ro, __u32 *size) return btf__get_raw_data(btf_ro, size); #pragma GCC diagnostic pop } +#endif static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len) { diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index d4931f54e1dd..ef1c15e4aeba 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -312,7 +312,10 @@ static bool bperf_attr_map_compatible(int attr_map_fd) (map_info.value_size == sizeof(struct perf_event_attr_map_entry)); } -int __weak +#ifndef HAVE_LIBBPF_BPF_MAP_CREATE +LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size, + int value_size, int max_entries, __u32 map_flags); +int 
bpf_map_create(enum bpf_map_type map_type, const char *map_name __maybe_unused, __u32 key_size, @@ -325,6 +328,7 @@ bpf_map_create(enum bpf_map_type map_type, return bpf_create_map(map_type, key_size, value_size, max_entries, 0); #pragma GCC diagnostic pop } +#endif static int bperf_lock_attr_map(struct target *target) { diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c new file mode 100644 index 000000000000..b73e84a02264 --- /dev/null +++ b/tools/perf/util/bpf_off_cpu.c @@ -0,0 +1,338 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "util/bpf_counter.h" +#include "util/debug.h" +#include "util/evsel.h" +#include "util/evlist.h" +#include "util/off_cpu.h" +#include "util/perf-hooks.h" +#include "util/record.h" +#include "util/session.h" +#include "util/target.h" +#include "util/cpumap.h" +#include "util/thread_map.h" +#include "util/cgroup.h" +#include <bpf/bpf.h> + +#include "bpf_skel/off_cpu.skel.h" + +#define MAX_STACKS 32 +/* we don't need actual timestamp, just want to put the samples at last */ +#define OFF_CPU_TIMESTAMP (~0ull << 32) + +static struct off_cpu_bpf *skel; + +struct off_cpu_key { + u32 pid; + u32 tgid; + u32 stack_id; + u32 state; + u64 cgroup_id; +}; + +union off_cpu_data { + struct perf_event_header hdr; + u64 array[1024 / sizeof(u64)]; +}; + +static int off_cpu_config(struct evlist *evlist) +{ + struct evsel *evsel; + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_BPF_OUTPUT, + .size = sizeof(attr), /* to capture ABI version */ + }; + char *evname = strdup(OFFCPU_EVENT); + + if (evname == NULL) + return -ENOMEM; + + evsel = evsel__new(&attr); + if (!evsel) { + free(evname); + return -ENOMEM; + } + + evsel->core.attr.freq = 1; + evsel->core.attr.sample_period = 1; + /* off-cpu analysis depends on stack trace */ + evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN; + + evlist__add(evlist, evsel); + + free(evsel->name); + evsel->name = evname; + + return 0; +} + +static void off_cpu_start(void *arg) +{ + struct evlist *evlist = arg; + + /* update task filter for the given workload */ + if (!skel->bss->has_cpu && !skel->bss->has_task && + perf_thread_map__pid(evlist->core.threads, 0) != -1) { + int fd; + u32 pid; + u8 val = 1; + + skel->bss->has_task = 1; + fd = bpf_map__fd(skel->maps.task_filter); + pid = perf_thread_map__pid(evlist->core.threads, 0); + bpf_map_update_elem(fd, &pid, &val, BPF_ANY); + } + + skel->bss->enabled = 1; +} + +static void off_cpu_finish(void *arg __maybe_unused) +{ + skel->bss->enabled = 0; + off_cpu_bpf__destroy(skel); +} + +/* v5.18 kernel added prev_state arg, so it needs to check the signature */ +static void check_sched_switch_args(void) +{ + const struct btf *btf = bpf_object__btf(skel->obj); + const struct btf_type *t1, *t2, *t3; + u32 type_id; + + type_id = btf__find_by_name_kind(btf, "bpf_trace_sched_switch", + BTF_KIND_TYPEDEF); + if ((s32)type_id < 0) + return; + + t1 = btf__type_by_id(btf, type_id); + if (t1 == NULL) + return; + + t2 = btf__type_by_id(btf, t1->type); + if (t2 == NULL || !btf_is_ptr(t2)) + return; + + t3 = btf__type_by_id(btf, t2->type); + if (t3 && btf_is_func_proto(t3) && btf_vlen(t3) == 4) { + /* new format: pass prev_state as 4th arg */ + skel->rodata->has_prev_state = true; + } +} + +int off_cpu_prepare(struct evlist *evlist, struct target *target, + struct record_opts *opts) +{ + int err, fd, i; + int ncpus = 1, ntasks = 1, ncgrps = 1; + + if (off_cpu_config(evlist) < 0) { + pr_err("Failed to config off-cpu BPF event\n"); + return -1; + } + + 
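	/*
	 * The setup below follows the usual libbpf skeleton life cycle:
	 * open the skeleton, size the filter maps and set .rodata flags
	 * while it is still open, then load it, populate the filter maps,
	 * and attach it.
	 */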
skel = off_cpu_bpf__open(); + if (!skel) { + pr_err("Failed to open off-cpu BPF skeleton\n"); + return -1; + } + + /* don't need to set cpu filter for system-wide mode */ + if (target->cpu_list) { + ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); + bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); + } + + if (target__has_task(target)) { + ntasks = perf_thread_map__nr(evlist->core.threads); + bpf_map__set_max_entries(skel->maps.task_filter, ntasks); + } + + if (evlist__first(evlist)->cgrp) { + ncgrps = evlist->core.nr_entries - 1; /* excluding a dummy */ + bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps); + + if (!cgroup_is_v2("perf_event")) + skel->rodata->uses_cgroup_v1 = true; + } + + if (opts->record_cgroup) { + skel->rodata->needs_cgroup = true; + + if (!cgroup_is_v2("perf_event")) + skel->rodata->uses_cgroup_v1 = true; + } + + set_max_rlimit(); + check_sched_switch_args(); + + err = off_cpu_bpf__load(skel); + if (err) { + pr_err("Failed to load off-cpu skeleton\n"); + goto out; + } + + if (target->cpu_list) { + u32 cpu; + u8 val = 1; + + skel->bss->has_cpu = 1; + fd = bpf_map__fd(skel->maps.cpu_filter); + + for (i = 0; i < ncpus; i++) { + cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu; + bpf_map_update_elem(fd, &cpu, &val, BPF_ANY); + } + } + + if (target__has_task(target)) { + u32 pid; + u8 val = 1; + + skel->bss->has_task = 1; + fd = bpf_map__fd(skel->maps.task_filter); + + for (i = 0; i < ntasks; i++) { + pid = perf_thread_map__pid(evlist->core.threads, i); + bpf_map_update_elem(fd, &pid, &val, BPF_ANY); + } + } + + if (evlist__first(evlist)->cgrp) { + struct evsel *evsel; + u8 val = 1; + + skel->bss->has_cgroup = 1; + fd = bpf_map__fd(skel->maps.cgroup_filter); + + evlist__for_each_entry(evlist, evsel) { + struct cgroup *cgrp = evsel->cgrp; + + if (cgrp == NULL) + continue; + + if (!cgrp->id && read_cgroup_id(cgrp) < 0) { + pr_err("Failed to read cgroup id of %s\n", + cgrp->name); + goto out; + } + + bpf_map_update_elem(fd, &cgrp->id, &val, BPF_ANY); + } + } + + err = off_cpu_bpf__attach(skel); + if (err) { + pr_err("Failed to attach off-cpu BPF skeleton\n"); + goto out; + } + + if (perf_hooks__set_hook("record_start", off_cpu_start, evlist) || + perf_hooks__set_hook("record_end", off_cpu_finish, evlist)) { + pr_err("Failed to attach off-cpu skeleton\n"); + goto out; + } + + return 0; + +out: + off_cpu_bpf__destroy(skel); + return -1; +} + +int off_cpu_write(struct perf_session *session) +{ + int bytes = 0, size; + int fd, stack; + u64 sample_type, val, sid = 0; + struct evsel *evsel; + struct perf_data_file *file = &session->data->file; + struct off_cpu_key prev, key; + union off_cpu_data data = { + .hdr = { + .type = PERF_RECORD_SAMPLE, + .misc = PERF_RECORD_MISC_USER, + }, + }; + u64 tstamp = OFF_CPU_TIMESTAMP; + + skel->bss->enabled = 0; + + evsel = evlist__find_evsel_by_str(session->evlist, OFFCPU_EVENT); + if (evsel == NULL) { + pr_err("%s evsel not found\n", OFFCPU_EVENT); + return 0; + } + + sample_type = evsel->core.attr.sample_type; + + if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) { + if (evsel->core.id) + sid = evsel->core.id[0]; + } + + fd = bpf_map__fd(skel->maps.off_cpu); + stack = bpf_map__fd(skel->maps.stacks); + memset(&prev, 0, sizeof(prev)); + + while (!bpf_map_get_next_key(fd, &prev, &key)) { + int n = 1; /* start from perf_event_header */ + int ip_pos = -1; + + bpf_map_lookup_elem(fd, &key, &val); + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + data.array[n++] = sid; + if (sample_type & 
PERF_SAMPLE_IP) { + ip_pos = n; + data.array[n++] = 0; /* will be updated */ + } + if (sample_type & PERF_SAMPLE_TID) + data.array[n++] = (u64)key.pid << 32 | key.tgid; + if (sample_type & PERF_SAMPLE_TIME) + data.array[n++] = tstamp; + if (sample_type & PERF_SAMPLE_ID) + data.array[n++] = sid; + if (sample_type & PERF_SAMPLE_CPU) + data.array[n++] = 0; + if (sample_type & PERF_SAMPLE_PERIOD) + data.array[n++] = val; + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + int len = 0; + + /* data.array[n] is callchain->nr (updated later) */ + data.array[n + 1] = PERF_CONTEXT_USER; + data.array[n + 2] = 0; + + bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]); + while (data.array[n + 2 + len]) + len++; + + /* update length of callchain */ + data.array[n] = len + 1; + + /* update sample ip with the first callchain entry */ + if (ip_pos >= 0) + data.array[ip_pos] = data.array[n + 2]; + + /* calculate sample callchain data array length */ + n += len + 2; + } + if (sample_type & PERF_SAMPLE_CGROUP) + data.array[n++] = key.cgroup_id; + /* TODO: handle more sample types */ + + size = n * sizeof(u64); + data.hdr.size = size; + bytes += size; + + if (perf_data_file__write(file, &data, size) < 0) { + pr_err("failed to write perf data, error: %m\n"); + return bytes; + } + + prev = key; + /* increase dummy timestamp to sort later samples */ + tstamp++; + } + return bytes; +} diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c new file mode 100644 index 000000000000..792ae2847080 --- /dev/null +++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2022 Google +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +/* task->flags for off-cpu analysis */ +#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ + +/* task->state for off-cpu analysis */ +#define TASK_INTERRUPTIBLE 0x0001 +#define TASK_UNINTERRUPTIBLE 0x0002 + +#define MAX_STACKS 32 +#define MAX_ENTRIES 102400 + +struct tstamp_data { + __u32 stack_id; + __u32 state; + __u64 timestamp; +}; + +struct offcpu_key { + __u32 pid; + __u32 tgid; + __u32 stack_id; + __u32 state; + __u64 cgroup_id; +}; + +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(key_size, sizeof(__u32)); + __uint(value_size, MAX_STACKS * sizeof(__u64)); + __uint(max_entries, MAX_ENTRIES); +} stacks SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct tstamp_data); +} tstamp SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(struct offcpu_key)); + __uint(value_size, sizeof(__u64)); + __uint(max_entries, MAX_ENTRIES); +} off_cpu SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u8)); + __uint(max_entries, 1); +} cpu_filter SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u8)); + __uint(max_entries, 1); +} task_filter SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); + __uint(value_size, sizeof(__u8)); + __uint(max_entries, 1); +} cgroup_filter SEC(".maps"); + +/* old kernel task_struct definition */ +struct task_struct___old { + long state; +} __attribute__((preserve_access_index)); + +int enabled = 0; +int has_cpu = 0; +int has_task = 0; +int 
has_cgroup = 0; + +const volatile bool has_prev_state = false; +const volatile bool needs_cgroup = false; +const volatile bool uses_cgroup_v1 = false; + +/* + * Old kernel used to call it task_struct->state and now it's '__state'. + * Use BPF CO-RE "ignored suffix rule" to deal with it like below: + * + * https://nakryiko.com/posts/bpf-core-reference-guide/#handling-incompatible-field-and-type-changes + */ +static inline int get_task_state(struct task_struct *t) +{ + if (bpf_core_field_exists(t->__state)) + return BPF_CORE_READ(t, __state); + + /* recast pointer to capture task_struct___old type for compiler */ + struct task_struct___old *t_old = (void *)t; + + /* now use old "state" name of the field */ + return BPF_CORE_READ(t_old, state); +} + +static inline __u64 get_cgroup_id(struct task_struct *t) +{ + struct cgroup *cgrp; + + if (uses_cgroup_v1) + cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_event_cgrp_id], cgroup); + else + cgrp = BPF_CORE_READ(t, cgroups, dfl_cgrp); + + return BPF_CORE_READ(cgrp, kn, id); +} + +static inline int can_record(struct task_struct *t, int state) +{ + /* kernel threads don't have user stack */ + if (t->flags & PF_KTHREAD) + return 0; + + if (state != TASK_INTERRUPTIBLE && + state != TASK_UNINTERRUPTIBLE) + return 0; + + if (has_cpu) { + __u32 cpu = bpf_get_smp_processor_id(); + __u8 *ok; + + ok = bpf_map_lookup_elem(&cpu_filter, &cpu); + if (!ok) + return 0; + } + + if (has_task) { + __u8 *ok; + __u32 pid = t->pid; + + ok = bpf_map_lookup_elem(&task_filter, &pid); + if (!ok) + return 0; + } + + if (has_cgroup) { + __u8 *ok; + __u64 cgrp_id = get_cgroup_id(t); + + ok = bpf_map_lookup_elem(&cgroup_filter, &cgrp_id); + if (!ok) + return 0; + } + + return 1; +} + +static int off_cpu_stat(u64 *ctx, struct task_struct *prev, + struct task_struct *next, int state) +{ + __u64 ts; + __u32 stack_id; + struct tstamp_data *pelem; + + ts = bpf_ktime_get_ns(); + + if (!can_record(prev, state)) + goto next; + + stack_id = bpf_get_stackid(ctx, &stacks, + BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK); + + pelem = bpf_task_storage_get(&tstamp, prev, NULL, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!pelem) + goto next; + + pelem->timestamp = ts; + pelem->state = state; + pelem->stack_id = stack_id; + +next: + pelem = bpf_task_storage_get(&tstamp, next, NULL, 0); + + if (pelem && pelem->timestamp) { + struct offcpu_key key = { + .pid = next->pid, + .tgid = next->tgid, + .stack_id = pelem->stack_id, + .state = pelem->state, + .cgroup_id = needs_cgroup ? 
get_cgroup_id(next) : 0, + }; + __u64 delta = ts - pelem->timestamp; + __u64 *total; + + total = bpf_map_lookup_elem(&off_cpu, &key); + if (total) + *total += delta; + else + bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY); + + /* prevent the timestamp from being reused later */ + pelem->timestamp = 0; + } + + return 0; +} + +SEC("tp_btf/sched_switch") +int on_switch(u64 *ctx) +{ + struct task_struct *prev, *next; + int prev_state; + + if (!enabled) + return 0; + + prev = (struct task_struct *)ctx[1]; + next = (struct task_struct *)ctx[2]; + + if (has_prev_state) + prev_state = (int)ctx[3]; + else + prev_state = get_task_state(prev); + + return off_cpu_stat(ctx, prev, next, prev_state); +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 7f9f588e88c6..48af7d379d82 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -242,14 +242,20 @@ int __evlist__add_default(struct evlist *evlist, bool precise) return 0; } -int evlist__add_dummy(struct evlist *evlist) +static struct evsel *evlist__dummy_event(struct evlist *evlist) { struct perf_event_attr attr = { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_DUMMY, .size = sizeof(attr), /* to capture ABI version */ }; - struct evsel *evsel = evsel__new_idx(&attr, evlist->core.nr_entries); + + return evsel__new_idx(&attr, evlist->core.nr_entries); +} + +int evlist__add_dummy(struct evlist *evlist) +{ + struct evsel *evsel = evlist__dummy_event(evlist); if (evsel == NULL) return -ENOMEM; @@ -258,6 +264,51 @@ int evlist__add_dummy(struct evlist *evlist) return 0; } +static void evlist__add_on_all_cpus(struct evlist *evlist, struct evsel *evsel) +{ + evsel->core.system_wide = true; + + /* + * All CPUs. + * + * Note perf_event_open() does not accept CPUs that are not online, so + * in practice this CPU list will contain only the online CPUs. 
+ */ + perf_cpu_map__put(evsel->core.own_cpus); + evsel->core.own_cpus = perf_cpu_map__new(NULL); + perf_cpu_map__put(evsel->core.cpus); + evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus); + + /* No threads */ + perf_thread_map__put(evsel->core.threads); + evsel->core.threads = perf_thread_map__new_dummy(); + + evlist__add(evlist, evsel); +} + +struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide) +{ + struct evsel *evsel = evlist__dummy_event(evlist); + + if (!evsel) + return NULL; + + evsel->core.attr.exclude_kernel = 1; + evsel->core.attr.exclude_guest = 1; + evsel->core.attr.exclude_hv = 1; + evsel->core.attr.freq = 0; + evsel->core.attr.sample_period = 1; + evsel->no_aux_samples = true; + evsel->name = strdup("dummy:u"); + + if (system_wide) + evlist__add_on_all_cpus(evlist, evsel); + else + evlist__add(evlist, evsel); + + return evsel; +} + static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) { struct evsel *evsel, *n; @@ -747,15 +798,15 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist, static void perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist, - struct perf_evsel *_evsel __maybe_unused, + struct perf_evsel *_evsel, struct perf_mmap_param *_mp, int idx) { struct evlist *evlist = container_of(_evlist, struct evlist, core); struct mmap_params *mp = container_of(_mp, struct mmap_params, core); - bool per_cpu = !perf_cpu_map__empty(_evlist->user_requested_cpus); + struct evsel *evsel = container_of(_evsel, struct evsel, core); - auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, idx, per_cpu); + auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, evsel, idx); } static struct perf_mmap* diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 4062f5aebfc1..1bde9ccf4e7d 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -114,6 +114,11 @@ int arch_evlist__add_default_attrs(struct evlist *evlist); struct evsel *arch_evlist__leader(struct list_head *list); int evlist__add_dummy(struct evlist *evlist); +struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide); +static inline struct evsel *evlist__add_dummy_on_all_cpus(struct evlist *evlist) +{ + return evlist__add_aux_dummy(evlist, true); +} int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, evsel__sb_cb_t cb, void *data); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ef169ad15236..ce499c5da8d7 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -296,8 +296,8 @@ struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx) return NULL; evsel__init(evsel, attr, idx); - if (evsel__is_bpf_output(evsel)) { - evsel->core.attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | + if (evsel__is_bpf_output(evsel) && !attr->sample_type) { + evsel->core.attr.sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD), evsel->core.attr.sample_period = 1; } @@ -409,6 +409,7 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->core.threads = perf_thread_map__get(orig->core.threads); evsel->core.nr_members = orig->core.nr_members; evsel->core.system_wide = orig->core.system_wide; + evsel->core.requires_cpu = orig->core.requires_cpu; if (orig->name) { evsel->name = strdup(orig->name); @@ -896,7 +897,7 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o "specifying a subset with --user-regs may render DWARF unwinding unreliable, " "so the minimal registers 
set (IP, SP) is explicitly forced.\n"); } else { - attr->sample_regs_user |= PERF_REGS_MASK; + attr->sample_regs_user |= arch__user_reg_mask(); } attr->sample_stack_user = param->dump_size; attr->exclude_callchain_user = 1; diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c index 15f60fd09424..014d82159656 100644 --- a/tools/perf/util/libunwind/arm64.c +++ b/tools/perf/util/libunwind/arm64.c @@ -24,7 +24,7 @@ #include "unwind.h" #include "libunwind-aarch64.h" #define perf_event_arm_regs perf_event_arm64_regs -#include <../../../../arch/arm64/include/uapi/asm/perf_regs.h> +#include <../../../arch/arm64/include/uapi/asm/perf_regs.h> #undef perf_event_arm_regs #include "../../arch/arm64/util/unwind-libunwind.c" diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 50502b4a7ca4..a4dff881be39 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -62,8 +62,8 @@ void __weak auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp __maybe_u void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused, struct evlist *evlist __maybe_unused, - int idx __maybe_unused, - bool per_cpu __maybe_unused) + struct evsel *evsel __maybe_unused, + int idx __maybe_unused) { } diff --git a/tools/perf/util/off_cpu.h b/tools/perf/util/off_cpu.h new file mode 100644 index 000000000000..548008f74d42 --- /dev/null +++ b/tools/perf/util/off_cpu.h @@ -0,0 +1,29 @@ +#ifndef PERF_UTIL_OFF_CPU_H +#define PERF_UTIL_OFF_CPU_H + +struct evlist; +struct target; +struct perf_session; +struct record_opts; + +#define OFFCPU_EVENT "offcpu-time" + +#ifdef HAVE_BPF_SKEL +int off_cpu_prepare(struct evlist *evlist, struct target *target, + struct record_opts *opts); +int off_cpu_write(struct perf_session *session); +#else +static inline int off_cpu_prepare(struct evlist *evlist __maybe_unused, + struct target *target __maybe_unused, + struct record_opts *opts __maybe_unused) +{ + return -1; +} + +static inline int off_cpu_write(struct perf_session *session __maybe_unused) +{ + return -1; +} +#endif + +#endif /* PERF_UTIL_OFF_CPU_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 30a9d915853d..7ed235740431 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -365,7 +365,7 @@ __add_event(struct list_head *list, int *idx, (*idx)++; evsel->core.cpus = cpus; evsel->core.own_cpus = perf_cpu_map__get(cpus); - evsel->core.system_wide = pmu ? pmu->is_uncore : false; + evsel->core.requires_cpu = pmu ? 
pmu->is_uncore : false; evsel->auto_merge_stats = auto_merge_stats; if (name) diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index a982e40ee5a9..872dd3d38782 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -103,6 +103,8 @@ static const char *__perf_reg_name_arm64(int id) return "lr"; case PERF_REG_ARM64_PC: return "pc"; + case PERF_REG_ARM64_VG: + return "vg"; default: return NULL; } diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index a685d20165f7..aa5156c2bcff 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -38,5 +38,6 @@ util/units.c util/affinity.c util/rwsem.c util/hashmap.c +util/perf_regs.c util/pmu-hybrid.c util/fncache.c diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 659eb4e4b34b..adba01b7d9dd 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -755,12 +755,22 @@ static void set_regs_in_dict(PyObject *dict, } static void set_sym_in_dict(PyObject *dict, struct addr_location *al, - const char *dso_field, const char *sym_field, - const char *symoff_field) + const char *dso_field, const char *dso_bid_field, + const char *dso_map_start, const char *dso_map_end, + const char *sym_field, const char *symoff_field) { + char sbuild_id[SBUILD_ID_SIZE]; + if (al->map) { pydict_set_item_string_decref(dict, dso_field, _PyUnicode_FromString(al->map->dso->name)); + build_id__sprintf(&al->map->dso->bid, sbuild_id); + pydict_set_item_string_decref(dict, dso_bid_field, + _PyUnicode_FromString(sbuild_id)); + pydict_set_item_string_decref(dict, dso_map_start, + PyLong_FromUnsignedLong(al->map->start)); + pydict_set_item_string_decref(dict, dso_map_end, + PyLong_FromUnsignedLong(al->map->end)); } if (al->sym) { pydict_set_item_string_decref(dict, sym_field, @@ -840,7 +850,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, (const char *)sample->raw_data, sample->raw_size)); pydict_set_item_string_decref(dict, "comm", _PyUnicode_FromString(thread__comm_str(al->thread))); - set_sym_in_dict(dict, al, "dso", "symbol", "symoff"); + set_sym_in_dict(dict, al, "dso", "dso_bid", "dso_map_start", "dso_map_end", + "symbol", "symoff"); pydict_set_item_string_decref(dict, "callchain", callchain); @@ -856,7 +867,9 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, if (addr_al) { pydict_set_item_string_decref(dict_sample, "addr_correlates_sym", PyBool_FromLong(1)); - set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_symbol", "addr_symoff"); + set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_dso_bid", + "addr_dso_map_start", "addr_dso_map_end", + "addr_symbol", "addr_symoff"); } if (sample->flags) diff --git a/tools/testing/crypto/chacha20-s390/Makefile b/tools/testing/crypto/chacha20-s390/Makefile new file mode 100644 index 000000000000..db81cd2fb9c5 --- /dev/null +++ b/tools/testing/crypto/chacha20-s390/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2022 Red Hat, Inc. 
+# Author: Vladis Dronov <vdronoff@gmail.com> + +obj-m += test_cipher.o +test_cipher-y := test-cipher.o + +all: + make -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) modules +clean: + make -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) clean diff --git a/tools/testing/crypto/chacha20-s390/run-tests.sh b/tools/testing/crypto/chacha20-s390/run-tests.sh new file mode 100644 index 000000000000..43108794b996 --- /dev/null +++ b/tools/testing/crypto/chacha20-s390/run-tests.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2022 Red Hat, Inc. +# Author: Vladis Dronov <vdronoff@gmail.com> +# +# This script runs (via insmod) the test_cipher.ko module, which invokes +# generic and s390-native ChaCha20 encryption algorithms with different +# sizes of data. Check 'dmesg' for results. +# +# The insmod error is expected: +# insmod: ERROR: could not insert module test_cipher.ko: Operation not permitted + +lsmod | grep chacha | cut -f1 -d' ' | xargs rmmod +modprobe chacha_generic +modprobe chacha_s390 + +# run encryption for different data sizes, including whole block(s) +/- 1 +insmod test_cipher.ko size=63 +insmod test_cipher.ko size=64 +insmod test_cipher.ko size=65 +insmod test_cipher.ko size=127 +insmod test_cipher.ko size=128 +insmod test_cipher.ko size=129 +insmod test_cipher.ko size=511 +insmod test_cipher.ko size=512 +insmod test_cipher.ko size=513 +insmod test_cipher.ko size=4096 +insmod test_cipher.ko size=65611 +insmod test_cipher.ko size=6291456 +insmod test_cipher.ko size=62914560 + +# print test logs +dmesg | tail -170 diff --git a/tools/testing/crypto/chacha20-s390/test-cipher.c b/tools/testing/crypto/chacha20-s390/test-cipher.c new file mode 100644 index 000000000000..34e8b855266f --- /dev/null +++ b/tools/testing/crypto/chacha20-s390/test-cipher.c @@ -0,0 +1,372 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2022 Red Hat, Inc. 
+ * Author: Vladis Dronov <vdronoff@gmail.com> + */ + +#include <asm/elf.h> +#include <asm/uaccess.h> +#include <asm/smp.h> +#include <crypto/skcipher.h> +#include <crypto/akcipher.h> +#include <crypto/acompress.h> +#include <crypto/rng.h> +#include <crypto/drbg.h> +#include <crypto/kpp.h> +#include <crypto/internal/simd.h> +#include <crypto/chacha.h> +#include <crypto/aead.h> +#include <crypto/hash.h> +#include <linux/crypto.h> +#include <linux/debugfs.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/fs.h> +#include <linux/fips.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/scatterlist.h> +#include <linux/time.h> +#include <linux/vmalloc.h> +#include <linux/zlib.h> +#include <linux/once.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <linux/string.h> + +static unsigned int data_size __read_mostly = 256; +static unsigned int debug __read_mostly = 0; + +/* tie all skcipher structures together */ +struct skcipher_def { + struct scatterlist sginp, sgout; + struct crypto_skcipher *tfm; + struct skcipher_request *req; + struct crypto_wait wait; +}; + +/* Perform cipher operations with the chacha lib */ +static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain) +{ + u32 chacha_state[CHACHA_STATE_WORDS]; + u8 iv[16], key[32]; + u64 start, end; + + memset(key, 'X', sizeof(key)); + memset(iv, 'I', sizeof(iv)); + + if (debug) { + print_hex_dump(KERN_INFO, "key: ", DUMP_PREFIX_OFFSET, + 16, 1, key, 32, 1); + + print_hex_dump(KERN_INFO, "iv: ", DUMP_PREFIX_OFFSET, + 16, 1, iv, 16, 1); + } + + /* Encrypt */ + chacha_init_arch(chacha_state, (u32*)key, iv); + + start = ktime_get_ns(); + chacha_crypt_arch(chacha_state, cipher, plain, data_size, 20); + end = ktime_get_ns(); + + + if (debug) + print_hex_dump(KERN_INFO, "encr:", DUMP_PREFIX_OFFSET, + 16, 1, cipher, + (data_size > 64 ? 64 : data_size), 1); + + pr_info("lib encryption took: %lld nsec", end - start); + + /* Decrypt */ + chacha_init_arch(chacha_state, (u32 *)key, iv); + + start = ktime_get_ns(); + chacha_crypt_arch(chacha_state, revert, cipher, data_size, 20); + end = ktime_get_ns(); + + if (debug) + print_hex_dump(KERN_INFO, "decr:", DUMP_PREFIX_OFFSET, + 16, 1, revert, + (data_size > 64 ? 
64 : data_size), 1); + + pr_info("lib decryption took: %lld nsec", end - start); + + return 0; +} + +/* Perform cipher operations with skcipher */ +static int test_skcipher_encdec(struct skcipher_def *sk, + int enc) +{ + int rc; + + if (enc) { + rc = crypto_wait_req(crypto_skcipher_encrypt(sk->req), + &sk->wait); + if (rc) + pr_info("skcipher encrypt returned with result %d\n", rc); + } else { + rc = crypto_wait_req(crypto_skcipher_decrypt(sk->req), + &sk->wait); + if (rc) + pr_info("skcipher decrypt returned with result %d\n", rc); + } + + return rc; +} + +/* Initialize and trigger cipher operations */ +static int test_skcipher(char *name, u8 *revert, u8 *cipher, u8 *plain) +{ + struct skcipher_def sk; + struct crypto_skcipher *skcipher = NULL; + struct skcipher_request *req = NULL; + u8 iv[16], key[32]; + u64 start, end; + int ret = -EFAULT; + + skcipher = crypto_alloc_skcipher(name, 0, 0); + if (IS_ERR(skcipher)) { + pr_info("could not allocate skcipher %s handle\n", name); + return PTR_ERR(skcipher); + } + + req = skcipher_request_alloc(skcipher, GFP_KERNEL); + if (!req) { + pr_info("could not allocate skcipher request\n"); + ret = -ENOMEM; + goto out; + } + + skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, + &sk.wait); + + memset(key, 'X', sizeof(key)); + memset(iv, 'I', sizeof(iv)); + + if (crypto_skcipher_setkey(skcipher, key, 32)) { + pr_info("key could not be set\n"); + ret = -EAGAIN; + goto out; + } + + if (debug) { + print_hex_dump(KERN_INFO, "key: ", DUMP_PREFIX_OFFSET, + 16, 1, key, 32, 1); + + print_hex_dump(KERN_INFO, "iv: ", DUMP_PREFIX_OFFSET, + 16, 1, iv, 16, 1); + } + + sk.tfm = skcipher; + sk.req = req; + + /* Encrypt in one pass */ + sg_init_one(&sk.sginp, plain, data_size); + sg_init_one(&sk.sgout, cipher, data_size); + skcipher_request_set_crypt(req, &sk.sginp, &sk.sgout, + data_size, iv); + crypto_init_wait(&sk.wait); + + /* Encrypt data */ + start = ktime_get_ns(); + ret = test_skcipher_encdec(&sk, 1); + end = ktime_get_ns(); + + if (ret) + goto out; + + pr_info("%s tfm encryption successful, took %lld nsec\n", name, end - start); + + if (debug) + print_hex_dump(KERN_INFO, "encr:", DUMP_PREFIX_OFFSET, + 16, 1, cipher, + (data_size > 64 ? 64 : data_size), 1); + + /* Prepare for decryption */ + memset(iv, 'I', sizeof(iv)); + + sg_init_one(&sk.sginp, cipher, data_size); + sg_init_one(&sk.sgout, revert, data_size); + skcipher_request_set_crypt(req, &sk.sginp, &sk.sgout, + data_size, iv); + crypto_init_wait(&sk.wait); + + /* Decrypt data */ + start = ktime_get_ns(); + ret = test_skcipher_encdec(&sk, 0); + end = ktime_get_ns(); + + if (ret) + goto out; + + pr_info("%s tfm decryption successful, took %lld nsec\n", name, end - start); + + if (debug) + print_hex_dump(KERN_INFO, "decr:", DUMP_PREFIX_OFFSET, + 16, 1, revert, + (data_size > 64 ? 
64 : data_size), 1); + + /* Dump some internal skcipher data */ + if (debug) + pr_info("skcipher %s: cryptlen %d blksize %d stride %d " + "ivsize %d alignmask 0x%x\n", + name, sk.req->cryptlen, + crypto_skcipher_blocksize(sk.tfm), + crypto_skcipher_alg(sk.tfm)->walksize, + crypto_skcipher_ivsize(sk.tfm), + crypto_skcipher_alignmask(sk.tfm)); + +out: + if (skcipher) + crypto_free_skcipher(skcipher); + if (req) + skcipher_request_free(req); + return ret; +} + +static int __init chacha_s390_test_init(void) +{ + u8 *plain = NULL, *revert = NULL; + u8 *cipher_generic = NULL, *cipher_s390 = NULL; + int ret = -1; + + pr_info("s390 ChaCha20 test module: size=%d debug=%d\n", + data_size, debug); + + /* Allocate and fill buffers */ + plain = vmalloc(data_size); + if (!plain) { + pr_info("could not allocate plain buffer\n"); + ret = -2; + goto out; + } + memset(plain, 'a', data_size); + get_random_bytes(plain, (data_size > 256 ? 256 : data_size)); + + cipher_generic = vmalloc(data_size); + if (!cipher_generic) { + pr_info("could not allocate cipher_generic buffer\n"); + ret = -2; + goto out; + } + memset(cipher_generic, 0, data_size); + + cipher_s390 = vmalloc(data_size); + if (!cipher_s390) { + pr_info("could not allocate cipher_s390 buffer\n"); + ret = -2; + goto out; + } + memset(cipher_s390, 0, data_size); + + revert = vmalloc(data_size); + if (!revert) { + pr_info("could not allocate revert buffer\n"); + ret = -2; + goto out; + } + memset(revert, 0, data_size); + + if (debug) + print_hex_dump(KERN_INFO, "src: ", DUMP_PREFIX_OFFSET, + 16, 1, plain, + (data_size > 64 ? 64 : data_size), 1); + + /* Use chacha20 generic */ + ret = test_skcipher("chacha20-generic", revert, cipher_generic, plain); + if (ret) + goto out; + + if (memcmp(plain, revert, data_size)) { + pr_info("generic en/decryption check FAILED\n"); + ret = -2; + goto out; + } + else + pr_info("generic en/decryption check OK\n"); + + memset(revert, 0, data_size); + + /* Use chacha20 s390 */ + ret = test_skcipher("chacha20-s390", revert, cipher_s390, plain); + if (ret) + goto out; + + if (memcmp(plain, revert, data_size)) { + pr_info("s390 en/decryption check FAILED\n"); + ret = -2; + goto out; + } + else + pr_info("s390 en/decryption check OK\n"); + + if (memcmp(cipher_generic, cipher_s390, data_size)) { + pr_info("s390 vs generic check FAILED\n"); + ret = -2; + goto out; + } + else + pr_info("s390 vs generic check OK\n"); + + memset(cipher_s390, 0, data_size); + memset(revert, 0, data_size); + + /* Use chacha20 lib */ + test_lib_chacha(revert, cipher_s390, plain); + + if (memcmp(plain, revert, data_size)) { + pr_info("lib en/decryption check FAILED\n"); + ret = -2; + goto out; + } + else + pr_info("lib en/decryption check OK\n"); + + if (memcmp(cipher_generic, cipher_s390, data_size)) { + pr_info("lib vs generic check FAILED\n"); + ret = -2; + goto out; + } + else + pr_info("lib vs generic check OK\n"); + + pr_info("--- chacha20 s390 test end ---\n"); + +out: + if (plain) + vfree(plain); + if (cipher_generic) + vfree(cipher_generic); + if (cipher_s390) + vfree(cipher_s390); + if (revert) + vfree(revert); + + return -1; +} + +static void __exit chacha_s390_test_exit(void) +{ + pr_info("s390 ChaCha20 test module exit\n"); +} + +module_param_named(size, data_size, uint, 0660); +module_param(debug, int, 0660); +MODULE_PARM_DESC(size, "Size of a plaintext"); +MODULE_PARM_DESC(debug, "Debug level (0=off,1=on)"); + +module_init(chacha_s390_test_init); +module_exit(chacha_s390_test_exit); + +MODULE_DESCRIPTION("s390 ChaCha20 self-test"); 
+MODULE_AUTHOR("Vladis Dronov <vdronoff@gmail.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 82e49ab0937d..33543231d453 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -8,6 +8,8 @@ ldflags-y += --wrap=devm_cxl_port_enumerate_dports ldflags-y += --wrap=devm_cxl_setup_hdm ldflags-y += --wrap=devm_cxl_add_passthrough_decoder ldflags-y += --wrap=devm_cxl_enumerate_decoders +ldflags-y += --wrap=cxl_await_media_ready +ldflags-y += --wrap=cxl_hdm_decode_init DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl @@ -34,7 +36,6 @@ cxl_port-y += config_check.o obj-m += cxl_mem.o cxl_mem-y := $(CXL_SRC)/mem.o -cxl_mem-y += mock_mem.o cxl_mem-y += config_check.o obj-m += cxl_core.o diff --git a/tools/testing/cxl/mock_mem.c b/tools/testing/cxl/mock_mem.c deleted file mode 100644 index d1dec5845139..000000000000 --- a/tools/testing/cxl/mock_mem.c +++ /dev/null @@ -1,10 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ - -#include <linux/types.h> - -struct cxl_dev_state; -bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) -{ - return true; -} diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index b6b726eff3e2..6b9239b2afd4 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -237,25 +237,11 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd * return rc; } -static int cxl_mock_wait_media_ready(struct cxl_dev_state *cxlds) -{ - msleep(100); - return 0; -} - static void label_area_release(void *lsa) { vfree(lsa); } -static void mock_validate_dvsec_ranges(struct cxl_dev_state *cxlds) -{ - struct cxl_endpoint_dvsec_info *info; - - info = &cxlds->info; - info->mem_enabled = true; -} - static int cxl_mock_mem_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -278,7 +264,6 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) cxlds->serial = pdev->id; cxlds->mbox_send = cxl_mock_mbox_send; - cxlds->wait_media_ready = cxl_mock_wait_media_ready; cxlds->payload_size = SZ_4K; rc = cxl_enumerate_cmds(cxlds); @@ -293,8 +278,6 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; - mock_validate_dvsec_ranges(cxlds); - cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 6e8c9d63c92d..f1f8c40948c5 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -193,6 +193,35 @@ int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, CXL); +int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) +{ + int rc, index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_dev(cxlds->dev)) + rc = 0; + else + rc = cxl_await_media_ready(cxlds); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL); + +bool __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, + struct cxl_hdm *cxlhdm) +{ + int rc = 0, index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (!ops || !ops->is_mock_dev(cxlds->dev)) + rc = cxl_hdm_decode_init(cxlds, cxlhdm); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); MODULE_IMPORT_NS(CXL); diff --git a/tools/testing/memblock/TODO 
b/tools/testing/memblock/TODO index c25b2fdec45e..cd1a30d5acc9 100644 --- a/tools/testing/memblock/TODO +++ b/tools/testing/memblock/TODO @@ -23,6 +23,3 @@ TODO 5. Add tests for memblock_alloc_node() to check if the correct NUMA node is set for the new region - -6. Update comments in tests/basic_api.c to match the style used in - tests/alloc_*.c diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index fbc1ce160303..a7bc180316d6 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -26,8 +26,8 @@ static int memblock_initialization_check(void) /* * A simple test that adds a memory block of a specified base address * and size to the collection of available memory regions (memblock.memory). - * It checks if a new entry was created and if region counter and total memory - * were correctly updated. + * Expect to create a new entry. The region counter and total memory get + * updated. */ static int memblock_add_simple_check(void) { @@ -53,10 +53,10 @@ static int memblock_add_simple_check(void) } /* - * A simple test that adds a memory block of a specified base address, size + * A simple test that adds a memory block of a specified base address, size, * NUMA node and memory flags to the collection of available memory regions. - * It checks if the new entry, region counter and total memory size have - * expected values. + * Expect to create a new entry. The region counter and total memory get + * updated. */ static int memblock_add_node_simple_check(void) { @@ -87,9 +87,15 @@ static int memblock_add_node_simple_check(void) /* * A test that tries to add two memory blocks that don't overlap with one - * another. It checks if two correctly initialized entries were added to the - * collection of available memory regions (memblock.memory) and if this - * change was reflected in memblock.memory's total size and region counter. + * another: + * + * | +--------+ +--------+ | + * | | r1 | | r2 | | + * +--------+--------+--------+--------+--+ + * + * Expect to add two correctly initialized entries to the collection of + * available memory regions (memblock.memory). The total size and + * region counter fields get updated. */ static int memblock_add_disjoint_check(void) { @@ -124,11 +130,21 @@ static int memblock_add_disjoint_check(void) } /* - * A test that tries to add two memory blocks, where the second one overlaps - * with the beginning of the first entry (that is r1.base < r2.base + r2.size). - * After this, it checks if two entries are merged into one region that starts - * at r2.base and has size of two regions minus their intersection. It also - * verifies the reported total size of the available memory and region counter. + * A test that tries to add two memory blocks r1 and r2, where r2 overlaps + * with the beginning of r1 (that is r1.base < r2.base + r2.size): + * + * | +----+----+------------+ | + * | | |r2 | r1 | | + * +----+----+----+------------+----------+ + * ^ ^ + * | | + * | r1.base + * | + * r2.base + * + * Expect to merge the two entries into one region that starts at r2.base + * and has size of two regions minus their intersection. The total size of + * the available memory is updated, and the region counter stays the same. 
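 + *
 + * For instance (numbers purely illustrative): with r1 = [4G, 4G + 512M)
 + * and r2 = [4G - 256M, 4G + 256M), the merged entry is
 + * [4G - 256M, 4G + 512M), whose size is 512M + 512M - 256M = 768M.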
*/ static int memblock_add_overlap_top_check(void) { @@ -162,12 +178,21 @@ static int memblock_add_overlap_top_check(void) } /* - * A test that tries to add two memory blocks, where the second one overlaps - * with the end of the first entry (that is r2.base < r1.base + r1.size). - * After this, it checks if two entries are merged into one region that starts - * at r1.base and has size of two regions minus their intersection. It verifies - * that memblock can still see only one entry and has a correct total size of - * the available memory. + * A test that tries to add two memory blocks r1 and r2, where r2 overlaps + * with the end of r1 (that is r2.base < r1.base + r1.size): + * + * | +--+------+----------+ | + * | | | r1 | r2 | | + * +--+--+------+----------+--------------+ + * ^ ^ + * | | + * | r2.base + * | + * r1.base + * + * Expect to merge the two entries into one region that starts at r1.base + * and has size of two regions minus their intersection. The total size of + * the available memory is updated, and the region counter stays the same. */ static int memblock_add_overlap_bottom_check(void) { @@ -201,11 +226,19 @@ static int memblock_add_overlap_bottom_check(void) } /* - * A test that tries to add two memory blocks, where the second one is - * within the range of the first entry (that is r1.base < r2.base && - * r2.base + r2.size < r1.base + r1.size). It checks if two entries are merged - * into one region that stays the same. The counter and total size of available - * memory are expected to not be updated. + * A test that tries to add two memory blocks r1 and r2, where r2 is + * within the range of r1 (that is r1.base < r2.base && + * r2.base + r2.size < r1.base + r1.size): + * + * | +-------+--+-----------------------+ + * | | |r2| r1 | + * +---+-------+--+-----------------------+ + * ^ + * | + * r1.base + * + * Expect to merge two entries into one region that stays the same. + * The counter and total size of available memory are not updated. */ static int memblock_add_within_check(void) { @@ -236,8 +269,8 @@ static int memblock_add_within_check(void) } /* - * A simple test that tries to add the same memory block twice. The counter - * and total size of available memory are expected to not be updated. + * A simple test that tries to add the same memory block twice. Expect + * the counter and total size of available memory to not be updated. */ static int memblock_add_twice_check(void) { @@ -270,12 +303,12 @@ static int memblock_add_checks(void) return 0; } - /* - * A simple test that marks a memory block of a specified base address - * and size as reserved and to the collection of reserved memory regions - * (memblock.reserved). It checks if a new entry was created and if region - * counter and total memory size were correctly updated. - */ +/* + * A simple test that marks a memory block of a specified base address + * and size as reserved and to the collection of reserved memory regions + * (memblock.reserved). Expect to create a new entry. The region counter + * and total memory size are updated. + */ static int memblock_reserve_simple_check(void) { struct memblock_region *rgn; @@ -297,10 +330,15 @@ static int memblock_reserve_simple_check(void) } /* - * A test that tries to mark two memory blocks that don't overlap as reserved - * and checks if two entries were correctly added to the collection of reserved - * memory regions (memblock.reserved) and if this change was reflected in - * memblock.reserved's total size and region counter. 
+ * A test that tries to mark two memory blocks that don't overlap as reserved: + * + * | +--+ +----------------+ | + * | |r1| | r2 | | + * +--------+--+------+----------------+--+ + * + * Expect to add two entries to the collection of reserved memory regions + * (memblock.reserved). The total size and region counter for + * memblock.reserved are updated. */ static int memblock_reserve_disjoint_check(void) { @@ -335,13 +373,22 @@ static int memblock_reserve_disjoint_check(void) } /* - * A test that tries to mark two memory blocks as reserved, where the - * second one overlaps with the beginning of the first (that is - * r1.base < r2.base + r2.size). - * It checks if two entries are merged into one region that starts at r2.base - * and has size of two regions minus their intersection. The test also verifies - * that memblock can still see only one entry and has a correct total size of - * the reserved memory. + * A test that tries to mark two memory blocks r1 and r2 as reserved, + * where r2 overlaps with the beginning of r1 (that is + * r1.base < r2.base + r2.size): + * + * | +--------------+--+--------------+ | + * | | r2 | | r1 | | + * +--+--------------+--+--------------+--+ + * ^ ^ + * | | + * | r1.base + * | + * r2.base + * + * Expect to merge two entries into one region that starts at r2.base and + * has size of two regions minus their intersection. The total size of the + * reserved memory is updated, and the region counter is not updated. */ static int memblock_reserve_overlap_top_check(void) { @@ -375,13 +422,22 @@ static int memblock_reserve_overlap_top_check(void) } /* - * A test that tries to mark two memory blocks as reserved, where the - * second one overlaps with the end of the first entry (that is - * r2.base < r1.base + r1.size). - * It checks if two entries are merged into one region that starts at r1.base - * and has size of two regions minus their intersection. It verifies that - * memblock can still see only one entry and has a correct total size of the - * reserved memory. + * A test that tries to mark two memory blocks r1 and r2 as reserved, + * where r2 overlaps with the end of r1 (that is + * r2.base < r1.base + r1.size): + * + * | +--------------+--+--------------+ | + * | | r1 | | r2 | | + * +--+--------------+--+--------------+--+ + * ^ ^ + * | | + * | r2.base + * | + * r1.base + * + * Expect to merge two entries into one region that starts at r1.base and + * has size of two regions minus their intersection. The total size of the + * reserved memory is updated, and the region counter is not updated. */ static int memblock_reserve_overlap_bottom_check(void) { @@ -415,12 +471,21 @@ static int memblock_reserve_overlap_bottom_check(void) } /* - * A test that tries to mark two memory blocks as reserved, where the second - * one is within the range of the first entry (that is - * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)). - * It checks if two entries are merged into one region that stays the - * same. The counter and total size of available memory are expected to not be - * updated. + * A test that tries to mark two memory blocks r1 and r2 as reserved, + * where r2 is within the range of r1 (that is + * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)): + * + * | +-----+--+---------------------------| + * | | |r2| r1 | + * +-+-----+--+---------------------------+ + * ^ ^ + * | | + * | r2.base + * | + * r1.base + * + * Expect to merge two entries into one region that stays the same. 
The + * counter and total size of available memory are not updated. */ static int memblock_reserve_within_check(void) { @@ -452,7 +517,7 @@ static int memblock_reserve_within_check(void) /* * A simple test that tries to reserve the same memory block twice. - * The region counter and total size of reserved memory are expected to not + * Expect the region counter and total size of reserved memory to not * be updated. */ static int memblock_reserve_twice_check(void) @@ -485,14 +550,22 @@ static int memblock_reserve_checks(void) return 0; } - /* - * A simple test that tries to remove the first entry of the array of - * available memory regions. By "removing" a region we mean overwriting it - * with the next region in memblock.memory. To check this is the case, the - * test adds two memory blocks and verifies that the value of the latter - * was used to erase r1 region. It also checks if the region counter and - * total size were updated to expected values. - */ +/* + * A simple test that tries to remove a region r1 from the array of + * available memory regions. By "removing" a region we mean overwriting it + * with the next region r2 in memblock.memory: + * + * | ...... +----------------+ | + * | : r1 : | r2 | | + * +--+----+----------+----------------+--+ + * ^ + * | + * rgn.base + * + * Expect to add two memory blocks r1 and r2 and then remove r1 so that + * r2 is the first available region. The region counter and total size + * are updated. + */ static int memblock_remove_simple_check(void) { struct memblock_region *rgn; @@ -522,11 +595,22 @@ static int memblock_remove_simple_check(void) return 0; } - /* - * A test that tries to remove a region that was not registered as available - * memory (i.e. has no corresponding entry in memblock.memory). It verifies - * that array, regions counter and total size were not modified. - */ +/* + * A test that tries to remove a region r2 that was not registered as + * available memory (i.e. has no corresponding entry in memblock.memory): + * + * +----------------+ + * | r2 | + * +----------------+ + * | +----+ | + * | | r1 | | + * +--+----+------------------------------+ + * ^ + * | + * rgn.base + * + * Expect the array, regions counter and total size to not be modified. + */ static int memblock_remove_absent_check(void) { struct memblock_region *rgn; @@ -556,11 +640,23 @@ static int memblock_remove_absent_check(void) } /* - * A test that tries to remove a region which overlaps with the beginning of - * the already existing entry r1 (that is r1.base < r2.base + r2.size). It - * checks if only the intersection of both regions is removed from the available - * memory pool. The test also checks if the regions counter and total size are - * updated to expected values. + * A test that tries to remove a region r2 that overlaps with the + * beginning of the already existing entry r1 + * (that is r1.base < r2.base + r2.size): + * + * +-----------------+ + * | r2 | + * +-----------------+ + * | .........+--------+ | + * | : r1 | rgn | | + * +-----------------+--------+--------+--+ + * ^ ^ + * | | + * | rgn.base + * r1.base + * + * Expect that only the intersection of both regions is removed from the + * available memory pool. The regions counter and total size are updated. */ static int memblock_remove_overlap_top_check(void) { @@ -596,11 +692,21 @@ static int memblock_remove_overlap_top_check(void) } /* - * A test that tries to remove a region which overlaps with the end of the - * first entry (that is r2.base < r1.base + r1.size). 
It checks if only the - * intersection of both regions is removed from the available memory pool. - * The test also checks if the regions counter and total size are updated to - * expected values. + * A test that tries to remove a region r2 that overlaps with the end of + * the already existing region r1 (that is r2.base < r1.base + r1.size): + * + * +--------------------------------+ + * | r2 | + * +--------------------------------+ + * | +---+..... | + * | |rgn| r1 : | + * +-+---+----+---------------------------+ + * ^ + * | + * r1.base + * + * Expect that only the intersection of both regions is removed from the + * available memory pool. The regions counter and total size are updated. */ static int memblock_remove_overlap_bottom_check(void) { @@ -633,13 +739,23 @@ static int memblock_remove_overlap_bottom_check(void) } /* - * A test that tries to remove a region which is within the range of the - * already existing entry (that is - * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)). - * It checks if the region is split into two - one that ends at r2.base and - * second that starts at r2.base + size, with appropriate sizes. The test - * also checks if the region counter and total size were updated to - * expected values. + * A test that tries to remove a region r2 that is within the range of + * the already existing entry r1 (that is + * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)): + * + * +----+ + * | r2 | + * +----+ + * | +-------------+....+---------------+ | + * | | rgn1 | r1 | rgn2 | | + * +-+-------------+----+---------------+-+ + * ^ + * | + * r1.base + * + * Expect that the region is split into two - one that ends at r2.base and + * another that starts at r2.base + r2.size, with appropriate sizes. The + * region counter and total size are updated. */ static int memblock_remove_within_check(void) { @@ -690,12 +806,19 @@ static int memblock_remove_checks(void) } /* - * A simple test that tries to free a memory block that was marked earlier - * as reserved. By "freeing" a region we mean overwriting it with the next - * entry in memblock.reserved. To check this is the case, the test reserves - * two memory regions and verifies that the value of the latter was used to - * erase r1 region. - * The test also checks if the region counter and total size were updated. + * A simple test that tries to free a memory block r1 that was marked + * earlier as reserved. By "freeing" a region we mean overwriting it with + * the next entry r2 in memblock.reserved: + * + * | ...... +----+ | + * | : r1 : | r2 | | + * +--------------+----+-----------+----+-+ + * ^ + * | + * rgn.base + * + * Expect to reserve two memory regions and then erase r1 region with the + * value of r2. The region counter and total size are updated. */ static int memblock_free_simple_check(void) { @@ -726,11 +849,22 @@ static int memblock_free_simple_check(void) return 0; } - /* - * A test that tries to free a region that was not marked as reserved - * (i.e. has no corresponding entry in memblock.reserved). It verifies - * that array, regions counter and total size were not modified. - */ +/* + * A test that tries to free a region r2 that was not marked as reserved + * (i.e. has no corresponding entry in memblock.reserved): + * + * +----------------+ + * | r2 | + * +----------------+ + * | +----+ | + * | | r1 | | + * +--+----+------------------------------+ + * ^ + * | + * rgn.base + * + * The array, regions counter and total size are not modified. 
+ */ static int memblock_free_absent_check(void) { struct memblock_region *rgn; @@ -760,11 +894,23 @@ static int memblock_free_absent_check(void) } /* - * A test that tries to free a region which overlaps with the beginning of - * the already existing entry r1 (that is r1.base < r2.base + r2.size). It - * checks if only the intersection of both regions is freed. The test also - * checks if the regions counter and total size are updated to expected - * values. + * A test that tries to free a region r2 that overlaps with the beginning + * of the already existing entry r1 (that is r1.base < r2.base + r2.size): + * + * +----+ + * | r2 | + * +----+ + * | ...+--------------+ | + * | : | r1 | | + * +----+--+--------------+---------------+ + * ^ ^ + * | | + * | rgn.base + * | + * r1.base + * + * Expect that only the intersection of both regions is freed. The + * regions counter and total size are updated. */ static int memblock_free_overlap_top_check(void) { @@ -798,10 +944,18 @@ static int memblock_free_overlap_top_check(void) } /* - * A test that tries to free a region which overlaps with the end of the - * first entry (that is r2.base < r1.base + r1.size). It checks if only the - * intersection of both regions is freed. The test also checks if the - * regions counter and total size are updated to expected values. + * A test that tries to free a region r2 that overlaps with the end of + * the already existing entry r1 (that is r2.base < r1.base + r1.size): + * + * +----------------+ + * | r2 | + * +----------------+ + * | +-----------+..... | + * | | r1 | : | + * +----+-----------+----+----------------+ + * + * Expect that only the intersection of both regions is freed. The + * regions counter and total size are updated. */ static int memblock_free_overlap_bottom_check(void) { @@ -835,13 +989,23 @@ static int memblock_free_overlap_bottom_check(void) } /* - * A test that tries to free a region which is within the range of the - * already existing entry (that is - * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)). - * It checks if the region is split into two - one that ends at r2.base and - * second that starts at r2.base + size, with appropriate sizes. It is - * expected that the region counter and total size fields were updated t - * reflect that change. + * A test that tries to free a region r2 that is within the range of the + * already existing entry r1 (that is + * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)): + * + * +----+ + * | r2 | + * +----+ + * | +------------+....+---------------+ + * | | rgn1 | r1 | rgn2 | + * +----+------------+----+---------------+ + * ^ + * | + * r1.base + * + * Expect that the region is split into two - one that ends at r2.base and + * another that starts at r2.base + r2.size, with appropriate sizes. The + * region counter and total size fields are updated. 
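 + *
 + * For instance (numbers purely illustrative): freeing r2 = [1G + 256M,
 + * 1G + 512M) from r1 = [1G, 2G) leaves rgn1 = [1G, 1G + 256M) and
 + * rgn2 = [1G + 512M, 2G), so the region count grows by one while the
 + * total reserved size drops from 1G to 768M.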
*/ static int memblock_free_within_check(void) { diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c index af19c85558e7..c1ec099a3b1d 100644 --- a/tools/testing/nvdimm/pmem-dax.c +++ b/tools/testing/nvdimm/pmem-dax.c @@ -4,11 +4,13 @@ */ #include "test/nfit_test.h" #include <linux/blkdev.h> +#include <linux/dax.h> #include <pmem.h> #include <nd.h> long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, enum dax_access_mode mode, void **kaddr, + pfn_t *pfn) { resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset; diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index b752ce47ead3..ea956082e6a4 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -62,16 +62,14 @@ struct nfit_test_resource *get_nfit_res(resource_size_t resource) } EXPORT_SYMBOL(get_nfit_res); -static void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size, - void __iomem *(*fallback_fn)(resource_size_t, unsigned long)) -{ - struct nfit_test_resource *nfit_res = get_nfit_res(offset); - - if (nfit_res) - return (void __iomem *) nfit_res->buf + offset - - nfit_res->res.start; - return fallback_fn(offset, size); -} +#define __nfit_test_ioremap(offset, size, fallback_fn) ({ \ + struct nfit_test_resource *nfit_res = get_nfit_res(offset); \ + nfit_res ? \ + (void __iomem *) nfit_res->buf + (offset) \ + - nfit_res->res.start \ + : \ + fallback_fn((offset), (size)) ; \ +}) void __iomem *__wrap_devm_ioremap(struct device *dev, resource_size_t offset, unsigned long size) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 1da76ccde448..c75abb497a1a 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -23,8 +23,6 @@ #include "nfit_test.h" #include "../watermark.h" -#include <asm/mce.h> - /* * Generate an NFIT table to describe the following topology: * @@ -3375,7 +3373,6 @@ static __exit void nfit_test_exit(void) { int i; - flush_workqueue(nfit_wq); destroy_workqueue(nfit_wq); for (i = 0; i < NUM_NFITS; i++) platform_device_unregister(&instances[i]->pdev); diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc index 312d23780096..be754f5bcf79 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc @@ -25,6 +25,8 @@ if [ $L -ne 256 ]; then exit_fail fi +cat kprobe_events >> $testlog + echo 1 > events/kprobes/enable echo 0 > events/kprobes/enable echo > kprobe_events diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index b7d188fc87c7..b9fa9cd709df 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -135,6 +135,11 @@ do { \ #define PPC_FEATURE2_ARCH_3_1 0x00040000 #endif +/* POWER10 features */ +#ifndef PPC_FEATURE2_MMA +#define PPC_FEATURE2_MMA 0x00020000 +#endif + #if defined(__powerpc64__) #define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP] #define UCONTEXT_MSR(UC) (UC)->uc_mcontext.gp_regs[PT_MSR] diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile index fcc91c205984..3948f7c510aa 100644 --- a/tools/testing/selftests/powerpc/math/Makefile +++ b/tools/testing/selftests/powerpc/math/Makefile @@ -1,5 +1,5 @@ # 
SPDX-License-Identifier: GPL-2.0 -TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vmx_preempt vmx_signal vsx_preempt +TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vmx_preempt vmx_signal vsx_preempt mma top_srcdir = ../../../../.. include ../../lib.mk @@ -17,3 +17,5 @@ $(OUTPUT)/vmx_signal: vmx_asm.S ../utils.c $(OUTPUT)/vsx_preempt: CFLAGS += -mvsx $(OUTPUT)/vsx_preempt: vsx_asm.S ../utils.c + +$(OUTPUT)/mma: mma.c mma.S ../utils.c diff --git a/tools/testing/selftests/powerpc/math/mma.S b/tools/testing/selftests/powerpc/math/mma.S new file mode 100644 index 000000000000..8528c9849565 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/mma.S @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * + * Test basic matrix multiply assist (MMA) functionality if available. + * + * Copyright 2020, Alistair Popple, IBM Corp. + */ + .global test_mma +test_mma: + /* Load accumulator via VSX registers from image passed in r3 */ + lxvh8x 4,0,3 + lxvh8x 5,0,4 + + /* Clear and prime the accumulator (xxsetaccz) */ + .long 0x7c030162 + + /* Prime the accumulator with MMA VSX move to accumulator + * X-form (xxmtacc) (not needed due to above zeroing) */ + //.long 0x7c010162 + + /* xvi16ger2s */ + .long 0xec042958 + + /* Store result in image passed in r5 */ + stxvw4x 0,0,5 + addi 5,5,16 + stxvw4x 1,0,5 + addi 5,5,16 + stxvw4x 2,0,5 + addi 5,5,16 + stxvw4x 3,0,5 + addi 5,5,16 + + blr diff --git a/tools/testing/selftests/powerpc/math/mma.c b/tools/testing/selftests/powerpc/math/mma.c new file mode 100644 index 000000000000..3a71808c993f --- /dev/null +++ b/tools/testing/selftests/powerpc/math/mma.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test basic matrix multiply assist (MMA) functionality if available. + * + * Copyright 2020, Alistair Popple, IBM Corp. 
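+ *
+ * The MMA instructions below are emitted as raw .long encodings so the
+ * test assembles even with binutils that lack the ISA 3.1 mnemonics.
+ * xvi16ger2s performs a rank-2 update, acc[i][j] = x[2i]*y[2j] +
+ * x[2i+1]*y[2j+1]; with the odd halfwords zeroed by the caller this
+ * reduces to the plain outer product that mma.c checks.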
+ */ +#include <stdio.h> +#include <stdint.h> + +#include "utils.h" + +extern void test_mma(uint16_t (*)[8], uint16_t (*)[8], uint32_t (*)[4*4]); + +static int mma(void) +{ + int i; + int rc = 0; + uint16_t x[] = {1, 0, 2, 0, 3, 0, 4, 0}; + uint16_t y[] = {1, 0, 2, 0, 3, 0, 4, 0}; + uint32_t z[4*4]; + uint32_t exp[4*4] = {1, 2, 3, 4, + 2, 4, 6, 8, + 3, 6, 9, 12, + 4, 8, 12, 16}; + + SKIP_IF_MSG(!have_hwcap2(PPC_FEATURE2_ARCH_3_1), "Need ISAv3.1"); + SKIP_IF_MSG(!have_hwcap2(PPC_FEATURE2_MMA), "Need MMA"); + + test_mma(&x, &y, &z); + + for (i = 0; i < 16; i++) { + printf("MMA[%d] = %d ", i, z[i]); + + if (z[i] == exp[i]) { + printf(" (Correct)\n"); + } else { + printf(" (Incorrect)\n"); + rc = 1; + } + } + + return rc; +} + +int main(int argc, char *argv[]) +{ + return test_harness(mma, "mma"); +} diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index aac4a59f9e28..4e1a294eec35 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -12,3 +12,4 @@ pkey_exec_prot pkey_siginfo stack_expansion_ldst stack_expansion_signal +large_vm_gpr_corruption diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index 40253abc6208..27dc09d0bfee 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -4,7 +4,8 @@ noarg: TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ large_vm_fork_separation bad_accesses pkey_exec_prot \ - pkey_siginfo stack_expansion_signal stack_expansion_ldst + pkey_siginfo stack_expansion_signal stack_expansion_ldst \ + large_vm_gpr_corruption TEST_PROGS := stress_code_patching.sh TEST_GEN_PROGS_EXTENDED := tlbie_test @@ -19,6 +20,7 @@ $(OUTPUT)/prot_sao: ../utils.c $(OUTPUT)/wild_bctr: CFLAGS += -m64 $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64 +$(OUTPUT)/large_vm_gpr_corruption: CFLAGS += -m64 $(OUTPUT)/bad_accesses: CFLAGS += -m64 $(OUTPUT)/pkey_exec_prot: CFLAGS += -m64 $(OUTPUT)/pkey_siginfo: CFLAGS += -m64 diff --git a/tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c b/tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c new file mode 100644 index 000000000000..927bfae99ed9 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// Copyright 2022, Michael Ellerman, IBM Corp. +// +// Test that the 4PB address space SLB handling doesn't corrupt userspace registers +// (r9-r13) due to a SLB fault while saving the PPR. +// +// The bug was introduced in f384796c4 ("powerpc/mm: Add support for handling > 512TB +// address in SLB miss") and fixed in 4c2de74cc869 ("powerpc/64: Interrupts save PPR on +// stack rather than thread_struct"). +// +// To hit the bug requires the task struct and kernel stack to be in different segments. +// Usually that requires more than 1TB of RAM, or if that's not practical, boot the kernel +// with "disable_1tb_segments". +// +// The test works by creating mappings above 512TB, to trigger the large address space +// support. It creates 64 mappings, double the size of the SLB, to cause SLB faults on +// each access (assuming naive replacement). It then loops over those mappings touching +// each, and checks that r9-r13 aren't corrupted. 
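+// (For scale: BASE_ADDRESS is 1PB (1ul << 50) and STRIDE is 2TB, so the
+// 64 mappings span the 128TB above 1PB, all well past the 512TB
+// (1ul << 49) boundary where the extended address space handling starts.)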
+// +// It then forks another child and tries again, because a new child process will get a new +// kernel stack and thread struct allocated, which may be more optimally placed to trigger +// the bug. It would probably be better to leave the previous child processes hanging +// around, so that kernel stack & thread struct allocations are not reused, but that would +// amount to a 30 second fork bomb. The current design reliably triggers the bug on +// unpatched kernels. + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "utils.h" + +#ifndef MAP_FIXED_NOREPLACE +#define MAP_FIXED_NOREPLACE MAP_FIXED // "Should be safe" above 512TB +#endif + +#define BASE_ADDRESS (1ul << 50) // 1PB +#define STRIDE (2ul << 40) // 2TB +#define SLB_SIZE 32 +#define NR_MAPPINGS (SLB_SIZE * 2) + +static volatile sig_atomic_t signaled; + +static void signal_handler(int sig) +{ + signaled = 1; +} + +#define CHECK_REG(_reg) \ + if (_reg != _reg##_orig) { \ + printf(str(_reg) " corrupted! Expected 0x%lx != 0x%lx\n", _reg##_orig, \ + _reg); \ + _exit(1); \ + } + +static int touch_mappings(void) +{ + unsigned long r9_orig, r10_orig, r11_orig, r12_orig, r13_orig; + unsigned long r9, r10, r11, r12, r13; + unsigned long addr, *p; + int i; + + for (i = 0; i < NR_MAPPINGS; i++) { + addr = BASE_ADDRESS + (i * STRIDE); + p = (unsigned long *)addr; + + asm volatile("mr %0, %%r9 ;" // Read original GPR values + "mr %1, %%r10 ;" + "mr %2, %%r11 ;" + "mr %3, %%r12 ;" + "mr %4, %%r13 ;" + "std %10, 0(%11) ;" // Trigger SLB fault + "mr %5, %%r9 ;" // Save possibly corrupted values + "mr %6, %%r10 ;" + "mr %7, %%r11 ;" + "mr %8, %%r12 ;" + "mr %9, %%r13 ;" + "mr %%r9, %0 ;" // Restore original values + "mr %%r10, %1 ;" + "mr %%r11, %2 ;" + "mr %%r12, %3 ;" + "mr %%r13, %4 ;" + : "=&b"(r9_orig), "=&b"(r10_orig), "=&b"(r11_orig), + "=&b"(r12_orig), "=&b"(r13_orig), "=&b"(r9), "=&b"(r10), + "=&b"(r11), "=&b"(r12), "=&b"(r13) + : "b"(i), "b"(p) + : "r9", "r10", "r11", "r12", "r13"); + + CHECK_REG(r9); + CHECK_REG(r10); + CHECK_REG(r11); + CHECK_REG(r12); + CHECK_REG(r13); + } + + return 0; +} + +static int test(void) +{ + unsigned long page_size, addr, *p; + struct sigaction action; + bool hash_mmu; + int i, status; + pid_t pid; + + // This tests a hash MMU specific bug. + FAIL_IF(using_hash_mmu(&hash_mmu)); + SKIP_IF(!hash_mmu); + + page_size = sysconf(_SC_PAGESIZE); + + for (i = 0; i < NR_MAPPINGS; i++) { + addr = BASE_ADDRESS + (i * STRIDE); + + p = mmap((void *)addr, page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0); + if (p == MAP_FAILED) { + perror("mmap"); + printf("Error: couldn't mmap(), confirm kernel has 4PB support?\n"); + return 1; + } + } + + action.sa_handler = signal_handler; + action.sa_flags = SA_RESTART; + FAIL_IF(sigaction(SIGALRM, &action, NULL) < 0); + + // Seen to always crash in under ~10s on affected kernels. + alarm(30); + + while (!signaled) { + // Fork new processes, to increase the chance that we hit the case where + // the kernel stack and task struct are in different segments. 
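+ // The child exits with touch_mappings()'s return value, so the
+ // parent below treats death by signal, an abnormal exit, or a
+ // non-zero exit status as corruption detected in the child.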
+ pid = fork(); + if (pid == 0) + exit(touch_mappings()); + + FAIL_IF(waitpid(-1, &status, 0) == -1); + FAIL_IF(WIFSIGNALED(status)); + FAIL_IF(!WIFEXITED(status)); + FAIL_IF(WEXITSTATUS(status)); + } + + return 0; +} + +int main(void) +{ + return test_harness(test, "large_vm_gpr_corruption"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S b/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S deleted file mode 100644 index 08a7b5f133b9..000000000000 --- a/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright 2014, Michael Ellerman, IBM Corp. - */ - -#include <ppc-asm.h> - - .text - -FUNC_START(thirty_two_instruction_loop) - cmpwi r3,0 - beqlr - addi r4,r3,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 - addi r4,r4,1 # 28 addi's - subi r3,r3,1 - b FUNC_NAME(thirty_two_instruction_loop) -FUNC_END(thirty_two_instruction_loop) diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c index fca054bbc094..c01a31d5f4ee 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c @@ -274,7 +274,7 @@ u64 *get_intr_regs(struct event *event, void *sample_buff) return intr_regs; } -static const unsigned int __perf_reg_mask(const char *register_name) +static const int __perf_reg_mask(const char *register_name) { if (!strcmp(register_name, "R0")) return 0; diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c index d42ca8c676c3..5b2abb719ef2 100644 --- a/tools/testing/selftests/powerpc/security/spectre_v2.c +++ b/tools/testing/selftests/powerpc/security/spectre_v2.c @@ -182,17 +182,23 @@ int spectre_v2_test(void) case COUNT_CACHE_FLUSH_HW: // These should all not affect userspace branch prediction if (miss_percent > 15) { - printf("Branch misses > 15%% unexpected in this configuration!\n"); - printf("Possible mis-match between reported & actual mitigation\n"); - /* - * Such a mismatch may be caused by a guest system - * reporting as vulnerable when the host is mitigated. - * Return skip code to avoid detecting this as an error. - * We are not vulnerable and reporting otherwise, so - * missing such a mismatch is safe. - */ - if (miss_percent > 95) + if (miss_percent > 95) { + /* + * Such a mismatch may be caused by a system being unaware + * the count cache is disabled. This may be to enable + * guest migration between hosts with different settings. + * Return skip code to avoid detecting this as an error. + * We are not vulnerable and reporting otherwise, so + * missing such a mismatch is safe. 
+ */ + printf("Branch misses > 95%% unexpected in this configuration.\n"); + printf("Count cache likely disabled without Linux knowing.\n"); + if (state == COUNT_CACHE_FLUSH_SW) + printf("WARNING: Kernel performing unnecessary flushes.\n"); return 4; + } + printf("Branch misses > 15%% unexpected in this configuration!\n"); + printf("Possible mismatch between reported & actual mitigation\n"); return 1; } @@ -201,14 +207,14 @@ int spectre_v2_test(void) // This seems to affect userspace branch prediction a bit? if (miss_percent > 25) { printf("Branch misses > 25%% unexpected in this configuration!\n"); - printf("Possible mis-match between reported & actual mitigation\n"); + printf("Possible mismatch between reported & actual mitigation\n"); return 1; } break; case COUNT_CACHE_DISABLED: if (miss_percent < 95) { - printf("Branch misses < 20%% unexpected in this configuration!\n"); - printf("Possible mis-match between reported & actual mitigation\n"); + printf("Branch misses < 95%% unexpected in this configuration!\n"); + printf("Possible mismatch between reported & actual mitigation\n"); return 1; } break; diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile index 11fb417abb42..3822f4ea5f49 100644 --- a/tools/tracing/rtla/Makefile +++ b/tools/tracing/rtla/Makefile @@ -23,6 +23,7 @@ $(call allow-override,LD_SO_CONF_PATH,/etc/ld.so.conf.d/) $(call allow-override,LDCONFIG,ldconfig) INSTALL = install +MKDIR = mkdir FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \ -fasynchronous-unwind-tables -fstack-clash-protection WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized @@ -31,7 +32,7 @@ TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs) CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) LDFLAGS := -ggdb -LIBS := $$($(PKG_CONFIG) --libs libtracefs) -lprocps +LIBS := $$($(PKG_CONFIG) --libs libtracefs) SRC := $(wildcard src/*.c) HDR := $(wildcard src/*.h) @@ -57,6 +58,41 @@ else DOCSRC = $(SRCTREE)/../../../Documentation/tools/rtla/ endif +LIBTRACEEVENT_MIN_VERSION = 1.5 +LIBTRACEFS_MIN_VERSION = 1.3 + +TEST_LIBTRACEEVENT = $(shell sh -c "$(PKG_CONFIG) --atleast-version $(LIBTRACEEVENT_MIN_VERSION) libtraceevent > /dev/null 2>&1 || echo n") +ifeq ("$(TEST_LIBTRACEEVENT)", "n") +.PHONY: warning_traceevent +warning_traceevent: + @echo "********************************************" + @echo "** NOTICE: libtraceevent version $(LIBTRACEEVENT_MIN_VERSION) or higher not found" + @echo "**" + @echo "** Consider installing the latest libtraceevent from your" + @echo "** distribution, e.g., 'dnf install libtraceevent' on Fedora," + @echo "** or from source:" + @echo "**" + @echo "** https://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git/ " + @echo "**" + @echo "********************************************" +endif + +TEST_LIBTRACEFS = $(shell sh -c "$(PKG_CONFIG) --atleast-version $(LIBTRACEFS_MIN_VERSION) libtracefs > /dev/null 2>&1 || echo n") +ifeq ("$(TEST_LIBTRACEFS)", "n") +.PHONY: warning_tracefs +warning_tracefs: + @echo "********************************************" + @echo "** NOTICE: libtracefs version $(LIBTRACEFS_MIN_VERSION) or higher not found" + @echo "**" + @echo "** Consider installing the latest libtracefs from your" + @echo "** distribution, e.g., 'dnf install libtracefs' on Fedora," + @echo "** or from source:" + @echo "**" + @echo "** https://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git/ " + @echo "**" + @echo 
"********************************************" +endif + .PHONY: all all: rtla @@ -68,7 +104,7 @@ static: $(OBJ) .PHONY: install install: doc_install - $(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR) + $(MKDIR) -p $(DESTDIR)$(BINDIR) $(INSTALL) rtla -m 755 $(DESTDIR)$(BINDIR) $(STRIP) $(DESTDIR)$(BINDIR)/rtla @test ! -f $(DESTDIR)$(BINDIR)/osnoise || rm $(DESTDIR)$(BINDIR)/osnoise diff --git a/tools/tracing/rtla/README.txt b/tools/tracing/rtla/README.txt index 6c88446f7e74..4af3fd40f171 100644 --- a/tools/tracing/rtla/README.txt +++ b/tools/tracing/rtla/README.txt @@ -1,19 +1,16 @@ RTLA: Real-Time Linux Analysis tools -The rtla is a meta-tool that includes a set of commands that -aims to analyze the real-time properties of Linux. But, instead of -testing Linux as a black box, rtla leverages kernel tracing -capabilities to provide precise information about the properties -and root causes of unexpected results. +The rtla meta-tool includes a set of commands that aims to analyze +the real-time properties of Linux. Instead of testing Linux as a black box, +rtla leverages kernel tracing capabilities to provide precise information +about the properties and root causes of unexpected results. Installing RTLA -RTLA depends on some libraries and tools. More precisely, it depends on the -following libraries: +RTLA depends on the following libraries and tools: - libtracefs - libtraceevent - - procps It also depends on python3-docutils to compile man pages. diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index b4380d45cacd..5d7ea479ac89 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -809,7 +809,7 @@ int osnoise_hist_main(int argc, char *argv[]) retval = set_comm_sched_attr("osnoise/", ¶ms->sched_param); if (retval) { err_msg("Failed to set sched parameters\n"); - goto out_hist; + goto out_free; } } @@ -819,7 +819,7 @@ int osnoise_hist_main(int argc, char *argv[]) record = osnoise_init_trace_tool("osnoise"); if (!record) { err_msg("Failed to enable the trace instance\n"); - goto out_hist; + goto out_free; } if (params->events) { @@ -869,6 +869,7 @@ int osnoise_hist_main(int argc, char *argv[]) out_hist: trace_events_destroy(&record->trace, params->events); params->events = NULL; +out_free: osnoise_free_histogram(tool->data); out_destroy: osnoise_destroy_tool(record); diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index 72c2fd6ce005..76479bfb2922 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -572,7 +572,7 @@ int osnoise_top_main(int argc, char **argv) retval = osnoise_top_apply_config(tool, params); if (retval) { err_msg("Could not apply config\n"); - goto out_top; + goto out_free; } trace = &tool->trace; @@ -580,14 +580,14 @@ int osnoise_top_main(int argc, char **argv) retval = enable_osnoise(trace); if (retval) { err_msg("Failed to enable osnoise tracer\n"); - goto out_top; + goto out_free; } if (params->set_sched) { retval = set_comm_sched_attr("osnoise/", ¶ms->sched_param); if (retval) { err_msg("Failed to set sched parameters\n"); - goto out_top; + goto out_free; } } @@ -597,7 +597,7 @@ int osnoise_top_main(int argc, char **argv) record = osnoise_init_trace_tool("osnoise"); if (!record) { err_msg("Failed to enable the trace instance\n"); - goto out_top; + goto out_free; } if (params->events) { @@ -649,6 +649,7 @@ int osnoise_top_main(int argc, char **argv) out_top: trace_events_destroy(&record->trace, params->events); 
params->events = NULL; +out_free: osnoise_free_top(tool->data); osnoise_destroy_tool(record); osnoise_destroy_tool(tool); diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index dc908126c610..f3ec628f5e51 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -821,7 +821,7 @@ int timerlat_hist_main(int argc, char *argv[]) retval = timerlat_hist_apply_config(tool, params); if (retval) { err_msg("Could not apply config\n"); - goto out_hist; + goto out_free; } trace = &tool->trace; @@ -829,14 +829,14 @@ int timerlat_hist_main(int argc, char *argv[]) retval = enable_timerlat(trace); if (retval) { err_msg("Failed to enable timerlat tracer\n"); - goto out_hist; + goto out_free; } if (params->set_sched) { retval = set_comm_sched_attr("timerlat/", ¶ms->sched_param); if (retval) { err_msg("Failed to set sched parameters\n"); - goto out_hist; + goto out_free; } } @@ -844,7 +844,7 @@ int timerlat_hist_main(int argc, char *argv[]) dma_latency_fd = set_cpu_dma_latency(params->dma_latency); if (dma_latency_fd < 0) { err_msg("Could not set /dev/cpu_dma_latency.\n"); - goto out_hist; + goto out_free; } } @@ -854,7 +854,7 @@ int timerlat_hist_main(int argc, char *argv[]) record = osnoise_init_trace_tool("timerlat"); if (!record) { err_msg("Failed to enable the trace instance\n"); - goto out_hist; + goto out_free; } if (params->events) { @@ -904,6 +904,7 @@ out_hist: close(dma_latency_fd); trace_events_destroy(&record->trace, params->events); params->events = NULL; +out_free: timerlat_free_histogram(tool->data); osnoise_destroy_tool(record); osnoise_destroy_tool(tool); diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 1f754c3df53f..35452a1d45e9 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -612,7 +612,7 @@ int timerlat_top_main(int argc, char *argv[]) retval = timerlat_top_apply_config(top, params); if (retval) { err_msg("Could not apply config\n"); - goto out_top; + goto out_free; } trace = &top->trace; @@ -620,14 +620,14 @@ int timerlat_top_main(int argc, char *argv[]) retval = enable_timerlat(trace); if (retval) { err_msg("Failed to enable timerlat tracer\n"); - goto out_top; + goto out_free; } if (params->set_sched) { retval = set_comm_sched_attr("timerlat/", ¶ms->sched_param); if (retval) { err_msg("Failed to set sched parameters\n"); - goto out_top; + goto out_free; } } @@ -635,7 +635,7 @@ int timerlat_top_main(int argc, char *argv[]) dma_latency_fd = set_cpu_dma_latency(params->dma_latency); if (dma_latency_fd < 0) { err_msg("Could not set /dev/cpu_dma_latency.\n"); - goto out_top; + goto out_free; } } @@ -645,7 +645,7 @@ int timerlat_top_main(int argc, char *argv[]) record = osnoise_init_trace_tool("timerlat"); if (!record) { err_msg("Failed to enable the trace instance\n"); - goto out_top; + goto out_free; } if (params->events) { @@ -699,6 +699,7 @@ out_top: close(dma_latency_fd); trace_events_destroy(&record->trace, params->events); params->events = NULL; +out_free: timerlat_free_top(top->data); osnoise_destroy_tool(record); osnoise_destroy_tool(top); diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c index da2b590edaed..5352167a1e75 100644 --- a/tools/tracing/rtla/src/utils.c +++ b/tools/tracing/rtla/src/utils.c @@ -3,7 +3,7 @@ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> */ -#include <proc/readproc.h> +#include <dirent.h> #include <stdarg.h> #include 
<stdlib.h> #include <string.h> @@ -255,50 +255,114 @@ int __set_sched_attr(int pid, struct sched_attr *attr) retval = sched_setattr(pid, attr, flags); if (retval < 0) { - err_msg("boost_with_deadline failed to boost pid %d: %s\n", + err_msg("Failed to set sched attributes for pid %d: %s\n", pid, strerror(errno)); return 1; } return 0; } + +/* + * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm + * + * Check if the procfs entry is a directory of a process, and then check if the + * process has a comm with the prefix set in char *comm_prefix. As the + * current users of this function only check for kernel threads, there is no + * need to check the individual threads of the process. + * + * Return: True if the proc_entry contains a comm file with comm_prefix*. + * Otherwise returns false. + */ +static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry) +{ + char buffer[MAX_PATH]; + int comm_fd, retval; + char *t_name; + + if (proc_entry->d_type != DT_DIR) + return 0; + + if (*proc_entry->d_name == '.') + return 0; + + /* check if the string is a pid */ + for (t_name = proc_entry->d_name; *t_name; t_name++) { + if (!isdigit(*t_name)) + break; + } + + if (*t_name != '\0') + return 0; + + snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name); + comm_fd = open(buffer, O_RDONLY); + if (comm_fd < 0) + return 0; + + memset(buffer, 0, MAX_PATH); + retval = read(comm_fd, buffer, MAX_PATH); + + close(comm_fd); + + if (retval <= 0) + return 0; + + retval = strncmp(comm_prefix, buffer, strlen(comm_prefix)); + if (retval) + return 0; + + /* comm already has a trailing \n */ + debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer); + + return 1; +} + /* - * set_comm_sched_attr - set sched params to threads starting with char *comm + * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix * - * This function uses procps to list the currently running threads and then - * set the sched_attr *attr to the threads that start with char *comm. It is + * This function uses procfs to list the currently running threads and then set the + * sched_attr *attr to the threads that start with char *comm_prefix. It is * mainly used to set the priority to the kernel threads created by the * tracers. 
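+ *
+ * For example, the osnoise tools call this with comm_prefix = "osnoise/",
+ * which then matches the per-cpu kernel threads osnoise/0, osnoise/1, and
+ * so on.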
 */ -int set_comm_sched_attr(const char *comm, struct sched_attr *attr) +int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr) { - int flags = PROC_FILLCOM | PROC_FILLSTAT; - PROCTAB *ptp; - proc_t task; + struct dirent *proc_entry; + DIR *procfs; int retval; - ptp = openproc(flags); - if (!ptp) { - err_msg("error openproc()\n"); - return -ENOENT; + if (strlen(comm_prefix) >= MAX_PATH) { + err_msg("Command prefix is too long: %d < strlen(%s)\n", + MAX_PATH, comm_prefix); + return 1; } - memset(&task, 0, sizeof(task)); + procfs = opendir("/proc"); + if (!procfs) { + err_msg("Could not open procfs\n"); + return 1; + } - while (readproc(ptp, &task)) { - retval = strncmp(comm, task.cmd, strlen(comm)); - if (retval) + while ((proc_entry = readdir(procfs))) { + + retval = procfs_is_workload_pid(comm_prefix, proc_entry); + if (!retval) continue; - retval = __set_sched_attr(task.tid, attr); - if (retval) + + /* procfs_is_workload_pid confirmed it is a pid */ + retval = __set_sched_attr(atoi(proc_entry->d_name), attr); + if (retval) { + err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name); goto out_err; - } + } - closeproc(ptp); + debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name); + } + closedir(procfs); return 0; out_err: - closeproc(ptp); + closedir(procfs); return 1; } diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h index fa08e374870a..5571afd3b549 100644 --- a/tools/tracing/rtla/src/utils.h +++ b/tools/tracing/rtla/src/utils.h @@ -6,6 +6,7 @@ * '18446744073709551615\0' */ #define BUFF_U64_STR_SIZE 24 +#define MAX_PATH 1024 #define container_of(ptr, type, member)({ \ const typeof(((type *)0)->member) *__mptr = (ptr); \ @@ -53,5 +54,5 @@ struct sched_attr { }; int parse_prio(char *arg, struct sched_attr *sched_param); -int set_comm_sched_attr(const char *comm, struct sched_attr *attr); +int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr); int set_cpu_dma_latency(int32_t latency);
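As a reference for the pattern above, the /proc walk that replaces the procps
dependency can be exercised standalone. The sketch below is illustrative, not
the rtla source itself: error handling is simplified, the output format is
arbitrary, and the "osnoise/" default prefix is just an example.

	#include <ctype.h>
	#include <dirent.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	/* Print "pid comm" for every task whose comm starts with the prefix. */
	int main(int argc, char **argv)
	{
		const char *prefix = argc > 1 ? argv[1] : "osnoise/";
		struct dirent *entry;
		DIR *procfs = opendir("/proc");

		if (!procfs)
			return 1;

		while ((entry = readdir(procfs))) {
			char path[280], comm[64] = { 0 };
			const char *p = entry->d_name;
			int fd;

			while (isdigit((unsigned char)*p))	/* pid dirs are all digits */
				p++;
			if (*p != '\0')
				continue;

			snprintf(path, sizeof(path), "/proc/%s/comm", entry->d_name);
			fd = open(path, O_RDONLY);
			if (fd < 0)
				continue;	/* task exited meanwhile */

			if (read(fd, comm, sizeof(comm) - 1) > 0 &&
			    !strncmp(prefix, comm, strlen(prefix)))
				printf("%s %s", entry->d_name, comm); /* comm has \n */

			close(fd);
		}
		closedir(procfs);
		return 0;
	}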