Diffstat (limited to 'tools')
-rw-r--r--  tools/arch/arm64/include/uapi/asm/perf_regs.h | 7
-rw-r--r--  tools/arch/x86/include/asm/msr-index.h | 19
-rw-r--r--  tools/build/Makefile.feature | 4
-rw-r--r--  tools/build/feature/Makefile | 20
-rw-r--r--  tools/build/feature/test-libbpf-bpf_map_create.c | 8
-rw-r--r--  tools/build/feature/test-libbpf-bpf_object__next_map.c | 8
-rw-r--r--  tools/build/feature/test-libbpf-bpf_object__next_program.c | 8
-rw-r--r--  tools/build/feature/test-libbpf-bpf_prog_load.c | 9
-rw-r--r--  tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c | 5
-rw-r--r--  tools/build/feature/test-libbpf-btf__raw_data.c | 8
-rw-r--r--  tools/include/uapi/asm-generic/fcntl.h | 21
-rw-r--r--  tools/include/uapi/asm-generic/unistd.h | 4
-rw-r--r--  tools/lib/perf/evlist.c | 71
-rw-r--r--  tools/lib/perf/include/internal/evsel.h | 11
-rw-r--r--  tools/perf/Documentation/perf-record.txt | 10
-rw-r--r--  tools/perf/Makefile.config | 25
-rw-r--r--  tools/perf/Makefile.perf | 1
-rw-r--r--  tools/perf/arch/arm64/util/perf_regs.c | 38
-rw-r--r--  tools/perf/arch/arm64/util/unwind-libunwind.c | 73
-rw-r--r--  tools/perf/arch/x86/util/intel-pt.c | 31
-rw-r--r--  tools/perf/builtin-c2c.c | 6
-rw-r--r--  tools/perf/builtin-record.c | 64
-rw-r--r--  tools/perf/builtin-stat.c | 5
-rw-r--r--  tools/perf/pmu-events/jevents.c | 2
-rwxr-xr-x  tools/perf/scripts/python/arm-cs-trace-disasm.py | 272
-rw-r--r--  tools/perf/tests/shell/lib/perf_csv_output_lint.py | 48
-rwxr-xr-x  tools/perf/tests/shell/record_offcpu.sh | 60
-rwxr-xr-x  tools/perf/tests/shell/stat+csv_output.sh | 147
-rwxr-xr-x  tools/perf/tests/shell/test_intel_pt.sh | 71
-rw-r--r--  tools/perf/util/Build | 1
-rw-r--r--  tools/perf/util/auxtrace.c | 15
-rw-r--r--  tools/perf/util/auxtrace.h | 13
-rw-r--r--  tools/perf/util/bpf-event.c | 24
-rw-r--r--  tools/perf/util/bpf_counter.c | 6
-rw-r--r--  tools/perf/util/bpf_off_cpu.c | 338
-rw-r--r--  tools/perf/util/bpf_skel/off_cpu.bpf.c | 229
-rw-r--r--  tools/perf/util/evlist.c | 61
-rw-r--r--  tools/perf/util/evlist.h | 5
-rw-r--r--  tools/perf/util/evsel.c | 7
-rw-r--r--  tools/perf/util/libunwind/arm64.c | 2
-rw-r--r--  tools/perf/util/mmap.c | 4
-rw-r--r--  tools/perf/util/off_cpu.h | 29
-rw-r--r--  tools/perf/util/parse-events.c | 2
-rw-r--r--  tools/perf/util/perf_regs.c | 2
-rw-r--r--  tools/perf/util/python-ext-sources | 1
-rw-r--r--  tools/perf/util/scripting-engines/trace-event-python.c | 21
-rw-r--r--  tools/testing/crypto/chacha20-s390/Makefile | 12
-rw-r--r--  tools/testing/crypto/chacha20-s390/run-tests.sh | 34
-rw-r--r--  tools/testing/crypto/chacha20-s390/test-cipher.c | 372
-rw-r--r--  tools/testing/cxl/Kbuild | 3
-rw-r--r--  tools/testing/cxl/mock_mem.c | 10
-rw-r--r--  tools/testing/cxl/test/mem.c | 17
-rw-r--r--  tools/testing/cxl/test/mock.c | 29
-rw-r--r--  tools/testing/memblock/TODO | 3
-rw-r--r--  tools/testing/memblock/tests/basic_api.c | 392
-rw-r--r--  tools/testing/nvdimm/pmem-dax.c | 4
-rw-r--r--  tools/testing/nvdimm/test/iomap.c | 18
-rw-r--r--  tools/testing/nvdimm/test/nfit.c | 3
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc | 2
-rw-r--r--  tools/testing/selftests/powerpc/include/utils.h | 5
-rw-r--r--  tools/testing/selftests/powerpc/math/Makefile | 4
-rw-r--r--  tools/testing/selftests/powerpc/math/mma.S | 33
-rw-r--r--  tools/testing/selftests/powerpc/math/mma.c | 48
-rw-r--r--  tools/testing/selftests/powerpc/mm/.gitignore | 1
-rw-r--r--  tools/testing/selftests/powerpc/mm/Makefile | 4
-rw-r--r--  tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c | 156
-rw-r--r--  tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S | 43
-rw-r--r--  tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c | 2
-rw-r--r--  tools/testing/selftests/powerpc/security/spectre_v2.c | 32
-rw-r--r--  tools/tracing/rtla/Makefile | 40
-rw-r--r--  tools/tracing/rtla/README.txt | 13
-rw-r--r--  tools/tracing/rtla/src/osnoise_hist.c | 5
-rw-r--r--  tools/tracing/rtla/src/osnoise_top.c | 9
-rw-r--r--  tools/tracing/rtla/src/timerlat_hist.c | 11
-rw-r--r--  tools/tracing/rtla/src/timerlat_top.c | 11
-rw-r--r--  tools/tracing/rtla/src/utils.c | 108
-rw-r--r--  tools/tracing/rtla/src/utils.h | 3
77 files changed, 2763 insertions(+), 480 deletions(-)
diff --git a/tools/arch/arm64/include/uapi/asm/perf_regs.h b/tools/arch/arm64/include/uapi/asm/perf_regs.h
index d54daafa89e3..fd157f46727e 100644
--- a/tools/arch/arm64/include/uapi/asm/perf_regs.h
+++ b/tools/arch/arm64/include/uapi/asm/perf_regs.h
@@ -36,6 +36,11 @@ enum perf_event_arm_regs {
PERF_REG_ARM64_LR,
PERF_REG_ARM64_SP,
PERF_REG_ARM64_PC,
- PERF_REG_ARM64_MAX,
+
+ /* Extended/pseudo registers */
+ PERF_REG_ARM64_VG = 46, // SVE Vector Granule
+
+ PERF_REG_ARM64_MAX = PERF_REG_ARM64_PC + 1,
+ PERF_REG_ARM64_EXTENDED_MAX = PERF_REG_ARM64_VG + 1
};
#endif /* _ASM_ARM64_PERF_REGS_H */
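
Note that the numbering above is deliberately sparse: PERF_REG_ARM64_MAX stays at PERF_REG_ARM64_PC + 1, so the base register set and any existing mask arithmetic are untouched, while the VG pseudo register sits at bit 46 and is only covered by PERF_REG_ARM64_EXTENDED_MAX. A minimal sketch of building a sample_regs_user mask that includes the new register, assuming only the enum values above:

    /* all base registers x0..pc, plus the SVE vector granule */
    __u64 mask = (1ULL << PERF_REG_ARM64_MAX) - 1;
    mask |= 1ULL << PERF_REG_ARM64_VG;	/* bit 46 */
    attr.sample_regs_user = mask;
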
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index ee15311b6be1..403e83b4adc8 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -76,6 +76,8 @@
/* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */
#define MSR_IA32_CORE_CAPS 0x000000cf
+#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT 2
+#define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS BIT(MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT)
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT 5
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT)
@@ -154,6 +156,11 @@
#define MSR_IA32_POWER_CTL 0x000001fc
#define MSR_IA32_POWER_CTL_BIT_EE 19
+/* Abbreviated from Intel SDM name IA32_INTEGRITY_CAPABILITIES */
+#define MSR_INTEGRITY_CAPS 0x000002d9
+#define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4
+#define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT)
+
#define MSR_LBR_NHM_FROM 0x00000680
#define MSR_LBR_NHM_TO 0x000006c0
#define MSR_LBR_CORE_FROM 0x00000040
@@ -312,6 +319,7 @@
/* Run Time Average Power Limiting (RAPL) Interface */
+#define MSR_VR_CURRENT_CONFIG 0x00000601
#define MSR_RAPL_POWER_UNIT 0x00000606
#define MSR_PKG_POWER_LIMIT 0x00000610
@@ -502,8 +510,10 @@
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
#define MSR_AMD64_SEV_ES_ENABLED_BIT 1
+#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2
#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
+#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT)
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
@@ -524,6 +534,11 @@
#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16)
#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24)
+/* AMD Performance Counter Global Status and Control MSRs */
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
+#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
+
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
@@ -688,6 +703,10 @@
#define MSR_IA32_PERF_CTL 0x00000199
#define INTEL_PERF_CTL_MASK 0xffff
+/* AMD Branch Sampling configuration */
+#define MSR_AMD_DBG_EXTN_CFG 0xc000010f
+#define MSR_AMD_SAMP_BR_FROM 0xc0010300
+
#define MSR_IA32_MPERF 0x000000e7
#define MSR_IA32_APERF 0x000000e8
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index c6a48d0ef9ff..888a0421d43b 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -99,6 +99,10 @@ FEATURE_TESTS_EXTRA := \
clang \
libbpf \
libbpf-btf__load_from_kernel_by_id \
+ libbpf-bpf_prog_load \
+ libbpf-bpf_object__next_program \
+ libbpf-bpf_object__next_map \
+ libbpf-bpf_map_create \
libpfm4 \
libdebuginfod \
clang-bpf-co-re
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index cb4a2a4fa2e4..7c2a17e23c30 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -58,6 +58,11 @@ FILES= \
test-bpf.bin \
test-libbpf.bin \
test-libbpf-btf__load_from_kernel_by_id.bin \
+ test-libbpf-bpf_prog_load.bin \
+ test-libbpf-bpf_map_create.bin \
+ test-libbpf-bpf_object__next_program.bin \
+ test-libbpf-bpf_object__next_map.bin \
+ test-libbpf-btf__raw_data.bin \
test-get_cpuid.bin \
test-sdt.bin \
test-cxx.bin \
@@ -291,6 +296,21 @@ $(OUTPUT)test-libbpf.bin:
$(OUTPUT)test-libbpf-btf__load_from_kernel_by_id.bin:
$(BUILD) -lbpf
+$(OUTPUT)test-libbpf-bpf_prog_load.bin:
+ $(BUILD) -lbpf
+
+$(OUTPUT)test-libbpf-bpf_map_create.bin:
+ $(BUILD) -lbpf
+
+$(OUTPUT)test-libbpf-bpf_object__next_program.bin:
+ $(BUILD) -lbpf
+
+$(OUTPUT)test-libbpf-bpf_object__next_map.bin:
+ $(BUILD) -lbpf
+
+$(OUTPUT)test-libbpf-btf__raw_data.bin:
+ $(BUILD) -lbpf
+
$(OUTPUT)test-sdt.bin:
$(BUILD)
diff --git a/tools/build/feature/test-libbpf-bpf_map_create.c b/tools/build/feature/test-libbpf-bpf_map_create.c
new file mode 100644
index 000000000000..b9f550e332c8
--- /dev/null
+++ b/tools/build/feature/test-libbpf-bpf_map_create.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/bpf.h>
+
+int main(void)
+{
+ return bpf_map_create(0 /* map_type */, NULL /* map_name */, 0 /* key_size */,
+ 0 /* value_size */, 0 /* max_entries */, NULL /* opts */);
+}
diff --git a/tools/build/feature/test-libbpf-bpf_object__next_map.c b/tools/build/feature/test-libbpf-bpf_object__next_map.c
new file mode 100644
index 000000000000..64adb519e97e
--- /dev/null
+++ b/tools/build/feature/test-libbpf-bpf_object__next_map.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/libbpf.h>
+
+int main(void)
+{
+ bpf_object__next_map(NULL /* obj */, NULL /* prev */);
+ return 0;
+}
diff --git a/tools/build/feature/test-libbpf-bpf_object__next_program.c b/tools/build/feature/test-libbpf-bpf_object__next_program.c
new file mode 100644
index 000000000000..8bf4fd26b545
--- /dev/null
+++ b/tools/build/feature/test-libbpf-bpf_object__next_program.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/libbpf.h>
+
+int main(void)
+{
+ bpf_object__next_program(NULL /* obj */, NULL /* prev */);
+ return 0;
+}
diff --git a/tools/build/feature/test-libbpf-bpf_prog_load.c b/tools/build/feature/test-libbpf-bpf_prog_load.c
new file mode 100644
index 000000000000..47f516d63ebc
--- /dev/null
+++ b/tools/build/feature/test-libbpf-bpf_prog_load.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/bpf.h>
+
+int main(void)
+{
+ return bpf_prog_load(0 /* prog_type */, NULL /* prog_name */,
+ NULL /* license */, NULL /* insns */,
+ 0 /* insn_cnt */, NULL /* opts */);
+}
diff --git a/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c
index f7c084428735..a17647f7d5a4 100644
--- a/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c
+++ b/tools/build/feature/test-libbpf-btf__load_from_kernel_by_id.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-#include <bpf/libbpf.h>
+#include <bpf/btf.h>
int main(void)
{
- return btf__load_from_kernel_by_id(20151128, NULL);
+ btf__load_from_kernel_by_id(20151128);
+ return 0;
}
diff --git a/tools/build/feature/test-libbpf-btf__raw_data.c b/tools/build/feature/test-libbpf-btf__raw_data.c
new file mode 100644
index 000000000000..57da31dd7581
--- /dev/null
+++ b/tools/build/feature/test-libbpf-btf__raw_data.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/btf.h>
+
+int main(void)
+{
+ btf__raw_data(NULL /* btf_ro */, NULL /* size */);
+ return 0;
+}
diff --git a/tools/include/uapi/asm-generic/fcntl.h b/tools/include/uapi/asm-generic/fcntl.h
index ac190958c981..0197042b7dfb 100644
--- a/tools/include/uapi/asm-generic/fcntl.h
+++ b/tools/include/uapi/asm-generic/fcntl.h
@@ -115,13 +115,11 @@
#define F_GETSIG 11 /* for sockets. */
#endif
-#ifndef CONFIG_64BIT
#ifndef F_GETLK64
#define F_GETLK64 12 /* using 'struct flock64' */
#define F_SETLK64 13
#define F_SETLKW64 14
#endif
-#endif
#ifndef F_SETOWN_EX
#define F_SETOWN_EX 15
@@ -187,25 +185,19 @@ struct f_owner_ex {
#define F_LINUX_SPECIFIC_BASE 1024
-#ifndef HAVE_ARCH_STRUCT_FLOCK
-#ifndef __ARCH_FLOCK_PAD
-#define __ARCH_FLOCK_PAD
-#endif
-
struct flock {
short l_type;
short l_whence;
__kernel_off_t l_start;
__kernel_off_t l_len;
__kernel_pid_t l_pid;
- __ARCH_FLOCK_PAD
-};
+#ifdef __ARCH_FLOCK_EXTRA_SYSID
+ __ARCH_FLOCK_EXTRA_SYSID
#endif
-
-#ifndef HAVE_ARCH_STRUCT_FLOCK64
-#ifndef __ARCH_FLOCK64_PAD
-#define __ARCH_FLOCK64_PAD
+#ifdef __ARCH_FLOCK_PAD
+ __ARCH_FLOCK_PAD
#endif
+};
struct flock64 {
short l_type;
@@ -213,8 +205,9 @@ struct flock64 {
__kernel_loff_t l_start;
__kernel_loff_t l_len;
__kernel_pid_t l_pid;
+#ifdef __ARCH_FLOCK64_PAD
__ARCH_FLOCK64_PAD
-};
#endif
+};
#endif /* _ASM_GENERIC_FCNTL_H */
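
With the HAVE_ARCH_STRUCT_FLOCK guards gone, struct flock and struct flock64 are defined once here, and architectures only supply optional members through the __ARCH_FLOCK_* macros, which are pasted verbatim into the struct bodies. A hypothetical arch header opting in could look like this (the member choices are illustrative, not taken from any particular architecture):

    /* hypothetical <asm/fcntl.h> overrides, expanded inside struct flock */
    #define __ARCH_FLOCK_EXTRA_SYSID   __kernel_pid_t l_sysid;
    #define __ARCH_FLOCK_PAD           long __unused;
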
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 1c48b0ae3ba3..45fa180cc56a 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -383,7 +383,7 @@ __SYSCALL(__NR_syslog, sys_syslog)
/* kernel/ptrace.c */
#define __NR_ptrace 117
-__SYSCALL(__NR_ptrace, sys_ptrace)
+__SC_COMP(__NR_ptrace, sys_ptrace, compat_sys_ptrace)
/* kernel/sched/core.c */
#define __NR_sched_setparam 118
@@ -779,7 +779,7 @@ __SYSCALL(__NR_rseq, sys_rseq)
#define __NR_kexec_file_load 294
__SYSCALL(__NR_kexec_file_load, sys_kexec_file_load)
/* 295 through 402 are unassigned to sync up with generic numbers, don't use */
-#if __BITS_PER_LONG == 32
+#if defined(__SYSCALL_COMPAT) || __BITS_PER_LONG == 32
#define __NR_clock_gettime64 403
__SYSCALL(__NR_clock_gettime64, sys_clock_gettime)
#define __NR_clock_settime64 404
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index ed66f2e38464..e6c98a6e3908 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -23,6 +23,7 @@
#include <perf/cpumap.h>
#include <perf/threadmap.h>
#include <api/fd/array.h>
+#include "internal.h"
void perf_evlist__init(struct perf_evlist *evlist)
{
@@ -39,10 +40,11 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
* We already have cpus for evsel (via PMU sysfs) so
* keep it, if there's no target cpu list defined.
*/
- if (!evsel->own_cpus || evlist->has_user_cpus) {
- perf_cpu_map__put(evsel->cpus);
- evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
- } else if (!evsel->system_wide && perf_cpu_map__empty(evlist->user_requested_cpus)) {
+ if (!evsel->own_cpus ||
+ (!evsel->system_wide && evlist->has_user_cpus) ||
+ (!evsel->system_wide &&
+ !evsel->requires_cpu &&
+ perf_cpu_map__empty(evlist->user_requested_cpus))) {
perf_cpu_map__put(evsel->cpus);
evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
} else if (evsel->cpus != evsel->own_cpus) {
@@ -50,8 +52,11 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
}
- perf_thread_map__put(evsel->threads);
- evsel->threads = perf_thread_map__get(evlist->threads);
+ if (!evsel->system_wide) {
+ perf_thread_map__put(evsel->threads);
+ evsel->threads = perf_thread_map__get(evlist->threads);
+ }
+
evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
}
@@ -298,7 +303,7 @@ add:
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
- int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus);
+ int nr_cpus = perf_cpu_map__nr(evlist->all_cpus);
int nr_threads = perf_thread_map__nr(evlist->threads);
int nfds = 0;
struct perf_evsel *evsel;
@@ -428,9 +433,9 @@ static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_
static int
mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
int idx, struct perf_mmap_param *mp, int cpu_idx,
- int thread, int *_output, int *_output_overwrite)
+ int thread, int *_output, int *_output_overwrite, int *nr_mmaps)
{
- struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->user_requested_cpus, cpu_idx);
+ struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->all_cpus, cpu_idx);
struct perf_evsel *evsel;
int revent;
@@ -484,6 +489,8 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
if (ops->mmap(map, mp, *output, evlist_cpu) < 0)
return -1;
+ *nr_mmaps += 1;
+
if (!idx)
perf_evlist__set_mmap_first(evlist, map, overwrite);
} else {
@@ -513,34 +520,12 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
}
static int
-mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
- struct perf_mmap_param *mp)
-{
- int thread;
- int nr_threads = perf_thread_map__nr(evlist->threads);
-
- for (thread = 0; thread < nr_threads; thread++) {
- int output = -1;
- int output_overwrite = -1;
-
- if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread,
- &output, &output_overwrite))
- goto out_unmap;
- }
-
- return 0;
-
-out_unmap:
- perf_evlist__munmap(evlist);
- return -1;
-}
-
-static int
mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
struct perf_mmap_param *mp)
{
int nr_threads = perf_thread_map__nr(evlist->threads);
- int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus);
+ int nr_cpus = perf_cpu_map__nr(evlist->all_cpus);
+ int nr_mmaps = 0;
int cpu, thread;
for (cpu = 0; cpu < nr_cpus; cpu++) {
@@ -549,11 +534,14 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
for (thread = 0; thread < nr_threads; thread++) {
if (mmap_per_evsel(evlist, ops, cpu, mp, cpu,
- thread, &output, &output_overwrite))
+ thread, &output, &output_overwrite, &nr_mmaps))
goto out_unmap;
}
}
+ if (nr_mmaps != evlist->nr_mmaps)
+ pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps);
+
return 0;
out_unmap:
@@ -565,9 +553,14 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist)
{
int nr_mmaps;
- nr_mmaps = perf_cpu_map__nr(evlist->user_requested_cpus);
- if (perf_cpu_map__empty(evlist->user_requested_cpus))
- nr_mmaps = perf_thread_map__nr(evlist->threads);
+ /* One for each CPU */
+ nr_mmaps = perf_cpu_map__nr(evlist->all_cpus);
+ if (perf_cpu_map__empty(evlist->all_cpus)) {
+ /* Plus one for each thread */
+ nr_mmaps += perf_thread_map__nr(evlist->threads);
+ /* Minus the per-thread CPU (-1) */
+ nr_mmaps -= 1;
+ }
return nr_mmaps;
}
@@ -577,7 +570,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
struct perf_mmap_param *mp)
{
struct perf_evsel *evsel;
- const struct perf_cpu_map *cpus = evlist->user_requested_cpus;
if (!ops || !ops->get || !ops->mmap)
return -EINVAL;
@@ -596,9 +588,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
return -ENOMEM;
- if (perf_cpu_map__empty(cpus))
- return mmap_per_thread(evlist, ops, mp);
-
return mmap_per_cpu(evlist, ops, mp);
}
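
Since the per-thread case is now folded into mmap_per_cpu(), the mmap count in perf_evlist__nr_mmaps() is derived purely from the maps. A worked example, assuming a per-thread session where all_cpus is the dummy map holding only the -1 entry and three threads are traced:

    /* assumed: all_cpus = { -1 }, threads = { t0, t1, t2 } */
    nr_mmaps  = perf_cpu_map__nr(evlist->all_cpus);    /* 1, the dummy entry */
    nr_mmaps += perf_thread_map__nr(evlist->threads);  /* +3 */
    nr_mmaps -= 1;                                     /* drop the dummy CPU: 3 mmaps */
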
diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h
index cfc9ebd7968e..2a912a1f1989 100644
--- a/tools/lib/perf/include/internal/evsel.h
+++ b/tools/lib/perf/include/internal/evsel.h
@@ -49,7 +49,18 @@ struct perf_evsel {
/* parse modifier helper */
int nr_members;
+ /*
+ * system_wide is for events that need to be on every CPU, irrespective
+ * of user requested CPUs or threads. Map propagation will set cpus to
+ * this event's own_cpus, whereby they will contribute to evlist
+ * all_cpus.
+ */
bool system_wide;
+ /*
+ * Some events, for example uncore events, require a CPU.
+ * That is, the CPU cannot be the 'any CPU' value of -1.
+ */
+ bool requires_cpu;
int idx;
};
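
A sketch of how the two flags are meant to combine, for example for an uncore event opened while profiling a single task (the flag semantics come from the comments above; the scenario itself is illustrative):

    /* uncore PMU: own_cpus comes from sysfs and cpu == -1 is not valid */
    evsel->requires_cpu = true;   /* keep own_cpus even for a per-thread target */
    evsel->system_wide = false;   /* but do not force the event onto every CPU */
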
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 465be4e62a17..b4e9ef7edfef 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -758,6 +758,16 @@ include::intel-hybrid.txt[]
If the URLs are not specified, the value of DEBUGINFOD_URLS
system environment variable is used.
+--off-cpu::
+ Enable off-cpu profiling with BPF. The BPF program will collect
+ task scheduling information with (user) stacktraces and save it
+ as sample data of a software event named "offcpu-time". The
+ sample period carries the time the task was sleeping, in nanoseconds.
+
+ Note that, for now, BPF can only collect stack traces using the
+ frame pointer ("fp"), so applications built without frame pointers
+ might see bogus addresses.
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
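
In practice this is exercised as, for example, 'perf record --off-cpu -- ./workload' followed by 'perf evlist', which should then list the "offcpu-time" event, and 'perf report' to view the accumulated sleep times; the new record_offcpu.sh test further down in this series checks exactly that flow.
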
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index e0304e70f182..73e0762092fe 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -573,11 +573,36 @@ ifndef NO_LIBELF
ifeq ($(feature-libbpf-btf__load_from_kernel_by_id), 1)
CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
endif
+ $(call feature_check,libbpf-bpf_prog_load)
+ ifeq ($(feature-libbpf-bpf_prog_load), 1)
+ CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD
+ endif
+ $(call feature_check,libbpf-bpf_object__next_program)
+ ifeq ($(feature-libbpf-bpf_object__next_program), 1)
+ CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM
+ endif
+ $(call feature_check,libbpf-bpf_object__next_map)
+ ifeq ($(feature-libbpf-bpf_object__next_map), 1)
+ CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_MAP
+ endif
+ $(call feature_check,libbpf-btf__raw_data)
+ ifeq ($(feature-libbpf-btf__raw_data), 1)
+ CFLAGS += -DHAVE_LIBBPF_BTF__RAW_DATA
+ endif
+ $(call feature_check,libbpf-bpf_map_create)
+ ifeq ($(feature-libbpf-bpf_map_create), 1)
+ CFLAGS += -DHAVE_LIBBPF_BPF_MAP_CREATE
+ endif
else
dummy := $(error Error: No libbpf devel library found, please install libbpf-devel);
endif
else
CFLAGS += -DHAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
+ CFLAGS += -DHAVE_LIBBPF_BPF_PROG_LOAD
+ CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM
+ CFLAGS += -DHAVE_LIBBPF_BPF_OBJECT__NEXT_MAP
+ CFLAGS += -DHAVE_LIBBPF_BTF__RAW_DATA
+ CFLAGS += -DHAVE_LIBBPF_BPF_MAP_CREATE
endif
endif
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 6e5aded855cc..8f738e11356d 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -1038,6 +1038,7 @@ SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp)
SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
+SKELETONS += $(SKEL_OUT)/off_cpu.skel.h
$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT):
$(Q)$(MKDIR) -p $@
diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c
index 476b037eea1c..006692c9b040 100644
--- a/tools/perf/arch/arm64/util/perf_regs.c
+++ b/tools/perf/arch/arm64/util/perf_regs.c
@@ -2,13 +2,19 @@
#include <errno.h>
#include <regex.h>
#include <string.h>
+#include <sys/auxv.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
+#include "../../../perf-sys.h"
#include "../../../util/debug.h"
#include "../../../util/event.h"
#include "../../../util/perf_regs.h"
+#ifndef HWCAP_SVE
+#define HWCAP_SVE (1 << 22)
+#endif
+
const struct sample_reg sample_reg_masks[] = {
SMPL_REG(x0, PERF_REG_ARM64_X0),
SMPL_REG(x1, PERF_REG_ARM64_X1),
@@ -43,6 +49,7 @@ const struct sample_reg sample_reg_masks[] = {
SMPL_REG(lr, PERF_REG_ARM64_LR),
SMPL_REG(sp, PERF_REG_ARM64_SP),
SMPL_REG(pc, PERF_REG_ARM64_PC),
+ SMPL_REG(vg, PERF_REG_ARM64_VG),
SMPL_REG_END
};
@@ -131,3 +138,34 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
return SDT_ARG_VALID;
}
+
+uint64_t arch__user_reg_mask(void)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .sample_type = PERF_SAMPLE_REGS_USER,
+ .disabled = 1,
+ .exclude_kernel = 1,
+ .sample_period = 1,
+ .sample_regs_user = PERF_REGS_MASK
+ };
+ int fd;
+
+ if (getauxval(AT_HWCAP) & HWCAP_SVE)
+ attr.sample_regs_user |= SMPL_REG_MASK(PERF_REG_ARM64_VG);
+
+ /*
+ * Check if the pmu supports perf extended regs, before
+ * returning the register mask to sample.
+ */
+ if (attr.sample_regs_user != PERF_REGS_MASK) {
+ event_attr_init(&attr);
+ fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+ if (fd != -1) {
+ close(fd);
+ return attr.sample_regs_user;
+ }
+ }
+ return PERF_REGS_MASK;
+}
diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c
index 5aecf88e3de6..871af5992298 100644
--- a/tools/perf/arch/arm64/util/unwind-libunwind.c
+++ b/tools/perf/arch/arm64/util/unwind-libunwind.c
@@ -10,77 +10,8 @@
int LIBUNWIND__ARCH_REG_ID(int regnum)
{
- switch (regnum) {
- case UNW_AARCH64_X0:
- return PERF_REG_ARM64_X0;
- case UNW_AARCH64_X1:
- return PERF_REG_ARM64_X1;
- case UNW_AARCH64_X2:
- return PERF_REG_ARM64_X2;
- case UNW_AARCH64_X3:
- return PERF_REG_ARM64_X3;
- case UNW_AARCH64_X4:
- return PERF_REG_ARM64_X4;
- case UNW_AARCH64_X5:
- return PERF_REG_ARM64_X5;
- case UNW_AARCH64_X6:
- return PERF_REG_ARM64_X6;
- case UNW_AARCH64_X7:
- return PERF_REG_ARM64_X7;
- case UNW_AARCH64_X8:
- return PERF_REG_ARM64_X8;
- case UNW_AARCH64_X9:
- return PERF_REG_ARM64_X9;
- case UNW_AARCH64_X10:
- return PERF_REG_ARM64_X10;
- case UNW_AARCH64_X11:
- return PERF_REG_ARM64_X11;
- case UNW_AARCH64_X12:
- return PERF_REG_ARM64_X12;
- case UNW_AARCH64_X13:
- return PERF_REG_ARM64_X13;
- case UNW_AARCH64_X14:
- return PERF_REG_ARM64_X14;
- case UNW_AARCH64_X15:
- return PERF_REG_ARM64_X15;
- case UNW_AARCH64_X16:
- return PERF_REG_ARM64_X16;
- case UNW_AARCH64_X17:
- return PERF_REG_ARM64_X17;
- case UNW_AARCH64_X18:
- return PERF_REG_ARM64_X18;
- case UNW_AARCH64_X19:
- return PERF_REG_ARM64_X19;
- case UNW_AARCH64_X20:
- return PERF_REG_ARM64_X20;
- case UNW_AARCH64_X21:
- return PERF_REG_ARM64_X21;
- case UNW_AARCH64_X22:
- return PERF_REG_ARM64_X22;
- case UNW_AARCH64_X23:
- return PERF_REG_ARM64_X23;
- case UNW_AARCH64_X24:
- return PERF_REG_ARM64_X24;
- case UNW_AARCH64_X25:
- return PERF_REG_ARM64_X25;
- case UNW_AARCH64_X26:
- return PERF_REG_ARM64_X26;
- case UNW_AARCH64_X27:
- return PERF_REG_ARM64_X27;
- case UNW_AARCH64_X28:
- return PERF_REG_ARM64_X28;
- case UNW_AARCH64_X29:
- return PERF_REG_ARM64_X29;
- case UNW_AARCH64_X30:
- return PERF_REG_ARM64_LR;
- case UNW_AARCH64_SP:
- return PERF_REG_ARM64_SP;
- case UNW_AARCH64_PC:
- return PERF_REG_ARM64_PC;
- default:
- pr_err("unwind: invalid reg id %d\n", regnum);
+ if (regnum < 0 || regnum >= PERF_REG_ARM64_EXTENDED_MAX)
return -EINVAL;
- }
- return -EINVAL;
+ return regnum;
}
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 2eaac4638aab..06c2cdfd8f2f 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -811,18 +811,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
if (!cpu_wide && perf_can_record_cpu_wide()) {
struct evsel *switch_evsel;
- err = parse_events(evlist, "dummy:u", NULL);
- if (err)
- return err;
+ switch_evsel = evlist__add_dummy_on_all_cpus(evlist);
+ if (!switch_evsel)
+ return -ENOMEM;
- switch_evsel = evlist__last(evlist);
-
- switch_evsel->core.attr.freq = 0;
- switch_evsel->core.attr.sample_period = 1;
switch_evsel->core.attr.context_switch = 1;
-
- switch_evsel->core.system_wide = true;
- switch_evsel->no_aux_samples = true;
switch_evsel->immediate = true;
evsel__set_sample_bit(switch_evsel, TID);
@@ -871,20 +864,22 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
/* Add dummy event to keep tracking */
if (opts->full_auxtrace) {
+ bool need_system_wide_tracking;
struct evsel *tracking_evsel;
- err = parse_events(evlist, "dummy:u", NULL);
- if (err)
- return err;
+ /*
+ * User space tasks can migrate between CPUs, so when tracing
+ * selected CPUs, sideband for all CPUs is still needed.
+ */
+ need_system_wide_tracking = evlist->core.has_user_cpus &&
+ !intel_pt_evsel->core.attr.exclude_user;
- tracking_evsel = evlist__last(evlist);
+ tracking_evsel = evlist__add_aux_dummy(evlist, need_system_wide_tracking);
+ if (!tracking_evsel)
+ return -ENOMEM;
evlist__set_tracking_event(evlist, tracking_evsel);
- tracking_evsel->core.attr.freq = 0;
- tracking_evsel->core.attr.sample_period = 1;
-
- tracking_evsel->no_aux_samples = true;
if (need_immediate)
tracking_evsel->immediate = true;
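
The evlist__add_aux_dummy() helper used here comes from the util/evlist.c changes elsewhere in this series and replaces the open-coded parse_events(evlist, "dummy:u", NULL) pattern. A sketch of the resulting sideband setup, with argument and field names as used in the hunk above:

    /* sideband-only dummy event, system-wide when tasks may migrate away */
    struct evsel *ev = evlist__add_aux_dummy(evlist, /*system_wide=*/true);
    if (!ev)
            return -ENOMEM;
    evlist__set_tracking_event(evlist, ev);  /* carries mmap/comm/task sideband */
    ev->immediate = true;                    /* enable as soon as it is opened */
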
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index c8230c48125f..80b525c065ed 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2801,9 +2801,7 @@ static int perf_c2c__report(int argc, const char **argv)
"the input file to process"),
OPT_INCR('N', "node-info", &c2c.node_info,
"show extra node info in report (repeat for more info)"),
-#ifdef HAVE_SLANG_SUPPORT
OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"),
-#endif
OPT_BOOLEAN(0, "stats", &c2c.stats_only,
"Display only statistic tables (implies --stdio)"),
OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full,
@@ -2833,6 +2831,10 @@ static int perf_c2c__report(int argc, const char **argv)
if (argc)
usage_with_options(report_c2c_usage, options);
+#ifndef HAVE_SLANG_SUPPORT
+ c2c.use_stdio = true;
+#endif
+
if (c2c.stats_only)
c2c.use_stdio = true;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index a5cf6a99d67f..9a71f0330137 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -49,6 +49,7 @@
#include "util/clockid.h"
#include "util/pmu-hybrid.h"
#include "util/evlist-hybrid.h"
+#include "util/off_cpu.h"
#include "asm/bug.h"
#include "perf.h"
#include "cputopo.h"
@@ -162,6 +163,7 @@ struct record {
bool buildid_mmap;
bool timestamp_filename;
bool timestamp_boundary;
+ bool off_cpu;
struct switch_output switch_output;
unsigned long long samples;
unsigned long output_max_size; /* = 0: unlimited */
@@ -869,7 +871,6 @@ static int record__auxtrace_init(struct record *rec __maybe_unused)
static int record__config_text_poke(struct evlist *evlist)
{
struct evsel *evsel;
- int err;
/* Nothing to do if text poke is already configured */
evlist__for_each_entry(evlist, evsel) {
@@ -877,32 +878,23 @@ static int record__config_text_poke(struct evlist *evlist)
return 0;
}
- err = parse_events(evlist, "dummy:u", NULL);
- if (err)
- return err;
-
- evsel = evlist__last(evlist);
+ evsel = evlist__add_dummy_on_all_cpus(evlist);
+ if (!evsel)
+ return -ENOMEM;
- evsel->core.attr.freq = 0;
- evsel->core.attr.sample_period = 1;
evsel->core.attr.text_poke = 1;
evsel->core.attr.ksymbol = 1;
-
- evsel->core.system_wide = true;
- evsel->no_aux_samples = true;
evsel->immediate = true;
-
- /* Text poke must be collected on all CPUs */
- perf_cpu_map__put(evsel->core.own_cpus);
- evsel->core.own_cpus = perf_cpu_map__new(NULL);
- perf_cpu_map__put(evsel->core.cpus);
- evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
-
evsel__set_sample_bit(evsel, TIME);
return 0;
}
+static int record__config_off_cpu(struct record *rec)
+{
+ return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
+}
+
static bool record__kcore_readable(struct machine *machine)
{
char kcore[PATH_MAX];
@@ -982,14 +974,20 @@ static void record__thread_data_close_pipes(struct record_thread *thread_data)
}
}
+static bool evlist__per_thread(struct evlist *evlist)
+{
+ return cpu_map__is_dummy(evlist->core.user_requested_cpus);
+}
+
static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
{
int m, tm, nr_mmaps = evlist->core.nr_mmaps;
struct mmap *mmap = evlist->mmap;
struct mmap *overwrite_mmap = evlist->overwrite_mmap;
- struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
+ struct perf_cpu_map *cpus = evlist->core.all_cpus;
+ bool per_thread = evlist__per_thread(evlist);
- if (cpu_map__is_dummy(cpus))
+ if (per_thread)
thread_data->nr_mmaps = nr_mmaps;
else
thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
@@ -1010,7 +1008,7 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru
thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
- if (cpu_map__is_dummy(cpus) ||
+ if (per_thread ||
test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
if (thread_data->maps) {
thread_data->maps[tm] = &mmap[m];
@@ -1885,7 +1883,7 @@ static int record__synthesize(struct record *rec, bool tail)
return err;
}
- err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.user_requested_cpus,
+ err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
process_synthesized_event, NULL);
if (err < 0) {
pr_err("Couldn't synthesize cpu map.\n");
@@ -2600,6 +2598,9 @@ out_free_threads:
} else
status = err;
+ if (rec->off_cpu)
+ rec->bytes_written += off_cpu_write(rec->session);
+
record__synthesize(rec, true);
/* this will be recalculated during process_buildids() */
rec->samples = 0;
@@ -3324,6 +3325,7 @@ static struct option __record_options[] = {
OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
"write collected trace data into several data files using parallel threads",
record__parse_threads),
+ OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
OPT_END()
};
@@ -3683,12 +3685,12 @@ static int record__init_thread_default_masks(struct record *rec, struct perf_cpu
static int record__init_thread_masks(struct record *rec)
{
int ret = 0;
- struct perf_cpu_map *cpus = rec->evlist->core.user_requested_cpus;
+ struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;
if (!record__threads_enabled(rec))
return record__init_thread_default_masks(rec, cpus);
- if (cpu_map__is_dummy(cpus)) {
+ if (evlist__per_thread(rec->evlist)) {
pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
return -EINVAL;
}
@@ -3745,6 +3747,12 @@ int cmd_record(int argc, const char **argv)
# undef REASON
#endif
+#ifndef HAVE_BPF_SKEL
+# define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
+ set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
+# undef set_nobuild
+#endif
+
rec->opts.affinity = PERF_AFFINITY_SYS;
rec->evlist = evlist__new();
@@ -3981,6 +3989,14 @@ int cmd_record(int argc, const char **argv)
}
}
+ if (rec->off_cpu) {
+ err = record__config_off_cpu(rec);
+ if (err) {
+ pr_err("record__config_off_cpu failed, error %d\n", err);
+ goto out;
+ }
+ }
+
if (record_opts__config(&rec->opts)) {
err = -EINVAL;
goto out;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 7e6cc8bdf061..4ce87a8eb7d7 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -382,9 +382,6 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_
if (!counter->supported)
return -ENOENT;
- if (counter->core.system_wide)
- nthreads = 1;
-
for (thread = 0; thread < nthreads; thread++) {
struct perf_counts_values *count;
@@ -2261,7 +2258,7 @@ static void setup_system_wide(int forks)
struct evsel *counter;
evlist__for_each_entry(evsel_list, counter) {
- if (!counter->core.system_wide &&
+ if (!counter->core.requires_cpu &&
strcmp(counter->name, "duration_time")) {
return;
}
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index cee61c4ed59e..e597e4bac90f 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -605,7 +605,7 @@ static int json_events(const char *fn,
} else if (json_streq(map, field, "ExtSel")) {
char *code = NULL;
addfield(map, &code, "", "", val);
- eventcode |= strtoul(code, NULL, 0) << 21;
+ eventcode |= strtoul(code, NULL, 0) << 8;
free(code);
} else if (json_streq(map, field, "EventName")) {
addfield(map, &je.name, "", "", val);
diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py
new file mode 100755
index 000000000000..5f57d9829956
--- /dev/null
+++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py
@@ -0,0 +1,272 @@
+# SPDX-License-Identifier: GPL-2.0
+# arm-cs-trace-disasm.py: ARM CoreSight Trace Dump With Disassembler
+#
+# Author: Tor Jeremiassen <tor@ti.com>
+# Mathieu Poirier <mathieu.poirier@linaro.org>
+# Leo Yan <leo.yan@linaro.org>
+# Al Grant <Al.Grant@arm.com>
+
+from __future__ import print_function
+import os
+from os import path
+import sys
+import re
+from subprocess import *
+from optparse import OptionParser, make_option
+
+from perf_trace_context import perf_set_itrace_options, \
+ perf_sample_insn, perf_sample_srccode
+
+# Below are some example commands for using this script.
+#
+# Output disassembly with objdump:
+# perf script -s scripts/python/arm-cs-trace-disasm.py \
+# -- -d objdump -k path/to/vmlinux
+# Output disassembly with llvm-objdump:
+# perf script -s scripts/python/arm-cs-trace-disasm.py \
+# -- -d llvm-objdump-11 -k path/to/vmlinux
+# Output only source line and symbols:
+# perf script -s scripts/python/arm-cs-trace-disasm.py
+
+# Command line parsing.
+option_list = [
+ # formatting options for the bottom entry of the stack
+ make_option("-k", "--vmlinux", dest="vmlinux_name",
+ help="Set path to vmlinux file"),
+ make_option("-d", "--objdump", dest="objdump_name",
+ help="Set path to objdump executable file"),
+ make_option("-v", "--verbose", dest="verbose",
+ action="store_true", default=False,
+ help="Enable debugging log")
+]
+
+parser = OptionParser(option_list=option_list)
+(options, args) = parser.parse_args()
+
+# Initialize global dicts and regular expression
+disasm_cache = dict()
+cpu_data = dict()
+disasm_re = re.compile(r"^\s*([0-9a-fA-F]+):")
+disasm_func_re = re.compile(r"^\s*([0-9a-fA-F]+)\s.*:")
+cache_size = 64*1024
+
+glb_source_file_name = None
+glb_line_number = None
+glb_dso = None
+
+def get_optional(perf_dict, field):
+ if field in perf_dict:
+ return perf_dict[field]
+ return "[unknown]"
+
+def get_offset(perf_dict, field):
+ if field in perf_dict:
+ return f"+0x{perf_dict[field]:x}"
+ return ""
+
+def get_dso_file_path(dso_name, dso_build_id):
+ if (dso_name == "[kernel.kallsyms]" or dso_name == "vmlinux"):
+ if (options.vmlinux_name):
+ return options.vmlinux_name
+ else:
+ return dso_name
+
+ if (dso_name == "[vdso]"):
+ append = "/vdso"
+ else:
+ append = "/elf"
+
+ dso_path = f"{os.environ['PERF_BUILDID_DIR']}/{dso_name}/{dso_build_id}{append}"
+ # Replace a duplicate slash with a single slash
+ dso_path = dso_path.replace('//', '/', 1)
+ return dso_path
+
+def read_disam(dso_fname, dso_start, start_addr, stop_addr):
+ addr_range = str(start_addr) + ":" + str(stop_addr) + ":" + dso_fname
+
+ # Don't let the cache get too big, clear it when it hits max size
+ if (len(disasm_cache) > cache_size):
+ disasm_cache.clear()
+
+ if addr_range in disasm_cache:
+ disasm_output = disasm_cache[addr_range]
+ else:
+ start_addr = start_addr - dso_start
+ stop_addr = stop_addr - dso_start
+ disasm = [ options.objdump_name, "-d", "-z",
+ f"--start-address=0x{start_addr:x}",
+ f"--stop-address=0x{stop_addr:x}" ]
+ disasm += [ dso_fname ]
+ disasm_output = check_output(disasm).decode('utf-8').split('\n')
+ disasm_cache[addr_range] = disasm_output
+
+ return disasm_output
+
+def print_disam(dso_fname, dso_start, start_addr, stop_addr):
+ for line in read_disam(dso_fname, dso_start, start_addr, stop_addr):
+ m = disasm_func_re.search(line)
+ if m is None:
+ m = disasm_re.search(line)
+ if m is None:
+ continue
+ print(f"\t{line}")
+
+def print_sample(sample):
+ print(f"Sample = {{ cpu: {sample['cpu']:04} addr: 0x{sample['addr']:016x} " \
+ f"phys_addr: 0x{sample['phys_addr']:016x} ip: 0x{sample['ip']:016x} " \
+ f"pid: {sample['pid']} tid: {sample['tid']} period: {sample['period']} time: {sample['time']} }}")
+
+def trace_begin():
+ print('ARM CoreSight Trace Data Assembler Dump')
+
+def trace_end():
+ print('End')
+
+def trace_unhandled(event_name, context, event_fields_dict):
+ print(' '.join(['%s=%s' % (k, str(v)) for k, v in sorted(event_fields_dict.items())]))
+
+def common_start_str(comm, sample):
+ sec = int(sample["time"] / 1000000000)
+ ns = sample["time"] % 1000000000
+ cpu = sample["cpu"]
+ pid = sample["pid"]
+ tid = sample["tid"]
+ return f"{comm:>16} {pid:>5}/{tid:<5} [{cpu:04}] {sec:9}.{ns:09} "
+
+# This code is copied from intel-pt-events.py for printing source code
+# line and symbols.
+def print_srccode(comm, param_dict, sample, symbol, dso):
+ ip = sample["ip"]
+ if symbol == "[unknown]":
+ start_str = common_start_str(comm, sample) + ("%x" % ip).rjust(16).ljust(40)
+ else:
+ offs = get_offset(param_dict, "symoff")
+ start_str = common_start_str(comm, sample) + (symbol + offs).ljust(40)
+
+ global glb_source_file_name
+ global glb_line_number
+ global glb_dso
+
+ source_file_name, line_number, source_line = perf_sample_srccode(perf_script_context)
+ if source_file_name:
+ if glb_line_number == line_number and glb_source_file_name == source_file_name:
+ src_str = ""
+ else:
+ if len(source_file_name) > 40:
+ src_file = ("..." + source_file_name[-37:]) + " "
+ else:
+ src_file = source_file_name.ljust(41)
+
+ if source_line is None:
+ src_str = src_file + str(line_number).rjust(4) + " <source not found>"
+ else:
+ src_str = src_file + str(line_number).rjust(4) + " " + source_line
+ glb_dso = None
+ elif dso == glb_dso:
+ src_str = ""
+ else:
+ src_str = dso
+ glb_dso = dso
+
+ glb_line_number = line_number
+ glb_source_file_name = source_file_name
+
+ print(f"{start_str}{src_str}")
+
+def process_event(param_dict):
+ global cache_size
+ global options
+
+ sample = param_dict["sample"]
+ comm = param_dict["comm"]
+
+ name = param_dict["ev_name"]
+ dso = get_optional(param_dict, "dso")
+ dso_bid = get_optional(param_dict, "dso_bid")
+ dso_start = get_optional(param_dict, "dso_map_start")
+ dso_end = get_optional(param_dict, "dso_map_end")
+ symbol = get_optional(param_dict, "symbol")
+
+ if (options.verbose):
+ print(f"Event type: {name}")
+ print_sample(sample)
+
+ # If the dso is unknown we cannot dump the disassembly, so bail out
+ if (dso == '[unknown]'):
+ return
+
+ # Validate dso start and end addresses
+ if ((dso_start == '[unknown]') or (dso_end == '[unknown]')):
+ print(f"Failed to find valid dso map for dso {dso}")
+ return
+
+ if (name[0:12] == "instructions"):
+ print_srccode(comm, param_dict, sample, symbol, dso)
+ return
+
+ # Don't proceed if this event is not a branch sample.
+ if (name[0:8] != "branches"):
+ return
+
+ cpu = sample["cpu"]
+ ip = sample["ip"]
+ addr = sample["addr"]
+
+ # Initialize the CPU data if it's empty and return immediately
+ # if this is the first trace event for this CPU.
+ if (cpu_data.get(str(cpu) + 'addr') is None):
+ cpu_data[str(cpu) + 'addr'] = addr
+ return
+
+ # The format for packet is:
+ #
+ # +------------+------------+------------+
+ # sample_prev: | addr | ip | cpu |
+ # +------------+------------+------------+
+ # sample_next: | addr | ip | cpu |
+ # +------------+------------+------------+
+ #
+ # We need to combine the two continuous packets to get the instruction
+ # range for sample_prev::cpu:
+ #
+ # [ sample_prev::addr .. sample_next::ip ]
+ #
+ # For this purpose, sample_prev::addr is stored in the cpu_data
+ # structure and read back as 'start_addr' when the next packet comes;
+ # 'stop_addr' is calculated as sample_next::ip plus an extra 4 bytes,
+ # so that the final objdump output includes the last instruction at
+ # sample_next::ip.
+ start_addr = cpu_data[str(cpu) + 'addr']
+ stop_addr = ip + 4
+
+ # Record for previous sample packet
+ cpu_data[str(cpu) + 'addr'] = addr
+
+ # Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4
+ if (start_addr == 0 and stop_addr == 4):
+ print(f"CPU{cpu}: CS_ETM_TRACE_ON packet is inserted")
+ return
+
+ if (start_addr < int(dso_start) or start_addr > int(dso_end)):
+ print(f"Start address 0x{start_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}")
+ return
+
+ if (stop_addr < int(dso_start) or stop_addr > int(dso_end)):
+ print(f"Stop address 0x{stop_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}")
+ return
+
+ if (options.objdump_name is not None):
+ # The kernel dso needs no virtual memory offset subtracted for
+ # disassembly, so in this case set vm_start to zero.
+ if (dso == "[kernel.kallsyms]"):
+ dso_vm_start = 0
+ else:
+ dso_vm_start = int(dso_start)
+
+ dso_fname = get_dso_file_path(dso, dso_bid)
+ if path.exists(dso_fname):
+ print_disam(dso_fname, dso_vm_start, start_addr, stop_addr)
+ else:
+ print(f"Failed to find dso {dso} for address range [ 0x{start_addr:x} .. 0x{stop_addr:x} ]")
+
+ print_srccode(comm, param_dict, sample, symbol, dso)
diff --git a/tools/perf/tests/shell/lib/perf_csv_output_lint.py b/tools/perf/tests/shell/lib/perf_csv_output_lint.py
new file mode 100644
index 000000000000..714f283cfb1b
--- /dev/null
+++ b/tools/perf/tests/shell/lib/perf_csv_output_lint.py
@@ -0,0 +1,48 @@
+#!/usr/bin/python
+# SPDX-License-Identifier: GPL-2.0
+
+import argparse
+import sys
+
+# Basic sanity check of perf CSV output as specified in the man page.
+# Currently just checks the number of fields per line in output.
+
+ap = argparse.ArgumentParser()
+ap.add_argument('--no-args', action='store_true')
+ap.add_argument('--interval', action='store_true')
+ap.add_argument('--system-wide-no-aggr', action='store_true')
+ap.add_argument('--system-wide', action='store_true')
+ap.add_argument('--event', action='store_true')
+ap.add_argument('--per-core', action='store_true')
+ap.add_argument('--per-thread', action='store_true')
+ap.add_argument('--per-die', action='store_true')
+ap.add_argument('--per-node', action='store_true')
+ap.add_argument('--per-socket', action='store_true')
+ap.add_argument('--separator', default=',', nargs='?')
+args = ap.parse_args()
+
+Lines = sys.stdin.readlines()
+
+def check_csv_output(exp):
+ for line in Lines:
+ if 'failed' not in line:
+ count = line.count(args.separator)
+ if count != exp:
+ sys.stdout.write(''.join(Lines))
+ raise RuntimeError(f'wrong number of fields. expected {exp} in {line}')
+
+try:
+ if args.no_args or args.system_wide or args.event:
+ expected_items = 6
+ elif args.interval or args.per_thread or args.system_wide_no_aggr:
+ expected_items = 7
+ elif args.per_core or args.per_socket or args.per_node or args.per_die:
+ expected_items = 8
+ else:
+ ap.print_help()
+ raise RuntimeError('No checking option specified')
+ check_csv_output(expected_items)
+
+except:
+ sys.stdout.write('Test failed for input: ' + ''.join(Lines))
+ raise
diff --git a/tools/perf/tests/shell/record_offcpu.sh b/tools/perf/tests/shell/record_offcpu.sh
new file mode 100755
index 000000000000..96e0739f7478
--- /dev/null
+++ b/tools/perf/tests/shell/record_offcpu.sh
@@ -0,0 +1,60 @@
+#!/bin/sh
+# perf record offcpu profiling tests
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+err=0
+perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+
+cleanup() {
+ rm -f ${perfdata}
+ rm -f ${perfdata}.old
+ trap - exit term int
+}
+
+trap_cleanup() {
+ cleanup
+ exit 1
+}
+trap trap_cleanup exit term int
+
+test_offcpu() {
+ echo "Basic off-cpu test"
+ if [ `id -u` != 0 ]
+ then
+ echo "Basic off-cpu test [Skipped permission]"
+ err=2
+ return
+ fi
+ if perf record --off-cpu -o ${perfdata} --quiet true 2>&1 | grep BUILD_BPF_SKEL
+ then
+ echo "Basic off-cpu test [Skipped missing BPF support]"
+ err=2
+ return
+ fi
+ if ! perf record --off-cpu -e dummy -o ${perfdata} sleep 1 2> /dev/null
+ then
+ echo "Basic off-cpu test [Failed record]"
+ err=1
+ return
+ fi
+ if ! perf evlist -i ${perfdata} | grep -q "offcpu-time"
+ then
+ echo "Basic off-cpu test [Failed record]"
+ err=1
+ return
+ fi
+ if ! perf report -i ${perfdata} -q --percent-limit=90 | egrep -q sleep
+ then
+ echo "Basic off-cpu test [Failed missing output]"
+ err=1
+ return
+ fi
+ echo "Basic off-cpu test [Success]"
+}
+
+test_offcpu
+
+cleanup
+exit $err
diff --git a/tools/perf/tests/shell/stat+csv_output.sh b/tools/perf/tests/shell/stat+csv_output.sh
new file mode 100755
index 000000000000..983220ef3cb4
--- /dev/null
+++ b/tools/perf/tests/shell/stat+csv_output.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# perf stat CSV output linter
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+# Tests various perf stat CSV output commands for the
+# correct number of fields and the CSV separator set to ','.
+
+set -e
+
+pythonchecker=$(dirname $0)/lib/perf_csv_output_lint.py
+if [ "x$PYTHON" == "x" ]
+then
+ if which python3 > /dev/null
+ then
+ PYTHON=python3
+ elif which python > /dev/null
+ then
+ PYTHON=python
+ else
+ echo "Skipping test: python not detected, please set the PYTHON environment variable."
+ exit 2
+ fi
+fi
+
+# Return true if perf_event_paranoid is > $1 and not running as root.
+function ParanoidAndNotRoot()
+{
+ [ $(id -u) != 0 ] && [ $(cat /proc/sys/kernel/perf_event_paranoid) -gt $1 ]
+}
+
+check_no_args()
+{
+ echo -n "Checking CSV output: no args "
+ perf stat -x, true 2>&1 | $PYTHON $pythonchecker --no-args
+ echo "[Success]"
+}
+
+check_system_wide()
+{
+ echo -n "Checking CSV output: system wide "
+ if ParanoidAndNotRoot 0
+ then
+ echo "[Skip] paranoid and not root"
+ return
+ fi
+ perf stat -x, -a true 2>&1 | $PYTHON $pythonchecker --system-wide
+ echo "[Success]"
+}
+
+check_system_wide_no_aggr()
+{
+ echo -n "Checking CSV output: system wide "
+ if ParanoidAndNotRoot 0
+ then
+ echo "[Skip] paranoid and not root"
+ return
+ fi
+ echo -n "Checking CSV output: system wide no aggregation "
+ perf stat -x, -A -a --no-merge true 2>&1 | $PYTHON $pythonchecker --system-wide-no-aggr
+ echo "[Success]"
+}
+
+check_interval()
+{
+ echo -n "Checking CSV output: interval "
+ perf stat -x, -I 1000 true 2>&1 | $PYTHON $pythonchecker --interval
+ echo "[Success]"
+}
+
+
+check_event()
+{
+ echo -n "Checking CSV output: event "
+ perf stat -x, -e cpu-clock true 2>&1 | $PYTHON $pythonchecker --event
+ echo "[Success]"
+}
+
+check_per_core()
+{
+ echo -n "Checking CSV output: per core "
+ if ParanoidAndNotRoot 0
+ then
+ echo "[Skip] paranoid and not root"
+ return
+ fi
+ perf stat -x, --per-core -a true 2>&1 | $PYTHON $pythonchecker --per-core
+ echo "[Success]"
+}
+
+check_per_thread()
+{
+ echo -n "Checking CSV output: per thread "
+ if ParanoidAndNotRoot 0
+ then
+ echo "[Skip] paranoid and not root"
+ return
+ fi
+ perf stat -x, --per-thread -a true 2>&1 | $PYTHON $pythonchecker --per-thread
+ echo "[Success]"
+}
+
+check_per_die()
+{
+ echo -n "Checking CSV output: per die "
+ if ParanoidAndNotRoot 0
+ then
+ echo "[Skip] paranoid and not root"
+ return
+ fi
+ perf stat -x, --per-die -a true 2>&1 | $PYTHON $pythonchecker --per-die
+ echo "[Success]"
+}
+
+check_per_node()
+{
+ echo -n "Checking CSV output: per node "
+ if ParanoidAndNotRoot 0
+ then
+ echo "[Skip] paranoid and not root"
+ return
+ fi
+ perf stat -x, --per-node -a true 2>&1 | $PYTHON $pythonchecker --per-node
+ echo "[Success]"
+}
+
+check_per_socket()
+{
+ echo -n "Checking CSV output: per socket "
+ if ParanoidAndNotRoot 0
+ then
+ echo "[Skip] paranoid and not root"
+ return
+ fi
+ perf stat -x, --per-socket -a true 2>&1 | $PYTHON $pythonchecker --per-socket
+ echo "[Success]"
+}
+
+check_no_args
+check_system_wide
+check_system_wide_no_aggr
+check_interval
+check_event
+check_per_core
+check_per_thread
+check_per_die
+check_per_node
+check_per_socket
+exit 0
diff --git a/tools/perf/tests/shell/test_intel_pt.sh b/tools/perf/tests/shell/test_intel_pt.sh
new file mode 100755
index 000000000000..a3298643884d
--- /dev/null
+++ b/tools/perf/tests/shell/test_intel_pt.sh
@@ -0,0 +1,71 @@
+#!/bin/sh
+# Miscellaneous Intel PT testing
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+# Skip if no Intel PT
+perf list | grep -q 'intel_pt//' || exit 2
+
+skip_cnt=0
+ok_cnt=0
+err_cnt=0
+
+tmpfile=`mktemp`
+perfdatafile=`mktemp`
+
+can_cpu_wide()
+{
+ perf record -o ${tmpfile} -B -N --no-bpf-event -e dummy:u -C $1 true 2>&1 >/dev/null || return 2
+ return 0
+}
+
+test_system_wide_side_band()
+{
+ # Need CPU 0 and CPU 1
+ can_cpu_wide 0 || return $?
+ can_cpu_wide 1 || return $?
+
+ # Record on CPU 0 a task running on CPU 1
+ perf record -B -N --no-bpf-event -o ${perfdatafile} -e intel_pt//u -C 0 -- taskset --cpu-list 1 uname
+
+ # Should get MMAP events from CPU 1 because they can be needed to decode
+ mmap_cnt=`perf script -i ${perfdatafile} --no-itrace --show-mmap-events -C 1 2>/dev/null | grep MMAP | wc -l`
+
+ if [ ${mmap_cnt} -gt 0 ] ; then
+ return 0
+ fi
+
+ echo "Failed to record MMAP events on CPU 1 when tracing CPU 0"
+ return 1
+}
+
+count_result()
+{
+ if [ $1 -eq 2 ] ; then
+ skip_cnt=`expr ${skip_cnt} \+ 1`
+ return
+ fi
+ if [ $1 -eq 0 ] ; then
+ ok_cnt=`expr ${ok_cnt} \+ 1`
+ return
+ fi
+ err_cnt=`expr ${err_cnt} \+ 1`
+}
+
+test_system_wide_side_band
+
+count_result $?
+
+rm -f ${tmpfile}
+rm -f ${perfdatafile}
+
+if [ ${err_cnt} -gt 0 ] ; then
+ exit 1
+fi
+
+if [ ${ok_cnt} -gt 0 ] ; then
+ exit 0
+fi
+
+exit 2
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 9a7209a99e16..a51267d88ca9 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -147,6 +147,7 @@ perf-$(CONFIG_LIBBPF) += bpf_map.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o
perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
perf-$(CONFIG_LIBELF) += symbol-elf.o
perf-$(CONFIG_LIBELF) += probe-file.o
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index b11549ae39df..511dd3caa1bc 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -125,7 +125,7 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
mm->tid = mp->tid;
mm->cpu = mp->cpu.cpu;
- if (!mp->len) {
+ if (!mp->len || !mp->mmap_needed) {
mm->base = NULL;
return 0;
}
@@ -168,13 +168,20 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
}
void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
- struct evlist *evlist, int idx,
- bool per_cpu)
+ struct evlist *evlist,
+ struct evsel *evsel, int idx)
{
+ bool per_cpu = !perf_cpu_map__empty(evlist->core.user_requested_cpus);
+
+ mp->mmap_needed = evsel->needs_auxtrace_mmap;
+
+ if (!mp->mmap_needed)
+ return;
+
mp->idx = idx;
if (per_cpu) {
- mp->cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, idx);
+ mp->cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx);
if (evlist->core.threads)
mp->tid = perf_thread_map__pid(evlist->core.threads, 0);
else
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index dc38b6f57232..cd0d25c2751c 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -344,6 +344,10 @@ struct auxtrace_mmap {
* @idx: index of this mmap
* @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
* mmap) otherwise %0
+ * @mmap_needed: set to %false for non-auxtrace events. This is needed because
+ * auxtrace mmapping is done in the same code path as non-auxtrace
+ * mmapping but not every evsel that needs non-auxtrace mmapping
+ * also needs auxtrace mmapping.
* @cpu: cpu number for a per-cpu mmap otherwise %-1
*/
struct auxtrace_mmap_params {
@@ -353,6 +357,7 @@ struct auxtrace_mmap_params {
int prot;
int idx;
pid_t tid;
+ bool mmap_needed;
struct perf_cpu cpu;
};
@@ -490,8 +495,8 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
unsigned int auxtrace_pages,
bool auxtrace_overwrite);
void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
- struct evlist *evlist, int idx,
- bool per_cpu);
+ struct evlist *evlist,
+ struct evsel *evsel, int idx);
typedef int (*process_auxtrace_t)(struct perf_tool *tool,
struct mmap *map,
@@ -863,8 +868,8 @@ void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
unsigned int auxtrace_pages,
bool auxtrace_overwrite);
void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
- struct evlist *evlist, int idx,
- bool per_cpu);
+ struct evlist *evlist,
+ struct evsel *evsel, int idx);
#define ITRACE_HELP ""
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 8271ab764eb5..eee64ddb766d 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -35,11 +35,12 @@ struct btf *btf__load_from_kernel_by_id(__u32 id)
}
#endif
-int __weak bpf_prog_load(enum bpf_prog_type prog_type,
- const char *prog_name __maybe_unused,
- const char *license,
- const struct bpf_insn *insns, size_t insn_cnt,
- const struct bpf_prog_load_opts *opts)
+#ifndef HAVE_LIBBPF_BPF_PROG_LOAD
+int bpf_prog_load(enum bpf_prog_type prog_type,
+ const char *prog_name __maybe_unused,
+ const char *license,
+ const struct bpf_insn *insns, size_t insn_cnt,
+ const struct bpf_prog_load_opts *opts)
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
@@ -47,8 +48,10 @@ int __weak bpf_prog_load(enum bpf_prog_type prog_type,
opts->kern_version, opts->log_buf, opts->log_size);
#pragma GCC diagnostic pop
}
+#endif
-struct bpf_program * __weak
+#ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM
+struct bpf_program *
bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
{
#pragma GCC diagnostic push
@@ -56,8 +59,10 @@ bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
return bpf_program__next(prev, obj);
#pragma GCC diagnostic pop
}
+#endif
-struct bpf_map * __weak
+#ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_MAP
+struct bpf_map *
bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
{
#pragma GCC diagnostic push
@@ -65,8 +70,10 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
return bpf_map__next(prev, obj);
#pragma GCC diagnostic pop
}
+#endif
-const void * __weak
+#ifndef HAVE_LIBBPF_BTF__RAW_DATA
+const void *
btf__raw_data(const struct btf *btf_ro, __u32 *size)
{
#pragma GCC diagnostic push
@@ -74,6 +81,7 @@ btf__raw_data(const struct btf *btf_ro, __u32 *size)
return btf__get_raw_data(btf_ro, size);
#pragma GCC diagnostic pop
}
+#endif
static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len)
{
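
These hunks replace __weak fallbacks with compile-time feature detection:
a __weak definition can interact badly with static linking and compiler
optimizations, whereas a HAVE_LIBBPF_* guard is decided once at build time
by trying to compile a small probe against the installed libbpf. A
hypothetical probe for bpf_prog_load(), in the spirit of the tools/build
feature tests, could be as small as:

	// compiles and links only when libbpf already provides bpf_prog_load()
	#include <bpf/bpf.h>

	int main(void)
	{
		return bpf_prog_load(0 /* prog_type */, NULL /* prog_name */,
				     NULL /* license */, NULL /* insns */,
				     0 /* insn_cnt */, NULL /* opts */);
	}
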
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index d4931f54e1dd..ef1c15e4aeba 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -312,7 +312,10 @@ static bool bperf_attr_map_compatible(int attr_map_fd)
(map_info.value_size == sizeof(struct perf_event_attr_map_entry));
}
-int __weak
+#ifndef HAVE_LIBBPF_BPF_MAP_CREATE
+LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size,
+ int value_size, int max_entries, __u32 map_flags);
+int
bpf_map_create(enum bpf_map_type map_type,
const char *map_name __maybe_unused,
__u32 key_size,
@@ -325,6 +328,7 @@ bpf_map_create(enum bpf_map_type map_type,
return bpf_create_map(map_type, key_size, value_size, max_entries, 0);
#pragma GCC diagnostic pop
}
+#endif
static int bperf_lock_attr_map(struct target *target)
{
diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
new file mode 100644
index 000000000000..b73e84a02264
--- /dev/null
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/bpf_counter.h"
+#include "util/debug.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/off_cpu.h"
+#include "util/perf-hooks.h"
+#include "util/record.h"
+#include "util/session.h"
+#include "util/target.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+#include "util/cgroup.h"
+#include <bpf/bpf.h>
+
+#include "bpf_skel/off_cpu.skel.h"
+
+#define MAX_STACKS 32
+/* we don't need an actual timestamp, we just want the samples to sort last */
+#define OFF_CPU_TIMESTAMP (~0ull << 32)
+
+static struct off_cpu_bpf *skel;
+
+struct off_cpu_key {
+ u32 pid;
+ u32 tgid;
+ u32 stack_id;
+ u32 state;
+ u64 cgroup_id;
+};
+
+union off_cpu_data {
+ struct perf_event_header hdr;
+ u64 array[1024 / sizeof(u64)];
+};
+
+static int off_cpu_config(struct evlist *evlist)
+{
+ struct evsel *evsel;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_BPF_OUTPUT,
+ .size = sizeof(attr), /* to capture ABI version */
+ };
+ char *evname = strdup(OFFCPU_EVENT);
+
+ if (evname == NULL)
+ return -ENOMEM;
+
+ evsel = evsel__new(&attr);
+ if (!evsel) {
+ free(evname);
+ return -ENOMEM;
+ }
+
+ evsel->core.attr.freq = 1;
+ evsel->core.attr.sample_period = 1;
+ /* off-cpu analysis depends on stack trace */
+ evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN;
+
+ evlist__add(evlist, evsel);
+
+ free(evsel->name);
+ evsel->name = evname;
+
+ return 0;
+}
+
+static void off_cpu_start(void *arg)
+{
+ struct evlist *evlist = arg;
+
+ /* update task filter for the given workload */
+ if (!skel->bss->has_cpu && !skel->bss->has_task &&
+ perf_thread_map__pid(evlist->core.threads, 0) != -1) {
+ int fd;
+ u32 pid;
+ u8 val = 1;
+
+ skel->bss->has_task = 1;
+ fd = bpf_map__fd(skel->maps.task_filter);
+ pid = perf_thread_map__pid(evlist->core.threads, 0);
+ bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+ }
+
+ skel->bss->enabled = 1;
+}
+
+static void off_cpu_finish(void *arg __maybe_unused)
+{
+ skel->bss->enabled = 0;
+ off_cpu_bpf__destroy(skel);
+}
+
+/* v5.18 kernels added a prev_state arg, so check the tracepoint signature */
+static void check_sched_switch_args(void)
+{
+ const struct btf *btf = bpf_object__btf(skel->obj);
+ const struct btf_type *t1, *t2, *t3;
+ u32 type_id;
+
+ type_id = btf__find_by_name_kind(btf, "bpf_trace_sched_switch",
+ BTF_KIND_TYPEDEF);
+ if ((s32)type_id < 0)
+ return;
+
+ t1 = btf__type_by_id(btf, type_id);
+ if (t1 == NULL)
+ return;
+
+ t2 = btf__type_by_id(btf, t1->type);
+ if (t2 == NULL || !btf_is_ptr(t2))
+ return;
+
+ t3 = btf__type_by_id(btf, t2->type);
+ if (t3 && btf_is_func_proto(t3) && btf_vlen(t3) == 4) {
+ /* new format: pass prev_state as 4th arg */
+ skel->rodata->has_prev_state = true;
+ }
+}
+
+int off_cpu_prepare(struct evlist *evlist, struct target *target,
+ struct record_opts *opts)
+{
+ int err, fd, i;
+ int ncpus = 1, ntasks = 1, ncgrps = 1;
+
+ if (off_cpu_config(evlist) < 0) {
+		pr_err("Failed to configure off-cpu BPF event\n");
+ return -1;
+ }
+
+ skel = off_cpu_bpf__open();
+ if (!skel) {
+ pr_err("Failed to open off-cpu BPF skeleton\n");
+ return -1;
+ }
+
+ /* don't need to set cpu filter for system-wide mode */
+ if (target->cpu_list) {
+ ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
+ bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
+ }
+
+ if (target__has_task(target)) {
+ ntasks = perf_thread_map__nr(evlist->core.threads);
+ bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
+ }
+
+ if (evlist__first(evlist)->cgrp) {
+ ncgrps = evlist->core.nr_entries - 1; /* excluding a dummy */
+ bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps);
+
+ if (!cgroup_is_v2("perf_event"))
+ skel->rodata->uses_cgroup_v1 = true;
+ }
+
+ if (opts->record_cgroup) {
+ skel->rodata->needs_cgroup = true;
+
+ if (!cgroup_is_v2("perf_event"))
+ skel->rodata->uses_cgroup_v1 = true;
+ }
+
+ set_max_rlimit();
+ check_sched_switch_args();
+
+ err = off_cpu_bpf__load(skel);
+ if (err) {
+ pr_err("Failed to load off-cpu skeleton\n");
+ goto out;
+ }
+
+ if (target->cpu_list) {
+ u32 cpu;
+ u8 val = 1;
+
+ skel->bss->has_cpu = 1;
+ fd = bpf_map__fd(skel->maps.cpu_filter);
+
+ for (i = 0; i < ncpus; i++) {
+ cpu = perf_cpu_map__cpu(evlist->core.user_requested_cpus, i).cpu;
+ bpf_map_update_elem(fd, &cpu, &val, BPF_ANY);
+ }
+ }
+
+ if (target__has_task(target)) {
+ u32 pid;
+ u8 val = 1;
+
+ skel->bss->has_task = 1;
+ fd = bpf_map__fd(skel->maps.task_filter);
+
+ for (i = 0; i < ntasks; i++) {
+ pid = perf_thread_map__pid(evlist->core.threads, i);
+ bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
+ }
+ }
+
+ if (evlist__first(evlist)->cgrp) {
+ struct evsel *evsel;
+ u8 val = 1;
+
+ skel->bss->has_cgroup = 1;
+ fd = bpf_map__fd(skel->maps.cgroup_filter);
+
+ evlist__for_each_entry(evlist, evsel) {
+ struct cgroup *cgrp = evsel->cgrp;
+
+ if (cgrp == NULL)
+ continue;
+
+ if (!cgrp->id && read_cgroup_id(cgrp) < 0) {
+ pr_err("Failed to read cgroup id of %s\n",
+ cgrp->name);
+ goto out;
+ }
+
+ bpf_map_update_elem(fd, &cgrp->id, &val, BPF_ANY);
+ }
+ }
+
+ err = off_cpu_bpf__attach(skel);
+ if (err) {
+ pr_err("Failed to attach off-cpu BPF skeleton\n");
+ goto out;
+ }
+
+ if (perf_hooks__set_hook("record_start", off_cpu_start, evlist) ||
+ perf_hooks__set_hook("record_end", off_cpu_finish, evlist)) {
+		pr_err("Failed to set off-cpu record hooks\n");
+ goto out;
+ }
+
+ return 0;
+
+out:
+ off_cpu_bpf__destroy(skel);
+ return -1;
+}
+
+int off_cpu_write(struct perf_session *session)
+{
+ int bytes = 0, size;
+ int fd, stack;
+ u64 sample_type, val, sid = 0;
+ struct evsel *evsel;
+ struct perf_data_file *file = &session->data->file;
+ struct off_cpu_key prev, key;
+ union off_cpu_data data = {
+ .hdr = {
+ .type = PERF_RECORD_SAMPLE,
+ .misc = PERF_RECORD_MISC_USER,
+ },
+ };
+ u64 tstamp = OFF_CPU_TIMESTAMP;
+
+ skel->bss->enabled = 0;
+
+ evsel = evlist__find_evsel_by_str(session->evlist, OFFCPU_EVENT);
+ if (evsel == NULL) {
+ pr_err("%s evsel not found\n", OFFCPU_EVENT);
+ return 0;
+ }
+
+ sample_type = evsel->core.attr.sample_type;
+
+ if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) {
+ if (evsel->core.id)
+ sid = evsel->core.id[0];
+ }
+
+ fd = bpf_map__fd(skel->maps.off_cpu);
+ stack = bpf_map__fd(skel->maps.stacks);
+ memset(&prev, 0, sizeof(prev));
+
+ while (!bpf_map_get_next_key(fd, &prev, &key)) {
+		int n = 1; /* data.array[0] holds the perf_event_header */
+ int ip_pos = -1;
+
+ bpf_map_lookup_elem(fd, &key, &val);
+
+ if (sample_type & PERF_SAMPLE_IDENTIFIER)
+ data.array[n++] = sid;
+ if (sample_type & PERF_SAMPLE_IP) {
+ ip_pos = n;
+ data.array[n++] = 0; /* will be updated */
+ }
+ if (sample_type & PERF_SAMPLE_TID)
+ data.array[n++] = (u64)key.pid << 32 | key.tgid;
+ if (sample_type & PERF_SAMPLE_TIME)
+ data.array[n++] = tstamp;
+ if (sample_type & PERF_SAMPLE_ID)
+ data.array[n++] = sid;
+ if (sample_type & PERF_SAMPLE_CPU)
+ data.array[n++] = 0;
+ if (sample_type & PERF_SAMPLE_PERIOD)
+ data.array[n++] = val;
+ if (sample_type & PERF_SAMPLE_CALLCHAIN) {
+ int len = 0;
+
+ /* data.array[n] is callchain->nr (updated later) */
+ data.array[n + 1] = PERF_CONTEXT_USER;
+ data.array[n + 2] = 0;
+
+ bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]);
+ while (data.array[n + 2 + len])
+ len++;
+
+ /* update length of callchain */
+ data.array[n] = len + 1;
+
+ /* update sample ip with the first callchain entry */
+ if (ip_pos >= 0)
+ data.array[ip_pos] = data.array[n + 2];
+
+ /* calculate sample callchain data array length */
+ n += len + 2;
+ }
+ if (sample_type & PERF_SAMPLE_CGROUP)
+ data.array[n++] = key.cgroup_id;
+ /* TODO: handle more sample types */
+
+ size = n * sizeof(u64);
+ data.hdr.size = size;
+ bytes += size;
+
+ if (perf_data_file__write(file, &data, size) < 0) {
+ pr_err("failed to write perf data, error: %m\n");
+ return bytes;
+ }
+
+ prev = key;
+ /* increase dummy timestamp to sort later samples */
+ tstamp++;
+ }
+ return bytes;
+}
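
off_cpu_write() builds each PERF_RECORD_SAMPLE by hand, so the record layout
follows directly from the evsel's sample_type bits; the TIME word carries
OFF_CPU_TIMESTAMP plus a counter so the synthetic samples sort after every
real sample. A worked example (constructed for illustration, not dumped from
a real perf.data file) for sample_type = IDENTIFIER | IP | TID | TIME |
PERIOD | CALLCHAIN and a two-entry user stack:

	/*
	 *   data.array[0]  struct perf_event_header (type, misc, size)
	 *   data.array[1]  sid                         PERF_SAMPLE_IDENTIFIER
	 *   data.array[2]  ip (copied from array[8])   PERF_SAMPLE_IP
	 *   data.array[3]  pid << 32 | tgid            PERF_SAMPLE_TID
	 *   data.array[4]  OFF_CPU_TIMESTAMP + k       PERF_SAMPLE_TIME
	 *   data.array[5]  val (off-cpu time in ns)    PERF_SAMPLE_PERIOD
	 *   data.array[6]  nr = 3                      PERF_SAMPLE_CALLCHAIN
	 *   data.array[7]  PERF_CONTEXT_USER
	 *   data.array[8]  stack entry 0
	 *   data.array[9]  stack entry 1
	 *
	 * n ends up at 10, so hdr.size = 10 * sizeof(u64) = 80 bytes.
	 */
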
diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c
new file mode 100644
index 000000000000..792ae2847080
--- /dev/null
+++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2022 Google
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+/* task->flags for off-cpu analysis */
+#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
+
+/* task->state for off-cpu analysis */
+#define TASK_INTERRUPTIBLE 0x0001
+#define TASK_UNINTERRUPTIBLE 0x0002
+
+#define MAX_STACKS 32
+#define MAX_ENTRIES 102400
+
+struct tstamp_data {
+ __u32 stack_id;
+ __u32 state;
+ __u64 timestamp;
+};
+
+struct offcpu_key {
+ __u32 pid;
+ __u32 tgid;
+ __u32 stack_id;
+ __u32 state;
+ __u64 cgroup_id;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, MAX_STACKS * sizeof(__u64));
+ __uint(max_entries, MAX_ENTRIES);
+} stacks SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct tstamp_data);
+} tstamp SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(struct offcpu_key));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, MAX_ENTRIES);
+} off_cpu SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} cpu_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} task_filter SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64));
+ __uint(value_size, sizeof(__u8));
+ __uint(max_entries, 1);
+} cgroup_filter SEC(".maps");
+
+/* old kernel task_struct definition */
+struct task_struct___old {
+ long state;
+} __attribute__((preserve_access_index));
+
+int enabled = 0;
+int has_cpu = 0;
+int has_task = 0;
+int has_cgroup = 0;
+
+const volatile bool has_prev_state = false;
+const volatile bool needs_cgroup = false;
+const volatile bool uses_cgroup_v1 = false;
+
+/*
+ * Older kernels called the field task_struct->state; newer ones call it
+ * '__state'. Use the BPF CO-RE "ignored suffix rule" to handle both:
+ *
+ * https://nakryiko.com/posts/bpf-core-reference-guide/#handling-incompatible-field-and-type-changes
+ */
+static inline int get_task_state(struct task_struct *t)
+{
+ if (bpf_core_field_exists(t->__state))
+ return BPF_CORE_READ(t, __state);
+
+ /* recast pointer to capture task_struct___old type for compiler */
+ struct task_struct___old *t_old = (void *)t;
+
+ /* now use old "state" name of the field */
+ return BPF_CORE_READ(t_old, state);
+}
+
+static inline __u64 get_cgroup_id(struct task_struct *t)
+{
+ struct cgroup *cgrp;
+
+ if (uses_cgroup_v1)
+ cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_event_cgrp_id], cgroup);
+ else
+ cgrp = BPF_CORE_READ(t, cgroups, dfl_cgrp);
+
+ return BPF_CORE_READ(cgrp, kn, id);
+}
+
+static inline int can_record(struct task_struct *t, int state)
+{
+	/* kernel threads don't have a user stack */
+ if (t->flags & PF_KTHREAD)
+ return 0;
+
+ if (state != TASK_INTERRUPTIBLE &&
+ state != TASK_UNINTERRUPTIBLE)
+ return 0;
+
+ if (has_cpu) {
+ __u32 cpu = bpf_get_smp_processor_id();
+ __u8 *ok;
+
+ ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
+ if (!ok)
+ return 0;
+ }
+
+ if (has_task) {
+ __u8 *ok;
+ __u32 pid = t->pid;
+
+ ok = bpf_map_lookup_elem(&task_filter, &pid);
+ if (!ok)
+ return 0;
+ }
+
+ if (has_cgroup) {
+ __u8 *ok;
+ __u64 cgrp_id = get_cgroup_id(t);
+
+ ok = bpf_map_lookup_elem(&cgroup_filter, &cgrp_id);
+ if (!ok)
+ return 0;
+ }
+
+ return 1;
+}
+
+static int off_cpu_stat(u64 *ctx, struct task_struct *prev,
+ struct task_struct *next, int state)
+{
+ __u64 ts;
+ __u32 stack_id;
+ struct tstamp_data *pelem;
+
+ ts = bpf_ktime_get_ns();
+
+ if (!can_record(prev, state))
+ goto next;
+
+ stack_id = bpf_get_stackid(ctx, &stacks,
+ BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK);
+
+ pelem = bpf_task_storage_get(&tstamp, prev, NULL,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!pelem)
+ goto next;
+
+ pelem->timestamp = ts;
+ pelem->state = state;
+ pelem->stack_id = stack_id;
+
+next:
+ pelem = bpf_task_storage_get(&tstamp, next, NULL, 0);
+
+ if (pelem && pelem->timestamp) {
+ struct offcpu_key key = {
+ .pid = next->pid,
+ .tgid = next->tgid,
+ .stack_id = pelem->stack_id,
+ .state = pelem->state,
+ .cgroup_id = needs_cgroup ? get_cgroup_id(next) : 0,
+ };
+ __u64 delta = ts - pelem->timestamp;
+ __u64 *total;
+
+ total = bpf_map_lookup_elem(&off_cpu, &key);
+ if (total)
+ *total += delta;
+ else
+ bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY);
+
+		/* clear the timestamp so it cannot be reused later */
+ pelem->timestamp = 0;
+ }
+
+ return 0;
+}
+
+SEC("tp_btf/sched_switch")
+int on_switch(u64 *ctx)
+{
+ struct task_struct *prev, *next;
+ int prev_state;
+
+ if (!enabled)
+ return 0;
+
+ prev = (struct task_struct *)ctx[1];
+ next = (struct task_struct *)ctx[2];
+
+ if (has_prev_state)
+ prev_state = (int)ctx[3];
+ else
+ prev_state = get_task_state(prev);
+
+ return off_cpu_stat(ctx, prev, next, prev_state);
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
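
For tp_btf programs the ctx array mirrors the raw tracepoint arguments. As a
point of reference (kernel-version dependent, which is exactly why
check_sched_switch_args() probes the BTF), the sched_switch prototype on
v5.18 kernels looks like:

	/*
	 * TP_PROTO(bool preempt, struct task_struct *prev,
	 *	    struct task_struct *next, unsigned int prev_state)
	 *
	 * so ctx[0] = preempt, ctx[1] = prev, ctx[2] = next and, only on
	 * kernels that pass it, ctx[3] = prev_state; older kernels fall
	 * back to get_task_state(prev).
	 */
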
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 7f9f588e88c6..48af7d379d82 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -242,14 +242,20 @@ int __evlist__add_default(struct evlist *evlist, bool precise)
return 0;
}
-int evlist__add_dummy(struct evlist *evlist)
+static struct evsel *evlist__dummy_event(struct evlist *evlist)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_DUMMY,
.size = sizeof(attr), /* to capture ABI version */
};
- struct evsel *evsel = evsel__new_idx(&attr, evlist->core.nr_entries);
+
+ return evsel__new_idx(&attr, evlist->core.nr_entries);
+}
+
+int evlist__add_dummy(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__dummy_event(evlist);
if (evsel == NULL)
return -ENOMEM;
@@ -258,6 +264,51 @@ int evlist__add_dummy(struct evlist *evlist)
return 0;
}
+static void evlist__add_on_all_cpus(struct evlist *evlist, struct evsel *evsel)
+{
+ evsel->core.system_wide = true;
+
+ /*
+ * All CPUs.
+ *
+	 * Note perf_event_open() does not accept CPUs that are not online, so
+	 * in practice this list covers exactly the online CPUs.
+ */
+ perf_cpu_map__put(evsel->core.own_cpus);
+ evsel->core.own_cpus = perf_cpu_map__new(NULL);
+ perf_cpu_map__put(evsel->core.cpus);
+ evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
+
+ /* No threads */
+ perf_thread_map__put(evsel->core.threads);
+ evsel->core.threads = perf_thread_map__new_dummy();
+
+ evlist__add(evlist, evsel);
+}
+
+struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide)
+{
+ struct evsel *evsel = evlist__dummy_event(evlist);
+
+ if (!evsel)
+ return NULL;
+
+ evsel->core.attr.exclude_kernel = 1;
+ evsel->core.attr.exclude_guest = 1;
+ evsel->core.attr.exclude_hv = 1;
+ evsel->core.attr.freq = 0;
+ evsel->core.attr.sample_period = 1;
+ evsel->no_aux_samples = true;
+ evsel->name = strdup("dummy:u");
+
+ if (system_wide)
+ evlist__add_on_all_cpus(evlist, evsel);
+ else
+ evlist__add(evlist, evsel);
+
+ return evsel;
+}
+
static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
{
struct evsel *evsel, *n;
@@ -747,15 +798,15 @@ static struct mmap *evlist__alloc_mmap(struct evlist *evlist,
static void
perf_evlist__mmap_cb_idx(struct perf_evlist *_evlist,
- struct perf_evsel *_evsel __maybe_unused,
+ struct perf_evsel *_evsel,
struct perf_mmap_param *_mp,
int idx)
{
struct evlist *evlist = container_of(_evlist, struct evlist, core);
struct mmap_params *mp = container_of(_mp, struct mmap_params, core);
- bool per_cpu = !perf_cpu_map__empty(_evlist->user_requested_cpus);
+ struct evsel *evsel = container_of(_evsel, struct evsel, core);
- auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, idx, per_cpu);
+ auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, evsel, idx);
}
static struct perf_mmap*
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 4062f5aebfc1..1bde9ccf4e7d 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -114,6 +114,11 @@ int arch_evlist__add_default_attrs(struct evlist *evlist);
struct evsel *arch_evlist__leader(struct list_head *list);
int evlist__add_dummy(struct evlist *evlist);
+struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide);
+static inline struct evsel *evlist__add_dummy_on_all_cpus(struct evlist *evlist)
+{
+ return evlist__add_aux_dummy(evlist, true);
+}
int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
evsel__sb_cb_t cb, void *data);
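
evlist__add_dummy_on_all_cpus() is the intended entry point for tools that
must keep receiving sideband data on every CPU while tracing a single task.
A usage sketch (the surrounding error handling is assumed, not part of this
patch):

	struct evsel *dummy = evlist__add_dummy_on_all_cpus(evlist);

	if (dummy == NULL)
		return -ENOMEM;
	/* "dummy:u" now collects sideband events on all online CPUs */
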
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ef169ad15236..ce499c5da8d7 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -296,8 +296,8 @@ struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx)
return NULL;
evsel__init(evsel, attr, idx);
- if (evsel__is_bpf_output(evsel)) {
- evsel->core.attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
+ if (evsel__is_bpf_output(evsel) && !attr->sample_type) {
+ evsel->core.attr.sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
evsel->core.attr.sample_period = 1;
}
@@ -409,6 +409,7 @@ struct evsel *evsel__clone(struct evsel *orig)
evsel->core.threads = perf_thread_map__get(orig->core.threads);
evsel->core.nr_members = orig->core.nr_members;
evsel->core.system_wide = orig->core.system_wide;
+ evsel->core.requires_cpu = orig->core.requires_cpu;
if (orig->name) {
evsel->name = strdup(orig->name);
@@ -896,7 +897,7 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
"specifying a subset with --user-regs may render DWARF unwinding unreliable, "
"so the minimal registers set (IP, SP) is explicitly forced.\n");
} else {
- attr->sample_regs_user |= PERF_REGS_MASK;
+ attr->sample_regs_user |= arch__user_reg_mask();
}
attr->sample_stack_user = param->dump_size;
attr->exclude_callchain_user = 1;
diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c
index 15f60fd09424..014d82159656 100644
--- a/tools/perf/util/libunwind/arm64.c
+++ b/tools/perf/util/libunwind/arm64.c
@@ -24,7 +24,7 @@
#include "unwind.h"
#include "libunwind-aarch64.h"
#define perf_event_arm_regs perf_event_arm64_regs
-#include <../../../../arch/arm64/include/uapi/asm/perf_regs.h>
+#include <../../../arch/arm64/include/uapi/asm/perf_regs.h>
#undef perf_event_arm_regs
#include "../../arch/arm64/util/unwind-libunwind.c"
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 50502b4a7ca4..a4dff881be39 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -62,8 +62,8 @@ void __weak auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp __maybe_u
void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused,
struct evlist *evlist __maybe_unused,
- int idx __maybe_unused,
- bool per_cpu __maybe_unused)
+ struct evsel *evsel __maybe_unused,
+ int idx __maybe_unused)
{
}
diff --git a/tools/perf/util/off_cpu.h b/tools/perf/util/off_cpu.h
new file mode 100644
index 000000000000..548008f74d42
--- /dev/null
+++ b/tools/perf/util/off_cpu.h
@@ -0,0 +1,29 @@
+#ifndef PERF_UTIL_OFF_CPU_H
+#define PERF_UTIL_OFF_CPU_H
+
+struct evlist;
+struct target;
+struct perf_session;
+struct record_opts;
+
+#define OFFCPU_EVENT "offcpu-time"
+
+#ifdef HAVE_BPF_SKEL
+int off_cpu_prepare(struct evlist *evlist, struct target *target,
+ struct record_opts *opts);
+int off_cpu_write(struct perf_session *session);
+#else
+static inline int off_cpu_prepare(struct evlist *evlist __maybe_unused,
+ struct target *target __maybe_unused,
+ struct record_opts *opts __maybe_unused)
+{
+ return -1;
+}
+
+static inline int off_cpu_write(struct perf_session *session __maybe_unused)
+{
+ return -1;
+}
+#endif
+
+#endif /* PERF_UTIL_OFF_CPU_H */
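
The !HAVE_BPF_SKEL stubs keep callers free of #ifdef clutter. A hypothetical
call site on the record path (the opts->off_cpu flag is illustrative and not
defined by this header):

	if (opts->off_cpu && off_cpu_prepare(evlist, &opts->target, opts) < 0)
		return -1;	/* always taken when BPF skeletons are disabled */
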
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 30a9d915853d..7ed235740431 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -365,7 +365,7 @@ __add_event(struct list_head *list, int *idx,
(*idx)++;
evsel->core.cpus = cpus;
evsel->core.own_cpus = perf_cpu_map__get(cpus);
- evsel->core.system_wide = pmu ? pmu->is_uncore : false;
+ evsel->core.requires_cpu = pmu ? pmu->is_uncore : false;
evsel->auto_merge_stats = auto_merge_stats;
if (name)
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
index a982e40ee5a9..872dd3d38782 100644
--- a/tools/perf/util/perf_regs.c
+++ b/tools/perf/util/perf_regs.c
@@ -103,6 +103,8 @@ static const char *__perf_reg_name_arm64(int id)
return "lr";
case PERF_REG_ARM64_PC:
return "pc";
+ case PERF_REG_ARM64_VG:
+ return "vg";
default:
return NULL;
}
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index a685d20165f7..aa5156c2bcff 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -38,5 +38,6 @@ util/units.c
util/affinity.c
util/rwsem.c
util/hashmap.c
+util/perf_regs.c
util/pmu-hybrid.c
util/fncache.c
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 659eb4e4b34b..adba01b7d9dd 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -755,12 +755,22 @@ static void set_regs_in_dict(PyObject *dict,
}
static void set_sym_in_dict(PyObject *dict, struct addr_location *al,
- const char *dso_field, const char *sym_field,
- const char *symoff_field)
+ const char *dso_field, const char *dso_bid_field,
+ const char *dso_map_start, const char *dso_map_end,
+ const char *sym_field, const char *symoff_field)
{
+ char sbuild_id[SBUILD_ID_SIZE];
+
if (al->map) {
pydict_set_item_string_decref(dict, dso_field,
_PyUnicode_FromString(al->map->dso->name));
+ build_id__sprintf(&al->map->dso->bid, sbuild_id);
+ pydict_set_item_string_decref(dict, dso_bid_field,
+ _PyUnicode_FromString(sbuild_id));
+ pydict_set_item_string_decref(dict, dso_map_start,
+ PyLong_FromUnsignedLong(al->map->start));
+ pydict_set_item_string_decref(dict, dso_map_end,
+ PyLong_FromUnsignedLong(al->map->end));
}
if (al->sym) {
pydict_set_item_string_decref(dict, sym_field,
@@ -840,7 +850,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
(const char *)sample->raw_data, sample->raw_size));
pydict_set_item_string_decref(dict, "comm",
_PyUnicode_FromString(thread__comm_str(al->thread)));
- set_sym_in_dict(dict, al, "dso", "symbol", "symoff");
+ set_sym_in_dict(dict, al, "dso", "dso_bid", "dso_map_start", "dso_map_end",
+ "symbol", "symoff");
pydict_set_item_string_decref(dict, "callchain", callchain);
@@ -856,7 +867,9 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
if (addr_al) {
pydict_set_item_string_decref(dict_sample, "addr_correlates_sym",
PyBool_FromLong(1));
- set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_symbol", "addr_symoff");
+ set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_dso_bid",
+ "addr_dso_map_start", "addr_dso_map_end",
+ "addr_symbol", "addr_symoff");
}
if (sample->flags)
diff --git a/tools/testing/crypto/chacha20-s390/Makefile b/tools/testing/crypto/chacha20-s390/Makefile
new file mode 100644
index 000000000000..db81cd2fb9c5
--- /dev/null
+++ b/tools/testing/crypto/chacha20-s390/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2022 Red Hat, Inc.
+# Author: Vladis Dronov <vdronoff@gmail.com>
+
+obj-m += test_cipher.o
+test_cipher-y := test-cipher.o
+
+all:
+	$(MAKE) -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) modules
+clean:
+	$(MAKE) -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) clean
diff --git a/tools/testing/crypto/chacha20-s390/run-tests.sh b/tools/testing/crypto/chacha20-s390/run-tests.sh
new file mode 100644
index 000000000000..43108794b996
--- /dev/null
+++ b/tools/testing/crypto/chacha20-s390/run-tests.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2022 Red Hat, Inc.
+# Author: Vladis Dronov <vdronoff@gmail.com>
+#
+# This script runs (via insmod) the test-cipher.ko module, which exercises
+# the generic and s390-native ChaCha20 encryption algorithms with different
+# data sizes. Check 'dmesg' for the results.
+#
+# The insmod error is expected:
+# insmod: ERROR: could not insert module test_cipher.ko: Operation not permitted
+
+lsmod | grep chacha | cut -f1 -d' ' | xargs rmmod
+modprobe chacha_generic
+modprobe chacha_s390
+
+# run encryption for different data sizes, including whole block(s) +/- 1
+insmod test_cipher.ko size=63
+insmod test_cipher.ko size=64
+insmod test_cipher.ko size=65
+insmod test_cipher.ko size=127
+insmod test_cipher.ko size=128
+insmod test_cipher.ko size=129
+insmod test_cipher.ko size=511
+insmod test_cipher.ko size=512
+insmod test_cipher.ko size=513
+insmod test_cipher.ko size=4096
+insmod test_cipher.ko size=65611
+insmod test_cipher.ko size=6291456
+insmod test_cipher.ko size=62914560
+
+# print test logs
+dmesg | tail -170
diff --git a/tools/testing/crypto/chacha20-s390/test-cipher.c b/tools/testing/crypto/chacha20-s390/test-cipher.c
new file mode 100644
index 000000000000..34e8b855266f
--- /dev/null
+++ b/tools/testing/crypto/chacha20-s390/test-cipher.c
@@ -0,0 +1,372 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Red Hat, Inc.
+ * Author: Vladis Dronov <vdronoff@gmail.com>
+ */
+
+#include <asm/elf.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <crypto/skcipher.h>
+#include <crypto/akcipher.h>
+#include <crypto/acompress.h>
+#include <crypto/rng.h>
+#include <crypto/drbg.h>
+#include <crypto/kpp.h>
+#include <crypto/internal/simd.h>
+#include <crypto/chacha.h>
+#include <crypto/aead.h>
+#include <crypto/hash.h>
+#include <linux/crypto.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/fips.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/scatterlist.h>
+#include <linux/time.h>
+#include <linux/vmalloc.h>
+#include <linux/zlib.h>
+#include <linux/once.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+static unsigned int data_size __read_mostly = 256;
+static unsigned int debug __read_mostly;
+
+/* tie all skcipher structures together */
+struct skcipher_def {
+ struct scatterlist sginp, sgout;
+ struct crypto_skcipher *tfm;
+ struct skcipher_request *req;
+ struct crypto_wait wait;
+};
+
+/* Perform cipher operations with the chacha lib */
+static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain)
+{
+ u32 chacha_state[CHACHA_STATE_WORDS];
+ u8 iv[16], key[32];
+ u64 start, end;
+
+ memset(key, 'X', sizeof(key));
+ memset(iv, 'I', sizeof(iv));
+
+ if (debug) {
+ print_hex_dump(KERN_INFO, "key: ", DUMP_PREFIX_OFFSET,
+ 16, 1, key, 32, 1);
+
+ print_hex_dump(KERN_INFO, "iv: ", DUMP_PREFIX_OFFSET,
+ 16, 1, iv, 16, 1);
+ }
+
+ /* Encrypt */
+	chacha_init_arch(chacha_state, (u32 *)key, iv);
+
+ start = ktime_get_ns();
+ chacha_crypt_arch(chacha_state, cipher, plain, data_size, 20);
+ end = ktime_get_ns();
+
+ if (debug)
+ print_hex_dump(KERN_INFO, "encr:", DUMP_PREFIX_OFFSET,
+ 16, 1, cipher,
+ (data_size > 64 ? 64 : data_size), 1);
+
+	pr_info("lib encryption took: %lld nsec\n", end - start);
+
+ /* Decrypt */
+ chacha_init_arch(chacha_state, (u32 *)key, iv);
+
+ start = ktime_get_ns();
+ chacha_crypt_arch(chacha_state, revert, cipher, data_size, 20);
+ end = ktime_get_ns();
+
+ if (debug)
+ print_hex_dump(KERN_INFO, "decr:", DUMP_PREFIX_OFFSET,
+ 16, 1, revert,
+ (data_size > 64 ? 64 : data_size), 1);
+
+	pr_info("lib decryption took: %lld nsec\n", end - start);
+
+ return 0;
+}
+
+/* Perform cipher operations with skcipher */
+static int test_skcipher_encdec(struct skcipher_def *sk,
+ int enc)
+{
+ int rc;
+
+	if (enc) {
+		rc = crypto_wait_req(crypto_skcipher_encrypt(sk->req),
+				     &sk->wait);
+		if (rc)
+			pr_info("skcipher encrypt returned with result %d\n",
+				rc);
+	} else {
+		rc = crypto_wait_req(crypto_skcipher_decrypt(sk->req),
+				     &sk->wait);
+		if (rc)
+			pr_info("skcipher decrypt returned with result %d\n",
+				rc);
+	}
+
+ return rc;
+}
+
+/* Initialize and trigger cipher operations */
+static int test_skcipher(char *name, u8 *revert, u8 *cipher, u8 *plain)
+{
+ struct skcipher_def sk;
+ struct crypto_skcipher *skcipher = NULL;
+ struct skcipher_request *req = NULL;
+ u8 iv[16], key[32];
+ u64 start, end;
+ int ret = -EFAULT;
+
+ skcipher = crypto_alloc_skcipher(name, 0, 0);
+ if (IS_ERR(skcipher)) {
+ pr_info("could not allocate skcipher %s handle\n", name);
+ return PTR_ERR(skcipher);
+ }
+
+ req = skcipher_request_alloc(skcipher, GFP_KERNEL);
+ if (!req) {
+ pr_info("could not allocate skcipher request\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ crypto_req_done,
+ &sk.wait);
+
+ memset(key, 'X', sizeof(key));
+ memset(iv, 'I', sizeof(iv));
+
+ if (crypto_skcipher_setkey(skcipher, key, 32)) {
+ pr_info("key could not be set\n");
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ if (debug) {
+ print_hex_dump(KERN_INFO, "key: ", DUMP_PREFIX_OFFSET,
+ 16, 1, key, 32, 1);
+
+ print_hex_dump(KERN_INFO, "iv: ", DUMP_PREFIX_OFFSET,
+ 16, 1, iv, 16, 1);
+ }
+
+ sk.tfm = skcipher;
+ sk.req = req;
+
+ /* Encrypt in one pass */
+ sg_init_one(&sk.sginp, plain, data_size);
+ sg_init_one(&sk.sgout, cipher, data_size);
+ skcipher_request_set_crypt(req, &sk.sginp, &sk.sgout,
+ data_size, iv);
+ crypto_init_wait(&sk.wait);
+
+ /* Encrypt data */
+ start = ktime_get_ns();
+ ret = test_skcipher_encdec(&sk, 1);
+ end = ktime_get_ns();
+
+ if (ret)
+ goto out;
+
+ pr_info("%s tfm encryption successful, took %lld nsec\n", name, end - start);
+
+ if (debug)
+ print_hex_dump(KERN_INFO, "encr:", DUMP_PREFIX_OFFSET,
+ 16, 1, cipher,
+ (data_size > 64 ? 64 : data_size), 1);
+
+ /* Prepare for decryption */
+ memset(iv, 'I', sizeof(iv));
+
+ sg_init_one(&sk.sginp, cipher, data_size);
+ sg_init_one(&sk.sgout, revert, data_size);
+ skcipher_request_set_crypt(req, &sk.sginp, &sk.sgout,
+ data_size, iv);
+ crypto_init_wait(&sk.wait);
+
+ /* Decrypt data */
+ start = ktime_get_ns();
+ ret = test_skcipher_encdec(&sk, 0);
+ end = ktime_get_ns();
+
+ if (ret)
+ goto out;
+
+ pr_info("%s tfm decryption successful, took %lld nsec\n", name, end - start);
+
+ if (debug)
+ print_hex_dump(KERN_INFO, "decr:", DUMP_PREFIX_OFFSET,
+ 16, 1, revert,
+ (data_size > 64 ? 64 : data_size), 1);
+
+ /* Dump some internal skcipher data */
+ if (debug)
+ pr_info("skcipher %s: cryptlen %d blksize %d stride %d "
+ "ivsize %d alignmask 0x%x\n",
+ name, sk.req->cryptlen,
+ crypto_skcipher_blocksize(sk.tfm),
+ crypto_skcipher_alg(sk.tfm)->walksize,
+ crypto_skcipher_ivsize(sk.tfm),
+ crypto_skcipher_alignmask(sk.tfm));
+
+out:
+ if (skcipher)
+ crypto_free_skcipher(skcipher);
+ if (req)
+ skcipher_request_free(req);
+ return ret;
+}
+
+static int __init chacha_s390_test_init(void)
+{
+ u8 *plain = NULL, *revert = NULL;
+ u8 *cipher_generic = NULL, *cipher_s390 = NULL;
+ int ret = -1;
+
+	pr_info("s390 ChaCha20 test module: size=%u debug=%u\n",
+ data_size, debug);
+
+ /* Allocate and fill buffers */
+ plain = vmalloc(data_size);
+ if (!plain) {
+ pr_info("could not allocate plain buffer\n");
+ ret = -2;
+ goto out;
+ }
+ memset(plain, 'a', data_size);
+ get_random_bytes(plain, (data_size > 256 ? 256 : data_size));
+
+ cipher_generic = vmalloc(data_size);
+ if (!cipher_generic) {
+ pr_info("could not allocate cipher_generic buffer\n");
+ ret = -2;
+ goto out;
+ }
+ memset(cipher_generic, 0, data_size);
+
+ cipher_s390 = vmalloc(data_size);
+ if (!cipher_s390) {
+ pr_info("could not allocate cipher_s390 buffer\n");
+ ret = -2;
+ goto out;
+ }
+ memset(cipher_s390, 0, data_size);
+
+ revert = vmalloc(data_size);
+ if (!revert) {
+ pr_info("could not allocate revert buffer\n");
+ ret = -2;
+ goto out;
+ }
+ memset(revert, 0, data_size);
+
+ if (debug)
+ print_hex_dump(KERN_INFO, "src: ", DUMP_PREFIX_OFFSET,
+ 16, 1, plain,
+ (data_size > 64 ? 64 : data_size), 1);
+
+ /* Use chacha20 generic */
+ ret = test_skcipher("chacha20-generic", revert, cipher_generic, plain);
+ if (ret)
+ goto out;
+
+ if (memcmp(plain, revert, data_size)) {
+ pr_info("generic en/decryption check FAILED\n");
+ ret = -2;
+ goto out;
+	} else {
+		pr_info("generic en/decryption check OK\n");
+	}
+
+ memset(revert, 0, data_size);
+
+ /* Use chacha20 s390 */
+ ret = test_skcipher("chacha20-s390", revert, cipher_s390, plain);
+ if (ret)
+ goto out;
+
+ if (memcmp(plain, revert, data_size)) {
+ pr_info("s390 en/decryption check FAILED\n");
+ ret = -2;
+ goto out;
+	} else {
+		pr_info("s390 en/decryption check OK\n");
+	}
+
+ if (memcmp(cipher_generic, cipher_s390, data_size)) {
+ pr_info("s390 vs generic check FAILED\n");
+ ret = -2;
+ goto out;
+	} else {
+		pr_info("s390 vs generic check OK\n");
+	}
+
+ memset(cipher_s390, 0, data_size);
+ memset(revert, 0, data_size);
+
+ /* Use chacha20 lib */
+ test_lib_chacha(revert, cipher_s390, plain);
+
+ if (memcmp(plain, revert, data_size)) {
+ pr_info("lib en/decryption check FAILED\n");
+ ret = -2;
+ goto out;
+	} else {
+		pr_info("lib en/decryption check OK\n");
+	}
+
+ if (memcmp(cipher_generic, cipher_s390, data_size)) {
+ pr_info("lib vs generic check FAILED\n");
+ ret = -2;
+ goto out;
+	} else {
+		pr_info("lib vs generic check OK\n");
+	}
+
+ pr_info("--- chacha20 s390 test end ---\n");
+
+out:
+	/* vfree(NULL) is a no-op, so the buffers can be freed unconditionally */
+	vfree(plain);
+	vfree(cipher_generic);
+	vfree(cipher_s390);
+	vfree(revert);
+
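+	/* fail the init on purpose so insmod never keeps the module loaded;
+	 * this is the "Operation not permitted" error that run-tests.sh
+	 * documents as expected */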
+ return -1;
+}
+
+static void __exit chacha_s390_test_exit(void)
+{
+ pr_info("s390 ChaCha20 test module exit\n");
+}
+
+module_param_named(size, data_size, uint, 0660);
+module_param(debug, uint, 0660);
+MODULE_PARM_DESC(size, "Size of the plaintext in bytes");
+MODULE_PARM_DESC(debug, "Debug level (0=off,1=on)");
+
+module_init(chacha_s390_test_init);
+module_exit(chacha_s390_test_exit);
+
+MODULE_DESCRIPTION("s390 ChaCha20 self-test");
+MODULE_AUTHOR("Vladis Dronov <vdronoff@gmail.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 82e49ab0937d..33543231d453 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -8,6 +8,8 @@ ldflags-y += --wrap=devm_cxl_port_enumerate_dports
ldflags-y += --wrap=devm_cxl_setup_hdm
ldflags-y += --wrap=devm_cxl_add_passthrough_decoder
ldflags-y += --wrap=devm_cxl_enumerate_decoders
+ldflags-y += --wrap=cxl_await_media_ready
+ldflags-y += --wrap=cxl_hdm_decode_init
DRIVERS := ../../../drivers
CXL_SRC := $(DRIVERS)/cxl
@@ -34,7 +36,6 @@ cxl_port-y += config_check.o
obj-m += cxl_mem.o
cxl_mem-y := $(CXL_SRC)/mem.o
-cxl_mem-y += mock_mem.o
cxl_mem-y += config_check.o
obj-m += cxl_core.o
diff --git a/tools/testing/cxl/mock_mem.c b/tools/testing/cxl/mock_mem.c
deleted file mode 100644
index d1dec5845139..000000000000
--- a/tools/testing/cxl/mock_mem.c
+++ /dev/null
@@ -1,10 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
-
-#include <linux/types.h>
-
-struct cxl_dev_state;
-bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds)
-{
- return true;
-}
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index b6b726eff3e2..6b9239b2afd4 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -237,25 +237,11 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *
return rc;
}
-static int cxl_mock_wait_media_ready(struct cxl_dev_state *cxlds)
-{
- msleep(100);
- return 0;
-}
-
static void label_area_release(void *lsa)
{
vfree(lsa);
}
-static void mock_validate_dvsec_ranges(struct cxl_dev_state *cxlds)
-{
- struct cxl_endpoint_dvsec_info *info;
-
- info = &cxlds->info;
- info->mem_enabled = true;
-}
-
static int cxl_mock_mem_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@@ -278,7 +264,6 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
cxlds->serial = pdev->id;
cxlds->mbox_send = cxl_mock_mbox_send;
- cxlds->wait_media_ready = cxl_mock_wait_media_ready;
cxlds->payload_size = SZ_4K;
rc = cxl_enumerate_cmds(cxlds);
@@ -293,8 +278,6 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
if (rc)
return rc;
- mock_validate_dvsec_ranges(cxlds);
-
cxlmd = devm_cxl_add_memdev(cxlds);
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 6e8c9d63c92d..f1f8c40948c5 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -193,6 +193,35 @@ int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port)
}
EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, CXL);
+int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
+{
+ int rc, index;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+ if (ops && ops->is_mock_dev(cxlds->dev))
+ rc = 0;
+ else
+ rc = cxl_await_media_ready(cxlds);
+ put_cxl_mock_ops(index);
+
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL);
+
+bool __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
+ struct cxl_hdm *cxlhdm)
+{
+ int rc = 0, index;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+ if (!ops || !ops->is_mock_dev(cxlds->dev))
+ rc = cxl_hdm_decode_init(cxlds, cxlhdm);
+ put_cxl_mock_ops(index);
+
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, CXL);
+
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(ACPI);
MODULE_IMPORT_NS(CXL);
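
These wrappers rely on the linker's --wrap option, set via ldflags-y in the
Kbuild hunk above: every reference to the wrapped symbol in the objects
linked with that flag is redirected to the __wrap_ variant. Schematically
(a sketch, not patch content; mock.c itself is linked without --wrap, which
is why it can call the real function directly):

	/*
	 *   rc = cxl_await_media_ready(cxlds);         what the driver says
	 *        |
	 *        v  ld --wrap=cxl_await_media_ready
	 *   rc = __wrap_cxl_await_media_ready(cxlds);  what runs under test
	 *          -> mock device: rc = 0 (media treated as always ready)
	 *          -> real device: rc = cxl_await_media_ready(cxlds)
	 */
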
diff --git a/tools/testing/memblock/TODO b/tools/testing/memblock/TODO
index c25b2fdec45e..cd1a30d5acc9 100644
--- a/tools/testing/memblock/TODO
+++ b/tools/testing/memblock/TODO
@@ -23,6 +23,3 @@ TODO
5. Add tests for memblock_alloc_node() to check if the correct NUMA node is set
for the new region
-
-6. Update comments in tests/basic_api.c to match the style used in
- tests/alloc_*.c
diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index fbc1ce160303..a7bc180316d6 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -26,8 +26,8 @@ static int memblock_initialization_check(void)
/*
* A simple test that adds a memory block of a specified base address
* and size to the collection of available memory regions (memblock.memory).
- * It checks if a new entry was created and if region counter and total memory
- * were correctly updated.
+ * Expect to create a new entry. The region counter and total memory get
+ * updated.
*/
static int memblock_add_simple_check(void)
{
@@ -53,10 +53,10 @@ static int memblock_add_simple_check(void)
}
/*
- * A simple test that adds a memory block of a specified base address, size
+ * A simple test that adds a memory block of a specified base address, size,
* NUMA node and memory flags to the collection of available memory regions.
- * It checks if the new entry, region counter and total memory size have
- * expected values.
+ * Expect to create a new entry. The region counter and total memory get
+ * updated.
*/
static int memblock_add_node_simple_check(void)
{
@@ -87,9 +87,15 @@ static int memblock_add_node_simple_check(void)
/*
* A test that tries to add two memory blocks that don't overlap with one
- * another. It checks if two correctly initialized entries were added to the
- * collection of available memory regions (memblock.memory) and if this
- * change was reflected in memblock.memory's total size and region counter.
+ * another:
+ *
+ * | +--------+ +--------+ |
+ * | | r1 | | r2 | |
+ * +--------+--------+--------+--------+--+
+ *
+ * Expect to add two correctly initialized entries to the collection of
+ * available memory regions (memblock.memory). The total size and
+ * region counter fields get updated.
*/
static int memblock_add_disjoint_check(void)
{
@@ -124,11 +130,21 @@ static int memblock_add_disjoint_check(void)
}
/*
- * A test that tries to add two memory blocks, where the second one overlaps
- * with the beginning of the first entry (that is r1.base < r2.base + r2.size).
- * After this, it checks if two entries are merged into one region that starts
- * at r2.base and has size of two regions minus their intersection. It also
- * verifies the reported total size of the available memory and region counter.
+ * A test that tries to add two memory blocks r1 and r2, where r2 overlaps
+ * with the beginning of r1 (that is r1.base < r2.base + r2.size):
+ *
+ * | +----+----+------------+ |
+ * | | |r2 | r1 | |
+ * +----+----+----+------------+----------+
+ * ^ ^
+ * | |
+ * | r1.base
+ * |
+ * r2.base
+ *
+ * Expect to merge the two entries into one region that starts at r2.base
+ * and has size of two regions minus their intersection. The total size of
+ * the available memory is updated, and the region counter stays the same.
*/
static int memblock_add_overlap_top_check(void)
{
@@ -162,12 +178,21 @@ static int memblock_add_overlap_top_check(void)
}
/*
- * A test that tries to add two memory blocks, where the second one overlaps
- * with the end of the first entry (that is r2.base < r1.base + r1.size).
- * After this, it checks if two entries are merged into one region that starts
- * at r1.base and has size of two regions minus their intersection. It verifies
- * that memblock can still see only one entry and has a correct total size of
- * the available memory.
+ * A test that tries to add two memory blocks r1 and r2, where r2 overlaps
+ * with the end of r1 (that is r2.base < r1.base + r1.size):
+ *
+ * | +--+------+----------+ |
+ * | | | r1 | r2 | |
+ * +--+--+------+----------+--------------+
+ * ^ ^
+ * | |
+ * | r2.base
+ * |
+ * r1.base
+ *
+ * Expect to merge the two entries into one region that starts at r1.base
+ * and has size of two regions minus their intersection. The total size of
+ * the available memory is updated, and the region counter stays the same.
*/
static int memblock_add_overlap_bottom_check(void)
{
@@ -201,11 +226,19 @@ static int memblock_add_overlap_bottom_check(void)
}
/*
- * A test that tries to add two memory blocks, where the second one is
- * within the range of the first entry (that is r1.base < r2.base &&
- * r2.base + r2.size < r1.base + r1.size). It checks if two entries are merged
- * into one region that stays the same. The counter and total size of available
- * memory are expected to not be updated.
+ * A test that tries to add two memory blocks r1 and r2, where r2 is
+ * within the range of r1 (that is r1.base < r2.base &&
+ * r2.base + r2.size < r1.base + r1.size):
+ *
+ * | +-------+--+-----------------------+
+ * | | |r2| r1 |
+ * +---+-------+--+-----------------------+
+ * ^
+ * |
+ * r1.base
+ *
+ * Expect to merge two entries into one region that stays the same.
+ * The counter and total size of available memory are not updated.
*/
static int memblock_add_within_check(void)
{
@@ -236,8 +269,8 @@ static int memblock_add_within_check(void)
}
/*
- * A simple test that tries to add the same memory block twice. The counter
- * and total size of available memory are expected to not be updated.
+ * A simple test that tries to add the same memory block twice. Expect
+ * the counter and total size of available memory to not be updated.
*/
static int memblock_add_twice_check(void)
{
@@ -270,12 +303,12 @@ static int memblock_add_checks(void)
return 0;
}
- /*
- * A simple test that marks a memory block of a specified base address
- * and size as reserved and to the collection of reserved memory regions
- * (memblock.reserved). It checks if a new entry was created and if region
- * counter and total memory size were correctly updated.
- */
+/*
+ * A simple test that marks a memory block of a specified base address
+ * and size as reserved and to the collection of reserved memory regions
+ * (memblock.reserved). Expect to create a new entry. The region counter
+ * and total memory size are updated.
+ */
static int memblock_reserve_simple_check(void)
{
struct memblock_region *rgn;
@@ -297,10 +330,15 @@ static int memblock_reserve_simple_check(void)
}
/*
- * A test that tries to mark two memory blocks that don't overlap as reserved
- * and checks if two entries were correctly added to the collection of reserved
- * memory regions (memblock.reserved) and if this change was reflected in
- * memblock.reserved's total size and region counter.
+ * A test that tries to mark two memory blocks that don't overlap as reserved:
+ *
+ * | +--+ +----------------+ |
+ * | |r1| | r2 | |
+ * +--------+--+------+----------------+--+
+ *
+ * Expect to add two entries to the collection of reserved memory regions
+ * (memblock.reserved). The total size and region counter for
+ * memblock.reserved are updated.
*/
static int memblock_reserve_disjoint_check(void)
{
@@ -335,13 +373,22 @@ static int memblock_reserve_disjoint_check(void)
}
/*
- * A test that tries to mark two memory blocks as reserved, where the
- * second one overlaps with the beginning of the first (that is
- * r1.base < r2.base + r2.size).
- * It checks if two entries are merged into one region that starts at r2.base
- * and has size of two regions minus their intersection. The test also verifies
- * that memblock can still see only one entry and has a correct total size of
- * the reserved memory.
+ * A test that tries to mark two memory blocks r1 and r2 as reserved,
+ * where r2 overlaps with the beginning of r1 (that is
+ * r1.base < r2.base + r2.size):
+ *
+ * | +--------------+--+--------------+ |
+ * | | r2 | | r1 | |
+ * +--+--------------+--+--------------+--+
+ * ^ ^
+ * | |
+ * | r1.base
+ * |
+ * r2.base
+ *
+ * Expect to merge two entries into one region that starts at r2.base and
+ * has size of two regions minus their intersection. The total size of the
+ * reserved memory is updated, and the region counter is not updated.
*/
static int memblock_reserve_overlap_top_check(void)
{
@@ -375,13 +422,22 @@ static int memblock_reserve_overlap_top_check(void)
}
/*
- * A test that tries to mark two memory blocks as reserved, where the
- * second one overlaps with the end of the first entry (that is
- * r2.base < r1.base + r1.size).
- * It checks if two entries are merged into one region that starts at r1.base
- * and has size of two regions minus their intersection. It verifies that
- * memblock can still see only one entry and has a correct total size of the
- * reserved memory.
+ * A test that tries to mark two memory blocks r1 and r2 as reserved,
+ * where r2 overlaps with the end of r1 (that is
+ * r2.base < r1.base + r1.size):
+ *
+ * | +--------------+--+--------------+ |
+ * | | r1 | | r2 | |
+ * +--+--------------+--+--------------+--+
+ * ^ ^
+ * | |
+ * | r2.base
+ * |
+ * r1.base
+ *
+ * Expect to merge two entries into one region that starts at r1.base and
+ * has size of two regions minus their intersection. The total size of the
+ * reserved memory is updated, and the region counter is not updated.
*/
static int memblock_reserve_overlap_bottom_check(void)
{
@@ -415,12 +471,21 @@ static int memblock_reserve_overlap_bottom_check(void)
}
/*
- * A test that tries to mark two memory blocks as reserved, where the second
- * one is within the range of the first entry (that is
- * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)).
- * It checks if two entries are merged into one region that stays the
- * same. The counter and total size of available memory are expected to not be
- * updated.
+ * A test that tries to mark two memory blocks r1 and r2 as reserved,
+ * where r2 is within the range of r1 (that is
+ * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)):
+ *
+ * | +-----+--+---------------------------|
+ * | | |r2| r1 |
+ * +-+-----+--+---------------------------+
+ * ^ ^
+ * | |
+ * | r2.base
+ * |
+ * r1.base
+ *
+ * Expect to merge two entries into one region that stays the same. The
+ * counter and total size of available memory are not updated.
*/
static int memblock_reserve_within_check(void)
{
@@ -452,7 +517,7 @@ static int memblock_reserve_within_check(void)
/*
* A simple test that tries to reserve the same memory block twice.
- * The region counter and total size of reserved memory are expected to not
+ * Expect the region counter and total size of reserved memory to not
* be updated.
*/
static int memblock_reserve_twice_check(void)
@@ -485,14 +550,22 @@ static int memblock_reserve_checks(void)
return 0;
}
- /*
- * A simple test that tries to remove the first entry of the array of
- * available memory regions. By "removing" a region we mean overwriting it
- * with the next region in memblock.memory. To check this is the case, the
- * test adds two memory blocks and verifies that the value of the latter
- * was used to erase r1 region. It also checks if the region counter and
- * total size were updated to expected values.
- */
+/*
+ * A simple test that tries to remove a region r1 from the array of
+ * available memory regions. By "removing" a region we mean overwriting it
+ * with the next region r2 in memblock.memory:
+ *
+ * | ...... +----------------+ |
+ * | : r1 : | r2 | |
+ * +--+----+----------+----------------+--+
+ * ^
+ * |
+ * rgn.base
+ *
+ * Expect to add two memory blocks r1 and r2 and then remove r1 so that
+ * r2 is the first available region. The region counter and total size
+ * are updated.
+ */
static int memblock_remove_simple_check(void)
{
struct memblock_region *rgn;
@@ -522,11 +595,22 @@ static int memblock_remove_simple_check(void)
return 0;
}
- /*
- * A test that tries to remove a region that was not registered as available
- * memory (i.e. has no corresponding entry in memblock.memory). It verifies
- * that array, regions counter and total size were not modified.
- */
+/*
+ * A test that tries to remove a region r2 that was not registered as
+ * available memory (i.e. has no corresponding entry in memblock.memory):
+ *
+ * +----------------+
+ * | r2 |
+ * +----------------+
+ * | +----+ |
+ * | | r1 | |
+ * +--+----+------------------------------+
+ * ^
+ * |
+ * rgn.base
+ *
+ * Expect the array, regions counter and total size to not be modified.
+ */
static int memblock_remove_absent_check(void)
{
struct memblock_region *rgn;
@@ -556,11 +640,23 @@ static int memblock_remove_absent_check(void)
}
/*
- * A test that tries to remove a region which overlaps with the beginning of
- * the already existing entry r1 (that is r1.base < r2.base + r2.size). It
- * checks if only the intersection of both regions is removed from the available
- * memory pool. The test also checks if the regions counter and total size are
- * updated to expected values.
+ * A test that tries to remove a region r2 that overlaps with the
+ * beginning of the already existing entry r1
+ * (that is r1.base < r2.base + r2.size):
+ *
+ * +-----------------+
+ * | r2 |
+ * +-----------------+
+ * | .........+--------+ |
+ * | : r1 | rgn | |
+ * +-----------------+--------+--------+--+
+ * ^ ^
+ * | |
+ * | rgn.base
+ * r1.base
+ *
+ * Expect that only the intersection of both regions is removed from the
+ * available memory pool. The regions counter and total size are updated.
*/
static int memblock_remove_overlap_top_check(void)
{
@@ -596,11 +692,21 @@ static int memblock_remove_overlap_top_check(void)
}
/*
- * A test that tries to remove a region which overlaps with the end of the
- * first entry (that is r2.base < r1.base + r1.size). It checks if only the
- * intersection of both regions is removed from the available memory pool.
- * The test also checks if the regions counter and total size are updated to
- * expected values.
+ * A test that tries to remove a region r2 that overlaps with the end of
+ * the already existing region r1 (that is r2.base < r1.base + r1.size):
+ *
+ * +--------------------------------+
+ * | r2 |
+ * +--------------------------------+
+ * | +---+..... |
+ * | |rgn| r1 : |
+ * +-+---+----+---------------------------+
+ * ^
+ * |
+ * r1.base
+ *
+ * Expect that only the intersection of both regions is removed from the
+ * available memory pool. The regions counter and total size are updated.
*/
static int memblock_remove_overlap_bottom_check(void)
{
@@ -633,13 +739,23 @@ static int memblock_remove_overlap_bottom_check(void)
}
/*
- * A test that tries to remove a region which is within the range of the
- * already existing entry (that is
- * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)).
- * It checks if the region is split into two - one that ends at r2.base and
- * second that starts at r2.base + size, with appropriate sizes. The test
- * also checks if the region counter and total size were updated to
- * expected values.
+ * A test that tries to remove a region r2 that is within the range of
+ * the already existing entry r1 (that is
+ * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)):
+ *
+ * +----+
+ * | r2 |
+ * +----+
+ * | +-------------+....+---------------+ |
+ * | | rgn1 | r1 | rgn2 | |
+ * +-+-------------+----+---------------+-+
+ * ^
+ * |
+ * r1.base
+ *
+ * Expect that the region is split into two - one that ends at r2.base and
+ * another that starts at r2.base + r2.size, with appropriate sizes. The
+ * region counter and total size are updated.
*/
static int memblock_remove_within_check(void)
{
@@ -690,12 +806,19 @@ static int memblock_remove_checks(void)
}
/*
- * A simple test that tries to free a memory block that was marked earlier
- * as reserved. By "freeing" a region we mean overwriting it with the next
- * entry in memblock.reserved. To check this is the case, the test reserves
- * two memory regions and verifies that the value of the latter was used to
- * erase r1 region.
- * The test also checks if the region counter and total size were updated.
+ * A simple test that tries to free a memory block r1 that was marked
+ * earlier as reserved. By "freeing" a region we mean overwriting it with
+ * the next entry r2 in memblock.reserved:
+ *
+ * | ...... +----+ |
+ * | : r1 : | r2 | |
+ * +--------------+----+-----------+----+-+
+ * ^
+ * |
+ * rgn.base
+ *
+ * Expect to reserve two memory regions and then erase r1 region with the
+ * value of r2. The region counter and total size are updated.
*/
static int memblock_free_simple_check(void)
{
@@ -726,11 +849,22 @@ static int memblock_free_simple_check(void)
return 0;
}
- /*
- * A test that tries to free a region that was not marked as reserved
- * (i.e. has no corresponding entry in memblock.reserved). It verifies
- * that array, regions counter and total size were not modified.
- */
+/*
+ * A test that tries to free a region r2 that was not marked as reserved
+ * (i.e. has no corresponding entry in memblock.reserved):
+ *
+ * +----------------+
+ * | r2 |
+ * +----------------+
+ * | +----+ |
+ * | | r1 | |
+ * +--+----+------------------------------+
+ * ^
+ * |
+ * rgn.base
+ *
+ * Expect the array, regions counter and total size to not be modified.
+ */
static int memblock_free_absent_check(void)
{
struct memblock_region *rgn;
@@ -760,11 +894,23 @@ static int memblock_free_absent_check(void)
}
/*
- * A test that tries to free a region which overlaps with the beginning of
- * the already existing entry r1 (that is r1.base < r2.base + r2.size). It
- * checks if only the intersection of both regions is freed. The test also
- * checks if the regions counter and total size are updated to expected
- * values.
+ * A test that tries to free a region r2 that overlaps with the beginning
+ * of the already existing entry r1 (that is r1.base < r2.base + r2.size):
+ *
+ * +----+
+ * | r2 |
+ * +----+
+ * | ...+--------------+ |
+ * | : | r1 | |
+ * +----+--+--------------+---------------+
+ * ^ ^
+ * | |
+ * | rgn.base
+ * |
+ * r1.base
+ *
+ * Expect that only the intersection of both regions is freed. The
+ * regions counter and total size are updated.
*/
static int memblock_free_overlap_top_check(void)
{
@@ -798,10 +944,18 @@ static int memblock_free_overlap_top_check(void)
}
/*
- * A test that tries to free a region which overlaps with the end of the
- * first entry (that is r2.base < r1.base + r1.size). It checks if only the
- * intersection of both regions is freed. The test also checks if the
- * regions counter and total size are updated to expected values.
+ * A test that tries to free a region r2 that overlaps with the end of
+ * the already existing entry r1 (that is r2.base < r1.base + r1.size):
+ *
+ * +----------------+
+ * | r2 |
+ * +----------------+
+ * | +-----------+..... |
+ * | | r1 | : |
+ * +----+-----------+----+----------------+
+ *
+ * Expect that only the intersection of both regions is freed. The
+ * regions counter and total size are updated.
*/
static int memblock_free_overlap_bottom_check(void)
{
@@ -835,13 +989,23 @@ static int memblock_free_overlap_bottom_check(void)
}
/*
- * A test that tries to free a region which is within the range of the
- * already existing entry (that is
- * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)).
- * It checks if the region is split into two - one that ends at r2.base and
- * second that starts at r2.base + size, with appropriate sizes. It is
- * expected that the region counter and total size fields were updated t
- * reflect that change.
+ * A test that tries to free a region r2 that is within the range of the
+ * already existing entry r1 (that is
+ * (r1.base < r2.base) && (r2.base + r2.size < r1.base + r1.size)):
+ *
+ * +----+
+ * | r2 |
+ * +----+
+ * | +------------+....+---------------+
+ * | | rgn1 | r1 | rgn2 |
+ * +----+------------+----+---------------+
+ * ^
+ * |
+ * r1.base
+ *
+ * Expect that the region is split into two - one that ends at r2.base and
+ * another that starts at r2.base + r2.size, with appropriate sizes. The
+ * region counter and total size fields are updated.
*/
static int memblock_free_within_check(void)
{
diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c
index af19c85558e7..c1ec099a3b1d 100644
--- a/tools/testing/nvdimm/pmem-dax.c
+++ b/tools/testing/nvdimm/pmem-dax.c
@@ -4,11 +4,13 @@
*/
#include "test/nfit_test.h"
#include <linux/blkdev.h>
+#include <linux/dax.h>
#include <pmem.h>
#include <nd.h>
long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
- long nr_pages, void **kaddr, pfn_t *pfn)
+ long nr_pages, enum dax_access_mode mode, void **kaddr,
+ pfn_t *pfn)
{
resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index b752ce47ead3..ea956082e6a4 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -62,16 +62,14 @@ struct nfit_test_resource *get_nfit_res(resource_size_t resource)
}
EXPORT_SYMBOL(get_nfit_res);
-static void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
- void __iomem *(*fallback_fn)(resource_size_t, unsigned long))
-{
- struct nfit_test_resource *nfit_res = get_nfit_res(offset);
-
- if (nfit_res)
- return (void __iomem *) nfit_res->buf + offset
- - nfit_res->res.start;
- return fallback_fn(offset, size);
-}
+#define __nfit_test_ioremap(offset, size, fallback_fn) ({ \
+ struct nfit_test_resource *nfit_res = get_nfit_res(offset); \
+ nfit_res ? \
+ (void __iomem *) nfit_res->buf + (offset) \
+ - nfit_res->res.start \
+ : \
+ fallback_fn((offset), (size)) ; \
+})
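
Callers expand the macro with their own fallback, along these lines (a
sketch; the real wrappers are outside this hunk, so treat the exact name as
an assumption):

	void __iomem *__wrap_ioremap(resource_size_t offset, unsigned long size)
	{
		/* nfit-backed ranges hit the test buffer, others fall back */
		return __nfit_test_ioremap(offset, size, ioremap);
	}
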
void __iomem *__wrap_devm_ioremap(struct device *dev,
resource_size_t offset, unsigned long size)
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 1da76ccde448..c75abb497a1a 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -23,8 +23,6 @@
#include "nfit_test.h"
#include "../watermark.h"
-#include <asm/mce.h>
-
/*
* Generate an NFIT table to describe the following topology:
*
@@ -3375,7 +3373,6 @@ static __exit void nfit_test_exit(void)
{
int i;
- flush_workqueue(nfit_wq);
destroy_workqueue(nfit_wq);
for (i = 0; i < NUM_NFITS; i++)
platform_device_unregister(&instances[i]->pdev);
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
index 312d23780096..be754f5bcf79 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -25,6 +25,8 @@ if [ $L -ne 256 ]; then
exit_fail
fi
+cat kprobe_events >> $testlog
+
echo 1 > events/kprobes/enable
echo 0 > events/kprobes/enable
echo > kprobe_events
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index b7d188fc87c7..b9fa9cd709df 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -135,6 +135,11 @@ do { \
#define PPC_FEATURE2_ARCH_3_1 0x00040000
#endif
+/* POWER10 features */
+#ifndef PPC_FEATURE2_MMA
+#define PPC_FEATURE2_MMA 0x00020000
+#endif
+
#if defined(__powerpc64__)
#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP]
#define UCONTEXT_MSR(UC) (UC)->uc_mcontext.gp_regs[PT_MSR]
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
index fcc91c205984..3948f7c510aa 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vmx_preempt vmx_signal vsx_preempt
+TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vmx_preempt vmx_signal vsx_preempt mma
top_srcdir = ../../../../..
include ../../lib.mk
@@ -17,3 +17,5 @@ $(OUTPUT)/vmx_signal: vmx_asm.S ../utils.c
$(OUTPUT)/vsx_preempt: CFLAGS += -mvsx
$(OUTPUT)/vsx_preempt: vsx_asm.S ../utils.c
+
+$(OUTPUT)/mma: mma.c mma.S ../utils.c
diff --git a/tools/testing/selftests/powerpc/math/mma.S b/tools/testing/selftests/powerpc/math/mma.S
new file mode 100644
index 000000000000..8528c9849565
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/mma.S
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Test basic matrix multiply assist (MMA) functionality if available.
+ *
+ * Copyright 2020, Alistair Popple, IBM Corp.
+ */
+ .global test_mma
+test_mma:
+ /* Load the input vectors into VSX registers from the images passed in r3 and r4 */
+ lxvh8x 4,0,3
+ lxvh8x 5,0,4
+
+ /* Clear and prime the accumulator (xxsetaccz) */
+ .long 0x7c030162
+
+ /* Prime the accumulator with MMA VSX move to accumulator
+ * X-form (xxmtacc) (not needed due to above zeroing) */
+ //.long 0x7c010162
+
+ /* xvi16ger2s */
+ .long 0xec042958
+
+ /* Store result in image passed in r5 */
+ stxvw4x 0,0,5
+ addi 5,5,16
+ stxvw4x 1,0,5
+ addi 5,5,16
+ stxvw4x 2,0,5
+ addi 5,5,16
+ stxvw4x 3,0,5
+ addi 5,5,16
+
+ blr
diff --git a/tools/testing/selftests/powerpc/math/mma.c b/tools/testing/selftests/powerpc/math/mma.c
new file mode 100644
index 000000000000..3a71808c993f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/mma.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test basic matrix multiply assist (MMA) functionality if available.
+ *
+ * Copyright 2020, Alistair Popple, IBM Corp.
+ */
+#include <stdio.h>
+#include <stdint.h>
+
+#include "utils.h"
+
+extern void test_mma(uint16_t (*)[8], uint16_t (*)[8], uint32_t (*)[4*4]);
+
+static int mma(void)
+{
+ int i;
+ int rc = 0;
+ uint16_t x[] = {1, 0, 2, 0, 3, 0, 4, 0};
+ uint16_t y[] = {1, 0, 2, 0, 3, 0, 4, 0};
+ uint32_t z[4*4];
+ uint32_t exp[4*4] = {1, 2, 3, 4,
+ 2, 4, 6, 8,
+ 3, 6, 9, 12,
+ 4, 8, 12, 16};
+
+ SKIP_IF_MSG(!have_hwcap2(PPC_FEATURE2_ARCH_3_1), "Need ISAv3.1");
+ SKIP_IF_MSG(!have_hwcap2(PPC_FEATURE2_MMA), "Need MMA");
+
+ test_mma(&x, &y, &z);
+
+ for (i = 0; i < 16; i++) {
+ printf("MMA[%d] = %d ", i, z[i]);
+
+ if (z[i] == exp[i]) {
+ printf(" (Correct)\n");
+ } else {
+ printf(" (Incorrect)\n");
+ rc = 1;
+ }
+ }
+
+ return rc;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(mma, "mma");
+}
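
For reference, the expected matrix is just the outer product of the even
halfwords: with the odd halfwords zeroed, each two-way dot product that
xvi16ger2s accumulates collapses to a single multiply. A sketch of that
reference model (not part of the selftest):

	#include <assert.h>
	#include <stdint.h>

	/* Sketch: exp[i][j] == x[2*i] * y[2*j] for these inputs. */
	static void check_expected(const uint16_t x[8], const uint16_t y[8],
				   const uint32_t exp[16])
	{
		int i, j;

		for (i = 0; i < 4; i++)
			for (j = 0; j < 4; j++)
				assert(exp[i * 4 + j] ==
				       (uint32_t)x[i * 2] * y[j * 2]);
	}
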
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index aac4a59f9e28..4e1a294eec35 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -12,3 +12,4 @@ pkey_exec_prot
pkey_siginfo
stack_expansion_ldst
stack_expansion_signal
+large_vm_gpr_corruption
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index 40253abc6208..27dc09d0bfee 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -4,7 +4,8 @@ noarg:
TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
large_vm_fork_separation bad_accesses pkey_exec_prot \
- pkey_siginfo stack_expansion_signal stack_expansion_ldst
+ pkey_siginfo stack_expansion_signal stack_expansion_ldst \
+ large_vm_gpr_corruption
TEST_PROGS := stress_code_patching.sh
TEST_GEN_PROGS_EXTENDED := tlbie_test
@@ -19,6 +20,7 @@ $(OUTPUT)/prot_sao: ../utils.c
$(OUTPUT)/wild_bctr: CFLAGS += -m64
$(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
+$(OUTPUT)/large_vm_gpr_corruption: CFLAGS += -m64
$(OUTPUT)/bad_accesses: CFLAGS += -m64
$(OUTPUT)/pkey_exec_prot: CFLAGS += -m64
$(OUTPUT)/pkey_siginfo: CFLAGS += -m64
diff --git a/tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c b/tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c
new file mode 100644
index 000000000000..927bfae99ed9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2022, Michael Ellerman, IBM Corp.
+//
+// Test that the 4PB address space SLB handling doesn't corrupt userspace registers
+// (r9-r13) due to an SLB fault while saving the PPR.
+//
+// The bug was introduced in f384796c4 ("powerpc/mm: Add support for handling > 512TB
+// address in SLB miss") and fixed in 4c2de74cc869 ("powerpc/64: Interrupts save PPR on
+// stack rather than thread_struct").
+//
+// To hit the bug requires the task struct and kernel stack to be in different segments.
+// Usually that requires more than 1TB of RAM, or if that's not practical, boot the kernel
+// with "disable_1tb_segments".
+//
+// The test works by creating mappings above 512TB, to trigger the large address space
+// support. It creates 64 mappings, double the size of the SLB, to cause SLB faults on
+// each access (assuming naive replacement). It then loops over those mappings touching
+// each, and checks that r9-r13 aren't corrupted.
+//
+// It then forks another child and tries again, because a new child process will get a new
+// kernel stack and thread struct allocated, which may be more optimally placed to trigger
+// the bug. It would probably be better to leave the previous child processes hanging
+// around, so that kernel stack & thread struct allocations are not reused, but that would
+// amount to a 30 second fork bomb. The current design reliably triggers the bug on
+// unpatched kernels.
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+#ifndef MAP_FIXED_NOREPLACE
+#define MAP_FIXED_NOREPLACE MAP_FIXED // "Should be safe" above 512TB
+#endif
+
+#define BASE_ADDRESS (1ul << 50) // 1PB
+#define STRIDE (2ul << 40) // 2TB
+#define SLB_SIZE 32
+#define NR_MAPPINGS (SLB_SIZE * 2)
+
+static volatile sig_atomic_t signaled;
+
+static void signal_handler(int sig)
+{
+ signaled = 1;
+}
+
+#define CHECK_REG(_reg) \
+ if (_reg != _reg##_orig) { \
+ printf(str(_reg) " corrupted! Expected 0x%lx != 0x%lx\n", _reg##_orig, \
+ _reg); \
+ _exit(1); \
+ }
+
+static int touch_mappings(void)
+{
+ unsigned long r9_orig, r10_orig, r11_orig, r12_orig, r13_orig;
+ unsigned long r9, r10, r11, r12, r13;
+ unsigned long addr, *p;
+ int i;
+
+ for (i = 0; i < NR_MAPPINGS; i++) {
+ addr = BASE_ADDRESS + (i * STRIDE);
+ p = (unsigned long *)addr;
+
+ asm volatile("mr %0, %%r9 ;" // Read original GPR values
+ "mr %1, %%r10 ;"
+ "mr %2, %%r11 ;"
+ "mr %3, %%r12 ;"
+ "mr %4, %%r13 ;"
+ "std %10, 0(%11) ;" // Trigger SLB fault
+ "mr %5, %%r9 ;" // Save possibly corrupted values
+ "mr %6, %%r10 ;"
+ "mr %7, %%r11 ;"
+ "mr %8, %%r12 ;"
+ "mr %9, %%r13 ;"
+ "mr %%r9, %0 ;" // Restore original values
+ "mr %%r10, %1 ;"
+ "mr %%r11, %2 ;"
+ "mr %%r12, %3 ;"
+ "mr %%r13, %4 ;"
+ : "=&b"(r9_orig), "=&b"(r10_orig), "=&b"(r11_orig),
+ "=&b"(r12_orig), "=&b"(r13_orig), "=&b"(r9), "=&b"(r10),
+ "=&b"(r11), "=&b"(r12), "=&b"(r13)
+ : "b"(i), "b"(p)
+ : "r9", "r10", "r11", "r12", "r13");
+
+ CHECK_REG(r9);
+ CHECK_REG(r10);
+ CHECK_REG(r11);
+ CHECK_REG(r12);
+ CHECK_REG(r13);
+ }
+
+ return 0;
+}
+
+static int test(void)
+{
+ unsigned long page_size, addr, *p;
+ struct sigaction action;
+ bool hash_mmu;
+ int i, status;
+ pid_t pid;
+
+ // This tests a hash MMU specific bug.
+ FAIL_IF(using_hash_mmu(&hash_mmu));
+ SKIP_IF(!hash_mmu);
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ for (i = 0; i < NR_MAPPINGS; i++) {
+ addr = BASE_ADDRESS + (i * STRIDE);
+
+ p = mmap((void *)addr, page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
+ if (p == MAP_FAILED) {
+ perror("mmap");
+ printf("Error: couldn't mmap(), check that the kernel has 4PB support\n");
+ return 1;
+ }
+ }
+
+ action.sa_handler = signal_handler;
+ action.sa_flags = SA_RESTART;
+ FAIL_IF(sigaction(SIGALRM, &action, NULL) < 0);
+
+ // Seen to always crash in under ~10s on affected kernels.
+ alarm(30);
+
+ while (!signaled) {
+ // Fork new processes, to increase the chance that we hit the case where
+ // the kernel stack and task struct are in different segments.
+ pid = fork();
+ if (pid == 0)
+ exit(touch_mappings());
+
+ FAIL_IF(waitpid(-1, &status, 0) == -1);
+ FAIL_IF(WIFSIGNALED(status));
+ FAIL_IF(!WIFEXITED(status));
+ FAIL_IF(WEXITSTATUS(status));
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test, "large_vm_gpr_corruption");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S b/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S
deleted file mode 100644
index 08a7b5f133b9..000000000000
--- a/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright 2014, Michael Ellerman, IBM Corp.
- */
-
-#include <ppc-asm.h>
-
- .text
-
-FUNC_START(thirty_two_instruction_loop)
- cmpwi r3,0
- beqlr
- addi r4,r3,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1 # 28 addi's
- subi r3,r3,1
- b FUNC_NAME(thirty_two_instruction_loop)
-FUNC_END(thirty_two_instruction_loop)
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
index fca054bbc094..c01a31d5f4ee 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
@@ -274,7 +274,7 @@ u64 *get_intr_regs(struct event *event, void *sample_buff)
return intr_regs;
}
-static const unsigned int __perf_reg_mask(const char *register_name)
+static const int __perf_reg_mask(const char *register_name)
{
if (!strcmp(register_name, "R0"))
return 0;
diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
index d42ca8c676c3..5b2abb719ef2 100644
--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -182,17 +182,23 @@ int spectre_v2_test(void)
case COUNT_CACHE_FLUSH_HW:
// These should all not affect userspace branch prediction
if (miss_percent > 15) {
- printf("Branch misses > 15%% unexpected in this configuration!\n");
- printf("Possible mis-match between reported & actual mitigation\n");
- /*
- * Such a mismatch may be caused by a guest system
- * reporting as vulnerable when the host is mitigated.
- * Return skip code to avoid detecting this as an error.
- * We are not vulnerable and reporting otherwise, so
- * missing such a mismatch is safe.
- */
- if (miss_percent > 95)
+ if (miss_percent > 95) {
+ /*
+ * Such a mismatch may be caused by a system being unaware
+ * that the count cache is disabled, for example to allow
+ * guest migration between hosts with different settings.
+ * Return skip code to avoid detecting this as an error.
+ * We are not vulnerable and reporting otherwise, so
+ * missing such a mismatch is safe.
+ */
+ printf("Branch misses > 95%% unexpected in this configuration.\n");
+ printf("Count cache likely disabled without Linux knowing.\n");
+ if (state == COUNT_CACHE_FLUSH_SW)
+ printf("WARNING: Kernel performing unnecessary flushes.\n");
return 4;
+ }
+ printf("Branch misses > 15%% unexpected in this configuration!\n");
+ printf("Possible mismatch between reported & actual mitigation\n");
return 1;
}
@@ -201,14 +207,14 @@ int spectre_v2_test(void)
// This seems to affect userspace branch prediction a bit?
if (miss_percent > 25) {
printf("Branch misses > 25%% unexpected in this configuration!\n");
- printf("Possible mis-match between reported & actual mitigation\n");
+ printf("Possible mismatch between reported & actual mitigation\n");
return 1;
}
break;
case COUNT_CACHE_DISABLED:
if (miss_percent < 95) {
- printf("Branch misses < 20%% unexpected in this configuration!\n");
- printf("Possible mis-match between reported & actual mitigation\n");
+ printf("Branch misses < 95%% unexpected in this configuration!\n");
+ printf("Possible mismatch between reported & actual mitigation\n");
return 1;
}
break;
diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile
index 11fb417abb42..3822f4ea5f49 100644
--- a/tools/tracing/rtla/Makefile
+++ b/tools/tracing/rtla/Makefile
@@ -23,6 +23,7 @@ $(call allow-override,LD_SO_CONF_PATH,/etc/ld.so.conf.d/)
$(call allow-override,LDCONFIG,ldconfig)
INSTALL = install
+MKDIR = mkdir
FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \
-fasynchronous-unwind-tables -fstack-clash-protection
WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized
@@ -31,7 +32,7 @@ TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs)
CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS)
LDFLAGS := -ggdb
-LIBS := $$($(PKG_CONFIG) --libs libtracefs) -lprocps
+LIBS := $$($(PKG_CONFIG) --libs libtracefs)
SRC := $(wildcard src/*.c)
HDR := $(wildcard src/*.h)
@@ -57,6 +58,41 @@ else
DOCSRC = $(SRCTREE)/../../../Documentation/tools/rtla/
endif
+LIBTRACEEVENT_MIN_VERSION = 1.5
+LIBTRACEFS_MIN_VERSION = 1.3
+
+TEST_LIBTRACEEVENT = $(shell sh -c "$(PKG_CONFIG) --atleast-version $(LIBTRACEEVENT_MIN_VERSION) libtraceevent > /dev/null 2>&1 || echo n")
+ifeq ("$(TEST_LIBTRACEEVENT)", "n")
+.PHONY: warning_traceevent
+warning_traceevent:
+ @echo "********************************************"
+ @echo "** NOTICE: libtraceevent version $(LIBTRACEEVENT_MIN_VERSION) or higher not found"
+ @echo "**"
+ @echo "** Consider installing the latest libtraceevent from your"
+ @echo "** distribution, e.g., 'dnf install libtraceevent' on Fedora,"
+ @echo "** or from source:"
+ @echo "**"
+ @echo "** https://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git/ "
+ @echo "**"
+ @echo "********************************************"
+endif
+
+TEST_LIBTRACEFS = $(shell sh -c "$(PKG_CONFIG) --atleast-version $(LIBTRACEFS_MIN_VERSION) libtracefs > /dev/null 2>&1 || echo n")
+ifeq ("$(TEST_LIBTRACEFS)", "n")
+.PHONY: warning_tracefs
+warning_tracefs:
+ @echo "********************************************"
+ @echo "** NOTICE: libtracefs version $(LIBTRACEFS_MIN_VERSION) or higher not found"
+ @echo "**"
+ @echo "** Consider installing the latest libtracefs from your"
+ @echo "** distribution, e.g., 'dnf install libtracefs' on Fedora,"
+ @echo "** or from source:"
+ @echo "**"
+ @echo "** https://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git/ "
+ @echo "**"
+ @echo "********************************************"
+endif
+
.PHONY: all
all: rtla
@@ -68,7 +104,7 @@ static: $(OBJ)
.PHONY: install
install: doc_install
- $(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)
+ $(MKDIR) -p $(DESTDIR)$(BINDIR)
$(INSTALL) rtla -m 755 $(DESTDIR)$(BINDIR)
$(STRIP) $(DESTDIR)$(BINDIR)/rtla
@test ! -f $(DESTDIR)$(BINDIR)/osnoise || rm $(DESTDIR)$(BINDIR)/osnoise
diff --git a/tools/tracing/rtla/README.txt b/tools/tracing/rtla/README.txt
index 6c88446f7e74..4af3fd40f171 100644
--- a/tools/tracing/rtla/README.txt
+++ b/tools/tracing/rtla/README.txt
@@ -1,19 +1,16 @@
RTLA: Real-Time Linux Analysis tools
-The rtla is a meta-tool that includes a set of commands that
-aims to analyze the real-time properties of Linux. But, instead of
-testing Linux as a black box, rtla leverages kernel tracing
-capabilities to provide precise information about the properties
-and root causes of unexpected results.
+The rtla meta-tool includes a set of commands that aim to analyze
+the real-time properties of Linux. Instead of testing Linux as a black box,
+rtla leverages kernel tracing capabilities to provide precise information
+about the properties and root causes of unexpected results.
Installing RTLA
-RTLA depends on some libraries and tools. More precisely, it depends on the
-following libraries:
+RTLA depends on the following libraries and tools:
- libtracefs
- libtraceevent
- - procps
It also depends on python3-docutils to compile man pages.
diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c
index b4380d45cacd..5d7ea479ac89 100644
--- a/tools/tracing/rtla/src/osnoise_hist.c
+++ b/tools/tracing/rtla/src/osnoise_hist.c
@@ -809,7 +809,7 @@ int osnoise_hist_main(int argc, char *argv[])
retval = set_comm_sched_attr("osnoise/", &params->sched_param);
if (retval) {
err_msg("Failed to set sched parameters\n");
- goto out_hist;
+ goto out_free;
}
}
@@ -819,7 +819,7 @@ int osnoise_hist_main(int argc, char *argv[])
record = osnoise_init_trace_tool("osnoise");
if (!record) {
err_msg("Failed to enable the trace instance\n");
- goto out_hist;
+ goto out_free;
}
if (params->events) {
@@ -869,6 +869,7 @@ int osnoise_hist_main(int argc, char *argv[])
out_hist:
trace_events_destroy(&record->trace, params->events);
params->events = NULL;
+out_free:
osnoise_free_histogram(tool->data);
out_destroy:
osnoise_destroy_tool(record);
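
The pattern behind these goto changes: before record is initialized, jumping
to out_hist hands a NULL record to trace_events_destroy(). The new out_free
label unwinds only what exists at that point. A standalone sketch of the
idiom (hypothetical allocations):

	#include <stdlib.h>

	static int setup(void)
	{
		char *a, *b;
		int ret = -1;

		a = malloc(16);
		if (!a)
			goto out;
		b = malloc(16);
		if (!b)
			goto out_free_a;	/* b does not exist yet */

		ret = 0;
		free(b);
	out_free_a:
		free(a);
	out:
		return ret;
	}
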
diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c
index 72c2fd6ce005..76479bfb2922 100644
--- a/tools/tracing/rtla/src/osnoise_top.c
+++ b/tools/tracing/rtla/src/osnoise_top.c
@@ -572,7 +572,7 @@ int osnoise_top_main(int argc, char **argv)
retval = osnoise_top_apply_config(tool, params);
if (retval) {
err_msg("Could not apply config\n");
- goto out_top;
+ goto out_free;
}
trace = &tool->trace;
@@ -580,14 +580,14 @@ int osnoise_top_main(int argc, char **argv)
retval = enable_osnoise(trace);
if (retval) {
err_msg("Failed to enable osnoise tracer\n");
- goto out_top;
+ goto out_free;
}
if (params->set_sched) {
retval = set_comm_sched_attr("osnoise/", &params->sched_param);
if (retval) {
err_msg("Failed to set sched parameters\n");
- goto out_top;
+ goto out_free;
}
}
@@ -597,7 +597,7 @@ int osnoise_top_main(int argc, char **argv)
record = osnoise_init_trace_tool("osnoise");
if (!record) {
err_msg("Failed to enable the trace instance\n");
- goto out_top;
+ goto out_free;
}
if (params->events) {
@@ -649,6 +649,7 @@ int osnoise_top_main(int argc, char **argv)
out_top:
trace_events_destroy(&record->trace, params->events);
params->events = NULL;
+out_free:
osnoise_free_top(tool->data);
osnoise_destroy_tool(record);
osnoise_destroy_tool(tool);
diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c
index dc908126c610..f3ec628f5e51 100644
--- a/tools/tracing/rtla/src/timerlat_hist.c
+++ b/tools/tracing/rtla/src/timerlat_hist.c
@@ -821,7 +821,7 @@ int timerlat_hist_main(int argc, char *argv[])
retval = timerlat_hist_apply_config(tool, params);
if (retval) {
err_msg("Could not apply config\n");
- goto out_hist;
+ goto out_free;
}
trace = &tool->trace;
@@ -829,14 +829,14 @@ int timerlat_hist_main(int argc, char *argv[])
retval = enable_timerlat(trace);
if (retval) {
err_msg("Failed to enable timerlat tracer\n");
- goto out_hist;
+ goto out_free;
}
if (params->set_sched) {
retval = set_comm_sched_attr("timerlat/", &params->sched_param);
if (retval) {
err_msg("Failed to set sched parameters\n");
- goto out_hist;
+ goto out_free;
}
}
@@ -844,7 +844,7 @@ int timerlat_hist_main(int argc, char *argv[])
dma_latency_fd = set_cpu_dma_latency(params->dma_latency);
if (dma_latency_fd < 0) {
err_msg("Could not set /dev/cpu_dma_latency.\n");
- goto out_hist;
+ goto out_free;
}
}
@@ -854,7 +854,7 @@ int timerlat_hist_main(int argc, char *argv[])
record = osnoise_init_trace_tool("timerlat");
if (!record) {
err_msg("Failed to enable the trace instance\n");
- goto out_hist;
+ goto out_free;
}
if (params->events) {
@@ -904,6 +904,7 @@ out_hist:
close(dma_latency_fd);
trace_events_destroy(&record->trace, params->events);
params->events = NULL;
+out_free:
timerlat_free_histogram(tool->data);
osnoise_destroy_tool(record);
osnoise_destroy_tool(tool);
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
index 1f754c3df53f..35452a1d45e9 100644
--- a/tools/tracing/rtla/src/timerlat_top.c
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -612,7 +612,7 @@ int timerlat_top_main(int argc, char *argv[])
retval = timerlat_top_apply_config(top, params);
if (retval) {
err_msg("Could not apply config\n");
- goto out_top;
+ goto out_free;
}
trace = &top->trace;
@@ -620,14 +620,14 @@ int timerlat_top_main(int argc, char *argv[])
retval = enable_timerlat(trace);
if (retval) {
err_msg("Failed to enable timerlat tracer\n");
- goto out_top;
+ goto out_free;
}
if (params->set_sched) {
retval = set_comm_sched_attr("timerlat/", &params->sched_param);
if (retval) {
err_msg("Failed to set sched parameters\n");
- goto out_top;
+ goto out_free;
}
}
@@ -635,7 +635,7 @@ int timerlat_top_main(int argc, char *argv[])
dma_latency_fd = set_cpu_dma_latency(params->dma_latency);
if (dma_latency_fd < 0) {
err_msg("Could not set /dev/cpu_dma_latency.\n");
- goto out_top;
+ goto out_free;
}
}
@@ -645,7 +645,7 @@ int timerlat_top_main(int argc, char *argv[])
record = osnoise_init_trace_tool("timerlat");
if (!record) {
err_msg("Failed to enable the trace instance\n");
- goto out_top;
+ goto out_free;
}
if (params->events) {
@@ -699,6 +699,7 @@ out_top:
close(dma_latency_fd);
trace_events_destroy(&record->trace, params->events);
params->events = NULL;
+out_free:
timerlat_free_top(top->data);
osnoise_destroy_tool(record);
osnoise_destroy_tool(top);
diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c
index da2b590edaed..5352167a1e75 100644
--- a/tools/tracing/rtla/src/utils.c
+++ b/tools/tracing/rtla/src/utils.c
@@ -3,7 +3,7 @@
* Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
*/
-#include <proc/readproc.h>
+#include <dirent.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
@@ -255,50 +255,114 @@ int __set_sched_attr(int pid, struct sched_attr *attr)
retval = sched_setattr(pid, attr, flags);
if (retval < 0) {
- err_msg("boost_with_deadline failed to boost pid %d: %s\n",
+ err_msg("Failed to set sched attributes for pid %d: %s\n",
pid, strerror(errno));
return 1;
}
return 0;
}
+
+/*
+ * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm
+ *
+ * Check if the procfs entry is a directory of a process, and then check if the
+ * process has a comm with the prefix set in char *comm_prefix. As the
+ * current users of this function only check for kernel threads, there is
+ * no need to check the individual threads of the process.
+ *
+ * Return: True if the proc_entry contains a comm file with comm_prefix*.
+ * Otherwise returns false.
+ */
+static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry)
+{
+ char buffer[MAX_PATH];
+ int comm_fd, retval;
+ char *t_name;
+
+ if (proc_entry->d_type != DT_DIR)
+ return 0;
+
+ if (*proc_entry->d_name == '.')
+ return 0;
+
+ /* check if the string is a pid */
+ for (t_name = proc_entry->d_name; *t_name; t_name++) {
+ if (!isdigit(*t_name))
+ break;
+ }
+
+ if (*t_name != '\0')
+ return 0;
+
+ snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name);
+ comm_fd = open(buffer, O_RDONLY);
+ if (comm_fd < 0)
+ return 0;
+
+ memset(buffer, 0, MAX_PATH);
+ retval = read(comm_fd, buffer, MAX_PATH);
+
+ close(comm_fd);
+
+ if (retval <= 0)
+ return 0;
+
+ retval = strncmp(comm_prefix, buffer, strlen(comm_prefix));
+ if (retval)
+ return 0;
+
+ /* comm already has a \n */
+ debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer);
+
+ return 1;
+}
+
/*
- * set_comm_sched_attr - set sched params to threads starting with char *comm
+ * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix
*
- * This function uses procps to list the currently running threads and then
- * set the sched_attr *attr to the threads that start with char *comm. It is
+ * This function uses procfs to list the currently running threads and then sets the
+ * sched_attr *attr to the threads that start with char *comm_prefix. It is
* mainly used to set the priority to the kernel threads created by the
* tracers.
*/
-int set_comm_sched_attr(const char *comm, struct sched_attr *attr)
+int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr)
{
- int flags = PROC_FILLCOM | PROC_FILLSTAT;
- PROCTAB *ptp;
- proc_t task;
+ struct dirent *proc_entry;
+ DIR *procfs;
int retval;
- ptp = openproc(flags);
- if (!ptp) {
- err_msg("error openproc()\n");
- return -ENOENT;
+ if (strlen(comm_prefix) >= MAX_PATH) {
+ err_msg("Command prefix is too long: %d < strlen(%s)\n",
+ MAX_PATH, comm_prefix);
+ return 1;
}
- memset(&task, 0, sizeof(task));
+ procfs = opendir("/proc");
+ if (!procfs) {
+ err_msg("Could not open procfs\n");
+ return 1;
+ }
- while (readproc(ptp, &task)) {
- retval = strncmp(comm, task.cmd, strlen(comm));
- if (retval)
+ while ((proc_entry = readdir(procfs))) {
+
+ retval = procfs_is_workload_pid(comm_prefix, proc_entry);
+ if (!retval)
continue;
- retval = __set_sched_attr(task.tid, attr);
- if (retval)
+
+ /* procfs_is_workload_pid confirmed it is a pid */
+ retval = __set_sched_attr(atoi(proc_entry->d_name), attr);
+ if (retval) {
+ err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name);
goto out_err;
- }
+ }
- closeproc(ptp);
+ debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name);
+ }
return 0;
out_err:
- closeproc(ptp);
+ closedir(procfs);
return 1;
}
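
Typical use from the tools above, sketched with illustrative values (the
field names assume rtla's struct sched_attr mirrors the kernel uapi layout):

	struct sched_attr attr = { 0 };

	attr.sched_policy = SCHED_FIFO;		/* from <sched.h> */
	attr.sched_priority = 1;
	if (set_comm_sched_attr("timerlat/", &attr))
		err_msg("Failed to set priority of timerlat/ kernel threads\n");
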
diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h
index fa08e374870a..5571afd3b549 100644
--- a/tools/tracing/rtla/src/utils.h
+++ b/tools/tracing/rtla/src/utils.h
@@ -6,6 +6,7 @@
* '18446744073709551615\0'
*/
#define BUFF_U64_STR_SIZE 24
+#define MAX_PATH 1024
#define container_of(ptr, type, member)({ \
const typeof(((type *)0)->member) *__mptr = (ptr); \
@@ -53,5 +54,5 @@ struct sched_attr {
};
int parse_prio(char *arg, struct sched_attr *sched_param);
-int set_comm_sched_attr(const char *comm, struct sched_attr *attr);
+int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr);
int set_cpu_dma_latency(int32_t latency);