Merge tag 'perf-tools-for-v6.8-1-2024-01-09' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools

Pull perf tools updates from Arnaldo Carvalho de Melo: "Add Namhyung Kim as tools/perf/ co-maintainer, we're taking turns processing patches, switching roles from perf-tools to perf-tools-next at each Linux release. Data profiling: - Associate samples that identify loads and stores with data structures. This uses events available on Intel, AMD and others and DWARF info: # To get memory access samples in kernel for 1 second (on Intel) $ perf mem record -a -K --ldlat=4 -- sleep 1 # Similar for the AMD (but it requires 6.3+ kernel for BPF filters) $ perf mem record -a --filter 'mem_op == load || mem_op == store, ip > 0x8000000000000000' -- sleep 1 Then, amongst several modes of post processing, one can do things like: $ perf report -s type,typeoff --hierarchy --group --stdio ... # # Samples: 10K of events 'cpu/mem-loads,ldlat=4/P, cpu/mem-stores/P, dummy:u' # Event count (approx.): 602758064 # # Overhead Data Type / Data Type Offset # ........................... ............................ # 26.09% 3.28% 0.00% long unsigned int 26.09% 3.28% 0.00% long unsigned int +0 (no field) 18.48% 0.73% 0.00% struct page 10.83% 0.02% 0.00% struct page +8 (lru.next) 3.90% 0.28% 0.00% struct page +0 (flags) 3.45% 0.06% 0.00% struct page +24 (mapping) 0.25% 0.28% 0.00% struct page +48 (_mapcount.counter) 0.02% 0.06% 0.00% struct page +32 (index) 0.02% 0.00% 0.00% struct page +52 (_refcount.counter) 0.02% 0.01% 0.00% struct page +56 (memcg_data) 0.00% 0.01% 0.00% struct page +16 (lru.prev) 15.37% 17.54% 0.00% (stack operation) 15.37% 17.54% 0.00% (stack operation) +0 (no field) 11.71% 50.27% 0.00% (unknown) 11.71% 50.27% 0.00% (unknown) +0 (no field) $ perf annotate --data-type ... Annotate type: 'struct cfs_rq' in [kernel.kallsyms] (13 samples): ============================================================================ samples offset size field 13 0 640 struct cfs_rq { 2 0 16 struct load_weight load { 2 0 8 unsigned long weight; 0 8 4 u32 inv_weight; }; 0 16 8 unsigned long runnable_weight; 0 24 4 unsigned int nr_running; 1 28 4 unsigned int h_nr_running; ... $ perf annotate --data-type=page --group Annotate type: 'struct page' in [kernel.kallsyms] (480 samples): event[0] = cpu/mem-loads,ldlat=4/P event[1] = cpu/mem-stores/P event[2] = dummy:u =================================================================================== samples offset size field 447 33 0 0 64 struct page { 108 8 0 0 8 long unsigned int flags; 319 13 0 8 40 union { 319 13 0 8 40 struct { 236 2 0 8 16 union { 236 2 0 8 16 struct list_head lru { 236 1 0 8 8 struct list_head* next; 0 1 0 16 8 struct list_head* prev; }; 236 2 0 8 16 struct { 236 1 0 8 8 void* __filler; 0 1 0 16 4 unsigned int mlock_count; }; 236 2 0 8 16 struct list_head buddy_list { 236 1 0 8 8 struct list_head* next; 0 1 0 16 8 struct list_head* prev; }; 236 2 0 8 16 struct list_head pcp_list { 236 1 0 8 8 struct list_head* next; 0 1 0 16 8 struct list_head* prev; }; }; 82 4 0 24 8 struct address_space* mapping; 1 7 0 32 8 union { 1 7 0 32 8 long unsigned int index; 1 7 0 32 8 long unsigned int share; }; 0 0 0 40 8 long unsigned int private; }; This uses the existing annotate code, calling objdump to do the disassembly, with improvements to avoid having this take too long, but longer term a switch to a disassembler library, possibly reusing code in the kernel will be pursued. This is the initial implementation, please use it and report impressions and bugs. Make sure the kernel-debuginfo packages match the running kernel. The 'perf report' phase for non short perf.data files may take a while. There is a great article about it on LWN: https://lwn.net/Articles/955709/ - "Data-type profiling for perf" One last test I did while writing this text, on a AMD Ryzen 5950X, using a distro kernel, while doing a simple 'find /' on an otherwise idle system resulted in: # uname -r 6.6.9-100.fc38.x86_64 # perf -vv | grep BPF_ bpf: [ on ] # HAVE_LIBBPF_SUPPORT bpf_skeletons: [ on ] # HAVE_BPF_SKEL # rpm -qa | grep kernel-debuginfo kernel-debuginfo-common-x86_64-6.6.9-100.fc38.x86_64 kernel-debuginfo-6.6.9-100.fc38.x86_64 # # perf mem record -a --filter 'mem_op == load || mem_op == store, ip > 0x8000000000000000' ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 2.199 MB perf.data (2913 samples) ] # # ls -la perf.data -rw-------. 1 root root 2346486 Jan 9 18:36 perf.data # perf evlist ibs_op// dummy:u # perf evlist -v ibs_op//: type: 11, size: 136, config: 0, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|CPU|PERIOD|IDENTIFIER|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, freq: 1, sample_id_all: 1 dummy:u: type: 1 (PERF_TYPE_SOFTWARE), size: 136, config: 0x9 (PERF_COUNT_SW_DUMMY), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|ADDR|CPU|IDENTIFIER|DATA_SRC|WEIGHT, read_format: ID, inherit: 1, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, task: 1, mmap_data: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1 # # perf report -s type,typeoff --hierarchy --group --stdio # Total Lost Samples: 0 # # Samples: 2K of events 'ibs_op//, dummy:u' # Event count (approx.): 1904553038 # # Overhead Data Type / Data Type Offset # ................... ............................ # 73.70% 0.00% (unknown) 73.70% 0.00% (unknown) +0 (no field) 3.01% 0.00% long unsigned int 3.00% 0.00% long unsigned int +0 (no field) 0.01% 0.00% long unsigned int +2 (no field) 2.73% 0.00% struct task_struct 1.71% 0.00% struct task_struct +52 (on_cpu) 0.38% 0.00% struct task_struct +2104 (rcu_read_unlock_special.b.blocked) 0.23% 0.00% struct task_struct +2100 (rcu_read_lock_nesting) 0.14% 0.00% struct task_struct +2384 () 0.06% 0.00% struct task_struct +3096 (signal) 0.05% 0.00% struct task_struct +3616 (cgroups) 0.05% 0.00% struct task_struct +2344 (active_mm) 0.02% 0.00% struct task_struct +46 (flags) 0.02% 0.00% struct task_struct +2096 (migration_disabled) 0.01% 0.00% struct task_struct +24 (__state) 0.01% 0.00% struct task_struct +3956 (mm_cid_active) 0.01% 0.00% struct task_struct +1048 (cpus_ptr) 0.01% 0.00% struct task_struct +184 (se.group_node.next) 0.01% 0.00% struct task_struct +20 (thread_info.cpu) 0.00% 0.00% struct task_struct +104 (on_rq) 0.00% 0.00% struct task_struct +2456 (pid) 1.36% 0.00% struct module 0.59% 0.00% struct module +952 (kallsyms) 0.42% 0.00% struct module +0 (state) 0.23% 0.00% struct module +8 (list.next) 0.12% 0.00% struct module +216 (syms) 0.95% 0.00% struct inode 0.41% 0.00% struct inode +40 (i_sb) 0.22% 0.00% struct inode +0 (i_mode) 0.06% 0.00% struct inode +76 (i_rdev) 0.06% 0.00% struct inode +56 (i_security) <SNIP> perf top/report: - Don't ignore job control, allowing control+Z + bg to work. - Add s390 raw data interpretation for PAI (Processor Activity Instrumentation) counters. perf archive: - Add new option '--all' to pack perf.data with DSOs. - Add new option '--unpack' to expand tarballs. Initialization speedups: - Lazily initialize zstd streams to save memory when not using it. - Lazily allocate/size mmap event copy. - Lazy load kernel symbols in 'perf record'. - Be lazier in allocating lost samples buffer in 'perf record'. - Don't synthesize BPF events when disabled via the command line (perf record --no-bpf-event). Assorted improvements: - Show note on AMD systems that the :p, :pp, :ppp and :P are all the same, as IBS (Instruction Based Sampling) is used and it is inherentely precise, not having levels of precision like in Intel systems. - When 'cycles' isn't available, fall back to the "task-clock" event when not system wide, not to 'cpu-clock'. - Add --debug-file option to redirect debug output, e.g.: $ perf --debug-file /tmp/perf.log record -v true - Shrink 'struct map' to under one cacheline by avoiding function pointers for selecting if addresses are identity or DSO relative, and using just a byte for some boolean struct members. - Resolve the arch specific strerrno just once to use in perf_env__arch_strerrno(). - Reduce memory for recording PERF_RECORD_LOST_SAMPLES event. Assorted fixes: - Fix the default 'perf top' usage on Intel hybrid systems, now it starts with a browser showing the number of samples for Efficiency (cpu_atom/cycles/P) and Performance (cpu_core/cycles/P). This behaviour is similar on ARM64, with its respective set of big.LITTLE processors. - Fix segfault on build_mem_topology() error path. - Fix 'perf mem' error on hybrid related to availability of mem event in a PMU. - Fix missing reference count gets (map, maps) in the db-export code. - Avoid recursively taking env->bpf_progs.lock in the 'perf_env' code. - Use the newly introduced maps__for_each_map() to add missing locking around iteration of 'struct map' entries. - Parse NOTE segments until the build id is found, don't stop on the first one, ELF files may have several such NOTE segments. - Remove 'egrep' usage, its deprecated, use 'grep -E' instead. - Warn first about missing libelf, not libbpf, that depends on libelf. - Use alternative to 'find ... -printf' as this isn't supported in busybox. - Address python 3.6 DeprecationWarning for string scapes. - Fix memory leak in uniq() in libsubcmd. - Fix man page formatting for 'perf lock' - Fix some spelling mistakes. perf tests: - Fail shell tests that needs some symbol in perf itself if it is stripped. These tests check if a symbol is resolved, if some hot function is indeed detected by profiling, etc. - The 'perf test sigtrap' test is currently failing on PREEMPT_RT, skip it if sleeping spinlocks are detected (using BTF) and point to the mailing list discussion about it. This test is also being skipped on several architectures (powerpc, s390x, arm and aarch64) due to other pending issues with intruction breakpoints. - Adjust test case perf record offcpu profiling tests for s390. - Fix 'Setup struct perf_event_attr' fails on s390 on z/VM guest, addressing issues caused by the fallback from cycles to task-clock done in this release. - Fix mask for VG register in the user-regs test. - Use shellcheck on 'perf test' shell scripts automatically to make sure changes don't introduce things it flags as problematic. - Add option to change objdump binary and allow it to be set via 'perf config'. - Add basic 'perf script', 'perf list --json" and 'perf diff' tests. - Basic branch counter support. - Make DSO tests a suite rather than individual. - Remove atomics from test_loop to avoid test failures. - Fix call chain match on powerpc for the record+probe_libc_inet_pton test. - Improve Intel hybrid tests. Vendor event files (JSON): powerpc: - Update datasource event name to fix duplicate events on IBM's Power10. - Add PVN for HX-C2000 CPU with Power8 Architecture. Intel: - Alderlake/rocketlake metric fixes. - Update emeraldrapids events to v1.02. - Update icelakex events to v1.23. - Update sapphirerapids events to v1.17. - Add skx, clx, icx and spr upi bandwidth metric. AMD: - Add Zen 4 memory controller events. RISC-V: - Add StarFive Dubhe-80 and Dubhe-90 JSON files. https://www.starfivetech.com/en/site/cpu-u - Add T-HEAD C9xx JSON file. https://github.com/riscv-software-src/opensbi/blob/master/docs/platform/thead-c9xx.md ARM64: - Remove UTF-8 characters from cmn.json, that were causing build failure in some distros. - Add core PMU events and metrics for Ampere One X. - Rename Ampere One's BPU_FLUSH_MEM_FAULT to GPC_FLUSH_MEM_FAULT libperf: - Rename several perf_cpu_map constructor names to clarify what they really do. - Ditto for some other methods, coping with some issues in their semantics, like perf_cpu_map__empty() -> perf_cpu_map__has_any_cpu_or_is_empty(). - Document perf_cpu_map__nr()'s behavior perf stat: - Exit if parse groups fails. - Combine the -A/--no-aggr and --no-merge options. - Fix help message for --metric-no-threshold option. Hardware tracing: ARM64 CoreSight: - Bump minimum OpenCSD version to ensure a bugfix is present. - Add 'T' itrace option for timestamp trace - Set start vm addr of exectable file to 0 and don't ignore first sample on the arm-cs-trace-disasm.py 'perf script'" * tag 'perf-tools-for-v6.8-1-2024-01-09' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (179 commits) MAINTAINERS: Add Namhyung as tools/perf/ co-maintainer perf test: test case 'Setup struct perf_event_attr' fails on s390 on z/vm perf db-export: Fix missing reference count get in call_path_from_sample() perf tests: Add perf script test libsubcmd: Fix memory leak in uniq() perf TUI: Don't ignore job control perf vendor events intel: Update sapphirerapids events to v1.17 perf vendor events intel: Update icelakex events to v1.23 perf vendor events intel: Update emeraldrapids events to v1.02 perf vendor events intel: Alderlake/rocketlake metric fixes perf x86 test: Add hybrid test for conflicting legacy/sysfs event perf x86 test: Update hybrid expectations perf vendor events amd: Add Zen 4 memory controller events perf stat: Fix hard coded LL miss units perf record: Reduce memory for recording PERF_RECORD_LOST_SAMPLES event perf env: Avoid recursively taking env->bpf_progs.lock perf annotate: Add --insn-stat option for debugging perf annotate: Add --type-stat option for debugging perf annotate: Support event group display perf annotate: Add --data-type option ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2024-01-20 01:25:23 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2024-01-20 01:25:23 +0300
commit: 9d64bf433c53cab2f48a3fff7a1f2a696bc5229a (patch)
tree: 0535254c177c27ec34adbc153d494cea9a9625a0 /tools/lib
parent: 57f22c8dab6b266ae36b89b073a4a33dea71e762 (diff)
parent: d988c9f511af71a3445b6a4f3a2c67208ff8e480 (diff)
download: linux-9d64bf433c53cab2f48a3fff7a1f2a696bc5229a.tar.xz
16 files changed, 161 insertions, 111 deletions
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 5cb0eeec2c8a..337fde770e45 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -16,6 +16,7 @@
 #include <sys/mount.h>
 
 #include "fs.h"
+#include "../io.h"
 #include "debug-internal.h"
 
 #define _STR(x) #x
@@ -344,53 +345,24 @@ int filename__read_ull(const char *filename, unsigned long long *value)
 	return filename__read_ull_base(filename, value, 0);
 }
 
-#define STRERR_BUFSIZE  128     /* For the buffer size of strerror_r */
-
 int filename__read_str(const char *filename, char **buf, size_t *sizep)
 {
-	size_t size = 0, alloc_size = 0;
-	void *bf = NULL, *nbf;
-	int fd, n, err = 0;
-	char sbuf[STRERR_BUFSIZE];
+	struct io io;
+	char bf[128];
+	int err;
 
-	fd = open(filename, O_RDONLY);
-	if (fd < 0)
+	io.fd = open(filename, O_RDONLY);
+	if (io.fd < 0)
 		return -errno;
-
-	do {
-		if (size == alloc_size) {
-			alloc_size += BUFSIZ;
-			nbf = realloc(bf, alloc_size);
-			if (!nbf) {
-				err = -ENOMEM;
-				break;
-			}
-
-			bf = nbf;
-		}
-
-		n = read(fd, bf + size, alloc_size - size);
-		if (n < 0) {
-			if (size) {
-				pr_warn("read failed %d: %s\n", errno,
-					strerror_r(errno, sbuf, sizeof(sbuf)));
-				err = 0;
-			} else
-				err = -errno;
-
-			break;
-		}
-
-		size += n;
-	} while (n > 0);
-
-	if (!err) {
-		*sizep = size;
-		*buf   = bf;
+	io__init(&io, io.fd, bf, sizeof(bf));
+	*buf = NULL;
+	err = io__getdelim(&io, buf, sizep, /*delim=*/-1);
+	if (err < 0) {
+		free(*buf);
+		*buf = NULL;
 	} else
-		free(bf);
-
-	close(fd);
+		err = 0;
+	close(io.fd);
 	return err;
 }
 
@@ -475,15 +447,22 @@ int sysfs__read_str(const char *entry, char **buf, size_t *sizep)
 
 int sysfs__read_bool(const char *entry, bool *value)
 {
-	char *buf;
-	size_t size;
-	int ret;
+	struct io io;
+	char bf[16];
+	int ret = 0;
+	char path[PATH_MAX];
+	const char *sysfs = sysfs__mountpoint();
+
+	if (!sysfs)
+		return -1;
 
-	ret = sysfs__read_str(entry, &buf, &size);
-	if (ret < 0)
-		return ret;
+	snprintf(path, sizeof(path), "%s/%s", sysfs, entry);
+	io.fd = open(path, O_RDONLY);
+	if (io.fd < 0)
+		return -errno;
 
-	switch (buf[0]) {
+	io__init(&io, io.fd, bf, sizeof(bf));
+	switch (io__get_char(&io)) {
 	case '1':
 	case 'y':
 	case 'Y':
@@ -497,8 +476,7 @@ int sysfs__read_bool(const char *entry, bool *value)
 	default:
 		ret = -1;
 	}
-
-	free(buf);
+	close(io.fd);
 
 	return ret;
 }
diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h
index a77b74c5fb65..84adf8102018 100644
--- a/tools/lib/api/io.h
+++ b/tools/lib/api/io.h
@@ -12,6 +12,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <linux/types.h>
 
 struct io {
 	/* File descriptor being read/ */
@@ -140,8 +141,8 @@ static inline int io__get_dec(struct io *io, __u64 *dec)
 	}
 }
 
-/* Read up to and including the first newline following the pattern of getline. */
-static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out)
+/* Read up to and including the first delim. */
+static inline ssize_t io__getdelim(struct io *io, char **line_out, size_t *line_len_out, int delim)
 {
 	char buf[128];
 	int buf_pos = 0;
@@ -151,7 +152,7 @@ static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_l
 
 	/* TODO: reuse previously allocated memory. */
 	free(*line_out);
-	while (ch != '\n') {
+	while (ch != delim) {
 		ch = io__get_char(io);
 
 		if (ch < 0)
@@ -184,4 +185,9 @@ err_out:
 	return -ENOMEM;
 }
 
+static inline ssize_t io__getline(struct io *io, char **line_out, size_t *line_len_out)
+{
+	return io__getdelim(io, line_out, line_len_out, /*delim=*/'\n');
+}
+
 #endif /* __API_IO__ */
diff --git a/tools/lib/perf/Documentation/examples/sampling.c b/tools/lib/perf/Documentation/examples/sampling.c
index 8e1a926a9cfe..bc142f0664b5 100644
--- a/tools/lib/perf/Documentation/examples/sampling.c
+++ b/tools/lib/perf/Documentation/examples/sampling.c
@@ -39,7 +39,7 @@ int main(int argc, char **argv)
 
 	libperf_init(libperf_print);
 
-	cpus = perf_cpu_map__new(NULL);
+	cpus = perf_cpu_map__new_online_cpus();
 	if (!cpus) {
 		fprintf(stderr, "failed to create cpus\n");
 		return -1;
diff --git a/tools/lib/perf/Documentation/libperf-sampling.txt b/tools/lib/perf/Documentation/libperf-sampling.txt
index d6ca24f6ef78..2378980fab8a 100644
--- a/tools/lib/perf/Documentation/libperf-sampling.txt
+++ b/tools/lib/perf/Documentation/libperf-sampling.txt
@@ -97,7 +97,7 @@ In this case we will monitor all the available CPUs:
 
 [source,c]
 --
- 42         cpus = perf_cpu_map__new(NULL);
+ 42         cpus = perf_cpu_map__new_online_cpus();
  43         if (!cpus) {
  44                 fprintf(stderr, "failed to create cpus\n");
  45                 return -1;
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
index a8f1a237931b..fcfb9499ef9c 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -37,7 +37,7 @@ SYNOPSIS
 
   struct perf_cpu_map;
 
-  struct perf_cpu_map *perf_cpu_map__dummy_new(void);
+  struct perf_cpu_map *perf_cpu_map__new_any_cpu(void);
   struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
   struct perf_cpu_map *perf_cpu_map__read(FILE *file);
   struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
@@ -46,7 +46,7 @@ SYNOPSIS
   void perf_cpu_map__put(struct perf_cpu_map *map);
   int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
   int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
-  bool perf_cpu_map__empty(const struct perf_cpu_map *map);
+  bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map);
   int perf_cpu_map__max(struct perf_cpu_map *map);
   bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu);
 
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index 2a5a29217374..4adcd7920d03 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -9,6 +9,7 @@
 #include <unistd.h>
 #include <ctype.h>
 #include <limits.h>
+#include "internal.h"
 
 void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus)
 {
@@ -27,7 +28,7 @@ struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
 	return result;
 }
 
-struct perf_cpu_map *perf_cpu_map__dummy_new(void)
+struct perf_cpu_map *perf_cpu_map__new_any_cpu(void)
 {
 	struct perf_cpu_map *cpus = perf_cpu_map__alloc(1);
 
@@ -66,15 +67,21 @@ void perf_cpu_map__put(struct perf_cpu_map *map)
 	}
 }
 
-static struct perf_cpu_map *cpu_map__default_new(void)
+static struct perf_cpu_map *cpu_map__new_sysconf(void)
 {
 	struct perf_cpu_map *cpus;
-	int nr_cpus;
+	int nr_cpus, nr_cpus_conf;
 
 	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
 	if (nr_cpus < 0)
 		return NULL;
 
+	nr_cpus_conf = sysconf(_SC_NPROCESSORS_CONF);
+	if (nr_cpus != nr_cpus_conf) {
+		pr_warning("Number of online CPUs (%d) differs from the number configured (%d) the CPU map will only cover the first %d CPUs.",
+			nr_cpus, nr_cpus_conf, nr_cpus);
+	}
+
 	cpus = perf_cpu_map__alloc(nr_cpus);
 	if (cpus != NULL) {
 		int i;
@@ -86,9 +93,27 @@ static struct perf_cpu_map *cpu_map__default_new(void)
 	return cpus;
 }
 
-struct perf_cpu_map *perf_cpu_map__default_new(void)
+static struct perf_cpu_map *cpu_map__new_sysfs_online(void)
 {
-	return cpu_map__default_new();
+	struct perf_cpu_map *cpus = NULL;
+	FILE *onlnf;
+
+	onlnf = fopen("/sys/devices/system/cpu/online", "r");
+	if (onlnf) {
+		cpus = perf_cpu_map__read(onlnf);
+		fclose(onlnf);
+	}
+	return cpus;
+}
+
+struct perf_cpu_map *perf_cpu_map__new_online_cpus(void)
+{
+	struct perf_cpu_map *cpus = cpu_map__new_sysfs_online();
+
+	if (cpus)
+		return cpus;
+
+	return cpu_map__new_sysconf();
 }
 
 
@@ -180,27 +205,11 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file)
 
 	if (nr_cpus > 0)
 		cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
-	else
-		cpus = cpu_map__default_new();
 out_free_tmp:
 	free(tmp_cpus);
 	return cpus;
 }
 
-static struct perf_cpu_map *cpu_map__read_all_cpu_map(void)
-{
-	struct perf_cpu_map *cpus = NULL;
-	FILE *onlnf;
-
-	onlnf = fopen("/sys/devices/system/cpu/online", "r");
-	if (!onlnf)
-		return cpu_map__default_new();
-
-	cpus = perf_cpu_map__read(onlnf);
-	fclose(onlnf);
-	return cpus;
-}
-
 struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
 {
 	struct perf_cpu_map *cpus = NULL;
@@ -211,7 +220,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
 	int max_entries = 0;
 
 	if (!cpu_list)
-		return cpu_map__read_all_cpu_map();
+		return perf_cpu_map__new_online_cpus();
 
 	/*
 	 * must handle the case of empty cpumap to cover
@@ -268,10 +277,12 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
 
 	if (nr_cpus > 0)
 		cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
-	else if (*cpu_list != '\0')
-		cpus = cpu_map__default_new();
-	else
-		cpus = perf_cpu_map__dummy_new();
+	else if (*cpu_list != '\0') {
+		pr_warning("Unexpected characters at end of cpu list ('%s'), using online CPUs.",
+			   cpu_list);
+		cpus = perf_cpu_map__new_online_cpus();
+	} else
+		cpus = perf_cpu_map__new_any_cpu();
 invalid:
 	free(tmp_cpus);
 out:
@@ -300,7 +311,7 @@ int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
 	return cpus ? __perf_cpu_map__nr(cpus) : 1;
 }
 
-bool perf_cpu_map__empty(const struct perf_cpu_map *map)
+bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map)
 {
 	return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true;
 }
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 3acbbccc1901..058e3ff10f9b 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -39,7 +39,7 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
 	if (evsel->system_wide) {
 		/* System wide: set the cpu map of the evsel to all online CPUs. */
 		perf_cpu_map__put(evsel->cpus);
-		evsel->cpus = perf_cpu_map__new(NULL);
+		evsel->cpus = perf_cpu_map__new_online_cpus();
 	} else if (evlist->has_user_cpus && evsel->is_pmu_core) {
 		/*
 		 * User requested CPUs on a core PMU, ensure the requested CPUs
@@ -619,7 +619,7 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist)
 
 	/* One for each CPU */
 	nr_mmaps = perf_cpu_map__nr(evlist->all_cpus);
-	if (perf_cpu_map__empty(evlist->all_cpus)) {
+	if (perf_cpu_map__has_any_cpu_or_is_empty(evlist->all_cpus)) {
 		/* Plus one for each thread */
 		nr_mmaps += perf_thread_map__nr(evlist->threads);
 		/* Minus the per-thread CPU (-1) */
@@ -653,7 +653,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
 	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
 		return -ENOMEM;
 
-	if (perf_cpu_map__empty(cpus))
+	if (perf_cpu_map__has_any_cpu_or_is_empty(cpus))
 		return mmap_per_thread(evlist, ops, mp);
 
 	return mmap_per_cpu(evlist, ops, mp);
diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
index 8b51b008a81f..c07160953224 100644
--- a/tools/lib/perf/evsel.c
+++ b/tools/lib/perf/evsel.c
@@ -120,7 +120,7 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
 		static struct perf_cpu_map *empty_cpu_map;
 
 		if (empty_cpu_map == NULL) {
-			empty_cpu_map = perf_cpu_map__dummy_new();
+			empty_cpu_map = perf_cpu_map__new_any_cpu();
 			if (empty_cpu_map == NULL)
 				return -ENOMEM;
 		}
diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h
index 5a062af8e9d8..5f08cab61ece 100644
--- a/tools/lib/perf/include/internal/mmap.h
+++ b/tools/lib/perf/include/internal/mmap.h
@@ -33,7 +33,8 @@ struct perf_mmap {
 	bool			 overwrite;
 	u64			 flush;
 	libperf_unmap_cb_t	 unmap_cb;
-	char			 event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
+	void			*event_copy;
+	size_t			 event_copy_sz;
 	struct perf_mmap	*next;
 };
 
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index e38d859a384d..228c6c629b0c 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -19,10 +19,23 @@ struct perf_cache {
 struct perf_cpu_map;
 
 /**
- * perf_cpu_map__dummy_new - a map with a singular "any CPU"/dummy -1 value.
+ * perf_cpu_map__new_any_cpu - a map with a singular "any CPU"/dummy -1 value.
+ */
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_any_cpu(void);
+/**
+ * perf_cpu_map__new_online_cpus - a map read from
+ *                                 /sys/devices/system/cpu/online if
+ *                                 available. If reading wasn't possible a map
+ *                                 is created using the online processors
+ *                                 assuming the first 'n' processors are all
+ *                                 online.
+ */
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_online_cpus(void);
+/**
+ * perf_cpu_map__new - create a map from the given cpu_list such as "0-7". If no
+ *                     cpu_list argument is provided then
+ *                     perf_cpu_map__new_online_cpus is returned.
  */
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void);
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
@@ -31,12 +44,23 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
 							 struct perf_cpu_map *other);
 LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__cpu - get the CPU value at the given index. Returns -1 if index
+ *                     is invalid.
+ */
 LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
+/**
+ * perf_cpu_map__nr - for an empty map returns 1, as perf_cpu_map__cpu returns a
+ *                    cpu of -1 for an invalid index, this makes an empty map
+ *                    look like it contains the "any CPU"/dummy value. Otherwise
+ *                    the result is the number CPUs in the map plus one if the
+ *                    "any CPU"/dummy value is present.
+ */
 LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
 /**
- * perf_cpu_map__empty - is map either empty or the "any CPU"/dummy value.
+ * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value.
  */
-LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);
+LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map);
 LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);
 LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
 LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs,
@@ -51,6 +75,12 @@ LIBPERF_API bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map);
 	     (idx) < perf_cpu_map__nr(cpus);			\
 	     (idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx))
 
+#define perf_cpu_map__for_each_cpu_skip_any(_cpu, idx, cpus)	\
+	for ((idx) = 0, (_cpu) = perf_cpu_map__cpu(cpus, idx);	\
+	     (idx) < perf_cpu_map__nr(cpus);			\
+	     (idx)++, (_cpu) = perf_cpu_map__cpu(cpus, idx))	\
+		if ((_cpu).cpu != -1)
+
 #define perf_cpu_map__for_each_idx(idx, cpus)				\
 	for ((idx) = 0; (idx) < perf_cpu_map__nr(cpus); (idx)++)
 
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 190b56ae923a..10b3f3722642 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -1,15 +1,15 @@
 LIBPERF_0.0.1 {
 	global:
 		libperf_init;
-		perf_cpu_map__dummy_new;
-		perf_cpu_map__default_new;
+		perf_cpu_map__new_any_cpu;
+		perf_cpu_map__new_online_cpus;
 		perf_cpu_map__get;
 		perf_cpu_map__put;
 		perf_cpu_map__new;
 		perf_cpu_map__read;
 		perf_cpu_map__nr;
 		perf_cpu_map__cpu;
-		perf_cpu_map__empty;
+		perf_cpu_map__has_any_cpu_or_is_empty;
 		perf_cpu_map__max;
 		perf_cpu_map__has;
 		perf_thread_map__new_array;
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
index 2184814b37dd..0c903c2372c9 100644
--- a/tools/lib/perf/mmap.c
+++ b/tools/lib/perf/mmap.c
@@ -19,6 +19,7 @@
 void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
 		     bool overwrite, libperf_unmap_cb_t unmap_cb)
 {
+	/* Assume fields were zero initialized. */
 	map->fd = -1;
 	map->overwrite = overwrite;
 	map->unmap_cb  = unmap_cb;
@@ -51,13 +52,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
 
 void perf_mmap__munmap(struct perf_mmap *map)
 {
-	if (map && map->base != NULL) {
+	if (!map)
+		return;
+
+	zfree(&map->event_copy);
+	map->event_copy_sz = 0;
+	if (map->base) {
 		munmap(map->base, perf_mmap__mmap_len(map));
 		map->base = NULL;
 		map->fd = -1;
 		refcount_set(&map->refcnt, 0);
 	}
-	if (map && map->unmap_cb)
+	if (map->unmap_cb)
 		map->unmap_cb(map);
 }
 
@@ -223,9 +229,17 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
 		 */
 		if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
 			unsigned int offset = *startp;
-			unsigned int len = min(sizeof(*event), size), cpy;
+			unsigned int len = size, cpy;
 			void *dst = map->event_copy;
 
+			if (size > map->event_copy_sz) {
+				dst = realloc(map->event_copy, size);
+				if (!dst)
+					return NULL;
+				map->event_copy = dst;
+				map->event_copy_sz = size;
+			}
+
 			do {
 				cpy = min(map->mask + 1 - (offset & map->mask), len);
 				memcpy(dst, &data[offset & map->mask], cpy);
diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c
index 87b0510a556f..c998b1dae863 100644
--- a/tools/lib/perf/tests/test-cpumap.c
+++ b/tools/lib/perf/tests/test-cpumap.c
@@ -21,7 +21,7 @@ int test_cpumap(int argc, char **argv)
 
 	libperf_init(libperf_print);
 
-	cpus = perf_cpu_map__dummy_new();
+	cpus = perf_cpu_map__new_any_cpu();
 	if (!cpus)
 		return -1;
 
@@ -29,7 +29,7 @@ int test_cpumap(int argc, char **argv)
 	perf_cpu_map__put(cpus);
 	perf_cpu_map__put(cpus);
 
-	cpus = perf_cpu_map__default_new();
+	cpus = perf_cpu_map__new_online_cpus();
 	if (!cpus)
 		return -1;
 
diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c
index ed616fc19b4f..10f70cb41ff1 100644
--- a/tools/lib/perf/tests/test-evlist.c
+++ b/tools/lib/perf/tests/test-evlist.c
@@ -46,7 +46,7 @@ static int test_stat_cpu(void)
 	};
 	int err, idx;
 
-	cpus = perf_cpu_map__new(NULL);
+	cpus = perf_cpu_map__new_online_cpus();
 	__T("failed to create cpus", cpus);
 
 	evlist = perf_evlist__new();
@@ -261,7 +261,7 @@ static int test_mmap_thread(void)
 	threads = perf_thread_map__new_dummy();
 	__T("failed to create threads", threads);
 
-	cpus = perf_cpu_map__dummy_new();
+	cpus = perf_cpu_map__new_any_cpu();
 	__T("failed to create cpus", cpus);
 
 	perf_thread_map__set_pid(threads, 0, pid);
@@ -350,7 +350,7 @@ static int test_mmap_cpus(void)
 
 	attr.config = id;
 
-	cpus = perf_cpu_map__new(NULL);
+	cpus = perf_cpu_map__new_online_cpus();
 	__T("failed to create cpus", cpus);
 
 	evlist = perf_evlist__new();
diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c
index a11fc51bfb68..545ec3150546 100644
--- a/tools/lib/perf/tests/test-evsel.c
+++ b/tools/lib/perf/tests/test-evsel.c
@@ -27,7 +27,7 @@ static int test_stat_cpu(void)
 	};
 	int err, idx;
 
-	cpus = perf_cpu_map__new(NULL);
+	cpus = perf_cpu_map__new_online_cpus();
 	__T("failed to create cpus", cpus);
 
 	evsel = perf_evsel__new(&attr);
diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c
index adfbae27dc36..8561b0f01a24 100644
--- a/tools/lib/subcmd/help.c
+++ b/tools/lib/subcmd/help.c
@@ -52,11 +52,21 @@ void uniq(struct cmdnames *cmds)
 	if (!cmds->cnt)
 		return;
 
-	for (i = j = 1; i < cmds->cnt; i++)
-		if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name))
-			cmds->names[j++] = cmds->names[i];
-
+	for (i = 1; i < cmds->cnt; i++) {
+		if (!strcmp(cmds->names[i]->name, cmds->names[i-1]->name))
+			zfree(&cmds->names[i - 1]);
+	}
+	for (i = 0, j = 0; i < cmds->cnt; i++) {
+		if (cmds->names[i]) {
+			if (i == j)
+				j++;
+			else
+				cmds->names[j++] = cmds->names[i];
+		}
+	}
 	cmds->cnt = j;
+	while (j < i)
+		cmds->names[j++] = NULL;
 }
 
 void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
author	Linus Torvalds <torvalds@linux-foundation.org>	2024-01-20 01:25:23 +0300
committer	Linus Torvalds <torvalds@linux-foundation.org>	2024-01-20 01:25:23 +0300
commit	9d64bf433c53cab2f48a3fff7a1f2a696bc5229a (patch)
tree	0535254c177c27ec34adbc153d494cea9a9625a0 /tools/lib
parent	57f22c8dab6b266ae36b89b073a4a33dea71e762 (diff)
parent	d988c9f511af71a3445b6a4f3a2c67208ff8e480 (diff)
download	linux-9d64bf433c53cab2f48a3fff7a1f2a696bc5229a.tar.xz