diff options
Diffstat (limited to 'tools/perf')
-rw-r--r-- | tools/perf/builtin-stat.c | 18 | ||||
-rw-r--r-- | tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json | 10 | ||||
-rw-r--r-- | tools/perf/tests/perf-record.c | 4 | ||||
-rwxr-xr-x | tools/perf/tests/shell/record_lbr.sh | 29 | ||||
-rwxr-xr-x | tools/perf/tests/shell/stat.sh | 29 | ||||
-rwxr-xr-x | tools/perf/tests/shell/trace_btf_enum.sh | 11 | ||||
-rw-r--r-- | tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 18 | ||||
-rw-r--r-- | tools/perf/util/arm-spe.c | 34 | ||||
-rw-r--r-- | tools/perf/util/disasm.c | 7 | ||||
-rw-r--r-- | tools/perf/util/evsel.c | 31 | ||||
-rw-r--r-- | tools/perf/util/lzma.c | 2 | ||||
-rw-r--r-- | tools/perf/util/session.c | 2 | ||||
-rw-r--r-- | tools/perf/util/setup.py | 5 | ||||
-rw-r--r-- | tools/perf/util/zlib.c | 2 |
14 files changed, 152 insertions, 50 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 628c61397d2d..b578930ed76a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -639,8 +639,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) * (behavior changed with commit b0a873e). */ if (errno == EINVAL || errno == ENOSYS || - errno == ENOENT || errno == EOPNOTSUPP || - errno == ENXIO) { + errno == ENOENT || errno == ENXIO) { if (verbose > 0) ui__warning("%s event is not supported by the kernel.\n", evsel__name(counter)); @@ -658,7 +657,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) if (verbose > 0) ui__warning("%s\n", msg); return COUNTER_RETRY; - } else if (target__has_per_thread(&target) && + } else if (target__has_per_thread(&target) && errno != EOPNOTSUPP && evsel_list->core.threads && evsel_list->core.threads->err_thread != -1) { /* @@ -679,6 +678,19 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) return COUNTER_SKIP; } + if (errno == EOPNOTSUPP) { + if (verbose > 0) { + ui__warning("%s event is not supported by the kernel.\n", + evsel__name(counter)); + } + counter->supported = false; + counter->errored = true; + + if ((evsel__leader(counter) != counter) || + !(counter->core.leader->nr_members > 1)) + return COUNTER_SKIP; + } + evsel__open_strerror(counter, &target, errno, msg, sizeof(msg)); ui__error("%s\n", msg); diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json index 5228f94a793f..6817cac149e0 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json @@ -113,7 +113,7 @@ { "MetricName": "load_store_spec_rate", "MetricExpr": "LDST_SPEC / INST_SPEC", - "BriefDescription": "The rate of load or store instructions speculatively executed to overall instructions speclatively executed", + "BriefDescription": "The rate of load or store instructions speculatively executed to overall instructions speculatively executed", "MetricGroup": "Operation_Mix", "ScaleUnit": "100percent of operations" }, @@ -132,7 +132,7 @@ { "MetricName": "pc_write_spec_rate", "MetricExpr": "PC_WRITE_SPEC / INST_SPEC", - "BriefDescription": "The rate of software change of the PC speculatively executed to overall instructions speclatively executed", + "BriefDescription": "The rate of software change of the PC speculatively executed to overall instructions speculatively executed", "MetricGroup": "Operation_Mix", "ScaleUnit": "100percent of operations" }, @@ -195,14 +195,14 @@ { "MetricName": "stall_frontend_cache_rate", "MetricExpr": "STALL_FRONTEND_CACHE / CPU_CYCLES", - "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and cache miss", + "BriefDescription": "Proportion of cycles stalled and no operations delivered from frontend and cache miss", "MetricGroup": "Stall", "ScaleUnit": "100percent of cycles" }, { "MetricName": "stall_frontend_tlb_rate", "MetricExpr": "STALL_FRONTEND_TLB / CPU_CYCLES", - "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and TLB miss", + "BriefDescription": "Proportion of cycles stalled and no operations delivered from frontend and TLB miss", "MetricGroup": "Stall", "ScaleUnit": "100percent of cycles" }, @@ -391,7 +391,7 @@ "ScaleUnit": "100percent of cache acceses" }, { - "MetricName": "l1d_cache_access_prefetces", + "MetricName": "l1d_cache_access_prefetches", "MetricExpr": "L1D_CACHE_PRFM / L1D_CACHE", "BriefDescription": "L1D cache access - prefetch", "MetricGroup": "Cache", diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 1c4feec1adff..6e7f053006b4 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -115,6 +115,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest if (err < 0) { pr_debug("sched__get_first_possible_cpu: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); + evlist__cancel_workload(evlist); goto out_delete_evlist; } @@ -126,6 +127,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) { pr_debug("sched_setaffinity: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); + evlist__cancel_workload(evlist); goto out_delete_evlist; } @@ -137,6 +139,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest if (err < 0) { pr_debug("perf_evlist__open: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); + evlist__cancel_workload(evlist); goto out_delete_evlist; } @@ -149,6 +152,7 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest if (err < 0) { pr_debug("evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); + evlist__cancel_workload(evlist); goto out_delete_evlist; } diff --git a/tools/perf/tests/shell/record_lbr.sh b/tools/perf/tests/shell/record_lbr.sh index 32314641217e..5984ca9b78f8 100755 --- a/tools/perf/tests/shell/record_lbr.sh +++ b/tools/perf/tests/shell/record_lbr.sh @@ -4,7 +4,12 @@ set -e -if [ ! -f /sys/devices/cpu/caps/branches ] && [ ! -f /sys/devices/cpu_core/caps/branches ] +ParanoidAndNotRoot() { + [ "$(id -u)" != 0 ] && [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt $1 ] +} + +if [ ! -f /sys/bus/event_source/devices/cpu/caps/branches ] && + [ ! -f /sys/bus/event_source/devices/cpu_core/caps/branches ] then echo "Skip: only x86 CPUs support LBR" exit 2 @@ -22,6 +27,7 @@ cleanup() { } trap_cleanup() { + echo "Unexpected signal in ${FUNCNAME[1]}" cleanup exit 1 } @@ -122,8 +128,11 @@ lbr_test "-j ind_call" "any indirect call" 2 lbr_test "-j ind_jmp" "any indirect jump" 100 lbr_test "-j call" "direct calls" 2 lbr_test "-j ind_call,u" "any indirect user call" 100 -lbr_test "-a -b" "system wide any branch" 2 -lbr_test "-a -j any_call" "system wide any call" 2 +if ! ParanoidAndNotRoot 1 +then + lbr_test "-a -b" "system wide any branch" 2 + lbr_test "-a -j any_call" "system wide any call" 2 +fi # Parallel parallel_lbr_test "-b" "parallel any branch" 100 & @@ -140,10 +149,16 @@ parallel_lbr_test "-j call" "parallel direct calls" 100 & pid6=$! parallel_lbr_test "-j ind_call,u" "parallel any indirect user call" 100 & pid7=$! -parallel_lbr_test "-a -b" "parallel system wide any branch" 100 & -pid8=$! -parallel_lbr_test "-a -j any_call" "parallel system wide any call" 100 & -pid9=$! +if ParanoidAndNotRoot 1 +then + pid8= + pid9= +else + parallel_lbr_test "-a -b" "parallel system wide any branch" 100 & + pid8=$! + parallel_lbr_test "-a -j any_call" "parallel system wide any call" 100 & + pid9=$! +fi for pid in $pid1 $pid2 $pid3 $pid4 $pid5 $pid6 $pid7 $pid8 $pid9 do diff --git a/tools/perf/tests/shell/stat.sh b/tools/perf/tests/shell/stat.sh index 3f1e67795490..62f13dfeae8e 100755 --- a/tools/perf/tests/shell/stat.sh +++ b/tools/perf/tests/shell/stat.sh @@ -146,6 +146,34 @@ test_cputype() { echo "cputype test [Success]" } +test_hybrid() { + # Test the default stat command on hybrid devices opens one cycles event for + # each CPU type. + echo "hybrid test" + + # Count the number of core PMUs, assume minimum of 1 + pmus=$(ls /sys/bus/event_source/devices/*/cpus 2>/dev/null | wc -l) + if [ "$pmus" -lt 1 ] + then + pmus=1 + fi + + # Run default Perf stat + cycles_events=$(perf stat -- true 2>&1 | grep -E "/cycles/[uH]*| cycles[:uH]* " -c) + + # The expectation is that default output will have a cycles events on each + # hybrid PMU. In situations with no cycles PMU events, like virtualized, this + # can fall back to task-clock and so the end count may be 0. Fail if neither + # condition holds. + if [ "$pmus" -ne "$cycles_events" ] && [ "0" -ne "$cycles_events" ] + then + echo "hybrid test [Found $pmus PMUs but $cycles_events cycles events. Failed]" + err=1 + return + fi + echo "hybrid test [Success]" +} + test_default_stat test_stat_record_report test_stat_record_script @@ -153,4 +181,5 @@ test_stat_repeat_weak_groups test_topdown_groups test_topdown_weak_groups test_cputype +test_hybrid exit $err diff --git a/tools/perf/tests/shell/trace_btf_enum.sh b/tools/perf/tests/shell/trace_btf_enum.sh index 8d1e6bbeac90..1447d7425f38 100755 --- a/tools/perf/tests/shell/trace_btf_enum.sh +++ b/tools/perf/tests/shell/trace_btf_enum.sh @@ -23,6 +23,14 @@ check_vmlinux() { fi } +check_permissions() { + if perf trace -e $syscall $TESTPROG 2>&1 | grep -q "Operation not permitted" + then + echo "trace+enum test [Skipped permissions]" + err=2 + fi +} + trace_landlock() { echo "Tracing syscall ${syscall}" @@ -54,6 +62,9 @@ trace_non_syscall() { } check_vmlinux +if [ $err = 0 ]; then + check_permissions +fi if [ $err = 0 ]; then trace_landlock diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 1443c28545a9..358c611eeddb 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -56,15 +56,15 @@ enum arm_spe_op_type { ARM_SPE_OP_BR_INDIRECT = 1 << 17, }; -enum arm_spe_neoverse_data_source { - ARM_SPE_NV_L1D = 0x0, - ARM_SPE_NV_L2 = 0x8, - ARM_SPE_NV_PEER_CORE = 0x9, - ARM_SPE_NV_LOCAL_CLUSTER = 0xa, - ARM_SPE_NV_SYS_CACHE = 0xb, - ARM_SPE_NV_PEER_CLUSTER = 0xc, - ARM_SPE_NV_REMOTE = 0xd, - ARM_SPE_NV_DRAM = 0xe, +enum arm_spe_common_data_source { + ARM_SPE_COMMON_DS_L1D = 0x0, + ARM_SPE_COMMON_DS_L2 = 0x8, + ARM_SPE_COMMON_DS_PEER_CORE = 0x9, + ARM_SPE_COMMON_DS_LOCAL_CLUSTER = 0xa, + ARM_SPE_COMMON_DS_SYS_CACHE = 0xb, + ARM_SPE_COMMON_DS_PEER_CLUSTER = 0xc, + ARM_SPE_COMMON_DS_REMOTE = 0xd, + ARM_SPE_COMMON_DS_DRAM = 0xe, }; struct arm_spe_record { diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 2c06f2a85400..9890b17241c3 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -411,15 +411,15 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, return arm_spe_deliver_synth_event(spe, speq, event, &sample); } -static const struct midr_range neoverse_spe[] = { +static const struct midr_range common_ds_encoding_cpus[] = { MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), {}, }; -static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record, - union perf_mem_data_src *data_src) +static void arm_spe__synth_data_source_common(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) { /* * Even though four levels of cache hierarchy are possible, no known @@ -441,17 +441,17 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec } switch (record->source) { - case ARM_SPE_NV_L1D: + case ARM_SPE_COMMON_DS_L1D: data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; data_src->mem_snoop = PERF_MEM_SNOOP_NONE; break; - case ARM_SPE_NV_L2: + case ARM_SPE_COMMON_DS_L2: data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; data_src->mem_snoop = PERF_MEM_SNOOP_NONE; break; - case ARM_SPE_NV_PEER_CORE: + case ARM_SPE_COMMON_DS_PEER_CORE: data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; @@ -460,8 +460,8 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec * We don't know if this is L1, L2 but we do know it was a cache-2-cache * transfer, so set SNOOPX_PEER */ - case ARM_SPE_NV_LOCAL_CLUSTER: - case ARM_SPE_NV_PEER_CLUSTER: + case ARM_SPE_COMMON_DS_LOCAL_CLUSTER: + case ARM_SPE_COMMON_DS_PEER_CLUSTER: data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; @@ -469,7 +469,7 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec /* * System cache is assumed to be L3 */ - case ARM_SPE_NV_SYS_CACHE: + case ARM_SPE_COMMON_DS_SYS_CACHE: data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; data_src->mem_snoop = PERF_MEM_SNOOP_HIT; @@ -478,13 +478,13 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec * We don't know what level it hit in, except it came from the other * socket */ - case ARM_SPE_NV_REMOTE: - data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1; - data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; + case ARM_SPE_COMMON_DS_REMOTE: + data_src->mem_lvl = PERF_MEM_LVL_NA; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA; data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; break; - case ARM_SPE_NV_DRAM: + case ARM_SPE_COMMON_DS_DRAM: data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; data_src->mem_snoop = PERF_MEM_SNOOP_NONE; @@ -514,13 +514,13 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco } if (record->type & ARM_SPE_REMOTE_ACCESS) - data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1; + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; } static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr) { union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; - bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe); + bool is_common = is_midr_in_range_list(midr, common_ds_encoding_cpus); /* Only synthesize data source for LDST operations */ if (!is_ldst_op(record->op)) @@ -533,8 +533,8 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m else return 0; - if (is_neoverse) - arm_spe__synth_data_source_neoverse(record, &data_src); + if (is_common) + arm_spe__synth_data_source_common(record, &data_src); else arm_spe__synth_data_source_generic(record, &data_src); diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c index 648e8d87ef19..a228a7ba30ca 100644 --- a/tools/perf/util/disasm.c +++ b/tools/perf/util/disasm.c @@ -389,13 +389,16 @@ static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_s * skip over possible up to 2 operands to get to address, e.g.: * tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0> */ - if (c++ != NULL) { + if (c != NULL) { + c++; ops->target.addr = strtoull(c, NULL, 16); if (!ops->target.addr) { c = strchr(c, ','); c = validate_comma(c, ops); - if (c++ != NULL) + if (c != NULL) { + c++; ops->target.addr = strtoull(c, NULL, 16); + } } } else { ops->target.addr = strtoull(ops->raw, NULL, 16); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 6d7249cc1a99..dda107b12b8c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3237,7 +3237,7 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err, /* If event has exclude user then don't exclude kernel. */ if (evsel->core.attr.exclude_user) - return false; + goto no_fallback; /* Is there already the separator in the name. */ if (strchr(name, '/') || @@ -3245,7 +3245,7 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err, sep = ""; if (asprintf(&new_name, "%s%su", name, sep) < 0) - return false; + goto no_fallback; free(evsel->name); evsel->name = new_name; @@ -3256,8 +3256,31 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err, evsel->core.attr.exclude_hv = 1; return true; - } + } else if (err == EOPNOTSUPP && !evsel->core.attr.exclude_guest && + !evsel->exclude_GH) { + const char *name = evsel__name(evsel); + char *new_name; + const char *sep = ":"; + + /* Is there already the separator in the name. */ + if (strchr(name, '/') || + (strchr(name, ':') && !evsel->is_libpfm_event)) + sep = ""; + + if (asprintf(&new_name, "%s%sH", name, sep) < 0) + goto no_fallback; + free(evsel->name); + evsel->name = new_name; + /* Apple M1 requires exclude_guest */ + scnprintf(msg, msgsize, "Trying to fall back to excluding guest samples"); + evsel->core.attr.exclude_guest = 1; + + return true; + } +no_fallback: + scnprintf(msg, msgsize, "No fallback found for '%s' for error %d", + evsel__name(evsel), err); return false; } @@ -3497,6 +3520,8 @@ bool evsel__is_hybrid(const struct evsel *evsel) struct evsel *evsel__leader(const struct evsel *evsel) { + if (evsel->core.leader == NULL) + return NULL; return container_of(evsel->core.leader, struct evsel, core); } diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c index af9a97612f9d..f61574d1581e 100644 --- a/tools/perf/util/lzma.c +++ b/tools/perf/util/lzma.c @@ -113,7 +113,7 @@ bool lzma_is_compressed(const char *input) ssize_t rc; if (fd < 0) - return -1; + return false; rc = read(fd, buf, sizeof(buf)); close(fd); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index dbaf07bf6c5f..89e5354fa094 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1371,7 +1371,7 @@ static s64 perf_session__process_user_event(struct perf_session *session, const struct perf_tool *tool = session->tool; struct perf_sample sample = { .time = 0, }; int fd = perf_data__fd(session->data); - int err; + s64 err; if (event->header.type != PERF_RECORD_COMPRESSED || perf_tool__compressed_is_stub(tool)) dump_event(session->evlist, event, file_offset, &sample, file_path); diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 649550e9b7aa..abb567de3e8a 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -1,6 +1,7 @@ from os import getenv, path from subprocess import Popen, PIPE from re import sub +import shlex cc = getenv("CC") @@ -16,7 +17,9 @@ cc_is_clang = b"clang version" in Popen([cc, "-v"], stderr=PIPE).stderr.readline src_feature_tests = getenv('srctree') + '/tools/build/feature' def clang_has_option(option): - cc_output = Popen([cc, cc_options + option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines() + cmd = shlex.split(f"{cc} {cc_options} {option}") + cmd.append(path.join(src_feature_tests, "test-hello.c")) + cc_output = Popen(cmd, stderr=PIPE).stderr.readlines() return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o) or (b"unknown warning option" in o))] == [ ] if cc_is_clang: diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c index 78d2297c1b67..1f7c06523059 100644 --- a/tools/perf/util/zlib.c +++ b/tools/perf/util/zlib.c @@ -88,7 +88,7 @@ bool gzip_is_compressed(const char *input) ssize_t rc; if (fd < 0) - return -1; + return false; rc = read(fd, buf, sizeof(buf)); close(fd); |