summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNamhyung Kim <namhyung@kernel.org>2025-02-27 22:12:21 +0300
committerNamhyung Kim <namhyung@kernel.org>2025-03-13 10:18:10 +0300
commit9c3344141866b83e1e339ab2dc2006e03c6e6cf2 (patch)
treec4bb5e571a521bd416282aedc89df3e67ae214d6
parent5b562763d78a99e26054268003e3d0952e3ea89f (diff)
downloadlinux-9c3344141866b83e1e339ab2dc2006e03c6e6cf2.tar.xz
perf ftrace: Fix latency stats with BPF
When BPF collects the stats for the latency in usec, it first divides the time by 1000. But that means it would have 0 if the delta is small and won't update the total time properly. Let's keep the stats in nsec always and adjust to usec before printing. Before: $ sudo ./perf ftrace latency -ab -T mutex_lock --hide-empty -- sleep 0.1 # DURATION | COUNT | GRAPH | 0 - 1 us | 765 | ############################################# | 1 - 2 us | 10 | | 2 - 4 us | 2 | | 4 - 8 us | 5 | | # statistics (in usec) total time: 0 <<<--- (here) avg time: 0 max time: 6 min time: 0 count: 782 After: $ sudo ./perf ftrace latency -ab -T mutex_lock --hide-empty -- sleep 0.1 # DURATION | COUNT | GRAPH | 0 - 1 us | 880 | ############################################ | 1 - 2 us | 13 | | 2 - 4 us | 8 | | 4 - 8 us | 3 | | # statistics (in usec) total time: 268 <<<--- (here) avg time: 0 max time: 6 min time: 0 count: 904 Tested-by: Athira Rajeev <atrajeev@linux.ibm.com> Cc: Gabriele Monaco <gmonaco@redhat.com> Link: https://lore.kernel.org/r/20250227191223.1288473-1-namhyung@kernel.org Signed-off-by: Namhyung Kim <namhyung@kernel.org>
-rw-r--r--tools/perf/util/bpf_ftrace.c8
-rw-r--r--tools/perf/util/bpf_skel/func_latency.bpf.c20
2 files changed, 15 insertions, 13 deletions
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index 51f407a782d6..7324668cc83e 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -128,7 +128,7 @@ int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
return 0;
}
-int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
+int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
int buckets[], struct stats *stats)
{
int i, fd, err;
@@ -158,6 +158,12 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
stats->n = skel->bss->count;
stats->max = skel->bss->max;
stats->min = skel->bss->min;
+
+ if (!ftrace->use_nsec) {
+ stats->mean /= 1000;
+ stats->max /= 1000;
+ stats->min /= 1000;
+ }
}
free(hist);
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index 09e70d40a0f4..3d3d9f427c20 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -102,6 +102,7 @@ int BPF_PROG(func_end)
start = bpf_map_lookup_elem(&functime, &tid);
if (start) {
__s64 delta = bpf_ktime_get_ns() - *start;
+ __u64 val = delta;
__u32 key = 0;
__u64 *hist;
@@ -111,26 +112,24 @@ int BPF_PROG(func_end)
return 0;
if (bucket_range != 0) {
- delta /= cmp_base;
+ val = delta / cmp_base;
if (min_latency > 0) {
- if (delta > min_latency)
- delta -= min_latency;
+ if (val > min_latency)
+ val -= min_latency;
else
goto do_lookup;
}
// Less than 1 unit (ms or ns), or, in the future,
// than the min latency desired.
- if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units )
- // clang 12 doesn't like s64 / u32 division
- key = (__u64)delta / bucket_range + 1;
+ if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units )
+ key = val / bucket_range + 1;
if (key >= bucket_num ||
- delta >= max_latency - min_latency)
+ val >= max_latency - min_latency)
key = bucket_num - 1;
}
- delta += min_latency;
goto do_lookup;
}
// calculate index using delta
@@ -146,10 +145,7 @@ do_lookup:
*hist += 1;
- if (bucket_range == 0)
- delta /= cmp_base;
-
- __sync_fetch_and_add(&total, delta);
+ __sync_fetch_and_add(&total, delta); // always in nsec
__sync_fetch_and_add(&count, 1);
if (delta > max)