diff options
author | Namhyung Kim <namhyung@kernel.org> | 2025-02-27 22:12:21 +0300 |
---|---|---|
committer | Namhyung Kim <namhyung@kernel.org> | 2025-03-13 10:18:10 +0300 |
commit | 9c3344141866b83e1e339ab2dc2006e03c6e6cf2 (patch) | |
tree | c4bb5e571a521bd416282aedc89df3e67ae214d6 | |
parent | 5b562763d78a99e26054268003e3d0952e3ea89f (diff) | |
download | linux-9c3344141866b83e1e339ab2dc2006e03c6e6cf2.tar.xz |
perf ftrace: Fix latency stats with BPF
When BPF collects the stats for the latency in usec, it first divides
the time by 1000. But that means it would have 0 if the delta is small
and won't update the total time properly.
Let's keep the stats in nsec always and adjust to usec before printing.
Before:
$ sudo ./perf ftrace latency -ab -T mutex_lock --hide-empty -- sleep 0.1
# DURATION | COUNT | GRAPH |
0 - 1 us | 765 | ############################################# |
1 - 2 us | 10 | |
2 - 4 us | 2 | |
4 - 8 us | 5 | |
# statistics (in usec)
total time: 0 <<<--- (here)
avg time: 0
max time: 6
min time: 0
count: 782
After:
$ sudo ./perf ftrace latency -ab -T mutex_lock --hide-empty -- sleep 0.1
# DURATION | COUNT | GRAPH |
0 - 1 us | 880 | ############################################ |
1 - 2 us | 13 | |
2 - 4 us | 8 | |
4 - 8 us | 3 | |
# statistics (in usec)
total time: 268 <<<--- (here)
avg time: 0
max time: 6
min time: 0
count: 904
Tested-by: Athira Rajeev <atrajeev@linux.ibm.com>
Cc: Gabriele Monaco <gmonaco@redhat.com>
Link: https://lore.kernel.org/r/20250227191223.1288473-1-namhyung@kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
-rw-r--r-- | tools/perf/util/bpf_ftrace.c | 8 | ||||
-rw-r--r-- | tools/perf/util/bpf_skel/func_latency.bpf.c | 20 |
2 files changed, 15 insertions, 13 deletions
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c index 51f407a782d6..7324668cc83e 100644 --- a/tools/perf/util/bpf_ftrace.c +++ b/tools/perf/util/bpf_ftrace.c @@ -128,7 +128,7 @@ int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused) return 0; } -int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, +int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace, int buckets[], struct stats *stats) { int i, fd, err; @@ -158,6 +158,12 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, stats->n = skel->bss->count; stats->max = skel->bss->max; stats->min = skel->bss->min; + + if (!ftrace->use_nsec) { + stats->mean /= 1000; + stats->max /= 1000; + stats->min /= 1000; + } } free(hist); diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c index 09e70d40a0f4..3d3d9f427c20 100644 --- a/tools/perf/util/bpf_skel/func_latency.bpf.c +++ b/tools/perf/util/bpf_skel/func_latency.bpf.c @@ -102,6 +102,7 @@ int BPF_PROG(func_end) start = bpf_map_lookup_elem(&functime, &tid); if (start) { __s64 delta = bpf_ktime_get_ns() - *start; + __u64 val = delta; __u32 key = 0; __u64 *hist; @@ -111,26 +112,24 @@ int BPF_PROG(func_end) return 0; if (bucket_range != 0) { - delta /= cmp_base; + val = delta / cmp_base; if (min_latency > 0) { - if (delta > min_latency) - delta -= min_latency; + if (val > min_latency) + val -= min_latency; else goto do_lookup; } // Less than 1 unit (ms or ns), or, in the future, // than the min latency desired. - if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units ) - // clang 12 doesn't like s64 / u32 division - key = (__u64)delta / bucket_range + 1; + if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units ) + key = val / bucket_range + 1; if (key >= bucket_num || - delta >= max_latency - min_latency) + val >= max_latency - min_latency) key = bucket_num - 1; } - delta += min_latency; goto do_lookup; } // calculate index using delta @@ -146,10 +145,7 @@ do_lookup: *hist += 1; - if (bucket_range == 0) - delta /= cmp_base; - - __sync_fetch_and_add(&total, delta); + __sync_fetch_and_add(&total, delta); // always in nsec __sync_fetch_and_add(&count, 1); if (delta > max) |