// SPDX-License-Identifier: GPL-2.0
/*
 * Trace raw_syscalls tracepoints to collect system call statistics.
 */

#include "vmlinux.h"
#include "syscall_summary.h"

#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>

/* This is to calculate a delta between sys-enter and sys-exit for each thread */
struct syscall_trace {
	int nr; /* syscall number is only available at sys-enter */
	int unused;
	u64 timestamp;
};

#define MAX_ENTRIES	(128 * 1024)

/* In-flight syscalls, keyed by thread id */
struct syscall_trace_map {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, int); /* tid */
	__type(value, struct syscall_trace);
	__uint(max_entries, MAX_ENTRIES);
} syscall_trace_map SEC(".maps");

/* Accumulated statistics, keyed by (cpu or tid, cgroup, syscall nr) */
struct syscall_stats_map {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct syscall_key);
	__type(value, struct syscall_stats);
	__uint(max_entries, MAX_ENTRIES);
} syscall_stats_map SEC(".maps");

int enabled; /* controlled from userspace */

const volatile enum syscall_aggr_mode aggr_mode;
const volatile int use_cgroup_v2;
int perf_subsys_id = -1;

/* Resolve the current task's cgroup id for either cgroup v2 or the v1 perf_event hierarchy */
static inline __u64 get_current_cgroup_id(void)
{
	struct task_struct *task;
	struct cgroup *cgrp;

	if (use_cgroup_v2)
		return bpf_get_current_cgroup_id();

	task = bpf_get_current_task_btf();

	if (perf_subsys_id == -1) {
#if __has_builtin(__builtin_preserve_enum_value)
		perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id,
						     perf_event_cgrp_id);
#else
		perf_subsys_id = perf_event_cgrp_id;
#endif
	}

	cgrp = BPF_CORE_READ(task, cgroups, subsys[perf_subsys_id], cgroup);
	return BPF_CORE_READ(cgrp, kn, id);
}

static void update_stats(int cpu_or_tid, u64 cgroup_id, int nr, s64 duration,
			 long ret)
{
	struct syscall_key key = {
		.cpu_or_tid = cpu_or_tid,
		.cgroup = cgroup_id,
		.nr = nr,
	};
	struct syscall_stats *stats;

	/* Look up the entry for this key, creating a zeroed one on first use */
	stats = bpf_map_lookup_elem(&syscall_stats_map, &key);
	if (stats == NULL) {
		struct syscall_stats zero = {};

		bpf_map_update_elem(&syscall_stats_map, &key, &zero, BPF_NOEXIST);
		stats = bpf_map_lookup_elem(&syscall_stats_map, &key);
		if (stats == NULL)
			return;
	}

	__sync_fetch_and_add(&stats->count, 1);
	if (ret < 0)
		__sync_fetch_and_add(&stats->error, 1);

	if (duration > 0) {
		__sync_fetch_and_add(&stats->total_time, duration);
		__sync_fetch_and_add(&stats->squared_sum, duration * duration);
		if (stats->max_time < duration)
			stats->max_time = duration;
		if (stats->min_time > duration || stats->min_time == 0)
			stats->min_time = duration;
	}

	return;
}

SEC("tp_btf/sys_enter")
int sys_enter(u64 *ctx)
{
	int tid;
	struct syscall_trace st;

	if (!enabled)
		return 0;

	st.nr = ctx[1]; /* syscall number */
	st.unused = 0;
	st.timestamp = bpf_ktime_get_ns();

	/* lower 32 bits of pid_tgid are the thread id */
	tid = bpf_get_current_pid_tgid();
	bpf_map_update_elem(&syscall_trace_map, &tid, &st, BPF_ANY);

	return 0;
}

SEC("tp_btf/sys_exit")
int sys_exit(u64 *ctx)
{
	int tid;
	int key = 0;
	u64 cgroup = 0;
	long ret = ctx[1]; /* return value of the syscall */
	struct syscall_trace *st;
	s64 delta;

	if (!enabled)
		return 0;

	tid = bpf_get_current_pid_tgid();
	st = bpf_map_lookup_elem(&syscall_trace_map, &tid);
	if (st == NULL)
		return 0;

	/* Pick the aggregation key according to the configured mode */
	if (aggr_mode == SYSCALL_AGGR_THREAD)
		key = tid;
	else if (aggr_mode == SYSCALL_AGGR_CGROUP)
		cgroup = get_current_cgroup_id();
	else
		key = bpf_get_smp_processor_id();

	delta = bpf_ktime_get_ns() - st->timestamp;
	update_stats(key, cgroup, st->nr, delta, ret);

	bpf_map_delete_elem(&syscall_trace_map, &tid);
	return 0;
}

char _license[] SEC("license") = "GPL";