From 96aaab686505c449e24d76e76507290dcc30e008 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 25 Mar 2020 21:45:28 +0900 Subject: perf/core: Add PERF_RECORD_CGROUP event To support cgroup tracking, add CGROUP event to save a link between cgroup path and id number. This is needed since cgroups can go away when userspace tries to read the cgroup info (from the id) later. The attr.cgroup bit was also added to enable cgroup tracking from userspace. This event will be generated when a new cgroup becomes active. Userspace might need to synthesize those events for existing cgroups. Committer testing: From the resulting kernel, using /sys/kernel/btf/vmlinux: $ pahole perf_event_attr | grep -w cgroup -B5 -A1 __u64 write_backward:1; /* 40:27 8 */ __u64 namespaces:1; /* 40:28 8 */ __u64 ksymbol:1; /* 40:29 8 */ __u64 bpf_event:1; /* 40:30 8 */ __u64 aux_output:1; /* 40:31 8 */ __u64 cgroup:1; /* 40:32 8 */ __u64 __reserved_1:31; /* 40:33 8 */ $ Reported-by: kbuild test robot Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo [staticize perf_event_cgroup function] Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Johannes Weiner Cc: Mark Rutland Cc: Zefan Li Link: http://lore.kernel.org/lkml/20200325124536.2800725-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 397cfd65b3fe..de95f6c7b273 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -381,7 +381,8 @@ struct perf_event_attr { ksymbol : 1, /* include ksymbol events */ bpf_event : 1, /* include bpf events */ aux_output : 1, /* generate AUX records instead of events */ - __reserved_1 : 32; + cgroup : 1, /* include cgroup events */ + __reserved_1 : 31; union { __u32 wakeup_events; /* wakeup every n events */ @@ -1012,6 +1013,16 @@ enum perf_event_type { */ PERF_RECORD_BPF_EVENT = 18, + /* + * struct { + * struct perf_event_header header; + * u64 id; + * char path[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_CGROUP = 19, + PERF_RECORD_MAX, /* non-ABI */ }; -- cgit v1.2.3 From 6546b19f95acc986807de981402bbac6b3a94b0f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 25 Mar 2020 21:45:29 +0900 Subject: perf/core: Add PERF_SAMPLE_CGROUP feature The PERF_SAMPLE_CGROUP bit is to save (perf_event) cgroup information in the sample. It will add a 64-bit id to identify current cgroup and it's the file handle in the cgroup file system. Userspace should use this information with PERF_RECORD_CGROUP event to match which cgroup it belongs. I put it before PERF_SAMPLE_AUX for simplicity since it just needs a 64-bit word. But if we want bigger samples, I can work on that direction too. Committer testing: $ pahole perf_sample_data | grep -w cgroup -B5 -A5 /* --- cacheline 4 boundary (256 bytes) was 56 bytes ago --- */ struct perf_regs regs_intr; /* 312 16 */ /* --- cacheline 5 boundary (320 bytes) was 8 bytes ago --- */ u64 stack_user_size; /* 328 8 */ u64 phys_addr; /* 336 8 */ u64 cgroup; /* 344 8 */ /* size: 384, cachelines: 6, members: 22 */ /* padding: 32 */ }; $ Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Johannes Weiner Cc: Mark Rutland Cc: Zefan Li Link: http://lore.kernel.org/lkml/20200325124536.2800725-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 1 + include/uapi/linux/perf_event.h | 3 ++- init/Kconfig | 3 ++- kernel/events/core.c | 22 ++++++++++++++++++++++ 4 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8768a39b5258..9c3e7619c929 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1020,6 +1020,7 @@ struct perf_sample_data { u64 stack_user_size; u64 phys_addr; + u64 cgroup; } ____cacheline_aligned; /* default value for data source */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index de95f6c7b273..7b2d6fc9e6ed 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -142,8 +142,9 @@ enum perf_event_sample_format { PERF_SAMPLE_REGS_INTR = 1U << 18, PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_AUX = 1U << 20, + PERF_SAMPLE_CGROUP = 1U << 21, - PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; diff --git a/init/Kconfig b/init/Kconfig index 20a6ac33761c..7766b06a0038 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1027,7 +1027,8 @@ config CGROUP_PERF help This option extends the perf per-cpu mode to restrict monitoring to threads which belong to the cgroup specified and run on the - designated cpu. + designated cpu. Or this can be used to have cgroup ID in samples + so that it can monitor performance events among cgroups. Say N if unsure. diff --git a/kernel/events/core.c b/kernel/events/core.c index 994932d5e474..1569979c8912 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1862,6 +1862,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type) if (sample_type & PERF_SAMPLE_PHYS_ADDR) size += sizeof(data->phys_addr); + if (sample_type & PERF_SAMPLE_CGROUP) + size += sizeof(data->cgroup); + event->header_size = size; } @@ -6867,6 +6870,9 @@ void perf_output_sample(struct perf_output_handle *handle, if (sample_type & PERF_SAMPLE_PHYS_ADDR) perf_output_put(handle, data->phys_addr); + if (sample_type & PERF_SAMPLE_CGROUP) + perf_output_put(handle, data->cgroup); + if (sample_type & PERF_SAMPLE_AUX) { perf_output_put(handle, data->aux_size); @@ -7066,6 +7072,16 @@ void perf_prepare_sample(struct perf_event_header *header, if (sample_type & PERF_SAMPLE_PHYS_ADDR) data->phys_addr = perf_virt_to_phys(data->addr); +#ifdef CONFIG_CGROUP_PERF + if (sample_type & PERF_SAMPLE_CGROUP) { + struct cgroup *cgrp; + + /* protected by RCU */ + cgrp = task_css_check(current, perf_event_cgrp_id, 1)->cgroup; + data->cgroup = cgroup_id(cgrp); + } +#endif + if (sample_type & PERF_SAMPLE_AUX) { u64 size; @@ -11264,6 +11280,12 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, if (attr->sample_type & PERF_SAMPLE_REGS_INTR) ret = perf_reg_validate(attr->sample_regs_intr); + +#ifndef CONFIG_CGROUP_PERF + if (attr->sample_type & PERF_SAMPLE_CGROUP) + return -EINVAL; +#endif + out: return ret; -- cgit v1.2.3