From 0259bf63f71e2accfeca4a4e346ede8edcc86aab Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Feb 2024 21:05:44 -0800 Subject: perf/core: Optimize perf_adjust_freq_unthr_context() It was unnecessarily disabling and enabling PMUs for each event. It should be done at PMU level. Add pmu_ctx->nr_freq counter to check it at each PMU. As PMU context has separate active lists for pinned group and flexible group, factor out a new function to do the job. Another minor optimization is that it can skip PMUs w/ CAP_NO_INTERRUPT even if it needs to unthrottle sampling events. Signed-off-by: Namhyung Kim Signed-off-by: Ingo Molnar Tested-by: Mingwei Zhang Reviewed-by: Ian Rogers Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20240207050545.2727923-1-namhyung@kernel.org --- include/linux/perf_event.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d2a15c0c6f8a..3e33b366347a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -883,6 +883,7 @@ struct perf_event_pmu_context { unsigned int nr_events; unsigned int nr_cgroups; + unsigned int nr_freq; atomic_t refcount; /* event <-> epc */ struct rcu_head rcu_head; @@ -897,6 +898,11 @@ struct perf_event_pmu_context { int rotate_necessary; }; +static inline bool perf_pmu_ctx_is_active(struct perf_event_pmu_context *epc) +{ + return !list_empty(&epc->flexible_active) || !list_empty(&epc->pinned_active); +} + struct perf_event_groups { struct rb_root tree; u64 index; -- cgit v1.2.3 From 14e40a9578b70cc5323e55f61292a7e021f6037c Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Thu, 11 Apr 2024 18:50:15 -0700 Subject: perf/bpf: Remove #ifdef CONFIG_BPF_SYSCALL from struct perf_event members This will allow __perf_event_overflow() (which is independent of CONFIG_BPF_SYSCALL) to use struct perf_event's prog to decide whether to call bpf_overflow_handler(). Suggested-by: Ingo Molnar Signed-off-by: Kyle Huey Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20240412015019.7060-4-khuey@kylehuey.com --- include/linux/perf_event.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3e33b366347a..50e01db083ee 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -809,11 +809,9 @@ struct perf_event { u64 (*clock)(void); perf_overflow_handler_t overflow_handler; void *overflow_handler_context; -#ifdef CONFIG_BPF_SYSCALL perf_overflow_handler_t orig_overflow_handler; struct bpf_prog *prog; u64 bpf_cookie; -#endif #ifdef CONFIG_EVENT_TRACING struct trace_event_call *tp_event; -- cgit v1.2.3 From f11f10bfa1ca23b32020b2073aa13131a27978fe Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Thu, 11 Apr 2024 18:50:16 -0700 Subject: perf/bpf: Call BPF handler directly, not through overflow machinery To ultimately allow BPF programs attached to perf events to completely suppress all of the effects of a perf event overflow (rather than just the sample output, as they do today), call bpf_overflow_handler() from __perf_event_overflow() directly rather than modifying struct perf_event's overflow_handler. Return the BPF program's return value from bpf_overflow_handler() so that __perf_event_overflow() knows how to proceed. Remove the now unnecessary orig_overflow_handler from struct perf_event. This patch is solely a refactoring and results in no behavior change. Suggested-by: Namhyung Kim Signed-off-by: Kyle Huey Signed-off-by: Ingo Molnar Acked-by: Song Liu Acked-by: Jiri Olsa Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240412015019.7060-5-khuey@kylehuey.com --- include/linux/perf_event.h | 6 +----- kernel/events/core.c | 27 +++++++++++---------------- 2 files changed, 12 insertions(+), 21 deletions(-) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 50e01db083ee..2ce2fbc02ec6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -809,7 +809,6 @@ struct perf_event { u64 (*clock)(void); perf_overflow_handler_t overflow_handler; void *overflow_handler_context; - perf_overflow_handler_t orig_overflow_handler; struct bpf_prog *prog; u64 bpf_cookie; @@ -1361,10 +1360,7 @@ __is_default_overflow_handler(perf_overflow_handler_t overflow_handler) #ifdef CONFIG_BPF_SYSCALL static inline bool uses_default_overflow_handler(struct perf_event *event) { - if (likely(is_default_overflow_handler(event))) - return true; - - return __is_default_overflow_handler(event->orig_overflow_handler); + return is_default_overflow_handler(event); } #else #define uses_default_overflow_handler(event) \ diff --git a/kernel/events/core.c b/kernel/events/core.c index d3f3f552e193..c6a6936183d5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9564,9 +9564,9 @@ static inline bool sample_is_allowed(struct perf_event *event, struct pt_regs *r } #ifdef CONFIG_BPF_SYSCALL -static void bpf_overflow_handler(struct perf_event *event, - struct perf_sample_data *data, - struct pt_regs *regs) +static int bpf_overflow_handler(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) { struct bpf_perf_event_data_kern ctx = { .data = data, @@ -9587,10 +9587,8 @@ static void bpf_overflow_handler(struct perf_event *event, rcu_read_unlock(); out: __this_cpu_dec(bpf_prog_active); - if (!ret) - return; - event->orig_overflow_handler(event, data, regs); + return ret; } static int perf_event_set_bpf_handler(struct perf_event *event, @@ -9626,8 +9624,6 @@ static int perf_event_set_bpf_handler(struct perf_event *event, event->prog = prog; event->bpf_cookie = bpf_cookie; - event->orig_overflow_handler = READ_ONCE(event->overflow_handler); - WRITE_ONCE(event->overflow_handler, bpf_overflow_handler); return 0; } @@ -9638,15 +9634,15 @@ static void perf_event_free_bpf_handler(struct perf_event *event) if (!prog) return; - WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler); event->prog = NULL; bpf_prog_put(prog); } #else -static void bpf_overflow_handler(struct perf_event *event, - struct perf_sample_data *data, - struct pt_regs *regs) +static int bpf_overflow_handler(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) { + return 1; } static int perf_event_set_bpf_handler(struct perf_event *event, @@ -9730,7 +9726,8 @@ static int __perf_event_overflow(struct perf_event *event, irq_work_queue(&event->pending_irq); } - READ_ONCE(event->overflow_handler)(event, data, regs); + if (!(event->prog && !bpf_overflow_handler(event, data, regs))) + READ_ONCE(event->overflow_handler)(event, data, regs); if (*perf_event_fasync(event) && event->pending_kill) { event->pending_wakeup = 1; @@ -11997,13 +11994,11 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, overflow_handler = parent_event->overflow_handler; context = parent_event->overflow_handler_context; #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING) - if (overflow_handler == bpf_overflow_handler) { + if (parent_event->prog) { struct bpf_prog *prog = parent_event->prog; bpf_prog_inc(prog); event->prog = prog; - event->orig_overflow_handler = - parent_event->orig_overflow_handler; } #endif } -- cgit v1.2.3 From 76f6d58845829e5d6ef55532e67a323e7d30c26e Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Thu, 11 Apr 2024 18:50:17 -0700 Subject: perf/bpf: Remove unneeded uses_default_overflow_handler() Now that struct perf_event's orig_overflow_handler is gone, there's no need for the functions and macros to support looking past overflow_handler to orig_overflow_handler. This patch is solely a refactoring and results in no behavior change. Signed-off-by: Kyle Huey Signed-off-by: Ingo Molnar Acked-by: Will Deacon Acked-by: Song Liu Acked-by: Jiri Olsa Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240412015019.7060-6-khuey@kylehuey.com --- arch/arm/kernel/hw_breakpoint.c | 8 ++++---- arch/arm64/kernel/hw_breakpoint.c | 4 ++-- include/linux/perf_event.h | 17 +++-------------- 3 files changed, 9 insertions(+), 20 deletions(-) (limited to 'include/linux/perf_event.h') diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index dc0fb7a81371..054e9199f30d 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -626,7 +626,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, hw->address &= ~alignment_mask; hw->ctrl.len <<= offset; - if (uses_default_overflow_handler(bp)) { + if (is_default_overflow_handler(bp)) { /* * Mismatch breakpoints are required for single-stepping * breakpoints. @@ -798,7 +798,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, * Otherwise, insert a temporary mismatch breakpoint so that * we can single-step over the watchpoint trigger. */ - if (!uses_default_overflow_handler(wp)) + if (!is_default_overflow_handler(wp)) continue; step: enable_single_step(wp, instruction_pointer(regs)); @@ -811,7 +811,7 @@ step: info->trigger = addr; pr_debug("watchpoint fired: address = 0x%x\n", info->trigger); perf_bp_event(wp, regs); - if (uses_default_overflow_handler(wp)) + if (is_default_overflow_handler(wp)) enable_single_step(wp, instruction_pointer(regs)); } @@ -886,7 +886,7 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) info->trigger = addr; pr_debug("breakpoint fired: address = 0x%x\n", addr); perf_bp_event(bp, regs); - if (uses_default_overflow_handler(bp)) + if (is_default_overflow_handler(bp)) enable_single_step(bp, addr); goto unlock; } diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 2f5755192c2b..722ac45f9f7b 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -655,7 +655,7 @@ static int breakpoint_handler(unsigned long unused, unsigned long esr, perf_bp_event(bp, regs); /* Do we need to handle the stepping? */ - if (uses_default_overflow_handler(bp)) + if (is_default_overflow_handler(bp)) step = 1; unlock: rcu_read_unlock(); @@ -734,7 +734,7 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val, static int watchpoint_report(struct perf_event *wp, unsigned long addr, struct pt_regs *regs) { - int step = uses_default_overflow_handler(wp); + int step = is_default_overflow_handler(wp); struct arch_hw_breakpoint *info = counter_arch_bp(wp); info->trigger = addr; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2ce2fbc02ec6..d5ff0c164875 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1345,8 +1345,10 @@ extern int perf_event_output(struct perf_event *event, struct pt_regs *regs); static inline bool -__is_default_overflow_handler(perf_overflow_handler_t overflow_handler) +is_default_overflow_handler(struct perf_event *event) { + perf_overflow_handler_t overflow_handler = event->overflow_handler; + if (likely(overflow_handler == perf_event_output_forward)) return true; if (unlikely(overflow_handler == perf_event_output_backward)) @@ -1354,19 +1356,6 @@ __is_default_overflow_handler(perf_overflow_handler_t overflow_handler) return false; } -#define is_default_overflow_handler(event) \ - __is_default_overflow_handler((event)->overflow_handler) - -#ifdef CONFIG_BPF_SYSCALL -static inline bool uses_default_overflow_handler(struct perf_event *event) -{ - return is_default_overflow_handler(event); -} -#else -#define uses_default_overflow_handler(event) \ - is_default_overflow_handler(event) -#endif - extern void perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, -- cgit v1.2.3 From 4a013980666857c1eb2df6a2137817caa21d38a6 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Sat, 13 Apr 2024 07:16:16 -0700 Subject: perf: Move perf_event_fasync() to perf_event.h This will allow it to be called from perf_output_wakeup(). Signed-off-by: Kyle Huey Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20240413141618.4160-2-khuey@kylehuey.com --- include/linux/perf_event.h | 8 ++++++++ kernel/events/core.c | 8 -------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d5ff0c164875..a5304ae8c654 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1686,6 +1686,14 @@ perf_event_addr_filters(struct perf_event *event) return ifh; } +static inline struct fasync_struct **perf_event_fasync(struct perf_event *event) +{ + /* Only the parent has fasync state */ + if (event->parent) + event = event->parent; + return &event->fasync; +} + extern void perf_event_addr_filters_sync(struct perf_event *event); extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id); diff --git a/kernel/events/core.c b/kernel/events/core.c index 6708c1121b9f..da9d9a1f4dca 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6703,14 +6703,6 @@ static const struct file_operations perf_fops = { * to user-space before waking everybody up. */ -static inline struct fasync_struct **perf_event_fasync(struct perf_event *event) -{ - /* only the parent has fasync state */ - if (event->parent) - event = event->parent; - return &event->fasync; -} - void perf_event_wakeup(struct perf_event *event) { ring_buffer_wakeup(event); -- cgit v1.2.3