From 516792e67c39d31701641ab355acdb9cbfec0643 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai
Date: Mon, 31 Aug 2015 15:12:56 +0300
Subject: perf/core: Delete PF_EXITING checks from perf_cgroup_exit() callback

cgroup_exit() is not called from copy_process() after commit:

  e8604cb43690 ("cgroup: fix spurious lockdep warning in cgroup_exit()")

it is called from do_exit() only. So this check is useless and the
comment is obsolete.

Signed-off-by: Kirill Tkhai
Signed-off-by: Peter Zijlstra (Intel)
Cc: Arnaldo Carvalho de Melo
Cc: Jiri Olsa
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Stephane Eranian
Cc: Thomas Gleixner
Cc: Vince Weaver
Link: http://lkml.kernel.org/r/55E444C8.3020402@odin.com
Signed-off-by: Ingo Molnar
---
 kernel/events/core.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index f548f69c4299..76e64be9bfb5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9297,14 +9297,6 @@ static void perf_cgroup_exit(struct cgroup_subsys_state *css,
                 struct cgroup_subsys_state *old_css,
                 struct task_struct *task)
 {
-    /*
-     * cgroup_exit() is called in the copy_process() failure path.
-     * Ignore this case since the task hasn't ran yet, this avoids
-     * trying to poke a half freed task state from generic code.
-     */
-    if (!(task->flags & PF_EXITING))
-        return;
-
     task_function_call(task, __perf_cgroup_move, task);
 }
-- 
cgit v1.2.3

From fbbe07011581990ef74dfac06dc8511b1a14badb Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu
Date: Thu, 3 Sep 2015 20:07:45 -0700
Subject: perf/core: Add a 'flags' parameter to the PMU transactional interfaces

Currently, the PMU interface allows reading only one counter at a time.
But some PMUs, like the 24x7 counters in Power, support reading several
counters at once. To leverage this functionality, extend the transaction
interface to support a "transaction type".

The first type, PERF_PMU_TXN_ADD, refers to the existing transactions,
i.e. it is used to _schedule_ all the events on the PMU as a group. A
second transaction type, PERF_PMU_TXN_READ, will be used in a follow-on
patch, by the 24x7 counters to read several counters at once.

Extend the transaction interfaces to the PMU to accept a 'txn_flags'
parameter and use this parameter to ignore any transactions that are
not of type PERF_PMU_TXN_ADD.

Thanks to Peter Zijlstra for his input.
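
To make the contract concrete, every ->start_txn() implementation in
the arch diffs below follows the same shape. A condensed sketch (the
'foo_pmu' name is a placeholder; 'cpu_hw_events' is the usual per-CPU
state each architecture already has):

    static void foo_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
    {
        struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);

        WARN_ON_ONCE(cpuhw->txn_flags);    /* only one txn in flight */

        cpuhw->txn_flags = txn_flags;      /* remember the type... */
        if (txn_flags & ~PERF_PMU_TXN_ADD)
            return;                        /* ...and ignore non-ADD txns */

        perf_pmu_disable(pmu);             /* existing ADD-txn setup */
    }

->cancel_txn() and ->commit_txn() mirror this: they clear txn_flags
first and bail out early for anything other than PERF_PMU_TXN_ADD.
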
Signed-off-by: Sukadev Bhattiprolu
[peterz: s390 compile fix]
Signed-off-by: Peter Zijlstra (Intel)
Acked-by: Michael Ellerman
Cc: Arnaldo Carvalho de Melo
Cc: Arnaldo Carvalho de Melo
Cc: Jiri Olsa
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Stephane Eranian
Cc: Thomas Gleixner
Cc: Vince Weaver
Link: http://lkml.kernel.org/r/1441336073-22750-3-git-send-email-sukadev@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar
---
 arch/powerpc/perf/core-book3s.c  | 30 +++++++++++++++++++++++++++++-
 arch/s390/kernel/perf_cpum_cf.c  | 30 +++++++++++++++++++++++++++++-
 arch/sparc/kernel/perf_event.c   | 25 ++++++++++++++++++++++++-
 arch/x86/kernel/cpu/perf_event.c | 32 +++++++++++++++++++++++++++++++-
 arch/x86/kernel/cpu/perf_event.h |  1 +
 include/linux/perf_event.h       | 14 +++++++++++---
 kernel/events/core.c             | 31 ++++++++++++++++++++++++++++---
 7 files changed, 153 insertions(+), 10 deletions(-)

(limited to 'kernel')

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index b0382f3f1095..c84074185c80 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -49,6 +49,7 @@ struct cpu_hw_events {
     unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
 
     unsigned int group_flag;
+    unsigned int txn_flags;
     int n_txn_start;
 
     /* BHRB bits */
@@ -1586,11 +1587,21 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags)
  * Start group events scheduling transaction
  * Set the flag to make pmu::enable() not perform the
  * schedulability test, it will be performed at commit time
+ *
+ * We only support PERF_PMU_TXN_ADD transactions. Save the
+ * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
+ * transactions.
  */
-static void power_pmu_start_txn(struct pmu *pmu)
+static void power_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
 {
     struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
 
+    WARN_ON_ONCE(cpuhw->txn_flags);    /* txn already in flight */
+
+    cpuhw->txn_flags = txn_flags;
+    if (txn_flags & ~PERF_PMU_TXN_ADD)
+        return;
+
     perf_pmu_disable(pmu);
     cpuhw->group_flag |= PERF_EVENT_TXN;
     cpuhw->n_txn_start = cpuhw->n_events;
@@ -1604,6 +1615,14 @@ static void power_pmu_start_txn(struct pmu *pmu)
 static void power_pmu_cancel_txn(struct pmu *pmu)
 {
     struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+    unsigned int txn_flags;
+
+    WARN_ON_ONCE(!cpuhw->txn_flags);    /* no txn in flight */
+
+    txn_flags = cpuhw->txn_flags;
+    cpuhw->txn_flags = 0;
+    if (txn_flags & ~PERF_PMU_TXN_ADD)
+        return;
 
     cpuhw->group_flag &= ~PERF_EVENT_TXN;
     perf_pmu_enable(pmu);
@@ -1621,7 +1640,15 @@ static int power_pmu_commit_txn(struct pmu *pmu)
 
     if (!ppmu)
         return -EAGAIN;
+
     cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+    WARN_ON_ONCE(!cpuhw->txn_flags);    /* no txn in flight */
+
+    if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) {
+        cpuhw->txn_flags = 0;
+        return 0;
+    }
+
     n = cpuhw->n_events;
     if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
         return -EAGAIN;
@@ -1633,6 +1660,7 @@ static int power_pmu_commit_txn(struct pmu *pmu)
         cpuhw->event[i]->hw.config = cpuhw->events[i];
 
     cpuhw->group_flag &= ~PERF_EVENT_TXN;
+    cpuhw->txn_flags = 0;
     perf_pmu_enable(pmu);
     return 0;
 }
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 56fdad479115..19138be412d6 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -72,6 +72,7 @@ struct cpu_hw_events {
     atomic_t ctr_set[CPUMF_CTR_SET_MAX];
     u64 state, tx_state;
     unsigned int flags;
+    unsigned int txn_flags;
 };
 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
     .ctr_set = {
@@ -82,6 +83,7 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
     },
     .state = 0,
     .flags = 0,
+    .txn_flags = 0,
 };
 
 static int get_counter_set(u64 event)
@@ -572,11 +574,21 @@ static void cpumf_pmu_del(struct perf_event *event, int flags)
 /*
  * Start group events scheduling transaction.
  * Set flags to perform a single test at commit time.
+ *
+ * We only support PERF_PMU_TXN_ADD transactions. Save the
+ * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
+ * transactions.
  */
-static void cpumf_pmu_start_txn(struct pmu *pmu)
+static void cpumf_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
 {
     struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
 
+    WARN_ON_ONCE(cpuhw->txn_flags);    /* txn already in flight */
+
+    cpuhw->txn_flags = txn_flags;
+    if (txn_flags & ~PERF_PMU_TXN_ADD)
+        return;
+
     perf_pmu_disable(pmu);
     cpuhw->flags |= PERF_EVENT_TXN;
     cpuhw->tx_state = cpuhw->state;
@@ -589,8 +601,16 @@ static void cpumf_pmu_start_txn(struct pmu *pmu)
  */
 static void cpumf_pmu_cancel_txn(struct pmu *pmu)
 {
+    unsigned int txn_flags;
     struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
 
+    WARN_ON_ONCE(!cpuhw->txn_flags);    /* no txn in flight */
+
+    txn_flags = cpuhw->txn_flags;
+    cpuhw->txn_flags = 0;
+    if (txn_flags & ~PERF_PMU_TXN_ADD)
+        return;
+
     WARN_ON(cpuhw->tx_state != cpuhw->state);
 
     cpuhw->flags &= ~PERF_EVENT_TXN;
@@ -607,6 +627,13 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu)
     struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
     u64 state;
 
+    WARN_ON_ONCE(!cpuhw->txn_flags);    /* no txn in flight */
+
+    if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) {
+        cpuhw->txn_flags = 0;
+        return 0;
+    }
+
     /* check if the updated state can be scheduled */
     state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
     state >>= CPUMF_LCCTL_ENABLE_SHIFT;
@@ -614,6 +641,7 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu)
         return -EPERM;
 
     cpuhw->flags &= ~PERF_EVENT_TXN;
+    cpuhw->txn_flags = 0;
     perf_pmu_enable(pmu);
     return 0;
 }
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index e3f89c7e5062..2c0984d146ec 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -109,6 +109,7 @@ struct cpu_hw_events {
     int enabled;
 
     unsigned int group_flag;
+    unsigned int txn_flags;
 };
 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
 
@@ -1494,10 +1495,16 @@ static int sparc_pmu_event_init(struct perf_event *event)
  * Set the flag to make pmu::enable() not perform the
  * schedulability test, it will be performed at commit time
  */
-static void sparc_pmu_start_txn(struct pmu *pmu)
+static void sparc_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
 {
     struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
 
+    WARN_ON_ONCE(cpuhw->txn_flags);    /* txn already in flight */
+
+    cpuhw->txn_flags = txn_flags;
+    if (txn_flags & ~PERF_PMU_TXN_ADD)
+        return;
+
     perf_pmu_disable(pmu);
     cpuhw->group_flag |= PERF_EVENT_TXN;
 }
@@ -1510,6 +1517,14 @@ static void sparc_pmu_start_txn(struct pmu *pmu)
 static void sparc_pmu_cancel_txn(struct pmu *pmu)
 {
     struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+    unsigned int txn_flags;
+
+    WARN_ON_ONCE(!cpuhw->txn_flags);    /* no txn in flight */
+
+    txn_flags = cpuhw->txn_flags;
+    cpuhw->txn_flags = 0;
+    if (txn_flags & ~PERF_PMU_TXN_ADD)
+        return;
 
     cpuhw->group_flag &= ~PERF_EVENT_TXN;
     perf_pmu_enable(pmu);
@@ -1528,6 +1543,13 @@ static int sparc_pmu_commit_txn(struct pmu *pmu)
     if (!sparc_pmu)
         return -EINVAL;
 
+    WARN_ON_ONCE(!cpuc->txn_flags);    /* no txn in flight */
+
+    if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) {
+        cpuc->txn_flags = 0;
+        return 0;
+    }
+
     n = cpuc->n_events;
     if (check_excludes(cpuc->event, 0, n))
         return -EINVAL;
@@ -1535,6 +1557,7 @@ static int sparc_pmu_commit_txn(struct pmu *pmu)
         return -EAGAIN;
 
     cpuc->group_flag &= ~PERF_EVENT_TXN;
+    cpuc->txn_flags = 0;
     perf_pmu_enable(pmu);
     return 0;
 }
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 66dd3fe99b82..dc77ef470e0e 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1748,9 +1748,21 @@ static inline void x86_pmu_read(struct perf_event *event)
  * Start group events scheduling transaction
  * Set the flag to make pmu::enable() not perform the
  * schedulability test, it will be performed at commit time
+ *
+ * We only support PERF_PMU_TXN_ADD transactions. Save the
+ * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
+ * transactions.
  */
-static void x86_pmu_start_txn(struct pmu *pmu)
+static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
 {
+    struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+    WARN_ON_ONCE(cpuc->txn_flags);    /* txn already in flight */
+
+    cpuc->txn_flags = txn_flags;
+    if (txn_flags & ~PERF_PMU_TXN_ADD)
+        return;
+
     perf_pmu_disable(pmu);
     __this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN);
     __this_cpu_write(cpu_hw_events.n_txn, 0);
@@ -1763,6 +1775,16 @@ static void x86_pmu_start_txn(struct pmu *pmu)
  */
 static void x86_pmu_cancel_txn(struct pmu *pmu)
 {
+    unsigned int txn_flags;
+    struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+    WARN_ON_ONCE(!cpuc->txn_flags);    /* no txn in flight */
+
+    txn_flags = cpuc->txn_flags;
+    cpuc->txn_flags = 0;
+    if (txn_flags & ~PERF_PMU_TXN_ADD)
+        return;
+
     __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN);
     /*
      * Truncate collected array by the number of events added in this
@@ -1786,6 +1808,13 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
     int assign[X86_PMC_IDX_MAX];
     int n, ret;
 
+    WARN_ON_ONCE(!cpuc->txn_flags);    /* no txn in flight */
+
+    if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) {
+        cpuc->txn_flags = 0;
+        return 0;
+    }
+
     n = cpuc->n_events;
 
     if (!x86_pmu_initialized())
@@ -1802,6 +1831,7 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
     memcpy(cpuc->assign, assign, n*sizeof(int));
 
     cpuc->group_flag &= ~PERF_EVENT_TXN;
+    cpuc->txn_flags = 0;
     perf_pmu_enable(pmu);
     return 0;
 }
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 5edf6d868fc1..c99c93b9e348 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -196,6 +196,7 @@ struct cpu_hw_events {
     int n_excl; /* the number of exclusive events */
 
     unsigned int group_flag;
+    unsigned int txn_flags;
     int is_fake;
 
     /*
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index d61ee4459eee..ea3b5dd21a4c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -201,6 +201,8 @@ struct perf_event;
  */
 #define PERF_EVENT_TXN 0x1
 
+#define PERF_PMU_TXN_ADD  0x1 /* txn to add/schedule event on PMU */
+
 /**
  * pmu::capabilities flags
  */
@@ -331,20 +333,26 @@ struct pmu {
      *
      * Start the transaction, after this ->add() doesn't need to
      * do schedulability tests.
+     *
+     * Optional.
      */
-    void (*start_txn)        (struct pmu *pmu); /* optional */
+    void (*start_txn)        (struct pmu *pmu, unsigned int txn_flags);
 
     /*
      * If ->start_txn() disabled the ->add() schedulability test
     * then ->commit_txn() is required to perform one. On success
     * the transaction is closed. On error the transaction is kept
     * open until ->cancel_txn() is called.
+     *
+     * Optional.
      */
-    int  (*commit_txn)        (struct pmu *pmu); /* optional */
+    int  (*commit_txn)        (struct pmu *pmu);
 
     /*
      * Will cancel the transaction, assumes ->del() is called
      * for each successful ->add() during the transaction.
+     *
+     * Optional.
      */
-    void (*cancel_txn)        (struct pmu *pmu); /* optional */
+    void (*cancel_txn)        (struct pmu *pmu);
 
     /*
      * Will return the value for perf_event_mmap_page::index for this event,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 76e64be9bfb5..c80cee82959f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1914,7 +1914,7 @@ group_sched_in(struct perf_event *group_event,
     if (group_event->state == PERF_EVENT_STATE_OFF)
         return 0;
 
-    pmu->start_txn(pmu);
+    pmu->start_txn(pmu, PERF_PMU_TXN_ADD);
 
     if (event_sched_in(group_event, cpuctx, ctx)) {
         pmu->cancel_txn(pmu);
@@ -7267,24 +7267,49 @@ static void perf_pmu_nop_void(struct pmu *pmu)
 {
 }
 
+static void perf_pmu_nop_txn(struct pmu *pmu, unsigned int flags)
+{
+}
+
 static int perf_pmu_nop_int(struct pmu *pmu)
 {
     return 0;
 }
 
-static void perf_pmu_start_txn(struct pmu *pmu)
+DEFINE_PER_CPU(unsigned int, nop_txn_flags);
+
+static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags)
 {
+    __this_cpu_write(nop_txn_flags, flags);
+
+    if (flags & ~PERF_PMU_TXN_ADD)
+        return;
+
     perf_pmu_disable(pmu);
 }
 
 static int perf_pmu_commit_txn(struct pmu *pmu)
 {
+    unsigned int flags = __this_cpu_read(nop_txn_flags);
+
+    __this_cpu_write(nop_txn_flags, 0);
+
+    if (flags & ~PERF_PMU_TXN_ADD)
+        return 0;
+
     perf_pmu_enable(pmu);
     return 0;
 }
 
 static void perf_pmu_cancel_txn(struct pmu *pmu)
 {
+    unsigned int flags = __this_cpu_read(nop_txn_flags);
+
+    __this_cpu_write(nop_txn_flags, 0);
+
+    if (flags & ~PERF_PMU_TXN_ADD)
+        return;
+
     perf_pmu_enable(pmu);
 }
 
@@ -7523,7 +7548,7 @@ got_cpu_context:
         pmu->commit_txn = perf_pmu_commit_txn;
         pmu->cancel_txn = perf_pmu_cancel_txn;
     } else {
-        pmu->start_txn = perf_pmu_nop_void;
+        pmu->start_txn = perf_pmu_nop_txn;
         pmu->commit_txn = perf_pmu_nop_int;
         pmu->cancel_txn = perf_pmu_nop_void;
     }
-- 
cgit v1.2.3

From 01add3eaf1b25e497b14ca210f3bfe5f5dd2b112 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu
Date: Thu, 3 Sep 2015 20:07:46 -0700
Subject: perf/core: Split perf_event_read() and perf_event_count()

perf_event_read() does two things:

 - call the PMU to read/update the counter value, and
 - compute the total count of the event and its children

Not all callers need both. perf_event_reset() for instance needs the
first piece but doesn't need the second. Similarly, when we implement
the ability to read a group of events using the transaction interface,
we would need the two pieces done independently.

Break up perf_event_read() and have it just read/update the counter,
and have the callers compute the total count if necessary.
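
The resulting caller pattern is a two-step sequence (condensed from the
hunks below; perf_event_reset() simply omits the second step):

    perf_event_read(event);               /* refresh count from the PMU */
    total += perf_event_count(event);     /* aggregate event + children */
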
Signed-off-by: Sukadev Bhattiprolu
Signed-off-by: Peter Zijlstra (Intel)
Cc: Arnaldo Carvalho de Melo
Cc: Arnaldo Carvalho de Melo
Cc: Jiri Olsa
Cc: Linus Torvalds
Cc: Michael Ellerman
Cc: Peter Zijlstra
Cc: Stephane Eranian
Cc: Thomas Gleixner
Cc: Vince Weaver
Link: http://lkml.kernel.org/r/1441336073-22750-4-git-send-email-sukadev@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar
---
 kernel/events/core.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index c80cee82959f..260bf8cfed51 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3275,7 +3275,7 @@ u64 perf_event_read_local(struct perf_event *event)
     return val;
 }
 
-static u64 perf_event_read(struct perf_event *event)
+static void perf_event_read(struct perf_event *event)
 {
     /*
      * If event is enabled and currently active on a CPU, update the
@@ -3301,8 +3301,6 @@ static u64 perf_event_read(struct perf_event *event)
         update_event_times(event);
         raw_spin_unlock_irqrestore(&ctx->lock, flags);
     }
-
-    return perf_event_count(event);
 }
 
 /*
@@ -3818,14 +3816,18 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
     *running = 0;
 
     mutex_lock(&event->child_mutex);
-    total += perf_event_read(event);
+
+    perf_event_read(event);
+    total += perf_event_count(event);
+
     *enabled += event->total_time_enabled +
             atomic64_read(&event->child_total_time_enabled);
     *running += event->total_time_running +
             atomic64_read(&event->child_total_time_running);
 
     list_for_each_entry(child, &event->child_list, child_list) {
-        total += perf_event_read(child);
+        perf_event_read(child);
+        total += perf_event_count(child);
         *enabled += child->total_time_enabled;
         *running += child->total_time_running;
     }
@@ -3985,7 +3987,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 
 static void _perf_event_reset(struct perf_event *event)
 {
-    (void)perf_event_read(event);
+    perf_event_read(event);
     local64_set(&event->count, 0);
     perf_event_update_userpage(event);
 }
-- 
cgit v1.2.3

From b15f495b4e9295cf21065d8569835a2f18cfe41b Mon Sep 17 00:00:00 2001
From: "Peter Zijlstra (Intel)"
Date: Thu, 3 Sep 2015 20:07:47 -0700
Subject: perf/core: Rename perf_event_read_{one,group}, perf_read_hw

In order to free up the perf_event_read_group() name:

 s/perf_event_read_\(one\|group\)/perf_read_\1/g
 s/perf_read_hw/__perf_read/g

Signed-off-by: Peter Zijlstra (Intel)
Cc: Arnaldo Carvalho de Melo
Cc: Arnaldo Carvalho de Melo
Cc: Jiri Olsa
Cc: Linus Torvalds
Cc: Michael Ellerman
Cc: Peter Zijlstra
Cc: Stephane Eranian
Cc: Thomas Gleixner
Cc: Vince Weaver
Link: http://lkml.kernel.org/r/1441336073-22750-5-git-send-email-sukadev@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar
---
 kernel/events/core.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 260bf8cfed51..67b7dba4f0b5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3742,7 +3742,7 @@ static void put_event(struct perf_event *event)
      *     see the comment there.
      *
      *  2) there is a lock-inversion with mmap_sem through
-     *     perf_event_read_group(), which takes faults while
+     *     perf_read_group(), which takes faults while
      *     holding ctx->mutex, however this is called after
      *     the last filedesc died, so there is no possibility
      *     to trigger the AB-BA case.
@@ -3837,7 +3837,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 }
 EXPORT_SYMBOL_GPL(perf_event_read_value);
 
-static int perf_event_read_group(struct perf_event *event,
+static int perf_read_group(struct perf_event *event,
                    u64 read_format, char __user *buf)
 {
     struct perf_event *leader = event->group_leader, *sub;
@@ -3885,7 +3885,7 @@ static int perf_event_read_group(struct perf_event *event,
     return ret;
 }
 
-static int perf_event_read_one(struct perf_event *event,
+static int perf_read_one(struct perf_event *event,
                  u64 read_format, char __user *buf)
 {
     u64 enabled, running;
@@ -3923,7 +3923,7 @@ static bool is_event_hup(struct perf_event *event)
  * Read the performance event - simple non blocking version for now
  */
 static ssize_t
-perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
+__perf_read(struct perf_event *event, char __user *buf, size_t count)
 {
     u64 read_format = event->attr.read_format;
     int ret;
@@ -3941,9 +3941,9 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
     WARN_ON_ONCE(event->ctx->parent_ctx);
 
     if (read_format & PERF_FORMAT_GROUP)
-        ret = perf_event_read_group(event, read_format, buf);
+        ret = perf_read_group(event, read_format, buf);
     else
-        ret = perf_event_read_one(event, read_format, buf);
+        ret = perf_read_one(event, read_format, buf);
 
     return ret;
 }
@@ -3956,7 +3956,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
     int ret;
 
     ctx = perf_event_ctx_lock(event);
-    ret = perf_read_hw(event, buf, count);
+    ret = __perf_read(event, buf, count);
     perf_event_ctx_unlock(event, ctx);
 
     return ret;
-- 
cgit v1.2.3

From 0492d4c5b8c4dc3a7591bf6fa0e35d117812cc85 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 3 Sep 2015 20:07:48 -0700
Subject: perf/core: Add group reads to perf_event_read()

Enable perf_event_read() to update entire groups at once; this will be
useful for read transactions.
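
The cross-CPU call now carries a small descriptor instead of a bare
event pointer, so the remote CPU knows whether to update the siblings
too (condensed from the hunks below):

    struct perf_read_data data = {
        .event = event,
        .group = group,    /* true: also read every sibling */
    };
    smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
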
Signed-off-by: Peter Zijlstra (Intel)
Cc: Arnaldo Carvalho de Melo
Cc: Arnaldo Carvalho de Melo
Cc: Jiri Olsa
Cc: Linus Torvalds
Cc: Michael Ellerman
Cc: Peter Zijlstra
Cc: Stephane Eranian
Cc: Sukadev Bhattiprolu
Cc: Thomas Gleixner
Cc: Vince Weaver
Link: http://lkml.kernel.org/r/20150723080435.GE25159@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar
---
 kernel/events/core.c | 39 ++++++++++++++++++++++++++++++-------
 1 file changed, 32 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 67b7dba4f0b5..4d89866edd4e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3184,12 +3184,18 @@ void perf_event_exec(void)
     rcu_read_unlock();
 }
 
+struct perf_read_data {
+    struct perf_event *event;
+    bool group;
+};
+
 /*
  * Cross CPU call to read the hardware event
  */
 static void __perf_event_read(void *info)
 {
-    struct perf_event *event = info;
+    struct perf_read_data *data = info;
+    struct perf_event *sub, *event = data->event;
     struct perf_event_context *ctx = event->ctx;
     struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 
@@ -3208,9 +3214,21 @@ static void __perf_event_read(void *info)
         update_context_time(ctx);
         update_cgrp_time_from_event(event);
     }
+
     update_event_times(event);
     if (event->state == PERF_EVENT_STATE_ACTIVE)
         event->pmu->read(event);
+
+    if (!data->group)
+        goto unlock;
+
+    list_for_each_entry(sub, &event->sibling_list, group_entry) {
+        update_event_times(sub);
+        if (sub->state == PERF_EVENT_STATE_ACTIVE)
+            sub->pmu->read(sub);
+    }
+
+unlock:
     raw_spin_unlock(&ctx->lock);
 }
 
@@ -3275,15 +3293,19 @@ u64 perf_event_read_local(struct perf_event *event)
     return val;
 }
 
-static void perf_event_read(struct perf_event *event)
+static void perf_event_read(struct perf_event *event, bool group)
 {
     /*
      * If event is enabled and currently active on a CPU, update the
      * value in the event structure:
      */
     if (event->state == PERF_EVENT_STATE_ACTIVE) {
+        struct perf_read_data data = {
+            .event = event,
+            .group = group,
+        };
         smp_call_function_single(event->oncpu,
-                     __perf_event_read, event, 1);
+                     __perf_event_read, &data, 1);
     } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
         struct perf_event_context *ctx = event->ctx;
         unsigned long flags;
@@ -3298,7 +3320,10 @@ static void perf_event_read(struct perf_event *event)
             update_context_time(ctx);
             update_cgrp_time_from_event(event);
         }
-        update_event_times(event);
+        if (group)
+            update_group_times(event);
+        else
+            update_event_times(event);
         raw_spin_unlock_irqrestore(&ctx->lock, flags);
     }
 }
@@ -3817,7 +3842,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 
     mutex_lock(&event->child_mutex);
 
-    perf_event_read(event);
+    perf_event_read(event, false);
     total += perf_event_count(event);
 
     *enabled += event->total_time_enabled +
@@ -3826,7 +3851,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
         atomic64_read(&event->child_total_time_running);
 
     list_for_each_entry(child, &event->child_list, child_list) {
-        perf_event_read(child);
+        perf_event_read(child, false);
         total += perf_event_count(child);
         *enabled += child->total_time_enabled;
         *running += child->total_time_running;
@@ -3987,7 +4012,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 
 static void _perf_event_reset(struct perf_event *event)
 {
-    perf_event_read(event);
+    perf_event_read(event, false);
     local64_set(&event->count, 0);
     perf_event_update_userpage(event);
 }
-- 
cgit v1.2.3

From fa8c269353d560b7c28119ad7617029f92e40b15 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 3 Sep 2015 20:07:49 -0700
Subject: perf/core: Invert perf_read_group() loops

In order to enable the use of perf_event_read(.group = true), we need
to invert the sibling-child loop nesting of perf_read_group().

Currently we iterate the child list for each sibling; this precludes
using group reads. Flip things around so that we iterate each group for
each child.

Signed-off-by: Peter Zijlstra (Intel)
[ Made the patch compile and things. ]
Signed-off-by: Sukadev Bhattiprolu
Signed-off-by: Peter Zijlstra (Intel)
Cc: Arnaldo Carvalho de Melo
Cc: Arnaldo Carvalho de Melo
Cc: Jiri Olsa
Cc: Linus Torvalds
Cc: Michael Ellerman
Cc: Peter Zijlstra
Cc: Stephane Eranian
Cc: Thomas Gleixner
Cc: Vince Weaver
Link: http://lkml.kernel.org/r/1441336073-22750-7-git-send-email-sukadev@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar
---
 kernel/events/core.c | 85 +++++++++++++++++++++++++++++++++-------------------
 1 file changed, 55 insertions(+), 30 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4d89866edd4e..dd2a0b84c400 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3862,50 +3862,75 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 }
 EXPORT_SYMBOL_GPL(perf_event_read_value);
 
-static int perf_read_group(struct perf_event *event,
-                   u64 read_format, char __user *buf)
+static void __perf_read_group_add(struct perf_event *leader,
+                   u64 read_format, u64 *values)
 {
-    struct perf_event *leader = event->group_leader, *sub;
-    struct perf_event_context *ctx = leader->ctx;
-    int n = 0, size = 0, ret;
-    u64 count, enabled, running;
-    u64 values[5];
+    struct perf_event *sub;
+    int n = 1; /* skip @nr */
 
-    lockdep_assert_held(&ctx->mutex);
+    perf_event_read(leader, true);
+
+    /*
+     * Since we co-schedule groups, {enabled,running} times of siblings
+     * will be identical to those of the leader, so we only publish one
+     * set.
+     */
+    if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+        values[n++] += leader->total_time_enabled +
+            atomic64_read(&leader->child_total_time_enabled);
+    }
 
-    count = perf_event_read_value(leader, &enabled, &running);
+    if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+        values[n++] += leader->total_time_running +
+            atomic64_read(&leader->child_total_time_running);
+    }
 
-    values[n++] = 1 + leader->nr_siblings;
-    if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
-        values[n++] = enabled;
-    if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
-        values[n++] = running;
-    values[n++] = count;
+    /*
+     * Write {count,id} tuples for every sibling.
+     */
+    values[n++] += perf_event_count(leader);
     if (read_format & PERF_FORMAT_ID)
         values[n++] = primary_event_id(leader);
 
-    size = n * sizeof(u64);
+    list_for_each_entry(sub, &leader->sibling_list, group_entry) {
+        values[n++] += perf_event_count(sub);
+        if (read_format & PERF_FORMAT_ID)
+            values[n++] = primary_event_id(sub);
+    }
+}
 
-    if (copy_to_user(buf, values, size))
-        return -EFAULT;
+static int perf_read_group(struct perf_event *event,
+                   u64 read_format, char __user *buf)
+{
+    struct perf_event *leader = event->group_leader, *child;
+    struct perf_event_context *ctx = leader->ctx;
+    int ret = event->read_size;
+    u64 *values;
 
-    ret = size;
+    lockdep_assert_held(&ctx->mutex);
 
-    list_for_each_entry(sub, &leader->sibling_list, group_entry) {
-        n = 0;
+    values = kzalloc(event->read_size, GFP_KERNEL);
+    if (!values)
+        return -ENOMEM;
 
-        values[n++] = perf_event_read_value(sub, &enabled, &running);
-        if (read_format & PERF_FORMAT_ID)
-            values[n++] = primary_event_id(sub);
+    values[0] = 1 + leader->nr_siblings;
+
+    /*
+     * By locking the child_mutex of the leader we effectively
+     * lock the child list of all siblings.. XXX explain how.
+     */
+    mutex_lock(&leader->child_mutex);
 
-        size = n * sizeof(u64);
+    __perf_read_group_add(leader, read_format, values);
+    list_for_each_entry(child, &leader->child_list, child_list)
+        __perf_read_group_add(child, read_format, values);
 
-        if (copy_to_user(buf + ret, values, size)) {
-            return -EFAULT;
-        }
+    mutex_unlock(&leader->child_mutex);
 
-        ret += size;
-    }
+    if (copy_to_user(buf, values, event->read_size))
+        ret = -EFAULT;
+
+    kfree(values);
 
     return ret;
 }
-- 
cgit v1.2.3

From 7d88962e230c8342080e7e2fe9dd5be43dc13b79 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu
Date: Thu, 3 Sep 2015 20:07:50 -0700
Subject: perf/core: Add return value for perf_event_read()

When we implement the ability to read several counters at once (using
the PERF_PMU_TXN_READ transaction interface), perf_event_read() can
fail when the 'group' parameter is true (e.g., trying to read too many
events at once).

For now, have perf_event_read() return an integer. Ignore the return
value when the 'group' parameter is false.
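
Condensed from the hunks below, group readers propagate the error while
the existing single-event callers discard it explicitly:

    ret = perf_event_read(leader, true);    /* group read may fail */
    if (ret)
        return ret;

    (void)perf_event_read(event, false);    /* single read: ignore rc */
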
Signed-off-by: Sukadev Bhattiprolu
Signed-off-by: Peter Zijlstra (Intel)
Cc: Arnaldo Carvalho de Melo
Cc: Arnaldo Carvalho de Melo
Cc: Jiri Olsa
Cc: Linus Torvalds
Cc: Michael Ellerman
Cc: Peter Zijlstra
Cc: Stephane Eranian
Cc: Thomas Gleixner
Cc: Vince Weaver
Link: http://lkml.kernel.org/r/1441336073-22750-8-git-send-email-sukadev@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar
---
 kernel/events/core.c | 45 ++++++++++++++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 11 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index dd2a0b84c400..ade04dfab368 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3187,6 +3187,7 @@ void perf_event_exec(void)
 struct perf_read_data {
     struct perf_event *event;
     bool group;
+    int ret;
 };
 
 /*
@@ -3227,6 +3228,7 @@ static void __perf_event_read(void *info)
         if (sub->state == PERF_EVENT_STATE_ACTIVE)
             sub->pmu->read(sub);
     }
+    data->ret = 0;
 
 unlock:
     raw_spin_unlock(&ctx->lock);
@@ -3293,8 +3295,10 @@ u64 perf_event_read_local(struct perf_event *event)
     return val;
 }
 
-static void perf_event_read(struct perf_event *event, bool group)
+static int perf_event_read(struct perf_event *event, bool group)
 {
+    int ret = 0;
+
     /*
      * If event is enabled and currently active on a CPU, update the
      * value in the event structure:
@@ -3303,9 +3307,11 @@ static void perf_event_read(struct perf_event *event, bool group)
         struct perf_read_data data = {
             .event = event,
             .group = group,
+            .ret = 0,
         };
         smp_call_function_single(event->oncpu,
                      __perf_event_read, &data, 1);
+        ret = data.ret;
     } else if (event->state == PERF_EVENT_STATE_INACTIVE) {
         struct perf_event_context *ctx = event->ctx;
         unsigned long flags;
@@ -3326,6 +3332,8 @@ static void perf_event_read(struct perf_event *event, bool group)
             update_event_times(event);
         raw_spin_unlock_irqrestore(&ctx->lock, flags);
     }
+
+    return ret;
 }
 
 /*
@@ -3842,7 +3850,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 
     mutex_lock(&event->child_mutex);
 
-    perf_event_read(event, false);
+    (void)perf_event_read(event, false);
     total += perf_event_count(event);
 
     *enabled += event->total_time_enabled +
@@ -3851,7 +3859,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 
     list_for_each_entry(child, &event->child_list, child_list) {
-        perf_event_read(child, false);
+        (void)perf_event_read(child, false);
         total += perf_event_count(child);
         *enabled += child->total_time_enabled;
         *running += child->total_time_running;
@@ -3862,13 +3870,16 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 }
 EXPORT_SYMBOL_GPL(perf_event_read_value);
 
-static void __perf_read_group_add(struct perf_event *leader,
+static int __perf_read_group_add(struct perf_event *leader,
                    u64 read_format, u64 *values)
 {
     struct perf_event *sub;
     int n = 1; /* skip @nr */
+    int ret;
 
-    perf_event_read(leader, true);
+    ret = perf_event_read(leader, true);
+    if (ret)
+        return ret;
 
     /*
      * Since we co-schedule groups, {enabled,running} times of siblings
@@ -3897,6 +3908,8 @@ static void __perf_read_group_add(struct perf_event *leader,
         if (read_format & PERF_FORMAT_ID)
             values[n++] = primary_event_id(sub);
     }
+
+    return 0;
 }
 
 static int perf_read_group(struct perf_event *event,
@@ -3904,7 +3917,7 @@ static int perf_read_group(struct perf_event *event,
 {
     struct perf_event *leader = event->group_leader, *child;
     struct perf_event_context *ctx = leader->ctx;
-    int ret = event->read_size;
+    int ret;
     u64 *values;
 
     lockdep_assert_held(&ctx->mutex);
 
@@ -3921,17 +3934,27 @@ static int perf_read_group(struct perf_event *event,
      */
     mutex_lock(&leader->child_mutex);
 
-    __perf_read_group_add(leader, read_format, values);
-    list_for_each_entry(child, &leader->child_list, child_list)
-        __perf_read_group_add(child, read_format, values);
+    ret = __perf_read_group_add(leader, read_format, values);
+    if (ret)
+        goto unlock;
+
+    list_for_each_entry(child, &leader->child_list, child_list) {
+        ret = __perf_read_group_add(child, read_format, values);
+        if (ret)
+            goto unlock;
+    }
 
     mutex_unlock(&leader->child_mutex);
 
+    ret = event->read_size;
     if (copy_to_user(buf, values, event->read_size))
         ret = -EFAULT;
+    goto out;
 
+unlock:
+    mutex_unlock(&leader->child_mutex);
+out:
     kfree(values);
-
     return ret;
 }
 
@@ -4037,7 +4060,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
 
 static void _perf_event_reset(struct perf_event *event)
 {
-    perf_event_read(event, false);
+    (void)perf_event_read(event, false);
     local64_set(&event->count, 0);
     perf_event_update_userpage(event);
 }
-- 
cgit v1.2.3

From 4a00c16e552ea5e71756cd29cd2df7557ec9cac4 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu
Date: Thu, 3 Sep 2015 20:07:51 -0700
Subject: perf/core: Define PERF_PMU_TXN_READ interface

Define a new PERF_PMU_TXN_READ interface to read a group of counters
at once.

    pmu->start_txn()            // Initialize before first event

    for each event in group
        pmu->read(event);       // Queue each event to be read

    rc = pmu->commit_txn()      // Read/update all queued counters

Note that we use this interface with all PMUs. PMUs that implement this
interface use the ->read() operation to _queue_ the counters to be read
and use ->commit_txn() to actually read all the queued counters at once.

PMUs that don't implement PERF_PMU_TXN_READ ignore ->start_txn() and
->commit_txn() and continue to read counters one at a time.

Thanks to input from Peter Zijlstra.
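
Condensed from the kernel/events/core.c hunk below, the group-read path
in __perf_event_read() becomes:

    pmu->start_txn(pmu, PERF_PMU_TXN_READ);

    pmu->read(event);                       /* queue the leader */

    list_for_each_entry(sub, &event->sibling_list, group_entry) {
        if (sub->state == PERF_EVENT_STATE_ACTIVE)
            sub->pmu->read(sub);            /* queue each sibling */
    }

    data->ret = pmu->commit_txn(pmu);       /* read all queued counters */
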
Signed-off-by: Sukadev Bhattiprolu
Signed-off-by: Peter Zijlstra (Intel)
Cc: Arnaldo Carvalho de Melo
Cc: Arnaldo Carvalho de Melo
Cc: Jiri Olsa
Cc: Linus Torvalds
Cc: Michael Ellerman
Cc: Peter Zijlstra
Cc: Stephane Eranian
Cc: Thomas Gleixner
Cc: Vince Weaver
Link: http://lkml.kernel.org/r/1441336073-22750-9-git-send-email-sukadev@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar
---
 include/linux/perf_event.h |  1 +
 kernel/events/core.c       | 24 +++++++++++++++++++-----
 2 files changed, 20 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ea3b5dd21a4c..b83cea932f74 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -202,6 +202,7 @@ struct perf_event;
 #define PERF_EVENT_TXN 0x1
 
 #define PERF_PMU_TXN_ADD  0x1 /* txn to add/schedule event on PMU */
+#define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */
 
 /**
  * pmu::capabilities flags
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ade04dfab368..55b0f7cf2614 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3199,6 +3199,7 @@ static void __perf_event_read(void *info)
     struct perf_event *sub, *event = data->event;
     struct perf_event_context *ctx = event->ctx;
     struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+    struct pmu *pmu = event->pmu;
 
     /*
      * If this is a task context, we need to check whether it is
@@ -3217,18 +3218,31 @@ static void __perf_event_read(void *info)
     }
 
     update_event_times(event);
-    if (event->state == PERF_EVENT_STATE_ACTIVE)
-        event->pmu->read(event);
+    if (event->state != PERF_EVENT_STATE_ACTIVE)
+        goto unlock;
 
-    if (!data->group)
+    if (!data->group) {
+        pmu->read(event);
+        data->ret = 0;
         goto unlock;
+    }
+
+    pmu->start_txn(pmu, PERF_PMU_TXN_READ);
+
+    pmu->read(event);
 
     list_for_each_entry(sub, &event->sibling_list, group_entry) {
         update_event_times(sub);
-        if (sub->state == PERF_EVENT_STATE_ACTIVE)
+        if (sub->state == PERF_EVENT_STATE_ACTIVE) {
+            /*
+             * Use sibling's PMU rather than @event's since
+             * sibling could be on different (eg: software) PMU.
+             */
             sub->pmu->read(sub);
+        }
     }
-    data->ret = 0;
+
+    data->ret = pmu->commit_txn(pmu);
 
 unlock:
     raw_spin_unlock(&ctx->lock);
-- 
cgit v1.2.3

From 18ab2cd3ee9d52dc64c5ae984146a261a328c4e8 Mon Sep 17 00:00:00 2001
From: Geliang Tang
Date: Sun, 27 Sep 2015 23:25:50 +0800
Subject: perf/core, perf/x86: Change needlessly global functions and a variable to static

Fixes various sparse warnings.

Signed-off-by: Geliang Tang
Cc: Arnaldo Carvalho de Melo
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/70c14234da1bed6e3e67b9c419e2d5e376ab4f32.1443367286.git.geliangtang@163.com
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/intel_cacheinfo.c | 8 ++++----
 kernel/events/core.c                  | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'kernel')

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index be4febc58b94..e38d338a6447 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -157,7 +157,7 @@ struct _cpuid4_info_regs {
     struct amd_northbridge *nb;
 };
 
-unsigned short num_cache_leaves;
+static unsigned short num_cache_leaves;
 
 /* AMD doesn't have CPUID4. Emulate it here to report the same
    information to the user.
   This makes some assumptions about the machine:
@@ -326,7 +326,7 @@ static void amd_calc_l3_indices(struct amd_northbridge *nb)
  *
  * @returns: the disabled index if used or negative value if slot free.
  */
-int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
+static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
 {
     unsigned int reg = 0;
 
@@ -403,8 +403,8 @@ static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
  *
  * @return: 0 on success, error status on failure
  */
-int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot,
-        unsigned long index)
+static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
+                   unsigned slot, unsigned long index)
 {
     int ret = 0;
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f87b434c3c1e..ea02109aee77 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -196,7 +196,7 @@ static int perf_sample_period_ns __read_mostly = DEFAULT_SAMPLE_PERIOD_NS;
 static int perf_sample_allowed_ns __read_mostly =
     DEFAULT_SAMPLE_PERIOD_NS * DEFAULT_CPU_TIME_MAX_PERCENT / 100;
 
-void update_perf_cpu_limits(void)
+static void update_perf_cpu_limits(void)
 {
     u64 tmp = perf_sample_period_ns;
 
@@ -472,7 +472,7 @@ perf_cgroup_set_timestamp(struct task_struct *task,
  * mode SWOUT : schedule out everything
  * mode SWIN : schedule in based on cgroup for next
  */
-void perf_cgroup_switch(struct task_struct *task, int mode)
+static void perf_cgroup_switch(struct task_struct *task, int mode)
 {
     struct perf_cpu_context *cpuctx;
     struct pmu *pmu;
@@ -7390,7 +7390,7 @@ static int perf_pmu_nop_int(struct pmu *pmu)
     return 0;
 }
 
-DEFINE_PER_CPU(unsigned int, nop_txn_flags);
+static DEFINE_PER_CPU(unsigned int, nop_txn_flags);
 
 static void perf_pmu_start_txn(struct pmu *pmu, unsigned int flags)
 {
@@ -7750,7 +7750,7 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
     return ret;
 }
 
-struct pmu *perf_init_event(struct perf_event *event)
+static struct pmu *perf_init_event(struct perf_event *event)
 {
     struct pmu *pmu = NULL;
     int idx;
-- 
cgit v1.2.3