diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-07 20:33:50 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-07 20:33:50 +0300 |
commit | 25d8d4eecace9de5a6a2193e4df1917afbdd3052 (patch) | |
tree | 1f1bbde6423745251c41fb4d1842e70b9f7bca07 /arch/powerpc/perf | |
parent | 60e76bb8a4e4c5398ea9053535e1fd0c9d6bb06e (diff) | |
parent | a7aaa2f26bfd932a654706b19859e7adf802bee2 (diff) | |
download | linux-25d8d4eecace9de5a6a2193e4df1917afbdd3052.tar.xz |
Merge tag 'powerpc-5.9-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
- Add support for (optionally) using queued spinlocks & rwlocks.
- Support for a new faster system call ABI using the scv instruction on
Power9 or later.
- Drop support for the PROT_SAO mmap/mprotect flag as it will be
unsupported on Power10 and future processors, leaving us with no way
to implement the functionality it requests. This risks breaking
userspace, though we believe it is unused in practice.
- A bug fix for, and then the removal of, our custom stack expansion
checking. We now allow stack expansion up to the rlimit, like other
architectures.
- Remove the remnants of our (previously disabled) topology update
code, which tried to react to NUMA layout changes on virtualised
systems, but was prone to crashes and other problems.
- Add PMU support for Power10 CPUs.
- A change to our signal trampoline so that we don't unbalance the link
stack (branch return predictor) in the signal delivery path.
- Lots of other cleanups, refactorings, smaller features and so on as
usual.
Thanks to: Abhishek Goel, Alastair D'Silva, Alexander A. Klimov, Alexey
Kardashevskiy, Alistair Popple, Andrew Donnellan, Aneesh Kumar K.V, Anju
T Sudhakar, Anton Blanchard, Arnd Bergmann, Athira Rajeev, Balamuruhan
S, Bharata B Rao, Bill Wendling, Bin Meng, Cédric Le Goater, Chris
Packham, Christophe Leroy, Christoph Hellwig, Daniel Axtens, Dan
Williams, David Lamparter, Desnes A. Nunes do Rosario, Erhard F., Finn
Thain, Frederic Barrat, Ganesh Goudar, Gautham R. Shenoy, Geoff Levand,
Greg Kurz, Gustavo A. R. Silva, Hari Bathini, Harish, Imre Kaloz, Joel
Stanley, Joe Perches, John Crispin, Jordan Niethe, Kajol Jain, Kamalesh
Babulal, Kees Cook, Laurent Dufour, Leonardo Bras, Li RongQing, Madhavan
Srinivasan, Mahesh Salgaonkar, Mark Cave-Ayland, Michal Suchanek, Milton
Miller, Mimi Zohar, Murilo Opsfelder Araujo, Nathan Chancellor, Nathan
Lynch, Naveen N. Rao, Nayna Jain, Nicholas Piggin, Oliver O'Halloran,
Palmer Dabbelt, Pedro Miraglia Franco de Carvalho, Philippe Bergheaud,
Pingfan Liu, Pratik Rajesh Sampat, Qian Cai, Qinglang Miao, Randy
Dunlap, Ravi Bangoria, Sachin Sant, Sam Bobroff, Sandipan Das, Santosh
Sivaraj, Satheesh Rajendran, Shirisha Ganta, Sourabh Jain, Srikar
Dronamraju, Stan Johnson, Stephen Rothwell, Thadeu Lima de Souza
Cascardo, Thiago Jung Bauermann, Tom Lane, Vaibhav Jain, Vladis Dronov,
Wei Yongjun, Wen Xiong, YueHaibing.
* tag 'powerpc-5.9-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (337 commits)
selftests/powerpc: Fix pkey syscall redefinitions
powerpc: Fix circular dependency between percpu.h and mmu.h
powerpc/powernv/sriov: Fix use of uninitialised variable
selftests/powerpc: Skip vmx/vsx/tar/etc tests on older CPUs
powerpc/40x: Fix assembler warning about r0
powerpc/papr_scm: Add support for fetching nvdimm 'fuel-gauge' metric
powerpc/papr_scm: Fetch nvdimm performance stats from PHYP
cpuidle: pseries: Fixup exit latency for CEDE(0)
cpuidle: pseries: Add function to parse extended CEDE records
cpuidle: pseries: Set the latency-hint before entering CEDE
selftests/powerpc: Fix online CPU selection
powerpc/perf: Consolidate perf_callchain_user_[64|32]()
powerpc/pseries/hotplug-cpu: Remove double free in error path
powerpc/pseries/mobility: Add pr_debug() for device tree changes
powerpc/pseries/mobility: Set pr_fmt()
powerpc/cacheinfo: Warn if cache object chain becomes unordered
powerpc/cacheinfo: Improve diagnostics about malformed cache lists
powerpc/cacheinfo: Use name@unit instead of full DT path in debug messages
powerpc/cacheinfo: Set pr_fmt()
powerpc: fix function annotations to avoid section mismatch warnings with gcc-10
...
Diffstat (limited to 'arch/powerpc/perf')
-rw-r--r-- | arch/powerpc/perf/Makefile | 2 | ||||
-rw-r--r-- | arch/powerpc/perf/callchain.h | 25 | ||||
-rw-r--r-- | arch/powerpc/perf/callchain_32.c | 21 | ||||
-rw-r--r-- | arch/powerpc/perf/callchain_64.c | 13 | ||||
-rw-r--r-- | arch/powerpc/perf/core-book3s.c | 108 | ||||
-rw-r--r-- | arch/powerpc/perf/generic-compat-pmu.c | 2 | ||||
-rw-r--r-- | arch/powerpc/perf/hv-24x7.c | 54 | ||||
-rw-r--r-- | arch/powerpc/perf/imc-pmu.c | 29 | ||||
-rw-r--r-- | arch/powerpc/perf/internal.h | 1 | ||||
-rw-r--r-- | arch/powerpc/perf/isa207-common.c | 91 | ||||
-rw-r--r-- | arch/powerpc/perf/isa207-common.h | 37 | ||||
-rw-r--r-- | arch/powerpc/perf/mpc7450-pmu.c | 23 | ||||
-rw-r--r-- | arch/powerpc/perf/power10-events-list.h | 70 | ||||
-rw-r--r-- | arch/powerpc/perf/power10-pmu.c | 419 | ||||
-rw-r--r-- | arch/powerpc/perf/power5+-pmu.c | 19 | ||||
-rw-r--r-- | arch/powerpc/perf/power5-pmu.c | 19 | ||||
-rw-r--r-- | arch/powerpc/perf/power6-pmu.c | 18 | ||||
-rw-r--r-- | arch/powerpc/perf/power7-pmu.c | 19 | ||||
-rw-r--r-- | arch/powerpc/perf/power8-pmu.c | 2 | ||||
-rw-r--r-- | arch/powerpc/perf/power9-pmu.c | 2 | ||||
-rw-r--r-- | arch/powerpc/perf/ppc970-pmu.c | 26 |
21 files changed, 849 insertions, 151 deletions
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index 53d614e98537..c02854dea2b2 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o \ isa207-common.o power8-pmu.o power9-pmu.o \ - generic-compat-pmu.o + generic-compat-pmu.o power10-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h index 7a2cb9e1181a..ae24d4a00da6 100644 --- a/arch/powerpc/perf/callchain.h +++ b/arch/powerpc/perf/callchain.h @@ -2,7 +2,7 @@ #ifndef _POWERPC_PERF_CALLCHAIN_H #define _POWERPC_PERF_CALLCHAIN_H -int read_user_stack_slow(void __user *ptr, void *buf, int nb); +int read_user_stack_slow(const void __user *ptr, void *buf, int nb); void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs); void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, @@ -16,4 +16,27 @@ static inline bool invalid_user_sp(unsigned long sp) return (!sp || (sp & mask) || (sp > top)); } +/* + * On 32-bit we just access the address and let hash_page create a + * HPTE if necessary, so there is no need to fall back to reading + * the page tables. Since this is called at interrupt level, + * do_page_fault() won't treat a DSI as a page fault. + */ +static inline int __read_user_stack(const void __user *ptr, void *ret, + size_t size) +{ + unsigned long addr = (unsigned long)ptr; + int rc; + + if (addr > TASK_SIZE - size || (addr & (size - 1))) + return -EFAULT; + + rc = copy_from_user_nofault(ret, ptr, size); + + if (IS_ENABLED(CONFIG_PPC64) && rc) + return read_user_stack_slow(ptr, ret, size); + + return rc; +} + #endif /* _POWERPC_PERF_CALLCHAIN_H */ diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c index 542e68b8eae0..64e4013d8060 100644 --- a/arch/powerpc/perf/callchain_32.c +++ b/arch/powerpc/perf/callchain_32.c @@ -30,26 +30,9 @@ #endif /* CONFIG_PPC64 */ -/* - * On 32-bit we just access the address and let hash_page create a - * HPTE if necessary, so there is no need to fall back to reading - * the page tables. Since this is called at interrupt level, - * do_page_fault() won't treat a DSI as a page fault. - */ -static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) +static int read_user_stack_32(const unsigned int __user *ptr, unsigned int *ret) { - int rc; - - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || - ((unsigned long)ptr & 3)) - return -EFAULT; - - rc = copy_from_user_nofault(ret, ptr, sizeof(*ret)); - - if (IS_ENABLED(CONFIG_PPC64) && rc) - return read_user_stack_slow(ptr, ret, 4); - - return rc; + return __read_user_stack(ptr, ret, sizeof(*ret)); } /* diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index fa2a1b83b9b0..fed90e827f3a 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -23,7 +23,7 @@ * interrupt context, so if the access faults, we read the page tables * to find which page (if any) is mapped and access it directly. */ -int read_user_stack_slow(void __user *ptr, void *buf, int nb) +int read_user_stack_slow(const void __user *ptr, void *buf, int nb) { unsigned long addr = (unsigned long) ptr; @@ -44,16 +44,9 @@ int read_user_stack_slow(void __user *ptr, void *buf, int nb) return -EFAULT; } -static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) +static int read_user_stack_64(const unsigned long __user *ptr, unsigned long *ret) { - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) || - ((unsigned long)ptr & 7)) - return -EFAULT; - - if (!copy_from_user_nofault(ret, ptr, sizeof(*ret))) - return 0; - - return read_user_stack_slow(ptr, ret, 8); + return __read_user_stack(ptr, ret, sizeof(*ret)); } /* diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 01d70280d287..78fe34986594 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -37,12 +37,7 @@ struct cpu_hw_events { struct perf_event *event[MAX_HWEVENTS]; u64 events[MAX_HWEVENTS]; unsigned int flags[MAX_HWEVENTS]; - /* - * The order of the MMCR array is: - * - 64-bit, MMCR0, MMCR1, MMCRA, MMCR2 - * - 32-bit, MMCR0, MMCR1, MMCR2 - */ - unsigned long mmcr[4]; + struct mmcr_regs mmcr; struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS]; u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; @@ -77,6 +72,11 @@ static unsigned int freeze_events_kernel = MMCR0_FCS; /* * 32-bit doesn't have MMCRA but does have an MMCR2, * and a few other names are different. + * Also 32-bit doesn't have MMCR3, SIER2 and SIER3. + * Define them as zero knowing that any code path accessing + * these registers (via mtspr/mfspr) are done under ppmu flag + * check for PPMU_ARCH_31 and we will not enter that code path + * for 32-bit. */ #ifdef CONFIG_PPC32 @@ -90,7 +90,11 @@ static unsigned int freeze_events_kernel = MMCR0_FCS; #define MMCR0_PMCC_U6 0 #define SPRN_MMCRA SPRN_MMCR2 +#define SPRN_MMCR3 0 +#define SPRN_SIER2 0 +#define SPRN_SIER3 0 #define MMCRA_SAMPLE_ENABLE 0 +#define MMCRA_BHRB_DISABLE 0 static inline unsigned long perf_ip_adjust(struct pt_regs *regs) { @@ -121,7 +125,7 @@ static void ebb_event_add(struct perf_event *event) { } static void ebb_switch_out(unsigned long mmcr0) { } static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) { - return cpuhw->mmcr[0]; + return cpuhw->mmcr.mmcr0; } static inline void power_pmu_bhrb_enable(struct perf_event *event) {} @@ -466,8 +470,11 @@ static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events * * addresses at this point. Check the privileges before * exporting it to userspace (avoid exposure of regions * where we could have speculative execution) + * Incase of ISA v3.1, BHRB will capture only user-space + * addresses, hence include a check before filtering code */ - if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0) + if (!(ppmu->flags & PPMU_ARCH_31) && + is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0) continue; /* Branches are read most recent first (ie. mfbhrb 0 is @@ -586,11 +593,16 @@ static void ebb_switch_out(unsigned long mmcr0) current->thread.sdar = mfspr(SPRN_SDAR); current->thread.mmcr0 = mmcr0 & MMCR0_USER_MASK; current->thread.mmcr2 = mfspr(SPRN_MMCR2) & MMCR2_USER_MASK; + if (ppmu->flags & PPMU_ARCH_31) { + current->thread.mmcr3 = mfspr(SPRN_MMCR3); + current->thread.sier2 = mfspr(SPRN_SIER2); + current->thread.sier3 = mfspr(SPRN_SIER3); + } } static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) { - unsigned long mmcr0 = cpuhw->mmcr[0]; + unsigned long mmcr0 = cpuhw->mmcr.mmcr0; if (!ebb) goto out; @@ -624,7 +636,13 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) * unfreeze counters, it should not set exclude_xxx in its events and * instead manage the MMCR2 entirely by itself. */ - mtspr(SPRN_MMCR2, cpuhw->mmcr[3] | current->thread.mmcr2); + mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2 | current->thread.mmcr2); + + if (ppmu->flags & PPMU_ARCH_31) { + mtspr(SPRN_MMCR3, current->thread.mmcr3); + mtspr(SPRN_SIER2, current->thread.sier2); + mtspr(SPRN_SIER3, current->thread.sier3); + } out: return mmcr0; } @@ -845,6 +863,11 @@ void perf_event_print_debug(void) pr_info("EBBRR: %016lx BESCR: %016lx\n", mfspr(SPRN_EBBRR), mfspr(SPRN_BESCR)); } + + if (ppmu->flags & PPMU_ARCH_31) { + pr_info("MMCR3: %016lx SIER2: %016lx SIER3: %016lx\n", + mfspr(SPRN_MMCR3), mfspr(SPRN_SIER2), mfspr(SPRN_SIER3)); + } #endif pr_info("SIAR: %016lx SDAR: %016lx SIER: %016lx\n", mfspr(SPRN_SIAR), sdar, sier); @@ -1196,7 +1219,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) static void power_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; - unsigned long flags, mmcr0, val; + unsigned long flags, mmcr0, val, mmcra; if (!ppmu) return; @@ -1229,12 +1252,24 @@ static void power_pmu_disable(struct pmu *pmu) mb(); isync(); + val = mmcra = cpuhw->mmcr.mmcra; + /* * Disable instruction sampling if it was enabled */ - if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { - mtspr(SPRN_MMCRA, - cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); + if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE) + val &= ~MMCRA_SAMPLE_ENABLE; + + /* Disable BHRB via mmcra (BHRBRD) for p10 */ + if (ppmu->flags & PPMU_ARCH_31) + val |= MMCRA_BHRB_DISABLE; + + /* + * Write SPRN_MMCRA if mmcra has either disabled + * instruction sampling or BHRB. + */ + if (val != mmcra) { + mtspr(SPRN_MMCRA, mmcra); mb(); isync(); } @@ -1308,18 +1343,20 @@ static void power_pmu_enable(struct pmu *pmu) * (possibly updated for removal of events). */ if (!cpuhw->n_added) { - mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); + mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE); + mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1); + if (ppmu->flags & PPMU_ARCH_31) + mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3); goto out_enable; } /* * Clear all MMCR settings and recompute them for the new set of events. */ - memset(cpuhw->mmcr, 0, sizeof(cpuhw->mmcr)); + memset(&cpuhw->mmcr, 0, sizeof(cpuhw->mmcr)); if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index, - cpuhw->mmcr, cpuhw->event)) { + &cpuhw->mmcr, cpuhw->event)) { /* shouldn't ever get here */ printk(KERN_ERR "oops compute_mmcr failed\n"); goto out; @@ -1333,11 +1370,11 @@ static void power_pmu_enable(struct pmu *pmu) */ event = cpuhw->event[0]; if (event->attr.exclude_user) - cpuhw->mmcr[0] |= MMCR0_FCP; + cpuhw->mmcr.mmcr0 |= MMCR0_FCP; if (event->attr.exclude_kernel) - cpuhw->mmcr[0] |= freeze_events_kernel; + cpuhw->mmcr.mmcr0 |= freeze_events_kernel; if (event->attr.exclude_hv) - cpuhw->mmcr[0] |= MMCR0_FCHV; + cpuhw->mmcr.mmcr0 |= MMCR0_FCHV; } /* @@ -1346,12 +1383,15 @@ static void power_pmu_enable(struct pmu *pmu) * Then unfreeze the events. */ ppc_set_pmu_inuse(1); - mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); - mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) + mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE); + mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1); + mtspr(SPRN_MMCR0, (cpuhw->mmcr.mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) | MMCR0_FC); if (ppmu->flags & PPMU_ARCH_207S) - mtspr(SPRN_MMCR2, cpuhw->mmcr[3]); + mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2); + + if (ppmu->flags & PPMU_ARCH_31) + mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3); /* * Read off any pre-existing events that need to move @@ -1402,7 +1442,7 @@ static void power_pmu_enable(struct pmu *pmu) perf_event_update_userpage(event); } cpuhw->n_limited = n_lim; - cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; + cpuhw->mmcr.mmcr0 |= MMCR0_PMXE | MMCR0_FCECE; out_enable: pmao_restore_workaround(ebb); @@ -1418,9 +1458,9 @@ static void power_pmu_enable(struct pmu *pmu) /* * Enable instruction sampling if necessary */ - if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { + if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE) { mb(); - mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); + mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra); } out: @@ -1550,7 +1590,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags) cpuhw->flags[i-1] = cpuhw->flags[i]; } --cpuhw->n_events; - ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr); + ppmu->disable_pmc(event->hw.idx - 1, &cpuhw->mmcr); if (event->hw.idx) { write_pmc(event->hw.idx, 0); event->hw.idx = 0; @@ -1571,7 +1611,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags) } if (cpuhw->n_events == 0) { /* disable exceptions if no events are running */ - cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); + cpuhw->mmcr.mmcr0 &= ~(MMCR0_PMXE | MMCR0_FCECE); } if (has_branch_stack(event)) @@ -1795,7 +1835,7 @@ static void hw_perf_event_destroy(struct perf_event *event) static int hw_perf_cache_event(u64 config, u64 *eventp) { unsigned long type, op, result; - int ev; + u64 ev; if (!ppmu->cache_events) return -EINVAL; @@ -2246,7 +2286,7 @@ static void __perf_event_interrupt(struct pt_regs *regs) * XXX might want to use MSR.PM to keep the events frozen until * we get back out of this interrupt. */ - write_mmcr0(cpuhw, cpuhw->mmcr[0]); + write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0); if (nmi) nmi_exit(); @@ -2268,7 +2308,7 @@ static int power_pmu_prepare_cpu(unsigned int cpu) if (ppmu) { memset(cpuhw, 0, sizeof(*cpuhw)); - cpuhw->mmcr[0] = MMCR0_FC; + cpuhw->mmcr.mmcr0 = MMCR0_FC; } return 0; } @@ -2314,6 +2354,8 @@ static int __init init_ppc64_pmu(void) return 0; else if (!init_power9_pmu()) return 0; + else if (!init_power10_pmu()) + return 0; else if (!init_ppc970_pmu()) return 0; else diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c index 5e5a54d5588e..eb8a6aaf4cc1 100644 --- a/arch/powerpc/perf/generic-compat-pmu.c +++ b/arch/powerpc/perf/generic-compat-pmu.c @@ -101,7 +101,7 @@ static int compat_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(L1D) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0, diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index db213eb7cb02..cdb7bfbd157e 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -31,6 +31,8 @@ static int interface_version; /* Whether we have to aggregate result data for some domains. */ static bool aggregate_result_elements; +static cpumask_t hv_24x7_cpumask; + static bool domain_is_valid(unsigned domain) { switch (domain) { @@ -446,6 +448,12 @@ static ssize_t device_show_string(struct device *dev, return sprintf(buf, "%s\n", (char *)d->var); } +static ssize_t cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &hv_24x7_cpumask); +} + static ssize_t sockets_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1113,6 +1121,7 @@ static DEVICE_ATTR_RO(domains); static DEVICE_ATTR_RO(sockets); static DEVICE_ATTR_RO(chipspersocket); static DEVICE_ATTR_RO(coresperchip); +static DEVICE_ATTR_RO(cpumask); static struct bin_attribute *if_bin_attrs[] = { &bin_attr_catalog, @@ -1126,6 +1135,7 @@ static struct attribute *if_attrs[] = { &dev_attr_sockets.attr, &dev_attr_chipspersocket.attr, &dev_attr_coresperchip.attr, + &dev_attr_cpumask.attr, NULL, }; @@ -1641,6 +1651,45 @@ static struct pmu h_24x7_pmu = { .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; +static int ppc_hv_24x7_cpu_online(unsigned int cpu) +{ + if (cpumask_empty(&hv_24x7_cpumask)) + cpumask_set_cpu(cpu, &hv_24x7_cpumask); + + return 0; +} + +static int ppc_hv_24x7_cpu_offline(unsigned int cpu) +{ + int target; + + /* Check if exiting cpu is used for collecting 24x7 events */ + if (!cpumask_test_and_clear_cpu(cpu, &hv_24x7_cpumask)) + return 0; + + /* Find a new cpu to collect 24x7 events */ + target = cpumask_last(cpu_active_mask); + + if (target < 0 || target >= nr_cpu_ids) { + pr_err("hv_24x7: CPU hotplug init failed\n"); + return -1; + } + + /* Migrate 24x7 events to the new target */ + cpumask_set_cpu(target, &hv_24x7_cpumask); + perf_pmu_migrate_context(&h_24x7_pmu, cpu, target); + + return 0; +} + +static int hv_24x7_cpu_hotplug_init(void) +{ + return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE, + "perf/powerpc/hv_24x7:online", + ppc_hv_24x7_cpu_online, + ppc_hv_24x7_cpu_offline); +} + static int hv_24x7_init(void) { int r; @@ -1685,6 +1734,11 @@ static int hv_24x7_init(void) if (r) return r; + /* init cpuhotplug */ + r = hv_24x7_cpu_hotplug_init(); + if (r) + return r; + r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1); if (r) return r; diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 0edcfd0b491d..a45d694a5d5d 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1288,11 +1288,30 @@ static int trace_imc_prepare_sample(struct trace_imc_data *mem, header->size = sizeof(*header) + event->header_size; header->misc = 0; - if (is_kernel_addr(data->ip)) - header->misc |= PERF_RECORD_MISC_KERNEL; - else - header->misc |= PERF_RECORD_MISC_USER; - + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + switch (IMC_TRACE_RECORD_VAL_HVPR(mem->val)) { + case 0:/* when MSR HV and PR not set in the trace-record */ + header->misc |= PERF_RECORD_MISC_GUEST_KERNEL; + break; + case 1: /* MSR HV is 0 and PR is 1 */ + header->misc |= PERF_RECORD_MISC_GUEST_USER; + break; + case 2: /* MSR HV is 1 and PR is 0 */ + header->misc |= PERF_RECORD_MISC_HYPERVISOR; + break; + case 3: /* MSR HV is 1 and PR is 1 */ + header->misc |= PERF_RECORD_MISC_USER; + break; + default: + pr_info("IMC: Unable to set the flag based on MSR bits\n"); + break; + } + } else { + if (is_kernel_addr(data->ip)) + header->misc |= PERF_RECORD_MISC_KERNEL; + else + header->misc |= PERF_RECORD_MISC_USER; + } perf_event_header__init_id(header, data, event); return 0; diff --git a/arch/powerpc/perf/internal.h b/arch/powerpc/perf/internal.h index f755c64da137..80bbf72bfec2 100644 --- a/arch/powerpc/perf/internal.h +++ b/arch/powerpc/perf/internal.h @@ -9,4 +9,5 @@ extern int init_power6_pmu(void); extern int init_power7_pmu(void); extern int init_power8_pmu(void); extern int init_power9_pmu(void); +extern int init_power10_pmu(void); extern int init_generic_compat_pmu(void); diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 4c86da5eb28a..964437adec18 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -55,7 +55,9 @@ static bool is_event_valid(u64 event) { u64 valid_mask = EVENT_VALID_MASK; - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (cpu_has_feature(CPU_FTR_ARCH_31)) + valid_mask = p10_EVENT_VALID_MASK; + else if (cpu_has_feature(CPU_FTR_ARCH_300)) valid_mask = p9_EVENT_VALID_MASK; return !(event & ~valid_mask); @@ -69,6 +71,14 @@ static inline bool is_event_marked(u64 event) return false; } +static unsigned long sdar_mod_val(u64 event) +{ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + return p10_SDAR_MODE(event); + + return p9_SDAR_MODE(event); +} + static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) { /* @@ -79,7 +89,7 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) * MMCRA[SDAR_MODE] will be programmed as "0b01" for continous sampling * mode and will be un-changed when setting MMCRA[63] (Marked events). * - * Incase of Power9: + * Incase of Power9/power10: * Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'), * or if group already have any marked events. * For rest @@ -90,8 +100,8 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) if (cpu_has_feature(CPU_FTR_ARCH_300)) { if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE)) *mmcra &= MMCRA_SDAR_MODE_NO_UPDATES; - else if (p9_SDAR_MODE(event)) - *mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT; + else if (sdar_mod_val(event)) + *mmcra |= sdar_mod_val(event) << MMCRA_SDAR_MODE_SHIFT; else *mmcra |= MMCRA_SDAR_MODE_DCACHE; } else @@ -134,7 +144,11 @@ static bool is_thresh_cmp_valid(u64 event) /* * Check the mantissa upper two bits are not zero, unless the * exponent is also zero. See the THRESH_CMP_MANTISSA doc. + * Power10: thresh_cmp is replaced by l2_l3 event select. */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + return false; + cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; exp = cmp >> 7; @@ -251,7 +265,12 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; - cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK; + if (cpu_has_feature(CPU_FTR_ARCH_31)) + cache = (event >> EVENT_CACHE_SEL_SHIFT) & + p10_EVENT_CACHE_SEL_MASK; + else + cache = (event >> EVENT_CACHE_SEL_SHIFT) & + EVENT_CACHE_SEL_MASK; ebb = (event >> EVENT_EBB_SHIFT) & EVENT_EBB_MASK; if (pmc) { @@ -283,7 +302,10 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } if (unit >= 6 && unit <= 9) { - if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) { + mask |= CNST_L2L3_GROUP_MASK; + value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT); + } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { mask |= CNST_CACHE_GROUP_MASK; value |= CNST_CACHE_GROUP_VAL(event & 0xff); @@ -363,10 +385,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } int isa207_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], + unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[]) { unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val; + unsigned long mmcr3; unsigned int pmc, pmc_inuse; int i; @@ -379,7 +402,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev, pmc_inuse |= 1 << pmc; } - mmcra = mmcr1 = mmcr2 = 0; + mmcra = mmcr1 = mmcr2 = mmcr3 = 0; + + /* + * Disable bhrb unless explicitly requested + * by setting MMCRA (BHRBRD) bit. + */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + mmcra |= MMCRA_BHRB_DISABLE; /* Second pass: assign PMCs, set all MMCR1 fields */ for (i = 0; i < n_ev; ++i) { @@ -438,8 +468,17 @@ int isa207_compute_mmcr(u64 event[], int n_ev, mmcra |= val << MMCRA_THR_CTL_SHIFT; val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK; mmcra |= val << MMCRA_THR_SEL_SHIFT; - val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; - mmcra |= thresh_cmp_val(val); + if (!cpu_has_feature(CPU_FTR_ARCH_31)) { + val = (event[i] >> EVENT_THR_CMP_SHIFT) & + EVENT_THR_CMP_MASK; + mmcra |= thresh_cmp_val(val); + } + } + + if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) { + val = (event[i] >> p10_L2L3_EVENT_SHIFT) & + p10_EVENT_L2L3_SEL_MASK; + mmcr2 |= val << p10_L2L3_SEL_SHIFT; } if (event[i] & EVENT_WANTS_BHRB) { @@ -447,6 +486,11 @@ int isa207_compute_mmcr(u64 event[], int n_ev, mmcra |= val << MMCRA_IFM_SHIFT; } + /* set MMCRA (BHRBRD) to 0 if there is user request for BHRB */ + if (cpu_has_feature(CPU_FTR_ARCH_31) && + (has_branch_stack(pevents[i]) || (event[i] & EVENT_WANTS_BHRB))) + mmcra &= ~MMCRA_BHRB_DISABLE; + if (pevents[i]->attr.exclude_user) mmcr2 |= MMCR2_FCP(pmc); @@ -460,34 +504,43 @@ int isa207_compute_mmcr(u64 event[], int n_ev, mmcr2 |= MMCR2_FCS(pmc); } + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (pmc <= 4) { + val = (event[i] >> p10_EVENT_MMCR3_SHIFT) & + p10_EVENT_MMCR3_MASK; + mmcr3 |= val << MMCR3_SHIFT(pmc); + } + } + hwc[i] = pmc - 1; } /* Return MMCRx values */ - mmcr[0] = 0; + mmcr->mmcr0 = 0; /* pmc_inuse is 1-based */ if (pmc_inuse & 2) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0x7c) - mmcr[0] |= MMCR0_PMCjCE; + mmcr->mmcr0 |= MMCR0_PMCjCE; /* If we're not using PMC 5 or 6, freeze them */ if (!(pmc_inuse & 0x60)) - mmcr[0] |= MMCR0_FC56; + mmcr->mmcr0 |= MMCR0_FC56; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; - mmcr[3] = mmcr2; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; + mmcr->mmcr2 = mmcr2; + mmcr->mmcr3 = mmcr3; return 0; } -void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 3) - mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1)); + mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1)); } static int find_alternative(u64 event, const unsigned int ev_alt[][MAX_ALT], int size) diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 63fd4f3f6013..044de65e96b9 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -87,6 +87,31 @@ EVENT_LINUX_MASK | \ EVENT_PSEL_MASK)) +/* Contants to support power10 raw encoding format */ +#define p10_SDAR_MODE_SHIFT 22 +#define p10_SDAR_MODE_MASK 0x3ull +#define p10_SDAR_MODE(v) (((v) >> p10_SDAR_MODE_SHIFT) & \ + p10_SDAR_MODE_MASK) +#define p10_EVENT_L2L3_SEL_MASK 0x1f +#define p10_L2L3_SEL_SHIFT 3 +#define p10_L2L3_EVENT_SHIFT 40 +#define p10_EVENT_THRESH_MASK 0xffffull +#define p10_EVENT_CACHE_SEL_MASK 0x3ull +#define p10_EVENT_MMCR3_MASK 0x7fffull +#define p10_EVENT_MMCR3_SHIFT 45 + +#define p10_EVENT_VALID_MASK \ + ((p10_SDAR_MODE_MASK << p10_SDAR_MODE_SHIFT | \ + (p10_EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \ + (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \ + (p10_EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \ + (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \ + (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \ + (p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT) | \ + (p10_EVENT_MMCR3_MASK << p10_EVENT_MMCR3_SHIFT) | \ + (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \ + EVENT_LINUX_MASK | \ + EVENT_PSEL_MASK)) /* * Layout of constraint bits: * @@ -135,6 +160,9 @@ #define CNST_CACHE_PMC4_VAL (1ull << 54) #define CNST_CACHE_PMC4_MASK CNST_CACHE_PMC4_VAL +#define CNST_L2L3_GROUP_VAL(v) (((v) & 0x1full) << 55) +#define CNST_L2L3_GROUP_MASK CNST_L2L3_GROUP_VAL(0x1f) + /* * For NC we are counting up to 4 events. This requires three bits, and we need * the fifth event to overflow and set the 4th bit. To achieve that we bias the @@ -191,7 +219,7 @@ #define MMCRA_THR_CTR_EXP(v) (((v) >> MMCRA_THR_CTR_EXP_SHIFT) &\ MMCRA_THR_CTR_EXP_MASK) -/* MMCR1 Threshold Compare bit constant for power9 */ +/* MMCRA Threshold Compare bit constant for power9 */ #define p9_MMCRA_THR_CMP_SHIFT 45 /* Bits in MMCR2 for PowerISA v2.07 */ @@ -202,6 +230,9 @@ #define MAX_ALT 2 #define MAX_PMU_COUNTERS 6 +/* Bits in MMCR3 for PowerISA v3.10 */ +#define MMCR3_SHIFT(pmc) (49 - (15 * ((pmc) - 1))) + #define ISA207_SIER_TYPE_SHIFT 15 #define ISA207_SIER_TYPE_MASK (0x7ull << ISA207_SIER_TYPE_SHIFT) @@ -217,9 +248,9 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp); int isa207_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], + unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[]); -void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[]); +void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr); int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags, const unsigned int ev_alt[][MAX_ALT]); void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags, diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c index 4d5ef92511d1..1919e9df9165 100644 --- a/arch/powerpc/perf/mpc7450-pmu.c +++ b/arch/powerpc/perf/mpc7450-pmu.c @@ -257,7 +257,7 @@ static const u32 pmcsel_mask[N_COUNTER] = { * Compute MMCR0/1/2 values for a set of events. */ static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], - unsigned long mmcr[], + struct mmcr_regs *mmcr, struct perf_event *pevents[]) { u8 event_index[N_CLASSES][N_COUNTER]; @@ -321,9 +321,16 @@ static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], mmcr0 |= MMCR0_PMCnCE; /* Return MMCRx values */ - mmcr[0] = mmcr0; - mmcr[1] = mmcr1; - mmcr[2] = mmcr2; + mmcr->mmcr0 = mmcr0; + mmcr->mmcr1 = mmcr1; + mmcr->mmcr2 = mmcr2; + /* + * 32-bit doesn't have an MMCRA and uses SPRN_MMCR2 to define + * SPRN_MMCRA. So assign mmcra of cpu_hw_events with `mmcr2` + * value to ensure that any write to this SPRN_MMCRA will + * use mmcr2 value. + */ + mmcr->mmcra = mmcr2; return 0; } @@ -331,12 +338,12 @@ static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], * Disable counting by a PMC. * Note that the pmc argument is 0-based here, not 1-based. */ -static void mpc7450_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void mpc7450_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 1) - mmcr[0] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); + mmcr->mmcr0 &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); else - mmcr[1] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); + mmcr->mmcr1 &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]); } static int mpc7450_generic_events[] = { @@ -354,7 +361,7 @@ static int mpc7450_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0, 0x225 }, [C(OP_WRITE)] = { 0, 0x227 }, diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h new file mode 100644 index 000000000000..60c1b8111082 --- /dev/null +++ b/arch/powerpc/perf/power10-events-list.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Performance counter support for POWER10 processors. + * + * Copyright 2020 Madhavan Srinivasan, IBM Corporation. + * Copyright 2020 Athira Rajeev, IBM Corporation. + */ + +/* + * Power10 event codes. + */ +EVENT(PM_RUN_CYC, 0x600f4); +EVENT(PM_DISP_STALL_CYC, 0x100f8); +EVENT(PM_EXEC_STALL, 0x30008); +EVENT(PM_RUN_INST_CMPL, 0x500fa); +EVENT(PM_BR_CMPL, 0x4d05e); +EVENT(PM_BR_MPRED_CMPL, 0x400f6); + +/* All L1 D cache load references counted at finish, gated by reject */ +EVENT(PM_LD_REF_L1, 0x100fc); +/* Load Missed L1 */ +EVENT(PM_LD_MISS_L1, 0x3e054); +/* Store Missed L1 */ +EVENT(PM_ST_MISS_L1, 0x300f0); +/* L1 cache data prefetches */ +EVENT(PM_LD_PREFETCH_CACHE_LINE_MISS, 0x1002c); +/* Demand iCache Miss */ +EVENT(PM_L1_ICACHE_MISS, 0x200fc); +/* Instruction fetches from L1 */ +EVENT(PM_INST_FROM_L1, 0x04080); +/* Instruction Demand sectors wriittent into IL1 */ +EVENT(PM_INST_FROM_L1MISS, 0x03f00000001c040); +/* Instruction prefetch written into IL1 */ +EVENT(PM_IC_PREF_REQ, 0x040a0); +/* The data cache was reloaded from local core's L3 due to a demand load */ +EVENT(PM_DATA_FROM_L3, 0x01340000001c040); +/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ +EVENT(PM_DATA_FROM_L3MISS, 0x300fe); +/* Data PTEG reload */ +EVENT(PM_DTLB_MISS, 0x300fc); +/* ITLB Reloaded */ +EVENT(PM_ITLB_MISS, 0x400fc); + +EVENT(PM_RUN_CYC_ALT, 0x0001e); +EVENT(PM_RUN_INST_CMPL_ALT, 0x00002); + +/* + * Memory Access Events + * + * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0) + * To enable capturing of memory profiling, these MMCRA bits + * needs to be programmed and corresponding raw event format + * encoding. + * + * MMCRA bits encoding needed are + * SM (Sampling Mode) + * EM (Eligibility for Random Sampling) + * TECE (Threshold Event Counter Event) + * TS (Threshold Start Event) + * TE (Threshold End Event) + * + * Corresponding Raw Encoding bits: + * sample [EM,SM] + * thresh_sel (TECE) + * thresh start (TS) + * thresh end (TE) + */ + +EVENT(MEM_LOADS, 0x34340401e0); +EVENT(MEM_STORES, 0x343c0401e0); diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c new file mode 100644 index 000000000000..f7cff7f36a1c --- /dev/null +++ b/arch/powerpc/perf/power10-pmu.c @@ -0,0 +1,419 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Performance counter support for POWER10 processors. + * + * Copyright 2020 Madhavan Srinivasan, IBM Corporation. + * Copyright 2020 Athira Rajeev, IBM Corporation. + */ + +#define pr_fmt(fmt) "power10-pmu: " fmt + +#include "isa207-common.h" +#include "internal.h" + +/* + * Raw event encoding for Power10: + * + * 60 56 52 48 44 40 36 32 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * | | [ ] [ src_match ] [ src_mask ] | [ ] [ l2l3_sel ] [ thresh_ctl ] + * | | | | | | + * | | *- IFM (Linux) | | thresh start/stop -* + * | *- BHRB (Linux) | src_sel + * *- EBB (Linux) *invert_bit + * + * 28 24 20 16 12 8 4 0 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * [ ] [ sample ] [ ] [ ] [ pmc ] [unit ] [ ] m [ pmcxsel ] + * | | | | | | + * | | | | | *- mark + * | | | *- L1/L2/L3 cache_sel | + * | | sdar_mode | + * | *- sampling mode for marked events *- combine + * | + * *- thresh_sel + * + * Below uses IBM bit numbering. + * + * MMCR1[x:y] = unit (PMCxUNIT) + * MMCR1[24] = pmc1combine[0] + * MMCR1[25] = pmc1combine[1] + * MMCR1[26] = pmc2combine[0] + * MMCR1[27] = pmc2combine[1] + * MMCR1[28] = pmc3combine[0] + * MMCR1[29] = pmc3combine[1] + * MMCR1[30] = pmc4combine[0] + * MMCR1[31] = pmc4combine[1] + * + * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011 + * MMCR1[20:27] = thresh_ctl + * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001 + * MMCR1[20:27] = thresh_ctl + * else + * MMCRA[48:55] = thresh_ctl (THRESH START/END) + * + * if thresh_sel: + * MMCRA[45:47] = thresh_sel + * + * if l2l3_sel: + * MMCR2[56:60] = l2l3_sel[0:4] + * + * MMCR1[16] = cache_sel[0] + * MMCR1[17] = cache_sel[1] + * + * if mark: + * MMCRA[63] = 1 (SAMPLE_ENABLE) + * MMCRA[57:59] = sample[0:2] (RAND_SAMP_ELIG) + * MMCRA[61:62] = sample[3:4] (RAND_SAMP_MODE) + * + * if EBB and BHRB: + * MMCRA[32:33] = IFM + * + * MMCRA[SDAR_MODE] = sdar_mode[0:1] + */ + +/* + * Some power10 event codes. + */ +#define EVENT(_name, _code) enum{_name = _code} + +#include "power10-events-list.h" + +#undef EVENT + +/* MMCRA IFM bits - POWER10 */ +#define POWER10_MMCRA_IFM1 0x0000000040000000UL +#define POWER10_MMCRA_IFM2 0x0000000080000000UL +#define POWER10_MMCRA_IFM3 0x00000000C0000000UL +#define POWER10_MMCRA_BHRB_MASK 0x00000000C0000000UL + +/* Table of alternatives, sorted by column 0 */ +static const unsigned int power10_event_alternatives[][MAX_ALT] = { + { PM_RUN_CYC_ALT, PM_RUN_CYC }, + { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL }, +}; + +static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[]) +{ + int num_alt = 0; + + num_alt = isa207_get_alternatives(event, alt, + ARRAY_SIZE(power10_event_alternatives), flags, + power10_event_alternatives); + + return num_alt; +} + +GENERIC_EVENT_ATTR(cpu-cycles, PM_RUN_CYC); +GENERIC_EVENT_ATTR(instructions, PM_RUN_INST_CMPL); +GENERIC_EVENT_ATTR(branch-instructions, PM_BR_CMPL); +GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL); +GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1); +GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1); +GENERIC_EVENT_ATTR(mem-loads, MEM_LOADS); +GENERIC_EVENT_ATTR(mem-stores, MEM_STORES); + +CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1); +CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1); +CACHE_EVENT_ATTR(L1-dcache-prefetches, PM_LD_PREFETCH_CACHE_LINE_MISS); +CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1); +CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS); +CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1); +CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_REQ); +CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS); +CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3); +CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL); +CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL); +CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS); +CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS); + +static struct attribute *power10_events_attr[] = { + GENERIC_EVENT_PTR(PM_RUN_CYC), + GENERIC_EVENT_PTR(PM_RUN_INST_CMPL), + GENERIC_EVENT_PTR(PM_BR_CMPL), + GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL), + GENERIC_EVENT_PTR(PM_LD_REF_L1), + GENERIC_EVENT_PTR(PM_LD_MISS_L1), + GENERIC_EVENT_PTR(MEM_LOADS), + GENERIC_EVENT_PTR(MEM_STORES), + CACHE_EVENT_PTR(PM_LD_MISS_L1), + CACHE_EVENT_PTR(PM_LD_REF_L1), + CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS), + CACHE_EVENT_PTR(PM_ST_MISS_L1), + CACHE_EVENT_PTR(PM_L1_ICACHE_MISS), + CACHE_EVENT_PTR(PM_INST_FROM_L1), + CACHE_EVENT_PTR(PM_IC_PREF_REQ), + CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS), + CACHE_EVENT_PTR(PM_DATA_FROM_L3), + CACHE_EVENT_PTR(PM_BR_MPRED_CMPL), + CACHE_EVENT_PTR(PM_BR_CMPL), + CACHE_EVENT_PTR(PM_DTLB_MISS), + CACHE_EVENT_PTR(PM_ITLB_MISS), + NULL +}; + +static struct attribute_group power10_pmu_events_group = { + .name = "events", + .attrs = power10_events_attr, +}; + +PMU_FORMAT_ATTR(event, "config:0-59"); +PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); +PMU_FORMAT_ATTR(mark, "config:8"); +PMU_FORMAT_ATTR(combine, "config:10-11"); +PMU_FORMAT_ATTR(unit, "config:12-15"); +PMU_FORMAT_ATTR(pmc, "config:16-19"); +PMU_FORMAT_ATTR(cache_sel, "config:20-21"); +PMU_FORMAT_ATTR(sdar_mode, "config:22-23"); +PMU_FORMAT_ATTR(sample_mode, "config:24-28"); +PMU_FORMAT_ATTR(thresh_sel, "config:29-31"); +PMU_FORMAT_ATTR(thresh_stop, "config:32-35"); +PMU_FORMAT_ATTR(thresh_start, "config:36-39"); +PMU_FORMAT_ATTR(l2l3_sel, "config:40-44"); +PMU_FORMAT_ATTR(src_sel, "config:45-46"); +PMU_FORMAT_ATTR(invert_bit, "config:47"); +PMU_FORMAT_ATTR(src_mask, "config:48-53"); +PMU_FORMAT_ATTR(src_match, "config:54-59"); + +static struct attribute *power10_pmu_format_attr[] = { + &format_attr_event.attr, + &format_attr_pmcxsel.attr, + &format_attr_mark.attr, + &format_attr_combine.attr, + &format_attr_unit.attr, + &format_attr_pmc.attr, + &format_attr_cache_sel.attr, + &format_attr_sdar_mode.attr, + &format_attr_sample_mode.attr, + &format_attr_thresh_sel.attr, + &format_attr_thresh_stop.attr, + &format_attr_thresh_start.attr, + &format_attr_l2l3_sel.attr, + &format_attr_src_sel.attr, + &format_attr_invert_bit.attr, + &format_attr_src_mask.attr, + &format_attr_src_match.attr, + NULL, +}; + +static struct attribute_group power10_pmu_format_group = { + .name = "format", + .attrs = power10_pmu_format_attr, +}; + +static const struct attribute_group *power10_pmu_attr_groups[] = { + &power10_pmu_format_group, + &power10_pmu_events_group, + NULL, +}; + +static int power10_generic_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = PM_RUN_CYC, + [PERF_COUNT_HW_INSTRUCTIONS] = PM_RUN_INST_CMPL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL, + [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL, + [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, + [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1, +}; + +static u64 power10_bhrb_filter_map(u64 branch_sample_type) +{ + u64 pmu_bhrb_filter = 0; + + /* BHRB and regular PMU events share the same privilege state + * filter configuration. BHRB is always recorded along with a + * regular PMU event. As the privilege state filter is handled + * in the basic PMC configuration of the accompanying regular + * PMU event, we ignore any separate BHRB specific request. + */ + + /* No branch filter requested */ + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY) + return pmu_bhrb_filter; + + /* Invalid branch filter options - HW does not support */ + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN) + return -1; + + if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL) { + pmu_bhrb_filter |= POWER10_MMCRA_IFM2; + return pmu_bhrb_filter; + } + + if (branch_sample_type & PERF_SAMPLE_BRANCH_COND) { + pmu_bhrb_filter |= POWER10_MMCRA_IFM3; + return pmu_bhrb_filter; + } + + if (branch_sample_type & PERF_SAMPLE_BRANCH_CALL) + return -1; + + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) { + pmu_bhrb_filter |= POWER10_MMCRA_IFM1; + return pmu_bhrb_filter; + } + + /* Every thing else is unsupported */ + return -1; +} + +static void power10_config_bhrb(u64 pmu_bhrb_filter) +{ + pmu_bhrb_filter &= POWER10_MMCRA_BHRB_MASK; + + /* Enable BHRB filter in PMU */ + mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter)); +} + +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. + */ +static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_LD_REF_L1, + [C(RESULT_MISS)] = PM_LD_MISS_L1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_ST_MISS_L1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = PM_LD_PREFETCH_CACHE_LINE_MISS, + [C(RESULT_MISS)] = 0, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_INST_FROM_L1, + [C(RESULT_MISS)] = PM_L1_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = PM_INST_FROM_L1MISS, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = PM_IC_PREF_REQ, + [C(RESULT_MISS)] = 0, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_DATA_FROM_L3, + [C(RESULT_MISS)] = PM_DATA_FROM_L3MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = 0, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0, + [C(RESULT_MISS)] = PM_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = PM_BR_CMPL, + [C(RESULT_MISS)] = PM_BR_MPRED_CMPL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + }, +}; + +#undef C + +static struct power_pmu power10_pmu = { + .name = "POWER10", + .n_counter = MAX_PMU_COUNTERS, + .add_fields = ISA207_ADD_FIELDS, + .test_adder = ISA207_TEST_ADDER, + .group_constraint_mask = CNST_CACHE_PMC4_MASK, + .group_constraint_val = CNST_CACHE_PMC4_VAL, + .compute_mmcr = isa207_compute_mmcr, + .config_bhrb = power10_config_bhrb, + .bhrb_filter_map = power10_bhrb_filter_map, + .get_constraint = isa207_get_constraint, + .get_alternatives = power10_get_alternatives, + .get_mem_data_src = isa207_get_mem_data_src, + .get_mem_weight = isa207_get_mem_weight, + .disable_pmc = isa207_disable_pmc, + .flags = PPMU_HAS_SIER | PPMU_ARCH_207S | + PPMU_ARCH_31, + .n_generic = ARRAY_SIZE(power10_generic_events), + .generic_events = power10_generic_events, + .cache_events = &power10_cache_events, + .attr_groups = power10_pmu_attr_groups, + .bhrb_nr = 32, +}; + +int init_power10_pmu(void) +{ + int rc; + + /* Comes from cpu_specs[] */ + if (!cur_cpu_spec->oprofile_cpu_type || + strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power10")) + return -ENODEV; + + rc = register_power_pmu(&power10_pmu); + if (rc) + return rc; + + /* Tell userspace that EBB is supported */ + cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB; + + return 0; +} diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c index f8574547fc6b..a62b2cd7914f 100644 --- a/arch/powerpc/perf/power5+-pmu.c +++ b/arch/powerpc/perf/power5+-pmu.c @@ -448,7 +448,8 @@ static int power5p_marked_instr_event(u64 event) } static int power5p_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, + struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = 0; @@ -586,20 +587,20 @@ static int power5p_compute_mmcr(u64 event[], int n_ev, } /* Return MMCRx values */ - mmcr[0] = 0; + mmcr->mmcr0 = 0; if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0x3e) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 |= MMCR0_PMCjCE; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } -static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void power5p_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 3) - mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); + mmcr->mmcr1 &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); } static int power5p_generic_events[] = { @@ -618,7 +619,7 @@ static int power5p_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x1c10a8, 0x3c1088 }, [C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 }, diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c index da52ecab7c9a..8732b587cf71 100644 --- a/arch/powerpc/perf/power5-pmu.c +++ b/arch/powerpc/perf/power5-pmu.c @@ -379,7 +379,8 @@ static int power5_marked_instr_event(u64 event) } static int power5_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, + struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; @@ -528,20 +529,20 @@ static int power5_compute_mmcr(u64 event[], int n_ev, } /* Return MMCRx values */ - mmcr[0] = 0; + mmcr->mmcr0 = 0; if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0x3e) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 |= MMCR0_PMCjCE; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } -static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void power5_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 3) - mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); + mmcr->mmcr1 &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); } static int power5_generic_events[] = { @@ -560,7 +561,7 @@ static int power5_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x4c1090, 0x3c1088 }, [C(OP_WRITE)] = { 0x3c1090, 0xc10c3 }, diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c index 3929cacf72ed..0e318cf87129 100644 --- a/arch/powerpc/perf/power6-pmu.c +++ b/arch/powerpc/perf/power6-pmu.c @@ -171,7 +171,7 @@ static int power6_marked_instr_event(u64 event) * Assign PMC numbers and compute MMCR1 value for a set of events */ static int p6_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; @@ -243,13 +243,13 @@ static int p6_compute_mmcr(u64 event[], int n_ev, if (pmc < 4) mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc); } - mmcr[0] = 0; + mmcr->mmcr0 = 0; if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0xe) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 |= MMCR0_PMCjCE; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } @@ -457,11 +457,11 @@ static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[]) return nalt; } -static void p6_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void p6_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { /* Set PMCxSEL to 0 to disable PMCx */ if (pmc <= 3) - mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); + mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); } static int power6_generic_events[] = { @@ -481,7 +481,7 @@ static int power6_generic_events[] = { * are event codes. * The "DTLB" and "ITLB" events relate to the DERAT and IERAT. */ -static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x280030, 0x80080 }, [C(OP_WRITE)] = { 0x180032, 0x80088 }, diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index a137813a3076..5e0bf09cf077 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -242,7 +242,8 @@ static int power7_marked_instr_event(u64 event) } static int power7_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, + struct perf_event *pevents[]) { unsigned long mmcr1 = 0; unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; @@ -298,20 +299,20 @@ static int power7_compute_mmcr(u64 event[], int n_ev, } /* Return MMCRx values */ - mmcr[0] = 0; + mmcr->mmcr0 = 0; if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; + mmcr->mmcr0 = MMCR0_PMC1CE; if (pmc_inuse & 0x3e) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 |= MMCR0_PMCjCE; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } -static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void power7_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { if (pmc <= 3) - mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); + mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); } static int power7_generic_events[] = { @@ -332,7 +333,7 @@ static int power7_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0xc880, 0x400f0 }, [C(OP_WRITE)] = { 0, 0x300f0 }, diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 3a5fcc20ff31..5282e8415ddf 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -253,7 +253,7 @@ static void power8_config_bhrb(u64 pmu_bhrb_filter) * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(L1D) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = PM_LD_REF_L1, diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 08c3ef796198..05dae38b969a 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -310,7 +310,7 @@ static void power9_config_bhrb(u64 pmu_bhrb_filter) * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [ C(L1D) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = PM_LD_REF_L1, diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c index 4035d93d87ab..d35223fb112c 100644 --- a/arch/powerpc/perf/ppc970-pmu.c +++ b/arch/powerpc/perf/ppc970-pmu.c @@ -253,7 +253,8 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[]) } static int p970_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]) + unsigned int hwc[], struct mmcr_regs *mmcr, + struct perf_event *pevents[]) { unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; unsigned int pmc, unit, byte, psel; @@ -393,27 +394,26 @@ static int p970_compute_mmcr(u64 event[], int n_ev, mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ /* Return MMCRx values */ - mmcr[0] = mmcr0; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; + mmcr->mmcr0 = mmcr0; + mmcr->mmcr1 = mmcr1; + mmcr->mmcra = mmcra; return 0; } -static void p970_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +static void p970_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr) { - int shift, i; + int shift; + /* + * Setting the PMCxSEL field to 0x08 disables PMC x. + */ if (pmc <= 1) { shift = MMCR0_PMC1SEL_SH - 7 * pmc; - i = 0; + mmcr->mmcr0 = (mmcr->mmcr0 & ~(0x1fUL << shift)) | (0x08UL << shift); } else { shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2); - i = 1; + mmcr->mmcr1 = (mmcr->mmcr1 & ~(0x1fUL << shift)) | (0x08UL << shift); } - /* - * Setting the PMCxSEL field to 0x08 disables PMC x. - */ - mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift); } static int ppc970_generic_events[] = { @@ -432,7 +432,7 @@ static int ppc970_generic_events[] = { * 0 means not supported, -1 means nonsensical, other values * are event codes. */ -static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static u64 ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x8810, 0x3810 }, [C(OP_WRITE)] = { 0x7810, 0x813 }, |