26 files changed, 1650 insertions, 273 deletions
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 74d1e780748b..ead55351b254 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -35,7 +35,7 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
 		return 0;		/* must be 16-byte aligned */
 	if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
 		return 0;
-	if (sp >= prev_sp + STACK_FRAME_OVERHEAD)
+	if (sp >= prev_sp + STACK_FRAME_MIN_SIZE)
 		return 1;
 	/*
 	 * sp could decrease when we jump off an interrupt stack
@@ -243,7 +243,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry,
 	sp = regs->gpr[1];
 	perf_callchain_store(entry, next_ip);
 
-	for (;;) {
+	while (entry->nr < PERF_MAX_STACK_DEPTH) {
 		fp = (unsigned long __user *) sp;
 		if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
 			return;
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index fe52db2eea6a..12b638425bb9 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -36,7 +36,12 @@ struct cpu_hw_events {
 	struct perf_event *event[MAX_HWEVENTS];
 	u64 events[MAX_HWEVENTS];
 	unsigned int flags[MAX_HWEVENTS];
-	unsigned long mmcr[3];
+	/*
+	 * The order of the MMCR array is:
+	 *  - 64-bit, MMCR0, MMCR1, MMCRA, MMCR2
+	 *  - 32-bit, MMCR0, MMCR1, MMCR2
+	 */
+	unsigned long mmcr[4];
 	struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS];
 	u8  limited_hwidx[MAX_LIMITED_HWCOUNTERS];
 	u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
@@ -54,9 +59,9 @@ struct cpu_hw_events {
 	struct	perf_branch_entry	bhrb_entries[BHRB_MAX_ENTRIES];
 };
 
-DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
 
-struct power_pmu *ppmu;
+static struct power_pmu *ppmu;
 
 /*
  * Normally, to ignore kernel events we set the FCS (freeze counters
@@ -112,14 +117,14 @@ static bool is_ebb_event(struct perf_event *event) { return false; }
 static int ebb_event_check(struct perf_event *event) { return 0; }
 static void ebb_event_add(struct perf_event *event) { }
 static void ebb_switch_out(unsigned long mmcr0) { }
-static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0)
+static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
 {
-	return mmcr0;
+	return cpuhw->mmcr[0];
 }
 
 static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
 static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
-void power_pmu_flush_branch_stack(void) {}
+static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
 static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
 static void pmao_restore_workaround(bool ebb) { }
 #endif /* CONFIG_PPC32 */
@@ -334,7 +339,7 @@ static void power_pmu_bhrb_reset(void)
 
 static void power_pmu_bhrb_enable(struct perf_event *event)
 {
-	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
 
 	if (!ppmu->bhrb_nr)
 		return;
@@ -345,17 +350,19 @@ static void power_pmu_bhrb_enable(struct perf_event *event)
 		cpuhw->bhrb_context = event->ctx;
 	}
 	cpuhw->bhrb_users++;
+	perf_sched_cb_inc(event->ctx->pmu);
 }
 
 static void power_pmu_bhrb_disable(struct perf_event *event)
 {
-	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
 
 	if (!ppmu->bhrb_nr)
 		return;
 
 	cpuhw->bhrb_users--;
 	WARN_ON_ONCE(cpuhw->bhrb_users < 0);
+	perf_sched_cb_dec(event->ctx->pmu);
 
 	if (!cpuhw->disabled && !cpuhw->bhrb_users) {
 		/* BHRB cannot be turned off when other
@@ -370,9 +377,12 @@ static void power_pmu_bhrb_disable(struct perf_event *event)
 /* Called from ctxsw to prevent one process's branch entries to
  * mingle with the other process's entries during context switch.
  */
-void power_pmu_flush_branch_stack(void)
+static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
-	if (ppmu->bhrb_nr)
+	if (!ppmu->bhrb_nr)
+		return;
+
+	if (sched_in)
 		power_pmu_bhrb_reset();
 }
 /* Calculate the to address for a branch */
@@ -403,7 +413,7 @@ static __u64 power_pmu_bhrb_to(u64 addr)
 }
 
 /* Processing BHRB entries */
-void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
+static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 {
 	u64 val;
 	u64 addr;
@@ -542,8 +552,10 @@ static void ebb_switch_out(unsigned long mmcr0)
 	current->thread.mmcr2 = mfspr(SPRN_MMCR2) & MMCR2_USER_MASK;
 }
 
-static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0)
+static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
 {
+	unsigned long mmcr0 = cpuhw->mmcr[0];
+
 	if (!ebb)
 		goto out;
 
@@ -568,7 +580,15 @@ static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0)
 	mtspr(SPRN_SIAR, current->thread.siar);
 	mtspr(SPRN_SIER, current->thread.sier);
 	mtspr(SPRN_SDAR, current->thread.sdar);
-	mtspr(SPRN_MMCR2, current->thread.mmcr2);
+
+	/*
+	 * Merge the kernel & user values of MMCR2. The semantics we implement
+	 * are that the user MMCR2 can set bits, ie. cause counters to freeze,
+	 * but not clear bits. If a task wants to be able to clear bits, ie.
+	 * unfreeze counters, it should not set exclude_xxx in its events and
+	 * instead manage the MMCR2 entirely by itself.
+	 */
+	mtspr(SPRN_MMCR2, cpuhw->mmcr[3] | current->thread.mmcr2);
 out:
 	return mmcr0;
 }
@@ -915,6 +935,14 @@ static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
 	int i, n, first;
 	struct perf_event *event;
 
+	/*
+	 * If the PMU we're on supports per event exclude settings then we
+	 * don't need to do any of this logic. NB. This assumes no PMU has both
+	 * per event exclude and limited PMCs.
+	 */
+	if (ppmu->flags & PPMU_ARCH_207S)
+		return 0;
+
 	n = n_prev + n_new;
 	if (n <= 1)
 		return 0;
@@ -1121,7 +1149,7 @@ static void power_pmu_disable(struct pmu *pmu)
 	if (!ppmu)
 		return;
 	local_irq_save(flags);
-	cpuhw = &__get_cpu_var(cpu_hw_events);
+	cpuhw = this_cpu_ptr(&cpu_hw_events);
 
 	if (!cpuhw->disabled) {
 		/*
@@ -1188,7 +1216,7 @@ static void power_pmu_enable(struct pmu *pmu)
 		return;
 	local_irq_save(flags);
 
-	cpuhw = &__get_cpu_var(cpu_hw_events);
+	cpuhw = this_cpu_ptr(&cpu_hw_events);
 	if (!cpuhw->disabled)
 		goto out;
 
@@ -1219,28 +1247,31 @@ static void power_pmu_enable(struct pmu *pmu)
 	}
 
 	/*
-	 * Compute MMCR* values for the new set of events
+	 * Clear all MMCR settings and recompute them for the new set of events.
 	 */
+	memset(cpuhw->mmcr, 0, sizeof(cpuhw->mmcr));
+
 	if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index,
-			       cpuhw->mmcr)) {
+			       cpuhw->mmcr, cpuhw->event)) {
 		/* shouldn't ever get here */
 		printk(KERN_ERR "oops compute_mmcr failed\n");
 		goto out;
 	}
 
-	/*
-	 * Add in MMCR0 freeze bits corresponding to the
-	 * attr.exclude_* bits for the first event.
-	 * We have already checked that all events have the
-	 * same values for these bits as the first event.
-	 */
-	event = cpuhw->event[0];
-	if (event->attr.exclude_user)
-		cpuhw->mmcr[0] |= MMCR0_FCP;
-	if (event->attr.exclude_kernel)
-		cpuhw->mmcr[0] |= freeze_events_kernel;
-	if (event->attr.exclude_hv)
-		cpuhw->mmcr[0] |= MMCR0_FCHV;
+	if (!(ppmu->flags & PPMU_ARCH_207S)) {
+		/*
+		 * Add in MMCR0 freeze bits corresponding to the attr.exclude_*
+		 * bits for the first event. We have already checked that all
+		 * events have the same value for these bits as the first event.
+		 */
+		event = cpuhw->event[0];
+		if (event->attr.exclude_user)
+			cpuhw->mmcr[0] |= MMCR0_FCP;
+		if (event->attr.exclude_kernel)
+			cpuhw->mmcr[0] |= freeze_events_kernel;
+		if (event->attr.exclude_hv)
+			cpuhw->mmcr[0] |= MMCR0_FCHV;
+	}
 
 	/*
 	 * Write the new configuration to MMCR* with the freeze
@@ -1252,6 +1283,8 @@ static void power_pmu_enable(struct pmu *pmu)
 	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
 	mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
 				| MMCR0_FC);
+	if (ppmu->flags & PPMU_ARCH_207S)
+		mtspr(SPRN_MMCR2, cpuhw->mmcr[3]);
 
 	/*
 	 * Read off any pre-existing events that need to move
@@ -1307,10 +1340,7 @@ static void power_pmu_enable(struct pmu *pmu)
  out_enable:
 	pmao_restore_workaround(ebb);
 
-	if (ppmu->flags & PPMU_ARCH_207S)
-		mtspr(SPRN_MMCR2, 0);
-
-	mmcr0 = ebb_switch_in(ebb, cpuhw->mmcr[0]);
+	mmcr0 = ebb_switch_in(ebb, cpuhw);
 
 	mb();
 	if (cpuhw->bhrb_users)
@@ -1378,7 +1408,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags)
 	 * Add the event to the list (if there is room)
 	 * and check whether the total set is still feasible.
 	 */
-	cpuhw = &__get_cpu_var(cpu_hw_events);
+	cpuhw = this_cpu_ptr(&cpu_hw_events);
 	n0 = cpuhw->n_events;
 	if (n0 >= ppmu->n_counter)
 		goto out;
@@ -1444,7 +1474,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags)
 
 	power_pmu_read(event);
 
-	cpuhw = &__get_cpu_var(cpu_hw_events);
+	cpuhw = this_cpu_ptr(&cpu_hw_events);
 	for (i = 0; i < cpuhw->n_events; ++i) {
 		if (event == cpuhw->event[i]) {
 			while (++i < cpuhw->n_events) {
@@ -1548,9 +1578,9 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags)
  * Set the flag to make pmu::enable() not perform the
  * schedulability test, it will be performed at commit time
  */
-void power_pmu_start_txn(struct pmu *pmu)
+static void power_pmu_start_txn(struct pmu *pmu)
 {
-	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
 
 	perf_pmu_disable(pmu);
 	cpuhw->group_flag |= PERF_EVENT_TXN;
@@ -1562,9 +1592,9 @@ void power_pmu_start_txn(struct pmu *pmu)
  * Clear the flag and pmu::enable() will perform the
  * schedulability test.
  */
-void power_pmu_cancel_txn(struct pmu *pmu)
+static void power_pmu_cancel_txn(struct pmu *pmu)
 {
-	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
 
 	cpuhw->group_flag &= ~PERF_EVENT_TXN;
 	perf_pmu_enable(pmu);
@@ -1575,14 +1605,14 @@ void power_pmu_cancel_txn(struct pmu *pmu)
  * Perform the group schedulability test as a whole
  * Return 0 if success
  */
-int power_pmu_commit_txn(struct pmu *pmu)
+static int power_pmu_commit_txn(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw;
 	long i, n;
 
 	if (!ppmu)
 		return -EAGAIN;
-	cpuhw = &__get_cpu_var(cpu_hw_events);
+	cpuhw = this_cpu_ptr(&cpu_hw_events);
 	n = cpuhw->n_events;
 	if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
 		return -EAGAIN;
@@ -1807,8 +1837,10 @@ static int power_pmu_event_init(struct perf_event *event)
 		cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
 					event->attr.branch_sample_type);
 
-		if(cpuhw->bhrb_filter == -1)
+		if (cpuhw->bhrb_filter == -1) {
+			put_cpu_var(cpu_hw_events);
 			return -EOPNOTSUPP;
+		}
 	}
 
 	put_cpu_var(cpu_hw_events);
@@ -1863,7 +1895,7 @@ ssize_t power_events_sysfs_show(struct device *dev,
 	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
 }
 
-struct pmu power_pmu = {
+static struct pmu power_pmu = {
 	.pmu_enable	= power_pmu_enable,
 	.pmu_disable	= power_pmu_disable,
 	.event_init	= power_pmu_event_init,
@@ -1876,7 +1908,7 @@ struct pmu power_pmu = {
 	.cancel_txn	= power_pmu_cancel_txn,
 	.commit_txn	= power_pmu_commit_txn,
 	.event_idx	= power_pmu_event_idx,
-	.flush_branch_stack = power_pmu_flush_branch_stack,
+	.sched_task	= power_pmu_sched_task,
 };
 
 /*
@@ -1939,7 +1971,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 
 		if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
 			struct cpu_hw_events *cpuhw;
-			cpuhw = &__get_cpu_var(cpu_hw_events);
+			cpuhw = this_cpu_ptr(&cpu_hw_events);
 			power_pmu_bhrb_read(cpuhw);
 			data.br_stack = &cpuhw->bhrb_stack;
 		}
@@ -2012,7 +2044,7 @@ static bool pmc_overflow(unsigned long val)
 static void perf_event_interrupt(struct pt_regs *regs)
 {
 	int i, j;
-	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
 	struct perf_event *event;
 	unsigned long val[8];
 	int found, active;
diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
index d35ae52c69dc..5d747b4cb8ee 100644
--- a/arch/powerpc/perf/core-fsl-emb.c
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -210,7 +210,7 @@ static void fsl_emb_pmu_disable(struct pmu *pmu)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	cpuhw = &__get_cpu_var(cpu_hw_events);
+	cpuhw = this_cpu_ptr(&cpu_hw_events);
 
 	if (!cpuhw->disabled) {
 		cpuhw->disabled = 1;
@@ -249,7 +249,7 @@ static void fsl_emb_pmu_enable(struct pmu *pmu)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	cpuhw = &__get_cpu_var(cpu_hw_events);
+	cpuhw = this_cpu_ptr(&cpu_hw_events);
 	if (!cpuhw->disabled)
 		goto out;
 
@@ -330,9 +330,11 @@ static int fsl_emb_pmu_add(struct perf_event *event, int flags)
 	}
 	local64_set(&event->hw.prev_count, val);
 
-	if (!(flags & PERF_EF_START)) {
+	if (unlikely(!(flags & PERF_EF_START))) {
 		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
 		val = 0;
+	} else {
+		event->hw.state &= ~(PERF_HES_STOPPED | PERF_HES_UPTODATE);
 	}
 
 	write_pmc(i, val);
@@ -389,6 +391,7 @@ static void fsl_emb_pmu_del(struct perf_event *event, int flags)
 static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
 {
 	unsigned long flags;
+	unsigned long val;
 	s64 left;
 
 	if (event->hw.idx < 0 || !event->hw.sample_period)
@@ -405,7 +408,10 @@ static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
 
 	event->hw.state = 0;
 	left = local64_read(&event->hw.period_left);
-	write_pmc(event->hw.idx, left);
+	val = 0;
+	if (left < 0x80000000L)
+		val = 0x80000000L - left;
+	write_pmc(event->hw.idx, val);
 
 	perf_event_update_userpage(event);
 	perf_pmu_enable(event->pmu);
@@ -653,7 +659,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 static void perf_event_interrupt(struct pt_regs *regs)
 {
 	int i;
-	struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
 	struct perf_event *event;
 	unsigned long val;
 	int found = 0;
diff --git a/arch/powerpc/perf/hv-24x7-catalog.h b/arch/powerpc/perf/hv-24x7-catalog.h
index 21b19dd86d9c..69e2e1faf902 100644
--- a/arch/powerpc/perf/hv-24x7-catalog.h
+++ b/arch/powerpc/perf/hv-24x7-catalog.h
@@ -30,4 +30,29 @@ struct hv_24x7_catalog_page_0 {
 	__u8 reserved6[2];
 } __packed;
 
+struct hv_24x7_event_data {
+	__be16 length; /* in bytes, must be a multiple of 16 */
+	__u8 reserved1[2];
+	__u8 domain; /* Chip = 1, Core = 2 */
+	__u8 reserved2[1];
+	__be16 event_group_record_offs; /* in bytes, must be 8 byte aligned */
+	__be16 event_group_record_len; /* in bytes */
+
+	/* in bytes, offset from event_group_record */
+	__be16 event_counter_offs;
+
+	/* verified_state, unverified_state, caveat_state, broken_state, ... */
+	__be32 flags;
+
+	__be16 primary_group_ix;
+	__be16 group_count;
+	__be16 event_name_len;
+	__u8 remainder[];
+	/* __u8 event_name[event_name_len - 2]; */
+	/* __be16 event_description_len; */
+	/* __u8 event_desc[event_description_len - 2]; */
+	/* __be16 detailed_desc_len; */
+	/* __u8 detailed_desc[detailed_desc_len - 2]; */
+} __packed;
+
 #endif
diff --git a/arch/powerpc/perf/hv-24x7-domains.h b/arch/powerpc/perf/hv-24x7-domains.h
new file mode 100644
index 000000000000..49c1efd50045
--- /dev/null
+++ b/arch/powerpc/perf/hv-24x7-domains.h
@@ -0,0 +1,28 @@
+
+/*
+ * DOMAIN(name, num, index_kind, is_physical)
+ *
+ * @name:	An all caps token, suitable for use in generating an enum
+ *		member and appending to an event name in sysfs.
+ *
+ * @num:	The number corresponding to the domain as given in
+ *		documentation. We assume the catalog domain and the hcall
+ *		domain have the same numbering (so far they do), but this
+ *		may need to be changed in the future.
+ *
+ * @index_kind: A stringifiable token describing the meaning of the index
+ *		within the given domain. Must fit the parsing rules of the
+ *		perf sysfs api.
+ *
+ * @is_physical: True if the domain is physical, false otherwise (if virtual).
+ *
+ * Note: The terms PHYS_CHIP, PHYS_CORE, VCPU correspond to physical chip,
+ *	 physical core and virtual processor in 24x7 Counters specifications.
+ */
+
+DOMAIN(PHYS_CHIP, 0x01, chip, true)
+DOMAIN(PHYS_CORE, 0x02, core, true)
+DOMAIN(VCPU_HOME_CORE, 0x03, vcpu, false)
+DOMAIN(VCPU_HOME_CHIP, 0x04, vcpu, false)
+DOMAIN(VCPU_HOME_NODE, 0x05, vcpu, false)
+DOMAIN(VCPU_REMOTE_NODE, 0x06, vcpu, false)
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index e0766b82e165..ec2eb20631d1 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -13,16 +13,66 @@
 #define pr_fmt(fmt) "hv-24x7: " fmt
 
 #include <linux/perf_event.h>
+#include <linux/rbtree.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
+
 #include <asm/firmware.h>
 #include <asm/hvcall.h>
 #include <asm/io.h>
+#include <linux/byteorder/generic.h>
 
 #include "hv-24x7.h"
 #include "hv-24x7-catalog.h"
 #include "hv-common.h"
 
+static const char *event_domain_suffix(unsigned domain)
+{
+	switch (domain) {
+#define DOMAIN(n, v, x, c)		\
+	case HV_PERF_DOMAIN_##n:	\
+		return "__" #n;
+#include "hv-24x7-domains.h"
+#undef DOMAIN
+	default:
+		WARN(1, "unknown domain %d\n", domain);
+		return "__UNKNOWN_DOMAIN_SUFFIX";
+	}
+}
+
+static bool domain_is_valid(unsigned domain)
+{
+	switch (domain) {
+#define DOMAIN(n, v, x, c)		\
+	case HV_PERF_DOMAIN_##n:	\
+		/* fall through */
+#include "hv-24x7-domains.h"
+#undef DOMAIN
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool is_physical_domain(unsigned domain)
+{
+	switch (domain) {
+#define DOMAIN(n, v, x, c)		\
+	case HV_PERF_DOMAIN_##n:	\
+		return c;
+#include "hv-24x7-domains.h"
+#undef DOMAIN
+	default:
+		return false;
+	}
+}
+
+static bool catalog_entry_domain_is_valid(unsigned domain)
+{
+	return is_physical_domain(domain);
+}
+
 /*
  * TODO: Merging events:
  * - Think of the hcall as an interface to a 4d array of counters:
@@ -44,13 +94,14 @@
 
 /*
  * Example usage:
- *  perf stat -e 'hv_24x7/domain=2,offset=8,starting_index=0,lpar=0xffffffff/'
+ *  perf stat -e 'hv_24x7/domain=2,offset=8,core=0,lpar=0xffffffff/'
  */
 
 /* u3 0-6, one of HV_24X7_PERF_DOMAIN */
 EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3);
 /* u16 */
-EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 16, 31);
+EVENT_DEFINE_RANGE_FORMAT(core, config, 16, 31);
+EVENT_DEFINE_RANGE_FORMAT(vcpu, config, 16, 31);
 /* u32, see "data_offset" */
 EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63);
 /* u16 */
@@ -63,7 +114,8 @@ EVENT_DEFINE_RANGE(reserved3, config2,  0, 63);
 static struct attribute *format_attrs[] = {
 	&format_attr_domain.attr,
 	&format_attr_offset.attr,
-	&format_attr_starting_index.attr,
+	&format_attr_core.attr,
+	&format_attr_vcpu.attr,
 	&format_attr_lpar.attr,
 	NULL,
 };
@@ -73,86 +125,124 @@ static struct attribute_group format_group = {
 	.attrs = format_attrs,
 };
 
+static struct attribute_group event_group = {
+	.name = "events",
+	/* .attrs is set in init */
+};
+
+static struct attribute_group event_desc_group = {
+	.name = "event_descs",
+	/* .attrs is set in init */
+};
+
+static struct attribute_group event_long_desc_group = {
+	.name = "event_long_descs",
+	/* .attrs is set in init */
+};
+
 static struct kmem_cache *hv_page_cache;
 
 /*
- * read_offset_data - copy data from one buffer to another while treating the
- *                    source buffer as a small view on the total avaliable
- *                    source data.
- *
- * @dest: buffer to copy into
- * @dest_len: length of @dest in bytes
- * @requested_offset: the offset within the source data we want. Must be > 0
- * @src: buffer to copy data from
- * @src_len: length of @src in bytes
- * @source_offset: the offset in the sorce data that (src,src_len) refers to.
- *                 Must be > 0
- *
- * returns the number of bytes copied.
- *
- * The following ascii art shows the various buffer possitioning we need to
- * handle, assigns some arbitrary varibles to points on the buffer, and then
- * shows how we fiddle with those values to get things we care about (copy
- * start in src and copy len)
- *
- * s = @src buffer
- * d = @dest buffer
- * '.' areas in d are written to.
- *
- *                       u
- *   x         w	 v  z
- * d           |.........|
- * s |----------------------|
- *
- *                      u
- *   x         w	z     v
- * d           |........------|
- * s |------------------|
- *
- *   x         w        u,z,v
- * d           |........|
- * s |------------------|
- *
- *   x,w                u,v,z
- * d |..................|
- * s |------------------|
- *
- *   x        u
- *   w        v		z
- * d |........|
- * s |------------------|
- *
- *   x      z   w      v
- * d            |------|
- * s |------|
- *
- * x = source_offset
- * w = requested_offset
- * z = source_offset + src_len
- * v = requested_offset + dest_len
+ * request_buffer and result_buffer are not required to be 4k aligned,
+ * but are not allowed to cross any 4k boundary. Aligning them to 4k is
+ * the simplest way to ensure that.
+ */
+#define H24x7_DATA_BUFFER_SIZE	4096
+DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
+DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
+
+static char *event_name(struct hv_24x7_event_data *ev, int *len)
+{
+	*len = be16_to_cpu(ev->event_name_len) - 2;
+	return (char *)ev->remainder;
+}
+
+static char *event_desc(struct hv_24x7_event_data *ev, int *len)
+{
+	unsigned nl = be16_to_cpu(ev->event_name_len);
+	__be16 *desc_len = (__be16 *)(ev->remainder + nl - 2);
+
+	*len = be16_to_cpu(*desc_len) - 2;
+	return (char *)ev->remainder + nl;
+}
+
+static char *event_long_desc(struct hv_24x7_event_data *ev, int *len)
+{
+	unsigned nl = be16_to_cpu(ev->event_name_len);
+	__be16 *desc_len_ = (__be16 *)(ev->remainder + nl - 2);
+	unsigned desc_len = be16_to_cpu(*desc_len_);
+	__be16 *long_desc_len = (__be16 *)(ev->remainder + nl + desc_len - 2);
+
+	*len = be16_to_cpu(*long_desc_len) - 2;
+	return (char *)ev->remainder + nl + desc_len;
+}
+
+static bool event_fixed_portion_is_within(struct hv_24x7_event_data *ev,
+					  void *end)
+{
+	void *start = ev;
+
+	return (start + offsetof(struct hv_24x7_event_data, remainder)) < end;
+}
+
+/*
+ * Things we don't check:
+ *  - padding for desc, name, and long/detailed desc is required to be '\0'
+ *    bytes.
  *
- * w_offset_in_s = w - x = requested_offset - source_offset
- * z_offset_in_s = z - x = src_len
- * v_offset_in_s = v - x = request_offset + dest_len - src_len
+ *  Return NULL if we pass end,
+ *  Otherwise return the address of the byte just following the event.
  */
-static ssize_t read_offset_data(void *dest, size_t dest_len,
-				loff_t requested_offset, void *src,
-				size_t src_len, loff_t source_offset)
+static void *event_end(struct hv_24x7_event_data *ev, void *end)
 {
-	size_t w_offset_in_s = requested_offset - source_offset;
-	size_t z_offset_in_s = src_len;
-	size_t v_offset_in_s = requested_offset + dest_len - src_len;
-	size_t u_offset_in_s = min(z_offset_in_s, v_offset_in_s);
-	size_t copy_len = u_offset_in_s - w_offset_in_s;
+	void *start = ev;
+	__be16 *dl_, *ldl_;
+	unsigned dl, ldl;
+	unsigned nl = be16_to_cpu(ev->event_name_len);
+
+	if (nl < 2) {
+		pr_debug("%s: name length too short: %d", __func__, nl);
+		return NULL;
+	}
 
-	if (requested_offset < 0 || source_offset < 0)
-		return -EINVAL;
+	if (start + nl > end) {
+		pr_debug("%s: start=%p + nl=%u > end=%p",
+				__func__, start, nl, end);
+		return NULL;
+	}
 
-	if (z_offset_in_s <= w_offset_in_s)
-		return 0;
+	dl_ = (__be16 *)(ev->remainder + nl - 2);
+	if (!IS_ALIGNED((uintptr_t)dl_, 2))
+		pr_warn("desc len not aligned %p", dl_);
+	dl = be16_to_cpu(*dl_);
+	if (dl < 2) {
+		pr_debug("%s: desc len too short: %d", __func__, dl);
+		return NULL;
+	}
 
-	memcpy(dest, src + w_offset_in_s, copy_len);
-	return copy_len;
+	if (start + nl + dl > end) {
+		pr_debug("%s: (start=%p + nl=%u + dl=%u)=%p > end=%p",
+				__func__, start, nl, dl, start + nl + dl, end);
+		return NULL;
+	}
+
+	ldl_ = (__be16 *)(ev->remainder + nl + dl - 2);
+	if (!IS_ALIGNED((uintptr_t)ldl_, 2))
+		pr_warn("long desc len not aligned %p", ldl_);
+	ldl = be16_to_cpu(*ldl_);
+	if (ldl < 2) {
+		pr_debug("%s: long desc len too short (ldl=%u)",
+				__func__, ldl);
+		return NULL;
+	}
+
+	if (start + nl + dl + ldl > end) {
+		pr_debug("%s: start=%p + nl=%u + dl=%u + ldl=%u > end=%p",
+				__func__, start, nl, dl, ldl, end);
+		return NULL;
+	}
+
+	return start + nl + dl + ldl;
 }
 
 static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096,
@@ -160,14 +250,12 @@ static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096,
 					      unsigned long index)
 {
 	pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)",
-			phys_4096,
-			version,
-			index);
+			phys_4096, version, index);
+
 	WARN_ON(!IS_ALIGNED(phys_4096, 4096));
+
 	return plpar_hcall_norets(H_GET_24X7_CATALOG_PAGE,
-			phys_4096,
-			version,
-			index);
+			phys_4096, version, index);
 }
 
 static unsigned long h_get_24x7_catalog_page(char page[],
@@ -177,17 +265,642 @@ static unsigned long h_get_24x7_catalog_page(char page[],
 					version, index);
 }
 
+static unsigned core_domains[] = {
+	HV_PERF_DOMAIN_PHYS_CORE,
+	HV_PERF_DOMAIN_VCPU_HOME_CORE,
+	HV_PERF_DOMAIN_VCPU_HOME_CHIP,
+	HV_PERF_DOMAIN_VCPU_HOME_NODE,
+	HV_PERF_DOMAIN_VCPU_REMOTE_NODE,
+};
+/* chip event data always yields a single event, core yields multiple */
+#define MAX_EVENTS_PER_EVENT_DATA ARRAY_SIZE(core_domains)
+
+static char *event_fmt(struct hv_24x7_event_data *event, unsigned domain)
+{
+	const char *sindex;
+	const char *lpar;
+
+	if (is_physical_domain(domain)) {
+		lpar = "0x0";
+		sindex = "core";
+	} else {
+		lpar = "?";
+		sindex = "vcpu";
+	}
+
+	return kasprintf(GFP_KERNEL,
+			"domain=0x%x,offset=0x%x,%s=?,lpar=%s",
+			domain,
+			be16_to_cpu(event->event_counter_offs) +
+				be16_to_cpu(event->event_group_record_offs),
+			sindex,
+			lpar);
+}
+
+/* Avoid trusting fw to NUL terminate strings */
+static char *memdup_to_str(char *maybe_str, int max_len, gfp_t gfp)
+{
+	return kasprintf(gfp, "%.*s", max_len, maybe_str);
+}
+
+static ssize_t device_show_string(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *d;
+
+	d = container_of(attr, struct dev_ext_attribute, attr);
+
+	return sprintf(buf, "%s\n", (char *)d->var);
+}
+
+static struct attribute *device_str_attr_create_(char *name, char *str)
+{
+	struct dev_ext_attribute *attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+
+	if (!attr)
+		return NULL;
+
+	attr->var = str;
+	attr->attr.attr.name = name;
+	attr->attr.attr.mode = 0444;
+	attr->attr.show = device_show_string;
+
+	return &attr->attr.attr;
+}
+
+static struct attribute *device_str_attr_create(char *name, int name_max,
+						int name_nonce,
+						char *str, size_t str_max)
+{
+	char *n;
+	char *s = memdup_to_str(str, str_max, GFP_KERNEL);
+	struct attribute *a;
+
+	if (!s)
+		return NULL;
+
+	if (!name_nonce)
+		n = kasprintf(GFP_KERNEL, "%.*s", name_max, name);
+	else
+		n = kasprintf(GFP_KERNEL, "%.*s__%d", name_max, name,
+					name_nonce);
+	if (!n)
+		goto out_s;
+
+	a = device_str_attr_create_(n, s);
+	if (!a)
+		goto out_n;
+
+	return a;
+out_n:
+	kfree(n);
+out_s:
+	kfree(s);
+	return NULL;
+}
+
+static void device_str_attr_destroy(struct attribute *attr)
+{
+	struct dev_ext_attribute *d;
+
+	d = container_of(attr, struct dev_ext_attribute, attr.attr);
+	kfree(d->var);
+	kfree(d->attr.attr.name);
+	kfree(d);
+}
+
+static struct attribute *event_to_attr(unsigned ix,
+				       struct hv_24x7_event_data *event,
+				       unsigned domain,
+				       int nonce)
+{
+	int event_name_len;
+	char *ev_name, *a_ev_name, *val;
+	const char *ev_suffix;
+	struct attribute *attr;
+
+	if (!domain_is_valid(domain)) {
+		pr_warn("catalog event %u has invalid domain %u\n",
+				ix, domain);
+		return NULL;
+	}
+
+	val = event_fmt(event, domain);
+	if (!val)
+		return NULL;
+
+	ev_suffix = event_domain_suffix(domain);
+	ev_name = event_name(event, &event_name_len);
+	if (!nonce)
+		a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s",
+				(int)event_name_len, ev_name, ev_suffix);
+	else
+		a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s__%d",
+				(int)event_name_len, ev_name, ev_suffix, nonce);
+
+	if (!a_ev_name)
+		goto out_val;
+
+	attr = device_str_attr_create_(a_ev_name, val);
+	if (!attr)
+		goto out_name;
+
+	return attr;
+out_name:
+	kfree(a_ev_name);
+out_val:
+	kfree(val);
+	return NULL;
+}
+
+static struct attribute *event_to_desc_attr(struct hv_24x7_event_data *event,
+				int nonce)
+{
+	int nl, dl;
+	char *name = event_name(event, &nl);
+	char *desc = event_desc(event, &dl);
+
+	/* If there isn't a description, don't create the sysfs file */
+	if (!dl)
+		return NULL;
+
+	return device_str_attr_create(name, nl, nonce, desc, dl);
+}
+
+static struct attribute *
+event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce)
+{
+	int nl, dl;
+	char *name = event_name(event, &nl);
+	char *desc = event_long_desc(event, &dl);
+
+	/* If there isn't a description, don't create the sysfs file */
+	if (!dl)
+		return NULL;
+
+	return device_str_attr_create(name, nl, nonce, desc, dl);
+}
+
+static ssize_t event_data_to_attrs(unsigned ix, struct attribute **attrs,
+		struct hv_24x7_event_data *event, int nonce)
+{
+	unsigned i;
+
+	switch (event->domain) {
+	case HV_PERF_DOMAIN_PHYS_CHIP:
+		*attrs = event_to_attr(ix, event, event->domain, nonce);
+		return *attrs ? 1 : -1;	/* a failed creation is not an attr */
+	case HV_PERF_DOMAIN_PHYS_CORE:
+		for (i = 0; i < ARRAY_SIZE(core_domains); i++) {
+			attrs[i] = event_to_attr(ix, event, core_domains[i],
+						nonce);
+			if (!attrs[i]) {
+				pr_warn("catalog event %u: individual attr %u "
+					"creation failure\n", ix, i);
+				for (; i; i--)	/* undo attrs built so far */
+					device_str_attr_destroy(attrs[i - 1]);
+				return -1;
+			}
+		}
+		return i;	/* one attr per core_domains[] entry */
+	default:
+		pr_warn("catalog event %u: domain %u is not allowed in the "
+				"catalog\n", ix, event->domain);
+		return -1;
+	}
+}
+
+static size_t event_to_attr_ct(struct hv_24x7_event_data *event)
+{
+	switch (event->domain) {
+	case HV_PERF_DOMAIN_PHYS_CHIP:
+		return 1;	/* a chip event maps to a single attr */
+	case HV_PERF_DOMAIN_PHYS_CORE:
+		return ARRAY_SIZE(core_domains);	/* one per core domain */
+	default:
+		return 0;	/* other catalog domains produce no attrs */
+	}
+}
+
+static unsigned long vmalloc_to_phys(void *v)
+{
+	struct page *p = vmalloc_to_page(v);
+
+	BUG_ON(!p);
+	return page_to_phys(p) + offset_in_page(v);
+}
+
+/* rb-tree node recording one (event name, domain) pair already seen */
+struct event_uniq {
+	struct rb_node node;
+	const char *name;	/* used with @nl, not as a C string; not owned */
+	int nl;			/* length of @name in bytes */
+	unsigned ct;		/* repeats seen after the first occurrence */
+	unsigned domain;
+};
+
+static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
+{
+	if (s1 < s2)		/* shorter buffer sorts after the longer one */
+		return 1;
+	if (s1 > s2)		/* longer buffer sorts before the shorter one */
+		return -1;
+	/* Lengths are equal here, so memcmp() stays inside both buffers. */
+	return memcmp(d1, d2, s1);
+}
+
+static int ev_uniq_ord(const void *v1, size_t s1, unsigned d1, const void *v2,
+					size_t s2, unsigned d2)
+{
+	int r = memord(v1, s1, v2, s2);
+
+	if (r)			/* names differ: that alone decides the order */
+		return r;
+	if (d1 > d2)		/* same name: break the tie on the domain */
+		return 1;
+	if (d2 > d1)
+		return -1;
+	return 0;		/* same name and same domain */
+}
+
+static int event_uniq_add(struct rb_root *root, const char *name, int nl,
+				unsigned domain)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct event_uniq *data;
+
+	/* Walk down to the insertion point; an equal key is a duplicate */
+	while (*new) {
+		struct event_uniq *it;
+		int result;
+
+		it = container_of(*new, struct event_uniq, node);
+		result = ev_uniq_ord(name, nl, domain, it->name, it->nl,
+					it->domain);
+
+		parent = *new;
+		if (result < 0)
+			new = &((*new)->rb_left);
+		else if (result > 0)
+			new = &((*new)->rb_right);
+		else {
+			it->ct++;
+			pr_info("found a duplicate event %.*s, ct=%u\n", nl,
+						name, it->ct);
+			return it->ct;	/* repeat count, always >= 1 */
+		}
+	}
+
+	data = kmalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;	/* negative: callers must not treat as a count */
+
+	*data = (struct event_uniq) {
+		.name = name,	/* pointer is stored, the bytes are not copied */
+		.nl = nl,
+		.ct = 0,
+		.domain = domain,
+	};
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&data->node, parent, new);
+	rb_insert_color(&data->node, root);
+
+	/* First occurrence of this name/domain: return data->ct, i.e. 0 */
+	return 0;
+}
+
+static void event_uniq_destroy(struct rb_root *root)
+{
+	/*
+	 * Only the tree nodes are freed here: the strings we point to are in
+	 * the giant block of memory filled by the catalog, freed separately.
+	 */
+	struct event_uniq *pos, *n;
+
+	rbtree_postorder_for_each_entry_safe(pos, n, root, node)
+		kfree(pos);
+}
+
+
+/*
+ * Ensure the event structure's sizes are self-consistent and don't cause us to
+ * read outside of the event or past @end (the end of the event data buffer).
+ *
+ * On success, return the event length in bytes (the event's .length field).
+ * Otherwise, return -1 (and print as appropriate).
+ */
+static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event,
+					  size_t event_idx,
+					  size_t event_data_bytes,
+					  size_t event_entry_count,
+					  size_t offset, void *end)
+{
+	ssize_t ev_len;
+	void *ev_end, *calc_ev_end;
+
+	if (offset >= event_data_bytes)	/* walked off the end of the data */
+		return -1;
+
+	if (event_idx >= event_entry_count) {
+		pr_devel("catalog event data has %zu bytes of padding after last event\n",
+				event_data_bytes - offset);
+		return -1;
+	}
+
+	if (!event_fixed_portion_is_within(event, end)) {
+		pr_warn("event %zu fixed portion is not within range\n",
+				event_idx);
+		return -1;
+	}
+
+	ev_len = be16_to_cpu(event->length);
+
+	if (ev_len % 16)	/* reported, but not treated as an error */
+		pr_info("event %zu has length %zd not divisible by 16: event=%pK\n",
+				event_idx, ev_len, event);
+
+	ev_end = (__u8 *)event + ev_len;
+	if (ev_end > end) {
+		pr_warn("event %zu has .length=%zd, ends after buffer end: ev_end=%pK > end=%pK, offset=%zu\n",
+				event_idx, ev_len, ev_end, end,
+				offset);
+		return -1;
+	}
+
+	calc_ev_end = event_end(event, end);
+	if (!calc_ev_end) {
+		pr_warn("event %zu has a calculated length which exceeds buffer length %zu: event=%pK end=%pK, offset=%zu\n",
+			event_idx, event_data_bytes, event, end,
+			offset);
+		return -1;
+	}
+
+	if (calc_ev_end > ev_end) {	/* computed end vs. declared .length */
+		pr_warn("event %zu exceeds its own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n",
+			event_idx, event, ev_end, offset, calc_ev_end);
+		return -1;
+	}
+
+	return ev_len;
+}
+
+#define MAX_4K (SIZE_MAX / 4096)
+/* Build the event/desc/long-desc attr arrays from the catalog; 0 or -errno. */
+static int create_events_from_catalog(struct attribute ***events_,
+		struct attribute ***event_descs_,
+		struct attribute ***event_long_descs_)
+{
+	unsigned long hret;
+	size_t catalog_len, catalog_page_len, event_entry_count,
+	       event_data_len, event_data_offs,
+	       event_data_bytes, junk_events, event_idx, event_attr_ct, i,
+	       attr_max, event_idx_last, desc_ct, long_desc_ct;
+	ssize_t ct, ev_len;
+	uint64_t catalog_version_num;	/* page 0 carries a 64-bit version */
+	struct attribute **events, **event_descs, **event_long_descs;
+	struct hv_24x7_catalog_page_0 *page_0 =
+		kmem_cache_alloc(hv_page_cache, GFP_KERNEL);
+	void *page = page_0;
+	void *event_data, *end;
+	struct hv_24x7_event_data *event;
+	struct rb_root ev_uniq = RB_ROOT;
+	int ret = 0;
+
+	if (!page) {
+		ret = -ENOMEM;
+		goto e_out;
+	}
+
+	hret = h_get_24x7_catalog_page(page, 0, 0);
+	if (hret) {
+		ret = -EIO;
+		goto e_free;
+	}
+
+	catalog_version_num = be64_to_cpu(page_0->version);
+	catalog_page_len = be32_to_cpu(page_0->length);
+
+	if (MAX_4K < catalog_page_len) {
+		pr_err("invalid page count: %zu\n", catalog_page_len);
+		ret = -EIO;
+		goto e_free;
+	}
+
+	catalog_len = catalog_page_len * 4096;
+
+	event_entry_count = be16_to_cpu(page_0->event_entry_count);
+	event_data_offs   = be16_to_cpu(page_0->event_data_offs);
+	event_data_len    = be16_to_cpu(page_0->event_data_len);
+
+	pr_devel("cv %llu cl %zu eec %zu edo %zu edl %zu\n",
+			(unsigned long long)catalog_version_num, catalog_len,
+			event_entry_count, event_data_offs, event_data_len);
+
+	if ((MAX_4K < event_data_len)
+			|| (MAX_4K < event_data_offs)
+			|| (MAX_4K - event_data_offs < event_data_len)) {
+		pr_err("invalid event data offs %zu and/or len %zu\n",
+				event_data_offs, event_data_len);
+		ret = -EIO;
+		goto e_free;
+	}
+
+	if ((event_data_offs + event_data_len) > catalog_page_len) {
+		pr_err("event data %zu-%zu does not fit inside catalog 0-%zu\n",
+				event_data_offs,
+				event_data_offs + event_data_len,
+				catalog_page_len);
+		ret = -EIO;
+		goto e_free;
+	}
+
+	if (SIZE_MAX / MAX_EVENTS_PER_EVENT_DATA - 1 < event_entry_count) {
+		pr_err("event_entry_count %zu is invalid\n",
+				event_entry_count);
+		ret = -EIO;
+		goto e_free;
+	}
+
+	event_data_bytes = event_data_len * 4096;
+
+	/*
+	 * event data can span several pages, events can cross between these
+	 * pages. Use vmalloc to make this easier.
+	 */
+	event_data = vmalloc(event_data_bytes);
+	if (!event_data) {
+		pr_err("could not allocate event data\n");
+		ret = -ENOMEM;
+		goto e_free;
+	}
+
+	end = event_data + event_data_bytes;
+
+	/*
+	 * using vmalloc_to_phys() like this only works if PAGE_SIZE is
+	 * divisible by 4096
+	 */
+	BUILD_BUG_ON(PAGE_SIZE % 4096);
+
+	for (i = 0; i < event_data_len; i++) {
+		hret = h_get_24x7_catalog_page_(
+				vmalloc_to_phys(event_data + i * 4096),
+				catalog_version_num,
+				i + event_data_offs);
+		if (hret) {
+			pr_err("failed to get event data in page %zu\n",
+					i + event_data_offs);
+			ret = -EIO;
+			goto e_event_data;
+		}
+	}
+
+	/*
+	 * scan the catalog to determine the number of attributes we need, and
+	 * verify it at the same time.
+	 */
+	for (junk_events = 0, event = event_data, event_idx = 0, attr_max = 0;
+	     ;
+	     event_idx++, event = (void *)event + ev_len) {
+		size_t offset = (void *)event - (void *)event_data;
+		char *name;
+		int nl;
+
+		ev_len = catalog_event_len_validate(event, event_idx,
+						    event_data_bytes,
+						    event_entry_count,
+						    offset, end);
+		if (ev_len < 0)	/* the only exit from this loop */
+			break;
+
+		name = event_name(event, &nl);
+
+		if (event->event_group_record_len == 0) {
+			pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n",
+					event_idx, nl, name);
+			junk_events++;
+			continue;
+		}
+
+		if (!catalog_entry_domain_is_valid(event->domain)) {
+			pr_info("event %zu (%.*s) has invalid domain %d\n",
+					event_idx, nl, name, event->domain);
+			junk_events++;
+			continue;
+		}
+
+		attr_max += event_to_attr_ct(event);
+	}
+
+	event_idx_last = event_idx;
+	if (event_idx_last != event_entry_count)
+		pr_warn("event buffer ended before listed # of events were parsed (got %zu, wanted %zu, junk %zu)\n",
+				event_idx_last, event_entry_count, junk_events);
+
+	/* each array gets one extra slot for its NULL terminator */
+	events = kmalloc_array(attr_max + 1, sizeof(*events), GFP_KERNEL);
+	if (!events) {
+		ret = -ENOMEM;
+		goto e_event_data;
+	}
+
+	event_descs = kmalloc_array(event_idx + 1, sizeof(*event_descs),
+				GFP_KERNEL);
+	if (!event_descs) {
+		ret = -ENOMEM;
+		goto e_event_attrs;
+	}
+
+	event_long_descs = kmalloc_array(event_idx + 1,
+			sizeof(*event_long_descs), GFP_KERNEL);
+	if (!event_long_descs) {
+		ret = -ENOMEM;
+		goto e_event_descs;
+	}
+
+	/* Iterate over the catalog filling in the attribute vector */
+	for (junk_events = 0, event_attr_ct = 0, desc_ct = 0, long_desc_ct = 0,
+				event = event_data, event_idx = 0;
+			event_idx < event_idx_last;
+			event_idx++, ev_len = be16_to_cpu(event->length),
+				event = (void *)event + ev_len) {
+		char *name;
+		int nl;
+		int nonce;
+		/*
+		 * these are the only "bad" events that are intermixed and that
+		 * we can ignore without issue. make sure to skip them here
+		 */
+		if (event->event_group_record_len == 0)
+			continue;
+		if (!catalog_entry_domain_is_valid(event->domain))
+			continue;
+
+		name  = event_name(event, &nl);
+		nonce = event_uniq_add(&ev_uniq, name, nl, event->domain);
+		ct    = event_data_to_attrs(event_idx, events + event_attr_ct,
+					    event, nonce);
+		if (ct <= 0) {
+			pr_warn("event %zu (%.*s) creation failure, skipping\n",
+				event_idx, nl, name);
+			junk_events++;
+		} else {
+			event_attr_ct += ct;
+			event_descs[desc_ct] = event_to_desc_attr(event, nonce);
+			if (event_descs[desc_ct])
+				desc_ct++;
+			event_long_descs[long_desc_ct] =
+					event_to_long_desc_attr(event, nonce);
+			if (event_long_descs[long_desc_ct])
+				long_desc_ct++;
+		}
+	}
+
+	pr_info("read %zu catalog entries, created %zu event attrs (%zu failures), %zu descs\n",
+			event_idx, event_attr_ct, junk_events, desc_ct);
+
+	events[event_attr_ct] = NULL;
+	event_descs[desc_ct] = NULL;
+	event_long_descs[long_desc_ct] = NULL;
+
+	event_uniq_destroy(&ev_uniq);
+	vfree(event_data);
+	kmem_cache_free(hv_page_cache, page);
+
+	*events_ = events;
+	*event_descs_ = event_descs;
+	*event_long_descs_ = event_long_descs;
+	return 0;
+
+e_event_descs:
+	kfree(event_descs);
+e_event_attrs:
+	kfree(events);
+e_event_data:
+	vfree(event_data);
+e_free:
+	kmem_cache_free(hv_page_cache, page);
+e_out:
+	*events_ = NULL;	/* on failure all three outputs are NULL */
+	*event_descs_ = NULL;
+	*event_long_descs_ = NULL;
+	return ret;
+}
+
 static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
 			    struct bin_attribute *bin_attr, char *buf,
 			    loff_t offset, size_t count)
 {
 	unsigned long hret;
 	ssize_t ret = 0;
-	size_t catalog_len = 0, catalog_page_len = 0, page_count = 0;
+	size_t catalog_len = 0, catalog_page_len = 0;
 	loff_t page_offset = 0;
+	loff_t offset_in_page;
+	size_t copy_len;
 	uint64_t catalog_version_num = 0;
 	void *page = kmem_cache_alloc(hv_page_cache, GFP_USER);
 	struct hv_24x7_catalog_page_0 *page_0 = page;
+
 	if (!page)
 		return -ENOMEM;
 
@@ -202,7 +915,7 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
 	catalog_len = catalog_page_len * 4096;
 
 	page_offset = offset / 4096;
-	page_count  = count  / 4096;
+	offset_in_page = offset % 4096;
 
 	if (page_offset >= catalog_page_len)
 		goto e_free;
@@ -216,18 +929,23 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
 		}
 	}
 
-	ret = read_offset_data(buf, count, offset,
-				page, 4096, page_offset * 4096);
+	copy_len = 4096 - offset_in_page;
+	if (copy_len > count)
+		copy_len = count;
+
+	memcpy(buf, page+offset_in_page, copy_len);
+	ret = copy_len;
+
 e_free:
 	if (hret)
 		pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:"
 		       " rc=%ld\n",
 		       catalog_version_num, page_offset, hret);
-	kfree(page);
+	kmem_cache_free(hv_page_cache, page);
 
-	pr_devel("catalog_read: offset=%lld(%lld) count=%zu(%zu) catalog_len=%zu(%zu) => %zd\n",
-			offset, page_offset, count, page_count, catalog_len,
-			catalog_page_len, ret);
+	pr_devel("catalog_read: offset=%lld(%lld) count=%zu "
+			"catalog_len=%zu(%zu) => %zd\n", offset, page_offset,
+			count, catalog_len, catalog_page_len, ret);
 
 	return ret;
 }
@@ -250,7 +968,7 @@ static ssize_t _name##_show(struct device *dev,			\
 	}							\
 	ret = sprintf(buf, _fmt, _expr);			\
 e_free:								\
-	kfree(page);						\
+	kmem_cache_free(hv_page_cache, page);			\
 	return ret;						\
 }								\
 static DEVICE_ATTR_RO(_name)
@@ -280,82 +998,141 @@ static struct attribute_group if_group = {
 
 static const struct attribute_group *attr_groups[] = {
 	&format_group,
+	&event_group,
+	&event_desc_group,
+	&event_long_desc_group,
 	&if_group,
 	NULL,
 };
 
-static bool is_physical_domain(int domain)
+static void log_24x7_hcall(struct hv_24x7_request_buffer *request_buffer,
+			struct hv_24x7_data_result_buffer *result_buffer,
+			unsigned long ret)
 {
-	return  domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CHIP ||
-		domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CORE;
+	struct hv_24x7_request *req;
+	/* only the first request in the buffer is reported */
+	req = &request_buffer->requests[0];
+	pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => "
+			"ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
+			req->performance_domain, req->data_offset,
+			req->starting_ix, req->starting_lpar_ix, ret, ret,
+			result_buffer->detailed_rc,
+			result_buffer->failing_request_ix);
 }
 
-static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
-					 u16 lpar, u64 *res,
-					 bool success_expected)
+/*
+ * Start a new H_GET_24x7_DATA hcall: clear both buffers, set the version.
+ */
+static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
+			struct hv_24x7_data_result_buffer *result_buffer)
+{
+	/* both buffers are cleared over a fixed 4096 bytes */
+	memset(request_buffer, 0, 4096);
+	memset(result_buffer, 0, 4096);
+
+	request_buffer->interface_version = HV_24X7_IF_VERSION_CURRENT;
+	/* memset above set request_buffer->num_requests to 0 */
+}
+
+/*
+ * Commit (i.e. perform) the H_GET_24x7_DATA hcall built up by
+ * init_24x7_request()/add_event_to_24x7_request(); log and return failures.
+ */
+static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
+			struct hv_24x7_data_result_buffer *result_buffer)
 {
 	unsigned long ret;
 
 	/*
-	 * request_buffer and result_buffer are not required to be 4k aligned,
-	 * but are not allowed to cross any 4k boundary. Aligning them to 4k is
-	 * the simplest way to ensure that.
+	 * NOTE: Due to variable number of array elements in request and
+	 *	 result buffer(s), sizeof() is not reliable. Use the actual
+	 *	 allocated buffer size, H24x7_DATA_BUFFER_SIZE.
 	 */
-	struct reqb {
-		struct hv_24x7_request_buffer buf;
-		struct hv_24x7_request req;
-	} __packed __aligned(4096) request_buffer = {
-		.buf = {
-			.interface_version = HV_24X7_IF_VERSION_CURRENT,
-			.num_requests = 1,
-		},
-		.req = {
-			.performance_domain = domain,
-			.data_size = cpu_to_be16(8),
-			.data_offset = cpu_to_be32(offset),
-			.starting_lpar_ix = cpu_to_be16(lpar),
-			.max_num_lpars = cpu_to_be16(1),
-			.starting_ix = cpu_to_be16(ix),
-			.max_ix = cpu_to_be16(1),
-		}
-	};
+	ret = plpar_hcall_norets(H_GET_24X7_DATA,
+			virt_to_phys(request_buffer), H24x7_DATA_BUFFER_SIZE,
+			virt_to_phys(result_buffer),  H24x7_DATA_BUFFER_SIZE);
 
-	struct resb {
-		struct hv_24x7_data_result_buffer buf;
-		struct hv_24x7_result res;
-		struct hv_24x7_result_element elem;
-		__be64 result;
-	} __packed __aligned(4096) result_buffer = {};
+	if (ret)	/* any non-zero hcall status is logged as a failure */
+		log_24x7_hcall(request_buffer, result_buffer, ret);
 
-	ret = plpar_hcall_norets(H_GET_24X7_DATA,
-			virt_to_phys(&request_buffer), sizeof(request_buffer),
-			virt_to_phys(&result_buffer),  sizeof(result_buffer));
+	return ret;	/* 0 on success, otherwise the hcall status */
+}
 
-	if (ret) {
-		if (success_expected)
-			pr_err_ratelimited("hcall failed: %d %#x %#x %d => 0x%lx (%ld) detail=0x%x failing ix=%x\n",
-					domain, offset, ix, lpar,
-					ret, ret,
-					result_buffer.buf.detailed_rc,
-					result_buffer.buf.failing_request_ix);
-		return ret;
+/*
+ * Add the given @event to the next slot in the 24x7 request_buffer.
+ *
+ * Note that H_GET_24X7_DATA hcall allows reading several counters'
+ * values in a single HCALL. We expect the caller to add events to the
+ * request buffer one by one, make the HCALL and process the results.
+ */
+static int add_event_to_24x7_request(struct perf_event *event,
+				struct hv_24x7_request_buffer *request_buffer)
+{
+	u16 idx;
+	int i;
+	struct hv_24x7_request *req;
+
+	if (request_buffer->num_requests > 254) {	/* would exceed 255 */
+		pr_devel("Too many requests for 24x7 HCALL %d\n",
+				request_buffer->num_requests);
+		return -EINVAL;
 	}
 
-	*res = be64_to_cpu(result_buffer.result);
-	return ret;
+	if (is_physical_domain(event_get_domain(event)))
+		idx = event_get_core(event);
+	else
+		idx = event_get_vcpu(event);
+
+	i = request_buffer->num_requests++;	/* claim the next free slot */
+	req = &request_buffer->requests[i];
+
+	req->performance_domain = event_get_domain(event);
+	req->data_size = cpu_to_be16(8);
+	req->data_offset = cpu_to_be32(event_get_offset(event));
+	req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event));
+	req->max_num_lpars = cpu_to_be16(1);
+	req->starting_ix = cpu_to_be16(idx);
+	req->max_ix = cpu_to_be16(1);
+
+	return 0;
 }
 
-static unsigned long event_24x7_request(struct perf_event *event, u64 *res,
-		bool success_expected)
+static unsigned long single_24x7_request(struct perf_event *event, u64 *count)
 {
-	return single_24x7_request(event_get_domain(event),
-				event_get_offset(event),
-				event_get_starting_index(event),
-				event_get_lpar(event),
-				res,
-				success_expected);
+	unsigned long ret;
+	struct hv_24x7_request_buffer *request_buffer;
+	struct hv_24x7_data_result_buffer *result_buffer;
+	struct hv_24x7_result *resb;
+
+	BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
+	BUILD_BUG_ON(sizeof(*result_buffer) > 4096);
+
+	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+	result_buffer = (void *)get_cpu_var(hv_24x7_resb);
+
+	init_24x7_request(request_buffer, result_buffer);
+
+	ret = add_event_to_24x7_request(event, request_buffer);
+	if (ret)
+		goto out;
+
+	ret = make_24x7_request(request_buffer, result_buffer);
+	if (ret) {
+		/* make_24x7_request() has already logged the failure */
+		goto out;
+	}
+
+	/* single request, single element: take the first 64-bit value */
+	resb = &result_buffer->results[0];
+	*count = be64_to_cpu(resb->elements[0].element_data[0]);
+
+out:
+	put_cpu_var(hv_24x7_reqb);
+	put_cpu_var(hv_24x7_resb);
+	return ret;	/* 0 on success; @count is only written on success */
 }
 
+
 static int h_24x7_event_init(struct perf_event *event)
 {
 	struct hv_perf_caps caps;
@@ -387,8 +1164,7 @@ static int h_24x7_event_init(struct perf_event *event)
 	    event->attr.exclude_hv     ||
 	    event->attr.exclude_idle   ||
 	    event->attr.exclude_host   ||
-	    event->attr.exclude_guest  ||
-	    is_sampling_event(event)) /* no sampling */
+	    event->attr.exclude_guest)
 		return -EINVAL;
 
 	/* no branch sampling */
@@ -414,17 +1190,17 @@ static int h_24x7_event_init(struct perf_event *event)
 		return -EIO;
 	}
 
-	/* PHYSICAL domains & other lpars require extra capabilities */
+	/* Physical domains & other lpars require extra capabilities */
 	if (!caps.collect_privileged && (is_physical_domain(domain) ||
 		(event_get_lpar(event) != event_get_lpar_max()))) {
-		pr_devel("hv permisions disallow: is_physical_domain:%d, lpar=0x%llx\n",
+		pr_devel("hv permissions disallow: is_physical_domain:%d, lpar=0x%llx\n",
 				is_physical_domain(domain),
 				event_get_lpar(event));
 		return -EACCES;
 	}
 
 	/* see if the event complains */
-	if (event_24x7_request(event, &ct, false)) {
+	if (single_24x7_request(event, &ct)) {
 		pr_devel("test hcall failed\n");
 		return -EIO;
 	}
@@ -436,7 +1212,7 @@ static u64 h_24x7_get_value(struct perf_event *event)
 {
 	unsigned long ret;
 	u64 ct;
-	ret = event_24x7_request(event, &ct, true);
+	ret = single_24x7_request(event, &ct);
 	if (ret)
 		/* We checked this in event init, shouldn't fail here... */
 		return 0;
@@ -444,15 +1220,22 @@ static u64 h_24x7_get_value(struct perf_event *event)
 	return ct;
 }
 
-static void h_24x7_event_update(struct perf_event *event)
+static void update_event_count(struct perf_event *event, u64 now)
 {
 	s64 prev;
-	u64 now;
-	now = h_24x7_get_value(event);
+	/* store @now as the new prev_count and add the delta to the count */
 	prev = local64_xchg(&event->hw.prev_count, now);
 	local64_add(now - prev, &event->count);
 }
 
+static void h_24x7_event_read(struct perf_event *event)
+{
+	/* sample the counter and fold the new value into the event count */
+	u64 value = h_24x7_get_value(event);
+
+	update_event_count(event, value);
+}
+
 static void h_24x7_event_start(struct perf_event *event, int flags)
 {
 	if (flags & PERF_EF_RELOAD)
@@ -461,7 +1244,7 @@ static void h_24x7_event_start(struct perf_event *event, int flags)
 
 static void h_24x7_event_stop(struct perf_event *event, int flags)
 {
-	h_24x7_event_update(event);
+	h_24x7_event_read(event);
 }
 
 static int h_24x7_event_add(struct perf_event *event, int flags)
@@ -472,11 +1255,6 @@ static int h_24x7_event_add(struct perf_event *event, int flags)
 	return 0;
 }
 
-static int h_24x7_event_idx(struct perf_event *event)
-{
-	return 0;
-}
-
 static struct pmu h_24x7_pmu = {
 	.task_ctx_nr = perf_invalid_context,
 
@@ -487,8 +1265,7 @@ static struct pmu h_24x7_pmu = {
 	.del         = h_24x7_event_stop,
 	.start       = h_24x7_event_start,
 	.stop        = h_24x7_event_stop,
-	.read        = h_24x7_event_update,
-	.event_idx   = h_24x7_event_idx,
+	.read        = h_24x7_event_read,
 };
 
 static int hv_24x7_init(void)
@@ -513,6 +1290,16 @@ static int hv_24x7_init(void)
 	if (!hv_page_cache)
 		return -ENOMEM;
 
+	/* sampling not supported */
+	h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
+	r = create_events_from_catalog(&event_group.attrs,
+				   &event_desc_group.attrs,
+				   &event_long_desc_group.attrs);
+
+	if (r)
+		return r;
+
 	r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1);
 	if (r)
 		return r;
diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h
index 720ebce4b435..0f9fa21a29f2 100644
--- a/arch/powerpc/perf/hv-24x7.h
+++ b/arch/powerpc/perf/hv-24x7.h
@@ -3,14 +3,14 @@
 
 #include <linux/types.h>
 
+enum hv_perf_domains {	/* HV_PERF_DOMAIN_<n> = <v> for each DOMAIN() entry */
+#define DOMAIN(n, v, x, c) HV_PERF_DOMAIN_##n = v,
+#include "hv-24x7-domains.h"
+#undef DOMAIN
+};
+
 struct hv_24x7_request {
 	/* PHYSICAL domains require enabling via phyp/hmc. */
-#define HV_24X7_PERF_DOMAIN_PHYSICAL_CHIP 0x01
-#define HV_24X7_PERF_DOMAIN_PHYSICAL_CORE 0x02
-#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_CORE   0x03
-#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_CHIP   0x04
-#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_NODE   0x05
-#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_REMOTE_NODE 0x06
 	__u8 performance_domain;
 	__u8 reserved[0x1];
 
@@ -50,7 +50,7 @@ struct hv_24x7_request_buffer {
 	__u8 interface_version;
 	__u8 num_requests;
 	__u8 reserved[0xE];
-	struct hv_24x7_request requests[];
+	struct hv_24x7_request requests[1];
 } __packed;
 
 struct hv_24x7_result_element {
@@ -66,7 +66,7 @@ struct hv_24x7_result_element {
 	__be32 lpar_cfg_instance_id;
 
 	/* size = @result_element_data_size of cointaining result. */
-	__u8 element_data[];
+	__u64 element_data[1];
 } __packed;
 
 struct hv_24x7_result {
@@ -87,7 +87,7 @@ struct hv_24x7_result {
 	/* WARNING: only valid for first result element due to variable sizes
 	 *          of result elements */
 	/* struct hv_24x7_result_element[@num_elements_returned] */
-	struct hv_24x7_result_element elements[];
+	struct hv_24x7_result_element elements[1];
 } __packed;
 
 struct hv_24x7_data_result_buffer {
@@ -103,7 +103,7 @@ struct hv_24x7_data_result_buffer {
 	__u8 reserved2[0x8];
 	/* WARNING: only valid for the first result due to variable sizes of
 	 *	    results */
-	struct hv_24x7_result results[]; /* [@num_results] */
+	struct hv_24x7_result results[1]; /* [@num_results] */
 } __packed;
 
 #endif
diff --git a/arch/powerpc/perf/hv-common.c b/arch/powerpc/perf/hv-common.c
index 47e02b366f58..7dce8f109967 100644
--- a/arch/powerpc/perf/hv-common.c
+++ b/arch/powerpc/perf/hv-common.c
@@ -9,13 +9,13 @@ unsigned long hv_perf_caps_get(struct hv_perf_caps *caps)
 	unsigned long r;
 	struct p {
 		struct hv_get_perf_counter_info_params params;
-		struct cv_system_performance_capabilities caps;
+		struct hv_gpci_system_performance_capabilities caps;
 	} __packed __aligned(sizeof(uint64_t));
 
 	struct p arg = {
 		.params = {
 			.counter_request = cpu_to_be32(
-					CIR_SYSTEM_PERFORMANCE_CAPABILITIES),
+				HV_GPCI_system_performance_capabilities),
 			.starting_index = cpu_to_be32(-1),
 			.counter_info_version_in = 0,
 		}
@@ -31,9 +31,9 @@ unsigned long hv_perf_caps_get(struct hv_perf_caps *caps)
 
 	caps->version = arg.params.counter_info_version_out;
 	caps->collect_privileged = !!arg.caps.perf_collect_privileged;
-	caps->ga = !!(arg.caps.capability_mask & CV_CM_GA);
-	caps->expanded = !!(arg.caps.capability_mask & CV_CM_EXPANDED);
-	caps->lab = !!(arg.caps.capability_mask & CV_CM_LAB);
+	caps->ga = !!(arg.caps.capability_mask & HV_GPCI_CM_GA);
+	caps->expanded = !!(arg.caps.capability_mask & HV_GPCI_CM_EXPANDED);
+	caps->lab = !!(arg.caps.capability_mask & HV_GPCI_CM_LAB);
 
 	return r;
 }
diff --git a/arch/powerpc/perf/hv-common.h b/arch/powerpc/perf/hv-common.h
index 5d79cecbd73d..349aaba4d2d1 100644
--- a/arch/powerpc/perf/hv-common.h
+++ b/arch/powerpc/perf/hv-common.h
@@ -20,6 +20,16 @@ unsigned long hv_perf_caps_get(struct hv_perf_caps *caps);
 PMU_FORMAT_ATTR(name, #attr_var ":" #bit_start "-" #bit_end);		\
 EVENT_DEFINE_RANGE(name, attr_var, bit_start, bit_end)
 
+/*
+ * The EVENT_DEFINE_RANGE_FORMAT() macro above includes helper functions
+ * for the fields (eg: event_get_starting_index()). For some fields we
+ * need the bit-range definition, but not the helper functions. Define a
+ * lite version of the above macro without the helpers to silence
+ * compiler warnings about unused static functions.
+ */
+#define EVENT_DEFINE_RANGE_FORMAT_LITE(name, attr_var, bit_start, bit_end) \
+PMU_FORMAT_ATTR(name, #attr_var ":" #bit_start "-" #bit_end);
+
 #define EVENT_DEFINE_RANGE(name, attr_var, bit_start, bit_end)	\
 static u64 event_get_##name##_max(void)					\
 {									\
diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h
new file mode 100644
index 000000000000..acd17648cd18
--- /dev/null
+++ b/arch/powerpc/perf/hv-gpci-requests.h
@@ -0,0 +1,261 @@
+
+#include "req-gen/_begin.h"
+
+/*
+ * Based on the document "getPerfCountInfo v1.07"
+ */
+
+/*
+ * #define REQUEST_NAME counter_request_name
+ * #define REQUEST_NUM r_num
+ * #define REQUEST_IDX_KIND starting_index_kind
+ * #include I(REQUEST_BEGIN)
+ * REQUEST(
+ *	__field(...)
+ *	__field(...)
+ *	__array(...)
+ *	__count(...)
+ * )
+ * #include I(REQUEST_END)
+ *
+ * - starting_index_kind is one of the following, depending on the event:
+ *
+ *   hw_chip_id: hardware chip id or -1 for current hw chip
+ *   partition_id
+ *   sibling_part_id,
+ *   phys_processor_idx:
+ *   0xffffffffffffffff: or -1, which means it is irrelevant for the event
+ *
+ * __count(offset, bytes, name):
+ *	a counter that should be exposed via perf
+ * __field(offset, bytes, name)
+ *	a normal field
+ * __array(offset, bytes, name)
+ *	an array of bytes
+ *
+ *
+ *	@bytes for __count, and __field _must_ be a numeral token
+ *	in decimal, not an expression and not in hex.
+ *
+ *
+ * TODO:
+ *	- expose secondary index (if any counter ever uses it, only 0xA0
+ *	  appears to use it right now, and it doesn't have any counters)
+ *	- embed versioning info
+ *	- include counter descriptions
+ */
+#define REQUEST_NAME dispatch_timebase_by_processor
+#define REQUEST_NUM 0x10
+#define REQUEST_IDX_KIND "phys_processor_idx=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__count(0,	8,	processor_time_in_timebase_cycles)
+	__field(0x8,	4,	hw_processor_id)
+	__field(0xC,	2,	owning_part_id)
+	__field(0xE,	1,	processor_state)
+	__field(0xF,	1,	version)
+	__field(0x10,	4,	hw_chip_id)
+	__field(0x14,	4,	phys_module_id)
+	__field(0x18,	4,	primary_affinity_domain_idx)
+	__field(0x1C,	4,	secondary_affinity_domain_idx)
+	__field(0x20,	4,	processor_version)
+	__field(0x24,	2,	logical_processor_idx)
+	__field(0x26,	2,	reserved)
+	__field(0x28,	4,	processor_id_register)
+	__field(0x2C,	4,	phys_processor_idx)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME entitled_capped_uncapped_donated_idle_timebase_by_partition
+#define REQUEST_NUM 0x20
+#define REQUEST_IDX_KIND "sibling_part_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	8,	partition_id)
+	__count(0x8,	8,	entitled_cycles)
+	__count(0x10,	8,	consumed_capped_cycles)
+	__count(0x18,	8,	consumed_uncapped_cycles)
+	__count(0x20,	8,	cycles_donated)
+	__count(0x28,	8,	purr_idle_cycles)
+)
+#include I(REQUEST_END)
+
+/*
+ * Not available for counter_info_version >= 0x8, use
+ * run_instruction_cycles_by_partition(0x100) instead.
+ */
+#define REQUEST_NAME run_instructions_run_cycles_by_partition
+#define REQUEST_NUM 0x30
+#define REQUEST_IDX_KIND "sibling_part_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	8,	partition_id)
+	__count(0x8,	8,	instructions_completed)
+	__count(0x10,	8,	cycles)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME system_performance_capabilities
+#define REQUEST_NUM 0x40
+#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	1,	perf_collect_privileged)
+	__field(0x1,	1,	capability_mask)
+	__array(0x2,	0xE,	reserved)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME processor_bus_utilization_abc_links
+#define REQUEST_NUM 0x50
+#define REQUEST_IDX_KIND "hw_chip_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	4,	hw_chip_id)
+	__array(0x4,	0xC,	reserved1)
+	__count(0x10,	8,	total_link_cycles)
+	__count(0x18,	8,	idle_cycles_for_a_link)
+	__count(0x20,	8,	idle_cycles_for_b_link)
+	__count(0x28,	8,	idle_cycles_for_c_link)
+	__array(0x30,	0x20,	reserved2)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME processor_bus_utilization_wxyz_links
+#define REQUEST_NUM 0x60
+#define REQUEST_IDX_KIND "hw_chip_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	4,	hw_chip_id)
+	__array(0x4,	0xC,	reserved1)
+	__count(0x10,	8,	total_link_cycles)
+	__count(0x18,	8,	idle_cycles_for_w_link)
+	__count(0x20,	8,	idle_cycles_for_x_link)
+	__count(0x28,	8,	idle_cycles_for_y_link)
+	__count(0x30,	8,	idle_cycles_for_z_link)
+	__array(0x38,	0x28,	reserved2)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME processor_bus_utilization_gx_links
+#define REQUEST_NUM 0x70
+#define REQUEST_IDX_KIND "hw_chip_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	4,	hw_chip_id)
+	__array(0x4,	0xC,	reserved1)
+	__count(0x10,	8,	gx0_in_address_cycles)
+	__count(0x18,	8,	gx0_in_data_cycles)
+	__count(0x20,	8,	gx0_in_retries)
+	__count(0x28,	8,	gx0_in_bus_cycles)
+	__count(0x30,	8,	gx0_in_cycles_total)
+	__count(0x38,	8,	gx0_out_address_cycles)
+	__count(0x40,	8,	gx0_out_data_cycles)
+	__count(0x48,	8,	gx0_out_retries)
+	__count(0x50,	8,	gx0_out_bus_cycles)
+	__count(0x58,	8,	gx0_out_cycles_total)
+	__count(0x60,	8,	gx1_in_address_cycles)
+	__count(0x68,	8,	gx1_in_data_cycles)
+	__count(0x70,	8,	gx1_in_retries)
+	__count(0x78,	8,	gx1_in_bus_cycles)
+	__count(0x80,	8,	gx1_in_cycles_total)
+	__count(0x88,	8,	gx1_out_address_cycles)
+	__count(0x90,	8,	gx1_out_data_cycles)
+	__count(0x98,	8,	gx1_out_retries)
+	__count(0xA0,	8,	gx1_out_bus_cycles)
+	__count(0xA8,	8,	gx1_out_cycles_total)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME processor_bus_utilization_mc_links
+#define REQUEST_NUM 0x80
+#define REQUEST_IDX_KIND "hw_chip_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	4,	hw_chip_id)
+	__array(0x4,	0xC,	reserved1)
+	__count(0x10,	8,	mc0_frames)
+	__count(0x18,	8,	mc0_reads)
+	__count(0x20,	8,	mc0_write)
+	__count(0x28,	8,	mc0_total_cycles)
+	__count(0x30,	8,	mc1_frames)
+	__count(0x38,	8,	mc1_reads)
+	__count(0x40,	8,	mc1_writes)
+	__count(0x48,	8,	mc1_total_cycles)
+)
+#include I(REQUEST_END)
+
+/* Processor_config (0x90) skipped, no counters */
+/* Current_processor_frequency (0x91) skipped, no counters */
+
+#define REQUEST_NAME processor_core_utilization
+#define REQUEST_NUM 0x94
+#define REQUEST_IDX_KIND "phys_processor_idx=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	4,	phys_processor_idx)
+	__field(0x4,	4,	hw_processor_id)
+	__count(0x8,	8,	cycles_across_any_thread)
+	__count(0x10,	8,	timebase_at_collection)
+	__count(0x18,	8,	purr_cycles)
+	__count(0x20,	8,	sum_of_cycles_across_all_threads)
+	__count(0x28,	8,	instructions_completed)
+)
+#include I(REQUEST_END)
+
+/* Processor_core_power_mode (0x95) skipped, no counters */
+/* Affinity_domain_information_by_virtual_processor (0xA0) skipped,
+ *	no counters */
+/* Affinity_domain_information_by_domain (0xB0) skipped, no counters */
+/* Affinity_domain_information_by_partition (0xB1) skipped, no counters */
+/* Physical_memory_info (0xC0) skipped, no counters */
+/* Processor_bus_topology (0xD0) skipped, no counters */
+
+#define REQUEST_NAME partition_hypervisor_queuing_times
+#define REQUEST_NUM 0xE0
+#define REQUEST_IDX_KIND "partition_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	2, partition_id)
+	__array(0x2,	6, reserved1)
+	__count(0x8,	8, time_waiting_for_entitlement)
+	__count(0x10,	8, times_waited_for_entitlement)
+	__count(0x18,	8, time_waiting_for_phys_processor)
+	__count(0x20,	8, times_waited_for_phys_processor)
+	__count(0x28,	8, dispatches_on_home_core)
+	__count(0x30,	8, dispatches_on_home_primary_affinity_domain)
+	__count(0x38,	8, dispatches_on_home_secondary_affinity_domain)
+	__count(0x40,	8, dispatches_off_home_secondary_affinity_domain)
+	__count(0x48,	8, dispatches_on_dedicated_processor_donating_cycles)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME system_hypervisor_times
+#define REQUEST_NUM 0xF0
+#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff"
+#include I(REQUEST_BEGIN)
+REQUEST(__count(0,	8,	time_spent_to_dispatch_virtual_processors)
+	__count(0x8,	8,	time_spent_processing_virtual_processor_timers)
+	__count(0x10,	8,	time_spent_managing_partitions_over_entitlement)
+	__count(0x18,	8,	time_spent_on_system_management)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME system_tlbie_count_and_time
+#define REQUEST_NUM 0xF4
+#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff"
+#include I(REQUEST_BEGIN)
+REQUEST(__count(0,	8,	tlbie_instructions_issued)
+	/*
+	 * FIXME: The spec says the offset here is 0x10, which I suspect
+	 *	  is wrong.
+	 */
+	__count(0x8,	8,	time_spent_issuing_tlbies)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME partition_instruction_count_and_time
+#define REQUEST_NUM 0x100
+#define REQUEST_IDX_KIND "partition_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0,	2,	partition_id)
+	__array(0x2,	0x6,	reserved1)
+	__count(0x8,	8,	instructions_performed)
+	__count(0x10,	8,	time_collected)
+)
+#include I(REQUEST_END)
+
+/* set_mmcrh (0x80001000) skipped, no counters */
+/* retrieve_hpmcx (0x80002000) skipped, no counters */
+
+#include "req-gen/_end.h"
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index c9d399a2df82..856fe6e03c2a 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -31,7 +31,18 @@
 /* u32 */
 EVENT_DEFINE_RANGE_FORMAT(request, config, 0, 31);
 /* u32 */
+/*
+ * Note that starting_index, phys_processor_idx, sibling_part_id,
+ * hw_chip_id and partition_id all refer to the same bit range: the
+ * latter four are aliases for starting_index. The specific alias
+ * used depends on the event. See REQUEST_IDX_KIND in hv-gpci-requests.h.
+ */
 EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 32, 63);
+EVENT_DEFINE_RANGE_FORMAT_LITE(phys_processor_idx, config, 32, 63);
+EVENT_DEFINE_RANGE_FORMAT_LITE(sibling_part_id, config, 32, 63);
+EVENT_DEFINE_RANGE_FORMAT_LITE(hw_chip_id, config, 32, 63);
+EVENT_DEFINE_RANGE_FORMAT_LITE(partition_id, config, 32, 63);
+
 /* u16 */
 EVENT_DEFINE_RANGE_FORMAT(secondary_index, config1, 0, 15);
 /* u8 */
@@ -44,6 +55,10 @@ EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63);
 static struct attribute *format_attrs[] = {
 	&format_attr_request.attr,
 	&format_attr_starting_index.attr,
+	&format_attr_phys_processor_idx.attr,
+	&format_attr_sibling_part_id.attr,
+	&format_attr_hw_chip_id.attr,
+	&format_attr_partition_id.attr,
 	&format_attr_secondary_index.attr,
 	&format_attr_counter_info_version.attr,
 
@@ -57,6 +72,11 @@ static struct attribute_group format_group = {
 	.attrs = format_attrs,
 };
 
+static struct attribute_group event_group = {
+	.name  = "events",
+	.attrs = hv_gpci_event_attrs,
+};
+
 #define HV_CAPS_ATTR(_name, _format)				\
 static ssize_t _name##_show(struct device *dev,			\
 			    struct device_attribute *attr,	\
@@ -102,6 +122,7 @@ static struct attribute_group interface_group = {
 
 static const struct attribute_group *attr_groups[] = {
 	&format_group,
+	&event_group,
 	&interface_group,
 	NULL,
 };
@@ -210,8 +231,7 @@ static int h_gpci_event_init(struct perf_event *event)
 	    event->attr.exclude_hv     ||
 	    event->attr.exclude_idle   ||
 	    event->attr.exclude_host   ||
-	    event->attr.exclude_guest  ||
-	    is_sampling_event(event)) /* no sampling */
+	    event->attr.exclude_guest)
 		return -EINVAL;
 
 	/* no branch sampling */
@@ -247,11 +267,6 @@ static int h_gpci_event_init(struct perf_event *event)
 	return 0;
 }
 
-static int h_gpci_event_idx(struct perf_event *event)
-{
-	return 0;
-}
-
 static struct pmu h_gpci_pmu = {
 	.task_ctx_nr = perf_invalid_context,
 
@@ -263,7 +278,6 @@ static struct pmu h_gpci_pmu = {
 	.start       = h_gpci_event_start,
 	.stop        = h_gpci_event_stop,
 	.read        = h_gpci_event_update,
-	.event_idx   = h_gpci_event_idx,
 };
 
 static int hv_gpci_init(void)
@@ -272,6 +286,8 @@ static int hv_gpci_init(void)
 	unsigned long hret;
 	struct hv_perf_caps caps;
 
+	hv_gpci_assert_offsets_correct();
+
 	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
 		pr_debug("not a virtualized system, not enabling\n");
 		return -ENODEV;
@@ -284,6 +300,9 @@ static int hv_gpci_init(void)
 		return -ENODEV;
 	}
 
+	/* sampling not supported */
+	h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
 	r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1);
 	if (r)
 		return r;
diff --git a/arch/powerpc/perf/hv-gpci.h b/arch/powerpc/perf/hv-gpci.h
index b25f460c9cce..86ede8275961 100644
--- a/arch/powerpc/perf/hv-gpci.h
+++ b/arch/powerpc/perf/hv-gpci.h
@@ -42,32 +42,19 @@ struct hv_get_perf_counter_info_params {
  */
 #define COUNTER_INFO_VERSION_CURRENT 0x8
 
-/*
- * These determine the counter_value[] layout and the meaning of starting_index
- * and secondary_index.
- *
- * Unless otherwise noted, @secondary_index is unused and ignored.
- */
-enum counter_info_requests {
-
-	/* GENERAL */
-
-	/* @starting_index: must be -1 (to refer to the current partition)
-	 */
-	CIR_SYSTEM_PERFORMANCE_CAPABILITIES = 0X40,
+/* Bits of the capability mask. */
+enum {
+	HV_GPCI_CM_GA = (1 << 7),
+	HV_GPCI_CM_EXPANDED = (1 << 6),
+	HV_GPCI_CM_LAB = (1 << 5)
 };
 
-struct cv_system_performance_capabilities {
-	/* If != 0, allowed to collect data from other partitions */
-	__u8 perf_collect_privileged;
-
-	/* These following are only valid if counter_info_version >= 0x3 */
-#define CV_CM_GA       (1 << 7)
-#define CV_CM_EXPANDED (1 << 6)
-#define CV_CM_LAB      (1 << 5)
-	/* remaining bits are reserved */
-	__u8 capability_mask;
-	__u8 reserved[0xE];
-} __packed;
+#define REQUEST_FILE "../hv-gpci-requests.h"
+#define NAME_LOWER hv_gpci
+#define NAME_UPPER HV_GPCI
+#include "req-gen/perf.h"
+#undef REQUEST_FILE
+#undef NAME_LOWER
+#undef NAME_UPPER
 
 #endif
diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c
index fe21b515ca44..d115c5635bf3 100644
--- a/arch/powerpc/perf/mpc7450-pmu.c
+++ b/arch/powerpc/perf/mpc7450-pmu.c
@@ -260,8 +260,9 @@ static const u32 pmcsel_mask[N_COUNTER] = {
 /*
  * Compute MMCR0/1/2 values for a set of events.
  */
-static int mpc7450_compute_mmcr(u64 event[], int n_ev,
-				unsigned int hwc[], unsigned long mmcr[])
+static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[],
+				unsigned long mmcr[],
+				struct perf_event *pevents[])
 {
 	u8 event_index[N_CLASSES][N_COUNTER];
 	int n_classevent[N_CLASSES];
diff --git a/arch/powerpc/perf/power4-pmu.c b/arch/powerpc/perf/power4-pmu.c
index 9103a1de864d..ce6072fa481b 100644
--- a/arch/powerpc/perf/power4-pmu.c
+++ b/arch/powerpc/perf/power4-pmu.c
@@ -356,7 +356,7 @@ static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 }
 
 static int p4_compute_mmcr(u64 event[], int n_ev,
-			   unsigned int hwc[], unsigned long mmcr[])
+			   unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
 {
 	unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
 	unsigned int pmc, unit, byte, psel, lower;
diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c
index b03b6dc0172d..0526dac66007 100644
--- a/arch/powerpc/perf/power5+-pmu.c
+++ b/arch/powerpc/perf/power5+-pmu.c
@@ -452,7 +452,7 @@ static int power5p_marked_instr_event(u64 event)
 }
 
 static int power5p_compute_mmcr(u64 event[], int n_ev,
-				unsigned int hwc[], unsigned long mmcr[])
+				unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = 0;
diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c
index 1e8ce423c3af..4dc99f9f7962 100644
--- a/arch/powerpc/perf/power5-pmu.c
+++ b/arch/powerpc/perf/power5-pmu.c
@@ -383,7 +383,7 @@ static int power5_marked_instr_event(u64 event)
 }
 
 static int power5_compute_mmcr(u64 event[], int n_ev,
-			       unsigned int hwc[], unsigned long mmcr[])
+			       unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c
index 31128e086fed..9c9d646b68a1 100644
--- a/arch/powerpc/perf/power6-pmu.c
+++ b/arch/powerpc/perf/power6-pmu.c
@@ -175,7 +175,7 @@ static int power6_marked_instr_event(u64 event)
  * Assign PMC numbers and compute MMCR1 value for a set of events
  */
 static int p6_compute_mmcr(u64 event[], int n_ev,
-			   unsigned int hwc[], unsigned long mmcr[])
+			   unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index 56c67bca2f75..5b62f2389290 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -245,7 +245,7 @@ static int power7_marked_instr_event(u64 event)
 }
 
 static int power7_compute_mmcr(u64 event[], int n_ev,
-			       unsigned int hwc[], unsigned long mmcr[])
+			       unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 639cd9156585..396351db601b 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/perf_event.h>
 #include <asm/firmware.h>
+#include <asm/cputable.h>
 
 
 /*
@@ -266,6 +267,11 @@
 #define MMCRA_SDAR_MODE_TLB		(1ull << 42)
 #define MMCRA_IFM_SHIFT			30
 
+/* Bits in MMCR2 for POWER8 */
+#define MMCR2_FCS(pmc)			(1ull << (63 - (((pmc) - 1) * 9)))
+#define MMCR2_FCP(pmc)			(1ull << (62 - (((pmc) - 1) * 9)))
+#define MMCR2_FCH(pmc)			(1ull << (57 - (((pmc) - 1) * 9)))
+
 
 static inline bool event_is_fab_match(u64 event)
 {
@@ -393,9 +399,10 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long
 }
 
 static int power8_compute_mmcr(u64 event[], int n_ev,
-			       unsigned int hwc[], unsigned long mmcr[])
+			       unsigned int hwc[], unsigned long mmcr[],
+			       struct perf_event *pevents[])
 {
-	unsigned long mmcra, mmcr1, unit, combine, psel, cache, val;
+	unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val;
 	unsigned int pmc, pmc_inuse;
 	int i;
 
@@ -410,7 +417,7 @@ static int power8_compute_mmcr(u64 event[], int n_ev,
 
 	/* In continous sampling mode, update SDAR on TLB miss */
 	mmcra = MMCRA_SDAR_MODE_TLB;
-	mmcr1 = 0;
+	mmcr1 = mmcr2 = 0;
 
 	/* Second pass: assign PMCs, set all MMCR1 fields */
 	for (i = 0; i < n_ev; ++i) {
@@ -472,6 +479,19 @@ static int power8_compute_mmcr(u64 event[], int n_ev,
 			mmcra |= val << MMCRA_IFM_SHIFT;
 		}
 
+		if (pevents[i]->attr.exclude_user)
+			mmcr2 |= MMCR2_FCP(pmc);
+
+		if (pevents[i]->attr.exclude_hv)
+			mmcr2 |= MMCR2_FCH(pmc);
+
+		if (pevents[i]->attr.exclude_kernel) {
+			if (cpu_has_feature(CPU_FTR_HVMODE))
+				mmcr2 |= MMCR2_FCH(pmc);
+			else
+				mmcr2 |= MMCR2_FCS(pmc);
+		}
+
 		hwc[i] = pmc - 1;
 	}
 
@@ -491,6 +511,7 @@ static int power8_compute_mmcr(u64 event[], int n_ev,
 
 	mmcr[1] = mmcr1;
 	mmcr[2] = mmcra;
+	mmcr[3] = mmcr2;
 
 	return 0;
 }
diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c
index 20139ceeacf6..8b6a8a36fa38 100644
--- a/arch/powerpc/perf/ppc970-pmu.c
+++ b/arch/powerpc/perf/ppc970-pmu.c
@@ -257,7 +257,7 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 }
 
 static int p970_compute_mmcr(u64 event[], int n_ev,
-			     unsigned int hwc[], unsigned long mmcr[])
+			     unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
 {
 	unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
 	unsigned int pmc, unit, byte, psel;
diff --git a/arch/powerpc/perf/req-gen/_begin.h b/arch/powerpc/perf/req-gen/_begin.h
new file mode 100644
index 000000000000..acfb17a55c16
--- /dev/null
+++ b/arch/powerpc/perf/req-gen/_begin.h
@@ -0,0 +1,13 @@
+/* Include paths to be used in interface defining headers */
+#ifndef POWERPC_PERF_REQ_GEN_H_
+#define POWERPC_PERF_REQ_GEN_H_
+
+#define CAT2_STR_(t, s) __stringify(t/s)
+#define CAT2_STR(t, s) CAT2_STR_(t, s)
+#define I(...) __VA_ARGS__
+
+#endif
+
+#define REQ_GEN_PREFIX req-gen
+#define REQUEST_BEGIN CAT2_STR(REQ_GEN_PREFIX, _request-begin.h)
+#define REQUEST_END   CAT2_STR(REQ_GEN_PREFIX, _request-end.h)
diff --git a/arch/powerpc/perf/req-gen/_clear.h b/arch/powerpc/perf/req-gen/_clear.h
new file mode 100644
index 000000000000..422974f89573
--- /dev/null
+++ b/arch/powerpc/perf/req-gen/_clear.h
@@ -0,0 +1,5 @@
+
+#undef __field_
+#undef __count_
+#undef __array_
+#undef REQUEST_
diff --git a/arch/powerpc/perf/req-gen/_end.h b/arch/powerpc/perf/req-gen/_end.h
new file mode 100644
index 000000000000..8a406980b6bf
--- /dev/null
+++ b/arch/powerpc/perf/req-gen/_end.h
@@ -0,0 +1,4 @@
+
+#undef REQ_GEN_PREFIX
+#undef REQUEST_BEGIN
+#undef REQUEST_END
diff --git a/arch/powerpc/perf/req-gen/_request-begin.h b/arch/powerpc/perf/req-gen/_request-begin.h
new file mode 100644
index 000000000000..f6d98642cf1d
--- /dev/null
+++ b/arch/powerpc/perf/req-gen/_request-begin.h
@@ -0,0 +1,15 @@
+
+#define REQUEST(r_contents) \
+	REQUEST_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, I(r_contents))
+
+#define __field(f_offset, f_bytes, f_name) \
+	__field_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, \
+		 f_offset, f_bytes, f_name)
+
+#define __array(f_offset, f_bytes, f_name) \
+	__array_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, \
+		 f_offset, f_bytes, f_name)
+
+#define __count(f_offset, f_bytes, f_name) \
+	__count_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, \
+		 f_offset, f_bytes, f_name)
diff --git a/arch/powerpc/perf/req-gen/_request-end.h b/arch/powerpc/perf/req-gen/_request-end.h
new file mode 100644
index 000000000000..5573be6c3588
--- /dev/null
+++ b/arch/powerpc/perf/req-gen/_request-end.h
@@ -0,0 +1,8 @@
+#undef REQUEST
+#undef __field
+#undef __array
+#undef __count
+
+#undef REQUEST_NAME
+#undef REQUEST_NUM
+#undef REQUEST_IDX_KIND
diff --git a/arch/powerpc/perf/req-gen/perf.h b/arch/powerpc/perf/req-gen/perf.h
new file mode 100644
index 000000000000..1b122469323d
--- /dev/null
+++ b/arch/powerpc/perf/req-gen/perf.h
@@ -0,0 +1,155 @@
+#ifndef LINUX_POWERPC_PERF_REQ_GEN_PERF_H_
+#define LINUX_POWERPC_PERF_REQ_GEN_PERF_H_
+
+#include <linux/perf_event.h>
+
+#ifndef REQUEST_FILE
+#error "REQUEST_FILE must be defined before including"
+#endif
+
+#ifndef NAME_LOWER
+#error "NAME_LOWER must be defined before including"
+#endif
+
+#ifndef NAME_UPPER
+#error "NAME_UPPER must be defined before including"
+#endif
+
+#define BE_TYPE_b1 __u8
+#define BE_TYPE_b2 __be16
+#define BE_TYPE_b4 __be32
+#define BE_TYPE_b8 __be64
+
+#define BYTES_TO_BE_TYPE(bytes) \
+		BE_TYPE_b##bytes
+
+#define CAT2_(a, b) a ## b
+#define CAT2(a, b) CAT2_(a, b)
+#define CAT3_(a, b, c) a ## b ## c
+#define CAT3(a, b, c) CAT3_(a, b, c)
+
+/*
+ * enumerate the request values as
+ * <NAME_UPPER>_<request name> = <request value>
+ */
+#define REQUEST_VALUE__(name_upper, r_name) name_upper ## _ ## r_name
+#define REQUEST_VALUE_(name_upper, r_name) REQUEST_VALUE__(name_upper, r_name)
+#define REQUEST_VALUE(r_name) REQUEST_VALUE_(NAME_UPPER, r_name)
+
+#include "_clear.h"
+#define REQUEST_(r_name, r_value, r_idx_1, r_fields) \
+	REQUEST_VALUE(r_name) = r_value,
+enum CAT2(NAME_LOWER, _requests) {
+#include REQUEST_FILE
+};
+
+/*
+ * For each request:
+ * struct <NAME_LOWER>_<request name> {
+ *	r_fields
+ * };
+ */
+#include "_clear.h"
+#define STRUCT_NAME__(name_lower, r_name) name_lower ## _ ## r_name
+#define STRUCT_NAME_(name_lower, r_name) STRUCT_NAME__(name_lower, r_name)
+#define STRUCT_NAME(r_name) STRUCT_NAME_(NAME_LOWER, r_name)
+#define REQUEST_(r_name, r_value, r_idx_1, r_fields)	\
+struct STRUCT_NAME(r_name) {				\
+	r_fields					\
+};
+#define __field_(r_name, r_value, r_idx_1, f_offset, f_bytes, f_name) \
+	BYTES_TO_BE_TYPE(f_bytes) f_name;
+#define __count_(r_name, r_value, r_idx_1, f_offset, f_bytes, f_name) \
+	__field_(r_name, r_value, r_idx_1, f_offset, f_bytes, f_name)
+#define __array_(r_name, r_value, r_idx_1, a_offset, a_bytes, a_name) \
+	__u8 a_name[a_bytes];
+
+#include REQUEST_FILE
+
+/*
+ * Generate a check of the field offsets
+ * <NAME_LOWER>_assert_offsets_correct()
+ */
+#include "_clear.h"
+#define REQUEST_(r_name, r_value, index, r_fields)			\
+r_fields
+#define __field_(r_name, r_value, r_idx_1, f_offset, f_size, f_name) \
+	BUILD_BUG_ON(offsetof(struct STRUCT_NAME(r_name), f_name) != f_offset);
+#define __count_(r_name, r_value, r_idx_1, c_offset, c_size, c_name) \
+	__field_(r_name, r_value, r_idx_1, c_offset, c_size, c_name)
+#define __array_(r_name, r_value, r_idx_1, a_offset, a_size, a_name) \
+	__field_(r_name, r_value, r_idx_1, a_offset, a_size, a_name)
+
+static inline void CAT2(NAME_LOWER, _assert_offsets_correct)(void)
+{
+#include REQUEST_FILE
+}
+
+/*
+ * Generate event attributes, one per __count() entry:
+ * PMU_EVENT_ATTR_STRING(<request name>_<field name>,
+ *	<NAME_LOWER>_event_attr_<request name>_<field name>,
+ *		"request=<request value>,"
+ *		"<REQUEST_IDX_KIND string>,"
+ *		"counter_info_version=<COUNTER_INFO_VERSION_CURRENT>,"
+ *		"length=<c_size>,"
+ *		"offset=<c_offset>")
+ *
+ *	TODO: counter_info_version may need to vary; we should interpret the
+ *	value to some extent.
+ */
+#define EVENT_ATTR_NAME__(name, r_name, c_name) \
+	name ## _event_attr_ ## r_name ## _ ## c_name
+#define EVENT_ATTR_NAME_(name, r_name, c_name) \
+	EVENT_ATTR_NAME__(name, r_name, c_name)
+#define EVENT_ATTR_NAME(r_name, c_name) \
+	EVENT_ATTR_NAME_(NAME_LOWER, r_name, c_name)
+
+#include "_clear.h"
+#define __field_(r_name, r_value, r_idx_1, f_offset, f_size, f_name)
+#define __array_(r_name, r_value, r_idx_1, a_offset, a_size, a_name)
+#define __count_(r_name, r_value, r_idx_1, c_offset, c_size, c_name)	\
+PMU_EVENT_ATTR_STRING(							\
+		CAT3(r_name, _, c_name),				\
+		EVENT_ATTR_NAME(r_name, c_name),			\
+		"request=" __stringify(r_value) ","			\
+		r_idx_1 ","						\
+		"counter_info_version="					\
+			__stringify(COUNTER_INFO_VERSION_CURRENT) ","	\
+		"length=" #c_size ","					\
+		"offset=" #c_offset)
+#define REQUEST_(r_name, r_value, r_idx_1, r_fields)			\
+	r_fields
+
+#include REQUEST_FILE
+
+/*
+ * Define the NULL-terminated event attribute array, one entry per __count():
+ * static struct attribute *<NAME_LOWER>_event_attrs[] = {
+ *	&<NAME_LOWER>_event_attr_<request name>_<field name>.attr.attr,
+ * };
+ */
+#include "_clear.h"
+#define __field_(r_name, r_value, r_idx_1, f_offset, f_size, f_name)
+#define __count_(r_name, r_value, r_idx_1, c_offset, c_size, c_name)	\
+	&EVENT_ATTR_NAME(r_name, c_name).attr.attr,
+#define __array_(r_name, r_value, r_idx_1, a_offset, a_size, a_name)
+#define REQUEST_(r_name, r_value, r_idx_1, r_fields)			\
+	r_fields
+
+static __maybe_unused struct attribute *CAT2(NAME_LOWER, _event_attrs)[] = {
+#include REQUEST_FILE
+	NULL
+};
+
+/* cleanup */
+#include "_clear.h"
+#undef EVENT_ATTR_NAME
+#undef EVENT_ATTR_NAME_
+#undef BIT_NAME
+#undef BIT_NAME_
+#undef STRUCT_NAME
+#undef REQUEST_VALUE
+#undef REQUEST_VALUE_
+
+#endif