powerpc/perf/hv-24x7: Aggregate result elements on POWER9 SMT8

On POWER9 SMT8 the 24x7 API returns two result elements for physical core and virtual CPU events, and we need to add their counts to get the final result.

Reviewed-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Signed-off-by: Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
author: Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com> 2017-06-30 00:55:38 +0300
committer: Michael Ellerman <mpe@ellerman.id.au> 2017-07-02 13:40:33 +0300
commit: bfaa7834b60e01135af4e8e06a9477bef2368f44 (patch)
tree: a3f1101b2637ef2773289a59a9b27369d31331c1 /arch/powerpc/perf
parent: 2e6553aae3e6bd13cf176855d67233dce8817381 (diff)
download: linux-bfaa7834b60e01135af4e8e06a9477bef2368f44.tar.xz
1 files changed, 42 insertions, 11 deletions
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index a9a4df6e6e22..9c88b82f6229 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -31,6 +31,9 @@
 /* Version of the 24x7 hypervisor API that we should use in this machine. */
 static int interface_version;
 
+/* Whether we have to aggregate result data for some domains. */
+static bool aggregate_result_elements;
+
 static bool domain_is_valid(unsigned domain)
 {
 	switch (domain) {
@@ -58,6 +61,15 @@ static bool is_physical_domain(unsigned domain)
 	}
 }
 
+/* Domains for which more than one result element are returned for each event. */
+static bool domain_needs_aggregation(unsigned int domain)
+{
+	return aggregate_result_elements &&
+			(domain == HV_PERF_DOMAIN_PHYS_CORE ||
+			 (domain >= HV_PERF_DOMAIN_VCPU_HOME_CORE &&
+			  domain <= HV_PERF_DOMAIN_VCPU_REMOTE_NODE));
+}
+
 static const char *domain_name(unsigned domain)
 {
 	if (!domain_is_valid(domain))
@@ -1145,17 +1157,23 @@ static int add_event_to_24x7_request(struct perf_event *event,
 	req->starting_ix = cpu_to_be16(idx);
 	req->max_ix = cpu_to_be16(1);
 
-	if (request_buffer->interface_version > 1 &&
-	    req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) {
-		req->starting_thread_group_ix = idx % 2;
-		req->max_num_thread_groups = 1;
+	if (request_buffer->interface_version > 1) {
+		if (domain_needs_aggregation(req->performance_domain))
+			req->max_num_thread_groups = -1;
+		else if (req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) {
+			req->starting_thread_group_ix = idx % 2;
+			req->max_num_thread_groups = 1;
+		}
 	}
 
 	return 0;
 }
 
 /**
- * get_count_from_result - get event count from the given result
+ * get_count_from_result - get event count from all result elements in result
+ *
+ * If the event corresponding to this result needs aggregation of the result
+ * element values, then this function does that.
  *
  * @event:	Event associated with @res.
  * @resb:	Result buffer containing @res.
@@ -1172,6 +1190,8 @@ static int get_count_from_result(struct perf_event *event,
 	u16 data_size = be16_to_cpu(res->result_element_data_size);
 	unsigned int data_offset;
 	void *element_data;
+	int i;
+	u64 count;
 
 	/*
 	 * We can bail out early if the result is empty.
@@ -1189,8 +1209,10 @@ static int get_count_from_result(struct perf_event *event,
 	/*
 	 * Since we always specify 1 as the maximum for the smallest resource
 	 * we're requesting, there should to be only one element per result.
+	 * Except when an event needs aggregation, in which case there are more.
 	 */
-	if (num_elements != 1) {
+	if (num_elements != 1 &&
+	    !domain_needs_aggregation(event_get_domain(event))) {
 		pr_err("Error: result of request %hhu has %hu elements\n",
 		       res->result_ix, num_elements);
 
@@ -1211,13 +1233,17 @@ static int get_count_from_result(struct perf_event *event,
 		data_offset = offsetof(struct hv_24x7_result_element_v2,
 				       element_data);
 
-	element_data = res->elements + data_offset;
+	/* Go through the result elements in the result. */
+	for (i = count = 0, element_data = res->elements + data_offset;
+	     i < num_elements;
+	     i++, element_data += data_size + data_offset)
+		count += be64_to_cpu(*((u64 *) element_data));
 
-	*countp = be64_to_cpu(*((u64 *) element_data));
+	*countp = count;
 
-	/* The next result is after the result element. */
+	/* The next result is after the last result element. */
 	if (next)
-		*next = element_data + data_size;
+		*next = element_data - data_offset;
 
 	return 0;
 }
@@ -1568,9 +1594,14 @@ static int hv_24x7_init(void)
 	/* POWER8 only supports v1, while POWER9 only supports v2. */
 	if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8"))
 		interface_version = 1;
-	else
+	else {
 		interface_version = 2;
 
+		/* SMT8 in POWER9 needs to aggregate result elements. */
+		if (threads_per_core == 8)
+			aggregate_result_elements = true;
+	}
+
 	hret = hv_perf_caps_get(&caps);
 	if (hret) {
 		pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
author	Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>	2017-06-30 00:55:38 +0300
committer	Michael Ellerman <mpe@ellerman.id.au>	2017-07-02 13:40:33 +0300
commit	bfaa7834b60e01135af4e8e06a9477bef2368f44 (patch)
tree	a3f1101b2637ef2773289a59a9b27369d31331c1 /arch/powerpc/perf
parent	2e6553aae3e6bd13cf176855d67233dce8817381 (diff)
download	linux-bfaa7834b60e01135af4e8e06a9477bef2368f44.tar.xz