diff options
Diffstat (limited to 'arch/powerpc/perf')
-rw-r--r-- | arch/powerpc/perf/hv-24x7.c | 53 |
1 files changed, 42 insertions, 11 deletions
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index a9a4df6e6e22..9c88b82f6229 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -31,6 +31,9 @@
 /* Version of the 24x7 hypervisor API that we should use in this machine. */
 static int interface_version;
 
+/* Whether we have to aggregate result data for some domains. */
+static bool aggregate_result_elements;
+
 static bool domain_is_valid(unsigned domain)
 {
 	switch (domain) {
@@ -58,6 +61,15 @@ static bool is_physical_domain(unsigned domain)
 	}
 }
 
+/* Domains for which more than one result element are returned for each event. */
+static bool domain_needs_aggregation(unsigned int domain)
+{
+	return aggregate_result_elements &&
+			(domain == HV_PERF_DOMAIN_PHYS_CORE ||
+			 (domain >= HV_PERF_DOMAIN_VCPU_HOME_CORE &&
+			  domain <= HV_PERF_DOMAIN_VCPU_REMOTE_NODE));
+}
+
 static const char *domain_name(unsigned domain)
 {
 	if (!domain_is_valid(domain))
@@ -1145,17 +1157,23 @@ static int add_event_to_24x7_request(struct perf_event *event,
 	req->starting_ix = cpu_to_be16(idx);
 	req->max_ix = cpu_to_be16(1);
 
-	if (request_buffer->interface_version > 1 &&
-	    req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) {
-		req->starting_thread_group_ix = idx % 2;
-		req->max_num_thread_groups = 1;
+	if (request_buffer->interface_version > 1) {
+		if (domain_needs_aggregation(req->performance_domain))
+			req->max_num_thread_groups = -1;
+		else if (req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) {
+			req->starting_thread_group_ix = idx % 2;
+			req->max_num_thread_groups = 1;
+		}
 	}
 
 	return 0;
 }
 
 /**
- * get_count_from_result - get event count from the given result
+ * get_count_from_result - get event count from all result elements in result
+ *
+ * If the event corresponding to this result needs aggregation of the result
+ * element values, then this function does that.
  *
  * @event: Event associated with @res.
  * @resb: Result buffer containing @res.
@@ -1172,6 +1190,8 @@ static int get_count_from_result(struct perf_event *event,
 	u16 data_size = be16_to_cpu(res->result_element_data_size);
 	unsigned int data_offset;
 	void *element_data;
+	int i;
+	u64 count;
 
 	/*
 	 * We can bail out early if the result is empty.
@@ -1189,8 +1209,10 @@ static int get_count_from_result(struct perf_event *event,
 	/*
 	 * Since we always specify 1 as the maximum for the smallest resource
 	 * we're requesting, there should to be only one element per result.
+	 * Except when an event needs aggregation, in which case there are more.
 	 */
-	if (num_elements != 1) {
+	if (num_elements != 1 &&
+	    !domain_needs_aggregation(event_get_domain(event))) {
 		pr_err("Error: result of request %hhu has %hu elements\n",
 		       res->result_ix, num_elements);
 
@@ -1211,13 +1233,17 @@ static int get_count_from_result(struct perf_event *event,
 		data_offset = offsetof(struct hv_24x7_result_element_v2,
 				       element_data);
 
-	element_data = res->elements + data_offset;
+	/* Go through the result elements in the result. */
+	for (i = count = 0, element_data = res->elements + data_offset;
+	     i < num_elements;
+	     i++, element_data += data_size + data_offset)
+		count += be64_to_cpu(*((u64 *) element_data));
 
-	*countp = be64_to_cpu(*((u64 *) element_data));
+	*countp = count;
 
-	/* The next result is after the result element. */
+	/* The next result is after the last result element. */
 	if (next)
-		*next = element_data + data_size;
+		*next = element_data - data_offset;
 
 	return 0;
 }
@@ -1568,9 +1594,14 @@ static int hv_24x7_init(void)
 	/* POWER8 only supports v1, while POWER9 only supports v2. */
 	if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8"))
 		interface_version = 1;
-	else
+	else {
 		interface_version = 2;
 
+		/* SMT8 in POWER9 needs to aggregate result elements. */
+		if (threads_per_core == 8)
+			aggregate_result_elements = true;
+	}
+
 	hret = hv_perf_caps_get(&caps);
 	if (hret) {
 		pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",