From 641cc938815dfd09f8fa1ec72deb814f0938ac33 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Mar 2012 20:09:14 +0100 Subject: perf: Adding sysfs group format attribute for pmu device Adding sysfs group 'format' attribute for pmu device that contains a syntax description on how to construct raw events. The event configuration is described in following struct pefr_event_attr attributes: config config1 config2 Each sysfs attribute within the format attribute group, describes mapping of name and bitfield definition within one of above attributes. eg: "/sys/.../format/event" contains "config:0-7" "/sys/.../format/umask" contains "config:8-15" "/sys/.../format/usr" contains "config:16" the attribute value syntax is: line: config ':' bits config: 'config' | 'config1' | 'config2" bits: bits ',' bit_term | bit_term bit_term: VALUE '-' VALUE | VALUE Adding format attribute definitions for x86 cpu pmus. Acked-by: Peter Zijlstra Signed-off-by: Peter Zijlstra Signed-off-by: Jiri Olsa Link: http://lkml.kernel.org/n/tip-vhdk5y2hyype9j63prymty36@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/kernel/cpu/perf_event.c | 7 +++++++ arch/x86/kernel/cpu/perf_event.h | 1 + arch/x86/kernel/cpu/perf_event_amd.c | 18 +++++++++++++++++ arch/x86/kernel/cpu/perf_event_intel.c | 36 ++++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/perf_event_p6.c | 19 ++++++++++++++++++ 5 files changed, 81 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0a18d16cb58d..453ac9497574 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1314,6 +1314,11 @@ static void __init pmu_check_apic(void) pr_info("no hardware sampling interrupt available.\n"); } +static struct attribute_group x86_pmu_format_group = { + .name = "format", + .attrs = NULL, +}; + static int __init init_hw_perf_events(void) { struct x86_pmu_quirk *quirk; @@ -1388,6 +1393,7 @@ static int __init init_hw_perf_events(void) } x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ + x86_pmu_format_group.attrs = x86_pmu.format_attrs; pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.cntval_bits); @@ -1668,6 +1674,7 @@ static struct attribute_group x86_pmu_attr_group = { static const struct attribute_group *x86_pmu_attr_groups[] = { &x86_pmu_attr_group, + &x86_pmu_format_group, NULL, }; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 8484e77c211e..6638aaf54493 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -339,6 +339,7 @@ struct x86_pmu { * sysfs attrs */ int attr_rdpmc; + struct attribute **format_attrs; /* * CPU Hotplug hooks diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index dd002faff7a6..95e7fe1c5f0b 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -404,6 +404,21 @@ static void amd_pmu_cpu_dead(int cpu) } } +PMU_FORMAT_ATTR(event, "config:0-7,32-35"); +PMU_FORMAT_ATTR(umask, "config:8-15" ); +PMU_FORMAT_ATTR(edge, "config:18" ); +PMU_FORMAT_ATTR(inv, "config:23" ); +PMU_FORMAT_ATTR(cmask, "config:24-31" ); + +static struct attribute *amd_format_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + NULL, +}; + static __initconst const struct x86_pmu amd_pmu = { .name = "AMD", .handle_irq = x86_pmu_handle_irq, @@ -426,6 +441,8 @@ static __initconst const struct x86_pmu amd_pmu = { .get_event_constraints = amd_get_event_constraints, .put_event_constraints = amd_put_event_constraints, + .format_attrs = amd_format_attr, + .cpu_prepare = amd_pmu_cpu_prepare, .cpu_starting = amd_pmu_cpu_starting, .cpu_dead = amd_pmu_cpu_dead, @@ -596,6 +613,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = { .cpu_dead = amd_pmu_cpu_dead, #endif .cpu_starting = amd_pmu_cpu_starting, + .format_attrs = amd_format_attr, }; __init int amd_pmu_init(void) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 6a84e7f28f05..26b3e2fef104 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1431,6 +1431,24 @@ static void core_pmu_enable_all(int added) } } +PMU_FORMAT_ATTR(event, "config:0-7" ); +PMU_FORMAT_ATTR(umask, "config:8-15" ); +PMU_FORMAT_ATTR(edge, "config:18" ); +PMU_FORMAT_ATTR(pc, "config:19" ); +PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */ +PMU_FORMAT_ATTR(inv, "config:23" ); +PMU_FORMAT_ATTR(cmask, "config:24-31" ); + +static struct attribute *intel_arch_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_pc.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + NULL, +}; + static __initconst const struct x86_pmu core_pmu = { .name = "core", .handle_irq = x86_pmu_handle_irq, @@ -1455,6 +1473,7 @@ static __initconst const struct x86_pmu core_pmu = { .put_event_constraints = intel_put_event_constraints, .event_constraints = intel_core_event_constraints, .guest_get_msrs = core_guest_get_msrs, + .format_attrs = intel_arch_formats_attr, }; struct intel_shared_regs *allocate_shared_regs(int cpu) @@ -1553,6 +1572,21 @@ static void intel_pmu_flush_branch_stack(void) intel_pmu_lbr_reset(); } +PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); + +static struct attribute *intel_arch3_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_pc.attr, + &format_attr_any.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + + &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */ + NULL, +}; + static __initconst const struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, @@ -1576,6 +1610,8 @@ static __initconst const struct x86_pmu intel_pmu = { .get_event_constraints = intel_get_event_constraints, .put_event_constraints = intel_put_event_constraints, + .format_attrs = intel_arch3_formats_attr, + .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index c7181befecde..32bcfc7dd230 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -87,6 +87,23 @@ static void p6_pmu_enable_event(struct perf_event *event) (void)checking_wrmsrl(hwc->config_base, val); } +PMU_FORMAT_ATTR(event, "config:0-7" ); +PMU_FORMAT_ATTR(umask, "config:8-15" ); +PMU_FORMAT_ATTR(edge, "config:18" ); +PMU_FORMAT_ATTR(pc, "config:19" ); +PMU_FORMAT_ATTR(inv, "config:23" ); +PMU_FORMAT_ATTR(cmask, "config:24-31" ); + +static struct attribute *intel_p6_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_pc.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + NULL, +}; + static __initconst const struct x86_pmu p6_pmu = { .name = "p6", .handle_irq = x86_pmu_handle_irq, @@ -115,6 +132,8 @@ static __initconst const struct x86_pmu p6_pmu = { .cntval_mask = (1ULL << 32) - 1, .get_event_constraints = x86_get_event_constraints, .event_constraints = p6_event_constraints, + + .format_attrs = intel_p6_formats_attr, }; __init int p6_pmu_init(void) -- cgit v1.2.3 From c7206205d00ab375839bd6c7ddb247d600693c09 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 22 Mar 2012 17:26:36 +0100 Subject: perf: Fix mmap_page capabilities and docs Complete the syscall-less self-profiling feature and address all complaints, namely: - capabilities, so we can detect what is actually available at runtime Add a capabilities field to perf_event_mmap_page to indicate what is actually available for use. - on x86: RDPMC weirdness due to being 40/48 bits and not sign-extending properly. - ABI documentation as to how all this stuff works. Also improve the documentation for the new features. Signed-off-by: Peter Zijlstra Cc: Stephane Eranian Cc: Vince Weaver Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1332433596.2487.33.camel@twins Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 10 ++++- include/linux/perf_event.h | 83 +++++++++++++++++++++++++++++++++++----- kernel/events/core.c | 4 +- 3 files changed, 84 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 453ac9497574..4ef8104958ee 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1622,6 +1622,9 @@ static int x86_pmu_event_idx(struct perf_event *event) { int idx = event->hw.idx; + if (!x86_pmu.attr_rdpmc) + return 0; + if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { idx -= X86_PMC_IDX_FIXED; idx |= 1 << 30; @@ -1706,14 +1709,19 @@ static struct pmu pmu = { .flush_branch_stack = x86_pmu_flush_branch_stack, }; -void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) +void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) { + userpg->cap_usr_time = 0; + userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc; + userpg->pmc_width = x86_pmu.cntval_bits; + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) return; if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) return; + userpg->cap_usr_time = 1; userpg->time_mult = this_cpu_read(cyc2ns); userpg->time_shift = CYC2NS_SCALE_FACTOR; userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 57ae485e80fc..ca9ed4e6a286 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -299,18 +299,31 @@ struct perf_event_mmap_page { /* * Bits needed to read the hw events in user-space. * - * u32 seq; - * s64 count; + * u32 seq, time_mult, time_shift, idx, width; + * u64 count, enabled, running; + * u64 cyc, time_offset; + * s64 pmc = 0; * * do { * seq = pc->lock; - * * barrier() - * if (pc->index) { - * count = pmc_read(pc->index - 1); - * count += pc->offset; - * } else - * goto regular_read; + * + * enabled = pc->time_enabled; + * running = pc->time_running; + * + * if (pc->cap_usr_time && enabled != running) { + * cyc = rdtsc(); + * time_offset = pc->time_offset; + * time_mult = pc->time_mult; + * time_shift = pc->time_shift; + * } + * + * idx = pc->index; + * count = pc->offset; + * if (pc->cap_usr_rdpmc && idx) { + * width = pc->pmc_width; + * pmc = rdpmc(idx - 1); + * } * * barrier(); * } while (pc->lock != seq); @@ -323,14 +336,57 @@ struct perf_event_mmap_page { __s64 offset; /* add to hardware event value */ __u64 time_enabled; /* time event active */ __u64 time_running; /* time event on cpu */ - __u32 time_mult, time_shift; + union { + __u64 capabilities; + __u64 cap_usr_time : 1, + cap_usr_rdpmc : 1, + cap_____res : 62; + }; + + /* + * If cap_usr_rdpmc this field provides the bit-width of the value + * read using the rdpmc() or equivalent instruction. This can be used + * to sign extend the result like: + * + * pmc <<= 64 - width; + * pmc >>= 64 - width; // signed shift right + * count += pmc; + */ + __u16 pmc_width; + + /* + * If cap_usr_time the below fields can be used to compute the time + * delta since time_enabled (in ns) using rdtsc or similar. + * + * u64 quot, rem; + * u64 delta; + * + * quot = (cyc >> time_shift); + * rem = cyc & ((1 << time_shift) - 1); + * delta = time_offset + quot * time_mult + + * ((rem * time_mult) >> time_shift); + * + * Where time_offset,time_mult,time_shift and cyc are read in the + * seqcount loop described above. This delta can then be added to + * enabled and possible running (if idx), improving the scaling: + * + * enabled += delta; + * if (idx) + * running += delta; + * + * quot = count / running; + * rem = count % running; + * count = quot * enabled + (rem * enabled) / running; + */ + __u16 time_shift; + __u32 time_mult; __u64 time_offset; /* * Hole for extension of the self monitor capabilities */ - __u64 __reserved[121]; /* align to 1k */ + __u64 __reserved[120]; /* align to 1k */ /* * Control data for the mmap() data buffer. @@ -347,6 +403,13 @@ struct perf_event_mmap_page { __u64 data_tail; /* user-space written tail */ }; +/* + * Build time assertion that we keep the data_head at the intended location. + * IOW, validation we got the __reserved[] size right. + */ +extern char __assert_mmap_data_head_offset + [1 - 2*!!(offsetof(struct perf_event_mmap_page, data_head) != 1024)]; + #define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0) #define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) #define PERF_RECORD_MISC_KERNEL (1 << 0) diff --git a/kernel/events/core.c b/kernel/events/core.c index c61234b1a988..dc3b05272511 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3348,7 +3348,7 @@ static void calc_timer_values(struct perf_event *event, *running = ctx_time - event->tstamp_running; } -void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) +void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) { } @@ -3398,7 +3398,7 @@ void perf_event_update_userpage(struct perf_event *event) userpg->time_running = running + atomic64_read(&event->child_total_time_running); - perf_update_user_clock(userpg, now); + arch_perf_update_userpage(userpg, now); barrier(); ++userpg->lock; -- cgit v1.2.3