-rw-r--r--   arch/x86/events/core.c       |  5
-rw-r--r--   arch/x86/events/intel/core.c | 90
-rw-r--r--   include/linux/perf_event.h   | 29
3 files changed, 102 insertions(+), 22 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index ebf723f33794..0f3d01562ded 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2257,7 +2257,10 @@ static int x86_pmu_event_idx(struct perf_event *event)
 	if (!(hwc->flags & PERF_X86_EVENT_RDPMC_ALLOWED))
 		return 0;
 
-	return hwc->event_base_rdpmc + 1;
+	if (is_metric_idx(hwc->idx))
+		return INTEL_PMC_FIXED_RDPMC_METRICS + 1;
+	else
+		return hwc->event_base_rdpmc + 1;
 }
 
 static ssize_t get_attr_rdpmc(struct device *cdev,
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index db833346c119..c72e4904e056 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2258,7 +2258,13 @@ static int icl_set_topdown_event_period(struct perf_event *event)
 	if (left == x86_pmu.max_period) {
 		wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
 		wrmsrl(MSR_PERF_METRICS, 0);
-		local64_set(&hwc->period_left, 0);
+		hwc->saved_slots = 0;
+		hwc->saved_metric = 0;
+	}
+
+	if ((hwc->saved_slots) && is_slots_event(event)) {
+		wrmsrl(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots);
+		wrmsrl(MSR_PERF_METRICS, hwc->saved_metric);
 	}
 
 	perf_event_update_userpage(event);
@@ -2279,7 +2285,7 @@ static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx)
 	return mul_u64_u32_div(slots, val, 0xff);
 }
 
-static void __icl_update_topdown_event(struct perf_event *event,
+static u64 icl_get_topdown_value(struct perf_event *event,
 				       u64 slots, u64 metrics)
 {
 	int idx = event->hw.idx;
@@ -2290,7 +2296,50 @@ static void __icl_update_topdown_event(struct perf_event *event,
 	else
 		delta = slots;
 
-	local64_add(delta, &event->count);
+	return delta;
+}
+
+static void __icl_update_topdown_event(struct perf_event *event,
+				       u64 slots, u64 metrics,
+				       u64 last_slots, u64 last_metrics)
+{
+	u64 delta, last = 0;
+
+	delta = icl_get_topdown_value(event, slots, metrics);
+	if (last_slots)
+		last = icl_get_topdown_value(event, last_slots, last_metrics);
+
+	/*
+	 * The 8-bit integer fraction of a metric may not be accurate,
+	 * especially when the change is very small.
+	 * For example, if only a few bad_spec events happen, the fraction
+	 * may be reduced from 1 to 0. If so, the bad_spec event value
+	 * will be 0, which is definitely less than the last value.
+	 * Avoid updating event->count in this case.
+	 */
+	if (delta > last) {
+		delta -= last;
+		local64_add(delta, &event->count);
+	}
+}
+
+static void update_saved_topdown_regs(struct perf_event *event,
+				      u64 slots, u64 metrics)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct perf_event *other;
+	int idx;
+
+	event->hw.saved_slots = slots;
+	event->hw.saved_metric = metrics;
+
+	for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
+		if (!is_topdown_idx(idx))
+			continue;
+		other = cpuc->events[idx];
+		other->hw.saved_slots = slots;
+		other->hw.saved_metric = metrics;
+	}
 }
 
 /*
@@ -2304,6 +2353,7 @@ static u64 icl_update_topdown_event(struct perf_event *event)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct perf_event *other;
 	u64 slots, metrics;
+	bool reset = true;
 	int idx;
 
 	/* read Fixed counter 3 */
@@ -2318,19 +2368,39 @@ static u64 icl_update_topdown_event(struct perf_event *event)
 		if (!is_topdown_idx(idx))
 			continue;
 		other = cpuc->events[idx];
-		__icl_update_topdown_event(other, slots, metrics);
+		__icl_update_topdown_event(other, slots, metrics,
+					   event ? event->hw.saved_slots : 0,
+					   event ? event->hw.saved_metric : 0);
 	}
 
 	/*
 	 * Check and update this event, which may have been cleared
 	 * in active_mask, e.g. by x86_pmu_stop().
 	 */
-	if (event && !test_bit(event->hw.idx, cpuc->active_mask))
-		__icl_update_topdown_event(event, slots, metrics);
+	if (event && !test_bit(event->hw.idx, cpuc->active_mask)) {
+		__icl_update_topdown_event(event, slots, metrics,
+					   event->hw.saved_slots,
+					   event->hw.saved_metric);
 
-	/* The fixed counter 3 has to be written before the PERF_METRICS. */
-	wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
-	wrmsrl(MSR_PERF_METRICS, 0);
+		/*
+		 * In x86_pmu_stop(), the event is cleared in active_mask first,
+		 * and then the delta is drained, which indicates a context
+		 * switch for counting.
+		 * Save the metric and slots values for the context switch.
+		 * There is no need to reset PERF_METRICS and Fixed counter 3,
+		 * because the values will be restored on the next schedule in.
+		 */
+		update_saved_topdown_regs(event, slots, metrics);
+		reset = false;
+	}
+
+	if (reset) {
+		/* The fixed counter 3 has to be written before the PERF_METRICS. */
+		wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
+		wrmsrl(MSR_PERF_METRICS, 0);
+		if (event)
+			update_saved_topdown_regs(event, 0, 0);
+	}
 
 	return slots;
 }
@@ -3578,8 +3648,6 @@ static int intel_pmu_hw_config(struct perf_event *event)
 			 */
 			leader->hw.flags |= PERF_X86_EVENT_TOPDOWN;
 			event->hw.flags |= PERF_X86_EVENT_TOPDOWN;
-
-			event->hw.flags &= ~PERF_X86_EVENT_RDPMC_ALLOWED;
 		}
 	}
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 6048650f8c1d..46a3974eb4fe 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -212,17 +212,26 @@ struct hw_perf_event {
 	 */
 	u64				sample_period;
 
-	/*
-	 * The period we started this sample with.
-	 */
-	u64				last_period;
+	union {
+		struct { /* Sampling */
+			/*
+			 * The period we started this sample with.
+			 */
+			u64				last_period;
 
-	/*
-	 * However much is left of the current period; note that this is
-	 * a full 64bit value and allows for generation of periods longer
-	 * than hardware might allow.
-	 */
-	local64_t			period_left;
+			/*
+			 * However much is left of the current period;
+			 * note that this is a full 64bit value and
+			 * allows for generation of periods longer
+			 * than hardware might allow.
+			 */
+			local64_t			period_left;
+		};
+		struct { /* Topdown events counting for context switch */
+			u64				saved_metric;
+			u64				saved_slots;
+		};
+	};
 
 	/*
 	 * State for throttling the event, see __perf_event_overflow() and
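
Taken together, the three changes let a task read its own TopDown metrics from user space: intel_pmu_hw_config() no longer clears PERF_X86_EVENT_RDPMC_ALLOWED for metric events, x86_pmu_event_idx() now publishes INTEL_PMC_FIXED_RDPMC_METRICS + 1 in the mmap'ed event index, and the saved_slots/saved_metric pair carries per-thread values across context switches. Below is a minimal self-monitoring sketch of such a reader. It is not part of the patch: the raw event encodings (0x0400 for slots, 0x8000 for topdown-retiring) are assumed Ice Lake values, and the mmap seqlock protocol is omitted for brevity.

/* Hypothetical self-monitoring reader; not part of the patch. */
#include <linux/perf_event.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

static uint64_t rdpmc(uint32_t counter)
{
	uint32_t lo, hi;

	asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	/* Assumed Ice Lake raw encodings: slots and topdown-retiring. */
	struct perf_event_attr slots = {
		.type		= PERF_TYPE_RAW,
		.size		= sizeof(slots),
		.config		= 0x0400,
		.exclude_kernel	= 1,
	};
	struct perf_event_attr retiring = {
		.type		= PERF_TYPE_RAW,
		.size		= sizeof(retiring),
		.config		= 0x8000,
		.exclude_kernel	= 1,
	};
	struct perf_event_mmap_page *pg;
	int slots_fd, metric_fd;

	/* The slots event must be the group leader of the metric event. */
	slots_fd = syscall(__NR_perf_event_open, &slots, 0, -1, -1, 0);
	metric_fd = syscall(__NR_perf_event_open, &retiring, 0, -1,
			    slots_fd, 0);
	if (slots_fd < 0 || metric_fd < 0)
		return 1;

	pg = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED,
		  metric_fd, 0);
	if (pg == MAP_FAILED)
		return 1;

	/*
	 * With this patch, pg->index for a metric event is
	 * INTEL_PMC_FIXED_RDPMC_METRICS + 1 rather than 0;
	 * index == 0 still means RDPMC is not permitted.
	 */
	if (pg->index)
		printf("raw PERF_METRICS: %#llx\n",
		       (unsigned long long)rdpmc(pg->index - 1));

	close(slots_fd);
	close(metric_fd);
	return 0;
}

Note that RDPMC returns the raw PERF_METRICS contents here, i.e. four 8-bit fractions that user space must still scale by the slots count, in the same way icl_get_metrics_event_value() does in the kernel.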