diff options
Diffstat (limited to 'arch/x86/events')
-rw-r--r-- | arch/x86/events/core.c | 28 | ||||
-rw-r--r-- | arch/x86/events/intel/core.c | 35 | ||||
-rw-r--r-- | arch/x86/events/intel/cstate.c | 44 | ||||
-rw-r--r-- | arch/x86/events/intel/ds.c | 132 | ||||
-rw-r--r-- | arch/x86/events/intel/pt.c | 13 | ||||
-rw-r--r-- | arch/x86/events/intel/rapl.c | 2 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore.c | 2 | ||||
-rw-r--r-- | arch/x86/events/intel/uncore_snbep.c | 34 | ||||
-rw-r--r-- | arch/x86/events/msr.c | 3 | ||||
-rw-r--r-- | arch/x86/events/perf_event.h | 11 |
10 files changed, 229 insertions, 75 deletions
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 140d33288e78..a6006e7bb729 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -48,7 +48,7 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; -struct static_key rdpmc_always_available = STATIC_KEY_INIT_FALSE; +DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key); u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] @@ -990,7 +990,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, if (!dogrp) return n; - list_for_each_entry(event, &leader->sibling_list, group_entry) { + for_each_sibling_event(event, leader) { if (!is_x86_event(event) || event->state <= PERF_EVENT_STATE_OFF) continue; @@ -1156,16 +1156,13 @@ int x86_perf_event_set_period(struct perf_event *event) per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; - if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) || - local64_read(&hwc->prev_count) != (u64)-left) { - /* - * The hw event starts counting from this event offset, - * mark it to be able to extra future deltas: - */ - local64_set(&hwc->prev_count, (u64)-left); + /* + * The hw event starts counting from this event offset, + * mark it to be able to extra future deltas: + */ + local64_set(&hwc->prev_count, (u64)-left); - wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); - } + wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); /* * Due to erratum on certan cpu we need @@ -1884,6 +1881,8 @@ early_initcall(init_hw_perf_events); static inline void x86_pmu_read(struct perf_event *event) { + if (x86_pmu.read) + return x86_pmu.read(event); x86_perf_event_update(event); } @@ -2118,7 +2117,8 @@ static int x86_pmu_event_init(struct perf_event *event) event->destroy(event); } - if (READ_ONCE(x86_pmu.attr_rdpmc)) + if (READ_ONCE(x86_pmu.attr_rdpmc) && + !(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS)) event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED; return err; @@ -2206,9 +2206,9 @@ static ssize_t set_attr_rdpmc(struct device *cdev, * but only root can trigger it, so it's okay. */ if (val == 2) - static_key_slow_inc(&rdpmc_always_available); + static_branch_inc(&rdpmc_always_available_key); else - static_key_slow_dec(&rdpmc_always_available); + static_branch_dec(&rdpmc_always_available_key); on_each_cpu(refresh_pce, NULL, 1); } diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 56457cb73448..607bf565a90c 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2060,6 +2060,14 @@ static void intel_pmu_del_event(struct perf_event *event) intel_pmu_pebs_del(event); } +static void intel_pmu_read_event(struct perf_event *event) +{ + if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) + intel_pmu_auto_reload_read(event); + else + x86_perf_event_update(event); +} + static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) { int idx = hwc->idx - INTEL_PMC_IDX_FIXED; @@ -2201,16 +2209,23 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) int bit, loops; u64 status; int handled; + int pmu_enabled; cpuc = this_cpu_ptr(&cpu_hw_events); /* + * Save the PMU state. + * It needs to be restored when leaving the handler. + */ + pmu_enabled = cpuc->enabled; + /* * No known reason to not always do late ACK, * but just in case do it opt-in. */ if (!x86_pmu.late_ack) apic_write(APIC_LVTPC, APIC_DM_NMI); intel_bts_disable_local(); + cpuc->enabled = 0; __intel_pmu_disable_all(); handled = intel_pmu_drain_bts_buffer(); handled += intel_bts_interrupt(); @@ -2320,7 +2335,8 @@ again: done: /* Only restore PMU state when it's active. See x86_pmu_disable(). */ - if (cpuc->enabled) + cpuc->enabled = pmu_enabled; + if (pmu_enabled) __intel_pmu_enable_all(0, true); intel_bts_enable_local(); @@ -2952,9 +2968,9 @@ static void intel_pebs_aliases_skl(struct perf_event *event) return intel_pebs_aliases_precdist(event); } -static unsigned long intel_pmu_free_running_flags(struct perf_event *event) +static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event) { - unsigned long flags = x86_pmu.free_running_flags; + unsigned long flags = x86_pmu.large_pebs_flags; if (event->attr.use_clockid) flags &= ~PERF_SAMPLE_TIME; @@ -2976,8 +2992,8 @@ static int intel_pmu_hw_config(struct perf_event *event) if (!event->attr.freq) { event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; if (!(event->attr.sample_type & - ~intel_pmu_free_running_flags(event))) - event->hw.flags |= PERF_X86_EVENT_FREERUNNING; + ~intel_pmu_large_pebs_flags(event))) + event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS; } if (x86_pmu.pebs_aliases) x86_pmu.pebs_aliases(event); @@ -3188,13 +3204,13 @@ glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx, * Therefore the effective (average) period matches the requested period, * despite coarser hardware granularity. */ -static unsigned bdw_limit_period(struct perf_event *event, unsigned left) +static u64 bdw_limit_period(struct perf_event *event, u64 left) { if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0xc0, .umask=0x01)) { if (left < 128) left = 128; - left &= ~0x3fu; + left &= ~0x3fULL; } return left; } @@ -3460,7 +3476,7 @@ static __initconst const struct x86_pmu core_pmu = { .event_map = intel_pmu_event_map, .max_events = ARRAY_SIZE(intel_perfmon_event_map), .apic = 1, - .free_running_flags = PEBS_FREERUNNING_FLAGS, + .large_pebs_flags = LARGE_PEBS_FLAGS, /* * Intel PMCs cannot be accessed sanely above 32-bit width, @@ -3495,6 +3511,7 @@ static __initconst const struct x86_pmu intel_pmu = { .disable = intel_pmu_disable_event, .add = intel_pmu_add_event, .del = intel_pmu_del_event, + .read = intel_pmu_read_event, .hw_config = intel_pmu_hw_config, .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, @@ -3502,7 +3519,7 @@ static __initconst const struct x86_pmu intel_pmu = { .event_map = intel_pmu_event_map, .max_events = ARRAY_SIZE(intel_perfmon_event_map), .apic = 1, - .free_running_flags = PEBS_FREERUNNING_FLAGS, + .large_pebs_flags = LARGE_PEBS_FLAGS, /* * Intel PMCs cannot be accessed sanely above 32 bit width, * so we install an artificial 1<<31 period regardless of diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 72db0664a53d..9aca448bb8e6 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -40,50 +40,51 @@ * Model specific counters: * MSR_CORE_C1_RES: CORE C1 Residency Counter * perf code: 0x00 - * Available model: SLM,AMT,GLM + * Available model: SLM,AMT,GLM,CNL * Scope: Core (each processor core has a MSR) * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 - * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM, + CNL * Scope: Core * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter * perf code: 0x02 - * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW - * SKL,KNL,GLM + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, + * SKL,KNL,GLM,CNL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 - * Available model: SNB,IVB,HSW,BDW,SKL + * Available model: SNB,IVB,HSW,BDW,SKL,CNL * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 - * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM + * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 - * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL - * GLM + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, + * GLM,CNL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW - * SKL,KNL,GLM + * SKL,KNL,GLM,CNL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 - * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL * Scope: Package (physical package) * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. * perf code: 0x04 - * Available model: HSW ULT only + * Available model: HSW ULT,CNL * Scope: Package (physical package) * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. * perf code: 0x05 - * Available model: HSW ULT only + * Available model: HSW ULT,CNL * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 - * Available model: HSW ULT, GLM + * Available model: HSW ULT,GLM,CNL * Scope: Package (physical package) * */ @@ -486,6 +487,21 @@ static const struct cstate_model hswult_cstates __initconst = { BIT(PERF_CSTATE_PKG_C10_RES), }; +static const struct cstate_model cnl_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | + BIT(PERF_CSTATE_CORE_C3_RES) | + BIT(PERF_CSTATE_CORE_C6_RES) | + BIT(PERF_CSTATE_CORE_C7_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | + BIT(PERF_CSTATE_PKG_C7_RES) | + BIT(PERF_CSTATE_PKG_C8_RES) | + BIT(PERF_CSTATE_PKG_C9_RES) | + BIT(PERF_CSTATE_PKG_C10_RES), +}; + static const struct cstate_model slm_cstates __initconst = { .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | BIT(PERF_CSTATE_CORE_C6_RES), @@ -557,6 +573,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, snb_cstates), X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_MOBILE, cnl_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates), X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates), diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 18c25ab28557..da6780122786 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -935,7 +935,7 @@ void intel_pmu_pebs_add(struct perf_event *event) bool needed_cb = pebs_needs_sched_cb(cpuc); cpuc->n_pebs++; - if (hwc->flags & PERF_X86_EVENT_FREERUNNING) + if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) cpuc->n_large_pebs++; pebs_update_state(needed_cb, cpuc, event->ctx->pmu); @@ -975,7 +975,7 @@ void intel_pmu_pebs_del(struct perf_event *event) bool needed_cb = pebs_needs_sched_cb(cpuc); cpuc->n_pebs--; - if (hwc->flags & PERF_X86_EVENT_FREERUNNING) + if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) cpuc->n_large_pebs--; pebs_update_state(needed_cb, cpuc, event->ctx->pmu); @@ -1153,6 +1153,7 @@ static void setup_pebs_sample_data(struct perf_event *event, if (pebs == NULL) return; + regs->flags &= ~PERF_EFLAGS_EXACT; sample_type = event->attr.sample_type; dsrc = sample_type & PERF_SAMPLE_DATA_SRC; @@ -1197,7 +1198,6 @@ static void setup_pebs_sample_data(struct perf_event *event, */ *regs = *iregs; regs->flags = pebs->flags; - set_linear_ip(regs, pebs->ip); if (sample_type & PERF_SAMPLE_REGS_INTR) { regs->ax = pebs->ax; @@ -1233,13 +1233,22 @@ static void setup_pebs_sample_data(struct perf_event *event, #endif } - if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) { - regs->ip = pebs->real_ip; - regs->flags |= PERF_EFLAGS_EXACT; - } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs)) - regs->flags |= PERF_EFLAGS_EXACT; - else - regs->flags &= ~PERF_EFLAGS_EXACT; + if (event->attr.precise_ip > 1) { + /* Haswell and later have the eventing IP, so use it: */ + if (x86_pmu.intel_cap.pebs_format >= 2) { + set_linear_ip(regs, pebs->real_ip); + regs->flags |= PERF_EFLAGS_EXACT; + } else { + /* Otherwise use PEBS off-by-1 IP: */ + set_linear_ip(regs, pebs->ip); + + /* ... and try to fix it up using the LBR entries: */ + if (intel_pmu_pebs_fixup_ip(regs)) + regs->flags |= PERF_EFLAGS_EXACT; + } + } else + set_linear_ip(regs, pebs->ip); + if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) && x86_pmu.intel_cap.pebs_format >= 1) @@ -1306,17 +1315,93 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit) return NULL; } +void intel_pmu_auto_reload_read(struct perf_event *event) +{ + WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)); + + perf_pmu_disable(event->pmu); + intel_pmu_drain_pebs_buffer(); + perf_pmu_enable(event->pmu); +} + +/* + * Special variant of intel_pmu_save_and_restart() for auto-reload. + */ +static int +intel_pmu_save_and_restart_reload(struct perf_event *event, int count) +{ + struct hw_perf_event *hwc = &event->hw; + int shift = 64 - x86_pmu.cntval_bits; + u64 period = hwc->sample_period; + u64 prev_raw_count, new_raw_count; + s64 new, old; + + WARN_ON(!period); + + /* + * drain_pebs() only happens when the PMU is disabled. + */ + WARN_ON(this_cpu_read(cpu_hw_events.enabled)); + + prev_raw_count = local64_read(&hwc->prev_count); + rdpmcl(hwc->event_base_rdpmc, new_raw_count); + local64_set(&hwc->prev_count, new_raw_count); + + /* + * Since the counter increments a negative counter value and + * overflows on the sign switch, giving the interval: + * + * [-period, 0] + * + * the difference between two consequtive reads is: + * + * A) value2 - value1; + * when no overflows have happened in between, + * + * B) (0 - value1) + (value2 - (-period)); + * when one overflow happened in between, + * + * C) (0 - value1) + (n - 1) * (period) + (value2 - (-period)); + * when @n overflows happened in between. + * + * Here A) is the obvious difference, B) is the extension to the + * discrete interval, where the first term is to the top of the + * interval and the second term is from the bottom of the next + * interval and C) the extension to multiple intervals, where the + * middle term is the whole intervals covered. + * + * An equivalent of C, by reduction, is: + * + * value2 - value1 + n * period + */ + new = ((s64)(new_raw_count << shift) >> shift); + old = ((s64)(prev_raw_count << shift) >> shift); + local64_add(new - old + count * period, &event->count); + + perf_event_update_userpage(event); + + return 0; +} + static void __intel_pmu_pebs_event(struct perf_event *event, struct pt_regs *iregs, void *base, void *top, int bit, int count) { + struct hw_perf_event *hwc = &event->hw; struct perf_sample_data data; struct pt_regs regs; void *at = get_next_pebs_record_by_bit(base, top, bit); - if (!intel_pmu_save_and_restart(event) && - !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)) + if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { + /* + * Now, auto-reload is only enabled in fixed period mode. + * The reload value is always hwc->sample_period. + * May need to change it, if auto-reload is enabled in + * freq mode later. + */ + intel_pmu_save_and_restart_reload(event, count); + } else if (!intel_pmu_save_and_restart(event)) return; while (count > 1) { @@ -1368,8 +1453,11 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) return; n = top - at; - if (n <= 0) + if (n <= 0) { + if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) + intel_pmu_save_and_restart_reload(event, 0); return; + } __intel_pmu_pebs_event(event, iregs, at, top, 0, n); } @@ -1392,8 +1480,22 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) ds->pebs_index = ds->pebs_buffer_base; - if (unlikely(base >= top)) + if (unlikely(base >= top)) { + /* + * The drain_pebs() could be called twice in a short period + * for auto-reload event in pmu::read(). There are no + * overflows have happened in between. + * It needs to call intel_pmu_save_and_restart_reload() to + * update the event->count for this case. + */ + for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, + x86_pmu.max_pebs_events) { + event = cpuc->events[bit]; + if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) + intel_pmu_save_and_restart_reload(event, 0); + } return; + } for (at = base; at < top; at += x86_pmu.pebs_record_size) { struct pebs_record_nhm *p = at; @@ -1530,7 +1632,7 @@ void __init intel_ds_init(void) x86_pmu.pebs_record_size = sizeof(struct pebs_record_skl); x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; - x86_pmu.free_running_flags |= PERF_SAMPLE_TIME; + x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME; break; default: diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 81fd41d5a0d9..3b993942a0e4 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1186,8 +1186,12 @@ static int pt_event_addr_filters_validate(struct list_head *filters) int range = 0; list_for_each_entry(filter, filters, entry) { - /* PT doesn't support single address triggers */ - if (!filter->range || !filter->size) + /* + * PT doesn't support single address triggers and + * 'start' filters. + */ + if (!filter->size || + filter->action == PERF_ADDR_FILTER_ACTION_START) return -EOPNOTSUPP; if (!filter->inode) { @@ -1227,7 +1231,10 @@ static void pt_event_addr_filters_sync(struct perf_event *event) filters->filter[range].msr_a = msr_a; filters->filter[range].msr_b = msr_b; - filters->filter[range].config = filter->filter ? 1 : 2; + if (filter->action == PERF_ADDR_FILTER_ACTION_FILTER) + filters->filter[range].config = 1; + else + filters->filter[range].config = 2; range++; } diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index a2efb490f743..32f3e9423e99 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -774,6 +774,8 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init), diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 7874c980d569..a7956fc7ca1d 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -354,7 +354,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, if (!dogrp) return n; - list_for_each_entry(event, &leader->sibling_list, group_entry) { + for_each_sibling_event(event, leader) { if (!is_box_event(box, event) || event->state <= PERF_EVENT_STATE_OFF) continue; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 6d8044ab1060..c98b943e58b4 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3343,6 +3343,7 @@ static struct extra_reg skx_uncore_cha_extra_regs[] = { SNBEP_CBO_EVENT_EXTRA_REG(0x9134, 0xffff, 0x4), SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x8), SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x38, 0xff, 0x3), EVENT_EXTRA_END }; @@ -3562,24 +3563,27 @@ static struct intel_uncore_type *skx_msr_uncores[] = { NULL, }; +/* + * To determine the number of CHAs, it should read bits 27:0 in the CAPID6 + * register which located at Device 30, Function 3, Offset 0x9C. PCI ID 0x2083. + */ +#define SKX_CAPID6 0x9c +#define SKX_CHA_BIT_MASK GENMASK(27, 0) + static int skx_count_chabox(void) { - struct pci_dev *chabox_dev = NULL; - int bus, count = 0; + struct pci_dev *dev = NULL; + u32 val = 0; - while (1) { - chabox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x208d, chabox_dev); - if (!chabox_dev) - break; - if (count == 0) - bus = chabox_dev->bus->number; - if (bus != chabox_dev->bus->number) - break; - count++; - } + dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2083, dev); + if (!dev) + goto out; - pci_dev_put(chabox_dev); - return count; + pci_read_config_dword(dev, SKX_CAPID6, &val); + val &= SKX_CHA_BIT_MASK; +out: + pci_dev_put(dev); + return hweight32(val); } void skx_uncore_cpu_init(void) @@ -3606,7 +3610,7 @@ static struct intel_uncore_type skx_uncore_imc = { }; static struct attribute *skx_upi_uncore_formats_attr[] = { - &format_attr_event_ext.attr, + &format_attr_event.attr, &format_attr_umask_ext.attr, &format_attr_edge.attr, &format_attr_inv.attr, diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 18e2628e2d8f..e7edf19e64c2 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -188,10 +188,11 @@ static inline u64 msr_read_counter(struct perf_event *event) if (event->hw.event_base) rdmsrl(event->hw.event_base, now); else - rdtscll(now); + now = rdtsc_ordered(); return now; } + static void msr_event_update(struct perf_event *event) { u64 prev, now; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 78f91ec1056e..9f3711470ec1 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -69,7 +69,7 @@ struct event_constraint { #define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */ #define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */ #define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */ -#define PERF_X86_EVENT_FREERUNNING 0x0800 /* use freerunning PEBS */ +#define PERF_X86_EVENT_LARGE_PEBS 0x0800 /* use large PEBS */ struct amd_nb { @@ -88,7 +88,7 @@ struct amd_nb { * REGS_USER can be handled for events limited to ring 3. * */ -#define PEBS_FREERUNNING_FLAGS \ +#define LARGE_PEBS_FLAGS \ (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \ PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ @@ -520,6 +520,7 @@ struct x86_pmu { void (*disable)(struct perf_event *); void (*add)(struct perf_event *); void (*del)(struct perf_event *); + void (*read)(struct perf_event *event); int (*hw_config)(struct perf_event *event); int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); unsigned eventsel; @@ -557,7 +558,7 @@ struct x86_pmu { struct x86_pmu_quirk *quirks; int perfctr_second_write; bool late_ack; - unsigned (*limit_period)(struct perf_event *event, unsigned l); + u64 (*limit_period)(struct perf_event *event, u64 l); /* * sysfs attrs @@ -608,7 +609,7 @@ struct x86_pmu { struct event_constraint *pebs_constraints; void (*pebs_aliases)(struct perf_event *event); int max_pebs_events; - unsigned long free_running_flags; + unsigned long large_pebs_flags; /* * Intel LBR @@ -923,6 +924,8 @@ void intel_pmu_pebs_disable_all(void); void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in); +void intel_pmu_auto_reload_read(struct perf_event *event); + void intel_ds_init(void); void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); |