Diffstat (limited to 'arch/x86/events/intel')
 arch/x86/events/intel/core.c         |  34
 arch/x86/events/intel/ds.c           |  82
 arch/x86/events/intel/lbr.c          |  56
 arch/x86/events/intel/uncore.h       |   2
 arch/x86/events/intel/uncore_snbep.c |  10
 5 files changed, 128 insertions(+), 56 deletions(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 707b2a96e516..035c37481f57 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2041,15 +2041,15 @@ static void intel_pmu_disable_event(struct perf_event *event)
 	cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
 	cpuc->intel_cp_status &= ~(1ull << hwc->idx);
 
+	if (unlikely(event->attr.precise_ip))
+		intel_pmu_pebs_disable(event);
+
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
 		intel_pmu_disable_fixed(hwc);
 		return;
 	}
 
 	x86_pmu_disable_event(event);
-
-	if (unlikely(event->attr.precise_ip))
-		intel_pmu_pebs_disable(event);
 }
 
 static void intel_pmu_del_event(struct perf_event *event)
@@ -2068,17 +2068,19 @@ static void intel_pmu_read_event(struct perf_event *event)
 		x86_perf_event_update(event);
 }
 
-static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
+static void intel_pmu_enable_fixed(struct perf_event *event)
 {
+	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
-	u64 ctrl_val, bits, mask;
+	u64 ctrl_val, mask, bits = 0;
 
 	/*
-	 * Enable IRQ generation (0x8),
+	 * Enable IRQ generation (0x8), if not PEBS,
 	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
 	 * if requested:
 	 */
-	bits = 0x8ULL;
+	if (!event->attr.precise_ip)
+		bits |= 0x8;
 	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
 		bits |= 0x2;
 	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
@@ -2120,14 +2122,14 @@ static void intel_pmu_enable_event(struct perf_event *event)
 	if (unlikely(event_is_checkpointed(event)))
 		cpuc->intel_cp_status |= (1ull << hwc->idx);
 
+	if (unlikely(event->attr.precise_ip))
+		intel_pmu_pebs_enable(event);
+
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
-		intel_pmu_enable_fixed(hwc);
+		intel_pmu_enable_fixed(event);
 		return;
 	}
 
-	if (unlikely(event->attr.precise_ip))
-		intel_pmu_pebs_enable(event);
-
 	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
 }
 
@@ -2280,7 +2282,10 @@ again:
 	 * counters from the GLOBAL_STATUS mask and we always process PEBS
 	 * events via drain_pebs().
 	 */
-	status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
+	if (x86_pmu.flags & PMU_FL_PEBS_ALL)
+		status &= ~cpuc->pebs_enabled;
+	else
+		status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
 
 	/*
 	 * PEBS overflow sets bit 62 in the global status register
@@ -2997,6 +3002,9 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		}
 		if (x86_pmu.pebs_aliases)
 			x86_pmu.pebs_aliases(event);
+
+		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+			event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
 	}
 
 	if (needs_branch_stack(event)) {
@@ -4069,7 +4077,6 @@ __init int intel_pmu_init(void)
 		intel_pmu_lbr_init_skl();
 
 		x86_pmu.event_constraints = intel_slm_event_constraints;
-		x86_pmu.pebs_constraints = intel_glp_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_glm_extra_regs;
 		/*
 		 * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
@@ -4079,6 +4086,7 @@ __init int intel_pmu_init(void)
 		x86_pmu.pebs_prec_dist = true;
 		x86_pmu.lbr_pt_coexist = true;
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+		x86_pmu.flags |= PMU_FL_PEBS_ALL;
 		x86_pmu.get_event_constraints = glp_get_event_constraints;
 		x86_pmu.cpu_events = glm_events_attrs;
 		/* Goldmont Plus has 4-wide pipeline */
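The core.c hunks above move the PEBS enable/disable calls in front of the fixed-counter fast path, so a precise_ip event can now be scheduled on a fixed counter, and intel_pmu_enable_fixed() stops setting the IRQ-generation bit (0x8) for PEBS events, whose overflows are delivered through the DS buffer rather than one PMI per overflow. A minimal standalone sketch of the resulting control-bit computation; the helper and constant names are illustrative, not kernel API:

	/* Sketch: compose one fixed counter's field for FIXED_CTR_CTRL. */
	#include <stdbool.h>
	#include <stdint.h>

	#define FIXED_PMI	0x8ULL	/* raise an interrupt on overflow */
	#define FIXED_USR	0x2ULL	/* count in ring 3 */
	#define FIXED_OS	0x1ULL	/* count in ring 0 */

	static uint64_t fixed_ctrl_bits(bool precise_ip, bool usr, bool os)
	{
		uint64_t bits = 0;

		if (!precise_ip)	/* PEBS overflows drain via the DS area */
			bits |= FIXED_PMI;
		if (usr)
			bits |= FIXED_USR;
		if (os)
			bits |= FIXED_OS;
		/* the caller shifts this by 4 * idx into the control MSR */
		return bits;
	}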
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8a10a045b57b..b7b01d762d32 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -408,9 +408,11 @@ static int alloc_bts_buffer(int cpu)
 	ds->bts_buffer_base = (unsigned long) cea;
 	ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
 	ds->bts_index = ds->bts_buffer_base;
-	max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
-	ds->bts_absolute_maximum = ds->bts_buffer_base + max;
-	ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
+	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
+	ds->bts_absolute_maximum = ds->bts_buffer_base +
+					max * BTS_RECORD_SIZE;
+	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
+					(max / 16) * BTS_RECORD_SIZE;
 	return 0;
 }
 
@@ -711,12 +713,6 @@ struct event_constraint intel_glm_pebs_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
-struct event_constraint intel_glp_pebs_event_constraints[] = {
-	/* Allow all events as PEBS with no flags */
-	INTEL_ALL_EVENT_CONSTRAINT(0, 0xf),
-	EVENT_CONSTRAINT_END
-};
-
 struct event_constraint intel_nehalem_pebs_event_constraints[] = {
 	INTEL_PLD_CONSTRAINT(0x100b, 0xf),		/* MEM_INST_RETIRED.* */
 	INTEL_FLAGS_EVENT_CONSTRAINT(0x0f, 0xf),	/* MEM_UNCORE_RETIRED.* */
@@ -869,6 +865,13 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 		}
 	}
 
+	/*
+	 * Extended PEBS support
+	 * Makes the PEBS code search the normal constraints.
+	 */
+	if (x86_pmu.flags & PMU_FL_PEBS_ALL)
+		return NULL;
+
 	return &emptyconstraint;
 }
 
@@ -894,10 +897,16 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
 {
 	struct debug_store *ds = cpuc->ds;
 	u64 threshold;
+	int reserved;
+
+	if (x86_pmu.flags & PMU_FL_PEBS_ALL)
+		reserved = x86_pmu.max_pebs_events + x86_pmu.num_counters_fixed;
+	else
+		reserved = x86_pmu.max_pebs_events;
 
 	if (cpuc->n_pebs == cpuc->n_large_pebs) {
 		threshold = ds->pebs_absolute_maximum -
-			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+			reserved * x86_pmu.pebs_record_size;
 	} else {
 		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
 	}
@@ -961,7 +970,11 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	 * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
 	 */
 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
-		ds->pebs_event_reset[hwc->idx] =
+		unsigned int idx = hwc->idx;
+
+		if (idx >= INTEL_PMC_IDX_FIXED)
+			idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
+		ds->pebs_event_reset[idx] =
 			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
 	} else {
 		ds->pebs_event_reset[hwc->idx] = 0;
@@ -1184,16 +1197,20 @@ static void setup_pebs_sample_data(struct perf_event *event,
 	}
 
 	/*
+	 * We must however always use iregs for the unwinder to stay sane; the
+	 * record BP,SP,IP can point into thin air when the record is from a
+	 * previous PMI context or an (I)RET happend between the record and
+	 * PMI.
+	 */
+	if (sample_type & PERF_SAMPLE_CALLCHAIN)
+		data->callchain = perf_callchain(event, iregs);
+
+	/*
 	 * We use the interrupt regs as a base because the PEBS record does not
 	 * contain a full regs set, specifically it seems to lack segment
 	 * descriptors, which get used by things like user_mode().
 	 *
 	 * In the simple case fix up only the IP for PERF_SAMPLE_IP.
-	 *
-	 * We must however always use BP,SP from iregs for the unwinder to stay
-	 * sane; the record BP,SP can point into thin air when the record is
-	 * from a previous PMI context or an (I)RET happend between the record
-	 * and PMI.
 	 */
 	*regs = *iregs;
 
@@ -1212,15 +1229,8 @@ static void setup_pebs_sample_data(struct perf_event *event,
 		regs->si = pebs->si;
 		regs->di = pebs->di;
 
-		/*
-		 * Per the above; only set BP,SP if we don't need callchains.
-		 *
-		 * XXX: does this make sense?
-		 */
-		if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
-			regs->bp = pebs->bp;
-			regs->sp = pebs->sp;
-		}
+		regs->bp = pebs->bp;
+		regs->sp = pebs->sp;
 
 #ifndef CONFIG_X86_32
 		regs->r8 = pebs->r8;
@@ -1482,9 +1492,10 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 	struct debug_store *ds = cpuc->ds;
 	struct perf_event *event;
 	void *base, *at, *top;
-	short counts[MAX_PEBS_EVENTS] = {};
-	short error[MAX_PEBS_EVENTS] = {};
-	int bit, i;
+	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+	short error[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+	int bit, i, size;
+	u64 mask;
 
 	if (!x86_pmu.pebs_active)
 		return;
@@ -1494,6 +1505,13 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 
 	ds->pebs_index = ds->pebs_buffer_base;
 
+	mask = (1ULL << x86_pmu.max_pebs_events) - 1;
+	size = x86_pmu.max_pebs_events;
+	if (x86_pmu.flags & PMU_FL_PEBS_ALL) {
+		mask |= ((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
+		size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
+	}
+
 	if (unlikely(base >= top)) {
 		/*
 		 * The drain_pebs() could be called twice in a short period
@@ -1503,7 +1521,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 		 * update the event->count for this case.
 		 */
 		for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled,
-				 x86_pmu.max_pebs_events) {
+				 size) {
 			event = cpuc->events[bit];
 			if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
 				intel_pmu_save_and_restart_reload(event, 0);
@@ -1516,12 +1534,12 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 		u64 pebs_status;
 
 		pebs_status = p->status & cpuc->pebs_enabled;
-		pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
+		pebs_status &= mask;
 
 		/* PEBS v3 has more accurate status bits */
 		if (x86_pmu.intel_cap.pebs_format >= 3) {
 			for_each_set_bit(bit, (unsigned long *)&pebs_status,
-					 x86_pmu.max_pebs_events)
+					 size)
 				counts[bit]++;
 
 			continue;
@@ -1569,7 +1587,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			counts[bit]++;
 	}
 
-	for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
+	for (bit = 0; bit < size; bit++) {
 		if ((counts[bit] == 0) && (error[bit] == 0))
 			continue;
 
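With PMU_FL_PEBS_ALL the drain path has to look past the general-purpose counters: fixed counters report their PEBS status at bit INTEL_PMC_IDX_FIXED (32) and up, which is why counts[]/error[] grow and a mask/size pair replaces the bare max_pebs_events limit. A self-contained illustration of that computation, using example counter counts:

	/* Sketch of the mask/size logic from intel_pmu_drain_pebs_nhm(). */
	#include <stdint.h>
	#include <stdio.h>

	#define INTEL_PMC_IDX_FIXED 32

	int main(void)
	{
		int max_pebs_events = 4;	/* GP counters capable of PEBS */
		int num_counters_fixed = 3;	/* example value */
		int pebs_all = 1;		/* PMU_FL_PEBS_ALL set */
		uint64_t mask = (1ULL << max_pebs_events) - 1;
		int size = max_pebs_events;

		if (pebs_all) {
			mask |= ((1ULL << num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED;
			size = INTEL_PMC_IDX_FIXED + num_counters_fixed;
		}
		/* prints mask=0x000000070000000f size=35:
		 * GP bits 0-3 plus fixed-counter bits 32-34 */
		printf("mask=%#018llx size=%d\n", (unsigned long long)mask, size);
		return 0;
	}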
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index cf372b90557e..f3e006bed9a7 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -216,6 +216,8 @@ static void intel_pmu_lbr_reset_64(void)
 
 void intel_pmu_lbr_reset(void)
 {
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
 	if (!x86_pmu.lbr_nr)
 		return;
 
@@ -223,6 +225,9 @@ void intel_pmu_lbr_reset(void)
 		intel_pmu_lbr_reset_32();
 	else
 		intel_pmu_lbr_reset_64();
+
+	cpuc->last_task_ctx = NULL;
+	cpuc->last_log_id = 0;
 }
 
 /*
@@ -334,6 +339,7 @@ static inline u64 rdlbr_to(unsigned int idx)
 
 static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
 {
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	int i;
 	unsigned lbr_idx, mask;
 	u64 tos;
@@ -344,9 +350,21 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
 		return;
 	}
 
-	mask = x86_pmu.lbr_nr - 1;
 	tos = task_ctx->tos;
-	for (i = 0; i < tos; i++) {
+	/*
+	 * Does not restore the LBR registers, if
+	 * - No one else touched them, and
+	 * - Did not enter C6
+	 */
+	if ((task_ctx == cpuc->last_task_ctx) &&
+	    (task_ctx->log_id == cpuc->last_log_id) &&
+	    rdlbr_from(tos)) {
+		task_ctx->lbr_stack_state = LBR_NONE;
+		return;
+	}
+
+	mask = x86_pmu.lbr_nr - 1;
+	for (i = 0; i < task_ctx->valid_lbrs; i++) {
 		lbr_idx = (tos - i) & mask;
 		wrlbr_from(lbr_idx, task_ctx->lbr_from[i]);
 		wrlbr_to  (lbr_idx, task_ctx->lbr_to[i]);
@@ -354,14 +372,24 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
 		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
 			wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
 	}
+
+	for (; i < x86_pmu.lbr_nr; i++) {
+		lbr_idx = (tos - i) & mask;
+		wrlbr_from(lbr_idx, 0);
+		wrlbr_to(lbr_idx, 0);
+		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+			wrmsrl(MSR_LBR_INFO_0 + lbr_idx, 0);
+	}
+
 	wrmsrl(x86_pmu.lbr_tos, tos);
 	task_ctx->lbr_stack_state = LBR_NONE;
 }
 
 static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 {
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	unsigned lbr_idx, mask;
-	u64 tos;
+	u64 tos, from;
 	int i;
 
 	if (task_ctx->lbr_callstack_users == 0) {
@@ -371,15 +399,22 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 
 	mask = x86_pmu.lbr_nr - 1;
 	tos = intel_pmu_lbr_tos();
-	for (i = 0; i < tos; i++) {
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
 		lbr_idx = (tos - i) & mask;
-		task_ctx->lbr_from[i] = rdlbr_from(lbr_idx);
+		from = rdlbr_from(lbr_idx);
+		if (!from)
+			break;
+		task_ctx->lbr_from[i] = from;
 		task_ctx->lbr_to[i]   = rdlbr_to(lbr_idx);
 		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
 	}
+	task_ctx->valid_lbrs = i;
 	task_ctx->tos = tos;
 	task_ctx->lbr_stack_state = LBR_VALID;
+
+	cpuc->last_task_ctx = task_ctx;
+	cpuc->last_log_id = ++task_ctx->log_id;
 }
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
@@ -531,7 +566,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
  */
 static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 {
-	bool need_info = false;
+	bool need_info = false, call_stack = false;
 	unsigned long mask = x86_pmu.lbr_nr - 1;
 	int lbr_format = x86_pmu.intel_cap.lbr_format;
 	u64 tos = intel_pmu_lbr_tos();
@@ -542,7 +577,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 	if (cpuc->lbr_sel) {
 		need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
 		if (cpuc->lbr_sel->config & LBR_CALL_STACK)
-			num = tos;
+			call_stack = true;
 	}
 
 	for (i = 0; i < num; i++) {
@@ -555,6 +590,13 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 		from = rdlbr_from(lbr_idx);
 		to   = rdlbr_to(lbr_idx);
 
+		/*
+		 * Read LBR call stack entries
+		 * until invalid entry (0s) is detected.
+		 */
+		if (call_stack && !from)
+			break;
+
 		if (lbr_format == LBR_FORMAT_INFO && need_info) {
 			u64 info;
 
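The lbr.c changes stop bounding the save/restore/read loops by the TOS value: they walk all lbr_nr slots, treat a zero FROM value as the first invalid entry, record how many entries were valid (valid_lbrs), and add a generation check so a task switching back in on the same CPU can skip the MSR restore entirely. A reduced sketch of that fast-path test, with pared-down stand-in types:

	/* Sketch of the swap-in check added to __intel_pmu_lbr_restore(). */
	#include <stdbool.h>
	#include <stdint.h>

	struct task_lbr_ctx {
		uint64_t log_id;		/* bumped on every save */
	};

	struct cpu_lbr_state {
		struct task_lbr_ctx *last_task_ctx;	/* last ctx saved here */
		uint64_t last_log_id;
	};

	static bool lbr_restore_can_be_skipped(struct cpu_lbr_state *cpuc,
					       struct task_lbr_ctx *ctx,
					       uint64_t tos_from_msr)
	{
		/*
		 * Skip only if no other task wrote the LBRs since our save
		 * and the top-of-stack FROM MSR still reads non-zero (a deep
		 * C-state such as C6 would have cleared it).
		 */
		return ctx == cpuc->last_task_ctx &&
		       ctx->log_id == cpuc->last_log_id &&
		       tos_from_msr != 0;
	}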
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index c9e1e0bef3c3..e17ab885b1e9 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -28,7 +28,7 @@
 #define UNCORE_PCI_DEV_TYPE(data)	((data >> 8) & 0xff)
 #define UNCORE_PCI_DEV_IDX(data)	(data & 0xff)
 #define UNCORE_EXTRA_PCI_DEV		0xff
-#define UNCORE_EXTRA_PCI_DEV_MAX	3
+#define UNCORE_EXTRA_PCI_DEV_MAX	4
 
 #define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
 
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 87dc0263a2e1..51d7c117e3c7 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1029,6 +1029,7 @@ void snbep_uncore_cpu_init(void)
 enum {
 	SNBEP_PCI_QPI_PORT0_FILTER,
 	SNBEP_PCI_QPI_PORT1_FILTER,
+	BDX_PCI_QPI_PORT2_FILTER,
 	HSWEP_PCI_PCU_3,
 };
 
@@ -3286,15 +3287,18 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = {
 	},
 	{ /* QPI Port 0 filter  */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f86),
-		.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 0),
+		.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+						   SNBEP_PCI_QPI_PORT0_FILTER),
 	},
 	{ /* QPI Port 1 filter  */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f96),
-		.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 1),
+		.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+						   SNBEP_PCI_QPI_PORT1_FILTER),
 	},
 	{ /* QPI Port 2 filter  */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46),
-		.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2),
+		.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+						   BDX_PCI_QPI_PORT2_FILTER),
 	},
 	{ /* PCU.3 (for Capability registers) */
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fc0),
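The uncore fix replaces the hardcoded extra-device indices 0/1/2 with enum constants and bumps UNCORE_EXTRA_PCI_DEV_MAX to 4, since inserting BDX_PCI_QPI_PORT2_FILTER pushes HSWEP_PCI_PCU_3 to index 3. Keeping the indices symbolic gives each filter a stable slot even when an optional device (the port 2 filter is not present on every Broadwell server) fails to probe. A sketch of the driver_data packing these tables rely on; UNCORE_PCI_DEV_DATA's body is reconstructed from the TYPE/IDX accessors in uncore.h and may differ in detail:

	/* Sketch: type in bits 8-15 of driver_data, index in bits 0-7. */
	#include <stdio.h>

	#define UNCORE_PCI_DEV_DATA(type, idx)	(((type) << 8) | (idx))
	#define UNCORE_PCI_DEV_TYPE(data)	(((data) >> 8) & 0xff)
	#define UNCORE_PCI_DEV_IDX(data)	((data) & 0xff)
	#define UNCORE_EXTRA_PCI_DEV		0xff

	enum {
		SNBEP_PCI_QPI_PORT0_FILTER,	/* 0 */
		SNBEP_PCI_QPI_PORT1_FILTER,	/* 1 */
		BDX_PCI_QPI_PORT2_FILTER,	/* 2 */
		HSWEP_PCI_PCU_3,		/* 3, hence MAX = 4 */
	};

	int main(void)
	{
		unsigned long data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
							 BDX_PCI_QPI_PORT2_FILTER);

		printf("type=%#lx idx=%lu\n",	/* type=0xff idx=2 */
		       UNCORE_PCI_DEV_TYPE(data), UNCORE_PCI_DEV_IDX(data));
		return 0;
	}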