summaryrefslogtreecommitdiff
path: root/arch/x86/events/intel/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/events/intel/core.c')
-rw-r--r--arch/x86/events/intel/core.c814
1 files changed, 629 insertions, 185 deletions
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 99c590da0ae2..c2fb729c270e 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -23,6 +23,7 @@
#include <asm/intel_pt.h>
#include <asm/apic.h>
#include <asm/cpu_device_id.h>
+#include <asm/msr.h>
#include "../perf_event.h"
@@ -397,34 +398,28 @@ static struct event_constraint intel_lnc_event_constraints[] = {
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6),
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7),
+ INTEL_EVENT_CONSTRAINT(0x20, 0xf),
+
+ INTEL_UEVENT_CONSTRAINT(0x012a, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x012b, 0xf),
INTEL_UEVENT_CONSTRAINT(0x0148, 0x4),
INTEL_UEVENT_CONSTRAINT(0x0175, 0x4),
INTEL_EVENT_CONSTRAINT(0x2e, 0x3ff),
INTEL_EVENT_CONSTRAINT(0x3c, 0x3ff),
- /*
- * Generally event codes < 0x90 are restricted to counters 0-3.
- * The 0x2E and 0x3C are exception, which has no restriction.
- */
- INTEL_EVENT_CONSTRAINT_RANGE(0x01, 0x8f, 0xf),
- INTEL_UEVENT_CONSTRAINT(0x01a3, 0xf),
- INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf),
INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1),
INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1),
INTEL_UEVENT_CONSTRAINT(0x10a4, 0x1),
INTEL_UEVENT_CONSTRAINT(0x01b1, 0x8),
+ INTEL_UEVENT_CONSTRAINT(0x01cd, 0x3fc),
INTEL_UEVENT_CONSTRAINT(0x02cd, 0x3),
- INTEL_EVENT_CONSTRAINT(0xce, 0x1),
INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf),
- /*
- * Generally event codes >= 0x90 are likely to have no restrictions.
- * The exception are defined as above.
- */
- INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0x3ff),
+
+ INTEL_UEVENT_CONSTRAINT(0x00e0, 0xf),
EVENT_CONSTRAINT_END
};
@@ -2230,6 +2225,18 @@ static struct extra_reg intel_cmt_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};
+EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound_skt, "event=0x9c,umask=0x01");
+EVENT_ATTR_STR(topdown-retiring, td_retiring_skt, "event=0xc2,umask=0x02");
+EVENT_ATTR_STR(topdown-be-bound, td_be_bound_skt, "event=0xa4,umask=0x02");
+
+static struct attribute *skt_events_attrs[] = {
+ EVENT_PTR(td_fe_bound_skt),
+ EVENT_PTR(td_retiring_skt),
+ EVENT_PTR(td_bad_spec_cmt),
+ EVENT_PTR(td_be_bound_skt),
+ NULL,
+};
+
#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */
#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */
#define KNL_MCDRAM_LOCAL BIT_ULL(21)
@@ -2291,7 +2298,7 @@ static __always_inline void __intel_pmu_disable_all(bool bts)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0);
if (bts && test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
intel_pmu_disable_bts();
@@ -2300,7 +2307,7 @@ static __always_inline void __intel_pmu_disable_all(bool bts)
static __always_inline void intel_pmu_disable_all(void)
{
__intel_pmu_disable_all(true);
- intel_pmu_pebs_disable_all();
+ static_call_cond(x86_pmu_pebs_disable_all)();
intel_pmu_lbr_disable_all();
}
@@ -2312,11 +2319,11 @@ static void __intel_pmu_enable_all(int added, bool pmi)
intel_pmu_lbr_enable_all(pmi);
if (cpuc->fixed_ctrl_val != cpuc->active_fixed_ctrl_val) {
- wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, cpuc->fixed_ctrl_val);
+ wrmsrq(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, cpuc->fixed_ctrl_val);
cpuc->active_fixed_ctrl_val = cpuc->fixed_ctrl_val;
}
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
+ wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL,
intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
@@ -2332,7 +2339,7 @@ static void __intel_pmu_enable_all(int added, bool pmi)
static void intel_pmu_enable_all(int added)
{
- intel_pmu_pebs_enable_all();
+ static_call_cond(x86_pmu_pebs_enable_all)();
__intel_pmu_enable_all(added, false);
}
@@ -2432,12 +2439,12 @@ static void intel_pmu_nhm_workaround(void)
}
for (i = 0; i < 4; i++) {
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
- wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
+ wrmsrq(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
+ wrmsrq(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
}
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+ wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
+ wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
for (i = 0; i < 4; i++) {
event = cpuc->events[i];
@@ -2447,7 +2454,7 @@ static void intel_pmu_nhm_workaround(void)
__x86_pmu_enable_event(&event->hw,
ARCH_PERFMON_EVENTSEL_ENABLE);
} else
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
+ wrmsrq(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
}
}
@@ -2464,7 +2471,7 @@ static void intel_set_tfa(struct cpu_hw_events *cpuc, bool on)
if (cpuc->tfa_shadow != val) {
cpuc->tfa_shadow = val;
- wrmsrl(MSR_TSX_FORCE_ABORT, val);
+ wrmsrq(MSR_TSX_FORCE_ABORT, val);
}
}
@@ -2495,14 +2502,14 @@ static inline u64 intel_pmu_get_status(void)
{
u64 status;
- rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+ rdmsrq(MSR_CORE_PERF_GLOBAL_STATUS, status);
return status;
}
static inline void intel_pmu_ack_status(u64 ack)
{
- wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
+ wrmsrq(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}
static inline bool event_is_checkpointed(struct perf_event *event)
@@ -2589,7 +2596,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
* so we don't trigger the event without PEBS bit set.
*/
if (unlikely(event->attr.precise_ip))
- intel_pmu_pebs_disable(event);
+ static_call(x86_pmu_pebs_disable)(event);
}
static void intel_pmu_assign_event(struct perf_event *event, int idx)
@@ -2609,6 +2616,9 @@ static void intel_pmu_del_event(struct perf_event *event)
intel_pmu_lbr_del(event);
if (event->attr.precise_ip)
intel_pmu_pebs_del(event);
+ if (is_pebs_counter_event_group(event) ||
+ is_acr_event_group(event))
+ this_cpu_ptr(&cpu_hw_events)->n_late_setup--;
}
static int icl_set_topdown_event_period(struct perf_event *event)
@@ -2625,15 +2635,15 @@ static int icl_set_topdown_event_period(struct perf_event *event)
* Don't need to clear them again.
*/
if (left == x86_pmu.max_period) {
- wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
- wrmsrl(MSR_PERF_METRICS, 0);
+ wrmsrq(MSR_CORE_PERF_FIXED_CTR3, 0);
+ wrmsrq(MSR_PERF_METRICS, 0);
hwc->saved_slots = 0;
hwc->saved_metric = 0;
}
if ((hwc->saved_slots) && is_slots_event(event)) {
- wrmsrl(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots);
- wrmsrl(MSR_PERF_METRICS, hwc->saved_metric);
+ wrmsrq(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots);
+ wrmsrq(MSR_PERF_METRICS, hwc->saved_metric);
}
perf_event_update_userpage(event);
@@ -2720,7 +2730,7 @@ static void update_saved_topdown_regs(struct perf_event *event, u64 slots,
* modify by a NMI. PMU has to be disabled before calling this function.
*/
-static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
+static u64 intel_update_topdown_event(struct perf_event *event, int metric_end, u64 *val)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_event *other;
@@ -2728,13 +2738,24 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
bool reset = true;
int idx;
- /* read Fixed counter 3 */
- rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots);
- if (!slots)
- return 0;
+ if (!val) {
+ /* read Fixed counter 3 */
+ slots = rdpmc(3 | INTEL_PMC_FIXED_RDPMC_BASE);
+ if (!slots)
+ return 0;
- /* read PERF_METRICS */
- rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);
+ /* read PERF_METRICS */
+ metrics = rdpmc(INTEL_PMC_FIXED_RDPMC_METRICS);
+ } else {
+ slots = val[0];
+ metrics = val[1];
+ /*
+ * Don't reset the PERF_METRICS and Fixed counter 3
+ * for each PEBS record read. Utilize the RDPMC metrics
+ * clear mode.
+ */
+ reset = false;
+ }
for_each_set_bit(idx, cpuc->active_mask, metric_end + 1) {
if (!is_topdown_idx(idx))
@@ -2768,8 +2789,8 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
if (reset) {
/* The fixed counter 3 has to be written before the PERF_METRICS. */
- wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
- wrmsrl(MSR_PERF_METRICS, 0);
+ wrmsrq(MSR_CORE_PERF_FIXED_CTR3, 0);
+ wrmsrq(MSR_PERF_METRICS, 0);
if (event)
update_saved_topdown_regs(event, 0, 0, metric_end);
}
@@ -2777,36 +2798,47 @@ static u64 intel_update_topdown_event(struct perf_event *event, int metric_end)
return slots;
}
-static u64 icl_update_topdown_event(struct perf_event *event)
+static u64 icl_update_topdown_event(struct perf_event *event, u64 *val)
{
return intel_update_topdown_event(event, INTEL_PMC_IDX_METRIC_BASE +
- x86_pmu.num_topdown_events - 1);
+ x86_pmu.num_topdown_events - 1,
+ val);
}
-DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update);
+DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, intel_pmu_topdown_event_update);
-static void intel_pmu_read_topdown_event(struct perf_event *event)
+static void intel_pmu_read_event(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ if (event->hw.flags & (PERF_X86_EVENT_AUTO_RELOAD | PERF_X86_EVENT_TOPDOWN) ||
+ is_pebs_counter_event_group(event)) {
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ bool pmu_enabled = cpuc->enabled;
- /* Only need to call update_topdown_event() once for group read. */
- if ((cpuc->txn_flags & PERF_PMU_TXN_READ) &&
- !is_slots_event(event))
- return;
+ /* Only need to call update_topdown_event() once for group read. */
+ if (is_metric_event(event) && (cpuc->txn_flags & PERF_PMU_TXN_READ))
+ return;
- perf_pmu_disable(event->pmu);
- static_call(intel_pmu_update_topdown_event)(event);
- perf_pmu_enable(event->pmu);
-}
+ cpuc->enabled = 0;
+ if (pmu_enabled)
+ intel_pmu_disable_all();
-static void intel_pmu_read_event(struct perf_event *event)
-{
- if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
- intel_pmu_auto_reload_read(event);
- else if (is_topdown_count(event))
- intel_pmu_read_topdown_event(event);
- else
- x86_perf_event_update(event);
+ /*
+ * If the PEBS counters snapshotting is enabled,
+ * the topdown event is available in PEBS records.
+ */
+ if (is_topdown_count(event) && !is_pebs_counter_event_group(event))
+ static_call(intel_pmu_update_topdown_event)(event, NULL);
+ else
+ intel_pmu_drain_pebs_buffer();
+
+ cpuc->enabled = pmu_enabled;
+ if (pmu_enabled)
+ intel_pmu_enable_all(0);
+
+ return;
+ }
+
+ x86_perf_event_update(event);
}
static void intel_pmu_enable_fixed(struct perf_event *event)
@@ -2826,6 +2858,9 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
return;
idx = INTEL_PMC_IDX_FIXED_SLOTS;
+
+ if (event->attr.config1 & INTEL_TD_CFG_METRIC_CLEAR)
+ bits |= INTEL_FIXED_3_METRICS_CLEAR;
}
intel_set_masks(event, idx);
@@ -2861,6 +2896,54 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
cpuc->fixed_ctrl_val |= bits;
}
+static void intel_pmu_config_acr(int idx, u64 mask, u32 reload)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int msr_b, msr_c;
+ int msr_offset;
+
+ if (!mask && !cpuc->acr_cfg_b[idx])
+ return;
+
+ if (idx < INTEL_PMC_IDX_FIXED) {
+ msr_b = MSR_IA32_PMC_V6_GP0_CFG_B;
+ msr_c = MSR_IA32_PMC_V6_GP0_CFG_C;
+ msr_offset = x86_pmu.addr_offset(idx, false);
+ } else {
+ msr_b = MSR_IA32_PMC_V6_FX0_CFG_B;
+ msr_c = MSR_IA32_PMC_V6_FX0_CFG_C;
+ msr_offset = x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false);
+ }
+
+ if (cpuc->acr_cfg_b[idx] != mask) {
+ wrmsrl(msr_b + msr_offset, mask);
+ cpuc->acr_cfg_b[idx] = mask;
+ }
+ /* Only need to update the reload value when there is a valid config value. */
+ if (mask && cpuc->acr_cfg_c[idx] != reload) {
+ wrmsrl(msr_c + msr_offset, reload);
+ cpuc->acr_cfg_c[idx] = reload;
+ }
+}
+
+static void intel_pmu_enable_acr(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ if (!is_acr_event_group(event) || !event->attr.config2) {
+ /*
+ * The disable doesn't clear the ACR CFG register.
+ * Check and clear the ACR CFG register.
+ */
+ intel_pmu_config_acr(hwc->idx, 0, 0);
+ return;
+ }
+
+ intel_pmu_config_acr(hwc->idx, hwc->config1, -hwc->sample_period);
+}
+
+DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
+
static void intel_pmu_enable_event(struct perf_event *event)
{
u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
@@ -2868,16 +2951,19 @@ static void intel_pmu_enable_event(struct perf_event *event)
int idx = hwc->idx;
if (unlikely(event->attr.precise_ip))
- intel_pmu_pebs_enable(event);
+ static_call(x86_pmu_pebs_enable)(event);
switch (idx) {
case 0 ... INTEL_PMC_IDX_FIXED - 1:
if (branch_sample_counters(event))
enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
intel_set_masks(event, idx);
+ static_call_cond(intel_pmu_enable_acr_event)(event);
__x86_pmu_enable_event(hwc, enable_mask);
break;
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
+ static_call_cond(intel_pmu_enable_acr_event)(event);
+ fallthrough;
case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
intel_pmu_enable_fixed(event);
break;
@@ -2895,12 +2981,51 @@ static void intel_pmu_enable_event(struct perf_event *event)
}
}
+static void intel_pmu_acr_late_setup(struct cpu_hw_events *cpuc)
+{
+ struct perf_event *event, *leader;
+ int i, j, idx;
+
+ for (i = 0; i < cpuc->n_events; i++) {
+ leader = cpuc->event_list[i];
+ if (!is_acr_event_group(leader))
+ continue;
+
+ /* The ACR events must be contiguous. */
+ for (j = i; j < cpuc->n_events; j++) {
+ event = cpuc->event_list[j];
+ if (event->group_leader != leader->group_leader)
+ break;
+ for_each_set_bit(idx, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) {
+ if (WARN_ON_ONCE(i + idx > cpuc->n_events))
+ return;
+ __set_bit(cpuc->assign[i + idx], (unsigned long *)&event->hw.config1);
+ }
+ }
+ i = j - 1;
+ }
+}
+
+void intel_pmu_late_setup(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ if (!cpuc->n_late_setup)
+ return;
+
+ intel_pmu_pebs_late_setup(cpuc);
+ intel_pmu_acr_late_setup(cpuc);
+}
+
static void intel_pmu_add_event(struct perf_event *event)
{
if (event->attr.precise_ip)
intel_pmu_pebs_add(event);
if (intel_pmu_needs_branch_stack(event))
intel_pmu_lbr_add(event);
+ if (is_pebs_counter_event_group(event) ||
+ is_acr_event_group(event))
+ this_cpu_ptr(&cpu_hw_events)->n_late_setup++;
}
/*
@@ -2918,7 +3043,7 @@ int intel_pmu_save_and_restart(struct perf_event *event)
*/
if (unlikely(event_is_checkpointed(event))) {
/* No race with NMIs because the counter should not be armed */
- wrmsrl(event->hw.event_base, 0);
+ wrmsrq(event->hw.event_base, 0);
local64_set(&event->hw.prev_count, 0);
}
return static_call(x86_pmu_set_period)(event);
@@ -2935,7 +3060,7 @@ static int intel_pmu_set_period(struct perf_event *event)
static u64 intel_pmu_update(struct perf_event *event)
{
if (unlikely(is_topdown_count(event)))
- return static_call(intel_pmu_update_topdown_event)(event);
+ return static_call(intel_pmu_update_topdown_event)(event, NULL);
return x86_perf_event_update(event);
}
@@ -2957,13 +3082,13 @@ static void intel_pmu_reset(void)
pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
for_each_set_bit(idx, cntr_mask, INTEL_PMC_MAX_GENERIC) {
- wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
- wrmsrl_safe(x86_pmu_event_addr(idx), 0ull);
+ wrmsrq_safe(x86_pmu_config_addr(idx), 0ull);
+ wrmsrq_safe(x86_pmu_event_addr(idx), 0ull);
}
for_each_set_bit(idx, fixed_cntr_mask, INTEL_PMC_MAX_FIXED) {
if (fixed_counter_disabled(idx, cpuc->pmu))
continue;
- wrmsrl_safe(x86_pmu_fixed_ctr_addr(idx), 0ull);
+ wrmsrq_safe(x86_pmu_fixed_ctr_addr(idx), 0ull);
}
if (ds)
@@ -2972,7 +3097,7 @@ static void intel_pmu_reset(void)
/* Ack all overflows and disable fixed counters */
if (x86_pmu.version >= 2) {
intel_pmu_ack_status(intel_pmu_get_status());
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsrq(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}
/* Reset LBRs and LBR freezing */
@@ -3016,8 +3141,7 @@ static void x86_pmu_handle_guest_pebs(struct pt_regs *regs,
continue;
perf_sample_data_init(data, 0, event->hw.last_period);
- if (perf_event_overflow(event, data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, data, regs);
/* Inject one fake event is enough. */
break;
@@ -3030,7 +3154,6 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
int bit;
int handled = 0;
- u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
inc_irq_stat(apic_perf_irqs);
@@ -3073,8 +3196,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
handled++;
x86_pmu_handle_guest_pebs(regs, &data);
- x86_pmu.drain_pebs(regs, &data);
- status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
+ static_call(x86_pmu_drain_pebs)(regs, &data);
/*
* PMI throttle may be triggered, which stops the PEBS event.
@@ -3084,7 +3206,16 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
* Update the MSR if pebs_enabled is changed.
*/
if (pebs_enabled != cpuc->pebs_enabled)
- wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
+ wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
+
+ /*
+ * Above PEBS handler (PEBS counters snapshotting) has updated fixed
+ * counter 3 and perf metrics counts if they are in counter group,
+ * unnecessary to update again.
+ */
+ if (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS] &&
+ is_pebs_counter_event_group(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS]))
+ status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT;
}
/*
@@ -3101,9 +3232,11 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
*/
if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) {
handled++;
- static_call(intel_pmu_update_topdown_event)(NULL);
+ static_call(intel_pmu_update_topdown_event)(NULL, NULL);
}
+ status &= hybrid(cpuc->pmu, intel_ctrl);
+
/*
* Checkpointed counters can lead to 'spurious' PMIs because the
* rollback caused by the PMI will have cleared the overflow status
@@ -3113,22 +3246,45 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[bit];
+ u64 last_period;
handled++;
if (!test_bit(bit, cpuc->active_mask))
continue;
+ /*
+ * There may be unprocessed PEBS records in the PEBS buffer,
+ * which still stores the previous values.
+ * Process those records first before handling the latest value.
+ * For example,
+ * A is a regular counter
+ * B is a PEBS event which reads A
+ * C is a PEBS event
+ *
+ * The following can happen:
+ * B-assist A=1
+ * C A=2
+ * B-assist A=3
+ * A-overflow-PMI A=4
+ * C-assist-PMI (PEBS buffer) A=5
+ *
+ * The PEBS buffer has to be drained before handling the A-PMI
+ */
+ if (is_pebs_counter_event_group(event))
+ x86_pmu.drain_pebs(regs, &data);
+
+ last_period = event->hw.last_period;
+
if (!intel_pmu_save_and_restart(event))
continue;
- perf_sample_data_init(&data, 0, event->hw.last_period);
+ perf_sample_data_init(&data, 0, last_period);
if (has_branch_stack(event))
intel_pmu_lbr_save_brstack(&data, cpuc, event);
- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
return handled;
@@ -3690,10 +3846,9 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
if (cpuc->excl_cntrs)
return intel_get_excl_constraints(cpuc, event, idx, c2);
- /* Not all counters support the branch counter feature. */
- if (branch_sample_counters(event)) {
+ if (event->hw.dyn_constraint != ~0ULL) {
c2 = dyn_constraint(cpuc, c2, idx);
- c2->idxmsk64 &= x86_pmu.lbr_counters;
+ c2->idxmsk64 &= event->hw.dyn_constraint;
c2->weight = hweight64(c2->idxmsk64);
}
@@ -3955,6 +4110,118 @@ static inline bool intel_pmu_has_cap(struct perf_event *event, int idx)
return test_bit(idx, (unsigned long *)&intel_cap->capabilities);
}
+static u64 intel_pmu_freq_start_period(struct perf_event *event)
+{
+ int type = event->attr.type;
+ u64 config, factor;
+ s64 start;
+
+ /*
+ * The 127 is the lowest possible recommended SAV (sample after value)
+ * for a 4000 freq (default freq), according to the event list JSON file.
+ * Also, assume the workload is idle 50% time.
+ */
+ factor = 64 * 4000;
+ if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE)
+ goto end;
+
+ /*
+ * The estimation of the start period in the freq mode is
+ * based on the below assumption.
+ *
+ * For a cycles or an instructions event, 1GHZ of the
+ * underlying platform, 1 IPC. The workload is idle 50% time.
+ * The start period = 1,000,000,000 * 1 / freq / 2.
+ * = 500,000,000 / freq
+ *
+ * Usually, the branch-related events occur less than the
+ * instructions event. According to the Intel event list JSON
+ * file, the SAV (sample after value) of a branch-related event
+ * is usually 1/4 of an instruction event.
+ * The start period of branch-related events = 125,000,000 / freq.
+ *
+ * The cache-related events occurs even less. The SAV is usually
+ * 1/20 of an instruction event.
+ * The start period of cache-related events = 25,000,000 / freq.
+ */
+ config = event->attr.config & PERF_HW_EVENT_MASK;
+ if (type == PERF_TYPE_HARDWARE) {
+ switch (config) {
+ case PERF_COUNT_HW_CPU_CYCLES:
+ case PERF_COUNT_HW_INSTRUCTIONS:
+ case PERF_COUNT_HW_BUS_CYCLES:
+ case PERF_COUNT_HW_STALLED_CYCLES_FRONTEND:
+ case PERF_COUNT_HW_STALLED_CYCLES_BACKEND:
+ case PERF_COUNT_HW_REF_CPU_CYCLES:
+ factor = 500000000;
+ break;
+ case PERF_COUNT_HW_BRANCH_INSTRUCTIONS:
+ case PERF_COUNT_HW_BRANCH_MISSES:
+ factor = 125000000;
+ break;
+ case PERF_COUNT_HW_CACHE_REFERENCES:
+ case PERF_COUNT_HW_CACHE_MISSES:
+ factor = 25000000;
+ break;
+ default:
+ goto end;
+ }
+ }
+
+ if (type == PERF_TYPE_HW_CACHE)
+ factor = 25000000;
+end:
+ /*
+ * Usually, a prime or a number with less factors (close to prime)
+ * is chosen as an SAV, which makes it less likely that the sampling
+ * period synchronizes with some periodic event in the workload.
+ * Minus 1 to make it at least avoiding values near power of twos
+ * for the default freq.
+ */
+ start = DIV_ROUND_UP_ULL(factor, event->attr.sample_freq) - 1;
+
+ if (start > x86_pmu.max_period)
+ start = x86_pmu.max_period;
+
+ if (x86_pmu.limit_period)
+ x86_pmu.limit_period(event, &start);
+
+ return start;
+}
+
+static inline bool intel_pmu_has_acr(struct pmu *pmu)
+{
+ return !!hybrid(pmu, acr_cause_mask64);
+}
+
+static bool intel_pmu_is_acr_group(struct perf_event *event)
+{
+ /* The group leader has the ACR flag set */
+ if (is_acr_event_group(event))
+ return true;
+
+ /* The acr_mask is set */
+ if (event->attr.config2)
+ return true;
+
+ return false;
+}
+
+static inline void intel_pmu_set_acr_cntr_constr(struct perf_event *event,
+ u64 *cause_mask, int *num)
+{
+ event->hw.dyn_constraint &= hybrid(event->pmu, acr_cntr_mask64);
+ *cause_mask |= event->attr.config2;
+ *num += 1;
+}
+
+static inline void intel_pmu_set_acr_caused_constr(struct perf_event *event,
+ int idx, u64 cause_mask)
+{
+ if (test_bit(idx, (unsigned long *)&cause_mask))
+ event->hw.dyn_constraint &= hybrid(event->pmu, acr_cause_mask64);
+}
+
static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
@@ -3966,6 +4233,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (ret)
return ret;
+ if (event->attr.freq && event->attr.sample_freq) {
+ event->hw.sample_period = intel_pmu_freq_start_period(event);
+ event->hw.last_period = event->hw.sample_period;
+ local64_set(&event->hw.period_left, event->hw.sample_period);
+ }
+
if (event->attr.precise_ip) {
if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT)
return -EINVAL;
@@ -4010,15 +4283,19 @@ static int intel_pmu_hw_config(struct perf_event *event)
leader = event->group_leader;
if (branch_sample_call_stack(leader))
return -EINVAL;
- if (branch_sample_counters(leader))
+ if (branch_sample_counters(leader)) {
num++;
+ leader->hw.dyn_constraint &= x86_pmu.lbr_counters;
+ }
leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS;
for_each_sibling_event(sibling, leader) {
if (branch_sample_call_stack(sibling))
return -EINVAL;
- if (branch_sample_counters(sibling))
+ if (branch_sample_counters(sibling)) {
num++;
+ sibling->hw.dyn_constraint &= x86_pmu.lbr_counters;
+ }
}
if (num > fls(x86_pmu.lbr_counters))
@@ -4066,6 +4343,101 @@ static int intel_pmu_hw_config(struct perf_event *event)
event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT;
}
+ if ((event->attr.sample_type & PERF_SAMPLE_READ) &&
+ (x86_pmu.intel_cap.pebs_format >= 6) &&
+ x86_pmu.intel_cap.pebs_baseline &&
+ is_sampling_event(event) &&
+ event->attr.precise_ip)
+ event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;
+
+ if (intel_pmu_has_acr(event->pmu) && intel_pmu_is_acr_group(event)) {
+ struct perf_event *sibling, *leader = event->group_leader;
+ struct pmu *pmu = event->pmu;
+ bool has_sw_event = false;
+ int num = 0, idx = 0;
+ u64 cause_mask = 0;
+
+ /* Not support perf metrics */
+ if (is_metric_event(event))
+ return -EINVAL;
+
+ /* Not support freq mode */
+ if (event->attr.freq)
+ return -EINVAL;
+
+ /* PDist is not supported */
+ if (event->attr.config2 && event->attr.precise_ip > 2)
+ return -EINVAL;
+
+ /* The reload value cannot exceeds the max period */
+ if (event->attr.sample_period > x86_pmu.max_period)
+ return -EINVAL;
+ /*
+ * The counter-constraints of each event cannot be finalized
+ * unless the whole group is scanned. However, it's hard
+ * to know whether the event is the last one of the group.
+ * Recalculate the counter-constraints for each event when
+ * adding a new event.
+ *
+ * The group is traversed twice, which may be optimized later.
+ * In the first round,
+ * - Find all events which do reload when other events
+ * overflow and set the corresponding counter-constraints
+ * - Add all events, which can cause other events reload,
+ * in the cause_mask
+ * - Error out if the number of events exceeds the HW limit
+ * - The ACR events must be contiguous.
+ * Error out if there are non-X86 events between ACR events.
+ * This is not a HW limit, but a SW limit.
+ * With the assumption, the intel_pmu_acr_late_setup() can
+ * easily convert the event idx to counter idx without
+ * traversing the whole event list.
+ */
+ if (!is_x86_event(leader))
+ return -EINVAL;
+
+ if (leader->attr.config2)
+ intel_pmu_set_acr_cntr_constr(leader, &cause_mask, &num);
+
+ if (leader->nr_siblings) {
+ for_each_sibling_event(sibling, leader) {
+ if (!is_x86_event(sibling)) {
+ has_sw_event = true;
+ continue;
+ }
+ if (!sibling->attr.config2)
+ continue;
+ if (has_sw_event)
+ return -EINVAL;
+ intel_pmu_set_acr_cntr_constr(sibling, &cause_mask, &num);
+ }
+ }
+ if (leader != event && event->attr.config2) {
+ if (has_sw_event)
+ return -EINVAL;
+ intel_pmu_set_acr_cntr_constr(event, &cause_mask, &num);
+ }
+
+ if (hweight64(cause_mask) > hweight64(hybrid(pmu, acr_cause_mask64)) ||
+ num > hweight64(hybrid(event->pmu, acr_cntr_mask64)))
+ return -EINVAL;
+ /*
+ * In the second round, apply the counter-constraints for
+ * the events which can cause other events reload.
+ */
+ intel_pmu_set_acr_caused_constr(leader, idx++, cause_mask);
+
+ if (leader->nr_siblings) {
+ for_each_sibling_event(sibling, leader)
+ intel_pmu_set_acr_caused_constr(sibling, idx++, cause_mask);
+ }
+
+ if (leader != event)
+ intel_pmu_set_acr_caused_constr(event, idx, cause_mask);
+
+ leader->hw.flags |= PERF_X86_EVENT_ACR;
+ }
+
if ((event->attr.type == PERF_TYPE_HARDWARE) ||
(event->attr.type == PERF_TYPE_HW_CACHE))
return 0;
@@ -4081,7 +4453,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
* is used in a metrics group, it too cannot support sampling.
*/
if (intel_pmu_has_cap(event, PERF_CAP_METRICS_IDX) && is_topdown_event(event)) {
- if (event->attr.config1 || event->attr.config2)
+ /* The metrics_clear can only be set for the slots event */
+ if (event->attr.config1 &&
+ (!is_slots_event(event) || (event->attr.config1 & ~INTEL_TD_CFG_METRIC_CLEAR)))
+ return -EINVAL;
+
+ if (event->attr.config2)
return -EINVAL;
/*
@@ -4160,7 +4537,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (x86_pmu.version < 3)
return -EINVAL;
- ret = perf_allow_cpu(&event->attr);
+ ret = perf_allow_cpu();
if (ret)
return ret;
@@ -4208,7 +4585,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
.guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask & ~pebs_mask,
};
- if (!x86_pmu.pebs)
+ if (!x86_pmu.ds_pebs)
return arr;
/*
@@ -4249,7 +4626,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
arr[pebs_enable] = (struct perf_guest_switch_msr){
.msr = MSR_IA32_PEBS_ENABLE,
.host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask,
- .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask,
+ .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask & kvm_pmu->pebs_enable,
};
if (arr[pebs_enable].host) {
@@ -4598,9 +4975,9 @@ static int adl_hw_config(struct perf_event *event)
return -EOPNOTSUPP;
}
-static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void)
+static enum intel_cpu_type adl_get_hybrid_cpu_type(void)
{
- return HYBRID_INTEL_CORE;
+ return INTEL_CPU_TYPE_CORE;
}
static inline bool erratum_hsw11(struct perf_event *event)
@@ -4690,6 +5067,8 @@ PMU_FORMAT_ATTR(in_tx, "config:32" );
PMU_FORMAT_ATTR(in_tx_cp, "config:33" );
PMU_FORMAT_ATTR(eq, "config:36" ); /* v6 + */
+PMU_FORMAT_ATTR(metrics_clear, "config1:0"); /* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */
+
static ssize_t umask2_show(struct device *dev,
struct device_attribute *attr,
char *page)
@@ -4709,6 +5088,7 @@ static struct device_attribute format_attr_umask2 =
static struct attribute *format_evtsel_ext_attrs[] = {
&format_attr_umask2.attr,
&format_attr_eq.attr,
+ &format_attr_metrics_clear.attr,
NULL
};
@@ -4733,6 +5113,13 @@ evtsel_ext_is_visible(struct kobject *kobj, struct attribute *attr, int i)
if (i == 1)
return (mask & ARCH_PERFMON_EVENTSEL_EQ) ? attr->mode : 0;
+ /* PERF_CAPABILITIES.RDPMC_METRICS_CLEAR */
+ if (i == 2) {
+ union perf_capabilities intel_cap = hybrid(dev_get_drvdata(dev), intel_cap);
+
+ return intel_cap.rdpmc_metrics_clear ? attr->mode : 0;
+ }
+
return 0;
}
@@ -4796,7 +5183,7 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
goto err;
}
- if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_BR_CNTR)) {
+ if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_DYN_CONSTRAINT)) {
size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
@@ -4885,27 +5272,39 @@ static inline bool intel_pmu_broken_perf_cap(void)
return false;
}
-static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
+static void update_pmu_cap(struct pmu *pmu)
{
- unsigned int sub_bitmaps, eax, ebx, ecx, edx;
+ unsigned int cntr, fixed_cntr, ecx, edx;
+ union cpuid35_eax eax;
+ union cpuid35_ebx ebx;
- cpuid(ARCH_PERFMON_EXT_LEAF, &sub_bitmaps, &ebx, &ecx, &edx);
+ cpuid(ARCH_PERFMON_EXT_LEAF, &eax.full, &ebx.full, &ecx, &edx);
- if (ebx & ARCH_PERFMON_EXT_UMASK2)
- pmu->config_mask |= ARCH_PERFMON_EVENTSEL_UMASK2;
- if (ebx & ARCH_PERFMON_EXT_EQ)
- pmu->config_mask |= ARCH_PERFMON_EVENTSEL_EQ;
+ if (ebx.split.umask2)
+ hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_UMASK2;
+ if (ebx.split.eq)
+ hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_EQ;
- if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) {
+ if (eax.split.cntr_subleaf) {
cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
- &eax, &ebx, &ecx, &edx);
- pmu->cntr_mask64 = eax;
- pmu->fixed_cntr_mask64 = ebx;
+ &cntr, &fixed_cntr, &ecx, &edx);
+ hybrid(pmu, cntr_mask64) = cntr;
+ hybrid(pmu, fixed_cntr_mask64) = fixed_cntr;
+ }
+
+ if (eax.split.acr_subleaf) {
+ cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF,
+ &cntr, &fixed_cntr, &ecx, &edx);
+ /* The mask of the counters which can be reloaded */
+ hybrid(pmu, acr_cntr_mask64) = cntr | ((u64)fixed_cntr << INTEL_PMC_IDX_FIXED);
+
+ /* The mask of the counters which can cause a reload of reloadable counters */
+ hybrid(pmu, acr_cause_mask64) = ecx | ((u64)edx << INTEL_PMC_IDX_FIXED);
}
if (!intel_pmu_broken_perf_cap()) {
/* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */
- rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities);
+ rdmsrq(MSR_IA32_PERF_CAPABILITIES, hybrid(pmu, intel_cap).capabilities);
}
}
@@ -4923,11 +5322,6 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
else
pmu->intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS);
- if (pmu->intel_cap.pebs_output_pt_available)
- pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
- else
- pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT;
-
intel_pmu_check_event_constraints(pmu->event_constraints,
pmu->cntr_mask64,
pmu->fixed_cntr_mask64,
@@ -4938,7 +5332,8 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void)
{
- u8 cpu_type = get_this_hybrid_cpu_type();
+ struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+ enum intel_cpu_type cpu_type = c->topo.intel_type;
int i;
/*
@@ -4947,7 +5342,7 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void)
* on it. There should be a fixup function provided for these
* troublesome CPUs (->get_hybrid_cpu_type).
*/
- if (cpu_type == HYBRID_INTEL_NONE) {
+ if (cpu_type == INTEL_CPU_TYPE_UNKNOWN) {
if (x86_pmu.get_hybrid_cpu_type)
cpu_type = x86_pmu.get_hybrid_cpu_type();
else
@@ -4964,16 +5359,16 @@ static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void)
enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type;
u32 native_id;
- if (cpu_type == HYBRID_INTEL_CORE && pmu_type == hybrid_big)
+ if (cpu_type == INTEL_CPU_TYPE_CORE && pmu_type == hybrid_big)
return &x86_pmu.hybrid_pmu[i];
- if (cpu_type == HYBRID_INTEL_ATOM) {
+ if (cpu_type == INTEL_CPU_TYPE_ATOM) {
if (x86_pmu.num_hybrid_pmus == 2 && pmu_type == hybrid_small)
return &x86_pmu.hybrid_pmu[i];
- native_id = get_this_hybrid_cpu_native_id();
- if (native_id == skt_native_id && pmu_type == hybrid_small)
+ native_id = c->topo.intel_native_model_id;
+ if (native_id == INTEL_ATOM_SKT_NATIVE_ID && pmu_type == hybrid_small)
return &x86_pmu.hybrid_pmu[i];
- if (native_id == cmt_native_id && pmu_type == hybrid_tiny)
+ if (native_id == INTEL_ATOM_CMT_NATIVE_ID && pmu_type == hybrid_tiny)
return &x86_pmu.hybrid_pmu[i];
}
}
@@ -4996,7 +5391,7 @@ static bool init_hybrid_pmu(int cpu)
goto end;
if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
- update_pmu_cap(pmu);
+ update_pmu_cap(&pmu->pmu);
intel_pmu_check_hybrid_pmus(pmu);
@@ -5005,9 +5400,6 @@ static bool init_hybrid_pmu(int cpu)
pr_info("%s PMU driver: ", pmu->name);
- if (pmu->intel_cap.pebs_output_pt_available)
- pr_cont("PEBS-via-PT ");
-
pr_cont("\n");
x86_pmu_show_pmu_cap(&pmu->pmu);
@@ -5030,8 +5422,11 @@ static void intel_pmu_cpu_starting(int cpu)
init_debug_store_on_cpu(cpu);
/*
- * Deal with CPUs that don't clear their LBRs on power-up.
+ * Deal with CPUs that don't clear their LBRs on power-up, and that may
+ * even boot with LBRs enabled.
*/
+ if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && x86_pmu.lbr_nr)
+ msr_clear_bit(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR_BIT);
intel_pmu_lbr_reset();
cpuc->lbr_sel = NULL;
@@ -5057,7 +5452,7 @@ static void intel_pmu_cpu_starting(int cpu)
if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) {
union perf_capabilities perf_cap;
- rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap.capabilities);
+ rdmsrq(MSR_IA32_PERF_CAPABILITIES, perf_cap.capabilities);
if (!perf_cap.perf_metrics) {
x86_pmu.intel_cap.perf_metrics = 0;
x86_pmu.intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS);
@@ -5150,16 +5545,10 @@ static void intel_pmu_cpu_dead(int cpu)
}
static void intel_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx,
- bool sched_in)
+ struct task_struct *task, bool sched_in)
{
intel_pmu_pebs_sched_task(pmu_ctx, sched_in);
- intel_pmu_lbr_sched_task(pmu_ctx, sched_in);
-}
-
-static void intel_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
- struct perf_event_pmu_context *next_epc)
-{
- intel_pmu_lbr_swap_task_ctx(prev_epc, next_epc);
+ intel_pmu_lbr_sched_task(pmu_ctx, task, sched_in);
}
static int intel_pmu_check_period(struct perf_event *event, u64 value)
@@ -5330,7 +5719,6 @@ static __initconst const struct x86_pmu intel_pmu = {
.guest_get_msrs = intel_guest_get_msrs,
.sched_task = intel_pmu_sched_task,
- .swap_task_ctx = intel_pmu_swap_task_ctx,
.check_period = intel_pmu_check_period,
@@ -5377,46 +5765,36 @@ static __init void intel_clovertown_quirk(void)
* these chips.
*/
pr_warn("PEBS disabled due to CPU errata\n");
- x86_pmu.pebs = 0;
+ x86_pmu.ds_pebs = 0;
x86_pmu.pebs_constraints = NULL;
}
-static const struct x86_cpu_desc isolation_ucodes[] = {
- INTEL_CPU_DESC(INTEL_HASWELL, 3, 0x0000001f),
- INTEL_CPU_DESC(INTEL_HASWELL_L, 1, 0x0000001e),
- INTEL_CPU_DESC(INTEL_HASWELL_G, 1, 0x00000015),
- INTEL_CPU_DESC(INTEL_HASWELL_X, 2, 0x00000037),
- INTEL_CPU_DESC(INTEL_HASWELL_X, 4, 0x0000000a),
- INTEL_CPU_DESC(INTEL_BROADWELL, 4, 0x00000023),
- INTEL_CPU_DESC(INTEL_BROADWELL_G, 1, 0x00000014),
- INTEL_CPU_DESC(INTEL_BROADWELL_D, 2, 0x00000010),
- INTEL_CPU_DESC(INTEL_BROADWELL_D, 3, 0x07000009),
- INTEL_CPU_DESC(INTEL_BROADWELL_D, 4, 0x0f000009),
- INTEL_CPU_DESC(INTEL_BROADWELL_D, 5, 0x0e000002),
- INTEL_CPU_DESC(INTEL_BROADWELL_X, 1, 0x0b000014),
- INTEL_CPU_DESC(INTEL_SKYLAKE_X, 3, 0x00000021),
- INTEL_CPU_DESC(INTEL_SKYLAKE_X, 4, 0x00000000),
- INTEL_CPU_DESC(INTEL_SKYLAKE_X, 5, 0x00000000),
- INTEL_CPU_DESC(INTEL_SKYLAKE_X, 6, 0x00000000),
- INTEL_CPU_DESC(INTEL_SKYLAKE_X, 7, 0x00000000),
- INTEL_CPU_DESC(INTEL_SKYLAKE_X, 11, 0x00000000),
- INTEL_CPU_DESC(INTEL_SKYLAKE_L, 3, 0x0000007c),
- INTEL_CPU_DESC(INTEL_SKYLAKE, 3, 0x0000007c),
- INTEL_CPU_DESC(INTEL_KABYLAKE, 9, 0x0000004e),
- INTEL_CPU_DESC(INTEL_KABYLAKE_L, 9, 0x0000004e),
- INTEL_CPU_DESC(INTEL_KABYLAKE_L, 10, 0x0000004e),
- INTEL_CPU_DESC(INTEL_KABYLAKE_L, 11, 0x0000004e),
- INTEL_CPU_DESC(INTEL_KABYLAKE_L, 12, 0x0000004e),
- INTEL_CPU_DESC(INTEL_KABYLAKE, 10, 0x0000004e),
- INTEL_CPU_DESC(INTEL_KABYLAKE, 11, 0x0000004e),
- INTEL_CPU_DESC(INTEL_KABYLAKE, 12, 0x0000004e),
- INTEL_CPU_DESC(INTEL_KABYLAKE, 13, 0x0000004e),
+static const struct x86_cpu_id isolation_ucodes[] = {
+ X86_MATCH_VFM_STEPS(INTEL_HASWELL, 3, 3, 0x0000001f),
+ X86_MATCH_VFM_STEPS(INTEL_HASWELL_L, 1, 1, 0x0000001e),
+ X86_MATCH_VFM_STEPS(INTEL_HASWELL_G, 1, 1, 0x00000015),
+ X86_MATCH_VFM_STEPS(INTEL_HASWELL_X, 2, 2, 0x00000037),
+ X86_MATCH_VFM_STEPS(INTEL_HASWELL_X, 4, 4, 0x0000000a),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL, 4, 4, 0x00000023),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_G, 1, 1, 0x00000014),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 2, 2, 0x00000010),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 3, 3, 0x07000009),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 4, 4, 0x0f000009),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_D, 5, 5, 0x0e000002),
+ X86_MATCH_VFM_STEPS(INTEL_BROADWELL_X, 1, 1, 0x0b000014),
+ X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 3, 3, 0x00000021),
+ X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 4, 7, 0x00000000),
+ X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_X, 11, 11, 0x00000000),
+ X86_MATCH_VFM_STEPS(INTEL_SKYLAKE_L, 3, 3, 0x0000007c),
+ X86_MATCH_VFM_STEPS(INTEL_SKYLAKE, 3, 3, 0x0000007c),
+ X86_MATCH_VFM_STEPS(INTEL_KABYLAKE, 9, 13, 0x0000004e),
+ X86_MATCH_VFM_STEPS(INTEL_KABYLAKE_L, 9, 12, 0x0000004e),
{}
};
static void intel_check_pebs_isolation(void)
{
- x86_pmu.pebs_no_isolation = !x86_cpu_has_min_microcode_rev(isolation_ucodes);
+ x86_pmu.pebs_no_isolation = !x86_match_min_microcode_rev(isolation_ucodes);
}
static __init void intel_pebs_isolation_quirk(void)
@@ -5426,16 +5804,16 @@ static __init void intel_pebs_isolation_quirk(void)
intel_check_pebs_isolation();
}
-static const struct x86_cpu_desc pebs_ucodes[] = {
- INTEL_CPU_DESC(INTEL_SANDYBRIDGE, 7, 0x00000028),
- INTEL_CPU_DESC(INTEL_SANDYBRIDGE_X, 6, 0x00000618),
- INTEL_CPU_DESC(INTEL_SANDYBRIDGE_X, 7, 0x0000070c),
+static const struct x86_cpu_id pebs_ucodes[] = {
+ X86_MATCH_VFM_STEPS(INTEL_SANDYBRIDGE, 7, 7, 0x00000028),
+ X86_MATCH_VFM_STEPS(INTEL_SANDYBRIDGE_X, 6, 6, 0x00000618),
+ X86_MATCH_VFM_STEPS(INTEL_SANDYBRIDGE_X, 7, 7, 0x0000070c),
{}
};
static bool intel_snb_pebs_broken(void)
{
- return !x86_cpu_has_min_microcode_rev(pebs_ucodes);
+ return !x86_match_min_microcode_rev(pebs_ucodes);
}
static void intel_snb_check_microcode(void)
@@ -5482,24 +5860,24 @@ static bool check_msr(unsigned long msr, u64 mask)
* matches, this is needed to detect certain hardware emulators
* (qemu/kvm) that don't trap on the MSR access and always return 0s.
*/
- if (rdmsrl_safe(msr, &val_old))
+ if (rdmsrq_safe(msr, &val_old))
return false;
/*
- * Only change the bits which can be updated by wrmsrl.
+ * Only change the bits which can be updated by wrmsrq.
*/
val_tmp = val_old ^ mask;
if (is_lbr_from(msr))
val_tmp = lbr_from_signext_quirk_wr(val_tmp);
- if (wrmsrl_safe(msr, val_tmp) ||
- rdmsrl_safe(msr, &val_new))
+ if (wrmsrq_safe(msr, val_tmp) ||
+ rdmsrq_safe(msr, &val_new))
return false;
/*
- * Quirk only affects validation in wrmsr(), so wrmsrl()'s value
- * should equal rdmsrl()'s even with the quirk.
+ * Quirk only affects validation in wrmsr(), so wrmsrq()'s value
+ * should equal rdmsrq()'s even with the quirk.
*/
if (val_new != val_tmp)
return false;
@@ -5510,7 +5888,7 @@ static bool check_msr(unsigned long msr, u64 mask)
/* Here it's sure that the MSR can be safely accessed.
* Restore the old value and return.
*/
- wrmsrl(msr, val_old);
+ wrmsrq(msr, val_old);
return true;
}
@@ -5875,7 +6253,7 @@ tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
static umode_t
pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
{
- return x86_pmu.pebs ? attr->mode : 0;
+ return x86_pmu.ds_pebs ? attr->mode : 0;
}
static umode_t
@@ -5906,6 +6284,21 @@ td_is_visible(struct kobject *kobj, struct attribute *attr, int i)
return attr->mode;
}
+PMU_FORMAT_ATTR(acr_mask, "config2:0-63");
+
+static struct attribute *format_acr_attrs[] = {
+ &format_attr_acr_mask.attr,
+ NULL
+};
+
+static umode_t
+acr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+ struct device *dev = kobj_to_dev(kobj);
+
+ return intel_pmu_has_acr(dev_get_drvdata(dev)) ? attr->mode : 0;
+}
+
static struct attribute_group group_events_td = {
.name = "events",
.is_visible = td_is_visible,
@@ -5948,6 +6341,12 @@ static struct attribute_group group_format_evtsel_ext = {
.is_visible = evtsel_ext_is_visible,
};
+static struct attribute_group group_format_acr = {
+ .name = "format",
+ .attrs = format_acr_attrs,
+ .is_visible = acr_is_visible,
+};
+
static struct attribute_group group_default = {
.attrs = intel_pmu_attrs,
.is_visible = default_is_visible,
@@ -5962,6 +6361,7 @@ static const struct attribute_group *attr_update[] = {
&group_format_extra,
&group_format_extra_skl,
&group_format_evtsel_ext,
+ &group_format_acr,
&group_default,
NULL,
};
@@ -6246,6 +6646,7 @@ static const struct attribute_group *hybrid_attr_update[] = {
&group_caps_lbr,
&hybrid_group_format_extra,
&group_format_evtsel_ext,
+ &group_format_acr,
&group_default,
&hybrid_group_cpus,
NULL,
@@ -6362,11 +6763,9 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus)
pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities;
if (pmu->pmu_type & hybrid_small_tiny) {
pmu->intel_cap.perf_metrics = 0;
- pmu->intel_cap.pebs_output_pt_available = 1;
pmu->mid_ack = true;
} else if (pmu->pmu_type & hybrid_big) {
pmu->intel_cap.perf_metrics = 1;
- pmu->intel_cap.pebs_output_pt_available = 0;
pmu->late_ack = true;
}
}
@@ -6440,6 +6839,7 @@ static __always_inline void intel_pmu_init_skt(struct pmu *pmu)
intel_pmu_init_grt(pmu);
hybrid(pmu, event_constraints) = intel_skt_event_constraints;
hybrid(pmu, extra_regs) = intel_cmt_extra_regs;
+ static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
}
__init int intel_pmu_init(void)
@@ -6458,15 +6858,21 @@ __init int intel_pmu_init(void)
char *name;
struct x86_hybrid_pmu *pmu;
+ /* Architectural Perfmon was introduced starting with Core "Yonah" */
if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
switch (boot_cpu_data.x86) {
- case 0x6:
- return p6_pmu_init();
- case 0xb:
+ case 6:
+ if (boot_cpu_data.x86_vfm < INTEL_CORE_YONAH)
+ return p6_pmu_init();
+ break;
+ case 11:
return knc_pmu_init();
- case 0xf:
+ case 15:
return p4_pmu_init();
}
+
+ pr_cont("unsupported CPU family %d model %d ",
+ boot_cpu_data.x86, boot_cpu_data.x86_model);
return -ENODEV;
}
@@ -6494,6 +6900,7 @@ __init int intel_pmu_init(void)
x86_pmu.pebs_events_mask = intel_pmu_pebs_mask(x86_pmu.cntr_mask64);
x86_pmu.pebs_capable = PEBS_COUNTER_MASK;
+ x86_pmu.config_mask = X86_RAW_EVENT_MASK;
/*
* Quirk: v2 perfmon does not report fixed-purpose events, so
@@ -6510,7 +6917,7 @@ __init int intel_pmu_init(void)
if (boot_cpu_has(X86_FEATURE_PDCM)) {
u64 capabilities;
- rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
+ rdmsrq(MSR_IA32_PERF_CAPABILITIES, capabilities);
x86_pmu.intel_cap.capabilities = capabilities;
}
@@ -6522,7 +6929,7 @@ __init int intel_pmu_init(void)
if (boot_cpu_has(X86_FEATURE_ARCH_LBR))
intel_pmu_arch_lbr_init();
- intel_ds_init();
+ intel_pebs_init();
x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
@@ -6533,6 +6940,12 @@ __init int intel_pmu_init(void)
}
/*
+ * Many features on and after V6 require dynamic constraint,
+ * e.g., Arch PEBS, ACR.
+ */
+ if (version >= 6)
+ x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT;
+ /*
* Install the hw-cache-events table:
*/
switch (boot_cpu_data.x86_vfm) {
@@ -6614,7 +7027,7 @@ __init int intel_pmu_init(void)
case INTEL_ATOM_SILVERMONT_D:
case INTEL_ATOM_SILVERMONT_MID:
case INTEL_ATOM_AIRMONT:
- case INTEL_ATOM_AIRMONT_MID:
+ case INTEL_ATOM_SILVERMONT_MID2:
memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
@@ -6743,6 +7156,18 @@ __init int intel_pmu_init(void)
name = "crestmont";
break;
+ case INTEL_ATOM_DARKMONT_X:
+ intel_pmu_init_skt(NULL);
+ intel_pmu_pebs_data_source_cmt();
+ x86_pmu.pebs_latency_data = cmt_latency_data;
+ x86_pmu.get_event_constraints = cmt_get_event_constraints;
+ td_attr = skt_events_attrs;
+ mem_attr = grt_mem_attrs;
+ extra_attr = cmt_format_attr;
+ pr_cont("Darkmont events, ");
+ name = "darkmont";
+ break;
+
case INTEL_WESTMERE:
case INTEL_WESTMERE_EP:
case INTEL_WESTMERE_EX:
@@ -7173,8 +7598,17 @@ __init int intel_pmu_init(void)
name = "meteorlake_hybrid";
break;
+ case INTEL_PANTHERLAKE_L:
+ pr_cont("Pantherlake Hybrid events, ");
+ name = "pantherlake_hybrid";
+ goto lnl_common;
+
case INTEL_LUNARLAKE_M:
case INTEL_ARROWLAKE:
+ pr_cont("Lunarlake Hybrid events, ");
+ name = "lunarlake_hybrid";
+
+ lnl_common:
intel_pmu_init_hybrid(hybrid_big_small);
x86_pmu.pebs_latency_data = lnl_latency_data;
@@ -7196,8 +7630,6 @@ __init int intel_pmu_init(void)
intel_pmu_init_skt(&pmu->pmu);
intel_pmu_pebs_data_source_lnl();
- pr_cont("Lunarlake Hybrid events, ");
- name = "lunarlake_hybrid";
break;
case INTEL_ARROWLAKE_H:
@@ -7285,6 +7717,18 @@ __init int intel_pmu_init(void)
x86_pmu.attr_update = hybrid_attr_update;
}
+ /*
+ * The archPerfmonExt (0x23) includes an enhanced enumeration of
+ * PMU architectural features with a per-core view. For non-hybrid,
+ * each core has the same PMU capabilities. It's good enough to
+ * update the x86_pmu from the booting CPU. For hybrid, the x86_pmu
+ * is used to keep the common capabilities. Still keep the values
+ * from the leaf 0xa. The core specific update will be done later
+ * when a new type is online.
+ */
+ if (!is_hybrid() && boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
+ update_pmu_cap(NULL);
+
intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64,
&x86_pmu.fixed_cntr_mask64,
&x86_pmu.intel_ctrl);