summaryrefslogtreecommitdiff
path: root/arch/x86/events/intel/ds.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/events/intel/ds.c')
-rw-r--r--arch/x86/events/intel/ds.c219
1 files changed, 123 insertions, 96 deletions
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 04b83d5af4c4..08de293bebad 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -953,11 +953,11 @@ unlock:
return 1;
}
-static inline void intel_pmu_drain_pebs_buffer(void)
+void intel_pmu_drain_pebs_buffer(void)
{
struct perf_sample_data data;
- x86_pmu.drain_pebs(NULL, &data);
+ static_call(x86_pmu_drain_pebs)(NULL, &data);
}
/*
@@ -1338,8 +1338,10 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event)
* + precise_ip < 2 for the non event IP
* + For RTM TSX weight we need GPRs for the abort code.
*/
- gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
- (attr->sample_regs_intr & PEBS_GP_REGS);
+ gprs = ((sample_type & PERF_SAMPLE_REGS_INTR) &&
+ (attr->sample_regs_intr & PEBS_GP_REGS)) ||
+ ((sample_type & PERF_SAMPLE_REGS_USER) &&
+ (attr->sample_regs_user & PEBS_GP_REGS));
tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
((attr->config & INTEL_ARCH_EVENT_MASK) ==
@@ -1789,8 +1791,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
* previous PMI context or an (I)RET happened between the record and
* PMI.
*/
- if (sample_type & PERF_SAMPLE_CALLCHAIN)
- perf_sample_save_callchain(data, event, iregs);
+ perf_sample_save_callchain(data, event, iregs);
/*
* We use the interrupt regs as a base because the PEBS record does not
@@ -1889,8 +1890,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
if (x86_pmu.intel_cap.pebs_format >= 3)
setup_pebs_time(event, data, pebs->tsc);
- if (has_branch_stack(event))
- perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
+ perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
}
static void adaptive_pebs_save_regs(struct pt_regs *regs,
@@ -1917,8 +1917,6 @@ static void adaptive_pebs_save_regs(struct pt_regs *regs,
}
#define PEBS_LATENCY_MASK 0xffff
-#define PEBS_CACHE_LATENCY_OFFSET 32
-#define PEBS_RETIRE_LATENCY_OFFSET 32
/*
* With adaptive PEBS the layout depends on what fields are configured.
@@ -1932,8 +1930,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct pebs_basic *basic = __pebs;
void *next_record = basic + 1;
- u64 sample_type;
- u64 format_size;
+ u64 sample_type, format_group;
struct pebs_meminfo *meminfo = NULL;
struct pebs_gprs *gprs = NULL;
struct x86_perf_regs *perf_regs;
@@ -1945,7 +1942,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
perf_regs->xmm_regs = NULL;
sample_type = event->attr.sample_type;
- format_size = basic->format_size;
+ format_group = basic->format_group;
perf_sample_data_init(data, 0, event->hw.last_period);
data->period = event->hw.last_period;
@@ -1957,8 +1954,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
* previous PMI context or an (I)RET happened between the record and
* PMI.
*/
- if (sample_type & PERF_SAMPLE_CALLCHAIN)
- perf_sample_save_callchain(data, event, iregs);
+ perf_sample_save_callchain(data, event, iregs);
*regs = *iregs;
/* The ip in basic is EventingIP */
@@ -1967,7 +1963,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)
- data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK;
+ data->weight.var3_w = basic->retire_latency;
else
data->weight.var3_w = 0;
}
@@ -1977,12 +1973,12 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
* But PERF_SAMPLE_TRANSACTION needs gprs->ax.
* Save the pointer here but process later.
*/
- if (format_size & PEBS_DATACFG_MEMINFO) {
+ if (format_group & PEBS_DATACFG_MEMINFO) {
meminfo = next_record;
next_record = meminfo + 1;
}
- if (format_size & PEBS_DATACFG_GP) {
+ if (format_group & PEBS_DATACFG_GP) {
gprs = next_record;
next_record = gprs + 1;
@@ -1991,18 +1987,17 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
regs->flags &= ~PERF_EFLAGS_EXACT;
}
- if (sample_type & PERF_SAMPLE_REGS_INTR)
+ if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER))
adaptive_pebs_save_regs(regs, gprs);
}
- if (format_size & PEBS_DATACFG_MEMINFO) {
+ if (format_group & PEBS_DATACFG_MEMINFO) {
if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
- u64 weight = meminfo->latency;
+ u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
+ meminfo->cache_latency : meminfo->mem_latency;
- if (x86_pmu.flags & PMU_FL_INSTR_LATENCY) {
- data->weight.var2_w = weight & PEBS_LATENCY_MASK;
- weight >>= PEBS_CACHE_LATENCY_OFFSET;
- }
+ if (x86_pmu.flags & PMU_FL_INSTR_LATENCY)
+ data->weight.var2_w = meminfo->instr_latency;
/*
* Although meminfo::latency is defined as a u64,
@@ -2010,12 +2005,13 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
* in practice on Ice Lake and earlier platforms.
*/
if (sample_type & PERF_SAMPLE_WEIGHT) {
- data->weight.full = weight ?:
+ data->weight.full = latency ?:
intel_get_tsx_weight(meminfo->tsx_tuning);
} else {
- data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
+ data->weight.var1_dw = (u32)latency ?:
intel_get_tsx_weight(meminfo->tsx_tuning);
}
+
data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
}
@@ -2036,16 +2032,16 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
}
}
- if (format_size & PEBS_DATACFG_XMMS) {
+ if (format_group & PEBS_DATACFG_XMMS) {
struct pebs_xmm *xmm = next_record;
next_record = xmm + 1;
perf_regs->xmm_regs = xmm->xmm;
}
- if (format_size & PEBS_DATACFG_LBRS) {
+ if (format_group & PEBS_DATACFG_LBRS) {
struct lbr_entry *lbr = next_record;
- int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
+ int num_lbr = ((format_group >> PEBS_DATACFG_LBR_SHIFT)
& 0xff) + 1;
next_record = next_record + num_lbr * sizeof(struct lbr_entry);
@@ -2055,11 +2051,11 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
}
}
- WARN_ONCE(next_record != __pebs + (format_size >> 48),
- "PEBS record size %llu, expected %llu, config %llx\n",
- format_size >> 48,
+ WARN_ONCE(next_record != __pebs + basic->format_size,
+ "PEBS record size %u, expected %llu, config %llx\n",
+ basic->format_size,
(u64)(next_record - __pebs),
- basic->format_size);
+ format_group);
}
static inline void *
@@ -2100,15 +2096,6 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
return NULL;
}
-void intel_pmu_auto_reload_read(struct perf_event *event)
-{
- WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
-
- perf_pmu_disable(event->pmu);
- intel_pmu_drain_pebs_buffer();
- perf_pmu_enable(event->pmu);
-}
-
/*
* Special variant of intel_pmu_save_and_restart() for auto-reload.
*/
@@ -2170,46 +2157,33 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
return 0;
}
+typedef void (*setup_fn)(struct perf_event *, struct pt_regs *, void *,
+ struct perf_sample_data *, struct pt_regs *);
+
+static struct pt_regs dummy_iregs;
+
static __always_inline void
__intel_pmu_pebs_event(struct perf_event *event,
struct pt_regs *iregs,
+ struct pt_regs *regs,
struct perf_sample_data *data,
- void *base, void *top,
- int bit, int count,
- void (*setup_sample)(struct perf_event *,
- struct pt_regs *,
- void *,
- struct perf_sample_data *,
- struct pt_regs *))
+ void *at,
+ setup_fn setup_sample)
{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
- struct x86_perf_regs perf_regs;
- struct pt_regs *regs = &perf_regs.regs;
- void *at = get_next_pebs_record_by_bit(base, top, bit);
- static struct pt_regs dummy_iregs;
-
- if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
- /*
- * Now, auto-reload is only enabled in fixed period mode.
- * The reload value is always hwc->sample_period.
- * May need to change it, if auto-reload is enabled in
- * freq mode later.
- */
- intel_pmu_save_and_restart_reload(event, count);
- } else if (!intel_pmu_save_and_restart(event))
- return;
-
- if (!iregs)
- iregs = &dummy_iregs;
+ setup_sample(event, iregs, at, data, regs);
+ perf_event_output(event, data, regs);
+}
- while (count > 1) {
- setup_sample(event, iregs, at, data, regs);
- perf_event_output(event, data, regs);
- at += cpuc->pebs_record_size;
- at = get_next_pebs_record_by_bit(at, top, bit);
- count--;
- }
+static __always_inline void
+__intel_pmu_pebs_last_event(struct perf_event *event,
+ struct pt_regs *iregs,
+ struct pt_regs *regs,
+ struct perf_sample_data *data,
+ void *at,
+ int count,
+ setup_fn setup_sample)
+{
+ struct hw_perf_event *hwc = &event->hw;
setup_sample(event, iregs, at, data, regs);
if (iregs == &dummy_iregs) {
@@ -2228,6 +2202,44 @@ __intel_pmu_pebs_event(struct perf_event *event,
if (perf_event_overflow(event, data, regs))
x86_pmu_stop(event, 0);
}
+
+ if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
+ /*
+ * Now, auto-reload is only enabled in fixed period mode.
+ * The reload value is always hwc->sample_period.
+ * May need to change it, if auto-reload is enabled in
+ * freq mode later.
+ */
+ intel_pmu_save_and_restart_reload(event, count);
+ } else
+ intel_pmu_save_and_restart(event);
+}
+
+static __always_inline void
+__intel_pmu_pebs_events(struct perf_event *event,
+ struct pt_regs *iregs,
+ struct perf_sample_data *data,
+ void *base, void *top,
+ int bit, int count,
+ setup_fn setup_sample)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct x86_perf_regs perf_regs;
+ struct pt_regs *regs = &perf_regs.regs;
+ void *at = get_next_pebs_record_by_bit(base, top, bit);
+ int cnt = count;
+
+ if (!iregs)
+ iregs = &dummy_iregs;
+
+ while (cnt > 1) {
+ __intel_pmu_pebs_event(event, iregs, regs, data, at, setup_sample);
+ at += cpuc->pebs_record_size;
+ at = get_next_pebs_record_by_bit(at, top, bit);
+ cnt--;
+ }
+
+ __intel_pmu_pebs_last_event(event, iregs, regs, data, at, count, setup_sample);
}
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data)
@@ -2264,8 +2276,8 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_
return;
}
- __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n,
- setup_pebs_fixed_sample_data);
+ __intel_pmu_pebs_events(event, iregs, data, at, top, 0, n,
+ setup_pebs_fixed_sample_data);
}
static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
@@ -2396,9 +2408,9 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
}
if (counts[bit]) {
- __intel_pmu_pebs_event(event, iregs, data, base,
- top, bit, counts[bit],
- setup_pebs_fixed_sample_data);
+ __intel_pmu_pebs_events(event, iregs, data, base,
+ top, bit, counts[bit],
+ setup_pebs_fixed_sample_data);
}
}
}
@@ -2406,8 +2418,12 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
{
short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+ void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct debug_store *ds = cpuc->ds;
+ struct x86_perf_regs perf_regs;
+ struct pt_regs *regs = &perf_regs.regs;
+ struct pebs_basic *basic;
struct perf_event *event;
void *base, *at, *top;
int bit;
@@ -2429,30 +2445,41 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
return;
}
- for (at = base; at < top; at += cpuc->pebs_record_size) {
+ if (!iregs)
+ iregs = &dummy_iregs;
+
+ /* Process all but the last event for each counter. */
+ for (at = base; at < top; at += basic->format_size) {
u64 pebs_status;
- pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
- pebs_status &= mask;
+ basic = at;
+ if (basic->format_size != cpuc->pebs_record_size)
+ continue;
- for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX)
- counts[bit]++;
+ pebs_status = basic->applicable_counters & cpuc->pebs_enabled & mask;
+ for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX) {
+ event = cpuc->events[bit];
+
+ if (WARN_ON_ONCE(!event) ||
+ WARN_ON_ONCE(!event->attr.precise_ip))
+ continue;
+
+ if (counts[bit]++) {
+ __intel_pmu_pebs_event(event, iregs, regs, data, last[bit],
+ setup_pebs_adaptive_sample_data);
+ }
+ last[bit] = at;
+ }
}
for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) {
- if (counts[bit] == 0)
+ if (!counts[bit])
continue;
event = cpuc->events[bit];
- if (WARN_ON_ONCE(!event))
- continue;
-
- if (WARN_ON_ONCE(!event->attr.precise_ip))
- continue;
- __intel_pmu_pebs_event(event, iregs, data, base,
- top, bit, counts[bit],
- setup_pebs_adaptive_sample_data);
+ __intel_pmu_pebs_last_event(event, iregs, regs, data, last[bit],
+ counts[bit], setup_pebs_adaptive_sample_data);
}
}