summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- arch/x86/events/intel/lbr.c 79
-rw-r--r-- arch/x86/events/perf_event.h 21
-rw-r--r-- arch/x86/include/asm/fpu/types.h 20
-rw-r--r-- arch/x86/include/asm/fpu/xstate.h 3
-rw-r--r-- arch/x86/include/asm/perf_event.h 4
-rw-r--r-- arch/x86/kernel/fpu/xstate.c 2
6 files changed, 119 insertions, 10 deletions
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 3ad528996d1c..cb1a0495339b 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -483,6 +483,17 @@ static void intel_pmu_arch_lbr_restore(void *ctx)
}
}
+/*
+ * Restore the Architectural LBR state from the xsave area in the perf
+ * context data for the task via the XRSTORS instruction.
+ *
+ * @ctx: per-task perf context. It is interpreted as a
+ *       struct x86_perf_task_context_arch_lbr_xsave, so it must have been
+ *       allocated with that layout.
+ *
+ * Only the XFEATURE_MASK_LBR component is requested from
+ * copy_kernel_to_dynamic_supervisor().
+ */
+static void intel_pmu_arch_lbr_xrstors(void *ctx)
+{
+ struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
+
+ copy_kernel_to_dynamic_supervisor(&task_ctx->xsave, XFEATURE_MASK_LBR);
+}
+
static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
{
if (static_cpu_has(X86_FEATURE_ARCH_LBR))
@@ -557,6 +568,17 @@ static void intel_pmu_arch_lbr_save(void *ctx)
entries[x86_pmu.lbr_nr - 1].from = 0;
}
+/*
+ * Save the Architectural LBR state to the xsave area in the perf
+ * context data for the task via the XSAVES instruction.
+ *
+ * @ctx: per-task perf context. It is interpreted as a
+ *       struct x86_perf_task_context_arch_lbr_xsave, so it must have been
+ *       allocated with that layout.
+ *
+ * Only the XFEATURE_MASK_LBR component is requested from
+ * copy_dynamic_supervisor_to_kernel().
+ */
+static void intel_pmu_arch_lbr_xsaves(void *ctx)
+{
+ struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
+
+ copy_dynamic_supervisor_to_kernel(&task_ctx->xsave, XFEATURE_MASK_LBR);
+}
+
static void __intel_pmu_lbr_save(void *ctx)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1639,12 +1661,40 @@ void intel_pmu_lbr_init_knl(void)
x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
}
+/*
+ * LBR state size is variable based on the max number of registers.
+ * This calculates the expected state size, which should match
+ * what the hardware enumerates for the size of XFEATURE_LBR.
+ *
+ * The result is the fixed part, sizeof(struct arch_lbr_state), plus
+ * x86_pmu.lbr_nr records of struct lbr_entry. x86_pmu.lbr_nr is read
+ * at call time, so it has to be set before this helper is used.
+ */
+static inline unsigned int get_lbr_state_size(void)
+{
+ return sizeof(struct arch_lbr_state) +
+ x86_pmu.lbr_nr * sizeof(struct lbr_entry);
+}
+
+static bool is_arch_lbr_xsave_available(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_XSAVES))
+ return false;
+
+ /*
+ * Compare the size reported by xfeature_size(XFEATURE_LBR) with the
+ * size computed from the software structures (get_lbr_state_size()).
+ * Warn and disable LBR XSAVES support if the sizes don't match.
+ *
+ * NOTE(review): if XSAVES is present but the LBR state component is
+ * not enumerated, xfeature_size() presumably reports 0 and this
+ * WARN_ON fires although nothing is misconfigured; a quiet
+ * "return false" for a zero size may be preferable. The body of
+ * xfeature_size() is not visible here -- confirm before changing.
+ */
+ if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
+ return false;
+
+ return true;
+}
+
void __init intel_pmu_arch_lbr_init(void)
{
+ struct pmu *pmu = x86_get_pmu();
union cpuid28_eax eax;
union cpuid28_ebx ebx;
union cpuid28_ecx ecx;
unsigned int unused_edx;
+ bool arch_lbr_xsave;
size_t size;
u64 lbr_nr;
@@ -1670,9 +1720,22 @@ void __init intel_pmu_arch_lbr_init(void)
x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
x86_pmu.lbr_nr = lbr_nr;
- size = sizeof(struct x86_perf_task_context_arch_lbr) +
- lbr_nr * sizeof(struct lbr_entry);
- x86_get_pmu()->task_ctx_cache = create_lbr_kmem_cache(size, 0);
+
+ arch_lbr_xsave = is_arch_lbr_xsave_available();
+ if (arch_lbr_xsave) {
+ size = sizeof(struct x86_perf_task_context_arch_lbr_xsave) +
+ get_lbr_state_size();
+ pmu->task_ctx_cache = create_lbr_kmem_cache(size,
+ XSAVE_ALIGNMENT);
+ }
+
+ if (!pmu->task_ctx_cache) {
+ arch_lbr_xsave = false;
+
+ size = sizeof(struct x86_perf_task_context_arch_lbr) +
+ lbr_nr * sizeof(struct lbr_entry);
+ pmu->task_ctx_cache = create_lbr_kmem_cache(size, 0);
+ }
x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0;
x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0;
@@ -1705,8 +1768,14 @@ void __init intel_pmu_arch_lbr_init(void)
x86_pmu.lbr_reset = intel_pmu_arch_lbr_reset;
x86_pmu.lbr_read = intel_pmu_arch_lbr_read;
- x86_pmu.lbr_save = intel_pmu_arch_lbr_save;
- x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore;
+ if (arch_lbr_xsave) {
+ x86_pmu.lbr_save = intel_pmu_arch_lbr_xsaves;
+ x86_pmu.lbr_restore = intel_pmu_arch_lbr_xrstors;
+ pr_cont("XSAVE ");
+ } else {
+ x86_pmu.lbr_save = intel_pmu_arch_lbr_save;
+ x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore;
+ }
pr_cont("Architectural LBR, ");
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 3f7c329374bb..d5e351c1f3c1 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -777,6 +777,27 @@ struct x86_perf_task_context_arch_lbr {
struct lbr_entry entries[];
};
+/*
+ * Per-task LBR context used when the LBR state is saved/restored through
+ * the xsave area rather than through individual entries.
+ *
+ * The anonymous union gives two views of the same buffer: 'xsave' is the
+ * view handed to the supervisor-state copy helpers, while the packed
+ * anonymous struct names the i387 area, the xstate header and the LBR
+ * state that follows them.
+ *
+ * The aligned(XSAVE_ALIGNMENT) attribute makes the compiler add padding
+ * after 'opt' to guarantee the 64-byte alignment of the state buffer.
+ *
+ * The structure is dynamically allocated. The size of the LBR state may vary
+ * based on the number of LBR registers.
+ *
+ * Do not put anything after the LBR state: struct arch_lbr_state ends in
+ * a flexible array member, so its entries occupy the tail of the
+ * allocation.
+ */
+struct x86_perf_task_context_arch_lbr_xsave {
+ struct x86_perf_task_context_opt opt;
+
+ union {
+ struct xregs_state xsave;
+ struct {
+ struct fxregs_state i387;
+ struct xstate_header header;
+ struct arch_lbr_state lbr;
+ } __attribute__ ((packed, aligned (XSAVE_ALIGNMENT)));
+ };
+};
+
#define x86_add_quirk(func_) \
do { \
static struct x86_pmu_quirk __quirk __initdata = { \
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 132e9cc26d60..c87364ea6446 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -236,6 +236,26 @@ struct pkru_state {
u32 pad;
} __packed;
+/*
+ * State component 15: Architectural LBR configuration state.
+ * The size of Arch LBR state depends on the number of LBRs (lbr_depth).
+ *
+ * struct lbr_entry is one branch record made of three u64 values
+ * (from, to, info).
+ *
+ * struct arch_lbr_state is a packed, fixed part of five u64 fields
+ * followed by a flexible array of struct lbr_entry records; the array
+ * length is not part of the type and must be accounted for by whoever
+ * allocates the buffer.
+ */
+
+struct lbr_entry {
+ u64 from;
+ u64 to;
+ u64 info;
+};
+
+struct arch_lbr_state {
+ u64 lbr_ctl;
+ u64 lbr_depth;
+ u64 ler_from;
+ u64 ler_to;
+ u64 ler_info;
+ struct lbr_entry entries[];
+} __packed;
+
struct xstate_header {
u64 xfeatures;
u64 xcomp_bv;
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index c029fce627cf..1559554af931 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -21,6 +21,8 @@
#define XSAVE_YMM_SIZE 256
#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
+#define XSAVE_ALIGNMENT 64
+
/* All currently supported user features */
#define XFEATURE_MASK_USER_SUPPORTED (XFEATURE_MASK_FP | \
XFEATURE_MASK_SSE | \
@@ -101,6 +103,7 @@ extern void __init update_regset_xstate_info(unsigned int size,
void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
const void *get_xsave_field_ptr(int xfeature_nr);
int using_compacted_format(void);
+int xfeature_size(int xfeature_nr);
int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 2e29558c9c6b..0c1b13720525 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -282,10 +282,6 @@ struct pebs_xmm {
u64 xmm[16*2]; /* two entries for each register */
};
-struct lbr_entry {
- u64 from, to, info;
-};
-
/*
* IBS cpuid feature detection
*/
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index b0c22b7dae0a..10cf8789c378 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -488,7 +488,7 @@ static int xfeature_uncompacted_offset(int xfeature_nr)
return ebx;
}
-static int xfeature_size(int xfeature_nr)
+int xfeature_size(int xfeature_nr)
{
u32 eax, ebx, ecx, edx;