summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2026-02-09 21:35:16 +0300
committerPaolo Bonzini <pbonzini@redhat.com>2026-02-11 20:45:40 +0300
commitbf2c3138ae3694d4687cbe451c774c288ae2ad06 (patch)
treefbdbea99bc0e07933c5d42b483a4342629ce10ec /include
parent1b13885edf0a55a451a26d5fa53e7877b31debb5 (diff)
parentd374b89edbb9a8d552e03348f59287ff779b4c9d (diff)
downloadlinux-bf2c3138ae3694d4687cbe451c774c288ae2ad06.tar.xz
Merge tag 'kvm-x86-pmu-6.20' of https://github.com/kvm-x86/linux into HEAD
KVM mediated PMU support for 6.20 Add support for mediated PMUs, where KVM gives the guest full ownership of PMU hardware (contexted switched around the fastpath run loop) and allows direct access to data MSRs and PMCs (restricted by the vPMU model), but intercepts access to control registers, e.g. to enforce event filtering and to prevent the guest from profiling sensitive host state. To keep overall complexity reasonable, mediated PMU usage is all or nothing for a given instance of KVM (controlled via module param). The Mediated PMU is disabled default, partly to maintain backwards compatilibity for existing setup, partly because there are tradeoffs when running with a mediated PMU that may be non-starters for some use cases, e.g. the host loses the ability to profile guests with mediated PMUs, the fastpath run loop is also a blind spot, entry/exit transitions are more expensive, etc. Versus the emulated PMU, where KVM is "just another perf user", the mediated PMU delivers more accurate profiling and monitoring (no risk of contention and thus dropped events), with significantly less overhead (fewer exits and faster emulation/programming of event selectors) E.g. when running Specint-2017 on a single-socket Sapphire Rapids with 56 cores and no-SMT, and using perf from within the guest: Perf command: a. basic-sampling: perf record -F 1000 -e 6-instructions -a --overwrite b. multiplex-sampling: perf record -F 1000 -e 10-instructions -a --overwrite Guest performance overhead: --------------------------------------------------------------------------- | Test case | emulated vPMU | all passthrough | passthrough with | | | | | event filters | --------------------------------------------------------------------------- | basic-sampling | 33.62% | 4.24% | 6.21% | --------------------------------------------------------------------------- | multiplex-sampling | 79.32% | 7.34% | 10.45% | ---------------------------------------------------------------------------
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/Kbuild1
-rw-r--r--include/linux/kvm_host.h11
-rw-r--r--include/linux/perf_event.h35
-rw-r--r--include/linux/unwind_user.h18
4 files changed, 55 insertions, 10 deletions
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index 295c94a3ccc1..9aff61e7b8f2 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -32,6 +32,7 @@ mandatory-y += irq_work.h
mandatory-y += kdebug.h
mandatory-y += kmap_size.h
mandatory-y += kprobes.h
+mandatory-y += kvm_types.h
mandatory-y += linkage.h
mandatory-y += local.h
mandatory-y += local64.h
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 021d1fa09e92..c05a79c21745 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1756,10 +1756,17 @@ static inline bool kvm_arch_intc_initialized(struct kvm *kvm)
#ifdef CONFIG_GUEST_PERF_EVENTS
unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu);
-void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void));
+void __kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void),
+ void (*mediated_pmi_handler)(void));
+
+static inline void kvm_register_perf_callbacks(void)
+{
+ __kvm_register_perf_callbacks(NULL, NULL);
+}
+
void kvm_unregister_perf_callbacks(void);
#else
-static inline void kvm_register_perf_callbacks(void *ign) {}
+static inline void kvm_register_perf_callbacks(void) {}
static inline void kvm_unregister_perf_callbacks(void) {}
#endif /* CONFIG_GUEST_PERF_EVENTS */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 9ded2e582c60..48d851fbd8ea 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -305,6 +305,7 @@ struct perf_event_pmu_context;
#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
#define PERF_PMU_CAP_AUX_PAUSE 0x0200
#define PERF_PMU_CAP_AUX_PREFER_LARGE 0x0400
+#define PERF_PMU_CAP_MEDIATED_VPMU 0x0800
/**
* pmu::scope
@@ -998,6 +999,11 @@ struct perf_event_groups {
u64 index;
};
+struct perf_time_ctx {
+ u64 time;
+ u64 stamp;
+ u64 offset;
+};
/**
* struct perf_event_context - event context structure
@@ -1036,9 +1042,12 @@ struct perf_event_context {
/*
* Context clock, runs when context enabled.
*/
- u64 time;
- u64 timestamp;
- u64 timeoffset;
+ struct perf_time_ctx time;
+
+ /*
+ * Context clock, runs when in the guest mode.
+ */
+ struct perf_time_ctx timeguest;
/*
* These fields let us detect when two contexts have both
@@ -1171,9 +1180,8 @@ struct bpf_perf_event_data_kern {
* This is a per-cpu dynamically allocated data structure.
*/
struct perf_cgroup_info {
- u64 time;
- u64 timestamp;
- u64 timeoffset;
+ struct perf_time_ctx time;
+ struct perf_time_ctx timeguest;
int active;
};
@@ -1669,6 +1677,8 @@ struct perf_guest_info_callbacks {
unsigned int (*state)(void);
unsigned long (*get_ip)(void);
unsigned int (*handle_intel_pt_intr)(void);
+
+ void (*handle_mediated_pmi)(void);
};
#ifdef CONFIG_GUEST_PERF_EVENTS
@@ -1678,6 +1688,7 @@ extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state);
DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
+DECLARE_STATIC_CALL(__perf_guest_handle_mediated_pmi, *perf_guest_cbs->handle_mediated_pmi);
static inline unsigned int perf_guest_state(void)
{
@@ -1694,6 +1705,11 @@ static inline unsigned int perf_guest_handle_intel_pt_intr(void)
return static_call(__perf_guest_handle_intel_pt_intr)();
}
+static inline void perf_guest_handle_mediated_pmi(void)
+{
+ static_call(__perf_guest_handle_mediated_pmi)();
+}
+
extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
@@ -1914,6 +1930,13 @@ extern int perf_event_account_interrupt(struct perf_event *event);
extern int perf_event_period(struct perf_event *event, u64 value);
extern u64 perf_event_pause(struct perf_event *event, bool reset);
+#ifdef CONFIG_PERF_GUEST_MEDIATED_PMU
+int perf_create_mediated_pmu(void);
+void perf_release_mediated_pmu(void);
+void perf_load_guest_context(void);
+void perf_put_guest_context(void);
+#endif
+
#else /* !CONFIG_PERF_EVENTS: */
static inline void *
diff --git a/include/linux/unwind_user.h b/include/linux/unwind_user.h
index 7f7282516bf5..64618618febd 100644
--- a/include/linux/unwind_user.h
+++ b/include/linux/unwind_user.h
@@ -5,8 +5,22 @@
#include <linux/unwind_user_types.h>
#include <asm/unwind_user.h>
-#ifndef ARCH_INIT_USER_FP_FRAME
- #define ARCH_INIT_USER_FP_FRAME
+#ifndef CONFIG_HAVE_UNWIND_USER_FP
+
+#define ARCH_INIT_USER_FP_FRAME(ws)
+
+#endif
+
+#ifndef ARCH_INIT_USER_FP_ENTRY_FRAME
+#define ARCH_INIT_USER_FP_ENTRY_FRAME(ws)
+#endif
+
+#ifndef unwind_user_at_function_start
+static inline bool unwind_user_at_function_start(struct pt_regs *regs)
+{
+ return false;
+}
+#define unwind_user_at_function_start unwind_user_at_function_start
#endif
int unwind_user(struct unwind_stacktrace *trace, unsigned int max_entries);