diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-13 03:26:58 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-13 03:26:58 +0300 |
commit | 8e5b0adeea19309c8ce0e3c9119061554973efa9 (patch) | |
tree | cfca61dab75ac07f8651ab0bc33b597707abf5cb | |
parent | 13eaa5bda0df8f5c1c4f2a4fb4a0bc20787dcc68 (diff) | |
parent | a9f4a6e92b3b319296fb078da2615f618f6cd80c (diff) | |
download | linux-8e5b0adeea19309c8ce0e3c9119061554973efa9.tar.xz |
Merge tag 'perf_core_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Borislav Petkov:
"Cleanup of the perf/kvm interaction."
* tag 'perf_core_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf: Drop guest callback (un)register stubs
KVM: arm64: Drop perf.c and fold its tiny bits of code into arm.c
KVM: arm64: Hide kvm_arm_pmu_available behind CONFIG_HW_PERF_EVENTS=y
KVM: arm64: Convert to the generic perf callbacks
KVM: x86: Move Intel Processor Trace interrupt handler to vmx.c
KVM: Move x86's perf guest info callbacks to generic KVM
KVM: x86: More precisely identify NMI from guest when handling PMI
KVM: x86: Drop current_vcpu for kvm_running_vcpu + kvm_arch_vcpu variable
perf/core: Use static_call to optimize perf_guest_info_callbacks
perf: Force architectures to opt-in to guest callbacks
perf: Add wrappers for invoking guest callbacks
perf/core: Rework guest callbacks to prepare for static_call support
perf: Drop dead and useless guest "support" from arm, csky, nds32 and riscv
perf: Stop pretending that perf can handle multiple guest callbacks
KVM: x86: Register Processor Trace interrupt hook iff PT enabled in guest
KVM: x86: Register perf callbacks after calling vendor's hardware_setup()
perf: Protect perf_guest_cbs with RCU
29 files changed, 248 insertions, 256 deletions
diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c index 3b69a76d341e..bc6b246ab55e 100644 --- a/arch/arm/kernel/perf_callchain.c +++ b/arch/arm/kernel/perf_callchain.c @@ -64,11 +64,6 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs { struct frame_tail __user *tail; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* We don't support guest os callchain now */ - return; - } - perf_callchain_store(entry, regs->ARM_pc); if (!current->mm) @@ -100,20 +95,12 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re { struct stackframe fr; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* We don't support guest os callchain now */ - return; - } - arm_get_current_stackframe(regs, &fr); walk_stackframe(&fr, callchain_trace, entry); } unsigned long perf_instruction_pointer(struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); - return instruction_pointer(regs); } @@ -121,17 +108,10 @@ unsigned long perf_misc_flags(struct pt_regs *regs) { int misc = 0; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) - misc |= PERF_RECORD_MISC_GUEST_USER; - else - misc |= PERF_RECORD_MISC_GUEST_KERNEL; - } else { - if (user_mode(regs)) - misc |= PERF_RECORD_MISC_USER; - else - misc |= PERF_RECORD_MISC_KERNEL; - } + if (user_mode(regs)) + misc |= PERF_RECORD_MISC_USER; + else + misc |= PERF_RECORD_MISC_KERNEL; return misc; } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 2a5f7f38006f..541e7a813eb8 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -675,8 +675,15 @@ unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); int kvm_handle_mmio_return(struct kvm_vcpu *vcpu); int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa); -int kvm_perf_init(void); -int kvm_perf_teardown(void); +/* + * Returns true if a Performance Monitoring Interrupt (PMI), a.k.a. perf event, + * arrived in guest context. For arm64, any event that arrives while a vCPU is + * loaded is considered to be "in guest". + */ +static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu) +{ + return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu; +} long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu); gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index c96a9a0043bf..7eaf1f7c4168 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -102,7 +102,9 @@ KVM_NVHE_ALIAS(__stop___kvm_ex_table); KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base); /* PMU available static key */ +#ifdef CONFIG_HW_PERF_EVENTS KVM_NVHE_ALIAS(kvm_arm_pmu_available); +#endif /* Position-independent library routines */ KVM_NVHE_ALIAS_HYP(clear_page, __pi_clear_page); diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index e9b7d99f4e3a..65b196e3ca6c 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -102,7 +102,7 @@ compat_user_backtrace(struct compat_frame_tail __user *tail, void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (perf_guest_state()) { /* We don't support guest os callchain now */ return; } @@ -141,7 +141,7 @@ static bool callchain_trace(void *data, unsigned long pc) void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (perf_guest_state()) { /* We don't support guest os callchain now */ return; } @@ -151,18 +151,19 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, unsigned long perf_instruction_pointer(struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); + if (perf_guest_state()) + return perf_guest_get_ip(); return instruction_pointer(regs); } unsigned long perf_misc_flags(struct pt_regs *regs) { + unsigned int guest_state = perf_guest_state(); int misc = 0; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) + if (guest_state) { + if (guest_state & PERF_GUEST_USER) misc |= PERF_RECORD_MISC_GUEST_USER; else misc |= PERF_RECORD_MISC_GUEST_KERNEL; diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 8ffcbe29395e..e9761d84f982 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -39,6 +39,7 @@ menuconfig KVM select HAVE_KVM_IRQ_BYPASS select HAVE_KVM_VCPU_RUN_PID_CHANGE select SCHED_INFO + select GUEST_PERF_EVENTS if PERF_EVENTS help Support hosting virtualized guest machines. diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 989bb5dad2c8..0bcc378b7961 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -12,7 +12,7 @@ obj-$(CONFIG_KVM) += hyp/ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ $(KVM)/vfio.o $(KVM)/irqchip.o $(KVM)/binary_stats.o \ - arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \ + arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o \ vgic-sys-reg-v3.o fpsimd.o pmu.o \ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e4727dc771bf..f026fd01bf7b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -503,6 +503,13 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) return vcpu_mode_priv(vcpu); } +#ifdef CONFIG_GUEST_PERF_EVENTS +unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu) +{ + return *vcpu_pc(vcpu); +} +#endif + /* Just ensure a guest exit from a particular CPU */ static void exit_vm_noop(void *info) { @@ -1775,7 +1782,8 @@ static int init_subsystems(void) if (err) goto out; - kvm_perf_init(); + kvm_register_perf_callbacks(NULL); + kvm_sys_reg_table_init(); out: @@ -2163,7 +2171,7 @@ out_err: /* NOP: Compiling as a module not supported */ void kvm_arch_exit(void) { - kvm_perf_teardown(); + kvm_unregister_perf_callbacks(); } static int __init early_kvm_mode_cfg(char *arg) diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c deleted file mode 100644 index c84fe24b2ea1..000000000000 --- a/arch/arm64/kvm/perf.c +++ /dev/null @@ -1,59 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Based on the x86 implementation. - * - * Copyright (C) 2012 ARM Ltd. - * Author: Marc Zyngier <marc.zyngier@arm.com> - */ - -#include <linux/perf_event.h> -#include <linux/kvm_host.h> - -#include <asm/kvm_emulate.h> - -DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available); - -static int kvm_is_in_guest(void) -{ - return kvm_get_running_vcpu() != NULL; -} - -static int kvm_is_user_mode(void) -{ - struct kvm_vcpu *vcpu; - - vcpu = kvm_get_running_vcpu(); - - if (vcpu) - return !vcpu_mode_priv(vcpu); - - return 0; -} - -static unsigned long kvm_get_guest_ip(void) -{ - struct kvm_vcpu *vcpu; - - vcpu = kvm_get_running_vcpu(); - - if (vcpu) - return *vcpu_pc(vcpu); - - return 0; -} - -static struct perf_guest_info_callbacks kvm_guest_cbs = { - .is_in_guest = kvm_is_in_guest, - .is_user_mode = kvm_is_user_mode, - .get_guest_ip = kvm_get_guest_ip, -}; - -int kvm_perf_init(void) -{ - return perf_register_guest_info_callbacks(&kvm_guest_cbs); -} - -int kvm_perf_teardown(void) -{ - return perf_unregister_guest_info_callbacks(&kvm_guest_cbs); -} diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index a5e4bbf5e68f..3308ceefa129 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -14,6 +14,8 @@ #include <kvm/arm_pmu.h> #include <kvm/arm_vgic.h> +DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available); + static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx); static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc); diff --git a/arch/csky/kernel/perf_callchain.c b/arch/csky/kernel/perf_callchain.c index ab55e98ee8f6..92057de08f4f 100644 --- a/arch/csky/kernel/perf_callchain.c +++ b/arch/csky/kernel/perf_callchain.c @@ -88,10 +88,6 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, { unsigned long fp = 0; - /* C-SKY does not support virtualization. */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return; - fp = regs->regs[4]; perf_callchain_store(entry, regs->pc); @@ -112,12 +108,6 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, { struct stackframe fr; - /* C-SKY does not support virtualization. */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - pr_warn("C-SKY does not support perf in guest mode!"); - return; - } - fr.fp = regs->regs[4]; fr.lr = regs->lr; walk_stackframe(&fr, entry); diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c index 0ce6f9f307e6..a78a879e7ef1 100644 --- a/arch/nds32/kernel/perf_event_cpu.c +++ b/arch/nds32/kernel/perf_event_cpu.c @@ -1371,11 +1371,6 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, leaf_fp = 0; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* We don't support guest os callchain now */ - return; - } - perf_callchain_store(entry, regs->ipc); fp = regs->fp; gp = regs->gp; @@ -1481,10 +1476,6 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, { struct stackframe fr; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* We don't support guest os callchain now */ - return; - } fr.fp = regs->fp; fr.lp = regs->lp; fr.sp = regs->sp; @@ -1493,10 +1484,6 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, unsigned long perf_instruction_pointer(struct pt_regs *regs) { - /* However, NDS32 does not support virtualization */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); - return instruction_pointer(regs); } @@ -1504,18 +1491,10 @@ unsigned long perf_misc_flags(struct pt_regs *regs) { int misc = 0; - /* However, NDS32 does not support virtualization */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) - misc |= PERF_RECORD_MISC_GUEST_USER; - else - misc |= PERF_RECORD_MISC_GUEST_KERNEL; - } else { - if (user_mode(regs)) - misc |= PERF_RECORD_MISC_USER; - else - misc |= PERF_RECORD_MISC_KERNEL; - } + if (user_mode(regs)) + misc |= PERF_RECORD_MISC_USER; + else + misc |= PERF_RECORD_MISC_KERNEL; return misc; } diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c index 0bb1854dce83..1fc075b8f764 100644 --- a/arch/riscv/kernel/perf_callchain.c +++ b/arch/riscv/kernel/perf_callchain.c @@ -58,10 +58,6 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, { unsigned long fp = 0; - /* RISC-V does not support perf in guest mode. */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return; - fp = regs->s0; perf_callchain_store(entry, regs->epc); @@ -78,11 +74,5 @@ static bool fill_callchain(void *entry, unsigned long pc) void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { - /* RISC-V does not support perf in guest mode. */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - pr_warn("RISC-V does not support perf in guest mode!"); - return; - } - walk_stackframe(NULL, regs, fill_callchain, entry); } diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 68dea7ce6a22..e686c5e0537b 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2771,7 +2771,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re struct unwind_state state; unsigned long addr; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (perf_guest_state()) { /* TODO: We don't support guest os callchain now */ return; } @@ -2874,7 +2874,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs struct stack_frame frame; const struct stack_frame __user *fp; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (perf_guest_state()) { /* TODO: We don't support guest os callchain now */ return; } @@ -2951,18 +2951,19 @@ static unsigned long code_segment_base(struct pt_regs *regs) unsigned long perf_instruction_pointer(struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); + if (perf_guest_state()) + return perf_guest_get_ip(); return regs->ip + code_segment_base(regs); } unsigned long perf_misc_flags(struct pt_regs *regs) { + unsigned int guest_state = perf_guest_state(); int misc = 0; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) + if (guest_state) { + if (guest_state & PERF_GUEST_USER) misc |= PERF_RECORD_MISC_GUEST_USER; else misc |= PERF_RECORD_MISC_GUEST_KERNEL; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index ec6444f2c9dc..fd9f908debe5 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2901,10 +2901,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) */ if (__test_and_clear_bit(GLOBAL_STATUS_TRACE_TOPAPMI_BIT, (unsigned long *)&status)) { handled++; - if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() && - perf_guest_cbs->handle_intel_pt_intr)) - perf_guest_cbs->handle_intel_pt_intr(); - else + if (!perf_guest_handle_intel_pt_intr()) intel_pt_interrupt(); } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 555f4de47ef2..d0ad98ddd459 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -774,6 +774,7 @@ struct kvm_vcpu_arch { unsigned nmi_pending; /* NMI queued after currently running handler */ bool nmi_injected; /* Trying to inject an NMI this entry */ bool smi_pending; /* SMI queued after currently running handler */ + u8 handling_intr_from_guest; struct kvm_mtrr mtrr_state; u64 pat; @@ -1519,6 +1520,7 @@ struct kvm_x86_init_ops { int (*disabled_by_bios)(void); int (*check_processor_compatibility)(void); int (*hardware_setup)(void); + unsigned int (*handle_intel_pt_intr)(void); struct kvm_x86_ops *runtime_ops; }; @@ -1568,6 +1570,9 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) return -ENOTSUPP; } +#define kvm_arch_pmi_in_guest(vcpu) \ + ((vcpu) && (vcpu)->arch.handling_intr_from_guest) + int kvm_mmu_module_init(void); void kvm_mmu_module_exit(void); @@ -1897,8 +1902,6 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu); int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu); -int kvm_is_in_guest(void); - void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 619186138176..47bdbe705a76 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -36,6 +36,7 @@ config KVM select KVM_MMIO select SCHED_INFO select PERF_EVENTS + select GUEST_PERF_EVENTS select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_NO_POLL diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 09873f6488f7..0c2133eb4cf6 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -87,7 +87,7 @@ static void kvm_perf_overflow_intr(struct perf_event *perf_event, * woken up. So we should wake it, but this is impossible from * NMI context. Do it from irq work instead. */ - if (!kvm_is_in_guest()) + if (!kvm_handling_nmi_from_guest(pmc->vcpu)) irq_work_queue(&pmc_to_pmu(pmc)->irq_work); else kvm_make_request(KVM_REQ_PMI, pmc->vcpu); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 5151efa424ac..9079d2fdc12e 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3933,7 +3933,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) } if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) - kvm_before_interrupt(vcpu); + kvm_before_interrupt(vcpu, KVM_HANDLING_NMI); kvm_load_host_xsave_state(vcpu); stgi(); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 0dbf94eb954f..1187cd1e38aa 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6344,7 +6344,9 @@ void vmx_do_interrupt_nmi_irqoff(unsigned long entry); static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu, unsigned long entry) { - kvm_before_interrupt(vcpu); + bool is_nmi = entry == (unsigned long)asm_exc_nmi_noist; + + kvm_before_interrupt(vcpu, is_nmi ? KVM_HANDLING_NMI : KVM_HANDLING_IRQ); vmx_do_interrupt_nmi_irqoff(entry); kvm_after_interrupt(vcpu); } @@ -7693,6 +7695,20 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector, }; +static unsigned int vmx_handle_intel_pt_intr(void) +{ + struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); + + /* '0' on failure so that the !PT case can use a RET0 static call. */ + if (!kvm_arch_pmi_in_guest(vcpu)) + return 0; + + kvm_make_request(KVM_REQ_PMI, vcpu); + __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT, + (unsigned long *)&vcpu->arch.pmu.global_status); + return 1; +} + static __init void vmx_setup_user_return_msrs(void) { @@ -7719,6 +7735,8 @@ static __init void vmx_setup_user_return_msrs(void) kvm_add_user_return_msr(vmx_uret_msrs_list[i]); } +static struct kvm_x86_init_ops vmx_init_ops __initdata; + static __init int hardware_setup(void) { unsigned long host_bndcfgs; @@ -7877,6 +7895,10 @@ static __init int hardware_setup(void) return -EINVAL; if (!enable_ept || !cpu_has_vmx_intel_pt()) pt_mode = PT_MODE_SYSTEM; + if (pt_mode == PT_MODE_HOST_GUEST) + vmx_init_ops.handle_intel_pt_intr = vmx_handle_intel_pt_intr; + else + vmx_init_ops.handle_intel_pt_intr = NULL; setup_default_sgx_lepubkeyhash(); @@ -7905,6 +7927,7 @@ static struct kvm_x86_init_ops vmx_init_ops __initdata = { .disabled_by_bios = vmx_disabled_by_bios, .check_processor_compatibility = vmx_check_processor_compat, .hardware_setup = hardware_setup, + .handle_intel_pt_intr = NULL, .runtime_ops = &vmx_x86_ops, }; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e50e97ac4408..829d03fcb481 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8519,50 +8519,6 @@ static void kvm_timer_init(void) kvmclock_cpu_online, kvmclock_cpu_down_prep); } -DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); -EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu); - -int kvm_is_in_guest(void) -{ - return __this_cpu_read(current_vcpu) != NULL; -} - -static int kvm_is_user_mode(void) -{ - int user_mode = 3; - - if (__this_cpu_read(current_vcpu)) - user_mode = static_call(kvm_x86_get_cpl)(__this_cpu_read(current_vcpu)); - - return user_mode != 0; -} - -static unsigned long kvm_get_guest_ip(void) -{ - unsigned long ip = 0; - - if (__this_cpu_read(current_vcpu)) - ip = kvm_rip_read(__this_cpu_read(current_vcpu)); - - return ip; -} - -static void kvm_handle_intel_pt_intr(void) -{ - struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu); - - kvm_make_request(KVM_REQ_PMI, vcpu); - __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT, - (unsigned long *)&vcpu->arch.pmu.global_status); -} - -static struct perf_guest_info_callbacks kvm_guest_cbs = { - .is_in_guest = kvm_is_in_guest, - .is_user_mode = kvm_is_user_mode, - .get_guest_ip = kvm_get_guest_ip, - .handle_intel_pt_intr = kvm_handle_intel_pt_intr, -}; - #ifdef CONFIG_X86_64 static void pvclock_gtod_update_fn(struct work_struct *work) { @@ -8676,8 +8632,6 @@ int kvm_arch_init(void *opaque) kvm_timer_init(); - perf_register_guest_info_callbacks(&kvm_guest_cbs); - if (boot_cpu_has(X86_FEATURE_XSAVE)) { host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0; @@ -8709,7 +8663,6 @@ void kvm_arch_exit(void) clear_hv_tscchange_cb(); #endif kvm_lapic_exit(); - perf_unregister_guest_info_callbacks(&kvm_guest_cbs); if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, @@ -9936,7 +9889,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) * interrupts on processors that implement an interrupt shadow, the * stat.exits increment will do nicely. */ - kvm_before_interrupt(vcpu); + kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ); local_irq_enable(); ++vcpu->stat.exits; local_irq_disable(); @@ -11269,6 +11222,8 @@ int kvm_arch_hardware_setup(void *opaque) memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops)); kvm_ops_static_call_update(); + kvm_register_perf_callbacks(ops->handle_intel_pt_intr); + if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) supported_xss = 0; @@ -11296,6 +11251,8 @@ int kvm_arch_hardware_setup(void *opaque) void kvm_arch_hardware_unsetup(void) { + kvm_unregister_perf_callbacks(); + static_call(kvm_x86_hardware_unsetup)(); } @@ -11885,6 +11842,11 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) return vcpu->arch.preempted_in_kernel; } +unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu) +{ + return kvm_rip_read(vcpu); +} + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 4abcd8d9836d..6aeca8f1da91 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -392,18 +392,27 @@ static inline bool kvm_cstate_in_guest(struct kvm *kvm) return kvm->arch.cstate_in_guest; } -DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu); +enum kvm_intr_type { + /* Values are arbitrary, but must be non-zero. */ + KVM_HANDLING_IRQ = 1, + KVM_HANDLING_NMI, +}; -static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu) +static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu, + enum kvm_intr_type intr) { - __this_cpu_write(current_vcpu, vcpu); + WRITE_ONCE(vcpu->arch.handling_intr_from_guest, (u8)intr); } static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu) { - __this_cpu_write(current_vcpu, NULL); + WRITE_ONCE(vcpu->arch.handling_intr_from_guest, 0); } +static inline bool kvm_handling_nmi_from_guest(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.handling_intr_from_guest == KVM_HANDLING_NMI; +} static inline bool kvm_pat_valid(u64 data) { diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 6bcd3d8ca6ac..85246dd9faa1 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -23,6 +23,7 @@ config XEN_PV select PARAVIRT_XXL select XEN_HAVE_PVMMU select XEN_HAVE_VPMU + select GUEST_PERF_EVENTS help Support running as a Xen PV guest. diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index e13b0b49fcdf..89dd6b1708b0 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -413,34 +413,29 @@ int pmu_apic_update(uint32_t val) } /* perf callbacks */ -static int xen_is_in_guest(void) +static unsigned int xen_guest_state(void) { const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); + unsigned int state = 0; if (!xenpmu_data) { pr_warn_once("%s: pmudata not initialized\n", __func__); - return 0; + return state; } if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF)) - return 0; + return state; - return 1; -} - -static int xen_is_user_mode(void) -{ - const struct xen_pmu_data *xenpmu_data = get_xenpmu_data(); + state |= PERF_GUEST_ACTIVE; - if (!xenpmu_data) { - pr_warn_once("%s: pmudata not initialized\n", __func__); - return 0; + if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV) { + if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER) + state |= PERF_GUEST_USER; + } else if (xenpmu_data->pmu.r.regs.cpl & 3) { + state |= PERF_GUEST_USER; } - if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV) - return (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER); - else - return !!(xenpmu_data->pmu.r.regs.cpl & 3); + return state; } static unsigned long xen_get_guest_ip(void) @@ -456,9 +451,8 @@ static unsigned long xen_get_guest_ip(void) } static struct perf_guest_info_callbacks xen_guest_cbs = { - .is_in_guest = xen_is_in_guest, - .is_user_mode = xen_is_user_mode, - .get_guest_ip = xen_get_guest_ip, + .state = xen_guest_state, + .get_ip = xen_get_guest_ip, }; /* Convert registers from Xen's format to Linux' */ diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 90f21898aad8..f9ed4c171d7b 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -13,13 +13,6 @@ #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) #define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1) -DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available); - -static __always_inline bool kvm_arm_support_pmu_v3(void) -{ - return static_branch_likely(&kvm_arm_pmu_available); -} - #ifdef CONFIG_HW_PERF_EVENTS struct kvm_pmc { @@ -36,6 +29,13 @@ struct kvm_pmu { struct irq_work overflow_work; }; +DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available); + +static __always_inline bool kvm_arm_support_pmu_v3(void) +{ + return static_branch_likely(&kvm_arm_pmu_available); +} + #define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num >= VGIC_NR_SGIS) u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx); void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val); @@ -65,6 +65,11 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu); struct kvm_pmu { }; +static inline bool kvm_arm_support_pmu_v3(void) +{ + return false; +} + #define kvm_arm_pmu_irq_initialized(v) (false) static inline u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c310648cc8f1..1a91e3a70dc0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1166,6 +1166,16 @@ static inline bool kvm_arch_intc_initialized(struct kvm *kvm) } #endif +#ifdef CONFIG_GUEST_PERF_EVENTS +unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu); + +void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void)); +void kvm_unregister_perf_callbacks(void); +#else +static inline void kvm_register_perf_callbacks(void *ign) {} +static inline void kvm_unregister_perf_callbacks(void) {} +#endif /* CONFIG_GUEST_PERF_EVENTS */ + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); void kvm_arch_destroy_vm(struct kvm *kvm); void kvm_arch_sync_events(struct kvm *kvm); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 474eaecd63ba..117f230bcdfd 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -26,11 +26,13 @@ # include <asm/local64.h> #endif +#define PERF_GUEST_ACTIVE 0x01 +#define PERF_GUEST_USER 0x02 + struct perf_guest_info_callbacks { - int (*is_in_guest)(void); - int (*is_user_mode)(void); - unsigned long (*get_guest_ip)(void); - void (*handle_intel_pt_intr)(void); + unsigned int (*state)(void); + unsigned long (*get_ip)(void); + unsigned int (*handle_intel_pt_intr)(void); }; #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -1251,9 +1253,32 @@ extern void perf_event_bpf_event(struct bpf_prog *prog, enum perf_bpf_event_type type, u16 flags); -extern struct perf_guest_info_callbacks *perf_guest_cbs; -extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); -extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); +#ifdef CONFIG_GUEST_PERF_EVENTS +extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs; + +DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state); +DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip); +DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr); + +static inline unsigned int perf_guest_state(void) +{ + return static_call(__perf_guest_state)(); +} +static inline unsigned long perf_guest_get_ip(void) +{ + return static_call(__perf_guest_get_ip)(); +} +static inline unsigned int perf_guest_handle_intel_pt_intr(void) +{ + return static_call(__perf_guest_handle_intel_pt_intr)(); +} +extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); +extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); +#else +static inline unsigned int perf_guest_state(void) { return 0; } +static inline unsigned long perf_guest_get_ip(void) { return 0; } +static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return 0; } +#endif /* CONFIG_GUEST_PERF_EVENTS */ extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk, bool exec); @@ -1497,11 +1522,6 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { } static inline void perf_bp_event(struct perf_event *event, void *data) { } -static inline int perf_register_guest_info_callbacks -(struct perf_guest_info_callbacks *callbacks) { return 0; } -static inline int perf_unregister_guest_info_callbacks -(struct perf_guest_info_callbacks *callbacks) { return 0; } - static inline void perf_event_mmap(struct vm_area_struct *vma) { } typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data); diff --git a/init/Kconfig b/init/Kconfig index f2ae41e6717f..c5a8f80f0a1b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1797,6 +1797,10 @@ config HAVE_PERF_EVENTS help See tools/perf/design.txt for details. +config GUEST_PERF_EVENTS + bool + depends on HAVE_PERF_EVENTS + config PERF_USE_VMALLOC bool help diff --git a/kernel/events/core.c b/kernel/events/core.c index 1362b9b770d8..fc18664f49b0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6525,26 +6525,43 @@ static void perf_pending_event(struct irq_work *entry) perf_swevent_put_recursion_context(rctx); } -/* - * We assume there is only KVM supporting the callbacks. - * Later on, we might change it to a list if there is - * another virtualization implementation supporting the callbacks. - */ -struct perf_guest_info_callbacks *perf_guest_cbs; +#ifdef CONFIG_GUEST_PERF_EVENTS +struct perf_guest_info_callbacks __rcu *perf_guest_cbs; + +DEFINE_STATIC_CALL_RET0(__perf_guest_state, *perf_guest_cbs->state); +DEFINE_STATIC_CALL_RET0(__perf_guest_get_ip, *perf_guest_cbs->get_ip); +DEFINE_STATIC_CALL_RET0(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr); -int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) +void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) { - perf_guest_cbs = cbs; - return 0; + if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs))) + return; + + rcu_assign_pointer(perf_guest_cbs, cbs); + static_call_update(__perf_guest_state, cbs->state); + static_call_update(__perf_guest_get_ip, cbs->get_ip); + + /* Implementing ->handle_intel_pt_intr is optional. */ + if (cbs->handle_intel_pt_intr) + static_call_update(__perf_guest_handle_intel_pt_intr, + cbs->handle_intel_pt_intr); } EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks); -int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) +void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) { - perf_guest_cbs = NULL; - return 0; + if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs) != cbs)) + return; + + rcu_assign_pointer(perf_guest_cbs, NULL); + static_call_update(__perf_guest_state, (void *)&__static_call_return0); + static_call_update(__perf_guest_get_ip, (void *)&__static_call_return0); + static_call_update(__perf_guest_handle_intel_pt_intr, + (void *)&__static_call_return0); + synchronize_rcu(); } EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); +#endif static void perf_output_sample_regs(struct perf_output_handle *handle, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 72c4e6b39389..938975ff75a0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5419,6 +5419,50 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) return &kvm_running_vcpu; } +#ifdef CONFIG_GUEST_PERF_EVENTS +static unsigned int kvm_guest_state(void) +{ + struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); + unsigned int state; + + if (!kvm_arch_pmi_in_guest(vcpu)) + return 0; + + state = PERF_GUEST_ACTIVE; + if (!kvm_arch_vcpu_in_kernel(vcpu)) + state |= PERF_GUEST_USER; + + return state; +} + +static unsigned long kvm_guest_get_ip(void) +{ + struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); + + /* Retrieving the IP must be guarded by a call to kvm_guest_state(). */ + if (WARN_ON_ONCE(!kvm_arch_pmi_in_guest(vcpu))) + return 0; + + return kvm_arch_vcpu_get_ip(vcpu); +} + +static struct perf_guest_info_callbacks kvm_guest_cbs = { + .state = kvm_guest_state, + .get_ip = kvm_guest_get_ip, + .handle_intel_pt_intr = NULL, +}; + +void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void)) +{ + kvm_guest_cbs.handle_intel_pt_intr = pt_intr_handler; + perf_register_guest_info_callbacks(&kvm_guest_cbs); +} +void kvm_unregister_perf_callbacks(void) +{ + perf_unregister_guest_info_callbacks(&kvm_guest_cbs); +} +#endif + struct kvm_cpu_compat_check { void *opaque; int *ret; |