From 2387149eade25f32dcf1398811b3d0293181d005 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Sun, 4 Jun 2017 14:43:51 +0200 Subject: KVM: improve arch vcpu request defining Marc Zyngier suggested that we define the arch specific VCPU request base, rather than requiring each arch to remember to start from 8. That suggestion, along with Radim Krcmar's recent VCPU request flag addition, snowballed into defining something of an arch VCPU request defining API. No functional change. (Looks like x86 is running out of arch VCPU request bits. Maybe someday we'll need to extend to 64.) Signed-off-by: Andrew Jones Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8c0664309815..3724b51aab64 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -126,6 +126,13 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_PENDING_TIMER 2 #define KVM_REQ_UNHALT 3 +#define KVM_REQUEST_ARCH_BASE 8 + +#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \ + BUILD_BUG_ON((unsigned)(nr) >= 32 - KVM_REQUEST_ARCH_BASE); \ + (unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \ +}) +#define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0) #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v1.2.3 From 2fa6e1e12a024b48b2c7ea39f50205246e027da7 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Sun, 4 Jun 2017 14:43:52 +0200 Subject: KVM: add kvm_request_pending MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A first step in vcpu->requests encapsulation. Additionally, we now use READ_ONCE() when accessing vcpu->requests, which ensures we always load vcpu->requests when it's accessed. This is important as other threads can change it any time. Also, READ_ONCE() documents that vcpu->requests is used with other threads, likely requiring memory barriers, which it does. Signed-off-by: Radim Krčmář [ Documented the new use of READ_ONCE() and converted another check in arch/mips/kvm/vz.c ] Signed-off-by: Andrew Jones Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall --- arch/mips/kvm/trap_emul.c | 2 +- arch/mips/kvm/vz.c | 2 +- arch/powerpc/kvm/booke.c | 2 +- arch/powerpc/kvm/powerpc.c | 5 ++--- arch/s390/kvm/kvm-s390.c | 2 +- arch/x86/kvm/x86.c | 4 ++-- include/linux/kvm_host.h | 5 +++++ 7 files changed, 13 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index a563759fd142..6a0d7040d882 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -1094,7 +1094,7 @@ static void kvm_trap_emul_check_requests(struct kvm_vcpu *vcpu, int cpu, struct mm_struct *mm; int i; - if (likely(!vcpu->requests)) + if (likely(!kvm_request_pending(vcpu))) return; if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index 71d8856ade64..74805035edc8 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c @@ -2337,7 +2337,7 @@ static int kvm_vz_check_requests(struct kvm_vcpu *vcpu, int cpu) int ret = 0; int i; - if (!vcpu->requests) + if (!kvm_request_pending(vcpu)) return 0; if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 3eaac3809977..071b87ee682f 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -687,7 +687,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) kvmppc_core_check_exceptions(vcpu); - if (vcpu->requests) { + if (kvm_request_pending(vcpu)) { /* Exception delivery raised request; start over */ return 1; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index f7cf2cd564ef..fd64f087737c 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -55,8 +55,7 @@ EXPORT_SYMBOL_GPL(kvmppc_pr_ops); int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return !!(v->arch.pending_exceptions) || - v->requests; + return !!(v->arch.pending_exceptions) || kvm_request_pending(v); } int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) @@ -108,7 +107,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) */ smp_mb(); - if (vcpu->requests) { + if (kvm_request_pending(vcpu)) { /* Make sure we process requests preemptable */ local_irq_enable(); trace_kvm_check_requests(vcpu); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 689ac48361c6..ad41e0fa3a21 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2440,7 +2440,7 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) { retry: kvm_s390_vcpu_request_handled(vcpu); - if (!vcpu->requests) + if (!kvm_request_pending(vcpu)) return 0; /* * We use MMU_RELOAD just to re-arm the ipte notifier for the diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 464da936c53d..f81060518635 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6710,7 +6710,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_immediate_exit = false; - if (vcpu->requests) { + if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) kvm_mmu_unload(vcpu); if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) @@ -6874,7 +6874,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops->sync_pir_to_irr(vcpu); } - if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests + if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) || need_resched() || signal_pending(current)) { vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3724b51aab64..0b50e7b35ed4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1105,6 +1105,11 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) set_bit(req & KVM_REQUEST_MASK, &vcpu->requests); } +static inline bool kvm_request_pending(struct kvm_vcpu *vcpu) +{ + return READ_ONCE(vcpu->requests); +} + static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu) { return test_bit(req & KVM_REQUEST_MASK, &vcpu->requests); -- cgit v1.2.3 From a2befacf50940017e0de8461c4b924a929c4edc5 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 2 May 2017 13:41:02 +0200 Subject: KVM: arm64: Allow creating the PMU without the in-kernel GIC Since we got support for devices in userspace which allows reporting the PMU overflow output status to userspace, we should actually allow creating the PMU on systems without an in-kernel irqchip, which in turn requires us to slightly clarify error codes for the ABI and move things around for the initialization phase. Signed-off-by: Christoffer Dall Reviewed-by: Marc Zyngier --- Documentation/virtual/kvm/devices/vcpu.txt | 16 ++++++---- include/kvm/arm_pmu.h | 6 ++++ virt/kvm/arm/arm.c | 4 +++ virt/kvm/arm/pmu.c | 51 +++++++++++++++++++++++------- 4 files changed, 58 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/Documentation/virtual/kvm/devices/vcpu.txt b/Documentation/virtual/kvm/devices/vcpu.txt index 02f50686c418..d7236a3e01dc 100644 --- a/Documentation/virtual/kvm/devices/vcpu.txt +++ b/Documentation/virtual/kvm/devices/vcpu.txt @@ -16,7 +16,9 @@ Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a Returns: -EBUSY: The PMU overflow interrupt is already set -ENXIO: The overflow interrupt not set when attempting to get it -ENODEV: PMUv3 not supported - -EINVAL: Invalid PMU overflow interrupt number supplied + -EINVAL: Invalid PMU overflow interrupt number supplied or + trying to set the IRQ number without using an in-kernel + irqchip. A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt @@ -25,11 +27,11 @@ all vcpus, while as an SPI it must be a separate number per vcpu. 1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT Parameters: no additional parameter in kvm_device_attr.addr -Returns: -ENODEV: PMUv3 not supported - -ENXIO: PMUv3 not properly configured as required prior to calling this - attribute +Returns: -ENODEV: PMUv3 not supported or GIC not initialized + -ENXIO: PMUv3 not properly configured or in-kernel irqchip not + configured as required prior to calling this attribute -EBUSY: PMUv3 already initialized -Request the initialization of the PMUv3. This must be done after creating the -in-kernel irqchip. Creating a PMU with a userspace irqchip is currently not -supported. +Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel +virtual GIC implementation, this must be done after initializing the in-kernel +irqchip. diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 1ab4633adf4f..f6e030617467 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -35,6 +35,7 @@ struct kvm_pmu { int irq_num; struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS]; bool ready; + bool created; bool irq_level; }; @@ -63,6 +64,7 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); +int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu); #else struct kvm_pmu { }; @@ -112,6 +114,10 @@ static inline int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, { return -ENXIO; } +static inline int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) +{ + return 0; +} #endif #endif diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index cac5c2f2ddba..72816d3f23a7 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -527,6 +527,10 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) } ret = kvm_timer_enable(vcpu); + if (ret) + return ret; + + ret = kvm_arm_pmu_v3_enable(vcpu); return ret; } diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 2451607dc25e..96a9cab39982 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -449,29 +449,50 @@ bool kvm_arm_support_pmu_v3(void) return (perf_num_counters() > 0); } -static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) +int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) { - if (!kvm_arm_support_pmu_v3()) - return -ENODEV; + if (!vcpu->arch.pmu.created) + return 0; /* - * We currently require an in-kernel VGIC to use the PMU emulation, - * because we do not support forwarding PMU overflow interrupts to - * userspace yet. + * A valid interrupt configuration for the PMU is either to have a + * properly configured interrupt number and using an in-kernel + * irqchip, or to neither set an IRQ nor create an in-kernel irqchip. */ - if (!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm)) + if (kvm_arm_pmu_irq_initialized(vcpu) != irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + + kvm_pmu_vcpu_reset(vcpu); + vcpu->arch.pmu.ready = true; + + return 0; +} + +static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) +{ + if (!kvm_arm_support_pmu_v3()) return -ENODEV; - if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features) || - !kvm_arm_pmu_irq_initialized(vcpu)) + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) return -ENXIO; - if (kvm_arm_pmu_v3_ready(vcpu)) + if (vcpu->arch.pmu.created) return -EBUSY; - kvm_pmu_vcpu_reset(vcpu); - vcpu->arch.pmu.ready = true; + if (irqchip_in_kernel(vcpu->kvm)) { + /* + * If using the PMU with an in-kernel virtual GIC + * implementation, we require the GIC to be already + * initialized when initializing the PMU. + */ + if (!vgic_initialized(vcpu->kvm)) + return -ENODEV; + + if (!kvm_arm_pmu_irq_initialized(vcpu)) + return -ENXIO; + } + vcpu->arch.pmu.created = true; return 0; } @@ -510,6 +531,9 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) int __user *uaddr = (int __user *)(long)attr->addr; int irq; + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) return -ENODEV; @@ -544,6 +568,9 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) int __user *uaddr = (int __user *)(long)attr->addr; int irq; + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) return -ENODEV; -- cgit v1.2.3 From 3cba4af31c61fc9420fdcf083f509a6c20a6d8e5 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 2 May 2017 20:11:49 +0200 Subject: KVM: arm/arm64: Move irq_is_ppi() to header file We are about to need this define in the arch timer code as well so move it to a common location. Signed-off-by: Christoffer Dall Acked-by: Marc Zyngier --- include/kvm/arm_vgic.h | 2 ++ virt/kvm/arm/pmu.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 97b8d3728b31..dde59fbc5f80 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -38,6 +38,8 @@ #define VGIC_MIN_LPI 8192 #define KVM_IRQCHIP_NUM_PINS (1020 - 32) +#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS) + enum vgic_type { VGIC_V2, /* Good ol' GICv2 */ VGIC_V3, /* New fancy GICv3 */ diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 96a9cab39982..574feff55a6c 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -496,8 +496,6 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) return 0; } -#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS) - /* * For one VM the interrupt type must be same for each vcpu. * As a PPI, the interrupt number is the same for all vcpus, -- cgit v1.2.3 From 85e69ad7f2cc6dd829987a70cf32785b1d8c8b27 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 2 May 2017 20:14:06 +0200 Subject: KVM: arm/arm64: Move timer IRQ default init to arch_timer.c We currently initialize the arch timer IRQ numbers from the reset code, presumably because we once intended to model multiple CPU or SoC types from within the kernel and have hard-coded reset values in the reset code. As we are moving towards userspace being in charge of more fine-grained CPU emulation and stitching together the pieces needed to emulate a particular type of CPU, we should no longer have a tight coupling between resetting a VCPU and setting IRQ numbers. Therefore, move the logic to define and use the default IRQ numbers to the timer code and set the IRQ number immediately when creating the VCPU. Signed-off-by: Christoffer Dall Reviewed-by: Marc Zyngier --- arch/arm/kvm/reset.c | 16 +--------------- arch/arm64/kvm/reset.c | 16 +--------------- include/kvm/arm_arch_timer.h | 4 +--- virt/kvm/arm/arch_timer.c | 28 ++++++++++++++++------------ 4 files changed, 19 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index 1da8b2d14550..5ed0c3ee33d6 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -37,16 +37,6 @@ static struct kvm_regs cortexa_regs_reset = { .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, }; -static const struct kvm_irq_level cortexa_ptimer_irq = { - { .irq = 30 }, - .level = 1, -}; - -static const struct kvm_irq_level cortexa_vtimer_irq = { - { .irq = 27 }, - .level = 1, -}; - /******************************************************************************* * Exported reset function @@ -62,16 +52,12 @@ static const struct kvm_irq_level cortexa_vtimer_irq = { int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { struct kvm_regs *reset_regs; - const struct kvm_irq_level *cpu_vtimer_irq; - const struct kvm_irq_level *cpu_ptimer_irq; switch (vcpu->arch.target) { case KVM_ARM_TARGET_CORTEX_A7: case KVM_ARM_TARGET_CORTEX_A15: reset_regs = &cortexa_regs_reset; vcpu->arch.midr = read_cpuid_id(); - cpu_vtimer_irq = &cortexa_vtimer_irq; - cpu_ptimer_irq = &cortexa_ptimer_irq; break; default: return -ENODEV; @@ -84,5 +70,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) kvm_reset_coprocs(vcpu); /* Reset arch_timer context */ - return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq); + return kvm_timer_vcpu_reset(vcpu); } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 561badf93de8..3256b9228e75 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -46,16 +46,6 @@ static const struct kvm_regs default_regs_reset32 = { COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT), }; -static const struct kvm_irq_level default_ptimer_irq = { - .irq = 30, - .level = 1, -}; - -static const struct kvm_irq_level default_vtimer_irq = { - .irq = 27, - .level = 1, -}; - static bool cpu_has_32bit_el1(void) { u64 pfr0; @@ -108,8 +98,6 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { - const struct kvm_irq_level *cpu_vtimer_irq; - const struct kvm_irq_level *cpu_ptimer_irq; const struct kvm_regs *cpu_reset; switch (vcpu->arch.target) { @@ -122,8 +110,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) cpu_reset = &default_regs_reset; } - cpu_vtimer_irq = &default_vtimer_irq; - cpu_ptimer_irq = &default_ptimer_irq; break; } @@ -137,5 +123,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) kvm_pmu_vcpu_reset(vcpu); /* Reset timer */ - return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq); + return kvm_timer_vcpu_reset(vcpu); } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 295584f31a4e..f1c967a4f603 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -57,9 +57,7 @@ struct arch_timer_cpu { int kvm_timer_hyp_init(void); int kvm_timer_enable(struct kvm_vcpu *vcpu); -int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, - const struct kvm_irq_level *virt_irq, - const struct kvm_irq_level *phys_irq); +int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 7933b1f8f7b7..72d5aa7d4c64 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -35,6 +35,16 @@ static struct timecounter *timecounter; static unsigned int host_vtimer_irq; static u32 host_vtimer_irq_flags; +static const struct kvm_irq_level default_ptimer_irq = { + .irq = 30, + .level = 1, +}; + +static const struct kvm_irq_level default_vtimer_irq = { + .irq = 27, + .level = 1, +}; + void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) { vcpu_vtimer(vcpu)->active_cleared_last = false; @@ -445,22 +455,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) kvm_timer_update_state(vcpu); } -int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, - const struct kvm_irq_level *virt_irq, - const struct kvm_irq_level *phys_irq) +int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) { struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - /* - * The vcpu timer irq number cannot be determined in - * kvm_timer_vcpu_init() because it is called much before - * kvm_vcpu_set_target(). To handle this, we determine - * vcpu timer irq number when the vcpu is reset. - */ - vtimer->irq.irq = virt_irq->irq; - ptimer->irq.irq = phys_irq->irq; - /* * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 * and to 0 for ARMv7. We provide an implementation that always @@ -496,6 +495,8 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff) void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); /* Synchronize cntvoff across all vtimers of a VM. */ update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); @@ -504,6 +505,9 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); timer->timer.function = kvm_timer_expire; + + vtimer->irq.irq = default_vtimer_irq.irq; + ptimer->irq.irq = default_ptimer_irq.irq; } static void kvm_timer_init_interrupt(void *info) -- cgit v1.2.3 From 99a1db7a2c9b2ecb9a801cee3f6a7a71945a2fca Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 2 May 2017 20:19:15 +0200 Subject: KVM: arm/arm64: Allow setting the timer IRQ numbers from userspace First we define an ABI using the vcpu devices that lets userspace set the interrupt numbers for the various timers on both the 32-bit and 64-bit KVM/ARM implementations. Second, we add the definitions for the groups and attributes introduced by the above ABI. (We add the PMU define on the 32-bit side as well for symmetry and it may get used some day.) Third, we set up the arch-specific vcpu device operation handlers to call into the timer code for anything related to the KVM_ARM_VCPU_TIMER_CTRL group. Fourth, we implement support for getting and setting the timer interrupt numbers using the above defined ABI in the arch timer code. Fifth, we introduce error checking upon enabling the arch timer (which is called when first running a VCPU) to check that all VCPUs are configured to use the same PPI for the timer (as mandated by the architecture) and that the virtual and physical timers are not configured to use the same IRQ number. Signed-off-by: Christoffer Dall Reviewed-by: Marc Zyngier --- Documentation/virtual/kvm/devices/vcpu.txt | 25 +++++++ arch/arm/include/uapi/asm/kvm.h | 8 +++ arch/arm/kvm/guest.c | 9 +++ arch/arm64/include/uapi/asm/kvm.h | 3 + arch/arm64/kvm/guest.c | 9 +++ include/kvm/arm_arch_timer.h | 4 ++ virt/kvm/arm/arch_timer.c | 104 +++++++++++++++++++++++++++++ 7 files changed, 162 insertions(+) (limited to 'include') diff --git a/Documentation/virtual/kvm/devices/vcpu.txt b/Documentation/virtual/kvm/devices/vcpu.txt index d7236a3e01dc..2b5dab16c4f2 100644 --- a/Documentation/virtual/kvm/devices/vcpu.txt +++ b/Documentation/virtual/kvm/devices/vcpu.txt @@ -35,3 +35,28 @@ Returns: -ENODEV: PMUv3 not supported or GIC not initialized Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel virtual GIC implementation, this must be done after initializing the in-kernel irqchip. + + +2. GROUP: KVM_ARM_VCPU_TIMER_CTRL +Architectures: ARM,ARM64 + +2.1. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_VTIMER +2.2. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_PTIMER +Parameters: in kvm_device_attr.addr the address for the timer interrupt is a + pointer to an int +Returns: -EINVAL: Invalid timer interrupt number + -EBUSY: One or more VCPUs has already run + +A value describing the architected timer interrupt number when connected to an +in-kernel virtual GIC. These must be a PPI (16 <= intid < 32). Setting the +attribute overrides the default values (see below). + +KVM_ARM_VCPU_TIMER_IRQ_VTIMER: The EL1 virtual timer intid (default: 27) +KVM_ARM_VCPU_TIMER_IRQ_PTIMER: The EL1 physical timer intid (default: 30) + +Setting the same PPI for different timers will prevent the VCPUs from running. +Setting the interrupt number on a VCPU configures all VCPUs created at that +time to use the number provided for a given timer, overwriting any previously +configured values on other VCPUs. Userspace should configure the interrupt +numbers on at least one VCPU after creating all VCPUs and before running any +VCPUs. diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 5e3c673fa3f4..5db2d4c6a55f 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -203,6 +203,14 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff #define VGIC_LEVEL_INFO_LINE_LEVEL 0 +/* Device Control API on vcpu fd */ +#define KVM_ARM_VCPU_PMU_V3_CTRL 0 +#define KVM_ARM_VCPU_PMU_V3_IRQ 0 +#define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_TIMER_CTRL 1 +#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 +#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 + #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 #define KVM_DEV_ARM_ITS_SAVE_TABLES 1 #define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index acea05e9db4e..1e0784ebbfd6 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -308,6 +308,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, int ret; switch (attr->group) { + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_set_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -322,6 +325,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, int ret; switch (attr->group) { + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_get_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -336,6 +342,9 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, int ret; switch (attr->group) { + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_has_attr(vcpu, attr); + break; default: ret = -ENXIO; break; diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 70eea2ecc663..9f3ca24bbcc6 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -232,6 +232,9 @@ struct kvm_arch_memory_slot { #define KVM_ARM_VCPU_PMU_V3_CTRL 0 #define KVM_ARM_VCPU_PMU_V3_IRQ 0 #define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_TIMER_CTRL 1 +#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 +#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index b37446a8ffdb..5c7f657dd207 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -390,6 +390,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_PMU_V3_CTRL: ret = kvm_arm_pmu_v3_set_attr(vcpu, attr); break; + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_set_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -407,6 +410,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_PMU_V3_CTRL: ret = kvm_arm_pmu_v3_get_attr(vcpu, attr); break; + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_get_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -424,6 +430,9 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_PMU_V3_CTRL: ret = kvm_arm_pmu_v3_has_attr(vcpu, attr); break; + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_has_attr(vcpu, attr); + break; default: ret = -ENXIO; break; diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index f1c967a4f603..f0053f884b4a 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -68,6 +68,10 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); +int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); +int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); +int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); + bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); void kvm_timer_schedule(struct kvm_vcpu *vcpu); void kvm_timer_unschedule(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 72d5aa7d4c64..e03da1abd11f 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -617,6 +618,28 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq); } +static bool timer_irqs_are_valid(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + int vtimer_irq, ptimer_irq; + int i; + + vcpu = kvm_get_vcpu(kvm, 0); + vtimer_irq = vcpu_vtimer(vcpu)->irq.irq; + ptimer_irq = vcpu_ptimer(vcpu)->irq.irq; + + if (vtimer_irq == ptimer_irq) + return false; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq || + vcpu_ptimer(vcpu)->irq.irq != ptimer_irq) + return false; + } + + return true; +} + int kvm_timer_enable(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; @@ -636,6 +659,11 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (!vgic_initialized(vcpu->kvm)) return -ENODEV; + if (!timer_irqs_are_valid(vcpu->kvm)) { + kvm_debug("incorrectly configured timer irqs\n"); + return -EINVAL; + } + /* * Find the physical IRQ number corresponding to the host_vtimer_irq */ @@ -685,3 +713,79 @@ void kvm_timer_init_vhe(void) val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); write_sysreg(val, cnthctl_el2); } + +static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq) +{ + struct kvm_vcpu *vcpu; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) { + vcpu_vtimer(vcpu)->irq.irq = vtimer_irq; + vcpu_ptimer(vcpu)->irq.irq = ptimer_irq; + } +} + +int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + int __user *uaddr = (int __user *)(long)attr->addr; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + int irq; + + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + + if (get_user(irq, uaddr)) + return -EFAULT; + + if (!(irq_is_ppi(irq))) + return -EINVAL; + + if (vcpu->arch.timer_cpu.enabled) + return -EBUSY; + + switch (attr->attr) { + case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: + set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq); + break; + case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq); + break; + default: + return -ENXIO; + } + + return 0; +} + +int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + int __user *uaddr = (int __user *)(long)attr->addr; + struct arch_timer_context *timer; + int irq; + + switch (attr->attr) { + case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: + timer = vcpu_vtimer(vcpu); + break; + case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + timer = vcpu_ptimer(vcpu); + break; + default: + return -ENXIO; + } + + irq = timer->irq.irq; + return put_user(irq, uaddr); +} + +int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: + case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + return 0; + } + + return -ENXIO; +} -- cgit v1.2.3 From c6ccd30e0de384f506449474ca780ff680ad4217 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 4 May 2017 13:24:20 +0200 Subject: KVM: arm/arm64: Introduce an allocator for in-kernel irq lines Having multiple devices being able to signal the same interrupt line is very confusing and almost certainly guarantees a configuration error. Therefore, introduce a very simple allocator which allows a device to claim an interrupt line from the vgic for a given VM. Signed-off-by: Christoffer Dall Acked-by: Marc Zyngier --- include/kvm/arm_vgic.h | 5 +++++ virt/kvm/arm/vgic/vgic.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index dde59fbc5f80..5d5b34534ce9 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -121,6 +121,9 @@ struct vgic_irq { u8 source; /* GICv2 SGIs only */ u8 priority; enum vgic_irq_config config; /* Level or edge */ + + void *owner; /* Opaque pointer to reserve an interrupt + for in-kernel devices. */ }; struct vgic_register_region; @@ -340,4 +343,6 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); */ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm); +int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner); + #endif /* __KVM_ARM_VGIC_H */ diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index c66feaca2a5d..9628945234e4 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -434,6 +434,39 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) return 0; } +/** + * kvm_vgic_set_owner - Set the owner of an interrupt for a VM + * + * @vcpu: Pointer to the VCPU (used for PPIs) + * @intid: The virtual INTID identifying the interrupt (PPI or SPI) + * @owner: Opaque pointer to the owner + * + * Returns 0 if intid is not already used by another in-kernel device and the + * owner is set, otherwise returns an error code. + */ +int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner) +{ + struct vgic_irq *irq; + int ret = 0; + + if (!vgic_initialized(vcpu->kvm)) + return -EAGAIN; + + /* SGIs and LPIs cannot be wired up to any device */ + if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid)) + return -EINVAL; + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + spin_lock(&irq->irq_lock); + if (irq->owner && irq->owner != owner) + ret = -EEXIST; + else + irq->owner = owner; + spin_unlock(&irq->irq_lock); + + return ret; +} + /** * vgic_prune_ap_list - Remove non-relevant interrupts from the list * -- cgit v1.2.3 From cb3f0ad881a6cee39c6a652b4aa4f12f341d98f0 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 16 May 2017 12:41:18 +0200 Subject: KVM: arm/arm64: Disallow userspace control of in-kernel IRQ lines When injecting an IRQ to the VGIC, you now have to present an owner token for that IRQ line to show that you are the owner of that line. IRQ lines driven from userspace or via an irqfd do not have an owner and will simply pass a NULL pointer. Also get rid of the unused kvm_vgic_inject_mapped_irq prototype. Signed-off-by: Christoffer Dall Acked-by: Marc Zyngier --- include/kvm/arm_vgic.h | 4 +--- virt/kvm/arm/arch_timer.c | 3 ++- virt/kvm/arm/arm.c | 4 ++-- virt/kvm/arm/pmu.c | 3 ++- virt/kvm/arm/vgic/vgic-irqfd.c | 2 +- virt/kvm/arm/vgic/vgic.c | 15 +++++++++++---- 6 files changed, 19 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 5d5b34534ce9..131668f8599c 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -300,9 +300,7 @@ int kvm_vgic_hyp_init(void); void kvm_vgic_init_cpu_hardware(void); int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level); -int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level); + bool level, void *owner); int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq); int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq); bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 07f6f9bfc1f2..8e89d63005c7 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -226,7 +226,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, if (likely(irqchip_in_kernel(vcpu->kvm))) { ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq, - timer_ctx->irq.level); + timer_ctx->irq.level, + timer_ctx); WARN_ON(ret); } } diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 72816d3f23a7..a265acc53e39 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -832,7 +832,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS) return -EINVAL; - return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level); + return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL); case KVM_ARM_IRQ_TYPE_SPI: if (!irqchip_in_kernel(kvm)) return -ENXIO; @@ -840,7 +840,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, if (irq_num < VGIC_NR_PRIVATE_IRQS) return -EINVAL; - return kvm_vgic_inject_irq(kvm, 0, irq_num, level); + return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL); } return -EINVAL; diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 3f0866925b6b..9923eb90cdc7 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -215,7 +215,8 @@ static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu) if (likely(irqchip_in_kernel(vcpu->kvm))) { int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - pmu->irq_num, overflow); + pmu->irq_num, overflow, + &vcpu->arch.pmu); WARN_ON(ret); } } diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c index f138ed2e9c63..b7baf581611a 100644 --- a/virt/kvm/arm/vgic/vgic-irqfd.c +++ b/virt/kvm/arm/vgic/vgic-irqfd.c @@ -34,7 +34,7 @@ static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e, if (!vgic_valid_spi(kvm, spi_id)) return -EINVAL; - return kvm_vgic_inject_irq(kvm, 0, spi_id, level); + return kvm_vgic_inject_irq(kvm, 0, spi_id, level, NULL); } /** diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 9628945234e4..fed717e07938 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -235,10 +235,14 @@ static void vgic_sort_ap_list(struct kvm_vcpu *vcpu) /* * Only valid injection if changing level for level-triggered IRQs or for a - * rising edge. + * rising edge, and in-kernel connected IRQ lines can only be controlled by + * their owner. */ -static bool vgic_validate_injection(struct vgic_irq *irq, bool level) +static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner) { + if (irq->owner != owner) + return false; + switch (irq->config) { case VGIC_CONFIG_LEVEL: return irq->line_level != level; @@ -350,13 +354,16 @@ retry: * false: to ignore the call * Level-sensitive true: raise the input signal * false: lower the input signal + * @owner: The opaque pointer to the owner of the IRQ being raised to verify + * that the caller is allowed to inject this IRQ. Userspace + * injections will have owner == NULL. * * The VGIC is not concerned with devices being active-LOW or active-HIGH for * level-sensitive interrupts. You can think of the level parameter as 1 * being HIGH and 0 being LOW and all devices being active-HIGH. */ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level) + bool level, void *owner) { struct kvm_vcpu *vcpu; struct vgic_irq *irq; @@ -378,7 +385,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, spin_lock(&irq->irq_lock); - if (!vgic_validate_injection(irq, level)) { + if (!vgic_validate_injection(irq, level, owner)) { /* Nothing to see here, move along... */ spin_unlock(&irq->irq_lock); vgic_put_irq(kvm, irq); -- cgit v1.2.3 From ebb127f2d6f32665643165289151bd20929d9931 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 16 May 2017 19:53:50 +0200 Subject: KVM: arm/arm64: Don't assume initialized vgic when setting PMU IRQ The PMU IRQ number is set through the VCPU device's KVM_SET_DEVICE_ATTR ioctl handler for the KVM_ARM_VCPU_PMU_V3_IRQ attribute, but there is no enforced or stated requirement that this must happen after initializing the VGIC. As a result, calling vgic_valid_spi() which relies on the nr_spis being set during the VGIC init can incorrectly fail. Introduce irq_is_spi, which determines if an IRQ number is within the SPI range without verifying it against the actual VGIC properties. Signed-off-by: Christoffer Dall Reviewed-by: Marc Zyngier --- include/kvm/arm_vgic.h | 2 ++ virt/kvm/arm/pmu.c | 22 ++++++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 131668f8599c..a2ae9d2de21f 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -39,6 +39,8 @@ #define KVM_IRQCHIP_NUM_PINS (1020 - 32) #define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS) +#define irq_is_spi(irq) ((irq) >= VGIC_NR_PRIVATE_IRQS && \ + (irq) <= VGIC_MAX_SPI) enum vgic_type { VGIC_V2, /* Good ol' GICv2 */ diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 9923eb90cdc7..fc8a723ff387 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -458,10 +458,24 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) /* * A valid interrupt configuration for the PMU is either to have a * properly configured interrupt number and using an in-kernel - * irqchip, or to neither set an IRQ nor create an in-kernel irqchip. + * irqchip, or to not have an in-kernel GIC and not set an IRQ. */ - if (kvm_arm_pmu_irq_initialized(vcpu) != irqchip_in_kernel(vcpu->kvm)) - return -EINVAL; + if (irqchip_in_kernel(vcpu->kvm)) { + int irq = vcpu->arch.pmu.irq_num; + if (!kvm_arm_pmu_irq_initialized(vcpu)) + return -EINVAL; + + /* + * If we are using an in-kernel vgic, at this point we know + * the vgic will be initialized, so we can check the PMU irq + * number against the dimensions of the vgic and make sure + * it's valid. + */ + if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq)) + return -EINVAL; + } else if (kvm_arm_pmu_irq_initialized(vcpu)) { + return -EINVAL; + } kvm_pmu_vcpu_reset(vcpu); vcpu->arch.pmu.ready = true; @@ -547,7 +561,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return -EFAULT; /* The PMU overflow interrupt can be a PPI or a valid SPI. */ - if (!(irq_is_ppi(irq) || vgic_valid_spi(vcpu->kvm, irq))) + if (!(irq_is_ppi(irq) || irq_is_spi(irq))) return -EINVAL; if (!pmu_irq_is_valid(vcpu->kvm, irq)) -- cgit v1.2.3 From 59da1cbfd840d69bd7a310249924da3fc202c417 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:33 +0100 Subject: KVM: arm64: vgic-v3: Add hook to handle guest GICv3 sysreg accesses at EL2 In order to start handling guest access to GICv3 system registers, let's add a hook that will get called when we trap a system register access. This is gated by a new static key (vgic_v3_cpuif_trap). Tested-by: Alexander Graf Acked-by: David Daney Reviewed-by: Eric Auger Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_hyp.h | 1 + arch/arm64/kvm/hyp/switch.c | 14 ++++++++++++++ include/kvm/arm_vgic.h | 1 + virt/kvm/arm/hyp/vgic-v3-sr.c | 38 ++++++++++++++++++++++++++++++++++++++ virt/kvm/arm/vgic/vgic-v3.c | 2 ++ 5 files changed, 56 insertions(+) (limited to 'include') diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index b18e852d27e8..4572a9b560fa 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -127,6 +127,7 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); void __vgic_v3_save_state(struct kvm_vcpu *vcpu); void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); +int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); void __timer_save_state(struct kvm_vcpu *vcpu); void __timer_restore_state(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index e5f089de6526..945e79c641c4 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -350,6 +350,20 @@ again: } } + if (static_branch_unlikely(&vgic_v3_cpuif_trap) && + exit_code == ARM_EXCEPTION_TRAP && + (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { + int ret = __vgic_v3_perform_cpuif_access(vcpu); + + if (ret == 1) { + __skip_instr(vcpu); + goto again; + } + + /* 0 falls through to be handled out of EL2 */ + } + fp_enabled = __fpsimd_enabled(); __sysreg_save_guest_state(guest_ctxt); diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 2d923a6b2175..34dba516ef24 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -292,6 +292,7 @@ struct vgic_cpu { }; extern struct static_key_false vgic_v2_cpuif_trap; +extern struct static_key_false vgic_v3_cpuif_trap; int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); void kvm_vgic_early_init(struct kvm *kvm); diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 3dd8f0c4419e..e6c05b95a1b1 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -19,6 +19,7 @@ #include #include +#include #include #define vtr_to_max_lr_idx(v) ((v) & 0xf) @@ -371,3 +372,40 @@ void __hyp_text __vgic_v3_write_vmcr(u32 vmcr) { write_gicreg(vmcr, ICH_VMCR_EL2); } + +#ifdef CONFIG_ARM64 + +int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) +{ + int rt; + u32 esr; + u32 vmcr; + void (*fn)(struct kvm_vcpu *, u32, int); + bool is_read; + u32 sysreg; + + esr = kvm_vcpu_get_hsr(vcpu); + if (vcpu_mode_is_32bit(vcpu)) { + if (!kvm_condition_valid(vcpu)) + return 1; + + sysreg = esr_cp15_to_sysreg(esr); + } else { + sysreg = esr_sys64_to_sysreg(esr); + } + + is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ; + + switch (sysreg) { + default: + return 0; + } + + vmcr = __vgic_v3_read_vmcr(); + rt = kvm_vcpu_sys_get_rt(vcpu); + fn(vcpu, vmcr, rt); + + return 1; +} + +#endif diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 030248e669f6..fac6e23cd0b3 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -429,6 +429,8 @@ out: return ret; } +DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap); + /** * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT * @node: pointer to the DT node -- cgit v1.2.3 From 132a324ab62fe4fb8d6dcc2ab4eddb0e93b69afe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:36 +0100 Subject: KVM: arm64: vgic-v3: Add ICV_IAR1_EL1 handler Add a handler for reading the guest's view of the ICC_IAR1_EL1 register. This involves finding the highest priority Group-1 interrupt, checking against both PMR and the active group priority, activating the interrupt and setting the group priority as active. Tested-by: Alexander Graf Acked-by: David Daney Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- include/linux/irqchip/arm-gic-v3.h | 1 + virt/kvm/arm/hyp/vgic-v3-sr.c | 163 +++++++++++++++++++++++++++++++++++++ 2 files changed, 164 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 1fa293a37f4a..d70668fae003 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -405,6 +405,7 @@ #define ICH_LR_PHYS_ID_SHIFT 32 #define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) #define ICH_LR_PRIORITY_SHIFT 48 +#define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT) /* These are for GICv2 emulation only */ #define GICH_LR_VIRTUALID (0x3ffUL << 0) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 25f09721241f..5a20f8d5bada 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -24,6 +24,7 @@ #define vtr_to_max_lr_idx(v) ((v) & 0xf) #define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) +#define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5)) static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) { @@ -381,6 +382,88 @@ static int __hyp_text __vgic_v3_bpr_min(void) return 8 - vtr_to_nr_pre_bits(read_gicreg(ICH_VTR_EL2)); } +static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu) +{ + u32 esr = kvm_vcpu_get_hsr(vcpu); + u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; + + return crm != 8; +} + +#define GICv3_IDLE_PRIORITY 0xff + +static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, + u32 vmcr, + u64 *lr_val) +{ + unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs; + u8 priority = GICv3_IDLE_PRIORITY; + int i, lr = -1; + + for (i = 0; i < used_lrs; i++) { + u64 val = __gic_v3_get_lr(i); + u8 lr_prio = (val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; + + /* Not pending in the state? */ + if ((val & ICH_LR_STATE) != ICH_LR_PENDING_BIT) + continue; + + /* Group-0 interrupt, but Group-0 disabled? */ + if (!(val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG0_MASK)) + continue; + + /* Group-1 interrupt, but Group-1 disabled? */ + if ((val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG1_MASK)) + continue; + + /* Not the highest priority? */ + if (lr_prio >= priority) + continue; + + /* This is a candidate */ + priority = lr_prio; + *lr_val = val; + lr = i; + } + + if (lr == -1) + *lr_val = ICC_IAR1_EL1_SPURIOUS; + + return lr; +} + +static int __hyp_text __vgic_v3_get_highest_active_priority(void) +{ + u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); + u32 hap = 0; + int i; + + for (i = 0; i < nr_apr_regs; i++) { + u32 val; + + /* + * The ICH_AP0Rn_EL2 and ICH_AP1Rn_EL2 registers + * contain the active priority levels for this VCPU + * for the maximum number of supported priority + * levels, and we return the full priority level only + * if the BPR is programmed to its minimum, otherwise + * we return a combination of the priority level and + * subpriority, as determined by the setting of the + * BPR, but without the full subpriority. + */ + val = __vgic_v3_read_ap0rn(i); + val |= __vgic_v3_read_ap1rn(i); + if (!val) { + hap += 32; + continue; + } + + return (hap + __ffs(val)) << __vgic_v3_bpr_min(); + } + + return GICv3_IDLE_PRIORITY; +} + static unsigned int __hyp_text __vgic_v3_get_bpr0(u32 vmcr) { return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; @@ -401,6 +484,83 @@ static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr) return bpr; } +/* + * Convert a priority to a preemption level, taking the relevant BPR + * into account by zeroing the sub-priority bits. + */ +static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) +{ + unsigned int bpr; + + if (!grp) + bpr = __vgic_v3_get_bpr0(vmcr) + 1; + else + bpr = __vgic_v3_get_bpr1(vmcr); + + return pri & (GENMASK(7, 0) << bpr); +} + +/* + * The priority value is independent of any of the BPR values, so we + * normalize it using the minumal BPR value. This guarantees that no + * matter what the guest does with its BPR, we can always set/get the + * same value of a priority. + */ +static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) +{ + u8 pre, ap; + u32 val; + int apr; + + pre = __vgic_v3_pri_to_pre(pri, vmcr, grp); + ap = pre >> __vgic_v3_bpr_min(); + apr = ap / 32; + + if (!grp) { + val = __vgic_v3_read_ap0rn(apr); + __vgic_v3_write_ap0rn(val | BIT(ap % 32), apr); + } else { + val = __vgic_v3_read_ap1rn(apr); + __vgic_v3_write_ap1rn(val | BIT(ap % 32), apr); + } +} + +static void __hyp_text __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 lr_val; + u8 lr_prio, pmr; + int lr, grp; + + grp = __vgic_v3_get_group(vcpu); + + lr = __vgic_v3_highest_priority_lr(vcpu, vmcr, &lr_val); + if (lr < 0) + goto spurious; + + if (grp != !!(lr_val & ICH_LR_GROUP)) + goto spurious; + + pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; + lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; + if (pmr <= lr_prio) + goto spurious; + + if (__vgic_v3_get_highest_active_priority() <= __vgic_v3_pri_to_pre(lr_prio, vmcr, grp)) + goto spurious; + + lr_val &= ~ICH_LR_STATE; + /* No active state for LPIs */ + if ((lr_val & ICH_LR_VIRTUAL_ID_MASK) <= VGIC_MAX_SPI) + lr_val |= ICH_LR_ACTIVE_BIT; + __gic_v3_set_lr(lr_val, lr); + __vgic_v3_set_active_priority(lr_prio, vmcr, grp); + vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); + return; + +spurious: + vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS); +} + static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK)); @@ -465,6 +625,9 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ; switch (sysreg) { + case SYS_ICC_IAR1_EL1: + fn = __vgic_v3_read_iar; + break; case SYS_ICC_GRPEN1_EL1: if (is_read) fn = __vgic_v3_read_igrpen1; -- cgit v1.2.3 From b6f49035b4bf6e2709f2a5fed3107f5438c1fd02 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:37 +0100 Subject: KVM: arm64: vgic-v3: Add ICV_EOIR1_EL1 handler Add a handler for writing the guest's view of the ICC_EOIR1_EL1 register. This involves dropping the priority of the interrupt, and deactivating it if required (EOImode == 0). Tested-by: Alexander Graf Acked-by: David Daney Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- include/linux/irqchip/arm-gic-v3.h | 2 + virt/kvm/arm/hyp/vgic-v3-sr.c | 120 +++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index d70668fae003..1f458ac6f494 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -417,6 +417,8 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_EOIcount_SHIFT 27 +#define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) #define ICH_VMCR_ACK_CTL_SHIFT 2 #define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 5a20f8d5bada..e9ff99112c4d 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -432,6 +432,26 @@ static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, return lr; } +static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, + int intid, u64 *lr_val) +{ + unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs; + int i; + + for (i = 0; i < used_lrs; i++) { + u64 val = __gic_v3_get_lr(i); + + if ((val & ICH_LR_VIRTUAL_ID_MASK) == intid && + (val & ICH_LR_ACTIVE_BIT)) { + *lr_val = val; + return i; + } + } + + *lr_val = ICC_IAR1_EL1_SPURIOUS; + return -1; +} + static int __hyp_text __vgic_v3_get_highest_active_priority(void) { u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); @@ -525,6 +545,44 @@ static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) } } +static int __hyp_text __vgic_v3_clear_highest_active_priority(void) +{ + u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); + u32 hap = 0; + int i; + + for (i = 0; i < nr_apr_regs; i++) { + u32 ap0, ap1; + int c0, c1; + + ap0 = __vgic_v3_read_ap0rn(i); + ap1 = __vgic_v3_read_ap1rn(i); + if (!ap0 && !ap1) { + hap += 32; + continue; + } + + c0 = ap0 ? __ffs(ap0) : 32; + c1 = ap1 ? __ffs(ap1) : 32; + + /* Always clear the LSB, which is the highest priority */ + if (c0 < c1) { + ap0 &= ~BIT(c0); + __vgic_v3_write_ap0rn(ap0, i); + hap += c0; + } else { + ap1 &= ~BIT(c1); + __vgic_v3_write_ap1rn(ap1, i); + hap += c1; + } + + /* Rescale to 8 bits of priority */ + return hap << __vgic_v3_bpr_min(); + } + + return GICv3_IDLE_PRIORITY; +} + static void __hyp_text __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 lr_val; @@ -561,6 +619,65 @@ spurious: vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS); } +static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val) +{ + lr_val &= ~ICH_LR_ACTIVE_BIT; + if (lr_val & ICH_LR_HW) { + u32 pid; + + pid = (lr_val & ICH_LR_PHYS_ID_MASK) >> ICH_LR_PHYS_ID_SHIFT; + gic_write_dir(pid); + } + + __gic_v3_set_lr(lr_val, lr); +} + +static void __hyp_text __vgic_v3_bump_eoicount(void) +{ + u32 hcr; + + hcr = read_gicreg(ICH_HCR_EL2); + hcr += 1 << ICH_HCR_EOIcount_SHIFT; + write_gicreg(hcr, ICH_HCR_EL2); +} + +static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u32 vid = vcpu_get_reg(vcpu, rt); + u64 lr_val; + u8 lr_prio, act_prio; + int lr, grp; + + grp = __vgic_v3_get_group(vcpu); + + /* Drop priority in any case */ + act_prio = __vgic_v3_clear_highest_active_priority(); + + /* If EOIing an LPI, no deactivate to be performed */ + if (vid >= VGIC_MIN_LPI) + return; + + /* EOImode == 1, nothing to be done here */ + if (vmcr & ICH_VMCR_EOIM_MASK) + return; + + lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val); + if (lr == -1) { + __vgic_v3_bump_eoicount(); + return; + } + + lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; + + /* If priorities or group do not match, the guest has fscked-up. */ + if (grp != !!(lr_val & ICH_LR_GROUP) || + __vgic_v3_pri_to_pre(lr_prio, vmcr, grp) != act_prio) + return; + + /* Let's now perform the deactivation */ + __vgic_v3_clear_active_lr(lr, lr_val); +} + static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK)); @@ -628,6 +745,9 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) case SYS_ICC_IAR1_EL1: fn = __vgic_v3_read_iar; break; + case SYS_ICC_EOIR1_EL1: + fn = __vgic_v3_write_eoir; + break; case SYS_ICC_GRPEN1_EL1: if (is_read) fn = __vgic_v3_read_igrpen1; -- cgit v1.2.3 From 9c7bfc288c71068ab323b802dba2eb87fd08b127 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:40 +0100 Subject: KVM: arm64: vgic-v3: Enable trapping of Group-1 system registers In order to be able to trap Group-1 GICv3 system registers, we need to set ICH_HCR_EL2.TALL1 before entering the guest. This is conditionally done after having restored the guest's state, and cleared on exit. Tested-by: Alexander Graf Acked-by: David Daney Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/linux/irqchip/arm-gic-v3.h | 1 + virt/kvm/arm/hyp/vgic-v3-sr.c | 11 +++++++++++ virt/kvm/arm/vgic/vgic-v3.c | 4 ++++ 3 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 1f458ac6f494..6b05d2ac8c54 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -417,6 +417,7 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_TALL1 (1 << 12) #define ICH_HCR_EOIcount_SHIFT 27 #define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index f031e8f088ae..a2a62f030341 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -258,6 +258,9 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); } } else { + if (static_branch_unlikely(&vgic_v3_cpuif_trap)) + write_gicreg(0, ICH_HCR_EL2); + cpu_if->vgic_elrsr = 0xffff; cpu_if->vgic_ap0r[0] = 0; cpu_if->vgic_ap0r[1] = 0; @@ -330,6 +333,14 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) for (i = 0; i < used_lrs; i++) __gic_v3_set_lr(cpu_if->vgic_lr[i], i); + } else { + /* + * If we need to trap system registers, we must write + * ICH_HCR_EL2 anyway, even if no interrupts are being + * injected, + */ + if (static_branch_unlikely(&vgic_v3_cpuif_trap)) + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); } /* diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index fac6e23cd0b3..722bdebdfbb5 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -21,6 +21,8 @@ #include "vgic.h" +static bool group1_trap; + void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; @@ -258,6 +260,8 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) /* Get the show on the road... */ vgic_v3->vgic_hcr = ICH_HCR_EN; + if (group1_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TALL1; } int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) -- cgit v1.2.3 From abf55766f7b062234083ff612446ff8d47e2417e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:45 +0100 Subject: KVM: arm64: vgic-v3: Enable trapping of Group-0 system registers In order to be able to trap Group-0 GICv3 system registers, we need to set ICH_HCR_EL2.TALL0 begore entering the guest. This is conditionnaly done after having restored the guest's state, and cleared on exit. Tested-by: Alexander Graf Acked-by: David Daney Acked-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/linux/irqchip/arm-gic-v3.h | 1 + virt/kvm/arm/vgic/vgic-v3.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 6b05d2ac8c54..c7f31a962cfc 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -417,6 +417,7 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_TALL0 (1 << 11) #define ICH_HCR_TALL1 (1 << 12) #define ICH_HCR_EOIcount_SHIFT 27 #define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 4b2b62b73470..39046ee5be25 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -21,6 +21,7 @@ #include "vgic.h" +static bool group0_trap; static bool group1_trap; void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) @@ -260,6 +261,8 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) /* Get the show on the road... */ vgic_v3->vgic_hcr = ICH_HCR_EN; + if (group0_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TALL0; if (group1_trap) vgic_v3->vgic_hcr |= ICH_HCR_TALL1; } @@ -492,7 +495,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info) if (kvm_vgic_global_state.vcpu_base == 0) kvm_info("disabling GICv2 emulation\n"); - if (group1_trap) { + if (group0_trap || group1_trap) { kvm_info("GICv3 sysreg trapping enabled (reduced performance)\n"); static_branch_enable(&vgic_v3_cpuif_trap); } -- cgit v1.2.3 From ff89511ef29b794d6a9c6b62f5ea76fc013cdae7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:53 +0100 Subject: KVM: arm64: Enable GICv3 common sysreg trapping via command-line Now that we're able to safely handle common sysreg access, let's give the user the opportunity to enable it by passing a specific command-line option (vgic_v3.common_trap). Tested-by: Alexander Graf Acked-by: David Daney Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ include/linux/irqchip/arm-gic-v3.h | 1 + virt/kvm/arm/vgic/vgic-v3.c | 11 ++++++++++- 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 88bdc421351f..aa8341e73b35 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1837,6 +1837,10 @@ [KVM,ARM] Trap guest accesses to GICv3 group-1 system registers + kvm-arm.vgic_v3_common_trap= + [KVM,ARM] Trap guest accesses to GICv3 common + system registers + kvm-intel.ept= [KVM,Intel] Disable extended page tables (virtualized MMU) support on capable Intel chips. Default is 1 (enabled) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index c7f31a962cfc..6a1f87ff94e2 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -417,6 +417,7 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_TC (1 << 10) #define ICH_HCR_TALL0 (1 << 11) #define ICH_HCR_TALL1 (1 << 12) #define ICH_HCR_EOIcount_SHIFT 27 diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 35c00efc110b..91cf8b4f01f1 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -23,6 +23,7 @@ static bool group0_trap; static bool group1_trap; +static bool common_trap; void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) { @@ -265,6 +266,8 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) vgic_v3->vgic_hcr |= ICH_HCR_TALL0; if (group1_trap) vgic_v3->vgic_hcr |= ICH_HCR_TALL1; + if (common_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TC; } int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) @@ -450,6 +453,12 @@ static int __init early_group1_trap_cfg(char *buf) } early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg); +static int __init early_common_trap_cfg(char *buf) +{ + return strtobool(buf, &common_trap); +} +early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg); + /** * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT * @node: pointer to the DT node @@ -508,7 +517,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info) } #endif - if (group0_trap || group1_trap) { + if (group0_trap || group1_trap || common_trap) { kvm_info("GICv3 sysreg trapping enabled (reduced performance)\n"); static_branch_enable(&vgic_v3_cpuif_trap); } -- cgit v1.2.3 From 134764ed6e12d9f99b3de68b8aaeae1ba842d91c Mon Sep 17 00:00:00 2001 From: Aravinda Prasad Date: Thu, 11 May 2017 16:32:48 +0530 Subject: KVM: PPC: Book3S HV: Add new capability to control MCE behaviour This introduces a new KVM capability to control how KVM behaves on machine check exception (MCE) in HV KVM guests. If this capability has not been enabled, KVM redirects machine check exceptions to guest's 0x200 vector, if the address in error belongs to the guest. With this capability enabled, KVM will cause a guest exit with the exit reason indicating an NMI. The new capability is required to avoid problems if a new kernel/KVM is used with an old QEMU, running a guest that doesn't issue "ibm,nmi-register". As old QEMU does not understand the NMI exit type, it treats it as a fatal error. However, the guest could have handled the machine check error if the exception was delivered to guest's 0x200 interrupt vector instead of NMI exit in case of old QEMU. [paulus@ozlabs.org - Reworded the commit message to be clearer, enable only on HV KVM.] Signed-off-by: Aravinda Prasad Reviewed-by: David Gibson Signed-off-by: Mahesh Salgaonkar Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/api.txt | 11 +++++++++++ arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kvm/powerpc.c | 14 ++++++++++++++ include/uapi/linux/kvm.h | 1 + 5 files changed, 28 insertions(+) (limited to 'include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 68b66b538d2d..1531a3cd548f 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4011,6 +4011,17 @@ vsmt_mode being returned when the KVM_CAP_PPC_SMT capability is subsequently queried for the VM. This capability is only supported by HV KVM, and can only be set before any VCPUs have been created. +7.12 KVM_CAP_PPC_FWNMI + +Architectures: ppc +Parameters: none + +With this capability a machine check exception in the guest address +space will cause KVM to exit the guest with NMI exit reason. This +enables QEMU to build error log and branch to guest kernel registered +machine check handling routine. Without this capability KVM will +branch to guests' 0x200 interrupt vector. + 8. Other capabilities. ---------------------- diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 683c3c82ce9c..05866391f406 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -287,6 +287,7 @@ struct kvm_arch { cpumask_t need_tlb_flush; cpumask_t cpu_in_guest; u8 radix; + u8 fwnmi_enabled; pgd_t *pgtable; u64 process_table; struct dentry *debugfs_dir; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 293fbdf96e7d..ae8e89e0d083 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -485,6 +485,7 @@ int main(void) OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls); OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v); OFFSET(KVM_RADIX, kvm, arch.radix); + OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled); OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr); OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar); OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8208c2b95a93..ccaa7a407c15 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -623,6 +623,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) /* Disable this on POWER9 until code handles new HPTE format */ r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300); break; +#endif +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + case KVM_CAP_PPC_FWNMI: + r = hv_enabled; + break; #endif case KVM_CAP_PPC_HTM: r = cpu_has_feature(CPU_FTR_TM_COMP) && @@ -1543,6 +1548,15 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, break; } #endif /* CONFIG_KVM_XICS */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + case KVM_CAP_PPC_FWNMI: + r = -EINVAL; + if (!is_kvmppc_hv_enabled(vcpu->kvm)) + break; + r = 0; + vcpu->kvm->arch.fwnmi_enabled = true; + break; +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ default: r = -EINVAL; break; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 577429a95ad8..89bc145b4051 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -895,6 +895,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SPAPR_TCE_VFIO 142 #define KVM_CAP_X86_GUEST_MWAIT 143 #define KVM_CAP_ARM_USER_IRQ 144 +#define KVM_CAP_PPC_FWNMI 145 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 2ed4f9dd19c0f76f7fb56c4b201696d29149325c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 21 Jun 2017 16:01:27 +1000 Subject: KVM: PPC: Book3S HV: Add capability to report possible virtual SMT modes Now that userspace can set the virtual SMT mode by enabling the KVM_CAP_PPC_SMT capability, it is useful for userspace to be able to query the set of possible virtual SMT modes. This provides a new capability, KVM_CAP_PPC_SMT_POSSIBLE, to provide this information. The return value is a bitmap of possible modes, with bit N set if virtual SMT mode 2^N is available. That is, 1 indicates SMT1 is available, 2 indicates that SMT2 is available, 3 indicates that both SMT1 and SMT2 are available, and so on. Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/api.txt | 11 +++++++++++ arch/powerpc/kvm/powerpc.c | 10 ++++++++++ include/uapi/linux/kvm.h | 1 + 3 files changed, 22 insertions(+) (limited to 'include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 1531a3cd548f..63875dbb6ef5 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4010,6 +4010,8 @@ be 0. A successful call to enable this capability will result in vsmt_mode being returned when the KVM_CAP_PPC_SMT capability is subsequently queried for the VM. This capability is only supported by HV KVM, and can only be set before any VCPUs have been created. +The KVM_CAP_PPC_SMT_POSSIBLE capability indicates which virtual SMT +modes are available. 7.12 KVM_CAP_PPC_FWNMI @@ -4183,3 +4185,12 @@ Currently the following bits are defined for the device_irq_level bitmap: Future versions of kvm may implement additional events. These will get indicated by returning a higher number from KVM_CHECK_EXTENSION and will be listed above. + +8.10 KVM_CAP_PPC_SMT_POSSIBLE + +Architectures: ppc + +Querying this capability returns a bitmap indicating the possible +virtual SMT modes that can be set using KVM_CAP_PPC_SMT. If bit N +(counting from the right) is set, then a virtual SMT mode of 2^N is +available. diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ccaa7a407c15..b14736f3870b 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -566,6 +566,16 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = threads_per_subcore; } break; + case KVM_CAP_PPC_SMT_POSSIBLE: + r = 1; + if (hv_enabled) { + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + r = ((threads_per_subcore << 1) - 1); + else + /* P9 can emulate dbells, so allow any mode */ + r = 8 | 4 | 2 | 1; + } + break; case KVM_CAP_PPC_RMA: r = 0; break; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 89bc145b4051..36ea74aa3422 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -896,6 +896,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_X86_GUEST_MWAIT 143 #define KVM_CAP_ARM_USER_IRQ 144 #define KVM_CAP_PPC_FWNMI 145 +#define KVM_CAP_PPC_SMT_POSSIBLE 146 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From 4036e3874a1ce41a4f7267289f9a0d8e5cd49408 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 4 Aug 2016 17:58:47 +0200 Subject: KVM: s390: ioctls to get and set guest storage attributes * Add the struct used in the ioctls to get and set CMMA attributes. * Add the two functions needed to get and set the CMMA attributes for guest pages. * Add the two ioctls that use the aforementioned functions. Signed-off-by: Claudio Imbrenda Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- Documentation/virtual/kvm/api.txt | 135 +++++++++++++++++++++++++ arch/s390/kvm/kvm-s390.c | 202 +++++++++++++++++++++++++++++++++++++- include/uapi/linux/kvm.h | 33 +++++++ 3 files changed, 369 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 4029943887a3..912b7df8215a 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3255,6 +3255,141 @@ Otherwise, if the MCE is a corrected error, KVM will just store it in the corresponding bank (provided this bank is not holding a previously reported uncorrected error). +4.107 KVM_S390_GET_CMMA_BITS + +Capability: KVM_CAP_S390_CMMA_MIGRATION +Architectures: s390 +Type: vm ioctl +Parameters: struct kvm_s390_cmma_log (in, out) +Returns: 0 on success, a negative value on error + +This ioctl is used to get the values of the CMMA bits on the s390 +architecture. It is meant to be used in two scenarios: +- During live migration to save the CMMA values. Live migration needs + to be enabled via the KVM_REQ_START_MIGRATION VM property. +- To non-destructively peek at the CMMA values, with the flag + KVM_S390_CMMA_PEEK set. + +The ioctl takes parameters via the kvm_s390_cmma_log struct. The desired +values are written to a buffer whose location is indicated via the "values" +member in the kvm_s390_cmma_log struct. The values in the input struct are +also updated as needed. +Each CMMA value takes up one byte. + +struct kvm_s390_cmma_log { + __u64 start_gfn; + __u32 count; + __u32 flags; + union { + __u64 remaining; + __u64 mask; + }; + __u64 values; +}; + +start_gfn is the number of the first guest frame whose CMMA values are +to be retrieved, + +count is the length of the buffer in bytes, + +values points to the buffer where the result will be written to. + +If count is greater than KVM_S390_SKEYS_MAX, then it is considered to be +KVM_S390_SKEYS_MAX. KVM_S390_SKEYS_MAX is re-used for consistency with +other ioctls. + +The result is written in the buffer pointed to by the field values, and +the values of the input parameter are updated as follows. + +Depending on the flags, different actions are performed. The only +supported flag so far is KVM_S390_CMMA_PEEK. + +The default behaviour if KVM_S390_CMMA_PEEK is not set is: +start_gfn will indicate the first page frame whose CMMA bits were dirty. +It is not necessarily the same as the one passed as input, as clean pages +are skipped. + +count will indicate the number of bytes actually written in the buffer. +It can (and very often will) be smaller than the input value, since the +buffer is only filled until 16 bytes of clean values are found (which +are then not copied in the buffer). Since a CMMA migration block needs +the base address and the length, for a total of 16 bytes, we will send +back some clean data if there is some dirty data afterwards, as long as +the size of the clean data does not exceed the size of the header. This +allows to minimize the amount of data to be saved or transferred over +the network at the expense of more roundtrips to userspace. The next +invocation of the ioctl will skip over all the clean values, saving +potentially more than just the 16 bytes we found. + +If KVM_S390_CMMA_PEEK is set: +the existing storage attributes are read even when not in migration +mode, and no other action is performed; + +the output start_gfn will be equal to the input start_gfn, + +the output count will be equal to the input count, except if the end of +memory has been reached. + +In both cases: +the field "remaining" will indicate the total number of dirty CMMA values +still remaining, or 0 if KVM_S390_CMMA_PEEK is set and migration mode is +not enabled. + +mask is unused. + +values points to the userspace buffer where the result will be stored. + +This ioctl can fail with -ENOMEM if not enough memory can be allocated to +complete the task, with -ENXIO if CMMA is not enabled, with -EINVAL if +KVM_S390_CMMA_PEEK is not set but migration mode was not enabled, with +-EFAULT if the userspace address is invalid or if no page table is +present for the addresses (e.g. when using hugepages). + +4.108 KVM_S390_SET_CMMA_BITS + +Capability: KVM_CAP_S390_CMMA_MIGRATION +Architectures: s390 +Type: vm ioctl +Parameters: struct kvm_s390_cmma_log (in) +Returns: 0 on success, a negative value on error + +This ioctl is used to set the values of the CMMA bits on the s390 +architecture. It is meant to be used during live migration to restore +the CMMA values, but there are no restrictions on its use. +The ioctl takes parameters via the kvm_s390_cmma_values struct. +Each CMMA value takes up one byte. + +struct kvm_s390_cmma_log { + __u64 start_gfn; + __u32 count; + __u32 flags; + union { + __u64 remaining; + __u64 mask; + }; + __u64 values; +}; + +start_gfn indicates the starting guest frame number, + +count indicates how many values are to be considered in the buffer, + +flags is not used and must be 0. + +mask indicates which PGSTE bits are to be considered. + +remaining is not used. + +values points to the buffer in userspace where to store the values. + +This ioctl can fail with -ENOMEM if not enough memory can be allocated to +complete the task, with -ENXIO if CMMA is not enabled, with -EINVAL if +the count field is too large (e.g. more than KVM_S390_CMMA_SIZE_MAX) or +if the flags field was not 0, with -EFAULT if the userspace address is +invalid, if invalid pages are written to (e.g. after the end of memory) +or if no page table is present for the addresses (e.g. when using +hugepages). + 5. The kvm_run structure ------------------------ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c2b391499374..e100a7ff35c7 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -30,8 +30,8 @@ #include #include #include - #include + #include #include #include @@ -387,6 +387,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_SKEYS: case KVM_CAP_S390_IRQ_STATE: case KVM_CAP_S390_USER_INSTR0: + case KVM_CAP_S390_CMMA_MIGRATION: case KVM_CAP_S390_AIS: r = 1; break; @@ -1419,6 +1420,182 @@ out: return r; } +/* + * Base address and length must be sent at the start of each block, therefore + * it's cheaper to send some clean data, as long as it's less than the size of + * two longs. + */ +#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *)) +/* for consistency */ +#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX) + +/* + * This function searches for the next page with dirty CMMA attributes, and + * saves the attributes in the buffer up to either the end of the buffer or + * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; + * no trailing clean bytes are saved. + * In case no dirty bits were found, or if CMMA was not enabled or used, the + * output buffer will indicate 0 as length. + */ +static int kvm_s390_get_cmma_bits(struct kvm *kvm, + struct kvm_s390_cmma_log *args) +{ + struct kvm_s390_migration_state *s = kvm->arch.migration_state; + unsigned long bufsize, hva, pgstev, i, next, cur; + int srcu_idx, peek, r = 0, rr; + u8 *res; + + cur = args->start_gfn; + i = next = pgstev = 0; + + if (unlikely(!kvm->arch.use_cmma)) + return -ENXIO; + /* Invalid/unsupported flags were specified */ + if (args->flags & ~KVM_S390_CMMA_PEEK) + return -EINVAL; + /* Migration mode query, and we are not doing a migration */ + peek = !!(args->flags & KVM_S390_CMMA_PEEK); + if (!peek && !s) + return -EINVAL; + /* CMMA is disabled or was not used, or the buffer has length zero */ + bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); + if (!bufsize || !kvm->mm->context.use_cmma) { + memset(args, 0, sizeof(*args)); + return 0; + } + + if (!peek) { + /* We are not peeking, and there are no dirty pages */ + if (!atomic64_read(&s->dirty_pages)) { + memset(args, 0, sizeof(*args)); + return 0; + } + cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, + args->start_gfn); + if (cur >= s->bitmap_size) /* nothing found, loop back */ + cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0); + if (cur >= s->bitmap_size) { /* again! (very unlikely) */ + memset(args, 0, sizeof(*args)); + return 0; + } + next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1); + } + + res = vmalloc(bufsize); + if (!res) + return -ENOMEM; + + args->start_gfn = cur; + + down_read(&kvm->mm->mmap_sem); + srcu_idx = srcu_read_lock(&kvm->srcu); + while (i < bufsize) { + hva = gfn_to_hva(kvm, cur); + if (kvm_is_error_hva(hva)) { + r = -EFAULT; + break; + } + /* decrement only if we actually flipped the bit to 0 */ + if (!peek && test_and_clear_bit(cur, s->pgste_bitmap)) + atomic64_dec(&s->dirty_pages); + r = get_pgste(kvm->mm, hva, &pgstev); + if (r < 0) + pgstev = 0; + /* save the value */ + res[i++] = (pgstev >> 24) & 0x3; + /* + * if the next bit is too far away, stop. + * if we reached the previous "next", find the next one + */ + if (!peek) { + if (next > cur + KVM_S390_MAX_BIT_DISTANCE) + break; + if (cur == next) + next = find_next_bit(s->pgste_bitmap, + s->bitmap_size, cur + 1); + /* reached the end of the bitmap or of the buffer, stop */ + if ((next >= s->bitmap_size) || + (next >= args->start_gfn + bufsize)) + break; + } + cur++; + } + srcu_read_unlock(&kvm->srcu, srcu_idx); + up_read(&kvm->mm->mmap_sem); + args->count = i; + args->remaining = s ? atomic64_read(&s->dirty_pages) : 0; + + rr = copy_to_user((void __user *)args->values, res, args->count); + if (rr) + r = -EFAULT; + + vfree(res); + return r; +} + +/* + * This function sets the CMMA attributes for the given pages. If the input + * buffer has zero length, no action is taken, otherwise the attributes are + * set and the mm->context.use_cmma flag is set. + */ +static int kvm_s390_set_cmma_bits(struct kvm *kvm, + const struct kvm_s390_cmma_log *args) +{ + unsigned long hva, mask, pgstev, i; + uint8_t *bits; + int srcu_idx, r = 0; + + mask = args->mask; + + if (!kvm->arch.use_cmma) + return -ENXIO; + /* invalid/unsupported flags */ + if (args->flags != 0) + return -EINVAL; + /* Enforce sane limit on memory allocation */ + if (args->count > KVM_S390_CMMA_SIZE_MAX) + return -EINVAL; + /* Nothing to do */ + if (args->count == 0) + return 0; + + bits = vmalloc(sizeof(*bits) * args->count); + if (!bits) + return -ENOMEM; + + r = copy_from_user(bits, (void __user *)args->values, args->count); + if (r) { + r = -EFAULT; + goto out; + } + + down_read(&kvm->mm->mmap_sem); + srcu_idx = srcu_read_lock(&kvm->srcu); + for (i = 0; i < args->count; i++) { + hva = gfn_to_hva(kvm, args->start_gfn + i); + if (kvm_is_error_hva(hva)) { + r = -EFAULT; + break; + } + + pgstev = bits[i]; + pgstev = pgstev << 24; + mask &= _PGSTE_GPS_USAGE_MASK; + set_pgste_bits(kvm->mm, hva, mask, pgstev); + } + srcu_read_unlock(&kvm->srcu, srcu_idx); + up_read(&kvm->mm->mmap_sem); + + if (!kvm->mm->context.use_cmma) { + down_write(&kvm->mm->mmap_sem); + kvm->mm->context.use_cmma = 1; + up_write(&kvm->mm->mmap_sem); + } +out: + vfree(bits); + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1497,6 +1674,29 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_s390_set_skeys(kvm, &args); break; } + case KVM_S390_GET_CMMA_BITS: { + struct kvm_s390_cmma_log args; + + r = -EFAULT; + if (copy_from_user(&args, argp, sizeof(args))) + break; + r = kvm_s390_get_cmma_bits(kvm, &args); + if (!r) { + r = copy_to_user(argp, &args, sizeof(args)); + if (r) + r = -EFAULT; + } + break; + } + case KVM_S390_SET_CMMA_BITS: { + struct kvm_s390_cmma_log args; + + r = -EFAULT; + if (copy_from_user(&args, argp, sizeof(args))) + break; + r = kvm_s390_set_cmma_bits(kvm, &args); + break; + } default: r = -ENOTTY; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 577429a95ad8..2b8dc1ca18d4 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -155,6 +155,35 @@ struct kvm_s390_skeys { __u32 reserved[9]; }; +#define KVM_S390_CMMA_PEEK (1 << 0) + +/** + * kvm_s390_cmma_log - Used for CMMA migration. + * + * Used both for input and output. + * + * @start_gfn: Guest page number to start from. + * @count: Size of the result buffer. + * @flags: Control operation mode via KVM_S390_CMMA_* flags + * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty + * pages are still remaining. + * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set + * in the PGSTE. + * @values: Pointer to the values buffer. + * + * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. + */ +struct kvm_s390_cmma_log { + __u64 start_gfn; + __u32 count; + __u32 flags; + union { + __u64 remaining; + __u64 mask; + }; + __u64 values; +}; + struct kvm_hyperv_exit { #define KVM_EXIT_HYPERV_SYNIC 1 #define KVM_EXIT_HYPERV_HCALL 2 @@ -895,6 +924,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SPAPR_TCE_VFIO 142 #define KVM_CAP_X86_GUEST_MWAIT 143 #define KVM_CAP_ARM_USER_IRQ 144 +#define KVM_CAP_S390_CMMA_MIGRATION 145 #ifdef KVM_CAP_IRQ_ROUTING @@ -1318,6 +1348,9 @@ struct kvm_s390_ucas_mapping { #define KVM_S390_GET_IRQ_STATE _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state) /* Available with KVM_CAP_X86_SMM */ #define KVM_SMI _IO(KVMIO, 0xb7) +/* Available with KVM_CAP_S390_CMMA_MIGRATION */ +#define KVM_S390_GET_CMMA_BITS _IOW(KVMIO, 0xb8, struct kvm_s390_cmma_log) +#define KVM_S390_SET_CMMA_BITS _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) -- cgit v1.2.3