diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-02-11 00:16:35 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-02-11 00:16:35 +0300 |
commit | 15303ba5d1cd9b28d03a980456c0978c0ea3b208 (patch) | |
tree | b9200d5b7474661cf36468038529a5269ee83238 /virt | |
parent | 9a61df9e5f7471fe5be3e02bd0bed726b2761a54 (diff) | |
parent | 1ab03c072feb579c9fd116de25be2b211e6bff6a (diff) | |
download | linux-15303ba5d1cd9b28d03a980456c0978c0ea3b208.tar.xz |
Merge tag 'kvm-4.16-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Radim Krčmář:
"ARM:
- icache invalidation optimizations, improving VM startup time
- support for forwarded level-triggered interrupts, improving
performance for timers and passthrough platform devices
- a small fix for power-management notifiers, and some cosmetic
changes
PPC:
- add MMIO emulation for vector loads and stores
- allow HPT guests to run on a radix host on POWER9 v2.2 CPUs without
requiring the complex thread synchronization of older CPU versions
- improve the handling of escalation interrupts with the XIVE
interrupt controller
- support decrement register migration
- various cleanups and bugfixes.
s390:
- Cornelia Huck passed maintainership to Janosch Frank
- exitless interrupts for emulated devices
- cleanup of cpuflag handling
- kvm_stat counter improvements
- VSIE improvements
- mm cleanup
x86:
- hypervisor part of SEV
- UMIP, RDPID, and MSR_SMI_COUNT emulation
- paravirtualized TLB shootdown using the new KVM_VCPU_PREEMPTED bit
- allow guests to see TOPOEXT, GFNI, VAES, VPCLMULQDQ, and more
AVX512 features
- show vcpu id in its anonymous inode name
- many fixes and cleanups
- per-VCPU MSR bitmaps (already merged through x86/pti branch)
- stable KVM clock when nesting on Hyper-V (merged through
x86/hyperv)"
* tag 'kvm-4.16-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (197 commits)
KVM: PPC: Book3S: Add MMIO emulation for VMX instructions
KVM: PPC: Book3S HV: Branch inside feature section
KVM: PPC: Book3S HV: Make HPT resizing work on POWER9
KVM: PPC: Book3S HV: Fix handling of secondary HPTEG in HPT resizing code
KVM: PPC: Book3S PR: Fix broken select due to misspelling
KVM: x86: don't forget vcpu_put() in kvm_arch_vcpu_ioctl_set_sregs()
KVM: PPC: Book3S PR: Fix svcpu copying with preemption enabled
KVM: PPC: Book3S HV: Drop locks before reading guest memory
kvm: x86: remove efer_reload entry in kvm_vcpu_stat
KVM: x86: AMD Processor Topology Information
x86/kvm/vmx: do not use vm-exit instruction length for fast MMIO when running nested
kvm: embed vcpu id to dentry of vcpu anon inode
kvm: Map PFN-type memory regions as writable (if possible)
x86/kvm: Make it compile on 32bit and with HYPYERVISOR_GUEST=n
KVM: arm/arm64: Fixup userspace irqchip static key optimization
KVM: arm/arm64: Fix userspace_irqchip_in_use counting
KVM: arm/arm64: Fix incorrect timer_is_pending logic
MAINTAINERS: update KVM/s390 maintainers
MAINTAINERS: add Halil as additional vfio-ccw maintainer
MAINTAINERS: add David as a reviewer for KVM/s390
...
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/Kconfig | 3 | ||||
-rw-r--r-- | virt/kvm/arm/arch_timer.c | 138 | ||||
-rw-r--r-- | virt/kvm/arm/arm.c | 153 | ||||
-rw-r--r-- | virt/kvm/arm/hyp/vgic-v2-sr.c | 1 | ||||
-rw-r--r-- | virt/kvm/arm/mmu.c | 64 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-its.c | 4 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-mmio.c | 115 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v2.c | 29 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v3.c | 29 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic.c | 41 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic.h | 8 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 62 |
12 files changed, 464 insertions, 183 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 70691c08e1ed..cca7e065a075 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -51,3 +51,6 @@ config KVM_COMPAT config HAVE_KVM_IRQ_BYPASS bool + +config HAVE_KVM_VCPU_ASYNC_IOCTL + bool diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index cc29a8148328..70268c0bec79 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -92,7 +92,6 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) { struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; struct arch_timer_context *vtimer; - u32 cnt_ctl; /* * We may see a timer interrupt after vcpu_put() has been called which @@ -104,15 +103,11 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) return IRQ_HANDLED; vtimer = vcpu_vtimer(vcpu); - if (!vtimer->irq.level) { - cnt_ctl = read_sysreg_el0(cntv_ctl); - cnt_ctl &= ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT | - ARCH_TIMER_CTRL_IT_MASK; - if (cnt_ctl == (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT)) - kvm_timer_update_irq(vcpu, true, vtimer); - } + if (kvm_timer_should_fire(vtimer)) + kvm_timer_update_irq(vcpu, true, vtimer); - if (unlikely(!irqchip_in_kernel(vcpu->kvm))) + if (static_branch_unlikely(&userspace_irqchip_in_use) && + unlikely(!irqchip_in_kernel(vcpu->kvm))) kvm_vtimer_update_mask_user(vcpu); return IRQ_HANDLED; @@ -238,6 +233,16 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) { u64 cval, now; + if (timer_ctx->loaded) { + u32 cnt_ctl; + + /* Only the virtual timer can be loaded so far */ + cnt_ctl = read_sysreg_el0(cntv_ctl); + return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) && + (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) && + !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK); + } + if (!kvm_timer_irq_can_fire(timer_ctx)) return false; @@ -252,15 +257,7 @@ bool kvm_timer_is_pending(struct kvm_vcpu *vcpu) struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - if (vtimer->irq.level || ptimer->irq.level) - return true; - - /* - * When this is called from withing the wait loop of kvm_vcpu_block(), - * the software view of the timer state is up to date (timer->loaded - * is false), and so we can simply check if the timer should fire now. - */ - if (!vtimer->loaded && kvm_timer_should_fire(vtimer)) + if (kvm_timer_should_fire(vtimer)) return true; return kvm_timer_should_fire(ptimer); @@ -278,9 +275,9 @@ void kvm_timer_update_run(struct kvm_vcpu *vcpu) /* Populate the device bitmap with the timer states */ regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER | KVM_ARM_DEV_EL1_PTIMER); - if (vtimer->irq.level) + if (kvm_timer_should_fire(vtimer)) regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER; - if (ptimer->irq.level) + if (kvm_timer_should_fire(ptimer)) regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER; } @@ -293,7 +290,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, timer_ctx->irq.level); - if (likely(irqchip_in_kernel(vcpu->kvm))) { + if (!static_branch_unlikely(&userspace_irqchip_in_use) || + likely(irqchip_in_kernel(vcpu->kvm))) { ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq, timer_ctx->irq.level, @@ -331,12 +329,20 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + bool level; if (unlikely(!timer->enabled)) return; - if (kvm_timer_should_fire(vtimer) != vtimer->irq.level) - kvm_timer_update_irq(vcpu, !vtimer->irq.level, vtimer); + /* + * The vtimer virtual interrupt is a 'mapped' interrupt, meaning part + * of its lifecycle is offloaded to the hardware, and we therefore may + * not have lowered the irq.level value before having to signal a new + * interrupt, but have to signal an interrupt every time the level is + * asserted. + */ + level = kvm_timer_should_fire(vtimer); + kvm_timer_update_irq(vcpu, level, vtimer); if (kvm_timer_should_fire(ptimer) != ptimer->irq.level) kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer); @@ -344,6 +350,12 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) phys_timer_emulate(vcpu); } +static void __timer_snapshot_state(struct arch_timer_context *timer) +{ + timer->cnt_ctl = read_sysreg_el0(cntv_ctl); + timer->cnt_cval = read_sysreg_el0(cntv_cval); +} + static void vtimer_save_state(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; @@ -355,10 +367,8 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu) if (!vtimer->loaded) goto out; - if (timer->enabled) { - vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); - vtimer->cnt_cval = read_sysreg_el0(cntv_cval); - } + if (timer->enabled) + __timer_snapshot_state(vtimer); /* Disable the virtual timer */ write_sysreg_el0(0, cntv_ctl); @@ -456,8 +466,7 @@ static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu) bool phys_active; int ret; - phys_active = vtimer->irq.level || - kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); + phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq); ret = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, @@ -504,8 +513,8 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER; plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER; - return vtimer->irq.level != vlevel || - ptimer->irq.level != plevel; + return kvm_timer_should_fire(vtimer) != vlevel || + kvm_timer_should_fire(ptimer) != plevel; } void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) @@ -537,54 +546,27 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) set_cntvoff(0); } -static void unmask_vtimer_irq(struct kvm_vcpu *vcpu) +/* + * With a userspace irqchip we have to check if the guest de-asserted the + * timer and if so, unmask the timer irq signal on the host interrupt + * controller to ensure that we see future timer signals. + */ +static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu) { struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); if (unlikely(!irqchip_in_kernel(vcpu->kvm))) { - kvm_vtimer_update_mask_user(vcpu); - return; - } - - /* - * If the guest disabled the timer without acking the interrupt, then - * we must make sure the physical and virtual active states are in - * sync by deactivating the physical interrupt, because otherwise we - * wouldn't see the next timer interrupt in the host. - */ - if (!kvm_vgic_map_is_active(vcpu, vtimer->irq.irq)) { - int ret; - ret = irq_set_irqchip_state(host_vtimer_irq, - IRQCHIP_STATE_ACTIVE, - false); - WARN_ON(ret); + __timer_snapshot_state(vtimer); + if (!kvm_timer_should_fire(vtimer)) { + kvm_timer_update_irq(vcpu, false, vtimer); + kvm_vtimer_update_mask_user(vcpu); + } } } -/** - * kvm_timer_sync_hwstate - sync timer state from cpu - * @vcpu: The vcpu pointer - * - * Check if any of the timers have expired while we were running in the guest, - * and inject an interrupt if that was the case. - */ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) { - struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); - - /* - * If we entered the guest with the vtimer output asserted we have to - * check if the guest has modified the timer so that we should lower - * the line at this point. - */ - if (vtimer->irq.level) { - vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl); - vtimer->cnt_cval = read_sysreg_el0(cntv_cval); - if (!kvm_timer_should_fire(vtimer)) { - kvm_timer_update_irq(vcpu, false, vtimer); - unmask_vtimer_irq(vcpu); - } - } + unmask_vtimer_irq_user(vcpu); } int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) @@ -818,6 +800,19 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) return true; } +bool kvm_arch_timer_get_input_level(int vintid) +{ + struct kvm_vcpu *vcpu = kvm_arm_get_running_vcpu(); + struct arch_timer_context *timer; + + if (vintid == vcpu_vtimer(vcpu)->irq.irq) + timer = vcpu_vtimer(vcpu); + else + BUG(); /* We only map the vtimer so far */ + + return kvm_timer_should_fire(timer); +} + int kvm_timer_enable(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; @@ -839,7 +834,8 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) return -EINVAL; } - ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq); + ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq, + kvm_arch_timer_get_input_level); if (ret) return ret; diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 7e3941f2ecde..86941f6181bb 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -71,17 +71,17 @@ static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) { - BUG_ON(preemptible()); __this_cpu_write(kvm_arm_running_vcpu, vcpu); } +DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); + /** * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU. * Must be called from non-preemptible context */ struct kvm_vcpu *kvm_arm_get_running_vcpu(void) { - BUG_ON(preemptible()); return __this_cpu_read(kvm_arm_running_vcpu); } @@ -295,6 +295,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { + if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm))) + static_branch_dec(&userspace_irqchip_in_use); + kvm_mmu_free_memory_caches(vcpu); kvm_timer_vcpu_terminate(vcpu); kvm_pmu_vcpu_destroy(vcpu); @@ -381,17 +384,24 @@ static void vcpu_power_off(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { + vcpu_load(vcpu); + if (vcpu->arch.power_off) mp_state->mp_state = KVM_MP_STATE_STOPPED; else mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + vcpu_put(vcpu); return 0; } int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { + int ret = 0; + + vcpu_load(vcpu); + switch (mp_state->mp_state) { case KVM_MP_STATE_RUNNABLE: vcpu->arch.power_off = false; @@ -400,10 +410,11 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, vcpu_power_off(vcpu); break; default: - return -EINVAL; + ret = -EINVAL; } - return 0; + vcpu_put(vcpu); + return ret; } /** @@ -524,14 +535,22 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) vcpu->arch.has_run_once = true; - /* - * Map the VGIC hardware resources before running a vcpu the first - * time on this VM. - */ - if (unlikely(irqchip_in_kernel(kvm) && !vgic_ready(kvm))) { - ret = kvm_vgic_map_resources(kvm); - if (ret) - return ret; + if (likely(irqchip_in_kernel(kvm))) { + /* + * Map the VGIC hardware resources before running a vcpu the + * first time on this VM. + */ + if (unlikely(!vgic_ready(kvm))) { + ret = kvm_vgic_map_resources(kvm); + if (ret) + return ret; + } + } else { + /* + * Tell the rest of the code that there are userspace irqchip + * VMs in the wild. + */ + static_branch_inc(&userspace_irqchip_in_use); } ret = kvm_timer_enable(vcpu); @@ -619,21 +638,27 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (unlikely(!kvm_vcpu_initialized(vcpu))) return -ENOEXEC; + vcpu_load(vcpu); + ret = kvm_vcpu_first_run_init(vcpu); if (ret) - return ret; + goto out; if (run->exit_reason == KVM_EXIT_MMIO) { ret = kvm_handle_mmio_return(vcpu, vcpu->run); if (ret) - return ret; - if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) - return 0; + goto out; + if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) { + ret = 0; + goto out; + } } - if (run->immediate_exit) - return -EINTR; + if (run->immediate_exit) { + ret = -EINTR; + goto out; + } kvm_sigset_activate(vcpu); @@ -666,19 +691,30 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_vgic_flush_hwstate(vcpu); /* - * If we have a singal pending, or need to notify a userspace - * irqchip about timer or PMU level changes, then we exit (and - * update the timer level state in kvm_timer_update_run - * below). + * Exit if we have a signal pending so that we can deliver the + * signal to user space. */ - if (signal_pending(current) || - kvm_timer_should_notify_user(vcpu) || - kvm_pmu_should_notify_user(vcpu)) { + if (signal_pending(current)) { ret = -EINTR; run->exit_reason = KVM_EXIT_INTR; } /* + * If we're using a userspace irqchip, then check if we need + * to tell a userspace irqchip about timer or PMU level + * changes and if so, exit to userspace (the actual level + * state gets updated in kvm_timer_update_run and + * kvm_pmu_update_run below). + */ + if (static_branch_unlikely(&userspace_irqchip_in_use)) { + if (kvm_timer_should_notify_user(vcpu) || + kvm_pmu_should_notify_user(vcpu)) { + ret = -EINTR; + run->exit_reason = KVM_EXIT_INTR; + } + } + + /* * Ensure we set mode to IN_GUEST_MODE after we disable * interrupts and before the final VCPU requests check. * See the comment in kvm_vcpu_exiting_guest_mode() and @@ -690,7 +726,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_request_pending(vcpu)) { vcpu->mode = OUTSIDE_GUEST_MODE; kvm_pmu_sync_hwstate(vcpu); - kvm_timer_sync_hwstate(vcpu); + if (static_branch_unlikely(&userspace_irqchip_in_use)) + kvm_timer_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu); local_irq_enable(); preempt_enable(); @@ -738,7 +775,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) * we don't want vtimer interrupts to race with syncing the * timer virtual interrupt state. */ - kvm_timer_sync_hwstate(vcpu); + if (static_branch_unlikely(&userspace_irqchip_in_use)) + kvm_timer_sync_hwstate(vcpu); /* * We may have taken a host interrupt in HYP mode (ie @@ -779,6 +817,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_sigset_deactivate(vcpu); +out: + vcpu_put(vcpu); return ret; } @@ -994,66 +1034,88 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; struct kvm_device_attr attr; + long r; + + vcpu_load(vcpu); switch (ioctl) { case KVM_ARM_VCPU_INIT: { struct kvm_vcpu_init init; + r = -EFAULT; if (copy_from_user(&init, argp, sizeof(init))) - return -EFAULT; + break; - return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init); + r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init); + break; } case KVM_SET_ONE_REG: case KVM_GET_ONE_REG: { struct kvm_one_reg reg; + r = -ENOEXEC; if (unlikely(!kvm_vcpu_initialized(vcpu))) - return -ENOEXEC; + break; + r = -EFAULT; if (copy_from_user(®, argp, sizeof(reg))) - return -EFAULT; + break; + if (ioctl == KVM_SET_ONE_REG) - return kvm_arm_set_reg(vcpu, ®); + r = kvm_arm_set_reg(vcpu, ®); else - return kvm_arm_get_reg(vcpu, ®); + r = kvm_arm_get_reg(vcpu, ®); + break; } case KVM_GET_REG_LIST: { struct kvm_reg_list __user *user_list = argp; struct kvm_reg_list reg_list; unsigned n; + r = -ENOEXEC; if (unlikely(!kvm_vcpu_initialized(vcpu))) - return -ENOEXEC; + break; + r = -EFAULT; if (copy_from_user(®_list, user_list, sizeof(reg_list))) - return -EFAULT; + break; n = reg_list.n; reg_list.n = kvm_arm_num_regs(vcpu); if (copy_to_user(user_list, ®_list, sizeof(reg_list))) - return -EFAULT; + break; + r = -E2BIG; if (n < reg_list.n) - return -E2BIG; - return kvm_arm_copy_reg_indices(vcpu, user_list->reg); + break; + r = kvm_arm_copy_reg_indices(vcpu, user_list->reg); + break; } case KVM_SET_DEVICE_ATTR: { + r = -EFAULT; if (copy_from_user(&attr, argp, sizeof(attr))) - return -EFAULT; - return kvm_arm_vcpu_set_attr(vcpu, &attr); + break; + r = kvm_arm_vcpu_set_attr(vcpu, &attr); + break; } case KVM_GET_DEVICE_ATTR: { + r = -EFAULT; if (copy_from_user(&attr, argp, sizeof(attr))) - return -EFAULT; - return kvm_arm_vcpu_get_attr(vcpu, &attr); + break; + r = kvm_arm_vcpu_get_attr(vcpu, &attr); + break; } case KVM_HAS_DEVICE_ATTR: { + r = -EFAULT; if (copy_from_user(&attr, argp, sizeof(attr))) - return -EFAULT; - return kvm_arm_vcpu_has_attr(vcpu, &attr); + break; + r = kvm_arm_vcpu_has_attr(vcpu, &attr); + break; } default: - return -EINVAL; + r = -EINVAL; } + + vcpu_put(vcpu); + return r; } /** @@ -1246,6 +1308,7 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self, cpu_hyp_reset(); return NOTIFY_OK; + case CPU_PM_ENTER_FAILED: case CPU_PM_EXIT: if (__this_cpu_read(kvm_arm_hardware_enabled)) /* The hardware was enabled before suspend. */ diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c index d7fd46fe9efb..4fe6e797e8b3 100644 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ b/virt/kvm/arm/hyp/vgic-v2-sr.c @@ -21,6 +21,7 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) { diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index f8eaf86b740a..ec62d1cccab7 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -924,6 +924,25 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache return 0; } +static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr) +{ + pmd_t *pmdp; + pte_t *ptep; + + pmdp = stage2_get_pmd(kvm, NULL, addr); + if (!pmdp || pmd_none(*pmdp) || !pmd_present(*pmdp)) + return false; + + if (pmd_thp_or_huge(*pmdp)) + return kvm_s2pmd_exec(pmdp); + + ptep = pte_offset_kernel(pmdp, addr); + if (!ptep || pte_none(*ptep) || !pte_present(*ptep)) + return false; + + return kvm_s2pte_exec(ptep); +} + static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, phys_addr_t addr, const pte_t *new_pte, unsigned long flags) @@ -1255,10 +1274,14 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); } -static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn, - unsigned long size) +static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) { - __coherent_cache_guest_page(vcpu, pfn, size); + __clean_dcache_guest_page(pfn, size); +} + +static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size) +{ + __invalidate_icache_guest_page(pfn, size); } static void kvm_send_hwpoison_signal(unsigned long address, @@ -1284,7 +1307,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, unsigned long fault_status) { int ret; - bool write_fault, writable, hugetlb = false, force_pte = false; + bool write_fault, exec_fault, writable, hugetlb = false, force_pte = false; unsigned long mmu_seq; gfn_t gfn = fault_ipa >> PAGE_SHIFT; struct kvm *kvm = vcpu->kvm; @@ -1296,7 +1319,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, unsigned long flags = 0; write_fault = kvm_is_write_fault(vcpu); - if (fault_status == FSC_PERM && !write_fault) { + exec_fault = kvm_vcpu_trap_is_iabt(vcpu); + VM_BUG_ON(write_fault && exec_fault); + + if (fault_status == FSC_PERM && !write_fault && !exec_fault) { kvm_err("Unexpected L2 read permission error\n"); return -EFAULT; } @@ -1389,7 +1415,19 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, new_pmd = kvm_s2pmd_mkwrite(new_pmd); kvm_set_pfn_dirty(pfn); } - coherent_cache_guest_page(vcpu, pfn, PMD_SIZE); + + if (fault_status != FSC_PERM) + clean_dcache_guest_page(pfn, PMD_SIZE); + + if (exec_fault) { + new_pmd = kvm_s2pmd_mkexec(new_pmd); + invalidate_icache_guest_page(pfn, PMD_SIZE); + } else if (fault_status == FSC_PERM) { + /* Preserve execute if XN was already cleared */ + if (stage2_is_exec(kvm, fault_ipa)) + new_pmd = kvm_s2pmd_mkexec(new_pmd); + } + ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); } else { pte_t new_pte = pfn_pte(pfn, mem_type); @@ -1399,7 +1437,19 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, kvm_set_pfn_dirty(pfn); mark_page_dirty(kvm, gfn); } - coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE); + + if (fault_status != FSC_PERM) + clean_dcache_guest_page(pfn, PAGE_SIZE); + + if (exec_fault) { + new_pte = kvm_s2pte_mkexec(new_pte); + invalidate_icache_guest_page(pfn, PAGE_SIZE); + } else if (fault_status == FSC_PERM) { + /* Preserve execute if XN was already cleared */ + if (stage2_is_exec(kvm, fault_ipa)) + new_pte = kvm_s2pte_mkexec(new_pte); + } + ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags); } diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 8e633bd9cc1e..465095355666 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -1034,10 +1034,8 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its, device = vgic_its_alloc_device(its, device_id, itt_addr, num_eventid_bits); - if (IS_ERR(device)) - return PTR_ERR(device); - return 0; + return PTR_ERR_OR_ZERO(device); } /* diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index deb51ee16a3d..83d82bd7dc4e 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -16,6 +16,7 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <kvm/iodev.h> +#include <kvm/arm_arch_timer.h> #include <kvm/arm_vgic.h> #include "vgic.h" @@ -122,10 +123,43 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, return value; } +/* + * This function will return the VCPU that performed the MMIO access and + * trapped from within the VM, and will return NULL if this is a userspace + * access. + * + * We can disable preemption locally around accessing the per-CPU variable, + * and use the resolved vcpu pointer after enabling preemption again, because + * even if the current thread is migrated to another CPU, reading the per-CPU + * value later will give us the same value as we update the per-CPU variable + * in the preempt notifier handlers. + */ +static struct kvm_vcpu *vgic_get_mmio_requester_vcpu(void) +{ + struct kvm_vcpu *vcpu; + + preempt_disable(); + vcpu = kvm_arm_get_running_vcpu(); + preempt_enable(); + return vcpu; +} + +/* Must be called with irq->irq_lock held */ +static void vgic_hw_irq_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq, + bool is_uaccess) +{ + if (is_uaccess) + return; + + irq->pending_latch = true; + vgic_irq_set_phys_active(irq, true); +} + void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { + bool is_uaccess = !vgic_get_mmio_requester_vcpu(); u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; unsigned long flags; @@ -134,17 +168,45 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); spin_lock_irqsave(&irq->irq_lock, flags); - irq->pending_latch = true; - + if (irq->hw) + vgic_hw_irq_spending(vcpu, irq, is_uaccess); + else + irq->pending_latch = true; vgic_queue_irq_unlock(vcpu->kvm, irq, flags); vgic_put_irq(vcpu->kvm, irq); } } +/* Must be called with irq->irq_lock held */ +static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq, + bool is_uaccess) +{ + if (is_uaccess) + return; + + irq->pending_latch = false; + + /* + * We don't want the guest to effectively mask the physical + * interrupt by doing a write to SPENDR followed by a write to + * CPENDR for HW interrupts, so we clear the active state on + * the physical side if the virtual interrupt is not active. + * This may lead to taking an additional interrupt on the + * host, but that should not be a problem as the worst that + * can happen is an additional vgic injection. We also clear + * the pending state to maintain proper semantics for edge HW + * interrupts. + */ + vgic_irq_set_phys_pending(irq, false); + if (!irq->active) + vgic_irq_set_phys_active(irq, false); +} + void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { + bool is_uaccess = !vgic_get_mmio_requester_vcpu(); u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; unsigned long flags; @@ -154,7 +216,10 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, spin_lock_irqsave(&irq->irq_lock, flags); - irq->pending_latch = false; + if (irq->hw) + vgic_hw_irq_cpending(vcpu, irq, is_uaccess); + else + irq->pending_latch = false; spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); @@ -181,27 +246,24 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, return value; } +/* Must be called with irq->irq_lock held */ +static void vgic_hw_irq_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, + bool active, bool is_uaccess) +{ + if (is_uaccess) + return; + + irq->active = active; + vgic_irq_set_phys_active(irq, active); +} + static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, - bool new_active_state) + bool active) { - struct kvm_vcpu *requester_vcpu; unsigned long flags; - spin_lock_irqsave(&irq->irq_lock, flags); + struct kvm_vcpu *requester_vcpu = vgic_get_mmio_requester_vcpu(); - /* - * The vcpu parameter here can mean multiple things depending on how - * this function is called; when handling a trap from the kernel it - * depends on the GIC version, and these functions are also called as - * part of save/restore from userspace. - * - * Therefore, we have to figure out the requester in a reliable way. - * - * When accessing VGIC state from user space, the requester_vcpu is - * NULL, which is fine, because we guarantee that no VCPUs are running - * when accessing VGIC state from user space so irq->vcpu->cpu is - * always -1. - */ - requester_vcpu = kvm_arm_get_running_vcpu(); + spin_lock_irqsave(&irq->irq_lock, flags); /* * If this virtual IRQ was written into a list register, we @@ -213,14 +275,23 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, * vgic_change_active_prepare) and still has to sync back this IRQ, * so we release and re-acquire the spin_lock to let the other thread * sync back the IRQ. + * + * When accessing VGIC state from user space, requester_vcpu is + * NULL, which is fine, because we guarantee that no VCPUs are running + * when accessing VGIC state from user space so irq->vcpu->cpu is + * always -1. */ while (irq->vcpu && /* IRQ may have state in an LR somewhere */ irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */ irq->vcpu->cpu != -1) /* VCPU thread is running */ cond_resched_lock(&irq->irq_lock); - irq->active = new_active_state; - if (new_active_state) + if (irq->hw) + vgic_hw_irq_change_active(vcpu, irq, active, !requester_vcpu); + else + irq->active = active; + + if (irq->active) vgic_queue_irq_unlock(vcpu->kvm, irq, flags); else spin_unlock_irqrestore(&irq->irq_lock, flags); diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 80897102da26..c32d7b93ffd1 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -105,6 +105,26 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) irq->pending_latch = false; } + /* + * Level-triggered mapped IRQs are special because we only + * observe rising edges as input to the VGIC. + * + * If the guest never acked the interrupt we have to sample + * the physical line and set the line level, because the + * device state could have changed or we simply need to + * process the still pending interrupt later. + * + * If this causes us to lower the level, we have to also clear + * the physical active state, since we will otherwise never be + * told when the interrupt becomes asserted again. + */ + if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) { + irq->line_level = vgic_get_phys_line_level(irq); + + if (!irq->line_level) + vgic_irq_set_phys_active(irq, false); + } + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } @@ -162,6 +182,15 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) val |= GICH_LR_EOI; } + /* + * Level-triggered mapped IRQs are special because we only observe + * rising edges as input to the VGIC. We therefore lower the line + * level here, so that we can take new virtual IRQs. See + * vgic_v2_fold_lr_state for more info. + */ + if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) + irq->line_level = false; + /* The GICv2 LR only holds five bits of priority. */ val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT; diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index f47e8481fa45..6b329414e57a 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -96,6 +96,26 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) irq->pending_latch = false; } + /* + * Level-triggered mapped IRQs are special because we only + * observe rising edges as input to the VGIC. + * + * If the guest never acked the interrupt we have to sample + * the physical line and set the line level, because the + * device state could have changed or we simply need to + * process the still pending interrupt later. + * + * If this causes us to lower the level, we have to also clear + * the physical active state, since we will otherwise never be + * told when the interrupt becomes asserted again. + */ + if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) { + irq->line_level = vgic_get_phys_line_level(irq); + + if (!irq->line_level) + vgic_irq_set_phys_active(irq, false); + } + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } @@ -146,6 +166,15 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) } /* + * Level-triggered mapped IRQs are special because we only observe + * rising edges as input to the VGIC. We therefore lower the line + * level here, so that we can take new virtual IRQs. See + * vgic_v3_fold_lr_state for more info. + */ + if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) + irq->line_level = false; + + /* * We currently only support Group1 interrupts, which is a * known defect. This needs to be addressed at some point. */ diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index ecb8e25f5fe5..c7c5ef190afa 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -144,6 +144,38 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) kfree(irq); } +void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending) +{ + WARN_ON(irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + pending)); +} + +bool vgic_get_phys_line_level(struct vgic_irq *irq) +{ + bool line_level; + + BUG_ON(!irq->hw); + + if (irq->get_input_level) + return irq->get_input_level(irq->intid); + + WARN_ON(irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &line_level)); + return line_level; +} + +/* Set/Clear the physical active state */ +void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active) +{ + + BUG_ON(!irq->hw); + WARN_ON(irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_ACTIVE, + active)); +} + /** * kvm_vgic_target_oracle - compute the target vcpu for an irq * @@ -413,7 +445,8 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, /* @irq->irq_lock must be held */ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, - unsigned int host_irq) + unsigned int host_irq, + bool (*get_input_level)(int vindid)) { struct irq_desc *desc; struct irq_data *data; @@ -433,6 +466,7 @@ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, irq->hw = true; irq->host_irq = host_irq; irq->hwintid = data->hwirq; + irq->get_input_level = get_input_level; return 0; } @@ -441,10 +475,11 @@ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq) { irq->hw = false; irq->hwintid = 0; + irq->get_input_level = NULL; } int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, - u32 vintid) + u32 vintid, bool (*get_input_level)(int vindid)) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); unsigned long flags; @@ -453,7 +488,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, BUG_ON(!irq); spin_lock_irqsave(&irq->irq_lock, flags); - ret = kvm_vgic_map_irq(vcpu, irq, host_irq); + ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level); spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index efbcf8f96f9c..12c37b89f7a3 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -104,6 +104,11 @@ static inline bool irq_is_pending(struct vgic_irq *irq) return irq->pending_latch || irq->line_level; } +static inline bool vgic_irq_is_mapped_level(struct vgic_irq *irq) +{ + return irq->config == VGIC_CONFIG_LEVEL && irq->hw; +} + /* * This struct provides an intermediate representation of the fields contained * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC @@ -140,6 +145,9 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 intid); void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); +bool vgic_get_phys_line_level(struct vgic_irq *irq); +void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending); +void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active); bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, unsigned long flags); void vgic_kick_vcpus(struct kvm *kvm); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 001085b611ad..4501e658e8d6 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -151,17 +151,12 @@ bool kvm_is_reserved_pfn(kvm_pfn_t pfn) /* * Switches to specified vcpu, until a matching vcpu_put() */ -int vcpu_load(struct kvm_vcpu *vcpu) +void vcpu_load(struct kvm_vcpu *vcpu) { - int cpu; - - if (mutex_lock_killable(&vcpu->mutex)) - return -EINTR; - cpu = get_cpu(); + int cpu = get_cpu(); preempt_notifier_register(&vcpu->preempt_notifier); kvm_arch_vcpu_load(vcpu, cpu); put_cpu(); - return 0; } EXPORT_SYMBOL_GPL(vcpu_load); @@ -171,7 +166,6 @@ void vcpu_put(struct kvm_vcpu *vcpu) kvm_arch_vcpu_put(vcpu); preempt_notifier_unregister(&vcpu->preempt_notifier); preempt_enable(); - mutex_unlock(&vcpu->mutex); } EXPORT_SYMBOL_GPL(vcpu_put); @@ -1416,7 +1410,8 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault) static int hva_to_pfn_remapped(struct vm_area_struct *vma, unsigned long addr, bool *async, - bool write_fault, kvm_pfn_t *p_pfn) + bool write_fault, bool *writable, + kvm_pfn_t *p_pfn) { unsigned long pfn; int r; @@ -1442,6 +1437,8 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, } + if (writable) + *writable = true; /* * Get a reference here because callers of *hva_to_pfn* and @@ -1507,7 +1504,7 @@ retry: if (vma == NULL) pfn = KVM_PFN_ERR_FAULT; else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) { - r = hva_to_pfn_remapped(vma, addr, async, write_fault, &pfn); + r = hva_to_pfn_remapped(vma, addr, async, write_fault, writable, &pfn); if (r == -EAGAIN) goto retry; if (r < 0) @@ -2400,7 +2397,10 @@ static struct file_operations kvm_vcpu_fops = { */ static int create_vcpu_fd(struct kvm_vcpu *vcpu) { - return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC); + char name[8 + 1 + ITOA_MAX_LEN + 1]; + + snprintf(name, sizeof(name), "kvm-vcpu:%d", vcpu->vcpu_id); + return anon_inode_getfd(name, &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC); } static int kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) @@ -2532,19 +2532,16 @@ static long kvm_vcpu_ioctl(struct file *filp, if (unlikely(_IOC_TYPE(ioctl) != KVMIO)) return -EINVAL; -#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS) /* - * Special cases: vcpu ioctls that are asynchronous to vcpu execution, - * so vcpu_load() would break it. + * Some architectures have vcpu ioctls that are asynchronous to vcpu + * execution; mutex_lock() would break them. */ - if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT) - return kvm_arch_vcpu_ioctl(filp, ioctl, arg); -#endif - - - r = vcpu_load(vcpu); - if (r) + r = kvm_arch_vcpu_async_ioctl(filp, ioctl, arg); + if (r != -ENOIOCTLCMD) return r; + + if (mutex_lock_killable(&vcpu->mutex)) + return -EINTR; switch (ioctl) { case KVM_RUN: { struct pid *oldpid; @@ -2716,7 +2713,7 @@ out_free1: r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); } out: - vcpu_put(vcpu); + mutex_unlock(&vcpu->mutex); kfree(fpu); kfree(kvm_sregs); return r; @@ -3168,21 +3165,18 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return PTR_ERR(kvm); #ifdef CONFIG_KVM_MMIO r = kvm_coalesced_mmio_init(kvm); - if (r < 0) { - kvm_put_kvm(kvm); - return r; - } + if (r < 0) + goto put_kvm; #endif r = get_unused_fd_flags(O_CLOEXEC); - if (r < 0) { - kvm_put_kvm(kvm); - return r; - } + if (r < 0) + goto put_kvm; + file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); if (IS_ERR(file)) { put_unused_fd(r); - kvm_put_kvm(kvm); - return PTR_ERR(file); + r = PTR_ERR(file); + goto put_kvm; } /* @@ -3200,6 +3194,10 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) fd_install(r, file); return r; + +put_kvm: + kvm_put_kvm(kvm); + return r; } static long kvm_dev_ioctl(struct file *filp, |