diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-23 21:17:56 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-23 21:17:56 +0300 |
commit | f9a705ad1c077ec2872c641f0db9c0d5b4a097bb (patch) | |
tree | 7f5d18d74f700be5bcf72ec5f4955f016eac9ab9 /arch/powerpc/kvm | |
parent | 9313f8026328d0309d093f6774be4b8f5340c0e5 (diff) | |
parent | 29cf0f5007a215b51feb0ae25ca5353480d53ead (diff) | |
download | linux-f9a705ad1c077ec2872c641f0db9c0d5b4a097bb.tar.xz |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"For x86, there is a new alternative and (in the future) more scalable
implementation of extended page tables that does not need a reverse
map from guest physical addresses to host physical addresses.
For now it is disabled by default because it is still lacking a few of
the existing MMU's bells and whistles. However it is a very solid
piece of work and it is already available for people to hammer on it.
Other updates:
ARM:
- New page table code for both hypervisor and guest stage-2
- Introduction of a new EL2-private host context
- Allow EL2 to have its own private per-CPU variables
- Support of PMU event filtering
- Complete rework of the Spectre mitigation
PPC:
- Fix for running nested guests with in-kernel IRQ chip
- Fix race condition causing occasional host hard lockup
- Minor cleanups and bugfixes
x86:
- allow trapping unknown MSRs to userspace
- allow userspace to force #GP on specific MSRs
- INVPCID support on AMD
- nested AMD cleanup, on demand allocation of nested SVM state
- hide PV MSRs and hypercalls for features not enabled in CPUID
- new test for MSR_IA32_TSC writes from host and guest
- cleanups: MMU, CPUID, shared MSRs
- LAPIC latency optimizations ad bugfixes"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (232 commits)
kvm: x86/mmu: NX largepage recovery for TDP MMU
kvm: x86/mmu: Don't clear write flooding count for direct roots
kvm: x86/mmu: Support MMIO in the TDP MMU
kvm: x86/mmu: Support write protection for nesting in tdp MMU
kvm: x86/mmu: Support disabling dirty logging for the tdp MMU
kvm: x86/mmu: Support dirty logging for the TDP MMU
kvm: x86/mmu: Support changed pte notifier in tdp MMU
kvm: x86/mmu: Add access tracking for tdp_mmu
kvm: x86/mmu: Support invalidate range MMU notifier for TDP MMU
kvm: x86/mmu: Allocate struct kvm_mmu_pages for all pages in TDP MMU
kvm: x86/mmu: Add TDP MMU PF handler
kvm: x86/mmu: Remove disallowed_hugepage_adjust shadow_walk_iterator arg
kvm: x86/mmu: Support zapping SPTEs in the TDP MMU
KVM: Cache as_id in kvm_memory_slot
kvm: x86/mmu: Add functions to handle changed TDP SPTEs
kvm: x86/mmu: Allocate and free TDP MMU roots
kvm: x86/mmu: Init / Uninit the TDP MMU
kvm: x86/mmu: Introduce tdp_iter
KVM: mmu: extract spte.h and spte.c
KVM: mmu: Separate updating a PTE from kvm_set_pte_rmapp
...
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r-- | arch/powerpc/kvm/book3s.c | 8 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_vio.c | 4 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_vio_hv.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 24 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_interrupts.S | 9 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_nested.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rm_xics.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_pr.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xics.c | 86 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xive_native.c | 12 | ||||
-rw-r--r-- | arch/powerpc/kvm/booke.c | 6 |
12 files changed, 110 insertions, 49 deletions
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 49db50d1db04..44bf567b6589 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -558,12 +558,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - return -ENOTSUPP; + return -EOPNOTSUPP; } int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - return -ENOTSUPP; + return -EOPNOTSUPP; } int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, @@ -879,13 +879,15 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) #ifdef CONFIG_KVM_XICS /* - * Free the XIVE devices which are not directly freed by the + * Free the XIVE and XICS devices which are not directly freed by the * device 'release' method */ kfree(kvm->arch.xive_devices.native); kvm->arch.xive_devices.native = NULL; kfree(kvm->arch.xive_devices.xics_on_xive); kvm->arch.xive_devices.xics_on_xive = NULL; + kfree(kvm->arch.xics_device); + kvm->arch.xics_device = NULL; #endif /* CONFIG_KVM_XICS */ } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 22a677b18695..bb35490400e9 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -347,7 +347,7 @@ static unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep, return __radix_pte_update(ptep, clr, set); } -void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr, +static void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr, pte_t *ptep, pte_t pte) { radix__set_pte_at(kvm->mm, addr, ptep, pte, 0); diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 1a529df0ab44..8da93fdfa59e 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -283,7 +283,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvmppc_spapr_tce_table *siter; struct mm_struct *mm = kvm->mm; unsigned long npages, size = args->size; - int ret = -ENOMEM; + int ret; if (!args->size || args->page_shift < 12 || args->page_shift > 34 || (args->offset + args->size > (ULLONG_MAX >> args->page_shift))) @@ -489,7 +489,7 @@ static long kvmppc_tce_iommu_unmap(struct kvm *kvm, return ret; } -long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, +static long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, unsigned long entry, unsigned long ua, enum dma_data_direction dir) { diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index ac6ac192b8bb..470e7c518a10 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -237,7 +237,7 @@ static long iommu_tce_xchg_no_kill_rm(struct mm_struct *mm, return ret; } -extern void iommu_tce_kill_rm(struct iommu_table *tbl, +static void iommu_tce_kill_rm(struct iommu_table *tbl, unsigned long entry, unsigned long pages) { if (tbl->it_ops->tce_kill) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 3bd3118c7633..e3b1839fc251 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -111,7 +111,7 @@ module_param(one_vm_per_core, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires indep_threads_mode=N)"); #ifdef CONFIG_KVM_XICS -static struct kernel_param_ops module_param_ops = { +static const struct kernel_param_ops module_param_ops = { .set = param_set_int, .get = param_get_int, }; @@ -3442,9 +3442,19 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long host_psscr = mfspr(SPRN_PSSCR); unsigned long host_pidr = mfspr(SPRN_PID); + /* + * P8 and P9 suppress the HDEC exception when LPCR[HDICE] = 0, + * so set HDICE before writing HDEC. + */ + mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr | LPCR_HDICE); + isync(); + hdec = time_limit - mftb(); - if (hdec < 0) + if (hdec < 0) { + mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr); + isync(); return BOOK3S_INTERRUPT_HV_DECREMENTER; + } mtspr(SPRN_HDEC, hdec); if (vc->tb_offset) { @@ -3565,7 +3575,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, * Virtual-mode guest entry for POWER9 and later when the host and * guest are both using the radix MMU. The LPIDR has already been set. */ -int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, +static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr) { struct kvmppc_vcore *vc = vcpu->arch.vcore; @@ -3579,7 +3589,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, dec = mfspr(SPRN_DEC); tb = mftb(); - if (dec < 512) + if (dec < 0) return BOOK3S_INTERRUPT_HV_DECREMENTER; local_paca->kvm_hstate.dec_expires = dec + tb; if (local_paca->kvm_hstate.dec_expires < time_limit) @@ -5257,6 +5267,12 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, case KVM_PPC_ALLOCATE_HTAB: { u32 htab_order; + /* If we're a nested hypervisor, we currently only support radix */ + if (kvmhv_on_pseries()) { + r = -EOPNOTSUPP; + break; + } + r = -EFAULT; if (get_user(htab_order, (u32 __user *)argp)) break; diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 59822cba454d..327417d79eac 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -58,13 +58,16 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) /* * Put whatever is in the decrementer into the * hypervisor decrementer. + * Because of a hardware deviation in P8 and P9, + * we need to set LPCR[HDICE] before writing HDEC. */ -BEGIN_FTR_SECTION ld r5, HSTATE_KVM_VCORE(r13) ld r6, VCORE_KVM(r5) ld r9, KVM_HOST_LPCR(r6) - andis. r9, r9, LPCR_LD@h -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + ori r8, r9, LPCR_HDICE + mtspr SPRN_LPCR, r8 + isync + andis. r0, r9, LPCR_LD@h mfspr r8,SPRN_DEC mftb r7 BEGIN_FTR_SECTION diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 6822d23a2da4..33b58549a9aa 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -569,7 +569,7 @@ static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp) kvmhv_set_nested_ptbl(gp); } -struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid) +static struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid) { struct kvm_nested_guest *gp; long shadow_lpid; diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 4d7e5610731a..c2c9c733f359 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -764,7 +764,7 @@ int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) return ics_rm_eoi(vcpu, irq); } -unsigned long eoi_rc; +static unsigned long eoi_rc; static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again) { diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 88fac22fbf09..b1fefa63e125 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -569,7 +569,7 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) #endif } -void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr) +static void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr) { u32 host_pvr; diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 381bf8dea193..5fee5a11550d 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1334,47 +1334,97 @@ static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return -ENXIO; } -static void kvmppc_xics_free(struct kvm_device *dev) +/* + * Called when device fd is closed. kvm->lock is held. + */ +static void kvmppc_xics_release(struct kvm_device *dev) { struct kvmppc_xics *xics = dev->private; int i; struct kvm *kvm = xics->kvm; + struct kvm_vcpu *vcpu; + + pr_devel("Releasing xics device\n"); + + /* + * Since this is the device release function, we know that + * userspace does not have any open fd referring to the + * device. Therefore there can not be any of the device + * attribute set/get functions being executed concurrently, + * and similarly, the connect_vcpu and set/clr_mapped + * functions also cannot be being executed. + */ debugfs_remove(xics->dentry); + /* + * We should clean up the vCPU interrupt presenters first. + */ + kvm_for_each_vcpu(i, vcpu, kvm) { + /* + * Take vcpu->mutex to ensure that no one_reg get/set ioctl + * (i.e. kvmppc_xics_[gs]et_icp) can be done concurrently. + * Holding the vcpu->mutex also means that execution is + * excluded for the vcpu until the ICP was freed. When the vcpu + * can execute again, vcpu->arch.icp and vcpu->arch.irq_type + * have been cleared and the vcpu will not be going into the + * XICS code anymore. + */ + mutex_lock(&vcpu->mutex); + kvmppc_xics_free_icp(vcpu); + mutex_unlock(&vcpu->mutex); + } + if (kvm) kvm->arch.xics = NULL; - for (i = 0; i <= xics->max_icsid; i++) + for (i = 0; i <= xics->max_icsid; i++) { kfree(xics->ics[i]); - kfree(xics); + xics->ics[i] = NULL; + } + /* + * A reference of the kvmppc_xics pointer is now kept under + * the xics_device pointer of the machine for reuse. It is + * freed when the VM is destroyed for now until we fix all the + * execution paths. + */ kfree(dev); } +static struct kvmppc_xics *kvmppc_xics_get_device(struct kvm *kvm) +{ + struct kvmppc_xics **kvm_xics_device = &kvm->arch.xics_device; + struct kvmppc_xics *xics = *kvm_xics_device; + + if (!xics) { + xics = kzalloc(sizeof(*xics), GFP_KERNEL); + *kvm_xics_device = xics; + } else { + memset(xics, 0, sizeof(*xics)); + } + + return xics; +} + static int kvmppc_xics_create(struct kvm_device *dev, u32 type) { struct kvmppc_xics *xics; struct kvm *kvm = dev->kvm; - int ret = 0; - xics = kzalloc(sizeof(*xics), GFP_KERNEL); + pr_devel("Creating xics for partition\n"); + + /* Already there ? */ + if (kvm->arch.xics) + return -EEXIST; + + xics = kvmppc_xics_get_device(kvm); if (!xics) return -ENOMEM; dev->private = xics; xics->dev = dev; xics->kvm = kvm; - - /* Already there ? */ - if (kvm->arch.xics) - ret = -EEXIST; - else - kvm->arch.xics = xics; - - if (ret) { - kfree(xics); - return ret; - } + kvm->arch.xics = xics; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE if (cpu_has_feature(CPU_FTR_ARCH_206) && @@ -1399,7 +1449,7 @@ struct kvm_device_ops kvm_xics_ops = { .name = "kvm-xics", .create = kvmppc_xics_create, .init = kvmppc_xics_init, - .destroy = kvmppc_xics_free, + .release = kvmppc_xics_release, .set_attr = xics_set_attr, .get_attr = xics_get_attr, .has_attr = xics_has_attr, @@ -1415,7 +1465,7 @@ int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, return -EPERM; if (xics->kvm != vcpu->kvm) return -EPERM; - if (vcpu->arch.irq_type) + if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) return -EBUSY; r = kvmppc_xics_create_icp(vcpu, xcpu); diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index bdea91df1497..d0c2db0e07fa 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -1227,17 +1227,7 @@ static int xive_native_debug_show(struct seq_file *m, void *private) return 0; } -static int xive_native_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, xive_native_debug_show, inode->i_private); -} - -static const struct file_operations xive_native_debug_fops = { - .open = xive_native_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(xive_native_debug); static void xive_native_debugfs_init(struct kvmppc_xive *xive) { diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 3e1c9f08e302..b1abcb816439 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1747,12 +1747,12 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - return -ENOTSUPP; + return -EOPNOTSUPP; } int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - return -ENOTSUPP; + return -EOPNOTSUPP; } int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, @@ -1773,7 +1773,7 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { - return -ENOTSUPP; + return -EOPNOTSUPP; } void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) |