summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/ia64/kvm/kvm-ia64.c2
-rw-r--r--arch/powerpc/include/asm/Kbuild1
-rw-r--r--arch/powerpc/include/asm/fsl_hcalls.h36
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h2
-rw-r--r--arch/powerpc/include/asm/kvm_booke_hv_asm.h4
-rw-r--r--arch/powerpc/include/asm/kvm_host.h38
-rw-r--r--arch/powerpc/include/asm/kvm_para.h15
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h64
-rw-r--r--arch/powerpc/include/asm/reg_booke.h7
-rw-r--r--arch/powerpc/include/uapi/asm/Kbuild1
-rw-r--r--arch/powerpc/include/uapi/asm/epapr_hcalls.h (renamed from arch/powerpc/include/asm/epapr_hcalls.h)36
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h59
-rw-r--r--arch/powerpc/include/uapi/asm/kvm_para.h7
-rw-r--r--arch/powerpc/kernel/epapr_hcalls.S28
-rw-r--r--arch/powerpc/kernel/epapr_paravirt.c11
-rw-r--r--arch/powerpc/kernel/kvm.c2
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c5
-rw-r--r--arch/powerpc/kvm/44x.c1
-rw-r--r--arch/powerpc/kvm/44x_emulate.c110
-rw-r--r--arch/powerpc/kvm/Kconfig3
-rw-r--r--arch/powerpc/kvm/book3s.c125
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu_host.c1
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_host.c1
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c67
-rw-r--r--arch/powerpc/kvm/book3s_hv.c337
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c6
-rw-r--r--arch/powerpc/kvm/book3s_mmu_hpte.c5
-rw-r--r--arch/powerpc/kvm/book3s_pr.c180
-rw-r--r--arch/powerpc/kvm/book3s_rmhandlers.S15
-rw-r--r--arch/powerpc/kvm/booke.c310
-rw-r--r--arch/powerpc/kvm/booke_emulate.c22
-rw-r--r--arch/powerpc/kvm/e500.h3
-rw-r--r--arch/powerpc/kvm/e500_tlb.c92
-rw-r--r--arch/powerpc/kvm/powerpc.c150
-rw-r--r--arch/powerpc/kvm/trace.h200
-rw-r--r--arch/powerpc/platforms/Kconfig1
-rw-r--r--arch/powerpc/sysdev/fsl_msi.c9
-rw-r--r--arch/powerpc/sysdev/fsl_soc.c2
-rw-r--r--arch/s390/kvm/interrupt.c19
-rw-r--r--arch/s390/kvm/kvm-s390.c2
-rw-r--r--arch/x86/kvm/mmu.c45
-rw-r--r--arch/x86/kvm/paging_tmpl.h115
-rw-r--r--arch/x86/kvm/svm.c7
-rw-r--r--arch/x86/kvm/vmx.c45
-rw-r--r--arch/x86/kvm/x86.c14
46 files changed, 1713 insertions, 496 deletions
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 8b3a9c0e771d..c71acd7f9a13 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1362,11 +1362,9 @@ static void kvm_release_vm_pages(struct kvm *kvm)
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int j;
- unsigned long base_gfn;
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots) {
- base_gfn = memslot->base_gfn;
for (j = 0; j < memslot->npages; j++) {
if (memslot->rmap[j])
put_page((struct page *)memslot->rmap[j]);
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index a4fe15e33c6f..324495a08c87 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -1,4 +1,3 @@
-
generic-y += clkdev.h
generic-y += rwsem.h
diff --git a/arch/powerpc/include/asm/fsl_hcalls.h b/arch/powerpc/include/asm/fsl_hcalls.h
index 922d9b5fe3d5..3abb58394da4 100644
--- a/arch/powerpc/include/asm/fsl_hcalls.h
+++ b/arch/powerpc/include/asm/fsl_hcalls.h
@@ -96,7 +96,7 @@ static inline unsigned int fh_send_nmi(unsigned int vcpu_mask)
r11 = FH_HCALL_TOKEN(FH_SEND_NMI);
r3 = vcpu_mask;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -151,7 +151,7 @@ static inline unsigned int fh_partition_get_dtprop(int handle,
r9 = (uint32_t)propvalue_addr;
r10 = *propvalue_len;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11),
"+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7),
"+r" (r8), "+r" (r9), "+r" (r10)
@@ -205,7 +205,7 @@ static inline unsigned int fh_partition_set_dtprop(int handle,
r9 = (uint32_t)propvalue_addr;
r10 = propvalue_len;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11),
"+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7),
"+r" (r8), "+r" (r9), "+r" (r10)
@@ -229,7 +229,7 @@ static inline unsigned int fh_partition_restart(unsigned int partition)
r11 = FH_HCALL_TOKEN(FH_PARTITION_RESTART);
r3 = partition;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -262,7 +262,7 @@ static inline unsigned int fh_partition_get_status(unsigned int partition,
r11 = FH_HCALL_TOKEN(FH_PARTITION_GET_STATUS);
r3 = partition;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "=r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -295,7 +295,7 @@ static inline unsigned int fh_partition_start(unsigned int partition,
r4 = entry_point;
r5 = load;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5)
: : EV_HCALL_CLOBBERS3
);
@@ -317,7 +317,7 @@ static inline unsigned int fh_partition_stop(unsigned int partition)
r11 = FH_HCALL_TOKEN(FH_PARTITION_STOP);
r3 = partition;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -376,7 +376,7 @@ static inline unsigned int fh_partition_memcpy(unsigned int source,
#endif
r7 = count;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11),
"+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7)
: : EV_HCALL_CLOBBERS5
@@ -399,7 +399,7 @@ static inline unsigned int fh_dma_enable(unsigned int liodn)
r11 = FH_HCALL_TOKEN(FH_DMA_ENABLE);
r3 = liodn;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -421,7 +421,7 @@ static inline unsigned int fh_dma_disable(unsigned int liodn)
r11 = FH_HCALL_TOKEN(FH_DMA_DISABLE);
r3 = liodn;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -447,7 +447,7 @@ static inline unsigned int fh_vmpic_get_msir(unsigned int interrupt,
r11 = FH_HCALL_TOKEN(FH_VMPIC_GET_MSIR);
r3 = interrupt;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "=r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -469,7 +469,7 @@ static inline unsigned int fh_system_reset(void)
r11 = FH_HCALL_TOKEN(FH_SYSTEM_RESET);
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "=r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -506,7 +506,7 @@ static inline unsigned int fh_err_get_info(int queue, uint32_t *bufsize,
r6 = addr_lo;
r7 = peek;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6),
"+r" (r7)
: : EV_HCALL_CLOBBERS5
@@ -542,7 +542,7 @@ static inline unsigned int fh_get_core_state(unsigned int handle,
r3 = handle;
r4 = vcpu;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "+r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -572,7 +572,7 @@ static inline unsigned int fh_enter_nap(unsigned int handle, unsigned int vcpu)
r3 = handle;
r4 = vcpu;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "+r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -597,7 +597,7 @@ static inline unsigned int fh_exit_nap(unsigned int handle, unsigned int vcpu)
r3 = handle;
r4 = vcpu;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "+r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -618,7 +618,7 @@ static inline unsigned int fh_claim_device(unsigned int handle)
r11 = FH_HCALL_TOKEN(FH_CLAIM_DEVICE);
r3 = handle;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -645,7 +645,7 @@ static inline unsigned int fh_partition_stop_dma(unsigned int handle)
r11 = FH_HCALL_TOKEN(FH_PARTITION_STOP_DMA);
r3 = handle;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 7aefdb3e1ce4..36fcf4190461 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -160,7 +160,7 @@ extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel);
extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
- struct kvm_memory_slot *memslot);
+ struct kvm_memory_slot *memslot, unsigned long *map);
extern void kvmppc_entry_trampoline(void);
extern void kvmppc_hv_entry_trampoline(void);
diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
index 30a600fa1b6a..a37a12a9a7d7 100644
--- a/arch/powerpc/include/asm/kvm_booke_hv_asm.h
+++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
@@ -38,9 +38,9 @@
#ifdef CONFIG_KVM_BOOKE_HV
BEGIN_FTR_SECTION
mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */
- bf 3, kvmppc_resume_\intno\()_\srr1
+ bf 3, 1975f
b kvmppc_handler_\intno\()_\srr1
-kvmppc_resume_\intno\()_\srr1:
+1975:
END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
#endif
.endm
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 28e8f5e5c63e..68f5a308737a 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -46,7 +46,7 @@
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#endif
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#if !defined(CONFIG_KVM_440)
#include <linux/mmu_notifier.h>
#define KVM_ARCH_WANT_MMU_NOTIFIER
@@ -204,7 +204,7 @@ struct revmap_entry {
};
/*
- * We use the top bit of each memslot->rmap entry as a lock bit,
+ * We use the top bit of each memslot->arch.rmap entry as a lock bit,
* and bit 32 as a present flag. The bottom 32 bits are the
* index in the guest HPT of a HPTE that points to the page.
*/
@@ -215,14 +215,17 @@ struct revmap_entry {
#define KVMPPC_RMAP_PRESENT 0x100000000ul
#define KVMPPC_RMAP_INDEX 0xfffffffful
-/* Low-order bits in kvm->arch.slot_phys[][] */
+/* Low-order bits in memslot->arch.slot_phys[] */
#define KVMPPC_PAGE_ORDER_MASK 0x1f
#define KVMPPC_PAGE_NO_CACHE HPTE_R_I /* 0x20 */
#define KVMPPC_PAGE_WRITETHRU HPTE_R_W /* 0x40 */
#define KVMPPC_GOT_PAGE 0x80
struct kvm_arch_memory_slot {
+#ifdef CONFIG_KVM_BOOK3S_64_HV
unsigned long *rmap;
+ unsigned long *slot_phys;
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
};
struct kvm_arch {
@@ -246,8 +249,6 @@ struct kvm_arch {
unsigned long hpt_npte;
unsigned long hpt_mask;
spinlock_t slot_phys_lock;
- unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
- int slot_npages[KVM_MEM_SLOTS_NUM];
unsigned short last_vcpu[NR_CPUS];
struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
struct kvmppc_linear_info *hpt_li;
@@ -346,6 +347,27 @@ struct kvmppc_slb {
bool class : 1;
};
+# ifdef CONFIG_PPC_FSL_BOOK3E
+#define KVMPPC_BOOKE_IAC_NUM 2
+#define KVMPPC_BOOKE_DAC_NUM 2
+# else
+#define KVMPPC_BOOKE_IAC_NUM 4
+#define KVMPPC_BOOKE_DAC_NUM 2
+# endif
+#define KVMPPC_BOOKE_MAX_IAC 4
+#define KVMPPC_BOOKE_MAX_DAC 2
+
+struct kvmppc_booke_debug_reg {
+ u32 dbcr0;
+ u32 dbcr1;
+ u32 dbcr2;
+#ifdef CONFIG_KVM_E500MC
+ u32 dbcr4;
+#endif
+ u64 iac[KVMPPC_BOOKE_MAX_IAC];
+ u64 dac[KVMPPC_BOOKE_MAX_DAC];
+};
+
struct kvm_vcpu_arch {
ulong host_stack;
u32 host_pid;
@@ -440,8 +462,6 @@ struct kvm_vcpu_arch {
u32 ccr0;
u32 ccr1;
- u32 dbcr0;
- u32 dbcr1;
u32 dbsr;
u64 mmcr[3];
@@ -471,9 +491,12 @@ struct kvm_vcpu_arch {
ulong fault_esr;
ulong queued_dear;
ulong queued_esr;
+ spinlock_t wdt_lock;
+ struct timer_list wdt_timer;
u32 tlbcfg[4];
u32 mmucfg;
u32 epr;
+ struct kvmppc_booke_debug_reg dbg_reg;
#endif
gpa_t paddr_accessed;
gva_t vaddr_accessed;
@@ -486,6 +509,7 @@ struct kvm_vcpu_arch {
u8 osi_needed;
u8 osi_enabled;
u8 papr_enabled;
+ u8 watchdog_enabled;
u8 sane;
u8 cpu_type;
u8 hcall_needed;
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index 9365860fb7f6..2b119654b4c1 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -21,7 +21,6 @@
#include <uapi/asm/kvm_para.h>
-
#ifdef CONFIG_KVM_GUEST
#include <linux/of.h>
@@ -55,7 +54,7 @@ static unsigned long kvm_hypercall(unsigned long *in,
unsigned long *out,
unsigned long nr)
{
- return HC_EV_UNIMPLEMENTED;
+ return EV_UNIMPLEMENTED;
}
#endif
@@ -66,7 +65,7 @@ static inline long kvm_hypercall0_1(unsigned int nr, unsigned long *r2)
unsigned long out[8];
unsigned long r;
- r = kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+ r = kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
*r2 = out[0];
return r;
@@ -77,7 +76,7 @@ static inline long kvm_hypercall0(unsigned int nr)
unsigned long in[8];
unsigned long out[8];
- return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+ return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
}
static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
@@ -86,7 +85,7 @@ static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
unsigned long out[8];
in[0] = p1;
- return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+ return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
}
static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
@@ -97,7 +96,7 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
in[0] = p1;
in[1] = p2;
- return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+ return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
}
static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
@@ -109,7 +108,7 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
in[0] = p1;
in[1] = p2;
in[2] = p3;
- return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+ return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
}
static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
@@ -123,7 +122,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
in[1] = p2;
in[2] = p3;
in[3] = p4;
- return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+ return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
}
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index e006f0bdea95..609cca3e9426 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -28,6 +28,7 @@
#include <linux/types.h>
#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
+#include <linux/bug.h>
#ifdef CONFIG_PPC_BOOK3S
#include <asm/kvm_book3s.h>
#else
@@ -68,6 +69,8 @@ extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
extern void kvmppc_decrementer_func(unsigned long data);
extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
+extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
+extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu);
/* Core-specific hooks */
@@ -104,6 +107,7 @@ extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq);
extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq);
+extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu);
extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int op, int *advance);
@@ -111,6 +115,7 @@ extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn,
ulong val);
extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn,
ulong *val);
+extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu);
extern int kvmppc_booke_init(void);
extern void kvmppc_booke_exit(void);
@@ -139,16 +144,26 @@ extern struct kvmppc_linear_info *kvm_alloc_hpt(void);
extern void kvm_release_hpt(struct kvmppc_linear_info *li);
extern int kvmppc_core_init_vm(struct kvm *kvm);
extern void kvmppc_core_destroy_vm(struct kvm *kvm);
+extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont);
+extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+ unsigned long npages);
extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem);
extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem);
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old);
extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
struct kvm_ppc_smmu_info *info);
+extern void kvmppc_core_flush_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *memslot);
extern int kvmppc_bookehv_init(void);
extern void kvmppc_bookehv_exit(void);
+extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
+
/*
* Cuts out inst bits with ordering according to spec.
* That means the leftmost bit is zero. All given bits are included.
@@ -182,6 +197,41 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value)
return r;
}
+union kvmppc_one_reg {
+ u32 wval;
+ u64 dval;
+ vector128 vval;
+ u64 vsxval[2];
+ struct {
+ u64 addr;
+ u64 length;
+ } vpaval;
+};
+
+#define one_reg_size(id) \
+ (1ul << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
+#define get_reg_val(id, reg) ({ \
+ union kvmppc_one_reg __u; \
+ switch (one_reg_size(id)) { \
+ case 4: __u.wval = (reg); break; \
+ case 8: __u.dval = (reg); break; \
+ default: BUG(); \
+ } \
+ __u; \
+})
+
+
+#define set_reg_val(id, val) ({ \
+ u64 __v; \
+ switch (one_reg_size(id)) { \
+ case 4: __v = (val).wval; break; \
+ case 8: __v = (val).dval; break; \
+ default: BUG(); \
+ } \
+ __v; \
+})
+
void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
@@ -190,6 +240,8 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *);
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *);
void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
@@ -230,5 +282,15 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
}
}
+/* Please call after prepare_to_enter. This function puts the lazy ee state
+ back to normal mode, without actually enabling interrupts. */
+static inline void kvmppc_lazy_ee_enable(void)
+{
+#ifdef CONFIG_PPC64
+ /* Only need to enable IRQs by hard enabling them after this */
+ local_paca->irq_happened = 0;
+ local_paca->soft_enabled = 1;
+#endif
+}
#endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 2d916c4982c5..e07e6af5e1ff 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -539,6 +539,13 @@
#define TCR_FIE 0x00800000 /* FIT Interrupt Enable */
#define TCR_ARE 0x00400000 /* Auto Reload Enable */
+#ifdef CONFIG_E500
+#define TCR_GET_WP(tcr) ((((tcr) & 0xC0000000) >> 30) | \
+ (((tcr) & 0x1E0000) >> 15))
+#else
+#define TCR_GET_WP(tcr) (((tcr) & 0xC0000000) >> 30)
+#endif
+
/* Bit definitions for the TSR. */
#define TSR_ENW 0x80000000 /* Enable Next Watchdog */
#define TSR_WIS 0x40000000 /* WDT Interrupt Status */
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index a33c3c03bb2e..9eedfc5a557b 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -42,3 +42,4 @@ header-y += termios.h
header-y += types.h
header-y += ucontext.h
header-y += unistd.h
+header-y += epapr_hcalls.h
diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/uapi/asm/epapr_hcalls.h
index bf2c06c33871..b8d94459a929 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/uapi/asm/epapr_hcalls.h
@@ -50,10 +50,6 @@
#ifndef _EPAPR_HCALLS_H
#define _EPAPR_HCALLS_H
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <asm/byteorder.h>
-
#define EV_BYTE_CHANNEL_SEND 1
#define EV_BYTE_CHANNEL_RECEIVE 2
#define EV_BYTE_CHANNEL_POLL 3
@@ -88,7 +84,8 @@
#define _EV_HCALL_TOKEN(id, num) (((id) << 16) | (num))
#define EV_HCALL_TOKEN(hcall_num) _EV_HCALL_TOKEN(EV_EPAPR_VENDOR_ID, hcall_num)
-/* epapr error codes */
+/* epapr return codes */
+#define EV_SUCCESS 0
#define EV_EPERM 1 /* Operation not permitted */
#define EV_ENOENT 2 /* Entry Not Found */
#define EV_EIO 3 /* I/O error occured */
@@ -108,6 +105,11 @@
#define EV_UNIMPLEMENTED 12 /* Unimplemented hypercall */
#define EV_BUFFER_OVERFLOW 13 /* Caller-supplied buffer too small */
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/byteorder.h>
+
/*
* Hypercall register clobber list
*
@@ -193,7 +195,7 @@ static inline unsigned int ev_int_set_config(unsigned int interrupt,
r5 = priority;
r6 = destination;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6)
: : EV_HCALL_CLOBBERS4
);
@@ -222,7 +224,7 @@ static inline unsigned int ev_int_get_config(unsigned int interrupt,
r11 = EV_HCALL_TOKEN(EV_INT_GET_CONFIG);
r3 = interrupt;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5), "=r" (r6)
: : EV_HCALL_CLOBBERS4
);
@@ -252,7 +254,7 @@ static inline unsigned int ev_int_set_mask(unsigned int interrupt,
r3 = interrupt;
r4 = mask;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "+r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -277,7 +279,7 @@ static inline unsigned int ev_int_get_mask(unsigned int interrupt,
r11 = EV_HCALL_TOKEN(EV_INT_GET_MASK);
r3 = interrupt;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "=r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -305,7 +307,7 @@ static inline unsigned int ev_int_eoi(unsigned int interrupt)
r11 = EV_HCALL_TOKEN(EV_INT_EOI);
r3 = interrupt;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -344,7 +346,7 @@ static inline unsigned int ev_byte_channel_send(unsigned int handle,
r7 = be32_to_cpu(p[2]);
r8 = be32_to_cpu(p[3]);
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3),
"+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8)
: : EV_HCALL_CLOBBERS6
@@ -383,7 +385,7 @@ static inline unsigned int ev_byte_channel_receive(unsigned int handle,
r3 = handle;
r4 = *count;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "+r" (r4),
"=r" (r5), "=r" (r6), "=r" (r7), "=r" (r8)
: : EV_HCALL_CLOBBERS6
@@ -421,7 +423,7 @@ static inline unsigned int ev_byte_channel_poll(unsigned int handle,
r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_POLL);
r3 = handle;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5)
: : EV_HCALL_CLOBBERS3
);
@@ -454,7 +456,7 @@ static inline unsigned int ev_int_iack(unsigned int handle,
r11 = EV_HCALL_TOKEN(EV_INT_IACK);
r3 = handle;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3), "=r" (r4)
: : EV_HCALL_CLOBBERS2
);
@@ -478,7 +480,7 @@ static inline unsigned int ev_doorbell_send(unsigned int handle)
r11 = EV_HCALL_TOKEN(EV_DOORBELL_SEND);
r3 = handle;
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "+r" (r3)
: : EV_HCALL_CLOBBERS1
);
@@ -498,12 +500,12 @@ static inline unsigned int ev_idle(void)
r11 = EV_HCALL_TOKEN(EV_IDLE);
- __asm__ __volatile__ ("sc 1"
+ asm volatile("bl epapr_hypercall_start"
: "+r" (r11), "=r" (r3)
: : EV_HCALL_CLOBBERS1
);
return r3;
}
-
+#endif /* !__ASSEMBLY__ */
#endif
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 1bea4d8ea6f4..b89ae4db45ce 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -221,6 +221,12 @@ struct kvm_sregs {
__u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */
__u32 dbcr[3];
+ /*
+ * iac/dac registers are 64bit wide, while this API
+ * interface provides only lower 32 bits on 64 bit
+ * processors. ONE_REG interface is added for 64bit
+ * iac/dac registers.
+ */
__u32 iac[4];
__u32 dac[2];
__u32 dvc[2];
@@ -326,5 +332,58 @@ struct kvm_book3e_206_tlb_params {
};
#define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
+#define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
+#define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
+#define KVM_REG_PPC_IAC3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x4)
+#define KVM_REG_PPC_IAC4 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x5)
+#define KVM_REG_PPC_DAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x6)
+#define KVM_REG_PPC_DAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x7)
+#define KVM_REG_PPC_DABR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8)
+#define KVM_REG_PPC_DSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9)
+#define KVM_REG_PPC_PURR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa)
+#define KVM_REG_PPC_SPURR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb)
+#define KVM_REG_PPC_DAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc)
+#define KVM_REG_PPC_DSISR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd)
+#define KVM_REG_PPC_AMR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xe)
+#define KVM_REG_PPC_UAMOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xf)
+
+#define KVM_REG_PPC_MMCR0 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x10)
+#define KVM_REG_PPC_MMCR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x11)
+#define KVM_REG_PPC_MMCRA (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x12)
+
+#define KVM_REG_PPC_PMC1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x18)
+#define KVM_REG_PPC_PMC2 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x19)
+#define KVM_REG_PPC_PMC3 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1a)
+#define KVM_REG_PPC_PMC4 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1b)
+#define KVM_REG_PPC_PMC5 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1c)
+#define KVM_REG_PPC_PMC6 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1d)
+#define KVM_REG_PPC_PMC7 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1e)
+#define KVM_REG_PPC_PMC8 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1f)
+
+/* 32 floating-point registers */
+#define KVM_REG_PPC_FPR0 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x20)
+#define KVM_REG_PPC_FPR(n) (KVM_REG_PPC_FPR0 + (n))
+#define KVM_REG_PPC_FPR31 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3f)
+
+/* 32 VMX/Altivec vector registers */
+#define KVM_REG_PPC_VR0 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x40)
+#define KVM_REG_PPC_VR(n) (KVM_REG_PPC_VR0 + (n))
+#define KVM_REG_PPC_VR31 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x5f)
+
+/* 32 double-width FP registers for VSX */
+/* High-order halves overlap with FP regs */
+#define KVM_REG_PPC_VSR0 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x60)
+#define KVM_REG_PPC_VSR(n) (KVM_REG_PPC_VSR0 + (n))
+#define KVM_REG_PPC_VSR31 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x7f)
+
+/* FP and vector status/control registers */
+#define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80)
+#define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81)
+
+/* Virtual processor areas */
+/* For SLB & DTL, address in high (first) half, length in low half */
+#define KVM_REG_PPC_VPA_ADDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x82)
+#define KVM_REG_PPC_VPA_SLB (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x83)
+#define KVM_REG_PPC_VPA_DTL (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84)
#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/uapi/asm/kvm_para.h b/arch/powerpc/include/uapi/asm/kvm_para.h
index 5e04383a1db5..ed0e0254b47f 100644
--- a/arch/powerpc/include/uapi/asm/kvm_para.h
+++ b/arch/powerpc/include/uapi/asm/kvm_para.h
@@ -75,9 +75,10 @@ struct kvm_vcpu_arch_shared {
};
#define KVM_SC_MAGIC_R0 0x4b564d21 /* "KVM!" */
-#define HC_VENDOR_KVM (42 << 16)
-#define HC_EV_SUCCESS 0
-#define HC_EV_UNIMPLEMENTED 12
+
+#define KVM_HCALL_TOKEN(num) _EV_HCALL_TOKEN(EV_KVM_VENDOR_ID, num)
+
+#include <uapi/asm/epapr_hcalls.h>
#define KVM_FEATURE_MAGIC_PAGE 1
diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S
index 697b390ebfd8..62c0dc237826 100644
--- a/arch/powerpc/kernel/epapr_hcalls.S
+++ b/arch/powerpc/kernel/epapr_hcalls.S
@@ -8,13 +8,41 @@
*/
#include <linux/threads.h>
+#include <asm/epapr_hcalls.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
+#include <asm/asm-compat.h>
#include <asm/asm-offsets.h>
+/* epapr_ev_idle() was derived from e500_idle() */
+_GLOBAL(epapr_ev_idle)
+ CURRENT_THREAD_INFO(r3, r1)
+ PPC_LL r4, TI_LOCAL_FLAGS(r3) /* set napping bit */
+ ori r4, r4,_TLF_NAPPING /* so when we take an exception */
+ PPC_STL r4, TI_LOCAL_FLAGS(r3) /* it will return to our caller */
+
+ wrteei 1
+
+idle_loop:
+ LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE))
+
+.global epapr_ev_idle_start
+epapr_ev_idle_start:
+ li r3, -1
+ nop
+ nop
+ nop
+
+ /*
+ * Guard against spurious wakeups from a hypervisor --
+ * only interrupt will cause us to return to LR due to
+ * _TLF_NAPPING.
+ */
+ b idle_loop
+
/* Hypercall entry point. Will be patched with device tree instructions. */
.global epapr_hypercall_start
epapr_hypercall_start:
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
index 028aeae370b6..f3eab8594d9f 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -21,6 +21,10 @@
#include <asm/epapr_hcalls.h>
#include <asm/cacheflush.h>
#include <asm/code-patching.h>
+#include <asm/machdep.h>
+
+extern void epapr_ev_idle(void);
+extern u32 epapr_ev_idle_start[];
bool epapr_paravirt_enabled;
@@ -41,8 +45,13 @@ static int __init epapr_paravirt_init(void)
if (len % 4 || len > (4 * 4))
return -ENODEV;
- for (i = 0; i < (len / 4); i++)
+ for (i = 0; i < (len / 4); i++) {
patch_instruction(epapr_hypercall_start + i, insts[i]);
+ patch_instruction(epapr_ev_idle_start + i, insts[i]);
+ }
+
+ if (of_get_property(hyper_node, "has-idle", NULL))
+ ppc_md.power_save = epapr_ev_idle;
epapr_paravirt_enabled = true;
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 867db1de8949..a61b133c4f99 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -419,7 +419,7 @@ static void kvm_map_magic_page(void *data)
in[0] = KVM_MAGIC_PAGE;
in[1] = KVM_MAGIC_PAGE;
- kvm_hypercall(in, out, HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE);
+ kvm_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE));
*features = out[0];
}
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 19e4288d8486..78b8766fd79e 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -43,6 +43,7 @@
#include <asm/dcr.h>
#include <asm/ftrace.h>
#include <asm/switch_to.h>
+#include <asm/epapr_hcalls.h>
#ifdef CONFIG_PPC32
extern void transfer_to_handler(void);
@@ -191,3 +192,7 @@ EXPORT_SYMBOL(__arch_hweight64);
#ifdef CONFIG_PPC_BOOK3S_64
EXPORT_SYMBOL_GPL(mmu_psize_defs);
#endif
+
+#ifdef CONFIG_EPAPR_PARAVIRT
+EXPORT_SYMBOL(epapr_hypercall_start);
+#endif
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 50e7dbc7356c..3d7fd21c65f9 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -83,6 +83,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu_44x->shadow_refs[i].gtlb_index = -1;
vcpu->arch.cpu_type = KVM_CPU_440;
+ vcpu->arch.pvr = mfspr(SPRN_PVR);
return 0;
}
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index c8c61578fdfc..1a793c4c4a67 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -27,12 +27,68 @@
#include "booke.h"
#include "44x_tlb.h"
+#define XOP_MFDCRX 259
#define XOP_MFDCR 323
+#define XOP_MTDCRX 387
#define XOP_MTDCR 451
#define XOP_TLBSX 914
#define XOP_ICCCI 966
#define XOP_TLBWE 978
+static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn)
+{
+ /* emulate some access in kernel */
+ switch (dcrn) {
+ case DCRN_CPR0_CONFIG_ADDR:
+ vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
+ return EMULATE_DONE;
+ default:
+ vcpu->run->dcr.dcrn = dcrn;
+ vcpu->run->dcr.data = kvmppc_get_gpr(vcpu, rs);
+ vcpu->run->dcr.is_write = 1;
+ vcpu->arch.dcr_needed = 1;
+ kvmppc_account_exit(vcpu, DCR_EXITS);
+ return EMULATE_DO_DCR;
+ }
+}
+
+static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn)
+{
+ /* The guest may access CPR0 registers to determine the timebase
+ * frequency, and it must know the real host frequency because it
+ * can directly access the timebase registers.
+ *
+ * It would be possible to emulate those accesses in userspace,
+ * but userspace can really only figure out the end frequency.
+ * We could decompose that into the factors that compute it, but
+ * that's tricky math, and it's easier to just report the real
+ * CPR0 values.
+ */
+ switch (dcrn) {
+ case DCRN_CPR0_CONFIG_ADDR:
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
+ break;
+ case DCRN_CPR0_CONFIG_DATA:
+ local_irq_disable();
+ mtdcr(DCRN_CPR0_CONFIG_ADDR,
+ vcpu->arch.cpr0_cfgaddr);
+ kvmppc_set_gpr(vcpu, rt,
+ mfdcr(DCRN_CPR0_CONFIG_DATA));
+ local_irq_enable();
+ break;
+ default:
+ vcpu->run->dcr.dcrn = dcrn;
+ vcpu->run->dcr.data = 0;
+ vcpu->run->dcr.is_write = 0;
+ vcpu->arch.io_gpr = rt;
+ vcpu->arch.dcr_needed = 1;
+ kvmppc_account_exit(vcpu, DCR_EXITS);
+ return EMULATE_DO_DCR;
+ }
+
+ return EMULATE_DONE;
+}
+
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
@@ -50,55 +106,21 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
switch (get_xop(inst)) {
case XOP_MFDCR:
- /* The guest may access CPR0 registers to determine the timebase
- * frequency, and it must know the real host frequency because it
- * can directly access the timebase registers.
- *
- * It would be possible to emulate those accesses in userspace,
- * but userspace can really only figure out the end frequency.
- * We could decompose that into the factors that compute it, but
- * that's tricky math, and it's easier to just report the real
- * CPR0 values.
- */
- switch (dcrn) {
- case DCRN_CPR0_CONFIG_ADDR:
- kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
- break;
- case DCRN_CPR0_CONFIG_DATA:
- local_irq_disable();
- mtdcr(DCRN_CPR0_CONFIG_ADDR,
- vcpu->arch.cpr0_cfgaddr);
- kvmppc_set_gpr(vcpu, rt,
- mfdcr(DCRN_CPR0_CONFIG_DATA));
- local_irq_enable();
- break;
- default:
- run->dcr.dcrn = dcrn;
- run->dcr.data = 0;
- run->dcr.is_write = 0;
- vcpu->arch.io_gpr = rt;
- vcpu->arch.dcr_needed = 1;
- kvmppc_account_exit(vcpu, DCR_EXITS);
- emulated = EMULATE_DO_DCR;
- }
+ emulated = emulate_mfdcr(vcpu, rt, dcrn);
+ break;
+ case XOP_MFDCRX:
+ emulated = emulate_mfdcr(vcpu, rt,
+ kvmppc_get_gpr(vcpu, ra));
break;
case XOP_MTDCR:
- /* emulate some access in kernel */
- switch (dcrn) {
- case DCRN_CPR0_CONFIG_ADDR:
- vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
- break;
- default:
- run->dcr.dcrn = dcrn;
- run->dcr.data = kvmppc_get_gpr(vcpu, rs);
- run->dcr.is_write = 1;
- vcpu->arch.dcr_needed = 1;
- kvmppc_account_exit(vcpu, DCR_EXITS);
- emulated = EMULATE_DO_DCR;
- }
+ emulated = emulate_mtdcr(vcpu, rs, dcrn);
+ break;
+ case XOP_MTDCRX:
+ emulated = emulate_mtdcr(vcpu, rs,
+ kvmppc_get_gpr(vcpu, ra));
break;
case XOP_TLBWE:
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index f4dacb9c57fa..71f0cd9edf33 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -36,6 +36,7 @@ config KVM_BOOK3S_64_HANDLER
config KVM_BOOK3S_PR
bool
select KVM_MMIO
+ select MMU_NOTIFIER
config KVM_BOOK3S_32
tristate "KVM support for PowerPC book3s_32 processors"
@@ -123,6 +124,7 @@ config KVM_E500V2
depends on EXPERIMENTAL && E500 && !PPC_E500MC
select KVM
select KVM_MMIO
+ select MMU_NOTIFIER
---help---
Support running unmodified E500 guest kernels in virtual machines on
E500v2 host processors.
@@ -138,6 +140,7 @@ config KVM_E500MC
select KVM
select KVM_MMIO
select KVM_BOOKE_HV
+ select MMU_NOTIFIER
---help---
Support running unmodified E500MC/E5500 (32-bit) guest kernels in
virtual machines on E500MC/E5500 host processors.
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 3f2a8360c857..a4b645285240 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -411,6 +411,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
return 0;
}
+int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+}
+
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
@@ -476,6 +485,122 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return -ENOTSUPP;
}
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ int r;
+ union kvmppc_one_reg val;
+ int size;
+ long int i;
+
+ size = one_reg_size(reg->id);
+ if (size > sizeof(val))
+ return -EINVAL;
+
+ r = kvmppc_get_one_reg(vcpu, reg->id, &val);
+
+ if (r == -EINVAL) {
+ r = 0;
+ switch (reg->id) {
+ case KVM_REG_PPC_DAR:
+ val = get_reg_val(reg->id, vcpu->arch.shared->dar);
+ break;
+ case KVM_REG_PPC_DSISR:
+ val = get_reg_val(reg->id, vcpu->arch.shared->dsisr);
+ break;
+ case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
+ i = reg->id - KVM_REG_PPC_FPR0;
+ val = get_reg_val(reg->id, vcpu->arch.fpr[i]);
+ break;
+ case KVM_REG_PPC_FPSCR:
+ val = get_reg_val(reg->id, vcpu->arch.fpscr);
+ break;
+#ifdef CONFIG_ALTIVEC
+ case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ val.vval = vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0];
+ break;
+ case KVM_REG_PPC_VSCR:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]);
+ break;
+#endif /* CONFIG_ALTIVEC */
+ default:
+ r = -EINVAL;
+ break;
+ }
+ }
+ if (r)
+ return r;
+
+ if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
+ r = -EFAULT;
+
+ return r;
+}
+
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ int r;
+ union kvmppc_one_reg val;
+ int size;
+ long int i;
+
+ size = one_reg_size(reg->id);
+ if (size > sizeof(val))
+ return -EINVAL;
+
+ if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
+ return -EFAULT;
+
+ r = kvmppc_set_one_reg(vcpu, reg->id, &val);
+
+ if (r == -EINVAL) {
+ r = 0;
+ switch (reg->id) {
+ case KVM_REG_PPC_DAR:
+ vcpu->arch.shared->dar = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_DSISR:
+ vcpu->arch.shared->dsisr = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
+ i = reg->id - KVM_REG_PPC_FPR0;
+ vcpu->arch.fpr[i] = set_reg_val(reg->id, val);
+ break;
+ case KVM_REG_PPC_FPSCR:
+ vcpu->arch.fpscr = set_reg_val(reg->id, val);
+ break;
+#ifdef CONFIG_ALTIVEC
+ case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+ break;
+ case KVM_REG_PPC_VSCR:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
+ break;
+#endif /* CONFIG_ALTIVEC */
+ default:
+ r = -EINVAL;
+ break;
+ }
+ }
+
+ return r;
+}
+
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
{
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index b0f625a33345..d1107a9b5d13 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -254,6 +254,7 @@ next_pteg:
kvmppc_mmu_hpte_cache_map(vcpu, pte);
+ kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
out:
return r;
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 4d72f9ebc554..d0205a545a81 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -171,6 +171,7 @@ map_again:
kvmppc_mmu_hpte_cache_map(vcpu, pte);
}
+ kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
out:
return r;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index d95d11322a15..2a89a36e7263 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
+#include <linux/srcu.h>
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
@@ -260,7 +261,7 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
/*
* This is called to get a reference to a guest page if there isn't
- * one already in the kvm->arch.slot_phys[][] arrays.
+ * one already in the memslot->arch.slot_phys[] array.
*/
static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
struct kvm_memory_slot *memslot,
@@ -275,7 +276,7 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
struct vm_area_struct *vma;
unsigned long pfn, i, npages;
- physp = kvm->arch.slot_phys[memslot->id];
+ physp = memslot->arch.slot_phys;
if (!physp)
return -EINVAL;
if (physp[gfn - memslot->base_gfn])
@@ -570,7 +571,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
struct kvm *kvm = vcpu->kvm;
unsigned long *hptep, hpte[3], r;
unsigned long mmu_seq, psize, pte_size;
- unsigned long gfn, hva, pfn;
+ unsigned long gpa, gfn, hva, pfn;
struct kvm_memory_slot *memslot;
unsigned long *rmap;
struct revmap_entry *rev;
@@ -608,15 +609,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Translate the logical address and get the page */
psize = hpte_page_size(hpte[0], r);
- gfn = hpte_rpn(r, psize);
+ gpa = (r & HPTE_R_RPN & ~(psize - 1)) | (ea & (psize - 1));
+ gfn = gpa >> PAGE_SHIFT;
memslot = gfn_to_memslot(kvm, gfn);
/* No memslot means it's an emulated MMIO region */
- if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
- unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1));
+ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
dsisr & DSISR_ISSTORE);
- }
if (!kvm->arch.using_mmu_notifiers)
return -EFAULT; /* should never get here */
@@ -710,7 +710,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Check if we might have been invalidated; let the guest retry if so */
ret = RESUME_GUEST;
- if (mmu_notifier_retry(vcpu, mmu_seq)) {
+ if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
unlock_rmap(rmap);
goto out_unlock;
}
@@ -850,7 +850,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
psize = hpte_page_size(hptep[0], ptel);
if ((hptep[0] & HPTE_V_VALID) &&
hpte_rpn(ptel, psize) == gfn) {
- hptep[0] |= HPTE_V_ABSENT;
+ if (kvm->arch.using_mmu_notifiers)
+ hptep[0] |= HPTE_V_ABSENT;
kvmppc_invalidate_hpte(kvm, hptep, i);
/* Harvest R and C */
rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
@@ -877,6 +878,28 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
return 0;
}
+void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+ unsigned long *rmapp;
+ unsigned long gfn;
+ unsigned long n;
+
+ rmapp = memslot->arch.rmap;
+ gfn = memslot->base_gfn;
+ for (n = memslot->npages; n; --n) {
+ /*
+ * Testing the present bit without locking is OK because
+ * the memslot has been marked invalid already, and hence
+ * no new HPTEs referencing this page can be created,
+ * thus the present bit can't go from 0 to 1.
+ */
+ if (*rmapp & KVMPPC_RMAP_PRESENT)
+ kvm_unmap_rmapp(kvm, rmapp, gfn);
+ ++rmapp;
+ ++gfn;
+ }
+}
+
static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
unsigned long gfn)
{
@@ -1030,16 +1053,16 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
return ret;
}
-long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ unsigned long *map)
{
unsigned long i;
- unsigned long *rmapp, *map;
+ unsigned long *rmapp;
preempt_disable();
rmapp = memslot->arch.rmap;
- map = memslot->dirty_bitmap;
for (i = 0; i < memslot->npages; ++i) {
- if (kvm_test_clear_dirty(kvm, rmapp))
+ if (kvm_test_clear_dirty(kvm, rmapp) && map)
__set_bit_le(i, map);
++rmapp;
}
@@ -1057,20 +1080,22 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
unsigned long hva, psize, offset;
unsigned long pa;
unsigned long *physp;
+ int srcu_idx;
+ srcu_idx = srcu_read_lock(&kvm->srcu);
memslot = gfn_to_memslot(kvm, gfn);
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
- return NULL;
+ goto err;
if (!kvm->arch.using_mmu_notifiers) {
- physp = kvm->arch.slot_phys[memslot->id];
+ physp = memslot->arch.slot_phys;
if (!physp)
- return NULL;
+ goto err;
physp += gfn - memslot->base_gfn;
pa = *physp;
if (!pa) {
if (kvmppc_get_guest_page(kvm, gfn, memslot,
PAGE_SIZE) < 0)
- return NULL;
+ goto err;
pa = *physp;
}
page = pfn_to_page(pa >> PAGE_SHIFT);
@@ -1079,9 +1104,11 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
hva = gfn_to_hva_memslot(memslot, gfn);
npages = get_user_pages_fast(hva, 1, 1, pages);
if (npages < 1)
- return NULL;
+ goto err;
page = pages[0];
}
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
psize = PAGE_SIZE;
if (PageHuge(page)) {
page = compound_head(page);
@@ -1091,6 +1118,10 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
if (nb_ret)
*nb_ret = psize - offset;
return page_address(page) + offset;
+
+ err:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return NULL;
}
void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 721d4603a235..cd8025db3017 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -30,6 +30,7 @@
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
+#include <linux/srcu.h>
#include <asm/reg.h>
#include <asm/cputable.h>
@@ -142,6 +143,22 @@ static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
vpa->yield_count = 1;
}
+static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v,
+ unsigned long addr, unsigned long len)
+{
+ /* check address is cacheline aligned */
+ if (addr & (L1_CACHE_BYTES - 1))
+ return -EINVAL;
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ if (v->next_gpa != addr || v->len != len) {
+ v->next_gpa = addr;
+ v->len = addr ? len : 0;
+ v->update_pending = 1;
+ }
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ return 0;
+}
+
/* Length for a per-processor buffer is passed in at offset 4 in the buffer */
struct reg_vpa {
u32 dummy;
@@ -320,7 +337,8 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->arch.vpa_update_lock);
if (vcpu->arch.vpa.update_pending) {
kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
- init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
+ if (vcpu->arch.vpa.pinned_addr)
+ init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
}
if (vcpu->arch.dtl.update_pending) {
kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
@@ -366,13 +384,16 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
unsigned long req = kvmppc_get_gpr(vcpu, 3);
unsigned long target, ret = H_SUCCESS;
struct kvm_vcpu *tvcpu;
+ int idx;
switch (req) {
case H_ENTER:
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6),
kvmppc_get_gpr(vcpu, 7));
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
case H_CEDE:
break;
@@ -411,6 +432,7 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
struct task_struct *tsk)
{
int r = RESUME_HOST;
+ int srcu_idx;
vcpu->stat.sum_exits++;
@@ -470,12 +492,16 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
* have been handled already.
*/
case BOOK3S_INTERRUPT_H_DATA_STORAGE:
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvmppc_book3s_hv_page_fault(run, vcpu,
vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
break;
case BOOK3S_INTERRUPT_H_INST_STORAGE:
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvmppc_book3s_hv_page_fault(run, vcpu,
kvmppc_get_pc(vcpu), 0);
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
break;
/*
* This occurs if the guest executes an illegal instruction.
@@ -535,36 +561,175 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return 0;
}
-int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
{
- int r = -EINVAL;
+ int r = 0;
+ long int i;
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_HIOR:
- r = put_user(0, (u64 __user *)reg->addr);
+ *val = get_reg_val(id, 0);
+ break;
+ case KVM_REG_PPC_DABR:
+ *val = get_reg_val(id, vcpu->arch.dabr);
+ break;
+ case KVM_REG_PPC_DSCR:
+ *val = get_reg_val(id, vcpu->arch.dscr);
+ break;
+ case KVM_REG_PPC_PURR:
+ *val = get_reg_val(id, vcpu->arch.purr);
+ break;
+ case KVM_REG_PPC_SPURR:
+ *val = get_reg_val(id, vcpu->arch.spurr);
+ break;
+ case KVM_REG_PPC_AMR:
+ *val = get_reg_val(id, vcpu->arch.amr);
+ break;
+ case KVM_REG_PPC_UAMOR:
+ *val = get_reg_val(id, vcpu->arch.uamor);
+ break;
+ case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
+ i = id - KVM_REG_PPC_MMCR0;
+ *val = get_reg_val(id, vcpu->arch.mmcr[i]);
+ break;
+ case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
+ i = id - KVM_REG_PPC_PMC1;
+ *val = get_reg_val(id, vcpu->arch.pmc[i]);
+ break;
+#ifdef CONFIG_VSX
+ case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
+ if (cpu_has_feature(CPU_FTR_VSX)) {
+ /* VSX => FP reg i is stored in arch.vsr[2*i] */
+ long int i = id - KVM_REG_PPC_FPR0;
+ *val = get_reg_val(id, vcpu->arch.vsr[2 * i]);
+ } else {
+ /* let generic code handle it */
+ r = -EINVAL;
+ }
+ break;
+ case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
+ if (cpu_has_feature(CPU_FTR_VSX)) {
+ long int i = id - KVM_REG_PPC_VSR0;
+ val->vsxval[0] = vcpu->arch.vsr[2 * i];
+ val->vsxval[1] = vcpu->arch.vsr[2 * i + 1];
+ } else {
+ r = -ENXIO;
+ }
+ break;
+#endif /* CONFIG_VSX */
+ case KVM_REG_PPC_VPA_ADDR:
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ *val = get_reg_val(id, vcpu->arch.vpa.next_gpa);
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ break;
+ case KVM_REG_PPC_VPA_SLB:
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ val->vpaval.addr = vcpu->arch.slb_shadow.next_gpa;
+ val->vpaval.length = vcpu->arch.slb_shadow.len;
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ break;
+ case KVM_REG_PPC_VPA_DTL:
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ val->vpaval.addr = vcpu->arch.dtl.next_gpa;
+ val->vpaval.length = vcpu->arch.dtl.len;
+ spin_unlock(&vcpu->arch.vpa_update_lock);
break;
default:
+ r = -EINVAL;
break;
}
return r;
}
-int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
{
- int r = -EINVAL;
+ int r = 0;
+ long int i;
+ unsigned long addr, len;
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_HIOR:
- {
- u64 hior;
/* Only allow this to be set to zero */
- r = get_user(hior, (u64 __user *)reg->addr);
- if (!r && (hior != 0))
+ if (set_reg_val(id, *val))
r = -EINVAL;
break;
- }
+ case KVM_REG_PPC_DABR:
+ vcpu->arch.dabr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_DSCR:
+ vcpu->arch.dscr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_PURR:
+ vcpu->arch.purr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_SPURR:
+ vcpu->arch.spurr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_AMR:
+ vcpu->arch.amr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_UAMOR:
+ vcpu->arch.uamor = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
+ i = id - KVM_REG_PPC_MMCR0;
+ vcpu->arch.mmcr[i] = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
+ i = id - KVM_REG_PPC_PMC1;
+ vcpu->arch.pmc[i] = set_reg_val(id, *val);
+ break;
+#ifdef CONFIG_VSX
+ case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
+ if (cpu_has_feature(CPU_FTR_VSX)) {
+ /* VSX => FP reg i is stored in arch.vsr[2*i] */
+ long int i = id - KVM_REG_PPC_FPR0;
+ vcpu->arch.vsr[2 * i] = set_reg_val(id, *val);
+ } else {
+ /* let generic code handle it */
+ r = -EINVAL;
+ }
+ break;
+ case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
+ if (cpu_has_feature(CPU_FTR_VSX)) {
+ long int i = id - KVM_REG_PPC_VSR0;
+ vcpu->arch.vsr[2 * i] = val->vsxval[0];
+ vcpu->arch.vsr[2 * i + 1] = val->vsxval[1];
+ } else {
+ r = -ENXIO;
+ }
+ break;
+#endif /* CONFIG_VSX */
+ case KVM_REG_PPC_VPA_ADDR:
+ addr = set_reg_val(id, *val);
+ r = -EINVAL;
+ if (!addr && (vcpu->arch.slb_shadow.next_gpa ||
+ vcpu->arch.dtl.next_gpa))
+ break;
+ r = set_vpa(vcpu, &vcpu->arch.vpa, addr, sizeof(struct lppaca));
+ break;
+ case KVM_REG_PPC_VPA_SLB:
+ addr = val->vpaval.addr;
+ len = val->vpaval.length;
+ r = -EINVAL;
+ if (addr && !vcpu->arch.vpa.next_gpa)
+ break;
+ r = set_vpa(vcpu, &vcpu->arch.slb_shadow, addr, len);
+ break;
+ case KVM_REG_PPC_VPA_DTL:
+ addr = val->vpaval.addr;
+ len = val->vpaval.length;
+ r = -EINVAL;
+ if (len < sizeof(struct dtl_entry))
+ break;
+ if (addr && !vcpu->arch.vpa.next_gpa)
+ break;
+ len -= len % sizeof(struct dtl_entry);
+ r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
+ break;
default:
+ r = -EINVAL;
break;
}
@@ -697,17 +862,11 @@ extern void xics_wake_cpu(int cpu);
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *v;
-
if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
return;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
--vc->n_runnable;
++vc->n_busy;
- /* decrement the physical thread id of each following vcpu */
- v = vcpu;
- list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
- --v->arch.ptid;
list_del(&vcpu->arch.run_list);
}
@@ -820,6 +979,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
long ret;
u64 now;
int ptid, i, need_vpa_update;
+ int srcu_idx;
/* don't start if any threads have a signal pending */
need_vpa_update = 0;
@@ -898,6 +1058,9 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
spin_unlock(&vc->lock);
kvm_guest_enter();
+
+ srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu);
+
__kvmppc_vcore_entry(NULL, vcpu0);
for (i = 0; i < threads_per_core; ++i)
kvmppc_release_hwthread(vc->pcpu + i);
@@ -913,6 +1076,8 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
vc->vcore_state = VCORE_EXITING;
spin_unlock(&vc->lock);
+ srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx);
+
/* make sure updates to secondary vcpu structs are visible now */
smp_mb();
kvm_guest_exit();
@@ -1273,7 +1438,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
n = kvm_dirty_bitmap_bytes(memslot);
memset(memslot->dirty_bitmap, 0, n);
- r = kvmppc_hv_get_dirty_log(kvm, memslot);
+ r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap);
if (r)
goto out;
@@ -1299,53 +1464,86 @@ static unsigned long slb_pgsize_encoding(unsigned long psize)
return senc;
}
-int kvmppc_core_prepare_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem)
+static void unpin_slot(struct kvm_memory_slot *memslot)
{
- unsigned long npages;
- unsigned long *phys;
+ unsigned long *physp;
+ unsigned long j, npages, pfn;
+ struct page *page;
- /* Allocate a slot_phys array */
- phys = kvm->arch.slot_phys[mem->slot];
- if (!kvm->arch.using_mmu_notifiers && !phys) {
- npages = mem->memory_size >> PAGE_SHIFT;
- phys = vzalloc(npages * sizeof(unsigned long));
- if (!phys)
- return -ENOMEM;
- kvm->arch.slot_phys[mem->slot] = phys;
- kvm->arch.slot_npages[mem->slot] = npages;
+ physp = memslot->arch.slot_phys;
+ npages = memslot->npages;
+ if (!physp)
+ return;
+ for (j = 0; j < npages; j++) {
+ if (!(physp[j] & KVMPPC_GOT_PAGE))
+ continue;
+ pfn = physp[j] >> PAGE_SHIFT;
+ page = pfn_to_page(pfn);
+ SetPageDirty(page);
+ put_page(page);
}
+}
+
+void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+ if (!dont || free->arch.rmap != dont->arch.rmap) {
+ vfree(free->arch.rmap);
+ free->arch.rmap = NULL;
+ }
+ if (!dont || free->arch.slot_phys != dont->arch.slot_phys) {
+ unpin_slot(free);
+ vfree(free->arch.slot_phys);
+ free->arch.slot_phys = NULL;
+ }
+}
+
+int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+ unsigned long npages)
+{
+ slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
+ if (!slot->arch.rmap)
+ return -ENOMEM;
+ slot->arch.slot_phys = NULL;
return 0;
}
-static void unpin_slot(struct kvm *kvm, int slot_id)
+int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ struct kvm_userspace_memory_region *mem)
{
- unsigned long *physp;
- unsigned long j, npages, pfn;
- struct page *page;
+ unsigned long *phys;
- physp = kvm->arch.slot_phys[slot_id];
- npages = kvm->arch.slot_npages[slot_id];
- if (physp) {
- spin_lock(&kvm->arch.slot_phys_lock);
- for (j = 0; j < npages; j++) {
- if (!(physp[j] & KVMPPC_GOT_PAGE))
- continue;
- pfn = physp[j] >> PAGE_SHIFT;
- page = pfn_to_page(pfn);
- SetPageDirty(page);
- put_page(page);
- }
- kvm->arch.slot_phys[slot_id] = NULL;
- spin_unlock(&kvm->arch.slot_phys_lock);
- vfree(physp);
+ /* Allocate a slot_phys array if needed */
+ phys = memslot->arch.slot_phys;
+ if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) {
+ phys = vzalloc(memslot->npages * sizeof(unsigned long));
+ if (!phys)
+ return -ENOMEM;
+ memslot->arch.slot_phys = phys;
}
+
+ return 0;
}
void kvmppc_core_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem)
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old)
{
+ unsigned long npages = mem->memory_size >> PAGE_SHIFT;
+ struct kvm_memory_slot *memslot;
+
+ if (npages && old.npages) {
+ /*
+ * If modifying a memslot, reset all the rmap dirty bits.
+ * If this is a new memslot, we don't need to do anything
+ * since the rmap array starts out as all zeroes,
+ * i.e. no pages are dirty.
+ */
+ memslot = id_to_memslot(kvm->memslots, mem->slot);
+ kvmppc_hv_get_dirty_log(kvm, memslot, NULL);
+ }
}
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
@@ -1362,6 +1560,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
unsigned long rmls;
unsigned long *physp;
unsigned long i, npages;
+ int srcu_idx;
mutex_lock(&kvm->lock);
if (kvm->arch.rma_setup_done)
@@ -1377,12 +1576,13 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
}
/* Look up the memslot for guest physical address 0 */
+ srcu_idx = srcu_read_lock(&kvm->srcu);
memslot = gfn_to_memslot(kvm, 0);
/* We must have some memory at 0 by now */
err = -EINVAL;
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
- goto out;
+ goto out_srcu;
/* Look up the VMA for the start of this memory slot */
hva = memslot->userspace_addr;
@@ -1406,14 +1606,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
err = -EPERM;
if (cpu_has_feature(CPU_FTR_ARCH_201)) {
pr_err("KVM: CPU requires an RMO\n");
- goto out;
+ goto out_srcu;
}
/* We can handle 4k, 64k or 16M pages in the VRMA */
err = -EINVAL;
if (!(psize == 0x1000 || psize == 0x10000 ||
psize == 0x1000000))
- goto out;
+ goto out_srcu;
/* Update VRMASD field in the LPCR */
senc = slb_pgsize_encoding(psize);
@@ -1436,7 +1636,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
err = -EINVAL;
if (rmls < 0) {
pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
- goto out;
+ goto out_srcu;
}
atomic_inc(&ri->use_count);
kvm->arch.rma = ri;
@@ -1465,17 +1665,24 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
/* Initialize phys addrs of pages in RMO */
npages = ri->npages;
porder = __ilog2(npages);
- physp = kvm->arch.slot_phys[memslot->id];
- spin_lock(&kvm->arch.slot_phys_lock);
- for (i = 0; i < npages; ++i)
- physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder;
- spin_unlock(&kvm->arch.slot_phys_lock);
+ physp = memslot->arch.slot_phys;
+ if (physp) {
+ if (npages > memslot->npages)
+ npages = memslot->npages;
+ spin_lock(&kvm->arch.slot_phys_lock);
+ for (i = 0; i < npages; ++i)
+ physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) +
+ porder;
+ spin_unlock(&kvm->arch.slot_phys_lock);
+ }
}
/* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
smp_wmb();
kvm->arch.rma_setup_done = 1;
err = 0;
+ out_srcu:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
out:
mutex_unlock(&kvm->lock);
return err;
@@ -1528,12 +1735,6 @@ int kvmppc_core_init_vm(struct kvm *kvm)
void kvmppc_core_destroy_vm(struct kvm *kvm)
{
- unsigned long i;
-
- if (!kvm->arch.using_mmu_notifiers)
- for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
- unpin_slot(kvm, i);
-
if (kvm->arch.rma) {
kvm_release_rma(kvm->arch.rma);
kvm->arch.rma = NULL;
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index fb4eac290fef..ec0a9e5de100 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -157,8 +157,8 @@ static void __init kvm_linear_init_one(ulong size, int count, int type)
linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
for (i = 0; i < count; ++i) {
linear = alloc_bootmem_align(size, size);
- pr_info("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
- size >> 20);
+ pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
+ size >> 20);
linear_info[i].base_virt = linear;
linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT;
linear_info[i].npages = npages;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index fb0e821622d4..5e06e3153888 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -81,7 +81,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
ptel = rev->guest_rpte |= rcbits;
gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
- if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+ if (!memslot)
return;
rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
@@ -183,7 +183,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
rmap = &memslot->arch.rmap[slot_fn];
if (!kvm->arch.using_mmu_notifiers) {
- physp = kvm->arch.slot_phys[memslot->id];
+ physp = memslot->arch.slot_phys;
if (!physp)
return H_PARAMETER;
physp += slot_fn;
@@ -297,7 +297,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
lock_rmap(rmap);
/* Check for pending invalidations under the rmap chain lock */
if (kvm->arch.using_mmu_notifiers &&
- mmu_notifier_retry(vcpu, mmu_seq)) {
+ mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
/* inval in progress, write a non-present HPTE */
pteh |= HPTE_V_ABSENT;
pteh &= ~HPTE_V_VALID;
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index 41cb0017e757..2c86b0d63714 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -114,11 +114,6 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
hlist_del_init_rcu(&pte->list_vpte);
hlist_del_init_rcu(&pte->list_vpte_long);
- if (pte->pte.may_write)
- kvm_release_pfn_dirty(pte->pfn);
- else
- kvm_release_pfn_clean(pte->pfn);
-
spin_unlock(&vcpu3s->mmu_lock);
vcpu3s->hpte_cache_count--;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 05c28f59f77f..b853696b6d8e 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -52,8 +52,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
#define MSR_USER32 MSR_USER
#define MSR_USER64 MSR_USER
#define HW_PAGE_SIZE PAGE_SIZE
-#define __hard_irq_disable local_irq_disable
-#define __hard_irq_enable local_irq_enable
#endif
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -66,7 +64,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
svcpu_put(svcpu);
#endif
-
+ vcpu->cpu = smp_processor_id();
#ifdef CONFIG_PPC_BOOK3S_32
current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
#endif
@@ -86,8 +84,64 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
kvmppc_giveup_ext(vcpu, MSR_FP);
kvmppc_giveup_ext(vcpu, MSR_VEC);
kvmppc_giveup_ext(vcpu, MSR_VSX);
+ vcpu->cpu = -1;
+}
+
+int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
+{
+ int r = 1; /* Indicate we want to get back into the guest */
+
+ /* We misuse TLB_FLUSH to indicate that we want to clear
+ all shadow cache entries */
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
+ kvmppc_mmu_pte_flush(vcpu, 0, 0);
+
+ return r;
+}
+
+/************* MMU Notifiers *************/
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+ trace_kvm_unmap_hva(hva);
+
+ /*
+ * Flush all shadow tlb entries everywhere. This is slow, but
+ * we are 100% sure that we catch the to be unmapped page
+ */
+ kvm_flush_remote_tlbs(kvm);
+
+ return 0;
+}
+
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+ /* kvm_unmap_hva flushes everything anyways */
+ kvm_unmap_hva(kvm, start);
+
+ return 0;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ /* XXX could be more clever ;) */
+ return 0;
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ /* XXX could be more clever ;) */
+ return 0;
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+ /* The page will get remapped properly on its next fault */
+ kvm_unmap_hva(kvm, hva);
}
+/*****************************************/
+
static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
{
ulong smsr = vcpu->arch.shared->msr;
@@ -540,18 +594,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int exit_nr)
{
int r = RESUME_HOST;
+ int s;
vcpu->stat.sum_exits++;
run->exit_reason = KVM_EXIT_UNKNOWN;
run->ready_for_interrupt_injection = 1;
- /* We get here with MSR.EE=0, so enable it to be a nice citizen */
- __hard_irq_enable();
+ /* We get here with MSR.EE=1 */
+
+ trace_kvm_exit(exit_nr, vcpu);
+ kvm_guest_exit();
- trace_kvm_book3s_exit(exit_nr, vcpu);
- preempt_enable();
- kvm_resched(vcpu);
switch (exit_nr) {
case BOOK3S_INTERRUPT_INST_STORAGE:
{
@@ -802,7 +856,6 @@ program_interrupt:
}
}
- preempt_disable();
if (!(r & RESUME_HOST)) {
/* To avoid clobbering exit_reason, only check for signals if
* we aren't already exiting to userspace for some other
@@ -814,20 +867,13 @@ program_interrupt:
* and if we really did time things so badly, then we just exit
* again due to a host external interrupt.
*/
- __hard_irq_disable();
- if (signal_pending(current)) {
- __hard_irq_enable();
-#ifdef EXIT_DEBUG
- printk(KERN_EMERG "KVM: Going back to host\n");
-#endif
- vcpu->stat.signal_exits++;
- run->exit_reason = KVM_EXIT_INTR;
- r = -EINTR;
+ local_irq_disable();
+ s = kvmppc_prepare_to_enter(vcpu);
+ if (s <= 0) {
+ local_irq_enable();
+ r = s;
} else {
- /* In case an interrupt came in that was triggered
- * from userspace (like DEC), we need to check what
- * to inject now! */
- kvmppc_core_prepare_to_enter(vcpu);
+ kvmppc_lazy_ee_enable();
}
}
@@ -899,34 +945,59 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return 0;
}
-int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
{
- int r = -EINVAL;
+ int r = 0;
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_HIOR:
- r = copy_to_user((u64 __user *)(long)reg->addr,
- &to_book3s(vcpu)->hior, sizeof(u64));
+ *val = get_reg_val(id, to_book3s(vcpu)->hior);
+ break;
+#ifdef CONFIG_VSX
+ case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: {
+ long int i = id - KVM_REG_PPC_VSR0;
+
+ if (!cpu_has_feature(CPU_FTR_VSX)) {
+ r = -ENXIO;
+ break;
+ }
+ val->vsxval[0] = vcpu->arch.fpr[i];
+ val->vsxval[1] = vcpu->arch.vsr[i];
break;
+ }
+#endif /* CONFIG_VSX */
default:
+ r = -EINVAL;
break;
}
return r;
}
-int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
{
- int r = -EINVAL;
+ int r = 0;
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_HIOR:
- r = copy_from_user(&to_book3s(vcpu)->hior,
- (u64 __user *)(long)reg->addr, sizeof(u64));
- if (!r)
- to_book3s(vcpu)->hior_explicit = true;
+ to_book3s(vcpu)->hior = set_reg_val(id, *val);
+ to_book3s(vcpu)->hior_explicit = true;
+ break;
+#ifdef CONFIG_VSX
+ case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: {
+ long int i = id - KVM_REG_PPC_VSR0;
+
+ if (!cpu_has_feature(CPU_FTR_VSX)) {
+ r = -ENXIO;
+ break;
+ }
+ vcpu->arch.fpr[i] = val->vsxval[0];
+ vcpu->arch.vsr[i] = val->vsxval[1];
break;
+ }
+#endif /* CONFIG_VSX */
default:
+ r = -EINVAL;
break;
}
@@ -1020,8 +1091,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
#endif
ulong ext_msr;
- preempt_disable();
-
/* Check if we can run the vcpu at all */
if (!vcpu->arch.sane) {
kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
@@ -1029,21 +1098,16 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
goto out;
}
- kvmppc_core_prepare_to_enter(vcpu);
-
/*
* Interrupts could be timers for the guest which we have to inject
* again, so let's postpone them until we're in the guest and if we
* really did time things so badly, then we just exit again due to
* a host external interrupt.
*/
- __hard_irq_disable();
-
- /* No need to go into the guest when all we do is going out */
- if (signal_pending(current)) {
- __hard_irq_enable();
- kvm_run->exit_reason = KVM_EXIT_INTR;
- ret = -EINTR;
+ local_irq_disable();
+ ret = kvmppc_prepare_to_enter(vcpu);
+ if (ret <= 0) {
+ local_irq_enable();
goto out;
}
@@ -1080,11 +1144,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if (vcpu->arch.shared->msr & MSR_FP)
kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
- kvm_guest_enter();
+ kvmppc_lazy_ee_enable();
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
- kvm_guest_exit();
+ /* No need for kvm_guest_exit. It's done in handle_exit.
+ We also get here with interrupts enabled. */
current->thread.regs->msr = ext_msr;
@@ -1113,7 +1178,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
#endif
out:
- preempt_enable();
+ vcpu->mode = OUTSIDE_GUEST_MODE;
return ret;
}
@@ -1181,14 +1246,31 @@ int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
}
#endif /* CONFIG_PPC64 */
+void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+}
+
+int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+ unsigned long npages)
+{
+ return 0;
+}
+
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem)
{
return 0;
}
void kvmppc_core_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem)
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old)
+{
+}
+
+void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
}
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 9ecf6e35cd8d..b2f8258b545a 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -170,20 +170,21 @@ kvmppc_handler_skip_ins:
* Call kvmppc_handler_trampoline_enter in real mode
*
* On entry, r4 contains the guest shadow MSR
+ * MSR.EE has to be 0 when calling this function
*/
_GLOBAL(kvmppc_entry_trampoline)
mfmsr r5
LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter)
toreal(r7)
- li r9, MSR_RI
- ori r9, r9, MSR_EE
- andc r9, r5, r9 /* Clear EE and RI in MSR value */
li r6, MSR_IR | MSR_DR
- ori r6, r6, MSR_EE
- andc r6, r5, r6 /* Clear EE, DR and IR in MSR value */
- MTMSR_EERI(r9) /* Clear EE and RI in MSR */
- mtsrr0 r7 /* before we set srr0/1 */
+ andc r6, r5, r6 /* Clear DR and IR in MSR value */
+ /*
+ * Set EE in HOST_MSR so that it's enabled when we get into our
+ * C exit handler function
+ */
+ ori r5, r5, MSR_EE
+ mtsrr0 r7
mtsrr1 r6
RFI
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index d25a097c852b..3d1f35dc7862 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -39,6 +39,7 @@
#include "timing.h"
#include "booke.h"
+#include "trace.h"
unsigned long kvmppc_booke_handlers;
@@ -62,6 +63,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "doorbell", VCPU_STAT(dbell_exits) },
{ "guest doorbell", VCPU_STAT(gdbell_exits) },
+ { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
{ NULL }
};
@@ -120,6 +122,16 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
}
#endif
+static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV)
+ /* We always treat the FP bit as enabled from the host
+ perspective, so only need to adjust the shadow MSR */
+ vcpu->arch.shadow_msr &= ~MSR_FP;
+ vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_FP;
+#endif
+}
+
/*
* Helper function for "full" MSR writes. No need to call this if only
* EE/CE/ME/DE/RI are changing.
@@ -136,11 +148,13 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
kvmppc_mmu_msr_notify(vcpu, old_msr);
kvmppc_vcpu_sync_spe(vcpu);
+ kvmppc_vcpu_sync_fpu(vcpu);
}
static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
unsigned int priority)
{
+ trace_kvm_booke_queue_irqprio(vcpu, priority);
set_bit(priority, &vcpu->arch.pending_exceptions);
}
@@ -206,6 +220,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
}
+static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
+{
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
+}
+
+static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
+{
+ clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
+}
+
static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
{
#ifdef CONFIG_KVM_BOOKE_HV
@@ -325,6 +349,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
msr_mask = MSR_CE | MSR_ME | MSR_DE;
int_class = INT_CLASS_NONCRIT;
break;
+ case BOOKE_IRQPRIO_WATCHDOG:
case BOOKE_IRQPRIO_CRITICAL:
case BOOKE_IRQPRIO_DBELL_CRIT:
allowed = vcpu->arch.shared->msr & MSR_CE;
@@ -404,12 +429,121 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
return allowed;
}
+/*
+ * Return the number of jiffies until the next timeout. If the timeout is
+ * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
+ * because the larger value can break the timer APIs.
+ */
+static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
+{
+ u64 tb, wdt_tb, wdt_ticks = 0;
+ u64 nr_jiffies = 0;
+ u32 period = TCR_GET_WP(vcpu->arch.tcr);
+
+ wdt_tb = 1ULL << (63 - period);
+ tb = get_tb();
+ /*
+ * The watchdog timeout will hapeen when TB bit corresponding
+ * to watchdog will toggle from 0 to 1.
+ */
+ if (tb & wdt_tb)
+ wdt_ticks = wdt_tb;
+
+ wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));
+
+ /* Convert timebase ticks to jiffies */
+ nr_jiffies = wdt_ticks;
+
+ if (do_div(nr_jiffies, tb_ticks_per_jiffy))
+ nr_jiffies++;
+
+ return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
+}
+
+static void arm_next_watchdog(struct kvm_vcpu *vcpu)
+{
+ unsigned long nr_jiffies;
+ unsigned long flags;
+
+ /*
+ * If TSR_ENW and TSR_WIS are not set then no need to exit to
+ * userspace, so clear the KVM_REQ_WATCHDOG request.
+ */
+ if ((vcpu->arch.tsr & (TSR_ENW | TSR_WIS)) != (TSR_ENW | TSR_WIS))
+ clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
+
+ spin_lock_irqsave(&vcpu->arch.wdt_lock, flags);
+ nr_jiffies = watchdog_next_timeout(vcpu);
+ /*
+ * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA
+ * then do not run the watchdog timer as this can break timer APIs.
+ */
+ if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
+ mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
+ else
+ del_timer(&vcpu->arch.wdt_timer);
+ spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags);
+}
+
+void kvmppc_watchdog_func(unsigned long data)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+ u32 tsr, new_tsr;
+ int final;
+
+ do {
+ new_tsr = tsr = vcpu->arch.tsr;
+ final = 0;
+
+ /* Time out event */
+ if (tsr & TSR_ENW) {
+ if (tsr & TSR_WIS)
+ final = 1;
+ else
+ new_tsr = tsr | TSR_WIS;
+ } else {
+ new_tsr = tsr | TSR_ENW;
+ }
+ } while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
+
+ if (new_tsr & TSR_WIS) {
+ smp_wmb();
+ kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+ kvm_vcpu_kick(vcpu);
+ }
+
+ /*
+ * If this is final watchdog expiry and some action is required
+ * then exit to userspace.
+ */
+ if (final && (vcpu->arch.tcr & TCR_WRC_MASK) &&
+ vcpu->arch.watchdog_enabled) {
+ smp_wmb();
+ kvm_make_request(KVM_REQ_WATCHDOG, vcpu);
+ kvm_vcpu_kick(vcpu);
+ }
+
+ /*
+ * Stop running the watchdog timer after final expiration to
+ * prevent the host from being flooded with timers if the
+ * guest sets a short period.
+ * Timers will resume when TSR/TCR is updated next time.
+ */
+ if (!final)
+ arm_next_watchdog(vcpu);
+}
+
static void update_timer_ints(struct kvm_vcpu *vcpu)
{
if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
kvmppc_core_queue_dec(vcpu);
else
kvmppc_core_dequeue_dec(vcpu);
+
+ if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
+ kvmppc_core_queue_watchdog(vcpu);
+ else
+ kvmppc_core_dequeue_watchdog(vcpu);
}
static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
@@ -417,13 +551,6 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
unsigned long *pending = &vcpu->arch.pending_exceptions;
unsigned int priority;
- if (vcpu->requests) {
- if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) {
- smp_mb();
- update_timer_ints(vcpu);
- }
- }
-
priority = __ffs(*pending);
while (priority < BOOKE_IRQPRIO_MAX) {
if (kvmppc_booke_irqprio_deliver(vcpu, priority))
@@ -459,37 +586,20 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
return r;
}
-/*
- * Common checks before entering the guest world. Call with interrupts
- * disabled.
- *
- * returns !0 if a signal is pending and check_signal is true
- */
-static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
+int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
{
- int r = 0;
-
- WARN_ON_ONCE(!irqs_disabled());
- while (true) {
- if (need_resched()) {
- local_irq_enable();
- cond_resched();
- local_irq_disable();
- continue;
- }
-
- if (signal_pending(current)) {
- r = 1;
- break;
- }
+ int r = 1; /* Indicate we want to get back into the guest */
- if (kvmppc_core_prepare_to_enter(vcpu)) {
- /* interrupts got enabled in between, so we
- are back at square 1 */
- continue;
- }
+ if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu))
+ update_timer_ints(vcpu);
+#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
+ kvmppc_core_flush_tlb(vcpu);
+#endif
- break;
+ if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
+ vcpu->run->exit_reason = KVM_EXIT_WATCHDOG;
+ r = 0;
}
return r;
@@ -497,7 +607,7 @@ static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
- int ret;
+ int ret, s;
#ifdef CONFIG_PPC_FPU
unsigned int fpscr;
int fpexc_mode;
@@ -510,11 +620,13 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
}
local_irq_disable();
- if (kvmppc_prepare_to_enter(vcpu)) {
- kvm_run->exit_reason = KVM_EXIT_INTR;
- ret = -EINTR;
+ s = kvmppc_prepare_to_enter(vcpu);
+ if (s <= 0) {
+ local_irq_enable();
+ ret = s;
goto out;
}
+ kvmppc_lazy_ee_enable();
kvm_guest_enter();
@@ -542,6 +654,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
+ /* No need for kvm_guest_exit. It's done in handle_exit.
+ We also get here with interrupts enabled. */
+
#ifdef CONFIG_PPC_FPU
kvmppc_save_guest_fp(vcpu);
@@ -557,10 +672,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
current->thread.fpexc_mode = fpexc_mode;
#endif
- kvm_guest_exit();
-
out:
- local_irq_enable();
+ vcpu->mode = OUTSIDE_GUEST_MODE;
return ret;
}
@@ -668,6 +781,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int exit_nr)
{
int r = RESUME_HOST;
+ int s;
/* update before a new last_exit_type is rewritten */
kvmppc_update_timing_stats(vcpu);
@@ -677,6 +791,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
local_irq_enable();
+ trace_kvm_exit(exit_nr, vcpu);
+ kvm_guest_exit();
+
run->exit_reason = KVM_EXIT_UNKNOWN;
run->ready_for_interrupt_injection = 1;
@@ -971,10 +1088,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
if (!(r & RESUME_HOST)) {
local_irq_disable();
- if (kvmppc_prepare_to_enter(vcpu)) {
- run->exit_reason = KVM_EXIT_INTR;
- r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
- kvmppc_account_exit(vcpu, SIGNAL_EXITS);
+ s = kvmppc_prepare_to_enter(vcpu);
+ if (s <= 0) {
+ local_irq_enable();
+ r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
+ } else {
+ kvmppc_lazy_ee_enable();
}
}
@@ -1011,6 +1130,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
return r;
}
+int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ /* setup watchdog timer once */
+ spin_lock_init(&vcpu->arch.wdt_lock);
+ setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
+ (unsigned long)vcpu);
+
+ return 0;
+}
+
+void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+ del_timer_sync(&vcpu->arch.wdt_timer);
+}
+
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
@@ -1106,7 +1240,13 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
}
if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
+ u32 old_tsr = vcpu->arch.tsr;
+
vcpu->arch.tsr = sregs->u.e.tsr;
+
+ if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
+ arm_next_watchdog(vcpu);
+
update_timer_ints(vcpu);
}
@@ -1221,12 +1361,56 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
- return -EINVAL;
+ int r = -EINVAL;
+
+ switch (reg->id) {
+ case KVM_REG_PPC_IAC1:
+ case KVM_REG_PPC_IAC2:
+ case KVM_REG_PPC_IAC3:
+ case KVM_REG_PPC_IAC4: {
+ int iac = reg->id - KVM_REG_PPC_IAC1;
+ r = copy_to_user((u64 __user *)(long)reg->addr,
+ &vcpu->arch.dbg_reg.iac[iac], sizeof(u64));
+ break;
+ }
+ case KVM_REG_PPC_DAC1:
+ case KVM_REG_PPC_DAC2: {
+ int dac = reg->id - KVM_REG_PPC_DAC1;
+ r = copy_to_user((u64 __user *)(long)reg->addr,
+ &vcpu->arch.dbg_reg.dac[dac], sizeof(u64));
+ break;
+ }
+ default:
+ break;
+ }
+ return r;
}
int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
- return -EINVAL;
+ int r = -EINVAL;
+
+ switch (reg->id) {
+ case KVM_REG_PPC_IAC1:
+ case KVM_REG_PPC_IAC2:
+ case KVM_REG_PPC_IAC3:
+ case KVM_REG_PPC_IAC4: {
+ int iac = reg->id - KVM_REG_PPC_IAC1;
+ r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac],
+ (u64 __user *)(long)reg->addr, sizeof(u64));
+ break;
+ }
+ case KVM_REG_PPC_DAC1:
+ case KVM_REG_PPC_DAC2: {
+ int dac = reg->id - KVM_REG_PPC_DAC1;
+ r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac],
+ (u64 __user *)(long)reg->addr, sizeof(u64));
+ break;
+ }
+ default:
+ break;
+ }
+ return r;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
@@ -1253,20 +1437,38 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
return -ENOTSUPP;
}
+void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+}
+
+int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+ unsigned long npages)
+{
+ return 0;
+}
+
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem)
{
return 0;
}
void kvmppc_core_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem)
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old)
+{
+}
+
+void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
}
void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
{
vcpu->arch.tcr = new_tcr;
+ arm_next_watchdog(vcpu);
update_timer_ints(vcpu);
}
@@ -1281,6 +1483,14 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
{
clear_bits(tsr_bits, &vcpu->arch.tsr);
+
+ /*
+ * We may have stopped the watchdog due to
+ * being stuck on final expiration.
+ */
+ if (tsr_bits & (TSR_ENW | TSR_WIS))
+ arm_next_watchdog(vcpu);
+
update_timer_ints(vcpu);
}
@@ -1298,12 +1508,14 @@ void kvmppc_decrementer_func(unsigned long data)
void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
+ vcpu->cpu = smp_processor_id();
current->thread.kvm_vcpu = vcpu;
}
void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
{
current->thread.kvm_vcpu = NULL;
+ vcpu->cpu = -1;
}
int __init kvmppc_booke_init(void)
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 12834bb608ab..514790f41aba 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -133,10 +133,10 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
vcpu->arch.csrr1 = spr_val;
break;
case SPRN_DBCR0:
- vcpu->arch.dbcr0 = spr_val;
+ vcpu->arch.dbg_reg.dbcr0 = spr_val;
break;
case SPRN_DBCR1:
- vcpu->arch.dbcr1 = spr_val;
+ vcpu->arch.dbg_reg.dbcr1 = spr_val;
break;
case SPRN_DBSR:
vcpu->arch.dbsr &= ~spr_val;
@@ -145,6 +145,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
kvmppc_clr_tsr_bits(vcpu, spr_val);
break;
case SPRN_TCR:
+ /*
+ * WRC is a 2-bit field that is supposed to preserve its
+ * value once written to non-zero.
+ */
+ if (vcpu->arch.tcr & TCR_WRC_MASK) {
+ spr_val &= ~TCR_WRC_MASK;
+ spr_val |= vcpu->arch.tcr & TCR_WRC_MASK;
+ }
kvmppc_set_tcr(vcpu, spr_val);
break;
@@ -229,6 +237,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
case SPRN_IVOR15:
vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val;
break;
+ case SPRN_MCSR:
+ vcpu->arch.mcsr &= ~spr_val;
+ break;
default:
emulated = EMULATE_FAIL;
@@ -258,10 +269,10 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
*spr_val = vcpu->arch.csrr1;
break;
case SPRN_DBCR0:
- *spr_val = vcpu->arch.dbcr0;
+ *spr_val = vcpu->arch.dbg_reg.dbcr0;
break;
case SPRN_DBCR1:
- *spr_val = vcpu->arch.dbcr1;
+ *spr_val = vcpu->arch.dbg_reg.dbcr1;
break;
case SPRN_DBSR:
*spr_val = vcpu->arch.dbsr;
@@ -321,6 +332,9 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
case SPRN_IVOR15:
*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
break;
+ case SPRN_MCSR:
+ *spr_val = vcpu->arch.mcsr;
+ break;
default:
emulated = EMULATE_FAIL;
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index aa8b81428bf4..d1622864549e 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -27,8 +27,7 @@
#define E500_TLB_NUM 2
#define E500_TLB_VALID 1
-#define E500_TLB_DIRTY 2
-#define E500_TLB_BITMAP 4
+#define E500_TLB_BITMAP 2
struct tlbe_ref {
pfn_t pfn;
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index ff38b664195d..c73389477d17 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -304,17 +304,13 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
ref->flags = E500_TLB_VALID;
if (tlbe_is_writable(gtlbe))
- ref->flags |= E500_TLB_DIRTY;
+ kvm_set_pfn_dirty(pfn);
}
static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
{
if (ref->flags & E500_TLB_VALID) {
- if (ref->flags & E500_TLB_DIRTY)
- kvm_release_pfn_dirty(ref->pfn);
- else
- kvm_release_pfn_clean(ref->pfn);
-
+ trace_kvm_booke206_ref_release(ref->pfn, ref->flags);
ref->flags = 0;
}
}
@@ -357,6 +353,13 @@ static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500)
clear_tlb_privs(vcpu_e500);
}
+void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ clear_tlb_refs(vcpu_e500);
+ clear_tlb1_bitmap(vcpu_e500);
+}
+
static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
unsigned int eaddr, int as)
{
@@ -541,6 +544,9 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
/* Clear i-cache for new pages */
kvmppc_mmu_flush_icache(pfn);
+
+ /* Drop refcount on page, so that mmu notifiers can clear it */
+ kvm_release_pfn_clean(pfn);
}
/* XXX only map the one-one case, for now use TLB0 */
@@ -1039,8 +1045,12 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
sesel = 0; /* unused */
priv = &vcpu_e500->gtlb_priv[tlbsel][esel];
- kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K,
- &priv->ref, eaddr, &stlbe);
+ /* Only triggers after clear_tlb_refs */
+ if (unlikely(!(priv->ref.flags & E500_TLB_VALID)))
+ kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
+ else
+ kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K,
+ &priv->ref, eaddr, &stlbe);
break;
case 1: {
@@ -1060,6 +1070,49 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel);
}
+/************* MMU Notifiers *************/
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+ trace_kvm_unmap_hva(hva);
+
+ /*
+ * Flush all shadow tlb entries everywhere. This is slow, but
+ * we are 100% sure that we catch the to be unmapped page
+ */
+ kvm_flush_remote_tlbs(kvm);
+
+ return 0;
+}
+
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+ /* kvm_unmap_hva flushes everything anyways */
+ kvm_unmap_hva(kvm, start);
+
+ return 0;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ /* XXX could be more clever ;) */
+ return 0;
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ /* XXX could be more clever ;) */
+ return 0;
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+ /* The page will get remapped properly on its next fault */
+ kvm_unmap_hva(kvm, hva);
+}
+
+/*****************************************/
+
static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
{
int i;
@@ -1081,6 +1134,8 @@ static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
}
vcpu_e500->num_shared_tlb_pages = 0;
+
+ kfree(vcpu_e500->shared_tlb_pages);
vcpu_e500->shared_tlb_pages = NULL;
} else {
kfree(vcpu_e500->gtlb_arch);
@@ -1178,21 +1233,27 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
}
virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
- if (!virt)
+ if (!virt) {
+ ret = -ENOMEM;
goto err_put_page;
+ }
privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0],
GFP_KERNEL);
privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1],
GFP_KERNEL);
- if (!privs[0] || !privs[1])
- goto err_put_page;
+ if (!privs[0] || !privs[1]) {
+ ret = -ENOMEM;
+ goto err_privs;
+ }
g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1],
GFP_KERNEL);
- if (!g2h_bitmap)
- goto err_put_page;
+ if (!g2h_bitmap) {
+ ret = -ENOMEM;
+ goto err_privs;
+ }
free_gtlb(vcpu_e500);
@@ -1232,10 +1293,11 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
kvmppc_recalc_tlb1map_range(vcpu_e500);
return 0;
-err_put_page:
+err_privs:
kfree(privs[0]);
kfree(privs[1]);
+err_put_page:
for (i = 0; i < num_pages; i++)
put_page(pages[i]);
@@ -1332,7 +1394,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
if (!vcpu_e500->gtlb_priv[1])
goto err;
- vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(unsigned int) *
+ vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) *
vcpu_e500->gtlb_params[1].entries,
GFP_KERNEL);
if (!vcpu_e500->g2h_tlb1_map)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4d213b8b0fb5..deb0d596d815 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -30,6 +30,7 @@
#include <asm/kvm_ppc.h>
#include <asm/tlbflush.h>
#include <asm/cputhreads.h>
+#include <asm/irqflags.h>
#include "timing.h"
#include "../mm/mmu_decl.h"
@@ -38,8 +39,7 @@
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
- return !(v->arch.shared->msr & MSR_WE) ||
- !!(v->arch.pending_exceptions) ||
+ return !!(v->arch.pending_exceptions) ||
v->requests;
}
@@ -48,6 +48,85 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
return 1;
}
+#ifndef CONFIG_KVM_BOOK3S_64_HV
+/*
+ * Common checks before entering the guest world. Call with interrupts
+ * disabled.
+ *
+ * returns:
+ *
+ * == 1 if we're ready to go into guest state
+ * <= 0 if we need to go back to the host with return value
+ */
+int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
+{
+ int r = 1;
+
+ WARN_ON_ONCE(!irqs_disabled());
+ while (true) {
+ if (need_resched()) {
+ local_irq_enable();
+ cond_resched();
+ local_irq_disable();
+ continue;
+ }
+
+ if (signal_pending(current)) {
+ kvmppc_account_exit(vcpu, SIGNAL_EXITS);
+ vcpu->run->exit_reason = KVM_EXIT_INTR;
+ r = -EINTR;
+ break;
+ }
+
+ vcpu->mode = IN_GUEST_MODE;
+
+ /*
+ * Reading vcpu->requests must happen after setting vcpu->mode,
+ * so we don't miss a request because the requester sees
+ * OUTSIDE_GUEST_MODE and assumes we'll be checking requests
+ * before next entering the guest (and thus doesn't IPI).
+ */
+ smp_mb();
+
+ if (vcpu->requests) {
+ /* Make sure we process requests preemptable */
+ local_irq_enable();
+ trace_kvm_check_requests(vcpu);
+ r = kvmppc_core_check_requests(vcpu);
+ local_irq_disable();
+ if (r > 0)
+ continue;
+ break;
+ }
+
+ if (kvmppc_core_prepare_to_enter(vcpu)) {
+ /* interrupts got enabled in between, so we
+ are back at square 1 */
+ continue;
+ }
+
+#ifdef CONFIG_PPC64
+ /* lazy EE magic */
+ hard_irq_disable();
+ if (lazy_irq_pending()) {
+ /* Got an interrupt in between, try again */
+ local_irq_enable();
+ local_irq_disable();
+ kvm_guest_exit();
+ continue;
+ }
+
+ trace_hardirqs_on();
+#endif
+
+ kvm_guest_enter();
+ break;
+ }
+
+ return r;
+}
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
+
int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
{
int nr = kvmppc_get_gpr(vcpu, 11);
@@ -67,18 +146,18 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
}
switch (nr) {
- case HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE:
+ case KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE):
{
vcpu->arch.magic_page_pa = param1;
vcpu->arch.magic_page_ea = param2;
r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7;
- r = HC_EV_SUCCESS;
+ r = EV_SUCCESS;
break;
}
- case HC_VENDOR_KVM | KVM_HC_FEATURES:
- r = HC_EV_SUCCESS;
+ case KVM_HCALL_TOKEN(KVM_HC_FEATURES):
+ r = EV_SUCCESS;
#if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2)
/* XXX Missing magic page on 44x */
r2 |= (1 << KVM_FEATURE_MAGIC_PAGE);
@@ -86,8 +165,13 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
/* Second return value is in r4 */
break;
+ case EV_HCALL_TOKEN(EV_IDLE):
+ r = EV_SUCCESS;
+ kvm_vcpu_block(vcpu);
+ clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+ break;
default:
- r = HC_EV_UNIMPLEMENTED;
+ r = EV_UNIMPLEMENTED;
break;
}
@@ -220,6 +304,7 @@ int kvm_dev_ioctl_check_extension(long ext)
switch (ext) {
#ifdef CONFIG_BOOKE
case KVM_CAP_PPC_BOOKE_SREGS:
+ case KVM_CAP_PPC_BOOKE_WATCHDOG:
#else
case KVM_CAP_PPC_SEGSTATE:
case KVM_CAP_PPC_HIOR:
@@ -260,10 +345,16 @@ int kvm_dev_ioctl_check_extension(long ext)
if (cpu_has_feature(CPU_FTR_ARCH_201))
r = 2;
break;
+#endif
case KVM_CAP_SYNC_MMU:
+#ifdef CONFIG_KVM_BOOK3S_64_HV
r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
- break;
+#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+ r = 1;
+#else
+ r = 0;
#endif
+ break;
case KVM_CAP_NR_VCPUS:
/*
* Recommending a number of CPUs is somewhat arbitrary; we
@@ -302,19 +393,12 @@ long kvm_arch_dev_ioctl(struct file *filp,
void kvm_arch_free_memslot(struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
- if (!dont || free->arch.rmap != dont->arch.rmap) {
- vfree(free->arch.rmap);
- free->arch.rmap = NULL;
- }
+ kvmppc_core_free_memslot(free, dont);
}
int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
{
- slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
- if (!slot->arch.rmap)
- return -ENOMEM;
-
- return 0;
+ return kvmppc_core_create_memslot(slot, npages);
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -323,7 +407,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
int user_alloc)
{
- return kvmppc_core_prepare_memory_region(kvm, mem);
+ return kvmppc_core_prepare_memory_region(kvm, memslot, mem);
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
@@ -331,7 +415,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_memory_slot old,
int user_alloc)
{
- kvmppc_core_commit_memory_region(kvm, mem);
+ kvmppc_core_commit_memory_region(kvm, mem, old);
}
void kvm_arch_flush_shadow_all(struct kvm *kvm)
@@ -341,6 +425,7 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
+ kvmppc_core_flush_memslot(kvm, slot);
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
@@ -390,6 +475,8 @@ enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
+ int ret;
+
hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
@@ -398,13 +485,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
#ifdef CONFIG_KVM_EXIT_TIMING
mutex_init(&vcpu->arch.exit_timing_lock);
#endif
-
- return 0;
+ ret = kvmppc_subarch_vcpu_init(vcpu);
+ return ret;
}
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
kvmppc_mmu_destroy(vcpu);
+ kvmppc_subarch_vcpu_uninit(vcpu);
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -420,7 +508,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
#endif
kvmppc_core_vcpu_load(vcpu, cpu);
- vcpu->cpu = smp_processor_id();
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -429,7 +516,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
#ifdef CONFIG_BOOKE
vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
#endif
- vcpu->cpu = -1;
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
@@ -649,6 +735,12 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
r = 0;
vcpu->arch.papr_enabled = true;
break;
+#ifdef CONFIG_BOOKE
+ case KVM_CAP_PPC_BOOKE_WATCHDOG:
+ r = 0;
+ vcpu->arch.watchdog_enabled = true;
+ break;
+#endif
#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
case KVM_CAP_SW_TLB: {
struct kvm_config_tlb cfg;
@@ -751,9 +843,16 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
{
+ u32 inst_nop = 0x60000000;
+#ifdef CONFIG_KVM_BOOKE_HV
+ u32 inst_sc1 = 0x44000022;
+ pvinfo->hcall[0] = inst_sc1;
+ pvinfo->hcall[1] = inst_nop;
+ pvinfo->hcall[2] = inst_nop;
+ pvinfo->hcall[3] = inst_nop;
+#else
u32 inst_lis = 0x3c000000;
u32 inst_ori = 0x60000000;
- u32 inst_nop = 0x60000000;
u32 inst_sc = 0x44000002;
u32 inst_imm_mask = 0xffff;
@@ -770,6 +869,9 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask);
pvinfo->hcall[2] = inst_sc;
pvinfo->hcall[3] = inst_nop;
+#endif
+
+ pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE;
return 0;
}
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index ddb6a2149d44..e326489a5420 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -31,6 +31,126 @@ TRACE_EVENT(kvm_ppc_instr,
__entry->inst, __entry->pc, __entry->emulate)
);
+#ifdef CONFIG_PPC_BOOK3S
+#define kvm_trace_symbol_exit \
+ {0x100, "SYSTEM_RESET"}, \
+ {0x200, "MACHINE_CHECK"}, \
+ {0x300, "DATA_STORAGE"}, \
+ {0x380, "DATA_SEGMENT"}, \
+ {0x400, "INST_STORAGE"}, \
+ {0x480, "INST_SEGMENT"}, \
+ {0x500, "EXTERNAL"}, \
+ {0x501, "EXTERNAL_LEVEL"}, \
+ {0x502, "EXTERNAL_HV"}, \
+ {0x600, "ALIGNMENT"}, \
+ {0x700, "PROGRAM"}, \
+ {0x800, "FP_UNAVAIL"}, \
+ {0x900, "DECREMENTER"}, \
+ {0x980, "HV_DECREMENTER"}, \
+ {0xc00, "SYSCALL"}, \
+ {0xd00, "TRACE"}, \
+ {0xe00, "H_DATA_STORAGE"}, \
+ {0xe20, "H_INST_STORAGE"}, \
+ {0xe40, "H_EMUL_ASSIST"}, \
+ {0xf00, "PERFMON"}, \
+ {0xf20, "ALTIVEC"}, \
+ {0xf40, "VSX"}
+#else
+#define kvm_trace_symbol_exit \
+ {0, "CRITICAL"}, \
+ {1, "MACHINE_CHECK"}, \
+ {2, "DATA_STORAGE"}, \
+ {3, "INST_STORAGE"}, \
+ {4, "EXTERNAL"}, \
+ {5, "ALIGNMENT"}, \
+ {6, "PROGRAM"}, \
+ {7, "FP_UNAVAIL"}, \
+ {8, "SYSCALL"}, \
+ {9, "AP_UNAVAIL"}, \
+ {10, "DECREMENTER"}, \
+ {11, "FIT"}, \
+ {12, "WATCHDOG"}, \
+ {13, "DTLB_MISS"}, \
+ {14, "ITLB_MISS"}, \
+ {15, "DEBUG"}, \
+ {32, "SPE_UNAVAIL"}, \
+ {33, "SPE_FP_DATA"}, \
+ {34, "SPE_FP_ROUND"}, \
+ {35, "PERFORMANCE_MONITOR"}, \
+ {36, "DOORBELL"}, \
+ {37, "DOORBELL_CRITICAL"}, \
+ {38, "GUEST_DBELL"}, \
+ {39, "GUEST_DBELL_CRIT"}, \
+ {40, "HV_SYSCALL"}, \
+ {41, "HV_PRIV"}
+#endif
+
+TRACE_EVENT(kvm_exit,
+ TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
+ TP_ARGS(exit_nr, vcpu),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, exit_nr )
+ __field( unsigned long, pc )
+ __field( unsigned long, msr )
+ __field( unsigned long, dar )
+#ifdef CONFIG_KVM_BOOK3S_PR
+ __field( unsigned long, srr1 )
+#endif
+ __field( unsigned long, last_inst )
+ ),
+
+ TP_fast_assign(
+#ifdef CONFIG_KVM_BOOK3S_PR
+ struct kvmppc_book3s_shadow_vcpu *svcpu;
+#endif
+ __entry->exit_nr = exit_nr;
+ __entry->pc = kvmppc_get_pc(vcpu);
+ __entry->dar = kvmppc_get_fault_dar(vcpu);
+ __entry->msr = vcpu->arch.shared->msr;
+#ifdef CONFIG_KVM_BOOK3S_PR
+ svcpu = svcpu_get(vcpu);
+ __entry->srr1 = svcpu->shadow_srr1;
+ svcpu_put(svcpu);
+#endif
+ __entry->last_inst = vcpu->arch.last_inst;
+ ),
+
+ TP_printk("exit=%s"
+ " | pc=0x%lx"
+ " | msr=0x%lx"
+ " | dar=0x%lx"
+#ifdef CONFIG_KVM_BOOK3S_PR
+ " | srr1=0x%lx"
+#endif
+ " | last_inst=0x%lx"
+ ,
+ __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
+ __entry->pc,
+ __entry->msr,
+ __entry->dar,
+#ifdef CONFIG_KVM_BOOK3S_PR
+ __entry->srr1,
+#endif
+ __entry->last_inst
+ )
+);
+
+TRACE_EVENT(kvm_unmap_hva,
+ TP_PROTO(unsigned long hva),
+ TP_ARGS(hva),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, hva )
+ ),
+
+ TP_fast_assign(
+ __entry->hva = hva;
+ ),
+
+ TP_printk("unmap hva 0x%lx\n", __entry->hva)
+);
+
TRACE_EVENT(kvm_stlb_inval,
TP_PROTO(unsigned int stlb_index),
TP_ARGS(stlb_index),
@@ -98,41 +218,31 @@ TRACE_EVENT(kvm_gtlb_write,
__entry->word1, __entry->word2)
);
-
-/*************************************************************************
- * Book3S trace points *
- *************************************************************************/
-
-#ifdef CONFIG_KVM_BOOK3S_PR
-
-TRACE_EVENT(kvm_book3s_exit,
- TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
- TP_ARGS(exit_nr, vcpu),
+TRACE_EVENT(kvm_check_requests,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu),
TP_STRUCT__entry(
- __field( unsigned int, exit_nr )
- __field( unsigned long, pc )
- __field( unsigned long, msr )
- __field( unsigned long, dar )
- __field( unsigned long, srr1 )
+ __field( __u32, cpu_nr )
+ __field( __u32, requests )
),
TP_fast_assign(
- struct kvmppc_book3s_shadow_vcpu *svcpu;
- __entry->exit_nr = exit_nr;
- __entry->pc = kvmppc_get_pc(vcpu);
- __entry->dar = kvmppc_get_fault_dar(vcpu);
- __entry->msr = vcpu->arch.shared->msr;
- svcpu = svcpu_get(vcpu);
- __entry->srr1 = svcpu->shadow_srr1;
- svcpu_put(svcpu);
+ __entry->cpu_nr = vcpu->vcpu_id;
+ __entry->requests = vcpu->requests;
),
- TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx",
- __entry->exit_nr, __entry->pc, __entry->msr, __entry->dar,
- __entry->srr1)
+ TP_printk("vcpu=%x requests=%x",
+ __entry->cpu_nr, __entry->requests)
);
+
+/*************************************************************************
+ * Book3S trace points *
+ *************************************************************************/
+
+#ifdef CONFIG_KVM_BOOK3S_PR
+
TRACE_EVENT(kvm_book3s_reenter,
TP_PROTO(int r, struct kvm_vcpu *vcpu),
TP_ARGS(r, vcpu),
@@ -395,6 +505,44 @@ TRACE_EVENT(kvm_booke206_gtlb_write,
__entry->mas2, __entry->mas7_3)
);
+TRACE_EVENT(kvm_booke206_ref_release,
+ TP_PROTO(__u64 pfn, __u32 flags),
+ TP_ARGS(pfn, flags),
+
+ TP_STRUCT__entry(
+ __field( __u64, pfn )
+ __field( __u32, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->pfn = pfn;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("pfn=%llx flags=%x",
+ __entry->pfn, __entry->flags)
+);
+
+TRACE_EVENT(kvm_booke_queue_irqprio,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
+ TP_ARGS(vcpu, priority),
+
+ TP_STRUCT__entry(
+ __field( __u32, cpu_nr )
+ __field( __u32, priority )
+ __field( unsigned long, pending )
+ ),
+
+ TP_fast_assign(
+ __entry->cpu_nr = vcpu->vcpu_id;
+ __entry->priority = priority;
+ __entry->pending = vcpu->arch.pending_exceptions;
+ ),
+
+ TP_printk("vcpu=%x prio=%x pending=%lx",
+ __entry->cpu_nr, __entry->priority, __entry->pending)
+);
+
#endif
#endif /* _TRACE_KVM_H */
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index e7a896acd982..48a920d51489 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -90,6 +90,7 @@ config MPIC
config PPC_EPAPR_HV_PIC
bool
default n
+ select EPAPR_PARAVIRT
config MPIC_WEIRD
bool
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 51ffafae561e..63c5f04ea580 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -236,7 +236,6 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc)
u32 intr_index;
u32 have_shift = 0;
struct fsl_msi_cascade_data *cascade_data;
- unsigned int ret;
cascade_data = irq_get_handler_data(irq);
msi_data = cascade_data->msi_data;
@@ -268,7 +267,9 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc)
case FSL_PIC_IP_IPIC:
msir_value = fsl_msi_read(msi_data->msi_regs, msir_index * 0x4);
break;
- case FSL_PIC_IP_VMPIC:
+#ifdef CONFIG_EPAPR_PARAVIRT
+ case FSL_PIC_IP_VMPIC: {
+ unsigned int ret;
ret = fh_vmpic_get_msir(virq_to_hw(irq), &msir_value);
if (ret) {
pr_err("fsl-msi: fh_vmpic_get_msir() failed for "
@@ -277,6 +278,8 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc)
}
break;
}
+#endif
+ }
while (msir_value) {
intr_index = ffs(msir_value) - 1;
@@ -508,10 +511,12 @@ static const struct of_device_id fsl_of_msi_ids[] = {
.compatible = "fsl,ipic-msi",
.data = &ipic_msi_feature,
},
+#ifdef CONFIG_EPAPR_PARAVIRT
{
.compatible = "fsl,vmpic-msi",
.data = &vmpic_msi_feature,
},
+#endif
{}
};
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index c449dbd1c938..97118dc3d285 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -253,6 +253,7 @@ struct platform_diu_data_ops diu_ops;
EXPORT_SYMBOL(diu_ops);
#endif
+#ifdef CONFIG_EPAPR_PARAVIRT
/*
* Restart the current partition
*
@@ -278,3 +279,4 @@ void fsl_hv_halt(void)
pr_info("hv exit\n");
fh_partition_stop(-1);
}
+#endif
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index ff1e2f8ef94a..c30615e605ac 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -629,10 +629,27 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
break;
case KVM_S390_SIGP_STOP:
case KVM_S390_RESTART:
+ VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
+ inti->type = s390int->type;
+ break;
case KVM_S390_INT_EXTERNAL_CALL:
+ if (s390int->parm & 0xffff0000) {
+ kfree(inti);
+ return -EINVAL;
+ }
+ VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
+ s390int->parm);
+ inti->type = s390int->type;
+ inti->extcall.code = s390int->parm;
+ break;
case KVM_S390_INT_EMERGENCY:
- VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
+ if (s390int->parm & 0xffff0000) {
+ kfree(inti);
+ return -EINVAL;
+ }
+ VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", s390int->parm);
inti->type = s390int->type;
+ inti->emerg.code = s390int->parm;
break;
case KVM_S390_INT_VIRTIO:
case KVM_S390_INT_SERVICE:
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ecced9d18986..38883f0bf27e 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -997,7 +997,7 @@ static int __init kvm_s390_init(void)
}
memcpy(facilities, S390_lowcore.stfle_fac_list, 16);
facilities[0] &= 0xff00fff3f47c0000ULL;
- facilities[1] &= 0x201c000000000000ULL;
+ facilities[1] &= 0x001c000000000000ULL;
return 0;
}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 6f85fe0bf958..aabb1289ff04 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2505,6 +2505,14 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
mmu_free_roots(vcpu);
}
+static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
+{
+ int bit7;
+
+ bit7 = (gpte >> 7) & 1;
+ return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
+}
+
static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
bool no_dirty_log)
{
@@ -2517,6 +2525,26 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
return gfn_to_pfn_memslot_atomic(slot, gfn);
}
+static bool prefetch_invalid_gpte(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *sp, u64 *spte,
+ u64 gpte)
+{
+ if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
+ goto no_present;
+
+ if (!is_present_gpte(gpte))
+ goto no_present;
+
+ if (!(gpte & PT_ACCESSED_MASK))
+ goto no_present;
+
+ return false;
+
+no_present:
+ drop_spte(vcpu->kvm, spte);
+ return true;
+}
+
static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp,
u64 *start, u64 *end)
@@ -2699,11 +2727,6 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
}
}
-static bool mmu_invalid_pfn(pfn_t pfn)
-{
- return unlikely(is_invalid_pfn(pfn));
-}
-
static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
pfn_t pfn, unsigned access, int *ret_val)
{
@@ -2862,7 +2885,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
return r;
spin_lock(&vcpu->kvm->mmu_lock);
- if (mmu_notifier_retry(vcpu, mmu_seq))
+ if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
kvm_mmu_free_some_pages(vcpu);
if (likely(!force_pt_level))
@@ -3331,7 +3354,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
return r;
spin_lock(&vcpu->kvm->mmu_lock);
- if (mmu_notifier_retry(vcpu, mmu_seq))
+ if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
kvm_mmu_free_some_pages(vcpu);
if (likely(!force_pt_level))
@@ -3399,14 +3422,6 @@ static void paging_free(struct kvm_vcpu *vcpu)
nonpaging_free(vcpu);
}
-static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
-{
- int bit7;
-
- bit7 = (gpte >> 7) & 1;
- return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
-}
-
static inline void protect_clean_gpte(unsigned *access, unsigned gpte)
{
unsigned mask;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 714e2c01a6fe..d17decaf1db9 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -305,51 +305,43 @@ static int FNAME(walk_addr_nested)(struct guest_walker *walker,
addr, access);
}
-static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *sp, u64 *spte,
- pt_element_t gpte)
+static bool
+FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+ u64 *spte, pt_element_t gpte, bool no_dirty_log)
{
- if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
- goto no_present;
-
- if (!is_present_gpte(gpte))
- goto no_present;
-
- if (!(gpte & PT_ACCESSED_MASK))
- goto no_present;
-
- return false;
-
-no_present:
- drop_spte(vcpu->kvm, spte);
- return true;
-}
-
-static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
- u64 *spte, const void *pte)
-{
- pt_element_t gpte;
unsigned pte_access;
+ gfn_t gfn;
pfn_t pfn;
- gpte = *(const pt_element_t *)pte;
- if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
- return;
+ if (prefetch_invalid_gpte(vcpu, sp, spte, gpte))
+ return false;
pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
+
+ gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access & gpte_access(vcpu, gpte);
protect_clean_gpte(&pte_access, gpte);
- pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
- if (mmu_invalid_pfn(pfn))
- return;
+ pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
+ no_dirty_log && (pte_access & ACC_WRITE_MASK));
+ if (is_invalid_pfn(pfn))
+ return false;
/*
- * we call mmu_set_spte() with host_writable = true because that
- * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
+ * we call mmu_set_spte() with host_writable = true because
+ * pte_prefetch_gfn_to_pfn always gets a writable pfn.
*/
mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
- NULL, PT_PAGE_TABLE_LEVEL,
- gpte_to_gfn(gpte), pfn, true, true);
+ NULL, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true);
+
+ return true;
+}
+
+static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+ u64 *spte, const void *pte)
+{
+ pt_element_t gpte = *(const pt_element_t *)pte;
+
+ FNAME(prefetch_gpte)(vcpu, sp, spte, gpte, false);
}
static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
@@ -395,53 +387,34 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
spte = sp->spt + i;
for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
- pt_element_t gpte;
- unsigned pte_access;
- gfn_t gfn;
- pfn_t pfn;
-
if (spte == sptep)
continue;
if (is_shadow_present_pte(*spte))
continue;
- gpte = gptep[i];
-
- if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
- continue;
-
- pte_access = sp->role.access & gpte_access(vcpu, gpte);
- protect_clean_gpte(&pte_access, gpte);
- gfn = gpte_to_gfn(gpte);
- pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
- pte_access & ACC_WRITE_MASK);
- if (mmu_invalid_pfn(pfn))
+ if (!FNAME(prefetch_gpte)(vcpu, sp, spte, gptep[i], true))
break;
-
- mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
- NULL, PT_PAGE_TABLE_LEVEL, gfn,
- pfn, true, true);
}
}
/*
* Fetch a shadow pte for a specific level in the paging hierarchy.
+ * If the guest tries to write a write-protected page, we need to
+ * emulate this operation, return 1 to indicate this case.
*/
-static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
+static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker *gw,
int user_fault, int write_fault, int hlevel,
- int *emulate, pfn_t pfn, bool map_writable,
- bool prefault)
+ pfn_t pfn, bool map_writable, bool prefault)
{
- unsigned access = gw->pt_access;
struct kvm_mmu_page *sp = NULL;
- int top_level;
- unsigned direct_access;
struct kvm_shadow_walk_iterator it;
+ unsigned direct_access, access = gw->pt_access;
+ int top_level, emulate = 0;
if (!is_present_gpte(gw->ptes[gw->level - 1]))
- return NULL;
+ return 0;
direct_access = gw->pte_access;
@@ -505,17 +478,17 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
clear_sp_write_flooding_count(it.sptep);
mmu_set_spte(vcpu, it.sptep, access, gw->pte_access,
- user_fault, write_fault, emulate, it.level,
+ user_fault, write_fault, &emulate, it.level,
gw->gfn, pfn, prefault, map_writable);
FNAME(pte_prefetch)(vcpu, gw, it.sptep);
- return it.sptep;
+ return emulate;
out_gpte_changed:
if (sp)
kvm_mmu_put_page(sp, it.sptep);
kvm_release_pfn_clean(pfn);
- return NULL;
+ return 0;
}
/*
@@ -538,8 +511,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
int write_fault = error_code & PFERR_WRITE_MASK;
int user_fault = error_code & PFERR_USER_MASK;
struct guest_walker walker;
- u64 *sptep;
- int emulate = 0;
int r;
pfn_t pfn;
int level = PT_PAGE_TABLE_LEVEL;
@@ -594,24 +565,20 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
return r;
spin_lock(&vcpu->kvm->mmu_lock);
- if (mmu_notifier_retry(vcpu, mmu_seq))
+ if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
kvm_mmu_free_some_pages(vcpu);
if (!force_pt_level)
transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
- sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
- level, &emulate, pfn, map_writable, prefault);
- (void)sptep;
- pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__,
- sptep, *sptep, emulate);
-
+ r = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
+ level, pfn, map_writable, prefault);
++vcpu->stat.pf_fixed;
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
spin_unlock(&vcpu->kvm->mmu_lock);
- return emulate;
+ return r;
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
@@ -757,7 +724,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
sizeof(pt_element_t)))
return -EINVAL;
- if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
+ if (prefetch_invalid_gpte(vcpu, sp, &sp->spt[i], gpte)) {
vcpu->kvm->tlbs_dirty++;
continue;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d017df3899ef..e93908fabf46 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -630,15 +630,12 @@ static int svm_hardware_enable(void *garbage)
return -EBUSY;
if (!has_svm()) {
- printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
- me);
+ pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
return -EINVAL;
}
sd = per_cpu(svm_data, me);
-
if (!sd) {
- printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
- me);
+ pr_err("%s: svm_data is NULL on %d\n", __func__, me);
return -EINVAL;
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ad6b1dd06f8b..6599e45f73f6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4287,16 +4287,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
if (is_machine_check(intr_info))
return handle_machine_check(vcpu);
- if ((vect_info & VECTORING_INFO_VALID_MASK) &&
- !is_page_fault(intr_info)) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
- vcpu->run->internal.ndata = 2;
- vcpu->run->internal.data[0] = vect_info;
- vcpu->run->internal.data[1] = intr_info;
- return 0;
- }
-
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
return 1; /* already handled by vmx_vcpu_run() */
@@ -4315,6 +4305,22 @@ static int handle_exception(struct kvm_vcpu *vcpu)
error_code = 0;
if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+
+ /*
+ * The #PF with PFEC.RSVD = 1 indicates the guest is accessing
+ * MMIO, it is better to report an internal error.
+ * See the comments in vmx_handle_exit.
+ */
+ if ((vect_info & VECTORING_INFO_VALID_MASK) &&
+ !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
+ vcpu->run->internal.ndata = 2;
+ vcpu->run->internal.data[0] = vect_info;
+ vcpu->run->internal.data[1] = intr_info;
+ return 0;
+ }
+
if (is_page_fault(intr_info)) {
/* EPT won't cause page fault directly */
BUG_ON(enable_ept);
@@ -5979,13 +5985,24 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
return 0;
}
+ /*
+ * Note:
+ * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it caused by
+ * delivery event since it indicates guest is accessing MMIO.
+ * The vm-exit can be triggered again after return to guest that
+ * will cause infinite loop.
+ */
if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
exit_reason != EXIT_REASON_EPT_VIOLATION &&
- exit_reason != EXIT_REASON_TASK_SWITCH))
- printk(KERN_WARNING "%s: unexpected, valid vectoring info "
- "(0x%x) and exit reason is 0x%x\n",
- __func__, vectoring_info, exit_reason);
+ exit_reason != EXIT_REASON_TASK_SWITCH)) {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
+ vcpu->run->internal.ndata = 2;
+ vcpu->run->internal.data[0] = vectoring_info;
+ vcpu->run->internal.data[1] = exit_reason;
+ return 0;
+ }
if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
!(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1eefebe5d727..6e5f069bee30 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -158,7 +158,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
u64 __read_mostly host_xcr0;
-int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
+static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
+
+static int kvm_vcpu_reset(struct kvm_vcpu *vcpu);
static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
{
@@ -633,7 +635,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
}
if (is_long_mode(vcpu)) {
- if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) {
+ if (kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) {
if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
return 1;
} else
@@ -5066,7 +5068,7 @@ out:
}
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
-int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
+static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
char instruction[3];
@@ -5426,7 +5428,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
pr_debug("vcpu %d received sipi with vector # %x\n",
vcpu->vcpu_id, vcpu->arch.sipi_vector);
kvm_lapic_reset(vcpu);
- r = kvm_arch_vcpu_reset(vcpu);
+ r = kvm_vcpu_reset(vcpu);
if (r)
return r;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -6036,7 +6038,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
r = vcpu_load(vcpu);
if (r)
return r;
- r = kvm_arch_vcpu_reset(vcpu);
+ r = kvm_vcpu_reset(vcpu);
if (r == 0)
r = kvm_mmu_setup(vcpu);
vcpu_put(vcpu);
@@ -6058,7 +6060,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_x86_ops->vcpu_free(vcpu);
}
-int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
+static int kvm_vcpu_reset(struct kvm_vcpu *vcpu)
{
atomic_set(&vcpu->arch.nmi_queued, 0);
vcpu->arch.nmi_pending = 0;