From 7103f60de8bed21a0ad5d15d2ad5b7a333dda201 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 19 Aug 2014 16:45:56 +0200
Subject: KVM: avoid unnecessary synchronize_rcu

We dont have to wait for a grace period if there is no oldpid that
we are going to free. putpid also checks for NULL, so this patch
only fences synchronize_rcu.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 33712fb26eb1..39b16035386f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -129,7 +129,8 @@ int vcpu_load(struct kvm_vcpu *vcpu)
 		struct pid *oldpid = vcpu->pid;
 		struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
 		rcu_assign_pointer(vcpu->pid, newpid);
-		synchronize_rcu();
+		if (oldpid)
+			synchronize_rcu();
 		put_pid(oldpid);
 	}
 	cpu = get_cpu();
-- 
cgit v1.2.3


From e790d9ef6405633b007339d746b709aed43a928d Mon Sep 17 00:00:00 2001
From: Radim Krčmář <rkrcmar@redhat.com>
Date: Thu, 21 Aug 2014 18:08:05 +0200
Subject: KVM: add kvm_arch_sched_in
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce preempt notifiers for architecture specific code.
Advantage over creating a new notifier in every arch is slightly simpler
code and guaranteed call order with respect to kvm_sched_in.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm/kvm/arm.c         | 4 ++++
 arch/mips/kvm/mips.c       | 4 ++++
 arch/powerpc/kvm/powerpc.c | 4 ++++
 arch/s390/kvm/kvm-s390.c   | 4 ++++
 arch/x86/kvm/x86.c         | 4 ++++
 include/linux/kvm_host.h   | 2 ++
 virt/kvm/kvm_main.c        | 2 ++
 7 files changed, 24 insertions(+)

(limited to 'virt/kvm')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index a99e0cdf8ba2..9f788ebac55b 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -288,6 +288,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	vcpu->cpu = cpu;
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index cd7114147ae7..2362df2a79f9 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -1002,6 +1002,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 				  struct kvm_translation *tr)
 {
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4c79284b58be..cbc432f4f0a6 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -720,6 +720,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	kvmppc_subarch_vcpu_uninit(vcpu);
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 #ifdef CONFIG_BOOKE
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ce81eb2ab76a..a3c324ec4370 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -555,6 +555,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	/* Nothing todo */
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cd718c01cdf1..7d43dc7bb906 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7171,6 +7171,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 		static_key_slow_dec(&kvm_no_apic_vcpu);
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
 	if (type)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a4c33b34fe3f..ebd723676633 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -624,6 +624,8 @@ void kvm_arch_exit(void);
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu);
+
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 39b16035386f..5a0817ee996e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3124,6 +3124,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
 	if (vcpu->preempted)
 		vcpu->preempted = false;
 
+	kvm_arch_sched_in(vcpu, cpu);
+
 	kvm_arch_vcpu_load(vcpu, cpu);
 }
 
-- 
cgit v1.2.3


From 64d831269ccbca1fc6d739a0f3c8aa24afb43a5e Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 19 Aug 2014 12:15:00 +0200
Subject: KVM: Introduce gfn_to_hva_memslot_prot

To support read-only memory regions on arm and arm64, we have a need to
resolve a gfn to an hva given a pointer to a memslot to avoid looping
through the memslots twice and to reuse the hva error checking of
gfn_to_hva_prot(), add a new gfn_to_hva_memslot_prot() function and
refactor gfn_to_hva_prot() to use this function.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h |  2 ++
 virt/kvm/kvm_main.c      | 11 +++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'virt/kvm')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ebd723676633..6d8a658ec174 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -528,6 +528,8 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
 unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
+unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn,
+				      bool *writable);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
 void kvm_set_page_accessed(struct page *page);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5a0817ee996e..76c92a7249c4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1076,9 +1076,9 @@ EXPORT_SYMBOL_GPL(gfn_to_hva);
  * If writable is set to false, the hva returned by this function is only
  * allowed to be read.
  */
-unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
+unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot,
+				      gfn_t gfn, bool *writable)
 {
-	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
 	unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
 
 	if (!kvm_is_error_hva(hva) && writable)
@@ -1087,6 +1087,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
 	return hva;
 }
 
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
+{
+	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+
+	return gfn_to_hva_memslot_prot(slot, gfn, writable);
+}
+
 static int kvm_read_hva(void *data, void __user *hva, int len)
 {
 	return __copy_from_user(data, hva, len);
-- 
cgit v1.2.3


From 1fa451bcc67fa921a04c5fac8dbcde7844d54512 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 26 Aug 2014 15:13:24 +0100
Subject: KVM: vgic: return int instead of bool when checking I/O ranges

vgic_ioaddr_overlap claims to return a bool, but in reality it returns
an int. Shut sparse up by fixing the type signature.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 73eba793b17f..d1cfe672b9d7 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1690,7 +1690,7 @@ out:
 	return ret;
 }
 
-static bool vgic_ioaddr_overlap(struct kvm *kvm)
+static int vgic_ioaddr_overlap(struct kvm *kvm)
 {
 	phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
 	phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
-- 
cgit v1.2.3


From de56fb1923ca11f428bf557870e0faa99f38762e Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 26 Aug 2014 15:13:25 +0100
Subject: KVM: vgic: declare probe function pointer as const

We extract the vgic probe function from the of_device_id data pointer,
which is const. Kill the sparse warning by ensuring that the local
function pointer is also marked as const.

Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index d1cfe672b9d7..efe6eee2e7eb 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1557,8 +1557,8 @@ static const struct of_device_id vgic_ids[] = {
 int kvm_vgic_hyp_init(void)
 {
 	const struct of_device_id *matched_id;
-	int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
-			  const struct vgic_params **);
+	const int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
+				const struct vgic_params **);
 	struct device_node *vgic_node;
 	int ret;
 
-- 
cgit v1.2.3


From 0f8a4de3e088797576ac76200b634b802e5c7781 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 26 Aug 2014 14:00:37 +0200
Subject: KVM: Unconditionally export KVM_CAP_READONLY_MEM

The idea between capabilities and the KVM_CHECK_EXTENSION ioctl is that
userspace can, at run-time, determine if a feature is supported or not.
This allows KVM to being supporting a new feature with a new kernel
version without any need to update user space.  Unfortunately, since the
definition of KVM_CAP_READONLY_MEM was guarded by #ifdef
__KVM_HAVE_READONLY_MEM, such discovery still required a user space
update.

Therefore, unconditionally export KVM_CAP_READONLY_MEM and change the
in-kernel conditional to rely on __KVM_HAVE_READONLY_MEM.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h | 2 --
 virt/kvm/kvm_main.c      | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'virt/kvm')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index cf3a2ff440e4..90d3edab839c 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -738,9 +738,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_GET_SMMU_INFO 78
 #define KVM_CAP_S390_COW 79
 #define KVM_CAP_PPC_ALLOC_HTAB 80
-#ifdef __KVM_HAVE_READONLY_MEM
 #define KVM_CAP_READONLY_MEM 81
-#endif
 #define KVM_CAP_IRQFD_RESAMPLE 82
 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83
 #define KVM_CAP_PPC_HTAB_FD 84
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5a0817ee996e..1d03967def40 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -708,7 +708,7 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
 {
 	u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
 
-#ifdef KVM_CAP_READONLY_MEM
+#ifdef __KVM_HAVE_READONLY_MEM
 	valid_flags |= KVM_MEM_READONLY;
 #endif
 
-- 
cgit v1.2.3


From 13a34e067eab24fec882e1834fbf2cc31911d474 Mon Sep 17 00:00:00 2001
From: Radim Krčmář <rkrcmar@redhat.com>
Date: Thu, 28 Aug 2014 15:13:03 +0200
Subject: KVM: remove garbage arg to *hardware_{en,dis}able
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the beggining was on_each_cpu(), which required an unused argument to
kvm_arch_ops.hardware_{en,dis}able, but this was soon forgotten.

Remove unnecessary arguments that stem from this.

Signed-off-by: Radim KrÄmÃ¡Å™ <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm/include/asm/kvm_host.h     |  2 +-
 arch/arm/kvm/arm.c                  |  2 +-
 arch/arm64/include/asm/kvm_host.h   |  2 +-
 arch/ia64/kvm/kvm-ia64.c            |  4 ++--
 arch/mips/include/asm/kvm_host.h    |  2 +-
 arch/mips/kvm/mips.c                |  2 +-
 arch/powerpc/include/asm/kvm_host.h |  2 +-
 arch/powerpc/kvm/powerpc.c          |  2 +-
 arch/s390/include/asm/kvm_host.h    |  2 +-
 arch/s390/kvm/kvm-s390.c            |  2 +-
 arch/x86/include/asm/kvm_host.h     |  4 ++--
 arch/x86/kvm/svm.c                  |  4 ++--
 arch/x86/kvm/vmx.c                  |  4 ++--
 arch/x86/kvm/x86.c                  | 12 ++++++------
 include/linux/kvm_host.h            |  4 ++--
 virt/kvm/kvm_main.c                 |  4 ++--
 16 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'virt/kvm')

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index aea259e9431f..032a8538318a 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -230,7 +230,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 132bb0d9c5ad..005a7b5fd0aa 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -87,7 +87,7 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void)
 	return &kvm_arm_running_vcpu;
 }
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	return 0;
 }
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index b5045e3e05f8..be9970a59497 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -242,7 +242,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
 	}
 }
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 5e14dcaf844e..ec6b9acb6bea 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -125,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
 
 static  DEFINE_SPINLOCK(vp_lock);
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	long  status;
 	long  tmp_base;
@@ -160,7 +160,7 @@ int kvm_arch_hardware_enable(void *garbage)
 	return 0;
 }
 
-void kvm_arch_hardware_disable(void *garbage)
+void kvm_arch_hardware_disable(void)
 {
 
 	long status;
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 0b24d6622ec1..f2c249796ea8 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -762,7 +762,7 @@ extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc,
 extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu);
 extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_free_memslot(struct kvm *kvm,
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 0ec7490d70bd..e3b21e51ff7e 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -77,7 +77,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	return 0;
 }
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 237cc0cc80a2..604000882352 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -682,7 +682,7 @@ struct kvm_vcpu_arch {
 #define __KVM_HAVE_ARCH_WQP
 #define __KVM_HAVE_CREATE_DEVICE
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 72c3fc085207..da505237a664 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -384,7 +384,7 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
 }
 EXPORT_SYMBOL_GPL(kvmppc_ld);
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	return 0;
 }
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index f6dd90684b97..a76a124dff48 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -452,7 +452,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 extern int sie64a(struct kvm_s390_sie_block *, u64 *);
 extern char sie_exit;
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_check_processor_compat(void *rtn) {}
 static inline void kvm_arch_exit(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b8fe1ae777db..628e992eeded 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -100,7 +100,7 @@ int test_vfacility(unsigned long nr)
 }
 
 /* Section: not file related */
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	/* every s390 is virtualization enabled ;-) */
 	return 0;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 567fface45f8..73e4149eda33 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -661,8 +661,8 @@ struct msr_data {
 struct kvm_x86_ops {
 	int (*cpu_has_kvm_support)(void);          /* __init */
 	int (*disabled_by_bios)(void);             /* __init */
-	int (*hardware_enable)(void *dummy);
-	void (*hardware_disable)(void *dummy);
+	int (*hardware_enable)(void);
+	void (*hardware_disable)(void);
 	void (*check_processor_compatibility)(void *rtn);
 	int (*hardware_setup)(void);               /* __init */
 	void (*hardware_unsetup)(void);            /* __exit */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7cd230e55118..8ef704037370 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -622,7 +622,7 @@ static int has_svm(void)
 	return 1;
 }
 
-static void svm_hardware_disable(void *garbage)
+static void svm_hardware_disable(void)
 {
 	/* Make sure we clean up behind us */
 	if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
@@ -633,7 +633,7 @@ static void svm_hardware_disable(void *garbage)
 	amd_pmu_disable_virt();
 }
 
-static int svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void)
 {
 
 	struct svm_cpu_data *sd;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d70550d0bcff..671ca5edc709 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2728,7 +2728,7 @@ static void kvm_cpu_vmxon(u64 addr)
 			: "memory", "cc");
 }
 
-static int hardware_enable(void *garbage)
+static int hardware_enable(void)
 {
 	int cpu = raw_smp_processor_id();
 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
@@ -2792,7 +2792,7 @@ static void kvm_cpu_vmxoff(void)
 	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
 }
 
-static void hardware_disable(void *garbage)
+static void hardware_disable(void)
 {
 	if (vmm_exclusive) {
 		vmclear_local_loaded_vmcss();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c10408ef9ab1..a375dfc42f6a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -246,7 +246,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
 }
 EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
 
-static void drop_user_return_notifiers(void *ignore)
+static void drop_user_return_notifiers(void)
 {
 	unsigned int cpu = smp_processor_id();
 	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
@@ -6959,7 +6959,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
 	kvm_rip_write(vcpu, 0);
 }
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	struct kvm *kvm;
 	struct kvm_vcpu *vcpu;
@@ -6970,7 +6970,7 @@ int kvm_arch_hardware_enable(void *garbage)
 	bool stable, backwards_tsc = false;
 
 	kvm_shared_msr_cpu_online();
-	ret = kvm_x86_ops->hardware_enable(garbage);
+	ret = kvm_x86_ops->hardware_enable();
 	if (ret != 0)
 		return ret;
 
@@ -7051,10 +7051,10 @@ int kvm_arch_hardware_enable(void *garbage)
 	return 0;
 }
 
-void kvm_arch_hardware_disable(void *garbage)
+void kvm_arch_hardware_disable(void)
 {
-	kvm_x86_ops->hardware_disable(garbage);
-	drop_user_return_notifiers(garbage);
+	kvm_x86_ops->hardware_disable();
+	drop_user_return_notifiers();
 }
 
 int kvm_arch_hardware_setup(void)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e1cb915a1096..e098dce179df 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -630,8 +630,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
 int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
 
-int kvm_arch_hardware_enable(void *garbage);
-void kvm_arch_hardware_disable(void *garbage);
+int kvm_arch_hardware_enable(void);
+void kvm_arch_hardware_disable(void);
 int kvm_arch_hardware_setup(void);
 void kvm_arch_hardware_unsetup(void);
 void kvm_arch_check_processor_compat(void *rtn);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1d03967def40..7176929a4cda 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2669,7 +2669,7 @@ static void hardware_enable_nolock(void *junk)
 
 	cpumask_set_cpu(cpu, cpus_hardware_enabled);
 
-	r = kvm_arch_hardware_enable(NULL);
+	r = kvm_arch_hardware_enable();
 
 	if (r) {
 		cpumask_clear_cpu(cpu, cpus_hardware_enabled);
@@ -2694,7 +2694,7 @@ static void hardware_disable_nolock(void *junk)
 	if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
 		return;
 	cpumask_clear_cpu(cpu, cpus_hardware_enabled);
-	kvm_arch_hardware_disable(NULL);
+	kvm_arch_hardware_disable();
 }
 
 static void hardware_disable(void)
-- 
cgit v1.2.3


From 00f034a12fdd81210d58116326d92780aac5c238 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 20 Aug 2014 14:29:21 +0200
Subject: KVM: do not bias the generation number in kvm_current_mmio_generation

The next patch will give a meaning (a la seqcount) to the low bit of the
generation number.  Ensure that it matches between kvm->memslots->generation
and kvm_current_mmio_generation().

Cc: stable@vger.kernel.org
Reviewed-by: David Matlack <dmatlack@google.com>
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu.c  | 7 +------
 virt/kvm/kvm_main.c | 7 +++++++
 2 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'virt/kvm')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 931467881da7..323c3f5f5c84 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -236,12 +236,7 @@ static unsigned int get_mmio_spte_generation(u64 spte)
 
 static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
 {
-	/*
-	 * Init kvm generation close to MMIO_MAX_GEN to easily test the
-	 * code of handling generation number wrap-around.
-	 */
-	return (kvm_memslots(kvm)->generation +
-		      MMIO_MAX_GEN - 150) & MMIO_GEN_MASK;
+	return kvm_memslots(kvm)->generation & MMIO_GEN_MASK;
 }
 
 static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7176929a4cda..0bfdb673db26 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -477,6 +477,13 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
 	if (!kvm->memslots)
 		goto out_err_no_srcu;
+
+	/*
+	 * Init kvm generation close to the maximum to easily test the
+	 * code of handling generation number wrap-around.
+	 */
+	kvm->memslots->generation = -150;
+
 	kvm_init_memslots_id(kvm);
 	if (init_srcu_struct(&kvm->srcu))
 		goto out_err_no_srcu;
-- 
cgit v1.2.3


From ee3d1570b58677885b4552bce8217fda7b226a68 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Mon, 18 Aug 2014 15:46:06 -0700
Subject: kvm: fix potentially corrupt mmio cache

vcpu exits and memslot mutations can run concurrently as long as the
vcpu does not aquire the slots mutex. Thus it is theoretically possible
for memslots to change underneath a vcpu that is handling an exit.

If we increment the memslot generation number again after
synchronize_srcu_expedited(), vcpus can safely cache memslot generation
without maintaining a single rcu_dereference through an entire vm exit.
And much of the x86/kvm code does not maintain a single rcu_dereference
of the current memslots during each exit.

We can prevent the following case:

   vcpu (CPU 0)                             | thread (CPU 1)
--------------------------------------------+--------------------------
1  vm exit                                  |
2  srcu_read_unlock(&kvm->srcu)             |
3  decide to cache something based on       |
     old memslots                           |
4                                           | change memslots
                                            | (increments generation)
5                                           | synchronize_srcu(&kvm->srcu);
6  retrieve generation # from new memslots  |
7  tag cache with new memslot generation    |
8  srcu_read_unlock(&kvm->srcu)             |
...                                         |
   <action based on cache occurs even       |
    though the caching decision was based   |
    on the old memslots>                    |
...                                         |
   <action *continues* to occur until next  |
    memslot generation change, which may    |
    be never>                               |
                                            |

By incrementing the generation after synchronizing with kvm->srcu readers,
we ensure that the generation retrieved in (6) will become invalid soon
after (8).

Keeping the existing increment is not strictly necessary, but we
do keep it and just move it for consistency from update_memslots to
install_new_memslots.  It invalidates old cached MMIOs immediately,
instead of having to wait for the end of synchronize_srcu_expedited,
which makes the code more clearly correct in case CPU 1 is preempted
right after synchronize_srcu() returns.

To avoid halving the generation space in SPTEs, always presume that the
low bit of the generation is zero when reconstructing a generation number
out of an SPTE.  This effectively disables MMIO caching in SPTEs during
the call to synchronize_srcu_expedited.  Using the low bit this way is
somewhat like a seqcount---where the protected thing is a cache, and
instead of retrying we can simply punt if we observe the low bit to be 1.

Cc: stable@vger.kernel.org
Signed-off-by: David Matlack <dmatlack@google.com>
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Reviewed-by: David Matlack <dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/mmu.txt | 14 ++++++++++++++
 arch/x86/kvm/mmu.c                | 20 ++++++++++++--------
 virt/kvm/kvm_main.c               | 23 ++++++++++++++++-------
 3 files changed, 42 insertions(+), 15 deletions(-)

(limited to 'virt/kvm')

diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index 290894176142..53838d9c6295 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -425,6 +425,20 @@ fault through the slow path.
 Since only 19 bits are used to store generation-number on mmio spte, all
 pages are zapped when there is an overflow.
 
+Unfortunately, a single memory access might access kvm_memslots(kvm) multiple
+times, the last one happening when the generation number is retrieved and
+stored into the MMIO spte.  Thus, the MMIO spte might be created based on
+out-of-date information, but with an up-to-date generation number.
+
+To avoid this, the generation number is incremented again after synchronize_srcu
+returns; thus, the low bit of kvm_memslots(kvm)->generation is only 1 during a
+memslot update, while some SRCU readers might be using the old copy.  We do not
+want to use an MMIO sptes created with an odd generation number, and we can do
+this without losing a bit in the MMIO spte.  The low bit of the generation
+is not stored in MMIO spte, and presumed zero when it is extracted out of the
+spte.  If KVM is unlucky and creates an MMIO spte while the low bit is 1,
+the next access to the spte will always be a cache miss.
+
 
 Further reading
 ===============
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 323c3f5f5c84..96515957ba82 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -199,16 +199,20 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask)
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
 
 /*
- * spte bits of bit 3 ~ bit 11 are used as low 9 bits of generation number,
- * the bits of bits 52 ~ bit 61 are used as high 10 bits of generation
- * number.
+ * the low bit of the generation number is always presumed to be zero.
+ * This disables mmio caching during memslot updates.  The concept is
+ * similar to a seqcount but instead of retrying the access we just punt
+ * and ignore the cache.
+ *
+ * spte bits 3-11 are used as bits 1-9 of the generation number,
+ * the bits 52-61 are used as bits 10-19 of the generation number.
  */
-#define MMIO_SPTE_GEN_LOW_SHIFT		3
+#define MMIO_SPTE_GEN_LOW_SHIFT		2
 #define MMIO_SPTE_GEN_HIGH_SHIFT	52
 
-#define MMIO_GEN_SHIFT			19
-#define MMIO_GEN_LOW_SHIFT		9
-#define MMIO_GEN_LOW_MASK		((1 << MMIO_GEN_LOW_SHIFT) - 1)
+#define MMIO_GEN_SHIFT			20
+#define MMIO_GEN_LOW_SHIFT		10
+#define MMIO_GEN_LOW_MASK		((1 << MMIO_GEN_LOW_SHIFT) - 2)
 #define MMIO_GEN_MASK			((1 << MMIO_GEN_SHIFT) - 1)
 #define MMIO_MAX_GEN			((1 << MMIO_GEN_SHIFT) - 1)
 
@@ -4428,7 +4432,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm)
 	 * The very rare case: if the generation-number is round,
 	 * zap all shadow pages.
 	 */
-	if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) {
+	if (unlikely(kvm_current_mmio_generation(kvm) == 0)) {
 		printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n");
 		kvm_mmu_invalidate_zap_all_pages(kvm);
 	}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0bfdb673db26..bb8641b5d83b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -95,8 +95,6 @@ static int hardware_enable_all(void);
 static void hardware_disable_all(void);
 
 static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
-static void update_memslots(struct kvm_memslots *slots,
-			    struct kvm_memory_slot *new, u64 last_generation);
 
 static void kvm_release_pfn_dirty(pfn_t pfn);
 static void mark_page_dirty_in_slot(struct kvm *kvm,
@@ -695,8 +693,7 @@ static void sort_memslots(struct kvm_memslots *slots)
 }
 
 static void update_memslots(struct kvm_memslots *slots,
-			    struct kvm_memory_slot *new,
-			    u64 last_generation)
+			    struct kvm_memory_slot *new)
 {
 	if (new) {
 		int id = new->id;
@@ -707,8 +704,6 @@ static void update_memslots(struct kvm_memslots *slots,
 		if (new->npages != npages)
 			sort_memslots(slots);
 	}
-
-	slots->generation = last_generation + 1;
 }
 
 static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
@@ -730,10 +725,24 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 {
 	struct kvm_memslots *old_memslots = kvm->memslots;
 
-	update_memslots(slots, new, kvm->memslots->generation);
+	/*
+	 * Set the low bit in the generation, which disables SPTE caching
+	 * until the end of synchronize_srcu_expedited.
+	 */
+	WARN_ON(old_memslots->generation & 1);
+	slots->generation = old_memslots->generation + 1;
+
+	update_memslots(slots, new);
 	rcu_assign_pointer(kvm->memslots, slots);
 	synchronize_srcu_expedited(&kvm->srcu);
 
+	/*
+	 * Increment the new memslot generation a second time. This prevents
+	 * vm exits that race with memslot updates from caching a memslot
+	 * generation that will (potentially) be valid forever.
+	 */
+	slots->generation++;
+
 	kvm_arch_memslots_updated(kvm);
 
 	return old_memslots;
-- 
cgit v1.2.3


From 34656113182b704682e23d1363417536addfec97 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 4 Sep 2014 21:13:31 +0200
Subject: KVM: remove redundant check of in_spin_loop

The expression `vcpu->spin_loop.in_spin_loop' is always true,
because it is evaluated only when the condition
`!vcpu->spin_loop.in_spin_loop' is false.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index bb8641b5d83b..cc7bd286d135 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1785,8 +1785,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
 	bool eligible;
 
 	eligible = !vcpu->spin_loop.in_spin_loop ||
-			(vcpu->spin_loop.in_spin_loop &&
-			 vcpu->spin_loop.dy_eligible);
+		    vcpu->spin_loop.dy_eligible;
 
 	if (vcpu->spin_loop.in_spin_loop)
 		kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
-- 
cgit v1.2.3


From a13f533b2f1d53a7c0baa7490498caeab7bc8ba5 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 4 Sep 2014 21:13:32 +0200
Subject: KVM: remove redundant assigment of return value in kvm_dev_ioctl

The first statement of kvm_dev_ioctl is
        long r = -EINVAL;

No need to reassign the same value.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cc7bd286d135..de1ae82ba192 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2627,7 +2627,6 @@ static long kvm_dev_ioctl(struct file *filp,
 
 	switch (ioctl) {
 	case KVM_GET_API_VERSION:
-		r = -EINVAL;
 		if (arg)
 			goto out;
 		r = KVM_API_VERSION;
@@ -2639,7 +2638,6 @@ static long kvm_dev_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
 		break;
 	case KVM_GET_VCPU_MMAP_SIZE:
-		r = -EINVAL;
 		if (arg)
 			goto out;
 		r = PAGE_SIZE;     /* struct kvm_run */
-- 
cgit v1.2.3


From f2a25160887e00434ce1361007009120e1fecbda Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 4 Sep 2014 21:13:33 +0200
Subject: KVM: remove redundant assignments in __kvm_set_memory_region

__kvm_set_memory_region sets r to EINVAL very early.
Doing it again is not necessary. The same is true later on, where
r is assigned -ENOMEM twice.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de1ae82ba192..c338599804e0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -793,7 +793,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
 	npages = mem->memory_size >> PAGE_SHIFT;
 
-	r = -EINVAL;
 	if (npages > KVM_MEM_MAX_NR_PAGES)
 		goto out;
 
@@ -807,7 +806,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	new.npages = npages;
 	new.flags = mem->flags;
 
-	r = -EINVAL;
 	if (npages) {
 		if (!old.npages)
 			change = KVM_MR_CREATE;
@@ -863,7 +861,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
-		r = -ENOMEM;
 		slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
 				GFP_KERNEL);
 		if (!slots)
-- 
cgit v1.2.3


From 0ba09511ddc3ff0b462f37b4fe4b9c4dccc054ec Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@linaro.org>
Date: Mon, 1 Sep 2014 09:36:08 +0100
Subject: KVM: EVENTFD: remove inclusion of irq.h

No more needed. irq.h would be void on ARM.

Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 virt/kvm/eventfd.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 3c5981c87c3f..0c712a779b44 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -36,7 +36,6 @@
 #include <linux/seqlock.h>
 #include <trace/events/kvm.h>
 
-#include "irq.h"
 #include "iodev.h"
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
-- 
cgit v1.2.3


From 184564efae4d775225c8fe3b762a56956fb1f827 Mon Sep 17 00:00:00 2001
From: Zhang Haoyu <zhanghy@sangfor.com>
Date: Thu, 11 Sep 2014 16:47:04 +0800
Subject: kvm: ioapic: conditionally delay irq delivery duringeoi broadcast

Currently, we call ioapic_service() immediately when we find the irq is still
active during eoi broadcast. But for real hardware, there's some delay between
the EOI writing and irq delivery.  If we do not emulate this behavior, and
re-inject the interrupt immediately after the guest sends an EOI and re-enables
interrupts, a guest might spend all its time in the ISR if it has a broken
handler for a level-triggered interrupt.

Such livelock actually happens with Windows guests when resuming from
hibernation.

As there's no way to recognize the broken handle from new raised ones, this patch
delays an interrupt if 10.000 consecutive EOIs found that the interrupt was
still high.  The guest can then make a little forward progress, until a proper
IRQ handler is set or until some detection routine in the guest (such as
Linux's note_interrupt()) recognizes the situation.

Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Zhang Haoyu <zhanghy@sangfor.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/trace/events/kvm.h | 20 ++++++++++++++++++++
 virt/kvm/ioapic.c          | 46 ++++++++++++++++++++++++++++++++++++++++++++--
 virt/kvm/ioapic.h          |  2 ++
 3 files changed, 66 insertions(+), 2 deletions(-)

(limited to 'virt/kvm')

diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 908925ace776..ab679c395042 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -95,6 +95,26 @@ TRACE_EVENT(kvm_ioapic_set_irq,
 		  __entry->coalesced ? " (coalesced)" : "")
 );
 
+TRACE_EVENT(kvm_ioapic_delayed_eoi_inj,
+	    TP_PROTO(__u64 e),
+	    TP_ARGS(e),
+
+	TP_STRUCT__entry(
+		__field(	__u64,		e		)
+	),
+
+	TP_fast_assign(
+		__entry->e		= e;
+	),
+
+	TP_printk("dst %x vec=%u (%s|%s|%s%s)",
+		  (u8)(__entry->e >> 56), (u8)__entry->e,
+		  __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
+		  (__entry->e & (1<<11)) ? "logical" : "physical",
+		  (__entry->e & (1<<15)) ? "level" : "edge",
+		  (__entry->e & (1<<16)) ? "|masked" : "")
+);
+
 TRACE_EVENT(kvm_msi_set_irq,
 	    TP_PROTO(__u64 address, __u64 data),
 	    TP_ARGS(address, data),
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index e8ce34c9db32..0ba4057d271b 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -405,6 +405,26 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id)
 	spin_unlock(&ioapic->lock);
 }
 
+static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
+{
+	int i;
+	struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic,
+						 eoi_inject.work);
+	spin_lock(&ioapic->lock);
+	for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+		union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
+
+		if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG)
+			continue;
+
+		if (ioapic->irr & (1 << i) && !ent->fields.remote_irr)
+			ioapic_service(ioapic, i, false);
+	}
+	spin_unlock(&ioapic->lock);
+}
+
+#define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000
+
 static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
 			struct kvm_ioapic *ioapic, int vector, int trigger_mode)
 {
@@ -435,8 +455,26 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
 
 		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
 		ent->fields.remote_irr = 0;
-		if (ioapic->irr & (1 << i))
-			ioapic_service(ioapic, i, false);
+		if (!ent->fields.mask && (ioapic->irr & (1 << i))) {
+			++ioapic->irq_eoi[i];
+			if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) {
+				/*
+				 * Real hardware does not deliver the interrupt
+				 * immediately during eoi broadcast, and this
+				 * lets a buggy guest make slow progress
+				 * even if it does not correctly handle a
+				 * level-triggered interrupt.  Emulate this
+				 * behavior if we detect an interrupt storm.
+				 */
+				schedule_delayed_work(&ioapic->eoi_inject, HZ / 100);
+				ioapic->irq_eoi[i] = 0;
+				trace_kvm_ioapic_delayed_eoi_inj(ent->bits);
+			} else {
+				ioapic_service(ioapic, i, false);
+			}
+		} else {
+			ioapic->irq_eoi[i] = 0;
+		}
 	}
 }
 
@@ -565,12 +603,14 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
 {
 	int i;
 
+	cancel_delayed_work_sync(&ioapic->eoi_inject);
 	for (i = 0; i < IOAPIC_NUM_PINS; i++)
 		ioapic->redirtbl[i].fields.mask = 1;
 	ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
 	ioapic->ioregsel = 0;
 	ioapic->irr = 0;
 	ioapic->id = 0;
+	memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
 	rtc_irq_eoi_tracking_reset(ioapic);
 	update_handled_vectors(ioapic);
 }
@@ -589,6 +629,7 @@ int kvm_ioapic_init(struct kvm *kvm)
 	if (!ioapic)
 		return -ENOMEM;
 	spin_lock_init(&ioapic->lock);
+	INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work);
 	kvm->arch.vioapic = ioapic;
 	kvm_ioapic_reset(ioapic);
 	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
@@ -609,6 +650,7 @@ void kvm_ioapic_destroy(struct kvm *kvm)
 {
 	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
 
+	cancel_delayed_work_sync(&ioapic->eoi_inject);
 	if (ioapic) {
 		kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
 		kvm->arch.vioapic = NULL;
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 90d43e95dcf8..e23b70634f1e 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -59,6 +59,8 @@ struct kvm_ioapic {
 	spinlock_t lock;
 	DECLARE_BITMAP(handled_vectors, 256);
 	struct rtc_status rtc_status;
+	struct delayed_work eoi_inject;
+	u32 irq_eoi[IOAPIC_NUM_PINS];
 };
 
 #ifdef DEBUG
-- 
cgit v1.2.3


From d60eacb07053142bfb9b41582074a89a790a9d46 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 2 Sep 2014 10:27:33 +0100
Subject: KVM: device: add simple registration mechanism for kvm_device_ops

kvm_ioctl_create_device currently has knowledge of all the device types
and their associated ops. This is fairly inflexible when adding support
for new in-kernel device emulations, so move what we currently have out
into a table, which can support dynamic registration of ops by new
drivers for virtual hardware.

Cc: Alex Williamson <Alex.Williamson@redhat.com>
Cc: Alex Graf <agraf@suse.de>
Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h |  1 +
 include/uapi/linux/kvm.h | 22 +++++++++++-----
 virt/kvm/kvm_main.c      | 65 ++++++++++++++++++++++++++++--------------------
 3 files changed, 55 insertions(+), 33 deletions(-)

(limited to 'virt/kvm')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e098dce179df..b6e954742951 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1064,6 +1064,7 @@ struct kvm_device_ops {
 void kvm_device_get(struct kvm_device *dev);
 void kvm_device_put(struct kvm_device *dev);
 struct kvm_device *kvm_device_from_filp(struct file *filp);
+int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
 
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0695a1e3e332..60768822b140 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -943,15 +943,25 @@ struct kvm_device_attr {
 	__u64	addr;		/* userspace address of attr data */
 };
 
-#define KVM_DEV_TYPE_FSL_MPIC_20	1
-#define KVM_DEV_TYPE_FSL_MPIC_42	2
-#define KVM_DEV_TYPE_XICS		3
-#define KVM_DEV_TYPE_VFIO		4
 #define  KVM_DEV_VFIO_GROUP			1
 #define   KVM_DEV_VFIO_GROUP_ADD			1
 #define   KVM_DEV_VFIO_GROUP_DEL			2
-#define KVM_DEV_TYPE_ARM_VGIC_V2	5
-#define KVM_DEV_TYPE_FLIC		6
+
+enum kvm_device_type {
+	KVM_DEV_TYPE_FSL_MPIC_20	= 1,
+#define KVM_DEV_TYPE_FSL_MPIC_20	KVM_DEV_TYPE_FSL_MPIC_20
+	KVM_DEV_TYPE_FSL_MPIC_42,
+#define KVM_DEV_TYPE_FSL_MPIC_42	KVM_DEV_TYPE_FSL_MPIC_42
+	KVM_DEV_TYPE_XICS,
+#define KVM_DEV_TYPE_XICS		KVM_DEV_TYPE_XICS
+	KVM_DEV_TYPE_VFIO,
+#define KVM_DEV_TYPE_VFIO		KVM_DEV_TYPE_VFIO
+	KVM_DEV_TYPE_ARM_VGIC_V2,
+#define KVM_DEV_TYPE_ARM_VGIC_V2	KVM_DEV_TYPE_ARM_VGIC_V2
+	KVM_DEV_TYPE_FLIC,
+#define KVM_DEV_TYPE_FLIC		KVM_DEV_TYPE_FLIC
+	KVM_DEV_TYPE_MAX,
+};
 
 /*
  * ioctls for VM fds
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c338599804e0..686d783387a0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2272,44 +2272,55 @@ struct kvm_device *kvm_device_from_filp(struct file *filp)
 	return filp->private_data;
 }
 
-static int kvm_ioctl_create_device(struct kvm *kvm,
-				   struct kvm_create_device *cd)
-{
-	struct kvm_device_ops *ops = NULL;
-	struct kvm_device *dev;
-	bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
-	int ret;
-
-	switch (cd->type) {
+static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
 #ifdef CONFIG_KVM_MPIC
-	case KVM_DEV_TYPE_FSL_MPIC_20:
-	case KVM_DEV_TYPE_FSL_MPIC_42:
-		ops = &kvm_mpic_ops;
-		break;
+	[KVM_DEV_TYPE_FSL_MPIC_20]	= &kvm_mpic_ops,
+	[KVM_DEV_TYPE_FSL_MPIC_42]	= &kvm_mpic_ops,
 #endif
+
 #ifdef CONFIG_KVM_XICS
-	case KVM_DEV_TYPE_XICS:
-		ops = &kvm_xics_ops;
-		break;
+	[KVM_DEV_TYPE_XICS]		= &kvm_xics_ops,
 #endif
+
 #ifdef CONFIG_KVM_VFIO
-	case KVM_DEV_TYPE_VFIO:
-		ops = &kvm_vfio_ops;
-		break;
+	[KVM_DEV_TYPE_VFIO]		= &kvm_vfio_ops,
 #endif
+
 #ifdef CONFIG_KVM_ARM_VGIC
-	case KVM_DEV_TYPE_ARM_VGIC_V2:
-		ops = &kvm_arm_vgic_v2_ops;
-		break;
+	[KVM_DEV_TYPE_ARM_VGIC_V2]	= &kvm_arm_vgic_v2_ops,
 #endif
+
 #ifdef CONFIG_S390
-	case KVM_DEV_TYPE_FLIC:
-		ops = &kvm_flic_ops;
-		break;
+	[KVM_DEV_TYPE_FLIC]		= &kvm_flic_ops,
 #endif
-	default:
+};
+
+int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
+{
+	if (type >= ARRAY_SIZE(kvm_device_ops_table))
+		return -ENOSPC;
+
+	if (kvm_device_ops_table[type] != NULL)
+		return -EEXIST;
+
+	kvm_device_ops_table[type] = ops;
+	return 0;
+}
+
+static int kvm_ioctl_create_device(struct kvm *kvm,
+				   struct kvm_create_device *cd)
+{
+	struct kvm_device_ops *ops = NULL;
+	struct kvm_device *dev;
+	bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
+	int ret;
+
+	if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
+		return -ENODEV;
+
+	ops = kvm_device_ops_table[cd->type];
+	if (ops == NULL)
 		return -ENODEV;
-	}
 
 	if (test)
 		return 0;
-- 
cgit v1.2.3


From c06a841bf36340e9e917ce60d11a6425ac85d0bd Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 2 Sep 2014 10:27:34 +0100
Subject: KVM: ARM: vgic: register kvm_device_ops dynamically

Now that we have a dynamic means to register kvm_device_ops, use that
for the ARM VGIC, instead of relying on the static table.

Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h |   1 -
 virt/kvm/arm/vgic.c      | 157 ++++++++++++++++++++++++-----------------------
 virt/kvm/kvm_main.c      |   4 --
 3 files changed, 79 insertions(+), 83 deletions(-)

(limited to 'virt/kvm')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b6e954742951..601d321f96e7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1069,7 +1069,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
 extern struct kvm_device_ops kvm_vfio_ops;
-extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
 extern struct kvm_device_ops kvm_flic_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 73eba793b17f..3ee3ce06bbec 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1522,83 +1522,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static void vgic_init_maintenance_interrupt(void *info)
-{
-	enable_percpu_irq(vgic->maint_irq, 0);
-}
-
-static int vgic_cpu_notify(struct notifier_block *self,
-			   unsigned long action, void *cpu)
-{
-	switch (action) {
-	case CPU_STARTING:
-	case CPU_STARTING_FROZEN:
-		vgic_init_maintenance_interrupt(NULL);
-		break;
-	case CPU_DYING:
-	case CPU_DYING_FROZEN:
-		disable_percpu_irq(vgic->maint_irq);
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block vgic_cpu_nb = {
-	.notifier_call = vgic_cpu_notify,
-};
-
-static const struct of_device_id vgic_ids[] = {
-	{ .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
-	{ .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
-	{},
-};
-
-int kvm_vgic_hyp_init(void)
-{
-	const struct of_device_id *matched_id;
-	int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
-			  const struct vgic_params **);
-	struct device_node *vgic_node;
-	int ret;
-
-	vgic_node = of_find_matching_node_and_match(NULL,
-						    vgic_ids, &matched_id);
-	if (!vgic_node) {
-		kvm_err("error: no compatible GIC node found\n");
-		return -ENODEV;
-	}
-
-	vgic_probe = matched_id->data;
-	ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
-	if (ret)
-		return ret;
-
-	ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
-				 "vgic", kvm_get_running_vcpus());
-	if (ret) {
-		kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
-		return ret;
-	}
-
-	ret = __register_cpu_notifier(&vgic_cpu_nb);
-	if (ret) {
-		kvm_err("Cannot register vgic CPU notifier\n");
-		goto out_free_irq;
-	}
-
-	/* Callback into for arch code for setup */
-	vgic_arch_setup(vgic);
-
-	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
-
-	return 0;
-
-out_free_irq:
-	free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
-	return ret;
-}
-
 /**
  * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
  * @kvm: pointer to the kvm struct
@@ -2062,7 +1985,7 @@ static int vgic_create(struct kvm_device *dev, u32 type)
 	return kvm_vgic_create(dev->kvm);
 }
 
-struct kvm_device_ops kvm_arm_vgic_v2_ops = {
+static struct kvm_device_ops kvm_arm_vgic_v2_ops = {
 	.name = "kvm-arm-vgic",
 	.create = vgic_create,
 	.destroy = vgic_destroy,
@@ -2070,3 +1993,81 @@ struct kvm_device_ops kvm_arm_vgic_v2_ops = {
 	.get_attr = vgic_get_attr,
 	.has_attr = vgic_has_attr,
 };
+
+static void vgic_init_maintenance_interrupt(void *info)
+{
+	enable_percpu_irq(vgic->maint_irq, 0);
+}
+
+static int vgic_cpu_notify(struct notifier_block *self,
+			   unsigned long action, void *cpu)
+{
+	switch (action) {
+	case CPU_STARTING:
+	case CPU_STARTING_FROZEN:
+		vgic_init_maintenance_interrupt(NULL);
+		break;
+	case CPU_DYING:
+	case CPU_DYING_FROZEN:
+		disable_percpu_irq(vgic->maint_irq);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block vgic_cpu_nb = {
+	.notifier_call = vgic_cpu_notify,
+};
+
+static const struct of_device_id vgic_ids[] = {
+	{ .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
+	{ .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
+	{},
+};
+
+int kvm_vgic_hyp_init(void)
+{
+	const struct of_device_id *matched_id;
+	int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
+			  const struct vgic_params **);
+	struct device_node *vgic_node;
+	int ret;
+
+	vgic_node = of_find_matching_node_and_match(NULL,
+						    vgic_ids, &matched_id);
+	if (!vgic_node) {
+		kvm_err("error: no compatible GIC node found\n");
+		return -ENODEV;
+	}
+
+	vgic_probe = matched_id->data;
+	ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
+	if (ret)
+		return ret;
+
+	ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
+				 "vgic", kvm_get_running_vcpus());
+	if (ret) {
+		kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
+		return ret;
+	}
+
+	ret = __register_cpu_notifier(&vgic_cpu_nb);
+	if (ret) {
+		kvm_err("Cannot register vgic CPU notifier\n");
+		goto out_free_irq;
+	}
+
+	/* Callback into for arch code for setup */
+	vgic_arch_setup(vgic);
+
+	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
+
+	return kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
+				       KVM_DEV_TYPE_ARM_VGIC_V2);
+
+out_free_irq:
+	free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
+	return ret;
+}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 686d783387a0..68d96f5dbfe2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2286,10 +2286,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
 	[KVM_DEV_TYPE_VFIO]		= &kvm_vfio_ops,
 #endif
 
-#ifdef CONFIG_KVM_ARM_VGIC
-	[KVM_DEV_TYPE_ARM_VGIC_V2]	= &kvm_arm_vgic_v2_ops,
-#endif
-
 #ifdef CONFIG_S390
 	[KVM_DEV_TYPE_FLIC]		= &kvm_flic_ops,
 #endif
-- 
cgit v1.2.3


From 84877d93336de21a6251db00b841468a83c65906 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Tue, 2 Sep 2014 10:27:35 +0100
Subject: KVM: s390: register flic ops dynamically

Using the new kvm_register_device_ops() interface makes us get rid of
an #ifdef in common code.

Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/s390/kvm/kvm-s390.c | 3 ++-
 arch/s390/kvm/kvm-s390.h | 1 +
 include/linux/kvm_host.h | 1 -
 virt/kvm/kvm_main.c      | 4 ----
 4 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'virt/kvm')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b95d4a481b0c..56a411c0245a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -122,7 +122,8 @@ void kvm_arch_hardware_unsetup(void)
 
 int kvm_arch_init(void *opaque)
 {
-	return 0;
+	/* Register floating interrupt controller interface. */
+	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 }
 
 /* Section: device related */
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index b1a77669137b..244d02303182 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -227,6 +227,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int psw_extint_disabled(struct kvm_vcpu *vcpu);
 void kvm_s390_destroy_adapters(struct kvm *kvm);
 int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu);
+extern struct kvm_device_ops kvm_flic_ops;
 
 /* implemented in guestdbg.c */
 void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 601d321f96e7..7ef088bec715 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1069,7 +1069,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
 extern struct kvm_device_ops kvm_vfio_ops;
-extern struct kvm_device_ops kvm_flic_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 68d96f5dbfe2..f4e792fedb4f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2285,10 +2285,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
 #ifdef CONFIG_KVM_VFIO
 	[KVM_DEV_TYPE_VFIO]		= &kvm_vfio_ops,
 #endif
-
-#ifdef CONFIG_S390
-	[KVM_DEV_TYPE_FLIC]		= &kvm_flic_ops,
-#endif
 };
 
 int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
-- 
cgit v1.2.3


From 80ce1639727e9d38729c34f162378508c307ca25 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 2 Sep 2014 10:27:36 +0100
Subject: KVM: VFIO: register kvm_device_ops dynamically

Now that we have a dynamic means to register kvm_device_ops, use that
for the VFIO kvm device, instead of relying on the static table.

This is achieved by a module_init call to register the ops with KVM.

Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Alex Williamson <Alex.Williamson@redhat.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h |  1 -
 virt/kvm/kvm_main.c      |  4 ----
 virt/kvm/vfio.c          | 22 +++++++++++++++-------
 3 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'virt/kvm')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7ef088bec715..d44a2d640551 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1068,7 +1068,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
 
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
-extern struct kvm_device_ops kvm_vfio_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f4e792fedb4f..db57363cc287 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2281,10 +2281,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
 #ifdef CONFIG_KVM_XICS
 	[KVM_DEV_TYPE_XICS]		= &kvm_xics_ops,
 #endif
-
-#ifdef CONFIG_KVM_VFIO
-	[KVM_DEV_TYPE_VFIO]		= &kvm_vfio_ops,
-#endif
 };
 
 int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index ba1a93f935c7..bb11b36ee8a2 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -246,6 +246,16 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
 	kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
 }
 
+static int kvm_vfio_create(struct kvm_device *dev, u32 type);
+
+static struct kvm_device_ops kvm_vfio_ops = {
+	.name = "kvm-vfio",
+	.create = kvm_vfio_create,
+	.destroy = kvm_vfio_destroy,
+	.set_attr = kvm_vfio_set_attr,
+	.has_attr = kvm_vfio_has_attr,
+};
+
 static int kvm_vfio_create(struct kvm_device *dev, u32 type)
 {
 	struct kvm_device *tmp;
@@ -268,10 +278,8 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
 	return 0;
 }
 
-struct kvm_device_ops kvm_vfio_ops = {
-	.name = "kvm-vfio",
-	.create = kvm_vfio_create,
-	.destroy = kvm_vfio_destroy,
-	.set_attr = kvm_vfio_set_attr,
-	.has_attr = kvm_vfio_has_attr,
-};
+static int __init kvm_vfio_ops_init(void)
+{
+	return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
+}
+module_init(kvm_vfio_ops_init);
-- 
cgit v1.2.3


From 227844f53864077ccaefe01d0960fcccc03445ce Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 9 Jun 2014 12:27:18 +0200
Subject: arm/arm64: KVM: Rename irq_state to irq_pending

The irq_state field on the distributor struct is ambiguous in its
meaning; the comment says it's the level of the input put, but that
doesn't make much sense for edge-triggered interrupts.  The code
actually uses this state variable to check if the interrupt is in the
pending state on the distributor so clarify the comment and rename the
actual variable and accessor methods.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  4 ++--
 virt/kvm/arm/vgic.c    | 52 +++++++++++++++++++++++++-------------------------
 2 files changed, 28 insertions(+), 28 deletions(-)

(limited to 'virt/kvm')

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 35b0c121bb65..388d442eecb5 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -140,8 +140,8 @@ struct vgic_dist {
 	/* Interrupt enabled (one bit per IRQ) */
 	struct vgic_bitmap	irq_enabled;
 
-	/* Interrupt 'pin' level */
-	struct vgic_bitmap	irq_state;
+	/* Interrupt state is pending on the distributor */
+	struct vgic_bitmap	irq_pending;
 
 	/* Level-triggered interrupt in progress */
 	struct vgic_bitmap	irq_active;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index eeb23b37f87c..7e86a36f3fc5 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -37,7 +37,7 @@
  *
  * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
  *   something is pending
- * - VGIC pending interrupts are stored on the vgic.irq_state vgic
+ * - VGIC pending interrupts are stored on the vgic.irq_pending vgic
  *   bitmap (this bitmap is updated by both user land ioctls and guest
  *   mmio ops, and other in-kernel peripherals such as the
  *   arch. timers) and indicate the 'wire' state.
@@ -45,8 +45,8 @@
  *   recalculated
  * - To calculate the oracle, we need info for each cpu from
  *   compute_pending_for_cpu, which considers:
- *   - PPI: dist->irq_state & dist->irq_enable
- *   - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target
+ *   - PPI: dist->irq_pending & dist->irq_enable
+ *   - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target
  *   - irq_spi_target is a 'formatted' version of the GICD_ICFGR
  *     registers, stored on each vcpu. We only keep one bit of
  *     information per interrupt, making sure that only one vcpu can
@@ -221,21 +221,21 @@ static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
-	return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq);
+	return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq);
 }
 
-static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq)
+static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
-	vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1);
+	vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1);
 }
 
-static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq)
+static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
-	vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0);
+	vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0);
 }
 
 static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
@@ -409,7 +409,7 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
 					struct kvm_exit_mmio *mmio,
 					phys_addr_t offset)
 {
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
+	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending,
 				       vcpu->vcpu_id, offset);
 	vgic_reg_access(mmio, reg, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
@@ -425,7 +425,7 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
 					  struct kvm_exit_mmio *mmio,
 					  phys_addr_t offset)
 {
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
+	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending,
 				       vcpu->vcpu_id, offset);
 	vgic_reg_access(mmio, reg, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
@@ -651,7 +651,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 		 * is fine, then we are only setting a few bits that were
 		 * already set.
 		 */
-		vgic_dist_irq_set(vcpu, lr.irq);
+		vgic_dist_irq_set_pending(vcpu, lr.irq);
 		if (lr.irq < VGIC_NR_SGIS)
 			dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source;
 		lr.state &= ~LR_STATE_PENDING;
@@ -932,7 +932,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 	kvm_for_each_vcpu(c, vcpu, kvm) {
 		if (target_cpus & 1) {
 			/* Flag the SGI as pending */
-			vgic_dist_irq_set(vcpu, sgi);
+			vgic_dist_irq_set_pending(vcpu, sgi);
 			dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
 			kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
 		}
@@ -952,11 +952,11 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
 	pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
 	pend_shared = vcpu->arch.vgic_cpu.pending_shared;
 
-	pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id);
+	pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
 	enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
 	bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
 
-	pending = vgic_bitmap_get_shared_map(&dist->irq_state);
+	pending = vgic_bitmap_get_shared_map(&dist->irq_pending);
 	enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
 	bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS);
 	bitmap_and(pend_shared, pend_shared,
@@ -1160,7 +1160,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
 	 * our emulated gic and can get rid of them.
 	 */
 	if (!sources) {
-		vgic_dist_irq_clear(vcpu, irq);
+		vgic_dist_irq_clear_pending(vcpu, irq);
 		vgic_cpu_irq_clear(vcpu, irq);
 		return true;
 	}
@@ -1175,7 +1175,7 @@ static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
 
 	if (vgic_queue_irq(vcpu, 0, irq)) {
 		if (vgic_irq_is_edge(vcpu, irq)) {
-			vgic_dist_irq_clear(vcpu, irq);
+			vgic_dist_irq_clear_pending(vcpu, irq);
 			vgic_cpu_irq_clear(vcpu, irq);
 		} else {
 			vgic_irq_set_active(vcpu, irq);
@@ -1376,7 +1376,7 @@ static void vgic_kick_vcpus(struct kvm *kvm)
 
 static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
 {
-	int is_edge = vgic_irq_is_edge(vcpu, irq);
+	int edge_triggered = vgic_irq_is_edge(vcpu, irq);
 	int state = vgic_dist_irq_is_pending(vcpu, irq);
 
 	/*
@@ -1384,26 +1384,26 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
 	 * - edge triggered and we have a rising edge
 	 * - level triggered and we change level
 	 */
-	if (is_edge)
+	if (edge_triggered)
 		return level > state;
 	else
 		return level != state;
 }
 
-static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
+static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 				  unsigned int irq_num, bool level)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	struct kvm_vcpu *vcpu;
-	int is_edge, is_level;
+	int edge_triggered, level_triggered;
 	int enabled;
 	bool ret = true;
 
 	spin_lock(&dist->lock);
 
 	vcpu = kvm_get_vcpu(kvm, cpuid);
-	is_edge = vgic_irq_is_edge(vcpu, irq_num);
-	is_level = !is_edge;
+	edge_triggered = vgic_irq_is_edge(vcpu, irq_num);
+	level_triggered = !edge_triggered;
 
 	if (!vgic_validate_injection(vcpu, irq_num, level)) {
 		ret = false;
@@ -1418,9 +1418,9 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
 	kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
 
 	if (level)
-		vgic_dist_irq_set(vcpu, irq_num);
+		vgic_dist_irq_set_pending(vcpu, irq_num);
 	else
-		vgic_dist_irq_clear(vcpu, irq_num);
+		vgic_dist_irq_clear_pending(vcpu, irq_num);
 
 	enabled = vgic_irq_is_enabled(vcpu, irq_num);
 
@@ -1429,7 +1429,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
 		goto out;
 	}
 
-	if (is_level && vgic_irq_is_active(vcpu, irq_num)) {
+	if (level_triggered && vgic_irq_is_active(vcpu, irq_num)) {
 		/*
 		 * Level interrupt in progress, will be picked up
 		 * when EOId.
@@ -1466,7 +1466,7 @@ out:
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 			bool level)
 {
-	if (vgic_update_irq_state(kvm, cpuid, irq_num, level))
+	if (vgic_update_irq_pending(kvm, cpuid, irq_num, level))
 		vgic_kick_vcpus(kvm);
 
 	return 0;
-- 
cgit v1.2.3


From dbf20f9d8105cca531614c8bff9a74351e8e67e7 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 9 Jun 2014 12:55:13 +0200
Subject: arm/arm64: KVM: Rename irq_active to irq_queued

We have a special bitmap on the distributor struct to keep track of when
level-triggered interrupts are queued on the list registers.  This was
named irq_active, which is confusing, because the active state of an
interrupt as per the GIC spec is a different thing, not specifically
related to edge-triggered/level-triggered configurations but rather
indicates an interrupt which has been ack'ed but not yet eoi'ed.

Rename the bitmap and the corresponding accessor functions to irq_queued
to clarify what this is actually used for.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  4 ++--
 virt/kvm/arm/vgic.c    | 33 +++++++++++++++++++--------------
 2 files changed, 21 insertions(+), 16 deletions(-)

(limited to 'virt/kvm')

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 388d442eecb5..7d8e61fa9928 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -143,8 +143,8 @@ struct vgic_dist {
 	/* Interrupt state is pending on the distributor */
 	struct vgic_bitmap	irq_pending;
 
-	/* Level-triggered interrupt in progress */
-	struct vgic_bitmap	irq_active;
+	/* Level-triggered interrupt queued on VCPU interface */
+	struct vgic_bitmap	irq_queued;
 
 	/* Interrupt priority. Not used yet. */
 	struct vgic_bytemap	irq_priority;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 7e86a36f3fc5..ce1a2d17ee81 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -60,12 +60,12 @@
  * the 'line' again. This is achieved as such:
  *
  * - When a level interrupt is moved onto a vcpu, the corresponding
- *   bit in irq_active is set. As long as this bit is set, the line
+ *   bit in irq_queued is set. As long as this bit is set, the line
  *   will be ignored for further interrupts. The interrupt is injected
  *   into the vcpu with the GICH_LR_EOI bit set (generate a
  *   maintenance interrupt on EOI).
  * - When the interrupt is EOIed, the maintenance interrupt fires,
- *   and clears the corresponding bit in irq_active. This allow the
+ *   and clears the corresponding bit in irq_queued. This allows the
  *   interrupt line to be sampled again.
  */
 
@@ -196,25 +196,25 @@ static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
 	return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
 }
 
-static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
+static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
-	return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
+	return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
 }
 
-static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
+static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
-	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
+	vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1);
 }
 
-static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
+static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
-	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
+	vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
 }
 
 static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
@@ -256,6 +256,11 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
 			  vcpu->arch.vgic_cpu.pending_shared);
 }
 
+static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
+{
+	return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
+}
+
 static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
 {
 	return le32_to_cpu(*((u32 *)mmio->data)) & mask;
@@ -1079,8 +1084,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 
 		if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
 			vgic_retire_lr(lr, vlr.irq, vcpu);
-			if (vgic_irq_is_active(vcpu, vlr.irq))
-				vgic_irq_clear_active(vcpu, vlr.irq);
+			if (vgic_irq_is_queued(vcpu, vlr.irq))
+				vgic_irq_clear_queued(vcpu, vlr.irq);
 		}
 	}
 }
@@ -1170,7 +1175,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
 
 static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
 {
-	if (vgic_irq_is_active(vcpu, irq))
+	if (!vgic_can_sample_irq(vcpu, irq))
 		return true; /* level interrupt, already queued */
 
 	if (vgic_queue_irq(vcpu, 0, irq)) {
@@ -1178,7 +1183,7 @@ static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
 			vgic_dist_irq_clear_pending(vcpu, irq);
 			vgic_cpu_irq_clear(vcpu, irq);
 		} else {
-			vgic_irq_set_active(vcpu, irq);
+			vgic_irq_set_queued(vcpu, irq);
 		}
 
 		return true;
@@ -1262,7 +1267,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 		for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
 			struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
-			vgic_irq_clear_active(vcpu, vlr.irq);
+			vgic_irq_clear_queued(vcpu, vlr.irq);
 			WARN_ON(vlr.state & LR_STATE_MASK);
 			vlr.state = 0;
 			vgic_set_lr(vcpu, lr, vlr);
@@ -1429,7 +1434,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 		goto out;
 	}
 
-	if (level_triggered && vgic_irq_is_active(vcpu, irq_num)) {
+	if (!vgic_can_sample_irq(vcpu, irq_num)) {
 		/*
 		 * Level interrupt in progress, will be picked up
 		 * when EOId.
-- 
cgit v1.2.3


From cced50c9280ef7ca1af48080707a170efa1adfa0 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sat, 14 Jun 2014 22:37:33 +0200
Subject: arm/arm64: KVM: vgic: Clear queued flags on unqueue

If we unqueue a level-triggered interrupt completely, and the LR does
not stick around in the active state (and will therefore no longer
generate a maintenance interrupt), then we should clear the queued flag
so that the vgic can actually queue this level-triggered interrupt at a
later time and deal with its pending state then.

Note: This should actually be properly fixed to handle the active state
on the distributor.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index ce1a2d17ee81..2026b6147805 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -667,8 +667,10 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 		 * active), then the LR does not hold any useful info and can
 		 * be marked as free for other use.
 		 */
-		if (!(lr.state & LR_STATE_MASK))
+		if (!(lr.state & LR_STATE_MASK)) {
 			vgic_retire_lr(i, lr.irq, vcpu);
+			vgic_irq_clear_queued(vcpu, lr.irq);
+		}
 
 		/* Finally update the VGIC state. */
 		vgic_update_state(vcpu->kvm);
-- 
cgit v1.2.3


From faa1b46c3e9f4d40359aee04ff275eea5f4cae3a Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sat, 14 Jun 2014 21:54:51 +0200
Subject: arm/arm64: KVM: vgic: Improve handling of GICD_I{CS}PENDRn

Writes to GICD_ISPENDRn and GICD_ICPENDRn are currently not handled
correctly for level-triggered interrupts.  The spec states that for
level-triggered interrupts, writes to the GICD_ISPENDRn activate the
output of a flip-flop which is in turn or'ed with the actual input
interrupt signal.  Correspondingly, writes to GICD_ICPENDRn simply
deactivates the output of that flip-flop, but does not (of course) affect
the external input signal.  Reads from GICC_IAR will also deactivate the
flip-flop output.

This requires us to track the state of the level-input separately from
the state in the flip-flop.  We therefore introduce two new variables on
the distributor struct to track these two states.  Astute readers may
notice that this is introducing more state than required (because an OR
of the two states gives you the pending state), but the remaining vgic
code uses the pending bitmap for optimized operations to figure out, at
the end of the day, if an interrupt is pending or not on the distributor
side.  Refactoring the code to consider the two state variables all the
places where we currently access the precomputed pending value, did not
look pretty.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  16 ++++++-
 virt/kvm/arm/vgic.c    | 119 ++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 123 insertions(+), 12 deletions(-)

(limited to 'virt/kvm')

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 7d8e61fa9928..f074539c6ac5 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -140,9 +140,23 @@ struct vgic_dist {
 	/* Interrupt enabled (one bit per IRQ) */
 	struct vgic_bitmap	irq_enabled;
 
-	/* Interrupt state is pending on the distributor */
+	/* Level-triggered interrupt external input is asserted */
+	struct vgic_bitmap	irq_level;
+
+	/*
+	 * Interrupt state is pending on the distributor
+	 */
 	struct vgic_bitmap	irq_pending;
 
+	/*
+	 * Tracks writes to GICD_ISPENDRn and GICD_ICPENDRn for level-triggered
+	 * interrupts.  Essentially holds the state of the flip-flop in
+	 * Figure 4-10 on page 4-101 in ARM IHI 0048B.b.
+	 * Once set, it is only cleared for level-triggered interrupts on
+	 * guest ACKs (when we queue it) or writes to GICD_ICPENDRn.
+	 */
+	struct vgic_bitmap	irq_soft_pend;
+
 	/* Level-triggered interrupt queued on VCPU interface */
 	struct vgic_bitmap	irq_queued;
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 2026b6147805..435d8e7ad137 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -67,6 +67,11 @@
  * - When the interrupt is EOIed, the maintenance interrupt fires,
  *   and clears the corresponding bit in irq_queued. This allows the
  *   interrupt line to be sampled again.
+ * - Note that level-triggered interrupts can also be set to pending from
+ *   writes to GICD_ISPENDRn and lowering the external input line does not
+ *   cause the interrupt to become inactive in such a situation.
+ *   Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
+ *   inactive as long as the external input line is held high.
  */
 
 #define VGIC_ADDR_UNDEF		(-1)
@@ -217,6 +222,41 @@ static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
 	vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
 }
 
+static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq);
+}
+
+static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1);
+}
+
+static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0);
+}
+
+static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq);
+}
+
+static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
+}
+
 static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -414,11 +454,26 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
 					struct kvm_exit_mmio *mmio,
 					phys_addr_t offset)
 {
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending,
-				       vcpu->vcpu_id, offset);
+	u32 *reg;
+	u32 level_mask;
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset);
+	level_mask = (~(*reg));
+
+	/* Mark both level and edge triggered irqs as pending */
+	reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
 	vgic_reg_access(mmio, reg, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+
 	if (mmio->is_write) {
+		/* Set the soft-pending flag only for level-triggered irqs */
+		reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
+					  vcpu->vcpu_id, offset);
+		vgic_reg_access(mmio, reg, offset,
+				ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+		*reg &= level_mask;
+
 		vgic_update_state(vcpu->kvm);
 		return true;
 	}
@@ -430,11 +485,27 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
 					  struct kvm_exit_mmio *mmio,
 					  phys_addr_t offset)
 {
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending,
-				       vcpu->vcpu_id, offset);
+	u32 *level_active;
+	u32 *reg;
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
 	vgic_reg_access(mmio, reg, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
 	if (mmio->is_write) {
+		/* Re-set level triggered level-active interrupts */
+		level_active = vgic_bitmap_get_reg(&dist->irq_level,
+					  vcpu->vcpu_id, offset);
+		reg = vgic_bitmap_get_reg(&dist->irq_pending,
+					  vcpu->vcpu_id, offset);
+		*reg |= *level_active;
+
+		/* Clear soft-pending flags */
+		reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
+					  vcpu->vcpu_id, offset);
+		vgic_reg_access(mmio, reg, offset,
+				ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+
 		vgic_update_state(vcpu->kvm);
 		return true;
 	}
@@ -1268,17 +1339,32 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 
 		for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
 			struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
+			WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
 
 			vgic_irq_clear_queued(vcpu, vlr.irq);
 			WARN_ON(vlr.state & LR_STATE_MASK);
 			vlr.state = 0;
 			vgic_set_lr(vcpu, lr, vlr);
 
+			/*
+			 * If the IRQ was EOIed it was also ACKed and we we
+			 * therefore assume we can clear the soft pending
+			 * state (should it had been set) for this interrupt.
+			 *
+			 * Note: if the IRQ soft pending state was set after
+			 * the IRQ was acked, it actually shouldn't be
+			 * cleared, but we have no way of knowing that unless
+			 * we start trapping ACKs when the soft-pending state
+			 * is set.
+			 */
+			vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
+
 			/* Any additional pending interrupt? */
-			if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) {
+			if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
 				vgic_cpu_irq_set(vcpu, vlr.irq);
 				level_pending = true;
 			} else {
+				vgic_dist_irq_clear_pending(vcpu, vlr.irq);
 				vgic_cpu_irq_clear(vcpu, vlr.irq);
 			}
 
@@ -1384,17 +1470,19 @@ static void vgic_kick_vcpus(struct kvm *kvm)
 static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
 {
 	int edge_triggered = vgic_irq_is_edge(vcpu, irq);
-	int state = vgic_dist_irq_is_pending(vcpu, irq);
 
 	/*
 	 * Only inject an interrupt if:
 	 * - edge triggered and we have a rising edge
 	 * - level triggered and we change level
 	 */
-	if (edge_triggered)
+	if (edge_triggered) {
+		int state = vgic_dist_irq_is_pending(vcpu, irq);
 		return level > state;
-	else
+	} else {
+		int state = vgic_dist_irq_get_level(vcpu, irq);
 		return level != state;
+	}
 }
 
 static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
@@ -1424,10 +1512,19 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 
 	kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
 
-	if (level)
+	if (level) {
+		if (level_triggered)
+			vgic_dist_irq_set_level(vcpu, irq_num);
 		vgic_dist_irq_set_pending(vcpu, irq_num);
-	else
-		vgic_dist_irq_clear_pending(vcpu, irq_num);
+	} else {
+		if (level_triggered) {
+			vgic_dist_irq_clear_level(vcpu, irq_num);
+			if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
+				vgic_dist_irq_clear_pending(vcpu, irq_num);
+		} else {
+			vgic_dist_irq_clear_pending(vcpu, irq_num);
+		}
+	}
 
 	enabled = vgic_irq_is_enabled(vcpu, irq_num);
 
-- 
cgit v1.2.3


From 9da48b5502622f9f0e49df957521ec43a0c9f4c1 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sat, 14 Jun 2014 22:30:45 +0200
Subject: arm/arm64: KVM: vgic: Fix SGI writes to GICD_I{CS}PENDR0

Writes to GICD_ISPENDR0 and GICD_ICPENDR0 ignore all settings of the
pending state for SGIs.  Make sure the implementation handles this
correctly.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 435d8e7ad137..0039ae266a7b 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -454,7 +454,7 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
 					struct kvm_exit_mmio *mmio,
 					phys_addr_t offset)
 {
-	u32 *reg;
+	u32 *reg, orig;
 	u32 level_mask;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
@@ -463,6 +463,7 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
 
 	/* Mark both level and edge triggered irqs as pending */
 	reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
+	orig = *reg;
 	vgic_reg_access(mmio, reg, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
 
@@ -474,6 +475,12 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
 				ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
 		*reg &= level_mask;
 
+		/* Ignore writes to SGIs */
+		if (offset < 2) {
+			*reg &= ~0xffff;
+			*reg |= orig & 0xffff;
+		}
+
 		vgic_update_state(vcpu->kvm);
 		return true;
 	}
@@ -486,10 +493,11 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
 					  phys_addr_t offset)
 {
 	u32 *level_active;
-	u32 *reg;
+	u32 *reg, orig;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
 	reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
+	orig = *reg;
 	vgic_reg_access(mmio, reg, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
 	if (mmio->is_write) {
@@ -500,6 +508,12 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
 					  vcpu->vcpu_id, offset);
 		*reg |= *level_active;
 
+		/* Ignore writes to SGIs */
+		if (offset < 2) {
+			*reg &= ~0xffff;
+			*reg |= orig & 0xffff;
+		}
+
 		/* Clear soft-pending flags */
 		reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
 					  vcpu->vcpu_id, offset);
-- 
cgit v1.2.3


From 7e362919a59e6fc60e08ad1cf0b047291d1ca2e9 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sat, 14 Jun 2014 22:34:04 +0200
Subject: arm/arm64: KVM: vgic: Clarify and correct vgic documentation

The VGIC virtual distributor implementation documentation was written a
very long time ago, before the true nature of the beast had been
partially absorbed into my bloodstream.  Clarify the docs.

Plus, it fixes an actual bug.  ICFRn, pfff.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 0039ae266a7b..37fd20d35759 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -36,21 +36,22 @@
  * How the whole thing works (courtesy of Christoffer Dall):
  *
  * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
- *   something is pending
- * - VGIC pending interrupts are stored on the vgic.irq_pending vgic
- *   bitmap (this bitmap is updated by both user land ioctls and guest
- *   mmio ops, and other in-kernel peripherals such as the
- *   arch. timers) and indicate the 'wire' state.
+ *   something is pending on the CPU interface.
+ * - Interrupts that are pending on the distributor are stored on the
+ *   vgic.irq_pending vgic bitmap (this bitmap is updated by both user land
+ *   ioctls and guest mmio ops, and other in-kernel peripherals such as the
+ *   arch. timers).
  * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
  *   recalculated
  * - To calculate the oracle, we need info for each cpu from
  *   compute_pending_for_cpu, which considers:
  *   - PPI: dist->irq_pending & dist->irq_enable
  *   - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target
- *   - irq_spi_target is a 'formatted' version of the GICD_ICFGR
+ *   - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn
  *     registers, stored on each vcpu. We only keep one bit of
  *     information per interrupt, making sure that only one vcpu can
  *     accept the interrupt.
+ * - If any of the above state changes, we must recalculate the oracle.
  * - The same is true when injecting an interrupt, except that we only
  *   consider a single interrupt at a time. The irq_spi_cpu array
  *   contains the target CPU for each SPI.
-- 
cgit v1.2.3


From 71afaba4a2e98bb7bdeba5078370ab43d46e67a1 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:00 +0100
Subject: KVM: ARM: vgic: plug irq injection race

As it stands, nothing prevents userspace from injecting an interrupt
before the guest's GIC is actually initialized.

This goes unnoticed so far (as everything is pretty much statically
allocated), but ends up exploding in a spectacular way once we switch
to a more dynamic allocation (the GIC data structure isn't there yet).

The fix is to test for the "ready" flag in the VGIC distributor before
trying to inject the interrupt. Note that in order to avoid breaking
userspace, we have to ignore what is essentially an error.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 37fd20d35759..9bdf181a00e2 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1585,7 +1585,8 @@ out:
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 			bool level)
 {
-	if (vgic_update_irq_pending(kvm, cpuid, irq_num, level))
+	if (likely(vgic_initialized(kvm)) &&
+	    vgic_update_irq_pending(kvm, cpuid, irq_num, level))
 		vgic_kick_vcpus(kvm);
 
 	return 0;
-- 
cgit v1.2.3


From c1bfb577addd4867a82c4f235824a315d5afb94a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:01 +0100
Subject: arm/arm64: KVM: vgic: switch to dynamic allocation

So far, all the VGIC data structures are statically defined by the
*maximum* number of vcpus and interrupts it supports. It means that
we always have to oversize it to cater for the worse case.

Start by changing the data structures to be dynamically sizeable,
and allocate them at runtime.

The sizes are still very static though.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/kvm/arm.c     |   3 +
 include/kvm/arm_vgic.h |  76 ++++++++++++----
 virt/kvm/arm/vgic.c    | 243 ++++++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 269 insertions(+), 53 deletions(-)

(limited to 'virt/kvm')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 88c901cfc75e..c1a11496817b 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -161,6 +161,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 			kvm->vcpus[i] = NULL;
 		}
 	}
+
+	kvm_vgic_destroy(kvm);
 }
 
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -243,6 +245,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	kvm_mmu_free_memory_caches(vcpu);
 	kvm_timer_vcpu_terminate(vcpu);
+	kvm_vgic_vcpu_destroy(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index f074539c6ac5..fd1b8f252da1 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -54,19 +54,33 @@
  * - a bunch of shared interrupts (SPI)
  */
 struct vgic_bitmap {
-	union {
-		u32 reg[VGIC_NR_PRIVATE_IRQS / 32];
-		DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS);
-	} percpu[VGIC_MAX_CPUS];
-	union {
-		u32 reg[VGIC_NR_SHARED_IRQS / 32];
-		DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS);
-	} shared;
+	/*
+	 * - One UL per VCPU for private interrupts (assumes UL is at
+	 *   least 32 bits)
+	 * - As many UL as necessary for shared interrupts.
+	 *
+	 * The private interrupts are accessed via the "private"
+	 * field, one UL per vcpu (the state for vcpu n is in
+	 * private[n]). The shared interrupts are accessed via the
+	 * "shared" pointer (IRQn state is at bit n-32 in the bitmap).
+	 */
+	unsigned long *private;
+	unsigned long *shared;
 };
 
 struct vgic_bytemap {
-	u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4];
-	u32 shared[VGIC_NR_SHARED_IRQS  / 4];
+	/*
+	 * - 8 u32 per VCPU for private interrupts
+	 * - As many u32 as necessary for shared interrupts.
+	 *
+	 * The private interrupts are accessed via the "private"
+	 * field, (the state for vcpu n is in private[n*8] to
+	 * private[n*8 + 7]). The shared interrupts are accessed via
+	 * the "shared" pointer (IRQn state is at byte (n-32)%4 of the
+	 * shared[(n-32)/4] word).
+	 */
+	u32 *private;
+	u32 *shared;
 };
 
 struct kvm_vcpu;
@@ -127,6 +141,9 @@ struct vgic_dist {
 	bool			in_kernel;
 	bool			ready;
 
+	int			nr_cpus;
+	int			nr_irqs;
+
 	/* Virtual control interface mapping */
 	void __iomem		*vctrl_base;
 
@@ -166,15 +183,36 @@ struct vgic_dist {
 	/* Level/edge triggered */
 	struct vgic_bitmap	irq_cfg;
 
-	/* Source CPU per SGI and target CPU */
-	u8			irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS];
+	/*
+	 * Source CPU per SGI and target CPU:
+	 *
+	 * Each byte represent a SGI observable on a VCPU, each bit of
+	 * this byte indicating if the corresponding VCPU has
+	 * generated this interrupt. This is a GICv2 feature only.
+	 *
+	 * For VCPUn (n < 8), irq_sgi_sources[n*16] to [n*16 + 15] are
+	 * the SGIs observable on VCPUn.
+	 */
+	u8			*irq_sgi_sources;
 
-	/* Target CPU for each IRQ */
-	u8			irq_spi_cpu[VGIC_NR_SHARED_IRQS];
-	struct vgic_bitmap	irq_spi_target[VGIC_MAX_CPUS];
+	/*
+	 * Target CPU for each SPI:
+	 *
+	 * Array of available SPI, each byte indicating the target
+	 * VCPU for SPI. IRQn (n >=32) is at irq_spi_cpu[n-32].
+	 */
+	u8			*irq_spi_cpu;
+
+	/*
+	 * Reverse lookup of irq_spi_cpu for faster compute pending:
+	 *
+	 * Array of bitmaps, one per VCPU, describing if IRQn is
+	 * routed to a particular VCPU.
+	 */
+	struct vgic_bitmap	*irq_spi_target;
 
 	/* Bitmap indicating which CPU has something pending */
-	unsigned long		irq_pending_on_cpu;
+	unsigned long		*irq_pending_on_cpu;
 #endif
 };
 
@@ -204,11 +242,11 @@ struct vgic_v3_cpu_if {
 struct vgic_cpu {
 #ifdef CONFIG_KVM_ARM_VGIC
 	/* per IRQ to LR mapping */
-	u8		vgic_irq_lr_map[VGIC_NR_IRQS];
+	u8		*vgic_irq_lr_map;
 
 	/* Pending interrupts on this VCPU */
 	DECLARE_BITMAP(	pending_percpu, VGIC_NR_PRIVATE_IRQS);
-	DECLARE_BITMAP(	pending_shared, VGIC_NR_SHARED_IRQS);
+	unsigned long	*pending_shared;
 
 	/* Bitmap of used/free list registers */
 	DECLARE_BITMAP(	lr_used, VGIC_V2_MAX_LRS);
@@ -239,7 +277,9 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
 int kvm_vgic_hyp_init(void);
 int kvm_vgic_init(struct kvm *kvm);
 int kvm_vgic_create(struct kvm *kvm);
+void kvm_vgic_destroy(struct kvm *kvm);
 int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 9bdf181a00e2..08db8764496a 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -95,6 +95,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
 static void vgic_update_state(struct kvm *kvm);
 static void vgic_kick_vcpus(struct kvm *kvm);
+static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi);
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
@@ -105,10 +106,8 @@ static const struct vgic_ops *vgic_ops;
 static const struct vgic_params *vgic;
 
 /*
- * struct vgic_bitmap contains unions that provide two views of
- * the same data. In one case it is an array of registers of
- * u32's, and in the other case it is a bitmap of unsigned
- * longs.
+ * struct vgic_bitmap contains a bitmap made of unsigned longs, but
+ * extracts u32s out of them.
  *
  * This does not work on 64-bit BE systems, because the bitmap access
  * will store two consecutive 32-bit words with the higher-addressed
@@ -124,23 +123,45 @@ static const struct vgic_params *vgic;
 #define REG_OFFSET_SWIZZLE	0
 #endif
 
+static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs)
+{
+	int nr_longs;
+
+	nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
+
+	b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL);
+	if (!b->private)
+		return -ENOMEM;
+
+	b->shared = b->private + nr_cpus;
+
+	return 0;
+}
+
+static void vgic_free_bitmap(struct vgic_bitmap *b)
+{
+	kfree(b->private);
+	b->private = NULL;
+	b->shared = NULL;
+}
+
 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
 				int cpuid, u32 offset)
 {
 	offset >>= 2;
 	if (!offset)
-		return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE);
+		return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE;
 	else
-		return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
+		return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
 }
 
 static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
 				   int cpuid, int irq)
 {
 	if (irq < VGIC_NR_PRIVATE_IRQS)
-		return test_bit(irq, x->percpu[cpuid].reg_ul);
+		return test_bit(irq, x->private + cpuid);
 
-	return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul);
+	return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared);
 }
 
 static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
@@ -149,9 +170,9 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
 	unsigned long *reg;
 
 	if (irq < VGIC_NR_PRIVATE_IRQS) {
-		reg = x->percpu[cpuid].reg_ul;
+		reg = x->private + cpuid;
 	} else {
-		reg =  x->shared.reg_ul;
+		reg = x->shared;
 		irq -= VGIC_NR_PRIVATE_IRQS;
 	}
 
@@ -163,24 +184,49 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
 
 static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
 {
-	if (unlikely(cpuid >= VGIC_MAX_CPUS))
-		return NULL;
-	return x->percpu[cpuid].reg_ul;
+	return x->private + cpuid;
 }
 
 static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
 {
-	return x->shared.reg_ul;
+	return x->shared;
+}
+
+static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs)
+{
+	int size;
+
+	size  = nr_cpus * VGIC_NR_PRIVATE_IRQS;
+	size += nr_irqs - VGIC_NR_PRIVATE_IRQS;
+
+	x->private = kzalloc(size, GFP_KERNEL);
+	if (!x->private)
+		return -ENOMEM;
+
+	x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32);
+	return 0;
+}
+
+static void vgic_free_bytemap(struct vgic_bytemap *b)
+{
+	kfree(b->private);
+	b->private = NULL;
+	b->shared = NULL;
 }
 
 static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
 {
-	offset >>= 2;
-	BUG_ON(offset > (VGIC_NR_IRQS / 4));
-	if (offset < 8)
-		return x->percpu[cpuid] + offset;
-	else
-		return x->shared + offset - 8;
+	u32 *reg;
+
+	if (offset < VGIC_NR_PRIVATE_IRQS) {
+		reg = x->private;
+		offset += cpuid * VGIC_NR_PRIVATE_IRQS;
+	} else {
+		reg = x->shared;
+		offset -= VGIC_NR_PRIVATE_IRQS;
+	}
+
+	return reg + (offset / sizeof(u32));
 }
 
 #define VGIC_CFG_LEVEL	0
@@ -744,7 +790,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 		 */
 		vgic_dist_irq_set_pending(vcpu, lr.irq);
 		if (lr.irq < VGIC_NR_SGIS)
-			dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source;
+			*vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source;
 		lr.state &= ~LR_STATE_PENDING;
 		vgic_set_lr(vcpu, i, lr);
 
@@ -778,7 +824,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
 	/* Copy source SGIs from distributor side */
 	for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
 		int shift = 8 * (sgi - min_sgi);
-		reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift;
+		reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift;
 	}
 
 	mmio_data_write(mmio, ~0, reg);
@@ -802,14 +848,15 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
 	/* Clear pending SGIs on the distributor */
 	for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
 		u8 mask = reg >> (8 * (sgi - min_sgi));
+		u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi);
 		if (set) {
-			if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask)
+			if ((*src & mask) != mask)
 				updated = true;
-			dist->irq_sgi_sources[vcpu_id][sgi] |= mask;
+			*src |= mask;
 		} else {
-			if (dist->irq_sgi_sources[vcpu_id][sgi] & mask)
+			if (*src & mask)
 				updated = true;
-			dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask;
+			*src &= ~mask;
 		}
 	}
 
@@ -993,6 +1040,11 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	return true;
 }
 
+static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi)
+{
+	return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi;
+}
+
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 {
 	struct kvm *kvm = vcpu->kvm;
@@ -1026,7 +1078,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 		if (target_cpus & 1) {
 			/* Flag the SGI as pending */
 			vgic_dist_irq_set_pending(vcpu, sgi);
-			dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
+			*vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id;
 			kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
 		}
 
@@ -1073,14 +1125,14 @@ static void vgic_update_state(struct kvm *kvm)
 	int c;
 
 	if (!dist->enabled) {
-		set_bit(0, &dist->irq_pending_on_cpu);
+		set_bit(0, dist->irq_pending_on_cpu);
 		return;
 	}
 
 	kvm_for_each_vcpu(c, vcpu, kvm) {
 		if (compute_pending_for_cpu(vcpu)) {
 			pr_debug("CPU%d has pending interrupts\n", c);
-			set_bit(c, &dist->irq_pending_on_cpu);
+			set_bit(c, dist->irq_pending_on_cpu);
 		}
 	}
 }
@@ -1237,14 +1289,14 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
 	int vcpu_id = vcpu->vcpu_id;
 	int c;
 
-	sources = dist->irq_sgi_sources[vcpu_id][irq];
+	sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
 
 	for_each_set_bit(c, &sources, VGIC_MAX_CPUS) {
 		if (vgic_queue_irq(vcpu, c, irq))
 			clear_bit(c, &sources);
 	}
 
-	dist->irq_sgi_sources[vcpu_id][irq] = sources;
+	*vgic_get_sgi_sources(dist, vcpu_id, irq) = sources;
 
 	/*
 	 * If the sources bitmap has been cleared it means that we
@@ -1332,7 +1384,7 @@ epilog:
 		 * us. Claim we don't have anything pending. We'll
 		 * adjust that if needed while exiting.
 		 */
-		clear_bit(vcpu_id, &dist->irq_pending_on_cpu);
+		clear_bit(vcpu_id, dist->irq_pending_on_cpu);
 	}
 }
 
@@ -1430,7 +1482,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	/* Check if we still have something up our sleeve... */
 	pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
 	if (level_pending || pending < vgic->nr_lr)
-		set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
+		set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
 }
 
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
@@ -1464,7 +1516,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
 	if (!irqchip_in_kernel(vcpu->kvm))
 		return 0;
 
-	return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
+	return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
 }
 
 static void vgic_kick_vcpus(struct kvm *kvm)
@@ -1559,7 +1611,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 
 	if (level) {
 		vgic_cpu_irq_set(vcpu, irq_num);
-		set_bit(cpuid, &dist->irq_pending_on_cpu);
+		set_bit(cpuid, dist->irq_pending_on_cpu);
 	}
 
 out:
@@ -1603,6 +1655,32 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+	kfree(vgic_cpu->pending_shared);
+	kfree(vgic_cpu->vgic_irq_lr_map);
+	vgic_cpu->pending_shared = NULL;
+	vgic_cpu->vgic_irq_lr_map = NULL;
+}
+
+static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
+{
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+	int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
+	vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
+	vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL);
+
+	if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
+		kvm_vgic_vcpu_destroy(vcpu);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 /**
  * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
  * @vcpu: pointer to the vcpu struct
@@ -1642,6 +1720,97 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+void kvm_vgic_destroy(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_vgic_vcpu_destroy(vcpu);
+
+	vgic_free_bitmap(&dist->irq_enabled);
+	vgic_free_bitmap(&dist->irq_level);
+	vgic_free_bitmap(&dist->irq_pending);
+	vgic_free_bitmap(&dist->irq_soft_pend);
+	vgic_free_bitmap(&dist->irq_queued);
+	vgic_free_bitmap(&dist->irq_cfg);
+	vgic_free_bytemap(&dist->irq_priority);
+	if (dist->irq_spi_target) {
+		for (i = 0; i < dist->nr_cpus; i++)
+			vgic_free_bitmap(&dist->irq_spi_target[i]);
+	}
+	kfree(dist->irq_sgi_sources);
+	kfree(dist->irq_spi_cpu);
+	kfree(dist->irq_spi_target);
+	kfree(dist->irq_pending_on_cpu);
+	dist->irq_sgi_sources = NULL;
+	dist->irq_spi_cpu = NULL;
+	dist->irq_spi_target = NULL;
+	dist->irq_pending_on_cpu = NULL;
+}
+
+/*
+ * Allocate and initialize the various data structures. Must be called
+ * with kvm->lock held!
+ */
+static int vgic_init_maps(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct kvm_vcpu *vcpu;
+	int nr_cpus, nr_irqs;
+	int ret, i;
+
+	nr_cpus = dist->nr_cpus = VGIC_MAX_CPUS;
+	nr_irqs = dist->nr_irqs = VGIC_NR_IRQS;
+
+	ret  = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
+	ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs);
+	ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs);
+	ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs);
+	ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs);
+	ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs);
+	ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs);
+
+	if (ret)
+		goto out;
+
+	dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL);
+	dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL);
+	dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus,
+				       GFP_KERNEL);
+	dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
+					   GFP_KERNEL);
+	if (!dist->irq_sgi_sources ||
+	    !dist->irq_spi_cpu ||
+	    !dist->irq_spi_target ||
+	    !dist->irq_pending_on_cpu) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < nr_cpus; i++)
+		ret |= vgic_init_bitmap(&dist->irq_spi_target[i],
+					nr_cpus, nr_irqs);
+
+	if (ret)
+		goto out;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
+		if (ret) {
+			kvm_err("VGIC: Failed to allocate vcpu memory\n");
+			break;
+		}
+	}
+
+out:
+	if (ret)
+		kvm_vgic_destroy(kvm);
+
+	return ret;
+}
+
 /**
  * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
  * @kvm: pointer to the kvm struct
@@ -1722,6 +1891,10 @@ int kvm_vgic_create(struct kvm *kvm)
 	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
 
+	ret = vgic_init_maps(kvm);
+	if (ret)
+		kvm_err("Unable to allocate maps\n");
+
 out_unlock:
 	for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
 		vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
-- 
cgit v1.2.3


From fb65ab63b8cae510ea1e43e68b5da2f9980aa6d5 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:02 +0100
Subject: arm/arm64: KVM: vgic: Parametrize VGIC_NR_SHARED_IRQS

Having a dynamic number of supported interrupts means that we
cannot relly on VGIC_NR_SHARED_IRQS being fixed anymore.

Instead, make it take the distributor structure as a parameter,
so it can return the right value.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/kvm/arm_vgic.h |  1 -
 virt/kvm/arm/vgic.c    | 16 +++++++++++-----
 2 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'virt/kvm')

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index fd1b8f252da1..b2f9936df319 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -29,7 +29,6 @@
 #define VGIC_NR_SGIS		16
 #define VGIC_NR_PPIS		16
 #define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)
-#define VGIC_NR_SHARED_IRQS	(VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS)
 #define VGIC_MAX_CPUS		KVM_MAX_VCPUS
 
 #define VGIC_V2_MAX_LRS		(1 << 6)
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 08db8764496a..7d64dc242afc 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1086,11 +1086,17 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 	}
 }
 
+static int vgic_nr_shared_irqs(struct vgic_dist *dist)
+{
+	return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
+}
+
 static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
 	unsigned long pending_private, pending_shared;
+	int nr_shared = vgic_nr_shared_irqs(dist);
 	int vcpu_id;
 
 	vcpu_id = vcpu->vcpu_id;
@@ -1103,15 +1109,15 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
 
 	pending = vgic_bitmap_get_shared_map(&dist->irq_pending);
 	enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
-	bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS);
+	bitmap_and(pend_shared, pending, enabled, nr_shared);
 	bitmap_and(pend_shared, pend_shared,
 		   vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
-		   VGIC_NR_SHARED_IRQS);
+		   nr_shared);
 
 	pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
-	pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS);
+	pending_shared = find_first_bit(pend_shared, nr_shared);
 	return (pending_private < VGIC_NR_PRIVATE_IRQS ||
-		pending_shared < VGIC_NR_SHARED_IRQS);
+		pending_shared < vgic_nr_shared_irqs(dist));
 }
 
 /*
@@ -1368,7 +1374,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 	}
 
 	/* SPIs */
-	for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) {
+	for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) {
 		if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
 			overflow = 1;
 	}
-- 
cgit v1.2.3


From fc675e355e705a046df7b635d3f3330c0ad94569 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:03 +0100
Subject: arm/arm64: KVM: vgic: kill VGIC_MAX_CPUS

We now have the information about the number of CPU interfaces in
the distributor itself. Let's get rid of VGIC_MAX_CPUS, and just
rely on KVM_MAX_VCPUS where we don't have the choice. Yet.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/kvm/arm_vgic.h | 3 +--
 virt/kvm/arm/vgic.c    | 6 +++---
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'virt/kvm')

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index b2f9936df319..3b73d7845124 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -29,13 +29,12 @@
 #define VGIC_NR_SGIS		16
 #define VGIC_NR_PPIS		16
 #define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)
-#define VGIC_MAX_CPUS		KVM_MAX_VCPUS
 
 #define VGIC_V2_MAX_LRS		(1 << 6)
 #define VGIC_V3_MAX_LRS		16
 
 /* Sanity checks... */
-#if (VGIC_MAX_CPUS > 8)
+#if (KVM_MAX_VCPUS > 8)
 #error	Invalid number of CPU interfaces
 #endif
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 7d64dc242afc..599ad17e5436 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1297,7 +1297,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
 
 	sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
 
-	for_each_set_bit(c, &sources, VGIC_MAX_CPUS) {
+	for_each_set_bit(c, &sources, dist->nr_cpus) {
 		if (vgic_queue_irq(vcpu, c, irq))
 			clear_bit(c, &sources);
 	}
@@ -1700,7 +1700,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	int i;
 
-	if (vcpu->vcpu_id >= VGIC_MAX_CPUS)
+	if (vcpu->vcpu_id >= dist->nr_cpus)
 		return -EBUSY;
 
 	for (i = 0; i < VGIC_NR_IRQS; i++) {
@@ -1767,7 +1767,7 @@ static int vgic_init_maps(struct kvm *kvm)
 	int nr_cpus, nr_irqs;
 	int ret, i;
 
-	nr_cpus = dist->nr_cpus = VGIC_MAX_CPUS;
+	nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS;
 	nr_irqs = dist->nr_irqs = VGIC_NR_IRQS;
 
 	ret  = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
-- 
cgit v1.2.3


From c3c918361adcceb816c92b21dd95d2b46fb96a8f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:04 +0100
Subject: arm/arm64: KVM: vgic: handle out-of-range MMIO accesses

Now that we can (almost) dynamically size the number of interrupts,
we're facing an interesting issue:

We have to evaluate at runtime whether or not an access hits a valid
register, based on the sizing of this particular instance of the
distributor. Furthermore, the GIC spec says that accessing a reserved
register is RAZ/WI.

For this, add a new field to our range structure, indicating the number
of bits a single interrupts uses. That allows us to find out whether or
not the access is in range.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/kvm/arm_vgic.h |  3 ++-
 virt/kvm/arm/vgic.c    | 56 ++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 47 insertions(+), 12 deletions(-)

(limited to 'virt/kvm')

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 3b73d7845124..2767f939f47c 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -32,6 +32,7 @@
 
 #define VGIC_V2_MAX_LRS		(1 << 6)
 #define VGIC_V3_MAX_LRS		16
+#define VGIC_MAX_IRQS		1024
 
 /* Sanity checks... */
 #if (KVM_MAX_VCPUS > 8)
@@ -42,7 +43,7 @@
 #error "VGIC_NR_IRQS must be a multiple of 32"
 #endif
 
-#if (VGIC_NR_IRQS > 1024)
+#if (VGIC_NR_IRQS > VGIC_MAX_IRQS)
 #error "VGIC_NR_IRQS must be <= 1024"
 #endif
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 599ad17e5436..973eaf7ebe98 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -895,6 +895,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
 struct mmio_range {
 	phys_addr_t base;
 	unsigned long len;
+	int bits_per_irq;
 	bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
 			    phys_addr_t offset);
 };
@@ -903,56 +904,67 @@ static const struct mmio_range vgic_dist_ranges[] = {
 	{
 		.base		= GIC_DIST_CTRL,
 		.len		= 12,
+		.bits_per_irq	= 0,
 		.handle_mmio	= handle_mmio_misc,
 	},
 	{
 		.base		= GIC_DIST_IGROUP,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_raz_wi,
 	},
 	{
 		.base		= GIC_DIST_ENABLE_SET,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_set_enable_reg,
 	},
 	{
 		.base		= GIC_DIST_ENABLE_CLEAR,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_clear_enable_reg,
 	},
 	{
 		.base		= GIC_DIST_PENDING_SET,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_set_pending_reg,
 	},
 	{
 		.base		= GIC_DIST_PENDING_CLEAR,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_clear_pending_reg,
 	},
 	{
 		.base		= GIC_DIST_ACTIVE_SET,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_raz_wi,
 	},
 	{
 		.base		= GIC_DIST_ACTIVE_CLEAR,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_raz_wi,
 	},
 	{
 		.base		= GIC_DIST_PRI,
-		.len		= VGIC_NR_IRQS,
+		.len		= VGIC_MAX_IRQS,
+		.bits_per_irq	= 8,
 		.handle_mmio	= handle_mmio_priority_reg,
 	},
 	{
 		.base		= GIC_DIST_TARGET,
-		.len		= VGIC_NR_IRQS,
+		.len		= VGIC_MAX_IRQS,
+		.bits_per_irq	= 8,
 		.handle_mmio	= handle_mmio_target_reg,
 	},
 	{
 		.base		= GIC_DIST_CONFIG,
-		.len		= VGIC_NR_IRQS / 4,
+		.len		= VGIC_MAX_IRQS / 4,
+		.bits_per_irq	= 2,
 		.handle_mmio	= handle_mmio_cfg_reg,
 	},
 	{
@@ -990,6 +1002,22 @@ struct mmio_range *find_matching_range(const struct mmio_range *ranges,
 	return NULL;
 }
 
+static bool vgic_validate_access(const struct vgic_dist *dist,
+				 const struct mmio_range *range,
+				 unsigned long offset)
+{
+	int irq;
+
+	if (!range->bits_per_irq)
+		return true;	/* Not an irq-based access */
+
+	irq = offset * 8 / range->bits_per_irq;
+	if (irq >= dist->nr_irqs)
+		return false;
+
+	return true;
+}
+
 /**
  * vgic_handle_mmio - handle an in-kernel MMIO access
  * @vcpu:	pointer to the vcpu performing the access
@@ -1029,7 +1057,13 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 
 	spin_lock(&vcpu->kvm->arch.vgic.lock);
 	offset = mmio->phys_addr - range->base - base;
-	updated_state = range->handle_mmio(vcpu, mmio, offset);
+	if (vgic_validate_access(dist, range, offset)) {
+		updated_state = range->handle_mmio(vcpu, mmio, offset);
+	} else {
+		vgic_reg_access(mmio, NULL, offset,
+				ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+		updated_state = false;
+	}
 	spin_unlock(&vcpu->kvm->arch.vgic.lock);
 	kvm_prepare_mmio(run, mmio);
 	kvm_handle_mmio_return(vcpu, run);
-- 
cgit v1.2.3


From 5fb66da64064d0cb8dcce4cc8bf4cb1b921b13a0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:05 +0100
Subject: arm/arm64: KVM: vgic: kill VGIC_NR_IRQS

Nuke VGIC_NR_IRQS entierly, now that the distributor instance
contains the number of IRQ allocated to this GIC.

Also add VGIC_NR_IRQS_LEGACY to preserve the current API.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/kvm/arm_vgic.h |  6 +++---
 virt/kvm/arm/vgic.c    | 17 +++++++++++------
 2 files changed, 14 insertions(+), 9 deletions(-)

(limited to 'virt/kvm')

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 2767f939f47c..aa20d4a7242f 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -25,7 +25,7 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
-#define VGIC_NR_IRQS		256
+#define VGIC_NR_IRQS_LEGACY	256
 #define VGIC_NR_SGIS		16
 #define VGIC_NR_PPIS		16
 #define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)
@@ -39,11 +39,11 @@
 #error	Invalid number of CPU interfaces
 #endif
 
-#if (VGIC_NR_IRQS & 31)
+#if (VGIC_NR_IRQS_LEGACY & 31)
 #error "VGIC_NR_IRQS must be a multiple of 32"
 #endif
 
-#if (VGIC_NR_IRQS > VGIC_MAX_IRQS)
+#if (VGIC_NR_IRQS_LEGACY > VGIC_MAX_IRQS)
 #error "VGIC_NR_IRQS must be <= 1024"
 #endif
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 973eaf7ebe98..725d829ad1d9 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -439,7 +439,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
 
 	case 4:			/* GICD_TYPER */
 		reg  = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
-		reg |= (VGIC_NR_IRQS >> 5) - 1;
+		reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1;
 		vgic_reg_access(mmio, &reg, word_offset,
 				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
 		break;
@@ -1277,13 +1277,14 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	struct vgic_lr vlr;
 	int lr;
 
 	/* Sanitize the input... */
 	BUG_ON(sgi_source_id & ~7);
 	BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
-	BUG_ON(irq >= VGIC_NR_IRQS);
+	BUG_ON(irq >= dist->nr_irqs);
 
 	kvm_debug("Queue IRQ%d\n", irq);
 
@@ -1515,7 +1516,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 
 		vlr = vgic_get_lr(vcpu, lr);
 
-		BUG_ON(vlr.irq >= VGIC_NR_IRQS);
+		BUG_ON(vlr.irq >= dist->nr_irqs);
 		vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
 	}
 
@@ -1737,7 +1738,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	if (vcpu->vcpu_id >= dist->nr_cpus)
 		return -EBUSY;
 
-	for (i = 0; i < VGIC_NR_IRQS; i++) {
+	for (i = 0; i < dist->nr_irqs; i++) {
 		if (i < VGIC_NR_PPIS)
 			vgic_bitmap_set_irq_val(&dist->irq_enabled,
 						vcpu->vcpu_id, i, 1);
@@ -1802,7 +1803,11 @@ static int vgic_init_maps(struct kvm *kvm)
 	int ret, i;
 
 	nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS;
-	nr_irqs = dist->nr_irqs = VGIC_NR_IRQS;
+
+	if (!dist->nr_irqs)
+		dist->nr_irqs = VGIC_NR_IRQS_LEGACY;
+
+	nr_irqs = dist->nr_irqs;
 
 	ret  = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
 	ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs);
@@ -1886,7 +1891,7 @@ int kvm_vgic_init(struct kvm *kvm)
 		goto out;
 	}
 
-	for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
+	for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4)
 		vgic_set_target_reg(kvm, 0, i);
 
 	kvm->arch.vgic.ready = true;
-- 
cgit v1.2.3


From 4956f2bc1fdee4bc336532f3f34635a8534cedfd Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:06 +0100
Subject: arm/arm64: KVM: vgic: delay vgic allocation until init time

It is now quite easy to delay the allocation of the vgic tables
until we actually require it to be up and running (when the first
vcpu is kicking around, or someones tries to access the GIC registers).

This allow us to allocate memory for the exact number of CPUs we
have. As nobody configures the number of interrupts just yet,
use a fallback to VGIC_NR_IRQS_LEGACY.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/kvm/arm.c     |  7 -------
 include/kvm/arm_vgic.h |  1 -
 virt/kvm/arm/vgic.c    | 42 +++++++++++++++++++++++++++++-------------
 3 files changed, 29 insertions(+), 21 deletions(-)

(limited to 'virt/kvm')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index c1a11496817b..40bc3df6d87b 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -261,16 +261,9 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
-	int ret;
-
 	/* Force users to call KVM_ARM_VCPU_INIT */
 	vcpu->arch.target = -1;
 
-	/* Set up VGIC */
-	ret = kvm_vgic_vcpu_init(vcpu);
-	if (ret)
-		return ret;
-
 	/* Set up the timer */
 	kvm_timer_vcpu_init(vcpu);
 
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index aa20d4a7242f..2f2aac8448a4 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -277,7 +277,6 @@ int kvm_vgic_hyp_init(void);
 int kvm_vgic_init(struct kvm *kvm);
 int kvm_vgic_create(struct kvm *kvm);
 void kvm_vgic_destroy(struct kvm *kvm);
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 725d829ad1d9..ac2b44d58e60 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1729,15 +1729,12 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
  * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
  * this vcpu and enable the VGIC for this VCPU
  */
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	int i;
 
-	if (vcpu->vcpu_id >= dist->nr_cpus)
-		return -EBUSY;
-
 	for (i = 0; i < dist->nr_irqs; i++) {
 		if (i < VGIC_NR_PPIS)
 			vgic_bitmap_set_irq_val(&dist->irq_enabled,
@@ -1757,8 +1754,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	vgic_cpu->nr_lr = vgic->nr_lr;
 
 	vgic_enable(vcpu);
-
-	return 0;
 }
 
 void kvm_vgic_destroy(struct kvm *kvm)
@@ -1802,8 +1797,17 @@ static int vgic_init_maps(struct kvm *kvm)
 	int nr_cpus, nr_irqs;
 	int ret, i;
 
-	nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS;
+	if (dist->nr_cpus)	/* Already allocated */
+		return 0;
+
+	nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
+	if (!nr_cpus)		/* No vcpus? Can't be good... */
+		return -EINVAL;
 
+	/*
+	 * If nobody configured the number of interrupts, use the
+	 * legacy one.
+	 */
 	if (!dist->nr_irqs)
 		dist->nr_irqs = VGIC_NR_IRQS_LEGACY;
 
@@ -1849,6 +1853,9 @@ static int vgic_init_maps(struct kvm *kvm)
 		}
 	}
 
+	for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
+		vgic_set_target_reg(kvm, 0, i);
+
 out:
 	if (ret)
 		kvm_vgic_destroy(kvm);
@@ -1867,6 +1874,7 @@ out:
  */
 int kvm_vgic_init(struct kvm *kvm)
 {
+	struct kvm_vcpu *vcpu;
 	int ret = 0, i;
 
 	if (!irqchip_in_kernel(kvm))
@@ -1884,6 +1892,12 @@ int kvm_vgic_init(struct kvm *kvm)
 		goto out;
 	}
 
+	ret = vgic_init_maps(kvm);
+	if (ret) {
+		kvm_err("Unable to allocate maps\n");
+		goto out;
+	}
+
 	ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
 				    vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE);
 	if (ret) {
@@ -1891,11 +1905,13 @@ int kvm_vgic_init(struct kvm *kvm)
 		goto out;
 	}
 
-	for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4)
-		vgic_set_target_reg(kvm, 0, i);
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_vgic_vcpu_init(vcpu);
 
 	kvm->arch.vgic.ready = true;
 out:
+	if (ret)
+		kvm_vgic_destroy(kvm);
 	mutex_unlock(&kvm->lock);
 	return ret;
 }
@@ -1936,10 +1952,6 @@ int kvm_vgic_create(struct kvm *kvm)
 	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
 
-	ret = vgic_init_maps(kvm);
-	if (ret)
-		kvm_err("Unable to allocate maps\n");
-
 out_unlock:
 	for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
 		vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
@@ -2140,6 +2152,10 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
 
 	mutex_lock(&dev->kvm->lock);
 
+	ret = vgic_init_maps(dev->kvm);
+	if (ret)
+		goto out;
+
 	if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
 		ret = -EINVAL;
 		goto out;
-- 
cgit v1.2.3


From a98f26f183801685ef57333de4bafd4bbc692c7c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:07 +0100
Subject: arm/arm64: KVM: vgic: make number of irqs a configurable attribute

In order to make the number of interrupts configurable, use the new
fancy device management API to add KVM_DEV_ARM_VGIC_GRP_NR_IRQS as
a VGIC configurable attribute.

Userspace can now specify the exact size of the GIC (by increments
of 32 interrupts).

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 Documentation/virtual/kvm/devices/arm-vgic.txt | 10 +++++++
 arch/arm/include/uapi/asm/kvm.h                |  1 +
 arch/arm64/include/uapi/asm/kvm.h              |  1 +
 virt/kvm/arm/vgic.c                            | 37 ++++++++++++++++++++++++++
 4 files changed, 49 insertions(+)

(limited to 'virt/kvm')

diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
index 7f4e91b1316b..df8b0c7540b6 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -71,3 +71,13 @@ Groups:
   Errors:
     -ENODEV: Getting or setting this register is not yet supported
     -EBUSY: One or more VCPUs are running
+
+  KVM_DEV_ARM_VGIC_GRP_NR_IRQS
+  Attributes:
+    A value describing the number of interrupts (SGI, PPI and SPI) for
+    this GIC instance, ranging from 64 to 1024, in increments of 32.
+
+  Errors:
+    -EINVAL: Value set is out of the expected range
+    -EBUSY: Value has already be set, or GIC has already been initialized
+            with default values.
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 51257fda254b..09ee408c1a67 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -174,6 +174,7 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS	3
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index f4ec5a674d05..8e38878c87c6 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -160,6 +160,7 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS	3
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index ac2b44d58e60..b6fab0f25f3b 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -2253,6 +2253,36 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 
 		return vgic_attr_regs_access(dev, attr, &reg, true);
 	}
+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
+		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+		u32 val;
+		int ret = 0;
+
+		if (get_user(val, uaddr))
+			return -EFAULT;
+
+		/*
+		 * We require:
+		 * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs
+		 * - at most 1024 interrupts
+		 * - a multiple of 32 interrupts
+		 */
+		if (val < (VGIC_NR_PRIVATE_IRQS + 32) ||
+		    val > VGIC_MAX_IRQS ||
+		    (val & 31))
+			return -EINVAL;
+
+		mutex_lock(&dev->kvm->lock);
+
+		if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
+			ret = -EBUSY;
+		else
+			dev->kvm->arch.vgic.nr_irqs = val;
+
+		mutex_unlock(&dev->kvm->lock);
+
+		return ret;
+	}
 
 	}
 
@@ -2289,6 +2319,11 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 		r = put_user(reg, uaddr);
 		break;
 	}
+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
+		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+		r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
+		break;
+	}
 
 	}
 
@@ -2325,6 +2360,8 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
 		offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
 		return vgic_has_attr_regs(vgic_cpu_ranges, offset);
+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+		return 0;
 	}
 	return -ENXIO;
 }
-- 
cgit v1.2.3


From 29f1b65b5984c1e35e2d60d1416d03cee0b91ee2 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 22 Sep 2014 23:33:08 +0200
Subject: KVM: EVENTFD: Remove inclusion of irq.h

Commit c77dcac (KVM: Move more code under CONFIG_HAVE_KVM_IRQFD) added
functionality that depends on definitions in ioapic.h when
__KVM_HAVE_IOAPIC is defined.

At the same time, kvm-arm commit 0ba0951 (KVM: EVENTFD: remove inclusion
of irq.h) removed the inclusion of irq.h, an architecture-specific header
that is not present on ARM but which happened to include ioapic.h on x86.

Include ioapic.h directly in eventfd.c if __KVM_HAVE_IOAPIC is defined.
This fixes x86 and lets ARM use eventfd.c.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/eventfd.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 3c5981c87c3f..b0fb390943c6 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -36,7 +36,9 @@
 #include <linux/seqlock.h>
 #include <trace/events/kvm.h>
 
-#include "irq.h"
+#ifdef __KVM_HAVE_IOAPIC
+#include "ioapic.h"
+#endif
 #include "iodev.h"
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
-- 
cgit v1.2.3


From 3c3c29fd0d7cddc32862c350d0700ce69953e3bd Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 24 Sep 2014 13:02:46 +0200
Subject: kvm-vfio: do not use module_init

/me got confused between the kernel and QEMU.  In the kernel, you can
only have one module_init function, and it will prevent unloading the
module unless you also have the corresponding module_exit function.

So, commit 80ce1639727e (KVM: VFIO: register kvm_device_ops dynamically,
2014-09-02) broke unloading of the kvm module, by adding a module_init
function and no module_exit.

Repair it by making kvm_vfio_ops_init weak, and checking it in
kvm_init.

Cc: Will Deacon <will.deacon@arm.com>
Cc: Gleb Natapov <gleb@kernel.org>
Cc: Alex Williamson <Alex.Williamson@redhat.com>
Fixes: 80ce1639727e9d38729c34f162378508c307ca25
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c |  4 ++++
 virt/kvm/vfio.c     |  4 ++--
 virt/kvm/vfio.h     | 13 +++++++++++++
 3 files changed, 19 insertions(+), 2 deletions(-)
 create mode 100644 virt/kvm/vfio.h

(limited to 'virt/kvm')

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index db57363cc287..499db0977f3c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -57,6 +57,7 @@
 
 #include "coalesced_mmio.h"
 #include "async_pf.h"
+#include "vfio.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/kvm.h>
@@ -3226,6 +3227,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 		goto out_undebugfs;
 	}
 
+	r = kvm_vfio_ops_init();
+	WARN_ON(r);
+
 	return 0;
 
 out_undebugfs:
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index bb11b36ee8a2..281e7cf2b8e5 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/vfio.h>
+#include "vfio.h"
 
 struct kvm_vfio_group {
 	struct list_head node;
@@ -278,8 +279,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
 	return 0;
 }
 
-static int __init kvm_vfio_ops_init(void)
+int kvm_vfio_ops_init(void)
 {
 	return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
 }
-module_init(kvm_vfio_ops_init);
diff --git a/virt/kvm/vfio.h b/virt/kvm/vfio.h
new file mode 100644
index 000000000000..92eac75d6b62
--- /dev/null
+++ b/virt/kvm/vfio.h
@@ -0,0 +1,13 @@
+#ifndef __KVM_VFIO_H
+#define __KVM_VFIO_H
+
+#ifdef CONFIG_KVM_VFIO
+int kvm_vfio_ops_init(void);
+#else
+static inline int kvm_vfio_ops_init(void)
+{
+	return 0;
+}
+#endif
+
+#endif
-- 
cgit v1.2.3


From 234b239bea395316d7f78018c672f4a88b3cdf0d Mon Sep 17 00:00:00 2001
From: Andres Lagar-Cavilla <andreslc@google.com>
Date: Wed, 17 Sep 2014 10:51:48 -0700
Subject: kvm: Faults which trigger IO release the mmap_sem
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When KVM handles a tdp fault it uses FOLL_NOWAIT. If the guest memory
has been swapped out or is behind a filemap, this will trigger async
readahead and return immediately. The rationale is that KVM will kick
back the guest with an "async page fault" and allow for some other
guest process to take over.

If async PFs are enabled the fault is retried asap from an async
workqueue. If not, it's retried immediately in the same code path. In
either case the retry will not relinquish the mmap semaphore and will
block on the IO. This is a bad thing, as other mmap semaphore users
now stall as a function of swap or filemap latency.

This patch ensures both the regular and async PF path re-enter the
fault allowing for the mmap semaphore to be relinquished in the case
of IO wait.

Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Andres Lagar-Cavilla <andreslc@google.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 11 +++++++++++
 include/linux/mm.h       |  1 +
 mm/gup.c                 |  4 ++++
 virt/kvm/async_pf.c      |  4 +---
 virt/kvm/kvm_main.c      | 49 +++++++++++++++++++++++++++++++++++++++++++++---
 5 files changed, 63 insertions(+), 6 deletions(-)

(limited to 'virt/kvm')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d44a2d640551..45aaeb3360c9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -198,6 +198,17 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
 int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 #endif
 
+/*
+ * Carry out a gup that requires IO. Allow the mm to relinquish the mmap
+ * semaphore if the filemap/swap has to wait on a page lock. pagep == NULL
+ * controls whether we retry the gup one more time to completion in that case.
+ * Typically this is called after a FAULT_FLAG_RETRY_NOWAIT in the main tdp
+ * handler.
+ */
+int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
+			 unsigned long addr, bool write_fault,
+			 struct page **pagep);
+
 enum {
 	OUTSIDE_GUEST_MODE,
 	IN_GUEST_MODE,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8981cc882ed2..0f4196a0bc20 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1985,6 +1985,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
 #define FOLL_HWPOISON	0x100	/* check page is hwpoisoned */
 #define FOLL_NUMA	0x200	/* force NUMA hinting page fault */
 #define FOLL_MIGRATION	0x400	/* wait for page to replace migration entry */
+#define FOLL_TRIED	0x800	/* a retry, previous pass started an IO */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
diff --git a/mm/gup.c b/mm/gup.c
index 91d044b1600d..af7ea3e0826b 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -281,6 +281,10 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
 		fault_flags |= FAULT_FLAG_ALLOW_RETRY;
 	if (*flags & FOLL_NOWAIT)
 		fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
+	if (*flags & FOLL_TRIED) {
+		VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_ALLOW_RETRY);
+		fault_flags |= FAULT_FLAG_TRIED;
+	}
 
 	ret = handle_mm_fault(mm, vma, address, fault_flags);
 	if (ret & VM_FAULT_ERROR) {
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index d6a3d0993d88..5ff7f7f2689a 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -80,9 +80,7 @@ static void async_pf_execute(struct work_struct *work)
 
 	might_sleep();
 
-	down_read(&mm->mmap_sem);
-	get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL);
-	up_read(&mm->mmap_sem);
+	kvm_get_user_page_io(NULL, mm, addr, 1, NULL);
 	kvm_async_page_present_sync(vcpu, apf);
 
 	spin_lock(&vcpu->async_pf.lock);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 499db0977f3c..1c6e8476b244 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1122,6 +1122,43 @@ static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
 	return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL);
 }
 
+int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
+			 unsigned long addr, bool write_fault,
+			 struct page **pagep)
+{
+	int npages;
+	int locked = 1;
+	int flags = FOLL_TOUCH | FOLL_HWPOISON |
+		    (pagep ? FOLL_GET : 0) |
+		    (write_fault ? FOLL_WRITE : 0);
+
+	/*
+	 * If retrying the fault, we get here *not* having allowed the filemap
+	 * to wait on the page lock. We should now allow waiting on the IO with
+	 * the mmap semaphore released.
+	 */
+	down_read(&mm->mmap_sem);
+	npages = __get_user_pages(tsk, mm, addr, 1, flags, pagep, NULL,
+				  &locked);
+	if (!locked) {
+		VM_BUG_ON(npages != -EBUSY);
+
+		if (!pagep)
+			return 0;
+
+		/*
+		 * The previous call has now waited on the IO. Now we can
+		 * retry and complete. Pass TRIED to ensure we do not re
+		 * schedule async IO (see e.g. filemap_fault).
+		 */
+		down_read(&mm->mmap_sem);
+		npages = __get_user_pages(tsk, mm, addr, 1, flags | FOLL_TRIED,
+					  pagep, NULL, NULL);
+	}
+	up_read(&mm->mmap_sem);
+	return npages;
+}
+
 static inline int check_user_page_hwpoison(unsigned long addr)
 {
 	int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE;
@@ -1184,9 +1221,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
 		npages = get_user_page_nowait(current, current->mm,
 					      addr, write_fault, page);
 		up_read(&current->mm->mmap_sem);
-	} else
-		npages = get_user_pages_fast(addr, 1, write_fault,
-					     page);
+	} else {
+		/*
+		 * By now we have tried gup_fast, and possibly async_pf, and we
+		 * are certainly not atomic. Time to retry the gup, allowing
+		 * mmap semaphore to be relinquished in the case of IO.
+		 */
+		npages = kvm_get_user_page_io(current, current->mm, addr,
+					      write_fault, page);
+	}
 	if (npages != 1)
 		return npages;
 
-- 
cgit v1.2.3


From 2ea75be3219571d0ec009ce20d9971e54af96e09 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 19 Sep 2014 16:03:25 -0700
Subject: kvm: don't take vcpu mutex for obviously invalid vcpu ioctls

vcpu ioctls can hang the calling thread if issued while a vcpu is running.
However, invalid ioctls can happen when userspace tries to probe the kind
of file descriptors (e.g. isatty() calls ioctl(TCGETS)); in that case,
we know the ioctl is going to be rejected as invalid anyway and we can
fail before trying to take the vcpu mutex.

This patch does not change functionality, it just makes invalid ioctls
fail faster.

Cc: stable@vger.kernel.org
Signed-off-by: David Matlack <dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'virt/kvm')

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1c6e8476b244..ff42b11d2b9c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -52,6 +52,7 @@
 
 #include <asm/processor.h>
 #include <asm/io.h>
+#include <asm/ioctl.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
@@ -2032,6 +2033,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
 	if (vcpu->kvm->mm != current->mm)
 		return -EIO;
 
+	if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
+		return -EINVAL;
+
 #if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
 	/*
 	 * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
-- 
cgit v1.2.3


From 57128468080a8b6ea452223036d3e417f748af55 Mon Sep 17 00:00:00 2001
From: Andres Lagar-Cavilla <andreslc@google.com>
Date: Mon, 22 Sep 2014 14:54:42 -0700
Subject: kvm: Fix page ageing bugs

1. We were calling clear_flush_young_notify in unmap_one, but we are
within an mmu notifier invalidate range scope. The spte exists no more
(due to range_start) and the accessed bit info has already been
propagated (due to kvm_pfn_set_accessed). Simply call
clear_flush_young.

2. We clear_flush_young on a primary MMU PMD, but this may be mapped
as a collection of PTEs by the secondary MMU (e.g. during log-dirty).
This required expanding the interface of the clear_flush_young mmu
notifier, so a lot of code has been trivially touched.

3. In the absence of shadow_accessed_mask (e.g. EPT A bit), we emulate
the access bit by blowing the spte. This requires proper synchronizing
with MMU notifier consumers, like every other removal of spte's does.

Signed-off-by: Andres Lagar-Cavilla <andreslc@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm/include/asm/kvm_host.h     |  3 ++-
 arch/arm64/include/asm/kvm_host.h   |  3 ++-
 arch/powerpc/include/asm/kvm_host.h |  2 +-
 arch/powerpc/include/asm/kvm_ppc.h  |  2 +-
 arch/powerpc/kvm/book3s.c           |  4 ++--
 arch/powerpc/kvm/book3s.h           |  3 ++-
 arch/powerpc/kvm/book3s_64_mmu_hv.c |  4 ++--
 arch/powerpc/kvm/book3s_pr.c        |  3 ++-
 arch/powerpc/kvm/e500_mmu_host.c    |  2 +-
 arch/x86/include/asm/kvm_host.h     |  2 +-
 arch/x86/kvm/mmu.c                  | 38 ++++++++++++++++++++++---------------
 drivers/iommu/amd_iommu_v2.c        |  6 ++++--
 include/linux/mmu_notifier.h        | 24 ++++++++++++++++-------
 mm/mmu_notifier.c                   |  5 +++--
 mm/rmap.c                           |  6 +++++-
 virt/kvm/kvm_main.c                 |  5 +++--
 16 files changed, 71 insertions(+), 41 deletions(-)

(limited to 'virt/kvm')

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 032a8538318a..8c3f7eb62b54 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -170,7 +170,8 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 
 /* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
+			      unsigned long end)
 {
 	return 0;
 }
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index be9970a59497..a3c671b3acc9 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -180,7 +180,8 @@ int kvm_unmap_hva_range(struct kvm *kvm,
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
 /* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
+			      unsigned long end)
 {
 	return 0;
 }
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 604000882352..d329bc5543a2 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -56,7 +56,7 @@
 extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 extern int kvm_unmap_hva_range(struct kvm *kvm,
 			       unsigned long start, unsigned long end);
-extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index fb86a2299d8a..d4a92d7cea6a 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -243,7 +243,7 @@ struct kvmppc_ops {
 	int (*unmap_hva)(struct kvm *kvm, unsigned long hva);
 	int (*unmap_hva_range)(struct kvm *kvm, unsigned long start,
 			   unsigned long end);
-	int (*age_hva)(struct kvm *kvm, unsigned long hva);
+	int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end);
 	int (*test_age_hva)(struct kvm *kvm, unsigned long hva);
 	void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte);
 	void (*mmu_destroy)(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index dd03f6b299ba..c16cfbfeb781 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -851,9 +851,9 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 	return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
 }
 
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
 {
-	return kvm->arch.kvm_ops->age_hva(kvm, hva);
+	return kvm->arch.kvm_ops->age_hva(kvm, start, end);
 }
 
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 4bf956cf94d6..d2b3ec088b8c 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -17,7 +17,8 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
 extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva);
 extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start,
 				  unsigned long end);
-extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva);
+extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start,
+			  unsigned long end);
 extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva);
 extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte);
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 72c20bb16d26..81460c5359c0 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1002,11 +1002,11 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	return ret;
 }
 
-int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
 {
 	if (!kvm->arch.using_mmu_notifiers)
 		return 0;
-	return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+	return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp);
 }
 
 static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index faffb27badd9..852fcd8951c4 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -295,7 +295,8 @@ static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
 	return 0;
 }
 
-static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva)
+static int kvm_age_hva_pr(struct kvm *kvm, unsigned long start,
+			  unsigned long end)
 {
 	/* XXX could be more clever ;) */
 	return 0;
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 08f14bb57897..b1f3f630315e 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -732,7 +732,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 	return 0;
 }
 
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
 {
 	/* XXX could be more clever ;) */
 	return 0;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index eeeb573fcf6f..763d273cab1d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1035,7 +1035,7 @@ asmlinkage void kvm_spurious_fault(void);
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
-int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 47d534066325..3201e93ebd07 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1417,18 +1417,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	struct rmap_iterator uninitialized_var(iter);
 	int young = 0;
 
-	/*
-	 * In case of absence of EPT Access and Dirty Bits supports,
-	 * emulate the accessed bit for EPT, by checking if this page has
-	 * an EPT mapping, and clearing it if it does. On the next access,
-	 * a new EPT mapping will be established.
-	 * This has some overhead, but not as much as the cost of swapping
-	 * out actively used pages or breaking up actively used hugepages.
-	 */
-	if (!shadow_accessed_mask) {
-		young = kvm_unmap_rmapp(kvm, rmapp, slot, gfn, level, data);
-		goto out;
-	}
+	BUG_ON(!shadow_accessed_mask);
 
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
 	     sptep = rmap_get_next(&iter)) {
@@ -1440,7 +1429,6 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 				 (unsigned long *)sptep);
 		}
 	}
-out:
 	trace_kvm_age_page(gfn, level, slot, young);
 	return young;
 }
@@ -1489,9 +1477,29 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 	kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
 {
-	return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
+	/*
+	 * In case of absence of EPT Access and Dirty Bits supports,
+	 * emulate the accessed bit for EPT, by checking if this page has
+	 * an EPT mapping, and clearing it if it does. On the next access,
+	 * a new EPT mapping will be established.
+	 * This has some overhead, but not as much as the cost of swapping
+	 * out actively used pages or breaking up actively used hugepages.
+	 */
+	if (!shadow_accessed_mask) {
+		/*
+		 * We are holding the kvm->mmu_lock, and we are blowing up
+		 * shadow PTEs. MMU notifier consumers need to be kept at bay.
+		 * This is correct as long as we don't decouple the mmu_lock
+		 * protected regions (like invalidate_range_start|end does).
+		 */
+		kvm->mmu_notifier_seq++;
+		return kvm_handle_hva_range(kvm, start, end, 0,
+					    kvm_unmap_rmapp);
+	}
+
+	return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
 }
 
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 5f578e850fc5..90d734bbf467 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -402,9 +402,11 @@ static void __mn_flush_page(struct mmu_notifier *mn,
 
 static int mn_clear_flush_young(struct mmu_notifier *mn,
 				struct mm_struct *mm,
-				unsigned long address)
+				unsigned long start,
+				unsigned long end)
 {
-	__mn_flush_page(mn, address);
+	for (; start < end; start += PAGE_SIZE)
+		__mn_flush_page(mn, start);
 
 	return 0;
 }
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 27288692241e..88787bb4b3b9 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -57,10 +57,13 @@ struct mmu_notifier_ops {
 	 * pte. This way the VM will provide proper aging to the
 	 * accesses to the page through the secondary MMUs and not
 	 * only to the ones through the Linux pte.
+	 * Start-end is necessary in case the secondary MMU is mapping the page
+	 * at a smaller granularity than the primary MMU.
 	 */
 	int (*clear_flush_young)(struct mmu_notifier *mn,
 				 struct mm_struct *mm,
-				 unsigned long address);
+				 unsigned long start,
+				 unsigned long end);
 
 	/*
 	 * test_young is called to check the young/accessed bitflag in
@@ -175,7 +178,8 @@ extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
 extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
 extern void __mmu_notifier_release(struct mm_struct *mm);
 extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
-					  unsigned long address);
+					  unsigned long start,
+					  unsigned long end);
 extern int __mmu_notifier_test_young(struct mm_struct *mm,
 				     unsigned long address);
 extern void __mmu_notifier_change_pte(struct mm_struct *mm,
@@ -194,10 +198,11 @@ static inline void mmu_notifier_release(struct mm_struct *mm)
 }
 
 static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
-					  unsigned long address)
+					  unsigned long start,
+					  unsigned long end)
 {
 	if (mm_has_notifiers(mm))
-		return __mmu_notifier_clear_flush_young(mm, address);
+		return __mmu_notifier_clear_flush_young(mm, start, end);
 	return 0;
 }
 
@@ -255,7 +260,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 	unsigned long ___address = __address;				\
 	__young = ptep_clear_flush_young(___vma, ___address, __ptep);	\
 	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
-						  ___address);		\
+						  ___address,		\
+						  ___address +		\
+							PAGE_SIZE);	\
 	__young;							\
 })
 
@@ -266,7 +273,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 	unsigned long ___address = __address;				\
 	__young = pmdp_clear_flush_young(___vma, ___address, __pmdp);	\
 	__young |= mmu_notifier_clear_flush_young(___vma->vm_mm,	\
-						  ___address);		\
+						  ___address,		\
+						  ___address +		\
+							PMD_SIZE);	\
 	__young;							\
 })
 
@@ -301,7 +310,8 @@ static inline void mmu_notifier_release(struct mm_struct *mm)
 }
 
 static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
-					  unsigned long address)
+					  unsigned long start,
+					  unsigned long end)
 {
 	return 0;
 }
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 950813b1eb36..2c8da9825fe3 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -107,7 +107,8 @@ void __mmu_notifier_release(struct mm_struct *mm)
  * existed or not.
  */
 int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
-					unsigned long address)
+					unsigned long start,
+					unsigned long end)
 {
 	struct mmu_notifier *mn;
 	int young = 0, id;
@@ -115,7 +116,7 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
 	id = srcu_read_lock(&srcu);
 	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
 		if (mn->ops->clear_flush_young)
-			young |= mn->ops->clear_flush_young(mn, mm, address);
+			young |= mn->ops->clear_flush_young(mn, mm, start, end);
 	}
 	srcu_read_unlock(&srcu, id);
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 3e8491c504f8..bc74e0012809 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1355,7 +1355,11 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 			continue;	/* don't unmap */
 		}
 
-		if (ptep_clear_flush_young_notify(vma, address, pte))
+		/*
+		 * No need for _notify because we're within an
+		 * mmu_notifier_invalidate_range_ {start|end} scope.
+		 */
+		if (ptep_clear_flush_young(vma, address, pte))
 			continue;
 
 		/* Nuke the page table entry. */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ff42b11d2b9c..0316314d48f4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -369,7 +369,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 
 static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 					      struct mm_struct *mm,
-					      unsigned long address)
+					      unsigned long start,
+					      unsigned long end)
 {
 	struct kvm *kvm = mmu_notifier_to_kvm(mn);
 	int young, idx;
@@ -377,7 +378,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 	idx = srcu_read_lock(&kvm->srcu);
 	spin_lock(&kvm->mmu_lock);
 
-	young = kvm_age_hva(kvm, address);
+	young = kvm_age_hva(kvm, start, end);
 	if (young)
 		kvm_flush_remote_tlbs(kvm);
 
-- 
cgit v1.2.3


From 445b8236959bfe624a5aa9bce89f44a3bec9b2b1 Mon Sep 17 00:00:00 2001
From: Tang Chen <tangchen@cn.fujitsu.com>
Date: Wed, 24 Sep 2014 15:57:55 +0800
Subject: kvm: Rename make_all_cpus_request() to kvm_make_all_cpus_request()
 and make it non-static

Different architectures need different requests, and in fact we
will use this function in architecture-specific code later. This
will be outside kvm_main.c, so make it non-static and rename it to
kvm_make_all_cpus_request().

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      | 10 +++++-----
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'virt/kvm')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 45aaeb3360c9..12c591fc2571 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -586,6 +586,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
 void kvm_make_mclock_inprogress_request(struct kvm *kvm);
 void kvm_make_scan_ioapic_request(struct kvm *kvm);
+bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
 
 long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0316314d48f4..5b8ca365932a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -153,7 +153,7 @@ static void ack_flush(void *_completed)
 {
 }
 
-static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
+bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 {
 	int i, cpu, me;
 	cpumask_var_t cpus;
@@ -190,7 +190,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 	long dirty_count = kvm->tlbs_dirty;
 
 	smp_mb();
-	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+	if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
 		++kvm->stat.remote_tlb_flush;
 	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
@@ -198,17 +198,17 @@ EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+	kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
 }
 
 void kvm_make_mclock_inprogress_request(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
+	kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
 }
 
 void kvm_make_scan_ioapic_request(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
+	kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
 }
 
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
-- 
cgit v1.2.3


From fe71557afbec641fee73711e40602bed37f6f33b Mon Sep 17 00:00:00 2001
From: Tang Chen <tangchen@cn.fujitsu.com>
Date: Wed, 24 Sep 2014 15:57:57 +0800
Subject: kvm: Add arch specific mmu notifier for page invalidation

This will be used to let the guest run while the APIC access page is
not pinned.  Because subsequent patches will fill in the function
for x86, place the (still empty) x86 implementation in the x86.c file
instead of adding an inline function in kvm_host.h.

Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm/include/asm/kvm_host.h     | 5 +++++
 arch/arm64/include/asm/kvm_host.h   | 5 +++++
 arch/powerpc/include/asm/kvm_host.h | 5 +++++
 arch/x86/include/asm/kvm_host.h     | 2 ++
 arch/x86/kvm/x86.c                  | 5 +++++
 virt/kvm/kvm_main.c                 | 3 +++
 6 files changed, 25 insertions(+)

(limited to 'virt/kvm')

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 8c3f7eb62b54..155497c2b4da 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -181,6 +181,11 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
 	return 0;
 }
 
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+							 unsigned long address)
+{
+}
+
 struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a3c671b3acc9..992d9da88119 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -191,6 +191,11 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
 	return 0;
 }
 
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+							 unsigned long address)
+{
+}
+
 struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
 
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d329bc5543a2..2cf6c1587d43 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -60,6 +60,11 @@ extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
+static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+							 unsigned long address)
+{
+}
+
 #define HPTEG_CACHE_NUM			(1 << 15)
 #define HPTEG_HASH_BITS_PTE		13
 #define HPTEG_HASH_BITS_PTE_LONG	12
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 763d273cab1d..022c356e0fed 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1044,6 +1044,8 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
+void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+					   unsigned long address);
 
 void kvm_define_shared_msr(unsigned index, u32 msr);
 void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6152aa0da7dd..142569e6f8f9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6026,6 +6026,11 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->tlb_flush(vcpu);
 }
 
+void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
+					   unsigned long address)
+{
+}
+
 /*
  * Returns 1 to let __vcpu_run() continue the guest execution loop without
  * exiting to the userspace.  Otherwise, the value will be returned to the
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5b8ca365932a..3f16f569169e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -296,6 +296,9 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 		kvm_flush_remote_tlbs(kvm);
 
 	spin_unlock(&kvm->mmu_lock);
+
+	kvm_arch_mmu_notifier_invalidate_page(kvm, address);
+
 	srcu_read_unlock(&kvm->srcu, idx);
 }
 
-- 
cgit v1.2.3


From 0fea6d7628ed6e25a9ee1b67edf7c859718d39e8 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 25 Sep 2014 18:41:07 +0200
Subject: arm/arm64: KVM: Fix set_clear_sgi_pend_reg offset

The sgi values calculated in read_set_clear_sgi_pend_reg() and
write_set_clear_sgi_pend_reg() were horribly incorrectly multiplied by 4
with catastrophic results in that subfunctions ended up overwriting
memory not allocated for the expected purpose.

This showed up as bugs in kfree() and the kernel complaining a lot of
you turn on memory debugging.

This addresses: http://marc.info/?l=kvm&m=141164910007868&w=2

Reported-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index b6fab0f25f3b..862967852d5a 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -816,7 +816,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	int sgi;
-	int min_sgi = (offset & ~0x3) * 4;
+	int min_sgi = (offset & ~0x3);
 	int max_sgi = min_sgi + 3;
 	int vcpu_id = vcpu->vcpu_id;
 	u32 reg = 0;
@@ -837,7 +837,7 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	int sgi;
-	int min_sgi = (offset & ~0x3) * 4;
+	int min_sgi = (offset & ~0x3);
 	int max_sgi = min_sgi + 3;
 	int vcpu_id = vcpu->vcpu_id;
 	u32 reg;
-- 
cgit v1.2.3


From bb0ca6acd466af55c95b7ce508f29e23a24cabd9 Mon Sep 17 00:00:00 2001
From: Andres Lagar-Cavilla <andreslc@google.com>
Date: Thu, 25 Sep 2014 15:26:50 -0700
Subject: kvm: Fix kvm_get_page_retry_io __gup retval check

Confusion around -EBUSY and zero (inside a BUG_ON no less).

Reported-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andres Lagar-Cavilla <andreslc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3f16f569169e..a1cf53ee0d28 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1146,7 +1146,7 @@ int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
 	npages = __get_user_pages(tsk, mm, addr, 1, flags, pagep, NULL,
 				  &locked);
 	if (!locked) {
-		VM_BUG_ON(npages != -EBUSY);
+		VM_BUG_ON(npages);
 
 		if (!pagep)
 			return 0;
-- 
cgit v1.2.3