Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini: "One of the largest releases for KVM... Hardly any generic changes, but lots of architecture-specific updates. ARM: - VHE support so that we can run the kernel at EL2 on ARMv8.1 systems - PMU support for guests - 32bit world switch rewritten in C - various optimizations to the vgic save/restore code. PPC: - enabled KVM-VFIO integration ("VFIO device") - optimizations to speed up IPIs between vcpus - in-kernel handling of IOMMU hypercalls - support for dynamic DMA windows (DDW). s390: - provide the floating point registers via sync regs; - separated instruction vs. data accesses - dirty log improvements for huge guests - bugfixes and documentation improvements. x86: - Hyper-V VMBus hypercall userspace exit - alternative implementation of lowest-priority interrupts using vector hashing (for better VT-d posted interrupt support) - fixed guest debugging with nested virtualizations - improved interrupt tracking in the in-kernel IOAPIC - generic infrastructure for tracking writes to guest memory - currently its only use is to speedup the legacy shadow paging (pre-EPT) case, but in the future it will be used for virtual GPUs as well - much cleanup (LAPIC, kvmclock, MMU, PIT), including ubsan fixes" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (217 commits) KVM: x86: remove eager_fpu field of struct kvm_vcpu_arch KVM: x86: disable MPX if host did not enable MPX XSAVE features arm64: KVM: vgic-v3: Only wipe LRs on vcpu exit arm64: KVM: vgic-v3: Reset LRs at boot time arm64: KVM: vgic-v3: Do not save an LR known to be empty arm64: KVM: vgic-v3: Save maintenance interrupt state only if required arm64: KVM: vgic-v3: Avoid accessing ICH registers KVM: arm/arm64: vgic-v2: Make GICD_SGIR quicker to hit KVM: arm/arm64: vgic-v2: Only wipe LRs on vcpu exit KVM: arm/arm64: vgic-v2: Reset LRs at boot time KVM: arm/arm64: vgic-v2: Do not save an LR known to be empty KVM: arm/arm64: vgic-v2: Move GICH_ELRSR saving to its own function KVM: arm/arm64: vgic-v2: Save maintenance interrupt state only if required KVM: arm/arm64: vgic-v2: Avoid accessing GICH registers KVM: s390: allocate only one DMA page per VM KVM: s390: enable STFLE interpretation only if enabled for the guest KVM: s390: wake up when the VCPU cpu timer expires KVM: s390: step the VCPU timer while in enabled wait KVM: s390: protect VCPU cpu timer with a seqcount KVM: s390: step VCPU cpu timer during kvm_run ioctl ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2016-03-16 19:55:35 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-03-16 19:55:35 +0300
commit: 10dc3747661bea9215417b659449bb7b8ed3df2c (patch)
tree: d943974b4941203a7db2fabe4896852cf0f16bc4 /arch/x86/include
parent: 047486d8e7c2a7e8d75b068b69cb67b47364f5d4 (diff)
parent: f958ee745f70b60d0e41927cab2c073104bc70c2 (diff)
download: linux-10dc3747661bea9215417b659449bb7b8ed3df2c.tar.xz
3 files changed, 84 insertions, 12 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 44adbb819041..01c8b501cb6d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -32,6 +32,7 @@
 #include <asm/mtrr.h>
 #include <asm/msr-index.h>
 #include <asm/asm.h>
+#include <asm/kvm_page_track.h>
 
 #define KVM_MAX_VCPUS 255
 #define KVM_SOFT_MAX_VCPUS 160
@@ -214,6 +215,14 @@ struct kvm_mmu_memory_cache {
 	void *objects[KVM_NR_MEM_OBJS];
 };
 
+/*
+ * the pages used as guest page table on soft mmu are tracked by
+ * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
+ * by indirect shadow page can not be more than 15 bits.
+ *
+ * Currently, we used 14 bits that are @level, @cr4_pae, @quadrant, @access,
+ * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
+ */
 union kvm_mmu_page_role {
 	unsigned word;
 	struct {
@@ -276,7 +285,7 @@ struct kvm_mmu_page {
 #endif
 
 	/* Number of writes since the last time traversal visited this page.  */
-	int write_flooding_count;
+	atomic_t write_flooding_count;
 };
 
 struct kvm_pio_request {
@@ -338,12 +347,8 @@ struct kvm_mmu {
 
 	struct rsvd_bits_validate guest_rsvd_check;
 
-	/*
-	 * Bitmap: bit set = last pte in walk
-	 * index[0:1]: level (zero-based)
-	 * index[2]: pte.ps
-	 */
-	u8 last_pte_bitmap;
+	/* Can have large pages at levels 2..last_nonleaf_level-1. */
+	u8 last_nonleaf_level;
 
 	bool nx;
 
@@ -498,7 +503,6 @@ struct kvm_vcpu_arch {
 	struct kvm_mmu_memory_cache mmu_page_header_cache;
 
 	struct fpu guest_fpu;
-	bool eager_fpu;
 	u64 xcr0;
 	u64 guest_supported_xcr0;
 	u32 guest_xstate_size;
@@ -644,12 +648,13 @@ struct kvm_vcpu_arch {
 };
 
 struct kvm_lpage_info {
-	int write_count;
+	int disallow_lpage;
 };
 
 struct kvm_arch_memory_slot {
 	struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
 	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
+	unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
 };
 
 /*
@@ -694,6 +699,8 @@ struct kvm_arch {
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head zapped_obsolete_pages;
+	struct kvm_page_track_notifier_node mmu_sp_tracker;
+	struct kvm_page_track_notifier_head track_notifier_head;
 
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
@@ -754,6 +761,8 @@ struct kvm_arch {
 
 	bool irqchip_split;
 	u8 nr_reserved_ioapic_pins;
+
+	bool disabled_lapic_found;
 };
 
 struct kvm_vm_stat {
@@ -988,6 +997,8 @@ void kvm_mmu_module_exit(void);
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
 void kvm_mmu_setup(struct kvm_vcpu *vcpu);
+void kvm_mmu_init_vm(struct kvm *kvm);
+void kvm_mmu_uninit_vm(struct kvm *kvm);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 		u64 dirty_mask, u64 nx_mask, u64 x_mask);
 
@@ -1127,8 +1138,6 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
 
 void kvm_inject_nmi(struct kvm_vcpu *vcpu);
 
-void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
-		       const u8 *new, int bytes);
 int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
 int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
new file mode 100644
index 000000000000..c2b8d24a235c
--- /dev/null
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -0,0 +1,61 @@
+#ifndef _ASM_X86_KVM_PAGE_TRACK_H
+#define _ASM_X86_KVM_PAGE_TRACK_H
+
+enum kvm_page_track_mode {
+	KVM_PAGE_TRACK_WRITE,
+	KVM_PAGE_TRACK_MAX,
+};
+
+/*
+ * The notifier represented by @kvm_page_track_notifier_node is linked into
+ * the head which will be notified when guest is triggering the track event.
+ *
+ * Write access on the head is protected by kvm->mmu_lock, read access
+ * is protected by track_srcu.
+ */
+struct kvm_page_track_notifier_head {
+	struct srcu_struct track_srcu;
+	struct hlist_head track_notifier_list;
+};
+
+struct kvm_page_track_notifier_node {
+	struct hlist_node node;
+
+	/*
+	 * It is called when guest is writing the write-tracked page
+	 * and write emulation is finished at that time.
+	 *
+	 * @vcpu: the vcpu where the write access happened.
+	 * @gpa: the physical address written by guest.
+	 * @new: the data was written to the address.
+	 * @bytes: the written length.
+	 */
+	void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
+			    int bytes);
+};
+
+void kvm_page_track_init(struct kvm *kvm);
+
+void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
+				 struct kvm_memory_slot *dont);
+int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
+				  unsigned long npages);
+
+void kvm_slot_page_track_add_page(struct kvm *kvm,
+				  struct kvm_memory_slot *slot, gfn_t gfn,
+				  enum kvm_page_track_mode mode);
+void kvm_slot_page_track_remove_page(struct kvm *kvm,
+				     struct kvm_memory_slot *slot, gfn_t gfn,
+				     enum kvm_page_track_mode mode);
+bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
+			      enum kvm_page_track_mode mode);
+
+void
+kvm_page_track_register_notifier(struct kvm *kvm,
+				 struct kvm_page_track_notifier_node *n);
+void
+kvm_page_track_unregister_notifier(struct kvm *kvm,
+				   struct kvm_page_track_notifier_node *n);
+void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
+			  int bytes);
+#endif
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index 7956412d09bd..9b1a91834ac8 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -226,7 +226,9 @@
 		(~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
 
 /* Declare the various hypercall operations. */
-#define HV_X64_HV_NOTIFY_LONG_SPIN_WAIT		0x0008
+#define HVCALL_NOTIFY_LONG_SPIN_WAIT		0x0008
+#define HVCALL_POST_MESSAGE			0x005c
+#define HVCALL_SIGNAL_EVENT			0x005d
 
 #define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE		0x00000001
 #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT	12
author	Linus Torvalds <torvalds@linux-foundation.org>	2016-03-16 19:55:35 +0300
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-03-16 19:55:35 +0300
commit	10dc3747661bea9215417b659449bb7b8ed3df2c (patch)
tree	d943974b4941203a7db2fabe4896852cf0f16bc4 /arch/x86/include
parent	047486d8e7c2a7e8d75b068b69cb67b47364f5d4 (diff)
parent	f958ee745f70b60d0e41927cab2c073104bc70c2 (diff)
download	linux-10dc3747661bea9215417b659449bb7b8ed3df2c.tar.xz