Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "x86:

   - Support for userspace to emulate Xen hypercalls

   - Raise the maximum number of user memslots

   - Scalability improvements for the new MMU.

     Instead of the complex "fast page fault" logic that is used in
     mmu.c, tdp_mmu.c uses an rwlock so that page faults are concurrent,
     but the code that can run against page faults is limited. Right now
     only page faults take the lock for reading; in the future this will
     be extended to some cases of page table destruction. I hope to
     switch the default MMU around 5.12-rc3 (some testing was delayed
     due to Chinese New Year).

   - Cleanups for MAXPHYADDR checks

   - Use static calls for vendor-specific callbacks

   - On AMD, use VMLOAD/VMSAVE to save and restore host state

   - Stop using deprecated jump label APIs

   - Workaround for AMD erratum that made nested virtualization
     unreliable

   - Support for LBR emulation in the guest

   - Support for communicating bus lock vmexits to userspace

   - Add support for SEV attestation command

   - Miscellaneous cleanups

  PPC:

   - Support for second data watchpoint on POWER10

   - Remove some complex workarounds for buggy early versions of POWER9

   - Guest entry/exit fixes

  ARM64:

   - Make the nVHE EL2 object relocatable

   - Cleanups for concurrent translation faults hitting the same page

   - Support for the standard TRNG hypervisor call

   - A bunch of small PMU/Debug fixes

   - Simplification of the early init hypercall handling

  Non-KVM changes (with acks):

   - Detection of contended rwlocks (implemented only for qrwlocks,
     because KVM only needs it for x86)

   - Allow __DISABLE_EXPORTS from assembly code

   - Provide saner follow_pfn replacements for modules"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (192 commits)
  KVM: x86/xen: Explicitly pad struct compat_vcpu_info to 64 bytes
  KVM: selftests: Don't bother mapping GVA for Xen shinfo test
  KVM: selftests: Fix hex vs. decimal snafu in Xen test
  KVM: selftests: Fix size of memslots created by Xen tests
  KVM: selftests: Ignore recently added Xen tests' build output
  KVM: selftests: Add missing header file needed by xAPIC IPI tests
  KVM: selftests: Add operand to vmsave/vmload/vmrun in svm.c
  KVM: SVM: Make symbol 'svm_gp_erratum_intercept' static
  locking/arch: Move qrwlock.h include after qspinlock.h
  KVM: PPC: Book3S HV: Fix host radix SLB optimisation with hash guests
  KVM: PPC: Book3S HV: Ensure radix guest has no SLB entries
  KVM: PPC: Don't always report hash MMU capability for P9 < DD2.2
  KVM: PPC: Book3S HV: Save and restore FSCR in the P9 path
  KVM: PPC: remove unneeded semicolon
  KVM: PPC: Book3S HV: Use POWER9 SLBIA IH=6 variant to clear SLB
  KVM: PPC: Book3S HV: No need to clear radix host SLB before loading HPT guest
  KVM: PPC: Book3S HV: Fix radix guest SLB side channel
  KVM: PPC: Book3S HV: Remove support for running HPT guest on RPT host without mixed mode support
  KVM: PPC: Book3S HV: Introduce new capability for 2nd DAWR
  KVM: PPC: Book3S HV: Add infrastructure to support 2nd DAWR
  ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2021-02-22 00:31:43 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2021-02-22 00:31:43 +0300
commit: 3e10585335b7967326ca7b4118cada0d2d00a2ab (patch)
tree: e1655bc4f093f7de3a54dc3b2d83a54159aca10b /tools
parent: 9c5b80b795e9c847a7b7f5e63c6bcf07873fbcdf (diff)
parent: 8c6e67bec3192f16fa624203c8131e10cc4814ba (diff)
download: linux-3e10585335b7967326ca7b4118cada0d2d00a2ab.tar.xz
25 files changed, 1801 insertions, 102 deletions
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index c3af3f324c5a..9f18fa090f1f 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -644,6 +644,8 @@ struct kvm_ppc_cpu_char {
 #define KVM_REG_PPC_MMCR3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1)
 #define KVM_REG_PPC_SIER2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2)
 #define KVM_REG_PPC_SIER3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
+#define KVM_REG_PPC_DAWR1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4)
+#define KVM_REG_PPC_DAWRX1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 374c67875cdb..abb89bbe5635 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1058,6 +1058,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
 #define KVM_CAP_SYS_HYPERV_CPUID 191
 #define KVM_CAP_DIRTY_LOG_RING 192
+#define KVM_CAP_PPC_DAWR1 194
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index ce8f4ad39684..3a84394829ea 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -7,6 +7,7 @@
 /x86_64/cr4_cpuid_sync_test
 /x86_64/debug_regs
 /x86_64/evmcs_test
+/x86_64/get_cpuid_test
 /x86_64/kvm_pv_test
 /x86_64/hyperv_cpuid
 /x86_64/mmio_warning_test
@@ -24,10 +25,15 @@
 /x86_64/vmx_preemption_timer_test
 /x86_64/vmx_set_nested_state_test
 /x86_64/vmx_tsc_adjust_test
+/x86_64/xapic_ipi_test
+/x86_64/xen_shinfo_test
+/x86_64/xen_vmcall_test
 /x86_64/xss_msr_test
+/x86_64/vmx_pmu_msrs_test
 /demand_paging_test
 /dirty_log_test
 /dirty_log_perf_test
 /kvm_create_max_vcpus
+/memslot_modification_stress_test
 /set_memory_region_test
 /steal_time
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index fe41c6a0fa67..8c8eda429576 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -40,6 +40,7 @@ LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_ha
 
 TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
 TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
+TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
 TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
 TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
@@ -56,13 +57,18 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
 TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
 TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
 TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test
+TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
+TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
 TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
 TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
 TEST_GEN_PROGS_x86_64 += set_memory_region_test
 TEST_GEN_PROGS_x86_64 += steal_time
 
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index cdad1eca72f7..5f7a229c3af1 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -64,7 +64,7 @@ static void *vcpu_worker(void *data)
 			    exit_reason_str(run->exit_reason));
 	}
 
-	ts_diff = timespec_diff_now(start);
+	ts_diff = timespec_elapsed(start);
 	PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id,
 		       ts_diff.tv_sec, ts_diff.tv_nsec);
 
@@ -95,7 +95,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr)
 		return r;
 	}
 
-	ts_diff = timespec_diff_now(start);
+	ts_diff = timespec_elapsed(start);
 
 	PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid,
 		       timespec_to_ns(ts_diff));
@@ -190,7 +190,7 @@ static void *uffd_handler_thread_fn(void *arg)
 		pages++;
 	}
 
-	ts_diff = timespec_diff_now(start);
+	ts_diff = timespec_elapsed(start);
 	PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
 		       pages, ts_diff.tv_sec, ts_diff.tv_nsec,
-		       pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
+		       pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 1000000000.0)); /* 1e9 ns per second */
@@ -250,6 +250,7 @@ static int setup_demand_paging(struct kvm_vm *vm,
 struct test_params {
 	bool use_uffd;
 	useconds_t uffd_delay;
+	bool partition_vcpu_memory_access;
 };
 
 static void run_test(enum vm_guest_mode mode, void *arg)
@@ -265,7 +266,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	int vcpu_id;
 	int r;
 
-	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size);
+	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
+				 VM_MEM_SRC_ANONYMOUS);
 
 	perf_test_args.wr_fract = 1;
 
@@ -277,7 +279,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
 	TEST_ASSERT(vcpu_threads, "Memory allocation failed");
 
-	perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
+	perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
+			      p->partition_vcpu_memory_access);
 
 	if (p->use_uffd) {
 		uffd_handler_threads =
@@ -293,10 +296,19 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 		for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
 			vm_paddr_t vcpu_gpa;
 			void *vcpu_hva;
+			uint64_t vcpu_mem_size;
 
-			vcpu_gpa = guest_test_phys_mem + (vcpu_id * guest_percpu_mem_size);
+
+			if (p->partition_vcpu_memory_access) {
+				vcpu_gpa = guest_test_phys_mem +
+					   (vcpu_id * guest_percpu_mem_size);
+				vcpu_mem_size = guest_percpu_mem_size;
+			} else {
+				vcpu_gpa = guest_test_phys_mem;
+				vcpu_mem_size = guest_percpu_mem_size * nr_vcpus;
+			}
 			PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
-				       vcpu_id, vcpu_gpa, vcpu_gpa + guest_percpu_mem_size);
+				       vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size);
 
 			/* Cache the HVA pointer of the region */
 			vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
@@ -313,7 +325,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 						&uffd_handler_threads[vcpu_id],
 						pipefds[vcpu_id * 2],
 						p->uffd_delay, &uffd_args[vcpu_id],
-						vcpu_hva, guest_percpu_mem_size);
+						vcpu_hva, vcpu_mem_size);
 			if (r < 0)
 				exit(-r);
 		}
@@ -339,7 +351,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 		PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id);
 	}
 
-	ts_diff = timespec_diff_now(start);
+	ts_diff = timespec_elapsed(start);
 
 	pr_info("All vCPU threads joined\n");
 
@@ -376,7 +388,7 @@ static void help(char *name)
 {
 	puts("");
 	printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n"
-	       "          [-b memory] [-v vcpus]\n", name);
+	       "          [-b memory] [-v vcpus] [-o]\n", name);
 	guest_modes_help();
 	printf(" -u: use User Fault FD to handle vCPU page\n"
 	       "     faults.\n");
@@ -387,6 +399,8 @@ static void help(char *name)
 	       "     demand paged by each vCPU. e.g. 10M or 3G.\n"
 	       "     Default: 1G\n");
 	printf(" -v: specify the number of vCPUs to run.\n");
+	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
+	       "     them into a separate region of memory for each vCPU.\n");
 	puts("");
 	exit(0);
 }
@@ -394,12 +408,14 @@ static void help(char *name)
 int main(int argc, char *argv[])
 {
 	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
-	struct test_params p = {};
+	struct test_params p = {
+		.partition_vcpu_memory_access = true,
+	};
 	int opt;
 
 	guest_modes_append_default();
 
-	while ((opt = getopt(argc, argv, "hm:ud:b:v:")) != -1) {
+	while ((opt = getopt(argc, argv, "hm:ud:b:v:o")) != -1) {
 		switch (opt) {
 		case 'm':
 			guest_modes_cmdline(optarg);
@@ -419,6 +435,9 @@ int main(int argc, char *argv[])
 			TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
 				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
 			break;
+		case 'o':
+			p.partition_vcpu_memory_access = false;
+			break;
 		case 'h':
 		default:
 			help(argv[0]);
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 2283a0ec74a9..04a2641261be 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -28,8 +28,8 @@ static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
 /* Host variables */
 static u64 dirty_log_manual_caps;
 static bool host_quit;
-static uint64_t iteration;
-static uint64_t vcpu_last_completed_iteration[KVM_MAX_VCPUS];
+static int iteration;
+static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
 
 static void *vcpu_worker(void *data)
 {
@@ -48,11 +48,11 @@ static void *vcpu_worker(void *data)
 	run = vcpu_state(vm, vcpu_id);
 
 	while (!READ_ONCE(host_quit)) {
-		uint64_t current_iteration = READ_ONCE(iteration);
+		int current_iteration = READ_ONCE(iteration);
 
 		clock_gettime(CLOCK_MONOTONIC, &start);
 		ret = _vcpu_run(vm, vcpu_id);
-		ts_diff = timespec_diff_now(start);
+		ts_diff = timespec_elapsed(start);
 
 		TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
 		TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC,
@@ -61,17 +61,17 @@ static void *vcpu_worker(void *data)
 
 		pr_debug("Got sync event from vCPU %d\n", vcpu_id);
 		vcpu_last_completed_iteration[vcpu_id] = current_iteration;
-		pr_debug("vCPU %d updated last completed iteration to %lu\n",
+		pr_debug("vCPU %d updated last completed iteration to %d\n",
 			 vcpu_id, vcpu_last_completed_iteration[vcpu_id]);
 
 		if (current_iteration) {
 			pages_count += vcpu_args->pages;
 			total = timespec_add(total, ts_diff);
-			pr_debug("vCPU %d iteration %lu dirty memory time: %ld.%.9lds\n",
+			pr_debug("vCPU %d iteration %d dirty memory time: %ld.%.9lds\n",
 				vcpu_id, current_iteration, ts_diff.tv_sec,
 				ts_diff.tv_nsec);
 		} else {
-			pr_debug("vCPU %d iteration %lu populate memory time: %ld.%.9lds\n",
+			pr_debug("vCPU %d iteration %d populate memory time: %ld.%.9lds\n",
 				vcpu_id, current_iteration, ts_diff.tv_sec,
 				ts_diff.tv_nsec);
 		}
@@ -81,7 +81,7 @@ static void *vcpu_worker(void *data)
 	}
 
 	avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_id]);
-	pr_debug("\nvCPU %d dirtied 0x%lx pages over %lu iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+	pr_debug("\nvCPU %d dirtied 0x%lx pages over %d iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
 		vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id],
 		total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec);
 
@@ -92,6 +92,8 @@ struct test_params {
 	unsigned long iterations;
 	uint64_t phys_offset;
 	int wr_fract;
+	bool partition_vcpu_memory_access;
+	enum vm_mem_backing_src_type backing_src;
 };
 
 static void run_test(enum vm_guest_mode mode, void *arg)
@@ -111,7 +113,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	struct kvm_enable_cap cap = {};
 	struct timespec clear_dirty_log_total = (struct timespec){0};
 
-	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size);
+	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
+				 p->backing_src);
 
 	perf_test_args.wr_fract = p->wr_fract;
 
@@ -129,7 +132,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
 	TEST_ASSERT(vcpu_threads, "Memory allocation failed");
 
-	perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
+	perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
+			      p->partition_vcpu_memory_access);
 
 	sync_global_to_guest(vm, perf_test_args);
 
@@ -139,17 +143,21 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	clock_gettime(CLOCK_MONOTONIC, &start);
 	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+		vcpu_last_completed_iteration[vcpu_id] = -1;
+
 		pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
 			       &perf_test_args.vcpu_args[vcpu_id]);
 	}
 
-	/* Allow the vCPU to populate memory */
-	pr_debug("Starting iteration %lu - Populating\n", iteration);
-	while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration)
-		pr_debug("Waiting for vcpu_last_completed_iteration == %lu\n",
-			iteration);
+	/* Allow the vCPUs to populate memory */
+	pr_debug("Starting iteration %d - Populating\n", iteration);
+	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+		while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) !=
+		       iteration)
+			;
+	}
 
-	ts_diff = timespec_diff_now(start);
+	ts_diff = timespec_elapsed(start);
 	pr_info("Populate memory time: %ld.%.9lds\n",
 		ts_diff.tv_sec, ts_diff.tv_nsec);
 
@@ -157,7 +165,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	clock_gettime(CLOCK_MONOTONIC, &start);
 	vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX,
 				KVM_MEM_LOG_DIRTY_PAGES);
-	ts_diff = timespec_diff_now(start);
+	ts_diff = timespec_elapsed(start);
 	pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
 		ts_diff.tv_sec, ts_diff.tv_nsec);
 
@@ -169,25 +177,25 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 		clock_gettime(CLOCK_MONOTONIC, &start);
 		iteration++;
 
-		pr_debug("Starting iteration %lu\n", iteration);
+		pr_debug("Starting iteration %d\n", iteration);
 		for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
-			while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration)
-				pr_debug("Waiting for vCPU %d vcpu_last_completed_iteration == %lu\n",
-					 vcpu_id, iteration);
+			while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id])
+			       != iteration)
+				;
 		}
 
-		ts_diff = timespec_diff_now(start);
+		ts_diff = timespec_elapsed(start);
 		vcpu_dirty_total = timespec_add(vcpu_dirty_total, ts_diff);
-		pr_info("Iteration %lu dirty memory time: %ld.%.9lds\n",
+		pr_info("Iteration %d dirty memory time: %ld.%.9lds\n",
 			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
 
 		clock_gettime(CLOCK_MONOTONIC, &start);
 		kvm_vm_get_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap);
 
-		ts_diff = timespec_diff_now(start);
+		ts_diff = timespec_elapsed(start);
 		get_dirty_log_total = timespec_add(get_dirty_log_total,
 						   ts_diff);
-		pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n",
+		pr_info("Iteration %d get dirty log time: %ld.%.9lds\n",
 			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
 
 		if (dirty_log_manual_caps) {
@@ -195,26 +203,26 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 			kvm_vm_clear_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap, 0,
 					       host_num_pages);
 
-			ts_diff = timespec_diff_now(start);
+			ts_diff = timespec_elapsed(start);
 			clear_dirty_log_total = timespec_add(clear_dirty_log_total,
 							     ts_diff);
-			pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n",
+			pr_info("Iteration %d clear dirty log time: %ld.%.9lds\n",
 				iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
 		}
 	}
 
-	/* Tell the vcpu thread to quit */
-	host_quit = true;
-	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
-		pthread_join(vcpu_threads[vcpu_id], NULL);
-
 	/* Disable dirty logging */
 	clock_gettime(CLOCK_MONOTONIC, &start);
 	vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX, 0);
-	ts_diff = timespec_diff_now(start);
+	ts_diff = timespec_elapsed(start);
 	pr_info("Disabling dirty logging time: %ld.%.9lds\n",
 		ts_diff.tv_sec, ts_diff.tv_nsec);
 
+	/* Tell the vcpu thread to quit */
+	host_quit = true;
+	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
+		pthread_join(vcpu_threads[vcpu_id], NULL);
+
 	avg = timespec_div(get_dirty_log_total, p->iterations);
 	pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
 		p->iterations, get_dirty_log_total.tv_sec,
@@ -236,7 +244,7 @@ static void help(char *name)
 {
 	puts("");
 	printf("usage: %s [-h] [-i iterations] [-p offset] "
-	       "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name);
+	       "[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]\n", name);
 	puts("");
 	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
 	       TEST_HOST_LOOP_N);
@@ -251,6 +259,11 @@ static void help(char *name)
 	       "     1/<fraction of pages to write>.\n"
 	       "     (default: 1 i.e. all pages are written to.)\n");
 	printf(" -v: specify the number of vCPUs to run.\n");
+	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
+	       "     them into a separate region of memory for each vCPU.\n");
+	printf(" -s: specify the type of memory that should be used to\n"
+	       "     back the guest data region.\n\n");
+	backing_src_help();
 	puts("");
 	exit(0);
 }
@@ -261,6 +274,8 @@ int main(int argc, char *argv[])
 	struct test_params p = {
 		.iterations = TEST_HOST_LOOP_N,
 		.wr_fract = 1,
+		.partition_vcpu_memory_access = true,
+		.backing_src = VM_MEM_SRC_ANONYMOUS,
 	};
 	int opt;
 
@@ -271,10 +286,10 @@ int main(int argc, char *argv[])
 
 	guest_modes_append_default();
 
-	while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) {
+	while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:os:")) != -1) {
 		switch (opt) {
 		case 'i':
-			p.iterations = strtol(optarg, NULL, 10);
+			p.iterations = atoi(optarg);
 			break;
 		case 'p':
 			p.phys_offset = strtoull(optarg, NULL, 0);
@@ -295,6 +310,12 @@ int main(int argc, char *argv[])
 			TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
 				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
 			break;
+		case 'o':
+			p.partition_vcpu_memory_access = false;
+			break;	/* -o has no argument: must not reach the optarg use below */
+		case 's':
+			p.backing_src = parse_backing_src_type(optarg);
+			break;
 		case 'h':
 		default:
 			help(argv[0]);
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 5cbb861525ed..2d7eb6989e83 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -79,12 +79,6 @@ struct vm_guest_mode_params {
 };
 extern const struct vm_guest_mode_params vm_guest_mode_params[];
 
-enum vm_mem_backing_src_type {
-	VM_MEM_SRC_ANONYMOUS,
-	VM_MEM_SRC_ANONYMOUS_THP,
-	VM_MEM_SRC_ANONYMOUS_HUGETLB,
-};
-
 int kvm_check_cap(long cap);
 int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
 int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
diff --git a/tools/testing/selftests/kvm/include/numaif.h b/tools/testing/selftests/kvm/include/numaif.h
new file mode 100644
index 000000000000..b020547403fd
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/numaif.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/numaif.h
+ *
+ * Copyright (C) 2020, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Header file that provides access to NUMA API functions not explicitly
+ * exported to user space.
+ */
+
+#ifndef SELFTEST_KVM_NUMAIF_H
+#define SELFTEST_KVM_NUMAIF_H
+
+#include <sys/syscall.h>
+#include <unistd.h>
+
+/*
+ * Fallback syscall numbers, used only when the system headers included above
+ * do not already define them.  239 and 256 are the x86_64 numbers.
+ */
+#ifndef __NR_get_mempolicy
+#define __NR_get_mempolicy 239
+#endif
+#ifndef __NR_migrate_pages
+#define __NR_migrate_pages 256
+#endif
+
+/*
+ * System calls
+ *
+ * Both wrappers forward their arguments unchanged to syscall() and return its
+ * result.  They are static inline so that including this header from more
+ * than one source file does not produce duplicate external definitions.
+ */
+static inline long get_mempolicy(int *policy, const unsigned long *nmask,
+				 unsigned long maxnode, void *addr, int flags)
+{
+	return syscall(__NR_get_mempolicy, policy, nmask,
+		       maxnode, addr, flags);
+}
+
+static inline long migrate_pages(int pid, unsigned long maxnode,
+				 const unsigned long *frommask,
+				 const unsigned long *tomask)
+{
+	return syscall(__NR_migrate_pages, pid, maxnode, frommask, tomask);
+}
+
+/* Policies */
+#define MPOL_DEFAULT	 0
+#define MPOL_PREFERRED	 1
+#define MPOL_BIND	 2
+#define MPOL_INTERLEAVE	 3
+
+#define MPOL_MAX MPOL_INTERLEAVE
+
+/* Flags for get_mem_policy */
+#define MPOL_F_NODE	    (1<<0)  /* return next il node or node of address */
+				    /* Warning: MPOL_F_NODE is unsupported and
+				     * subject to change. Don't use.
+				     */
+#define MPOL_F_ADDR	    (1<<1)  /* look up vma using address */
+#define MPOL_F_MEMS_ALLOWED (1<<2)  /* query nodes allowed in cpuset */
+
+/* Flags for mbind */
+#define MPOL_MF_STRICT	     (1<<0) /* Verify existing pages in the mapping */
+#define MPOL_MF_MOVE	     (1<<1) /* Move pages owned by this process to conform to mapping */
+#define MPOL_MF_MOVE_ALL     (1<<2) /* Move every page to conform to mapping */
+
+#endif /* SELFTEST_KVM_NUMAIF_H */
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index b1188823c31b..005f2143adeb 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -44,8 +44,11 @@ extern struct perf_test_args perf_test_args;
 extern uint64_t guest_test_phys_mem;
 
 struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
-				uint64_t vcpu_memory_bytes);
+				   uint64_t vcpu_memory_bytes,
+				   enum vm_mem_backing_src_type backing_src);
 void perf_test_destroy_vm(struct kvm_vm *vm);
-void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes);
+void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
+			   uint64_t vcpu_memory_bytes,
+			   bool partition_vcpu_memory_access);
 
 #endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index ffffa560436b..b7f41399f22c 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -64,7 +64,21 @@ int64_t timespec_to_ns(struct timespec ts);
 struct timespec timespec_add_ns(struct timespec ts, int64_t ns);
 struct timespec timespec_add(struct timespec ts1, struct timespec ts2);
 struct timespec timespec_sub(struct timespec ts1, struct timespec ts2);
-struct timespec timespec_diff_now(struct timespec start);
+struct timespec timespec_elapsed(struct timespec start);
 struct timespec timespec_div(struct timespec ts, int divisor);
 
+enum vm_mem_backing_src_type {		/* memory types that can back a guest region */
+	VM_MEM_SRC_ANONYMOUS,		/* "anonymous" on the command line */
+	VM_MEM_SRC_ANONYMOUS_THP,	/* "anonymous_thp" on the command line */
+	VM_MEM_SRC_ANONYMOUS_HUGETLB,	/* "anonymous_hugetlb" on the command line */
+};
+
+struct vm_mem_backing_src_alias {	/* one name -> backing type mapping entry */
+	const char *name;		/* string compared against the user-supplied name */
+	enum vm_mem_backing_src_type type;	/* value returned when name matches */
+};
+
+void backing_src_help(void);
+enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
+
 #endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 90cd5984751b..0b30b4e15c38 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -263,6 +263,19 @@ static inline void outl(uint16_t port, uint32_t value)
 	__asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
 }
 
+/*
+ * Run CPUID with *eax and *ecx as inputs and store the resulting
+ * EAX/EBX/ECX/EDX register values through the four pointers.
+ */
+static inline void cpuid(uint32_t *eax, uint32_t *ebx,
+			 uint32_t *ecx, uint32_t *edx)
+{
+	asm volatile("cpuid"
+		     : "+a" (*eax), "=b" (*ebx), "+c" (*ecx), "=d" (*edx)
+		     : /* eax and ecx are read-write ("+") operands */
+		     : "memory");
+}
+
 #define SET_XMM(__var, __xmm) \
 	asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
 
@@ -338,8 +351,10 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid,
 		     struct kvm_x86_state *state);
 
 struct kvm_msr_list *kvm_get_msr_index_list(void);
-
+uint64_t kvm_get_feature_msr(uint64_t msr_index);
 struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
+
+struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
 void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
 		    struct kvm_cpuid2 *cpuid);
 
@@ -391,6 +406,10 @@ bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent);
 uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
 		       uint64_t a3);
 
+struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
+void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
+struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
+
 /*
  * Basic CPU control in CR0
  */
@@ -406,8 +425,27 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
 #define X86_CR0_CD          (1UL<<30) /* Cache Disable */
 #define X86_CR0_PG          (1UL<<31) /* Paging */
 
+#define APIC_DEFAULT_GPA		0xfee00000ULL
+
+/* APIC base address MSR and fields */
+#define MSR_IA32_APICBASE		0x0000001b
+#define MSR_IA32_APICBASE_BSP		(1<<8)
+#define MSR_IA32_APICBASE_EXTD		(1<<10)
+#define MSR_IA32_APICBASE_ENABLE	(1<<11)
+#define MSR_IA32_APICBASE_BASE		(0xfffff<<12)
+#define		GET_APIC_BASE(x)	(((x) >> 12) << 12)
+
 #define APIC_BASE_MSR	0x800
 #define X2APIC_ENABLE	(1UL << 10)
+#define	APIC_ID		0x20
+#define	APIC_LVR	0x30
+#define		GET_APIC_ID_FIELD(x)	(((x) >> 24) & 0xFF)
+#define	APIC_TASKPRI	0x80
+#define	APIC_PROCPRI	0xA0
+#define	APIC_EOI	0xB0
+#define	APIC_SPIV	0xF0
+#define		APIC_SPIV_FOCUS_DISABLED	(1 << 9)
+#define		APIC_SPIV_APIC_ENABLED		(1 << 8)
 #define	APIC_ICR	0x300
 #define		APIC_DEST_SELF		0x40000
 #define		APIC_DEST_ALLINC	0x80000
@@ -432,6 +470,7 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
 #define		APIC_DM_EXTINT		0x00700
 #define		APIC_VECTOR_MASK	0x000FF
 #define	APIC_ICR2	0x310
+#define		SET_APIC_DEST_FIELD(x)	((x) << 24)
 
 /* VMX_EPT_VPID_CAP bits */
 #define VMX_EPT_VPID_CAP_AD_BITS       (1ULL << 21)
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index fa5a90e6c6f0..d787cb802b4a 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1801,6 +1801,7 @@ static struct exit_reason {
 	{KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
 	{KVM_EXIT_X86_RDMSR, "RDMSR"},
 	{KVM_EXIT_X86_WRMSR, "WRMSR"},
+	{KVM_EXIT_XEN, "XEN"},
 #ifdef KVM_EXIT_MEMORY_NOT_PRESENT
 	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
 #endif
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 9be1944c2d1c..81490b9b4e32 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -49,7 +49,8 @@ static void guest_code(uint32_t vcpu_id)
 }
 
 struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
-				   uint64_t vcpu_memory_bytes)
+				   uint64_t vcpu_memory_bytes,
+				   enum vm_mem_backing_src_type backing_src)
 {
 	struct kvm_vm *vm;
 	uint64_t guest_num_pages;
@@ -93,8 +94,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
 	pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
 
 	/* Add an extra memory slot for testing */
-	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-				    guest_test_phys_mem,
+	vm_userspace_mem_region_add(vm, backing_src, guest_test_phys_mem,
 				    PERF_TEST_MEM_SLOT_INDEX,
 				    guest_num_pages, 0);
 
@@ -112,7 +112,9 @@ void perf_test_destroy_vm(struct kvm_vm *vm)
 	kvm_vm_free(vm);
 }
 
-void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes)
+void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
+			   uint64_t vcpu_memory_bytes,
+			   bool partition_vcpu_memory_access)
 {
 	vm_paddr_t vcpu_gpa;
 	struct perf_test_vcpu_args *vcpu_args;
@@ -122,13 +124,22 @@ void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_by
 		vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
 
 		vcpu_args->vcpu_id = vcpu_id;
-		vcpu_args->gva = guest_test_virt_mem +
-				 (vcpu_id * vcpu_memory_bytes);
-		vcpu_args->pages = vcpu_memory_bytes /
-				   perf_test_args.guest_page_size;
+		if (partition_vcpu_memory_access) {
+			vcpu_args->gva = guest_test_virt_mem +
+					 (vcpu_id * vcpu_memory_bytes);
+			vcpu_args->pages = vcpu_memory_bytes /
+					   perf_test_args.guest_page_size;
+			vcpu_gpa = guest_test_phys_mem +
+				   (vcpu_id * vcpu_memory_bytes);
+		} else {
+			vcpu_args->gva = guest_test_virt_mem;
+			vcpu_args->pages = (vcpus * vcpu_memory_bytes) /
+					   perf_test_args.guest_page_size;
+			vcpu_gpa = guest_test_phys_mem;
+		}
 
-		vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
 		pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
-			 vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
+			 vcpu_id, vcpu_gpa, vcpu_gpa +
+			 (vcpu_args->pages * perf_test_args.guest_page_size));
 	}
 }
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 8e04c0b1608e..906c955384e2 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -10,6 +10,7 @@
 #include <limits.h>
 #include <stdlib.h>
 #include <time.h>
+#include "linux/kernel.h"
 
 #include "test_util.h"
 
@@ -84,7 +85,7 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2)
 	return timespec_add_ns((struct timespec){0}, ns1 - ns2);
 }
 
-struct timespec timespec_diff_now(struct timespec start)
+struct timespec timespec_elapsed(struct timespec start)
 {
 	struct timespec end;
 
@@ -109,3 +110,31 @@ void print_skip(const char *fmt, ...)
 	va_end(ap);
 	puts(", skipping test");
 }
+
+const struct vm_mem_backing_src_alias backing_src_aliases[] = {
+	{"anonymous", VM_MEM_SRC_ANONYMOUS,},
+	{"anonymous_thp", VM_MEM_SRC_ANONYMOUS_THP,},
+	{"anonymous_hugetlb", VM_MEM_SRC_ANONYMOUS_HUGETLB,},
+};
+
+void backing_src_help(void)
+{
+	int i;
+
+	printf("Available backing src types:\n");
+	for (i = 0; i < ARRAY_SIZE(backing_src_aliases); i++)
+		printf("\t%s\n", backing_src_aliases[i].name);
+}
+
+enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(backing_src_aliases); i++)
+		if (!strcmp(type_name, backing_src_aliases[i].name))
+			return backing_src_aliases[i].type;
+
+	backing_src_help();
+	TEST_FAIL("Unknown backing src type: %s", type_name);
+	return -1;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 95e1a757c629..de0c76177d02 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -670,6 +670,82 @@ struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
 }
 
 /*
+ * KVM Get Feature MSR
+ *
+ * Input Args:
+ *   msr_index - Index of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
+ *
+ * Get the value of an MSR with a KVM_GET_MSRS ioctl on the KVM device itself.
+ */
+uint64_t kvm_get_feature_msr(uint64_t msr_index)
+{
+	struct {
+		struct kvm_msrs header;
+		struct kvm_msr_entry entry;
+	} buffer = {};
+	int r, kvm_fd;
+
+	buffer.header.nmsrs = 1;
+	buffer.entry.index = msr_index;
+	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+	if (kvm_fd < 0)
+		exit(KSFT_SKIP);
+
+	r = ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
+	TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
+		"  rc: %i errno: %i", r, errno);
+
+	close(kvm_fd);
+	return buffer.entry.data;
+}
+
+/*
+ * VM VCPU CPUID Get
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU id
+ *
+ * Output Args: None
+ *
+ * Return: KVM CPUID (KVM_GET_CPUID2)
+ *
+ * Get the VCPU's CPUID.
+ */
+struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	struct kvm_cpuid2 *cpuid;
+	int rc, max_ent;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	cpuid = allocate_kvm_cpuid2();
+	max_ent = cpuid->nent;
+
+	for (cpuid->nent = 1; cpuid->nent <= max_ent; cpuid->nent++) {
+		rc = ioctl(vcpu->fd, KVM_GET_CPUID2, cpuid);
+		if (!rc)
+			break;
+
+		TEST_ASSERT(rc == -1 && errno == E2BIG,
+			    "KVM_GET_CPUID2 should either succeed or give E2BIG: %d %d",
+			    rc, errno);
+	}
+
+	TEST_ASSERT(rc == 0, "KVM_GET_CPUID2 failed, rc: %i errno: %i",
+		    rc, errno);
+
+	return cpuid;
+}
+
+
+
+/*
  * Locate a cpuid entry.
  *
  * Input Args:
@@ -1224,3 +1300,71 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
 		     : "b"(a0), "c"(a1), "d"(a2), "S"(a3));
 	return r;
 }
+
+struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
+{
+	static struct kvm_cpuid2 *cpuid;
+	int ret;
+	int kvm_fd;
+
+	if (cpuid)
+		return cpuid;
+
+	cpuid = allocate_kvm_cpuid2();
+	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+	if (kvm_fd < 0)
+		exit(KSFT_SKIP);
+
+	ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+	TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_HV_CPUID failed %d %d\n",
+		    ret, errno);
+
+	close(kvm_fd);
+	return cpuid;
+}
+
+void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	static struct kvm_cpuid2 *cpuid_full;
+	struct kvm_cpuid2 *cpuid_sys, *cpuid_hv;
+	int i, nent = 0;
+
+	if (!cpuid_full) {
+		cpuid_sys = kvm_get_supported_cpuid();
+		cpuid_hv = kvm_get_supported_hv_cpuid();
+
+		cpuid_full = malloc(sizeof(*cpuid_full) +
+				    (cpuid_sys->nent + cpuid_hv->nent) *
+				    sizeof(struct kvm_cpuid_entry2));
+		if (!cpuid_full) {
+			perror("malloc");
+			abort();
+		}
+
+		/* Need to skip KVM CPUID leaves 0x400000xx */
+		for (i = 0; i < cpuid_sys->nent; i++) {
+			if (cpuid_sys->entries[i].function >= 0x40000000 &&
+			    cpuid_sys->entries[i].function < 0x40000100)
+				continue;
+			cpuid_full->entries[nent] = cpuid_sys->entries[i];
+			nent++;
+		}
+
+		memcpy(&cpuid_full->entries[nent], cpuid_hv->entries,
+		       cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2));
+		cpuid_full->nent = nent + cpuid_hv->nent;
+	}
+
+	vcpu_set_cpuid(vm, vcpuid, cpuid_full);
+}
+
+/* Return a newly allocated KVM_GET_SUPPORTED_HV_CPUID result for @vcpuid. */
+struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct kvm_cpuid2 *cpuid;
+
+	cpuid = allocate_kvm_cpuid2();
+	vcpu_ioctl(vm, vcpuid, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+
+	return cpuid;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
index 3a5c72ed2b79..827fe6028dd4 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -74,7 +74,7 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
 	wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa);
 
 	memset(vmcb, 0, sizeof(*vmcb));
-	asm volatile ("vmsave\n\t" : : "a" (vmcb_gpa) : "memory");
+	asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory");
 	vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr);
 	vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr);
 	vmcb_set_seg(&save->ss, get_ss(), 0, -1U, data_seg_attr);
@@ -131,19 +131,19 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
 void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa)
 {
 	asm volatile (
-		"vmload\n\t"
+		"vmload %[vmcb_gpa]\n\t"
 		"mov rflags, %%r15\n\t"	// rflags
 		"mov %%r15, 0x170(%[vmcb])\n\t"
 		"mov guest_regs, %%r15\n\t"	// rax
 		"mov %%r15, 0x1f8(%[vmcb])\n\t"
 		LOAD_GPR_C
-		"vmrun\n\t"
+		"vmrun %[vmcb_gpa]\n\t"
 		SAVE_GPR_C
 		"mov 0x170(%[vmcb]), %%r15\n\t"	// rflags
 		"mov %%r15, rflags\n\t"
 		"mov 0x1f8(%[vmcb]), %%r15\n\t"	// rax
 		"mov %%r15, guest_regs\n\t"
-		"vmsave\n\t"
+		"vmsave %[vmcb_gpa]\n\t"
 		: : [vmcb] "r" (vmcb), [vmcb_gpa] "a" (vmcb_gpa)
 		: "r15", "memory");
 }
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
new file mode 100644
index 000000000000..6096bf0a5b34
--- /dev/null
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM memslot modification stress test
+ * Adapted from demand_paging_test.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2020, Google, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <time.h>
+#include <poll.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/userfaultfd.h>
+
+#include "perf_test_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include "guest_modes.h"
+
+#define DUMMY_MEMSLOT_INDEX 7
+
+#define DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS 10
+
+
+static int nr_vcpus = 1;
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+
+static bool run_vcpus = true;
+
+static void *vcpu_worker(void *data)
+{
+	int ret;
+	struct perf_test_vcpu_args *vcpu_args =
+		(struct perf_test_vcpu_args *)data;
+	int vcpu_id = vcpu_args->vcpu_id;
+	struct kvm_vm *vm = perf_test_args.vm;
+	struct kvm_run *run;
+
+	vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+	run = vcpu_state(vm, vcpu_id);
+
+	/* Let the guest access its memory until a stop signal is received */
+	while (READ_ONCE(run_vcpus)) {
+		ret = _vcpu_run(vm, vcpu_id);
+		TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
+
+		if (get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC)
+			continue;
+
+		TEST_ASSERT(false,
+			    "Invalid guest sync status: exit_reason=%s\n",
+			    exit_reason_str(run->exit_reason));
+	}
+
+	return NULL;
+}
+
+struct memslot_antagonist_args {
+	struct kvm_vm *vm;
+	useconds_t delay;
+	uint64_t nr_modifications;
+};
+
+static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
+			      uint64_t nr_modifications, uint64_t gpa)
+{
+	int i;
+
+	for (i = 0; i < nr_modifications; i++) {
+		usleep(delay);
+		vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa,
+					    DUMMY_MEMSLOT_INDEX, 1, 0);
+
+		vm_mem_region_delete(vm, DUMMY_MEMSLOT_INDEX);
+	}
+}
+
+struct test_params {
+	useconds_t memslot_modification_delay;
+	uint64_t nr_memslot_modifications;
+	bool partition_vcpu_memory_access;
+};
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+	struct test_params *p = arg;
+	pthread_t *vcpu_threads;
+	struct kvm_vm *vm;
+	int vcpu_id;
+
+	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
+				 VM_MEM_SRC_ANONYMOUS);
+
+	perf_test_args.wr_fract = 1;
+
+	vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
+	TEST_ASSERT(vcpu_threads, "Memory allocation failed");
+
+	perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
+			      p->partition_vcpu_memory_access);
+
+	/* Export the shared variables to the guest */
+	sync_global_to_guest(vm, perf_test_args);
+
+	pr_info("Finished creating vCPUs\n");
+
+	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
+		pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
+			       &perf_test_args.vcpu_args[vcpu_id]);
+
+	pr_info("Started all vCPUs\n");
+
+	add_remove_memslot(vm, p->memslot_modification_delay,
+			   p->nr_memslot_modifications,
+			   guest_test_phys_mem +
+			   (guest_percpu_mem_size * nr_vcpus) +
+			   perf_test_args.host_page_size +
+			   perf_test_args.guest_page_size);
+
+	run_vcpus = false;
+
+	/* Wait for the vcpu threads to quit */
+	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
+		pthread_join(vcpu_threads[vcpu_id], NULL);
+
+	pr_info("All vCPU threads joined\n");
+
+	ucall_uninit(vm);
+	kvm_vm_free(vm);
+
+	free(vcpu_threads);
+}
+
+static void help(char *name)
+{
+	puts("");
+	printf("usage: %s [-h] [-m mode] [-d delay_usec]\n"
+	       "          [-b memory] [-v vcpus] [-o] [-i iterations]\n", name);
+	guest_modes_help();
+	printf(" -d: add a delay between each iteration of adding and\n"
+	       "     deleting a memslot in usec.\n");
+	printf(" -b: specify the size of the memory region which should be\n"
+	       "     accessed by each vCPU. e.g. 10M or 3G.\n"
+	       "     Default: 1G\n");
+	printf(" -v: specify the number of vCPUs to run.\n");
+	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
+	       "     them into a separate region of memory for each vCPU.\n");
+	printf(" -i: specify the number of iterations of adding and removing\n"
+	       "     a memslot.\n"
+	       "     Default: %d\n", DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS);
+	puts("");
+	exit(0);
+}
+
+/* Parse the command-line options, then hand run_test to for_each_guest_mode(). */
+int main(int argc, char *argv[])
+{
+	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+	int opt;
+	struct test_params p = {
+		.memslot_modification_delay = 0,
+		.nr_memslot_modifications = DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS,
+		.partition_vcpu_memory_access = true
+	};
+
+	guest_modes_append_default();
+
+	while ((opt = getopt(argc, argv, "hm:d:b:v:oi:")) != -1) {
+		switch (opt) {
+		case 'm':
+			guest_modes_cmdline(optarg);
+			break;
+		case 'd':
+			/* useconds_t is unsigned: check the sign before converting. */
+			TEST_ASSERT(strtol(optarg, NULL, 0) >= 0, "A negative delay is not supported.");
+			p.memslot_modification_delay = strtoul(optarg, NULL, 0);
+			break;
+		case 'b':
+			guest_percpu_mem_size = parse_size(optarg);
+			break;
+		case 'v':
+			nr_vcpus = atoi(optarg);
+			TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
+				    "Invalid number of vcpus, must be between 1 and %d",
+				    max_vcpus);
+			break;
+		case 'o':
+			p.partition_vcpu_memory_access = false;
+			break;
+		case 'i':
+			p.nr_memslot_modifications = atoi(optarg);
+			break;
+		case 'h':
+		default:
+			help(argv[0]);
+			break;
+		}
+	}
+
+	for_each_guest_mode(run_test, &p);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/settings b/tools/testing/selftests/kvm/settings
new file mode 100644
index 000000000000..6091b45d226b
--- /dev/null
+++ b/tools/testing/selftests/kvm/settings
@@ -0,0 +1 @@
+timeout=120
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 37b8a78f6b74..ca22ee6d19cb 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -99,6 +99,7 @@ int main(int argc, char *argv[])
 		exit(KSFT_SKIP);
 	}
 
+	vcpu_set_hv_cpuid(vm, VCPU_ID);
 	vcpu_enable_evmcs(vm, VCPU_ID);
 
 	run = vcpu_state(vm, VCPU_ID);
@@ -142,7 +143,7 @@ int main(int argc, char *argv[])
 		/* Restore state in a new VM.  */
 		kvm_vm_restart(vm, O_RDWR);
 		vm_vcpu_add(vm, VCPU_ID);
-		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+		vcpu_set_hv_cpuid(vm, VCPU_ID);
 		vcpu_enable_evmcs(vm, VCPU_ID);
 		vcpu_load_state(vm, VCPU_ID, state);
 		run = vcpu_state(vm, VCPU_ID);
diff --git a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
new file mode 100644
index 000000000000..9b78e8889638
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat Inc.
+ *
+ * Generic tests for KVM CPUID set/get ioctls
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID 0
+
+/* CPUIDs known to differ */
+struct {
+	u32 function;
+	u32 index;
+} mangled_cpuids[] = {
+	{.function = 0xd, .index = 0},
+};
+
+static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
+{
+	int i;
+	u32 eax, ebx, ecx, edx;
+
+	for (i = 0; i < guest_cpuid->nent; i++) {
+		eax = guest_cpuid->entries[i].function;
+		ecx = guest_cpuid->entries[i].index;
+
+		cpuid(&eax, &ebx, &ecx, &edx);
+
+		GUEST_ASSERT(eax == guest_cpuid->entries[i].eax &&
+			     ebx == guest_cpuid->entries[i].ebx &&
+			     ecx == guest_cpuid->entries[i].ecx &&
+			     edx == guest_cpuid->entries[i].edx);
+	}
+
+}
+
+static void test_cpuid_40000000(struct kvm_cpuid2 *guest_cpuid)
+{
+	u32 eax = 0x40000000, ebx, ecx = 0, edx;
+
+	cpuid(&eax, &ebx, &ecx, &edx);
+
+	GUEST_ASSERT(eax == 0x40000001);
+}
+
+static void guest_main(struct kvm_cpuid2 *guest_cpuid)
+{
+	GUEST_SYNC(1);
+
+	test_guest_cpuids(guest_cpuid);
+
+	GUEST_SYNC(2);
+
+	test_cpuid_40000000(guest_cpuid);
+
+	GUEST_DONE();
+}
+
+/* Return true if @entrie matches (function, index) of an entry in mangled_cpuids[]. */
+static bool is_cpuid_mangled(struct kvm_cpuid_entry2 *entrie)
+{
+	int i;
+
+	for (i = 0; i < sizeof(mangled_cpuids) / sizeof(mangled_cpuids[0]); i++) {
+		if (mangled_cpuids[i].function == entrie->function &&
+		    mangled_cpuids[i].index == entrie->index)
+			return true;
+	}
+	return false;
+}
+
+static void check_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *entrie)
+{
+	int i;
+
+	for (i = 0; i < cpuid->nent; i++) {
+		if (cpuid->entries[i].function == entrie->function &&
+		    cpuid->entries[i].index == entrie->index) {
+			if (is_cpuid_mangled(entrie))
+				return;
+
+			TEST_ASSERT(cpuid->entries[i].eax == entrie->eax &&
+				    cpuid->entries[i].ebx == entrie->ebx &&
+				    cpuid->entries[i].ecx == entrie->ecx &&
+				    cpuid->entries[i].edx == entrie->edx,
+				    "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x",
+				    entrie->function, entrie->index,
+				    cpuid->entries[i].eax, cpuid->entries[i].ebx,
+				    cpuid->entries[i].ecx, cpuid->entries[i].edx,
+				    entrie->eax, entrie->ebx, entrie->ecx, entrie->edx);
+			return;
+		}
+	}
+
+	TEST_ASSERT(false, "CPUID 0x%x.%x not found", entrie->function, entrie->index);
+}
+
+static void compare_cpuids(struct kvm_cpuid2 *cpuid1, struct kvm_cpuid2 *cpuid2)
+{
+	int i;
+
+	for (i = 0; i < cpuid1->nent; i++)
+		check_cpuid(cpuid2, &cpuid1->entries[i]);
+
+	for (i = 0; i < cpuid2->nent; i++)
+		check_cpuid(cpuid1, &cpuid2->entries[i]);
+}
+
+static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage)
+{
+	struct ucall uc;
+
+	_vcpu_run(vm, vcpuid);
+
+	switch (get_ucall(vm, vcpuid, &uc)) {
+	case UCALL_SYNC:
+		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+			    uc.args[1] == stage + 1,
+			    "Stage %d: Unexpected register values vmexit, got %lx",
+			    stage + 1, (ulong)uc.args[1]);
+		return;
+	case UCALL_DONE:
+		return;
+	case UCALL_ABORT:
+		TEST_ASSERT(false, "%s at %s:%ld\n\tvalues: %#lx, %#lx", (const char *)uc.args[0],
+			    __FILE__, uc.args[1], uc.args[2], uc.args[3]);
+	default:
+		TEST_ASSERT(false, "Unexpected exit: %s",
+			    exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
+	}
+}
+
+struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
+{
+	int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
+	vm_vaddr_t gva = vm_vaddr_alloc(vm, size,
+					getpagesize(), 0, 0);
+	struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
+
+	memcpy(guest_cpuids, cpuid, size);
+
+	*p_gva = gva;
+	return guest_cpuids;
+}
+
+int main(void)
+{
+	struct kvm_cpuid2 *supp_cpuid, *cpuid2;
+	vm_vaddr_t cpuid_gva;
+	struct kvm_vm *vm;
+	int stage;
+
+	vm = vm_create_default(VCPU_ID, 0, guest_main);
+
+	supp_cpuid = kvm_get_supported_cpuid();
+	cpuid2 = vcpu_get_cpuid(vm, VCPU_ID);
+
+	compare_cpuids(supp_cpuid, cpuid2);
+
+	vcpu_alloc_cpuid(vm, &cpuid_gva, cpuid2);
+
+	vcpu_args_set(vm, VCPU_ID, 1, cpuid_gva);
+
+	for (stage = 0; stage < 3; stage++)
+		run_vcpu(vm, VCPU_ID, stage);
+
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
index 88a595b7fbdd..7e2d2d17d2ed 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -125,30 +125,6 @@ void test_hv_cpuid_e2big(struct kvm_vm *vm, bool system)
 		    " it should have: %d %d", system ? "KVM" : "vCPU", ret, errno);
 }
 
-
-struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm, bool system)
-{
-	int nent = 20; /* should be enough */
-	static struct kvm_cpuid2 *cpuid;
-
-	cpuid = malloc(sizeof(*cpuid) + nent * sizeof(struct kvm_cpuid_entry2));
-
-	if (!cpuid) {
-		perror("malloc");
-		abort();
-	}
-
-	cpuid->nent = nent;
-
-	if (!system)
-		vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
-	else
-		kvm_ioctl(vm, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
-
-	return cpuid;
-}
-
-
 int main(int argc, char *argv[])
 {
 	struct kvm_vm *vm;
@@ -167,7 +143,7 @@ int main(int argc, char *argv[])
 	/* Test vCPU ioctl version */
 	test_hv_cpuid_e2big(vm, false);
 
-	hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, false);
+	hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vm, VCPU_ID);
 	test_hv_cpuid(hv_cpuid_entries, false);
 	free(hv_cpuid_entries);
 
@@ -177,7 +153,7 @@ int main(int argc, char *argv[])
 		goto do_sys;
 	}
 	vcpu_enable_evmcs(vm, VCPU_ID);
-	hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, false);
+	hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vm, VCPU_ID);
 	test_hv_cpuid(hv_cpuid_entries, true);
 	free(hv_cpuid_entries);
 
@@ -190,9 +166,8 @@ do_sys:
 
 	test_hv_cpuid_e2big(vm, true);
 
-	hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm, true);
+	hv_cpuid_entries = kvm_get_supported_hv_cpuid();
 	test_hv_cpuid(hv_cpuid_entries, nested_vmx_supported());
-	free(hv_cpuid_entries);
 
 out:
 	kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c
new file mode 100644
index 000000000000..23051d84b907
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * VMX-pmu related msrs test
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Test to check the effect of various CPUID settings
+ * on the MSR_IA32_PERF_CAPABILITIES MSR, and check that
+ * whatever we write with KVM_SET_MSR is _not_ modified
+ * in the guest and test it can be retrieved with KVM_GET_MSR.
+ *
+ * Test to check that invalid LBR formats are rejected.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <sys/ioctl.h>
+
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define VCPU_ID	      0
+
+#define X86_FEATURE_PDCM	(1<<15)
+#define PMU_CAP_FW_WRITES	(1ULL << 13)
+#define PMU_CAP_LBR_FMT		0x3f
+
+union cpuid10_eax {
+	struct {
+		unsigned int version_id:8;
+		unsigned int num_counters:8;
+		unsigned int bit_width:8;
+		unsigned int mask_length:8;
+	} split;
+	unsigned int full;
+};
+
+union perf_capabilities {
+	struct {
+		u64	lbr_format:6;
+		u64	pebs_trap:1;
+		u64	pebs_arch_reg:1;
+		u64	pebs_format:4;
+		u64	smm_freeze:1;
+		u64	full_width_write:1;
+		u64 pebs_baseline:1;
+		u64	perf_metrics:1;
+		u64	pebs_output_pt_available:1;
+		u64	anythread_deprecated:1;
+	};
+	u64	capabilities;
+};
+
+static void guest_code(void)
+{
+	wrmsr(MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT);
+}
+
+/* Check KVM_SET/GET_MSR handling of MSR_IA32_PERF_CAPABILITIES across CPUID setups. */
+int main(int argc, char *argv[])
+{
+	struct kvm_cpuid2 *cpuid;
+	struct kvm_cpuid_entry2 *entry_1_0, *entry_a_0;
+	bool pdcm_supported = false;
+	struct kvm_vm *vm;
+	int ret;
+	union cpuid10_eax eax;
+	union perf_capabilities host_cap;
+
+	host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
+	host_cap.capabilities &= (PMU_CAP_FW_WRITES | PMU_CAP_LBR_FMT);
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	cpuid = kvm_get_supported_cpuid();
+
+	if (kvm_get_cpuid_max_basic() >= 0xa) {
+		entry_1_0 = kvm_get_supported_cpuid_index(1, 0);
+		entry_a_0 = kvm_get_supported_cpuid_index(0xa, 0);
+		pdcm_supported = entry_1_0 && !!(entry_1_0->ecx & X86_FEATURE_PDCM);
+		eax.full = entry_a_0->eax;
+	}
+	if (!pdcm_supported) {
+		print_skip("MSR_IA32_PERF_CAPABILITIES is not supported by the vCPU");
+		exit(KSFT_SKIP);
+	}
+	if (!eax.split.version_id) {
+		print_skip("PMU is not supported by the vCPU");
+		exit(KSFT_SKIP);
+	}
+
+	/* testcase 1, set capabilities when we have PDCM bit */
+	vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+	vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES);
+
+	/* check capabilities can be retrieved with KVM_GET_MSR */
+	ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
+
+	/* check whatever we write with KVM_SET_MSR is _not_ modified */
+	vcpu_run(vm, VCPU_ID);
+	ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
+
+	/* testcase 2, check valid LBR formats are accepted */
+	vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0);
+	ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), 0);
+
+	vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, host_cap.lbr_format);
+	ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), (u64)host_cap.lbr_format);
+
+	/* testcase 3, check invalid LBR format is rejected */
+	ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT);
+	TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
+
+	/* testcase 4, set capabilities when we don't have PDCM bit */
+	entry_1_0->ecx &= ~X86_FEATURE_PDCM;
+	vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+	ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+	TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
+
+	/* testcase 5, restore PDCM but clear the PMU version in CPUID.0xA:EAX */
+	entry_1_0->ecx |= X86_FEATURE_PDCM;
+	eax.split.version_id = 0;
+	entry_a_0->eax = eax.full;
+	vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+	ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES);
+	TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
+
+	vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0);
+	ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), 0);
+
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
new file mode 100644
index 000000000000..2f964cdc273c
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
@@ -0,0 +1,544 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * xapic_ipi_test
+ *
+ * Copyright (C) 2020, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
+ * another vCPU that is halted when KVM's backing page for the APIC access
+ * address has been moved by mm.
+ *
+ * The test starts two vCPUs: one that sends IPIs and one that continually
+ * executes HLT. The sender checks that the halter has woken from the HLT and
+ * has reentered HLT before sending the next IPI. While the vCPUs are running,
+ * the host continually calls migrate_pages to move all of the process' pages
+ * amongst the available numa nodes on the machine.
+ *
+ * Migration is a command line option; on non-NUMA machines it exits with an
+ * error. The test is still useful on non-NUMA machines for testing IPIs.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <getopt.h>
+#include <pthread.h>
+#include <inttypes.h>
+#include <string.h>
+#include <time.h>
+
+#include "kvm_util.h"
+#include "numaif.h"
+#include "processor.h"
+#include "test_util.h"
+#include "vmx.h"
+
+/* Default running time for the test */
+#define DEFAULT_RUN_SECS 3
+
+/* Default delay between migrate_pages calls (microseconds) */
+#define DEFAULT_DELAY_USECS 500000
+
+#define HALTER_VCPU_ID 0
+#define SENDER_VCPU_ID 1
+
+volatile uint32_t *apic_base = (volatile uint32_t *)APIC_DEFAULT_GPA;
+
+/*
+ * Vector for IPI from sender vCPU to halting vCPU.
+ * Value is arbitrary and was chosen for the alternating bit pattern. Any
+ * value should work.
+ */
+#define IPI_VECTOR	 0xa5
+
+/*
+ * Incremented in the IPI handler. Provides evidence to the sender that the IPI
+ * arrived at the destination
+ */
+static volatile uint64_t ipis_rcvd;
+
+/* Data struct shared between host main thread and vCPUs */
+struct test_data_page {
+	uint32_t halter_apic_id;
+	volatile uint64_t hlt_count;
+	volatile uint64_t wake_count;
+	uint64_t ipis_sent;
+	uint64_t migrations_attempted;
+	uint64_t migrations_completed;
+	uint32_t icr;
+	uint32_t icr2;
+	uint32_t halter_tpr;
+	uint32_t halter_ppr;
+
+	/*
+	 *  Record local version register as a cross-check that APIC access
+	 *  worked. Value should match what KVM reports (APIC_VERSION in
+	 *  arch/x86/kvm/lapic.c). If test is failing, check that values match
+	 *  to determine whether APIC access exits are working.
+	 */
+	uint32_t halter_lvr;
+};
+
+struct thread_params {
+	struct test_data_page *data;
+	struct kvm_vm *vm;
+	uint32_t vcpu_id;
+	uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
+};
+
+uint32_t read_apic_reg(uint reg)
+{
+	return apic_base[reg >> 2];
+}
+
+void write_apic_reg(uint reg, uint32_t val)
+{
+	apic_base[reg >> 2] = val;
+}
+
+void disable_apic(void)
+{
+	wrmsr(MSR_IA32_APICBASE,
+	      rdmsr(MSR_IA32_APICBASE) &
+		~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
+}
+
+void enable_xapic(void)
+{
+	uint64_t val = rdmsr(MSR_IA32_APICBASE);
+
+	/* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */
+	if (val & MSR_IA32_APICBASE_EXTD) {
+		disable_apic();
+		wrmsr(MSR_IA32_APICBASE,
+		      rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
+	} else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
+		wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
+	}
+
+	/*
+	 * Per SDM: reset value of spurious interrupt vector register has the
+	 * APIC software enabled bit=0. It must be enabled in addition to the
+	 * enable bit in the MSR.
+	 */
+	val = read_apic_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
+	write_apic_reg(APIC_SPIV, val);
+}
+
+void verify_apic_base_addr(void)
+{
+	uint64_t msr = rdmsr(MSR_IA32_APICBASE);
+	uint64_t base = GET_APIC_BASE(msr);
+
+	GUEST_ASSERT(base == APIC_DEFAULT_GPA);
+}
+
+static void halter_guest_code(struct test_data_page *data)
+{
+	verify_apic_base_addr();
+	enable_xapic();
+
+	data->halter_apic_id = GET_APIC_ID_FIELD(read_apic_reg(APIC_ID));
+	data->halter_lvr = read_apic_reg(APIC_LVR);
+
+	/*
+	 * Loop forever HLTing and recording halts & wakes. Disable interrupts
+	 * each time around to minimize window between signaling the pending
+	 * halt to the sender vCPU and executing the halt. No need to disable on
+	 * first run as this vCPU executes first and the host waits for it to
+	 * signal going into first halt before starting the sender vCPU. Record
+	 * TPR and PPR for diagnostic purposes in case the test fails.
+	 */
+	for (;;) {
+		data->halter_tpr = read_apic_reg(APIC_TASKPRI);
+		data->halter_ppr = read_apic_reg(APIC_PROCPRI);
+		data->hlt_count++;
+		asm volatile("sti; hlt; cli");
+		data->wake_count++;
+	}
+}
+
+/*
+ * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
+ * enable diagnosing errant writes to the APIC access address backing page in
+ * case of test failure.
+ */
+static void guest_ipi_handler(struct ex_regs *regs)
+{
+	ipis_rcvd++;
+	write_apic_reg(APIC_EOI, 77);
+}
+
+static void sender_guest_code(struct test_data_page *data)
+{
+	uint64_t last_wake_count;
+	uint64_t last_hlt_count;
+	uint64_t last_ipis_rcvd_count;
+	uint32_t icr_val;
+	uint32_t icr2_val;
+	uint64_t tsc_start;
+
+	verify_apic_base_addr();
+	enable_xapic();
+
+	/*
+	 * Init interrupt command register for sending IPIs
+	 *
+	 * Delivery mode=fixed, per SDM:
+	 *   "Delivers the interrupt specified in the vector field to the target
+	 *    processor."
+	 *
+	 * Destination mode=physical i.e. specify target by its local APIC
+	 * ID. This vCPU assumes that the halter vCPU has already started and
+	 * set data->halter_apic_id.
+	 */
+	icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
+	icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
+	data->icr = icr_val;
+	data->icr2 = icr2_val;
+
+	last_wake_count = data->wake_count;
+	last_hlt_count = data->hlt_count;
+	last_ipis_rcvd_count = ipis_rcvd;
+	for (;;) {
+		/*
+		 * Send IPI to halter vCPU.
+		 * First IPI can be sent unconditionally because halter vCPU
+		 * starts earlier.
+		 */
+		write_apic_reg(APIC_ICR2, icr2_val);
+		write_apic_reg(APIC_ICR, icr_val);
+		data->ipis_sent++;
+
+		/*
+		 * Wait up to ~1 sec for halter to indicate that it has:
+		 * 1. Received the IPI
+		 * 2. Woken up from the halt
+		 * 3. Gone back into halt
+		 * Current CPUs typically run at 2.x Ghz which is ~2
+		 * billion ticks per second.
+		 */
+		tsc_start = rdtsc();
+		while (rdtsc() - tsc_start < 2000000000) {
+			if ((ipis_rcvd != last_ipis_rcvd_count) &&
+			    (data->wake_count != last_wake_count) &&
+			    (data->hlt_count != last_hlt_count))
+				break;
+		}
+
+		GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
+			     (data->wake_count != last_wake_count) &&
+			     (data->hlt_count != last_hlt_count));
+
+		last_wake_count = data->wake_count;
+		last_hlt_count = data->hlt_count;
+		last_ipis_rcvd_count = ipis_rcvd;
+	}
+}
+
+/*
+ * Thread body for one vCPU: run it until it exits to userspace, require a
+ * KVM_EXIT_IO exit and, if the guest reported UCALL_ABORT, fail the test with
+ * a dump of the shared counters. @arg points at this vCPU's thread_params.
+ */
+static void *vcpu_thread(void *arg)
+{
+	struct thread_params *tp = arg;
+	unsigned int exit_reason;
+	struct ucall uc;
+	int old, r;
+
+	/* Asynchronous cancelability: this thread is later stopped with pthread_cancel(). */
+	r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+	TEST_ASSERT(r == 0, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+		    tp->vcpu_id, r);
+
+	fprintf(stderr, "vCPU thread running vCPU %u\n", tp->vcpu_id);
+	vcpu_run(tp->vm, tp->vcpu_id);
+	exit_reason = vcpu_state(tp->vm, tp->vcpu_id)->exit_reason;
+	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
+		    "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
+		    tp->vcpu_id, exit_reason, exit_reason_str(exit_reason));
+
+	if (get_ucall(tp->vm, tp->vcpu_id, &uc) != UCALL_ABORT)
+		return NULL;
+
+	TEST_ASSERT(false,
+		    "vCPU %u exited with error: %s.\n"
+		    "Sending vCPU sent %lu IPIs to halting vCPU\n"
+		    "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
+		    "Halter TPR=%#x PPR=%#x LVR=%#x\n"
+		    "Migrations attempted: %lu\n"
+		    "Migrations completed: %lu\n",
+		    tp->vcpu_id, (const char *)uc.args[0], tp->data->ipis_sent,
+		    tp->data->hlt_count, tp->data->wake_count, *tp->pipis_rcvd,
+		    tp->data->halter_tpr, tp->data->halter_ppr, tp->data->halter_lvr,
+		    tp->data->migrations_attempted, tp->data->migrations_completed);
+	return NULL;
+}
+
+/*
+ * Cancel a vCPU thread and wait for it to terminate.
+ * @thread is the thread to stop; @vcpu_id is only used in failure messages.
+ * The test fails unless the joined thread reports PTHREAD_CANCELED.
+ */
+static void cancel_join_vcpu_thread(pthread_t thread, uint32_t vcpu_id)
+{
+	void *retval;
+	int r;
+
+	r = pthread_cancel(thread);
+	TEST_ASSERT(r == 0, "pthread_cancel on vcpu_id=%u failed with errno=%d", vcpu_id, r);
+
+	r = pthread_join(thread, &retval);
+	TEST_ASSERT(r == 0, "pthread_join on vcpu_id=%u failed with errno=%d", vcpu_id, r);
+	TEST_ASSERT(retval == PTHREAD_CANCELED,
+		    "expected retval=%p, got %p", PTHREAD_CANCELED, retval);
+}
+
+/*
+ * Move this process's pages from one NUMA node to the next for @run_secs
+ * seconds, sleeping @delay_usecs microseconds between migrate_pages() calls,
+ * and assert about once a second that the IPI, HLT and wake counters in @data
+ * all changed. @pipis_rcvd points at the guest's received-IPI counter and is
+ * only read for the progress messages.
+ */
+void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
+		   uint64_t *pipis_rcvd)
+{
+	unsigned long nodemask = 0, bit;
+	unsigned long nodemasks[sizeof(nodemask) * 8];
+	time_t start_time, last_update, now;
+	time_t interval_secs = 1;
+	uint64_t hlt_count, wake_count, ipis_sent;
+	long pages_not_moved;
+	int nodes = 0, from, to, i, r;
+
+	fprintf(stderr, "Calling migrate_pages every %d microseconds\n", delay_usecs);
+
+	/* Get set of first 64 numa nodes available */
+	r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8, 0, MPOL_F_MEMS_ALLOWED);
+	TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
+
+	fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
+		"(each 1-bit indicates node is present): %#lx\n",
+		sizeof(nodemask) * 8, nodemask);
+
+	/* Init array of masks containing a single-bit in each, one for each
+	 * available node. migrate_pages called below requires specifying nodes
+	 * as bit masks.
+	 */
+	for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
+		if (nodemask & bit) {
+			nodemasks[nodes] = nodemask & bit;
+			nodes++;
+		}
+	}
+
+	TEST_ASSERT(nodes > 1,
+		    "Did not find at least 2 numa nodes. Can't do migration\n");
+
+	fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);
+
+	from = 0;
+	to = 1;
+	start_time = time(NULL);
+	last_update = start_time;
+
+	ipis_sent = data->ipis_sent;
+	hlt_count = data->hlt_count;
+	wake_count = data->wake_count;
+
+	while ((int)(time(NULL) - start_time) < run_secs) {
+		data->migrations_attempted++;
+
+		/*
+		 * migrate_pages with PID=0 will migrate all pages of this process
+		 * between the nodes specified as bitmasks. The page backing the APIC
+		 * access address belongs to this process because it is allocated by
+		 * KVM in the context of the KVM_CREATE_VCPU ioctl. If that assumption
+		 * ever changes this test may break or give a false positive signal.
+		 * maxnode counts bits, not bytes, hence the "* 8" (as for get_mempolicy).
+		 */
+		pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]) * 8,
+						&nodemasks[from],
+						&nodemasks[to]);
+		if (pages_not_moved < 0)
+			fprintf(stderr,
+				"migrate_pages failed, errno=%d\n", errno);
+		else if (pages_not_moved > 0)
+			fprintf(stderr,
+				"migrate_pages could not move %ld pages\n",
+				pages_not_moved);
+		else
+			data->migrations_completed++;
+
+		from = to;
+		to++;
+		if (to == nodes)
+			to = 0;
+
+		now = time(NULL);
+		if (((now - start_time) % interval_secs == 0) &&
+		    (now != last_update)) {
+			last_update = now;
+			fprintf(stderr,
+				"%lu seconds: Migrations attempted=%lu completed=%lu, "
+				"IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
+				now - start_time, data->migrations_attempted,
+				data->migrations_completed,
+				data->ipis_sent, *pipis_rcvd,
+				data->hlt_count, data->wake_count);
+
+			TEST_ASSERT(ipis_sent != data->ipis_sent &&
+				    hlt_count != data->hlt_count &&
+				    wake_count != data->wake_count,
+				    "IPI, HLT and wake count have not increased "
+				    "in the last %lu seconds. "
+				    "HLTer is likely hung.\n", interval_secs);
+
+			ipis_sent = data->ipis_sent;
+			hlt_count = data->hlt_count;
+			wake_count = data->wake_count;
+		}
+		usleep(delay_usecs);
+	}
+}
+
+/*
+ * Parse the command line into the caller's variables:
+ *   -s <secs> sets *run_secs, -d <usecs> sets *delay_usecs, -m sets *migrate.
+ * Outputs whose option is not given are left untouched; any other option
+ * fails the test with the usage text.
+ */
+void get_cmdline_args(int argc, char *argv[], int *run_secs,
+		      bool *migrate, int *delay_usecs)
+{
+	int opt;
+
+	while ((opt = getopt(argc, argv, "s:d:m")) != -1) {
+		if (opt == 's') {
+			*run_secs = parse_size(optarg);
+		} else if (opt == 'd') {
+			*delay_usecs = parse_size(optarg);
+		} else if (opt == 'm') {
+			*migrate = true;
+		} else {
+			TEST_ASSERT(false,
+				    "Usage: -s <runtime seconds>. Default is %d seconds.\n"
+				    "-m adds calls to migrate_pages while vCPUs are running."
+				    " Default is no migrations.\n"
+				    "-d <delay microseconds> - delay between migrate_pages() calls."
+				    " Default is %d microseconds.\n",
+				    DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
+		}
+	}
+}
+
+/*
+ * Run a halter vCPU and an IPI-sender vCPU on separate threads for the requested
+ * time, optionally calling migrate_pages() meanwhile (-m), then cancel both
+ * threads and print the counters. Failures are reported through TEST_ASSERT.
+ */
+int main(int argc, char *argv[])
+{
+	int r, wait_secs;
+	const int max_halter_wait = 10;
+	int run_secs = 0, delay_usecs = 0;
+	struct test_data_page *data;
+	vm_vaddr_t test_data_page_vaddr;
+	bool migrate = false;
+	pthread_t threads[2];
+	struct thread_params params[2];
+	struct kvm_vm *vm;
+	uint64_t *pipis_rcvd;
+
+	get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
+	if (run_secs <= 0)
+		run_secs = DEFAULT_RUN_SECS;
+	if (delay_usecs <= 0)
+		delay_usecs = DEFAULT_DELAY_USECS;
+
+	vm = vm_create_default(HALTER_VCPU_ID, 0, halter_guest_code);
+	params[0].vm = vm;
+	params[1].vm = vm;
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID);
+	vm_handle_exception(vm, IPI_VECTOR, guest_ipi_handler);
+
+	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA, 0);
+
+	vm_vcpu_add_default(vm, SENDER_VCPU_ID, sender_guest_code);
+
+	test_data_page_vaddr = vm_vaddr_alloc(vm, 0x1000, 0x1000, 0, 0);
+	data = (struct test_data_page *)addr_gva2hva(vm, test_data_page_vaddr);
+	memset(data, 0, sizeof(*data));
+	params[0].data = data;
+	params[1].data = data;
+
+	vcpu_args_set(vm, HALTER_VCPU_ID, 1, test_data_page_vaddr);
+	vcpu_args_set(vm, SENDER_VCPU_ID, 1, test_data_page_vaddr);
+
+	pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
+	params[0].pipis_rcvd = pipis_rcvd;
+	params[1].pipis_rcvd = pipis_rcvd;
+
+	/* Start halter vCPU thread and wait for it to execute first HLT. */
+	params[0].vcpu_id = HALTER_VCPU_ID;
+	r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
+	/* pthread_create() returns the error number; it does not set errno. */
+	TEST_ASSERT(r == 0, "pthread_create halter failed errno=%d", r);
+	fprintf(stderr, "Halter vCPU thread started\n");
+
+	wait_secs = 0;
+	while ((wait_secs < max_halter_wait) && !data->hlt_count) {
+		sleep(1);
+		wait_secs++;
+	}
+
+	TEST_ASSERT(data->hlt_count,
+		    "Halter vCPU did not execute first HLT within %d seconds",
+		    max_halter_wait);
+
+	fprintf(stderr,
+		"Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
+		data->halter_apic_id, wait_secs);
+
+	params[1].vcpu_id = SENDER_VCPU_ID;
+	r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
+	TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", r);
+
+	fprintf(stderr,
+		"IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
+		run_secs);
+
+	if (!migrate)
+		sleep(run_secs);
+	else
+		do_migrations(data, run_secs, delay_usecs, pipis_rcvd);
+
+	/* Cancel threads and wait for them to stop. */
+	cancel_join_vcpu_thread(threads[0], HALTER_VCPU_ID);
+	cancel_join_vcpu_thread(threads[1], SENDER_VCPU_ID);
+
+	fprintf(stderr,
+		"Test successful after running for %d seconds.\n"
+		"Sending vCPU sent %lu IPIs to halting vCPU\n"
+		"Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
+		"Halter APIC ID=%#x\n"
+		"Sender ICR value=%#x ICR2 value=%#x\n"
+		"Halter TPR=%#x PPR=%#x LVR=%#x\n"
+		"Migrations attempted: %lu\n"
+		"Migrations completed: %lu\n",
+		run_secs, data->ipis_sent,
+		data->hlt_count, data->wake_count, *pipis_rcvd,
+		data->halter_apic_id,
+		data->icr, data->icr2,
+		data->halter_tpr, data->halter_ppr, data->halter_lvr,
+		data->migrations_attempted, data->migrations_completed);
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
new file mode 100644
index 000000000000..9246ea310587
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xen_shinfo_test
+ *
+ * Copyright © 2021 Amazon.com, Inc. or its affiliates.
+ *
+ * Xen shared_info / pvclock testing
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <stdint.h>
+#include <time.h>
+
+#define VCPU_ID		5
+
+#define SHINFO_REGION_GPA	0xc0000000ULL
+#define SHINFO_REGION_SLOT	10
+#define PAGE_SIZE		4096
+
+#define PVTIME_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE)
+
+static struct kvm_vm *vm;
+
+#define XEN_HYPERCALL_MSR	0x40000000
+
+struct pvclock_vcpu_time_info {
+	u32	version;	/* main() requires this to be non-zero and even */
+	u32	pad0;
+	u64	tsc_timestamp;
+	u64	system_time;
+	u32	tsc_to_system_mul;
+	s8	tsc_shift;
+	u8	flags;
+	u8	pad[2];
+} __attribute__((__packed__)); /* 32 bytes */
+
+struct pvclock_wall_clock {
+	u32	version;	/* main() requires this to be non-zero and even */
+	u32	sec;		/* main() reads sec and nsec as the VM's wallclock timespec */
+	u32	nsec;
+} __attribute__((__packed__));
+
+static void guest_code(void)
+{
+	GUEST_DONE();	/* the guest does no work: it reports completion, which ends main()'s run loop */
+}
+
+/*
+ * Three-way compare: returns 1 if *a is later than *b, -1 if earlier, 0 if equal.
+ */
+static int cmp_timespec(struct timespec *a, struct timespec *b)
+{
+	/* Seconds decide unless they tie; nanoseconds break the tie. */
+	if (a->tv_sec != b->tv_sec)
+		return a->tv_sec > b->tv_sec ? 1 : -1;
+	if (a->tv_nsec != b->tv_nsec)
+		return a->tv_nsec > b->tv_nsec ? 1 : -1;
+
+	return 0;
+}
+
+/*
+ * Configure Xen shared_info, vcpu_info and vcpu_time_info for one vCPU, run the
+ * guest to completion, then check the wallclock and both time-info versions.
+ */
+int main(int argc, char *argv[])
+{
+	struct timespec min_ts, max_ts, vm_ts;
+
+	if (!(kvm_check_cap(KVM_CAP_XEN_HVM) & KVM_XEN_HVM_CONFIG_SHARED_INFO)) {
+		print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available");
+		exit(KSFT_SKIP);
+	}
+
+	clock_gettime(CLOCK_REALTIME, &min_ts);
+
+	vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	/* Map a region for the shared_info page */
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+				    SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
+
+	struct kvm_xen_hvm_config hvmc = {
+		.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
+		.msr = XEN_HYPERCALL_MSR,
+	};
+	vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
+
+	struct kvm_xen_hvm_attr lm = {
+		.type = KVM_XEN_ATTR_TYPE_LONG_MODE,
+		.u.long_mode = 1,
+	};
+	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+	struct kvm_xen_hvm_attr ha = {
+		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+		.u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,
+	};
+	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
+
+	struct kvm_xen_vcpu_attr vi = {
+		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
+		.u.gpa = SHINFO_REGION_GPA + 0x40,
+	};
+	vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &vi);
+
+	struct kvm_xen_vcpu_attr pvclock = {
+		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
+		.u.gpa = PVTIME_ADDR,
+	};
+	vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock);
+
+	for (;;) {
+		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+		struct ucall uc;
+
+		vcpu_run(vm, VCPU_ID);
+
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_FAIL("%s", (const char *)uc.args[0]);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+		}
+	}
+
+ done:
+	clock_gettime(CLOCK_REALTIME, &max_ts);
+
+	/*
+	 * Just a *really* basic check that things are being put in the
+	 * right place. The actual calculations are much the same for
+	 * Xen as they are for the KVM variants, so no need to check.
+	 */
+	struct pvclock_wall_clock *wc;
+	struct pvclock_vcpu_time_info *ti, *ti2;
+
+	wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
+	ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
+	ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
+
+	vm_ts.tv_sec = wc->sec;
+	vm_ts.tv_nsec = wc->nsec;
+	TEST_ASSERT(wc->version && !(wc->version & 1), "Bad wallclock version %x", wc->version);
+	TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
+	TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");
+
+	TEST_ASSERT(ti->version && !(ti->version & 1), "Bad time_info version %x", ti->version);
+	TEST_ASSERT(ti2->version && !(ti2->version & 1), "Bad time_info version %x", ti2->version);
+
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
new file mode 100644
index 000000000000..8389e0bfd711
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xen_vmcall_test
+ *
+ * Copyright © 2020 Amazon.com, Inc. or its affiliates.
+ *
+ * Userspace hypercall testing
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID		5
+
+#define HCALL_REGION_GPA	0xc0000000ULL
+#define HCALL_REGION_SLOT	10
+#define PAGE_SIZE		4096
+
+static struct kvm_vm *vm;
+
+#define INPUTVALUE 17
+#define ARGVALUE(x) (0xdeadbeef5a5a0000UL + (x))	/* (x) parenthesized so expression arguments bind correctly */
+#define RETVALUE 0xcafef00dfbfbffffUL
+
+#define XEN_HYPERCALL_MSR	0x40000200
+#define HV_GUEST_OS_ID_MSR	0x40000000
+#define HV_HYPERCALL_MSR	0x40000001
+
+#define HVCALL_SIGNAL_EVENT		0x005d
+#define HV_STATUS_INVALID_ALIGNMENT	4
+
+static void guest_code(void)
+{
+	unsigned long rax = INPUTVALUE;		/* hypercall number; main() checks it as hcall.input */
+	unsigned long rdi = ARGVALUE(1);	/* ARGVALUE(1)..(6): main() checks them as hcall.params[0..5] */
+	unsigned long rsi = ARGVALUE(2);
+	unsigned long rdx = ARGVALUE(3);
+	unsigned long rcx;
+	register unsigned long r10 __asm__("r10") = ARGVALUE(4);
+	register unsigned long r8 __asm__("r8") = ARGVALUE(5);
+	register unsigned long r9 __asm__("r9") = ARGVALUE(6);
+	/* Guest side: one hypercall each via raw vmcall, the Xen page and the Hyper-V page. */
+	/* First a direct invocation of 'vmcall' */
+	__asm__ __volatile__("vmcall" :
+			     "=a"(rax) :
+			     "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+			     "r"(r10), "r"(r8), "r"(r9));
+	GUEST_ASSERT(rax == RETVALUE);	/* main() stores RETVALUE as the hypercall result */
+
+	/* Fill in the Xen hypercall page: write its GPA to the MSR main() passed to KVM_XEN_HVM_CONFIG */
+	__asm__ __volatile__("wrmsr" : : "c" (XEN_HYPERCALL_MSR),
+			     "a" (HCALL_REGION_GPA & 0xffffffff),
+			     "d" (HCALL_REGION_GPA >> 32));
+
+	/* Set Hyper-V Guest OS ID */
+	__asm__ __volatile__("wrmsr" : : "c" (HV_GUEST_OS_ID_MSR),
+			     "a" (0x5a), "d" (0));
+
+	/* Hyper-V hypercall page: second page of the region. NOTE(review): "+ 1" presumably sets an enable bit - confirm */
+	u64 msrval = HCALL_REGION_GPA + PAGE_SIZE + 1;
+	__asm__ __volatile__("wrmsr" : : "c" (HV_HYPERCALL_MSR),
+			     "a" (msrval & 0xffffffff),
+			     "d" (msrval >> 32));
+
+	/* Invoke a Xen hypercall by calling into the first page at offset INPUTVALUE * 32 */
+	__asm__ __volatile__("call *%1" : "=a"(rax) :
+			     "r"(HCALL_REGION_GPA + INPUTVALUE * 32),
+			     "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+			     "r"(r10), "r"(r8), "r"(r9));
+	GUEST_ASSERT(rax == RETVALUE);
+
+	/* Invoke a Hyper-V hypercall by calling the start of the second page */
+	rax = 0;
+	rcx = HVCALL_SIGNAL_EVENT;	/* code */
+	rdx = 0x5a5a5a5a;		/* ingpa (badly aligned) */
+	__asm__ __volatile__("call *%1" : "=a"(rax) :
+			     "r"(HCALL_REGION_GPA + PAGE_SIZE),
+			     "a"(rax), "c"(rcx), "d"(rdx),
+			     "r"(r8));
+	GUEST_ASSERT(rax == HV_STATUS_INVALID_ALIGNMENT);	/* presumably due to the misaligned ingpa above */
+
+	GUEST_DONE();
+}
+
+/*
+ * Host side: intercept the guest's Xen hypercalls, check what KVM reports for each
+ * and return RETVALUE, until the guest signals UCALL_DONE. Skips if unsupported.
+ */
+int main(int argc, char *argv[])
+{
+	struct kvm_xen_hvm_config hvmc = {
+		.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
+		.msr = XEN_HYPERCALL_MSR,
+	};
+	int finished = 0;
+
+	if (!(kvm_check_cap(KVM_CAP_XEN_HVM) & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL)) {
+		print_skip("KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL not available");
+		exit(KSFT_SKIP);
+	}
+
+	vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
+	vcpu_set_hv_cpuid(vm, VCPU_ID);
+	vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
+
+	/* Map a region for the hypercall pages */
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+				    HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0);
+	virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2, 0);
+
+	while (!finished) {
+		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+		struct ucall uc;
+		unsigned long cmd;
+
+		vcpu_run(vm, VCPU_ID);
+		if (run->exit_reason == KVM_EXIT_XEN) {
+			ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
+			ASSERT_EQ(run->xen.u.hcall.cpl, 0);
+			ASSERT_EQ(run->xen.u.hcall.longmode, 1);
+			ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
+			ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
+			ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
+			ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
+			ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
+			ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
+			ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
+			run->xen.u.hcall.result = RETVALUE;
+			continue;
+		}
+
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+			    run->exit_reason, exit_reason_str(run->exit_reason));
+
+		cmd = get_ucall(vm, VCPU_ID, &uc);
+		if (cmd == UCALL_DONE) {
+			finished = 1;
+		} else if (cmd == UCALL_ABORT) {
+			TEST_FAIL("%s", (const char *)uc.args[0]);
+		} else if (cmd != UCALL_SYNC) {
+			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+		}
+	}
+
+	kvm_vm_free(vm);
+	return 0;
+}
author	Linus Torvalds <torvalds@linux-foundation.org>	2021-02-22 00:31:43 +0300
committer	Linus Torvalds <torvalds@linux-foundation.org>	2021-02-22 00:31:43 +0300
commit	3e10585335b7967326ca7b4118cada0d2d00a2ab (patch)
tree	e1655bc4f093f7de3a54dc3b2d83a54159aca10b /tools
parent	9c5b80b795e9c847a7b7f5e63c6bcf07873fbcdf (diff)
parent	8c6e67bec3192f16fa624203c8131e10cc4814ba (diff)
download	linux-3e10585335b7967326ca7b4118cada0d2d00a2ab.tar.xz