diff options
author | Marc Zyngier <maz@kernel.org> | 2022-12-05 17:16:41 +0300 |
---|---|---|
committer | Marc Zyngier <maz@kernel.org> | 2022-12-05 17:16:41 +0300 |
commit | adde0476aff9c4d15df0c5738880bb8cbd44ad8b (patch) | |
tree | 2f07f39d5a95763ed66926124b44498d4f5886c2 | |
parent | 02f6fdd44dbf01b8732eb37499c20fbf5830d784 (diff) | |
parent | ff2b5509e1d252cd18bb1430b5461d5044701559 (diff) | |
download | linux-adde0476aff9c4d15df0c5738880bb8cbd44ad8b.tar.xz |
Merge branch kvm-arm64/selftest/s2-faults into kvmarm-master/next
* kvm-arm64/selftest/s2-faults:
: .
: New KVM/arm64 selftests exercising various sorts of S2 faults, courtesy
: of Ricardo Koller. From the cover letter:
:
: "This series adds a new aarch64 selftest for testing stage 2 fault handling
: for various combinations of guest accesses (e.g., write, S1PTW), backing
: sources (e.g., anon), and types of faults (e.g., read on hugetlbfs with a
: hole, write on a readonly memslot). Each test tries a different combination
: and then checks that the access results in the right behavior (e.g., uffd
: faults with the right address and write/read flag). [...]"
: .
KVM: selftests: aarch64: Add mix of tests into page_fault_test
KVM: selftests: aarch64: Add readonly memslot tests into page_fault_test
KVM: selftests: aarch64: Add dirty logging tests into page_fault_test
KVM: selftests: aarch64: Add userfaultfd tests into page_fault_test
KVM: selftests: aarch64: Add aarch64/page_fault_test
KVM: selftests: Use the right memslot for code, page-tables, and data allocations
KVM: selftests: Fix alignment in virt_arch_pgd_alloc() and vm_vaddr_alloc()
KVM: selftests: Add vm->memslots[] and enum kvm_mem_region_type
KVM: selftests: Stash backing_src_type in struct userspace_mem_region
tools: Copy bitfield.h from the kernel sources
KVM: selftests: aarch64: Construct DEFAULT_MAIR_EL1 using sysreg.h macros
KVM: selftests: Add missing close and munmap in __vm_mem_region_delete()
KVM: selftests: aarch64: Add virt_get_pte_hva() library function
KVM: selftests: Add a userfaultfd library
Signed-off-by: Marc Zyngier <maz@kernel.org>
-rw-r--r-- | tools/include/linux/bitfield.h | 176 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/.gitignore | 1 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/Makefile | 2 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/aarch64/page_fault_test.c | 1112 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/demand_paging_test.c | 228 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/include/aarch64/processor.h | 35 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/include/kvm_util_base.h | 31 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/include/userfaultfd_util.h | 45 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/lib/aarch64/processor.c | 48 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/lib/elf.c | 3 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/lib/kvm_util.c | 82 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/lib/riscv/processor.c | 29 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/lib/s390x/processor.c | 8 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/lib/userfaultfd_util.c | 186 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/lib/x86_64/processor.c | 13 |
15 files changed, 1723 insertions, 276 deletions
diff --git a/tools/include/linux/bitfield.h b/tools/include/linux/bitfield.h new file mode 100644 index 000000000000..6093fa6db260 --- /dev/null +++ b/tools/include/linux/bitfield.h @@ -0,0 +1,176 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2014 Felix Fietkau <nbd@nbd.name> + * Copyright (C) 2004 - 2009 Ivo van Doorn <IvDoorn@gmail.com> + */ + +#ifndef _LINUX_BITFIELD_H +#define _LINUX_BITFIELD_H + +#include <linux/build_bug.h> +#include <asm/byteorder.h> + +/* + * Bitfield access macros + * + * FIELD_{GET,PREP} macros take as first parameter shifted mask + * from which they extract the base mask and shift amount. + * Mask must be a compilation time constant. + * + * Example: + * + * #define REG_FIELD_A GENMASK(6, 0) + * #define REG_FIELD_B BIT(7) + * #define REG_FIELD_C GENMASK(15, 8) + * #define REG_FIELD_D GENMASK(31, 16) + * + * Get: + * a = FIELD_GET(REG_FIELD_A, reg); + * b = FIELD_GET(REG_FIELD_B, reg); + * + * Set: + * reg = FIELD_PREP(REG_FIELD_A, 1) | + * FIELD_PREP(REG_FIELD_B, 0) | + * FIELD_PREP(REG_FIELD_C, c) | + * FIELD_PREP(REG_FIELD_D, 0x40); + * + * Modify: + * reg &= ~REG_FIELD_C; + * reg |= FIELD_PREP(REG_FIELD_C, c); + */ + +#define __bf_shf(x) (__builtin_ffsll(x) - 1) + +#define __scalar_type_to_unsigned_cases(type) \ + unsigned type: (unsigned type)0, \ + signed type: (unsigned type)0 + +#define __unsigned_scalar_typeof(x) typeof( \ + _Generic((x), \ + char: (unsigned char)0, \ + __scalar_type_to_unsigned_cases(char), \ + __scalar_type_to_unsigned_cases(short), \ + __scalar_type_to_unsigned_cases(int), \ + __scalar_type_to_unsigned_cases(long), \ + __scalar_type_to_unsigned_cases(long long), \ + default: (x))) + +#define __bf_cast_unsigned(type, x) ((__unsigned_scalar_typeof(type))(x)) + +#define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \ + ({ \ + BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ + _pfx "mask is not constant"); \ + BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero"); \ + BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ + ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ + _pfx "value too large for the field"); \ + BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \ + __bf_cast_unsigned(_reg, ~0ull), \ + _pfx "type of reg too small for mask"); \ + __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \ + (1ULL << __bf_shf(_mask))); \ + }) + +/** + * FIELD_MAX() - produce the maximum value representable by a field + * @_mask: shifted mask defining the field's length and position + * + * FIELD_MAX() returns the maximum value that can be held in the field + * specified by @_mask. + */ +#define FIELD_MAX(_mask) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_MAX: "); \ + (typeof(_mask))((_mask) >> __bf_shf(_mask)); \ + }) + +/** + * FIELD_FIT() - check if value fits in the field + * @_mask: shifted mask defining the field's length and position + * @_val: value to test against the field + * + * Return: true if @_val can fit inside @_mask, false if @_val is too big. + */ +#define FIELD_FIT(_mask, _val) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_FIT: "); \ + !((((typeof(_mask))_val) << __bf_shf(_mask)) & ~(_mask)); \ + }) + +/** + * FIELD_PREP() - prepare a bitfield element + * @_mask: shifted mask defining the field's length and position + * @_val: value to put in the field + * + * FIELD_PREP() masks and shifts up the value. The result should + * be combined with other fields of the bitfield using logical OR. + */ +#define FIELD_PREP(_mask, _val) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: "); \ + ((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \ + }) + +/** + * FIELD_GET() - extract a bitfield element + * @_mask: shifted mask defining the field's length and position + * @_reg: value of entire bitfield + * + * FIELD_GET() extracts the field specified by @_mask from the + * bitfield passed in as @_reg by masking and shifting it down. + */ +#define FIELD_GET(_mask, _reg) \ + ({ \ + __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ + (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \ + }) + +extern void __compiletime_error("value doesn't fit into mask") +__field_overflow(void); +extern void __compiletime_error("bad bitfield mask") +__bad_mask(void); +static __always_inline u64 field_multiplier(u64 field) +{ + if ((field | (field - 1)) & ((field | (field - 1)) + 1)) + __bad_mask(); + return field & -field; +} +static __always_inline u64 field_mask(u64 field) +{ + return field / field_multiplier(field); +} +#define field_max(field) ((typeof(field))field_mask(field)) +#define ____MAKE_OP(type,base,to,from) \ +static __always_inline __##type type##_encode_bits(base v, base field) \ +{ \ + if (__builtin_constant_p(v) && (v & ~field_mask(field))) \ + __field_overflow(); \ + return to((v & field_mask(field)) * field_multiplier(field)); \ +} \ +static __always_inline __##type type##_replace_bits(__##type old, \ + base val, base field) \ +{ \ + return (old & ~to(field)) | type##_encode_bits(val, field); \ +} \ +static __always_inline void type##p_replace_bits(__##type *p, \ + base val, base field) \ +{ \ + *p = (*p & ~to(field)) | type##_encode_bits(val, field); \ +} \ +static __always_inline base type##_get_bits(__##type v, base field) \ +{ \ + return (from(v) & field)/field_multiplier(field); \ +} +#define __MAKE_OP(size) \ + ____MAKE_OP(le##size,u##size,cpu_to_le##size,le##size##_to_cpu) \ + ____MAKE_OP(be##size,u##size,cpu_to_be##size,be##size##_to_cpu) \ + ____MAKE_OP(u##size,u##size,,) +____MAKE_OP(u8,u8,,) +__MAKE_OP(16) +__MAKE_OP(32) +__MAKE_OP(64) +#undef __MAKE_OP +#undef ____MAKE_OP + +#endif diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 2f0d705db9db..4a30d684e208 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -4,6 +4,7 @@ /aarch64/debug-exceptions /aarch64/get-reg-list /aarch64/hypercalls +/aarch64/page_fault_test /aarch64/psci_test /aarch64/vcpu_width_config /aarch64/vgic_init diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 0172eb6cb6ee..50c30335460f 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -47,6 +47,7 @@ LIBKVM += lib/perf_test_util.c LIBKVM += lib/rbtree.c LIBKVM += lib/sparsebit.c LIBKVM += lib/test_util.c +LIBKVM += lib/userfaultfd_util.c LIBKVM_STRING += lib/string_override.c @@ -152,6 +153,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/arch_timer TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/hypercalls +TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test TEST_GEN_PROGS_aarch64 += aarch64/psci_test TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config TEST_GEN_PROGS_aarch64 += aarch64/vgic_init diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c new file mode 100644 index 000000000000..05bb6a6369c2 --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -0,0 +1,1112 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * page_fault_test.c - Test stage 2 faults. + * + * This test tries different combinations of guest accesses (e.g., write, + * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on + * hugetlbfs with a hole). It checks that the expected handling method is + * called (e.g., uffd faults with the right address and write/read flag). + */ + +#define _GNU_SOURCE +#include <linux/bitmap.h> +#include <fcntl.h> +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> +#include <asm/sysreg.h> +#include <linux/bitfield.h> +#include "guest_modes.h" +#include "userfaultfd_util.h" + +/* Guest virtual addresses that point to the test page and its PTE. */ +#define TEST_GVA 0xc0000000 +#define TEST_EXEC_GVA (TEST_GVA + 0x8) +#define TEST_PTE_GVA 0xb0000000 +#define TEST_DATA 0x0123456789ABCDEF + +static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA; + +#define CMD_NONE (0) +#define CMD_SKIP_TEST (1ULL << 1) +#define CMD_HOLE_PT (1ULL << 2) +#define CMD_HOLE_DATA (1ULL << 3) +#define CMD_CHECK_WRITE_IN_DIRTY_LOG (1ULL << 4) +#define CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG (1ULL << 5) +#define CMD_CHECK_NO_WRITE_IN_DIRTY_LOG (1ULL << 6) +#define CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG (1ULL << 7) +#define CMD_SET_PTE_AF (1ULL << 8) + +#define PREPARE_FN_NR 10 +#define CHECK_FN_NR 10 + +static struct event_cnt { + int mmio_exits; + int fail_vcpu_runs; + int uffd_faults; + /* uffd_faults is incremented from multiple threads. */ + pthread_mutex_t uffd_faults_mutex; +} events; + +struct test_desc { + const char *name; + uint64_t mem_mark_cmd; + /* Skip the test if any prepare function returns false */ + bool (*guest_prepare[PREPARE_FN_NR])(void); + void (*guest_test)(void); + void (*guest_test_check[CHECK_FN_NR])(void); + uffd_handler_t uffd_pt_handler; + uffd_handler_t uffd_data_handler; + void (*dabt_handler)(struct ex_regs *regs); + void (*iabt_handler)(struct ex_regs *regs); + void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run); + void (*fail_vcpu_run_handler)(int ret); + uint32_t pt_memslot_flags; + uint32_t data_memslot_flags; + bool skip; + struct event_cnt expected_events; +}; + +struct test_params { + enum vm_mem_backing_src_type src_type; + struct test_desc *test_desc; +}; + +static inline void flush_tlb_page(uint64_t vaddr) +{ + uint64_t page = vaddr >> 12; + + dsb(ishst); + asm volatile("tlbi vaae1is, %0" :: "r" (page)); + dsb(ish); + isb(); +} + +static void guest_write64(void) +{ + uint64_t val; + + WRITE_ONCE(*guest_test_memory, TEST_DATA); + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, TEST_DATA); +} + +/* Check the system for atomic instructions. */ +static bool guest_check_lse(void) +{ + uint64_t isar0 = read_sysreg(id_aa64isar0_el1); + uint64_t atomic; + + atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS), isar0); + return atomic >= 2; +} + +static bool guest_check_dc_zva(void) +{ + uint64_t dczid = read_sysreg(dczid_el0); + uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_DZP), dczid); + + return dzp == 0; +} + +/* Compare and swap instruction. */ +static void guest_cas(void) +{ + uint64_t val; + + GUEST_ASSERT(guest_check_lse()); + asm volatile(".arch_extension lse\n" + "casal %0, %1, [%2]\n" + :: "r" (0), "r" (TEST_DATA), "r" (guest_test_memory)); + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, TEST_DATA); +} + +static void guest_read64(void) +{ + uint64_t val; + + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, 0); +} + +/* Address translation instruction */ +static void guest_at(void) +{ + uint64_t par; + + asm volatile("at s1e1r, %0" :: "r" (guest_test_memory)); + par = read_sysreg(par_el1); + isb(); + + /* Bit 1 indicates whether the AT was successful */ + GUEST_ASSERT_EQ(par & 1, 0); +} + +/* + * The size of the block written by "dc zva" is guaranteed to be between (2 << + * 0) and (2 << 9), which is safe in our case as we need the write to happen + * for at least a word, and not more than a page. + */ +static void guest_dc_zva(void) +{ + uint16_t val; + + asm volatile("dc zva, %0" :: "r" (guest_test_memory)); + dsb(ish); + val = READ_ONCE(*guest_test_memory); + GUEST_ASSERT_EQ(val, 0); +} + +/* + * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0). + * And that's special because KVM must take special care with those: they + * should still count as accesses for dirty logging or user-faulting, but + * should be handled differently on mmio. + */ +static void guest_ld_preidx(void) +{ + uint64_t val; + uint64_t addr = TEST_GVA - 8; + + /* + * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is + * in a gap between memslots not backing by anything. + */ + asm volatile("ldr %0, [%1, #8]!" + : "=r" (val), "+r" (addr)); + GUEST_ASSERT_EQ(val, 0); + GUEST_ASSERT_EQ(addr, TEST_GVA); +} + +static void guest_st_preidx(void) +{ + uint64_t val = TEST_DATA; + uint64_t addr = TEST_GVA - 8; + + asm volatile("str %0, [%1, #8]!" + : "+r" (val), "+r" (addr)); + + GUEST_ASSERT_EQ(addr, TEST_GVA); + val = READ_ONCE(*guest_test_memory); +} + +static bool guest_set_ha(void) +{ + uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1); + uint64_t hadbs, tcr; + + /* Skip if HA is not supported. */ + hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS), mmfr1); + if (hadbs == 0) + return false; + + tcr = read_sysreg(tcr_el1) | TCR_EL1_HA; + write_sysreg(tcr, tcr_el1); + isb(); + + return true; +} + +static bool guest_clear_pte_af(void) +{ + *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF; + flush_tlb_page(TEST_GVA); + + return true; +} + +static void guest_check_pte_af(void) +{ + dsb(ish); + GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF); +} + +static void guest_check_write_in_dirty_log(void) +{ + GUEST_SYNC(CMD_CHECK_WRITE_IN_DIRTY_LOG); +} + +static void guest_check_no_write_in_dirty_log(void) +{ + GUEST_SYNC(CMD_CHECK_NO_WRITE_IN_DIRTY_LOG); +} + +static void guest_check_s1ptw_wr_in_dirty_log(void) +{ + GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG); +} + +static void guest_exec(void) +{ + int (*code)(void) = (int (*)(void))TEST_EXEC_GVA; + int ret; + + ret = code(); + GUEST_ASSERT_EQ(ret, 0x77); +} + +static bool guest_prepare(struct test_desc *test) +{ + bool (*prepare_fn)(void); + int i; + + for (i = 0; i < PREPARE_FN_NR; i++) { + prepare_fn = test->guest_prepare[i]; + if (prepare_fn && !prepare_fn()) + return false; + } + + return true; +} + +static void guest_test_check(struct test_desc *test) +{ + void (*check_fn)(void); + int i; + + for (i = 0; i < CHECK_FN_NR; i++) { + check_fn = test->guest_test_check[i]; + if (check_fn) + check_fn(); + } +} + +static void guest_code(struct test_desc *test) +{ + if (!guest_prepare(test)) + GUEST_SYNC(CMD_SKIP_TEST); + + GUEST_SYNC(test->mem_mark_cmd); + + if (test->guest_test) + test->guest_test(); + + guest_test_check(test); + GUEST_DONE(); +} + +static void no_dabt_handler(struct ex_regs *regs) +{ + GUEST_ASSERT_1(false, read_sysreg(far_el1)); +} + +static void no_iabt_handler(struct ex_regs *regs) +{ + GUEST_ASSERT_1(false, regs->pc); +} + +static struct uffd_args { + char *copy; + void *hva; + uint64_t paging_size; +} pt_args, data_args; + +/* Returns true to continue the test, and false if it should be skipped. */ +static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg, + struct uffd_args *args, bool expect_write) +{ + uint64_t addr = msg->arg.pagefault.address; + uint64_t flags = msg->arg.pagefault.flags; + struct uffdio_copy copy; + int ret; + + TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING, + "The only expected UFFD mode is MISSING"); + ASSERT_EQ(!!(flags & UFFD_PAGEFAULT_FLAG_WRITE), expect_write); + ASSERT_EQ(addr, (uint64_t)args->hva); + + pr_debug("uffd fault: addr=%p write=%d\n", + (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE)); + + copy.src = (uint64_t)args->copy; + copy.dst = addr; + copy.len = args->paging_size; + copy.mode = 0; + + ret = ioctl(uffd, UFFDIO_COPY, ©); + if (ret == -1) { + pr_info("Failed UFFDIO_COPY in 0x%lx with errno: %d\n", + addr, errno); + return ret; + } + + pthread_mutex_lock(&events.uffd_faults_mutex); + events.uffd_faults += 1; + pthread_mutex_unlock(&events.uffd_faults_mutex); + return 0; +} + +static int uffd_pt_write_handler(int mode, int uffd, struct uffd_msg *msg) +{ + return uffd_generic_handler(mode, uffd, msg, &pt_args, true); +} + +static int uffd_data_write_handler(int mode, int uffd, struct uffd_msg *msg) +{ + return uffd_generic_handler(mode, uffd, msg, &data_args, true); +} + +static int uffd_data_read_handler(int mode, int uffd, struct uffd_msg *msg) +{ + return uffd_generic_handler(mode, uffd, msg, &data_args, false); +} + +static void setup_uffd_args(struct userspace_mem_region *region, + struct uffd_args *args) +{ + args->hva = (void *)region->region.userspace_addr; + args->paging_size = region->region.memory_size; + + args->copy = malloc(args->paging_size); + TEST_ASSERT(args->copy, "Failed to allocate data copy."); + memcpy(args->copy, args->hva, args->paging_size); +} + +static void setup_uffd(struct kvm_vm *vm, struct test_params *p, + struct uffd_desc **pt_uffd, struct uffd_desc **data_uffd) +{ + struct test_desc *test = p->test_desc; + int uffd_mode = UFFDIO_REGISTER_MODE_MISSING; + + setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_PT), &pt_args); + setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_TEST_DATA), &data_args); + + *pt_uffd = NULL; + if (test->uffd_pt_handler) + *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0, + pt_args.hva, + pt_args.paging_size, + test->uffd_pt_handler); + + *data_uffd = NULL; + if (test->uffd_data_handler) + *data_uffd = uffd_setup_demand_paging(uffd_mode, 0, + data_args.hva, + data_args.paging_size, + test->uffd_data_handler); +} + +static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd, + struct uffd_desc *data_uffd) +{ + if (test->uffd_pt_handler) + uffd_stop_demand_paging(pt_uffd); + if (test->uffd_data_handler) + uffd_stop_demand_paging(data_uffd); + + free(pt_args.copy); + free(data_args.copy); +} + +static int uffd_no_handler(int mode, int uffd, struct uffd_msg *msg) +{ + TEST_FAIL("There was no UFFD fault expected."); + return -1; +} + +/* Returns false if the test should be skipped. */ +static bool punch_hole_in_backing_store(struct kvm_vm *vm, + struct userspace_mem_region *region) +{ + void *hva = (void *)region->region.userspace_addr; + uint64_t paging_size = region->region.memory_size; + int ret, fd = region->fd; + + if (fd != -1) { + ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + 0, paging_size); + TEST_ASSERT(ret == 0, "fallocate failed\n"); + } else { + ret = madvise(hva, paging_size, MADV_DONTNEED); + TEST_ASSERT(ret == 0, "madvise failed\n"); + } + + return true; +} + +static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run) +{ + struct userspace_mem_region *region; + void *hva; + + region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + hva = (void *)region->region.userspace_addr; + + ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr); + + memcpy(hva, run->mmio.data, run->mmio.len); + events.mmio_exits += 1; +} + +static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run) +{ + uint64_t data; + + memcpy(&data, run->mmio.data, sizeof(data)); + pr_debug("addr=%lld len=%d w=%d data=%lx\n", + run->mmio.phys_addr, run->mmio.len, + run->mmio.is_write, data); + TEST_FAIL("There was no MMIO exit expected."); +} + +static bool check_write_in_dirty_log(struct kvm_vm *vm, + struct userspace_mem_region *region, + uint64_t host_pg_nr) +{ + unsigned long *bmap; + bool first_page_dirty; + uint64_t size = region->region.memory_size; + + /* getpage_size() is not always equal to vm->page_size */ + bmap = bitmap_zalloc(size / getpagesize()); + kvm_vm_get_dirty_log(vm, region->region.slot, bmap); + first_page_dirty = test_bit(host_pg_nr, bmap); + free(bmap); + return first_page_dirty; +} + +/* Returns true to continue the test, and false if it should be skipped. */ +static bool handle_cmd(struct kvm_vm *vm, int cmd) +{ + struct userspace_mem_region *data_region, *pt_region; + bool continue_test = true; + + data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + pt_region = vm_get_mem_region(vm, MEM_REGION_PT); + + if (cmd == CMD_SKIP_TEST) + continue_test = false; + + if (cmd & CMD_HOLE_PT) + continue_test = punch_hole_in_backing_store(vm, pt_region); + if (cmd & CMD_HOLE_DATA) + continue_test = punch_hole_in_backing_store(vm, data_region); + if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG) + TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0), + "Missing write in dirty log"); + if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG) + TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, 0), + "Missing s1ptw write in dirty log"); + if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG) + TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0), + "Unexpected write in dirty log"); + if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG) + TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, 0), + "Unexpected s1ptw write in dirty log"); + + return continue_test; +} + +void fail_vcpu_run_no_handler(int ret) +{ + TEST_FAIL("Unexpected vcpu run failure\n"); +} + +void fail_vcpu_run_mmio_no_syndrome_handler(int ret) +{ + TEST_ASSERT(errno == ENOSYS, + "The mmio handler should have returned not implemented."); + events.fail_vcpu_runs += 1; +} + +typedef uint32_t aarch64_insn_t; +extern aarch64_insn_t __exec_test[2]; + +noinline void __return_0x77(void) +{ + asm volatile("__exec_test: mov x0, #0x77\n" + "ret\n"); +} + +/* + * Note that this function runs on the host before the test VM starts: there's + * no need to sync the D$ and I$ caches. + */ +static void load_exec_code_for_test(struct kvm_vm *vm) +{ + uint64_t *code; + struct userspace_mem_region *region; + void *hva; + + region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + hva = (void *)region->region.userspace_addr; + + assert(TEST_EXEC_GVA > TEST_GVA); + code = hva + TEST_EXEC_GVA - TEST_GVA; + memcpy(code, __exec_test, sizeof(__exec_test)); +} + +static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + struct test_desc *test) +{ + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_DABT, no_dabt_handler); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_IABT, no_iabt_handler); +} + +static void setup_gva_maps(struct kvm_vm *vm) +{ + struct userspace_mem_region *region; + uint64_t pte_gpa; + + region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); + /* Map TEST_GVA first. This will install a new PTE. */ + virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr); + /* Then map TEST_PTE_GVA to the above PTE. */ + pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA)); + virt_pg_map(vm, TEST_PTE_GVA, pte_gpa); +} + +enum pf_test_memslots { + CODE_AND_DATA_MEMSLOT, + PAGE_TABLE_MEMSLOT, + TEST_DATA_MEMSLOT, +}; + +/* + * Create a memslot for code and data at pfn=0, and test-data and PT ones + * at max_gfn. + */ +static void setup_memslots(struct kvm_vm *vm, struct test_params *p) +{ + uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type); + uint64_t guest_page_size = vm->page_size; + uint64_t max_gfn = vm_compute_max_gfn(vm); + /* Enough for 2M of code when using 4K guest pages. */ + uint64_t code_npages = 512; + uint64_t pt_size, data_size, data_gpa; + + /* + * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using + * VM_MODE_P48V48_4K. Note that the .text takes ~1.6MBs. That's 13 + * pages. VM_MODE_P48V48_4K is the mode with most PT pages; let's use + * twice that just in case. + */ + pt_size = 26 * guest_page_size; + + /* memslot sizes and gpa's must be aligned to the backing page size */ + pt_size = align_up(pt_size, backing_src_pagesz); + data_size = align_up(guest_page_size, backing_src_pagesz); + data_gpa = (max_gfn * guest_page_size) - data_size; + data_gpa = align_down(data_gpa, backing_src_pagesz); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, + CODE_AND_DATA_MEMSLOT, code_npages, 0); + vm->memslots[MEM_REGION_CODE] = CODE_AND_DATA_MEMSLOT; + vm->memslots[MEM_REGION_DATA] = CODE_AND_DATA_MEMSLOT; + + vm_userspace_mem_region_add(vm, p->src_type, data_gpa - pt_size, + PAGE_TABLE_MEMSLOT, pt_size / guest_page_size, + p->test_desc->pt_memslot_flags); + vm->memslots[MEM_REGION_PT] = PAGE_TABLE_MEMSLOT; + + vm_userspace_mem_region_add(vm, p->src_type, data_gpa, TEST_DATA_MEMSLOT, + data_size / guest_page_size, + p->test_desc->data_memslot_flags); + vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT; +} + +static void setup_default_handlers(struct test_desc *test) +{ + if (!test->mmio_handler) + test->mmio_handler = mmio_no_handler; + + if (!test->fail_vcpu_run_handler) + test->fail_vcpu_run_handler = fail_vcpu_run_no_handler; +} + +static void check_event_counts(struct test_desc *test) +{ + ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults); + ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits); + ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs); +} + +static void print_test_banner(enum vm_guest_mode mode, struct test_params *p) +{ + struct test_desc *test = p->test_desc; + + pr_debug("Test: %s\n", test->name); + pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + pr_debug("Testing memory backing src type: %s\n", + vm_mem_backing_src_alias(p->src_type)->name); +} + +static void reset_event_counts(void) +{ + memset(&events, 0, sizeof(events)); +} + +/* + * This function either succeeds, skips the test (after setting test->skip), or + * fails with a TEST_FAIL that aborts all tests. + */ +static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + struct test_desc *test) +{ + struct kvm_run *run; + struct ucall uc; + int ret; + + run = vcpu->run; + + for (;;) { + ret = _vcpu_run(vcpu); + if (ret) { + test->fail_vcpu_run_handler(ret); + goto done; + } + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + if (!handle_cmd(vm, uc.args[1])) { + test->skip = true; + goto done; + } + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + break; + case UCALL_DONE: + goto done; + case UCALL_NONE: + if (run->exit_reason == KVM_EXIT_MMIO) + test->mmio_handler(vm, run); + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + pr_debug(test->skip ? "Skipped.\n" : "Done.\n"); +} + +static void run_test(enum vm_guest_mode mode, void *arg) +{ + struct test_params *p = (struct test_params *)arg; + struct test_desc *test = p->test_desc; + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + struct uffd_desc *pt_uffd, *data_uffd; + + print_test_banner(mode, p); + + vm = ____vm_create(mode); + setup_memslots(vm, p); + kvm_vm_elf_load(vm, program_invocation_name); + vcpu = vm_vcpu_add(vm, 0, guest_code); + + setup_gva_maps(vm); + + ucall_init(vm, NULL); + + reset_event_counts(); + + /* + * Set some code in the data memslot for the guest to execute (only + * applicable to the EXEC tests). This has to be done before + * setup_uffd() as that function copies the memslot data for the uffd + * handler. + */ + load_exec_code_for_test(vm); + setup_uffd(vm, p, &pt_uffd, &data_uffd); + setup_abort_handlers(vm, vcpu, test); + setup_default_handlers(test); + vcpu_args_set(vcpu, 1, test); + + vcpu_run_loop(vm, vcpu, test); + + ucall_uninit(vm); + kvm_vm_free(vm); + free_uffd(test, pt_uffd, data_uffd); + + /* + * Make sure we check the events after the uffd threads have exited, + * which means they updated their respective event counters. + */ + if (!test->skip) + check_event_counts(test); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-s mem-type]\n", name); + puts(""); + guest_modes_help(); + backing_src_help("-s"); + puts(""); +} + +#define SNAME(s) #s +#define SCAT2(a, b) SNAME(a ## _ ## b) +#define SCAT3(a, b, c) SCAT2(a, SCAT2(b, c)) +#define SCAT4(a, b, c, d) SCAT2(a, SCAT3(b, c, d)) + +#define _CHECK(_test) _CHECK_##_test +#define _PREPARE(_test) _PREPARE_##_test +#define _PREPARE_guest_read64 NULL +#define _PREPARE_guest_ld_preidx NULL +#define _PREPARE_guest_write64 NULL +#define _PREPARE_guest_st_preidx NULL +#define _PREPARE_guest_exec NULL +#define _PREPARE_guest_at NULL +#define _PREPARE_guest_dc_zva guest_check_dc_zva +#define _PREPARE_guest_cas guest_check_lse + +/* With or without access flag checks */ +#define _PREPARE_with_af guest_set_ha, guest_clear_pte_af +#define _PREPARE_no_af NULL +#define _CHECK_with_af guest_check_pte_af +#define _CHECK_no_af NULL + +/* Performs an access and checks that no faults were triggered. */ +#define TEST_ACCESS(_access, _with_af, _mark_cmd) \ +{ \ + .name = SCAT3(_access, _with_af, #_mark_cmd), \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .mem_mark_cmd = _mark_cmd, \ + .guest_test = _access, \ + .guest_test_check = { _CHECK(_with_af) }, \ + .expected_events = { 0 }, \ +} + +#define TEST_UFFD(_access, _with_af, _mark_cmd, \ + _uffd_data_handler, _uffd_pt_handler, _uffd_faults) \ +{ \ + .name = SCAT4(uffd, _access, _with_af, #_mark_cmd), \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .guest_test = _access, \ + .mem_mark_cmd = _mark_cmd, \ + .guest_test_check = { _CHECK(_with_af) }, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = _uffd_pt_handler, \ + .expected_events = { .uffd_faults = _uffd_faults, }, \ +} + +#define TEST_DIRTY_LOG(_access, _with_af, _test_check) \ +{ \ + .name = SCAT3(dirty_log, _access, _with_af), \ + .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .guest_test = _access, \ + .guest_test_check = { _CHECK(_with_af), _test_check, \ + guest_check_s1ptw_wr_in_dirty_log}, \ + .expected_events = { 0 }, \ +} + +#define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler, \ + _uffd_faults, _test_check) \ +{ \ + .name = SCAT3(uffd_and_dirty_log, _access, _with_af), \ + .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_prepare = { _PREPARE(_with_af), \ + _PREPARE(_access) }, \ + .guest_test = _access, \ + .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ + .guest_test_check = { _CHECK(_with_af), _test_check }, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = uffd_pt_write_handler, \ + .expected_events = { .uffd_faults = _uffd_faults, }, \ +} + +#define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits) \ +{ \ + .name = SCAT3(ro_memslot, _access, _with_af), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .guest_prepare = { _PREPARE(_access) }, \ + .guest_test = _access, \ + .mmio_handler = _mmio_handler, \ + .expected_events = { .mmio_exits = _mmio_exits }, \ +} + +#define TEST_RO_MEMSLOT_NO_SYNDROME(_access) \ +{ \ + .name = SCAT2(ro_memslot_no_syndrome, _access), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .guest_test = _access, \ + .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ + .expected_events = { .fail_vcpu_runs = 1 }, \ +} + +#define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits, \ + _test_check) \ +{ \ + .name = SCAT3(ro_memslot, _access, _with_af), \ + .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_prepare = { _PREPARE(_access) }, \ + .guest_test = _access, \ + .guest_test_check = { _test_check }, \ + .mmio_handler = _mmio_handler, \ + .expected_events = { .mmio_exits = _mmio_exits}, \ +} + +#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(_access, _test_check) \ +{ \ + .name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \ + .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ + .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_test = _access, \ + .guest_test_check = { _test_check }, \ + .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ + .expected_events = { .fail_vcpu_runs = 1 }, \ +} + +#define TEST_RO_MEMSLOT_AND_UFFD(_access, _mmio_handler, _mmio_exits, \ + _uffd_data_handler, _uffd_faults) \ +{ \ + .name = SCAT2(ro_memslot_uffd, _access), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ + .guest_prepare = { _PREPARE(_access) }, \ + .guest_test = _access, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = uffd_pt_write_handler, \ + .mmio_handler = _mmio_handler, \ + .expected_events = { .mmio_exits = _mmio_exits, \ + .uffd_faults = _uffd_faults }, \ +} + +#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(_access, _uffd_data_handler, \ + _uffd_faults) \ +{ \ + .name = SCAT2(ro_memslot_no_syndrome, _access), \ + .data_memslot_flags = KVM_MEM_READONLY, \ + .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ + .guest_test = _access, \ + .uffd_data_handler = _uffd_data_handler, \ + .uffd_pt_handler = uffd_pt_write_handler, \ + .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ + .expected_events = { .fail_vcpu_runs = 1, \ + .uffd_faults = _uffd_faults }, \ +} + +static struct test_desc tests[] = { + + /* Check that HW is setting the Access Flag (AF) (sanity checks). */ + TEST_ACCESS(guest_read64, with_af, CMD_NONE), + TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE), + TEST_ACCESS(guest_cas, with_af, CMD_NONE), + TEST_ACCESS(guest_write64, with_af, CMD_NONE), + TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE), + TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE), + TEST_ACCESS(guest_exec, with_af, CMD_NONE), + + /* + * Punch a hole in the data backing store, and then try multiple + * accesses: reads should rturn zeroes, and writes should + * re-populate the page. Moreover, the test also check that no + * exception was generated in the guest. Note that this + * reading/writing behavior is the same as reading/writing a + * punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from + * userspace. + */ + TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA), + TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA), + + /* + * Punch holes in the data and PT backing stores and mark them for + * userfaultfd handling. This should result in 2 faults: the access + * on the data backing store, and its respective S1 page table walk + * (S1PTW). + */ + TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_read_handler, uffd_pt_write_handler, 2), + /* no_af should also lead to a PT write. */ + TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_read_handler, uffd_pt_write_handler, 2), + /* Note how that cas invokes the read handler. */ + TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_read_handler, uffd_pt_write_handler, 2), + /* + * Can't test guest_at with_af as it's IMPDEF whether the AF is set. + * The S1PTW fault should still be marked as a write. + */ + TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_read_handler, uffd_pt_write_handler, 1), + TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_read_handler, uffd_pt_write_handler, 2), + TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_write_handler, uffd_pt_write_handler, 2), + TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_write_handler, uffd_pt_write_handler, 2), + TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_write_handler, uffd_pt_write_handler, 2), + TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT, + uffd_data_read_handler, uffd_pt_write_handler, 2), + + /* + * Try accesses when the data and PT memory regions are both + * tracked for dirty logging. + */ + TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log), + /* no_af should also lead to a PT write. */ + TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log), + TEST_DIRTY_LOG(guest_ld_preidx, with_af, guest_check_no_write_in_dirty_log), + TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log), + TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log), + TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log), + TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log), + TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log), + TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log), + + /* + * Access when the data and PT memory regions are both marked for + * dirty logging and UFFD at the same time. The expected result is + * that writes should mark the dirty log and trigger a userfaultfd + * write fault. Reads/execs should result in a read userfaultfd + * fault, and nothing in the dirty log. Any S1PTW should result in + * a write in the dirty log and a userfaultfd write. + */ + TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af, uffd_data_read_handler, 2, + guest_check_no_write_in_dirty_log), + /* no_af should also lead to a PT write. */ + TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af, uffd_data_read_handler, 2, + guest_check_no_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af, uffd_data_read_handler, + 2, guest_check_no_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, 0, 1, + guest_check_no_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af, uffd_data_read_handler, 2, + guest_check_no_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af, uffd_data_write_handler, + 2, guest_check_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af, uffd_data_read_handler, 2, + guest_check_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af, uffd_data_write_handler, + 2, guest_check_write_in_dirty_log), + TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af, + uffd_data_write_handler, 2, + guest_check_write_in_dirty_log), + + /* + * Try accesses when the data memory region is marked read-only + * (with KVM_MEM_READONLY). Writes with a syndrome result in an + * MMIO exit, writes with no syndrome (e.g., CAS) result in a + * failed vcpu run, and reads/execs with and without syndroms do + * not fault. + */ + TEST_RO_MEMSLOT(guest_read64, 0, 0), + TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0), + TEST_RO_MEMSLOT(guest_at, 0, 0), + TEST_RO_MEMSLOT(guest_exec, 0, 0), + TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1), + TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva), + TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas), + TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx), + + /* + * Access when both the data region is both read-only and marked + * for dirty logging at the same time. The expected result is that + * for writes there should be no write in the dirty log. The + * readonly handling is the same as if the memslot was not marked + * for dirty logging: writes with a syndrome result in an MMIO + * exit, and writes with no syndrome result in a failed vcpu run. + */ + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler, + 1, guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas, + guest_check_no_write_in_dirty_log), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx, + guest_check_no_write_in_dirty_log), + + /* + * Access when the data region is both read-only and punched with + * holes tracked with userfaultfd. The expected result is the + * union of both userfaultfd and read-only behaviors. For example, + * write accesses result in a userfaultfd write fault and an MMIO + * exit. Writes with no syndrome result in a failed vcpu run and + * no userfaultfd write fault. Reads result in userfaultfd getting + * triggered. + */ + TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, + uffd_data_read_handler, 2), + TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, + uffd_data_read_handler, 2), + TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, + uffd_no_handler, 1), + TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, + uffd_data_read_handler, 2), + TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1, + uffd_data_write_handler, 2), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, + uffd_data_read_handler, 2), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, + uffd_no_handler, 1), + TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, + uffd_no_handler, 1), + + { 0 } +}; + +static void for_each_test_and_guest_mode(enum vm_mem_backing_src_type src_type) +{ + struct test_desc *t; + + for (t = &tests[0]; t->name; t++) { + if (t->skip) + continue; + + struct test_params p = { + .src_type = src_type, + .test_desc = t, + }; + + for_each_guest_mode(run_test, &p); + } +} + +int main(int argc, char *argv[]) +{ + enum vm_mem_backing_src_type src_type; + int opt; + + setbuf(stdout, NULL); + + src_type = DEFAULT_VM_MEM_SRC; + + while ((opt = getopt(argc, argv, "hm:s:")) != -1) { + switch (opt) { + case 'm': + guest_modes_cmdline(optarg); + break; + case 's': + src_type = parse_backing_src_type(optarg); + break; + case 'h': + default: + help(argv[0]); + exit(0); + } + } + + for_each_test_and_guest_mode(src_type); + return 0; +} diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 779ae54f89c4..8e1fe4ffcccd 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -22,23 +22,13 @@ #include "test_util.h" #include "perf_test_util.h" #include "guest_modes.h" +#include "userfaultfd_util.h" #ifdef __NR_userfaultfd -#ifdef PRINT_PER_PAGE_UPDATES -#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) -#else -#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__) -#endif - -#ifdef PRINT_PER_VCPU_UPDATES -#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__) -#else -#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) -#endif - static int nr_vcpus = 1; static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; + static size_t demand_paging_size; static char *guest_data_prototype; @@ -67,9 +57,11 @@ static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) ts_diff.tv_sec, ts_diff.tv_nsec); } -static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr) +static int handle_uffd_page_request(int uffd_mode, int uffd, + struct uffd_msg *msg) { pid_t tid = syscall(__NR_gettid); + uint64_t addr = msg->arg.pagefault.address; struct timespec start; struct timespec ts_diff; int r; @@ -116,174 +108,32 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr) return 0; } -bool quit_uffd_thread; - -struct uffd_handler_args { +struct test_params { int uffd_mode; - int uffd; - int pipefd; - useconds_t delay; + useconds_t uffd_delay; + enum vm_mem_backing_src_type src_type; + bool partition_vcpu_memory_access; }; -static void *uffd_handler_thread_fn(void *arg) -{ - struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg; - int uffd = uffd_args->uffd; - int pipefd = uffd_args->pipefd; - useconds_t delay = uffd_args->delay; - int64_t pages = 0; - struct timespec start; - struct timespec ts_diff; - - clock_gettime(CLOCK_MONOTONIC, &start); - while (!quit_uffd_thread) { - struct uffd_msg msg; - struct pollfd pollfd[2]; - char tmp_chr; - int r; - uint64_t addr; - - pollfd[0].fd = uffd; - pollfd[0].events = POLLIN; - pollfd[1].fd = pipefd; - pollfd[1].events = POLLIN; - - r = poll(pollfd, 2, -1); - switch (r) { - case -1: - pr_info("poll err"); - continue; - case 0: - continue; - case 1: - break; - default: - pr_info("Polling uffd returned %d", r); - return NULL; - } - - if (pollfd[0].revents & POLLERR) { - pr_info("uffd revents has POLLERR"); - return NULL; - } - - if (pollfd[1].revents & POLLIN) { - r = read(pollfd[1].fd, &tmp_chr, 1); - TEST_ASSERT(r == 1, - "Error reading pipefd in UFFD thread\n"); - return NULL; - } - - if (!(pollfd[0].revents & POLLIN)) - continue; - - r = read(uffd, &msg, sizeof(msg)); - if (r == -1) { - if (errno == EAGAIN) - continue; - pr_info("Read of uffd got errno %d\n", errno); - return NULL; - } - - if (r != sizeof(msg)) { - pr_info("Read on uffd returned unexpected size: %d bytes", r); - return NULL; - } - - if (!(msg.event & UFFD_EVENT_PAGEFAULT)) - continue; - - if (delay) - usleep(delay); - addr = msg.arg.pagefault.address; - r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr); - if (r < 0) - return NULL; - pages++; - } - - ts_diff = timespec_elapsed(start); - PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", - pages, ts_diff.tv_sec, ts_diff.tv_nsec, - pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); - - return NULL; -} - -static void setup_demand_paging(struct kvm_vm *vm, - pthread_t *uffd_handler_thread, int pipefd, - int uffd_mode, useconds_t uffd_delay, - struct uffd_handler_args *uffd_args, - void *hva, void *alias, uint64_t len) +static void prefault_mem(void *alias, uint64_t len) { - bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR); - int uffd; - struct uffdio_api uffdio_api; - struct uffdio_register uffdio_register; - uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; - int ret; + size_t p; - PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", - is_minor ? "MINOR" : "MISSING", - is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); - - /* In order to get minor faults, prefault via the alias. */ - if (is_minor) { - size_t p; - - expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE; - - TEST_ASSERT(alias != NULL, "Alias required for minor faults"); - for (p = 0; p < (len / demand_paging_size); ++p) { - memcpy(alias + (p * demand_paging_size), - guest_data_prototype, demand_paging_size); - } + TEST_ASSERT(alias != NULL, "Alias required for minor faults"); + for (p = 0; p < (len / demand_paging_size); ++p) { + memcpy(alias + (p * demand_paging_size), + guest_data_prototype, demand_paging_size); } - - uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); - TEST_ASSERT(uffd >= 0, __KVM_SYSCALL_ERROR("userfaultfd()", uffd)); - - uffdio_api.api = UFFD_API; - uffdio_api.features = 0; - ret = ioctl(uffd, UFFDIO_API, &uffdio_api); - TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_API", ret)); - - uffdio_register.range.start = (uint64_t)hva; - uffdio_register.range.len = len; - uffdio_register.mode = uffd_mode; - ret = ioctl(uffd, UFFDIO_REGISTER, &uffdio_register); - TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_REGISTER", ret)); - TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == - expected_ioctls, "missing userfaultfd ioctls"); - - uffd_args->uffd_mode = uffd_mode; - uffd_args->uffd = uffd; - uffd_args->pipefd = pipefd; - uffd_args->delay = uffd_delay; - pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn, - uffd_args); - - PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", - hva, hva + len); } -struct test_params { - int uffd_mode; - useconds_t uffd_delay; - enum vm_mem_backing_src_type src_type; - bool partition_vcpu_memory_access; -}; - static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *p = arg; - pthread_t *uffd_handler_threads = NULL; - struct uffd_handler_args *uffd_args = NULL; + struct uffd_desc **uffd_descs = NULL; struct timespec start; struct timespec ts_diff; - int *pipefds = NULL; struct kvm_vm *vm; - int r, i; + int i; vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, p->src_type, p->partition_vcpu_memory_access); @@ -296,15 +146,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) memset(guest_data_prototype, 0xAB, demand_paging_size); if (p->uffd_mode) { - uffd_handler_threads = - malloc(nr_vcpus * sizeof(*uffd_handler_threads)); - TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); - - uffd_args = malloc(nr_vcpus * sizeof(*uffd_args)); - TEST_ASSERT(uffd_args, "Memory allocation failed"); - - pipefds = malloc(sizeof(int) * nr_vcpus * 2); - TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); + uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *)); + TEST_ASSERT(uffd_descs, "Memory allocation failed"); for (i = 0; i < nr_vcpus; i++) { struct perf_test_vcpu_args *vcpu_args; @@ -317,19 +160,17 @@ static void run_test(enum vm_guest_mode mode, void *arg) vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa); vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa); + prefault_mem(vcpu_alias, + vcpu_args->pages * perf_test_args.guest_page_size); + /* * Set up user fault fd to handle demand paging * requests. */ - r = pipe2(&pipefds[i * 2], - O_CLOEXEC | O_NONBLOCK); - TEST_ASSERT(!r, "Failed to set up pipefd"); - - setup_demand_paging(vm, &uffd_handler_threads[i], - pipefds[i * 2], p->uffd_mode, - p->uffd_delay, &uffd_args[i], - vcpu_hva, vcpu_alias, - vcpu_args->pages * perf_test_args.guest_page_size); + uffd_descs[i] = uffd_setup_demand_paging( + p->uffd_mode, p->uffd_delay, vcpu_hva, + vcpu_args->pages * perf_test_args.guest_page_size, + &handle_uffd_page_request); } } @@ -344,15 +185,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) pr_info("All vCPU threads joined\n"); if (p->uffd_mode) { - char c; - /* Tell the user fault fd handler threads to quit */ - for (i = 0; i < nr_vcpus; i++) { - r = write(pipefds[i * 2 + 1], &c, 1); - TEST_ASSERT(r == 1, "Unable to write to pipefd"); - - pthread_join(uffd_handler_threads[i], NULL); - } + for (i = 0; i < nr_vcpus; i++) + uffd_stop_demand_paging(uffd_descs[i]); } pr_info("Total guest execution time: %ld.%.9lds\n", @@ -364,11 +199,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) perf_test_destroy_vm(vm); free(guest_data_prototype); - if (p->uffd_mode) { - free(uffd_handler_threads); - free(uffd_args); - free(pipefds); - } + if (p->uffd_mode) + free(uffd_descs); } static void help(char *name) diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index a8124f9dd68a..5f977528e09c 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -38,12 +38,25 @@ * NORMAL 4 1111:1111 * NORMAL_WT 5 1011:1011 */ -#define DEFAULT_MAIR_EL1 ((0x00ul << (0 * 8)) | \ - (0x04ul << (1 * 8)) | \ - (0x0cul << (2 * 8)) | \ - (0x44ul << (3 * 8)) | \ - (0xfful << (4 * 8)) | \ - (0xbbul << (5 * 8))) + +/* Linux doesn't use these memory types, so let's define them. */ +#define MAIR_ATTR_DEVICE_GRE UL(0x0c) +#define MAIR_ATTR_NORMAL_WT UL(0xbb) + +#define MT_DEVICE_nGnRnE 0 +#define MT_DEVICE_nGnRE 1 +#define MT_DEVICE_GRE 2 +#define MT_NORMAL_NC 3 +#define MT_NORMAL 4 +#define MT_NORMAL_WT 5 + +#define DEFAULT_MAIR_EL1 \ + (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \ + MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) | \ + MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) #define MPIDR_HWID_BITMASK (0xff00fffffful) @@ -92,11 +105,19 @@ enum { #define ESR_EC_MASK (ESR_EC_NUM - 1) #define ESR_EC_SVC64 0x15 +#define ESR_EC_IABT 0x21 +#define ESR_EC_DABT 0x25 #define ESR_EC_HW_BP_CURRENT 0x31 #define ESR_EC_SSTEP_CURRENT 0x33 #define ESR_EC_WP_CURRENT 0x35 #define ESR_EC_BRK_INS 0x3c +/* Access flag */ +#define PTE_AF (1ULL << 10) + +/* Access flag update enable/disable */ +#define TCR_EL1_HA (1ULL << 39) + void aarch64_get_supported_page_sizes(uint32_t ipa, bool *ps4k, bool *ps16k, bool *ps64k); @@ -109,6 +130,8 @@ void vm_install_exception_handler(struct kvm_vm *vm, void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, handler_fn handler); +uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva); + static inline void cpu_relax(void) { asm volatile("yield" ::: "memory"); diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index e42a09cd24a0..b0da75af1ff3 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -34,6 +34,7 @@ struct userspace_mem_region { struct sparsebit *unused_phy_pages; int fd; off_t offset; + enum vm_mem_backing_src_type backing_src_type; void *host_mem; void *host_alias; void *mmap_start; @@ -64,6 +65,14 @@ struct userspace_mem_regions { DECLARE_HASHTABLE(slot_hash, 9); }; +enum kvm_mem_region_type { + MEM_REGION_CODE, + MEM_REGION_DATA, + MEM_REGION_PT, + MEM_REGION_TEST_DATA, + NR_MEM_REGIONS, +}; + struct kvm_vm { int mode; unsigned long type; @@ -92,6 +101,13 @@ struct kvm_vm { int stats_fd; struct kvm_stats_header stats_header; struct kvm_stats_desc *stats_desc; + + /* + * KVM region slots. These are the default memslots used by page + * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE] + * memslot. + */ + uint32_t memslots[NR_MEM_REGIONS]; }; @@ -104,6 +120,13 @@ struct kvm_vm { struct userspace_mem_region * memslot2region(struct kvm_vm *vm, uint32_t memslot); +static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm, + enum kvm_mem_region_type type) +{ + assert(type < NR_MEM_REGIONS); + return memslot2region(vm, vm->memslots[type]); +} + /* Minimum allocated guest virtual and physical addresses */ #define KVM_UTIL_MIN_VADDR 0x2000 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 @@ -384,7 +407,11 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); +vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, + enum kvm_mem_region_type type); vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); +vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, + enum kvm_mem_region_type type); vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, @@ -646,13 +673,13 @@ vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to * calculate the amount of memory needed for per-vCPU data, e.g. stacks. */ -struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages); +struct kvm_vm *____vm_create(enum vm_guest_mode mode); struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, uint64_t nr_extra_pages); static inline struct kvm_vm *vm_create_barebones(void) { - return ____vm_create(VM_MODE_DEFAULT, 0); + return ____vm_create(VM_MODE_DEFAULT); } static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus) diff --git a/tools/testing/selftests/kvm/include/userfaultfd_util.h b/tools/testing/selftests/kvm/include/userfaultfd_util.h new file mode 100644 index 000000000000..877449c34592 --- /dev/null +++ b/tools/testing/selftests/kvm/include/userfaultfd_util.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * KVM userfaultfd util + * + * Copyright (C) 2018, Red Hat, Inc. + * Copyright (C) 2019-2022 Google LLC + */ + +#define _GNU_SOURCE /* for pipe2 */ + +#include <inttypes.h> +#include <time.h> +#include <pthread.h> +#include <linux/userfaultfd.h> + +#include "test_util.h" + +typedef int (*uffd_handler_t)(int uffd_mode, int uffd, struct uffd_msg *msg); + +struct uffd_desc { + int uffd_mode; + int uffd; + int pipefds[2]; + useconds_t delay; + uffd_handler_t handler; + pthread_t thread; +}; + +struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, + void *hva, uint64_t len, + uffd_handler_t handler); + +void uffd_stop_demand_paging(struct uffd_desc *uffd); + +#ifdef PRINT_PER_PAGE_UPDATES +#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) +#else +#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__) +#endif + +#ifdef PRINT_PER_VCPU_UPDATES +#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__) +#else +#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) +#endif diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 7c96b931edd5..f79f2e37dc3b 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -77,13 +77,15 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm) void virt_arch_pgd_alloc(struct kvm_vm *vm) { - if (!vm->pgd_created) { - vm_paddr_t paddr = vm_phy_pages_alloc(vm, - page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); - vm->pgd = paddr; - vm->pgd_created = true; - } + size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size; + + if (vm->pgd_created) + return; + + vm->pgd = vm_phy_pages_alloc(vm, nr_pages, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); + vm->pgd_created = true; } static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, @@ -134,12 +136,12 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) { - uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */ + uint64_t attr_idx = MT_NORMAL; _virt_pg_map(vm, vaddr, paddr, attr_idx); } -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva) { uint64_t *ptep; @@ -170,11 +172,18 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) TEST_FAIL("Page table levels must be 2, 3, or 4"); } - return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); + return ptep; unmapped_gva: TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva); - exit(1); + exit(EXIT_FAILURE); +} + +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint64_t *ptep = virt_get_pte_hva(vm, gva); + + return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); } static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level) @@ -319,13 +328,16 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, struct kvm_vcpu_init *init, void *guest_code) { - size_t stack_size = vm->page_size == 4096 ? - DEFAULT_STACK_PGS * vm->page_size : - vm->page_size; - uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size, - DEFAULT_ARM64_GUEST_STACK_VADDR_MIN); + size_t stack_size; + uint64_t stack_vaddr; struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id); + stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size : + vm->page_size; + stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); + aarch64_vcpu_setup(vcpu, init); vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size); @@ -429,8 +441,8 @@ unexpected_exception: void vm_init_descriptor_tables(struct kvm_vm *vm) { - vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers), - vm->page_size); + vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers), + vm->page_size, MEM_REGION_DATA); *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; } diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index 9f54c098d9d0..51f280c412ba 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -161,7 +161,8 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) seg_vend |= vm->page_size - 1; size_t seg_size = seg_vend - seg_vstart + 1; - vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart); + vm_vaddr_t vaddr = __vm_vaddr_alloc(vm, seg_size, seg_vstart, + MEM_REGION_CODE); TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate " "virtual memory for segment at requested min addr,\n" " segment idx: %u\n" diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index f1cb1627161f..5ad4acaec8e0 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -185,13 +185,10 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); -struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages) +struct kvm_vm *____vm_create(enum vm_guest_mode mode) { struct kvm_vm *vm; - pr_debug("%s: mode='%s' pages='%ld'\n", __func__, - vm_guest_mode_string(mode), nr_pages); - vm = calloc(1, sizeof(*vm)); TEST_ASSERT(vm != NULL, "Insufficient Memory"); @@ -287,9 +284,6 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages) /* Allocate and setup memory for guest. */ vm->vpages_mapped = sparsebit_alloc(); - if (nr_pages != 0) - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - 0, 0, nr_pages, 0); return vm; } @@ -335,8 +329,16 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus, nr_extra_pages); struct kvm_vm *vm; + int i; + + pr_debug("%s: mode='%s' pages='%ld'\n", __func__, + vm_guest_mode_string(mode), nr_pages); - vm = ____vm_create(mode, nr_pages); + vm = ____vm_create(mode); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0); + for (i = 0; i < NR_MEM_REGIONS; i++) + vm->memslots[i] = 0; kvm_vm_elf_load(vm, program_invocation_name); @@ -586,6 +588,12 @@ static void __vm_mem_region_delete(struct kvm_vm *vm, sparsebit_free(®ion->unused_phy_pages); ret = munmap(region->mmap_start, region->mmap_size); TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + if (region->fd >= 0) { + /* There's an extra map when using shared memory. */ + ret = munmap(region->mmap_alias, region->mmap_size); + TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + close(region->fd); + } free(region); } @@ -923,6 +931,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, vm_mem_backing_src_alias(src_type)->name); } + region->backing_src_type = src_type; region->unused_phy_pages = sparsebit_alloc(); sparsebit_set_num(region->unused_phy_pages, guest_paddr >> vm->page_shift, npages); @@ -1217,32 +1226,15 @@ va_found: return pgidx_start * vm->page_size; } -/* - * VM Virtual Address Allocate - * - * Input Args: - * vm - Virtual Machine - * sz - Size in bytes - * vaddr_min - Minimum starting virtual address - * - * Output Args: None - * - * Return: - * Starting guest virtual address - * - * Allocates at least sz bytes within the virtual address space of the vm - * given by vm. The allocated bytes are mapped to a virtual address >= - * the address given by vaddr_min. Note that each allocation uses a - * a unique set of pages, with the minimum real allocation being at least - * a page. - */ -vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) +vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, + enum kvm_mem_region_type type) { uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); virt_pgd_alloc(vm); vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages, - KVM_UTIL_MIN_PFN * vm->page_size, 0); + KVM_UTIL_MIN_PFN * vm->page_size, + vm->memslots[type]); /* * Find an unused range of virtual page addresses of at least @@ -1264,6 +1256,30 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) } /* + * VM Virtual Address Allocate + * + * Input Args: + * vm - Virtual Machine + * sz - Size in bytes + * vaddr_min - Minimum starting virtual address + * + * Output Args: None + * + * Return: + * Starting guest virtual address + * + * Allocates at least sz bytes within the virtual address space of the vm + * given by vm. The allocated bytes are mapped to a virtual address >= + * the address given by vaddr_min. Note that each allocation uses a + * a unique set of pages, with the minimum real allocation being at least + * a page. The allocated physical space comes from the TEST_DATA memory region. + */ +vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min) +{ + return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA); +} + +/* * VM Virtual Address Allocate Pages * * Input Args: @@ -1282,6 +1298,11 @@ vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages) return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR); } +vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type) +{ + return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type); +} + /* * VM Virtual Address Allocate Page * @@ -1847,7 +1868,8 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm) { - return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); + return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); } /* diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index 604478151212..d146ca71e0c0 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -55,13 +55,15 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) void virt_arch_pgd_alloc(struct kvm_vm *vm) { - if (!vm->pgd_created) { - vm_paddr_t paddr = vm_phy_pages_alloc(vm, - page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); - vm->pgd = paddr; - vm->pgd_created = true; - } + size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size; + + if (vm->pgd_created) + return; + + vm->pgd = vm_phy_pages_alloc(vm, nr_pages, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); + vm->pgd_created = true; } void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) @@ -279,15 +281,18 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, void *guest_code) { int r; - size_t stack_size = vm->page_size == 4096 ? - DEFAULT_STACK_PGS * vm->page_size : - vm->page_size; - unsigned long stack_vaddr = vm_vaddr_alloc(vm, stack_size, - DEFAULT_RISCV_GUEST_STACK_VADDR_MIN); + size_t stack_size; + unsigned long stack_vaddr; unsigned long current_gp = 0; struct kvm_mp_state mps; struct kvm_vcpu *vcpu; + stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size : + vm->page_size; + stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + DEFAULT_RISCV_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); + vcpu = __vm_vcpu_add(vm, vcpu_id); riscv_vcpu_mmu_setup(vcpu); diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index 89d7340d9cbd..15945121daf1 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -21,7 +21,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) return; paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION, - KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); + KVM_GUEST_PAGE_TABLE_MIN_PADDR, + vm->memslots[MEM_REGION_PT]); memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size); vm->pgd = paddr; @@ -167,8 +168,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", vm->page_size); - stack_vaddr = vm_vaddr_alloc(vm, stack_size, - DEFAULT_GUEST_STACK_VADDR_MIN); + stack_vaddr = __vm_vaddr_alloc(vm, stack_size, + DEFAULT_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); vcpu = __vm_vcpu_add(vm, vcpu_id); diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c new file mode 100644 index 000000000000..3b44846fc277 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM userfaultfd util + * Adapted from demand_paging_test.c + * + * Copyright (C) 2018, Red Hat, Inc. + * Copyright (C) 2019-2022 Google LLC + */ + +#define _GNU_SOURCE /* for pipe2 */ + +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <poll.h> +#include <pthread.h> +#include <linux/userfaultfd.h> +#include <sys/syscall.h> + +#include "kvm_util.h" +#include "test_util.h" +#include "perf_test_util.h" +#include "userfaultfd_util.h" + +#ifdef __NR_userfaultfd + +static void *uffd_handler_thread_fn(void *arg) +{ + struct uffd_desc *uffd_desc = (struct uffd_desc *)arg; + int uffd = uffd_desc->uffd; + int pipefd = uffd_desc->pipefds[0]; + useconds_t delay = uffd_desc->delay; + int64_t pages = 0; + struct timespec start; + struct timespec ts_diff; + + clock_gettime(CLOCK_MONOTONIC, &start); + while (1) { + struct uffd_msg msg; + struct pollfd pollfd[2]; + char tmp_chr; + int r; + + pollfd[0].fd = uffd; + pollfd[0].events = POLLIN; + pollfd[1].fd = pipefd; + pollfd[1].events = POLLIN; + + r = poll(pollfd, 2, -1); + switch (r) { + case -1: + pr_info("poll err"); + continue; + case 0: + continue; + case 1: + break; + default: + pr_info("Polling uffd returned %d", r); + return NULL; + } + + if (pollfd[0].revents & POLLERR) { + pr_info("uffd revents has POLLERR"); + return NULL; + } + + if (pollfd[1].revents & POLLIN) { + r = read(pollfd[1].fd, &tmp_chr, 1); + TEST_ASSERT(r == 1, + "Error reading pipefd in UFFD thread\n"); + return NULL; + } + + if (!(pollfd[0].revents & POLLIN)) + continue; + + r = read(uffd, &msg, sizeof(msg)); + if (r == -1) { + if (errno == EAGAIN) + continue; + pr_info("Read of uffd got errno %d\n", errno); + return NULL; + } + + if (r != sizeof(msg)) { + pr_info("Read on uffd returned unexpected size: %d bytes", r); + return NULL; + } + + if (!(msg.event & UFFD_EVENT_PAGEFAULT)) + continue; + + if (delay) + usleep(delay); + r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg); + if (r < 0) + return NULL; + pages++; + } + + ts_diff = timespec_elapsed(start); + PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", + pages, ts_diff.tv_sec, ts_diff.tv_nsec, + pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); + + return NULL; +} + +struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, + void *hva, uint64_t len, + uffd_handler_t handler) +{ + struct uffd_desc *uffd_desc; + bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR); + int uffd; + struct uffdio_api uffdio_api; + struct uffdio_register uffdio_register; + uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; + int ret; + + PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", + is_minor ? "MINOR" : "MISSING", + is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); + + uffd_desc = malloc(sizeof(struct uffd_desc)); + TEST_ASSERT(uffd_desc, "malloc failed"); + + /* In order to get minor faults, prefault via the alias. */ + if (is_minor) + expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE; + + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno); + + uffdio_api.api = UFFD_API; + uffdio_api.features = 0; + TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1, + "ioctl UFFDIO_API failed: %" PRIu64, + (uint64_t)uffdio_api.api); + + uffdio_register.range.start = (uint64_t)hva; + uffdio_register.range.len = len; + uffdio_register.mode = uffd_mode; + TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1, + "ioctl UFFDIO_REGISTER failed"); + TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == + expected_ioctls, "missing userfaultfd ioctls"); + + ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK); + TEST_ASSERT(!ret, "Failed to set up pipefd"); + + uffd_desc->uffd_mode = uffd_mode; + uffd_desc->uffd = uffd; + uffd_desc->delay = delay; + uffd_desc->handler = handler; + pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn, + uffd_desc); + + PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", + hva, hva + len); + + return uffd_desc; +} + +void uffd_stop_demand_paging(struct uffd_desc *uffd) +{ + char c = 0; + int ret; + + ret = write(uffd->pipefds[1], &c, 1); + TEST_ASSERT(ret == 1, "Unable to write to pipefd"); + + ret = pthread_join(uffd->thread, NULL); + TEST_ASSERT(ret == 0, "Pthread_join failed."); + + close(uffd->uffd); + + close(uffd->pipefds[1]); + close(uffd->pipefds[0]); + + free(uffd); +} + +#endif /* __NR_userfaultfd */ diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 39c4409ef56a..b199dde90e9f 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -552,7 +552,7 @@ unmapped_gva: static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt) { if (!vm->gdt) - vm->gdt = vm_vaddr_alloc_page(vm); + vm->gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); dt->base = vm->gdt; dt->limit = getpagesize(); @@ -562,7 +562,7 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp, int selector) { if (!vm->tss) - vm->tss = vm_vaddr_alloc_page(vm); + vm->tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); memset(segp, 0, sizeof(*segp)); segp->base = vm->tss; @@ -647,8 +647,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, vm_vaddr_t stack_vaddr; struct kvm_vcpu *vcpu; - stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), - DEFAULT_GUEST_STACK_VADDR_MIN); + stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), + DEFAULT_GUEST_STACK_VADDR_MIN, + MEM_REGION_DATA); vcpu = __vm_vcpu_add(vm, vcpu_id); vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); @@ -1145,8 +1146,8 @@ void vm_init_descriptor_tables(struct kvm_vm *vm) extern void *idt_handlers; int i; - vm->idt = vm_vaddr_alloc_page(vm); - vm->handlers = vm_vaddr_alloc_page(vm); + vm->idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); /* Handlers have the same address in both address spaces.*/ for (i = 0; i < NUM_INTERRUPTS; i++) set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, |