-rw-r--r--  tools/testing/selftests/kvm/include/x86/kvm_util_arch.h |   4
-rw-r--r--  tools/testing/selftests/kvm/include/x86/processor.h     |  16
-rw-r--r--  tools/testing/selftests/kvm/lib/x86/processor.c         |  21
-rw-r--r--  tools/testing/selftests/kvm/lib/x86/vmx.c               | 119
4 files changed, 52 insertions(+), 108 deletions(-)
diff --git a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
index 05a1fc1780f2..1cf84b8212c6 100644
--- a/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
@@ -14,6 +14,8 @@ struct pte_masks {
 	uint64_t present;
 	uint64_t writable;
 	uint64_t user;
+	uint64_t readable;
+	uint64_t executable;
 	uint64_t accessed;
 	uint64_t dirty;
 	uint64_t huge;
@@ -37,8 +39,6 @@ struct kvm_vm_arch {
 	uint64_t s_bit;
 	int sev_fd;
 	bool is_pt_protected;
-
-	struct kvm_mmu *tdp_mmu;
 };
 
 static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch)
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index 0164ef090787..e17cbbe71b8f 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -1444,6 +1444,8 @@ enum pg_level {
 #define PTE_PRESENT_MASK(mmu)	((mmu)->arch.pte_masks.present)
 #define PTE_WRITABLE_MASK(mmu)	((mmu)->arch.pte_masks.writable)
 #define PTE_USER_MASK(mmu)	((mmu)->arch.pte_masks.user)
+#define PTE_READABLE_MASK(mmu)	((mmu)->arch.pte_masks.readable)
+#define PTE_EXECUTABLE_MASK(mmu)	((mmu)->arch.pte_masks.executable)
 #define PTE_ACCESSED_MASK(mmu)	((mmu)->arch.pte_masks.accessed)
 #define PTE_DIRTY_MASK(mmu)	((mmu)->arch.pte_masks.dirty)
 #define PTE_HUGE_MASK(mmu)	((mmu)->arch.pte_masks.huge)
@@ -1451,13 +1453,23 @@ enum pg_level {
 #define PTE_C_BIT_MASK(mmu)	((mmu)->arch.pte_masks.c)
 #define PTE_S_BIT_MASK(mmu)	((mmu)->arch.pte_masks.s)
 
-#define is_present_pte(mmu, pte)	(!!(*(pte) & PTE_PRESENT_MASK(mmu)))
+/*
+ * For PTEs without a PRESENT bit (i.e. EPT entries), treat the PTE as present
+ * if it's executable or readable, as EPT supports execute-only PTEs, but not
+ * write-only PTEs.
+ */
+#define is_present_pte(mmu, pte)				\
+	(PTE_PRESENT_MASK(mmu) ?				\
+	 !!(*(pte) & PTE_PRESENT_MASK(mmu)) :			\
+	 !!(*(pte) & (PTE_READABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu))))
+#define is_executable_pte(mmu, pte)	\
+	((*(pte) & (PTE_EXECUTABLE_MASK(mmu) | PTE_NX_MASK(mmu))) == PTE_EXECUTABLE_MASK(mmu))
 #define is_writable_pte(mmu, pte)	(!!(*(pte) & PTE_WRITABLE_MASK(mmu)))
 #define is_user_pte(mmu, pte)		(!!(*(pte) & PTE_USER_MASK(mmu)))
 #define is_accessed_pte(mmu, pte)	(!!(*(pte) & PTE_ACCESSED_MASK(mmu)))
 #define is_dirty_pte(mmu, pte)		(!!(*(pte) & PTE_DIRTY_MASK(mmu)))
 #define is_huge_pte(mmu, pte)		(!!(*(pte) & PTE_HUGE_MASK(mmu)))
-#define is_nx_pte(mmu, pte)		(!!(*(pte) & PTE_NX_MASK(mmu)))
+#define is_nx_pte(mmu, pte)		(!is_executable_pte(mmu, pte))
 
 void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels,
 		  struct pte_masks *pte_masks);
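(Aside, not part of the patch.) The new present check is easy to exercise on its own. The standalone sketch below uses a hypothetical struct masks and is_present() helper as stand-ins for the selftests' struct pte_masks and is_present_pte() macro; it shows why an execute-only EPT entry counts as present, while the same bit pattern is not present under x86-style masks, which do have a real PRESENT bit:

#include <stdint.h>
#include <stdio.h>

struct masks {
	uint64_t present, readable, executable, nx;
};

/* EPT-style masks: no PRESENT bit; bit 0 is 'readable', bit 2 is X. */
static const struct masks ept_masks = {
	.readable = 1ULL << 0,
	.executable = 1ULL << 2,
};

/* x86-style masks: bit 0 is PRESENT, bit 63 is NX, no R/X bits. */
static const struct masks x86_masks = {
	.present = 1ULL << 0,
	.nx = 1ULL << 63,
};

/* Mirrors the is_present_pte() logic added above. */
static int is_present(const struct masks *m, uint64_t pte)
{
	return m->present ? !!(pte & m->present)
			  : !!(pte & (m->readable | m->executable));
}

int main(void)
{
	uint64_t xo_pte = 1ULL << 2;	/* execute-only: X set, R/W clear */

	printf("EPT: %d\n", is_present(&ept_masks, xo_pte));	/* prints 1 */
	printf("x86: %d\n", is_present(&x86_masks, xo_pte));	/* prints 0 */
	return 0;
}

Built with a stock C compiler this prints 1 then 0: the execute-only entry is live under EPT semantics, but dead under x86 masks, which key presence off bit 0 alone.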
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index 8a9298a72897..41316cac94e0 100644
--- a/tools/testing/selftests/kvm/lib/x86/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -165,6 +165,10 @@ static void virt_mmu_init(struct kvm_vm *vm, struct kvm_mmu *mmu,
 		mmu->pgd_created = true;
 		mmu->arch.pte_masks = *pte_masks;
 	}
+
+	TEST_ASSERT(mmu->pgtable_levels == 4 || mmu->pgtable_levels == 5,
+		    "Selftests MMU only supports 4-level and 5-level paging, not %u-level paging",
+		    mmu->pgtable_levels);
 }
 
 void virt_arch_pgd_alloc(struct kvm_vm *vm)
@@ -180,6 +184,7 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
 		.dirty = BIT_ULL(6),
 		.huge = BIT_ULL(7),
 		.nx = BIT_ULL(63),
+		.executable = 0,
 		.c = vm->arch.c_bit,
 		.s = vm->arch.s_bit,
 	};
@@ -190,10 +195,10 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
 void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels,
 		  struct pte_masks *pte_masks)
 {
-	TEST_ASSERT(!vm->arch.tdp_mmu, "TDP MMU already initialized");
+	TEST_ASSERT(!vm->stage2_mmu.pgtable_levels, "TDP MMU already initialized");
 
-	vm->arch.tdp_mmu = calloc(1, sizeof(*vm->arch.tdp_mmu));
-	virt_mmu_init(vm, vm->arch.tdp_mmu, pte_masks);
+	vm->stage2_mmu.pgtable_levels = pgtable_levels;
+	virt_mmu_init(vm, &vm->stage2_mmu, pte_masks);
 }
 
 static void *virt_get_pte(struct kvm_vm *vm, struct kvm_mmu *mmu,
@@ -223,7 +228,8 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
 	paddr = vm_untag_gpa(vm, paddr);
 
 	if (!is_present_pte(mmu, pte)) {
-		*pte = PTE_PRESENT_MASK(mmu) | PTE_WRITABLE_MASK(mmu);
+		*pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) |
+		       PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu);
 		if (current_level == target_level)
 			*pte |= PTE_HUGE_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK);
 		else
@@ -269,6 +275,9 @@ void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr,
 	TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr,
 		    "Unexpected bits in paddr: %lx", paddr);
 
+	TEST_ASSERT(!PTE_EXECUTABLE_MASK(mmu) || !PTE_NX_MASK(mmu),
+		    "X and NX bit masks cannot be used simultaneously");
+
 	/*
 	 * Allocate upper level page tables, if not already present.  Return
 	 * early if a hugepage was created.
@@ -286,7 +295,9 @@ void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr,
 	pte = virt_get_pte(vm, mmu, pte, vaddr, PG_LEVEL_4K);
 	TEST_ASSERT(!is_present_pte(mmu, pte),
 		    "PTE already present for 4k page at vaddr: 0x%lx", vaddr);
-	*pte = PTE_PRESENT_MASK(mmu) | PTE_WRITABLE_MASK(mmu) | (paddr & PHYSICAL_PAGE_MASK);
+	*pte = PTE_PRESENT_MASK(mmu) | PTE_READABLE_MASK(mmu) |
+	       PTE_WRITABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu) |
+	       (paddr & PHYSICAL_PAGE_MASK);
 
 	/*
 	 * Neither SEV nor TDX supports shared page tables, so only the final
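(Aside, not part of the patch.) The unified PTE construction above works because masks a given MMU lacks are simply zero: x86 leaves .readable and .executable at 0, EPT leaves .present and .nx at 0, so unconditionally OR-ing every permission mask contributes nothing for bits that don't exist. A minimal standalone sketch with hypothetical mask values, not the selftests API:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical x86-style masks: no 'readable'/'executable' bits. */
	uint64_t present = 1ULL << 0, writable = 1ULL << 1;
	uint64_t readable = 0, executable = 0;
	uint64_t paddr = 0x1000;

	/* Same shape as the unified assignment in __virt_pg_map(). */
	uint64_t pte = present | readable | writable | executable | paddr;

	/* The zero masks drop out; only bits the MMU defines survive. */
	assert(pte == (present | writable | paddr));
	return 0;
}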
diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c
index ea1c09f9e8ab..e3737b3d9120 100644
--- a/tools/testing/selftests/kvm/lib/x86/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86/vmx.c
@@ -25,21 +25,6 @@ bool enable_evmcs;
 struct hv_enlightened_vmcs *current_evmcs;
 struct hv_vp_assist_page *current_vp_assist;
 
-struct eptPageTableEntry {
-	uint64_t readable:1;
-	uint64_t writable:1;
-	uint64_t executable:1;
-	uint64_t memory_type:3;
-	uint64_t ignore_pat:1;
-	uint64_t page_size:1;
-	uint64_t accessed:1;
-	uint64_t dirty:1;
-	uint64_t ignored_11_10:2;
-	uint64_t address:40;
-	uint64_t ignored_62_52:11;
-	uint64_t suppress_ve:1;
-};
-
 int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
 {
 	uint16_t evmcs_ver;
@@ -58,13 +43,24 @@ int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
 
 void vm_enable_ept(struct kvm_vm *vm)
 {
-	TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
-	if (vm->arch.tdp_mmu)
-		return;
+	struct pte_masks pte_masks;
 
-	/* TODO: Drop eptPageTableEntry in favor of PTE masks. */
-	struct pte_masks pte_masks = (struct pte_masks) {
+	TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
+
+	/*
+	 * EPTs do not have 'present' or 'user' bits, instead bit 0 is the
+	 * 'readable' bit.
+	 */
+	pte_masks = (struct pte_masks) {
+		.present = 0,
+		.user = 0,
+		.readable = BIT_ULL(0),
+		.writable = BIT_ULL(1),
+		.executable = BIT_ULL(2),
+		.huge = BIT_ULL(7),
+		.accessed = BIT_ULL(8),
+		.dirty = BIT_ULL(9),
+		.nx = 0,
 	};
 
 	/* TODO: Add support for 5-level EPT. */
@@ -120,8 +116,8 @@ vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
 	vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
 	memset(vmx->vmwrite_hva, 0, getpagesize());
 
-	if (vm->arch.tdp_mmu)
-		vmx->eptp_gpa = vm->arch.tdp_mmu->pgd;
+	if (vm->stage2_mmu.pgd_created)
+		vmx->eptp_gpa = vm->stage2_mmu.pgd;
 
 	*p_vmx_gva = vmx_gva;
 	return vmx;
@@ -377,82 +373,6 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
 	init_vmcs_guest_state(guest_rip, guest_rsp);
 }
 
-static void tdp_create_pte(struct kvm_vm *vm,
-			   struct eptPageTableEntry *pte,
-			   uint64_t nested_paddr,
-			   uint64_t paddr,
-			   int current_level,
-			   int target_level)
-{
-	if (!pte->readable) {
-		pte->writable = true;
-		pte->readable = true;
-		pte->executable = true;
-		pte->page_size = (current_level == target_level);
-		if (pte->page_size)
-			pte->address = paddr >> vm->page_shift;
-		else
-			pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
-	} else {
-		/*
-		 * Entry already present. Assert that the caller doesn't want
-		 * a hugepage at this level, and that there isn't a hugepage at
-		 * this level.
-		 */
-		TEST_ASSERT(current_level != target_level,
-			    "Cannot create hugepage at level: %u, nested_paddr: 0x%lx",
-			    current_level, nested_paddr);
-		TEST_ASSERT(!pte->page_size,
-			    "Cannot create page table at level: %u, nested_paddr: 0x%lx",
-			    current_level, nested_paddr);
-	}
-}
-
-
-void __tdp_pg_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
-		  int target_level)
-{
-	const uint64_t page_size = PG_LEVEL_SIZE(target_level);
-	void *eptp_hva = addr_gpa2hva(vm, vm->arch.tdp_mmu->pgd);
-	struct eptPageTableEntry *pt = eptp_hva, *pte;
-	uint16_t index;
-
-	TEST_ASSERT(vm->mode == VM_MODE_PXXVYY_4K,
-		    "Unknown or unsupported guest mode: 0x%x", vm->mode);
-
-	TEST_ASSERT((nested_paddr >> 48) == 0,
-		    "Nested physical address 0x%lx is > 48-bits and requires 5-level EPT",
-		    nested_paddr);
-	TEST_ASSERT((nested_paddr % page_size) == 0,
-		    "Nested physical address not on page boundary,\n"
-		    "  nested_paddr: 0x%lx page_size: 0x%lx",
-		    nested_paddr, page_size);
-	TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
-		    "Physical address beyond beyond maximum supported,\n"
-		    "  nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
-		    paddr, vm->max_gfn, vm->page_size);
-	TEST_ASSERT((paddr % page_size) == 0,
-		    "Physical address not on page boundary,\n"
-		    "  paddr: 0x%lx page_size: 0x%lx",
-		    paddr, page_size);
-	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
-		    "Physical address beyond beyond maximum supported,\n"
-		    "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
-		    paddr, vm->max_gfn, vm->page_size);
-
-	for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
-		index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
-		pte = &pt[index];
-
-		tdp_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
-
-		if (pte->page_size)
-			break;
-
-		pt = addr_gpa2hva(vm, pte->address * vm->page_size);
-	}
-}
-
 /*
  * Map a range of EPT guest physical addresses to the VM's physical address
  *
@@ -473,6 +393,7 @@ void __tdp_pg_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
 void __tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
 	       uint64_t size, int level)
 {
+	struct kvm_mmu *mmu = &vm->stage2_mmu;
 	size_t page_size = PG_LEVEL_SIZE(level);
 	size_t npages = size / page_size;
 
@@ -480,7 +401,7 @@ void __tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr,
 	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
 
 	while (npages--) {
-		__tdp_pg_map(vm, nested_paddr, paddr, level);
+		__virt_pg_map(vm, mmu, nested_paddr, paddr, level);
 		nested_paddr += page_size;
 		paddr += page_size;
 	}
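(Aside, not part of the patch.) With tdp_create_pte() and __tdp_pg_map() gone, __tdp_map() is reduced to a range walk that hands each page to the common __virt_pg_map(). A standalone sketch of that walk, with a hypothetical map_one() standing in for __virt_pg_map():

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for __virt_pg_map(); just records one page's mapping. */
static void map_one(uint64_t nested_paddr, uint64_t paddr, uint64_t page_size)
{
	printf("map 0x%" PRIx64 " -> 0x%" PRIx64 " (0x%" PRIx64 " bytes)\n",
	       nested_paddr, paddr, page_size);
}

/* Same walk as __tdp_map(): split the range into level-sized pages. */
static void tdp_map(uint64_t nested_paddr, uint64_t paddr, uint64_t size,
		    uint64_t page_size)
{
	uint64_t npages = size / page_size;

	assert(nested_paddr + size > nested_paddr);
	assert(paddr + size > paddr);

	while (npages--) {
		map_one(nested_paddr, paddr, page_size);
		nested_paddr += page_size;
		paddr += page_size;
	}
}

int main(void)
{
	tdp_map(0x100000, 0x200000, 0x3000, 0x1000);	/* three 4KiB pages */
	return 0;
}

Run against a 0x3000-byte range with 4KiB pages it emits three mappings, mirroring the npages loop retained at the end of the diff.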
