From 5b22bbe717d9b96867783cde380c43f6cc28aafd Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 11 Mar 2022 15:03:41 +0800 Subject: KVM: X86: Change the type of access u32 to u64 Change the type of access u32 to u64 for FNAME(walk_addr) and ->gva_to_gpa(). The kinds of accesses are usually combinations of UWX, and VMX/SVM's nested paging adds a new factor of access: is it an access for a guest page table or for a final guest physical address. And SMAP relies a factor for supervisor access: explicit or implicit. So @access in FNAME(walk_addr) and ->gva_to_gpa() is better to include all these information to do the walk. Although @access(u32) has enough bits to encode all the kinds, this patch extends it to u64: o Extra bits will be in the higher 32 bits, so that we can easily obtain the traditional access mode (UWX) by converting it to u32. o Reuse the value for the access kind defined by SVM's nested paging (PFERR_GUEST_FINAL_MASK and PFERR_GUEST_PAGE_MASK) as @error_code in kvm_handle_page_fault(). Signed-off-by: Lai Jiangshan Message-Id: <20220311070346.45023-2-jiangshanlai@gmail.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/paging_tmpl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm/mmu/paging_tmpl.h') diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 252c77805eb9..8621188b46df 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -339,7 +339,7 @@ static inline bool FNAME(is_last_gpte)(struct kvm_mmu *mmu, */ static int FNAME(walk_addr_generic)(struct guest_walker *walker, struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gpa_t addr, u32 access) + gpa_t addr, u64 access) { int ret; pt_element_t pte; @@ -347,7 +347,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, gfn_t table_gfn; u64 pt_access, pte_access; unsigned index, accessed_dirty, pte_pkey; - unsigned nested_access; + u64 nested_access; gpa_t pte_gpa; bool have_ad; int offset; @@ -540,7 +540,7 @@ error: } static int FNAME(walk_addr)(struct guest_walker *walker, - struct kvm_vcpu *vcpu, gpa_t addr, u32 access) + struct kvm_vcpu *vcpu, gpa_t addr, u64 access) { return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr, access); @@ -988,7 +988,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa) /* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gpa_t addr, u32 access, + gpa_t addr, u64 access, struct x86_exception *exception) { struct guest_walker walker; -- cgit v1.2.3 From 2a8859f373b0a86f0ece8ec8312607eacf12485d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 29 Mar 2022 12:56:24 -0400 Subject: KVM: x86/mmu: do compare-and-exchange of gPTE via the user address FNAME(cmpxchg_gpte) is an inefficient mess. It is at least decent if it can go through get_user_pages_fast(), but if it cannot then it tries to use memremap(); that is not just terribly slow, it is also wrong because it assumes that the VM_PFNMAP VMA is contiguous. The right way to do it would be to do the same thing as hva_to_pfn_remapped() does since commit add6a0cd1c5b ("KVM: MMU: try to fix up page faults before giving up", 2016-07-05), using follow_pte() and fixup_user_fault() to determine the correct address to use for memremap(). To do this, one could for example extract hva_to_pfn() for use outside virt/kvm/kvm_main.c. But really there is no reason to do that either, because there is already a perfectly valid address to do the cmpxchg() on, only it is a userspace address. That means doing user_access_begin()/user_access_end() and writing the code in assembly to handle exceptions correctly. Worse, the guest PTE can be 8-byte even on i686 so there is the extra complication of using cmpxchg8b to account for. But at least it is an efficient mess. (Thanks to Linus for suggesting improvement on the inline assembly). Reported-by: Qiuhao Li Reported-by: Gaoning Pan Reported-by: Yongkang Jia Reported-by: syzbot+6cde2282daa792c49ab8@syzkaller.appspotmail.com Debugged-by: Tadeusz Struk Tested-by: Maxim Levitsky Cc: stable@vger.kernel.org Fixes: bd53cb35a3e9 ("X86/KVM: Handle PFNs outside of kernel reach when touching GPTEs") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/paging_tmpl.h | 74 +++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 40 deletions(-) (limited to 'arch/x86/kvm/mmu/paging_tmpl.h') diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 8621188b46df..01fee5f67ac3 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -34,9 +34,8 @@ #define PT_HAVE_ACCESSED_DIRTY(mmu) true #ifdef CONFIG_X86_64 #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL - #define CMPXCHG cmpxchg + #define CMPXCHG "cmpxchgq" #else - #define CMPXCHG cmpxchg64 #define PT_MAX_FULL_LEVELS 2 #endif #elif PTTYPE == 32 @@ -52,7 +51,7 @@ #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT #define PT_HAVE_ACCESSED_DIRTY(mmu) true - #define CMPXCHG cmpxchg + #define CMPXCHG "cmpxchgl" #elif PTTYPE == PTTYPE_EPT #define pt_element_t u64 #define guest_walker guest_walkerEPT @@ -65,7 +64,9 @@ #define PT_GUEST_DIRTY_SHIFT 9 #define PT_GUEST_ACCESSED_SHIFT 8 #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad) - #define CMPXCHG cmpxchg64 + #ifdef CONFIG_X86_64 + #define CMPXCHG "cmpxchgq" + #endif #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL #else #error Invalid PTTYPE value @@ -147,43 +148,36 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pt_element_t __user *ptep_user, unsigned index, pt_element_t orig_pte, pt_element_t new_pte) { - int npages; - pt_element_t ret; - pt_element_t *table; - struct page *page; - - npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page); - if (likely(npages == 1)) { - table = kmap_atomic(page); - ret = CMPXCHG(&table[index], orig_pte, new_pte); - kunmap_atomic(table); - - kvm_release_page_dirty(page); - } else { - struct vm_area_struct *vma; - unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK; - unsigned long pfn; - unsigned long paddr; - - mmap_read_lock(current->mm); - vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE); - if (!vma || !(vma->vm_flags & VM_PFNMAP)) { - mmap_read_unlock(current->mm); - return -EFAULT; - } - pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - paddr = pfn << PAGE_SHIFT; - table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB); - if (!table) { - mmap_read_unlock(current->mm); - return -EFAULT; - } - ret = CMPXCHG(&table[index], orig_pte, new_pte); - memunmap(table); - mmap_read_unlock(current->mm); - } + signed char r; - return (ret != orig_pte); + if (!user_access_begin(ptep_user, sizeof(pt_element_t))) + return -EFAULT; + +#ifdef CMPXCHG + asm volatile("1:" LOCK_PREFIX CMPXCHG " %[new], %[ptr]\n" + "setnz %b[r]\n" + "2:" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r]) + : [ptr] "+m" (*ptep_user), + [old] "+a" (orig_pte), + [r] "=q" (r) + : [new] "r" (new_pte) + : "memory"); +#else + asm volatile("1:" LOCK_PREFIX "cmpxchg8b %[ptr]\n" + "setnz %b[r]\n" + "2:" + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r]) + : [ptr] "+m" (*ptep_user), + [old] "+A" (orig_pte), + [r] "=q" (r) + : [new_lo] "b" ((u32)new_pte), + [new_hi] "c" ((u32)(new_pte >> 32)) + : "memory"); +#endif + + user_access_end(); + return r; } static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, -- cgit v1.2.3