author     Marc Zyngier <maz@kernel.org>    2023-08-28 11:29:02 +0300
committer  Marc Zyngier <maz@kernel.org>    2023-08-28 11:29:02 +0300
commit     d58335d10fd7617addb48b3c4f06c373c2f76529
tree       24cce346ac5c2808af3ff437e1f9f8e2f1cbd0e6
parent     c1907626dddc0e1c9aaea6af6c2ae49c861177ce
parent     7657ea920c54218f123ddc1b572821695b669c13
Merge branch kvm-arm64/tlbi-range into kvmarm-master/next
* kvm-arm64/tlbi-range:
: .
: FEAT_TLBIRANGE support, courtesy of Raghavendra Rao Ananta.
: From the cover letter:
:
: "In certain code paths, KVM/ARM currently invalidates the entire VM's
: page-tables instead of just invalidating a necessary range. For example,
: when collapsing a table PTE to a block PTE, instead of iterating over
: each PTE and flushing them, KVM uses 'vmalls12e1is' TLBI operation to
: flush all the entries. This is inefficient since the guest would have
: to refill the TLBs again, even for the addresses that aren't covered
: by the table entry. The performance impact would scale poorly if many
: addresses in the VM are going through this remapping.
:
: For architectures that implement FEAT_TLBIRANGE, KVM can replace such
: inefficient paths by performing the invalidations only on the range of
: addresses that are in scope. This series tries to achieve the same in
: the areas of stage-2 map, unmap and write-protecting the pages."
: .
KVM: arm64: Use TLBI range-based instructions for unmap
KVM: arm64: Invalidate the table entries upon a range
KVM: arm64: Flush only the memslot after write-protect
KVM: arm64: Implement kvm_arch_flush_remote_tlbs_range()
KVM: arm64: Define kvm_tlb_flush_vmid_range()
KVM: arm64: Implement __kvm_tlb_flush_vmid_range()
arm64: tlb: Implement __flush_s2_tlb_range_op()
arm64: tlb: Refactor the core flush algorithm of __flush_tlb_range
KVM: Move kvm_arch_flush_remote_tlbs_memslot() to common code
KVM: Allow range-based TLB invalidation from common code
KVM: Remove CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
KVM: arm64: Use kvm_arch_flush_remote_tlbs()
KVM: Declare kvm_arch_flush_remote_tlbs() globally
KVM: Rename kvm_arch_flush_remote_tlb() to kvm_arch_flush_remote_tlbs()
Signed-off-by: Marc Zyngier <maz@kernel.org>
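
For reference, here is a minimal standalone sketch (plain C, not kernel code) of the arithmetic behind the range-based path described in the cover letter: FEAT_TLBIRANGE lets a single range TLBI cover (num + 1) << (5 * scale + 1) pages, and the __flush_tlb_range_op() loop introduced below peels off an odd leftover page with a regular TLBI before covering the rest with as few range operations as possible. The sketch assumes 4KiB pages, a PAGE_SIZE stride and a CPU with FEAT_TLBIRANGE; decompose() and its printf output are purely illustrative.

/* tlbi-range-sketch.c: standalone illustration, not kernel code. */
#include <stdio.h>

#define PAGE_SHIFT		12
#define TLBI_RANGE_MASK		0x1fUL
/* Pages covered by one range TLBI: (num + 1) << (5 * scale + 1) */
#define __TLBI_RANGE_PAGES(num, scale)	\
	((unsigned long)((num) + 1) << (5 * (scale) + 1))
/* 'num' field for a given page count and scale; negative if not representable */
#define __TLBI_RANGE_NUM(pages, scale)	\
	((long)(((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)

static void decompose(unsigned long start, unsigned long pages)
{
	long num;
	int scale = 0;

	while (pages > 0) {
		if (pages % 2 == 1) {
			/* Odd leftover page: a plain, single-entry TLBI */
			printf("TLBI        addr=%#lx (1 page)\n", start);
			start += 1UL << PAGE_SHIFT;
			pages -= 1;
			continue;
		}

		num = __TLBI_RANGE_NUM(pages, scale);
		if (num >= 0) {
			unsigned long span = __TLBI_RANGE_PAGES(num, scale);

			printf("TLBI range  addr=%#lx num=%ld scale=%d (%lu pages)\n",
			       start, num, scale, span);
			start += span << PAGE_SHIFT;
			pages -= span;
		}
		scale++;
	}
}

int main(void)
{
	decompose(0x40000000UL, 513);	/* 2MiB of 4KiB pages, plus one */
	return 0;
}

Run as-is, the example decomposes a 513-page (2MiB + 4KiB) invalidation into one single-page TLBI followed by a single range TLBI with num = 7, scale = 1.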
-rw-r--r--  arch/arm64/include/asm/kvm_asm.h      |   3
-rw-r--r--  arch/arm64/include/asm/kvm_host.h     |   4
-rw-r--r--  arch/arm64/include/asm/kvm_pgtable.h  |  10
-rw-r--r--  arch/arm64/include/asm/tlbflush.h     | 124
-rw-r--r--  arch/arm64/kvm/Kconfig                |   1
-rw-r--r--  arch/arm64/kvm/arm.c                  |   6
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/hyp-main.c    |  11
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/tlb.c         |  30
-rw-r--r--  arch/arm64/kvm/hyp/pgtable.c          |  63
-rw-r--r--  arch/arm64/kvm/hyp/vhe/tlb.c          |  28
-rw-r--r--  arch/arm64/kvm/mmu.c                  |  16
-rw-r--r--  arch/mips/include/asm/kvm_host.h      |   3
-rw-r--r--  arch/mips/kvm/mips.c                  |  12
-rw-r--r--  arch/riscv/kvm/mmu.c                  |   6
-rw-r--r--  arch/x86/include/asm/kvm_host.h       |   6
-rw-r--r--  arch/x86/kvm/mmu/mmu.c                |  28
-rw-r--r--  arch/x86/kvm/mmu/mmu_internal.h       |   3
-rw-r--r--  arch/x86/kvm/x86.c                    |   2
-rw-r--r--  include/linux/kvm_host.h              |  24
-rw-r--r--  virt/kvm/Kconfig                      |   3
-rw-r--r--  virt/kvm/kvm_main.c                   |  35
21 files changed, 286 insertions, 132 deletions
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 7d170aaa2db4..2c27cb8cf442 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -70,6 +70,7 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa, __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh, __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid, + __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range, __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context, __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr, @@ -229,6 +230,8 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, phys_addr_t ipa, int level); +extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, + phys_addr_t start, unsigned long pages); extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu); extern void __kvm_timer_set_cntvoff(u64 cntvoff); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 616dca499678..967ee7e8e408 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1120,6 +1120,10 @@ int __init kvm_set_ipa_limit(void); #define __KVM_HAVE_ARCH_VM_ALLOC struct kvm *kvm_arch_alloc_vm(void); +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS + +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE + static inline bool kvm_vm_is_protected(struct kvm *kvm) { return false; diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 929d355eae0a..d3e354bb8351 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -746,4 +746,14 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte); * kvm_pgtable_prot format. */ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte); + +/** + * kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries + * + * @mmu: Stage-2 KVM MMU struct + * @addr: The base Intermediate physical address from which to invalidate + * @size: Size of the range from the base to invalidate + */ +void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, + phys_addr_t addr, size_t size); #endif /* __ARM64_KVM_PGTABLE_H__ */ diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 412a3b9a3c25..93f4b397f9a1 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -278,14 +278,77 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, */ #define MAX_TLBI_OPS PTRS_PER_PTE +/* + * __flush_tlb_range_op - Perform TLBI operation upon a range + * + * @op: TLBI instruction that operates on a range (has 'r' prefix) + * @start: The start address of the range + * @pages: Range as the number of pages from 'start' + * @stride: Flush granularity + * @asid: The ASID of the task (0 for IPA instructions) + * @tlb_level: Translation Table level hint, if known + * @tlbi_user: If 'true', call an additional __tlbi_user() + * (typically for user ASIDs). 'flase' for IPA instructions + * + * When the CPU does not support TLB range operations, flush the TLB + * entries one by one at the granularity of 'stride'. If the TLB + * range ops are supported, then: + * + * 1. If 'pages' is odd, flush the first page through non-range + * operations; + * + * 2. For remaining pages: the minimum range granularity is decided + * by 'scale', so multiple range TLBI operations may be required. 
+ * Start from scale = 0, flush the corresponding number of pages + * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it + * until no pages left. + * + * Note that certain ranges can be represented by either num = 31 and + * scale or num = 0 and scale + 1. The loop below favours the latter + * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. + */ +#define __flush_tlb_range_op(op, start, pages, stride, \ + asid, tlb_level, tlbi_user) \ +do { \ + int num = 0; \ + int scale = 0; \ + unsigned long addr; \ + \ + while (pages > 0) { \ + if (!system_supports_tlb_range() || \ + pages % 2 == 1) { \ + addr = __TLBI_VADDR(start, asid); \ + __tlbi_level(op, addr, tlb_level); \ + if (tlbi_user) \ + __tlbi_user_level(op, addr, tlb_level); \ + start += stride; \ + pages -= stride >> PAGE_SHIFT; \ + continue; \ + } \ + \ + num = __TLBI_RANGE_NUM(pages, scale); \ + if (num >= 0) { \ + addr = __TLBI_VADDR_RANGE(start, asid, scale, \ + num, tlb_level); \ + __tlbi(r##op, addr); \ + if (tlbi_user) \ + __tlbi_user(r##op, addr); \ + start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ + pages -= __TLBI_RANGE_PAGES(num, scale); \ + } \ + scale++; \ + } \ +} while (0) + +#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \ + __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false) + static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long stride, bool last_level, int tlb_level) { - int num = 0; - int scale = 0; - unsigned long asid, addr, pages; + unsigned long asid, pages; start = round_down(start, stride); end = round_up(end, stride); @@ -307,56 +370,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, dsb(ishst); asid = ASID(vma->vm_mm); - /* - * When the CPU does not support TLB range operations, flush the TLB - * entries one by one at the granularity of 'stride'. If the TLB - * range ops are supported, then: - * - * 1. If 'pages' is odd, flush the first page through non-range - * operations; - * - * 2. For remaining pages: the minimum range granularity is decided - * by 'scale', so multiple range TLBI operations may be required. - * Start from scale = 0, flush the corresponding number of pages - * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it - * until no pages left. - * - * Note that certain ranges can be represented by either num = 31 and - * scale or num = 0 and scale + 1. The loop below favours the latter - * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. 
- */ - while (pages > 0) { - if (!system_supports_tlb_range() || - pages % 2 == 1) { - addr = __TLBI_VADDR(start, asid); - if (last_level) { - __tlbi_level(vale1is, addr, tlb_level); - __tlbi_user_level(vale1is, addr, tlb_level); - } else { - __tlbi_level(vae1is, addr, tlb_level); - __tlbi_user_level(vae1is, addr, tlb_level); - } - start += stride; - pages -= stride >> PAGE_SHIFT; - continue; - } - - num = __TLBI_RANGE_NUM(pages, scale); - if (num >= 0) { - addr = __TLBI_VADDR_RANGE(start, asid, scale, - num, tlb_level); - if (last_level) { - __tlbi(rvale1is, addr); - __tlbi_user(rvale1is, addr); - } else { - __tlbi(rvae1is, addr); - __tlbi_user(rvae1is, addr); - } - start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; - pages -= __TLBI_RANGE_PAGES(num, scale); - } - scale++; - } + if (last_level) + __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true); + else + __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true); + dsb(ish); } diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index f4f0b19d755b..83c1e09be42e 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -25,7 +25,6 @@ menuconfig KVM select MMU_NOTIFIER select PREEMPT_NOTIFIERS select HAVE_KVM_CPU_RELAX_INTERCEPT - select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_XFER_TO_GUEST_WORK diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 183786fc553c..1cad7368aedf 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1534,12 +1534,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) } -void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - const struct kvm_memory_slot *memslot) -{ - kvm_flush_remote_tlbs(kvm); -} - static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev_addr) { diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index a169c619db60..857d9bc04fd4 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -135,6 +135,16 @@ static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctx __kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level); } +static void +handle___kvm_tlb_flush_vmid_range(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); + DECLARE_REG(phys_addr_t, start, host_ctxt, 2); + DECLARE_REG(unsigned long, pages, host_ctxt, 3); + + __kvm_tlb_flush_vmid_range(kern_hyp_va(mmu), start, pages); +} + static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); @@ -327,6 +337,7 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa), HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh), HANDLE_FUNC(__kvm_tlb_flush_vmid), + HANDLE_FUNC(__kvm_tlb_flush_vmid_range), HANDLE_FUNC(__kvm_flush_cpu_context), HANDLE_FUNC(__kvm_timer_set_cntvoff), HANDLE_FUNC(__vgic_v3_read_vmcr), diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index b9991bbd8e3f..1b265713d6be 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -182,6 +182,36 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, __tlb_switch_to_host(&cxt); } +void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, + phys_addr_t start, unsigned long pages) +{ + struct tlb_inv_context cxt; + unsigned long stride; + + /* + * Since the range of addresses may not be mapped at + * the same level, assume 
the worst case as PAGE_SIZE + */ + stride = PAGE_SIZE; + start = round_down(start, stride); + + /* Switch to requested VMID */ + __tlb_switch_to_guest(mmu, &cxt, false); + + __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0); + + dsb(ish); + __tlbi(vmalle1is); + dsb(ish); + isb(); + + /* See the comment in __kvm_tlb_flush_vmid_ipa() */ + if (icache_is_vpipt()) + icache_inval_all_pou(); + + __tlb_switch_to_host(&cxt); +} + void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) { struct tlb_inv_context cxt; diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index f7a93ef29250..f155b8c9e98c 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -670,6 +670,26 @@ static bool stage2_has_fwb(struct kvm_pgtable *pgt) return !(pgt->flags & KVM_PGTABLE_S2_NOFWB); } +void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, + phys_addr_t addr, size_t size) +{ + unsigned long pages, inval_pages; + + if (!system_supports_tlb_range()) { + kvm_call_hyp(__kvm_tlb_flush_vmid, mmu); + return; + } + + pages = size >> PAGE_SHIFT; + while (pages > 0) { + inval_pages = min(pages, MAX_TLBI_RANGE_PAGES); + kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, addr, inval_pages); + + addr += inval_pages << PAGE_SHIFT; + pages -= inval_pages; + } +} + #define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt)) static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot, @@ -786,7 +806,8 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx, * evicted pte value (if any). */ if (kvm_pte_table(ctx->old, ctx->level)) - kvm_call_hyp(__kvm_tlb_flush_vmid, mmu); + kvm_tlb_flush_vmid_range(mmu, ctx->addr, + kvm_granule_size(ctx->level)); else if (kvm_pte_valid(ctx->old)) kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); @@ -810,16 +831,36 @@ static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t n smp_store_release(ctx->ptep, new); } -static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu, - struct kvm_pgtable_mm_ops *mm_ops) +static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt) +{ + /* + * If FEAT_TLBIRANGE is implemented, defer the individual + * TLB invalidations until the entire walk is finished, and + * then use the range-based TLBI instructions to do the + * invalidations. Condition deferred TLB invalidation on the + * system supporting FWB as the optimization is entirely + * pointless when the unmap walker needs to perform CMOs. + */ + return system_supports_tlb_range() && stage2_has_fwb(pgt); +} + +static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx, + struct kvm_s2_mmu *mmu, + struct kvm_pgtable_mm_ops *mm_ops) { + struct kvm_pgtable *pgt = ctx->arg; + /* - * Clear the existing PTE, and perform break-before-make with - * TLB maintenance if it was valid. + * Clear the existing PTE, and perform break-before-make if it was + * valid. Depending on the system support, defer the TLB maintenance + * for the same until the entire unmap walk is completed. */ if (kvm_pte_valid(ctx->old)) { kvm_clear_pte(ctx->ptep); - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); + + if (!stage2_unmap_defer_tlb_flush(pgt)) + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, + ctx->addr, ctx->level); } mm_ops->put_page(ctx->ptep); @@ -1077,7 +1118,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, * block entry and rely on the remaining portions being faulted * back lazily. 
*/ - stage2_put_pte(ctx, mmu, mm_ops); + stage2_unmap_put_pte(ctx, mmu, mm_ops); if (need_flush && mm_ops->dcache_clean_inval_poc) mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops), @@ -1091,13 +1132,19 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) { + int ret; struct kvm_pgtable_walker walker = { .cb = stage2_unmap_walker, .arg = pgt, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, }; - return kvm_pgtable_walk(pgt, addr, size, &walker); + ret = kvm_pgtable_walk(pgt, addr, size, &walker); + if (stage2_unmap_defer_tlb_flush(pgt)) + /* Perform the deferred TLB invalidations */ + kvm_tlb_flush_vmid_range(pgt->mmu, addr, size); + + return ret; } struct stage2_attr_data { diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index e69da550cdc5..46bd43f61d76 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -143,6 +143,34 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, __tlb_switch_to_host(&cxt); } +void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, + phys_addr_t start, unsigned long pages) +{ + struct tlb_inv_context cxt; + unsigned long stride; + + /* + * Since the range of addresses may not be mapped at + * the same level, assume the worst case as PAGE_SIZE + */ + stride = PAGE_SIZE; + start = round_down(start, stride); + + dsb(ishst); + + /* Switch to requested VMID */ + __tlb_switch_to_guest(mmu, &cxt); + + __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0); + + dsb(ish); + __tlbi(vmalle1is); + dsb(ish); + isb(); + + __tlb_switch_to_host(&cxt); +} + void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) { struct tlb_inv_context cxt; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index d3b4feed460c..b16aff3f65f6 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -161,15 +161,23 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot) } /** - * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8 + * kvm_arch_flush_remote_tlbs() - flush all VM TLB entries for v7/8 * @kvm: pointer to kvm structure. 
* * Interface to HYP function to flush all VM TLB entries */ -void kvm_flush_remote_tlbs(struct kvm *kvm) +int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { - ++kvm->stat.generic.remote_tlb_flush_requests; kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu); + return 0; +} + +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, + gfn_t gfn, u64 nr_pages) +{ + kvm_tlb_flush_vmid_range(&kvm->arch.mmu, + gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT); + return 0; } static bool kvm_is_device_pfn(unsigned long pfn) @@ -1075,7 +1083,7 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) write_lock(&kvm->mmu_lock); stage2_wp_range(&kvm->arch.mmu, start, end); write_unlock(&kvm->mmu_lock); - kvm_flush_remote_tlbs(kvm); + kvm_flush_remote_tlbs_memslot(kvm, memslot); } /** diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 04cedf9f8811..54a85f1d4f2c 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -896,7 +896,6 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} -#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB -int kvm_arch_flush_remote_tlb(struct kvm *kvm); +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS #endif /* __MIPS_KVM_HOST_H__ */ diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index aa5583a7b05b..231ac052b506 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -199,7 +199,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, /* Flush slot from GPA */ kvm_mips_flush_gpa_pt(kvm, slot->base_gfn, slot->base_gfn + slot->npages - 1); - kvm_arch_flush_remote_tlbs_memslot(kvm, slot); + kvm_flush_remote_tlbs_memslot(kvm, slot); spin_unlock(&kvm->mmu_lock); } @@ -235,7 +235,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, needs_flush = kvm_mips_mkclean_gpa_pt(kvm, new->base_gfn, new->base_gfn + new->npages - 1); if (needs_flush) - kvm_arch_flush_remote_tlbs_memslot(kvm, new); + kvm_flush_remote_tlbs_memslot(kvm, new); spin_unlock(&kvm->mmu_lock); } } @@ -981,18 +981,12 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) } -int kvm_arch_flush_remote_tlb(struct kvm *kvm) +int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { kvm_mips_callbacks->prepare_flush_shadow(kvm); return 1; } -void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - const struct kvm_memory_slot *memslot) -{ - kvm_flush_remote_tlbs(kvm); -} - int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { int r; diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index f2eb47925806..97e129620686 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -406,12 +406,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) { } -void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - const struct kvm_memory_slot *memslot) -{ - kvm_flush_remote_tlbs(kvm); -} - void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free) { } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 28bd38303d70..b547d17c58f6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1794,8 +1794,8 @@ static inline struct kvm *kvm_arch_alloc_vm(void) #define __KVM_HAVE_ARCH_VM_FREE void kvm_arch_free_vm(struct kvm *kvm); -#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB -static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) +#define 
__KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS +static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { if (kvm_x86_ops.flush_remote_tlbs && !static_call(kvm_x86_flush_remote_tlbs)(kvm)) @@ -1804,6 +1804,8 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) return -ENOTSUPP; } +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE + #define kvm_arch_pmi_in_guest(vcpu) \ ((vcpu) && (vcpu)->arch.handling_intr_from_guest) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index ec169f5c7dce..dbf3c6c2316c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -278,16 +278,12 @@ static inline bool kvm_available_flush_remote_tlbs_range(void) return kvm_x86_ops.flush_remote_tlbs_range; } -void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, - gfn_t nr_pages) +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages) { - int ret = -EOPNOTSUPP; + if (!kvm_x86_ops.flush_remote_tlbs_range) + return -EOPNOTSUPP; - if (kvm_x86_ops.flush_remote_tlbs_range) - ret = static_call(kvm_x86_flush_remote_tlbs_range)(kvm, start_gfn, - nr_pages); - if (ret) - kvm_flush_remote_tlbs(kvm); + return static_call(kvm_x86_flush_remote_tlbs_range)(kvm, gfn, nr_pages); } static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index); @@ -6670,7 +6666,7 @@ static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm, */ if (walk_slot_rmaps(kvm, slot, kvm_mmu_zap_collapsible_spte, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL - 1, true)) - kvm_arch_flush_remote_tlbs_memslot(kvm, slot); + kvm_flush_remote_tlbs_memslot(kvm, slot); } void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, @@ -6689,20 +6685,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, } } -void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - const struct kvm_memory_slot *memslot) -{ - /* - * All current use cases for flushing the TLBs for a specific memslot - * related to dirty logging, and many do the TLB flush out of mmu_lock. - * The interaction between the various operations on memslot must be - * serialized by slots_locks to ensure the TLB flush from one operation - * is observed by any other operation on the same memslot. - */ - lockdep_assert_held(&kvm->slots_lock); - kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages); -} - void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, const struct kvm_memory_slot *memslot) { diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index d39af5639ce9..86cb83bb3480 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -170,9 +170,6 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn, int min_level); -void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn, - gfn_t nr_pages); - /* Flush the given page (huge or not) of guest memory. */ static inline void kvm_flush_remote_tlbs_gfn(struct kvm *kvm, gfn_t gfn, int level) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a6b9bea62fb8..faeb2e307b36 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12751,7 +12751,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, * See is_writable_pte() for more details (the case involving * access-tracked SPTEs is particularly relevant). 
*/ - kvm_arch_flush_remote_tlbs_memslot(kvm, new); + kvm_flush_remote_tlbs_memslot(kvm, new); } } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9d3ac7720da9..394db2ce11e2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1359,6 +1359,9 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode); void kvm_flush_remote_tlbs(struct kvm *kvm); +void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages); +void kvm_flush_remote_tlbs_memslot(struct kvm *kvm, + const struct kvm_memory_slot *memslot); #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min); @@ -1387,10 +1390,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, unsigned long mask); void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot); -#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT -void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, - const struct kvm_memory_slot *memslot); -#else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ +#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, int *is_dirty, struct kvm_memory_slot **memslot); @@ -1479,11 +1479,23 @@ static inline void kvm_arch_free_vm(struct kvm *kvm) } #endif -#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB -static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) +#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS +static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { return -ENOTSUPP; } +#else +int kvm_arch_flush_remote_tlbs(struct kvm *kvm); +#endif + +#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE +static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, + gfn_t gfn, u64 nr_pages) +{ + return -EOPNOTSUPP; +} +#else +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages); #endif #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index b74916de5183..484d0873061c 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -62,9 +62,6 @@ config HAVE_KVM_CPU_RELAX_INTERCEPT config KVM_VFIO bool -config HAVE_KVM_ARCH_TLB_FLUSH_ALL - bool - config HAVE_KVM_INVALID_WAKEUPS bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index dfbaafbe3a00..5d4d2e051aa0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -345,7 +345,6 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) } EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request); -#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL void kvm_flush_remote_tlbs(struct kvm *kvm) { ++kvm->stat.generic.remote_tlb_flush_requests; @@ -361,12 +360,38 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) * kvm_make_all_cpus_request() reads vcpu->mode. We reuse that * barrier here. */ - if (!kvm_arch_flush_remote_tlb(kvm) + if (!kvm_arch_flush_remote_tlbs(kvm) || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH)) ++kvm->stat.generic.remote_tlb_flush; } EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs); -#endif + +void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages) +{ + if (!kvm_arch_flush_remote_tlbs_range(kvm, gfn, nr_pages)) + return; + + /* + * Fall back to a flushing entire TLBs if the architecture range-based + * TLB invalidation is unsupported or can't be performed for whatever + * reason. 
+ */ + kvm_flush_remote_tlbs(kvm); +} + +void kvm_flush_remote_tlbs_memslot(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + /* + * All current use cases for flushing the TLBs for a specific memslot + * are related to dirty logging, and many do the TLB flush out of + * mmu_lock. The interaction between the various operations on memslot + * must be serialized by slots_locks to ensure the TLB flush from one + * operation is observed by any other operation on the same memslot. + */ + lockdep_assert_held(&kvm->slots_lock); + kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages); +} static void kvm_flush_shadow_all(struct kvm *kvm) { @@ -2180,7 +2205,7 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log) } if (flush) - kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); + kvm_flush_remote_tlbs_memslot(kvm, memslot); if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) return -EFAULT; @@ -2297,7 +2322,7 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm, KVM_MMU_UNLOCK(kvm); if (flush) - kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); + kvm_flush_remote_tlbs_memslot(kvm, memslot); return 0; } |
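
As a usage-level illustration of the new kvm_tlb_flush_vmid_range() helper above, the following standalone sketch (not kernel code; the hypercall is replaced by a printf stub and the constants mirror the 4KiB-page case) shows how an arbitrary stage-2 range is split into chunks of at most MAX_TLBI_RANGE_PAGES pages per __kvm_tlb_flush_vmid_range() call:

/* vmid-range-chunking.c: standalone illustration, not kernel code. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT		12
/* __TLBI_RANGE_PAGES(31, 3): 8GiB worth of 4KiB pages */
#define MAX_TLBI_RANGE_PAGES	(32UL << (5 * 3 + 1))

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

/* Stand-in for kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, start, pages) */
static void hyp_flush_vmid_range(uint64_t start, unsigned long pages)
{
	printf("flush IPA %#llx..%#llx (%lu pages)\n",
	       (unsigned long long)start,
	       (unsigned long long)(start + ((uint64_t)pages << PAGE_SHIFT)),
	       pages);
}

static void flush_vmid_range(uint64_t addr, uint64_t size)
{
	unsigned long pages = size >> PAGE_SHIFT, inval_pages;

	while (pages > 0) {
		/* Cap each hypercall at what one range walk can cover */
		inval_pages = min_ul(pages, MAX_TLBI_RANGE_PAGES);
		hyp_flush_vmid_range(addr, inval_pages);

		addr += (uint64_t)inval_pages << PAGE_SHIFT;
		pages -= inval_pages;
	}
}

int main(void)
{
	flush_vmid_range(0x80000000ULL, 9ULL << 30);	/* a 9GiB region */
	return 0;
}

Run as-is, the 9GiB invalidation is issued as one 8GiB chunk (the MAX_TLBI_RANGE_PAGES limit with 4KiB pages) followed by a 1GiB chunk.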
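
The deferred-invalidation idea used for the unmap path can be summarised the same way. In the sketch below (standalone, not kernel code; all names are local to the example and the flush functions only print), the per-entry flush is skipped whenever both FEAT_TLBIRANGE and FEAT_S2FWB are available, and a single range-based flush is issued once the whole walk has finished, mirroring stage2_unmap_defer_tlb_flush() and the tail of kvm_pgtable_stage2_unmap():

/* deferred-unmap-flush.c: standalone illustration, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT 12

static bool have_tlbi_range = true;	/* FEAT_TLBIRANGE present */
static bool have_fwb = true;		/* FEAT_S2FWB: no CMOs needed on unmap */

static bool defer_tlb_flush(void)
{
	return have_tlbi_range && have_fwb;
}

static void flush_page(unsigned long ipa)
{
	printf("per-entry TLBI %#lx\n", ipa);
}

static void flush_range(unsigned long ipa, unsigned long size)
{
	printf("range TLBI %#lx..%#lx\n", ipa, ipa + size);
}

static void unmap(unsigned long addr, unsigned long size)
{
	for (unsigned long ipa = addr; ipa < addr + size; ipa += 1UL << PAGE_SHIFT) {
		/* ... clear the PTE here ... */
		if (!defer_tlb_flush())
			flush_page(ipa);	/* break-before-make per entry */
	}

	if (defer_tlb_flush())
		flush_range(addr, size);	/* one flush for the whole walk */
}

int main(void)
{
	unmap(0x40000000UL, 1UL << 21);		/* unmap 2MiB of stage-2 */
	return 0;
}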