From 190cc5370a8b6eb2894f1e958058b0c6589c582e Mon Sep 17 00:00:00 2001 From: Takahiro Itazuri Date: Mon, 20 Apr 2026 15:46:04 +0000 Subject: KVM: Rename invalidate_begin to invalidate_start for consistency Rename kvm_mmu_invalidate_begin() to kvm_mmu_invalidate_start() to align with mmu_notifier_ops.invalidate_range_start(), which is the callback that ultimately drives KVM's MMU invalidation. While the naming within KVM itself is a close split between "_begin" and "_start": $ git grep -E "invalidate(_range)?_begin" **/kvm* | wc -l 12 $ git grep -E "invalidate(_range)?_start" **/kvm* | wc -l 21 All two of the begin() uses are in KVM: $ git grep -E "invalidate(_range)?_begin" * | wc -l 14 And those two holdouts are bugs in invalidate_range_start()'s comment, i.e. will also be fixed sooner or later[*]. On the other hand, use of _start() is pervasive throughout the kernel: $ git grep -E "invalidate(_range)?_start" * | wc -l 117 Even if that weren't the case, conforming to the mmu_notifier_ops naming is the right call since invalidate_range_start() is the external API that KVM hooks into. No functional change intended. Link: https://lore.kernel.org/all/20260513163546.1176742-1-seanjc@google.com [*] Signed-off-by: Takahiro Itazuri Link: https://patch.msgid.link/20260420154720.29012-4-itazur@amazon.com [sean: massage changelog to provide more (accurate) numbers] Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4c14aee1fb06..7b231a1e63ba 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1562,7 +1562,7 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc); void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); #endif -void kvm_mmu_invalidate_begin(struct kvm *kvm); +void kvm_mmu_invalidate_start(struct kvm *kvm); void kvm_mmu_invalidate_range_add(struct kvm *kvm, gfn_t start, gfn_t end); void kvm_mmu_invalidate_end(struct kvm *kvm); bool kvm_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); -- cgit v1.2.3 From 4b28f0846ef6521b47ee95ea7d77c9d40a7baf29 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Thu, 16 Apr 2026 16:23:24 -0700 Subject: crypto/ccp: export firmware supported vm types In some configurations, the firmware does not support all VM types. The SEV firmware has an entry in the TCB_VERSION structure referred to as the Security Version Number in the SEV-SNP firmware specification and referred to as the "SPL" in SEV firmware release notes. The SEV firmware release notes say: On every SEV firmware release where a security mitigation has been added, the SNP SPL gets increased by 1. This is to let users know that it is important to update to this version. The SEV firmware release that fixed CVE-2025-48514 by disabling SEV-ES support on vulnerable platforms has this SVN increased to reflect the fix. The SVN is platform-specific, as is the structure of TCB_VERSION. Check CURRENT_TCB instead of REPORTED_TCB, since the firmware behaves with the CURRENT_TCB SVN level and will reject SEV-ES VMs accordingly. Parse the SVN, and mask off the SEV_ES supported VM type from the list of supported types if it is above the per-platform threshold for the relevant platforms. Signed-off-by: Tycho Andersen (AMD) Acked-by: Herbert Xu Reviewed-by: Tom Lendacky Tested-by: Tycho Andersen (AMD) Link: https://patch.msgid.link/20260416232329.3408497-3-seanjc@google.com Signed-off-by: Sean Christopherson --- drivers/crypto/ccp/sev-dev.c | 70 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/psp-sev.h | 37 +++++++++++++++++++++++ 2 files changed, 107 insertions(+) (limited to 'include/linux') diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 22bc4ef27a63..7cd6cd6fdb10 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -2954,3 +2954,73 @@ void sev_pci_exit(void) sev_firmware_shutdown(sev); } + +static int get_v1_svn(struct sev_device *sev) +{ + struct sev_snp_tcb_version_genoa_milan *tcb; + struct sev_user_data_snp_status status; + int ret, error = 0; + + mutex_lock(&sev_cmd_mutex); + ret = __sev_do_snp_platform_status(&status, &error); + mutex_unlock(&sev_cmd_mutex); + if (ret < 0) + return ret; + + tcb = (struct sev_snp_tcb_version_genoa_milan *)&status + .current_tcb_version; + return tcb->snp; +} + +static int get_v2_svn(struct sev_device *sev) +{ + struct sev_user_data_snp_status status; + struct sev_snp_tcb_version_turin *tcb; + int ret, error = 0; + + mutex_lock(&sev_cmd_mutex); + ret = __sev_do_snp_platform_status(&status, &error); + mutex_unlock(&sev_cmd_mutex); + if (ret < 0) + return ret; + + tcb = (struct sev_snp_tcb_version_turin *)&status + .current_tcb_version; + return tcb->snp; +} + +static bool sev_firmware_allows_es(struct sev_device *sev) +{ + /* Documented in AMD-SB-3023 */ + if (boot_cpu_has(X86_FEATURE_ZEN4) || boot_cpu_has(X86_FEATURE_ZEN3)) + return get_v1_svn(sev) < 0x1b; + else if (boot_cpu_has(X86_FEATURE_ZEN5)) + return get_v2_svn(sev) < 0x4; + else + return true; +} + +int sev_firmware_supported_vm_types(void) +{ + int supported_vm_types = 0; + struct sev_device *sev; + + if (!psp_master || !psp_master->sev_data) + return supported_vm_types; + sev = psp_master->sev_data; + + supported_vm_types |= BIT(KVM_X86_SEV_VM); + supported_vm_types |= BIT(KVM_X86_SEV_ES_VM); + + if (!sev->snp_initialized) + return supported_vm_types; + + supported_vm_types |= BIT(KVM_X86_SNP_VM); + + if (!sev_firmware_allows_es(sev)) + supported_vm_types &= ~BIT(KVM_X86_SEV_ES_VM); + + return supported_vm_types; + +} +EXPORT_SYMBOL_FOR_MODULES(sev_firmware_supported_vm_types, "kvm-amd"); diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index d5099a2baca5..ce16bbc0b308 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -902,6 +902,42 @@ struct snp_feature_info { /* Feature bits in EBX */ #define SNP_SEV_TIO_SUPPORTED BIT(1) +/** + * struct sev_snp_tcb_version_genoa_milan + * + * @boot_loader: SVN of PSP bootloader + * @tee: SVN of PSP operating system + * @reserved: reserved + * @snp: SVN of SNP firmware + * @microcode: Lowest current patch level of all cores + */ +struct sev_snp_tcb_version_genoa_milan { + u8 boot_loader; + u8 tee; + u8 reserved[4]; + u8 snp; + u8 microcode; +}; + +/** + * struct sev_snp_tcb_version_turin + * + * @fmc: SVN of FMC firmware + * @boot_loader: SVN of PSP bootloader + * @tee: SVN of PSP operating system + * @snp: SVN of SNP firmware + * @reserved: reserved + * @microcode: Lowest current patch level of all cores + */ +struct sev_snp_tcb_version_turin { + u8 fmc; + u8 boot_loader; + u8 tee; + u8 snp; + u8 reserved[3]; + u8 microcode; +}; + #ifdef CONFIG_CRYPTO_DEV_SP_PSP /** @@ -1048,6 +1084,7 @@ void snp_free_firmware_page(void *addr); void sev_platform_shutdown(void); bool sev_is_snp_ciphertext_hiding_supported(void); u64 sev_get_snp_policy_bits(void); +int sev_firmware_supported_vm_types(void); #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ -- cgit v1.2.3 From ccd6c77223bbdea352190956f5e00e3b07119fc3 Mon Sep 17 00:00:00 2001 From: Peter Fang Date: Tue, 7 Apr 2026 17:11:28 -0700 Subject: KVM: Fix kvm_vcpu_map[_readonly]() function prototypes kvm_vcpu_map() and kvm_vcpu_map_readonly() should take a gfn instead of a gpa. This appears to be a result of the original kvm_vcpu_map() being declared with the wrong function prototype in kvm_host.h, even though it was correct in the actual implementation in kvm_main.c. No actual harm has been done yet as all of the call sites are correctly passing in a gfn. Plus, both gfn_t and gpa_t are typedef'd to u64 so this change shouldn't have any functional impact. Compile-tested on x86 and ppc, which are the current users of these interfaces. Fixes: e45adf665a53 ("KVM: Introduce a new guest mapping API") Cc: KarimAllah Ahmed Cc: Konrad Rzeszutek Wilk Signed-off-by: Peter Fang Reviewed-by: Yosry Ahmed Link: https://patch.msgid.link/20260408001137.3290444-2-peter.fang@intel.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7b231a1e63ba..61a3430957f2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1390,20 +1390,20 @@ void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *mems void mark_page_dirty(struct kvm *kvm, gfn_t gfn); void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); -int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map, +int __kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map, bool writable); void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map); -static inline int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, +static inline int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map) { - return __kvm_vcpu_map(vcpu, gpa, map, true); + return __kvm_vcpu_map(vcpu, gfn, map, true); } -static inline int kvm_vcpu_map_readonly(struct kvm_vcpu *vcpu, gpa_t gpa, +static inline int kvm_vcpu_map_readonly(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map) { - return __kvm_vcpu_map(vcpu, gpa, map, false); + return __kvm_vcpu_map(vcpu, gfn, map, false); } static inline void kvm_vcpu_map_mark_dirty(struct kvm_vcpu *vcpu, -- cgit v1.2.3 From 59ebc16276c51fa3c462e5a0d21280249755502d Mon Sep 17 00:00:00 2001 From: "Guo Ren (Alibaba DAMO Academy)" Date: Mon, 25 May 2026 15:19:44 +0530 Subject: irqchip/riscv-imsic: Add nr_guest_files in per-HART local config Add nr_guest_files in per-HART local config to represent the number of guest files available on a particular HART whereas the nr_guest_files in the global config represents the number of guest files available across all HARTs. This allows KVM RISC-V to use nr_guest_files from per-HART local config for asymmetric big.Little systems. Signed-off-by: Guo Ren (Alibaba DAMO Academy) Acked-by: Thomas Gleixner Signed-off-by: Anup Patel Link: https://lore.kernel.org/r/20260525094945.3721783-2-anup.patel@oss.qualcomm.com Signed-off-by: Anup Patel --- drivers/irqchip/irq-riscv-imsic-state.c | 9 ++++----- include/linux/irqchip/riscv-imsic.h | 5 ++++- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/irqchip/irq-riscv-imsic-state.c b/drivers/irqchip/irq-riscv-imsic-state.c index e3ed874d89e7..b8d1bbbf42f7 100644 --- a/drivers/irqchip/irq-riscv-imsic-state.c +++ b/drivers/irqchip/irq-riscv-imsic-state.c @@ -920,13 +920,12 @@ int __init imsic_setup_state(struct fwnode_handle *fwnode, void *opaque) local->msi_va = mmios_va[index] + reloff; /* - * KVM uses global->nr_guest_files to determine the available guest - * interrupt files on each CPU. Take the minimum number of guest - * interrupt files across all CPUs to avoid KVM incorrectly allocating - * an unexisted or unmapped guest interrupt file on some CPUs. + * KVM uses both local->nr_guest_files and global->nr_guest_files + * to determine the available guest interrupt files on each CPU. */ nr_guest_files = (resource_size(&mmios[index]) - reloff) / IMSIC_MMIO_PAGE_SZ - 1; - global->nr_guest_files = min(global->nr_guest_files, nr_guest_files); + local->nr_guest_files = min((BIT(global->guest_index_bits) - 1), nr_guest_files); + global->nr_guest_files = min(global->nr_guest_files, local->nr_guest_files); nr_handlers++; } diff --git a/include/linux/irqchip/riscv-imsic.h b/include/linux/irqchip/riscv-imsic.h index 4b348836de7a..61af3a5bea09 100644 --- a/include/linux/irqchip/riscv-imsic.h +++ b/include/linux/irqchip/riscv-imsic.h @@ -40,6 +40,9 @@ struct imsic_local_config { phys_addr_t msi_pa; void __iomem *msi_va; + + /* Number of guest interrupt files per-HART */ + u32 nr_guest_files; }; struct imsic_global_config { @@ -68,7 +71,7 @@ struct imsic_global_config { /* Number of guest interrupt identities */ u32 nr_guest_ids; - /* Number of guest interrupt files per core */ + /* Number of guest interrupt files across all HARTs */ u32 nr_guest_files; /* Per-CPU IMSIC addresses */ -- cgit v1.2.3 From f13e900599089b10113ceb36013423f0837c6792 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 22 May 2026 15:46:06 -0700 Subject: KVM: SEV: Pin source page for write when adding CPUID data for SNP guest When populating a guest_memfd instance with the initial CPUID data for an SNP guest, acquire a writable pin on the source page as KVM will write back the "correct" CPUID information if the userspace provided data is rejected by trusted firmware. Because KVM writes to the source page using a kernel mapping, pinning for read could result in KVM clobbering read-only memory. Note, well-behaved VMMs are unlikely to be affected, as CPUID information is almost always dynamically generated by userspace, i.e. it's unlikely for the CPUID information to be backed by a read-only mapping. Fixes: 2a62345b30529 ("KVM: guest_memfd: GUP source pages prior to populating guest memory") Cc: stable@vger.kernel.org Signed-off-by: Ackerley Tng Link: https://patch.msgid.link/20260522-fix-sev-gmem-post-populate-v2-1-3f196bfad5a1@google.com [sean: rewrite shortlog and changelog, tag for stable@] Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/sev.c | 1 + arch/x86/kvm/vmx/tdx.c | 2 +- include/linux/kvm_host.h | 3 ++- virt/kvm/guest_memfd.c | 6 ++++-- 4 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 37d4cfa5d980..c73c028d72c1 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2456,6 +2456,7 @@ static int snp_launch_update(struct kvm *kvm, struct kvm_sev_cmd *argp) sev_populate_args.type = params.type; count = kvm_gmem_populate(kvm, params.gfn_start, src, npages, + params.type == KVM_SEV_SNP_PAGE_TYPE_CPUID, sev_gmem_post_populate, &sev_populate_args); if (count < 0) { argp->error = sev_populate_args.fw_error; diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index b8c3d3d8bbfe..00dcfcbc47f6 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -3185,7 +3185,7 @@ static int tdx_vcpu_init_mem_region(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *c }; gmem_ret = kvm_gmem_populate(kvm, gpa_to_gfn(region.gpa), u64_to_user_ptr(region.source_addr), - 1, tdx_gmem_post_populate, &arg); + 1, false, tdx_gmem_post_populate, &arg); if (gmem_ret < 0) { ret = gmem_ret; break; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4c14aee1fb06..2c5ad9a6d5ce 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2596,7 +2596,8 @@ int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_ord typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, struct page *page, void *opaque); -long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, +long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, + long npages, bool may_writeback_src, kvm_gmem_populate_cb post_populate, void *opaque); #endif diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 69c9d6d546b2..07d8db344872 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -858,7 +858,8 @@ out_unlock: return ret; } -long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages, +long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, + long npages, bool may_writeback_src, kvm_gmem_populate_cb post_populate, void *opaque) { struct kvm_memory_slot *slot; @@ -892,8 +893,9 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long if (src) { unsigned long uaddr = (unsigned long)src + i * PAGE_SIZE; + unsigned int flags = may_writeback_src ? FOLL_WRITE : 0; - ret = get_user_pages_fast(uaddr, 1, 0, &src_page); + ret = get_user_pages_fast(uaddr, 1, flags, &src_page); if (ret < 0) break; if (ret != 1) { -- cgit v1.2.3 From 5804f58901af77ab507e199d31dfa263404e177c Mon Sep 17 00:00:00 2001 From: Jiun Jeong Date: Fri, 1 May 2026 23:44:13 +0900 Subject: call_once:: Fix typo in comment for call_once() Change "succesfully" to "successfully" in the kerneldoc comment of call_once(). Signed-off-by: Jiun Jeong Link: https://patch.msgid.link/20260501144413.49419-1-jiun.jeong.cs@gmail.com [sean: don't scope to KVM, massage changelog] Signed-off-by: Sean Christopherson --- include/linux/call_once.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/call_once.h b/include/linux/call_once.h index 13cd6469e7e5..1625a9d6ff5b 100644 --- a/include/linux/call_once.h +++ b/include/linux/call_once.h @@ -36,7 +36,7 @@ do { \ * it returns a zero or positive value, mark @once as completed. Return * the value returned by @cb * - * If @once has completed succesfully before, return 0. + * If @once has completed successfully before, return 0. * * The call to @cb is implicitly surrounded by a mutex, though for * efficiency the * function avoids taking it after the first call. -- cgit v1.2.3 From ef057cbf825e03b63f6edf5980f96abf3c53089d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 29 Apr 2026 09:34:01 -0700 Subject: KVM: x86/mmu: Ensure hugepage is in by slot before checking max mapping level When recovering hugepages in the shadow MMU, verify that the base gfn of the shadow page is actually contained within the target memslot, *before* querying the max mapping level given the shadow page's gfn. Failure to pre-check the validity of the gfn can lead to an out-of-bounds access to the slot's lpage_info (which typically manifests as a host #PF because the lpage_info is vmalloc'd) if the guest creates a hugepage mapping (in its PTEs) that extends "below" the bounds of a memslot. When faulting in memory for a guest, and the size of the guest mapping is greater than KVM's (current) max mapping, then KVM will create a "direct" shadow page (direct in that there are no gPTEs to shadow, and so the target gfn is a direct calculation given the base gfn of the shadow page). The hugepage recovery flow looks for such direct shadow pages, as forcing 4KiB mappings when dirty logging generates the guest > host mapping size case. When the 4KiB restriction is lifted, then KVM can replace the shadow page with a hugepage. But if KVM originally used a smaller mapping than the guest because the range of memory covered by the guest hugepage exceeds the bounds of a memslot, then KVM will link a direct shadow page with a gfn that is outside the bounds of the memslot being used to fault in memory. The rmap entry added for the leaf mapping is correct and within bounds, but the gfn of the leaf SPTE's parent shadow page will be out of bounds. BUG: unable to handle page fault for address: ffffc90000806ffc #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 100000067 P4D 100000067 PUD 1002a7067 PMD 10612f067 PTE 0 Oops: Oops: 0000 [#1] SMP CPU: 13 UID: 1000 PID: 757 Comm: mmu_stress_test Not tainted 7.1.0-rc1-48ce1e26eace-x86_pir_to_irr_comments-vm #341 PREEMPT Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:kvm_mmu_max_mapping_level+0x79/0x2b0 [kvm] Call Trace: kvm_mmu_recover_huge_pages+0x21b/0x320 [kvm] kvm_set_memslot+0x1ee/0x590 [kvm] kvm_set_memory_region.part.0+0x3a1/0x4d0 [kvm] kvm_vm_ioctl+0x9bf/0x15d0 [kvm] __x64_sys_ioctl+0x8a/0xd0 do_syscall_64+0xb7/0xbb0 entry_SYSCALL_64_after_hwframe+0x4b/0x53 RIP: 0033:0x7f21c0f1a9bf Don't bother pre-checking the bounds of the potential hugepage, i.e. don't check that e.g. sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level + 1) is also within the memslot, as the checks performed by kvm_mmu_max_mapping_level() are a superset of the basic bounds checks. I.e. pre-checking the full range would be a dubious micro-optimization. Fixes: 9eba50f8d7fc ("KVM: x86/mmu: Consult max mapping level when zapping collapsible SPTEs") Cc: stable@vger.kernel.org Cc: David Matlack Cc: James Houghton Cc: Alexander Bulekov Cc: Fred Griffoul Cc: Alexander Graf Cc: David Woodhouse Cc: Filippo Sironi Cc: Ivan Orlov Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 18 ++++++++++++------ include/linux/kvm_host.h | 7 ++++++- 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index c13b80fe3125..26ed97efda91 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -7360,13 +7360,19 @@ restart: sp = sptep_to_sp(sptep); /* - * We cannot do huge page mapping for indirect shadow pages, - * which are found on the last rmap (level = 1) when not using - * tdp; such shadow pages are synced with the page table in - * the guest, and the guest page table is using 4K page size - * mapping if the indirect sp has level = 1. + * Direct shadow page can be replaced by a hugepage if the host + * mapping level allows it and the memslot maps all of the host + * hugepage. Note! If the memslot maps only part of the + * hugepage, sp->gfn may be below slot->base_gfn, and querying + * the max mapping level would cause an out-of-bounds lpage_info + * access. So the gfn bounds check *must* be done first. + * + * Indirect shadow pages are created when the guest page tables + * are using 4K pages. Since the host mapping is always + * constrained by the page size in the guest, indirect shadow + * pages are never collapsible. */ - if (sp->role.direct && + if (sp->role.direct && is_gfn_in_memslot(slot, sp->gfn) && sp->role.level < kvm_mmu_max_mapping_level(kvm, NULL, slot, sp->gfn)) { kvm_zap_one_rmap_spte(kvm, rmap_head, sptep); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 27498e990dff..ab8cfaec82d3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1815,6 +1815,11 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args); +static inline bool is_gfn_in_memslot(const struct kvm_memory_slot *slot, gfn_t gfn) +{ + return gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages; +} + /* * Returns a pointer to the memslot if it contains gfn. * Otherwise returns NULL. @@ -1825,7 +1830,7 @@ try_get_memslot(struct kvm_memory_slot *slot, gfn_t gfn) if (!slot) return NULL; - if (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages) + if (is_gfn_in_memslot(slot, gfn)) return slot; else return NULL; -- cgit v1.2.3