From c72ceafbd12cf95e088681ae5e535ef1a78bf0ed Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Fri, 29 Mar 2024 16:24:42 -0500 Subject: mm: Introduce AS_INACCESSIBLE for encrypted/confidential memory filemap users like guest_memfd may use page cache pages to allocate/manage memory that is only intended to be accessed by guests via hardware protections like encryption. Writes to memory of this sort in common paths like truncation may cause unexpected behavior such as writing garbage instead of zeros when attempting to zero pages, or worse, triggering hardware protections that are considered fatal as far as the kernel is concerned. Introduce a new address_space flag, AS_INACCESSIBLE, and use this initially to prevent zero'ing of pages during truncation, with the understanding that it is up to the owner of the mapping to handle this specially if needed. This is admittedly a rather blunt solution, but it seems like there are no other places that should take into account the flag to keep its promise. Link: https://lore.kernel.org/lkml/ZR9LYhpxTaTk6PJX@google.com/ Cc: Matthew Wilcox Suggested-by: Sean Christopherson Signed-off-by: Michael Roth Message-ID: <20240329212444.395559-5-michael.roth@amd.com> Acked-by: Vlastimil Babka Signed-off-by: Paolo Bonzini --- include/linux/pagemap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 2df35e65557d..f879c1d54da7 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -207,6 +207,7 @@ enum mapping_flags { AS_STABLE_WRITES, /* must wait for writeback before modifying folio contents */ AS_UNMOVABLE, /* The mapping cannot be moved, ever */ + AS_INACCESSIBLE, /* Do not attempt direct R/W access to the mapping */ }; /** -- cgit v1.2.3 From 3bb2531e20bff99f9cd8719ee7aea8bd82687173 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 May 2024 12:54:03 -0400 Subject: KVM: guest_memfd: Add hook for initializing memory guest_memfd pages are generally expected to be in some arch-defined initial state prior to using them for guest memory. For SEV-SNP this initial state is 'private', or 'guest-owned', and requires additional operations to move these pages into a 'private' state by updating the corresponding entries the RMP table. Allow for an arch-defined hook to handle updates of this sort, and go ahead and implement one for x86 so KVM implementations like AMD SVM can register a kvm_x86_ops callback to handle these updates for SEV-SNP guests. The preparation callback is always called when allocating/grabbing folios via gmem, and it is up to the architecture to keep track of whether or not the pages are already in the expected state (e.g. the RMP table in the case of SEV-SNP). In some cases, it is necessary to defer the preparation of the pages to handle things like in-place encryption of initial guest memory payloads before marking these pages as 'private'/'guest-owned'. Add an argument (always true for now) to kvm_gmem_get_folio() that allows for the preparation callback to be bypassed. To detect possible issues in the way userspace initializes memory, it is only possible to add an unprepared page if it is not already included in the filemap. Link: https://lore.kernel.org/lkml/ZLqVdvsF11Ddo7Dq@google.com/ Co-developed-by: Michael Roth Signed-off-by: Michael Roth Message-Id: <20231230172351.574091-5-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 6 +++++ include/linux/kvm_host.h | 5 ++++ virt/kvm/Kconfig | 4 +++ virt/kvm/guest_memfd.c | 51 +++++++++++++++++++++++++++++++++++--- 6 files changed, 65 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 5187fcf4b610..d26fcad13e36 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -139,6 +139,7 @@ KVM_X86_OP(vcpu_deliver_sipi_vector) KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons); KVM_X86_OP_OPTIONAL(get_untagged_addr) KVM_X86_OP_OPTIONAL(alloc_apic_backing_page) +KVM_X86_OP_OPTIONAL_RET0(gmem_prepare) #undef KVM_X86_OP #undef KVM_X86_OP_OPTIONAL diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0369e9efe429..738ad82a3e67 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1812,6 +1812,7 @@ struct kvm_x86_ops { gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags); void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu); + int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order); }; struct kvm_x86_nested_ops { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2d2619d3eee4..972524ddcfdb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -13598,6 +13598,12 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_arch_no_poll); +#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE +int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order) +{ + return static_call(kvm_x86_gmem_prepare)(kvm, pfn, gfn, max_order); +} +#endif int kvm_spec_ctrl_test_value(u64 value) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index afbc99264ffa..1af069ab657c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2443,4 +2443,9 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, } #endif /* CONFIG_KVM_PRIVATE_MEM */ +#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE +int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); +bool kvm_arch_gmem_prepare_needed(struct kvm *kvm); +#endif + #endif diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 29b73eedfe74..ca870157b2ed 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -109,3 +109,7 @@ config KVM_GENERIC_PRIVATE_MEM select KVM_GENERIC_MEMORY_ATTRIBUTES select KVM_PRIVATE_MEM bool + +config HAVE_KVM_GMEM_PREPARE + bool + depends on KVM_PRIVATE_MEM diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index fd32288d0fbc..0176089be731 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -13,7 +13,43 @@ struct kvm_gmem { struct list_head entry; }; -static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) +static int kvm_gmem_prepare_folio(struct inode *inode, pgoff_t index, struct folio *folio) +{ +#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE + struct list_head *gmem_list = &inode->i_mapping->i_private_list; + struct kvm_gmem *gmem; + + list_for_each_entry(gmem, gmem_list, entry) { + struct kvm_memory_slot *slot; + struct kvm *kvm = gmem->kvm; + struct page *page; + kvm_pfn_t pfn; + gfn_t gfn; + int rc; + + if (!kvm_arch_gmem_prepare_needed(kvm)) + continue; + + slot = xa_load(&gmem->bindings, index); + if (!slot) + continue; + + page = folio_file_page(folio, index); + pfn = page_to_pfn(page); + gfn = slot->base_gfn + index - slot->gmem.pgoff; + rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, compound_order(compound_head(page))); + if (rc) { + pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx, error %d.\n", + index, rc); + return rc; + } + } + +#endif + return 0; +} + +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare) { struct folio *folio; @@ -41,6 +77,15 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) folio_mark_uptodate(folio); } + if (prepare) { + int r = kvm_gmem_prepare_folio(inode, index, folio); + if (r < 0) { + folio_unlock(folio); + folio_put(folio); + return ERR_PTR(r); + } + } + /* * Ignore accessed, referenced, and dirty flags. The memory is * unevictable and there is no storage to write back to. @@ -145,7 +190,7 @@ static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len) break; } - folio = kvm_gmem_get_folio(inode, index); + folio = kvm_gmem_get_folio(inode, index, true); if (IS_ERR(folio)) { r = PTR_ERR(folio); break; @@ -505,7 +550,7 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, goto out_fput; } - folio = kvm_gmem_get_folio(file_inode(file), index); + folio = kvm_gmem_get_folio(file_inode(file), index, true); if (IS_ERR(folio)) { r = PTR_ERR(folio); goto out_fput; -- cgit v1.2.3 From 1f6c06b177513e8a47c43e95d1985dbd9cff3ddd Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 Feb 2024 12:09:06 -0500 Subject: KVM: guest_memfd: Add interface for populating gmem pages with user data During guest run-time, kvm_arch_gmem_prepare() is issued as needed to prepare newly-allocated gmem pages prior to mapping them into the guest. In the case of SEV-SNP, this mainly involves setting the pages to private in the RMP table. However, for the GPA ranges comprising the initial guest payload, which are encrypted/measured prior to starting the guest, the gmem pages need to be accessed prior to setting them to private in the RMP table so they can be initialized with the userspace-provided data. Additionally, an SNP firmware call is needed afterward to encrypt them in-place and measure the contents into the guest's launch digest. While it is possible to bypass the kvm_arch_gmem_prepare() hooks so that this handling can be done in an open-coded/vendor-specific manner, this may expose more gmem-internal state/dependencies to external callers than necessary. Try to avoid this by implementing an interface that tries to handle as much of the common functionality inside gmem as possible, while also making it generic enough to potentially be usable/extensible for TDX as well. Suggested-by: Sean Christopherson Signed-off-by: Michael Roth Co-developed-by: Michael Roth Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 27 +++++++++++++++++++++++++ virt/kvm/guest_memfd.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1af069ab657c..1ae65774d9fa 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2448,4 +2448,31 @@ int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_ord bool kvm_arch_gmem_prepare_needed(struct kvm *kvm); #endif +/** + * kvm_gmem_populate() - Populate/prepare a GPA range with guest data + * + * @kvm: KVM instance + * @gfn: starting GFN to be populated + * @src: userspace-provided buffer containing data to copy into GFN range + * (passed to @post_populate, and incremented on each iteration + * if not NULL) + * @npages: number of pages to copy from userspace-buffer + * @post_populate: callback to issue for each gmem page that backs the GPA + * range + * @opaque: opaque data to pass to @post_populate callback + * + * This is primarily intended for cases where a gmem-backed GPA range needs + * to be initialized with userspace-provided data prior to being mapped into + * the guest as a private page. This should be called with the slots->lock + * held so that caller-enforced invariants regarding the expected memory + * attributes of the GPA range do not race with KVM_SET_MEMORY_ATTRIBUTES. + * + * Returns the number of pages that were populated. + */ +typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, + void __user *src, int order, void *opaque); + +long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, + kvm_gmem_populate_cb post_populate, void *opaque); + #endif diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index bfe437098b79..5d6c87bb13f6 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -585,3 +585,55 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, return r; } EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn); + +long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages, + kvm_gmem_populate_cb post_populate, void *opaque) +{ + struct file *file; + struct kvm_memory_slot *slot; + void __user *p; + + int ret = 0, max_order; + long i; + + lockdep_assert_held(&kvm->slots_lock); + if (npages < 0) + return -EINVAL; + + slot = gfn_to_memslot(kvm, start_gfn); + if (!kvm_slot_can_be_private(slot)) + return -EINVAL; + + file = kvm_gmem_get_file(slot); + if (!file) + return -EFAULT; + + filemap_invalidate_lock(file->f_mapping); + + npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages); + for (i = 0; i < npages; i += (1 << max_order)) { + gfn_t gfn = start_gfn + i; + kvm_pfn_t pfn; + + ret = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &max_order, false); + if (ret) + break; + + if (!IS_ALIGNED(gfn, (1 << max_order)) || + (npages - i) < (1 << max_order)) + max_order = 0; + + p = src ? src + i * PAGE_SIZE : NULL; + ret = post_populate(kvm, gfn, pfn, p, max_order, opaque); + + put_page(pfn_to_page(pfn)); + if (ret) + break; + } + + filemap_invalidate_unlock(file->f_mapping); + + fput(file); + return ret && !i ? ret : i; +} +EXPORT_SYMBOL_GPL(kvm_gmem_populate); -- cgit v1.2.3 From a90764f0e4ed5350cc9caeb384e0fb356c032587 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Sat, 30 Dec 2023 11:23:21 -0600 Subject: KVM: guest_memfd: Add hook for invalidating memory In some cases, like with SEV-SNP, guest memory needs to be updated in a platform-specific manner before it can be safely freed back to the host. Wire up arch-defined hooks to the .free_folio kvm_gmem_aops callback to allow for special handling of this sort when freeing memory in response to FALLOC_FL_PUNCH_HOLE operations and when releasing the inode, and go ahead and define an arch-specific hook for x86 since it will be needed for handling memory used for SEV-SNP guests. Signed-off-by: Michael Roth Message-Id: <20231230172351.574091-6-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 7 +++++++ include/linux/kvm_host.h | 4 ++++ virt/kvm/Kconfig | 4 ++++ virt/kvm/guest_memfd.c | 14 ++++++++++++++ 6 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index d26fcad13e36..c81990937ab4 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -140,6 +140,7 @@ KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons); KVM_X86_OP_OPTIONAL(get_untagged_addr) KVM_X86_OP_OPTIONAL(alloc_apic_backing_page) KVM_X86_OP_OPTIONAL_RET0(gmem_prepare) +KVM_X86_OP_OPTIONAL(gmem_invalidate) #undef KVM_X86_OP #undef KVM_X86_OP_OPTIONAL diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 738ad82a3e67..51bb4b785d6a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1813,6 +1813,7 @@ struct kvm_x86_ops { gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags); void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu); int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order); + void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end); }; struct kvm_x86_nested_ops { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 972524ddcfdb..83b8260443a3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -13605,6 +13605,13 @@ int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_ord } #endif +#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end) +{ + static_call_cond(kvm_x86_gmem_invalidate)(start, end); +} +#endif + int kvm_spec_ctrl_test_value(u64 value) { /* diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1ae65774d9fa..b43b96f876fe 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2475,4 +2475,8 @@ typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque); +#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); +#endif + #endif diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index ca870157b2ed..754c6c923427 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -113,3 +113,7 @@ config KVM_GENERIC_PRIVATE_MEM config HAVE_KVM_GMEM_PREPARE bool depends on KVM_PRIVATE_MEM + +config HAVE_KVM_GMEM_INVALIDATE + bool + depends on KVM_PRIVATE_MEM diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 5d6c87bb13f6..dfe50c64a552 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -343,10 +343,24 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol return MF_DELAYED; } +#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +static void kvm_gmem_free_folio(struct folio *folio) +{ + struct page *page = folio_page(folio, 0); + kvm_pfn_t pfn = page_to_pfn(page); + int order = folio_order(folio); + + kvm_arch_gmem_invalidate(pfn, pfn + (1ul << order)); +} +#endif + static const struct address_space_operations kvm_gmem_aops = { .dirty_folio = noop_dirty_folio, .migrate_folio = kvm_gmem_migrate_folio, .error_remove_folio = kvm_gmem_error_folio, +#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE + .free_folio = kvm_gmem_free_folio, +#endif }; static int kvm_gmem_getattr(struct mnt_idmap *idmap, const struct path *path, -- cgit v1.2.3