author     Linus Torvalds <torvalds@linux-foundation.org>   2025-04-03 20:47:47 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org>   2025-04-03 20:47:47 +0300
commit     204e9a18f1b9685476d6480d4f26d5d7f7e2d505 (patch)
tree       d0a8c16eb884354147b784477370d0feee8ed547
parent     ea59cb74234c8d658a4299b57156265f59977494 (diff)
parent     c11bcbc0a517acf69282c8225059b2a8ac5fe628 (diff)
download   linux-204e9a18f1b9685476d6480d4f26d5d7f7e2d505.tar.xz
Merge tag 'mm-hotfixes-stable-2025-04-02-21-57' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM hotfixes from Andrew Morton:
"Five hotfixes. Three are cc:stable and the remainder address post-6.14
issues or aren't considered necessary for -stable kernels.
All patches are for MM"
* tag 'mm-hotfixes-stable-2025-04-02-21-57' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
mm: zswap: fix crypto_free_acomp() deadlock in zswap_cpu_comp_dead()
mm/hugetlb: move hugetlb_sysctl_init() to the __init section
mm: page_isolation: avoid calling folio_hstate() without hugetlb_lock
mm/hugetlb_vmemmap: fix memory loads ordering
mm/userfaultfd: fix release hang over concurrent GUP
-rw-r--r--  fs/userfaultfd.c            | 51
-rw-r--r--  include/linux/page-flags.h  | 37
-rw-r--r--  include/linux/page_ref.h    |  2
-rw-r--r--  mm/hugetlb.c                |  2
-rw-r--r--  mm/page_isolation.c         |  9
-rw-r--r--  mm/zswap.c                  | 30

6 files changed, 94 insertions, 37 deletions
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 97c4d71115d8..d80f94346199 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -396,32 +396,6 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 		goto out;
 
 	/*
-	 * If it's already released don't get it. This avoids to loop
-	 * in __get_user_pages if userfaultfd_release waits on the
-	 * caller of handle_userfault to release the mmap_lock.
-	 */
-	if (unlikely(READ_ONCE(ctx->released))) {
-		/*
-		 * Don't return VM_FAULT_SIGBUS in this case, so a non
-		 * cooperative manager can close the uffd after the
-		 * last UFFDIO_COPY, without risking to trigger an
-		 * involuntary SIGBUS if the process was starting the
-		 * userfaultfd while the userfaultfd was still armed
-		 * (but after the last UFFDIO_COPY). If the uffd
-		 * wasn't already closed when the userfault reached
-		 * this point, that would normally be solved by
-		 * userfaultfd_must_wait returning 'false'.
-		 *
-		 * If we were to return VM_FAULT_SIGBUS here, the non
-		 * cooperative manager would be instead forced to
-		 * always call UFFDIO_UNREGISTER before it can safely
-		 * close the uffd.
-		 */
-		ret = VM_FAULT_NOPAGE;
-		goto out;
-	}
-
-	/*
 	 * Check that we can return VM_FAULT_RETRY.
 	 *
 	 * NOTE: it should become possible to return VM_FAULT_RETRY
@@ -457,6 +431,31 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
 		goto out;
 
+	if (unlikely(READ_ONCE(ctx->released))) {
+		/*
+		 * If a concurrent release is detected, do not return
+		 * VM_FAULT_SIGBUS or VM_FAULT_NOPAGE, but instead always
+		 * return VM_FAULT_RETRY with lock released proactively.
+		 *
+		 * If we were to return VM_FAULT_SIGBUS here, the non
+		 * cooperative manager would be instead forced to
+		 * always call UFFDIO_UNREGISTER before it can safely
+		 * close the uffd, to avoid involuntary SIGBUS triggered.
+		 *
+		 * If we were to return VM_FAULT_NOPAGE, it would work for
+		 * the fault path, in which the lock will be released
+		 * later. However for GUP, faultin_page() does nothing
+		 * special on NOPAGE, so GUP would spin retrying without
+		 * releasing the mmap read lock, causing possible livelock.
+		 *
+		 * Here only VM_FAULT_RETRY would make sure the mmap lock
+		 * be released immediately, so that the thread concurrently
+		 * releasing the userfault would always make progress.
+		 */
+		release_fault_lock(vmf);
+		goto out;
+	}
+
 	/* take the reference before dropping the mmap_lock */
 	userfaultfd_ctx_get(ctx);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5bd9492a66ee..e6a21b62dcce 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -226,11 +226,48 @@ static __always_inline const struct page *page_fixed_fake_head(const struct page
 	}
 	return page;
 }
+
+static __always_inline bool page_count_writable(const struct page *page, int u)
+{
+	if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key))
+		return true;
+
+	/*
+	 * The refcount check is ordered before the fake-head check to prevent
+	 * the following race:
+	 *   CPU 1 (HVO)                     CPU 2 (speculative PFN walker)
+	 *
+	 *   page_ref_freeze()
+	 *   synchronize_rcu()
+	 *                                   rcu_read_lock()
+	 *                                   page_is_fake_head() is false
+	 *   vmemmap_remap_pte()
+	 *   XXX: struct page[] becomes r/o
+	 *
+	 *   page_ref_unfreeze()
+	 *                                   page_ref_count() is not zero
+	 *
+	 *                                   atomic_add_unless(&page->_refcount)
+	 *                                   XXX: try to modify r/o struct page[]
+	 *
+	 * The refcount check also prevents modification attempts to other (r/o)
+	 * tail pages that are not fake heads.
+	 */
+	if (atomic_read_acquire(&page->_refcount) == u)
+		return false;
+
+	return page_fixed_fake_head(page) == page;
+}
 #else
 static inline const struct page *page_fixed_fake_head(const struct page *page)
 {
 	return page;
 }
+
+static inline bool page_count_writable(const struct page *page, int u)
+{
+	return true;
+}
 #endif
 
 static __always_inline int page_is_fake_head(const struct page *page)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 8c236c651d1d..544150d1d5fd 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -234,7 +234,7 @@ static inline bool page_ref_add_unless(struct page *page, int nr, int u)
 
 	rcu_read_lock();
 	/* avoid writing to the vmemmap area being remapped */
-	if (!page_is_fake_head(page) && page_ref_count(page) != u)
+	if (page_count_writable(page, u))
 		ret = atomic_add_unless(&page->_refcount, nr, u);
 	rcu_read_unlock();
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6fccfe6d046c..39f92aad7bd1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5179,7 +5179,7 @@ static const struct ctl_table hugetlb_table[] = {
 	},
 };
 
-static void hugetlb_sysctl_init(void)
+static void __init hugetlb_sysctl_init(void)
 {
 	register_sysctl_init("vm", hugetlb_table);
 }
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index a051a29e95ad..b2fc5266e3d2 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -83,7 +83,14 @@ static struct page *has_unmovable_pages(unsigned long start_pfn, unsigned long e
 			unsigned int skip_pages;
 
 			if (PageHuge(page)) {
-				if (!hugepage_migration_supported(folio_hstate(folio)))
+				struct hstate *h;
+
+				/*
+				 * The huge page may be freed so can not
+				 * use folio_hstate() directly.
+				 */
+				h = size_to_hstate(folio_size(folio));
+				if (h && !hugepage_migration_supported(h))
 					return page;
 			} else if (!folio_test_lru(folio) && !__folio_test_movable(folio)) {
 				return page;
diff --git a/mm/zswap.c b/mm/zswap.c
index 0dcc54eab58b..204fb59da33c 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -883,18 +883,32 @@ static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
 {
 	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
 	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
+	struct acomp_req *req;
+	struct crypto_acomp *acomp;
+	u8 *buffer;
+
+	if (IS_ERR_OR_NULL(acomp_ctx))
+		return 0;
 
 	mutex_lock(&acomp_ctx->mutex);
-	if (!IS_ERR_OR_NULL(acomp_ctx)) {
-		if (!IS_ERR_OR_NULL(acomp_ctx->req))
-			acomp_request_free(acomp_ctx->req);
-		acomp_ctx->req = NULL;
-		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
-			crypto_free_acomp(acomp_ctx->acomp);
-		kfree(acomp_ctx->buffer);
-	}
+	req = acomp_ctx->req;
+	acomp = acomp_ctx->acomp;
+	buffer = acomp_ctx->buffer;
+	acomp_ctx->req = NULL;
+	acomp_ctx->acomp = NULL;
+	acomp_ctx->buffer = NULL;
 	mutex_unlock(&acomp_ctx->mutex);
 
+	/*
+	 * Do the actual freeing after releasing the mutex to avoid subtle
+	 * locking dependencies causing deadlocks.
+	 */
+	if (!IS_ERR_OR_NULL(req))
+		acomp_request_free(req);
+	if (!IS_ERR_OR_NULL(acomp))
+		crypto_free_acomp(acomp);
+	kfree(buffer);
+
 	return 0;
 }
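The comment added in the fs/userfaultfd.c hunk carries the whole argument: SIGBUS would burden non-cooperative managers, NOPAGE works for the ordinary fault path where the lock is released later, but GUP's faultin_page() does nothing special on NOPAGE, so only VM_FAULT_RETRY guarantees the mmap lock is dropped and the releasing thread can progress. As a rough userspace illustration of that locking argument (not the kernel code; every name here is made up, a pthread rwlock stands in for the mmap lock, and the normal userfault wait path is elided):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

enum fault_ret { FAULT_NOPAGE, FAULT_RETRY };

static pthread_rwlock_t mmap_lock = PTHREAD_RWLOCK_INITIALIZER;
static atomic_bool ctx_released;		/* set by releaser() */
static atomic_bool vma_registered = true;	/* cleared by releaser() */

/* Entered with mmap_lock held for read, like handle_userfault(). */
static enum fault_ret handle_fault(void)
{
	if (atomic_load(&ctx_released)) {
		/*
		 * Drop the lock before asking for a retry.  Returning
		 * FAULT_NOPAGE here instead would keep gup_like_caller()
		 * spinning with the read lock held, so releaser() below
		 * could never take the write lock: a livelock.
		 */
		pthread_rwlock_unlock(&mmap_lock);
		return FAULT_RETRY;
	}
	return FAULT_NOPAGE;	/* placeholder for the normal wait path */
}

/* GUP-style caller: keeps faulting until the region stops faulting. */
static void gup_like_caller(void)
{
	pthread_rwlock_rdlock(&mmap_lock);
	while (atomic_load(&vma_registered) && handle_fault() == FAULT_RETRY)
		pthread_rwlock_rdlock(&mmap_lock);	/* handler dropped it */
	pthread_rwlock_unlock(&mmap_lock);
}

/* Releasing thread, loosely mirroring userfaultfd_release(). */
static void releaser(void)
{
	atomic_store(&ctx_released, true);
	pthread_rwlock_wrlock(&mmap_lock);	/* needs readers to drop it */
	atomic_store(&vma_registered, false);	/* unregister the region */
	pthread_rwlock_unlock(&mmap_lock);
}

The essential move is the unlock inside the handler before returning the retry status; that is what release_fault_lock(vmf) does in the real patch.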
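The ordering comment in page_count_writable() may be easier to see in plain C11 atomics. Below is a loose analogue, not kernel code (all names invented): the writer's unfreeze is a release store, so a reader whose acquire load observes the unfrozen count is guaranteed to also observe the rewritten data, while a reader that still sees the frozen value backs off. The remaining window, a reader that sampled the counter before the freeze, is closed in the kernel by synchronize_rcu() against rcu_read_lock() readers; this sketch assumes that discipline rather than modelling it.

#include <stdatomic.h>
#include <stdbool.h>

struct guarded {
	atomic_int count;	/* stands in for page->_refcount */
	bool remapped;		/* stands in for the remapped r/o state */
};

/* Reader side, cf. page_count_writable(): refcount check comes first. */
static bool payload_writable(struct guarded *g, int frozen)
{
	/*
	 * Acquire pairs with the release store in freeze_remap_unfreeze().
	 * Seeing the frozen value means a rewrite may be in flight: back
	 * off.  Seeing the unfrozen value orders this thread after the
	 * rewrite, so the read of g->remapped below cannot be stale.
	 */
	if (atomic_load_explicit(&g->count, memory_order_acquire) == frozen)
		return false;
	return !g->remapped;
}

/* Writer side, cf. HVO: freeze, rewrite, unfreeze with release. */
static void freeze_remap_unfreeze(struct guarded *g, int restore)
{
	atomic_store(&g->count, 0);	/* freeze (frozen == 0) */
	/* kernel: synchronize_rcu() here waits out pre-freeze readers */
	g->remapped = true;		/* rewrite the guarded data */
	atomic_store_explicit(&g->count, restore, memory_order_release);
}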
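Finally, the mm/zswap.c hunk is an instance of a common cure for lock-ordering deadlocks: detach the resources from the shared structure while holding the mutex, then free them only after dropping it, so whatever locks the free routines take internally can never nest inside acomp_ctx->mutex. A minimal userspace sketch of the same idiom, under the assumption that the names (struct ctx, ctx_teardown) are hypothetical and free() stands in for acomp_request_free()/crypto_free_acomp():

#include <pthread.h>
#include <stdlib.h>

struct ctx {
	pthread_mutex_t lock;
	void *req;
	void *buffer;
};

static void ctx_teardown(struct ctx *c)
{
	void *req, *buffer;

	/* Detach the resources while holding the lock... */
	pthread_mutex_lock(&c->lock);
	req = c->req;
	buffer = c->buffer;
	c->req = NULL;
	c->buffer = NULL;
	pthread_mutex_unlock(&c->lock);

	/*
	 * ...but free them only after dropping it.  Concurrent users that
	 * take the lock now see NULL and skip the resources; the free
	 * calls run lock-free, so no lock they acquire internally can
	 * form a cycle with c->lock.
	 */
	free(req);
	free(buffer);
}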