Merge tag 'mm-hotfixes-stable-2025-04-02-21-57' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull MM hotfixes from Andrew Morton: "Five hotfixes. Three are cc:stable and the remainder address post-6.14 issues or aren't considered necessary for -stable kernels. All patches are for MM" * tag 'mm-hotfixes-stable-2025-04-02-21-57' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: mm: zswap: fix crypto_free_acomp() deadlock in zswap_cpu_comp_dead() mm/hugetlb: move hugetlb_sysctl_init() to the __init section mm: page_isolation: avoid calling folio_hstate() without hugetlb_lock mm/hugetlb_vmemmap: fix memory loads ordering mm/userfaultfd: fix release hang over concurrent GUP
author: Linus Torvalds <torvalds@linux-foundation.org> 2025-04-03 20:47:47 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2025-04-03 20:47:47 +0300
commit: 204e9a18f1b9685476d6480d4f26d5d7f7e2d505 (patch)
tree: d0a8c16eb884354147b784477370d0feee8ed547
parent: ea59cb74234c8d658a4299b57156265f59977494 (diff)
parent: c11bcbc0a517acf69282c8225059b2a8ac5fe628 (diff)
download: linux-204e9a18f1b9685476d6480d4f26d5d7f7e2d505.tar.xz
6 files changed, 94 insertions, 37 deletions
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 97c4d71115d8..d80f94346199 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -396,32 +396,6 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 		goto out;
 
 	/*
-	 * If it's already released don't get it. This avoids to loop
-	 * in __get_user_pages if userfaultfd_release waits on the
-	 * caller of handle_userfault to release the mmap_lock.
-	 */
-	if (unlikely(READ_ONCE(ctx->released))) {
-		/*
-		 * Don't return VM_FAULT_SIGBUS in this case, so a non
-		 * cooperative manager can close the uffd after the
-		 * last UFFDIO_COPY, without risking to trigger an
-		 * involuntary SIGBUS if the process was starting the
-		 * userfaultfd while the userfaultfd was still armed
-		 * (but after the last UFFDIO_COPY). If the uffd
-		 * wasn't already closed when the userfault reached
-		 * this point, that would normally be solved by
-		 * userfaultfd_must_wait returning 'false'.
-		 *
-		 * If we were to return VM_FAULT_SIGBUS here, the non
-		 * cooperative manager would be instead forced to
-		 * always call UFFDIO_UNREGISTER before it can safely
-		 * close the uffd.
-		 */
-		ret = VM_FAULT_NOPAGE;
-		goto out;
-	}
-
-	/*
 	 * Check that we can return VM_FAULT_RETRY.
 	 *
 	 * NOTE: it should become possible to return VM_FAULT_RETRY
@@ -457,6 +431,31 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
 		goto out;
 
+	if (unlikely(READ_ONCE(ctx->released))) {
+		/*
+		 * If a concurrent release is detected, do not return
+		 * VM_FAULT_SIGBUS or VM_FAULT_NOPAGE, but instead always
+		 * return VM_FAULT_RETRY with lock released proactively.
+		 *
+		 * If we were to return VM_FAULT_SIGBUS here, the non
+		 * cooperative manager would be instead forced to
+		 * always call UFFDIO_UNREGISTER before it can safely
+		 * close the uffd, to avoid involuntary SIGBUS triggered.
+		 *
+		 * If we were to return VM_FAULT_NOPAGE, it would work for
+		 * the fault path, in which the lock will be released
+		 * later.  However for GUP, faultin_page() does nothing
+		 * special on NOPAGE, so GUP would spin retrying without
+		 * releasing the mmap read lock, causing possible livelock.
+		 *
+		 * Here only VM_FAULT_RETRY would make sure the mmap lock
+		 * be released immediately, so that the thread concurrently
+		 * releasing the userfault would always make progress.
+		 */
+		release_fault_lock(vmf);
+		goto out;
+	}
+
 	/* take the reference before dropping the mmap_lock */
 	userfaultfd_ctx_get(ctx);
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5bd9492a66ee..e6a21b62dcce 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -226,11 +226,48 @@ static __always_inline const struct page *page_fixed_fake_head(const struct page
 	}
 	return page;
 }
+
+static __always_inline bool page_count_writable(const struct page *page, int u)
+{
+	if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key))
+		return true;
+
+	/*
+	 * The refcount check is ordered before the fake-head check to prevent
+	 * the following race:
+	 *   CPU 1 (HVO)                     CPU 2 (speculative PFN walker)
+	 *
+	 *   page_ref_freeze()
+	 *   synchronize_rcu()
+	 *                                   rcu_read_lock()
+	 *                                   page_is_fake_head() is false
+	 *   vmemmap_remap_pte()
+	 *   XXX: struct page[] becomes r/o
+	 *
+	 *   page_ref_unfreeze()
+	 *                                   page_ref_count() is not zero
+	 *
+	 *                                   atomic_add_unless(&page->_refcount)
+	 *                                   XXX: try to modify r/o struct page[]
+	 *
+	 * The refcount check also prevents modification attempts to other (r/o)
+	 * tail pages that are not fake heads.
+	 */
+	if (atomic_read_acquire(&page->_refcount) == u)
+		return false;
+
+	return page_fixed_fake_head(page) == page;
+}
 #else
 static inline const struct page *page_fixed_fake_head(const struct page *page)
 {
 	return page;
 }
+
+static inline bool page_count_writable(const struct page *page, int u)
+{
+	return true;
+}
 #endif
 
 static __always_inline int page_is_fake_head(const struct page *page)
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 8c236c651d1d..544150d1d5fd 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -234,7 +234,7 @@ static inline bool page_ref_add_unless(struct page *page, int nr, int u)
 
 	rcu_read_lock();
 	/* avoid writing to the vmemmap area being remapped */
-	if (!page_is_fake_head(page) && page_ref_count(page) != u)
+	if (page_count_writable(page, u))
 		ret = atomic_add_unless(&page->_refcount, nr, u);
 	rcu_read_unlock();
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6fccfe6d046c..39f92aad7bd1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5179,7 +5179,7 @@ static const struct ctl_table hugetlb_table[] = {
 	},
 };
 
-static void hugetlb_sysctl_init(void)
+static void __init hugetlb_sysctl_init(void)
 {
 	register_sysctl_init("vm", hugetlb_table);
 }
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index a051a29e95ad..b2fc5266e3d2 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -83,7 +83,14 @@ static struct page *has_unmovable_pages(unsigned long start_pfn, unsigned long e
 			unsigned int skip_pages;
 
 			if (PageHuge(page)) {
-				if (!hugepage_migration_supported(folio_hstate(folio)))
+				struct hstate *h;
+
+				/*
+				 * The huge page may be freed so can not
+				 * use folio_hstate() directly.
+				 */
+				h = size_to_hstate(folio_size(folio));
+				if (h && !hugepage_migration_supported(h))
 					return page;
 			} else if (!folio_test_lru(folio) && !__folio_test_movable(folio)) {
 				return page;
diff --git a/mm/zswap.c b/mm/zswap.c
index 0dcc54eab58b..204fb59da33c 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -883,18 +883,32 @@ static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
 {
 	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
 	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
+	struct acomp_req *req;
+	struct crypto_acomp *acomp;
+	u8 *buffer;
+
+	if (IS_ERR_OR_NULL(acomp_ctx))
+		return 0;
 
 	mutex_lock(&acomp_ctx->mutex);
-	if (!IS_ERR_OR_NULL(acomp_ctx)) {
-		if (!IS_ERR_OR_NULL(acomp_ctx->req))
-			acomp_request_free(acomp_ctx->req);
-		acomp_ctx->req = NULL;
-		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
-			crypto_free_acomp(acomp_ctx->acomp);
-		kfree(acomp_ctx->buffer);
-	}
+	req = acomp_ctx->req;
+	acomp = acomp_ctx->acomp;
+	buffer = acomp_ctx->buffer;
+	acomp_ctx->req = NULL;
+	acomp_ctx->acomp = NULL;
+	acomp_ctx->buffer = NULL;
 	mutex_unlock(&acomp_ctx->mutex);
 
+	/*
+	 * Do the actual freeing after releasing the mutex to avoid subtle
+	 * locking dependencies causing deadlocks.
+	 */
+	if (!IS_ERR_OR_NULL(req))
+		acomp_request_free(req);
+	if (!IS_ERR_OR_NULL(acomp))
+		crypto_free_acomp(acomp);
+	kfree(buffer);
+
 	return 0;
 }
author	Linus Torvalds <torvalds@linux-foundation.org>	2025-04-03 20:47:47 +0300
committer	Linus Torvalds <torvalds@linux-foundation.org>	2025-04-03 20:47:47 +0300
commit	204e9a18f1b9685476d6480d4f26d5d7f7e2d505 (patch)
tree	d0a8c16eb884354147b784477370d0feee8ed547
parent	ea59cb74234c8d658a4299b57156265f59977494 (diff)
parent	c11bcbc0a517acf69282c8225059b2a8ac5fe628 (diff)
download	linux-204e9a18f1b9685476d6480d4f26d5d7f7e2d505.tar.xz