diff options
| author | Mike Rapoport (Microsoft) <rppt@kernel.org> | 2026-04-02 07:11:45 +0300 |
|---|---|---|
| committer | Andrew Morton <akpm@linux-foundation.org> | 2026-04-18 10:10:53 +0300 |
| commit | b8c03b7f4558219ca09693b5fa4f5e068041d2c2 (patch) | |
| tree | 9383e921dcf8cddca5f4890b68468eda4f6bc5f0 | |
| parent | e2e0b826d37419536b91b25fa51ecc0565d27726 (diff) | |
| download | linux-b8c03b7f4558219ca09693b5fa4f5e068041d2c2.tar.xz | |
userfaultfd: introduce mfill_get_vma() and mfill_put_vma()
Split the code that finds, locks and verifies VMA from mfill_atomic() into
a helper function.
This function will be used later during refactoring of
mfill_atomic_pte_copy().
Add a counterpart mfill_put_vma() helper that unlocks the VMA and releases
map_changing_lock.
[avagin@google.com: fix lock leak in mfill_get_vma()]
Link: https://lore.kernel.org/20260316173829.1126728-1-avagin@google.com
Link: https://lore.kernel.org/20260402041156.1377214-5-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Andrei Vagin <avagin@google.com>
Reviewed-by: Harry Yoo (Oracle) <harry@kernel.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: David Hildenbrand (Arm) <david@kernel.org>
Cc: Harry Yoo <harry.yoo@oracle.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nikita Kalyazin <kalyazin@amazon.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Carlier <devnexen@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
| -rw-r--r-- | mm/userfaultfd.c | 125 |
1 files changed, 75 insertions, 50 deletions
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 3f7ed93020bc..bcba57dc1aee 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -157,6 +157,75 @@ static void uffd_mfill_unlock(struct vm_area_struct *vma) } #endif +static void mfill_put_vma(struct mfill_state *state) +{ + if (!state->vma) + return; + + up_read(&state->ctx->map_changing_lock); + uffd_mfill_unlock(state->vma); + state->vma = NULL; +} + +static int mfill_get_vma(struct mfill_state *state) +{ + struct userfaultfd_ctx *ctx = state->ctx; + uffd_flags_t flags = state->flags; + struct vm_area_struct *dst_vma; + int err; + + /* + * Make sure the vma is not shared, that the dst range is + * both valid and fully within a single existing vma. + */ + dst_vma = uffd_mfill_lock(ctx->mm, state->dst_start, state->len); + if (IS_ERR(dst_vma)) + return PTR_ERR(dst_vma); + + /* + * If memory mappings are changing because of non-cooperative + * operation (e.g. mremap) running in parallel, bail out and + * request the user to retry later + */ + down_read(&ctx->map_changing_lock); + state->vma = dst_vma; + err = -EAGAIN; + if (atomic_read(&ctx->mmap_changing)) + goto out_unlock; + + err = -EINVAL; + + /* + * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but + * it will overwrite vm_ops, so vma_is_anonymous must return false. + */ + if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) && + dst_vma->vm_flags & VM_SHARED)) + goto out_unlock; + + /* + * validate 'mode' now that we know the dst_vma: don't allow + * a wrprotect copy if the userfaultfd didn't register as WP. + */ + if ((flags & MFILL_ATOMIC_WP) && !(dst_vma->vm_flags & VM_UFFD_WP)) + goto out_unlock; + + if (is_vm_hugetlb_page(dst_vma)) + return 0; + + if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) + goto out_unlock; + if (!vma_is_shmem(dst_vma) && + uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) + goto out_unlock; + + return 0; + +out_unlock: + mfill_put_vma(state); + return err; +} + static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address) { pgd_t *pgd; @@ -767,8 +836,6 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx, .src_addr = src_start, .dst_addr = dst_start, }; - struct mm_struct *dst_mm = ctx->mm; - struct vm_area_struct *dst_vma; long copied = 0; ssize_t err; @@ -783,56 +850,17 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx, VM_WARN_ON_ONCE(dst_start + len <= dst_start); retry: - /* - * Make sure the vma is not shared, that the dst range is - * both valid and fully within a single existing vma. - */ - dst_vma = uffd_mfill_lock(dst_mm, dst_start, len); - if (IS_ERR(dst_vma)) { - err = PTR_ERR(dst_vma); + err = mfill_get_vma(&state); + if (err) goto out; - } - state.vma = dst_vma; - - /* - * If memory mappings are changing because of non-cooperative - * operation (e.g. mremap) running in parallel, bail out and - * request the user to retry later - */ - down_read(&ctx->map_changing_lock); - err = -EAGAIN; - if (atomic_read(&ctx->mmap_changing)) - goto out_unlock; - - err = -EINVAL; - /* - * shmem_zero_setup is invoked in mmap for MAP_ANONYMOUS|MAP_SHARED but - * it will overwrite vm_ops, so vma_is_anonymous must return false. - */ - if (WARN_ON_ONCE(vma_is_anonymous(dst_vma) && - dst_vma->vm_flags & VM_SHARED)) - goto out_unlock; - - /* - * validate 'mode' now that we know the dst_vma: don't allow - * a wrprotect copy if the userfaultfd didn't register as WP. - */ - if ((flags & MFILL_ATOMIC_WP) && !(dst_vma->vm_flags & VM_UFFD_WP)) - goto out_unlock; /* * If this is a HUGETLB vma, pass off to appropriate routine */ - if (is_vm_hugetlb_page(dst_vma)) - return mfill_atomic_hugetlb(ctx, dst_vma, dst_start, + if (is_vm_hugetlb_page(state.vma)) + return mfill_atomic_hugetlb(ctx, state.vma, dst_start, src_start, len, flags); - if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) - goto out_unlock; - if (!vma_is_shmem(dst_vma) && - uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) - goto out_unlock; - while (state.src_addr < src_start + len) { VM_WARN_ON_ONCE(state.dst_addr >= dst_start + len); @@ -851,8 +879,7 @@ retry: if (unlikely(err == -ENOENT)) { void *kaddr; - up_read(&ctx->map_changing_lock); - uffd_mfill_unlock(state.vma); + mfill_put_vma(&state); VM_WARN_ON_ONCE(!state.folio); kaddr = kmap_local_folio(state.folio, 0); @@ -881,9 +908,7 @@ retry: break; } -out_unlock: - up_read(&ctx->map_changing_lock); - uffd_mfill_unlock(state.vma); + mfill_put_vma(&state); out: if (state.folio) folio_put(state.folio); |
