From 3460f6e5c1ed94c2ab7c1ccc032a5bebd88deaa7 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Wed, 30 Jun 2021 18:49:17 -0700 Subject: userfaultfd/shmem: combine shmem_{mcopy_atomic,mfill_zeropage}_pte Patch series "userfaultfd: add minor fault handling for shmem", v6. Overview ======== See the series which added minor faults for hugetlbfs [3] for a detailed overview of minor fault handling in general. This series adds the same support for shmem-backed areas. This series is structured as follows: - Commits 1 and 2 are cleanups. - Commits 3 and 4 implement the new feature (minor fault handling for shmem). - Commit 5 advertises that the feature is now available since at this point it's fully implemented. - Commit 6 is a final cleanup, modifying an existing code path to re-use a new helper we've introduced. - Commits 7, 8, 9, 10 update the userfaultfd selftest to exercise the feature. Use Case ======== In some cases it is useful to have VM memory backed by tmpfs instead of hugetlbfs. So, this feature will be used to support the same VM live migration use case described in my original series. Additionally, Android folks (Lokesh Gidra ) hope to optimize the Android Runtime garbage collector using this feature: "The plan is to use userfaultfd for concurrently compacting the heap. With this feature, the heap can be shared-mapped at another location where the GC-thread(s) could continue the compaction operation without the need to invoke userfault ioctl(UFFDIO_COPY) each time. OTOH, if and when Java threads get faults on the heap, UFFDIO_CONTINUE can be used to resume execution. Furthermore, this feature enables updating references in the 'non-moving' portion of the heap efficiently. Without this feature, uneccessary page copying (ioctl(UFFDIO_COPY)) would be required." [1] https://lore.kernel.org/patchwork/cover/1388144/ [2] https://lore.kernel.org/patchwork/patch/1408161/ [3] https://lore.kernel.org/linux-fsdevel/20210301222728.176417-1-axelrasmussen@google.com/T/#t This patch (of 9): Previously, we did a dance where we had one calling path in userfaultfd.c (mfill_atomic_pte), but then we split it into two in shmem_fs.h (shmem_{mcopy_atomic,mfill_zeropage}_pte), and then rejoined into a single shared function in shmem.c (shmem_mfill_atomic_pte). This is all a bit overly complex. Just call the single combined shmem function directly, allowing us to clean up various branches, boilerplate, etc. While we're touching this function, two other small cleanup changes: - offset is equivalent to pgoff, so we can get rid of offset entirely. - Split two VM_BUG_ON cases into two statements. This means the line number reported when the BUG is hit specifies exactly which condition was true. Link: https://lkml.kernel.org/r/20210503180737.2487560-1-axelrasmussen@google.com Link: https://lkml.kernel.org/r/20210503180737.2487560-3-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Reviewed-by: Peter Xu Acked-by: Hugh Dickins Cc: Alexander Viro Cc: Andrea Arcangeli Cc: Brian Geffon Cc: "Dr . David Alan Gilbert" Cc: Jerome Glisse Cc: Joe Perches Cc: Kirill A. Shutemov Cc: Lokesh Gidra Cc: Mike Kravetz Cc: Mike Rapoport Cc: Mina Almasry Cc: Oliver Upton Cc: Shaohua Li Cc: Shuah Khan Cc: Stephen Rothwell Cc: Wang Qing Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 52 ++++++++++++++++------------------------------------ mm/userfaultfd.c | 10 +++------- 2 files changed, 19 insertions(+), 43 deletions(-) (limited to 'mm') diff --git a/mm/shmem.c b/mm/shmem.c index 1155279a6276..9b66a2b2680c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2352,13 +2352,14 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode return inode; } -static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, - pmd_t *dst_pmd, - struct vm_area_struct *dst_vma, - unsigned long dst_addr, - unsigned long src_addr, - bool zeropage, - struct page **pagep) +#ifdef CONFIG_USERFAULTFD +int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, + pmd_t *dst_pmd, + struct vm_area_struct *dst_vma, + unsigned long dst_addr, + unsigned long src_addr, + bool zeropage, + struct page **pagep) { struct inode *inode = file_inode(dst_vma->vm_file); struct shmem_inode_info *info = SHMEM_I(inode); @@ -2370,7 +2371,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, struct page *page; pte_t _dst_pte, *dst_pte; int ret; - pgoff_t offset, max_off; + pgoff_t max_off; ret = -ENOMEM; if (!shmem_inode_acct_block(inode, 1)) { @@ -2391,7 +2392,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, if (!page) goto out_unacct_blocks; - if (!zeropage) { /* mcopy_atomic */ + if (!zeropage) { /* COPY */ page_kaddr = kmap_atomic(page); ret = copy_from_user(page_kaddr, (const void __user *)src_addr, @@ -2405,7 +2406,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, /* don't free the page */ return -ENOENT; } - } else { /* mfill_zeropage_atomic */ + } else { /* ZEROPAGE */ clear_highpage(page); } } else { @@ -2413,15 +2414,15 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, *pagep = NULL; } - VM_BUG_ON(PageLocked(page) || PageSwapBacked(page)); + VM_BUG_ON(PageLocked(page)); + VM_BUG_ON(PageSwapBacked(page)); __SetPageLocked(page); __SetPageSwapBacked(page); __SetPageUptodate(page); ret = -EFAULT; - offset = linear_page_index(dst_vma, dst_addr); max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); - if (unlikely(offset >= max_off)) + if (unlikely(pgoff >= max_off)) goto out_release; ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL, @@ -2447,7 +2448,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, ret = -EFAULT; max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); - if (unlikely(offset >= max_off)) + if (unlikely(pgoff >= max_off)) goto out_release_unlock; ret = -EEXIST; @@ -2484,28 +2485,7 @@ out_unacct_blocks: shmem_inode_unacct_blocks(inode, 1); goto out; } - -int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, - pmd_t *dst_pmd, - struct vm_area_struct *dst_vma, - unsigned long dst_addr, - unsigned long src_addr, - struct page **pagep) -{ - return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, - dst_addr, src_addr, false, pagep); -} - -int shmem_mfill_zeropage_pte(struct mm_struct *dst_mm, - pmd_t *dst_pmd, - struct vm_area_struct *dst_vma, - unsigned long dst_addr) -{ - struct page *page = NULL; - - return shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, - dst_addr, 0, true, &page); -} +#endif /* CONFIG_USERFAULTFD */ #ifdef CONFIG_TMPFS static const struct inode_operations shmem_symlink_inode_operations; diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index da5535d2d990..57b228b9b6a4 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -392,13 +392,9 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm, dst_vma, dst_addr); } else { VM_WARN_ON_ONCE(wp_copy); - if (!zeropage) - err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd, - dst_vma, dst_addr, - src_addr, page); - else - err = shmem_mfill_zeropage_pte(dst_mm, dst_pmd, - dst_vma, dst_addr); + err = shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, + dst_addr, src_addr, zeropage, + page); } return err; -- cgit v1.2.3