summaryrefslogtreecommitdiff
path: root/mm/gup.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/gup.c')
-rw-r--r--mm/gup.c135
1 files changed, 62 insertions, 73 deletions
diff --git a/mm/gup.c b/mm/gup.c
index 569a4d82012d..84461d384ae2 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -96,8 +96,7 @@ retry:
* belongs to this folio.
*/
if (unlikely(page_folio(page) != folio)) {
- if (!put_devmap_managed_folio_refs(folio, refs))
- folio_put_refs(folio, refs);
+ folio_put_refs(folio, refs);
goto retry;
}
@@ -110,14 +109,13 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
if (is_zero_folio(folio))
return;
node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs);
- if (folio_test_large(folio))
+ if (folio_has_pincount(folio))
atomic_sub(refs, &folio->_pincount);
else
refs *= GUP_PIN_COUNTING_BIAS;
}
- if (!put_devmap_managed_folio_refs(folio, refs))
- folio_put_refs(folio, refs);
+ folio_put_refs(folio, refs);
}
/**
@@ -166,7 +164,7 @@ int __must_check try_grab_folio(struct folio *folio, int refs,
* Increment the normal page refcount field at least once,
* so that the page really is pinned.
*/
- if (folio_test_large(folio)) {
+ if (folio_has_pincount(folio)) {
folio_ref_add(folio, refs);
atomic_add(refs, &folio->_pincount);
} else {
@@ -225,7 +223,7 @@ void folio_add_pin(struct folio *folio)
* page refcount field at least once, so that the page really is
* pinned.
*/
- if (folio_test_large(folio)) {
+ if (folio_has_pincount(folio)) {
WARN_ON_ONCE(atomic_read(&folio->_pincount) < 1);
folio_ref_inc(folio);
atomic_inc(&folio->_pincount);
@@ -565,8 +563,7 @@ static struct folio *try_grab_folio_fast(struct page *page, int refs,
*/
if (unlikely((flags & FOLL_LONGTERM) &&
!folio_is_longterm_pinnable(folio))) {
- if (!put_devmap_managed_folio_refs(folio, refs))
- folio_put_refs(folio, refs);
+ folio_put_refs(folio, refs);
return NULL;
}
@@ -578,7 +575,7 @@ static struct folio *try_grab_folio_fast(struct page *page, int refs,
* is pinned. That's why the refcount from the earlier
* try_get_folio() is left intact.
*/
- if (folio_test_large(folio))
+ if (folio_has_pincount(folio))
atomic_add(refs, &folio->_pincount);
else
folio_ref_add(folio,
@@ -596,6 +593,33 @@ static struct folio *try_grab_folio_fast(struct page *page, int refs,
}
#endif /* CONFIG_HAVE_GUP_FAST */
+/* Common code for can_follow_write_* */
+static inline bool can_follow_write_common(struct page *page,
+ struct vm_area_struct *vma, unsigned int flags)
+{
+ /* Maybe FOLL_FORCE is set to override it? */
+ if (!(flags & FOLL_FORCE))
+ return false;
+
+ /* But FOLL_FORCE has no effect on shared mappings */
+ if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
+ return false;
+
+ /* ... or read-only private ones */
+ if (!(vma->vm_flags & VM_MAYWRITE))
+ return false;
+
+ /* ... or already writable ones that just need to take a write fault */
+ if (vma->vm_flags & VM_WRITE)
+ return false;
+
+ /*
+ * See can_change_pte_writable(): we broke COW and could map the page
+ * writable if we have an exclusive anonymous page ...
+ */
+ return page && PageAnon(page) && PageAnonExclusive(page);
+}
+
static struct page *no_page_table(struct vm_area_struct *vma,
unsigned int flags, unsigned long address)
{
@@ -622,6 +646,18 @@ static struct page *no_page_table(struct vm_area_struct *vma,
}
#ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES
+/* FOLL_FORCE can write to even unwritable PUDs in COW mappings. */
+static inline bool can_follow_write_pud(pud_t pud, struct page *page,
+ struct vm_area_struct *vma,
+ unsigned int flags)
+{
+ /* If the pud is writable, we can write to the page. */
+ if (pud_write(pud))
+ return true;
+
+ return can_follow_write_common(page, vma, flags);
+}
+
static struct page *follow_huge_pud(struct vm_area_struct *vma,
unsigned long addr, pud_t *pudp,
int flags, struct follow_page_context *ctx)
@@ -634,10 +670,11 @@ static struct page *follow_huge_pud(struct vm_area_struct *vma,
assert_spin_locked(pud_lockptr(mm, pudp));
- if ((flags & FOLL_WRITE) && !pud_write(pud))
+ if (!pud_present(pud))
return NULL;
- if (!pud_present(pud))
+ if ((flags & FOLL_WRITE) &&
+ !can_follow_write_pud(pud, pfn_to_page(pfn), vma, flags))
return NULL;
pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT;
@@ -686,27 +723,7 @@ static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page,
if (pmd_write(pmd))
return true;
- /* Maybe FOLL_FORCE is set to override it? */
- if (!(flags & FOLL_FORCE))
- return false;
-
- /* But FOLL_FORCE has no effect on shared mappings */
- if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
- return false;
-
- /* ... or read-only private ones */
- if (!(vma->vm_flags & VM_MAYWRITE))
- return false;
-
- /* ... or already writable ones that just need to take a write fault */
- if (vma->vm_flags & VM_WRITE)
- return false;
-
- /*
- * See can_change_pte_writable(): we broke COW and could map the page
- * writable if we have an exclusive anonymous page ...
- */
- if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+ if (!can_follow_write_common(page, vma, flags))
return false;
/* ... and a write-fault isn't required for other reasons. */
@@ -807,27 +824,7 @@ static inline bool can_follow_write_pte(pte_t pte, struct page *page,
if (pte_write(pte))
return true;
- /* Maybe FOLL_FORCE is set to override it? */
- if (!(flags & FOLL_FORCE))
- return false;
-
- /* But FOLL_FORCE has no effect on shared mappings */
- if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
- return false;
-
- /* ... or read-only private ones */
- if (!(vma->vm_flags & VM_MAYWRITE))
- return false;
-
- /* ... or already writable ones that just need to take a write fault */
- if (vma->vm_flags & VM_WRITE)
- return false;
-
- /*
- * See can_change_pte_writable(): we broke COW and could map the page
- * writable if we have an exclusive anonymous page ...
- */
- if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+ if (!can_follow_write_common(page, vma, flags))
return false;
/* ... and a write-fault isn't required for other reasons. */
@@ -1283,6 +1280,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma))
return -EOPNOTSUPP;
+ if ((gup_flags & FOLL_SPLIT_PMD) && is_vm_hugetlb_page(vma))
+ return -EOPNOTSUPP;
+
if (vma_is_secretmem(vma))
return -EFAULT;
@@ -1294,9 +1294,6 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
if (!(vm_flags & VM_WRITE) || (vm_flags & VM_SHADOW_STACK)) {
if (!(gup_flags & FOLL_FORCE))
return -EFAULT;
- /* hugetlb does not support FOLL_FORCE|FOLL_WRITE. */
- if (is_vm_hugetlb_page(vma))
- return -EFAULT;
/*
* We used to let the write,force case do COW in a
* VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could
@@ -2210,8 +2207,8 @@ size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
} while (start != end);
mmap_read_unlock(mm);
- if (size > (unsigned long)uaddr - start)
- return size - ((unsigned long)uaddr - start);
+ if (size > start - (unsigned long)uaddr)
+ return size - (start - (unsigned long)uaddr);
return 0;
}
EXPORT_SYMBOL(fault_in_safe_writeable);
@@ -2257,6 +2254,7 @@ EXPORT_SYMBOL(fault_in_readable);
/**
* get_dump_page() - pin user page in memory while writing it to core dump
* @addr: user address
+ * @locked: a pointer to an int denoting whether the mmap sem is held
*
* Returns struct page pointer of user page pinned for dump,
* to be freed afterwards by put_page().
@@ -2269,13 +2267,12 @@ EXPORT_SYMBOL(fault_in_readable);
* Called without mmap_lock (takes and releases the mmap_lock by itself).
*/
#ifdef CONFIG_ELF_CORE
-struct page *get_dump_page(unsigned long addr)
+struct page *get_dump_page(unsigned long addr, int *locked)
{
struct page *page;
- int locked = 0;
int ret;
- ret = __get_user_pages_locked(current->mm, addr, 1, &page, &locked,
+ ret = __get_user_pages_locked(current->mm, addr, 1, &page, locked,
FOLL_FORCE | FOLL_DUMP | FOLL_GET);
return (ret == 1) ? page : NULL;
}
@@ -2345,7 +2342,7 @@ static void collect_longterm_unpinnable_folios(
continue;
if (folio_test_hugetlb(folio)) {
- isolate_hugetlb(folio, movable_folio_list);
+ folio_isolate_hugetlb(folio, movable_folio_list);
continue;
}
@@ -2760,7 +2757,7 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
*
* *) ptes can be read atomically by the architecture.
*
- * *) access_ok is sufficient to validate userspace address ranges.
+ * *) valid user addesses are below TASK_MAX_SIZE
*
* The last two assumptions can be relaxed by the addition of helper functions.
*
@@ -3013,11 +3010,6 @@ static int gup_fast_devmap_leaf(unsigned long pfn, unsigned long addr,
break;
}
- if (!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)) {
- gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
- break;
- }
-
folio = try_grab_folio_fast(page, 1, flags);
if (!folio) {
gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
@@ -3354,8 +3346,7 @@ static unsigned long gup_fast(unsigned long start, unsigned long end,
return 0;
if (gup_flags & FOLL_PIN) {
- seq = raw_read_seqcount(&current->mm->write_protect_seq);
- if (seq & 1)
+ if (!raw_seqcount_try_begin(&current->mm->write_protect_seq, seq))
return 0;
}
@@ -3415,8 +3406,6 @@ static int gup_fast_fallback(unsigned long start, unsigned long nr_pages,
return -EOVERFLOW;
if (end > TASK_SIZE_MAX)
return -EFAULT;
- if (unlikely(!access_ok((void __user *)start, len)))
- return -EFAULT;
nr_pinned = gup_fast(start, end, gup_flags, pages);
if (nr_pinned == nr_pages || gup_flags & FOLL_FAST_ONLY)