diff options
Diffstat (limited to 'mm/gup.c')
-rw-r--r-- | mm/gup.c | 135 |
1 files changed, 62 insertions, 73 deletions
@@ -96,8 +96,7 @@ retry: * belongs to this folio. */ if (unlikely(page_folio(page) != folio)) { - if (!put_devmap_managed_folio_refs(folio, refs)) - folio_put_refs(folio, refs); + folio_put_refs(folio, refs); goto retry; } @@ -110,14 +109,13 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) if (is_zero_folio(folio)) return; node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs); - if (folio_test_large(folio)) + if (folio_has_pincount(folio)) atomic_sub(refs, &folio->_pincount); else refs *= GUP_PIN_COUNTING_BIAS; } - if (!put_devmap_managed_folio_refs(folio, refs)) - folio_put_refs(folio, refs); + folio_put_refs(folio, refs); } /** @@ -166,7 +164,7 @@ int __must_check try_grab_folio(struct folio *folio, int refs, * Increment the normal page refcount field at least once, * so that the page really is pinned. */ - if (folio_test_large(folio)) { + if (folio_has_pincount(folio)) { folio_ref_add(folio, refs); atomic_add(refs, &folio->_pincount); } else { @@ -225,7 +223,7 @@ void folio_add_pin(struct folio *folio) * page refcount field at least once, so that the page really is * pinned. */ - if (folio_test_large(folio)) { + if (folio_has_pincount(folio)) { WARN_ON_ONCE(atomic_read(&folio->_pincount) < 1); folio_ref_inc(folio); atomic_inc(&folio->_pincount); @@ -565,8 +563,7 @@ static struct folio *try_grab_folio_fast(struct page *page, int refs, */ if (unlikely((flags & FOLL_LONGTERM) && !folio_is_longterm_pinnable(folio))) { - if (!put_devmap_managed_folio_refs(folio, refs)) - folio_put_refs(folio, refs); + folio_put_refs(folio, refs); return NULL; } @@ -578,7 +575,7 @@ static struct folio *try_grab_folio_fast(struct page *page, int refs, * is pinned. That's why the refcount from the earlier * try_get_folio() is left intact. */ - if (folio_test_large(folio)) + if (folio_has_pincount(folio)) atomic_add(refs, &folio->_pincount); else folio_ref_add(folio, @@ -596,6 +593,33 @@ static struct folio *try_grab_folio_fast(struct page *page, int refs, } #endif /* CONFIG_HAVE_GUP_FAST */ +/* Common code for can_follow_write_* */ +static inline bool can_follow_write_common(struct page *page, + struct vm_area_struct *vma, unsigned int flags) +{ + /* Maybe FOLL_FORCE is set to override it? */ + if (!(flags & FOLL_FORCE)) + return false; + + /* But FOLL_FORCE has no effect on shared mappings */ + if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) + return false; + + /* ... or read-only private ones */ + if (!(vma->vm_flags & VM_MAYWRITE)) + return false; + + /* ... or already writable ones that just need to take a write fault */ + if (vma->vm_flags & VM_WRITE) + return false; + + /* + * See can_change_pte_writable(): we broke COW and could map the page + * writable if we have an exclusive anonymous page ... + */ + return page && PageAnon(page) && PageAnonExclusive(page); +} + static struct page *no_page_table(struct vm_area_struct *vma, unsigned int flags, unsigned long address) { @@ -622,6 +646,18 @@ static struct page *no_page_table(struct vm_area_struct *vma, } #ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES +/* FOLL_FORCE can write to even unwritable PUDs in COW mappings. */ +static inline bool can_follow_write_pud(pud_t pud, struct page *page, + struct vm_area_struct *vma, + unsigned int flags) +{ + /* If the pud is writable, we can write to the page. */ + if (pud_write(pud)) + return true; + + return can_follow_write_common(page, vma, flags); +} + static struct page *follow_huge_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pudp, int flags, struct follow_page_context *ctx) @@ -634,10 +670,11 @@ static struct page *follow_huge_pud(struct vm_area_struct *vma, assert_spin_locked(pud_lockptr(mm, pudp)); - if ((flags & FOLL_WRITE) && !pud_write(pud)) + if (!pud_present(pud)) return NULL; - if (!pud_present(pud)) + if ((flags & FOLL_WRITE) && + !can_follow_write_pud(pud, pfn_to_page(pfn), vma, flags)) return NULL; pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT; @@ -686,27 +723,7 @@ static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page, if (pmd_write(pmd)) return true; - /* Maybe FOLL_FORCE is set to override it? */ - if (!(flags & FOLL_FORCE)) - return false; - - /* But FOLL_FORCE has no effect on shared mappings */ - if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) - return false; - - /* ... or read-only private ones */ - if (!(vma->vm_flags & VM_MAYWRITE)) - return false; - - /* ... or already writable ones that just need to take a write fault */ - if (vma->vm_flags & VM_WRITE) - return false; - - /* - * See can_change_pte_writable(): we broke COW and could map the page - * writable if we have an exclusive anonymous page ... - */ - if (!page || !PageAnon(page) || !PageAnonExclusive(page)) + if (!can_follow_write_common(page, vma, flags)) return false; /* ... and a write-fault isn't required for other reasons. */ @@ -807,27 +824,7 @@ static inline bool can_follow_write_pte(pte_t pte, struct page *page, if (pte_write(pte)) return true; - /* Maybe FOLL_FORCE is set to override it? */ - if (!(flags & FOLL_FORCE)) - return false; - - /* But FOLL_FORCE has no effect on shared mappings */ - if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED)) - return false; - - /* ... or read-only private ones */ - if (!(vma->vm_flags & VM_MAYWRITE)) - return false; - - /* ... or already writable ones that just need to take a write fault */ - if (vma->vm_flags & VM_WRITE) - return false; - - /* - * See can_change_pte_writable(): we broke COW and could map the page - * writable if we have an exclusive anonymous page ... - */ - if (!page || !PageAnon(page) || !PageAnonExclusive(page)) + if (!can_follow_write_common(page, vma, flags)) return false; /* ... and a write-fault isn't required for other reasons. */ @@ -1283,6 +1280,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma)) return -EOPNOTSUPP; + if ((gup_flags & FOLL_SPLIT_PMD) && is_vm_hugetlb_page(vma)) + return -EOPNOTSUPP; + if (vma_is_secretmem(vma)) return -EFAULT; @@ -1294,9 +1294,6 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) if (!(vm_flags & VM_WRITE) || (vm_flags & VM_SHADOW_STACK)) { if (!(gup_flags & FOLL_FORCE)) return -EFAULT; - /* hugetlb does not support FOLL_FORCE|FOLL_WRITE. */ - if (is_vm_hugetlb_page(vma)) - return -EFAULT; /* * We used to let the write,force case do COW in a * VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could @@ -2210,8 +2207,8 @@ size_t fault_in_safe_writeable(const char __user *uaddr, size_t size) } while (start != end); mmap_read_unlock(mm); - if (size > (unsigned long)uaddr - start) - return size - ((unsigned long)uaddr - start); + if (size > start - (unsigned long)uaddr) + return size - (start - (unsigned long)uaddr); return 0; } EXPORT_SYMBOL(fault_in_safe_writeable); @@ -2257,6 +2254,7 @@ EXPORT_SYMBOL(fault_in_readable); /** * get_dump_page() - pin user page in memory while writing it to core dump * @addr: user address + * @locked: a pointer to an int denoting whether the mmap sem is held * * Returns struct page pointer of user page pinned for dump, * to be freed afterwards by put_page(). @@ -2269,13 +2267,12 @@ EXPORT_SYMBOL(fault_in_readable); * Called without mmap_lock (takes and releases the mmap_lock by itself). */ #ifdef CONFIG_ELF_CORE -struct page *get_dump_page(unsigned long addr) +struct page *get_dump_page(unsigned long addr, int *locked) { struct page *page; - int locked = 0; int ret; - ret = __get_user_pages_locked(current->mm, addr, 1, &page, &locked, + ret = __get_user_pages_locked(current->mm, addr, 1, &page, locked, FOLL_FORCE | FOLL_DUMP | FOLL_GET); return (ret == 1) ? page : NULL; } @@ -2345,7 +2342,7 @@ static void collect_longterm_unpinnable_folios( continue; if (folio_test_hugetlb(folio)) { - isolate_hugetlb(folio, movable_folio_list); + folio_isolate_hugetlb(folio, movable_folio_list); continue; } @@ -2760,7 +2757,7 @@ EXPORT_SYMBOL(get_user_pages_unlocked); * * *) ptes can be read atomically by the architecture. * - * *) access_ok is sufficient to validate userspace address ranges. + * *) valid user addesses are below TASK_MAX_SIZE * * The last two assumptions can be relaxed by the addition of helper functions. * @@ -3013,11 +3010,6 @@ static int gup_fast_devmap_leaf(unsigned long pfn, unsigned long addr, break; } - if (!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)) { - gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages); - break; - } - folio = try_grab_folio_fast(page, 1, flags); if (!folio) { gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages); @@ -3354,8 +3346,7 @@ static unsigned long gup_fast(unsigned long start, unsigned long end, return 0; if (gup_flags & FOLL_PIN) { - seq = raw_read_seqcount(¤t->mm->write_protect_seq); - if (seq & 1) + if (!raw_seqcount_try_begin(¤t->mm->write_protect_seq, seq)) return 0; } @@ -3415,8 +3406,6 @@ static int gup_fast_fallback(unsigned long start, unsigned long nr_pages, return -EOVERFLOW; if (end > TASK_SIZE_MAX) return -EFAULT; - if (unlikely(!access_ok((void __user *)start, len))) - return -EFAULT; nr_pinned = gup_fast(start, end, gup_flags, pages); if (nr_pinned == nr_pages || gup_flags & FOLL_FAST_ONLY) |