summaryrefslogtreecommitdiff
path: root/mm/gup.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/gup.c')
-rw-r--r--mm/gup.c193
1 files changed, 58 insertions, 135 deletions
diff --git a/mm/gup.c b/mm/gup.c
index 4ededc113358..3c39cbbeebef 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -26,6 +26,7 @@
#include <asm/tlbflush.h>
#include "internal.h"
+#include "swap.h"
struct follow_page_context {
struct dev_pagemap *pgmap;
@@ -96,8 +97,7 @@ retry:
* belongs to this folio.
*/
if (unlikely(page_folio(page) != folio)) {
- if (!put_devmap_managed_folio_refs(folio, refs))
- folio_put_refs(folio, refs);
+ folio_put_refs(folio, refs);
goto retry;
}
@@ -110,14 +110,13 @@ static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
if (is_zero_folio(folio))
return;
node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs);
- if (folio_test_large(folio))
+ if (folio_has_pincount(folio))
atomic_sub(refs, &folio->_pincount);
else
refs *= GUP_PIN_COUNTING_BIAS;
}
- if (!put_devmap_managed_folio_refs(folio, refs))
- folio_put_refs(folio, refs);
+ folio_put_refs(folio, refs);
}
/**
@@ -166,7 +165,7 @@ int __must_check try_grab_folio(struct folio *folio, int refs,
* Increment the normal page refcount field at least once,
* so that the page really is pinned.
*/
- if (folio_test_large(folio)) {
+ if (folio_has_pincount(folio)) {
folio_ref_add(folio, refs);
atomic_add(refs, &folio->_pincount);
} else {
@@ -225,7 +224,7 @@ void folio_add_pin(struct folio *folio)
* page refcount field at least once, so that the page really is
* pinned.
*/
- if (folio_test_large(folio)) {
+ if (folio_has_pincount(folio)) {
WARN_ON_ONCE(atomic_read(&folio->_pincount) < 1);
folio_ref_inc(folio);
atomic_inc(&folio->_pincount);
@@ -565,8 +564,7 @@ static struct folio *try_grab_folio_fast(struct page *page, int refs,
*/
if (unlikely((flags & FOLL_LONGTERM) &&
!folio_is_longterm_pinnable(folio))) {
- if (!put_devmap_managed_folio_refs(folio, refs))
- folio_put_refs(folio, refs);
+ folio_put_refs(folio, refs);
return NULL;
}
@@ -578,7 +576,7 @@ static struct folio *try_grab_folio_fast(struct page *page, int refs,
* is pinned. That's why the refcount from the earlier
* try_get_folio() is left intact.
*/
- if (folio_test_large(folio))
+ if (folio_has_pincount(folio))
atomic_add(refs, &folio->_pincount);
else
folio_ref_add(folio,
@@ -847,11 +845,6 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
pte_t *ptep, pte;
int ret;
- /* FOLL_GET and FOLL_PIN are mutually exclusive. */
- if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
- (FOLL_PIN | FOLL_GET)))
- return ERR_PTR(-EINVAL);
-
ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
if (!ptep)
return no_page_table(vma, flags, address);
@@ -1109,10 +1102,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
/* user gate pages are read-only */
if (gup_flags & FOLL_WRITE)
return -EFAULT;
- if (address > TASK_SIZE)
- pgd = pgd_offset_k(address);
- else
- pgd = pgd_offset_gate(mm, address);
+ pgd = pgd_offset(mm, address);
if (pgd_none(*pgd))
return -EFAULT;
p4d = p4d_offset(pgd, address);
@@ -1435,7 +1425,11 @@ static long __get_user_pages(struct mm_struct *mm,
start = untagged_addr_remote(mm, start);
- VM_BUG_ON(!!pages != !!(gup_flags & (FOLL_GET | FOLL_PIN)));
+ VM_WARN_ON_ONCE(!!pages != !!(gup_flags & (FOLL_GET | FOLL_PIN)));
+
+ /* FOLL_GET and FOLL_PIN are mutually exclusive. */
+ VM_WARN_ON_ONCE((gup_flags & (FOLL_PIN | FOLL_GET)) ==
+ (FOLL_PIN | FOLL_GET));
do {
struct page *page;
@@ -2117,28 +2111,22 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
*/
size_t fault_in_writeable(char __user *uaddr, size_t size)
{
- char __user *start = uaddr, *end;
+ const unsigned long start = (unsigned long)uaddr;
+ const unsigned long end = start + size;
+ unsigned long cur;
if (unlikely(size == 0))
return 0;
if (!user_write_access_begin(uaddr, size))
return size;
- if (!PAGE_ALIGNED(uaddr)) {
- unsafe_put_user(0, uaddr, out);
- uaddr = (char __user *)PAGE_ALIGN((unsigned long)uaddr);
- }
- end = (char __user *)PAGE_ALIGN((unsigned long)start + size);
- if (unlikely(end < start))
- end = NULL;
- while (uaddr != end) {
- unsafe_put_user(0, uaddr, out);
- uaddr += PAGE_SIZE;
- }
+ /* Stop once we overflow to 0. */
+ for (cur = start; cur && cur < end; cur = PAGE_ALIGN_DOWN(cur + PAGE_SIZE))
+ unsafe_put_user(0, (char __user *)cur, out);
out:
user_write_access_end();
- if (size > uaddr - start)
- return size - (uaddr - start);
+ if (size > cur - start)
+ return size - (cur - start);
return 0;
}
EXPORT_SYMBOL(fault_in_writeable);
@@ -2192,26 +2180,24 @@ EXPORT_SYMBOL(fault_in_subpage_writeable);
*/
size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
{
- unsigned long start = (unsigned long)uaddr, end;
+ const unsigned long start = (unsigned long)uaddr;
+ const unsigned long end = start + size;
+ unsigned long cur;
struct mm_struct *mm = current->mm;
bool unlocked = false;
if (unlikely(size == 0))
return 0;
- end = PAGE_ALIGN(start + size);
- if (end < start)
- end = 0;
mmap_read_lock(mm);
- do {
- if (fixup_user_fault(mm, start, FAULT_FLAG_WRITE, &unlocked))
+ /* Stop once we overflow to 0. */
+ for (cur = start; cur && cur < end; cur = PAGE_ALIGN_DOWN(cur + PAGE_SIZE))
+ if (fixup_user_fault(mm, cur, FAULT_FLAG_WRITE, &unlocked))
break;
- start = (start + PAGE_SIZE) & PAGE_MASK;
- } while (start != end);
mmap_read_unlock(mm);
- if (size > start - (unsigned long)uaddr)
- return size - (start - (unsigned long)uaddr);
+ if (size > cur - start)
+ return size - (cur - start);
return 0;
}
EXPORT_SYMBOL(fault_in_safe_writeable);
@@ -2226,30 +2212,24 @@ EXPORT_SYMBOL(fault_in_safe_writeable);
*/
size_t fault_in_readable(const char __user *uaddr, size_t size)
{
- const char __user *start = uaddr, *end;
+ const unsigned long start = (unsigned long)uaddr;
+ const unsigned long end = start + size;
+ unsigned long cur;
volatile char c;
if (unlikely(size == 0))
return 0;
if (!user_read_access_begin(uaddr, size))
return size;
- if (!PAGE_ALIGNED(uaddr)) {
- unsafe_get_user(c, uaddr, out);
- uaddr = (const char __user *)PAGE_ALIGN((unsigned long)uaddr);
- }
- end = (const char __user *)PAGE_ALIGN((unsigned long)start + size);
- if (unlikely(end < start))
- end = NULL;
- while (uaddr != end) {
- unsafe_get_user(c, uaddr, out);
- uaddr += PAGE_SIZE;
- }
+ /* Stop once we overflow to 0. */
+ for (cur = start; cur && cur < end; cur = PAGE_ALIGN_DOWN(cur + PAGE_SIZE))
+ unsafe_get_user(c, (const char __user *)cur, out);
out:
user_read_access_end();
(void)c;
- if (size > uaddr - start)
- return size - (uaddr - start);
+ if (size > cur - start)
+ return size - (cur - start);
return 0;
}
EXPORT_SYMBOL(fault_in_readable);
@@ -2257,6 +2237,7 @@ EXPORT_SYMBOL(fault_in_readable);
/**
* get_dump_page() - pin user page in memory while writing it to core dump
* @addr: user address
+ * @locked: a pointer to an int denoting whether the mmap sem is held
*
* Returns struct page pointer of user page pinned for dump,
* to be freed afterwards by put_page().
@@ -2269,13 +2250,12 @@ EXPORT_SYMBOL(fault_in_readable);
* Called without mmap_lock (takes and releases the mmap_lock by itself).
*/
#ifdef CONFIG_ELF_CORE
-struct page *get_dump_page(unsigned long addr)
+struct page *get_dump_page(unsigned long addr, int *locked)
{
struct page *page;
- int locked = 0;
int ret;
- ret = __get_user_pages_locked(current->mm, addr, 1, &page, &locked,
+ ret = __get_user_pages_locked(current->mm, addr, 1, &page, locked,
FOLL_FORCE | FOLL_DUMP | FOLL_GET);
return (ret == 1) ? page : NULL;
}
@@ -2323,13 +2303,13 @@ static void pofs_unpin(struct pages_or_folios *pofs)
/*
* Returns the number of collected folios. Return value is always >= 0.
*/
-static void collect_longterm_unpinnable_folios(
+static unsigned long collect_longterm_unpinnable_folios(
struct list_head *movable_folio_list,
struct pages_or_folios *pofs)
{
+ unsigned long i, collected = 0;
struct folio *prev_folio = NULL;
bool drain_allow = true;
- unsigned long i;
for (i = 0; i < pofs->nr_entries; i++) {
struct folio *folio = pofs_get_folio(pofs, i);
@@ -2341,6 +2321,8 @@ static void collect_longterm_unpinnable_folios(
if (folio_is_longterm_pinnable(folio))
continue;
+ collected++;
+
if (folio_is_device_coherent(folio))
continue;
@@ -2362,6 +2344,8 @@ static void collect_longterm_unpinnable_folios(
NR_ISOLATED_ANON + folio_is_file_lru(folio),
folio_nr_pages(folio));
}
+
+ return collected;
}
/*
@@ -2438,9 +2422,11 @@ static long
check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs)
{
LIST_HEAD(movable_folio_list);
+ unsigned long collected;
- collect_longterm_unpinnable_folios(&movable_folio_list, pofs);
- if (list_empty(&movable_folio_list))
+ collected = collect_longterm_unpinnable_folios(&movable_folio_list,
+ pofs);
+ if (!collected)
return 0;
return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs);
@@ -2760,7 +2746,7 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
*
* *) ptes can be read atomically by the architecture.
*
- * *) access_ok is sufficient to validate userspace address ranges.
+ * *) valid user addesses are below TASK_MAX_SIZE
*
* The last two assumptions can be relaxed by the addition of helper functions.
*
@@ -3013,11 +2999,6 @@ static int gup_fast_devmap_leaf(unsigned long pfn, unsigned long addr,
break;
}
- if (!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)) {
- gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
- break;
- }
-
folio = try_grab_folio_fast(page, 1, flags);
if (!folio) {
gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);
@@ -3181,46 +3162,6 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr,
return 1;
}
-static int gup_fast_pgd_leaf(pgd_t orig, pgd_t *pgdp, unsigned long addr,
- unsigned long end, unsigned int flags, struct page **pages,
- int *nr)
-{
- int refs;
- struct page *page;
- struct folio *folio;
-
- if (!pgd_access_permitted(orig, flags & FOLL_WRITE))
- return 0;
-
- BUILD_BUG_ON(pgd_devmap(orig));
-
- page = pgd_page(orig);
- refs = record_subpages(page, PGDIR_SIZE, addr, end, pages + *nr);
-
- folio = try_grab_folio_fast(page, refs, flags);
- if (!folio)
- return 0;
-
- if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
- gup_put_folio(folio, refs, flags);
- return 0;
- }
-
- if (!pgd_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) {
- gup_put_folio(folio, refs, flags);
- return 0;
- }
-
- if (!gup_fast_folio_allowed(folio, flags)) {
- gup_put_folio(folio, refs, flags);
- return 0;
- }
-
- *nr += refs;
- folio_set_referenced(folio);
- return 1;
-}
-
static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
unsigned long end, unsigned int flags, struct page **pages,
int *nr)
@@ -3315,12 +3256,9 @@ static void gup_fast_pgd_range(unsigned long addr, unsigned long end,
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
return;
- if (unlikely(pgd_leaf(pgd))) {
- if (!gup_fast_pgd_leaf(pgd, pgdp, addr, next, flags,
- pages, nr))
- return;
- } else if (!gup_fast_p4d_range(pgdp, pgd, addr, next, flags,
- pages, nr))
+ BUILD_BUG_ON(pgd_leaf(pgd));
+ if (!gup_fast_p4d_range(pgdp, pgd, addr, next, flags,
+ pages, nr))
return;
} while (pgdp++, addr = next, addr != end);
}
@@ -3367,7 +3305,7 @@ static unsigned long gup_fast(unsigned long start, unsigned long end,
* include/asm-generic/tlb.h for more details.
*
* We do not adopt an rcu_read_lock() here as we also want to block IPIs
- * that come from THPs splitting.
+ * that come from callers of tlb_remove_table_sync_one().
*/
local_irq_save(flags);
gup_fast_pgd_range(start, end, gup_flags, pages, &nr_pinned);
@@ -3414,8 +3352,6 @@ static int gup_fast_fallback(unsigned long start, unsigned long nr_pages,
return -EOVERFLOW;
if (end > TASK_SIZE_MAX)
return -EFAULT;
- if (unlikely(!access_ok((void __user *)start, len)))
- return -EFAULT;
nr_pinned = gup_fast(start, end, gup_flags, pages);
if (nr_pinned == nr_pages || gup_flags & FOLL_FAST_ONLY)
@@ -3657,7 +3593,7 @@ long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end,
{
unsigned int flags, nr_folios, nr_found;
unsigned int i, pgshift = PAGE_SHIFT;
- pgoff_t start_idx, end_idx, next_idx;
+ pgoff_t start_idx, end_idx;
struct folio *folio = NULL;
struct folio_batch fbatch;
struct hstate *h;
@@ -3707,20 +3643,8 @@ long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end,
folio = NULL;
}
- next_idx = 0;
for (i = 0; i < nr_found; i++) {
- /*
- * As there can be multiple entries for a
- * given folio in the batch returned by
- * filemap_get_folios_contig(), the below
- * check is to ensure that we pin and return a
- * unique set of folios between start and end.
- */
- if (next_idx &&
- next_idx != folio_index(fbatch.folios[i]))
- continue;
-
- folio = page_folio(&fbatch.folios[i]->page);
+ folio = fbatch.folios[i];
if (try_grab_folio(folio, 1, FOLL_PIN)) {
folio_batch_release(&fbatch);
@@ -3732,7 +3656,6 @@ long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end,
*offset = offset_in_folio(folio, start);
folios[nr_folios] = folio;
- next_idx = folio_next_index(folio);
if (++nr_folios == max_folios)
break;
}