Diffstat (limited to 'mm/gup.c')
-rw-r--r--  mm/gup.c  293
1 file changed, 103 insertions, 190 deletions
diff --git a/mm/gup.c b/mm/gup.c
index d8a33dd1430d..ad617e7f22f5 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -329,6 +329,13 @@ void unpin_user_pages(struct page **pages, unsigned long npages)
unsigned long index;
/*
+ * If this WARN_ON() fires, then the system *might* be leaking pages (by
+ * leaving them pinned), but probably not. More likely, gup/pup returned
+ * a hard -ERRNO error to the caller, who erroneously passed it here.
+ */
+ if (WARN_ON(IS_ERR_VALUE(npages)))
+ return;
+ /*
* TODO: this can be optimized for huge pages: if a series of pages is
* physically contiguous and part of the same compound page, then a
* single operation to the head page should suffice.
@@ -381,22 +388,13 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
}
/*
- * FOLL_FORCE or a forced COW break can write even to unwritable pte's,
- * but only after we've gone through a COW cycle and they are dirty.
+ * FOLL_FORCE can write to even unwritable pte's, but only
+ * after we've gone through a COW cycle and they are dirty.
*/
static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
{
- return pte_write(pte) || ((flags & FOLL_COW) && pte_dirty(pte));
-}
-
-/*
- * A (separate) COW fault might break the page the other way and
- * get_user_pages() would return the page from what is now the wrong
- * VM. So we need to force a COW break at GUP time even for reads.
- */
-static inline bool should_force_cow_break(struct vm_area_struct *vma, unsigned int flags)
-{
- return is_cow_mapping(vma->vm_flags) && (flags & (FOLL_GET | FOLL_PIN));
+ return pte_write(pte) ||
+ ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
}
static struct page *follow_page_pte(struct vm_area_struct *vma,
@@ -843,7 +841,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
goto unmap;
*page = pte_page(*pte);
}
- if (unlikely(!try_get_page(*page))) {
+ if (unlikely(!try_grab_page(*page, gup_flags))) {
ret = -ENOMEM;
goto unmap;
}
@@ -859,7 +857,7 @@ unmap:
* does not include FOLL_NOWAIT, the mmap_lock may be released. If it
* is, *@locked will be set to 0 and -EBUSY returned.
*/
-static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
+static int faultin_page(struct vm_area_struct *vma,
unsigned long address, unsigned int *flags, int *locked)
{
unsigned int fault_flags = 0;
@@ -884,7 +882,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
fault_flags |= FAULT_FLAG_TRIED;
}
- ret = handle_mm_fault(vma, address, fault_flags);
+ ret = handle_mm_fault(vma, address, fault_flags, NULL);
if (ret & VM_FAULT_ERROR) {
int err = vm_fault_to_errno(ret, *flags);
@@ -893,13 +891,6 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
BUG();
}
- if (tsk) {
- if (ret & VM_FAULT_MAJOR)
- tsk->maj_flt++;
- else
- tsk->min_flt++;
- }
-
if (ret & VM_FAULT_RETRY) {
if (locked && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
*locked = 0;
@@ -969,7 +960,6 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
/**
* __get_user_pages() - pin user pages in memory
- * @tsk: task_struct of target task
* @mm: mm_struct of target mm
* @start: starting user address
* @nr_pages: number of pages from start to pin
@@ -1028,7 +1018,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
* instead of __get_user_pages. __get_user_pages should be used only if
* you need some special @gup_flags.
*/
-static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+static long __get_user_pages(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *locked)
@@ -1075,11 +1065,9 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
goto out;
}
if (is_vm_hugetlb_page(vma)) {
- if (should_force_cow_break(vma, foll_flags))
- foll_flags |= FOLL_WRITE;
i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &nr_pages, i,
- foll_flags, locked);
+ gup_flags, locked);
if (locked && *locked == 0) {
/*
* We've got a VM_FAULT_RETRY
@@ -1093,10 +1081,6 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
continue;
}
}
-
- if (should_force_cow_break(vma, foll_flags))
- foll_flags |= FOLL_WRITE;
-
retry:
/*
* If we have a pending SIGKILL, don't keep faulting pages and
@@ -1110,8 +1094,7 @@ retry:
page = follow_page_mask(vma, start, foll_flags, &ctx);
if (!page) {
- ret = faultin_page(tsk, vma, start, &foll_flags,
- locked);
+ ret = faultin_page(vma, start, &foll_flags, locked);
switch (ret) {
case 0:
goto retry;
@@ -1185,8 +1168,6 @@ static bool vma_permits_fault(struct vm_area_struct *vma,
/**
* fixup_user_fault() - manually resolve a user page fault
- * @tsk: the task_struct to use for page fault accounting, or
- * NULL if faults are not to be recorded.
* @mm: mm_struct of target mm
* @address: user address
* @fault_flags: flags to pass down to handle_mm_fault()
@@ -1214,7 +1195,7 @@ static bool vma_permits_fault(struct vm_area_struct *vma,
* This function will not return with an unlocked mmap_lock. So it has not the
* same semantics wrt the @mm->mmap_lock as does filemap_fault().
*/
-int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
+int fixup_user_fault(struct mm_struct *mm,
unsigned long address, unsigned int fault_flags,
bool *unlocked)
{
@@ -1238,7 +1219,7 @@ retry:
fatal_signal_pending(current))
return -EINTR;
- ret = handle_mm_fault(vma, address, fault_flags);
+ ret = handle_mm_fault(vma, address, fault_flags, NULL);
major |= ret & VM_FAULT_MAJOR;
if (ret & VM_FAULT_ERROR) {
int err = vm_fault_to_errno(ret, 0);
@@ -1255,12 +1236,6 @@ retry:
goto retry;
}
- if (tsk) {
- if (major)
- tsk->maj_flt++;
- else
- tsk->min_flt++;
- }
return 0;
}
EXPORT_SYMBOL_GPL(fixup_user_fault);
@@ -1269,8 +1244,7 @@ EXPORT_SYMBOL_GPL(fixup_user_fault);
* Please note that this function, unlike __get_user_pages will not
* return 0 for nr_pages > 0 without FOLL_NOWAIT
*/
-static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
- struct mm_struct *mm,
+static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
struct page **pages,
@@ -1288,6 +1262,9 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
BUG_ON(*locked != 1);
}
+ if (flags & FOLL_PIN)
+ atomic_set(&mm->has_pinned, 1);
+
/*
* FOLL_PIN and FOLL_GET are mutually exclusive. Traditional behavior
* is to set FOLL_GET if the caller wants pages[] filled in (but has
@@ -1303,7 +1280,7 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
pages_done = 0;
lock_dropped = false;
for (;;) {
- ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages,
+ ret = __get_user_pages(mm, start, nr_pages, flags, pages,
vmas, locked);
if (!locked)
/* VM_FAULT_RETRY couldn't trigger, bypass */
@@ -1363,7 +1340,7 @@ retry:
}
*locked = 1;
- ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED,
+ ret = __get_user_pages(mm, start, 1, flags | FOLL_TRIED,
pages, NULL, locked);
if (!*locked) {
/* Continue to retry until we succeeded */
@@ -1450,7 +1427,7 @@ long populate_vma_page_range(struct vm_area_struct *vma,
* We made sure addr is within a VMA, so the following will
* not result in a stack expansion that recurses back here.
*/
- return __get_user_pages(current, mm, start, nr_pages, gup_flags,
+ return __get_user_pages(mm, start, nr_pages, gup_flags,
NULL, NULL, locked);
}
@@ -1534,7 +1511,7 @@ struct page *get_dump_page(unsigned long addr)
struct vm_area_struct *vma;
struct page *page;
- if (__get_user_pages(current, current->mm, addr, 1,
+ if (__get_user_pages(current->mm, addr, 1,
FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma,
NULL) < 1)
return NULL;
@@ -1543,8 +1520,7 @@ struct page *get_dump_page(unsigned long addr)
}
#endif /* CONFIG_ELF_CORE */
#else /* CONFIG_MMU */
-static long __get_user_pages_locked(struct task_struct *tsk,
- struct mm_struct *mm, unsigned long start,
+static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
unsigned long nr_pages, struct page **pages,
struct vm_area_struct **vmas, int *locked,
unsigned int foll_flags)
@@ -1609,59 +1585,7 @@ static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
}
#ifdef CONFIG_CMA
-static struct page *new_non_cma_page(struct page *page, unsigned long private)
-{
- /*
- * We want to make sure we allocate the new page from the same node
- * as the source page.
- */
- int nid = page_to_nid(page);
- /*
- * Trying to allocate a page for migration. Ignore allocation
- * failure warnings. We don't force __GFP_THISNODE here because
- * this node here is the node where we have CMA reservation and
- * in some case these nodes will have really less non movable
- * allocation memory.
- */
- gfp_t gfp_mask = GFP_USER | __GFP_NOWARN;
-
- if (PageHighMem(page))
- gfp_mask |= __GFP_HIGHMEM;
-
-#ifdef CONFIG_HUGETLB_PAGE
- if (PageHuge(page)) {
- struct hstate *h = page_hstate(page);
- /*
- * We don't want to dequeue from the pool because pool pages will
- * mostly be from the CMA region.
- */
- return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
- }
-#endif
- if (PageTransHuge(page)) {
- struct page *thp;
- /*
- * ignore allocation failure warnings
- */
- gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN;
-
- /*
- * Remove the movable mask so that we don't allocate from
- * CMA area again.
- */
- thp_gfpmask &= ~__GFP_MOVABLE;
- thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
- if (!thp)
- return NULL;
- prep_transhuge_page(thp);
- return thp;
- }
-
- return __alloc_pages_node(nid, gfp_mask, 0);
-}
-
-static long check_and_migrate_cma_pages(struct task_struct *tsk,
- struct mm_struct *mm,
+static long check_and_migrate_cma_pages(struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
struct page **pages,
@@ -1674,6 +1598,10 @@ static long check_and_migrate_cma_pages(struct task_struct *tsk,
bool migrate_allow = true;
LIST_HEAD(cma_page_list);
long ret = nr_pages;
+ struct migration_target_control mtc = {
+ .nid = NUMA_NO_NODE,
+ .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_NOWARN,
+ };
check_again:
for (i = 0; i < nr_pages;) {
@@ -1704,7 +1632,7 @@ check_again:
mod_node_page_state(page_pgdat(head),
NR_ISOLATED_ANON +
page_is_file_lru(head),
- hpage_nr_pages(head));
+ thp_nr_pages(head));
}
}
}
@@ -1719,8 +1647,8 @@ check_again:
for (i = 0; i < nr_pages; i++)
put_page(pages[i]);
- if (migrate_pages(&cma_page_list, new_non_cma_page,
- NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
+ if (migrate_pages(&cma_page_list, alloc_migration_target, NULL,
+ (unsigned long)&mtc, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
/*
* some of the pages failed migration. Do get_user_pages
* without migration.
@@ -1735,7 +1663,7 @@ check_again:
* again migrating any new CMA pages which we failed to isolate
* earlier.
*/
- ret = __get_user_pages_locked(tsk, mm, start, nr_pages,
+ ret = __get_user_pages_locked(mm, start, nr_pages,
pages, vmas, NULL,
gup_flags);
@@ -1749,8 +1677,7 @@ check_again:
return ret;
}
#else
-static long check_and_migrate_cma_pages(struct task_struct *tsk,
- struct mm_struct *mm,
+static long check_and_migrate_cma_pages(struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
struct page **pages,
@@ -1765,8 +1692,7 @@ static long check_and_migrate_cma_pages(struct task_struct *tsk,
* __gup_longterm_locked() is a wrapper for __get_user_pages_locked which
* allows us to process the FOLL_LONGTERM flag.
*/
-static long __gup_longterm_locked(struct task_struct *tsk,
- struct mm_struct *mm,
+static long __gup_longterm_locked(struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
struct page **pages,
@@ -1791,11 +1717,10 @@ static long __gup_longterm_locked(struct task_struct *tsk,
flags = memalloc_nocma_save();
}
- rc = __get_user_pages_locked(tsk, mm, start, nr_pages, pages,
+ rc = __get_user_pages_locked(mm, start, nr_pages, pages,
vmas_tmp, NULL, gup_flags);
if (gup_flags & FOLL_LONGTERM) {
- memalloc_nocma_restore(flags);
if (rc < 0)
goto out;
@@ -1806,32 +1731,50 @@ static long __gup_longterm_locked(struct task_struct *tsk,
goto out;
}
- rc = check_and_migrate_cma_pages(tsk, mm, start, rc, pages,
+ rc = check_and_migrate_cma_pages(mm, start, rc, pages,
vmas_tmp, gup_flags);
+out:
+ memalloc_nocma_restore(flags);
}
-out:
if (vmas_tmp != vmas)
kfree(vmas_tmp);
return rc;
}
#else /* !CONFIG_FS_DAX && !CONFIG_CMA */
-static __always_inline long __gup_longterm_locked(struct task_struct *tsk,
- struct mm_struct *mm,
+static __always_inline long __gup_longterm_locked(struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
struct page **pages,
struct vm_area_struct **vmas,
unsigned int flags)
{
- return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
+ return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
NULL, flags);
}
#endif /* CONFIG_FS_DAX || CONFIG_CMA */
+static bool is_valid_gup_flags(unsigned int gup_flags)
+{
+ /*
+ * FOLL_PIN must only be set internally by the pin_user_pages*() APIs,
+ * never directly by the caller, so enforce that with an assertion:
+ */
+ if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
+ return false;
+ /*
+ * FOLL_PIN is a prerequisite to FOLL_LONGTERM. Another way of saying
+ * that is, FOLL_LONGTERM is a specific case, more restrictive case of
+ * FOLL_PIN.
+ */
+ if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
+ return false;
+
+ return true;
+}
+
#ifdef CONFIG_MMU
-static long __get_user_pages_remote(struct task_struct *tsk,
- struct mm_struct *mm,
+static long __get_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *locked)
@@ -1850,20 +1793,18 @@ static long __get_user_pages_remote(struct task_struct *tsk,
* This will check the vmas (even if our vmas arg is NULL)
* and return -ENOTSUPP if DAX isn't allowed in this case:
*/
- return __gup_longterm_locked(tsk, mm, start, nr_pages, pages,
+ return __gup_longterm_locked(mm, start, nr_pages, pages,
vmas, gup_flags | FOLL_TOUCH |
FOLL_REMOTE);
}
- return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
+ return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
locked,
gup_flags | FOLL_TOUCH | FOLL_REMOTE);
}
/**
* get_user_pages_remote() - pin user pages in memory
- * @tsk: the task_struct to use for page fault accounting, or
- * NULL if faults are not to be recorded.
* @mm: mm_struct of target mm
* @start: starting user address
* @nr_pages: number of pages from start to pin
@@ -1922,25 +1863,21 @@ static long __get_user_pages_remote(struct task_struct *tsk,
* should use get_user_pages_remote because it cannot pass
* FAULT_FLAG_ALLOW_RETRY to handle_mm_fault.
*/
-long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
+long get_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *locked)
{
- /*
- * FOLL_PIN must only be set internally by the pin_user_pages*() APIs,
- * never directly by the caller, so enforce that with an assertion:
- */
- if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
+ if (!is_valid_gup_flags(gup_flags))
return -EINVAL;
- return __get_user_pages_remote(tsk, mm, start, nr_pages, gup_flags,
+ return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
pages, vmas, locked);
}
EXPORT_SYMBOL(get_user_pages_remote);
#else /* CONFIG_MMU */
-long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
+long get_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *locked)
@@ -1948,8 +1885,7 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
return 0;
}
-static long __get_user_pages_remote(struct task_struct *tsk,
- struct mm_struct *mm,
+static long __get_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *locked)
@@ -1969,24 +1905,19 @@ static long __get_user_pages_remote(struct task_struct *tsk,
* @vmas: array of pointers to vmas corresponding to each page.
* Or NULL if the caller does not require them.
*
- * This is the same as get_user_pages_remote(), just with a
- * less-flexible calling convention where we assume that the task
- * and mm being operated on are the current task's and don't allow
- * passing of a locked parameter. We also obviously don't pass
- * FOLL_REMOTE in here.
+ * This is the same as get_user_pages_remote(), just with a less-flexible
+ * calling convention where we assume that the mm being operated on belongs to
+ * the current task, and doesn't allow passing of a locked parameter. We also
+ * obviously don't pass FOLL_REMOTE in here.
*/
long get_user_pages(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas)
{
- /*
- * FOLL_PIN must only be set internally by the pin_user_pages*() APIs,
- * never directly by the caller, so enforce that with an assertion:
- */
- if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
+ if (!is_valid_gup_flags(gup_flags))
return -EINVAL;
- return __gup_longterm_locked(current, current->mm, start, nr_pages,
+ return __gup_longterm_locked(current->mm, start, nr_pages,
pages, vmas, gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages);
@@ -1996,7 +1927,7 @@ EXPORT_SYMBOL(get_user_pages);
*
* mmap_read_lock(mm);
* do_something()
- * get_user_pages(tsk, mm, ..., pages, NULL);
+ * get_user_pages(mm, ..., pages, NULL);
* mmap_read_unlock(mm);
*
* to:
@@ -2004,7 +1935,7 @@ EXPORT_SYMBOL(get_user_pages);
* int locked = 1;
* mmap_read_lock(mm);
* do_something()
- * get_user_pages_locked(tsk, mm, ..., pages, &locked);
+ * get_user_pages_locked(mm, ..., pages, &locked);
* if (locked)
* mmap_read_unlock(mm);
*
@@ -2042,7 +1973,7 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
return -EINVAL;
- return __get_user_pages_locked(current, current->mm, start, nr_pages,
+ return __get_user_pages_locked(current->mm, start, nr_pages,
pages, NULL, locked,
gup_flags | FOLL_TOUCH);
}
@@ -2052,12 +1983,12 @@ EXPORT_SYMBOL(get_user_pages_locked);
* get_user_pages_unlocked() is suitable to replace the form:
*
* mmap_read_lock(mm);
- * get_user_pages(tsk, mm, ..., pages, NULL);
+ * get_user_pages(mm, ..., pages, NULL);
* mmap_read_unlock(mm);
*
* with:
*
- * get_user_pages_unlocked(tsk, mm, ..., pages);
+ * get_user_pages_unlocked(mm, ..., pages);
*
* It is functionally equivalent to get_user_pages_fast so
* get_user_pages_fast should be used instead if specific gup_flags
@@ -2080,7 +2011,7 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
return -EINVAL;
mmap_read_lock(mm);
- ret = __get_user_pages_locked(current, mm, start, nr_pages, pages, NULL,
+ ret = __get_user_pages_locked(mm, start, nr_pages, pages, NULL,
&locked, gup_flags | FOLL_TOUCH);
if (locked)
mmap_read_unlock(mm);
@@ -2575,13 +2506,13 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
return 1;
}
-static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
+static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned long end,
unsigned int flags, struct page **pages, int *nr)
{
unsigned long next;
pmd_t *pmdp;
- pmdp = pmd_offset(&pud, addr);
+ pmdp = pmd_offset_lockless(pudp, pud, addr);
do {
pmd_t pmd = READ_ONCE(*pmdp);
@@ -2618,13 +2549,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
return 1;
}
-static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
+static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned long end,
unsigned int flags, struct page **pages, int *nr)
{
unsigned long next;
pud_t *pudp;
- pudp = pud_offset(&p4d, addr);
+ pudp = pud_offset_lockless(p4dp, p4d, addr);
do {
pud_t pud = READ_ONCE(*pudp);
@@ -2639,20 +2570,20 @@ static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
PUD_SHIFT, next, flags, pages, nr))
return 0;
- } else if (!gup_pmd_range(pud, addr, next, flags, pages, nr))
+ } else if (!gup_pmd_range(pudp, pud, addr, next, flags, pages, nr))
return 0;
} while (pudp++, addr = next, addr != end);
return 1;
}
-static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
+static int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end,
unsigned int flags, struct page **pages, int *nr)
{
unsigned long next;
p4d_t *p4dp;
- p4dp = p4d_offset(&pgd, addr);
+ p4dp = p4d_offset_lockless(pgdp, pgd, addr);
do {
p4d_t p4d = READ_ONCE(*p4dp);
@@ -2664,7 +2595,7 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
P4D_SHIFT, next, flags, pages, nr))
return 0;
- } else if (!gup_pud_range(p4d, addr, next, flags, pages, nr))
+ } else if (!gup_pud_range(p4dp, p4d, addr, next, flags, pages, nr))
return 0;
} while (p4dp++, addr = next, addr != end);
@@ -2692,7 +2623,7 @@ static void gup_pgd_range(unsigned long addr, unsigned long end,
if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
PGDIR_SHIFT, next, flags, pages, nr))
return;
- } else if (!gup_p4d_range(pgd, addr, next, flags, pages, nr))
+ } else if (!gup_p4d_range(pgdp, pgd, addr, next, flags, pages, nr))
return;
} while (pgdp++, addr = next, addr != end);
}
@@ -2725,7 +2656,7 @@ static int __gup_longterm_unlocked(unsigned long start, int nr_pages,
*/
if (gup_flags & FOLL_LONGTERM) {
mmap_read_lock(current->mm);
- ret = __gup_longterm_locked(current, current->mm,
+ ret = __gup_longterm_locked(current->mm,
start, nr_pages,
pages, NULL, gup_flags);
mmap_read_unlock(current->mm);
@@ -2750,6 +2681,9 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages,
FOLL_FAST_ONLY)))
return -EINVAL;
+ if (gup_flags & FOLL_PIN)
+ atomic_set(&current->mm->has_pinned, 1);
+
if (!(gup_flags & FOLL_FAST_ONLY))
might_lock_read(&current->mm->mmap_lock);
@@ -2764,19 +2698,6 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages,
return -EFAULT;
/*
- * The FAST_GUP case requires FOLL_WRITE even for pure reads,
- * because get_user_pages() may need to cause an early COW in
- * order to avoid confusing the normal COW routines. So only
- * targets that are already writable are safe to do by just
- * looking at the page tables.
- *
- * NOTE! With FOLL_FAST_ONLY we allow read-only gup_fast() here,
- * because there is no slow path to fall back on. But you'd
- * better be careful about possible COW pages - you'll get _a_
- * COW page, but not necessarily the one you intended to get
- * depending on what COW event happens after this. COW may break
- * the page copy in a random direction.
- *
* Disable interrupts. The nested form is used, in order to allow
* full, general purpose use of this routine.
*
@@ -2789,8 +2710,6 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages,
*/
if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) && gup_fast_permitted(start, end)) {
unsigned long fast_flags = gup_flags;
- if (!(gup_flags & FOLL_FAST_ONLY))
- fast_flags |= FOLL_WRITE;
local_irq_save(flags);
gup_pgd_range(addr, end, fast_flags, pages, &nr_pinned);
@@ -2885,11 +2804,7 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast_only);
int get_user_pages_fast(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages)
{
- /*
- * FOLL_PIN must only be set internally by the pin_user_pages*() APIs,
- * never directly by the caller, so enforce that:
- */
- if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
+ if (!is_valid_gup_flags(gup_flags))
return -EINVAL;
/*
@@ -2968,10 +2883,8 @@ int pin_user_pages_fast_only(unsigned long start, int nr_pages,
EXPORT_SYMBOL_GPL(pin_user_pages_fast_only);
/**
- * pin_user_pages_remote() - pin pages of a remote process (task != current)
+ * pin_user_pages_remote() - pin pages of a remote process
*
- * @tsk: the task_struct to use for page fault accounting, or
- * NULL if faults are not to be recorded.
* @mm: mm_struct of target mm
* @start: starting user address
* @nr_pages: number of pages from start to pin
@@ -2992,7 +2905,7 @@ EXPORT_SYMBOL_GPL(pin_user_pages_fast_only);
* FOLL_PIN means that the pages must be released via unpin_user_page(). Please
* see Documentation/core-api/pin_user_pages.rst for details.
*/
-long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
+long pin_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *locked)
@@ -3002,7 +2915,7 @@ long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
return -EINVAL;
gup_flags |= FOLL_PIN;
- return __get_user_pages_remote(tsk, mm, start, nr_pages, gup_flags,
+ return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
pages, vmas, locked);
}
EXPORT_SYMBOL(pin_user_pages_remote);
@@ -3034,7 +2947,7 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages,
return -EINVAL;
gup_flags |= FOLL_PIN;
- return __gup_longterm_locked(current, current->mm, start, nr_pages,
+ return __gup_longterm_locked(current->mm, start, nr_pages,
pages, vmas, gup_flags);
}
EXPORT_SYMBOL(pin_user_pages);
@@ -3079,7 +2992,7 @@ long pin_user_pages_locked(unsigned long start, unsigned long nr_pages,
return -EINVAL;
gup_flags |= FOLL_PIN;
- return __get_user_pages_locked(current, current->mm, start, nr_pages,
+ return __get_user_pages_locked(current->mm, start, nr_pages,
pages, NULL, locked,
gup_flags | FOLL_TOUCH);
}