diff options
Diffstat (limited to 'mm/gup.c')
-rw-r--r-- | mm/gup.c | 293 |
1 files changed, 103 insertions, 190 deletions
@@ -329,6 +329,13 @@ void unpin_user_pages(struct page **pages, unsigned long npages) unsigned long index; /* + * If this WARN_ON() fires, then the system *might* be leaking pages (by + * leaving them pinned), but probably not. More likely, gup/pup returned + * a hard -ERRNO error to the caller, who erroneously passed it here. + */ + if (WARN_ON(IS_ERR_VALUE(npages))) + return; + /* * TODO: this can be optimized for huge pages: if a series of pages is * physically contiguous and part of the same compound page, then a * single operation to the head page should suffice. @@ -381,22 +388,13 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address, } /* - * FOLL_FORCE or a forced COW break can write even to unwritable pte's, - * but only after we've gone through a COW cycle and they are dirty. + * FOLL_FORCE can write to even unwritable pte's, but only + * after we've gone through a COW cycle and they are dirty. */ static inline bool can_follow_write_pte(pte_t pte, unsigned int flags) { - return pte_write(pte) || ((flags & FOLL_COW) && pte_dirty(pte)); -} - -/* - * A (separate) COW fault might break the page the other way and - * get_user_pages() would return the page from what is now the wrong - * VM. So we need to force a COW break at GUP time even for reads. - */ -static inline bool should_force_cow_break(struct vm_area_struct *vma, unsigned int flags) -{ - return is_cow_mapping(vma->vm_flags) && (flags & (FOLL_GET | FOLL_PIN)); + return pte_write(pte) || + ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte)); } static struct page *follow_page_pte(struct vm_area_struct *vma, @@ -843,7 +841,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, goto unmap; *page = pte_page(*pte); } - if (unlikely(!try_get_page(*page))) { + if (unlikely(!try_grab_page(*page, gup_flags))) { ret = -ENOMEM; goto unmap; } @@ -859,7 +857,7 @@ unmap: * does not include FOLL_NOWAIT, the mmap_lock may be released. If it * is, *@locked will be set to 0 and -EBUSY returned. */ -static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, +static int faultin_page(struct vm_area_struct *vma, unsigned long address, unsigned int *flags, int *locked) { unsigned int fault_flags = 0; @@ -884,7 +882,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, fault_flags |= FAULT_FLAG_TRIED; } - ret = handle_mm_fault(vma, address, fault_flags); + ret = handle_mm_fault(vma, address, fault_flags, NULL); if (ret & VM_FAULT_ERROR) { int err = vm_fault_to_errno(ret, *flags); @@ -893,13 +891,6 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, BUG(); } - if (tsk) { - if (ret & VM_FAULT_MAJOR) - tsk->maj_flt++; - else - tsk->min_flt++; - } - if (ret & VM_FAULT_RETRY) { if (locked && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT)) *locked = 0; @@ -969,7 +960,6 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) /** * __get_user_pages() - pin user pages in memory - * @tsk: task_struct of target task * @mm: mm_struct of target mm * @start: starting user address * @nr_pages: number of pages from start to pin @@ -1028,7 +1018,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) * instead of __get_user_pages. __get_user_pages should be used only if * you need some special @gup_flags. */ -static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, +static long __get_user_pages(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked) @@ -1075,11 +1065,9 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, goto out; } if (is_vm_hugetlb_page(vma)) { - if (should_force_cow_break(vma, foll_flags)) - foll_flags |= FOLL_WRITE; i = follow_hugetlb_page(mm, vma, pages, vmas, &start, &nr_pages, i, - foll_flags, locked); + gup_flags, locked); if (locked && *locked == 0) { /* * We've got a VM_FAULT_RETRY @@ -1093,10 +1081,6 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, continue; } } - - if (should_force_cow_break(vma, foll_flags)) - foll_flags |= FOLL_WRITE; - retry: /* * If we have a pending SIGKILL, don't keep faulting pages and @@ -1110,8 +1094,7 @@ retry: page = follow_page_mask(vma, start, foll_flags, &ctx); if (!page) { - ret = faultin_page(tsk, vma, start, &foll_flags, - locked); + ret = faultin_page(vma, start, &foll_flags, locked); switch (ret) { case 0: goto retry; @@ -1185,8 +1168,6 @@ static bool vma_permits_fault(struct vm_area_struct *vma, /** * fixup_user_fault() - manually resolve a user page fault - * @tsk: the task_struct to use for page fault accounting, or - * NULL if faults are not to be recorded. * @mm: mm_struct of target mm * @address: user address * @fault_flags:flags to pass down to handle_mm_fault() @@ -1214,7 +1195,7 @@ static bool vma_permits_fault(struct vm_area_struct *vma, * This function will not return with an unlocked mmap_lock. So it has not the * same semantics wrt the @mm->mmap_lock as does filemap_fault(). */ -int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, +int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked) { @@ -1238,7 +1219,7 @@ retry: fatal_signal_pending(current)) return -EINTR; - ret = handle_mm_fault(vma, address, fault_flags); + ret = handle_mm_fault(vma, address, fault_flags, NULL); major |= ret & VM_FAULT_MAJOR; if (ret & VM_FAULT_ERROR) { int err = vm_fault_to_errno(ret, 0); @@ -1255,12 +1236,6 @@ retry: goto retry; } - if (tsk) { - if (major) - tsk->maj_flt++; - else - tsk->min_flt++; - } return 0; } EXPORT_SYMBOL_GPL(fixup_user_fault); @@ -1269,8 +1244,7 @@ EXPORT_SYMBOL_GPL(fixup_user_fault); * Please note that this function, unlike __get_user_pages will not * return 0 for nr_pages > 0 without FOLL_NOWAIT */ -static __always_inline long __get_user_pages_locked(struct task_struct *tsk, - struct mm_struct *mm, +static __always_inline long __get_user_pages_locked(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, @@ -1288,6 +1262,9 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, BUG_ON(*locked != 1); } + if (flags & FOLL_PIN) + atomic_set(&mm->has_pinned, 1); + /* * FOLL_PIN and FOLL_GET are mutually exclusive. Traditional behavior * is to set FOLL_GET if the caller wants pages[] filled in (but has @@ -1303,7 +1280,7 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, pages_done = 0; lock_dropped = false; for (;;) { - ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages, + ret = __get_user_pages(mm, start, nr_pages, flags, pages, vmas, locked); if (!locked) /* VM_FAULT_RETRY couldn't trigger, bypass */ @@ -1363,7 +1340,7 @@ retry: } *locked = 1; - ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED, + ret = __get_user_pages(mm, start, 1, flags | FOLL_TRIED, pages, NULL, locked); if (!*locked) { /* Continue to retry until we succeeded */ @@ -1450,7 +1427,7 @@ long populate_vma_page_range(struct vm_area_struct *vma, * We made sure addr is within a VMA, so the following will * not result in a stack expansion that recurses back here. */ - return __get_user_pages(current, mm, start, nr_pages, gup_flags, + return __get_user_pages(mm, start, nr_pages, gup_flags, NULL, NULL, locked); } @@ -1534,7 +1511,7 @@ struct page *get_dump_page(unsigned long addr) struct vm_area_struct *vma; struct page *page; - if (__get_user_pages(current, current->mm, addr, 1, + if (__get_user_pages(current->mm, addr, 1, FOLL_FORCE | FOLL_DUMP | FOLL_GET, &page, &vma, NULL) < 1) return NULL; @@ -1543,8 +1520,7 @@ struct page *get_dump_page(unsigned long addr) } #endif /* CONFIG_ELF_CORE */ #else /* CONFIG_MMU */ -static long __get_user_pages_locked(struct task_struct *tsk, - struct mm_struct *mm, unsigned long start, +static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, struct vm_area_struct **vmas, int *locked, unsigned int foll_flags) @@ -1609,59 +1585,7 @@ static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages) } #ifdef CONFIG_CMA -static struct page *new_non_cma_page(struct page *page, unsigned long private) -{ - /* - * We want to make sure we allocate the new page from the same node - * as the source page. - */ - int nid = page_to_nid(page); - /* - * Trying to allocate a page for migration. Ignore allocation - * failure warnings. We don't force __GFP_THISNODE here because - * this node here is the node where we have CMA reservation and - * in some case these nodes will have really less non movable - * allocation memory. - */ - gfp_t gfp_mask = GFP_USER | __GFP_NOWARN; - - if (PageHighMem(page)) - gfp_mask |= __GFP_HIGHMEM; - -#ifdef CONFIG_HUGETLB_PAGE - if (PageHuge(page)) { - struct hstate *h = page_hstate(page); - /* - * We don't want to dequeue from the pool because pool pages will - * mostly be from the CMA region. - */ - return alloc_migrate_huge_page(h, gfp_mask, nid, NULL); - } -#endif - if (PageTransHuge(page)) { - struct page *thp; - /* - * ignore allocation failure warnings - */ - gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN; - - /* - * Remove the movable mask so that we don't allocate from - * CMA area again. - */ - thp_gfpmask &= ~__GFP_MOVABLE; - thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER); - if (!thp) - return NULL; - prep_transhuge_page(thp); - return thp; - } - - return __alloc_pages_node(nid, gfp_mask, 0); -} - -static long check_and_migrate_cma_pages(struct task_struct *tsk, - struct mm_struct *mm, +static long check_and_migrate_cma_pages(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, @@ -1674,6 +1598,10 @@ static long check_and_migrate_cma_pages(struct task_struct *tsk, bool migrate_allow = true; LIST_HEAD(cma_page_list); long ret = nr_pages; + struct migration_target_control mtc = { + .nid = NUMA_NO_NODE, + .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_NOWARN, + }; check_again: for (i = 0; i < nr_pages;) { @@ -1704,7 +1632,7 @@ check_again: mod_node_page_state(page_pgdat(head), NR_ISOLATED_ANON + page_is_file_lru(head), - hpage_nr_pages(head)); + thp_nr_pages(head)); } } } @@ -1719,8 +1647,8 @@ check_again: for (i = 0; i < nr_pages; i++) put_page(pages[i]); - if (migrate_pages(&cma_page_list, new_non_cma_page, - NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) { + if (migrate_pages(&cma_page_list, alloc_migration_target, NULL, + (unsigned long)&mtc, MIGRATE_SYNC, MR_CONTIG_RANGE)) { /* * some of the pages failed migration. Do get_user_pages * without migration. @@ -1735,7 +1663,7 @@ check_again: * again migrating any new CMA pages which we failed to isolate * earlier. */ - ret = __get_user_pages_locked(tsk, mm, start, nr_pages, + ret = __get_user_pages_locked(mm, start, nr_pages, pages, vmas, NULL, gup_flags); @@ -1749,8 +1677,7 @@ check_again: return ret; } #else -static long check_and_migrate_cma_pages(struct task_struct *tsk, - struct mm_struct *mm, +static long check_and_migrate_cma_pages(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, @@ -1765,8 +1692,7 @@ static long check_and_migrate_cma_pages(struct task_struct *tsk, * __gup_longterm_locked() is a wrapper for __get_user_pages_locked which * allows us to process the FOLL_LONGTERM flag. */ -static long __gup_longterm_locked(struct task_struct *tsk, - struct mm_struct *mm, +static long __gup_longterm_locked(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, @@ -1791,11 +1717,10 @@ static long __gup_longterm_locked(struct task_struct *tsk, flags = memalloc_nocma_save(); } - rc = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, + rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas_tmp, NULL, gup_flags); if (gup_flags & FOLL_LONGTERM) { - memalloc_nocma_restore(flags); if (rc < 0) goto out; @@ -1806,32 +1731,50 @@ static long __gup_longterm_locked(struct task_struct *tsk, goto out; } - rc = check_and_migrate_cma_pages(tsk, mm, start, rc, pages, + rc = check_and_migrate_cma_pages(mm, start, rc, pages, vmas_tmp, gup_flags); +out: + memalloc_nocma_restore(flags); } -out: if (vmas_tmp != vmas) kfree(vmas_tmp); return rc; } #else /* !CONFIG_FS_DAX && !CONFIG_CMA */ -static __always_inline long __gup_longterm_locked(struct task_struct *tsk, - struct mm_struct *mm, +static __always_inline long __gup_longterm_locked(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, struct vm_area_struct **vmas, unsigned int flags) { - return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas, + return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, NULL, flags); } #endif /* CONFIG_FS_DAX || CONFIG_CMA */ +static bool is_valid_gup_flags(unsigned int gup_flags) +{ + /* + * FOLL_PIN must only be set internally by the pin_user_pages*() APIs, + * never directly by the caller, so enforce that with an assertion: + */ + if (WARN_ON_ONCE(gup_flags & FOLL_PIN)) + return false; + /* + * FOLL_PIN is a prerequisite to FOLL_LONGTERM. Another way of saying + * that is, FOLL_LONGTERM is a specific case, more restrictive case of + * FOLL_PIN. + */ + if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM)) + return false; + + return true; +} + #ifdef CONFIG_MMU -static long __get_user_pages_remote(struct task_struct *tsk, - struct mm_struct *mm, +static long __get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked) @@ -1850,20 +1793,18 @@ static long __get_user_pages_remote(struct task_struct *tsk, * This will check the vmas (even if our vmas arg is NULL) * and return -ENOTSUPP if DAX isn't allowed in this case: */ - return __gup_longterm_locked(tsk, mm, start, nr_pages, pages, + return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, gup_flags | FOLL_TOUCH | FOLL_REMOTE); } - return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas, + return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, locked, gup_flags | FOLL_TOUCH | FOLL_REMOTE); } /** * get_user_pages_remote() - pin user pages in memory - * @tsk: the task_struct to use for page fault accounting, or - * NULL if faults are not to be recorded. * @mm: mm_struct of target mm * @start: starting user address * @nr_pages: number of pages from start to pin @@ -1922,25 +1863,21 @@ static long __get_user_pages_remote(struct task_struct *tsk, * should use get_user_pages_remote because it cannot pass * FAULT_FLAG_ALLOW_RETRY to handle_mm_fault. */ -long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, +long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked) { - /* - * FOLL_PIN must only be set internally by the pin_user_pages*() APIs, - * never directly by the caller, so enforce that with an assertion: - */ - if (WARN_ON_ONCE(gup_flags & FOLL_PIN)) + if (!is_valid_gup_flags(gup_flags)) return -EINVAL; - return __get_user_pages_remote(tsk, mm, start, nr_pages, gup_flags, + return __get_user_pages_remote(mm, start, nr_pages, gup_flags, pages, vmas, locked); } EXPORT_SYMBOL(get_user_pages_remote); #else /* CONFIG_MMU */ -long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, +long get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked) @@ -1948,8 +1885,7 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, return 0; } -static long __get_user_pages_remote(struct task_struct *tsk, - struct mm_struct *mm, +static long __get_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked) @@ -1969,24 +1905,19 @@ static long __get_user_pages_remote(struct task_struct *tsk, * @vmas: array of pointers to vmas corresponding to each page. * Or NULL if the caller does not require them. * - * This is the same as get_user_pages_remote(), just with a - * less-flexible calling convention where we assume that the task - * and mm being operated on are the current task's and don't allow - * passing of a locked parameter. We also obviously don't pass - * FOLL_REMOTE in here. + * This is the same as get_user_pages_remote(), just with a less-flexible + * calling convention where we assume that the mm being operated on belongs to + * the current task, and doesn't allow passing of a locked parameter. We also + * obviously don't pass FOLL_REMOTE in here. */ long get_user_pages(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas) { - /* - * FOLL_PIN must only be set internally by the pin_user_pages*() APIs, - * never directly by the caller, so enforce that with an assertion: - */ - if (WARN_ON_ONCE(gup_flags & FOLL_PIN)) + if (!is_valid_gup_flags(gup_flags)) return -EINVAL; - return __gup_longterm_locked(current, current->mm, start, nr_pages, + return __gup_longterm_locked(current->mm, start, nr_pages, pages, vmas, gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages); @@ -1996,7 +1927,7 @@ EXPORT_SYMBOL(get_user_pages); * * mmap_read_lock(mm); * do_something() - * get_user_pages(tsk, mm, ..., pages, NULL); + * get_user_pages(mm, ..., pages, NULL); * mmap_read_unlock(mm); * * to: @@ -2004,7 +1935,7 @@ EXPORT_SYMBOL(get_user_pages); * int locked = 1; * mmap_read_lock(mm); * do_something() - * get_user_pages_locked(tsk, mm, ..., pages, &locked); + * get_user_pages_locked(mm, ..., pages, &locked); * if (locked) * mmap_read_unlock(mm); * @@ -2042,7 +1973,7 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, if (WARN_ON_ONCE(gup_flags & FOLL_PIN)) return -EINVAL; - return __get_user_pages_locked(current, current->mm, start, nr_pages, + return __get_user_pages_locked(current->mm, start, nr_pages, pages, NULL, locked, gup_flags | FOLL_TOUCH); } @@ -2052,12 +1983,12 @@ EXPORT_SYMBOL(get_user_pages_locked); * get_user_pages_unlocked() is suitable to replace the form: * * mmap_read_lock(mm); - * get_user_pages(tsk, mm, ..., pages, NULL); + * get_user_pages(mm, ..., pages, NULL); * mmap_read_unlock(mm); * * with: * - * get_user_pages_unlocked(tsk, mm, ..., pages); + * get_user_pages_unlocked(mm, ..., pages); * * It is functionally equivalent to get_user_pages_fast so * get_user_pages_fast should be used instead if specific gup_flags @@ -2080,7 +2011,7 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, return -EINVAL; mmap_read_lock(mm); - ret = __get_user_pages_locked(current, mm, start, nr_pages, pages, NULL, + ret = __get_user_pages_locked(mm, start, nr_pages, pages, NULL, &locked, gup_flags | FOLL_TOUCH); if (locked) mmap_read_unlock(mm); @@ -2575,13 +2506,13 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, return 1; } -static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, +static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; pmd_t *pmdp; - pmdp = pmd_offset(&pud, addr); + pmdp = pmd_offset_lockless(pudp, pud, addr); do { pmd_t pmd = READ_ONCE(*pmdp); @@ -2618,13 +2549,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, return 1; } -static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end, +static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; pud_t *pudp; - pudp = pud_offset(&p4d, addr); + pudp = pud_offset_lockless(p4dp, p4d, addr); do { pud_t pud = READ_ONCE(*pudp); @@ -2639,20 +2570,20 @@ static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end, if (!gup_huge_pd(__hugepd(pud_val(pud)), addr, PUD_SHIFT, next, flags, pages, nr)) return 0; - } else if (!gup_pmd_range(pud, addr, next, flags, pages, nr)) + } else if (!gup_pmd_range(pudp, pud, addr, next, flags, pages, nr)) return 0; } while (pudp++, addr = next, addr != end); return 1; } -static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end, +static int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; p4d_t *p4dp; - p4dp = p4d_offset(&pgd, addr); + p4dp = p4d_offset_lockless(pgdp, pgd, addr); do { p4d_t p4d = READ_ONCE(*p4dp); @@ -2664,7 +2595,7 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end, if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr, P4D_SHIFT, next, flags, pages, nr)) return 0; - } else if (!gup_pud_range(p4d, addr, next, flags, pages, nr)) + } else if (!gup_pud_range(p4dp, p4d, addr, next, flags, pages, nr)) return 0; } while (p4dp++, addr = next, addr != end); @@ -2692,7 +2623,7 @@ static void gup_pgd_range(unsigned long addr, unsigned long end, if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, PGDIR_SHIFT, next, flags, pages, nr)) return; - } else if (!gup_p4d_range(pgd, addr, next, flags, pages, nr)) + } else if (!gup_p4d_range(pgdp, pgd, addr, next, flags, pages, nr)) return; } while (pgdp++, addr = next, addr != end); } @@ -2725,7 +2656,7 @@ static int __gup_longterm_unlocked(unsigned long start, int nr_pages, */ if (gup_flags & FOLL_LONGTERM) { mmap_read_lock(current->mm); - ret = __gup_longterm_locked(current, current->mm, + ret = __gup_longterm_locked(current->mm, start, nr_pages, pages, NULL, gup_flags); mmap_read_unlock(current->mm); @@ -2750,6 +2681,9 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages, FOLL_FAST_ONLY))) return -EINVAL; + if (gup_flags & FOLL_PIN) + atomic_set(¤t->mm->has_pinned, 1); + if (!(gup_flags & FOLL_FAST_ONLY)) might_lock_read(¤t->mm->mmap_lock); @@ -2764,19 +2698,6 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages, return -EFAULT; /* - * The FAST_GUP case requires FOLL_WRITE even for pure reads, - * because get_user_pages() may need to cause an early COW in - * order to avoid confusing the normal COW routines. So only - * targets that are already writable are safe to do by just - * looking at the page tables. - * - * NOTE! With FOLL_FAST_ONLY we allow read-only gup_fast() here, - * because there is no slow path to fall back on. But you'd - * better be careful about possible COW pages - you'll get _a_ - * COW page, but not necessarily the one you intended to get - * depending on what COW event happens after this. COW may break - * the page copy in a random direction. - * * Disable interrupts. The nested form is used, in order to allow * full, general purpose use of this routine. * @@ -2789,8 +2710,6 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages, */ if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) && gup_fast_permitted(start, end)) { unsigned long fast_flags = gup_flags; - if (!(gup_flags & FOLL_FAST_ONLY)) - fast_flags |= FOLL_WRITE; local_irq_save(flags); gup_pgd_range(addr, end, fast_flags, pages, &nr_pinned); @@ -2885,11 +2804,7 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast_only); int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages) { - /* - * FOLL_PIN must only be set internally by the pin_user_pages*() APIs, - * never directly by the caller, so enforce that: - */ - if (WARN_ON_ONCE(gup_flags & FOLL_PIN)) + if (!is_valid_gup_flags(gup_flags)) return -EINVAL; /* @@ -2968,10 +2883,8 @@ int pin_user_pages_fast_only(unsigned long start, int nr_pages, EXPORT_SYMBOL_GPL(pin_user_pages_fast_only); /** - * pin_user_pages_remote() - pin pages of a remote process (task != current) + * pin_user_pages_remote() - pin pages of a remote process * - * @tsk: the task_struct to use for page fault accounting, or - * NULL if faults are not to be recorded. * @mm: mm_struct of target mm * @start: starting user address * @nr_pages: number of pages from start to pin @@ -2992,7 +2905,7 @@ EXPORT_SYMBOL_GPL(pin_user_pages_fast_only); * FOLL_PIN means that the pages must be released via unpin_user_page(). Please * see Documentation/core-api/pin_user_pages.rst for details. */ -long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, +long pin_user_pages_remote(struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked) @@ -3002,7 +2915,7 @@ long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, return -EINVAL; gup_flags |= FOLL_PIN; - return __get_user_pages_remote(tsk, mm, start, nr_pages, gup_flags, + return __get_user_pages_remote(mm, start, nr_pages, gup_flags, pages, vmas, locked); } EXPORT_SYMBOL(pin_user_pages_remote); @@ -3034,7 +2947,7 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages, return -EINVAL; gup_flags |= FOLL_PIN; - return __gup_longterm_locked(current, current->mm, start, nr_pages, + return __gup_longterm_locked(current->mm, start, nr_pages, pages, vmas, gup_flags); } EXPORT_SYMBOL(pin_user_pages); @@ -3079,7 +2992,7 @@ long pin_user_pages_locked(unsigned long start, unsigned long nr_pages, return -EINVAL; gup_flags |= FOLL_PIN; - return __get_user_pages_locked(current, current->mm, start, nr_pages, + return __get_user_pages_locked(current->mm, start, nr_pages, pages, NULL, locked, gup_flags | FOLL_TOUCH); } |