summaryrefslogtreecommitdiff
path: root/mm/gup.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/gup.c')
-rw-r--r--mm/gup.c325
1 files changed, 180 insertions, 145 deletions
diff --git a/mm/gup.c b/mm/gup.c
index ef7d2da9f03f..3ded6a5f26b2 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -87,11 +87,12 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page,
int orig_refs = refs;
/*
- * Can't do FOLL_LONGTERM + FOLL_PIN with CMA in the gup fast
- * path, so fail and let the caller fall back to the slow path.
+ * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a
+ * right zone, so fail and let the caller fall back to the slow
+ * path.
*/
- if (unlikely(flags & FOLL_LONGTERM) &&
- is_migrate_cma_page(page))
+ if (unlikely((flags & FOLL_LONGTERM) &&
+ !is_pinnable_page(page)))
return NULL;
/*
@@ -213,6 +214,58 @@ void unpin_user_page(struct page *page)
}
EXPORT_SYMBOL(unpin_user_page);
+static inline void compound_range_next(unsigned long i, unsigned long npages,
+ struct page **list, struct page **head,
+ unsigned int *ntails)
+{
+ struct page *next, *page;
+ unsigned int nr = 1;
+
+ if (i >= npages)
+ return;
+
+ next = *list + i;
+ page = compound_head(next);
+ if (PageCompound(page) && compound_order(page) >= 1)
+ nr = min_t(unsigned int,
+ page + compound_nr(page) - next, npages - i);
+
+ *head = page;
+ *ntails = nr;
+}
+
+#define for_each_compound_range(__i, __list, __npages, __head, __ntails) \
+ for (__i = 0, \
+ compound_range_next(__i, __npages, __list, &(__head), &(__ntails)); \
+ __i < __npages; __i += __ntails, \
+ compound_range_next(__i, __npages, __list, &(__head), &(__ntails)))
+
+static inline void compound_next(unsigned long i, unsigned long npages,
+ struct page **list, struct page **head,
+ unsigned int *ntails)
+{
+ struct page *page;
+ unsigned int nr;
+
+ if (i >= npages)
+ return;
+
+ page = compound_head(list[i]);
+ for (nr = i + 1; nr < npages; nr++) {
+ if (compound_head(list[nr]) != page)
+ break;
+ }
+
+ *head = page;
+ *ntails = nr - i;
+}
+
+#define for_each_compound_head(__i, __list, __npages, __head, __ntails) \
+ for (__i = 0, \
+ compound_next(__i, __npages, __list, &(__head), &(__ntails)); \
+ __i < __npages; __i += __ntails, \
+ compound_next(__i, __npages, __list, &(__head), &(__ntails)))
+
/**
* unpin_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages
* @pages: array of pages to be maybe marked dirty, and definitely released.
@@ -239,20 +292,15 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
bool make_dirty)
{
unsigned long index;
-
- /*
- * TODO: this can be optimized for huge pages: if a series of pages is
- * physically contiguous and part of the same compound page, then a
- * single operation to the head page should suffice.
- */
+ struct page *head;
+ unsigned int ntails;
if (!make_dirty) {
unpin_user_pages(pages, npages);
return;
}
- for (index = 0; index < npages; index++) {
- struct page *page = compound_head(pages[index]);
+ for_each_compound_head(index, pages, npages, head, ntails) {
/*
* Checking PageDirty at this point may race with
* clear_page_dirty_for_io(), but that's OK. Two key
@@ -273,14 +321,50 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
* written back, so it gets written back again in the
* next writeback cycle. This is harmless.
*/
- if (!PageDirty(page))
- set_page_dirty_lock(page);
- unpin_user_page(page);
+ if (!PageDirty(head))
+ set_page_dirty_lock(head);
+ put_compound_head(head, ntails, FOLL_PIN);
}
}
EXPORT_SYMBOL(unpin_user_pages_dirty_lock);
/**
+ * unpin_user_page_range_dirty_lock() - release and optionally dirty
+ * gup-pinned page range
+ *
+ * @page: the starting page of a range maybe marked dirty, and definitely released.
+ * @npages: number of consecutive pages to release.
+ * @make_dirty: whether to mark the pages dirty
+ *
+ * "gup-pinned page range" refers to a range of pages that has had one of the
+ * pin_user_pages() variants called on that page.
+ *
+ * For the page ranges defined by [page .. page+npages], make that range (or
+ * its head pages, if a compound page) dirty, if @make_dirty is true, and if the
+ * page range was previously listed as clean.
+ *
+ * set_page_dirty_lock() is used internally. If instead, set_page_dirty() is
+ * required, then the caller should a) verify that this is really correct,
+ * because _lock() is usually required, and b) hand code it:
+ * set_page_dirty_lock(), unpin_user_page().
+ *
+ */
+void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages,
+ bool make_dirty)
+{
+ unsigned long index;
+ struct page *head;
+ unsigned int ntails;
+
+ for_each_compound_range(index, &page, npages, head, ntails) {
+ if (make_dirty && !PageDirty(head))
+ set_page_dirty_lock(head);
+ put_compound_head(head, ntails, FOLL_PIN);
+ }
+}
+EXPORT_SYMBOL(unpin_user_page_range_dirty_lock);
+
+/**
* unpin_user_pages() - release an array of gup-pinned pages.
* @pages: array of pages to be marked dirty and released.
* @npages: number of pages in the @pages array.
@@ -292,6 +376,8 @@ EXPORT_SYMBOL(unpin_user_pages_dirty_lock);
void unpin_user_pages(struct page **pages, unsigned long npages)
{
unsigned long index;
+ struct page *head;
+ unsigned int ntails;
/*
* If this WARN_ON() fires, then the system *might* be leaking pages (by
@@ -300,13 +386,9 @@ void unpin_user_pages(struct page **pages, unsigned long npages)
*/
if (WARN_ON(IS_ERR_VALUE(npages)))
return;
- /*
- * TODO: this can be optimized for huge pages: if a series of pages is
- * physically contiguous and part of the same compound page, then a
- * single operation to the head page should suffice.
- */
- for (index = 0; index < npages; index++)
- unpin_user_page(pages[index]);
+
+ for_each_compound_head(index, pages, npages, head, ntails)
+ put_compound_head(head, ntails, FOLL_PIN);
}
EXPORT_SYMBOL(unpin_user_pages);
@@ -435,18 +517,6 @@ retry:
}
}
- if (flags & FOLL_SPLIT && PageTransCompound(page)) {
- get_page(page);
- pte_unmap_unlock(ptep, ptl);
- lock_page(page);
- ret = split_huge_page(page);
- unlock_page(page);
- put_page(page);
- if (ret)
- return ERR_PTR(ret);
- goto retry;
- }
-
/* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */
if (unlikely(!try_grab_page(page, flags))) {
page = ERR_PTR(-ENOMEM);
@@ -591,7 +661,7 @@ retry_locked:
spin_unlock(ptl);
return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
}
- if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) {
+ if (flags & FOLL_SPLIT_PMD) {
int ret;
page = pmd_page(*pmd);
if (is_huge_zero_page(page)) {
@@ -600,19 +670,7 @@ retry_locked:
split_huge_pmd(vma, pmd, address);
if (pmd_trans_unstable(pmd))
ret = -EBUSY;
- } else if (flags & FOLL_SPLIT) {
- if (unlikely(!try_get_page(page))) {
- spin_unlock(ptl);
- return ERR_PTR(-ENOMEM);
- }
- spin_unlock(ptl);
- lock_page(page);
- ret = split_huge_page(page);
- unlock_page(page);
- put_page(page);
- if (pmd_none(*pmd))
- return no_page_table(vma, flags);
- } else { /* flags & FOLL_SPLIT_PMD */
+ } else {
spin_unlock(ptl);
split_huge_pmd(vma, pmd, address);
ret = pte_alloc(mm, pmd) ? -ENOMEM : 0;
@@ -1470,7 +1528,7 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
{
struct vm_area_struct *vma;
unsigned long vm_flags;
- int i;
+ long i;
/* calculate required read or write permissions.
* If FOLL_FORCE is set, we only require the "MAY" flags.
@@ -1517,7 +1575,7 @@ finish_or_fault:
* Returns NULL on any kind of failure - a hole must then be inserted into
* the corefile, to preserve alignment with its headers; and also returns
* NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
- * allowing a hole to be left in the corefile to save diskspace.
+ * allowing a hole to be left in the corefile to save disk space.
*
* Called without mmap_lock (takes and releases the mmap_lock by itself).
*/
@@ -1535,120 +1593,96 @@ struct page *get_dump_page(unsigned long addr)
FOLL_FORCE | FOLL_DUMP | FOLL_GET);
if (locked)
mmap_read_unlock(mm);
-
- if (ret == 1 && is_page_poisoned(page))
- return NULL;
-
return (ret == 1) ? page : NULL;
}
#endif /* CONFIG_ELF_CORE */
-#ifdef CONFIG_CMA
-static long check_and_migrate_cma_pages(struct mm_struct *mm,
- unsigned long start,
- unsigned long nr_pages,
- struct page **pages,
- struct vm_area_struct **vmas,
- unsigned int gup_flags)
+#ifdef CONFIG_MIGRATION
+/*
+ * Check whether all pages are pinnable, if so return number of pages. If some
+ * pages are not pinnable, migrate them, and unpin all pages. Return zero if
+ * pages were migrated, or if some pages were not successfully isolated.
+ * Return negative error if migration fails.
+ */
+static long check_and_migrate_movable_pages(unsigned long nr_pages,
+ struct page **pages,
+ unsigned int gup_flags)
{
unsigned long i;
- unsigned long step;
+ unsigned long isolation_error_count = 0;
bool drain_allow = true;
- bool migrate_allow = true;
- LIST_HEAD(cma_page_list);
- long ret = nr_pages;
+ LIST_HEAD(movable_page_list);
+ long ret = 0;
+ struct page *prev_head = NULL;
+ struct page *head;
struct migration_target_control mtc = {
.nid = NUMA_NO_NODE,
- .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_NOWARN,
+ .gfp_mask = GFP_USER | __GFP_NOWARN,
};
-check_again:
- for (i = 0; i < nr_pages;) {
-
- struct page *head = compound_head(pages[i]);
-
- /*
- * gup may start from a tail page. Advance step by the left
- * part.
- */
- step = compound_nr(head) - (pages[i] - head);
+ for (i = 0; i < nr_pages; i++) {
+ head = compound_head(pages[i]);
+ if (head == prev_head)
+ continue;
+ prev_head = head;
/*
- * If we get a page from the CMA zone, since we are going to
- * be pinning these entries, we might as well move them out
- * of the CMA zone if possible.
+ * If we get a movable page, since we are going to be pinning
+ * these entries, try to move them out if possible.
*/
- if (is_migrate_cma_page(head)) {
- if (PageHuge(head))
- isolate_huge_page(head, &cma_page_list);
- else {
+ if (!is_pinnable_page(head)) {
+ if (PageHuge(head)) {
+ if (!isolate_huge_page(head, &movable_page_list))
+ isolation_error_count++;
+ } else {
if (!PageLRU(head) && drain_allow) {
lru_add_drain_all();
drain_allow = false;
}
- if (!isolate_lru_page(head)) {
- list_add_tail(&head->lru, &cma_page_list);
- mod_node_page_state(page_pgdat(head),
- NR_ISOLATED_ANON +
- page_is_file_lru(head),
- thp_nr_pages(head));
+ if (isolate_lru_page(head)) {
+ isolation_error_count++;
+ continue;
}
+ list_add_tail(&head->lru, &movable_page_list);
+ mod_node_page_state(page_pgdat(head),
+ NR_ISOLATED_ANON +
+ page_is_file_lru(head),
+ thp_nr_pages(head));
}
}
-
- i += step;
}
- if (!list_empty(&cma_page_list)) {
- /*
- * drop the above get_user_pages reference.
- */
- if (gup_flags & FOLL_PIN)
- unpin_user_pages(pages, nr_pages);
- else
- for (i = 0; i < nr_pages; i++)
- put_page(pages[i]);
-
- if (migrate_pages(&cma_page_list, alloc_migration_target, NULL,
- (unsigned long)&mtc, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
- /*
- * some of the pages failed migration. Do get_user_pages
- * without migration.
- */
- migrate_allow = false;
+ /*
+ * If list is empty, and no isolation errors, means that all pages are
+ * in the correct zone.
+ */
+ if (list_empty(&movable_page_list) && !isolation_error_count)
+ return nr_pages;
- if (!list_empty(&cma_page_list))
- putback_movable_pages(&cma_page_list);
- }
- /*
- * We did migrate all the pages, Try to get the page references
- * again migrating any new CMA pages which we failed to isolate
- * earlier.
- */
- ret = __get_user_pages_locked(mm, start, nr_pages,
- pages, vmas, NULL,
- gup_flags);
-
- if ((ret > 0) && migrate_allow) {
- nr_pages = ret;
- drain_allow = true;
- goto check_again;
- }
+ if (gup_flags & FOLL_PIN) {
+ unpin_user_pages(pages, nr_pages);
+ } else {
+ for (i = 0; i < nr_pages; i++)
+ put_page(pages[i]);
+ }
+ if (!list_empty(&movable_page_list)) {
+ ret = migrate_pages(&movable_page_list, alloc_migration_target,
+ NULL, (unsigned long)&mtc, MIGRATE_SYNC,
+ MR_LONGTERM_PIN);
+ if (ret && !list_empty(&movable_page_list))
+ putback_movable_pages(&movable_page_list);
}
- return ret;
+ return ret > 0 ? -ENOMEM : ret;
}
#else
-static long check_and_migrate_cma_pages(struct mm_struct *mm,
- unsigned long start,
- unsigned long nr_pages,
- struct page **pages,
- struct vm_area_struct **vmas,
- unsigned int gup_flags)
+static long check_and_migrate_movable_pages(unsigned long nr_pages,
+ struct page **pages,
+ unsigned int gup_flags)
{
return nr_pages;
}
-#endif /* CONFIG_CMA */
+#endif /* CONFIG_MIGRATION */
/*
* __gup_longterm_locked() is a wrapper for __get_user_pages_locked which
@@ -1661,21 +1695,22 @@ static long __gup_longterm_locked(struct mm_struct *mm,
struct vm_area_struct **vmas,
unsigned int gup_flags)
{
- unsigned long flags = 0;
+ unsigned int flags;
long rc;
- if (gup_flags & FOLL_LONGTERM)
- flags = memalloc_nocma_save();
-
- rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas, NULL,
- gup_flags);
+ if (!(gup_flags & FOLL_LONGTERM))
+ return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
+ NULL, gup_flags);
+ flags = memalloc_pin_save();
+ do {
+ rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
+ NULL, gup_flags);
+ if (rc <= 0)
+ break;
+ rc = check_and_migrate_movable_pages(rc, pages, gup_flags);
+ } while (!rc);
+ memalloc_pin_restore(flags);
- if (gup_flags & FOLL_LONGTERM) {
- if (rc > 0)
- rc = check_and_migrate_cma_pages(mm, start, rc, pages,
- vmas, gup_flags);
- memalloc_nocma_restore(flags);
- }
return rc;
}