diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-23 03:03:12 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-23 03:03:12 +0300 |
commit | 9030fb0bb9d607908d51f9ee02efdbe01da355ee (patch) | |
tree | 9ee1d9d47fbb4b30c7f5cbc291432e666e58967a /mm/gup.c | |
parent | 3bf03b9a0839c9fb06927ae53ebd0f960b19d408 (diff) | |
parent | 2a3c4bce3edb0d54983384aa8a88c0da330638f4 (diff) | |
download | linux-9030fb0bb9d607908d51f9ee02efdbe01da355ee.tar.xz |
Merge tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache
Pull folio updates from Matthew Wilcox:
- Rewrite how munlock works to massively reduce the contention on
i_mmap_rwsem (Hugh Dickins):
https://lore.kernel.org/linux-mm/8e4356d-9622-a7f0-b2c-f116b5f2efea@google.com/
- Sort out the page refcount mess for ZONE_DEVICE pages (Christoph
Hellwig):
https://lore.kernel.org/linux-mm/20220210072828.2930359-1-hch@lst.de/
- Convert GUP to use folios and make pincount available for order-1
pages. (Matthew Wilcox)
- Convert a few more truncation functions to use folios (Matthew
Wilcox)
- Convert page_vma_mapped_walk to use PFNs instead of pages (Matthew
Wilcox)
- Convert rmap_walk to use folios (Matthew Wilcox)
- Convert most of shrink_page_list() to use a folio (Matthew Wilcox)
- Add support for creating large folios in readahead (Matthew Wilcox)
* tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache: (114 commits)
mm/damon: minor cleanup for damon_pa_young
selftests/vm/transhuge-stress: Support file-backed PMD folios
mm/filemap: Support VM_HUGEPAGE for file mappings
mm/readahead: Switch to page_cache_ra_order
mm/readahead: Align file mappings for non-DAX
mm/readahead: Add large folio readahead
mm: Support arbitrary THP sizes
mm: Make large folios depend on THP
mm: Fix READ_ONLY_THP warning
mm/filemap: Allow large folios to be added to the page cache
mm: Turn can_split_huge_page() into can_split_folio()
mm/vmscan: Convert pageout() to take a folio
mm/vmscan: Turn page_check_references() into folio_check_references()
mm/vmscan: Account large folios correctly
mm/vmscan: Optimise shrink_page_list for non-PMD-sized folios
mm/vmscan: Free non-shmem folios without splitting them
mm/rmap: Constify the rmap_walk_control argument
mm/rmap: Convert rmap_walk() to take a folio
mm: Turn page_anon_vma() into folio_anon_vma()
mm/rmap: Turn page_lock_anon_vma_read() into folio_lock_anon_vma_read()
...
Diffstat (limited to 'mm/gup.c')
-rw-r--r-- | mm/gup.c | 486 |
1 files changed, 205 insertions, 281 deletions
@@ -29,107 +29,71 @@ struct follow_page_context { unsigned int page_mask; }; -static void hpage_pincount_add(struct page *page, int refs) -{ - VM_BUG_ON_PAGE(!hpage_pincount_available(page), page); - VM_BUG_ON_PAGE(page != compound_head(page), page); - - atomic_add(refs, compound_pincount_ptr(page)); -} - -static void hpage_pincount_sub(struct page *page, int refs) -{ - VM_BUG_ON_PAGE(!hpage_pincount_available(page), page); - VM_BUG_ON_PAGE(page != compound_head(page), page); - - atomic_sub(refs, compound_pincount_ptr(page)); -} - -/* Equivalent to calling put_page() @refs times. */ -static void put_page_refs(struct page *page, int refs) -{ -#ifdef CONFIG_DEBUG_VM - if (VM_WARN_ON_ONCE_PAGE(page_ref_count(page) < refs, page)) - return; -#endif - - /* - * Calling put_page() for each ref is unnecessarily slow. Only the last - * ref needs a put_page(). - */ - if (refs > 1) - page_ref_sub(page, refs - 1); - put_page(page); -} - /* - * Return the compound head page with ref appropriately incremented, + * Return the folio with ref appropriately incremented, * or NULL if that failed. */ -static inline struct page *try_get_compound_head(struct page *page, int refs) +static inline struct folio *try_get_folio(struct page *page, int refs) { - struct page *head = compound_head(page); + struct folio *folio; - if (WARN_ON_ONCE(page_ref_count(head) < 0)) +retry: + folio = page_folio(page); + if (WARN_ON_ONCE(folio_ref_count(folio) < 0)) return NULL; - if (unlikely(!page_cache_add_speculative(head, refs))) + if (unlikely(!folio_ref_try_add_rcu(folio, refs))) return NULL; /* - * At this point we have a stable reference to the head page; but it - * could be that between the compound_head() lookup and the refcount - * increment, the compound page was split, in which case we'd end up - * holding a reference on a page that has nothing to do with the page + * At this point we have a stable reference to the folio; but it + * could be that between calling page_folio() and the refcount + * increment, the folio was split, in which case we'd end up + * holding a reference on a folio that has nothing to do with the page * we were given anymore. - * So now that the head page is stable, recheck that the pages still - * belong together. + * So now that the folio is stable, recheck that the page still + * belongs to this folio. */ - if (unlikely(compound_head(page) != head)) { - put_page_refs(head, refs); - return NULL; + if (unlikely(page_folio(page) != folio)) { + folio_put_refs(folio, refs); + goto retry; } - return head; + return folio; } /** - * try_grab_compound_head() - attempt to elevate a page's refcount, by a - * flags-dependent amount. - * - * Even though the name includes "compound_head", this function is still - * appropriate for callers that have a non-compound @page to get. - * + * try_grab_folio() - Attempt to get or pin a folio. * @page: pointer to page to be grabbed - * @refs: the value to (effectively) add to the page's refcount + * @refs: the value to (effectively) add to the folio's refcount * @flags: gup flags: these are the FOLL_* flag values. * * "grab" names in this file mean, "look at flags to decide whether to use - * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount. + * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount. * * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the * same time. (That's true throughout the get_user_pages*() and * pin_user_pages*() APIs.) Cases: * - * FOLL_GET: page's refcount will be incremented by @refs. + * FOLL_GET: folio's refcount will be incremented by @refs. * - * FOLL_PIN on compound pages that are > two pages long: page's refcount will - * be incremented by @refs, and page[2].hpage_pinned_refcount will be - * incremented by @refs * GUP_PIN_COUNTING_BIAS. + * FOLL_PIN on large folios: folio's refcount will be incremented by + * @refs, and its compound_pincount will be incremented by @refs. * - * FOLL_PIN on normal pages, or compound pages that are two pages long: - * page's refcount will be incremented by @refs * GUP_PIN_COUNTING_BIAS. + * FOLL_PIN on single-page folios: folio's refcount will be incremented by + * @refs * GUP_PIN_COUNTING_BIAS. * - * Return: head page (with refcount appropriately incremented) for success, or - * NULL upon failure. If neither FOLL_GET nor FOLL_PIN was set, that's - * considered failure, and furthermore, a likely bug in the caller, so a warning - * is also emitted. + * Return: The folio containing @page (with refcount appropriately + * incremented) for success, or NULL upon failure. If neither FOLL_GET + * nor FOLL_PIN was set, that's considered failure, and furthermore, + * a likely bug in the caller, so a warning is also emitted. */ -__maybe_unused struct page *try_grab_compound_head(struct page *page, - int refs, unsigned int flags) +struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags) { if (flags & FOLL_GET) - return try_get_compound_head(page, refs); + return try_get_folio(page, refs); else if (flags & FOLL_PIN) { + struct folio *folio; + /* * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a * right zone, so fail and let the caller fall back to the slow @@ -143,63 +107,57 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page, * CAUTION: Don't use compound_head() on the page before this * point, the result won't be stable. */ - page = try_get_compound_head(page, refs); - if (!page) + folio = try_get_folio(page, refs); + if (!folio) return NULL; /* - * When pinning a compound page of order > 1 (which is what - * hpage_pincount_available() checks for), use an exact count to - * track it, via hpage_pincount_add/_sub(). + * When pinning a large folio, use an exact count to track it. * - * However, be sure to *also* increment the normal page refcount - * field at least once, so that the page really is pinned. - * That's why the refcount from the earlier - * try_get_compound_head() is left intact. + * However, be sure to *also* increment the normal folio + * refcount field at least once, so that the folio really + * is pinned. That's why the refcount from the earlier + * try_get_folio() is left intact. */ - if (hpage_pincount_available(page)) - hpage_pincount_add(page, refs); + if (folio_test_large(folio)) + atomic_add(refs, folio_pincount_ptr(folio)); else - page_ref_add(page, refs * (GUP_PIN_COUNTING_BIAS - 1)); - - mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED, - refs); + folio_ref_add(folio, + refs * (GUP_PIN_COUNTING_BIAS - 1)); + node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs); - return page; + return folio; } WARN_ON_ONCE(1); return NULL; } -static void put_compound_head(struct page *page, int refs, unsigned int flags) +static void gup_put_folio(struct folio *folio, int refs, unsigned int flags) { if (flags & FOLL_PIN) { - mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_RELEASED, - refs); - - if (hpage_pincount_available(page)) - hpage_pincount_sub(page, refs); + node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs); + if (folio_test_large(folio)) + atomic_sub(refs, folio_pincount_ptr(folio)); else refs *= GUP_PIN_COUNTING_BIAS; } - put_page_refs(page, refs); + folio_put_refs(folio, refs); } /** * try_grab_page() - elevate a page's refcount by a flag-dependent amount + * @page: pointer to page to be grabbed + * @flags: gup flags: these are the FOLL_* flag values. * * This might not do anything at all, depending on the flags argument. * * "grab" names in this file mean, "look at flags to decide whether to use * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount. * - * @page: pointer to page to be grabbed - * @flags: gup flags: these are the FOLL_* flag values. - * * Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same - * time. Cases: please see the try_grab_compound_head() documentation, with + * time. Cases: please see the try_grab_folio() documentation, with * "refs=1". * * Return: true for success, or if no action was required (if neither FOLL_PIN @@ -208,32 +166,28 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags) */ bool __must_check try_grab_page(struct page *page, unsigned int flags) { + struct folio *folio = page_folio(page); + WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN)); + if (WARN_ON_ONCE(folio_ref_count(folio) <= 0)) + return false; if (flags & FOLL_GET) - return try_get_page(page); + folio_ref_inc(folio); else if (flags & FOLL_PIN) { - int refs = 1; - - page = compound_head(page); - - if (WARN_ON_ONCE(page_ref_count(page) <= 0)) - return false; - - if (hpage_pincount_available(page)) - hpage_pincount_add(page, 1); - else - refs = GUP_PIN_COUNTING_BIAS; - /* - * Similar to try_grab_compound_head(): even if using the - * hpage_pincount_add/_sub() routines, be sure to - * *also* increment the normal page refcount field at least - * once, so that the page really is pinned. + * Similar to try_grab_folio(): be sure to *also* + * increment the normal page refcount field at least once, + * so that the page really is pinned. */ - page_ref_add(page, refs); + if (folio_test_large(folio)) { + folio_ref_add(folio, 1); + atomic_add(1, folio_pincount_ptr(folio)); + } else { + folio_ref_add(folio, GUP_PIN_COUNTING_BIAS); + } - mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED, 1); + node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1); } return true; @@ -250,62 +204,40 @@ bool __must_check try_grab_page(struct page *page, unsigned int flags) */ void unpin_user_page(struct page *page) { - put_compound_head(compound_head(page), 1, FOLL_PIN); + gup_put_folio(page_folio(page), 1, FOLL_PIN); } EXPORT_SYMBOL(unpin_user_page); -static inline void compound_range_next(unsigned long i, unsigned long npages, - struct page **list, struct page **head, - unsigned int *ntails) +static inline struct folio *gup_folio_range_next(struct page *start, + unsigned long npages, unsigned long i, unsigned int *ntails) { - struct page *next, *page; + struct page *next = nth_page(start, i); + struct folio *folio = page_folio(next); unsigned int nr = 1; - if (i >= npages) - return; - - next = *list + i; - page = compound_head(next); - if (PageCompound(page) && compound_order(page) >= 1) - nr = min_t(unsigned int, - page + compound_nr(page) - next, npages - i); + if (folio_test_large(folio)) + nr = min_t(unsigned int, npages - i, + folio_nr_pages(folio) - folio_page_idx(folio, next)); - *head = page; *ntails = nr; + return folio; } -#define for_each_compound_range(__i, __list, __npages, __head, __ntails) \ - for (__i = 0, \ - compound_range_next(__i, __npages, __list, &(__head), &(__ntails)); \ - __i < __npages; __i += __ntails, \ - compound_range_next(__i, __npages, __list, &(__head), &(__ntails))) - -static inline void compound_next(unsigned long i, unsigned long npages, - struct page **list, struct page **head, - unsigned int *ntails) +static inline struct folio *gup_folio_next(struct page **list, + unsigned long npages, unsigned long i, unsigned int *ntails) { - struct page *page; + struct folio *folio = page_folio(list[i]); unsigned int nr; - if (i >= npages) - return; - - page = compound_head(list[i]); for (nr = i + 1; nr < npages; nr++) { - if (compound_head(list[nr]) != page) + if (page_folio(list[nr]) != folio) break; } - *head = page; *ntails = nr - i; + return folio; } -#define for_each_compound_head(__i, __list, __npages, __head, __ntails) \ - for (__i = 0, \ - compound_next(__i, __npages, __list, &(__head), &(__ntails)); \ - __i < __npages; __i += __ntails, \ - compound_next(__i, __npages, __list, &(__head), &(__ntails))) - /** * unpin_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages * @pages: array of pages to be maybe marked dirty, and definitely released. @@ -331,16 +263,17 @@ static inline void compound_next(unsigned long i, unsigned long npages, void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, bool make_dirty) { - unsigned long index; - struct page *head; - unsigned int ntails; + unsigned long i; + struct folio *folio; + unsigned int nr; if (!make_dirty) { unpin_user_pages(pages, npages); return; } - for_each_compound_head(index, pages, npages, head, ntails) { + for (i = 0; i < npages; i += nr) { + folio = gup_folio_next(pages, npages, i, &nr); /* * Checking PageDirty at this point may race with * clear_page_dirty_for_io(), but that's OK. Two key @@ -361,9 +294,12 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, * written back, so it gets written back again in the * next writeback cycle. This is harmless. */ - if (!PageDirty(head)) - set_page_dirty_lock(head); - put_compound_head(head, ntails, FOLL_PIN); + if (!folio_test_dirty(folio)) { + folio_lock(folio); + folio_mark_dirty(folio); + folio_unlock(folio); + } + gup_put_folio(folio, nr, FOLL_PIN); } } EXPORT_SYMBOL(unpin_user_pages_dirty_lock); @@ -392,14 +328,18 @@ EXPORT_SYMBOL(unpin_user_pages_dirty_lock); void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages, bool make_dirty) { - unsigned long index; - struct page *head; - unsigned int ntails; + unsigned long i; + struct folio *folio; + unsigned int nr; - for_each_compound_range(index, &page, npages, head, ntails) { - if (make_dirty && !PageDirty(head)) - set_page_dirty_lock(head); - put_compound_head(head, ntails, FOLL_PIN); + for (i = 0; i < npages; i += nr) { + folio = gup_folio_range_next(page, npages, i, &nr); + if (make_dirty && !folio_test_dirty(folio)) { + folio_lock(folio); + folio_mark_dirty(folio); + folio_unlock(folio); + } + gup_put_folio(folio, nr, FOLL_PIN); } } EXPORT_SYMBOL(unpin_user_page_range_dirty_lock); @@ -415,9 +355,9 @@ EXPORT_SYMBOL(unpin_user_page_range_dirty_lock); */ void unpin_user_pages(struct page **pages, unsigned long npages) { - unsigned long index; - struct page *head; - unsigned int ntails; + unsigned long i; + struct folio *folio; + unsigned int nr; /* * If this WARN_ON() fires, then the system *might* be leaking pages (by @@ -427,8 +367,10 @@ void unpin_user_pages(struct page **pages, unsigned long npages) if (WARN_ON(IS_ERR_VALUE(npages))) return; - for_each_compound_head(index, pages, npages, head, ntails) - put_compound_head(head, ntails, FOLL_PIN); + for (i = 0; i < npages; i += nr) { + folio = gup_folio_next(pages, npages, i, &nr); + gup_put_folio(folio, nr, FOLL_PIN); + } } EXPORT_SYMBOL(unpin_user_pages); @@ -593,32 +535,6 @@ retry: */ mark_page_accessed(page); } - if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { - /* Do not mlock pte-mapped THP */ - if (PageTransCompound(page)) - goto out; - - /* - * The preliminary mapping check is mainly to avoid the - * pointless overhead of lock_page on the ZERO_PAGE - * which might bounce very badly if there is contention. - * - * If the page is already locked, we don't need to - * handle it now - vmscan will handle it later if and - * when it attempts to reclaim the page. - */ - if (page->mapping && trylock_page(page)) { - lru_add_drain(); /* push cached pages to LRU */ - /* - * Because we lock page here, and migration is - * blocked by the pte's page reference, and we - * know the page is still mapped, we don't even - * need to check for file-cache page truncation. - */ - mlock_vma_page(page); - unlock_page(page); - } - } out: pte_unmap_unlock(ptep, ptl); return page; @@ -941,9 +857,6 @@ static int faultin_page(struct vm_area_struct *vma, unsigned int fault_flags = 0; vm_fault_t ret; - /* mlock all present pages, but do not fault in new pages */ - if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK) - return -ENOENT; if (*flags & FOLL_NOFAULT) return -EFAULT; if (*flags & FOLL_WRITE) @@ -1194,8 +1107,6 @@ retry: case -ENOMEM: case -EHWPOISON: goto out; - case -ENOENT: - goto next_page; } BUG(); } else if (PTR_ERR(page) == -EEXIST) { @@ -1500,9 +1411,14 @@ long populate_vma_page_range(struct vm_area_struct *vma, VM_BUG_ON_VMA(end > vma->vm_end, vma); mmap_assert_locked(mm); - gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK; + /* + * Rightly or wrongly, the VM_LOCKONFAULT case has never used + * faultin_page() to break COW, so it has no work to do here. + */ if (vma->vm_flags & VM_LOCKONFAULT) - gup_flags &= ~FOLL_POPULATE; + return nr_pages; + + gup_flags = FOLL_TOUCH; /* * We want to touch writable mappings with a write fault in order * to break COW, except for shared mappings because these don't COW @@ -1569,10 +1485,9 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start, * in the page table. * FOLL_HWPOISON: Return -EHWPOISON instead of -EFAULT when we hit * a poisoned page. - * FOLL_POPULATE: Always populate memory with VM_LOCKONFAULT. * !FOLL_FORCE: Require proper access permissions. */ - gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK | FOLL_HWPOISON; + gup_flags = FOLL_TOUCH | FOLL_HWPOISON; if (write) gup_flags |= FOLL_WRITE; @@ -1852,72 +1767,80 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, struct page **pages, unsigned int gup_flags) { - unsigned long i; - unsigned long isolation_error_count = 0; - bool drain_allow = true; + unsigned long isolation_error_count = 0, i; + struct folio *prev_folio = NULL; LIST_HEAD(movable_page_list); - long ret = 0; - struct page *prev_head = NULL; - struct page *head; - struct migration_target_control mtc = { - .nid = NUMA_NO_NODE, - .gfp_mask = GFP_USER | __GFP_NOWARN, - }; + bool drain_allow = true; + int ret = 0; for (i = 0; i < nr_pages; i++) { - head = compound_head(pages[i]); - if (head == prev_head) + struct folio *folio = page_folio(pages[i]); + + if (folio == prev_folio) continue; - prev_head = head; + prev_folio = folio; + + if (folio_is_pinnable(folio)) + continue; + /* - * If we get a movable page, since we are going to be pinning - * these entries, try to move them out if possible. + * Try to move out any movable page before pinning the range. */ - if (!is_pinnable_page(head)) { - if (PageHuge(head)) { - if (!isolate_huge_page(head, &movable_page_list)) - isolation_error_count++; - } else { - if (!PageLRU(head) && drain_allow) { - lru_add_drain_all(); - drain_allow = false; - } + if (folio_test_hugetlb(folio)) { + if (!isolate_huge_page(&folio->page, + &movable_page_list)) + isolation_error_count++; + continue; + } - if (isolate_lru_page(head)) { - isolation_error_count++; - continue; - } - list_add_tail(&head->lru, &movable_page_list); - mod_node_page_state(page_pgdat(head), - NR_ISOLATED_ANON + - page_is_file_lru(head), - thp_nr_pages(head)); - } + if (!folio_test_lru(folio) && drain_allow) { + lru_add_drain_all(); + drain_allow = false; + } + + if (folio_isolate_lru(folio)) { + isolation_error_count++; + continue; } + list_add_tail(&folio->lru, &movable_page_list); + node_stat_mod_folio(folio, + NR_ISOLATED_ANON + folio_is_file_lru(folio), + folio_nr_pages(folio)); } + if (!list_empty(&movable_page_list) || isolation_error_count) + goto unpin_pages; + /* * If list is empty, and no isolation errors, means that all pages are * in the correct zone. */ - if (list_empty(&movable_page_list) && !isolation_error_count) - return nr_pages; + return nr_pages; +unpin_pages: if (gup_flags & FOLL_PIN) { unpin_user_pages(pages, nr_pages); } else { for (i = 0; i < nr_pages; i++) put_page(pages[i]); } + if (!list_empty(&movable_page_list)) { + struct migration_target_control mtc = { + .nid = NUMA_NO_NODE, + .gfp_mask = GFP_USER | __GFP_NOWARN, + }; + ret = migrate_pages(&movable_page_list, alloc_migration_target, NULL, (unsigned long)&mtc, MIGRATE_SYNC, MR_LONGTERM_PIN, NULL); - if (ret && !list_empty(&movable_page_list)) - putback_movable_pages(&movable_page_list); + if (ret > 0) /* number of pages not migrated */ + ret = -ENOMEM; } - return ret > 0 ? -ENOMEM : ret; + if (ret && !list_empty(&movable_page_list)) + putback_movable_pages(&movable_page_list); + return ret; } #else static long check_and_migrate_movable_pages(unsigned long nr_pages, @@ -2227,7 +2150,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, ptem = ptep = pte_offset_map(&pmd, addr); do { pte_t pte = ptep_get_lockless(ptep); - struct page *head, *page; + struct page *page; + struct folio *folio; /* * Similar to the PMD case below, NUMA hinting must take slow @@ -2254,22 +2178,20 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); - head = try_grab_compound_head(page, 1, flags); - if (!head) + folio = try_grab_folio(page, 1, flags); + if (!folio) goto pte_unmap; if (unlikely(page_is_secretmem(page))) { - put_compound_head(head, 1, flags); + gup_put_folio(folio, 1, flags); goto pte_unmap; } if (unlikely(pte_val(pte) != pte_val(*ptep))) { - put_compound_head(head, 1, flags); + gup_put_folio(folio, 1, flags); goto pte_unmap; } - VM_BUG_ON_PAGE(compound_head(page) != head, page); - /* * We need to make the page accessible if and only if we are * going to access its content (the FOLL_PIN case). Please @@ -2279,14 +2201,13 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, if (flags & FOLL_PIN) { ret = arch_make_page_accessible(page); if (ret) { - unpin_user_page(page); + gup_put_folio(folio, 1, flags); goto pte_unmap; } } - SetPageReferenced(page); + folio_set_referenced(folio); pages[*nr] = page; (*nr)++; - } while (ptep++, addr += PAGE_SIZE, addr != end); ret = 1; @@ -2403,8 +2324,8 @@ static int record_subpages(struct page *page, unsigned long addr, { int nr; - for (nr = 0; addr != end; addr += PAGE_SIZE) - pages[nr++] = page++; + for (nr = 0; addr != end; nr++, addr += PAGE_SIZE) + pages[nr] = nth_page(page, nr); return nr; } @@ -2422,7 +2343,8 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, struct page **pages, int *nr) { unsigned long pte_end; - struct page *head, *page; + struct page *page; + struct folio *folio; pte_t pte; int refs; @@ -2438,21 +2360,20 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, /* hugepages are never "special" */ VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - head = pte_page(pte); - page = head + ((addr & (sz-1)) >> PAGE_SHIFT); + page = nth_page(pte_page(pte), (addr & (sz - 1)) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); - head = try_grab_compound_head(head, refs, flags); - if (!head) + folio = try_grab_folio(page, refs, flags); + if (!folio) return 0; if (unlikely(pte_val(pte) != pte_val(*ptep))) { - put_compound_head(head, refs, flags); + gup_put_folio(folio, refs, flags); return 0; } *nr += refs; - SetPageReferenced(head); + folio_set_referenced(folio); return 1; } @@ -2486,7 +2407,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { - struct page *head, *page; + struct page *page; + struct folio *folio; int refs; if (!pmd_access_permitted(orig, flags & FOLL_WRITE)) @@ -2499,20 +2421,20 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, pages, nr); } - page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + page = nth_page(pmd_page(orig), (addr & ~PMD_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); - head = try_grab_compound_head(pmd_page(orig), refs, flags); - if (!head) + folio = try_grab_folio(page, refs, flags); + if (!folio) return 0; if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) { - put_compound_head(head, refs, flags); + gup_put_folio(folio, refs, flags); return 0; } *nr += refs; - SetPageReferenced(head); + folio_set_referenced(folio); return 1; } @@ -2520,7 +2442,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { - struct page *head, *page; + struct page *page; + struct folio *folio; int refs; if (!pud_access_permitted(orig, flags & FOLL_WRITE)) @@ -2533,20 +2456,20 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, pages, nr); } - page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + page = nth_page(pud_page(orig), (addr & ~PUD_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); - head = try_grab_compound_head(pud_page(orig), refs, flags); - if (!head) + folio = try_grab_folio(page, refs, flags); + if (!folio) return 0; if (unlikely(pud_val(orig) != pud_val(*pudp))) { - put_compound_head(head, refs, flags); + gup_put_folio(folio, refs, flags); return 0; } *nr += refs; - SetPageReferenced(head); + folio_set_referenced(folio); return 1; } @@ -2555,27 +2478,28 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, struct page **pages, int *nr) { int refs; - struct page *head, *page; + struct page *page; + struct folio *folio; if (!pgd_access_permitted(orig, flags & FOLL_WRITE)) return 0; BUILD_BUG_ON(pgd_devmap(orig)); - page = pgd_page(orig) + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT); + page = nth_page(pgd_page(orig), (addr & ~PGDIR_MASK) >> PAGE_SHIFT); refs = record_subpages(page, addr, end, pages + *nr); - head = try_grab_compound_head(pgd_page(orig), refs, flags); - if (!head) + folio = try_grab_folio(page, refs, flags); + if (!folio) return 0; if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) { - put_compound_head(head, refs, flags); + gup_put_folio(folio, refs, flags); return 0; } *nr += refs; - SetPageReferenced(head); + folio_set_referenced(folio); return 1; } |