Diffstat (limited to 'mm/migrate.c')
-rw-r--r-- | mm/migrate.c | 934
1 file changed, 653 insertions, 281 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index cc5455614e01..37865f85df6d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -58,8 +58,9 @@
 
 #include "internal.h"
 
-int isolate_movable_page(struct page *page, isolate_mode_t mode)
+bool isolate_movable_page(struct page *page, isolate_mode_t mode)
 {
+	struct folio *folio = folio_get_nontail_page(page);
 	const struct movable_operations *mops;
 
 	/*
@@ -71,11 +72,11 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode)
 	 * the put_page() at the end of this block will take care of
 	 * release this page, thus avoiding a nasty leakage.
 	 */
-	if (unlikely(!get_page_unless_zero(page)))
+	if (!folio)
 		goto out;
 
-	if (unlikely(PageSlab(page)))
-		goto out_putpage;
+	if (unlikely(folio_test_slab(folio)))
+		goto out_putfolio;
 	/* Pairs with smp_wmb() in slab freeing, e.g. SLUB's __free_slab() */
 	smp_rmb();
 	/*
@@ -83,12 +84,12 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode)
 	 * we use non-atomic bitops on newly allocated page flags so
 	 * unconditionally grabbing the lock ruins page's owner side.
 	 */
-	if (unlikely(!__PageMovable(page)))
-		goto out_putpage;
+	if (unlikely(!__folio_test_movable(folio)))
+		goto out_putfolio;
 	/* Pairs with smp_wmb() in slab allocation, e.g. SLUB's alloc_slab_page() */
 	smp_rmb();
-	if (unlikely(PageSlab(page)))
-		goto out_putpage;
+	if (unlikely(folio_test_slab(folio)))
+		goto out_putfolio;
 
 	/*
 	 * As movable pages are not isolated from LRU lists, concurrent
@@ -101,39 +102,39 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode)
 	 * lets be sure we have the page lock
 	 * before proceeding with the movable page isolation steps.
 	 */
-	if (unlikely(!trylock_page(page)))
-		goto out_putpage;
+	if (unlikely(!folio_trylock(folio)))
+		goto out_putfolio;
 
-	if (!PageMovable(page) || PageIsolated(page))
+	if (!folio_test_movable(folio) || folio_test_isolated(folio))
 		goto out_no_isolated;
 
-	mops = page_movable_ops(page);
-	VM_BUG_ON_PAGE(!mops, page);
+	mops = folio_movable_ops(folio);
+	VM_BUG_ON_FOLIO(!mops, folio);
 
-	if (!mops->isolate_page(page, mode))
+	if (!mops->isolate_page(&folio->page, mode))
 		goto out_no_isolated;
 
 	/* Driver shouldn't use PG_isolated bit of page->flags */
-	WARN_ON_ONCE(PageIsolated(page));
-	SetPageIsolated(page);
-	unlock_page(page);
+	WARN_ON_ONCE(folio_test_isolated(folio));
+	folio_set_isolated(folio);
+	folio_unlock(folio);
 
-	return 0;
+	return true;
 
 out_no_isolated:
-	unlock_page(page);
-out_putpage:
-	put_page(page);
+	folio_unlock(folio);
+out_putfolio:
+	folio_put(folio);
 out:
-	return -EBUSY;
+	return false;
 }
 
-static void putback_movable_page(struct page *page)
+static void putback_movable_folio(struct folio *folio)
 {
-	const struct movable_operations *mops = page_movable_ops(page);
+	const struct movable_operations *mops = folio_movable_ops(folio);
 
-	mops->putback_page(page);
-	ClearPageIsolated(page);
+	mops->putback_page(&folio->page);
+	folio_clear_isolated(folio);
 }
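Note: isolate_movable_page() now reports success as a bool instead of 0/-EBUSY, so call sites change from error-code checks to plain truth tests. A minimal sketch of an updated caller, assuming a hypothetical helper name and list (not part of this diff):

	/* Hypothetical caller of the new bool API. */
	static bool try_isolate_movable(struct page *page, struct list_head *list)
	{
		/* Previously: a return of -EBUSY signalled failure. */
		if (!isolate_movable_page(page, ISOLATE_UNEVICTABLE))
			return false;		/* not isolated */
		list_add(&page->lru, list);	/* PG_isolated is set now */
		return true;
	}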
 
 /*
@@ -146,33 +147,33 @@ static void putback_movable_page(struct page *page)
  */
 void putback_movable_pages(struct list_head *l)
 {
-	struct page *page;
-	struct page *page2;
+	struct folio *folio;
+	struct folio *folio2;
 
-	list_for_each_entry_safe(page, page2, l, lru) {
-		if (unlikely(PageHuge(page))) {
-			putback_active_hugepage(page);
+	list_for_each_entry_safe(folio, folio2, l, lru) {
+		if (unlikely(folio_test_hugetlb(folio))) {
+			folio_putback_active_hugetlb(folio);
 			continue;
 		}
-		list_del(&page->lru);
+		list_del(&folio->lru);
 		/*
-		 * We isolated non-lru movable page so here we can use
-		 * __PageMovable because LRU page's mapping cannot have
+		 * We isolated non-lru movable folio so here we can use
+		 * __PageMovable because LRU folio's mapping cannot have
 		 * PAGE_MAPPING_MOVABLE.
 		 */
-		if (unlikely(__PageMovable(page))) {
-			VM_BUG_ON_PAGE(!PageIsolated(page), page);
-			lock_page(page);
-			if (PageMovable(page))
-				putback_movable_page(page);
+		if (unlikely(__folio_test_movable(folio))) {
+			VM_BUG_ON_FOLIO(!folio_test_isolated(folio), folio);
+			folio_lock(folio);
+			if (folio_test_movable(folio))
+				putback_movable_folio(folio);
 			else
-				ClearPageIsolated(page);
-			unlock_page(page);
-			put_page(page);
+				folio_clear_isolated(folio);
+			folio_unlock(folio);
+			folio_put(folio);
 		} else {
-			mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
-					page_is_file_lru(page), -thp_nr_pages(page));
-			putback_lru_page(page);
+			node_stat_mod_folio(folio, NR_ISOLATED_ANON +
+					folio_is_file_lru(folio), -folio_nr_pages(folio));
+			folio_putback_lru(folio);
 		}
 	}
 }
@@ -267,7 +268,7 @@ static bool remove_migration_pte(struct folio *folio,
 		set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
 	}
 	if (vma->vm_flags & VM_LOCKED)
-		mlock_page_drain_local();
+		mlock_drain_local();
 
 	trace_remove_migration_pte(pvmw.address, pte_val(pte),
 				   compound_order(new));
@@ -331,24 +332,41 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
-void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl)
+/*
+ * The vma read lock must be held upon entry. Holding that lock prevents either
+ * the pte or the ptl from being freed.
+ *
+ * This function will release the vma lock before returning.
+ */
+void __migration_entry_wait_huge(struct vm_area_struct *vma,
+				 pte_t *ptep, spinlock_t *ptl)
 {
 	pte_t pte;
 
+	hugetlb_vma_assert_locked(vma);
 	spin_lock(ptl);
 	pte = huge_ptep_get(ptep);
 
-	if (unlikely(!is_hugetlb_entry_migration(pte)))
+	if (unlikely(!is_hugetlb_entry_migration(pte))) {
 		spin_unlock(ptl);
-	else
+		hugetlb_vma_unlock_read(vma);
+	} else {
+		/*
+		 * If migration entry existed, safe to release vma lock
+		 * here because the pgtable page won't be freed without the
+		 * pgtable lock released. See comment right above pgtable
+		 * lock release in migration_entry_wait_on_locked().
+		 */
+		hugetlb_vma_unlock_read(vma);
 		migration_entry_wait_on_locked(pte_to_swp_entry(pte), NULL, ptl);
+	}
 }
 
 void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte)
 {
 	spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), vma->vm_mm, pte);
 
-	__migration_entry_wait_huge(pte, ptl);
+	__migration_entry_wait_huge(vma, pte, ptl);
 }
 
 #endif
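Note: __migration_entry_wait_huge() now consumes the hugetlb vma read lock: the caller must hold it on entry, and the function releases it on every path before sleeping. A rough sketch of the expected fault-path shape under this protocol (a hypothetical wrapper; everything except the hugetlb/migration APIs named in the diff is an assumption):

	/* Hypothetical fragment: haddr is the huge-page-aligned address. */
	static void wait_if_migration_entry(struct vm_area_struct *vma,
					    struct mm_struct *mm,
					    unsigned long haddr)
	{
		struct hstate *h = hstate_vma(vma);
		pte_t *ptep;

		hugetlb_vma_lock_read(vma);
		ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
		if (ptep && is_hugetlb_entry_migration(huge_ptep_get(ptep)))
			/* Returns with the vma read lock already dropped. */
			migration_entry_wait_huge(vma, ptep);
		else
			hugetlb_vma_unlock_read(vma);
	}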
@@ -975,7 +993,7 @@ static int move_to_new_folio(struct folio *dst, struct folio *src,
 		goto out;
 	}
 
-	mops = page_movable_ops(&src->page);
+	mops = folio_movable_ops(src);
 	rc = mops->migrate_page(&dst->page, &src->page, mode);
 	WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
 			!folio_test_isolated(src));
@@ -1011,13 +1029,113 @@ out:
 	return rc;
 }
 
-static int __unmap_and_move(struct folio *src, struct folio *dst,
-				int force, enum migrate_mode mode)
+/*
+ * To record some information during migration, we use some unused
+ * fields (mapping and private) of struct folio of the newly allocated
+ * destination folio. This is safe because nobody is using them
+ * except us.
+ */
+static void __migrate_folio_record(struct folio *dst,
+				   unsigned long page_was_mapped,
+				   struct anon_vma *anon_vma)
+{
+	dst->mapping = (void *)anon_vma;
+	dst->private = (void *)page_was_mapped;
+}
+
+static void __migrate_folio_extract(struct folio *dst,
+				   int *page_was_mappedp,
+				   struct anon_vma **anon_vmap)
+{
+	*anon_vmap = (void *)dst->mapping;
+	*page_was_mappedp = (unsigned long)dst->private;
+	dst->mapping = NULL;
+	dst->private = NULL;
+}
+
+/* Restore the source folio to the original state upon failure */
+static void migrate_folio_undo_src(struct folio *src,
+				   int page_was_mapped,
+				   struct anon_vma *anon_vma,
+				   bool locked,
+				   struct list_head *ret)
+{
+	if (page_was_mapped)
+		remove_migration_ptes(src, src, false);
+	/* Drop an anon_vma reference if we took one */
+	if (anon_vma)
+		put_anon_vma(anon_vma);
+	if (locked)
+		folio_unlock(src);
+	if (ret)
+		list_move_tail(&src->lru, ret);
+}
+
+/* Restore the destination folio to the original state upon failure */
+static void migrate_folio_undo_dst(struct folio *dst,
+				   bool locked,
+				   free_page_t put_new_page,
+				   unsigned long private)
+{
+	if (locked)
+		folio_unlock(dst);
+	if (put_new_page)
+		put_new_page(&dst->page, private);
+	else
+		folio_put(dst);
+}
+
+/* Cleanup src folio upon migration success */
+static void migrate_folio_done(struct folio *src,
+			       enum migrate_reason reason)
+{
+	/*
+	 * Compaction can migrate also non-LRU pages which are
+	 * not accounted to NR_ISOLATED_*. They can be recognized
+	 * as __PageMovable
+	 */
+	if (likely(!__folio_test_movable(src)))
+		mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON +
+				    folio_is_file_lru(src), -folio_nr_pages(src));
+
+	if (reason != MR_MEMORY_FAILURE)
+		/* We release the page in page_handle_poison. */
+		folio_put(src);
+}
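Note: the record/extract pair parks unmap-phase state in otherwise unused fields of the destination folio so that the move phase, which may run much later in a separate loop, can pick it up again. Schematically (condensed from the functions above; surrounding control flow elided):

	/* Unmap phase: stash what the move phase will need. */
	__migrate_folio_record(dst, page_was_mapped, anon_vma);

	/* ... dst waits on a list until the batch is moved ... */

	/* Move phase: recover the state; the stash fields are reset. */
	__migrate_folio_extract(dst, &page_was_mapped, &anon_vma);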
+/* Obtain the lock on page, remove all ptes. */
+static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page,
+			       unsigned long private, struct folio *src,
+			       struct folio **dstp, int force, bool avoid_force_lock,
+			       enum migrate_mode mode, enum migrate_reason reason,
+			       struct list_head *ret)
 {
+	struct folio *dst;
 	int rc = -EAGAIN;
-	bool page_was_mapped = false;
+	struct page *newpage = NULL;
+	int page_was_mapped = 0;
 	struct anon_vma *anon_vma = NULL;
 	bool is_lru = !__PageMovable(&src->page);
+	bool locked = false;
+	bool dst_locked = false;
+
+	if (folio_ref_count(src) == 1) {
+		/* Folio was freed from under us. So we are done. */
+		folio_clear_active(src);
+		folio_clear_unevictable(src);
+		/* free_pages_prepare() will clear PG_isolated. */
+		list_del(&src->lru);
+		migrate_folio_done(src, reason);
+		return MIGRATEPAGE_SUCCESS;
+	}
+
+	newpage = get_new_page(&src->page, private);
+	if (!newpage)
+		return -ENOMEM;
+	dst = page_folio(newpage);
+	*dstp = dst;
+
+	dst->private = NULL;
 
 	if (!folio_trylock(src)) {
 		if (!force || mode == MIGRATE_ASYNC)
@@ -1039,8 +1157,20 @@ static int __unmap_and_move(struct folio *src, struct folio *dst,
 		if (current->flags & PF_MEMALLOC)
 			goto out;
 
+		/*
+		 * We have locked some folios and are going to wait to lock
+		 * this folio. To avoid a potential deadlock, let's bail
+		 * out and not do that. The locked folios will be moved and
+		 * unlocked, then we can wait to lock this folio.
+		 */
+		if (avoid_force_lock) {
+			rc = -EDEADLOCK;
+			goto out;
+		}
+
 		folio_lock(src);
 	}
+	locked = true;
 
 	if (folio_test_writeback(src)) {
 		/*
@@ -1055,10 +1185,10 @@ static int __unmap_and_move(struct folio *src, struct folio *dst,
 			break;
 		default:
 			rc = -EBUSY;
-			goto out_unlock;
+			goto out;
 		}
 		if (!force)
-			goto out_unlock;
+			goto out;
 		folio_wait_writeback(src);
 	}
 
@@ -1088,11 +1218,12 @@ static int __unmap_and_move(struct folio *src, struct folio *dst,
 	 * This is much like races on refcount of oldpage: just don't BUG().
 	 */
 	if (unlikely(!folio_trylock(dst)))
-		goto out_unlock;
+		goto out;
+	dst_locked = true;
 
 	if (unlikely(!is_lru)) {
-		rc = move_to_new_folio(dst, src, mode);
-		goto out_unlock_both;
+		__migrate_folio_record(dst, page_was_mapped, anon_vma);
+		return MIGRATEPAGE_UNMAP;
 	}
 
 	/*
@@ -1110,18 +1241,57 @@ static int __unmap_and_move(struct folio *src, struct folio *dst,
 	if (!src->mapping) {
 		if (folio_test_private(src)) {
 			try_to_free_buffers(src);
-			goto out_unlock_both;
+			goto out;
 		}
 	} else if (folio_mapped(src)) {
 		/* Establish migration ptes */
 		VM_BUG_ON_FOLIO(folio_test_anon(src) &&
 			       !folio_test_ksm(src) && !anon_vma, src);
-		try_to_migrate(src, 0);
-		page_was_mapped = true;
+		try_to_migrate(src, TTU_BATCH_FLUSH);
+		page_was_mapped = 1;
 	}
 
-	if (!folio_mapped(src))
-		rc = move_to_new_folio(dst, src, mode);
+	if (!folio_mapped(src)) {
+		__migrate_folio_record(dst, page_was_mapped, anon_vma);
+		return MIGRATEPAGE_UNMAP;
+	}
+
+out:
+	/*
+	 * A folio that has not been unmapped will be restored to
+	 * right list unless we want to retry.
+	 */
+	if (rc == -EAGAIN || rc == -EDEADLOCK)
+		ret = NULL;
+
+	migrate_folio_undo_src(src, page_was_mapped, anon_vma, locked, ret);
+	migrate_folio_undo_dst(dst, dst_locked, put_new_page, private);
+
+	return rc;
+}
+
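Note: MIGRATEPAGE_UNMAP is the new intermediate result meaning "source unmapped, state recorded in dst". The intended two-phase usage, reduced to a sketch (error handling and list bookkeeping omitted; in the real code the batched TLB flush sits between the two phases):

	rc = migrate_folio_unmap(get_new_page, put_new_page, private, src,
				 &dst, force, avoid_force_lock, mode, reason, ret);
	if (rc == MIGRATEPAGE_UNMAP)
		rc = migrate_folio_move(put_new_page, private, src, dst,
					mode, reason, ret);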
+/* Migrate the folio to the newly allocated folio in dst. */
+static int migrate_folio_move(free_page_t put_new_page, unsigned long private,
+			      struct folio *src, struct folio *dst,
+			      enum migrate_mode mode, enum migrate_reason reason,
+			      struct list_head *ret)
+{
+	int rc;
+	int page_was_mapped = 0;
+	struct anon_vma *anon_vma = NULL;
+	bool is_lru = !__PageMovable(&src->page);
+	struct list_head *prev;
+
+	__migrate_folio_extract(dst, &page_was_mapped, &anon_vma);
+	prev = dst->lru.prev;
+	list_del(&dst->lru);
+
+	rc = move_to_new_folio(dst, src, mode);
+	if (rc)
+		goto out;
+
+	if (unlikely(!is_lru))
+		goto out_unlock_both;
 
 	/*
 	 * When successful, push dst to LRU immediately: so that if it
@@ -1132,111 +1302,49 @@ static int __unmap_and_move(struct folio *src, struct folio *dst,
 	 * unsuccessful, and other cases when a page has been temporarily
 	 * isolated from the unevictable LRU: but this case is the easiest.
 	 */
-	if (rc == MIGRATEPAGE_SUCCESS) {
-		folio_add_lru(dst);
-		if (page_was_mapped)
-			lru_add_drain();
-	}
+	folio_add_lru(dst);
+	if (page_was_mapped)
+		lru_add_drain();
 
 	if (page_was_mapped)
-		remove_migration_ptes(src,
-			rc == MIGRATEPAGE_SUCCESS ? dst : src, false);
+		remove_migration_ptes(src, dst, false);
 
 out_unlock_both:
 	folio_unlock(dst);
-out_unlock:
-	/* Drop an anon_vma reference if we took one */
-	if (anon_vma)
-		put_anon_vma(anon_vma);
-	folio_unlock(src);
-out:
+	set_page_owner_migrate_reason(&dst->page, reason);
 	/*
 	 * If migration is successful, decrease refcount of dst,
 	 * which will not free the page because new page owner increased
 	 * refcounter.
 	 */
-	if (rc == MIGRATEPAGE_SUCCESS)
-		folio_put(dst);
-
-	return rc;
-}
-
-/*
- * Obtain the lock on folio, remove all ptes and migrate the folio
- * to the newly allocated folio in dst.
- */
-static int unmap_and_move(new_page_t get_new_page,
-				   free_page_t put_new_page,
-				   unsigned long private, struct folio *src,
-				   int force, enum migrate_mode mode,
-				   enum migrate_reason reason,
-				   struct list_head *ret)
-{
-	struct folio *dst;
-	int rc = MIGRATEPAGE_SUCCESS;
-	struct page *newpage = NULL;
-
-	if (!thp_migration_supported() && folio_test_transhuge(src))
-		return -ENOSYS;
-
-	if (folio_ref_count(src) == 1) {
-		/* Folio was freed from under us. So we are done. */
-		folio_clear_active(src);
-		folio_clear_unevictable(src);
-		/* free_pages_prepare() will clear PG_isolated. */
-		goto out;
-	}
-
-	newpage = get_new_page(&src->page, private);
-	if (!newpage)
-		return -ENOMEM;
-	dst = page_folio(newpage);
+	folio_put(dst);
 
-	dst->private = NULL;
-	rc = __unmap_and_move(src, dst, force, mode);
-	if (rc == MIGRATEPAGE_SUCCESS)
-		set_page_owner_migrate_reason(&dst->page, reason);
+	/*
+	 * A folio that has been migrated has all references removed
+	 * and will be freed.
+	 */
+	list_del(&src->lru);
+	/* Drop an anon_vma reference if we took one */
+	if (anon_vma)
+		put_anon_vma(anon_vma);
+	folio_unlock(src);
+	migrate_folio_done(src, reason);
 
+	return rc;
 out:
-	if (rc != -EAGAIN) {
-		/*
-		 * A folio that has been migrated has all references
-		 * removed and will be freed. A folio that has not been
-		 * migrated will have kept its references and be restored.
-		 */
-		list_del(&src->lru);
-	}
-
 	/*
-	 * If migration is successful, releases reference grabbed during
-	 * isolation. Otherwise, restore the folio to right list unless
-	 * we want to retry.
+	 * A folio that has not been migrated will be restored to
+	 * right list unless we want to retry.
 	 */
-	if (rc == MIGRATEPAGE_SUCCESS) {
-		/*
-		 * Compaction can migrate also non-LRU folios which are
-		 * not accounted to NR_ISOLATED_*. They can be recognized
-		 * as __folio_test_movable
-		 */
-		if (likely(!__folio_test_movable(src)))
-			mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON +
-					folio_is_file_lru(src), -folio_nr_pages(src));
-
-		if (reason != MR_MEMORY_FAILURE)
-			/*
-			 * We release the folio in page_handle_poison.
-			 */
-			folio_put(src);
-	} else {
-		if (rc != -EAGAIN)
-			list_add_tail(&src->lru, ret);
-
-		if (put_new_page)
-			put_new_page(&dst->page, private);
-		else
-			folio_put(dst);
+	if (rc == -EAGAIN) {
+		list_add(&dst->lru, prev);
+		__migrate_folio_record(dst, page_was_mapped, anon_vma);
+		return rc;
 	}
 
+	migrate_folio_undo_src(src, page_was_mapped, anon_vma, true, ret);
+	migrate_folio_undo_dst(dst, true, put_new_page, private);
+
 	return rc;
 }
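Note: in the move phase, -EAGAIN re-parks the pair for the next pass (dst goes back to its previous position so the unmap/dst lists stay in lockstep, and the state is re-recorded), while any other failure rolls both sides back. In sketch form, mirroring the failure path above:

	if (rc == -EAGAIN) {
		list_add(&dst->lru, prev);	/* keep list pairing intact */
		__migrate_folio_record(dst, page_was_mapped, anon_vma);
	} else if (rc != MIGRATEPAGE_SUCCESS) {
		migrate_folio_undo_src(src, page_was_mapped, anon_vma, true, ret);
		migrate_folio_undo_dst(dst, true, put_new_page, private);
	}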
@@ -1271,19 +1379,9 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 	struct anon_vma *anon_vma = NULL;
 	struct address_space *mapping = NULL;
 
-	/*
-	 * Migratability of hugepages depends on architectures and their size.
-	 * This check is necessary because some callers of hugepage migration
-	 * like soft offline and memory hotremove don't walk through page
-	 * tables or check whether the hugepage is pmd-based or not before
-	 * kicking migration.
-	 */
-	if (!hugepage_migration_supported(page_hstate(hpage)))
-		return -ENOSYS;
-
 	if (folio_ref_count(src) == 1) {
 		/* page was freed from under us. So we are done. */
-		putback_active_hugepage(hpage);
+		folio_putback_active_hugetlb(src);
 		return MIGRATEPAGE_SUCCESS;
 	}
 
@@ -1368,7 +1466,7 @@ out_unlock:
 	folio_unlock(src);
 out:
 	if (rc == MIGRATEPAGE_SUCCESS)
-		putback_active_hugepage(hpage);
+		folio_putback_active_hugetlb(src);
 	else if (rc != -EAGAIN)
 		list_move_tail(&src->lru, ret);
 
@@ -1380,7 +1478,7 @@ out:
 	if (put_new_page)
 		put_new_page(new_hpage, private);
 	else
-		putback_active_hugepage(new_hpage);
+		folio_putback_active_hugetlb(dst);
 
 	return rc;
 }
@@ -1398,61 +1496,153 @@ static inline int try_split_folio(struct folio *folio, struct list_head *split_f
 	return rc;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define NR_MAX_BATCHED_MIGRATION	HPAGE_PMD_NR
+#else
+#define NR_MAX_BATCHED_MIGRATION	512
+#endif
+#define NR_MAX_MIGRATE_PAGES_RETRY	10
+
+struct migrate_pages_stats {
+	int nr_succeeded;	/* Normal and large folios migrated successfully, in
+				   units of base pages */
+	int nr_failed_pages;	/* Normal and large folios failed to be migrated, in
+				   units of base pages. Untried folios aren't counted */
+	int nr_thp_succeeded;	/* THP migrated successfully */
+	int nr_thp_failed;	/* THP failed to be migrated */
+	int nr_thp_split;	/* THP split before migrating */
+};
+
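Note: in migrate_pages_stats, nr_succeeded and nr_failed_pages count base pages while the nr_thp_* fields count whole THPs, so the two unit systems must not be mixed. A hypothetical helper, purely to illustrate the units (not in this diff):

	/* Hypothetical: failure ratio in percent, base-page units only. */
	static int migrate_stats_fail_pct(const struct migrate_pages_stats *stats)
	{
		int total = stats->nr_succeeded + stats->nr_failed_pages;

		return total ? stats->nr_failed_pages * 100 / total : 0;
	}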
 /*
- * migrate_pages - migrate the folios specified in a list, to the free folios
- * supplied as the target for the page migration
- *
- * @from:		The list of folios to be migrated.
- * @get_new_page:	The function used to allocate free folios to be used
- *			as the target of the folio migration.
- * @put_new_page:	The function used to free target folios if migration
- *			fails, or NULL if no special handling is necessary.
- * @private:		Private data to be passed on to get_new_page()
- * @mode:		The migration mode that specifies the constraints for
- *			folio migration, if any.
- * @reason:		The reason for folio migration.
- * @ret_succeeded:	Set to the number of folios migrated successfully if
- *			the caller passes a non-NULL pointer.
- *
- * The function returns after 10 attempts or if no folios are movable any more
- * because the list has become empty or no retryable folios exist any more.
- * It is caller's responsibility to call putback_movable_pages() to return folios
- * to the LRU or free list only if ret != 0.
- *
- * Returns the number of {normal folio, large folio, hugetlb} that were not
- * migrated, or an error code. The number of large folio splits will be
- * considered as the number of non-migrated large folio, no matter how many
- * split folios of the large folio are migrated successfully.
+ * Returns the number of hugetlb folios that were not migrated, or an error code
+ * after NR_MAX_MIGRATE_PAGES_RETRY attempts or if no hugetlb folios are movable
+ * any more because the list has become empty or no retryable hugetlb folios
+ * exist any more. It is caller's responsibility to call putback_movable_pages()
+ * only if ret != 0.
  */
-int migrate_pages(struct list_head *from, new_page_t get_new_page,
-		free_page_t put_new_page, unsigned long private,
-		enum migrate_mode mode, int reason, unsigned int *ret_succeeded)
+static int migrate_hugetlbs(struct list_head *from, new_page_t get_new_page,
+			    free_page_t put_new_page, unsigned long private,
+			    enum migrate_mode mode, int reason,
+			    struct migrate_pages_stats *stats,
+			    struct list_head *ret_folios)
 {
 	int retry = 1;
+	int nr_failed = 0;
+	int nr_retry_pages = 0;
+	int pass = 0;
+	struct folio *folio, *folio2;
+	int rc, nr_pages;
+
+	for (pass = 0; pass < NR_MAX_MIGRATE_PAGES_RETRY && retry; pass++) {
+		retry = 0;
+		nr_retry_pages = 0;
+
+		list_for_each_entry_safe(folio, folio2, from, lru) {
+			if (!folio_test_hugetlb(folio))
+				continue;
+
+			nr_pages = folio_nr_pages(folio);
+
+			cond_resched();
+
+			/*
+			 * Migratability of hugepages depends on architectures and
+			 * their size. This check is necessary because some callers
+			 * of hugepage migration like soft offline and memory
+			 * hotremove don't walk through page tables or check whether
+			 * the hugepage is pmd-based or not before kicking migration.
+			 */
+			if (!hugepage_migration_supported(folio_hstate(folio))) {
+				nr_failed++;
+				stats->nr_failed_pages += nr_pages;
+				list_move_tail(&folio->lru, ret_folios);
+				continue;
+			}
+
+			rc = unmap_and_move_huge_page(get_new_page,
+						      put_new_page, private,
+						      &folio->page, pass > 2, mode,
+						      reason, ret_folios);
+			/*
+			 * The rules are:
+			 *	Success: hugetlb folio will be put back
+			 *	-EAGAIN: stay on the from list
+			 *	-ENOMEM: stay on the from list
+			 *	Other errno: put on ret_folios list
+			 */
+			switch(rc) {
+			case -ENOMEM:
+				/*
+				 * When memory is low, don't bother to try to migrate
+				 * other folios, just exit.
+				 */
+				stats->nr_failed_pages += nr_pages + nr_retry_pages;
+				return -ENOMEM;
+			case -EAGAIN:
+				retry++;
+				nr_retry_pages += nr_pages;
+				break;
+			case MIGRATEPAGE_SUCCESS:
+				stats->nr_succeeded += nr_pages;
+				break;
+			default:
+				/*
+				 * Permanent failure (-EBUSY, etc.):
+				 * unlike -EAGAIN case, the failed folio is
+				 * removed from migration folio list and not
+				 * retried in the next outer loop.
+				 */
+				nr_failed++;
+				stats->nr_failed_pages += nr_pages;
+				break;
+			}
+		}
+	}
+	/*
+	 * nr_failed is number of hugetlb folios failed to be migrated. After
+	 * NR_MAX_MIGRATE_PAGES_RETRY attempts, give up and count retried hugetlb
+	 * folios as failed.
+	 */
+	nr_failed += retry;
+	stats->nr_failed_pages += nr_retry_pages;
+
+	return nr_failed;
+}
+
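Note: migrate_hugetlbs() keeps the classic retry shape: up to NR_MAX_MIGRATE_PAGES_RETRY passes over the list, with a fixed per-result contract. A schematic of that contract (migrate_one() is a hypothetical stand-in for unmap_and_move_huge_page()):

	for (pass = 0; pass < NR_MAX_MIGRATE_PAGES_RETRY && retry; pass++) {
		retry = 0;
		list_for_each_entry_safe(folio, folio2, from, lru) {
			switch (migrate_one(folio)) {	/* hypothetical */
			case -ENOMEM:	return -ENOMEM;	/* stop the whole run */
			case -EAGAIN:	retry++; break;	/* retry next pass */
			case MIGRATEPAGE_SUCCESS: break;/* folio put back */
			default:	nr_failed++;	/* park on ret list */
			}
		}
	}
	nr_failed += retry;	/* still-retrying folios count as failed */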
+/*
+ * migrate_pages_batch() first unmaps folios in the from list as many as
+ * possible, then move the unmapped folios.
+ */
+static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
+		free_page_t put_new_page, unsigned long private,
+		enum migrate_mode mode, int reason, struct list_head *ret_folios,
+		struct migrate_pages_stats *stats)
+{
+	int retry;
 	int large_retry = 1;
 	int thp_retry = 1;
 	int nr_failed = 0;
-	int nr_failed_pages = 0;
 	int nr_retry_pages = 0;
-	int nr_succeeded = 0;
-	int nr_thp_succeeded = 0;
 	int nr_large_failed = 0;
-	int nr_thp_failed = 0;
-	int nr_thp_split = 0;
 	int pass = 0;
 	bool is_large = false;
 	bool is_thp = false;
-	struct folio *folio, *folio2;
-	int rc, nr_pages;
-	LIST_HEAD(ret_folios);
+	struct folio *folio, *folio2, *dst = NULL, *dst2;
+	int rc, rc_saved, nr_pages;
 	LIST_HEAD(split_folios);
+	LIST_HEAD(unmap_folios);
+	LIST_HEAD(dst_folios);
 	bool nosplit = (reason == MR_NUMA_MISPLACED);
 	bool no_split_folio_counting = false;
-
-	trace_mm_migrate_pages_start(mode, reason);
-
-split_folio_migration:
-	for (pass = 0; pass < 10 && (retry || large_retry); pass++) {
+	bool avoid_force_lock;
+
+retry:
+	rc_saved = 0;
+	avoid_force_lock = false;
+	retry = 1;
+	for (pass = 0;
+	     pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry);
+	     pass++) {
 		retry = 0;
 		large_retry = 0;
 		thp_retry = 0;
 		nr_retry_pages = 0;
@@ -1464,35 +1654,15 @@ split_folio_migration:
 			 * folio. Capture required information that might get
 			 * lost during migration.
 			 */
-			is_large = folio_test_large(folio) && !folio_test_hugetlb(folio);
+			is_large = folio_test_large(folio);
 			is_thp = is_large && folio_test_pmd_mappable(folio);
 			nr_pages = folio_nr_pages(folio);
+
 			cond_resched();
 
-			if (folio_test_hugetlb(folio))
-				rc = unmap_and_move_huge_page(get_new_page,
-						put_new_page, private,
-						&folio->page, pass > 2, mode,
-						reason,
-						&ret_folios);
-			else
-				rc = unmap_and_move(get_new_page, put_new_page,
-						private, folio, pass > 2, mode,
-						reason, &ret_folios);
-			/*
-			 * The rules are:
-			 *	Success: non hugetlb folio will be freed, hugetlb
-			 *		 folio will be put back
-			 *	-EAGAIN: stay on the from list
-			 *	-ENOMEM: stay on the from list
-			 *	-ENOSYS: stay on the from list
-			 *	Other errno: put on ret_folios list then splice to
-			 *		     from list
-			 */
-			switch(rc) {
 			/*
 			 * Large folio migration might be unsupported or
-			 * the allocation could've failed so we should retry
+			 * the allocation might be failed so we should retry
 			 * on the same folio with the large folio split
 			 * to normal folios.
 			 *
 			 * Split folios are put in split_folios, and
 			 * we will migrate them after the rest of the
 			 * list is processed.
 			 */
-			case -ENOSYS:
-				/* Large folio migration is unsupported */
-				if (is_large) {
-					nr_large_failed++;
-					nr_thp_failed += is_thp;
-					if (!try_split_folio(folio, &split_folios)) {
-						nr_thp_split += is_thp;
-						break;
-					}
-				/* Hugetlb migration is unsupported */
-				} else if (!no_split_folio_counting) {
-					nr_failed++;
+			if (!thp_migration_supported() && is_thp) {
+				nr_large_failed++;
+				stats->nr_thp_failed++;
+				if (!try_split_folio(folio, &split_folios)) {
+					stats->nr_thp_split++;
+					continue;
 				}
+				stats->nr_failed_pages += nr_pages;
+				list_move_tail(&folio->lru, ret_folios);
+				continue;
+			}
 
-				nr_failed_pages += nr_pages;
-				list_move_tail(&folio->lru, &ret_folios);
-				break;
+			rc = migrate_folio_unmap(get_new_page, put_new_page, private,
+						 folio, &dst, pass > 2, avoid_force_lock,
+						 mode, reason, ret_folios);
+			/*
+			 * The rules are:
+			 *	Success: folio will be freed
+			 *	Unmap: folio will be put on unmap_folios list,
+			 *	       dst folio put on dst_folios list
+			 *	-EAGAIN: stay on the from list
+			 *	-EDEADLOCK: stay on the from list
+			 *	-ENOMEM: stay on the from list
+			 *	Other errno: put on ret_folios list
+			 */
+			switch(rc) {
 			case -ENOMEM:
 				/*
 				 * When memory is low, don't bother to try to migrate
-				 * other folios, just exit.
+				 * other folios, move unmapped folios, then exit.
 				 */
 				if (is_large) {
 					nr_large_failed++;
-					nr_thp_failed += is_thp;
+					stats->nr_thp_failed += is_thp;
 					/* Large folio NUMA faulting doesn't split to retry. */
 					if (!nosplit) {
 						int ret = try_split_folio(folio, &split_folios);
 
 						if (!ret) {
-							nr_thp_split += is_thp;
+							stats->nr_thp_split += is_thp;
 							break;
 						} else if (reason == MR_LONGTERM_PIN &&
 							   ret == -EAGAIN) {
@@ -1548,18 +1727,30 @@
 					nr_failed++;
 				}
 
-				nr_failed_pages += nr_pages + nr_retry_pages;
+				stats->nr_failed_pages += nr_pages + nr_retry_pages;
 				/*
 				 * There might be some split folios of fail-to-migrate large
-				 * folios left in split_folios list. Move them back to migration
+				 * folios left in split_folios list. Move them to ret_folios
 				 * list so that they could be put back to the right list by
 				 * the caller otherwise the folio refcnt will be leaked.
 				 */
-				list_splice_init(&split_folios, from);
+				list_splice_init(&split_folios, ret_folios);
 				/* nr_failed isn't updated for not used */
 				nr_large_failed += large_retry;
-				nr_thp_failed += thp_retry;
-				goto out;
+				stats->nr_thp_failed += thp_retry;
+				rc_saved = rc;
+				if (list_empty(&unmap_folios))
+					goto out;
+				else
+					goto move;
+			case -EDEADLOCK:
+				/*
+				 * The folio cannot be locked for potential deadlock.
+				 * Go move (and unlock) all locked folios. Then we can
+				 * try again.
+				 */
+				rc_saved = rc;
+				goto move;
 			case -EAGAIN:
 				if (is_large) {
 					large_retry++;
@@ -1570,8 +1761,17 @@
 				nr_retry_pages += nr_pages;
 				break;
 			case MIGRATEPAGE_SUCCESS:
-				nr_succeeded += nr_pages;
-				nr_thp_succeeded += is_thp;
+				stats->nr_succeeded += nr_pages;
+				stats->nr_thp_succeeded += is_thp;
+				break;
+			case MIGRATEPAGE_UNMAP:
+				/*
+				 * We have locked some folios, don't force lock
+				 * to avoid deadlock.
+ */ + avoid_force_lock = true; + list_move_tail(&folio->lru, &unmap_folios); + list_add_tail(&dst->lru, &dst_folios); break; default: /* @@ -1582,38 +1782,204 @@ split_folio_migration: */ if (is_large) { nr_large_failed++; - nr_thp_failed += is_thp; + stats->nr_thp_failed += is_thp; + } else if (!no_split_folio_counting) { + nr_failed++; + } + + stats->nr_failed_pages += nr_pages; + break; + } + } + } + nr_failed += retry; + nr_large_failed += large_retry; + stats->nr_thp_failed += thp_retry; + stats->nr_failed_pages += nr_retry_pages; +move: + /* Flush TLBs for all unmapped folios */ + try_to_unmap_flush(); + + retry = 1; + for (pass = 0; + pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry); + pass++) { + retry = 0; + large_retry = 0; + thp_retry = 0; + nr_retry_pages = 0; + + dst = list_first_entry(&dst_folios, struct folio, lru); + dst2 = list_next_entry(dst, lru); + list_for_each_entry_safe(folio, folio2, &unmap_folios, lru) { + is_large = folio_test_large(folio); + is_thp = is_large && folio_test_pmd_mappable(folio); + nr_pages = folio_nr_pages(folio); + + cond_resched(); + + rc = migrate_folio_move(put_new_page, private, + folio, dst, mode, + reason, ret_folios); + /* + * The rules are: + * Success: folio will be freed + * -EAGAIN: stay on the unmap_folios list + * Other errno: put on ret_folios list + */ + switch(rc) { + case -EAGAIN: + if (is_large) { + large_retry++; + thp_retry += is_thp; + } else if (!no_split_folio_counting) { + retry++; + } + nr_retry_pages += nr_pages; + break; + case MIGRATEPAGE_SUCCESS: + stats->nr_succeeded += nr_pages; + stats->nr_thp_succeeded += is_thp; + break; + default: + if (is_large) { + nr_large_failed++; + stats->nr_thp_failed += is_thp; } else if (!no_split_folio_counting) { nr_failed++; } - nr_failed_pages += nr_pages; + stats->nr_failed_pages += nr_pages; break; } + dst = dst2; + dst2 = list_next_entry(dst, lru); } } nr_failed += retry; nr_large_failed += large_retry; - nr_thp_failed += thp_retry; - nr_failed_pages += nr_retry_pages; + stats->nr_thp_failed += thp_retry; + stats->nr_failed_pages += nr_retry_pages; + + if (rc_saved) + rc = rc_saved; + else + rc = nr_failed + nr_large_failed; +out: + /* Cleanup remaining folios */ + dst = list_first_entry(&dst_folios, struct folio, lru); + dst2 = list_next_entry(dst, lru); + list_for_each_entry_safe(folio, folio2, &unmap_folios, lru) { + int page_was_mapped = 0; + struct anon_vma *anon_vma = NULL; + + __migrate_folio_extract(dst, &page_was_mapped, &anon_vma); + migrate_folio_undo_src(folio, page_was_mapped, anon_vma, + true, ret_folios); + list_del(&dst->lru); + migrate_folio_undo_dst(dst, true, put_new_page, private); + dst = dst2; + dst2 = list_next_entry(dst, lru); + } + /* * Try to migrate split folios of fail-to-migrate large folios, no * nr_failed counting in this round, since all split folios of a * large folio is counted as 1 failure in the first round. */ - if (!list_empty(&split_folios)) { + if (rc >= 0 && !list_empty(&split_folios)) { /* - * Move non-migrated folios (after 10 retries) to ret_folios - * to avoid migrating them again. + * Move non-migrated folios (after NR_MAX_MIGRATE_PAGES_RETRY + * retries) to ret_folios to avoid migrating them again. 
+
+/*
+ * migrate_pages - migrate the folios specified in a list, to the free folios
+ * supplied as the target for the page migration
+ *
+ * @from:		The list of folios to be migrated.
+ * @get_new_page:	The function used to allocate free folios to be used
+ *			as the target of the folio migration.
+ * @put_new_page:	The function used to free target folios if migration
+ *			fails, or NULL if no special handling is necessary.
+ * @private:		Private data to be passed on to get_new_page()
+ * @mode:		The migration mode that specifies the constraints for
+ *			folio migration, if any.
+ * @reason:		The reason for folio migration.
+ * @ret_succeeded:	Set to the number of folios migrated successfully if
+ *			the caller passes a non-NULL pointer.
+ *
+ * The function returns after NR_MAX_MIGRATE_PAGES_RETRY attempts or if no folios
+ * are movable any more because the list has become empty or no retryable folios
+ * exist any more. It is caller's responsibility to call putback_movable_pages()
+ * only if ret != 0.
+ *
+ * Returns the number of {normal folio, large folio, hugetlb} that were not
+ * migrated, or an error code. The number of large folio splits will be
+ * considered as the number of non-migrated large folio, no matter how many
+ * split folios of the large folio are migrated successfully.
+ */
+int migrate_pages(struct list_head *from, new_page_t get_new_page,
+		free_page_t put_new_page, unsigned long private,
+		enum migrate_mode mode, int reason, unsigned int *ret_succeeded)
+{
+	int rc, rc_gather;
+	int nr_pages;
+	struct folio *folio, *folio2;
+	LIST_HEAD(folios);
+	LIST_HEAD(ret_folios);
+	struct migrate_pages_stats stats;
+
+	trace_mm_migrate_pages_start(mode, reason);
+
+	memset(&stats, 0, sizeof(stats));
+
+	rc_gather = migrate_hugetlbs(from, get_new_page, put_new_page, private,
+				     mode, reason, &stats, &ret_folios);
+	if (rc_gather < 0)
+		goto out;
+again:
+	nr_pages = 0;
+	list_for_each_entry_safe(folio, folio2, from, lru) {
+		/* Retried hugetlb folios will be kept in list  */
+		if (folio_test_hugetlb(folio)) {
+			list_move_tail(&folio->lru, &ret_folios);
+			continue;
+		}
+
+		nr_pages += folio_nr_pages(folio);
+		if (nr_pages > NR_MAX_BATCHED_MIGRATION)
+			break;
+	}
+	if (nr_pages > NR_MAX_BATCHED_MIGRATION)
+		list_cut_before(&folios, from, &folio->lru);
+	else
+		list_splice_init(from, &folios);
+	rc = migrate_pages_batch(&folios, get_new_page, put_new_page, private,
+				 mode, reason, &ret_folios, &stats);
+	list_splice_tail_init(&folios, &ret_folios);
+	if (rc < 0) {
+		rc_gather = rc;
+		goto out;
+	}
+	rc_gather += rc;
+	if (!list_empty(from))
+		goto again;
 out:
 	/*
 	 * Put the permanent failure folio back to migration list, they
@@ -1626,20 +1992,21 @@ out:
 	 * are migrated successfully.
 	 */
 	if (list_empty(from))
-		rc = 0;
+		rc_gather = 0;
 
-	count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
-	count_vm_events(PGMIGRATE_FAIL, nr_failed_pages);
-	count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded);
-	count_vm_events(THP_MIGRATION_FAIL, nr_thp_failed);
-	count_vm_events(THP_MIGRATION_SPLIT, nr_thp_split);
-	trace_mm_migrate_pages(nr_succeeded, nr_failed_pages, nr_thp_succeeded,
-			       nr_thp_failed, nr_thp_split, mode, reason);
+	count_vm_events(PGMIGRATE_SUCCESS, stats.nr_succeeded);
+	count_vm_events(PGMIGRATE_FAIL, stats.nr_failed_pages);
+	count_vm_events(THP_MIGRATION_SUCCESS, stats.nr_thp_succeeded);
+	count_vm_events(THP_MIGRATION_FAIL, stats.nr_thp_failed);
+	count_vm_events(THP_MIGRATION_SPLIT, stats.nr_thp_split);
+	trace_mm_migrate_pages(stats.nr_succeeded, stats.nr_failed_pages,
+			       stats.nr_thp_succeeded, stats.nr_thp_failed,
+			       stats.nr_thp_split, mode, reason);
 
 	if (ret_succeeded)
-		*ret_succeeded = nr_succeeded;
+		*ret_succeeded = stats.nr_succeeded;
 
-	return rc;
+	return rc_gather;
 }
 
 struct page *alloc_migration_target(struct page *page, unsigned long private)
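Note: the external contract of migrate_pages() is unchanged by the batching rework: a nonzero return means some folios were not migrated and the caller must put them back. A typical caller shape, condensed and hypothetical (pagelist and mtc stand for a folio list and a migration_target_control set up elsewhere):

	unsigned int nr_succeeded = 0;
	int failed;

	failed = migrate_pages(&pagelist, alloc_migration_target, NULL,
			       (unsigned long)&mtc, MIGRATE_SYNC,
			       MR_MEMORY_HOTPLUG, &nr_succeeded);
	if (failed)
		putback_movable_pages(&pagelist);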
@@ -1648,6 +2015,7 @@
 	struct migration_target_control *mtc;
 	gfp_t gfp_mask;
 	unsigned int order = 0;
+	struct folio *hugetlb_folio = NULL;
 	struct folio *new_folio = NULL;
 	int nid;
 	int zidx;
@@ -1662,7 +2030,9 @@ struct page *alloc_migration_target(struct page *page, unsigned long private)
 		struct hstate *h = folio_hstate(folio);
 
 		gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
-		return alloc_huge_page_nodemask(h, nid, mtc->nmask, gfp_mask);
+		hugetlb_folio = alloc_hugetlb_folio_nodemask(h, nid,
+						mtc->nmask, gfp_mask);
+		return &hugetlb_folio->page;
 	}
 
 	if (folio_test_large(folio)) {
@@ -1727,6 +2097,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
 	struct vm_area_struct *vma;
 	struct page *page;
 	int err;
+	bool isolated;
 
 	mmap_read_lock(mm);
 	err = -EFAULT;
@@ -1758,17 +2129,18 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
 
 	if (PageHuge(page)) {
 		if (PageHead(page)) {
-			err = isolate_hugetlb(page, pagelist);
-			if (!err)
-				err = 1;
+			isolated = isolate_hugetlb(page_folio(page), pagelist);
+			err = isolated ? 1 : -EBUSY;
 		}
 	} else {
 		struct page *head;
 
 		head = compound_head(page);
-		err = isolate_lru_page(head);
-		if (err)
+		isolated = isolate_lru_page(head);
+		if (!isolated) {
+			err = -EBUSY;
 			goto out_putpage;
+		}
 
 		err = 1;
 		list_add_tail(&head->lru, pagelist);
@@ -2173,7 +2545,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 		return 0;
 	}
 
-	if (isolate_lru_page(page))
+	if (!isolate_lru_page(page))
 		return 0;
 
 	mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_is_file_lru(page),
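Note: the tail of the diff belongs to the same API cleanup: isolate_lru_page() and isolate_hugetlb() now return bool, with true meaning the page was isolated, so the old "0 means success" tests invert, as in the numamigrate_isolate_page() hunk above. The new caller idiom looks like this (hypothetical fragment; pagelist is an assumption):

	/* bool result: true on successful isolation. */
	if (!isolate_lru_page(page))
		return 0;	/* somebody else holds it; skip */
	list_add_tail(&page->lru, &pagelist);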