Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--	mm/huge_memory.c | 176
1 files changed, 89 insertions, 87 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1b791b26d72d..4fc43859e59a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -119,7 +119,8 @@ bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags,
 	 * own flags.
 	 */
 	if (!in_pf && shmem_file(vma->vm_file))
-		return shmem_huge_enabled(vma, !enforce_sysfs);
+		return shmem_is_huge(file_inode(vma->vm_file), vma->vm_pgoff,
+				     !enforce_sysfs, vma->vm_mm, vm_flags);
 
 	/* Enforce sysfs THP requirements as necessary */
 	if (enforce_sysfs &&
@@ -559,10 +560,11 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 }
 
 #ifdef CONFIG_MEMCG
-static inline struct deferred_split *get_deferred_split_queue(struct page *page)
+static inline
+struct deferred_split *get_deferred_split_queue(struct folio *folio)
 {
-	struct mem_cgroup *memcg = page_memcg(compound_head(page));
-	struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+	struct mem_cgroup *memcg = folio_memcg(folio);
+	struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
 
 	if (memcg)
 		return &memcg->deferred_split_queue;
@@ -570,9 +572,10 @@ static inline struct deferred_split *get_deferred_split_queue(struct page *page)
 		return &pgdat->deferred_split_queue;
 }
 #else
-static inline struct deferred_split *get_deferred_split_queue(struct page *page)
+static inline
+struct deferred_split *get_deferred_split_queue(struct folio *folio)
 {
-	struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+	struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
 
 	return &pgdat->deferred_split_queue;
 }
@@ -580,23 +583,23 @@ static inline struct deferred_split *get_deferred_split_queue(struct page *page)
 
 void prep_transhuge_page(struct page *page)
 {
-	/*
-	 * we use page->mapping and page->index in second tail page
-	 * as list_head: assuming THP order >= 2
-	 */
+	struct folio *folio = (struct folio *)page;
 
-	INIT_LIST_HEAD(page_deferred_list(page));
+	VM_BUG_ON_FOLIO(folio_order(folio) < 2, folio);
+	INIT_LIST_HEAD(&folio->_deferred_list);
 	set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
 }
 
 static inline bool is_transparent_hugepage(struct page *page)
 {
+	struct folio *folio;
+
 	if (!PageCompound(page))
 		return false;
 
-	page = compound_head(page);
-	return is_huge_zero_page(page) ||
-	       page[1].compound_dtor == TRANSHUGE_PAGE_DTOR;
+	folio = page_folio(page);
+	return is_huge_zero_page(&folio->page) ||
+	       folio->_folio_dtor == TRANSHUGE_PAGE_DTOR;
 }
 
 static unsigned long __thp_get_unmapped_area(struct file *filp,
@@ -1039,11 +1042,6 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 
 	assert_spin_locked(pmd_lockptr(mm, pmd));
 
-	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
-	if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
-			 (FOLL_PIN | FOLL_GET)))
-		return NULL;
-
 	if (flags & FOLL_WRITE && !pmd_write(*pmd))
 		return NULL;
 
@@ -1202,11 +1200,6 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
 	if (flags & FOLL_WRITE && !pud_write(*pud))
 		return NULL;
 
-	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
-	if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
-			 (FOLL_PIN | FOLL_GET)))
-		return NULL;
-
 	if (pud_present(*pud) && pud_devmap(*pud))
 		/* pass */;
 	else
@@ -1603,7 +1596,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 {
 	spinlock_t *ptl;
 	pmd_t orig_pmd;
-	struct page *page;
+	struct folio *folio;
 	struct mm_struct *mm = tlb->mm;
 	bool ret = false;
 
@@ -1623,15 +1616,15 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		goto out;
 	}
 
-	page = pmd_page(orig_pmd);
+	folio = pfn_folio(pmd_pfn(orig_pmd));
 	/*
-	 * If other processes are mapping this page, we couldn't discard
-	 * the page unless they all do MADV_FREE so let's skip the page.
+	 * If other processes are mapping this folio, we couldn't discard
+	 * the folio unless they all do MADV_FREE so let's skip the folio.
 	 */
-	if (total_mapcount(page) != 1)
+	if (folio_mapcount(folio) != 1)
 		goto out;
 
-	if (!trylock_page(page))
+	if (!folio_trylock(folio))
 		goto out;
 
 	/*
@@ -1639,17 +1632,17 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	 * will deactivate only them.
 	 */
 	if (next - addr != HPAGE_PMD_SIZE) {
-		get_page(page);
+		folio_get(folio);
 		spin_unlock(ptl);
-		split_huge_page(page);
-		unlock_page(page);
-		put_page(page);
+		split_folio(folio);
+		folio_unlock(folio);
+		folio_put(folio);
 		goto out_unlocked;
 	}
 
-	if (PageDirty(page))
-		ClearPageDirty(page);
-	unlock_page(page);
+	if (folio_test_dirty(folio))
+		folio_clear_dirty(folio);
+	folio_unlock(folio);
 
 	if (pmd_young(orig_pmd) || pmd_dirty(orig_pmd)) {
 		pmdp_invalidate(vma, addr, pmd);
@@ -1660,7 +1653,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
 	}
 
-	mark_page_lazyfree(page);
+	folio_mark_lazyfree(folio);
 	ret = true;
 out:
 	spin_unlock(ptl);
@@ -1920,17 +1913,15 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	oldpmd = pmdp_invalidate_ad(vma, addr, pmd);
 
 	entry = pmd_modify(oldpmd, newprot);
-	if (uffd_wp) {
-		entry = pmd_wrprotect(entry);
+	if (uffd_wp)
 		entry = pmd_mkuffd_wp(entry);
-	} else if (uffd_wp_resolve) {
+	else if (uffd_wp_resolve)
 		/*
 		 * Leave the write bit to be handled by PF interrupt
 		 * handler, then things like COW could be properly
 		 * handled.
 		 */
 		entry = pmd_clear_uffd_wp(entry);
-	}
 
 	/* See change_pte_range(). */
 	if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pmd_write(entry) &&
@@ -2022,7 +2013,7 @@ void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
 	spinlock_t *ptl;
 	struct mmu_notifier_range range;
 
-	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,
 				address & HPAGE_PUD_MASK,
 				(address & HPAGE_PUD_MASK) + HPAGE_PUD_SIZE);
 	mmu_notifier_invalidate_range_start(&range);
@@ -2284,7 +2275,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	spinlock_t *ptl;
 	struct mmu_notifier_range range;
 
-	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,
 				address & HPAGE_PMD_MASK,
 				(address & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE);
 	mmu_notifier_invalidate_range_start(&range);
@@ -2479,9 +2470,9 @@ static void __split_huge_page_tail(struct page *head, int tail,
 	 * of swap cache pages that store the swp_entry_t in tail pages.
 	 * Fix up and warn once if private is unexpectedly set.
 	 *
-	 * What of 32-bit systems, on which head[1].compound_pincount overlays
+	 * What of 32-bit systems, on which folio->_pincount overlays
 	 * head[1].private? No problem: THP_SWAP is not enabled on 32-bit, and
-	 * compound_pincount must be 0 for folio_ref_freeze() to have succeeded.
+	 * pincount must be 0 for folio_ref_freeze() to have succeeded.
 	 */
 	if (!folio_test_swapcache(page_folio(head))) {
 		VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail);
@@ -2652,7 +2643,7 @@ bool can_split_folio(struct folio *folio, int *pextra_pins)
 int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
 	struct folio *folio = page_folio(page);
-	struct deferred_split *ds_queue = get_deferred_split_queue(&folio->page);
+	struct deferred_split *ds_queue = get_deferred_split_queue(folio);
 	XA_STATE(xas, &folio->mapping->i_pages, folio->index);
 	struct anon_vma *anon_vma = NULL;
 	struct address_space *mapping = NULL;
@@ -2756,9 +2747,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	/* Prevent deferred_split_scan() touching ->_refcount */
 	spin_lock(&ds_queue->split_queue_lock);
 	if (folio_ref_freeze(folio, 1 + extra_pins)) {
-		if (!list_empty(page_deferred_list(&folio->page))) {
+		if (!list_empty(&folio->_deferred_list)) {
 			ds_queue->split_queue_len--;
-			list_del(page_deferred_list(&folio->page));
+			list_del(&folio->_deferred_list);
 		}
 		spin_unlock(&ds_queue->split_queue_lock);
 		if (mapping) {
@@ -2802,49 +2793,53 @@ out:
 
 void free_transhuge_page(struct page *page)
 {
-	struct deferred_split *ds_queue = get_deferred_split_queue(page);
+	struct folio *folio = (struct folio *)page;
+	struct deferred_split *ds_queue = get_deferred_split_queue(folio);
 	unsigned long flags;
 
 	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
-	if (!list_empty(page_deferred_list(page))) {
+	if (!list_empty(&folio->_deferred_list)) {
 		ds_queue->split_queue_len--;
-		list_del(page_deferred_list(page));
+		list_del(&folio->_deferred_list);
 	}
 	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
 	free_compound_page(page);
 }
 
-void deferred_split_huge_page(struct page *page)
+void deferred_split_folio(struct folio *folio)
 {
-	struct deferred_split *ds_queue = get_deferred_split_queue(page);
+	struct deferred_split *ds_queue = get_deferred_split_queue(folio);
 #ifdef CONFIG_MEMCG
-	struct mem_cgroup *memcg = page_memcg(compound_head(page));
+	struct mem_cgroup *memcg = folio_memcg(folio);
 #endif
 	unsigned long flags;
 
-	VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+	VM_BUG_ON_FOLIO(folio_order(folio) < 2, folio);
 
 	/*
 	 * The try_to_unmap() in page reclaim path might reach here too,
 	 * this may cause a race condition to corrupt deferred split queue.
-	 * And, if page reclaim is already handling the same page, it is
+	 * And, if page reclaim is already handling the same folio, it is
 	 * unnecessary to handle it again in shrinker.
 	 *
-	 * Check PageSwapCache to determine if the page is being
-	 * handled by page reclaim since THP swap would add the page into
+	 * Check the swapcache flag to determine if the folio is being
+	 * handled by page reclaim since THP swap would add the folio into
	 * swap cache before calling try_to_unmap().
 	 */
-	if (PageSwapCache(page))
+	if (folio_test_swapcache(folio))
+		return;
+
+	if (!list_empty(&folio->_deferred_list))
 		return;
 
 	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
-	if (list_empty(page_deferred_list(page))) {
+	if (list_empty(&folio->_deferred_list)) {
 		count_vm_event(THP_DEFERRED_SPLIT_PAGE);
-		list_add_tail(page_deferred_list(page), &ds_queue->split_queue);
+		list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
 		ds_queue->split_queue_len++;
 #ifdef CONFIG_MEMCG
 		if (memcg)
-			set_shrinker_bit(memcg, page_to_nid(page),
+			set_shrinker_bit(memcg, folio_nid(folio),
 					 deferred_split_shrinker.id);
 #endif
 	}
@@ -2870,8 +2865,8 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
 	struct pglist_data *pgdata = NODE_DATA(sc->nid);
 	struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
 	unsigned long flags;
-	LIST_HEAD(list), *pos, *next;
-	struct page *page;
+	LIST_HEAD(list);
+	struct folio *folio, *next;
 	int split = 0;
 
 #ifdef CONFIG_MEMCG
@@ -2881,14 +2876,13 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
 
 	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
 	/* Take pin on all head pages to avoid freeing them under us */
-	list_for_each_safe(pos, next, &ds_queue->split_queue) {
-		page = list_entry((void *)pos, struct page, deferred_list);
-		page = compound_head(page);
-		if (get_page_unless_zero(page)) {
-			list_move(page_deferred_list(page), &list);
+	list_for_each_entry_safe(folio, next, &ds_queue->split_queue,
+							_deferred_list) {
+		if (folio_try_get(folio)) {
+			list_move(&folio->_deferred_list, &list);
 		} else {
-			/* We lost race with put_compound_page() */
-			list_del_init(page_deferred_list(page));
+			/* We lost race with folio_put() */
+			list_del_init(&folio->_deferred_list);
 			ds_queue->split_queue_len--;
 		}
 		if (!--sc->nr_to_scan)
@@ -2896,16 +2890,15 @@
 	}
 	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
 
-	list_for_each_safe(pos, next, &list) {
-		page = list_entry((void *)pos, struct page, deferred_list);
-		if (!trylock_page(page))
+	list_for_each_entry_safe(folio, next, &list, _deferred_list) {
+		if (!folio_trylock(folio))
 			goto next;
 		/* split_huge_page() removes page from list on success */
-		if (!split_huge_page(page))
+		if (!split_folio(folio))
 			split++;
-		unlock_page(page);
+		folio_unlock(folio);
 next:
-		put_page(page);
+		folio_put(folio);
 	}
 
 	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
@@ -2934,6 +2927,7 @@ static void split_huge_pages_all(void)
 {
 	struct zone *zone;
 	struct page *page;
+	struct folio *folio;
 	unsigned long pfn, max_zone_pfn;
 	unsigned long total = 0, split = 0;
 
@@ -2946,24 +2940,32 @@ static void split_huge_pages_all(void)
 			int nr_pages;
 
 			page = pfn_to_online_page(pfn);
-			if (!page || !get_page_unless_zero(page))
+			if (!page || PageTail(page))
+				continue;
+			folio = page_folio(page);
+			if (!folio_try_get(folio))
 				continue;
 
-			if (zone != page_zone(page))
+			if (unlikely(page_folio(page) != folio))
 				goto next;
 
-			if (!PageHead(page) || PageHuge(page) || !PageLRU(page))
+			if (zone != folio_zone(folio))
+				goto next;
+
+			if (!folio_test_large(folio)
+				|| folio_test_hugetlb(folio)
+				|| !folio_test_lru(folio))
 				goto next;
 
 			total++;
-			lock_page(page);
-			nr_pages = thp_nr_pages(page);
-			if (!split_huge_page(page))
+			folio_lock(folio);
+			nr_pages = folio_nr_pages(folio);
+			if (!split_folio(folio))
 				split++;
 			pfn += nr_pages - 1;
-			unlock_page(page);
+			folio_unlock(folio);
 next:
-			put_page(page);
+			folio_put(folio);
 			cond_resched();
 		}
 	}
@@ -3273,7 +3275,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
 	if (pmd_swp_soft_dirty(*pvmw->pmd))
 		pmde = pmd_mksoft_dirty(pmde);
 	if (pmd_swp_uffd_wp(*pvmw->pmd))
-		pmde = pmd_wrprotect(pmd_mkuffd_wp(pmde));
+		pmde = pmd_mkuffd_wp(pmde);
 	if (!is_migration_entry_young(entry))
 		pmde = pmd_mkold(pmde);
 	/* NOTE: this may contain setting soft-dirty on some archs */
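
For readers following the page-to-folio conversion above, here is a short, hypothetical caller sketch (not part of this diff) showing how a call site would move from the old deferred_split_huge_page(page) interface to the new deferred_split_folio(folio) one. The helper name note_partially_unmapped() is invented for illustration only; deferred_split_folio(), page_folio() and folio_test_large() are existing kernel APIs, and the header choices are indicative rather than exact.

/* Illustrative only: a hypothetical call site adapting to the folio API. */
#include <linux/mm.h>		/* page_folio(), folio_test_large() */
#include <linux/huge_mm.h>	/* deferred_split_folio() */

static void note_partially_unmapped(struct page *page)
{
	/*
	 * page_folio() already resolves a tail page to its head folio,
	 * so the old explicit compound_head() step disappears.
	 */
	struct folio *folio = page_folio(page);

	/* Old interface: deferred_split_huge_page(compound_head(page)); */
	if (folio_test_large(folio))
		deferred_split_folio(folio);
}

Note that, per the hunk at line 2793 above, deferred_split_folio() now also returns early when the folio already sits on a deferred split list, before taking split_queue_lock, so redundant calls from such a caller are cheap.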