diff options
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- | mm/huge_memory.c | 106 |
1 files changed, 73 insertions, 33 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index bccd5a628ea6..65c98eb5483c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -130,8 +130,14 @@ static int set_recommended_min_free_kbytes(void) (unsigned long) nr_free_buffer_pages() / 20); recommended_min <<= (PAGE_SHIFT-10); - if (recommended_min > min_free_kbytes) + if (recommended_min > min_free_kbytes) { + if (user_min_free_kbytes >= 0) + pr_info("raising min_free_kbytes from %d to %lu " + "to help transparent hugepage allocations\n", + min_free_kbytes, recommended_min); + min_free_kbytes = recommended_min; + } setup_per_zone_wmarks(); return 0; } @@ -655,7 +661,7 @@ out: hugepage_exit_sysfs(hugepage_kobj); return err; } -module_init(hugepage_init) +subsys_initcall(hugepage_init); static int __init setup_transparent_hugepage(char *str) { @@ -712,7 +718,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, pgtable_t pgtable; spinlock_t *ptl; - VM_BUG_ON(!PageCompound(page)); + VM_BUG_ON_PAGE(!PageCompound(page), page); pgtable = pte_alloc_one(mm, haddr); if (unlikely(!pgtable)) return VM_FAULT_OOM; @@ -882,6 +888,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, ret = 0; goto out_unlock; } + if (unlikely(pmd_trans_splitting(pmd))) { /* split huge page running from under us */ spin_unlock(src_ptl); @@ -892,7 +899,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, goto out; } src_page = pmd_page(pmd); - VM_BUG_ON(!PageHead(src_page)); + VM_BUG_ON_PAGE(!PageHead(src_page), src_page); get_page(src_page); page_dup_rmap(src_page); add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR); @@ -1066,7 +1073,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_same(*pmd, orig_pmd))) goto out_free_pages; - VM_BUG_ON(!PageHead(page)); + VM_BUG_ON_PAGE(!PageHead(page), page); pmdp_clear_flush(vma, haddr, pmd); /* leave pmd empty until pte is filled */ @@ -1132,7 +1139,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, goto out_unlock; page = pmd_page(orig_pmd); - VM_BUG_ON(!PageCompound(page) || !PageHead(page)); + VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page); if (page_mapcount(page) == 1) { pmd_t entry; entry = pmd_mkyoung(orig_pmd); @@ -1153,7 +1160,7 @@ alloc: new_page = NULL; if (unlikely(!new_page)) { - if (is_huge_zero_pmd(orig_pmd)) { + if (!page) { ret = do_huge_pmd_wp_zero_page_fallback(mm, vma, address, pmd, orig_pmd, haddr); } else { @@ -1180,7 +1187,7 @@ alloc: count_vm_event(THP_FAULT_ALLOC); - if (is_huge_zero_pmd(orig_pmd)) + if (!page) clear_huge_page(new_page, haddr, HPAGE_PMD_NR); else copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR); @@ -1206,11 +1213,11 @@ alloc: page_add_new_anon_rmap(new_page, vma, haddr); set_pmd_at(mm, haddr, pmd, entry); update_mmu_cache_pmd(vma, address, pmd); - if (is_huge_zero_pmd(orig_pmd)) { + if (!page) { add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); put_huge_zero_page(); } else { - VM_BUG_ON(!PageHead(page)); + VM_BUG_ON_PAGE(!PageHead(page), page); page_remove_rmap(page); put_page(page); } @@ -1243,8 +1250,12 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd)) return ERR_PTR(-EFAULT); + /* Full NUMA hinting faults to serialise migration in fault paths */ + if ((flags & FOLL_NUMA) && pmd_numa(*pmd)) + goto out; + page = pmd_page(*pmd); - VM_BUG_ON(!PageHead(page)); + VM_BUG_ON_PAGE(!PageHead(page), page); if (flags & FOLL_TOUCH) { pmd_t _pmd; /* @@ -1269,7 +1280,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, } } page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT; - VM_BUG_ON(!PageCompound(page)); + VM_BUG_ON_PAGE(!PageCompound(page), page); if (flags & FOLL_GET) get_page_foll(page); @@ -1295,6 +1306,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(!pmd_same(pmd, *pmdp))) goto out_unlock; + /* + * If there are potential migrations, wait for completion and retry + * without disrupting NUMA hinting information. Do not relock and + * check_same as the page may no longer be mapped. + */ + if (unlikely(pmd_trans_migrating(*pmdp))) { + spin_unlock(ptl); + wait_migrate_huge_page(vma->anon_vma, pmdp); + goto out; + } + page = pmd_page(pmd); BUG_ON(is_huge_zero_page(page)); page_nid = page_to_nid(page); @@ -1323,23 +1345,22 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, /* If the page was locked, there are no parallel migrations */ if (page_locked) goto clear_pmdnuma; + } - /* - * Otherwise wait for potential migrations and retry. We do - * relock and check_same as the page may no longer be mapped. - * As the fault is being retried, do not account for it. - */ + /* Migration could have started since the pmd_trans_migrating check */ + if (!page_locked) { spin_unlock(ptl); wait_on_page_locked(page); page_nid = -1; goto out; } - /* Page is misplaced, serialise migrations and parallel THP splits */ + /* + * Page is misplaced. Page lock serialises migrations. Acquire anon_vma + * to serialises splits + */ get_page(page); spin_unlock(ptl); - if (!page_locked) - lock_page(page); anon_vma = page_lock_anon_vma_read(page); /* Confirm the PMD did not change while page_table_lock was released */ @@ -1351,6 +1372,13 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, goto out_unlock; } + /* Bail if we fail to protect against THP splits for any reason */ + if (unlikely(!anon_vma)) { + put_page(page); + page_nid = -1; + goto clear_pmdnuma; + } + /* * Migrate the THP to the requested node, returns with page unlocked * and pmd_numa cleared. @@ -1410,9 +1438,9 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, } else { page = pmd_page(orig_pmd); page_remove_rmap(page); - VM_BUG_ON(page_mapcount(page) < 0); + VM_BUG_ON_PAGE(page_mapcount(page) < 0, page); add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); - VM_BUG_ON(!PageHead(page)); + VM_BUG_ON_PAGE(!PageHead(page), page); atomic_long_dec(&tlb->mm->nr_ptes); spin_unlock(ptl); tlb_remove_page(tlb, page); @@ -1481,8 +1509,18 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, pmd = pmdp_get_and_clear(mm, old_addr, old_pmd); VM_BUG_ON(!pmd_none(*new_pmd)); set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); - if (new_ptl != old_ptl) + if (new_ptl != old_ptl) { + pgtable_t pgtable; + + /* + * Move preallocated PTE page table if new_pmd is on + * different PMD page table. + */ + pgtable = pgtable_trans_huge_withdraw(mm, old_pmd); + pgtable_trans_huge_deposit(mm, new_pmd, pgtable); + spin_unlock(new_ptl); + } spin_unlock(old_ptl); } out: @@ -1507,6 +1545,8 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, ret = 1; if (!prot_numa) { entry = pmdp_get_and_clear(mm, addr, pmd); + if (pmd_numa(entry)) + entry = pmd_mknonnuma(entry); entry = pmd_modify(entry, newprot); ret = HPAGE_PMD_NR; BUG_ON(pmd_write(entry)); @@ -1521,7 +1561,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, */ if (!is_huge_zero_page(page) && !pmd_numa(*pmd)) { - entry = pmdp_get_and_clear(mm, addr, pmd); + entry = *pmd; entry = pmd_mknuma(entry); ret = HPAGE_PMD_NR; } @@ -2142,9 +2182,9 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, if (unlikely(!page)) goto out; - VM_BUG_ON(PageCompound(page)); - BUG_ON(!PageAnon(page)); - VM_BUG_ON(!PageSwapBacked(page)); + VM_BUG_ON_PAGE(PageCompound(page), page); + VM_BUG_ON_PAGE(!PageAnon(page), page); + VM_BUG_ON_PAGE(!PageSwapBacked(page), page); /* cannot use mapcount: can't collapse if there's a gup pin */ if (page_count(page) != 1) @@ -2167,8 +2207,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, } /* 0 stands for page_is_file_cache(page) == false */ inc_zone_page_state(page, NR_ISOLATED_ANON + 0); - VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(PageLRU(page), page); /* If there is no mapped pte young don't collapse the page */ if (pte_young(pteval) || PageReferenced(page) || @@ -2198,7 +2238,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, } else { src_page = pte_page(pteval); copy_user_highpage(page, src_page, address, vma); - VM_BUG_ON(page_mapcount(src_page) != 1); + VM_BUG_ON_PAGE(page_mapcount(src_page) != 1, src_page); release_pte_page(src_page); /* * ptl mostly unnecessary, but preempt has to @@ -2277,7 +2317,7 @@ static struct page struct vm_area_struct *vma, unsigned long address, int node) { - VM_BUG_ON(*hpage); + VM_BUG_ON_PAGE(*hpage, *hpage); /* * Allocate the page while the vma is still valid and under * the mmap_sem read mode so there is no memory allocation @@ -2546,7 +2586,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, */ node = page_to_nid(page); khugepaged_node_load[node]++; - VM_BUG_ON(PageCompound(page)); + VM_BUG_ON_PAGE(PageCompound(page), page); if (!PageLRU(page) || PageLocked(page) || !PageAnon(page)) goto out_unmap; /* cannot use mapcount: can't collapse if there's a gup pin */ @@ -2842,7 +2882,7 @@ again: return; } page = pmd_page(*pmd); - VM_BUG_ON(!page_count(page)); + VM_BUG_ON_PAGE(!page_count(page), page); get_page(page); spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); |