diff options
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- | mm/huge_memory.c | 87 |
1 files changed, 69 insertions, 18 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 86f9f8b82f8e..b49ee126d4d1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -232,7 +232,7 @@ retry: return READ_ONCE(huge_zero_page); } -static void put_huge_zero_page(void) +void put_huge_zero_page(void) { /* * Counter should never go to zero here. Only shrinker can put @@ -1298,15 +1298,9 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page); /* * We can only reuse the page if nobody else maps the huge page or it's - * part. We can do it by checking page_mapcount() on each sub-page, but - * it's expensive. - * The cheaper way is to check page_count() to be equal 1: every - * mapcount takes page reference reference, so this way we can - * guarantee, that the PMD is the only mapping. - * This can give false negative if somebody pinned the page, but that's - * fine. + * part. */ - if (page_mapcount(page) == 1 && page_count(page) == 1) { + if (page_trans_huge_mapcount(page, NULL) == 1) { pmd_t entry; entry = pmd_mkyoung(orig_pmd); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); @@ -1684,12 +1678,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, if (vma_is_dax(vma)) { spin_unlock(ptl); if (is_huge_zero_pmd(orig_pmd)) - put_huge_zero_page(); + tlb_remove_page(tlb, pmd_page(orig_pmd)); } else if (is_huge_zero_pmd(orig_pmd)) { pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd)); atomic_long_dec(&tlb->mm->nr_ptes); spin_unlock(ptl); - put_huge_zero_page(); + tlb_remove_page(tlb, pmd_page(orig_pmd)); } else { struct page *page = pmd_page(orig_pmd); page_remove_rmap(page, true); @@ -1960,10 +1954,9 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma, * page fault if needed. */ return 0; - if (vma->vm_ops) + if (vma->vm_ops || (vm_flags & VM_NO_THP)) /* khugepaged not yet working on file or special mappings */ return 0; - VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma); hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; hend = vma->vm_end & HPAGE_PMD_MASK; if (hstart < hend) @@ -2080,7 +2073,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, if (pte_write(pteval)) { writable = true; } else { - if (PageSwapCache(page) && !reuse_swap_page(page)) { + if (PageSwapCache(page) && + !reuse_swap_page(page, NULL)) { unlock_page(page); result = SCAN_SWAP_CACHE_PAGE; goto out; @@ -2352,8 +2346,7 @@ static bool hugepage_vma_check(struct vm_area_struct *vma) return false; if (is_vma_temporary_stack(vma)) return false; - VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma); - return true; + return !(vma->vm_flags & VM_NO_THP); } static void collapse_huge_page(struct mm_struct *mm, @@ -3225,6 +3218,64 @@ int total_mapcount(struct page *page) } /* + * This calculates accurately how many mappings a transparent hugepage + * has (unlike page_mapcount() which isn't fully accurate). This full + * accuracy is primarily needed to know if copy-on-write faults can + * reuse the page and change the mapping to read-write instead of + * copying them. At the same time this returns the total_mapcount too. + * + * The function returns the highest mapcount any one of the subpages + * has. If the return value is one, even if different processes are + * mapping different subpages of the transparent hugepage, they can + * all reuse it, because each process is reusing a different subpage. + * + * The total_mapcount is instead counting all virtual mappings of the + * subpages. If the total_mapcount is equal to "one", it tells the + * caller all mappings belong to the same "mm" and in turn the + * anon_vma of the transparent hugepage can become the vma->anon_vma + * local one as no other process may be mapping any of the subpages. + * + * It would be more accurate to replace page_mapcount() with + * page_trans_huge_mapcount(), however we only use + * page_trans_huge_mapcount() in the copy-on-write faults where we + * need full accuracy to avoid breaking page pinning, because + * page_trans_huge_mapcount() is slower than page_mapcount(). + */ +int page_trans_huge_mapcount(struct page *page, int *total_mapcount) +{ + int i, ret, _total_mapcount, mapcount; + + /* hugetlbfs shouldn't call it */ + VM_BUG_ON_PAGE(PageHuge(page), page); + + if (likely(!PageTransCompound(page))) { + mapcount = atomic_read(&page->_mapcount) + 1; + if (total_mapcount) + *total_mapcount = mapcount; + return mapcount; + } + + page = compound_head(page); + + _total_mapcount = ret = 0; + for (i = 0; i < HPAGE_PMD_NR; i++) { + mapcount = atomic_read(&page[i]._mapcount) + 1; + ret = max(ret, mapcount); + _total_mapcount += mapcount; + } + if (PageDoubleMap(page)) { + ret -= 1; + _total_mapcount -= HPAGE_PMD_NR; + } + mapcount = compound_mapcount(page); + ret += mapcount; + _total_mapcount += mapcount; + if (total_mapcount) + *total_mapcount = _total_mapcount; + return ret; +} + +/* * This function splits huge page into normal pages. @page can point to any * subpage of huge page to split. Split doesn't change the position of @page. * @@ -3454,7 +3505,7 @@ next: } } - pr_info("%lu of %lu THP split", split, total); + pr_info("%lu of %lu THP split\n", split, total); return 0; } @@ -3465,7 +3516,7 @@ static int __init split_huge_pages_debugfs(void) { void *ret; - ret = debugfs_create_file("split_huge_pages", 0644, NULL, NULL, + ret = debugfs_create_file("split_huge_pages", 0200, NULL, NULL, &split_huge_pages_fops); if (!ret) pr_warn("Failed to create split_huge_pages in debugfs"); |