diff options
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- | mm/huge_memory.c | 106 |
1 files changed, 52 insertions, 54 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 817a875f2b8c..cb7be110cad3 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -171,12 +171,7 @@ static int start_khugepaged(void) } static atomic_t huge_zero_refcount; -static struct page *huge_zero_page __read_mostly; - -static inline bool is_huge_zero_page(struct page *page) -{ - return ACCESS_ONCE(huge_zero_page) == page; -} +struct page *huge_zero_page __read_mostly; static inline bool is_huge_zero_pmd(pmd_t pmd) { @@ -766,15 +761,6 @@ static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp) return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT)) | extra_gfp; } -static inline struct page *alloc_hugepage_vma(int defrag, - struct vm_area_struct *vma, - unsigned long haddr, int nd, - gfp_t extra_gfp) -{ - return alloc_pages_vma(alloc_hugepage_gfpmask(defrag, extra_gfp), - HPAGE_PMD_ORDER, vma, haddr, nd); -} - /* Caller must hold page table lock. */ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd, @@ -795,6 +781,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, unsigned int flags) { + gfp_t gfp; struct page *page; unsigned long haddr = address & HPAGE_PMD_MASK; @@ -829,8 +816,8 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, } return 0; } - page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), - vma, haddr, numa_node_id(), 0); + gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0); + page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER); if (unlikely(!page)) { count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; @@ -1118,10 +1105,12 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, spin_unlock(ptl); alloc: if (transparent_hugepage_enabled(vma) && - !transparent_hugepage_debug_cow()) - new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), - vma, haddr, numa_node_id(), 0); - else + !transparent_hugepage_debug_cow()) { + gfp_t gfp; + + gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0); + new_page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER); + } else new_page = NULL; if (unlikely(!new_page)) { @@ -1423,26 +1412,6 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, return ret; } -int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, unsigned long end, - unsigned char *vec) -{ - spinlock_t *ptl; - int ret = 0; - - if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { - /* - * All logical pages in the range are present - * if backed by a huge page. - */ - spin_unlock(ptl); - memset(vec, 1, (end - addr) >> PAGE_SHIFT); - ret = 1; - } - - return ret; -} - int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, @@ -2148,7 +2117,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, { struct page *page; pte_t *_pte; - int referenced = 0, none = 0; + int none = 0; + bool referenced = false, writable = false; for (_pte = pte; _pte < pte+HPAGE_PMD_NR; _pte++, address += PAGE_SIZE) { pte_t pteval = *_pte; @@ -2158,7 +2128,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, else goto out; } - if (!pte_present(pteval) || !pte_write(pteval)) + if (!pte_present(pteval)) goto out; page = vm_normal_page(vma, address, pteval); if (unlikely(!page)) @@ -2168,9 +2138,6 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, VM_BUG_ON_PAGE(!PageAnon(page), page); VM_BUG_ON_PAGE(!PageSwapBacked(page), page); - /* cannot use mapcount: can't collapse if there's a gup pin */ - if (page_count(page) != 1) - goto out; /* * We can do it before isolate_lru_page because the * page can't be freed from under us. NOTE: PG_lock @@ -2179,6 +2146,29 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, */ if (!trylock_page(page)) goto out; + + /* + * cannot use mapcount: can't collapse if there's a gup pin. + * The page must only be referenced by the scanned process + * and page swap cache. + */ + if (page_count(page) != 1 + !!PageSwapCache(page)) { + unlock_page(page); + goto out; + } + if (pte_write(pteval)) { + writable = true; + } else { + if (PageSwapCache(page) && !reuse_swap_page(page)) { + unlock_page(page); + goto out; + } + /* + * Page is not in the swap cache. It can be collapsed + * into a THP. + */ + } + /* * Isolate the page to avoid collapsing an hugepage * currently in use by the VM. @@ -2195,9 +2185,9 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, /* If there is no mapped pte young don't collapse the page */ if (pte_young(pteval) || PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm, address)) - referenced = 1; + referenced = true; } - if (likely(referenced)) + if (likely(referenced && writable)) return 1; out: release_pte_pages(pte, _pte); @@ -2550,11 +2540,12 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, { pmd_t *pmd; pte_t *pte, *_pte; - int ret = 0, referenced = 0, none = 0; + int ret = 0, none = 0; struct page *page; unsigned long _address; spinlock_t *ptl; int node = NUMA_NO_NODE; + bool writable = false, referenced = false; VM_BUG_ON(address & ~HPAGE_PMD_MASK); @@ -2573,8 +2564,11 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, else goto out_unmap; } - if (!pte_present(pteval) || !pte_write(pteval)) + if (!pte_present(pteval)) goto out_unmap; + if (pte_write(pteval)) + writable = true; + page = vm_normal_page(vma, _address, pteval); if (unlikely(!page)) goto out_unmap; @@ -2591,14 +2585,18 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, VM_BUG_ON_PAGE(PageCompound(page), page); if (!PageLRU(page) || PageLocked(page) || !PageAnon(page)) goto out_unmap; - /* cannot use mapcount: can't collapse if there's a gup pin */ - if (page_count(page) != 1) + /* + * cannot use mapcount: can't collapse if there's a gup pin. + * The page must only be referenced by the scanned process + * and page swap cache. + */ + if (page_count(page) != 1 + !!PageSwapCache(page)) goto out_unmap; if (pte_young(pteval) || PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm, address)) - referenced = 1; + referenced = true; } - if (referenced) + if (referenced && writable) ret = 1; out_unmap: pte_unmap_unlock(pte, ptl); |