diff options
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- | mm/huge_memory.c | 77 |
1 files changed, 64 insertions, 13 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 86975dec0ba1..0b51e70e0a8b 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -32,6 +32,7 @@ #include <linux/userfaultfd_k.h> #include <linux/page_idle.h> #include <linux/shmem_fs.h> +#include <linux/oom.h> #include <asm/tlb.h> #include <asm/pgalloc.h> @@ -327,7 +328,7 @@ static struct attribute *hugepage_attr[] = { NULL, }; -static struct attribute_group hugepage_attr_group = { +static const struct attribute_group hugepage_attr_group = { .attrs = hugepage_attr, }; @@ -550,6 +551,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, struct mem_cgroup *memcg; pgtable_t pgtable; unsigned long haddr = vmf->address & HPAGE_PMD_MASK; + int ret = 0; VM_BUG_ON_PAGE(!PageCompound(page), page); @@ -561,12 +563,11 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, pgtable = pte_alloc_one(vma->vm_mm, haddr); if (unlikely(!pgtable)) { - mem_cgroup_cancel_charge(page, memcg, true); - put_page(page); - return VM_FAULT_OOM; + ret = VM_FAULT_OOM; + goto release; } - clear_huge_page(page, haddr, HPAGE_PMD_NR); + clear_huge_page(page, vmf->address, HPAGE_PMD_NR); /* * The memory barrier inside __SetPageUptodate makes sure that * clear_huge_page writes become visible before the set_pmd_at() @@ -576,13 +577,14 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); if (unlikely(!pmd_none(*vmf->pmd))) { - spin_unlock(vmf->ptl); - mem_cgroup_cancel_charge(page, memcg, true); - put_page(page); - pte_free(vma->vm_mm, pgtable); + goto unlock_release; } else { pmd_t entry; + ret = check_stable_address_space(vma->vm_mm); + if (ret) + goto unlock_release; + /* Deliver the page fault to userland */ if (userfaultfd_missing(vma)) { int ret; @@ -610,6 +612,15 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, } return 0; +unlock_release: + spin_unlock(vmf->ptl); +release: + if (pgtable) + pte_free(vma->vm_mm, pgtable); + mem_cgroup_cancel_charge(page, memcg, true); + put_page(page); + return ret; + } /* @@ -688,7 +699,10 @@ int do_huge_pmd_anonymous_page(struct vm_fault *vmf) ret = 0; set = false; if (pmd_none(*vmf->pmd)) { - if (userfaultfd_missing(vma)) { + ret = check_stable_address_space(vma->vm_mm); + if (ret) { + spin_unlock(vmf->ptl); + } else if (userfaultfd_missing(vma)) { spin_unlock(vmf->ptl); ret = handle_userfault(vmf, VM_UFFD_MISSING); VM_BUG_ON(ret & VM_FAULT_FALLBACK); @@ -1226,15 +1240,29 @@ int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd) * We can only reuse the page if nobody else maps the huge page or it's * part. */ - if (page_trans_huge_mapcount(page, NULL) == 1) { + if (!trylock_page(page)) { + get_page(page); + spin_unlock(vmf->ptl); + lock_page(page); + spin_lock(vmf->ptl); + if (unlikely(!pmd_same(*vmf->pmd, orig_pmd))) { + unlock_page(page); + put_page(page); + goto out_unlock; + } + put_page(page); + } + if (reuse_swap_page(page, NULL)) { pmd_t entry; entry = pmd_mkyoung(orig_pmd); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry, 1)) update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); ret |= VM_FAULT_WRITE; + unlock_page(page); goto out_unlock; } + unlock_page(page); get_page(page); spin_unlock(vmf->ptl); alloc: @@ -1277,7 +1305,7 @@ alloc: count_vm_event(THP_FAULT_ALLOC); if (!page) - clear_huge_page(new_page, haddr, HPAGE_PMD_NR); + clear_huge_page(new_page, vmf->address, HPAGE_PMD_NR); else copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR); __SetPageUptodate(new_page); @@ -1496,10 +1524,25 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd) } /* + * Since we took the NUMA fault, we must have observed the !accessible + * bit. Make sure all other CPUs agree with that, to avoid them + * modifying the page we're about to migrate. + * + * Must be done under PTL such that we'll observe the relevant + * inc_tlb_flush_pending(). + * + * We are not sure a pending tlb flush here is for a huge page + * mapping or not. Hence use the tlb range variant + */ + if (mm_tlb_flush_pending(vma->vm_mm)) + flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE); + + /* * Migrate the THP to the requested node, returns with page unlocked * and access rights restored. */ spin_unlock(vmf->ptl); + migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma, vmf->pmd, pmd, vmf->address, page, target_nid); if (migrated) { @@ -2438,6 +2481,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageCompound(page), page); + if (PageWriteback(page)) + return -EBUSY; + if (PageAnon(head)) { /* * The caller does not necessarily hold an mmap_sem that would @@ -2515,7 +2561,12 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) __dec_node_page_state(page, NR_SHMEM_THPS); spin_unlock(&pgdata->split_queue_lock); __split_huge_page(page, list, flags); - ret = 0; + if (PageSwapCache(head)) { + swp_entry_t entry = { .val = page_private(head) }; + + ret = split_swap_cluster(entry); + } else + ret = 0; } else { if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) { pr_alert("total_mapcount: %u, page_count(): %u\n", |