Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--  mm/huge_memory.c  67
1 file changed, 60 insertions, 7 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index cdcd25cb30fe..cee42cf05477 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -285,6 +285,15 @@ static ssize_t use_zero_page_store(struct kobject *kobj,
}
static struct kobj_attribute use_zero_page_attr =
__ATTR(use_zero_page, 0644, use_zero_page_show, use_zero_page_store);
+
+static ssize_t hpage_pmd_size_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%lu\n", HPAGE_PMD_SIZE);
+}
+static struct kobj_attribute hpage_pmd_size_attr =
+ __ATTR_RO(hpage_pmd_size);
+
#ifdef CONFIG_DEBUG_VM
static ssize_t debug_cow_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
@@ -307,6 +316,7 @@ static struct attribute *hugepage_attr[] = {
&enabled_attr.attr,
&defrag_attr.attr,
&use_zero_page_attr.attr,
+ &hpage_pmd_size_attr.attr,
#if defined(CONFIG_SHMEM) && defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE)
&shmem_enabled_attr.attr,
#endif
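
Note: the new attribute is registered in the existing THP sysfs group, so it
appears as /sys/kernel/mm/transparent_hugepage/hpage_pmd_size. A minimal
userspace sketch of reading it, assuming only that standard path:

/* Userspace sketch: read the PMD huge-page size exported above. */
#include <stdio.h>

int main(void)
{
	unsigned long sz;
	FILE *f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%lu", &sz) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);
	printf("PMD huge page size: %lu bytes\n", sz);
	return 0;
}
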
@@ -737,8 +747,9 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
if (addr < vma->vm_start || addr >= vma->vm_end)
return VM_FAULT_SIGBUS;
- if (track_pfn_insert(vma, &pgprot, pfn))
- return VM_FAULT_SIGBUS;
+
+ track_pfn_insert(vma, &pgprot, pfn);
+
insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write);
return VM_FAULT_NOPAGE;
}
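
Note: the error check is dropped because track_pfn_insert() cannot meaningfully
fail on this path; it only folds the looked-up cache mode for the pfn into
pgprot, and its return value is no longer checked here. A sketch of the contract
being assumed, modelled loosely on the x86 implementation in arch/x86/mm/pat.c
(details there differ; the name below is deliberately not the real one):

/* Sketch, not the real helper: fold the pfn's memtype into the
 * protection bits; there is no failure path to report. */
static void track_pfn_insert_sketch(struct vm_area_struct *vma,
				    pgprot_t *prot, pfn_t pfn)
{
	enum page_cache_mode pcm = lookup_memtype(pfn_t_to_phys(pfn));

	*prot = __pgprot((pgprot_val(*prot) & ~_PAGE_CACHE_MASK) |
			 cachemode2protval(pcm));
}
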
@@ -1322,6 +1333,8 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
struct mm_struct *mm = tlb->mm;
bool ret = false;
+ tlb_remove_check_page_size_change(tlb, HPAGE_PMD_SIZE);
+
ptl = pmd_trans_huge_lock(pmd, vma);
if (!ptl)
goto out_unlocked;
@@ -1377,12 +1390,23 @@ out_unlocked:
return ret;
}
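
Note: tlb_remove_check_page_size_change() tells the mmu_gather which page size
is about to be batched, so architectures that cannot mix sizes in one gather can
drain first. A sketch of the intended semantics (the real generic helper lives
in include/asm-generic/tlb.h and may differ in detail):

/* Sketch: flush the pending batch if the page size changes mid-gather,
 * then record the new size.  Assumes a tlb->page_size field as added by
 * this series. */
static inline void tlb_check_page_size_sketch(struct mmu_gather *tlb,
					      unsigned int page_size)
{
	if (tlb->page_size && tlb->page_size != page_size)
		tlb_flush_mmu(tlb);
	tlb->page_size = page_size;
}
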
+static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
+{
+ pgtable_t pgtable;
+
+ pgtable = pgtable_trans_huge_withdraw(mm, pmd);
+ pte_free(mm, pgtable);
+ atomic_long_dec(&mm->nr_ptes);
+}
+
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr)
{
pmd_t orig_pmd;
spinlock_t *ptl;
+ tlb_remove_check_page_size_change(tlb, HPAGE_PMD_SIZE);
+
ptl = __pmd_trans_huge_lock(pmd, vma);
if (!ptl)
return 0;
@@ -1398,12 +1422,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
if (vma_is_dax(vma)) {
spin_unlock(ptl);
if (is_huge_zero_pmd(orig_pmd))
- tlb_remove_page(tlb, pmd_page(orig_pmd));
+ tlb_remove_page_size(tlb, pmd_page(orig_pmd), HPAGE_PMD_SIZE);
} else if (is_huge_zero_pmd(orig_pmd)) {
pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd));
atomic_long_dec(&tlb->mm->nr_ptes);
spin_unlock(ptl);
- tlb_remove_page(tlb, pmd_page(orig_pmd));
+ tlb_remove_page_size(tlb, pmd_page(orig_pmd), HPAGE_PMD_SIZE);
} else {
struct page *page = pmd_page(orig_pmd);
page_remove_rmap(page, true);
@@ -1416,6 +1440,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
atomic_long_dec(&tlb->mm->nr_ptes);
add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
} else {
+ if (arch_needs_pgtable_deposit())
+ zap_deposited_table(tlb->mm, pmd);
add_mm_counter(tlb->mm, MM_FILEPAGES, -HPAGE_PMD_NR);
}
spin_unlock(ptl);
@@ -1424,13 +1450,29 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
return 1;
}
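
Note: arch_needs_pgtable_deposit() exists because some architectures (ppc64
with the hash MMU being the motivating case) deposit a PTE table even for
file-backed huge mappings, and that table must be zapped here too. Everywhere
else the hook is expected to compile away; the generic fallback is presumably
along these lines:

/* Assumed generic fallback: no deposit needed, so the new
 * zap_deposited_table() call above is dead code on most arches. */
#ifndef arch_needs_pgtable_deposit
#define arch_needs_pgtable_deposit() (false)
#endif
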
+#ifndef pmd_move_must_withdraw
+static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
+ spinlock_t *old_pmd_ptl,
+ struct vm_area_struct *vma)
+{
+ /*
+ * With split pmd lock we also need to move preallocated
+ * PTE page table if new_pmd is on different PMD page table.
+ *
+ * We also don't deposit and withdraw tables for file pages.
+ */
+ return (new_pmd_ptl != old_pmd_ptl) && vma_is_anonymous(vma);
+}
+#endif
+
bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, unsigned long old_end,
- pmd_t *old_pmd, pmd_t *new_pmd)
+ pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
{
spinlock_t *old_ptl, *new_ptl;
pmd_t pmd;
struct mm_struct *mm = vma->vm_mm;
+ bool force_flush = false;
if ((old_addr & ~HPAGE_PMD_MASK) ||
(new_addr & ~HPAGE_PMD_MASK) ||
@@ -1456,10 +1498,11 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
if (new_ptl != old_ptl)
spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd);
+ if (pmd_present(pmd) && pmd_dirty(pmd))
+ force_flush = true;
VM_BUG_ON(!pmd_none(*new_pmd));
- if (pmd_move_must_withdraw(new_ptl, old_ptl) &&
- vma_is_anonymous(vma)) {
+ if (pmd_move_must_withdraw(new_ptl, old_ptl, vma)) {
pgtable_t pgtable;
pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);
pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
@@ -1467,6 +1510,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
if (new_ptl != old_ptl)
spin_unlock(new_ptl);
+ if (force_flush)
+ flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
+ else
+ *need_flush = true;
spin_unlock(old_ptl);
return true;
}
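
Note: the force_flush logic closes a race: once the dirty PMD has been cleared
and re-set at the new address, a stale TLB entry could still let another thread
write through the old mapping, so a dirty PMD must be flushed under the lock
rather than deferred via *need_flush. The only caller is move_page_tables() in
mm/mremap.c; a simplified sketch of how the new argument is expected to be
threaded through (names and structure assumed, not quoted):

/* Sketch of the mremap side: try the huge path first and let it handle
 * its own flush when the PMD was dirty; otherwise split and fall back
 * to the PTE loop, deferring the flush via need_flush as before. */
static void move_one_pmd_sketch(struct vm_area_struct *vma,
				unsigned long old_addr, unsigned long new_addr,
				unsigned long old_end,
				pmd_t *old_pmd, pmd_t *new_pmd,
				bool *need_flush)
{
	if (move_huge_pmd(vma, old_addr, new_addr, old_end,
			  old_pmd, new_pmd, need_flush))
		return;

	split_huge_pmd(vma, old_pmd, old_addr);
	/* the PTE-by-PTE move continues here in the real code */
}
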
@@ -1581,6 +1628,12 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
if (!vma_is_anonymous(vma)) {
_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+ /*
+ * We are going to unmap this huge page. So
+ * just go ahead and zap it
+ */
+ if (arch_needs_pgtable_deposit())
+ zap_deposited_table(mm, pmd);
if (vma_is_dax(vma))
return;
page = pmd_page(_pmd);