summaryrefslogtreecommitdiff
path: root/mm/rmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/rmap.c')
-rw-r--r--mm/rmap.c118
1 files changed, 117 insertions, 1 deletions
diff --git a/mm/rmap.c b/mm/rmap.c
index 171b68768df1..0db38e7d0a72 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -62,6 +62,8 @@
#include <asm/tlbflush.h>
+#include <trace/events/tlb.h>
+
#include "internal.h"
static struct kmem_cache *anon_vma_cachep;
@@ -583,6 +585,107 @@ vma_address(struct page *page, struct vm_area_struct *vma)
return address;
}
+#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+static void percpu_flush_tlb_batch_pages(void *data)
+{
+ /*
+ * All TLB entries are flushed on the assumption that it is
+ * cheaper to flush all TLBs and let them be refilled than
+ * flushing individual PFNs. Note that we do not track mm's
+ * to flush as that might simply be multiple full TLB flushes
+ * for no gain.
+ */
+ count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+ flush_tlb_local();
+}
+
+/*
+ * Flush TLB entries for recently unmapped pages from remote CPUs. It is
+ * important if a PTE was dirty when it was unmapped that it's flushed
+ * before any IO is initiated on the page to prevent lost writes. Similarly,
+ * it must be flushed before freeing to prevent data leakage.
+ */
+void try_to_unmap_flush(void)
+{
+ struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
+ int cpu;
+
+ if (!tlb_ubc->flush_required)
+ return;
+
+ cpu = get_cpu();
+
+ trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, -1UL);
+
+ if (cpumask_test_cpu(cpu, &tlb_ubc->cpumask))
+ percpu_flush_tlb_batch_pages(&tlb_ubc->cpumask);
+
+ if (cpumask_any_but(&tlb_ubc->cpumask, cpu) < nr_cpu_ids) {
+ smp_call_function_many(&tlb_ubc->cpumask,
+ percpu_flush_tlb_batch_pages, (void *)tlb_ubc, true);
+ }
+ cpumask_clear(&tlb_ubc->cpumask);
+ tlb_ubc->flush_required = false;
+ tlb_ubc->writable = false;
+ put_cpu();
+}
+
+/* Flush iff there are potentially writable TLB entries that can race with IO */
+void try_to_unmap_flush_dirty(void)
+{
+ struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
+
+ if (tlb_ubc->writable)
+ try_to_unmap_flush();
+}
+
+static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
+ struct page *page, bool writable)
+{
+ struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
+
+ cpumask_or(&tlb_ubc->cpumask, &tlb_ubc->cpumask, mm_cpumask(mm));
+ tlb_ubc->flush_required = true;
+
+ /*
+ * If the PTE was dirty then it's best to assume it's writable. The
+ * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
+ * before the page is queued for IO.
+ */
+ if (writable)
+ tlb_ubc->writable = true;
+}
+
+/*
+ * Returns true if the TLB flush should be deferred to the end of a batch of
+ * unmap operations to reduce IPIs.
+ */
+static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
+{
+ bool should_defer = false;
+
+ if (!(flags & TTU_BATCH_FLUSH))
+ return false;
+
+ /* If remote CPUs need to be flushed then defer batch the flush */
+ if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
+ should_defer = true;
+ put_cpu();
+
+ return should_defer;
+}
+#else
+static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
+ struct page *page, bool writable)
+{
+}
+
+static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
+{
+ return false;
+}
+#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
+
/*
* At what user virtual address is page expected in vma?
* Caller should check the page is actually part of the vma.
@@ -1220,7 +1323,20 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
/* Nuke the page table entry. */
flush_cache_page(vma, address, page_to_pfn(page));
- pteval = ptep_clear_flush(vma, address, pte);
+ if (should_defer_flush(mm, flags)) {
+ /*
+ * We clear the PTE but do not flush so potentially a remote
+ * CPU could still be writing to the page. If the entry was
+ * previously clean then the architecture must guarantee that
+ * a clear->dirty transition on a cached TLB entry is written
+ * through and traps if the PTE is unmapped.
+ */
+ pteval = ptep_get_and_clear(mm, address, pte);
+
+ set_tlb_ubc_flush_pending(mm, page, pte_dirty(pteval));
+ } else {
+ pteval = ptep_clear_flush(vma, address, pte);
+ }
/* Move the dirty bit to the physical page now the pte is gone. */
if (pte_dirty(pteval))