summaryrefslogtreecommitdiff
path: root/mm/swap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/swap.c')
-rw-r--r--mm/swap.c71
1 files changed, 57 insertions, 14 deletions
diff --git a/mm/swap.c b/mm/swap.c
index d16d65d9b4e0..65ef7e3525bf 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -494,14 +494,14 @@ void lru_cache_add_inactive_or_unevictable(struct page *page,
unevictable = (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED;
if (unlikely(unevictable) && !TestSetPageMlocked(page)) {
+ int nr_pages = thp_nr_pages(page);
/*
* We use the irq-unsafe __mod_zone_page_stat because this
* counter is not modified from interrupt context, and the pte
* lock is held(spinlock), which implies preemption disabled.
*/
- __mod_zone_page_state(page_zone(page), NR_MLOCK,
- thp_nr_pages(page));
- count_vm_event(UNEVICTABLE_PGMLOCKED);
+ __mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
+ count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
}
lru_cache_add(page);
}
@@ -763,10 +763,20 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
*/
void lru_add_drain_all(void)
{
- static seqcount_t seqcount = SEQCNT_ZERO(seqcount);
- static DEFINE_MUTEX(lock);
+ /*
+ * lru_drain_gen - Global pages generation number
+ *
+ * (A) Definition: global lru_drain_gen = x implies that all generations
+ * 0 < n <= x are already *scheduled* for draining.
+ *
+ * This is an optimization for the highly-contended use case where a
+ * user space workload keeps constantly generating a flow of pages for
+ * each CPU.
+ */
+ static unsigned int lru_drain_gen;
static struct cpumask has_work;
- int cpu, seq;
+ static DEFINE_MUTEX(lock);
+ unsigned cpu, this_gen;
/*
* Make sure nobody triggers this path before mm_percpu_wq is fully
@@ -775,21 +785,54 @@ void lru_add_drain_all(void)
if (WARN_ON(!mm_percpu_wq))
return;
- seq = raw_read_seqcount_latch(&seqcount);
+ /*
+ * Guarantee pagevec counter stores visible by this CPU are visible to
+ * other CPUs before loading the current drain generation.
+ */
+ smp_mb();
+
+ /*
+ * (B) Locally cache global LRU draining generation number
+ *
+ * The read barrier ensures that the counter is loaded before the mutex
+ * is taken. It pairs with smp_mb() inside the mutex critical section
+ * at (D).
+ */
+ this_gen = smp_load_acquire(&lru_drain_gen);
mutex_lock(&lock);
/*
- * Piggyback on drain started and finished while we waited for lock:
- * all pages pended at the time of our enter were drained from vectors.
+ * (C) Exit the draining operation if a newer generation, from another
+ * lru_add_drain_all(), was already scheduled for draining. Check (A).
*/
- if (__read_seqcount_retry(&seqcount, seq))
+ if (unlikely(this_gen != lru_drain_gen))
goto done;
- raw_write_seqcount_latch(&seqcount);
+ /*
+ * (D) Increment global generation number
+ *
+ * Pairs with smp_load_acquire() at (B), outside of the critical
+ * section. Use a full memory barrier to guarantee that the new global
+ * drain generation number is stored before loading pagevec counters.
+ *
+ * This pairing must be done here, before the for_each_online_cpu loop
+ * below which drains the page vectors.
+ *
+ * Let x, y, and z represent some system CPU numbers, where x < y < z.
+ * Assume CPU #z is is in the middle of the for_each_online_cpu loop
+ * below and has already reached CPU #y's per-cpu data. CPU #x comes
+ * along, adds some pages to its per-cpu vectors, then calls
+ * lru_add_drain_all().
+ *
+ * If the paired barrier is done at any later step, e.g. after the
+ * loop, CPU #x will just exit at (C) and miss flushing out all of its
+ * added pages.
+ */
+ WRITE_ONCE(lru_drain_gen, lru_drain_gen + 1);
+ smp_mb();
cpumask_clear(&has_work);
-
for_each_online_cpu(cpu) {
struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
@@ -801,7 +844,7 @@ void lru_add_drain_all(void)
need_activate_page_drain(cpu)) {
INIT_WORK(work, lru_add_drain_per_cpu);
queue_work_on(cpu, mm_percpu_wq, work);
- cpumask_set_cpu(cpu, &has_work);
+ __cpumask_set_cpu(cpu, &has_work);
}
}
@@ -816,7 +859,7 @@ void lru_add_drain_all(void)
{
lru_add_drain();
}
-#endif
+#endif /* CONFIG_SMP */
/**
* release_pages - batched put_page()