summaryrefslogtreecommitdiff
path: root/mm/vmscan.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c102
1 files changed, 54 insertions, 48 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index eb4e8440c507..ddaaff67642e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -56,6 +56,7 @@
#include <linux/khugepaged.h>
#include <linux/rculist_nulls.h>
#include <linux/random.h>
+#include <linux/mmu_notifier.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -3294,7 +3295,8 @@ static bool get_next_vma(unsigned long mask, unsigned long size, struct mm_walk
return false;
}
-static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr)
+static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr,
+ struct pglist_data *pgdat)
{
unsigned long pfn = pte_pfn(pte);
@@ -3306,13 +3308,20 @@ static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned
if (WARN_ON_ONCE(pte_devmap(pte) || pte_special(pte)))
return -1;
+ if (!pte_young(pte) && !mm_has_notifiers(vma->vm_mm))
+ return -1;
+
if (WARN_ON_ONCE(!pfn_valid(pfn)))
return -1;
+ if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
+ return -1;
+
return pfn;
}
-static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr)
+static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr,
+ struct pglist_data *pgdat)
{
unsigned long pfn = pmd_pfn(pmd);
@@ -3324,9 +3333,15 @@ static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned
if (WARN_ON_ONCE(pmd_devmap(pmd)))
return -1;
+ if (!pmd_young(pmd) && !mm_has_notifiers(vma->vm_mm))
+ return -1;
+
if (WARN_ON_ONCE(!pfn_valid(pfn)))
return -1;
+ if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
+ return -1;
+
return pfn;
}
@@ -3335,10 +3350,6 @@ static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg,
{
struct folio *folio;
- /* try to avoid unnecessary memory loads */
- if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
- return NULL;
-
folio = pfn_folio(pfn);
if (folio_nid(folio) != pgdat->node_id)
return NULL;
@@ -3394,21 +3405,16 @@ restart:
total++;
walk->mm_stats[MM_LEAF_TOTAL]++;
- pfn = get_pte_pfn(ptent, args->vma, addr);
+ pfn = get_pte_pfn(ptent, args->vma, addr, pgdat);
if (pfn == -1)
continue;
- if (!pte_young(ptent)) {
- walk->mm_stats[MM_LEAF_OLD]++;
- continue;
- }
-
folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
if (!folio)
continue;
- if (!ptep_test_and_clear_young(args->vma, addr, pte + i))
- VM_WARN_ON_ONCE(true);
+ if (!ptep_clear_young_notify(args->vma, addr, pte + i))
+ continue;
young++;
walk->mm_stats[MM_LEAF_YOUNG]++;
@@ -3474,21 +3480,25 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area
/* don't round down the first address */
addr = i ? (*first & PMD_MASK) + i * PMD_SIZE : *first;
- pfn = get_pmd_pfn(pmd[i], vma, addr);
- if (pfn == -1)
+ if (!pmd_present(pmd[i]))
goto next;
if (!pmd_trans_huge(pmd[i])) {
- if (!walk->force_scan && should_clear_pmd_young())
+ if (!walk->force_scan && should_clear_pmd_young() &&
+ !mm_has_notifiers(args->mm))
pmdp_test_and_clear_young(vma, addr, pmd + i);
goto next;
}
+ pfn = get_pmd_pfn(pmd[i], vma, addr, pgdat);
+ if (pfn == -1)
+ goto next;
+
folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
if (!folio)
goto next;
- if (!pmdp_test_and_clear_young(vma, addr, pmd + i))
+ if (!pmdp_clear_young_notify(vma, addr, pmd + i))
goto next;
walk->mm_stats[MM_LEAF_YOUNG]++;
@@ -3546,27 +3556,18 @@ restart:
}
if (pmd_trans_huge(val)) {
- unsigned long pfn = pmd_pfn(val);
struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
+ unsigned long pfn = get_pmd_pfn(val, vma, addr, pgdat);
walk->mm_stats[MM_LEAF_TOTAL]++;
- if (!pmd_young(val)) {
- walk->mm_stats[MM_LEAF_OLD]++;
- continue;
- }
-
- /* try to avoid unnecessary memory loads */
- if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
- continue;
-
- walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
+ if (pfn != -1)
+ walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
continue;
}
- walk->mm_stats[MM_NONLEAF_TOTAL]++;
-
- if (!walk->force_scan && should_clear_pmd_young()) {
+ if (!walk->force_scan && should_clear_pmd_young() &&
+ !mm_has_notifiers(args->mm)) {
if (!pmd_young(val))
continue;
@@ -4040,13 +4041,13 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
* the PTE table to the Bloom filter. This forms a feedback loop between the
* eviction and the aging.
*/
-void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
{
int i;
unsigned long start;
unsigned long end;
struct lru_gen_mm_walk *walk;
- int young = 0;
+ int young = 1;
pte_t *pte = pvmw->pte;
unsigned long addr = pvmw->address;
struct vm_area_struct *vma = pvmw->vma;
@@ -4062,12 +4063,15 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
lockdep_assert_held(pvmw->ptl);
VM_WARN_ON_ONCE_FOLIO(folio_test_lru(folio), folio);
+ if (!ptep_clear_young_notify(vma, addr, pte))
+ return false;
+
if (spin_is_contended(pvmw->ptl))
- return;
+ return true;
/* exclude special VMAs containing anon pages from COW */
if (vma->vm_flags & VM_SPECIAL)
- return;
+ return true;
/* avoid taking the LRU lock under the PTL when possible */
walk = current->reclaim_state ? current->reclaim_state->mm_walk : NULL;
@@ -4075,6 +4079,9 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
start = max(addr & PMD_MASK, vma->vm_start);
end = min(addr | ~PMD_MASK, vma->vm_end - 1) + 1;
+ if (end - start == PAGE_SIZE)
+ return true;
+
if (end - start > MIN_LRU_BATCH * PAGE_SIZE) {
if (addr - start < MIN_LRU_BATCH * PAGE_SIZE / 2)
end = start + MIN_LRU_BATCH * PAGE_SIZE;
@@ -4088,7 +4095,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
/* folio_update_gen() requires stable folio_memcg() */
if (!mem_cgroup_trylock_pages(memcg))
- return;
+ return true;
arch_enter_lazy_mmu_mode();
@@ -4098,19 +4105,16 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
unsigned long pfn;
pte_t ptent = ptep_get(pte + i);
- pfn = get_pte_pfn(ptent, vma, addr);
+ pfn = get_pte_pfn(ptent, vma, addr, pgdat);
if (pfn == -1)
continue;
- if (!pte_young(ptent))
- continue;
-
folio = get_pfn_folio(pfn, memcg, pgdat, can_swap);
if (!folio)
continue;
- if (!ptep_test_and_clear_young(vma, addr, pte + i))
- VM_WARN_ON_ONCE(true);
+ if (!ptep_clear_young_notify(vma, addr, pte + i))
+ continue;
young++;
@@ -4140,6 +4144,8 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
/* feedback from rmap walkers to page table walkers */
if (mm_state && suitable_to_scan(i, young))
update_bloom_filter(mm_state, max_seq, pvmw->pmd);
+
+ return true;
}
/******************************************************************************
@@ -5254,11 +5260,11 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
for (tier = 0; tier < MAX_NR_TIERS; tier++) {
seq_printf(m, " %10d", tier);
for (type = 0; type < ANON_AND_FILE; type++) {
- const char *s = " ";
+ const char *s = "xxx";
unsigned long n[3] = {};
if (seq == max_seq) {
- s = "RT ";
+ s = "RTx";
n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]);
n[1] = READ_ONCE(lrugen->avg_total[type][tier]);
} else if (seq == min_seq[type] || NR_HIST_GENS > 1) {
@@ -5280,14 +5286,14 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
seq_puts(m, " ");
for (i = 0; i < NR_MM_STATS; i++) {
- const char *s = " ";
+ const char *s = "xxxx";
unsigned long n = 0;
if (seq == max_seq && NR_HIST_GENS == 1) {
- s = "LOYNFA";
+ s = "TYFA";
n = READ_ONCE(mm_state->stats[hist][i]);
} else if (seq != max_seq && NR_HIST_GENS > 1) {
- s = "loynfa";
+ s = "tyfa";
n = READ_ONCE(mm_state->stats[hist][i]);
}