diff options
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 58 |
1 files changed, 42 insertions, 16 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c index b620d74b0f66..b6f4db6c240f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -648,21 +648,20 @@ typedef enum { /* * pageout is called by shrink_folio_list() for each dirty folio. - * Calls ->writepage(). */ static pageout_t pageout(struct folio *folio, struct address_space *mapping, struct swap_iocb **plug, struct list_head *folio_list) { + int (*writeout)(struct folio *, struct writeback_control *); + /* - * If the folio is dirty, only perform writeback if that write - * will be non-blocking. To prevent this allocation from being - * stalled by pagecache activity. But note that there may be - * stalls if we need to run get_block(). We could test - * PagePrivate for that. - * - * If this process is currently in __generic_file_write_iter() against - * this folio's queue, we can perform writeback even if that - * will block. + * We no longer attempt to writeback filesystem folios here, other + * than tmpfs/shmem. That's taken care of in page-writeback. + * If we find a dirty filesystem folio at the end of the LRU list, + * typically that means the filesystem is saturating the storage + * with contiguous writes and telling it to write a folio here + * would only make the situation worse by injecting an element + * of random access. * * If the folio is swapcache, write it back even if that would * block, for some throttling. This happens by accident, because @@ -685,7 +684,11 @@ static pageout_t pageout(struct folio *folio, struct address_space *mapping, } return PAGE_KEEP; } - if (mapping->a_ops->writepage == NULL) + if (shmem_mapping(mapping)) + writeout = shmem_writeout; + else if (folio_test_anon(folio)) + writeout = swap_writeout; + else return PAGE_ACTIVATE; if (folio_clear_dirty_for_io(folio)) { @@ -708,7 +711,7 @@ static pageout_t pageout(struct folio *folio, struct address_space *mapping, wbc.list = folio_list; folio_set_reclaim(folio); - res = mapping->a_ops->writepage(&folio->page, &wbc); + res = writeout(folio, &wbc); if (res < 0) handle_write_error(mapping, folio, res); if (res == AOP_WRITEPAGE_ACTIVATE) { @@ -717,7 +720,7 @@ static pageout_t pageout(struct folio *folio, struct address_space *mapping, } if (!folio_test_writeback(folio)) { - /* synchronous write or broken a_ops? */ + /* synchronous write? */ folio_clear_reclaim(folio); } trace_mm_vmscan_write_folio(folio); @@ -6736,6 +6739,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx) * meet watermarks. */ for_each_managed_zone_pgdat(zone, pgdat, i, highest_zoneidx) { + enum zone_stat_item item; unsigned long free_pages; if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) @@ -6746,11 +6750,33 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx) /* * In defrag_mode, watermarks must be met in whole * blocks to avoid polluting allocator fallbacks. + * + * However, kswapd usually cannot accomplish this on + * its own and needs kcompactd support. Once it's + * reclaimed a compaction gap, and kswapd_shrink_node + * has dropped order, simply ensure there are enough + * base pages for compaction, wake kcompactd & sleep. */ - if (defrag_mode) - free_pages = zone_page_state(zone, NR_FREE_PAGES_BLOCKS); + if (defrag_mode && order) + item = NR_FREE_PAGES_BLOCKS; else - free_pages = zone_page_state(zone, NR_FREE_PAGES); + item = NR_FREE_PAGES; + + /* + * When there is a high number of CPUs in the system, + * the cumulative error from the vmstat per-cpu cache + * can blur the line between the watermarks. In that + * case, be safe and get an accurate snapshot. + * + * TODO: NR_FREE_PAGES_BLOCKS moves in steps of + * pageblock_nr_pages, while the vmstat pcp threshold + * is limited to 125. On many configurations that + * counter won't actually be per-cpu cached. But keep + * things simple for now; revisit when somebody cares. + */ + free_pages = zone_page_state(zone, item); + if (zone->percpu_drift_mark && free_pages < zone->percpu_drift_mark) + free_pages = zone_page_state_snapshot(zone, item); if (__zone_watermark_ok(zone, order, mark, highest_zoneidx, 0, free_pages)) |