summaryrefslogtreecommitdiff
path: root/mm/page-writeback.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-06-30 03:29:11 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2021-06-30 03:29:11 +0300
commit65090f30ab791810a3dc840317e57df05018559c (patch)
treef417526656da37109777e89613e140ffc59228bc /mm/page-writeback.c
parent349a2d52ffe59b7a0c5876fa7ee9f3eaf188b830 (diff)
parent0ed950d1f28142ccd9a9453c60df87853530d778 (diff)
downloadlinux-65090f30ab791810a3dc840317e57df05018559c.tar.xz
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton: "191 patches. Subsystems affected by this patch series: kthread, ia64, scripts, ntfs, squashfs, ocfs2, kernel/watchdog, and mm (gup, pagealloc, slab, slub, kmemleak, dax, debug, pagecache, gup, swap, memcg, pagemap, mprotect, bootmem, dma, tracing, vmalloc, kasan, initialization, pagealloc, and memory-failure)" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (191 commits) mm,hwpoison: make get_hwpoison_page() call get_any_page() mm,hwpoison: send SIGBUS with error virutal address mm/page_alloc: split pcp->high across all online CPUs for cpuless nodes mm/page_alloc: allow high-order pages to be stored on the per-cpu lists mm: replace CONFIG_FLAT_NODE_MEM_MAP with CONFIG_FLATMEM mm: replace CONFIG_NEED_MULTIPLE_NODES with CONFIG_NUMA docs: remove description of DISCONTIGMEM arch, mm: remove stale mentions of DISCONIGMEM mm: remove CONFIG_DISCONTIGMEM m68k: remove support for DISCONTIGMEM arc: remove support for DISCONTIGMEM arc: update comment about HIGHMEM implementation alpha: remove DISCONTIGMEM and NUMA mm/page_alloc: move free_the_page mm/page_alloc: fix counting of managed_pages mm/page_alloc: improve memmap_pages dbg msg mm: drop SECTION_SHIFT in code comments mm/page_alloc: introduce vm.percpu_pagelist_high_fraction mm/page_alloc: limit the number of pages on PCP lists when reclaim is active mm/page_alloc: scale the number of pages that are batch freed ...
Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r--mm/page-writeback.c89
1 files changed, 53 insertions, 36 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0062d5c57d41..e5b38ffe9fca 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -32,7 +32,6 @@
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
-#include <linux/buffer_head.h> /* __set_page_dirty_buffers */
#include <linux/pagevec.h>
#include <linux/timer.h>
#include <linux/sched/rt.h>
@@ -845,7 +844,7 @@ static long long pos_ratio_polynom(unsigned long setpoint,
* ^ pos_ratio
* |
* | |<===== global dirty control scope ======>|
- * 2.0 .............*
+ * 2.0 * * * * * * *
* | .*
* | . *
* | . *
@@ -1869,10 +1868,9 @@ DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
* which was newly dirtied. The function will periodically check the system's
* dirty state and will initiate writeback if needed.
*
- * On really big machines, get_writeback_state is expensive, so try to avoid
- * calling it too often (ratelimiting). But once we're over the dirty memory
- * limit we decrease the ratelimiting by a lot, to prevent individual processes
- * from overshooting the limit by (ratelimit_pages) each.
+ * Once we're over the dirty memory limit we decrease the ratelimiting
+ * by a lot, to prevent individual processes from overshooting the limit
+ * by (ratelimit_pages) each.
*/
void balance_dirty_pages_ratelimited(struct address_space *mapping)
{
@@ -1945,6 +1943,8 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
struct dirty_throttle_control * const gdtc = &gdtc_stor;
struct dirty_throttle_control * const mdtc = mdtc_valid(&mdtc_stor) ?
&mdtc_stor : NULL;
+ unsigned long reclaimable;
+ unsigned long thresh;
/*
* Similar to balance_dirty_pages() but ignores pages being written
@@ -1957,8 +1957,13 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
if (gdtc->dirty > gdtc->bg_thresh)
return true;
- if (wb_stat(wb, WB_RECLAIMABLE) >
- wb_calc_thresh(gdtc->wb, gdtc->bg_thresh))
+ thresh = wb_calc_thresh(gdtc->wb, gdtc->bg_thresh);
+ if (thresh < 2 * wb_stat_error())
+ reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
+ else
+ reclaimable = wb_stat(wb, WB_RECLAIMABLE);
+
+ if (reclaimable > thresh)
return true;
if (mdtc) {
@@ -1972,8 +1977,13 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
if (mdtc->dirty > mdtc->bg_thresh)
return true;
- if (wb_stat(wb, WB_RECLAIMABLE) >
- wb_calc_thresh(mdtc->wb, mdtc->bg_thresh))
+ thresh = wb_calc_thresh(mdtc->wb, mdtc->bg_thresh);
+ if (thresh < 2 * wb_stat_error())
+ reclaimable = wb_stat_sum(wb, WB_RECLAIMABLE);
+ else
+ reclaimable = wb_stat(wb, WB_RECLAIMABLE);
+
+ if (reclaimable > thresh)
return true;
}
@@ -2045,8 +2055,6 @@ void laptop_sync_completion(void)
/*
* If ratelimit_pages is too high then we can get into dirty-data overload
* if a large number of processes all perform writes at the same time.
- * If it is too low then SMP machines will call the (expensive)
- * get_writeback_state too often.
*
* Here we set ratelimit_pages to a level which ensures that when all CPUs are
* dirtying in parallel, we cannot go more than 3% (1/32) over the dirty memory
@@ -2409,6 +2417,7 @@ int __set_page_dirty_no_writeback(struct page *page)
return !TestSetPageDirty(page);
return 0;
}
+EXPORT_SYMBOL(__set_page_dirty_no_writeback);
/*
* Helper function for set_page_dirty family.
@@ -2417,7 +2426,8 @@ int __set_page_dirty_no_writeback(struct page *page)
*
* NOTE: This relies on being atomic wrt interrupts.
*/
-void account_page_dirtied(struct page *page, struct address_space *mapping)
+static void account_page_dirtied(struct page *page,
+ struct address_space *mapping)
{
struct inode *inode = mapping->host;
@@ -2436,7 +2446,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
inc_wb_stat(wb, WB_DIRTIED);
task_io_account_write(PAGE_SIZE);
current->nr_dirtied++;
- this_cpu_inc(bdp_ratelimits);
+ __this_cpu_inc(bdp_ratelimits);
mem_cgroup_track_foreign_dirty(page, wb);
}
@@ -2459,6 +2469,30 @@ void account_page_cleaned(struct page *page, struct address_space *mapping,
}
/*
+ * Mark the page dirty, and set it dirty in the page cache, and mark the inode
+ * dirty.
+ *
+ * If warn is true, then emit a warning if the page is not uptodate and has
+ * not been truncated.
+ *
+ * The caller must hold lock_page_memcg().
+ */
+void __set_page_dirty(struct page *page, struct address_space *mapping,
+ int warn)
+{
+ unsigned long flags;
+
+ xa_lock_irqsave(&mapping->i_pages, flags);
+ if (page->mapping) { /* Race with truncate? */
+ WARN_ON_ONCE(warn && !PageUptodate(page));
+ account_page_dirtied(page, mapping);
+ __xa_set_mark(&mapping->i_pages, page_index(page),
+ PAGECACHE_TAG_DIRTY);
+ }
+ xa_unlock_irqrestore(&mapping->i_pages, flags);
+}
+
+/*
* For address_spaces which do not use buffers. Just tag the page as dirty in
* the xarray.
*
@@ -2475,20 +2509,12 @@ int __set_page_dirty_nobuffers(struct page *page)
lock_page_memcg(page);
if (!TestSetPageDirty(page)) {
struct address_space *mapping = page_mapping(page);
- unsigned long flags;
if (!mapping) {
unlock_page_memcg(page);
return 1;
}
-
- xa_lock_irqsave(&mapping->i_pages, flags);
- BUG_ON(page_mapping(page) != mapping);
- WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
- account_page_dirtied(page, mapping);
- __xa_set_mark(&mapping->i_pages, page_index(page),
- PAGECACHE_TAG_DIRTY);
- xa_unlock_irqrestore(&mapping->i_pages, flags);
+ __set_page_dirty(page, mapping, !PagePrivate(page));
unlock_page_memcg(page);
if (mapping->host) {
@@ -2546,13 +2572,9 @@ EXPORT_SYMBOL(redirty_page_for_writepage);
/*
* Dirty a page.
*
- * For pages with a mapping this should be done under the page lock
- * for the benefit of asynchronous memory errors who prefer a consistent
- * dirty state. This rule can be broken in some special cases,
- * but should be better not to.
- *
- * If the mapping doesn't provide a set_page_dirty a_op, then
- * just fall through and assume that it wants buffer_heads.
+ * For pages with a mapping this should be done under the page lock for the
+ * benefit of asynchronous memory errors who prefer a consistent dirty state.
+ * This rule can be broken in some special cases, but should be better not to.
*/
int set_page_dirty(struct page *page)
{
@@ -2560,7 +2582,6 @@ int set_page_dirty(struct page *page)
page = compound_head(page);
if (likely(mapping)) {
- int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
/*
* readahead/lru_deactivate_page could remain
* PG_readahead/PG_reclaim due to race with end_page_writeback
@@ -2573,11 +2594,7 @@ int set_page_dirty(struct page *page)
*/
if (PageReclaim(page))
ClearPageReclaim(page);
-#ifdef CONFIG_BLOCK
- if (!spd)
- spd = __set_page_dirty_buffers;
-#endif
- return (*spd)(page);
+ return mapping->a_ops->set_page_dirty(page);
}
if (!PageDirty(page)) {
if (!TestSetPageDirty(page))