summaryrefslogtreecommitdiff
path: root/mm/page-writeback.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r--mm/page-writeback.c142
1 files changed, 33 insertions, 109 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d9861e42b2bd..3e248d1c3969 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -41,6 +41,7 @@
#include <trace/events/writeback.h>
#include "internal.h"
+#include "swap.h"
/*
* Sleep at most 200ms at a time in balance_dirty_pages().
@@ -120,29 +121,6 @@ EXPORT_SYMBOL(laptop_mode);
struct wb_domain global_wb_domain;
-/* consolidated parameters for balance_dirty_pages() and its subroutines */
-struct dirty_throttle_control {
-#ifdef CONFIG_CGROUP_WRITEBACK
- struct wb_domain *dom;
- struct dirty_throttle_control *gdtc; /* only set in memcg dtc's */
-#endif
- struct bdi_writeback *wb;
- struct fprop_local_percpu *wb_completions;
-
- unsigned long avail; /* dirtyable */
- unsigned long dirty; /* file_dirty + write + nfs */
- unsigned long thresh; /* dirty threshold */
- unsigned long bg_thresh; /* dirty background threshold */
-
- unsigned long wb_dirty; /* per-wb counterparts */
- unsigned long wb_thresh;
- unsigned long wb_bg_thresh;
-
- unsigned long pos_ratio;
- bool freerun;
- bool dirty_exceeded;
-};
-
/*
* Length of period for aging writeout fractions of bdis. This is an
* arbitrarily chosen number. The longer the period, the slower fractions will
@@ -543,8 +521,8 @@ static int dirty_ratio_handler(const struct ctl_table *table, int write, void *b
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
- writeback_set_ratelimit();
vm_dirty_bytes = 0;
+ writeback_set_ratelimit();
}
return ret;
}
@@ -630,7 +608,7 @@ EXPORT_SYMBOL_GPL(wb_writeout_inc);
*/
static void writeout_period(struct timer_list *t)
{
- struct wb_domain *dom = from_timer(dom, t, period_timer);
+ struct wb_domain *dom = timer_container_of(dom, t, period_timer);
int miss_periods = (jiffies - dom->period_time) /
VM_COMPLETIONS_PERIOD_LEN;
@@ -663,7 +641,7 @@ int wb_domain_init(struct wb_domain *dom, gfp_t gfp)
#ifdef CONFIG_CGROUP_WRITEBACK
void wb_domain_exit(struct wb_domain *dom)
{
- del_timer_sync(&dom->period_timer);
+ timer_delete_sync(&dom->period_timer);
fprop_global_destroy(&dom->completions);
}
#endif
@@ -942,26 +920,25 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc,
wb_min_max_ratio(wb, &wb_min_ratio, &wb_max_ratio);
wb_thresh += (thresh * wb_min_ratio) / (100 * BDI_RATIO_SCALE);
- wb_max_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE);
- if (wb_thresh > wb_max_thresh)
- wb_thresh = wb_max_thresh;
/*
- * With strictlimit flag, the wb_thresh is treated as
- * a hard limit in balance_dirty_pages() and wb_position_ratio().
- * It's possible that wb_thresh is close to zero, not because
- * the device is slow, but because it has been inactive.
- * To prevent occasional writes from being blocked, we raise wb_thresh.
+ * It's very possible that wb_thresh is close to 0 not because the
+ * device is slow, but that it has remained inactive for long time.
+ * Honour such devices a reasonable good (hopefully IO efficient)
+ * threshold, so that the occasional writes won't be blocked and active
+ * writes can rampup the threshold quickly.
*/
- if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
- unsigned long limit = hard_dirty_limit(dom, dtc->thresh);
- u64 wb_scale_thresh = 0;
-
- if (limit > dtc->dirty)
- wb_scale_thresh = (limit - dtc->dirty) / 100;
- wb_thresh = max(wb_thresh, min(wb_scale_thresh, wb_max_thresh / 4));
+ if (thresh > dtc->dirty) {
+ if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT))
+ wb_thresh = max(wb_thresh, (thresh - dtc->dirty) / 100);
+ else
+ wb_thresh = max(wb_thresh, (thresh - dtc->dirty) / 8);
}
+ wb_max_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE);
+ if (wb_thresh > wb_max_thresh)
+ wb_thresh = wb_max_thresh;
+
return wb_thresh;
}
@@ -969,6 +946,7 @@ unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
{
struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
+ domain_dirty_avail(&gdtc, true);
return __wb_calc_thresh(&gdtc, thresh);
}
@@ -1095,7 +1073,7 @@ static void wb_position_ratio(struct dirty_throttle_control *dtc)
struct bdi_writeback *wb = dtc->wb;
unsigned long write_bw = READ_ONCE(wb->avg_write_bandwidth);
unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
- unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
+ unsigned long limit = dtc->limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
unsigned long wb_thresh = dtc->wb_thresh;
unsigned long x_intercept;
unsigned long setpoint; /* dirty pages' target balance point */
@@ -1123,9 +1101,7 @@ static void wb_position_ratio(struct dirty_throttle_control *dtc)
* such filesystems balance_dirty_pages always checks wb counters
* against wb limits. Even if global "nr_dirty" is under "freerun".
* This is especially important for fuse which sets bdi->max_ratio to
- * 1% by default. Without strictlimit feature, fuse writeback may
- * consume arbitrary amount of RAM because it is accounted in
- * NR_WRITEBACK_TEMP which is not involved in calculating "nr_dirty".
+ * 1% by default.
*
* Here, in wb_position_ratio(), we calculate pos_ratio based on
* two values: wb_dirty and wb_thresh. Let's consider an example:
@@ -1145,12 +1121,6 @@ static void wb_position_ratio(struct dirty_throttle_control *dtc)
if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
long long wb_pos_ratio;
- if (dtc->wb_dirty < 8) {
- dtc->pos_ratio = min_t(long long, pos_ratio * 2,
- 2 << RATELIMIT_CALC_SHIFT);
- return;
- }
-
if (dtc->wb_dirty >= wb_thresh)
return;
@@ -1222,14 +1192,6 @@ static void wb_position_ratio(struct dirty_throttle_control *dtc)
if (unlikely(wb_thresh > dtc->thresh))
wb_thresh = dtc->thresh;
/*
- * It's very possible that wb_thresh is close to 0 not because the
- * device is slow, but that it has remained inactive for long time.
- * Honour such devices a reasonable good (hopefully IO efficient)
- * threshold, so that the occasional writes won't be blocked and active
- * writes can rampup the threshold quickly.
- */
- wb_thresh = max(wb_thresh, (limit - dtc->dirty) / 8);
- /*
* scale global setpoint to wb's:
* wb_setpoint = setpoint * wb_thresh / thresh
*/
@@ -1484,17 +1446,10 @@ static void wb_update_dirty_ratelimit(struct dirty_throttle_control *dtc,
* balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate).
* Hence, to calculate "step" properly, we have to use wb_dirty as
* "dirty" and wb_setpoint as "setpoint".
- *
- * We rampup dirty_ratelimit forcibly if wb_dirty is low because
- * it's possible that wb_thresh is close to zero due to inactivity
- * of backing device.
*/
if (unlikely(wb->bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
dirty = dtc->wb_dirty;
- if (dtc->wb_dirty < 8)
- setpoint = dtc->wb_dirty + 1;
- else
- setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
+ setpoint = (dtc->wb_thresh + dtc->wb_bg_thresh) / 2;
}
if (dirty < setpoint) {
@@ -1983,11 +1938,7 @@ free_running:
*/
if (pause < min_pause) {
trace_balance_dirty_pages(wb,
- sdtc->thresh,
- sdtc->bg_thresh,
- sdtc->dirty,
- sdtc->wb_thresh,
- sdtc->wb_dirty,
+ sdtc,
dirty_ratelimit,
task_ratelimit,
pages_dirtied,
@@ -2012,11 +1963,7 @@ free_running:
pause:
trace_balance_dirty_pages(wb,
- sdtc->thresh,
- sdtc->bg_thresh,
- sdtc->dirty,
- sdtc->wb_thresh,
- sdtc->wb_dirty,
+ sdtc,
dirty_ratelimit,
task_ratelimit,
pages_dirtied,
@@ -2254,7 +2201,7 @@ static int dirty_writeback_centisecs_handler(const struct ctl_table *table, int
void laptop_mode_timer_fn(struct timer_list *t)
{
struct backing_dev_info *backing_dev_info =
- from_timer(backing_dev_info, t, laptop_mode_wb_timer);
+ timer_container_of(backing_dev_info, t, laptop_mode_wb_timer);
wakeup_flusher_threads_bdi(backing_dev_info, WB_REASON_LAPTOP_TIMER);
}
@@ -2281,7 +2228,7 @@ void laptop_sync_completion(void)
rcu_read_lock();
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
- del_timer(&bdi->laptop_mode_wb_timer);
+ timer_delete(&bdi->laptop_mode_wb_timer);
rcu_read_unlock();
}
@@ -2319,7 +2266,7 @@ static int page_writeback_cpu_online(unsigned int cpu)
/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
static const unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
-static struct ctl_table vm_page_writeback_sysctls[] = {
+static const struct ctl_table vm_page_writeback_sysctls[] = {
{
.procname = "dirty_background_ratio",
.data = &dirty_background_ratio,
@@ -2616,11 +2563,11 @@ struct folio *writeback_iter(struct address_space *mapping,
if (!folio) {
/*
* To avoid deadlocks between range_cyclic writeback and callers
- * that hold pages in PageWriteback to aggregate I/O until
+ * that hold folios in writeback to aggregate I/O until
* the writeback iteration finishes, we do not loop back to the
- * start of the file. Doing so causes a page lock/page
+ * start of the file. Doing so causes a folio lock/folio
* writeback access order inversion - we should only ever lock
- * multiple pages in ascending page->index order, and looping
+ * multiple folios in ascending folio->index order, and looping
* back to the start of the file violates that rule and causes
* deadlocks.
*/
@@ -2673,27 +2620,6 @@ int write_cache_pages(struct address_space *mapping,
}
EXPORT_SYMBOL(write_cache_pages);
-static int writeback_use_writepage(struct address_space *mapping,
- struct writeback_control *wbc)
-{
- struct folio *folio = NULL;
- struct blk_plug plug;
- int err;
-
- blk_start_plug(&plug);
- while ((folio = writeback_iter(mapping, wbc, folio, &err))) {
- err = mapping->a_ops->writepage(&folio->page, wbc);
- if (err == AOP_WRITEPAGE_ACTIVATE) {
- folio_unlock(folio);
- err = 0;
- }
- mapping_set_error(mapping, err);
- }
- blk_finish_plug(&plug);
-
- return err;
-}
-
int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
int ret;
@@ -2704,14 +2630,11 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
wb = inode_to_wb_wbc(mapping->host, wbc);
wb_bandwidth_estimate_start(wb);
while (1) {
- if (mapping->a_ops->writepages) {
+ if (mapping->a_ops->writepages)
ret = mapping->a_ops->writepages(mapping, wbc);
- } else if (mapping->a_ops->writepage) {
- ret = writeback_use_writepage(mapping, wbc);
- } else {
+ else
/* deal with chardevs and other special files */
ret = 0;
- }
if (ret != -ENOMEM || wbc->sync_mode != WB_SYNC_ALL)
break;
@@ -3130,6 +3053,7 @@ void __folio_start_writeback(struct folio *folio, bool keep_write)
int access_ret;
VM_BUG_ON_FOLIO(folio_test_writeback(folio), folio);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
if (mapping && mapping_use_writeback_tags(mapping)) {
XA_STATE(xas, &mapping->i_pages, folio_index(folio));