summaryrefslogtreecommitdiff
path: root/mm/page-writeback.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r--mm/page-writeback.c57
1 files changed, 36 insertions, 21 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index a3278f005230..8616ef3025a4 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -32,7 +32,7 @@
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
-#include <linux/buffer_head.h>
+#include <linux/buffer_head.h> /* __set_page_dirty_buffers */
#include <linux/pagevec.h>
#include <trace/events/writeback.h>
@@ -128,7 +128,6 @@ unsigned long global_dirty_limit;
*
*/
static struct prop_descriptor vm_completions;
-static struct prop_descriptor vm_dirties;
/*
* couple the period to the dirty_ratio:
@@ -154,7 +153,6 @@ static void update_completion_period(void)
{
int shift = calc_period_shift();
prop_change_shift(&vm_completions, shift);
- prop_change_shift(&vm_dirties, shift);
writeback_set_ratelimit();
}
@@ -235,11 +233,6 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
}
EXPORT_SYMBOL_GPL(bdi_writeout_inc);
-void task_dirty_inc(struct task_struct *tsk)
-{
- prop_inc_single(&vm_dirties, &tsk->dirties);
-}
-
/*
* Obtain an accurate fraction of the BDI's portion.
*/
@@ -418,8 +411,13 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
*
* Returns @bdi's dirty limit in pages. The term "dirty" in the context of
* dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages.
- * And the "limit" in the name is not seriously taken as hard limit in
- * balance_dirty_pages().
+ *
+ * Note that balance_dirty_pages() will only seriously take it as a hard limit
+ * when sleeping max_pause per page is not enough to keep the dirty pages under
+ * control. For example, when the device is completely stalled due to some error
+ * conditions, or when there are 1000 dd tasks writing to a slow 10MB/s USB key.
+ * In the other normal situations, it acts more gently by throttling the tasks
+ * more (rather than completely block them) when the bdi dirty pages go high.
*
* It allocates high/low dirty limits to fast/slow devices, in order to prevent
* - starving fast devices
@@ -601,6 +599,13 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
*/
if (unlikely(bdi_thresh > thresh))
bdi_thresh = thresh;
+ /*
+ * It's very possible that bdi_thresh is close to 0 not because the
+ * device is slow, but that it has remained inactive for long time.
+ * Honour such devices a reasonable good (hopefully IO efficient)
+ * threshold, so that the occasional writes won't be blocked and active
+ * writes can rampup the threshold quickly.
+ */
bdi_thresh = max(bdi_thresh, (limit - dirty) / 8);
/*
* scale global setpoint to bdi's:
@@ -984,8 +989,7 @@ static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
*
* 8 serves as the safety ratio.
*/
- if (bdi_dirty)
- t = min(t, bdi_dirty * HZ / (8 * bw + 1));
+ t = min(t, bdi_dirty * HZ / (8 * bw + 1));
/*
* The pause time will be settled within range (max_pause/4, max_pause).
@@ -1133,17 +1137,30 @@ pause:
pages_dirtied,
pause,
start_time);
- __set_current_state(TASK_UNINTERRUPTIBLE);
+ __set_current_state(TASK_KILLABLE);
io_schedule_timeout(pause);
- dirty_thresh = hard_dirty_limit(dirty_thresh);
/*
- * max-pause area. If dirty exceeded but still within this
- * area, no need to sleep for more than 200ms: (a) 8 pages per
- * 200ms is typically more than enough to curb heavy dirtiers;
- * (b) the pause time limit makes the dirtiers more responsive.
+ * This is typically equal to (nr_dirty < dirty_thresh) and can
+ * also keep "1000+ dd on a slow USB stick" under control.
*/
- if (nr_dirty < dirty_thresh)
+ if (task_ratelimit)
+ break;
+
+ /*
+ * In the case of an unresponding NFS server and the NFS dirty
+ * pages exceeds dirty_thresh, give the other good bdi's a pipe
+ * to go through, so that tasks on them still remain responsive.
+ *
+ * In theory 1 page is enough to keep the comsumer-producer
+ * pipe going: the flusher cleans 1 page => the task dirties 1
+ * more page. However bdi_dirty has accounting errors. So use
+ * the larger and more IO friendly bdi_stat_error.
+ */
+ if (bdi_dirty <= bdi_stat_error(bdi))
+ break;
+
+ if (fatal_signal_pending(current))
break;
}
@@ -1395,7 +1412,6 @@ void __init page_writeback_init(void)
shift = calc_period_shift();
prop_descriptor_init(&vm_completions, shift);
- prop_descriptor_init(&vm_dirties, shift);
}
/**
@@ -1724,7 +1740,6 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
__inc_zone_page_state(page, NR_DIRTIED);
__inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
__inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED);
- task_dirty_inc(current);
task_io_account_write(PAGE_CACHE_SIZE);
}
}