From 31373d09da5b7fe21fe6f781e92bd534a3495f00 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 6 Apr 2010 14:25:14 +0200 Subject: laptop-mode: Make flushes per-device One of the features of laptop-mode is that it forces a writeout of dirty pages if something else triggers a physical read or write from a device. The current implementation flushes pages on all devices, rather than only the one that triggered the flush. This patch alters the behaviour so that only the recently accessed block device is flushed, preventing other disks being spun up for no terribly good reason. Signed-off-by: Matthew Garrett Signed-off-by: Jens Axboe --- include/linux/writeback.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 36520ded3e06..eb38a2c645f6 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -96,8 +96,10 @@ static inline void inode_sync_wait(struct inode *inode) /* * mm/page-writeback.c */ -void laptop_io_completion(void); +void laptop_io_completion(struct backing_dev_info *info); void laptop_sync_completion(void); +void laptop_mode_sync(struct work_struct *work); +void laptop_mode_timer_fn(unsigned long data); void throttle_vm_writeout(gfp_t gfp_mask); /* These are exported to sysctl. */ -- cgit v1.2.3 From e913fc825dc685a444cb4c1d0f9d32f372f59861 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 17 May 2010 12:55:07 +0200 Subject: writeback: fix WB_SYNC_NONE writeback from umount When umount calls sync_filesystem(), we first do a WB_SYNC_NONE writeback to kick off writeback of pending dirty inodes, then follow that up with a WB_SYNC_ALL to wait for it. Since umount already holds the sb s_umount mutex, WB_SYNC_NONE ends up doing nothing and all writeback happens as WB_SYNC_ALL. This can greatly slow down umount, since WB_SYNC_ALL writeback is a data integrity operation and thus a bigger hammer than simple WB_SYNC_NONE. 
For barrier-aware file systems it's a lot slower. Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 48 ++++++++++++++++++++++++++++++++++----------- fs/sync.c | 2 +- include/linux/backing-dev.h | 2 +- include/linux/writeback.h | 10 ++++++++++ mm/page-writeback.c | 4 ++-- 5 files changed, 51 insertions(+), 15 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 760dc8d0b4ff..67db89786e7d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -45,6 +45,7 @@ struct wb_writeback_args { int for_kupdate:1; int range_cyclic:1; int for_background:1; + int sb_pinned:1; }; /* @@ -230,6 +231,11 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, .sync_mode = WB_SYNC_ALL, .nr_pages = LONG_MAX, .range_cyclic = 0, + /* + * Setting sb_pinned is not necessary for WB_SYNC_ALL, but + * let's make it explicitly clear. + */ + .sb_pinned = 1, }; struct bdi_work work; @@ -245,21 +251,23 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, * @bdi: the backing device to write from * @sb: write inodes from this super_block * @nr_pages: the number of pages to write + * @sb_locked: caller already holds sb umount sem. * * Description: * This does WB_SYNC_NONE opportunistic writeback. The IO is only * started when this function returns, we make no guarentees on - * completion. Caller need not hold sb s_umount semaphore. + * completion. Caller specifies whether sb umount sem is held already or not. 
* */ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, - long nr_pages) + long nr_pages, int sb_locked) { struct wb_writeback_args args = { .sb = sb, .sync_mode = WB_SYNC_NONE, .nr_pages = nr_pages, .range_cyclic = 1, + .sb_pinned = sb_locked, }; /* @@ -577,7 +585,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, /* * Caller must already hold the ref for this */ - if (wbc->sync_mode == WB_SYNC_ALL) { + if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) { WARN_ON(!rwsem_is_locked(&sb->s_umount)); return SB_NOT_PINNED; } @@ -751,6 +759,7 @@ static long wb_writeback(struct bdi_writeback *wb, .for_kupdate = args->for_kupdate, .for_background = args->for_background, .range_cyclic = args->range_cyclic, + .sb_pinned = args->sb_pinned, }; unsigned long oldest_jif; long wrote = 0; @@ -1193,6 +1202,18 @@ static void wait_sb_inodes(struct super_block *sb) iput(old_inode); } +static void __writeback_inodes_sb(struct super_block *sb, int sb_locked) +{ + unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); + unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); + long nr_to_write; + + nr_to_write = nr_dirty + nr_unstable + + (inodes_stat.nr_inodes - inodes_stat.nr_unused); + + bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked); +} + /** * writeback_inodes_sb - writeback dirty inodes from given super_block * @sb: the superblock @@ -1204,17 +1225,22 @@ static void wait_sb_inodes(struct super_block *sb) */ void writeback_inodes_sb(struct super_block *sb) { - unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); - unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); - long nr_to_write; - - nr_to_write = nr_dirty + nr_unstable + - (inodes_stat.nr_inodes - inodes_stat.nr_unused); - - bdi_start_writeback(sb->s_bdi, sb, nr_to_write); + __writeback_inodes_sb(sb, 0); } EXPORT_SYMBOL(writeback_inodes_sb); +/** + * writeback_inodes_sb_locked - writeback dirty inodes from given super_block 
+ * @sb: the superblock + * + * Like writeback_inodes_sb(), except the caller already holds the + * sb umount sem. + */ +void writeback_inodes_sb_locked(struct super_block *sb) +{ + __writeback_inodes_sb(sb, 1); +} + /** * writeback_inodes_sb_if_idle - start writeback if none underway * @sb: the superblock diff --git a/fs/sync.c b/fs/sync.c index 92b228176f7c..de6a44192832 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) if (wait) sync_inodes_sb(sb); else - writeback_inodes_sb(sb); + writeback_inodes_sb_locked(sb); if (sb->s_op->sync_fs) sb->s_op->sync_fs(sb, wait); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 7534979d83bd..ff8bac63213f 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -106,7 +106,7 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); void bdi_unregister(struct backing_dev_info *bdi); int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, - long nr_pages); + long nr_pages, int sb_locked); int bdi_writeback_task(struct bdi_writeback *wb); int bdi_has_dirty_io(struct backing_dev_info *bdi); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index eb38a2c645f6..47e1c686cb02 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -65,6 +65,15 @@ struct writeback_control { * so we use a single control to update them */ unsigned no_nrwrite_index_update:1; + + /* + * For WB_SYNC_ALL, the sb must always be pinned. For WB_SYNC_NONE, + * the writeback code will pin the sb for the caller. However, + * for eg umount, the caller does WB_SYNC_NONE but already has + * the sb pinned. If the below is set, caller already has the + * sb pinned. 
+ */ + unsigned sb_pinned:1; }; /* @@ -73,6 +82,7 @@ struct writeback_control { struct bdi_writeback; int inode_wait(void *); void writeback_inodes_sb(struct super_block *); +void writeback_inodes_sb_locked(struct super_block *); int writeback_inodes_sb_if_idle(struct super_block *); void sync_inodes_sb(struct super_block *); void writeback_inodes_wbc(struct writeback_control *wbc); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d0f2b3765f8d..53b2fcf2d283 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -597,7 +597,7 @@ static void balance_dirty_pages(struct address_space *mapping, (!laptop_mode && ((global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS)) > background_thresh))) - bdi_start_writeback(bdi, NULL, 0); + bdi_start_writeback(bdi, NULL, 0, 0); } void set_page_dirty_balance(struct page *page, int page_mkwrite) @@ -705,7 +705,7 @@ void laptop_mode_timer_fn(unsigned long data) */ if (bdi_has_dirty_io(&q->backing_dev_info)) - bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages); + bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages, 0); } /* -- cgit v1.2.3 From c2c4986eddaa7dc3d036cb2bfa5c8c5f1f2492a0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 20 May 2010 09:18:47 +0200 Subject: writeback: fix problem with !CONFIG_BLOCK compilation When CONFIG_BLOCK isn't enabled: mm/page-writeback.c: In function 'laptop_mode_timer_fn': mm/page-writeback.c:708: error: dereferencing pointer to incomplete type mm/page-writeback.c:709: error: dereferencing pointer to incomplete type Fix this by essentially eliminating the laptop sync handlers when CONFIG_BLOCK isn't set, as most are only used from the block layer code. The exception is laptop_sync_completion() which is used from sys_sync(), make that an empty declaration in that case. 
Reported-by: Randy Dunlap Signed-off-by: Jens Axboe --- fs/super.c | 1 + include/linux/writeback.h | 4 ++++ mm/page-writeback.c | 2 ++ 3 files changed, 7 insertions(+) (limited to 'include/linux/writeback.h') diff --git a/fs/super.c b/fs/super.c index dc72491a19f9..1527e6a0ee35 100644 --- a/fs/super.c +++ b/fs/super.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include "internal.h" diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 47e1c686cb02..cc97d6caf2b3 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -106,10 +106,14 @@ static inline void inode_sync_wait(struct inode *inode) /* * mm/page-writeback.c */ +#ifdef CONFIG_BLOCK void laptop_io_completion(struct backing_dev_info *info); void laptop_sync_completion(void); void laptop_mode_sync(struct work_struct *work); void laptop_mode_timer_fn(unsigned long data); +#else +static inline void laptop_sync_completion(void) { } +#endif void throttle_vm_writeout(gfp_t gfp_mask); /* These are exported to sysctl. */ diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 0d7bbe859550..9886424e1864 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -694,6 +694,7 @@ int dirty_writeback_centisecs_handler(ctl_table *table, int write, return 0; } +#ifdef CONFIG_BLOCK void laptop_mode_timer_fn(unsigned long data) { struct request_queue *q = (struct request_queue *)data; @@ -735,6 +736,7 @@ void laptop_sync_completion(void) rcu_read_unlock(); } +#endif /* * If ratelimit_pages is too high then we can get into dirty-data overload -- cgit v1.2.3