From 5b41d92437f1ae19b3f3ffa3b16589fd5df50ac0 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 27 Oct 2010 21:30:13 -0400 Subject: ext4: implement writeback livelock avoidance using page tagging This is analogous to Jan Kara's commit, f446daaea9d4a420d16c606f755f3689dcb2d0ce mm: implement writeback livelock avoidance using page tagging but since we forked write_cache_pages, we need to reimplement it there (and in ext4_da_writepages, since range_cyclic handling was moved to there) If you start a large buffered IO to a file, and then set fsync after it, you'll find that fsync does not complete until the other IO stops. If you continue re-dirtying the file (say, putting dd with conv=notrunc in a loop), when fsync finally completes (after all IO is done), it reports via tracing that it has written many more pages than the file contains; in other words it has synced and re-synced pages in the file multiple times. This then leads to problems with our writeback_index update, since it advances it by pages written, and essentially sets writeback_index off the end of the file... With the following patch, we only sync as much as was dirty at the time of the sync. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- include/linux/writeback.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 72a5d647a5f2..3d132bfb4f3d 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -143,6 +143,8 @@ typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, int generic_writepages(struct address_space *mapping, struct writeback_control *wbc); +void tag_pages_for_writeback(struct address_space *mapping, + pgoff_t start, pgoff_t end); int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, void *data); -- cgit v1.2.3 From 3259f8bed2f0f57c2fdcdac1b510c3fa319ef97e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 29 Oct 2010 11:16:17 -0400 Subject: Add new functions for triggering inode writeback When btrfs is running low on metadata space, it needs to force delayed allocation pages to disk. It currently does this with a suboptimal walk of a private list of inodes with delayed allocation, and it would be much better if we used the generic flusher threads. writeback_inodes_sb_if_idle would be ideal, but it waits for the flusher thread to start IO on all the dirty pages in the FS before it returns. This adds variants of writeback_inodes_sb* that allow the caller to control how many pages get sent down. Signed-off-by: Chris Mason --- fs/fs-writeback.c | 52 ++++++++++++++++++++++++++++++++++++++--------- include/linux/writeback.h | 2 ++ 2 files changed, 44 insertions(+), 10 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ab38fef1c9a1..1e23c33ea5cf 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1069,33 +1069,44 @@ static void wait_sb_inodes(struct super_block *sb) } /** - * writeback_inodes_sb - writeback dirty inodes from given super_block + * writeback_inodes_sb_nr - writeback dirty inodes from given super_block * @sb: the superblock + * @nr: the number of pages to write * * Start writeback on some inodes on this super_block. No guarantees are made * on how many (if any) will be written, and this function does not wait - * for IO completion of submitted IO. The number of pages submitted is - * returned. + * for IO completion of submitted IO. */ -void writeback_inodes_sb(struct super_block *sb) +void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr) { - unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); - unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); DECLARE_COMPLETION_ONSTACK(done); struct wb_writeback_work work = { .sb = sb, .sync_mode = WB_SYNC_NONE, .done = &done, + .nr_pages = nr, }; WARN_ON(!rwsem_is_locked(&sb->s_umount)); - - work.nr_pages = nr_dirty + nr_unstable + - (inodes_stat.nr_inodes - inodes_stat.nr_unused); - bdi_queue_work(sb->s_bdi, &work); wait_for_completion(&done); } +EXPORT_SYMBOL(writeback_inodes_sb_nr); + +/** + * writeback_inodes_sb - writeback dirty inodes from given super_block + * @sb: the superblock + * + * Start writeback on some inodes on this super_block. No guarantees are made + * on how many (if any) will be written, and this function does not wait + * for IO completion of submitted IO. + */ +void writeback_inodes_sb(struct super_block *sb) +{ + return writeback_inodes_sb_nr(sb, global_page_state(NR_FILE_DIRTY) + + global_page_state(NR_UNSTABLE_NFS) + + (inodes_stat.nr_inodes - inodes_stat.nr_unused)); +} EXPORT_SYMBOL(writeback_inodes_sb); /** @@ -1117,6 +1128,27 @@ int writeback_inodes_sb_if_idle(struct super_block *sb) } EXPORT_SYMBOL(writeback_inodes_sb_if_idle); +/** + * writeback_inodes_sb_if_idle - start writeback if none underway + * @sb: the superblock + * @nr: the number of pages to write + * + * Invoke writeback_inodes_sb if no writeback is currently underway. + * Returns 1 if writeback was started, 0 if not. + */ +int writeback_inodes_sb_nr_if_idle(struct super_block *sb, + unsigned long nr) +{ + if (!writeback_in_progress(sb->s_bdi)) { + down_read(&sb->s_umount); + writeback_inodes_sb_nr(sb, nr); + up_read(&sb->s_umount); + return 1; + } else + return 0; +} +EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle); + /** * sync_inodes_sb - sync sb inode pages * @sb: the superblock diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 72a5d647a5f2..a4cf84511e79 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -60,7 +60,9 @@ struct writeback_control { struct bdi_writeback; int inode_wait(void *); void writeback_inodes_sb(struct super_block *); +void writeback_inodes_sb_nr(struct super_block *, unsigned long nr); int writeback_inodes_sb_if_idle(struct super_block *); +int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr); void sync_inodes_sb(struct super_block *); void writeback_inodes_wb(struct bdi_writeback *wb, struct writeback_control *wbc); -- cgit v1.2.3