From 6e34eeddf7deec1444bbddab533f03f520d8458c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 7 Apr 2009 18:12:43 -0400 Subject: block_write_full_page: switch synchronous writes to use WRITE_SYNC_PLUG Now that we have a distinction between WRITE_SYNC and WRITE_SYNC_PLUG, use WRITE_SYNC_PLUG in __block_write_full_page() to avoid unplugging the block device I/O queue between each page that gets flushed out. Otherwise, when we run sync() or fsync() and we need to write out a large number of pages, the block device queue will get unplugged between for every page that is flushed out, which will be a pretty serious performance regression caused by commit a64c8610. Signed-off-by: "Theodore Ts'o" --- fs/buffer.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 6e35762b6169..13edf7ad3ff1 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1596,6 +1596,16 @@ EXPORT_SYMBOL(unmap_underlying_metadata); * locked buffer. This only can happen if someone has written the buffer * directly, with submit_bh(). At the address_space level PageWriteback * prevents this contention from occurring. + * + * If block_write_full_page() is called with wbc->sync_mode == + * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this + * causes the writes to be flagged as synchronous writes, but the + * block device queue will NOT be unplugged, since usually many pages + * will be pushed to the out before the higher-level caller actually + * waits for the writes to be completed. The various wait functions, + * such as wait_on_writeback_range() will ultimately call sync_page() + * which will ultimately call blk_run_backing_dev(), which will end up + * unplugging the device queue. */ static int __block_write_full_page(struct inode *inode, struct page *page, get_block_t *get_block, struct writeback_control *wbc) @@ -1606,7 +1616,8 @@ static int __block_write_full_page(struct inode *inode, struct page *page, struct buffer_head *bh, *head; const unsigned blocksize = 1 << inode->i_blkbits; int nr_underway = 0; - int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); + int write_op = (wbc->sync_mode == WB_SYNC_ALL ? + WRITE_SYNC_PLUG : WRITE); BUG_ON(!PageLocked(page)); -- cgit v1.2.3 From 053c525fcf976810f023d96472f414c0d5e6339b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 8 Apr 2009 13:44:08 +0200 Subject: buffer: switch do_emergency_thaw() away from pdflush_operation() This is (again) a preparatory patch similar to commit a2a9537ac0b37a5da6fbe7e1e9cb06c524d2a9c4. It open codes a simple async way of executing do_thaw_all() out of context, so we can get rid of pdflush. Signed-off-by: Jens Axboe --- fs/buffer.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 13edf7ad3ff1..ff8bb1f2333a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -547,7 +547,7 @@ repeat: return err; } -void do_thaw_all(unsigned long unused) +void do_thaw_all(struct work_struct *work) { struct super_block *sb; char b[BDEVNAME_SIZE]; @@ -567,6 +567,7 @@ restart: goto restart; } spin_unlock(&sb_lock); + kfree(work); printk(KERN_WARNING "Emergency Thaw complete\n"); } @@ -577,7 +578,13 @@ restart: */ void emergency_thaw_all(void) { - pdflush_operation(do_thaw_all, 0); + struct work_struct *work; + + work = kmalloc(sizeof(*work), GFP_ATOMIC); + if (work) { + INIT_WORK(work, do_thaw_all); + schedule_work(work); + } } /** -- cgit v1.2.3 From 35c80d5f400f68f2eccf3069d1c068e154bde9c9 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 15 Apr 2009 13:22:38 -0400 Subject: Add block_write_full_page_endio for passing endio handler block_write_full_page doesn't allow the caller to control what happens when the IO is over. This adds a new call named block_write_full_page_endio so the buffer head end_io handler can be provided by the caller. This will be used by the ext3 data=guarded mode to do i_size updates in a workqueue based end_io handler. end_buffer_async_write is also exported so it can be called to do the dirty work of managing page writeback for the higher level end_io handler. Signed-off-by: Chris Mason Acked-by: Theodore Tso Acked-by: Jan Kara Signed-off-by: Linus Torvalds --- fs/buffer.c | 45 ++++++++++++++++++++++++++++++++++----------- include/linux/buffer_head.h | 3 +++ 2 files changed, 37 insertions(+), 11 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index ff8bb1f2333a..b3e5be7514f5 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -360,7 +360,7 @@ still_busy: * Completion handler for block_write_full_page() - pages which are unlocked * during I/O, and which have PageWriteback cleared upon I/O completion. */ -static void end_buffer_async_write(struct buffer_head *bh, int uptodate) +void end_buffer_async_write(struct buffer_head *bh, int uptodate) { char b[BDEVNAME_SIZE]; unsigned long flags; @@ -438,11 +438,17 @@ static void mark_buffer_async_read(struct buffer_head *bh) set_buffer_async_read(bh); } -void mark_buffer_async_write(struct buffer_head *bh) +void mark_buffer_async_write_endio(struct buffer_head *bh, + bh_end_io_t *handler) { - bh->b_end_io = end_buffer_async_write; + bh->b_end_io = handler; set_buffer_async_write(bh); } + +void mark_buffer_async_write(struct buffer_head *bh) +{ + mark_buffer_async_write_endio(bh, end_buffer_async_write); +} EXPORT_SYMBOL(mark_buffer_async_write); @@ -1615,7 +1621,8 @@ EXPORT_SYMBOL(unmap_underlying_metadata); * unplugging the device queue. */ static int __block_write_full_page(struct inode *inode, struct page *page, - get_block_t *get_block, struct writeback_control *wbc) + get_block_t *get_block, struct writeback_control *wbc, + bh_end_io_t *handler) { int err; sector_t block; @@ -1700,7 +1707,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, continue; } if (test_clear_buffer_dirty(bh)) { - mark_buffer_async_write(bh); + mark_buffer_async_write_endio(bh, handler); } else { unlock_buffer(bh); } @@ -1753,7 +1760,7 @@ recover: if (buffer_mapped(bh) && buffer_dirty(bh) && !buffer_delay(bh)) { lock_buffer(bh); - mark_buffer_async_write(bh); + mark_buffer_async_write_endio(bh, handler); } else { /* * The buffer may have been set dirty during @@ -2679,7 +2686,8 @@ int nobh_writepage(struct page *page, get_block_t *get_block, out: ret = mpage_writepage(page, get_block, wbc); if (ret == -EAGAIN) - ret = __block_write_full_page(inode, page, get_block, wbc); + ret = __block_write_full_page(inode, page, get_block, wbc, + end_buffer_async_write); return ret; } EXPORT_SYMBOL(nobh_writepage); @@ -2837,9 +2845,10 @@ out: /* * The generic ->writepage function for buffer-backed address_spaces + * this form passes in the end_io handler used to finish the IO. */ -int block_write_full_page(struct page *page, get_block_t *get_block, - struct writeback_control *wbc) +int block_write_full_page_endio(struct page *page, get_block_t *get_block, + struct writeback_control *wbc, bh_end_io_t *handler) { struct inode * const inode = page->mapping->host; loff_t i_size = i_size_read(inode); @@ -2848,7 +2857,8 @@ int block_write_full_page(struct page *page, get_block_t *get_block, /* Is the page fully inside i_size? */ if (page->index < end_index) - return __block_write_full_page(inode, page, get_block, wbc); + return __block_write_full_page(inode, page, get_block, wbc, + handler); /* Is the page fully outside i_size? (truncate in progress) */ offset = i_size & (PAGE_CACHE_SIZE-1); @@ -2871,9 +2881,20 @@ int block_write_full_page(struct page *page, get_block_t *get_block, * writes to that region are not written out to the file." */ zero_user_segment(page, offset, PAGE_CACHE_SIZE); - return __block_write_full_page(inode, page, get_block, wbc); + return __block_write_full_page(inode, page, get_block, wbc, handler); } +/* + * The generic ->writepage function for buffer-backed address_spaces + */ +int block_write_full_page(struct page *page, get_block_t *get_block, + struct writeback_control *wbc) +{ + return block_write_full_page_endio(page, get_block, wbc, + end_buffer_async_write); +} + + sector_t generic_block_bmap(struct address_space *mapping, sector_t block, get_block_t *get_block) { @@ -3342,9 +3363,11 @@ EXPORT_SYMBOL(block_read_full_page); EXPORT_SYMBOL(block_sync_page); EXPORT_SYMBOL(block_truncate_page); EXPORT_SYMBOL(block_write_full_page); +EXPORT_SYMBOL(block_write_full_page_endio); EXPORT_SYMBOL(cont_write_begin); EXPORT_SYMBOL(end_buffer_read_sync); EXPORT_SYMBOL(end_buffer_write_sync); +EXPORT_SYMBOL(end_buffer_async_write); EXPORT_SYMBOL(file_fsync); EXPORT_SYMBOL(generic_block_bmap); EXPORT_SYMBOL(generic_cont_expand_simple); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 7b73bb8f1970..16ed0284d780 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -155,6 +155,7 @@ void create_empty_buffers(struct page *, unsigned long, unsigned long b_state); void end_buffer_read_sync(struct buffer_head *bh, int uptodate); void end_buffer_write_sync(struct buffer_head *bh, int uptodate); +void end_buffer_async_write(struct buffer_head *bh, int uptodate); /* Things to do with buffers at mapping->private_list */ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode); @@ -197,6 +198,8 @@ extern int buffer_heads_over_limit; void block_invalidatepage(struct page *page, unsigned long offset); int block_write_full_page(struct page *page, get_block_t *get_block, struct writeback_control *wbc); +int block_write_full_page_endio(struct page *page, get_block_t *get_block, + struct writeback_control *wbc, bh_end_io_t *handler); int block_read_full_page(struct page*, get_block_t*); int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, unsigned long from); -- cgit v1.2.3