From 36ade451a5d736e61ac8302b64aacc5acb5e440f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 28 Jan 2013 09:30:52 -0500 Subject: ext4: Always use ext4_bio_write_page() for writeout Currently we sometimes use block_write_full_page() and sometimes ext4_bio_write_page() for writeback (depending on mount options and call path). Let's always use ext4_bio_write_page() to simplify things a bit. Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/page-io.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/ext4/page-io.c') diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 0016fbca2a40..ddb3d401543c 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -73,8 +73,6 @@ void ext4_free_io_end(ext4_io_end_t *io) BUG_ON(!list_empty(&io->list)); BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); - if (io->page) - put_page(io->page); for (i = 0; i < io->num_io_pages; i++) put_io_page(io->pages[i]); io->num_io_pages = 0; -- cgit v1.2.3 From 1ae48a6354a364413d372df1525d523a3fb4fb8c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 28 Jan 2013 09:32:54 -0500 Subject: ext4: use redirty_page_for_writepage() in ext4_bio_write_page() When we cannot write a page we should use redirty_page_for_writepage() instead of plain set_page_dirty(). That tells writeback code we have problems, redirties only the page (redirtying buffers is not needed), and updates mm accounting of failed page writes. Also move clearing of buffer dirty flag after io_submit_add_bh(). At that moment we are sure buffer will be going to disk. 
Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/page-io.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/ext4/page-io.c') diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index ddb3d401543c..05795f10e55a 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "ext4_jbd2.h" #include "xattr.h" @@ -434,7 +435,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS); if (!io_page) { - set_page_dirty(page); + redirty_page_for_writepage(wbc, page); unlock_page(page); return -ENOMEM; } @@ -466,7 +467,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io, set_buffer_uptodate(bh); continue; } - clear_buffer_dirty(bh); ret = io_submit_add_bh(io, io_page, inode, wbc, bh); if (ret) { /* @@ -474,9 +474,10 @@ int ext4_bio_write_page(struct ext4_io_submit *io, * we can do but mark the page as dirty, and * better luck next time. */ - set_page_dirty(page); + redirty_page_for_writepage(wbc, page); break; } + clear_buffer_dirty(bh); } unlock_page(page); /* -- cgit v1.2.3 From 84c17543ab5685d950da73209df0ecda26e72d3b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 28 Jan 2013 09:43:46 -0500 Subject: ext4: move work from io_end to inode It does not make much sense to have struct work in ext4_io_end_t because we always use it for only one ext4_io_end_t per inode (the first one in the i_completed_io list). So just move the structure to inode itself. This also allows for a small simplification in processing io_end structures. 
Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 6 +++--- fs/ext4/page-io.c | 33 +++++++++------------------------ fs/ext4/super.c | 1 + 3 files changed, 13 insertions(+), 27 deletions(-) (limited to 'fs/ext4/page-io.c') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0ccda0c9682e..d93393eb5f2d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -194,8 +194,7 @@ struct mpage_da_data { */ #define EXT4_IO_END_UNWRITTEN 0x0001 #define EXT4_IO_END_ERROR 0x0002 -#define EXT4_IO_END_QUEUED 0x0004 -#define EXT4_IO_END_DIRECT 0x0008 +#define EXT4_IO_END_DIRECT 0x0004 struct ext4_io_page { struct page *p_page; @@ -217,7 +216,6 @@ typedef struct ext4_io_end { unsigned int flag; /* unwritten or not */ loff_t offset; /* offset in the file */ ssize_t size; /* size of the extent */ - struct work_struct work; /* data work queue */ struct kiocb *iocb; /* iocb struct for AIO */ int result; /* error value for AIO */ int num_io_pages; /* for writepages() */ @@ -929,6 +927,7 @@ struct ext4_inode_info { spinlock_t i_completed_io_lock; atomic_t i_ioend_count; /* Number of outstanding io_end structs */ atomic_t i_unwritten; /* Nr. 
of inflight conversions pending */ + struct work_struct i_unwritten_work; /* deferred extent conversion */ spinlock_t i_block_reservation_lock; @@ -2538,6 +2537,7 @@ extern void ext4_exit_pageio(void); extern void ext4_ioend_wait(struct inode *); extern void ext4_free_io_end(ext4_io_end_t *io); extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); +extern void ext4_end_io_work(struct work_struct *work); extern void ext4_io_submit(struct ext4_io_submit *io); extern int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 05795f10e55a..a0290176ee75 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -151,16 +151,13 @@ void ext4_add_complete_io(ext4_io_end_t *io_end) wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; spin_lock_irqsave(&ei->i_completed_io_lock, flags); - if (list_empty(&ei->i_completed_io_list)) { - io_end->flag |= EXT4_IO_END_QUEUED; - queue_work(wq, &io_end->work); - } + if (list_empty(&ei->i_completed_io_list)) + queue_work(wq, &ei->i_unwritten_work); list_add_tail(&io_end->list, &ei->i_completed_io_list); spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); } -static int ext4_do_flush_completed_IO(struct inode *inode, - ext4_io_end_t *work_io) +static int ext4_do_flush_completed_IO(struct inode *inode) { ext4_io_end_t *io; struct list_head unwritten, complete, to_free; @@ -191,19 +188,7 @@ static int ext4_do_flush_completed_IO(struct inode *inode, while (!list_empty(&complete)) { io = list_entry(complete.next, ext4_io_end_t, list); io->flag &= ~EXT4_IO_END_UNWRITTEN; - /* end_io context can not be destroyed now because it still - * used by queued worker. 
Worker thread will destroy it later */ - if (io->flag & EXT4_IO_END_QUEUED) - list_del_init(&io->list); - else - list_move(&io->list, &to_free); - } - /* If we are called from worker context, it is time to clear queued - * flag, and destroy it's end_io if it was converted already */ - if (work_io) { - work_io->flag &= ~EXT4_IO_END_QUEUED; - if (!(work_io->flag & EXT4_IO_END_UNWRITTEN)) - list_add_tail(&work_io->list, &to_free); + list_move(&io->list, &to_free); } spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); @@ -218,10 +203,11 @@ static int ext4_do_flush_completed_IO(struct inode *inode, /* * work on completed aio dio IO, to convert unwritten extents to extents */ -static void ext4_end_io_work(struct work_struct *work) +void ext4_end_io_work(struct work_struct *work) { - ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); - ext4_do_flush_completed_IO(io->inode, io); + struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info, + i_unwritten_work); + ext4_do_flush_completed_IO(&ei->vfs_inode); } int ext4_flush_unwritten_io(struct inode *inode) @@ -229,7 +215,7 @@ int ext4_flush_unwritten_io(struct inode *inode) int ret; WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) && !(inode->i_state & I_FREEING)); - ret = ext4_do_flush_completed_IO(inode, NULL); + ret = ext4_do_flush_completed_IO(inode); ext4_unwritten_wait(inode); return ret; } @@ -240,7 +226,6 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) if (io) { atomic_inc(&EXT4_I(inode)->i_ioend_count); io->inode = inode; - INIT_WORK(&io->work, ext4_end_io_work); INIT_LIST_HEAD(&io->list); } return io; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d5d336bd2180..dc0fb7b942cf 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -960,6 +960,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_datasync_tid = 0; atomic_set(&ei->i_ioend_count, 0); atomic_set(&ei->i_unwritten, 0); + INIT_WORK(&ei->i_unwritten_work, ext4_end_io_work); return 
&ei->vfs_inode; } -- cgit v1.2.3 From 002bd7fa3ac7441bdb36df67b2c64bc8c1be5360 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 28 Jan 2013 09:49:15 -0500 Subject: ext4: simplify list handling in ext4_do_flush_completed_IO() The function splices i_completed_io_list to its private list first. From that moment on we don't need any lock for working with io_end structures because all io_end structures on the list belong exclusively to us. So we can remove the other two lists in the function and free io_end immediately after we are done with it. CC: Dmitry Monakhov Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/page-io.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) (limited to 'fs/ext4/page-io.c') diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index a0290176ee75..3fb385cd9670 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -160,14 +160,11 @@ void ext4_add_complete_io(ext4_io_end_t *io_end) static int ext4_do_flush_completed_IO(struct inode *inode) { ext4_io_end_t *io; - struct list_head unwritten, complete, to_free; + struct list_head unwritten; unsigned long flags; struct ext4_inode_info *ei = EXT4_I(inode); int err, ret = 0; - INIT_LIST_HEAD(&complete); - INIT_LIST_HEAD(&to_free); - spin_lock_irqsave(&ei->i_completed_io_lock, flags); dump_completed_IO(inode); list_replace_init(&ei->i_completed_io_list, &unwritten); @@ -181,20 +178,7 @@ static int ext4_do_flush_completed_IO(struct inode *inode) err = ext4_end_io(io); if (unlikely(!ret && err)) ret = err; - - list_add_tail(&io->list, &complete); - } - spin_lock_irqsave(&ei->i_completed_io_lock, flags); - while (!list_empty(&complete)) { - io = list_entry(complete.next, ext4_io_end_t, list); io->flag &= ~EXT4_IO_END_UNWRITTEN; - list_move(&io->list, &to_free); - } - spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); - - while (!list_empty(&to_free)) { - io = list_entry(to_free.next, ext4_io_end_t, list); - list_del_init(&io->list); ext4_free_io_end(io); } return 
ret; -- cgit v1.2.3 From 8a850c3fb8d0f204eabc1a32b502f47d3c16eac4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 28 Jan 2013 20:53:28 -0500 Subject: ext4: Make ext4_bio_write_page() handle unprepared buffers So far ext4_bio_write_page() unconditionally cleared dirty bit on all buffers underlying the page. That implicitly assumes we can write all buffers. So far that is true because callers of ext4_bio_write_page() make sure all buffers in the page are mapped but: a) it's a data corruption bug waiting to happen b) in data=ordered mode when blocksize < pagesize we do need to write pages that may have only some of the dirty buffers mapped. So change ext4_bio_write_page() to skip buffers that cannot be written without clearing their dirty bit. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/page-io.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs/ext4/page-io.c') diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 3fb385cd9670..0290bf85f97e 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -350,14 +350,6 @@ static int io_submit_add_bh(struct ext4_io_submit *io, unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); } - if (!buffer_mapped(bh) || buffer_delay(bh)) { - if (!buffer_mapped(bh)) - clear_buffer_dirty(bh); - if (io->io_bio) - ext4_io_submit(io); - return 0; - } - if (io->io_bio && bh->b_blocknr != io->io_next_block) { submit_and_retry: ext4_io_submit(io); @@ -436,6 +428,15 @@ int ext4_bio_write_page(struct ext4_io_submit *io, set_buffer_uptodate(bh); continue; } + if (!buffer_dirty(bh) || buffer_delay(bh) || + !buffer_mapped(bh) || buffer_unwritten(bh)) { + /* A hole? 
We can safely clear the dirty bit */ + if (!buffer_mapped(bh)) + clear_buffer_dirty(bh); + if (io->io_bio) + ext4_io_submit(io); + continue; + } ret = io_submit_add_bh(io, io_page, inode, wbc, bh); if (ret) { /* -- cgit v1.2.3 From cfa7275482414fa87c9e51dd7b9d4d5d3f7a7fed Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 28 Jan 2013 21:14:11 -0500 Subject: ext4: remove unused variable flags Remove unused variable flags from dump_completed_IO(). The code is only exercised when EXT4FS_DEBUG is defined. Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" Reviewed-by: Zheng Liu --- fs/ext4/page-io.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/ext4/page-io.c') diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 0290bf85f97e..5d8c66948e1b 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -118,7 +118,6 @@ static void dump_completed_IO(struct inode *inode) #ifdef EXT4FS_DEBUG struct list_head *cur, *before, *after; ext4_io_end_t *io, *io0, *io1; - unsigned long flags; if (list_empty(&EXT4_I(inode)->i_completed_io_list)) { ext4_debug("inode %lu completed_io list is empty\n", -- cgit v1.2.3 From 091e26dfc156aeb3b73bc5c5f277e433ad39331c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 29 Jan 2013 22:48:17 -0500 Subject: ext4: fix possible use-after-free with AIO Running AIO is pinning inode in memory using file reference. Once AIO is completed using aio_complete(), file reference is put and inode can be freed from memory. So we have to be sure that calling aio_complete() is the last thing we do with the inode. 
CC: stable@vger.kernel.org Reviewed-by: Carlos Maiolino Acked-by: Jeff Moyer Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 2 +- fs/ext4/page-io.c | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'fs/ext4/page-io.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 86bf43d6dfcd..07d9defeaf8c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2850,9 +2850,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { ext4_free_io_end(io_end); out: + inode_dio_done(inode); if (is_async) aio_complete(iocb, ret, 0); - inode_dio_done(inode); return; } diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 5d8c66948e1b..809b31003ecc 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -102,14 +102,13 @@ static int ext4_end_io(ext4_io_end_t *io) "(inode %lu, offset %llu, size %zd, error %d)", inode->i_ino, offset, size, ret); } - if (io->iocb) - aio_complete(io->iocb, io->result, 0); - - if (io->flag & EXT4_IO_END_DIRECT) - inode_dio_done(inode); /* Wake up anyone waiting on unwritten extent conversion */ if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) wake_up_all(ext4_ioend_wq(inode)); + if (io->flag & EXT4_IO_END_DIRECT) + inode_dio_done(inode); + if (io->iocb) + aio_complete(io->iocb, io->result, 0); return ret; } -- cgit v1.2.3