From 7f5aa215088b817add9c71914b83650bdd49f8a9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 10 Feb 2009 11:15:34 -0500 Subject: jbd2: Avoid possible NULL dereference in jbd2_journal_begin_ordered_truncate() If we race with commit code setting i_transaction to NULL, we could possibly dereference it. Proper locking requires the journal pointer (to access journal->j_list_lock), which we don't have. So we have to change the prototype of the function so that filesystem passes us the journal pointer. Also add a more detailed comment about why the function jbd2_journal_begin_ordered_truncate() does what it does and how it should be used. Thanks to Dan Carpenter for pointing to the suspitious code. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" Acked-by: Joel Becker CC: linux-ext4@vger.kernel.org CC: ocfs2-devel@oss.oracle.com CC: mfasheh@suse.de CC: Dan Carpenter --- fs/ext4/inode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 03ba20be1329..658c4a7f2578 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -47,8 +47,10 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, loff_t new_size) { - return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode, - new_size); + return jbd2_journal_begin_ordered_truncate( + EXT4_SB(inode->i_sb)->s_journal, + &EXT4_I(inode)->jinode, + new_size); } static void ext4_invalidatepage(struct page *page, unsigned long offset); -- cgit v1.2.3 From 2acf2c261b823d9d9ed954f348b97620297a36b5 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sat, 14 Feb 2009 10:42:58 -0500 Subject: ext4: Implement range_cyclic in ext4_da_writepages instead of write_cache_pages With delayed allocation we lock the page in write_cache_pages() and try to build an in memory extent of contiguous blocks. This is needed so that we can get large contiguous blocks request. If range_cyclic mode is enabled, write_cache_pages() will loop back to the 0 index if no I/O has been done yet, and try to start writing from the beginning of the range. That causes an attempt to take the page lock of lower index page while holding the page lock of higher index page, which can cause a dead lock with another writeback thread. The solution is to implement the range_cyclic behavior in ext4_da_writepages() instead. http://bugzilla.kernel.org/show_bug.cgi?id=12579 Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 658c4a7f2578..cbd2ca99d113 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2439,6 +2439,7 @@ static int ext4_da_writepages(struct address_space *mapping, int no_nrwrite_index_update; int pages_written = 0; long pages_skipped; + int range_cyclic, cycled = 1, io_done = 0; int needed_blocks, ret = 0, nr_to_writebump = 0; struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); @@ -2490,9 +2491,15 @@ static int ext4_da_writepages(struct address_space *mapping, if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; - if (wbc->range_cyclic) + range_cyclic = wbc->range_cyclic; + if (wbc->range_cyclic) { index = mapping->writeback_index; - else + if (index) + cycled = 0; + wbc->range_start = index << PAGE_CACHE_SHIFT; + wbc->range_end = LLONG_MAX; + wbc->range_cyclic = 0; + } else index = wbc->range_start >> PAGE_CACHE_SHIFT; mpd.wbc = wbc; @@ -2506,6 +2513,7 @@ static int ext4_da_writepages(struct address_space *mapping, wbc->no_nrwrite_index_update = 1; pages_skipped = wbc->pages_skipped; +retry: while (!ret && wbc->nr_to_write > 0) { /* @@ -2548,6 +2556,7 @@ static int ext4_da_writepages(struct address_space *mapping, pages_written += mpd.pages_written; wbc->pages_skipped = pages_skipped; ret = 0; + io_done = 1; } else if (wbc->nr_to_write) /* * There is no more writeout needed @@ -2556,6 +2565,13 @@ static int ext4_da_writepages(struct address_space *mapping, */ break; } + if (!io_done && !cycled) { + cycled = 1; + index = 0; + wbc->range_start = index << PAGE_CACHE_SHIFT; + wbc->range_end = mapping->writeback_index - 1; + goto retry; + } if (pages_skipped != wbc->pages_skipped) printk(KERN_EMERG "This should not happen leaving %s " "with nr_to_write = %ld ret = %d\n", @@ -2563,6 +2579,7 @@ static int ext4_da_writepages(struct address_space *mapping, /* Update index */ index += pages_written; + wbc->range_cyclic = range_cyclic; if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) /* * set the writeback_index so that range_cyclic -- cgit v1.2.3 From ebd3610b110bbb18ea6f9f2aeed1e1068c537227 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sun, 22 Feb 2009 21:09:59 -0500 Subject: ext4: Fix deadlock in ext4_write_begin() and ext4_da_write_begin() Functions ext4_write_begin() and ext4_da_write_begin() call grab_cache_page_write_begin() without AOP_FLAG_NOFS. Thus it can happen that page reclaim is triggered in that function and it recurses back into the filesystem (or some other filesystem). But this can lead to various problems as a transaction is already started at that point. Add the necessary flag. http://bugzilla.kernel.org/show_bug.cgi?id=11688 Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index cbd2ca99d113..51cdd13e1c31 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1368,6 +1368,10 @@ retry: goto out; } + /* We cannot recurse into the filesystem as the transaction is already + * started */ + flags |= AOP_FLAG_NOFS; + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { ext4_journal_stop(handle); @@ -1377,7 +1381,7 @@ retry: *pagep = page; ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - ext4_get_block); + ext4_get_block); if (!ret && ext4_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), @@ -2667,6 +2671,9 @@ retry: ret = PTR_ERR(handle); goto out; } + /* We cannot recurse into the filesystem as the transaction is already + * started */ + flags |= AOP_FLAG_NOFS; page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { -- cgit v1.2.3 From 8f64b32eb73fbfe9f38c4123121b63ee409278a7 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 26 Feb 2009 00:57:35 -0500 Subject: ext4: don't call jbd2_journal_force_commit_nested without journal Running without a journal, I oopsed when I ran out of space, because we called jbd2_journal_force_commit_nested() from ext4_should_retry_alloc() without a journal. This should take care of it, I think. Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 4 +++- fs/ext4/inode.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 9a50b8052dcf..de9459b4cb94 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -609,7 +609,9 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi, */ int ext4_should_retry_alloc(struct super_block *sb, int *retries) { - if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || (*retries)++ > 3) + if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || + (*retries)++ > 3 || + !EXT4_SB(sb)->s_journal) return 0; jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 51cdd13e1c31..c7fed5b18745 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2544,7 +2544,7 @@ retry: ext4_journal_stop(handle); - if (mpd.retval == -ENOSPC) { + if ((mpd.retval == -ENOSPC) && sbi->s_journal) { /* commit the transaction which would * free blocks released in the transaction * and try again -- cgit v1.2.3