From d4cf109f05ff04c6f5065c3e14165ef01a57dd53 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 6 Feb 2009 14:59:26 -0600 Subject: vfs: Don't call attach_nobh_buffers() with an empty list This is a modification of a patch by Bill Pemberton nobh_write_end() could call attach_nobh_buffers() with head == NULL. This would result in a trap when attach_nobh_buffers() attempted to access bh->b_this_page. This can be illustrated by running the writev01 testcase from LTP on jfs. This error was introduced by commit 5b41e74a "vfs: fix data leak in nobh_write_end()". That patch did not take into account that if PageMappedToDisk() is true upon entry to nobh_write_begin(), then no buffers will be allocated for the page. In that case, we won't have to worry about a failed write leaving unitialized data in the page. Of course, head != NULL implies !page_has_buffers(page), so no need to test both. Signed-off-by: Dave Kleikamp Cc: Bill Pemberton Cc: Dmitri Monakhov Signed-off-by: Linus Torvalds --- fs/buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index b58208f1640a..665d446b25bc 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2688,7 +2688,7 @@ int nobh_write_end(struct file *file, struct address_space *mapping, struct buffer_head *bh; BUG_ON(fsdata != NULL && page_has_buffers(page)); - if (unlikely(copied < len) && !page_has_buffers(page)) + if (unlikely(copied < len) && head) attach_nobh_buffers(page, head); if (page_has_buffers(page)) return generic_write_end(file, mapping, pos, len, -- cgit v1.2.3 From 78f707bfc723552e8309b7c38a8d0cc51012e813 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 Feb 2009 13:59:08 +0100 Subject: block: revert part of 18ce3751ccd488c78d3827e9f6bf54e6322676fb The above commit added WRITE_SYNC and switched various places to using that for committing writes that will be waited upon immediately after submission. However, this causes a performance regression with AS and CFQ for ext3 at least, since sync_dirty_buffer() will submit some writes with WRITE_SYNC while ext3 has sumitted others dependent writes without the sync flag set. This causes excessive anticipation/idling in the IO scheduler because sync and async writes get interleaved, causing a big performance regression for the below test case (which is meant to simulate sqlite like behaviour). ---- test case ---- int main(int argc, char **argv) { int fdes, i; FILE *fp; struct timeval start; struct timeval end; struct timeval res; gettimeofday(&start, NULL); for (i=0; i --- fs/buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 665d446b25bc..62b57e330b69 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3108,7 +3108,7 @@ int sync_dirty_buffer(struct buffer_head *bh) if (test_clear_buffer_dirty(bh)) { get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(WRITE_SYNC, bh); + ret = submit_bh(WRITE, bh); wait_on_buffer(bh); if (buffer_eopnotsupp(bh)) { clear_buffer_eopnotsupp(bh); -- cgit v1.2.3 From 1cf6e7d83bf334cc5916137862c920a97aabc018 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 18 Feb 2009 14:48:18 -0800 Subject: mm: task dirty accounting fix YAMAMOTO-san noticed that task_dirty_inc doesn't seem to be called properly for cases where set_page_dirty is not used to dirty a page (eg. mark_buffer_dirty). Additionally, there is some inconsistency about when task_dirty_inc is called. It is used for dirty balancing, however it even gets called for __set_page_dirty_no_writeback. So rather than increment it in a set_page_dirty wrapper, move it down to exactly where the dirty page accounting stats are incremented. Cc: YAMAMOTO Takashi Signed-off-by: Nick Piggin Acked-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 1 + include/linux/mm.h | 1 + mm/page-writeback.c | 13 +++---------- 3 files changed, 5 insertions(+), 10 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 665d446b25bc..ff4d1cdd779b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -777,6 +777,7 @@ static int __set_page_dirty(struct page *page, __inc_zone_page_state(page, NR_FILE_DIRTY); __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); + task_dirty_inc(current); task_io_account_write(PAGE_CACHE_SIZE); } radix_tree_tag_set(&mapping->page_tree, diff --git a/include/linux/mm.h b/include/linux/mm.h index 7dc04ff5ab89..10074212a35b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1159,6 +1159,7 @@ extern int filemap_fault(struct vm_area_struct *, struct vm_fault *); /* mm/page-writeback.c */ int write_one_page(struct page *page, int wait); +void task_dirty_inc(struct task_struct *tsk); /* readahead.c */ #define VM_MAX_READAHEAD 128 /* kbytes */ diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 3c84128596ba..74dc57c74349 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -240,7 +240,7 @@ void bdi_writeout_inc(struct backing_dev_info *bdi) } EXPORT_SYMBOL_GPL(bdi_writeout_inc); -static inline void task_dirty_inc(struct task_struct *tsk) +void task_dirty_inc(struct task_struct *tsk) { prop_inc_single(&vm_dirties, &tsk->dirties); } @@ -1230,6 +1230,7 @@ int __set_page_dirty_nobuffers(struct page *page) __inc_zone_page_state(page, NR_FILE_DIRTY); __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE); + task_dirty_inc(current); task_io_account_write(PAGE_CACHE_SIZE); } radix_tree_tag_set(&mapping->page_tree, @@ -1262,7 +1263,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage); * If the mapping doesn't provide a set_page_dirty a_op, then * just fall through and assume that it wants buffer_heads. */ -static int __set_page_dirty(struct page *page) +int set_page_dirty(struct page *page) { struct address_space *mapping = page_mapping(page); @@ -1280,14 +1281,6 @@ static int __set_page_dirty(struct page *page) } return 0; } - -int set_page_dirty(struct page *page) -{ - int ret = __set_page_dirty(page); - if (ret) - task_dirty_inc(current); - return ret; -} EXPORT_SYMBOL(set_page_dirty); /* -- cgit v1.2.3