From 61d92c328c16419fc96dc50dd16f8b8c695409ec Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 Oct 2009 19:11:56 -0400 Subject: Btrfs: fix deadlock on async thread startup The btrfs async worker threads are used for a wide variety of things, including processing bio end_io functions. This means that when the endio threads aren't running, the rest of the FS isn't able to do the final processing required to clear PageWriteback. The endio threads also try to exit as they become idle and start more as the work piles up. The problem is that starting more threads means kthreadd may need to allocate ram, and that allocation may wait until the global number of writeback pages on the system is below a certain limit. The result of that throttling is that end IO threads wait on kthreadd, who is waiting on IO to end, which will never happen. This commit fixes the deadlock by handing off thread startup to a dedicated thread. It also fixes a bug where the on-demand thread creation was creating far too many threads because it didn't take into account threads being started by other procs. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8184f2feb2f3..1b920ffc6a59 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -907,6 +907,7 @@ struct btrfs_fs_info { * A third pool does submit_bio to avoid deadlocking with the other * two */ + struct btrfs_workers generic_worker; struct btrfs_workers workers; struct btrfs_workers delalloc_workers; struct btrfs_workers endio_workers; -- cgit v1.2.3 From 32c00aff718bb54a214b39146bdd9ac01511cd25 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 8 Oct 2009 13:34:05 -0400 Subject: Btrfs: release delalloc reservations on extent item insertion This patch fixes an issue with the delalloc metadata space reservation code. The problem is we used to free the reservation as soon as we allocated the delalloc region. The problem with this is if we are not inserting an inline extent, we don't actually insert the extent item until after the ordered extent is written out. This patch does 3 things, 1) It moves the reservation clearing stuff into the ordered code, so when we remove the ordered extent we remove the reservation. 2) It adds a EXTENT_DO_ACCOUNTING flag that gets passed when we clear delalloc bits in the cases where we want to clear the metadata reservation when we clear the delalloc extent, in the case that we do an inline extent or we invalidate the page. 3) It adds another waitqueue to the space info so that when we start a fs wide delalloc flush, anybody else who also hits that area will simply wait for the flush to finish and then try to make their allocation. This has been tested thoroughly to make sure we did not regress on performance. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 12 ++++++----- fs/btrfs/ctree.h | 3 +++ fs/btrfs/extent-tree.c | 54 +++++++++++++++++++++++++++++++++++++++---------- fs/btrfs/extent_io.c | 19 +++++++++++------ fs/btrfs/extent_io.h | 2 ++ fs/btrfs/file.c | 3 ++- fs/btrfs/inode.c | 45 ++++++++++++++++++++++++++++------------- fs/btrfs/ordered-data.c | 6 ++++++ 8 files changed, 107 insertions(+), 37 deletions(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index a54d354cefcb..c71abec0ab90 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -128,12 +128,14 @@ struct btrfs_inode { u64 last_unlink_trans; /* - * These two counters are for delalloc metadata reservations. We keep - * track of how many extents we've accounted for vs how many extents we - * have. + * Counters to keep track of the number of extent item's we may use due + * to delalloc and such. outstanding_extents is the number of extent + * items we think we'll end up using, and reserved_extents is the number + * of extent items we've reserved metadata for. */ - int delalloc_reserved_extents; - int delalloc_extents; + spinlock_t accounting_lock; + int reserved_extents; + int outstanding_extents; /* * ordered_data_close is set by truncate when a file that used diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1b920ffc6a59..dbdada569507 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -699,6 +699,9 @@ struct btrfs_space_info { int allocating_chunk; wait_queue_head_t wait; + + int flushing; + wait_queue_head_t flush_wait; }; /* diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2f82fabd7011..3d1be0b77f8f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2823,14 +2823,17 @@ int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, num_items); spin_lock(&meta_sinfo->lock); - if (BTRFS_I(inode)->delalloc_reserved_extents <= - BTRFS_I(inode)->delalloc_extents) { + spin_lock(&BTRFS_I(inode)->accounting_lock); + if (BTRFS_I(inode)->reserved_extents <= + BTRFS_I(inode)->outstanding_extents) { + spin_unlock(&BTRFS_I(inode)->accounting_lock); spin_unlock(&meta_sinfo->lock); return 0; } + spin_unlock(&BTRFS_I(inode)->accounting_lock); - BTRFS_I(inode)->delalloc_reserved_extents--; - BUG_ON(BTRFS_I(inode)->delalloc_reserved_extents < 0); + BTRFS_I(inode)->reserved_extents--; + BUG_ON(BTRFS_I(inode)->reserved_extents < 0); if (meta_sinfo->bytes_delalloc < num_bytes) { bug = true; @@ -2863,6 +2866,37 @@ static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) meta_sinfo->force_delalloc = 0; } +static void flush_delalloc(struct btrfs_root *root, + struct btrfs_space_info *info) +{ + bool wait = false; + + spin_lock(&info->lock); + + if (!info->flushing) { + info->flushing = 1; + init_waitqueue_head(&info->flush_wait); + } else { + wait = true; + } + + spin_unlock(&info->lock); + + if (wait) { + wait_event(info->flush_wait, + !info->flushing); + return; + } + + btrfs_start_delalloc_inodes(root); + btrfs_wait_ordered_extents(root, 0); + + spin_lock(&info->lock); + info->flushing = 0; + spin_unlock(&info->lock); + wake_up(&info->flush_wait); +} + static int maybe_allocate_chunk(struct btrfs_root *root, struct btrfs_space_info *info) { @@ -2980,21 +3014,20 @@ again: filemap_flush(inode->i_mapping); goto again; } else if (flushed == 3) { - btrfs_start_delalloc_inodes(root); - btrfs_wait_ordered_extents(root, 0); + flush_delalloc(root, meta_sinfo); goto again; } spin_lock(&meta_sinfo->lock); meta_sinfo->bytes_delalloc -= num_bytes; spin_unlock(&meta_sinfo->lock); printk(KERN_ERR "enospc, has %d, reserved %d\n", - BTRFS_I(inode)->delalloc_extents, - BTRFS_I(inode)->delalloc_reserved_extents); + BTRFS_I(inode)->outstanding_extents, + BTRFS_I(inode)->reserved_extents); dump_space_info(meta_sinfo, 0, 0); return -ENOSPC; } - BTRFS_I(inode)->delalloc_reserved_extents++; + BTRFS_I(inode)->reserved_extents++; check_force_delalloc(meta_sinfo); spin_unlock(&meta_sinfo->lock); @@ -3093,8 +3126,7 @@ again: } if (retries == 2) { - btrfs_start_delalloc_inodes(root); - btrfs_wait_ordered_extents(root, 0); + flush_delalloc(root, meta_sinfo); goto again; } spin_lock(&meta_sinfo->lock); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f9708bd01669..96577e8bf9fd 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -460,7 +460,8 @@ static int clear_state_bit(struct extent_io_tree *tree, struct extent_state *state, int bits, int wake, int delete) { - int ret = state->state & bits; + int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING; + int ret = state->state & bits_to_clear; if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { u64 range = state->end - state->start + 1; @@ -468,7 +469,7 @@ static int clear_state_bit(struct extent_io_tree *tree, tree->dirty_bytes -= range; } clear_state_cb(tree, state, bits); - state->state &= ~bits; + state->state &= ~bits_to_clear; if (wake) wake_up(&state->wq); if (delete || state->state == 0) { @@ -956,7 +957,8 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { return clear_extent_bit(tree, start, end, - EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask); } @@ -1419,9 +1421,13 @@ int extent_clear_unlock_delalloc(struct inode *inode, if (op & EXTENT_CLEAR_DELALLOC) clear_bits |= EXTENT_DELALLOC; + if (op & EXTENT_CLEAR_ACCOUNTING) + clear_bits |= EXTENT_DO_ACCOUNTING; + clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); - if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK | - EXTENT_END_WRITEBACK | EXTENT_SET_PRIVATE2))) + if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | + EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | + EXTENT_SET_PRIVATE2))) return 0; while (nr_pages > 0) { @@ -2709,7 +2715,8 @@ int extent_invalidatepage(struct extent_io_tree *tree, lock_extent(tree, start, end, GFP_NOFS); wait_on_page_writeback(page); clear_extent_bit(tree, start, end, - EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, + EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING, 1, 1, NULL, GFP_NOFS); return 0; } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 41d2a47ecf38..36de250a7b2b 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -15,6 +15,7 @@ #define EXTENT_BUFFER_FILLED (1 << 8) #define EXTENT_BOUNDARY (1 << 9) #define EXTENT_NODATASUM (1 << 10) +#define EXTENT_DO_ACCOUNTING (1 << 11) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) /* flags for bio submission */ @@ -33,6 +34,7 @@ #define EXTENT_SET_WRITEBACK 0x10 #define EXTENT_END_WRITEBACK 0x20 #define EXTENT_SET_PRIVATE2 0x40 +#define EXTENT_CLEAR_ACCOUNTING 0x80 /* * page->private values. Every page that is controlled by the extent diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index f155179877a6..53fb1c997f0e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -878,7 +878,8 @@ again: btrfs_put_ordered_extent(ordered); clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, - last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC, + last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING, GFP_NOFS); unlock_extent(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, GFP_NOFS); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 401dfb2a94e8..ccc4f1121210 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -428,6 +428,7 @@ again: start, end, NULL, EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | EXTENT_CLEAR_DELALLOC | + EXTENT_CLEAR_ACCOUNTING | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); ret = 0; goto free_pages_out; @@ -722,6 +723,7 @@ static noinline int cow_file_range(struct inode *inode, EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | + EXTENT_CLEAR_ACCOUNTING | EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); @@ -1195,15 +1197,17 @@ static int btrfs_split_extent_hook(struct inode *inode, root->fs_info->max_extent); /* - * if we break a large extent up then leave delalloc_extents be, - * since we've already accounted for the large extent. + * if we break a large extent up then leave oustanding_extents + * be, since we've already accounted for the large extent. */ if (div64_u64(new_size + root->fs_info->max_extent - 1, root->fs_info->max_extent) < num_extents) return 0; } - BTRFS_I(inode)->delalloc_extents++; + spin_lock(&BTRFS_I(inode)->accounting_lock); + BTRFS_I(inode)->outstanding_extents++; + spin_unlock(&BTRFS_I(inode)->accounting_lock); return 0; } @@ -1234,7 +1238,9 @@ static int btrfs_merge_extent_hook(struct inode *inode, /* we're not bigger than the max, unreserve the space and go */ if (new_size <= root->fs_info->max_extent) { - BTRFS_I(inode)->delalloc_extents--; + spin_lock(&BTRFS_I(inode)->accounting_lock); + BTRFS_I(inode)->outstanding_extents--; + spin_unlock(&BTRFS_I(inode)->accounting_lock); return 0; } @@ -1248,7 +1254,9 @@ static int btrfs_merge_extent_hook(struct inode *inode, root->fs_info->max_extent) > num_extents) return 0; - BTRFS_I(inode)->delalloc_extents--; + spin_lock(&BTRFS_I(inode)->accounting_lock); + BTRFS_I(inode)->outstanding_extents--; + spin_unlock(&BTRFS_I(inode)->accounting_lock); return 0; } @@ -1270,7 +1278,9 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; - BTRFS_I(inode)->delalloc_extents++; + spin_lock(&BTRFS_I(inode)->accounting_lock); + BTRFS_I(inode)->outstanding_extents++; + spin_unlock(&BTRFS_I(inode)->accounting_lock); btrfs_delalloc_reserve_space(root, inode, end - start + 1); spin_lock(&root->fs_info->delalloc_lock); BTRFS_I(inode)->delalloc_bytes += end - start + 1; @@ -1298,8 +1308,12 @@ static int btrfs_clear_bit_hook(struct inode *inode, if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; - BTRFS_I(inode)->delalloc_extents--; - btrfs_unreserve_metadata_for_delalloc(root, inode, 1); + if (bits & EXTENT_DO_ACCOUNTING) { + spin_lock(&BTRFS_I(inode)->accounting_lock); + BTRFS_I(inode)->outstanding_extents--; + spin_unlock(&BTRFS_I(inode)->accounting_lock); + btrfs_unreserve_metadata_for_delalloc(root, inode, 1); + } spin_lock(&root->fs_info->delalloc_lock); if (state->end - state->start + 1 > @@ -4825,7 +4839,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) */ clear_extent_bit(tree, page_start, page_end, EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS); + EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0, + NULL, GFP_NOFS); /* * whoever cleared the private bit is responsible * for the finish_ordered_io @@ -4838,8 +4853,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) lock_extent(tree, page_start, page_end, GFP_NOFS); } clear_extent_bit(tree, page_start, page_end, - EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, - 1, 1, NULL, GFP_NOFS); + EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING, 1, 1, NULL, GFP_NOFS); __btrfs_releasepage(page, GFP_NOFS); ClearPageChecked(page); @@ -4934,7 +4949,8 @@ again: * prepare_pages in the normal write path. */ clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, - EXTENT_DIRTY | EXTENT_DELALLOC, GFP_NOFS); + EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, + GFP_NOFS); ret = btrfs_set_extent_delalloc(inode, page_start, page_end); if (ret) { @@ -5082,8 +5098,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) return NULL; ei->last_trans = 0; ei->logged_trans = 0; - ei->delalloc_extents = 0; - ei->delalloc_reserved_extents = 0; + ei->outstanding_extents = 0; + ei->reserved_extents = 0; + spin_lock_init(&ei->accounting_lock); btrfs_ordered_inode_tree_init(&ei->ordered_tree); INIT_LIST_HEAD(&ei->i_orphan); INIT_LIST_HEAD(&ei->ordered_operations); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 4a9c8c4cec25..ab21c29f2247 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -306,6 +306,12 @@ int btrfs_remove_ordered_extent(struct inode *inode, tree->last = NULL; set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); + spin_lock(&BTRFS_I(inode)->accounting_lock); + BTRFS_I(inode)->outstanding_extents--; + spin_unlock(&BTRFS_I(inode)->accounting_lock); + btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root, + inode, 1); + spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); list_del_init(&entry->root_extent_list); -- cgit v1.2.3 From e3ccfa989752c083ceb23c823a84f7ce3a081e61 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 7 Oct 2009 20:44:34 -0400 Subject: Btrfs: async delalloc flushing under space pressure This patch moves the delalloc flushing that occurs when we are under space pressure off to a async thread pool. This helps since we only free up metadata space when we actually insert the extent item, which means it takes quite a while for space to be free'ed up if we wait on all ordered extents. However, if space is freed up due to inline extents being inserted, we can wake people who are waiting up early, and they can finish their work. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 13 ++++---- fs/btrfs/disk-io.c | 6 ++++ fs/btrfs/extent-tree.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 88 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index dbdada569507..a362dd617e97 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -691,17 +691,17 @@ struct btrfs_space_info { struct list_head list; + /* for controlling how we free up space for allocations */ + wait_queue_head_t allocate_wait; + wait_queue_head_t flush_wait; + int allocating_chunk; + int flushing; + /* for block groups in our same type */ struct list_head block_groups; spinlock_t lock; struct rw_semaphore groups_sem; atomic_t caching_threads; - - int allocating_chunk; - wait_queue_head_t wait; - - int flushing; - wait_queue_head_t flush_wait; }; /* @@ -918,6 +918,7 @@ struct btrfs_fs_info { struct btrfs_workers endio_meta_write_workers; struct btrfs_workers endio_write_workers; struct btrfs_workers submit_workers; + struct btrfs_workers enospc_workers; /* * fixup workers take dirty pages that didn't properly go through * the cow mechanism and make them safe to write. It happens diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9903f042765d..ac8927bdc33d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1762,6 +1762,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, min_t(u64, fs_devices->num_devices, fs_info->thread_pool_size), &fs_info->generic_worker); + btrfs_init_workers(&fs_info->enospc_workers, "enospc", + fs_info->thread_pool_size, + &fs_info->generic_worker); /* a higher idle thresh on the submit workers makes it much more * likely that bios will be send down in a sane order to the @@ -1809,6 +1812,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_start_workers(&fs_info->endio_meta_workers, 1); btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); btrfs_start_workers(&fs_info->endio_write_workers, 1); + btrfs_start_workers(&fs_info->enospc_workers, 1); fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, @@ -2023,6 +2027,7 @@ fail_sb_buffer: btrfs_stop_workers(&fs_info->endio_meta_write_workers); btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); + btrfs_stop_workers(&fs_info->enospc_workers); fail_iput: invalidate_inode_pages2(fs_info->btree_inode->i_mapping); iput(fs_info->btree_inode); @@ -2449,6 +2454,7 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->endio_meta_write_workers); btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); + btrfs_stop_workers(&fs_info->enospc_workers); btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3d1be0b77f8f..53026806ae9e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2866,9 +2866,66 @@ static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) meta_sinfo->force_delalloc = 0; } +struct async_flush { + struct btrfs_root *root; + struct btrfs_space_info *info; + struct btrfs_work work; +}; + +static noinline void flush_delalloc_async(struct btrfs_work *work) +{ + struct async_flush *async; + struct btrfs_root *root; + struct btrfs_space_info *info; + + async = container_of(work, struct async_flush, work); + root = async->root; + info = async->info; + + btrfs_start_delalloc_inodes(root); + wake_up(&info->flush_wait); + btrfs_wait_ordered_extents(root, 0); + + spin_lock(&info->lock); + info->flushing = 0; + spin_unlock(&info->lock); + wake_up(&info->flush_wait); + + kfree(async); +} + +static void wait_on_flush(struct btrfs_space_info *info) +{ + DEFINE_WAIT(wait); + u64 used; + + while (1) { + prepare_to_wait(&info->flush_wait, &wait, + TASK_UNINTERRUPTIBLE); + spin_lock(&info->lock); + if (!info->flushing) { + spin_unlock(&info->lock); + break; + } + + used = info->bytes_used + info->bytes_reserved + + info->bytes_pinned + info->bytes_readonly + + info->bytes_super + info->bytes_root + + info->bytes_may_use + info->bytes_delalloc; + if (used < info->total_bytes) { + spin_unlock(&info->lock); + break; + } + spin_unlock(&info->lock); + schedule(); + } + finish_wait(&info->flush_wait, &wait); +} + static void flush_delalloc(struct btrfs_root *root, struct btrfs_space_info *info) { + struct async_flush *async; bool wait = false; spin_lock(&info->lock); @@ -2883,11 +2940,24 @@ static void flush_delalloc(struct btrfs_root *root, spin_unlock(&info->lock); if (wait) { - wait_event(info->flush_wait, - !info->flushing); + wait_on_flush(info); return; } + async = kzalloc(sizeof(*async), GFP_NOFS); + if (!async) + goto flush; + + async->root = root; + async->info = info; + async->work.func = flush_delalloc_async; + + btrfs_queue_worker(&root->fs_info->enospc_workers, + &async->work); + wait_on_flush(info); + return; + +flush: btrfs_start_delalloc_inodes(root); btrfs_wait_ordered_extents(root, 0); @@ -2927,7 +2997,7 @@ static int maybe_allocate_chunk(struct btrfs_root *root, if (!info->allocating_chunk) { info->force_alloc = 1; info->allocating_chunk = 1; - init_waitqueue_head(&info->wait); + init_waitqueue_head(&info->allocate_wait); } else { wait = true; } @@ -2935,7 +3005,7 @@ static int maybe_allocate_chunk(struct btrfs_root *root, spin_unlock(&info->lock); if (wait) { - wait_event(info->wait, + wait_event(info->allocate_wait, !info->allocating_chunk); return 1; } @@ -2956,7 +3026,7 @@ out: spin_lock(&info->lock); info->allocating_chunk = 0; spin_unlock(&info->lock); - wake_up(&info->wait); + wake_up(&info->allocate_wait); if (ret) return 0; -- cgit v1.2.3 From ff782e0a131c7f669445c07fe5c7ba91e043b7ed Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 8 Oct 2009 15:30:04 -0400 Subject: Btrfs: optimize fsync for the single writer case This patch optimizes the tree logging stuff so it doesn't always wait 1 jiffie for new people to join the logging transaction if there is only ever 1 writer. This helps a little bit with latency where we have something like RPM where it will fdatasync every file it writes, and so waiting the 1 jiffie for every fdatasync really starts to add up. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/tree-log.c | 12 +++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a362dd617e97..16ddb19dda86 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1010,6 +1010,8 @@ struct btrfs_root { atomic_t log_commit[2]; unsigned long log_transid; unsigned long log_batch; + pid_t log_start_pid; + bool log_multiple_pids; u64 objectid; u64 last_trans; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 4d7d9abef42f..78f6254ac2d9 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -137,11 +137,20 @@ static int start_log_trans(struct btrfs_trans_handle *trans, mutex_lock(&root->log_mutex); if (root->log_root) { + if (!root->log_start_pid) { + root->log_start_pid = current->pid; + root->log_multiple_pids = false; + } else if (root->log_start_pid != current->pid) { + root->log_multiple_pids = true; + } + root->log_batch++; atomic_inc(&root->log_writers); mutex_unlock(&root->log_mutex); return 0; } + root->log_multiple_pids = false; + root->log_start_pid = current->pid; mutex_lock(&root->fs_info->tree_log_mutex); if (!root->fs_info->log_root_tree) { ret = btrfs_init_log_root_tree(trans, root->fs_info); @@ -1985,7 +1994,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, if (atomic_read(&root->log_commit[(index1 + 1) % 2])) wait_log_commit(trans, root, root->log_transid - 1); - while (1) { + while (root->log_multiple_pids) { unsigned long batch = root->log_batch; mutex_unlock(&root->log_mutex); schedule_timeout_uninterruptible(1); @@ -2011,6 +2020,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, root->log_batch = 0; root->log_transid++; log->log_transid = root->log_transid; + root->log_start_pid = 0; smp_mb(); /* * log tree has been flushed to disk, new modifications of -- cgit v1.2.3 From 82d339d9b3a6395f17d3253887653250b693b74b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 9 Oct 2009 09:54:36 -0400 Subject: Btrfs: constify dentry_operations Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/inode.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 16ddb19dda86..36a19cd43e03 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2330,7 +2330,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); void btrfs_orphan_cleanup(struct btrfs_root *root); int btrfs_cont_expand(struct inode *inode, loff_t size); int btrfs_invalidate_inodes(struct btrfs_root *root); -extern struct dentry_operations btrfs_dentry_operations; +extern const struct dentry_operations btrfs_dentry_operations; /* ioctl.c */ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d3585cb6483c..d960492d3d88 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5840,6 +5840,6 @@ static struct inode_operations btrfs_symlink_inode_operations = { .removexattr = btrfs_removexattr, }; -struct dentry_operations btrfs_dentry_operations = { +const struct dentry_operations btrfs_dentry_operations = { .d_delete = btrfs_dentry_delete, }; -- cgit v1.2.3