Diffstat (limited to 'fs/btrfs/extent-io-tree.c')
-rw-r--r-- | fs/btrfs/extent-io-tree.c | 192
1 file changed, 142 insertions, 50 deletions
diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c
index 83cb0378096f..9ae9cd1e7035 100644
--- a/fs/btrfs/extent-io-tree.c
+++ b/fs/btrfs/extent-io-tree.c
@@ -2,6 +2,7 @@
 
 #include <linux/slab.h>
 #include <trace/events/btrfs.h>
+#include "messages.h"
 #include "ctree.h"
 #include "extent-io-tree.h"
 #include "btrfs_inode.h"
@@ -57,17 +58,17 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
 						       struct extent_io_tree *tree,
 						       u64 start, u64 end)
 {
-	struct inode *inode = tree->private_data;
+	struct btrfs_inode *inode = tree->inode;
 	u64 isize;
 
 	if (!inode)
 		return;
 
-	isize = i_size_read(inode);
+	isize = i_size_read(&inode->vfs_inode);
 	if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
-		btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
+		btrfs_debug_rl(inode->root->fs_info,
 		    "%s: ino %llu isize %llu odd range [%llu,%llu]",
-			caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
+			caller, btrfs_ino(inode), isize, start, end);
 	}
 }
 #else
@@ -93,13 +94,12 @@ struct tree_entry {
 };
 
 void extent_io_tree_init(struct btrfs_fs_info *fs_info,
-			 struct extent_io_tree *tree, unsigned int owner,
-			 void *private_data)
+			 struct extent_io_tree *tree, unsigned int owner)
 {
 	tree->fs_info = fs_info;
 	tree->state = RB_ROOT;
 	spin_lock_init(&tree->lock);
-	tree->private_data = private_data;
+	tree->inode = NULL;
 	tree->owner = owner;
 	if (owner == IO_TREE_INODE_FILE_EXTENT)
 		lockdep_set_class(&tree->lock, &file_extent_tree_class);
@@ -346,9 +346,8 @@ static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
 	other = prev_state(state);
 	if (other && other->end == state->start - 1 &&
 	    other->state == state->state) {
-		if (tree->private_data)
-			btrfs_merge_delalloc_extent(tree->private_data,
-						    state, other);
+		if (tree->inode)
+			btrfs_merge_delalloc_extent(tree->inode, state, other);
 		state->start = other->start;
 		rb_erase(&other->rb_node, &tree->state);
 		RB_CLEAR_NODE(&other->rb_node);
@@ -357,9 +356,8 @@ static void merge_state(struct extent_io_tree *tree, struct extent_state *state)
 	other = next_state(state);
 	if (other && other->start == state->end + 1 &&
 	    other->state == state->state) {
-		if (tree->private_data)
-			btrfs_merge_delalloc_extent(tree->private_data, state,
-						    other);
+		if (tree->inode)
+			btrfs_merge_delalloc_extent(tree->inode, state, other);
 		state->end = other->end;
 		rb_erase(&other->rb_node, &tree->state);
 		RB_CLEAR_NODE(&other->rb_node);
@@ -374,8 +372,8 @@ static void set_state_bits(struct extent_io_tree *tree,
 	u32 bits_to_set = bits & ~EXTENT_CTLBITS;
 	int ret;
 
-	if (tree->private_data)
-		btrfs_set_delalloc_extent(tree->private_data, state, bits);
+	if (tree->inode)
+		btrfs_set_delalloc_extent(tree->inode, state, bits);
 
 	ret = add_extent_changeset(state, bits_to_set, changeset, 1);
 	BUG_ON(ret < 0);
@@ -397,7 +395,7 @@ static int insert_state(struct extent_io_tree *tree,
 			u32 bits, struct extent_changeset *changeset)
 {
 	struct rb_node **node;
-	struct rb_node *parent;
+	struct rb_node *parent = NULL;
 	const u64 end = state->end;
 
 	set_state_bits(tree, state, bits, changeset);
@@ -462,8 +460,8 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
 	struct rb_node *parent = NULL;
 	struct rb_node **node;
 
-	if (tree->private_data)
-		btrfs_split_delalloc_extent(tree->private_data, orig, split);
+	if (tree->inode)
+		btrfs_split_delalloc_extent(tree->inode, orig, split);
 
 	prealloc->start = orig->start;
 	prealloc->end = split - 1;
@@ -510,8 +508,8 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
 	u32 bits_to_clear = bits & ~EXTENT_CTLBITS;
 	int ret;
 
-	if (tree->private_data)
-		btrfs_clear_delalloc_extent(tree->private_data, state, bits);
+	if (tree->inode)
+		btrfs_clear_delalloc_extent(tree->inode, state, bits);
 
 	ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
 	BUG_ON(ret < 0);
@@ -572,7 +570,7 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 	if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY))
 		clear = 1;
 again:
-	if (!prealloc && gfpflags_allow_blocking(mask)) {
+	if (!prealloc) {
 		/*
 		 * Don't care for allocation failure here because we might end
 		 * up not needing the pre-allocated extent state at all, which
@@ -636,7 +634,8 @@ hit_next:
 
 	if (state->start < start) {
 		prealloc = alloc_extent_state_atomic(prealloc);
-		BUG_ON(!prealloc);
+		if (!prealloc)
+			goto search_again;
 		err = split_state(tree, state, prealloc, start);
 		if (err)
 			extent_io_tree_panic(tree, err);
@@ -657,7 +656,8 @@ hit_next:
 	 */
 	if (state->start <= end && state->end > end) {
 		prealloc = alloc_extent_state_atomic(prealloc);
-		BUG_ON(!prealloc);
+		if (!prealloc)
+			goto search_again;
 		err = split_state(tree, state, prealloc, end + 1);
 		if (err)
 			extent_io_tree_panic(tree, err);
@@ -714,7 +714,8 @@ static void wait_on_state(struct extent_io_tree *tree,
  * The range [start, end] is inclusive.
  * The tree lock is taken by this function
  */
-void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits)
+void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
+		     struct extent_state **cached_state)
 {
 	struct extent_state *state;
 
@@ -722,6 +723,16 @@ void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits)
 
 	spin_lock(&tree->lock);
 again:
+	/*
+	 * Maintain cached_state, as we may not remove it from the tree if there
+	 * are more bits than the bits we're waiting on set on this state.
+	 */
+	if (cached_state && *cached_state) {
+		state = *cached_state;
+		if (extent_state_in_tree(state) &&
+		    state->start <= start && start < state->end)
+			goto process_node;
+	}
 	while (1) {
 		/*
 		 * This search will find all the extents that end after our
@@ -752,6 +763,12 @@ process_node:
 		}
 	}
out:
+	/* This state is no longer useful, clear it and free it up. */
+	if (cached_state && *cached_state) {
+		state = *cached_state;
+		*cached_state = NULL;
+		free_extent_state(state);
+	}
 	spin_unlock(&tree->lock);
 }
 
@@ -939,13 +956,17 @@ out:
  * sleeping, so the gfp mask is used to indicate what is allowed.
  *
  * If any of the exclusive bits are set, this will fail with -EEXIST if some
- * part of the range already has the desired bits set. The start of the
- * existing range is returned in failed_start in this case.
+ * part of the range already has the desired bits set. The extent_state of the
+ * existing range is returned in failed_state in this case, and the start of the
+ * existing range is returned in failed_start. failed_state is used as an
+ * optimization for wait_extent_bit, failed_start must be used as the source of
+ * truth as failed_state may have changed since we returned.
  *
  * [start, end] is inclusive This takes the tree lock.
  */
 static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 			    u32 bits, u64 *failed_start,
+			    struct extent_state **failed_state,
 			    struct extent_state **cached_state,
 			    struct extent_changeset *changeset, gfp_t mask)
 {
@@ -964,9 +985,9 @@ static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 	if (exclusive_bits)
 		ASSERT(failed_start);
 	else
-		ASSERT(failed_start == NULL);
+		ASSERT(failed_start == NULL && failed_state == NULL);
again:
-	if (!prealloc && gfpflags_allow_blocking(mask)) {
+	if (!prealloc) {
 		/*
 		 * Don't care for allocation failure here because we might end
 		 * up not needing the pre-allocated extent state at all, which
@@ -991,7 +1012,8 @@ again:
 	state = tree_search_for_insert(tree, start, &p, &parent);
 	if (!state) {
 		prealloc = alloc_extent_state_atomic(prealloc);
-		BUG_ON(!prealloc);
+		if (!prealloc)
+			goto search_again;
 		prealloc->start = start;
 		prealloc->end = end;
 		insert_state_fast(tree, prealloc, p, parent, bits, changeset);
@@ -1012,6 +1034,7 @@ hit_next:
 	if (state->start == start && state->end <= end) {
 		if (state->state & exclusive_bits) {
 			*failed_start = state->start;
+			cache_state(state, failed_state);
 			err = -EEXIST;
 			goto out;
 		}
@@ -1047,6 +1070,7 @@ hit_next:
 	if (state->start < start) {
 		if (state->state & exclusive_bits) {
 			*failed_start = start;
+			cache_state(state, failed_state);
 			err = -EEXIST;
 			goto out;
 		}
@@ -1062,7 +1086,8 @@ hit_next:
 		}
 
 		prealloc = alloc_extent_state_atomic(prealloc);
-		BUG_ON(!prealloc);
+		if (!prealloc)
+			goto search_again;
 		err = split_state(tree, state, prealloc, start);
 		if (err)
 			extent_io_tree_panic(tree, err);
@@ -1099,7 +1124,8 @@ hit_next:
 		this_end = last_start - 1;
 
 		prealloc = alloc_extent_state_atomic(prealloc);
-		BUG_ON(!prealloc);
+		if (!prealloc)
+			goto search_again;
 
 		/*
 		 * Avoid to free 'prealloc' if it can be merged with the later
@@ -1125,12 +1151,14 @@ hit_next:
 	if (state->start <= end && state->end > end) {
 		if (state->state & exclusive_bits) {
 			*failed_start = start;
+			cache_state(state, failed_state);
 			err = -EEXIST;
 			goto out;
 		}
 
 		prealloc = alloc_extent_state_atomic(prealloc);
-		BUG_ON(!prealloc);
+		if (!prealloc)
+			goto search_again;
 		err = split_state(tree, state, prealloc, end + 1);
 		if (err)
 			extent_io_tree_panic(tree, err);
@@ -1162,8 +1190,8 @@ out:
 int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 		   u32 bits, struct extent_state **cached_state, gfp_t mask)
 {
-	return __set_extent_bit(tree, start, end, bits, NULL, cached_state,
-				NULL, mask);
+	return __set_extent_bit(tree, start, end, bits, NULL, NULL,
+				cached_state, NULL, mask);
 }
 
 /*
@@ -1397,7 +1425,7 @@ void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
 				 u64 *start_ret, u64 *end_ret, u32 bits)
 {
 	struct extent_state *state;
-	struct extent_state *prev = NULL, *next;
+	struct extent_state *prev = NULL, *next = NULL;
 
 	spin_lock(&tree->lock);
 
@@ -1487,15 +1515,37 @@ out:
 }
 
 /*
- * Count the number of bytes in the tree that have a given bit(s) set. This
- * can be fairly slow, except for EXTENT_DIRTY which is cached. The total
- * number found is returned.
+ * Count the number of bytes in the tree that have a given bit(s) set for a
+ * given range.
+ *
+ * @tree:         The io tree to search.
+ * @start:        The start offset of the range. This value is updated to the
+ *                offset of the first byte found with the given bit(s), so it
+ *                can end up being bigger than the initial value.
+ * @search_end:   The end offset (inclusive value) of the search range.
+ * @max_bytes:    The maximum byte count we are interested. The search stops
+ *                once it reaches this count.
+ * @bits:         The bits the range must have in order to be accounted for.
+ *                If multiple bits are set, then only subranges that have all
+ *                the bits set are accounted for.
+ * @contig:       Indicate if we should ignore holes in the range or not. If
+ *                this is true, then stop once we find a hole.
+ * @cached_state: A cached state to be used across multiple calls to this
+ *                function in order to speedup searches. Use NULL if this is
+ *                called only once or if each call does not start where the
+ *                previous one ended.
+ *
+ * Returns the total number of bytes found within the given range that have
+ * all given bits set. If the returned number of bytes is greater than zero
+ * then @start is updated with the offset of the first byte with the bits set.
  */
 u64 count_range_bits(struct extent_io_tree *tree,
 		     u64 *start, u64 search_end, u64 max_bytes,
-		     u32 bits, int contig)
+		     u32 bits, int contig,
+		     struct extent_state **cached_state)
 {
-	struct extent_state *state;
+	struct extent_state *state = NULL;
+	struct extent_state *cached;
 	u64 cur_start = *start;
 	u64 total_bytes = 0;
 	u64 last = 0;
@@ -1506,11 +1556,41 @@ u64 count_range_bits(struct extent_io_tree *tree,
 
 	spin_lock(&tree->lock);
 
+	if (!cached_state || !*cached_state)
+		goto search;
+
+	cached = *cached_state;
+
+	if (!extent_state_in_tree(cached))
+		goto search;
+
+	if (cached->start <= cur_start && cur_start <= cached->end) {
+		state = cached;
+	} else if (cached->start > cur_start) {
+		struct extent_state *prev;
+
+		/*
+		 * The cached state starts after our search range's start. Check
+		 * if the previous state record starts at or before the range we
+		 * are looking for, and if so, use it - this is a common case
+		 * when there are holes between records in the tree. If there is
+		 * no previous state record, we can start from our cached state.
+		 */
+		prev = prev_state(cached);
+		if (!prev)
+			state = cached;
+		else if (prev->start <= cur_start && cur_start <= prev->end)
+			state = prev;
+	}
+
 	/*
 	 * This search will find all the extents that end after our range
 	 * starts.
 	 */
-	state = tree_search(tree, cur_start);
+search:
+	if (!state)
+		state = tree_search(tree, cur_start);
+
 	while (state) {
 		if (state->start > search_end)
 			break;
@@ -1531,7 +1611,16 @@ u64 count_range_bits(struct extent_io_tree *tree,
 		}
 		state = next_state(state);
 	}
+
+	if (cached_state) {
+		free_extent_state(*cached_state);
+		*cached_state = state;
+		if (state)
+			refcount_inc(&state->refs);
+	}
+
 	spin_unlock(&tree->lock);
+
 	return total_bytes;
 }
 
@@ -1598,8 +1687,8 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
 	 */
 	ASSERT(!(bits & EXTENT_LOCKED));
 
-	return __set_extent_bit(tree, start, end, bits, NULL, NULL, changeset,
-				GFP_NOFS);
+	return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL,
+				changeset, GFP_NOFS);
 }
 
 int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
@@ -1615,17 +1704,18 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
 				  changeset);
 }
 
-int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
+int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
+		    struct extent_state **cached)
 {
 	int err;
 	u64 failed_start;
 
 	err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
-			       NULL, NULL, GFP_NOFS);
+			       NULL, cached, NULL, GFP_NOFS);
 	if (err == -EEXIST) {
 		if (failed_start > start)
 			clear_extent_bit(tree, start, failed_start - 1,
-					 EXTENT_LOCKED, NULL);
+					 EXTENT_LOCKED, cached);
 		return 0;
 	}
 	return 1;
@@ -1638,20 +1728,22 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
 int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
 		struct extent_state **cached_state)
 {
+	struct extent_state *failed_state = NULL;
 	int err;
 	u64 failed_start;
 
 	err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
-			       cached_state, NULL, GFP_NOFS);
+			       &failed_state, cached_state, NULL, GFP_NOFS);
 	while (err == -EEXIST) {
 		if (failed_start != start)
 			clear_extent_bit(tree, start, failed_start - 1,
 					 EXTENT_LOCKED, cached_state);
 
-		wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
+		wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED,
+				&failed_state);
 		err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
-				       &failed_start, cached_state, NULL,
-				       GFP_NOFS);
+				       &failed_start, &failed_state,
+				       cached_state, NULL, GFP_NOFS);
 	}
 	return err;
 }
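
The count_range_bits() change above adds a @cached_state parameter so that callers scanning a file range in consecutive chunks can resume from the extent_state record the previous call ended on, instead of repeating a full rbtree search each time. Below is a minimal sketch of such a caller, not part of the patch: the helper name, chunk walking and use of EXTENT_DELALLOC are illustrative assumptions; count_range_bits() and free_extent_state() are used with the signatures visible in this diff.

/*
 * Illustrative sketch only (not from the diff above): count delalloc bytes
 * over a file range in fixed-size chunks, letting count_range_bits() reuse
 * the extent_state record it stopped at on the previous iteration. The
 * helper name and chunk size handling are hypothetical; count_range_bits(),
 * free_extent_state() and EXTENT_DELALLOC are the existing btrfs symbols.
 */
static u64 count_delalloc_in_chunks(struct extent_io_tree *tree, u64 start,
				    u64 len, u64 chunk)
{
	struct extent_state *cached_state = NULL;
	const u64 range_end = start + len - 1;
	u64 cur = start;
	u64 total = 0;

	while (cur <= range_end) {
		u64 found_start = cur;
		const u64 chunk_end = min(cur + chunk - 1, range_end);

		/*
		 * Each call starts where the previous one ended, so the
		 * cached state is either a direct hit or a close neighbour
		 * and the rbtree search in count_range_bits() is avoided.
		 */
		total += count_range_bits(tree, &found_start, chunk_end,
					  chunk_end - cur + 1, EXTENT_DELALLOC,
					  0, &cached_state);
		cur = chunk_end + 1;
	}
	/* Drop the reference that count_range_bits() left in cached_state. */
	free_extent_state(cached_state);

	return total;
}

Whether the cache helps depends on the call pattern: per the new kerneldoc comment, pass NULL when the function is called only once or when consecutive calls do not start where the previous one ended.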