diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 856 |
1 files changed, 394 insertions, 462 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e2d7e86b51d1..7208ecef7088 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -26,6 +26,7 @@ #include <linux/slab.h> #include <linux/ratelimit.h> #include <linux/percpu_counter.h> +#include <linux/lockdep.h> #include "hash.h" #include "tree-log.h" #include "disk-io.h" @@ -38,6 +39,7 @@ #include "math.h" #include "sysfs.h" #include "qgroup.h" +#include "ref-verify.h" #undef SCRAMBLE_DELAYED_REFS @@ -61,9 +63,6 @@ enum { CHUNK_ALLOC_FORCE = 2, }; -static int update_block_group(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 bytenr, - u64 num_bytes, int alloc); static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_node *node, u64 parent, @@ -91,17 +90,8 @@ static int find_next_key(struct btrfs_path *path, int level, static void dump_space_info(struct btrfs_fs_info *fs_info, struct btrfs_space_info *info, u64 bytes, int dump_block_groups); -static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 ram_bytes, u64 num_bytes, int delalloc); -static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int delalloc); static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes); -static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info, - u64 orig_bytes, - enum btrfs_reserve_flush_enum flush, - bool system_chunk); static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info, u64 num_bytes); @@ -652,7 +642,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, cache->cached = BTRFS_CACHE_FAST; spin_unlock(&cache->lock); - if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) { + if (btrfs_test_opt(fs_info, SPACE_CACHE)) { mutex_lock(&caching_ctl->mutex); ret = load_free_space_cache(fs_info, cache); @@ -923,7 +913,7 @@ 
search_again: head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); if (head) { if (!mutex_trylock(&head->mutex)) { - refcount_inc(&head->node.refs); + refcount_inc(&head->refs); spin_unlock(&delayed_refs->lock); btrfs_release_path(path); @@ -934,7 +924,7 @@ search_again: */ mutex_lock(&head->mutex); mutex_unlock(&head->mutex); - btrfs_put_delayed_ref(&head->node); + btrfs_put_delayed_ref_head(head); goto search_again; } spin_lock(&head->lock); @@ -943,7 +933,7 @@ search_again: else BUG_ON(num_refs == 0); - num_refs += head->node.ref_mod; + num_refs += head->ref_mod; spin_unlock(&head->lock); mutex_unlock(&head->mutex); } @@ -2189,16 +2179,20 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, /* Can return -ENOMEM */ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, + struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 owner, u64 offset) { + struct btrfs_fs_info *fs_info = root->fs_info; int old_ref_mod, new_ref_mod; int ret; BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && root_objectid == BTRFS_TREE_LOG_OBJECTID); + btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, root_objectid, + owner, offset, BTRFS_ADD_DELAYED_REF); + if (owner < BTRFS_FIRST_FREE_OBJECTID) { ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, num_bytes, parent, @@ -2344,7 +2338,7 @@ static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, static int run_delayed_extent_op(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_node *node, + struct btrfs_delayed_ref_head *head, struct btrfs_delayed_extent_op *extent_op) { struct btrfs_key key; @@ -2366,14 +2360,14 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - key.objectid = node->bytenr; + key.objectid = head->bytenr; if (metadata) { key.type = BTRFS_METADATA_ITEM_KEY; key.offset = extent_op->level; } else { key.type = 
BTRFS_EXTENT_ITEM_KEY; - key.offset = node->num_bytes; + key.offset = head->num_bytes; } again: @@ -2390,17 +2384,17 @@ again: path->slots[0]--; btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); - if (key.objectid == node->bytenr && + if (key.objectid == head->bytenr && key.type == BTRFS_EXTENT_ITEM_KEY && - key.offset == node->num_bytes) + key.offset == head->num_bytes) ret = 0; } if (ret > 0) { btrfs_release_path(path); metadata = 0; - key.objectid = node->bytenr; - key.offset = node->num_bytes; + key.objectid = head->bytenr; + key.offset = head->num_bytes; key.type = BTRFS_EXTENT_ITEM_KEY; goto again; } @@ -2507,44 +2501,6 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, return 0; } - if (btrfs_delayed_ref_is_head(node)) { - struct btrfs_delayed_ref_head *head; - /* - * we've hit the end of the chain and we were supposed - * to insert this extent into the tree. But, it got - * deleted before we ever needed to insert it, so all - * we have to do is clean up the accounting - */ - BUG_ON(extent_op); - head = btrfs_delayed_node_to_head(node); - trace_run_delayed_ref_head(fs_info, node, head, node->action); - - if (head->total_ref_mod < 0) { - struct btrfs_block_group_cache *cache; - - cache = btrfs_lookup_block_group(fs_info, node->bytenr); - ASSERT(cache); - percpu_counter_add(&cache->space_info->total_bytes_pinned, - -node->num_bytes); - btrfs_put_block_group(cache); - } - - if (insert_reserved) { - btrfs_pin_extent(fs_info, node->bytenr, - node->num_bytes, 1); - if (head->is_data) { - ret = btrfs_del_csums(trans, fs_info, - node->bytenr, - node->num_bytes); - } - } - - /* Also free its reserved qgroup space */ - btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root, - head->qgroup_reserved); - return ret; - } - if (node->type == BTRFS_TREE_BLOCK_REF_KEY || node->type == BTRFS_SHARED_BLOCK_REF_KEY) ret = run_delayed_tree_ref(trans, fs_info, node, extent_op, @@ -2563,7 +2519,7 @@ select_delayed_ref(struct btrfs_delayed_ref_head 
*head) { struct btrfs_delayed_ref_node *ref; - if (list_empty(&head->ref_list)) + if (RB_EMPTY_ROOT(&head->ref_tree)) return NULL; /* @@ -2576,12 +2532,114 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head) return list_first_entry(&head->ref_add_list, struct btrfs_delayed_ref_node, add_list); - ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node, - list); + ref = rb_entry(rb_first(&head->ref_tree), + struct btrfs_delayed_ref_node, ref_node); ASSERT(list_empty(&ref->add_list)); return ref; } +static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head) +{ + spin_lock(&delayed_refs->lock); + head->processing = 0; + delayed_refs->num_heads_ready++; + spin_unlock(&delayed_refs->lock); + btrfs_delayed_ref_unlock(head); +} + +static int cleanup_extent_op(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_head *head) +{ + struct btrfs_delayed_extent_op *extent_op = head->extent_op; + int ret; + + if (!extent_op) + return 0; + head->extent_op = NULL; + if (head->must_insert_reserved) { + btrfs_free_delayed_extent_op(extent_op); + return 0; + } + spin_unlock(&head->lock); + ret = run_delayed_extent_op(trans, fs_info, head, extent_op); + btrfs_free_delayed_extent_op(extent_op); + return ret ? ret : 1; +} + +static int cleanup_ref_head(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_head *head) +{ + struct btrfs_delayed_ref_root *delayed_refs; + int ret; + + delayed_refs = &trans->transaction->delayed_refs; + + ret = cleanup_extent_op(trans, fs_info, head); + if (ret < 0) { + unselect_delayed_ref_head(delayed_refs, head); + btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); + return ret; + } else if (ret) { + return ret; + } + + /* + * Need to drop our head ref lock and re-acquire the delayed ref lock + * and then re-check to make sure nobody got added. 
+ */ + spin_unlock(&head->lock); + spin_lock(&delayed_refs->lock); + spin_lock(&head->lock); + if (!RB_EMPTY_ROOT(&head->ref_tree) || head->extent_op) { + spin_unlock(&head->lock); + spin_unlock(&delayed_refs->lock); + return 1; + } + delayed_refs->num_heads--; + rb_erase(&head->href_node, &delayed_refs->href_root); + RB_CLEAR_NODE(&head->href_node); + spin_unlock(&delayed_refs->lock); + spin_unlock(&head->lock); + atomic_dec(&delayed_refs->num_entries); + + trace_run_delayed_ref_head(fs_info, head, 0); + + if (head->total_ref_mod < 0) { + struct btrfs_block_group_cache *cache; + + cache = btrfs_lookup_block_group(fs_info, head->bytenr); + ASSERT(cache); + percpu_counter_add(&cache->space_info->total_bytes_pinned, + -head->num_bytes); + btrfs_put_block_group(cache); + + if (head->is_data) { + spin_lock(&delayed_refs->lock); + delayed_refs->pending_csums -= head->num_bytes; + spin_unlock(&delayed_refs->lock); + } + } + + if (head->must_insert_reserved) { + btrfs_pin_extent(fs_info, head->bytenr, + head->num_bytes, 1); + if (head->is_data) { + ret = btrfs_del_csums(trans, fs_info, head->bytenr, + head->num_bytes); + } + } + + /* Also free its reserved qgroup space */ + btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root, + head->qgroup_reserved); + btrfs_delayed_ref_unlock(head); + btrfs_put_delayed_ref_head(head); + return 0; +} + /* * Returns 0 on success or if called with an already aborted transaction. * Returns -ENOMEM or -EIO on failure and will abort the transaction. 
@@ -2655,11 +2713,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, if (ref && ref->seq && btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) { spin_unlock(&locked_ref->lock); - spin_lock(&delayed_refs->lock); - locked_ref->processing = 0; - delayed_refs->num_heads_ready++; - spin_unlock(&delayed_refs->lock); - btrfs_delayed_ref_unlock(locked_ref); + unselect_delayed_ref_head(delayed_refs, locked_ref); locked_ref = NULL; cond_resched(); count++; @@ -2667,102 +2721,55 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, } /* - * record the must insert reserved flag before we - * drop the spin lock. + * We're done processing refs in this ref_head, clean everything + * up and move on to the next ref_head. */ - must_insert_reserved = locked_ref->must_insert_reserved; - locked_ref->must_insert_reserved = 0; - - extent_op = locked_ref->extent_op; - locked_ref->extent_op = NULL; - if (!ref) { - - - /* All delayed refs have been processed, Go ahead - * and send the head node to run_one_delayed_ref, - * so that any accounting fixes can happen - */ - ref = &locked_ref->node; - - if (extent_op && must_insert_reserved) { - btrfs_free_delayed_extent_op(extent_op); - extent_op = NULL; - } - - if (extent_op) { - spin_unlock(&locked_ref->lock); - ret = run_delayed_extent_op(trans, fs_info, - ref, extent_op); - btrfs_free_delayed_extent_op(extent_op); - - if (ret) { - /* - * Need to reset must_insert_reserved if - * there was an error so the abort stuff - * can cleanup the reserved space - * properly. 
- */ - if (must_insert_reserved) - locked_ref->must_insert_reserved = 1; - spin_lock(&delayed_refs->lock); - locked_ref->processing = 0; - delayed_refs->num_heads_ready++; - spin_unlock(&delayed_refs->lock); - btrfs_debug(fs_info, - "run_delayed_extent_op returned %d", - ret); - btrfs_delayed_ref_unlock(locked_ref); - return ret; - } + ret = cleanup_ref_head(trans, fs_info, locked_ref); + if (ret > 0 ) { + /* We dropped our lock, we need to loop. */ + ret = 0; continue; + } else if (ret) { + return ret; } + locked_ref = NULL; + count++; + continue; + } - /* - * Need to drop our head ref lock and re-acquire the - * delayed ref lock and then re-check to make sure - * nobody got added. - */ - spin_unlock(&locked_ref->lock); - spin_lock(&delayed_refs->lock); - spin_lock(&locked_ref->lock); - if (!list_empty(&locked_ref->ref_list) || - locked_ref->extent_op) { - spin_unlock(&locked_ref->lock); - spin_unlock(&delayed_refs->lock); - continue; - } - ref->in_tree = 0; - delayed_refs->num_heads--; - rb_erase(&locked_ref->href_node, - &delayed_refs->href_root); - spin_unlock(&delayed_refs->lock); - } else { - actual_count++; - ref->in_tree = 0; - list_del(&ref->list); - if (!list_empty(&ref->add_list)) - list_del(&ref->add_list); + actual_count++; + ref->in_tree = 0; + rb_erase(&ref->ref_node, &locked_ref->ref_tree); + RB_CLEAR_NODE(&ref->ref_node); + if (!list_empty(&ref->add_list)) + list_del(&ref->add_list); + /* + * When we play the delayed ref, also correct the ref_mod on + * head + */ + switch (ref->action) { + case BTRFS_ADD_DELAYED_REF: + case BTRFS_ADD_DELAYED_EXTENT: + locked_ref->ref_mod -= ref->ref_mod; + break; + case BTRFS_DROP_DELAYED_REF: + locked_ref->ref_mod += ref->ref_mod; + break; + default: + WARN_ON(1); } atomic_dec(&delayed_refs->num_entries); - if (!btrfs_delayed_ref_is_head(ref)) { - /* - * when we play the delayed ref, also correct the - * ref_mod on head - */ - switch (ref->action) { - case BTRFS_ADD_DELAYED_REF: - case BTRFS_ADD_DELAYED_EXTENT: - 
locked_ref->node.ref_mod -= ref->ref_mod; - break; - case BTRFS_DROP_DELAYED_REF: - locked_ref->node.ref_mod += ref->ref_mod; - break; - default: - WARN_ON(1); - } - } + /* + * Record the must_insert_reserved flag before we drop the spin + * lock. + */ + must_insert_reserved = locked_ref->must_insert_reserved; + locked_ref->must_insert_reserved = 0; + + extent_op = locked_ref->extent_op; + locked_ref->extent_op = NULL; spin_unlock(&locked_ref->lock); ret = run_one_delayed_ref(trans, fs_info, ref, extent_op, @@ -2770,33 +2777,13 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, btrfs_free_delayed_extent_op(extent_op); if (ret) { - spin_lock(&delayed_refs->lock); - locked_ref->processing = 0; - delayed_refs->num_heads_ready++; - spin_unlock(&delayed_refs->lock); - btrfs_delayed_ref_unlock(locked_ref); + unselect_delayed_ref_head(delayed_refs, locked_ref); btrfs_put_delayed_ref(ref); btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret); return ret; } - /* - * If this node is a head, that means all the refs in this head - * have been dealt with, and we will pick the next head to deal - * with, so we must unlock the head and drop it from the cluster - * list before we release it. 
- */ - if (btrfs_delayed_ref_is_head(ref)) { - if (locked_ref->is_data && - locked_ref->total_ref_mod < 0) { - spin_lock(&delayed_refs->lock); - delayed_refs->pending_csums -= ref->num_bytes; - spin_unlock(&delayed_refs->lock); - } - btrfs_delayed_ref_unlock(locked_ref); - locked_ref = NULL; - } btrfs_put_delayed_ref(ref); count++; cond_resched(); @@ -3100,33 +3087,16 @@ again: spin_unlock(&delayed_refs->lock); goto out; } + head = rb_entry(node, struct btrfs_delayed_ref_head, + href_node); + refcount_inc(&head->refs); + spin_unlock(&delayed_refs->lock); - while (node) { - head = rb_entry(node, struct btrfs_delayed_ref_head, - href_node); - if (btrfs_delayed_ref_is_head(&head->node)) { - struct btrfs_delayed_ref_node *ref; - - ref = &head->node; - refcount_inc(&ref->refs); - - spin_unlock(&delayed_refs->lock); - /* - * Mutex was contended, block until it's - * released and try again - */ - mutex_lock(&head->mutex); - mutex_unlock(&head->mutex); + /* Mutex was contended, block until it's released and retry. 
*/ + mutex_lock(&head->mutex); + mutex_unlock(&head->mutex); - btrfs_put_delayed_ref(ref); - cond_resched(); - goto again; - } else { - WARN_ON(1); - } - node = rb_next(node); - } - spin_unlock(&delayed_refs->lock); + btrfs_put_delayed_ref_head(head); cond_resched(); goto again; } @@ -3169,6 +3139,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root, struct btrfs_delayed_data_ref *data_ref; struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_transaction *cur_trans; + struct rb_node *node; int ret = 0; cur_trans = root->fs_info->running_transaction; @@ -3184,7 +3155,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root, } if (!mutex_trylock(&head->mutex)) { - refcount_inc(&head->node.refs); + refcount_inc(&head->refs); spin_unlock(&delayed_refs->lock); btrfs_release_path(path); @@ -3195,13 +3166,18 @@ static noinline int check_delayed_ref(struct btrfs_root *root, */ mutex_lock(&head->mutex); mutex_unlock(&head->mutex); - btrfs_put_delayed_ref(&head->node); + btrfs_put_delayed_ref_head(head); return -EAGAIN; } spin_unlock(&delayed_refs->lock); spin_lock(&head->lock); - list_for_each_entry(ref, &head->ref_list, list) { + /* + * XXX: We should replace this with a proper search function in the + * future. 
+ */ + for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) { + ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node); /* If it's a shared ref we know a cross reference exists */ if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) { ret = 1; @@ -3351,7 +3327,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, int level; int ret = 0; int (*process_func)(struct btrfs_trans_handle *, - struct btrfs_fs_info *, + struct btrfs_root *, u64, u64, u64, u64, u64, u64); @@ -3391,7 +3367,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); key.offset -= btrfs_file_extent_offset(buf, fi); - ret = process_func(trans, fs_info, bytenr, num_bytes, + ret = process_func(trans, root, bytenr, num_bytes, parent, ref_root, key.objectid, key.offset); if (ret) @@ -3399,7 +3375,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, } else { bytenr = btrfs_node_blockptr(buf, i); num_bytes = fs_info->nodesize; - ret = process_func(trans, fs_info, bytenr, num_bytes, + ret = process_func(trans, root, bytenr, num_bytes, parent, ref_root, level - 1, 0); if (ret) goto fail; @@ -4016,16 +3992,9 @@ void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr) btrfs_put_block_group(bg); } -static int btrfs_wait_nocow_writers_atomic_t(atomic_t *a) -{ - schedule(); - return 0; -} - void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg) { - wait_on_atomic_t(&bg->nocow_writers, - btrfs_wait_nocow_writers_atomic_t, + wait_on_atomic_t(&bg->nocow_writers, atomic_t_wait, TASK_UNINTERRUPTIBLE); } @@ -4843,7 +4812,6 @@ static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info, static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim, u64 orig, bool wait_ordered) { - struct btrfs_block_rsv *block_rsv; struct btrfs_space_info *space_info; struct btrfs_trans_handle *trans; u64 delalloc_bytes; @@ -4859,8 +4827,7 @@ static void shrink_delalloc(struct 
btrfs_fs_info *fs_info, u64 to_reclaim, to_reclaim = items * EXTENT_SIZE_PER_ITEM; trans = (struct btrfs_trans_handle *)current->journal_info; - block_rsv = &fs_info->delalloc_block_rsv; - space_info = block_rsv->space_info; + space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); delalloc_bytes = percpu_counter_sum_positive( &fs_info->delalloc_bytes); @@ -4919,6 +4886,13 @@ skip_async: } } +struct reserve_ticket { + u64 bytes; + int error; + struct list_head list; + wait_queue_head_t wait; +}; + /** * maybe_commit_transaction - possibly commit the transaction if its ok to * @root - the root we're allocating for @@ -4930,18 +4904,29 @@ skip_async: * will return -ENOSPC. */ static int may_commit_transaction(struct btrfs_fs_info *fs_info, - struct btrfs_space_info *space_info, - u64 bytes, int force) + struct btrfs_space_info *space_info) { + struct reserve_ticket *ticket = NULL; struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv; struct btrfs_trans_handle *trans; + u64 bytes; trans = (struct btrfs_trans_handle *)current->journal_info; if (trans) return -EAGAIN; - if (force) - goto commit; + spin_lock(&space_info->lock); + if (!list_empty(&space_info->priority_tickets)) + ticket = list_first_entry(&space_info->priority_tickets, + struct reserve_ticket, list); + else if (!list_empty(&space_info->tickets)) + ticket = list_first_entry(&space_info->tickets, + struct reserve_ticket, list); + bytes = (ticket) ? 
ticket->bytes : 0; + spin_unlock(&space_info->lock); + + if (!bytes) + return 0; /* See if there is enough pinned space to make this reservation */ if (percpu_counter_compare(&space_info->total_bytes_pinned, @@ -4956,8 +4941,12 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info, return -ENOSPC; spin_lock(&delayed_rsv->lock); + if (delayed_rsv->size > bytes) + bytes = 0; + else + bytes -= delayed_rsv->size; if (percpu_counter_compare(&space_info->total_bytes_pinned, - bytes - delayed_rsv->size) < 0) { + bytes) < 0) { spin_unlock(&delayed_rsv->lock); return -ENOSPC; } @@ -4971,13 +4960,6 @@ commit: return btrfs_commit_transaction(trans); } -struct reserve_ticket { - u64 bytes; - int error; - struct list_head list; - wait_queue_head_t wait; -}; - /* * Try to flush some data based on policy set by @state. This is only advisory * and may fail for various reasons. The caller is supposed to examine the @@ -5027,8 +5009,7 @@ static void flush_space(struct btrfs_fs_info *fs_info, ret = 0; break; case COMMIT_TRANS: - ret = may_commit_transaction(fs_info, space_info, - num_bytes, 0); + ret = may_commit_transaction(fs_info, space_info); break; default: ret = -ENOSPC; @@ -5582,11 +5563,12 @@ again: } } -static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, +static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv, struct btrfs_block_rsv *dest, u64 num_bytes) { struct btrfs_space_info *space_info = block_rsv->space_info; + u64 ret; spin_lock(&block_rsv->lock); if (num_bytes == (u64)-1) @@ -5601,6 +5583,7 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, } spin_unlock(&block_rsv->lock); + ret = num_bytes; if (num_bytes > 0) { if (dest) { spin_lock(&dest->lock); @@ -5620,6 +5603,7 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, space_info_add_old_bytes(fs_info, space_info, num_bytes); } + return ret; } int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src, @@ -5643,6 
+5627,15 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type) rsv->type = type; } +void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info, + struct btrfs_block_rsv *rsv, + unsigned short type) +{ + btrfs_init_block_rsv(rsv, type); + rsv->space_info = __find_space_info(fs_info, + BTRFS_BLOCK_GROUP_METADATA); +} + struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info, unsigned short type) { @@ -5652,9 +5645,7 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info, if (!block_rsv) return NULL; - btrfs_init_block_rsv(block_rsv, type); - block_rsv->space_info = __find_space_info(fs_info, - BTRFS_BLOCK_GROUP_METADATA); + btrfs_init_metadata_block_rsv(fs_info, block_rsv, type); return block_rsv; } @@ -5737,6 +5728,66 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, return ret; } +/** + * btrfs_inode_rsv_refill - refill the inode block rsv. + * @inode - the inode we are refilling. + * @flush - the flushing restriction. + * + * Essentially the same as btrfs_block_rsv_refill, except it uses the + * block_rsv->size as the minimum size. We'll either refill the missing amount + * or return if we already have enough space. This will also handle the reserve + * tracepoint for the reserved amount. 
+ */ +int btrfs_inode_rsv_refill(struct btrfs_inode *inode, + enum btrfs_reserve_flush_enum flush) +{ + struct btrfs_root *root = inode->root; + struct btrfs_block_rsv *block_rsv = &inode->block_rsv; + u64 num_bytes = 0; + int ret = -ENOSPC; + + spin_lock(&block_rsv->lock); + if (block_rsv->reserved < block_rsv->size) + num_bytes = block_rsv->size - block_rsv->reserved; + spin_unlock(&block_rsv->lock); + + if (num_bytes == 0) + return 0; + + ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); + if (!ret) { + block_rsv_add_bytes(block_rsv, num_bytes, 0); + trace_btrfs_space_reservation(root->fs_info, "delalloc", + btrfs_ino(inode), num_bytes, 1); + } + return ret; +} + +/** + * btrfs_inode_rsv_release - release any excessive reservation. + * @inode - the inode we need to release from. + * + * This is the same as btrfs_block_rsv_release, except that it handles the + * tracepoint for the reservation. + */ +void btrfs_inode_rsv_release(struct btrfs_inode *inode) +{ + struct btrfs_fs_info *fs_info = inode->root->fs_info; + struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; + struct btrfs_block_rsv *block_rsv = &inode->block_rsv; + u64 released = 0; + + /* + * Since we statically set the block_rsv->size we just want to say we + * are releasing 0 bytes, and then we'll just get the reservation over + * the size free'd. 
+ */ + released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0); + if (released > 0) + trace_btrfs_space_reservation(fs_info, "delalloc", + btrfs_ino(inode), released, 0); +} + void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, struct btrfs_block_rsv *block_rsv, u64 num_bytes) @@ -5808,7 +5859,6 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); fs_info->global_block_rsv.space_info = space_info; - fs_info->delalloc_block_rsv.space_info = space_info; fs_info->trans_block_rsv.space_info = space_info; fs_info->empty_block_rsv.space_info = space_info; fs_info->delayed_block_rsv.space_info = space_info; @@ -5828,8 +5878,6 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) { block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL, (u64)-1); - WARN_ON(fs_info->delalloc_block_rsv.size > 0); - WARN_ON(fs_info->delalloc_block_rsv.reserved > 0); WARN_ON(fs_info->trans_block_rsv.size > 0); WARN_ON(fs_info->trans_block_rsv.reserved > 0); WARN_ON(fs_info->chunk_block_rsv.size > 0); @@ -5841,12 +5889,15 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { - if (!trans->block_rsv) + if (!trans->block_rsv) { + ASSERT(!trans->bytes_reserved); return; + } if (!trans->bytes_reserved) return; + ASSERT(trans->block_rsv == &fs_info->trans_block_rsv); trace_btrfs_space_reservation(fs_info, "transaction", trans->transid, trans->bytes_reserved, 0); btrfs_block_rsv_release(fs_info, trans->block_rsv, @@ -5968,104 +6019,37 @@ void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, btrfs_block_rsv_release(fs_info, rsv, (u64)-1); } -/** - * drop_outstanding_extent - drop an outstanding extent - * @inode: the inode we're dropping the extent for - * @num_bytes: the number of bytes we're releasing. 
- * - * This is called when we are freeing up an outstanding extent, either called - * after an error or after an extent is written. This will return the number of - * reserved extents that need to be freed. This must be called with - * BTRFS_I(inode)->lock held. - */ -static unsigned drop_outstanding_extent(struct btrfs_inode *inode, - u64 num_bytes) -{ - unsigned drop_inode_space = 0; - unsigned dropped_extents = 0; - unsigned num_extents; - - num_extents = count_max_extents(num_bytes); - ASSERT(num_extents); - ASSERT(inode->outstanding_extents >= num_extents); - inode->outstanding_extents -= num_extents; - - if (inode->outstanding_extents == 0 && - test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, - &inode->runtime_flags)) - drop_inode_space = 1; - - /* - * If we have more or the same amount of outstanding extents than we have - * reserved then we need to leave the reserved extents count alone. - */ - if (inode->outstanding_extents >= inode->reserved_extents) - return drop_inode_space; - - dropped_extents = inode->reserved_extents - inode->outstanding_extents; - inode->reserved_extents -= dropped_extents; - return dropped_extents + drop_inode_space; -} - -/** - * calc_csum_metadata_size - return the amount of metadata space that must be - * reserved/freed for the given bytes. - * @inode: the inode we're manipulating - * @num_bytes: the number of bytes in question - * @reserve: 1 if we are reserving space, 0 if we are freeing space - * - * This adjusts the number of csum_bytes in the inode and then returns the - * correct amount of metadata that must either be reserved or freed. We - * calculate how many checksums we can fit into one leaf and then divide the - * number of bytes that will need to be checksumed by this value to figure out - * how many checksums will be required. If we are adding bytes then the number - * may go up and we will return the number of additional bytes that must be - * reserved. 
If it is going down we will return the number of bytes that must - * be freed. - * - * This must be called with BTRFS_I(inode)->lock held. - */ -static u64 calc_csum_metadata_size(struct btrfs_inode *inode, u64 num_bytes, - int reserve) +static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, + struct btrfs_inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); - u64 old_csums, num_csums; - - if (inode->flags & BTRFS_INODE_NODATASUM && inode->csum_bytes == 0) - return 0; - - old_csums = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes); - if (reserve) - inode->csum_bytes += num_bytes; - else - inode->csum_bytes -= num_bytes; - num_csums = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes); + struct btrfs_block_rsv *block_rsv = &inode->block_rsv; + u64 reserve_size = 0; + u64 csum_leaves; + unsigned outstanding_extents; - /* No change, no need to reserve more */ - if (old_csums == num_csums) - return 0; - - if (reserve) - return btrfs_calc_trans_metadata_size(fs_info, - num_csums - old_csums); + lockdep_assert_held(&inode->lock); + outstanding_extents = inode->outstanding_extents; + if (outstanding_extents) + reserve_size = btrfs_calc_trans_metadata_size(fs_info, + outstanding_extents + 1); + csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, + inode->csum_bytes); + reserve_size += btrfs_calc_trans_metadata_size(fs_info, + csum_leaves); - return btrfs_calc_trans_metadata_size(fs_info, old_csums - num_csums); + spin_lock(&block_rsv->lock); + block_rsv->size = reserve_size; + spin_unlock(&block_rsv->lock); } int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); struct btrfs_root *root = inode->root; - struct btrfs_block_rsv *block_rsv = &fs_info->delalloc_block_rsv; - u64 to_reserve = 0; - u64 csum_bytes; unsigned nr_extents; enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; int ret = 0; bool delalloc_lock 
= true; - u64 to_free = 0; - unsigned dropped; - bool release_extra = false; /* If we are a free space inode we need to not flush since we will be in * the middle of a transaction commit. We also don't need the delalloc @@ -6091,19 +6075,12 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) num_bytes = ALIGN(num_bytes, fs_info->sectorsize); + /* Add our new extents and calculate the new rsv size. */ spin_lock(&inode->lock); nr_extents = count_max_extents(num_bytes); - inode->outstanding_extents += nr_extents; - - nr_extents = 0; - if (inode->outstanding_extents > inode->reserved_extents) - nr_extents += inode->outstanding_extents - - inode->reserved_extents; - - /* We always want to reserve a slot for updating the inode. */ - to_reserve = btrfs_calc_trans_metadata_size(fs_info, nr_extents + 1); - to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); - csum_bytes = inode->csum_bytes; + btrfs_mod_outstanding_extents(inode, nr_extents); + inode->csum_bytes += num_bytes; + btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { @@ -6113,92 +6090,26 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) goto out_fail; } - ret = btrfs_block_rsv_add(root, block_rsv, to_reserve, flush); + ret = btrfs_inode_rsv_refill(inode, flush); if (unlikely(ret)) { btrfs_qgroup_free_meta(root, nr_extents * fs_info->nodesize); goto out_fail; } - spin_lock(&inode->lock); - if (test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED, - &inode->runtime_flags)) { - to_reserve -= btrfs_calc_trans_metadata_size(fs_info, 1); - release_extra = true; - } - inode->reserved_extents += nr_extents; - spin_unlock(&inode->lock); - if (delalloc_lock) mutex_unlock(&inode->delalloc_mutex); - - if (to_reserve) - trace_btrfs_space_reservation(fs_info, "delalloc", - btrfs_ino(inode), to_reserve, 1); - if (release_extra) - btrfs_block_rsv_release(fs_info, block_rsv, - 
btrfs_calc_trans_metadata_size(fs_info, 1)); return 0; out_fail: spin_lock(&inode->lock); - dropped = drop_outstanding_extent(inode, num_bytes); - /* - * If the inodes csum_bytes is the same as the original - * csum_bytes then we know we haven't raced with any free()ers - * so we can just reduce our inodes csum bytes and carry on. - */ - if (inode->csum_bytes == csum_bytes) { - calc_csum_metadata_size(inode, num_bytes, 0); - } else { - u64 orig_csum_bytes = inode->csum_bytes; - u64 bytes; - - /* - * This is tricky, but first we need to figure out how much we - * freed from any free-ers that occurred during this - * reservation, so we reset ->csum_bytes to the csum_bytes - * before we dropped our lock, and then call the free for the - * number of bytes that were freed while we were trying our - * reservation. - */ - bytes = csum_bytes - inode->csum_bytes; - inode->csum_bytes = csum_bytes; - to_free = calc_csum_metadata_size(inode, bytes, 0); - - - /* - * Now we need to see how much we would have freed had we not - * been making this reservation and our ->csum_bytes were not - * artificially inflated. - */ - inode->csum_bytes = csum_bytes - num_bytes; - bytes = csum_bytes - orig_csum_bytes; - bytes = calc_csum_metadata_size(inode, bytes, 0); - - /* - * Now reset ->csum_bytes to what it should be. If bytes is - * more than to_free then we would have freed more space had we - * not had an artificially high ->csum_bytes, so we need to free - * the remainder. If bytes is the same or less then we don't - * need to do anything, the other free-ers did the correct - * thing. 
- */ - inode->csum_bytes = orig_csum_bytes - num_bytes; - if (bytes > to_free) - to_free = bytes - to_free; - else - to_free = 0; - } + nr_extents = count_max_extents(num_bytes); + btrfs_mod_outstanding_extents(inode, -nr_extents); + inode->csum_bytes -= num_bytes; + btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); - if (dropped) - to_free += btrfs_calc_trans_metadata_size(fs_info, dropped); - if (to_free) { - btrfs_block_rsv_release(fs_info, block_rsv, to_free); - trace_btrfs_space_reservation(fs_info, "delalloc", - btrfs_ino(inode), to_free, 0); - } + btrfs_inode_rsv_release(inode); if (delalloc_lock) mutex_unlock(&inode->delalloc_mutex); return ret; @@ -6206,36 +6117,55 @@ out_fail: /** * btrfs_delalloc_release_metadata - release a metadata reservation for an inode - * @inode: the inode to release the reservation for - * @num_bytes: the number of bytes we're releasing + * @inode: the inode to release the reservation for. + * @num_bytes: the number of bytes we are releasing. * * This will release the metadata reservation for an inode. This can be called * once we complete IO for a given set of bytes to release their metadata - * reservations. + * reservations, or on error for the same reason. 
*/ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); - u64 to_free = 0; - unsigned dropped; num_bytes = ALIGN(num_bytes, fs_info->sectorsize); spin_lock(&inode->lock); - dropped = drop_outstanding_extent(inode, num_bytes); - - if (num_bytes) - to_free = calc_csum_metadata_size(inode, num_bytes, 0); + inode->csum_bytes -= num_bytes; + btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); - if (dropped > 0) - to_free += btrfs_calc_trans_metadata_size(fs_info, dropped); if (btrfs_is_testing(fs_info)) return; - trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode), - to_free, 0); + btrfs_inode_rsv_release(inode); +} + +/** + * btrfs_delalloc_release_extents - release our outstanding_extents + * @inode: the inode to balance the reservation for. + * @num_bytes: the number of bytes we originally reserved with + * + * When we reserve space we increase outstanding_extents for the extents we may + * add. Once we've set the range as delalloc or created our ordered extents we + * have outstanding_extents to track the real usage, so we use this to free our + * temporarily tracked outstanding_extents. This _must_ be used in conjunction + * with btrfs_delalloc_reserve_metadata. 
+ */ +void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes) +{ + struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); + unsigned num_extents; + + spin_lock(&inode->lock); + num_extents = count_max_extents(num_bytes); + btrfs_mod_outstanding_extents(inode, -num_extents); + btrfs_calculate_inode_block_rsv_size(fs_info, inode); + spin_unlock(&inode->lock); - btrfs_block_rsv_release(fs_info, &fs_info->delalloc_block_rsv, to_free); + if (btrfs_is_testing(fs_info)) + return; + + btrfs_inode_rsv_release(inode); } /** @@ -6282,10 +6212,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode, * @inode: inode we're releasing space for * @start: start position of the space already reserved * @len: the len of the space already reserved - * - * This must be matched with a call to btrfs_delalloc_reserve_space. This is - * called in the case that we don't need the metadata AND data reservations - * anymore. So if there is an error or we insert an inline extent. + * @release_bytes: the len of the space we consumed or didn't use * * This function will release the metadata space that was not used and will * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes @@ -6293,7 +6220,8 @@ int btrfs_delalloc_reserve_space(struct inode *inode, * Also it will handle the qgroup reserved space. 
*/ void btrfs_delalloc_release_space(struct inode *inode, - struct extent_changeset *reserved, u64 start, u64 len) + struct extent_changeset *reserved, + u64 start, u64 len) { btrfs_delalloc_release_metadata(BTRFS_I(inode), len); btrfs_free_reserved_data_space(inode, reserved, start, len); @@ -6595,12 +6523,6 @@ void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info, btrfs_put_block_group(bg); } -static int btrfs_wait_bg_reservations_atomic_t(atomic_t *a) -{ - schedule(); - return 0; -} - void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) { struct btrfs_space_info *space_info = bg->space_info; @@ -6623,8 +6545,7 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) down_write(&space_info->groups_sem); up_write(&space_info->groups_sem); - wait_on_atomic_t(&bg->reservations, - btrfs_wait_bg_reservations_atomic_t, + wait_on_atomic_t(&bg->reservations, atomic_t_wait, TASK_UNINTERRUPTIBLE); } @@ -6958,7 +6879,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, BUG_ON(!is_data && refs_to_drop != 1); if (is_data) - skinny_metadata = 0; + skinny_metadata = false; ret = lookup_extent_backref(trans, info, path, &iref, bytenr, num_bytes, parent, @@ -7213,7 +7134,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, goto out_delayed_unlock; spin_lock(&head->lock); - if (!list_empty(&head->ref_list)) + if (!RB_EMPTY_ROOT(&head->ref_tree)) goto out; if (head->extent_op) { @@ -7234,9 +7155,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, * at this point we have a head with no other entries. Go * ahead and process it. 
*/ - head->node.in_tree = 0; rb_erase(&head->href_node, &delayed_refs->href_root); - + RB_CLEAR_NODE(&head->href_node); atomic_dec(&delayed_refs->num_entries); /* @@ -7255,7 +7175,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, ret = 1; mutex_unlock(&head->mutex); - btrfs_put_delayed_ref(&head->node); + btrfs_put_delayed_ref_head(head); return ret; out: spin_unlock(&head->lock); @@ -7277,6 +7197,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { int old_ref_mod, new_ref_mod; + btrfs_ref_tree_mod(root, buf->start, buf->len, parent, + root->root_key.objectid, + btrfs_header_level(buf), 0, + BTRFS_DROP_DELAYED_REF); ret = btrfs_add_delayed_tree_ref(fs_info, trans, buf->start, buf->len, parent, root->root_key.objectid, @@ -7329,16 +7253,21 @@ out: /* Can return -ENOMEM */ int btrfs_free_extent(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, + struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, u64 owner, u64 offset) { + struct btrfs_fs_info *fs_info = root->fs_info; int old_ref_mod, new_ref_mod; int ret; if (btrfs_is_testing(fs_info)) return 0; + if (root_objectid != BTRFS_TREE_LOG_OBJECTID) + btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, + root_objectid, owner, offset, + BTRFS_DROP_DELAYED_REF); /* * tree log blocks never actually go into the extent allocation @@ -8306,17 +8235,22 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, } int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, - u64 root_objectid, u64 owner, + struct btrfs_root *root, u64 owner, u64 offset, u64 ram_bytes, struct btrfs_key *ins) { - struct btrfs_fs_info *fs_info = trans->fs_info; + struct btrfs_fs_info *fs_info = root->fs_info; int ret; - BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); + BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); + + btrfs_ref_tree_mod(root, ins->objectid, 
ins->offset, 0, + root->root_key.objectid, owner, offset, + BTRFS_ADD_DELAYED_EXTENT); ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid, - ins->offset, 0, root_objectid, owner, + ins->offset, 0, + root->root_key.objectid, owner, offset, ram_bytes, BTRFS_ADD_DELAYED_EXTENT, NULL, NULL); return ret; @@ -8538,6 +8472,9 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, extent_op->is_data = false; extent_op->level = level; + btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent, + root_objectid, level, 0, + BTRFS_ADD_DELAYED_EXTENT); ret = btrfs_add_delayed_tree_ref(fs_info, trans, ins.objectid, ins.offset, parent, root_objectid, level, @@ -8894,7 +8831,7 @@ skip: ret); } } - ret = btrfs_free_extent(trans, fs_info, bytenr, blocksize, + ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, root->root_key.objectid, level - 1, 0); if (ret) @@ -9311,7 +9248,7 @@ out: * don't have it in the radix (like when we recover after a power fail * or unmount) so we don't leak memory. 
*/ - if (!for_reloc && root_dropped == false) + if (!for_reloc && !root_dropped) btrfs_add_dead_root(root); if (err && err != -EAGAIN) btrfs_handle_fs_error(fs_info, err, NULL); @@ -9968,9 +9905,9 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) return 0; } -static void __link_block_group(struct btrfs_space_info *space_info, - struct btrfs_block_group_cache *cache) +static void link_block_group(struct btrfs_block_group_cache *cache) { + struct btrfs_space_info *space_info = cache->space_info; int index = get_block_group_index(cache); bool first = false; @@ -10178,7 +10115,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) cache->space_info = space_info; - __link_block_group(space_info, cache); + link_block_group(cache); set_avail_alloc_bits(info, cache->flags); if (btrfs_chunk_readonly(info, cache->key.objectid)) { @@ -10337,7 +10274,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, cache->bytes_super, &cache->space_info); update_global_block_rsv(fs_info); - __link_block_group(cache->space_info, cache); + link_block_group(cache); list_add_tail(&cache->bg_list, &trans->new_bgs); @@ -10387,6 +10324,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, * remove it. 
*/ free_excluded_extents(fs_info, block_group); + btrfs_free_ref_tree_range(fs_info, block_group->key.objectid, + block_group->key.offset); memcpy(&key, &block_group->key, sizeof(key)); index = get_block_group_index(block_group); @@ -11106,12 +11045,6 @@ int btrfs_start_write_no_snapshotting(struct btrfs_root *root) return 1; } -static int wait_snapshotting_atomic_t(atomic_t *a) -{ - schedule(); - return 0; -} - void btrfs_wait_for_snapshot_creation(struct btrfs_root *root) { while (true) { @@ -11120,8 +11053,7 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root) ret = btrfs_start_write_no_snapshotting(root); if (ret) break; - wait_on_atomic_t(&root->will_be_snapshotted, - wait_snapshotting_atomic_t, + wait_on_atomic_t(&root->will_be_snapshotted, atomic_t_wait, TASK_UNINTERRUPTIBLE); } } |