diff options
Diffstat (limited to 'fs/btrfs/delayed-ref.c')
-rw-r--r-- | fs/btrfs/delayed-ref.c | 700 |
1 files changed, 403 insertions, 297 deletions
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 6f2e48d697dd..cab94d141f66 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -16,8 +16,7 @@ #include "fs.h" struct kmem_cache *btrfs_delayed_ref_head_cachep; -struct kmem_cache *btrfs_delayed_tree_ref_cachep; -struct kmem_cache *btrfs_delayed_data_ref_cachep; +struct kmem_cache *btrfs_delayed_ref_node_cachep; struct kmem_cache *btrfs_delayed_extent_op_cachep; /* * delayed back reference update tracking. For subvolume trees @@ -57,16 +56,20 @@ bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info) * Release a ref head's reservation. * * @fs_info: the filesystem - * @nr: number of items to drop + * @nr_refs: number of delayed refs to drop + * @nr_csums: number of csum items to drop * * Drops the delayed ref head's count from the delayed refs rsv and free any * excess reservation we had. */ -void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr) +void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr_refs, int nr_csums) { struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv; - const u64 num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, nr); - u64 released = 0; + u64 num_bytes; + u64 released; + + num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, nr_refs); + num_bytes += btrfs_calc_delayed_ref_csum_bytes(fs_info, nr_csums); released = btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL); if (released) @@ -77,68 +80,118 @@ void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr) /* * Adjust the size of the delayed refs rsv. * - * This is to be called anytime we may have adjusted trans->delayed_ref_updates, - * it'll calculate the additional size and add it to the delayed_refs_rsv. + * This is to be called anytime we may have adjusted trans->delayed_ref_updates + * or trans->delayed_ref_csum_deletions, it'll calculate the additional size and + * add it to the delayed_refs_rsv. */ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv; + struct btrfs_block_rsv *local_rsv = &trans->delayed_rsv; u64 num_bytes; + u64 reserved_bytes; - if (!trans->delayed_ref_updates) + num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, trans->delayed_ref_updates); + num_bytes += btrfs_calc_delayed_ref_csum_bytes(fs_info, + trans->delayed_ref_csum_deletions); + + if (num_bytes == 0) return; - num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, - trans->delayed_ref_updates); + /* + * Try to take num_bytes from the transaction's local delayed reserve. + * If not possible, try to take as much as it's available. If the local + * reserve doesn't have enough reserved space, the delayed refs reserve + * will be refilled next time btrfs_delayed_refs_rsv_refill() is called + * by someone or if a transaction commit is triggered before that, the + * global block reserve will be used. We want to minimize using the + * global block reserve for cases we can account for in advance, to + * avoid exhausting it and reach -ENOSPC during a transaction commit. + */ + spin_lock(&local_rsv->lock); + reserved_bytes = min(num_bytes, local_rsv->reserved); + local_rsv->reserved -= reserved_bytes; + local_rsv->full = (local_rsv->reserved >= local_rsv->size); + spin_unlock(&local_rsv->lock); spin_lock(&delayed_rsv->lock); delayed_rsv->size += num_bytes; - delayed_rsv->full = false; + delayed_rsv->reserved += reserved_bytes; + delayed_rsv->full = (delayed_rsv->reserved >= delayed_rsv->size); spin_unlock(&delayed_rsv->lock); trans->delayed_ref_updates = 0; + trans->delayed_ref_csum_deletions = 0; } /* - * Transfer bytes to our delayed refs rsv. - * - * @fs_info: the filesystem - * @num_bytes: number of bytes to transfer - * - * This transfers up to the num_bytes amount, previously reserved, to the - * delayed_refs_rsv. Any extra bytes are returned to the space info. + * Adjust the size of the delayed refs block reserve for 1 block group item + * insertion, used after allocating a block group. */ -void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info, - u64 num_bytes) +void btrfs_inc_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info) { - struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; - u64 to_free = 0; + struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv; - spin_lock(&delayed_refs_rsv->lock); - if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) { - u64 delta = delayed_refs_rsv->size - - delayed_refs_rsv->reserved; - if (num_bytes > delta) { - to_free = num_bytes - delta; - num_bytes = delta; - } - } else { - to_free = num_bytes; - num_bytes = 0; - } + spin_lock(&delayed_rsv->lock); + /* + * Inserting a block group item does not require changing the free space + * tree, only the extent tree or the block group tree, so this is all we + * need. + */ + delayed_rsv->size += btrfs_calc_insert_metadata_size(fs_info, 1); + delayed_rsv->full = false; + spin_unlock(&delayed_rsv->lock); +} - if (num_bytes) - delayed_refs_rsv->reserved += num_bytes; - if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size) - delayed_refs_rsv->full = true; - spin_unlock(&delayed_refs_rsv->lock); +/* + * Adjust the size of the delayed refs block reserve to release space for 1 + * block group item insertion. + */ +void btrfs_dec_delayed_refs_rsv_bg_inserts(struct btrfs_fs_info *fs_info) +{ + struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv; + const u64 num_bytes = btrfs_calc_insert_metadata_size(fs_info, 1); + u64 released; + + released = btrfs_block_rsv_release(fs_info, delayed_rsv, num_bytes, NULL); + if (released > 0) + trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", + 0, released, 0); +} + +/* + * Adjust the size of the delayed refs block reserve for 1 block group item + * update. + */ +void btrfs_inc_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info) +{ + struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv; + + spin_lock(&delayed_rsv->lock); + /* + * Updating a block group item does not result in new nodes/leaves and + * does not require changing the free space tree, only the extent tree + * or the block group tree, so this is all we need. + */ + delayed_rsv->size += btrfs_calc_metadata_size(fs_info, 1); + delayed_rsv->full = false; + spin_unlock(&delayed_rsv->lock); +} - if (num_bytes) +/* + * Adjust the size of the delayed refs block reserve to release space for 1 + * block group item update. + */ +void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info) +{ + struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv; + const u64 num_bytes = btrfs_calc_metadata_size(fs_info, 1); + u64 released; + + released = btrfs_block_rsv_release(fs_info, delayed_rsv, num_bytes, NULL); + if (released > 0) trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", - 0, num_bytes, 1); - if (to_free) - btrfs_space_info_free_bytes_may_use(fs_info, - delayed_refs_rsv->space_info, to_free); + 0, released, 0); } /* @@ -154,6 +207,7 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, enum btrfs_reserve_flush_enum flush) { struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv; + struct btrfs_space_info *space_info = block_rsv->space_info; u64 limit = btrfs_calc_delayed_ref_bytes(fs_info, 1); u64 num_bytes = 0; u64 refilled_bytes; @@ -170,7 +224,7 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, if (!num_bytes) return 0; - ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, num_bytes, flush); + ret = btrfs_reserve_metadata_bytes(fs_info, space_info, num_bytes, flush); if (ret) return ret; @@ -199,8 +253,7 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, spin_unlock(&block_rsv->lock); if (to_free > 0) - btrfs_space_info_free_bytes_may_use(fs_info, block_rsv->space_info, - to_free); + btrfs_space_info_free_bytes_may_use(fs_info, space_info, to_free); if (refilled_bytes > 0) trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", 0, @@ -209,50 +262,19 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, } /* - * compare two delayed tree backrefs with same bytenr and type - */ -static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1, - struct btrfs_delayed_tree_ref *ref2) -{ - if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) { - if (ref1->root < ref2->root) - return -1; - if (ref1->root > ref2->root) - return 1; - } else { - if (ref1->parent < ref2->parent) - return -1; - if (ref1->parent > ref2->parent) - return 1; - } - return 0; -} - -/* * compare two delayed data backrefs with same bytenr and type */ -static int comp_data_refs(struct btrfs_delayed_data_ref *ref1, - struct btrfs_delayed_data_ref *ref2) +static int comp_data_refs(struct btrfs_delayed_ref_node *ref1, + struct btrfs_delayed_ref_node *ref2) { - if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) { - if (ref1->root < ref2->root) - return -1; - if (ref1->root > ref2->root) - return 1; - if (ref1->objectid < ref2->objectid) - return -1; - if (ref1->objectid > ref2->objectid) - return 1; - if (ref1->offset < ref2->offset) - return -1; - if (ref1->offset > ref2->offset) - return 1; - } else { - if (ref1->parent < ref2->parent) - return -1; - if (ref1->parent > ref2->parent) - return 1; - } + if (ref1->data_ref.objectid < ref2->data_ref.objectid) + return -1; + if (ref1->data_ref.objectid > ref2->data_ref.objectid) + return 1; + if (ref1->data_ref.offset < ref2->data_ref.offset) + return -1; + if (ref1->data_ref.offset > ref2->data_ref.offset) + return 1; return 0; } @@ -266,13 +288,20 @@ static int comp_refs(struct btrfs_delayed_ref_node *ref1, return -1; if (ref1->type > ref2->type) return 1; - if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY || - ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) - ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1), - btrfs_delayed_node_to_tree_ref(ref2)); - else - ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1), - btrfs_delayed_node_to_data_ref(ref2)); + if (ref1->type == BTRFS_SHARED_BLOCK_REF_KEY || + ref1->type == BTRFS_SHARED_DATA_REF_KEY) { + if (ref1->parent < ref2->parent) + return -1; + if (ref1->parent > ref2->parent) + return 1; + } else { + if (ref1->ref_root < ref2->ref_root) + return -1; + if (ref1->ref_root > ref2->ref_root) + return 1; + if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY) + ret = comp_data_refs(ref1, ref2); + } if (ret) return ret; if (check_seq) { @@ -422,7 +451,8 @@ int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs, return 0; } -static inline void drop_delayed_ref(struct btrfs_delayed_ref_root *delayed_refs, +static inline void drop_delayed_ref(struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_head *head, struct btrfs_delayed_ref_node *ref) { @@ -433,9 +463,11 @@ static inline void drop_delayed_ref(struct btrfs_delayed_ref_root *delayed_refs, list_del(&ref->add_list); btrfs_put_delayed_ref(ref); atomic_dec(&delayed_refs->num_entries); + btrfs_delayed_refs_rsv_release(fs_info, 1, 0); } -static bool merge_ref(struct btrfs_delayed_ref_root *delayed_refs, +static bool merge_ref(struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, struct btrfs_delayed_ref_head *head, struct btrfs_delayed_ref_node *ref, u64 seq) @@ -464,10 +496,10 @@ static bool merge_ref(struct btrfs_delayed_ref_root *delayed_refs, mod = -next->ref_mod; } - drop_delayed_ref(delayed_refs, head, next); + drop_delayed_ref(fs_info, delayed_refs, head, next); ref->ref_mod += mod; if (ref->ref_mod == 0) { - drop_delayed_ref(delayed_refs, head, ref); + drop_delayed_ref(fs_info, delayed_refs, head, ref); done = true; } else { /* @@ -505,7 +537,7 @@ again: ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node); if (seq && ref->seq >= seq) continue; - if (merge_ref(delayed_refs, head, ref, seq)) + if (merge_ref(fs_info, delayed_refs, head, ref, seq)) goto again; } } @@ -584,10 +616,11 @@ void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs, * Return true if the ref was merged into an existing one (and therefore can be * freed by the caller). */ -static bool insert_delayed_ref(struct btrfs_delayed_ref_root *root, +static bool insert_delayed_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_head *href, struct btrfs_delayed_ref_node *ref) { + struct btrfs_delayed_ref_root *root = &trans->transaction->delayed_refs; struct btrfs_delayed_ref_node *exist; int mod; @@ -598,6 +631,7 @@ static bool insert_delayed_ref(struct btrfs_delayed_ref_root *root, list_add_tail(&ref->add_list, &href->ref_add_list); atomic_inc(&root->num_entries); spin_unlock(&href->lock); + trans->delayed_ref_updates++; return false; } @@ -626,7 +660,7 @@ static bool insert_delayed_ref(struct btrfs_delayed_ref_root *root, /* remove existing tail if its ref_mod is zero */ if (exist->ref_mod == 0) - drop_delayed_ref(root, href, exist); + drop_delayed_ref(trans->fs_info, root, href, exist); spin_unlock(&href->lock); return true; } @@ -647,6 +681,15 @@ static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans, BUG_ON(existing->is_data != update->is_data); spin_lock(&existing->lock); + + /* + * When freeing an extent, we may not know the owning root when we + * first create the head_ref. However, some deref before the last deref + * will know it, so we just need to update the head_ref accordingly. + */ + if (!existing->owning_root) + existing->owning_root = update->owning_root; + if (update->must_insert_reserved) { /* if the extent was freed and then * reallocated before the delayed ref @@ -656,6 +699,7 @@ static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans, * Set it again here */ existing->must_insert_reserved = update->must_insert_reserved; + existing->owning_root = update->owning_root; /* * update the num_bytes so we make sure the accounting @@ -695,6 +739,8 @@ static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans, /* * If we are going to from a positive ref mod to a negative or vice * versa we need to make sure to adjust pending_csums accordingly. + * We reserve bytes for csum deletion when adding or updating a ref head + * see add_delayed_ref_head() for more details. */ if (existing->is_data) { u64 csum_leaves = @@ -703,11 +749,11 @@ static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans, if (existing->total_ref_mod >= 0 && old_ref_mod < 0) { delayed_refs->pending_csums -= existing->num_bytes; - btrfs_delayed_refs_rsv_release(fs_info, csum_leaves); + btrfs_delayed_refs_rsv_release(fs_info, 0, csum_leaves); } if (existing->total_ref_mod < 0 && old_ref_mod >= 0) { delayed_refs->pending_csums += existing->num_bytes; - trans->delayed_ref_updates += csum_leaves; + trans->delayed_ref_csum_deletions += csum_leaves; } } @@ -715,18 +761,20 @@ static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans, } static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref, + struct btrfs_ref *generic_ref, struct btrfs_qgroup_extent_record *qrecord, - u64 bytenr, u64 num_bytes, u64 ref_root, - u64 reserved, int action, bool is_data, - bool is_system) + u64 reserved) { int count_mod = 1; bool must_insert_reserved = false; /* If reserved is provided, it must be a data extent. */ - BUG_ON(!is_data && reserved); + BUG_ON(generic_ref->type != BTRFS_REF_DATA && reserved); - switch (action) { + switch (generic_ref->action) { + case BTRFS_ADD_DELAYED_REF: + /* count_mod is already set to 1. */ + break; case BTRFS_UPDATE_DELAYED_HEAD: count_mod = 0; break; @@ -755,12 +803,14 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref, } refcount_set(&head_ref->refs, 1); - head_ref->bytenr = bytenr; - head_ref->num_bytes = num_bytes; + head_ref->bytenr = generic_ref->bytenr; + head_ref->num_bytes = generic_ref->num_bytes; head_ref->ref_mod = count_mod; + head_ref->reserved_bytes = reserved; head_ref->must_insert_reserved = must_insert_reserved; - head_ref->is_data = is_data; - head_ref->is_system = is_system; + head_ref->owning_root = generic_ref->owning_root; + head_ref->is_data = (generic_ref->type == BTRFS_REF_DATA); + head_ref->is_system = (generic_ref->ref_root == BTRFS_CHUNK_TREE_OBJECTID); head_ref->ref_tree = RB_ROOT_CACHED; INIT_LIST_HEAD(&head_ref->ref_add_list); RB_CLEAR_NODE(&head_ref->href_node); @@ -769,13 +819,19 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref, spin_lock_init(&head_ref->lock); mutex_init(&head_ref->mutex); + /* If not metadata set an impossible level to help debugging. */ + if (generic_ref->type == BTRFS_REF_METADATA) + head_ref->level = generic_ref->tree_ref.level; + else + head_ref->level = U8_MAX; + if (qrecord) { - if (ref_root && reserved) { + if (generic_ref->ref_root && reserved) { qrecord->data_rsv = reserved; - qrecord->data_rsv_refroot = ref_root; + qrecord->data_rsv_refroot = generic_ref->ref_root; } - qrecord->bytenr = bytenr; - qrecord->num_bytes = num_bytes; + qrecord->bytenr = generic_ref->bytenr; + qrecord->num_bytes = generic_ref->num_bytes; qrecord->old_roots = NULL; } } @@ -784,6 +840,8 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref, * helper function to actually insert a head node into the rbtree. * this does all the dirty work in terms of maintaining the correct * overall modification count. + * + * Returns an error pointer in case of an error. */ static noinline struct btrfs_delayed_ref_head * add_delayed_ref_head(struct btrfs_trans_handle *trans, @@ -791,6 +849,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans, struct btrfs_qgroup_extent_record *qrecord, int action, bool *qrecord_inserted_ret) { + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_ref_head *existing; struct btrfs_delayed_ref_root *delayed_refs; bool qrecord_inserted = false; @@ -799,14 +858,23 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans, /* Record qgroup extent info if provided */ if (qrecord) { - if (btrfs_qgroup_trace_extent_nolock(trans->fs_info, - delayed_refs, qrecord)) + int ret; + + ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, qrecord); + if (ret) { + /* Clean up if insertion fails or item exists. */ + xa_release(&delayed_refs->dirty_extents, + qrecord->bytenr >> fs_info->sectorsize_bits); + /* Caller responsible for freeing qrecord on error. */ + if (ret < 0) + return ERR_PTR(ret); kfree(qrecord); - else + } else { qrecord_inserted = true; + } } - trace_add_delayed_ref_head(trans->fs_info, head_ref, action); + trace_add_delayed_ref_head(fs_info, head_ref, action); existing = htree_insert(&delayed_refs->href_root, &head_ref->href_node); @@ -819,16 +887,20 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); head_ref = existing; } else { + /* + * We reserve the amount of bytes needed to delete csums when + * adding the ref head and not when adding individual drop refs + * since the csum items are deleted only after running the last + * delayed drop ref (the data extent's ref count drops to 0). + */ if (head_ref->is_data && head_ref->ref_mod < 0) { delayed_refs->pending_csums += head_ref->num_bytes; - trans->delayed_ref_updates += - btrfs_csum_bytes_to_leaves(trans->fs_info, - head_ref->num_bytes); + trans->delayed_ref_csum_deletions += + btrfs_csum_bytes_to_leaves(fs_info, head_ref->num_bytes); } delayed_refs->num_heads++; delayed_refs->num_heads_ready++; atomic_inc(&delayed_refs->num_entries); - trans->delayed_ref_updates++; } if (qrecord_inserted_ret) *qrecord_inserted_ret = qrecord_inserted; @@ -837,8 +909,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans, } /* - * init_delayed_ref_common - Initialize the structure which represents a - * modification to a an extent. + * Initialize the structure which represents a modification to a an extent. * * @fs_info: Internal to the mounted filesystem mount structure. * @@ -863,90 +934,111 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans, */ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_node *ref, - u64 bytenr, u64 num_bytes, u64 ref_root, - int action, u8 ref_type) + struct btrfs_ref *generic_ref) { + int action = generic_ref->action; u64 seq = 0; if (action == BTRFS_ADD_DELAYED_EXTENT) action = BTRFS_ADD_DELAYED_REF; - if (is_fstree(ref_root)) + if (is_fstree(generic_ref->ref_root)) seq = atomic64_read(&fs_info->tree_mod_seq); refcount_set(&ref->refs, 1); - ref->bytenr = bytenr; - ref->num_bytes = num_bytes; + ref->bytenr = generic_ref->bytenr; + ref->num_bytes = generic_ref->num_bytes; ref->ref_mod = 1; ref->action = action; ref->seq = seq; - ref->type = ref_type; + ref->type = btrfs_ref_type(generic_ref); + ref->ref_root = generic_ref->ref_root; + ref->parent = generic_ref->parent; RB_CLEAR_NODE(&ref->ref_node); INIT_LIST_HEAD(&ref->add_list); + + if (generic_ref->type == BTRFS_REF_DATA) + ref->data_ref = generic_ref->data_ref; + else + ref->tree_ref = generic_ref->tree_ref; } -/* - * add a delayed tree ref. This does all of the accounting required - * to make sure the delayed ref is eventually processed before this - * transaction commits. - */ -int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, - struct btrfs_ref *generic_ref, - struct btrfs_delayed_extent_op *extent_op) +void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 mod_root, + bool skip_qgroup) +{ +#ifdef CONFIG_BTRFS_FS_REF_VERIFY + /* If @real_root not set, use @root as fallback */ + generic_ref->real_root = mod_root ?: generic_ref->ref_root; +#endif + generic_ref->tree_ref.level = level; + generic_ref->type = BTRFS_REF_METADATA; + if (skip_qgroup || !(is_fstree(generic_ref->ref_root) && + (!mod_root || is_fstree(mod_root)))) + generic_ref->skip_qgroup = true; + else + generic_ref->skip_qgroup = false; + +} + +void btrfs_init_data_ref(struct btrfs_ref *generic_ref, u64 ino, u64 offset, + u64 mod_root, bool skip_qgroup) +{ +#ifdef CONFIG_BTRFS_FS_REF_VERIFY + /* If @real_root not set, use @root as fallback */ + generic_ref->real_root = mod_root ?: generic_ref->ref_root; +#endif + generic_ref->data_ref.objectid = ino; + generic_ref->data_ref.offset = offset; + generic_ref->type = BTRFS_REF_DATA; + if (skip_qgroup || !(is_fstree(generic_ref->ref_root) && + (!mod_root || is_fstree(mod_root)))) + generic_ref->skip_qgroup = true; + else + generic_ref->skip_qgroup = false; +} + +static int add_delayed_ref(struct btrfs_trans_handle *trans, + struct btrfs_ref *generic_ref, + struct btrfs_delayed_extent_op *extent_op, + u64 reserved) { struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_delayed_tree_ref *ref; + struct btrfs_delayed_ref_node *node; struct btrfs_delayed_ref_head *head_ref; + struct btrfs_delayed_ref_head *new_head_ref; struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_qgroup_extent_record *record = NULL; bool qrecord_inserted; - bool is_system; - bool merged; int action = generic_ref->action; - int level = generic_ref->tree_ref.level; - u64 bytenr = generic_ref->bytenr; - u64 num_bytes = generic_ref->len; - u64 parent = generic_ref->parent; - u8 ref_type; - - is_system = (generic_ref->tree_ref.owning_root == BTRFS_CHUNK_TREE_OBJECTID); + bool merged; + int ret; - ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action); - ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); - if (!ref) + node = kmem_cache_alloc(btrfs_delayed_ref_node_cachep, GFP_NOFS); + if (!node) return -ENOMEM; head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS); if (!head_ref) { - kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); - return -ENOMEM; + ret = -ENOMEM; + goto free_node; } - if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && - !generic_ref->skip_qgroup) { + if (btrfs_qgroup_full_accounting(fs_info) && !generic_ref->skip_qgroup) { record = kzalloc(sizeof(*record), GFP_NOFS); if (!record) { - kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); - kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); - return -ENOMEM; + ret = -ENOMEM; + goto free_head_ref; + } + if (xa_reserve(&trans->transaction->delayed_refs.dirty_extents, + generic_ref->bytenr >> fs_info->sectorsize_bits, + GFP_NOFS)) { + ret = -ENOMEM; + goto free_record; } } - if (parent) - ref_type = BTRFS_SHARED_BLOCK_REF_KEY; - else - ref_type = BTRFS_TREE_BLOCK_REF_KEY; - - init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes, - generic_ref->tree_ref.owning_root, action, - ref_type); - ref->root = generic_ref->tree_ref.owning_root; - ref->parent = parent; - ref->level = level; - - init_delayed_ref_head(head_ref, record, bytenr, num_bytes, - generic_ref->tree_ref.owning_root, 0, action, - false, is_system); + init_delayed_ref_common(fs_info, node, generic_ref); + init_delayed_ref_head(head_ref, generic_ref, record, reserved); head_ref->extent_op = extent_op; delayed_refs = &trans->transaction->delayed_refs; @@ -956,10 +1048,16 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, * insert both the head node and the new ref without dropping * the spin lock */ - head_ref = add_delayed_ref_head(trans, head_ref, record, - action, &qrecord_inserted); + new_head_ref = add_delayed_ref_head(trans, head_ref, record, + action, &qrecord_inserted); + if (IS_ERR(new_head_ref)) { + spin_unlock(&delayed_refs->lock); + ret = PTR_ERR(new_head_ref); + goto free_record; + } + head_ref = new_head_ref; - merged = insert_delayed_ref(delayed_refs, head_ref, &ref->node); + merged = insert_delayed_ref(trans, head_ref, node); spin_unlock(&delayed_refs->lock); /* @@ -968,16 +1066,36 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, */ btrfs_update_delayed_refs_rsv(trans); - trace_add_delayed_tree_ref(fs_info, &ref->node, ref, - action == BTRFS_ADD_DELAYED_EXTENT ? - BTRFS_ADD_DELAYED_REF : action); + if (generic_ref->type == BTRFS_REF_DATA) + trace_add_delayed_data_ref(trans->fs_info, node); + else + trace_add_delayed_tree_ref(trans->fs_info, node); if (merged) - kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); + kmem_cache_free(btrfs_delayed_ref_node_cachep, node); if (qrecord_inserted) - btrfs_qgroup_trace_extent_post(trans, record); - + return btrfs_qgroup_trace_extent_post(trans, record); return 0; + +free_record: + kfree(record); +free_head_ref: + kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); +free_node: + kmem_cache_free(btrfs_delayed_ref_node_cachep, node); + return ret; +} + +/* + * Add a delayed tree ref. This does all of the accounting required to make sure + * the delayed ref is eventually processed before this transaction commits. + */ +int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, + struct btrfs_ref *generic_ref, + struct btrfs_delayed_extent_op *extent_op) +{ + ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action); + return add_delayed_ref(trans, generic_ref, extent_op, 0); } /* @@ -987,114 +1105,44 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, struct btrfs_ref *generic_ref, u64 reserved) { - struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_delayed_data_ref *ref; - struct btrfs_delayed_ref_head *head_ref; - struct btrfs_delayed_ref_root *delayed_refs; - struct btrfs_qgroup_extent_record *record = NULL; - bool qrecord_inserted; - int action = generic_ref->action; - bool merged; - u64 bytenr = generic_ref->bytenr; - u64 num_bytes = generic_ref->len; - u64 parent = generic_ref->parent; - u64 ref_root = generic_ref->data_ref.owning_root; - u64 owner = generic_ref->data_ref.ino; - u64 offset = generic_ref->data_ref.offset; - u8 ref_type; - - ASSERT(generic_ref->type == BTRFS_REF_DATA && action); - ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS); - if (!ref) - return -ENOMEM; - - if (parent) - ref_type = BTRFS_SHARED_DATA_REF_KEY; - else - ref_type = BTRFS_EXTENT_DATA_REF_KEY; - init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes, - ref_root, action, ref_type); - ref->root = ref_root; - ref->parent = parent; - ref->objectid = owner; - ref->offset = offset; - - - head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS); - if (!head_ref) { - kmem_cache_free(btrfs_delayed_data_ref_cachep, ref); - return -ENOMEM; - } - - if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) && - !generic_ref->skip_qgroup) { - record = kzalloc(sizeof(*record), GFP_NOFS); - if (!record) { - kmem_cache_free(btrfs_delayed_data_ref_cachep, ref); - kmem_cache_free(btrfs_delayed_ref_head_cachep, - head_ref); - return -ENOMEM; - } - } - - init_delayed_ref_head(head_ref, record, bytenr, num_bytes, ref_root, - reserved, action, true, false); - head_ref->extent_op = NULL; - - delayed_refs = &trans->transaction->delayed_refs; - spin_lock(&delayed_refs->lock); - - /* - * insert both the head node and the new ref without dropping - * the spin lock - */ - head_ref = add_delayed_ref_head(trans, head_ref, record, - action, &qrecord_inserted); - - merged = insert_delayed_ref(delayed_refs, head_ref, &ref->node); - spin_unlock(&delayed_refs->lock); - - /* - * Need to update the delayed_refs_rsv with any changes we may have - * made. - */ - btrfs_update_delayed_refs_rsv(trans); - - trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref, - action == BTRFS_ADD_DELAYED_EXTENT ? - BTRFS_ADD_DELAYED_REF : action); - if (merged) - kmem_cache_free(btrfs_delayed_data_ref_cachep, ref); - - - if (qrecord_inserted) - return btrfs_qgroup_trace_extent_post(trans, record); - return 0; + ASSERT(generic_ref->type == BTRFS_REF_DATA && generic_ref->action); + return add_delayed_ref(trans, generic_ref, NULL, reserved); } int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes, + u64 bytenr, u64 num_bytes, u8 level, struct btrfs_delayed_extent_op *extent_op) { struct btrfs_delayed_ref_head *head_ref; + struct btrfs_delayed_ref_head *head_ref_ret; struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_ref generic_ref = { + .type = BTRFS_REF_METADATA, + .action = BTRFS_UPDATE_DELAYED_HEAD, + .bytenr = bytenr, + .num_bytes = num_bytes, + .tree_ref.level = level, + }; head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS); if (!head_ref) return -ENOMEM; - init_delayed_ref_head(head_ref, NULL, bytenr, num_bytes, 0, 0, - BTRFS_UPDATE_DELAYED_HEAD, false, false); + init_delayed_ref_head(head_ref, &generic_ref, NULL, 0); head_ref->extent_op = extent_op; delayed_refs = &trans->transaction->delayed_refs; spin_lock(&delayed_refs->lock); - add_delayed_ref_head(trans, head_ref, NULL, BTRFS_UPDATE_DELAYED_HEAD, - NULL); - + head_ref_ret = add_delayed_ref_head(trans, head_ref, NULL, + BTRFS_UPDATE_DELAYED_HEAD, NULL); spin_unlock(&delayed_refs->lock); + if (IS_ERR(head_ref_ret)) { + kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); + return PTR_ERR(head_ref_ret); + } + /* * Need to update the delayed_refs_rsv with any changes we may have * made. @@ -1103,6 +1151,14 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, return 0; } +void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) +{ + if (refcount_dec_and_test(&ref->refs)) { + WARN_ON(!RB_EMPTY_NODE(&ref->ref_node)); + kmem_cache_free(btrfs_delayed_ref_node_cachep, ref); + } +} + /* * This does a simple search for the head node for a given extent. Returns the * head node if found, or NULL if not. @@ -1115,41 +1171,91 @@ btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 byt return find_ref_head(delayed_refs, bytenr, false); } +static int find_comp(struct btrfs_delayed_ref_node *entry, u64 root, u64 parent) +{ + int type = parent ? BTRFS_SHARED_BLOCK_REF_KEY : BTRFS_TREE_BLOCK_REF_KEY; + + if (type < entry->type) + return -1; + if (type > entry->type) + return 1; + + if (type == BTRFS_TREE_BLOCK_REF_KEY) { + if (root < entry->ref_root) + return -1; + if (root > entry->ref_root) + return 1; + } else { + if (parent < entry->parent) + return -1; + if (parent > entry->parent) + return 1; + } + return 0; +} + +/* + * Check to see if a given root/parent reference is attached to the head. This + * only checks for BTRFS_ADD_DELAYED_REF references that match, as that + * indicates the reference exists for the given root or parent. This is for + * tree blocks only. + * + * @head: the head of the bytenr we're searching. + * @root: the root objectid of the reference if it is a normal reference. + * @parent: the parent if this is a shared backref. + */ +bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head, + u64 root, u64 parent) +{ + struct rb_node *node; + bool found = false; + + lockdep_assert_held(&head->mutex); + + spin_lock(&head->lock); + node = head->ref_tree.rb_root.rb_node; + while (node) { + struct btrfs_delayed_ref_node *entry; + int ret; + + entry = rb_entry(node, struct btrfs_delayed_ref_node, ref_node); + ret = find_comp(entry, root, parent); + if (ret < 0) { + node = node->rb_left; + } else if (ret > 0) { + node = node->rb_right; + } else { + /* + * We only want to count ADD actions, as drops mean the + * ref doesn't exist. + */ + if (entry->action == BTRFS_ADD_DELAYED_REF) + found = true; + break; + } + } + spin_unlock(&head->lock); + return found; +} + void __cold btrfs_delayed_ref_exit(void) { kmem_cache_destroy(btrfs_delayed_ref_head_cachep); - kmem_cache_destroy(btrfs_delayed_tree_ref_cachep); - kmem_cache_destroy(btrfs_delayed_data_ref_cachep); + kmem_cache_destroy(btrfs_delayed_ref_node_cachep); kmem_cache_destroy(btrfs_delayed_extent_op_cachep); } int __init btrfs_delayed_ref_init(void) { - btrfs_delayed_ref_head_cachep = kmem_cache_create( - "btrfs_delayed_ref_head", - sizeof(struct btrfs_delayed_ref_head), 0, - SLAB_MEM_SPREAD, NULL); + btrfs_delayed_ref_head_cachep = KMEM_CACHE(btrfs_delayed_ref_head, 0); if (!btrfs_delayed_ref_head_cachep) goto fail; - btrfs_delayed_tree_ref_cachep = kmem_cache_create( - "btrfs_delayed_tree_ref", - sizeof(struct btrfs_delayed_tree_ref), 0, - SLAB_MEM_SPREAD, NULL); - if (!btrfs_delayed_tree_ref_cachep) - goto fail; - - btrfs_delayed_data_ref_cachep = kmem_cache_create( - "btrfs_delayed_data_ref", - sizeof(struct btrfs_delayed_data_ref), 0, - SLAB_MEM_SPREAD, NULL); - if (!btrfs_delayed_data_ref_cachep) + btrfs_delayed_ref_node_cachep = KMEM_CACHE(btrfs_delayed_ref_node, 0); + if (!btrfs_delayed_ref_node_cachep) goto fail; - btrfs_delayed_extent_op_cachep = kmem_cache_create( - "btrfs_delayed_extent_op", - sizeof(struct btrfs_delayed_extent_op), 0, - SLAB_MEM_SPREAD, NULL); + btrfs_delayed_extent_op_cachep = KMEM_CACHE(btrfs_delayed_extent_op, 0); if (!btrfs_delayed_extent_op_cachep) goto fail; |