diff options
Diffstat (limited to 'fs/btrfs/block-group.c')
-rw-r--r-- | fs/btrfs/block-group.c | 444 |
1 files changed, 266 insertions, 178 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 4427c1b835e8..42b86e0e707e 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -34,6 +34,19 @@ int btrfs_should_fragment_free_space(const struct btrfs_block_group *block_group } #endif +static inline bool has_unwritten_metadata(struct btrfs_block_group *block_group) +{ + /* The meta_write_pointer is available only on the zoned setup. */ + if (!btrfs_is_zoned(block_group->fs_info)) + return false; + + if (block_group->flags & BTRFS_BLOCK_GROUP_DATA) + return false; + + return block_group->start + block_group->alloc_offset > + block_group->meta_write_pointer; +} + /* * Return target flags in extended format or 0 if restripe for this chunk_type * is not in progress @@ -173,43 +186,41 @@ void btrfs_put_block_group(struct btrfs_block_group *cache) } } +static int btrfs_bg_start_cmp(const struct rb_node *new, + const struct rb_node *exist) +{ + const struct btrfs_block_group *new_bg = + rb_entry(new, struct btrfs_block_group, cache_node); + const struct btrfs_block_group *exist_bg = + rb_entry(exist, struct btrfs_block_group, cache_node); + + if (new_bg->start < exist_bg->start) + return -1; + if (new_bg->start > exist_bg->start) + return 1; + return 0; +} + /* * This adds the block group to the fs_info rb tree for the block group cache */ -static int btrfs_add_block_group_cache(struct btrfs_fs_info *info, - struct btrfs_block_group *block_group) +static int btrfs_add_block_group_cache(struct btrfs_block_group *block_group) { - struct rb_node **p; - struct rb_node *parent = NULL; - struct btrfs_block_group *cache; - bool leftmost = true; + struct btrfs_fs_info *fs_info = block_group->fs_info; + struct rb_node *exist; + int ret = 0; ASSERT(block_group->length != 0); - write_lock(&info->block_group_cache_lock); - p = &info->block_group_cache_tree.rb_root.rb_node; - - while (*p) { - parent = *p; - cache = rb_entry(parent, struct btrfs_block_group, cache_node); - if (block_group->start < cache->start) { - p = &(*p)->rb_left; - } else if (block_group->start > cache->start) { - p = &(*p)->rb_right; - leftmost = false; - } else { - write_unlock(&info->block_group_cache_lock); - return -EEXIST; - } - } - - rb_link_node(&block_group->cache_node, parent, p); - rb_insert_color_cached(&block_group->cache_node, - &info->block_group_cache_tree, leftmost); + write_lock(&fs_info->block_group_cache_lock); - write_unlock(&info->block_group_cache_lock); + exist = rb_find_add_cached(&block_group->cache_node, + &fs_info->block_group_cache_tree, btrfs_bg_start_cmp); + if (exist) + ret = -EEXIST; + write_unlock(&fs_info->block_group_cache_lock); - return 0; + return ret; } /* @@ -527,10 +538,9 @@ int btrfs_add_new_free_space(struct btrfs_block_group *block_group, u64 start, *total_added_ret = 0; while (start < end) { - if (!find_first_extent_bit(&info->excluded_extents, start, - &extent_start, &extent_end, - EXTENT_DIRTY | EXTENT_UPTODATE, - NULL)) + if (!btrfs_find_first_extent_bit(&info->excluded_extents, start, + &extent_start, &extent_end, + EXTENT_DIRTY, NULL)) break; if (extent_start <= start) { @@ -586,7 +596,7 @@ static int sample_block_group_extent_item(struct btrfs_caching_control *caching_ struct btrfs_root *extent_root; u64 search_offset; u64 search_end = block_group->start + block_group->length; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_key search_key; int ret = 0; @@ -628,7 +638,6 @@ static int sample_block_group_extent_item(struct btrfs_caching_control *caching_ lockdep_assert_held(&caching_ctl->mutex); lockdep_assert_held_read(&fs_info->commit_root_sem); - btrfs_free_path(path); return ret; } @@ -704,7 +713,7 @@ static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl) struct btrfs_block_group *block_group = caching_ctl->block_group; struct btrfs_fs_info *fs_info = block_group->fs_info; struct btrfs_root *extent_root; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct extent_buffer *leaf; struct btrfs_key key; u64 total_found = 0; @@ -740,8 +749,8 @@ static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl) path->reada = READA_FORWARD; key.objectid = last; - key.offset = 0; key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = 0; next: ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); @@ -787,8 +796,8 @@ next: if (key.objectid < last) { key.objectid = last; - key.offset = 0; key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = 0; btrfs_release_path(path); goto next; } @@ -831,14 +840,13 @@ next: block_group->start + block_group->length, NULL); out: - btrfs_free_path(path); return ret; } static inline void btrfs_free_excluded_extents(const struct btrfs_block_group *bg) { - clear_extent_bits(&bg->fs_info->excluded_extents, bg->start, - bg->start + bg->length - 1, EXTENT_UPTODATE); + btrfs_clear_extent_bits(&bg->fs_info->excluded_extents, bg->start, + bg->start + bg->length - 1, EXTENT_DIRTY); } static noinline void caching_thread(struct btrfs_work *work) @@ -1223,7 +1231,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, block_group->space_info->total_bytes -= block_group->length; block_group->space_info->bytes_readonly -= (block_group->length - block_group->zone_unusable); - btrfs_space_info_update_bytes_zone_unusable(fs_info, block_group->space_info, + btrfs_space_info_update_bytes_zone_unusable(block_group->space_info, -block_group->zone_unusable); block_group->space_info->disk_total -= block_group->length * factor; @@ -1249,6 +1257,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, goto out; spin_lock(&block_group->lock); + /* + * Hitting this WARN means we removed a block group with an unwritten + * region. It will cause "unable to find chunk map for logical" errors. + */ + if (WARN_ON(has_unwritten_metadata(block_group))) + btrfs_warn(fs_info, + "block group %llu is removed before metadata write out", + block_group->start); + set_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags); /* @@ -1396,8 +1413,7 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) if (btrfs_is_zoned(cache->fs_info)) { /* Migrate zone_unusable bytes to readonly */ sinfo->bytes_readonly += cache->zone_unusable; - btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo, - -cache->zone_unusable); + btrfs_space_info_update_bytes_zone_unusable(sinfo, -cache->zone_unusable); cache->zone_unusable = 0; } cache->ro++; @@ -1424,9 +1440,8 @@ static bool clean_pinned_extents(struct btrfs_trans_handle *trans, int ret; spin_lock(&fs_info->trans_lock); - if (trans->transaction->list.prev != &fs_info->trans_list) { - prev_trans = list_last_entry(&trans->transaction->list, - struct btrfs_transaction, list); + if (!list_is_first(&trans->transaction->list, &fs_info->trans_list)) { + prev_trans = list_prev_entry(trans->transaction, list); refcount_inc(&prev_trans->use_count); } spin_unlock(&fs_info->trans_lock); @@ -1443,14 +1458,14 @@ static bool clean_pinned_extents(struct btrfs_trans_handle *trans, */ mutex_lock(&fs_info->unused_bg_unpin_mutex); if (prev_trans) { - ret = clear_extent_bits(&prev_trans->pinned_extents, start, end, - EXTENT_DIRTY); + ret = btrfs_clear_extent_bits(&prev_trans->pinned_extents, start, end, + EXTENT_DIRTY); if (ret) goto out; } - ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end, - EXTENT_DIRTY); + ret = btrfs_clear_extent_bits(&trans->transaction->pinned_extents, start, end, + EXTENT_DIRTY); out: mutex_unlock(&fs_info->unused_bg_unpin_mutex); if (prev_trans) @@ -1460,6 +1475,32 @@ out: } /* + * Link the block_group to a list via bg_list. + * + * @bg: The block_group to link to the list. + * @list: The list to link it to. + * + * Use this rather than list_add_tail() directly to ensure proper respect + * to locking and refcounting. + * + * Returns: true if the bg was linked with a refcount bump and false otherwise. + */ +static bool btrfs_link_bg_list(struct btrfs_block_group *bg, struct list_head *list) +{ + struct btrfs_fs_info *fs_info = bg->fs_info; + bool added = false; + + spin_lock(&fs_info->unused_bgs_lock); + if (list_empty(&bg->bg_list)) { + btrfs_get_block_group(bg); + list_add_tail(&bg->bg_list, list); + added = true; + } + spin_unlock(&fs_info->unused_bgs_lock); + return added; +} + +/* * Process the unused_bgs list and remove any that don't have any allocated * space inside of them. */ @@ -1567,15 +1608,15 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * needing to allocate extents from the block group. */ used = btrfs_space_info_used(space_info, true); - if (space_info->total_bytes - block_group->length < used && - block_group->zone_unusable < block_group->length) { + if ((space_info->total_bytes - block_group->length < used && + block_group->zone_unusable < block_group->length) || + has_unwritten_metadata(block_group)) { /* * Add a reference for the list, compensate for the ref * drop under the "next" label for the * fs_info->unused_bgs list. */ - btrfs_get_block_group(block_group); - list_add_tail(&block_group->bg_list, &retry_list); + btrfs_link_bg_list(block_group, &retry_list); trace_btrfs_skip_unused_block_group(block_group); spin_unlock(&block_group->lock); @@ -1598,8 +1639,10 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) ret = btrfs_zone_finish(block_group); if (ret < 0) { btrfs_dec_block_group_ro(block_group); - if (ret == -EAGAIN) + if (ret == -EAGAIN) { + btrfs_link_bg_list(block_group, &retry_list); ret = 0; + } goto next; } @@ -1645,8 +1688,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) spin_lock(&space_info->lock); spin_lock(&block_group->lock); - btrfs_space_info_update_bytes_pinned(fs_info, space_info, - -block_group->pinned); + btrfs_space_info_update_bytes_pinned(space_info, -block_group->pinned); space_info->bytes_readonly += block_group->pinned; block_group->pinned = 0; @@ -1827,7 +1869,8 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) list_sort(NULL, &fs_info->reclaim_bgs, reclaim_bgs_cmp); while (!list_empty(&fs_info->reclaim_bgs)) { u64 zone_unusable; - u64 reclaimed; + u64 used; + u64 reserved; int ret = 0; bg = list_first_entry(&fs_info->reclaim_bgs, @@ -1891,6 +1934,17 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) up_write(&space_info->groups_sem); goto next; } + + /* + * Cache the zone_unusable value before turning the block group + * to read only. As soon as the block group is read only it's + * zone_unusable value gets moved to the block group's read-only + * bytes and isn't available for calculations anymore. We also + * cache it before unlocking the block group, to prevent races + * (reports from KCSAN and such tools) with tasks updating it. + */ + zone_unusable = bg->zone_unusable; + spin_unlock(&bg->lock); spin_unlock(&space_info->lock); @@ -1907,31 +1961,47 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) goto next; } - /* - * Cache the zone_unusable value before turning the block group - * to read only. As soon as the blog group is read only it's - * zone_unusable value gets moved to the block group's read-only - * bytes and isn't available for calculations anymore. - */ - zone_unusable = bg->zone_unusable; ret = inc_block_group_ro(bg, 0); up_write(&space_info->groups_sem); if (ret < 0) goto next; + /* + * The amount of bytes reclaimed corresponds to the sum of the + * "used" and "reserved" counters. We have set the block group + * to RO above, which prevents reservations from happening but + * we may have existing reservations for which allocation has + * not yet been done - btrfs_update_block_group() was not yet + * called, which is where we will transfer a reserved extent's + * size from the "reserved" counter to the "used" counter - this + * happens when running delayed references. When we relocate the + * chunk below, relocation first flushes dellaloc, waits for + * ordered extent completion (which is where we create delayed + * references for data extents) and commits the current + * transaction (which runs delayed references), and only after + * it does the actual work to move extents out of the block + * group. So the reported amount of reclaimed bytes is + * effectively the sum of the 'used' and 'reserved' counters. + */ + spin_lock(&bg->lock); + used = bg->used; + reserved = bg->reserved; + spin_unlock(&bg->lock); + btrfs_info(fs_info, - "reclaiming chunk %llu with %llu%% used %llu%% unusable", + "reclaiming chunk %llu with %llu%% used %llu%% reserved %llu%% unusable", bg->start, - div64_u64(bg->used * 100, bg->length), + div64_u64(used * 100, bg->length), + div64_u64(reserved * 100, bg->length), div64_u64(zone_unusable * 100, bg->length)); trace_btrfs_reclaim_block_group(bg); - reclaimed = bg->used; ret = btrfs_relocate_chunk(fs_info, bg->start); if (ret) { btrfs_dec_block_group_ro(bg); btrfs_err(fs_info, "error relocating chunk %llu", bg->start); - reclaimed = 0; + used = 0; + reserved = 0; spin_lock(&space_info->lock); space_info->reclaim_errors++; if (READ_ONCE(space_info->periodic_reclaim)) @@ -1940,24 +2010,13 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) } spin_lock(&space_info->lock); space_info->reclaim_count++; - space_info->reclaim_bytes += reclaimed; + space_info->reclaim_bytes += used; + space_info->reclaim_bytes += reserved; spin_unlock(&space_info->lock); next: - if (ret && !READ_ONCE(space_info->periodic_reclaim)) { - /* Refcount held by the reclaim_bgs list after splice. */ - spin_lock(&fs_info->unused_bgs_lock); - /* - * This block group might be added to the unused list - * during the above process. Move it back to the - * reclaim list otherwise. - */ - if (list_empty(&bg->bg_list)) { - btrfs_get_block_group(bg); - list_add_tail(&bg->bg_list, &retry_list); - } - spin_unlock(&fs_info->unused_bgs_lock); - } + if (ret && !READ_ONCE(space_info->periodic_reclaim)) + btrfs_link_bg_list(bg, &retry_list); btrfs_put_block_group(bg); mutex_unlock(&fs_info->reclaim_bgs_lock); @@ -1997,13 +2056,8 @@ void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg) { struct btrfs_fs_info *fs_info = bg->fs_info; - spin_lock(&fs_info->unused_bgs_lock); - if (list_empty(&bg->bg_list)) { - btrfs_get_block_group(bg); + if (btrfs_link_bg_list(bg, &fs_info->reclaim_bgs)) trace_btrfs_add_reclaim_block_group(bg); - list_add_tail(&bg->bg_list, &fs_info->reclaim_bgs); - } - spin_unlock(&fs_info->unused_bgs_lock); } static int read_bg_from_eb(struct btrfs_fs_info *fs_info, const struct btrfs_key *key, @@ -2186,9 +2240,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache) if (cache->start < BTRFS_SUPER_INFO_OFFSET) { stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->start; cache->bytes_super += stripe_len; - ret = set_extent_bit(&fs_info->excluded_extents, cache->start, - cache->start + stripe_len - 1, - EXTENT_UPTODATE, NULL); + ret = btrfs_set_extent_bit(&fs_info->excluded_extents, cache->start, + cache->start + stripe_len - 1, + EXTENT_DIRTY, NULL); if (ret) return ret; } @@ -2214,9 +2268,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache) cache->start + cache->length - logical[nr]); cache->bytes_super += len; - ret = set_extent_bit(&fs_info->excluded_extents, logical[nr], - logical[nr] + len - 1, - EXTENT_UPTODATE, NULL); + ret = btrfs_set_extent_bit(&fs_info->excluded_extents, + logical[nr], logical[nr] + len - 1, + EXTENT_DIRTY, NULL); if (ret) { kfree(logical); return ret; @@ -2341,6 +2395,7 @@ static int read_one_block_group(struct btrfs_fs_info *info, cache->commit_used = cache->used; cache->flags = btrfs_stack_block_group_flags(bgi); cache->global_root_id = btrfs_stack_block_group_chunk_objectid(bgi); + cache->space_info = btrfs_find_space_info(info, cache->flags); set_free_space_tree_thresholds(cache); @@ -2414,11 +2469,12 @@ static int read_one_block_group(struct btrfs_fs_info *info, goto error; } - ret = btrfs_add_block_group_cache(info, cache); + ret = btrfs_add_block_group_cache(cache); if (ret) { btrfs_remove_free_space_cache(cache); goto error; } + trace_btrfs_add_block_group(info, cache, 0); btrfs_add_bg_to_space_info(info, cache); @@ -2463,7 +2519,8 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info) bg->cached = BTRFS_CACHE_FINISHED; bg->used = map->chunk_len; bg->flags = map->type; - ret = btrfs_add_block_group_cache(fs_info, bg); + bg->space_info = btrfs_find_space_info(fs_info, bg->flags); + ret = btrfs_add_block_group_cache(bg); /* * We may have some valid block group cache added already, in * that case we skip to the next one. @@ -2513,8 +2570,8 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) return fill_dummy_bgs(info); key.objectid = 0; - key.offset = 0; key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + key.offset = 0; path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -2645,7 +2702,7 @@ static int insert_dev_extent(struct btrfs_trans_handle *trans, { struct btrfs_fs_info *fs_info = device->fs_info; struct btrfs_root *root = fs_info->dev_root; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct btrfs_dev_extent *extent; struct extent_buffer *leaf; struct btrfs_key key; @@ -2662,7 +2719,7 @@ static int insert_dev_extent(struct btrfs_trans_handle *trans, key.offset = start; ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*extent)); if (ret) - goto out; + return ret; leaf = path->nodes[0]; extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent); @@ -2670,11 +2727,8 @@ static int insert_dev_extent(struct btrfs_trans_handle *trans, btrfs_set_dev_extent_chunk_objectid(leaf, extent, BTRFS_FIRST_CHUNK_TREE_OBJECTID); btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset); - btrfs_set_dev_extent_length(leaf, extent, num_bytes); - btrfs_mark_buffer_dirty(trans, leaf); -out: - btrfs_free_path(path); + return ret; } @@ -2776,8 +2830,12 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans) /* Already aborted the transaction if it failed. */ next: btrfs_dec_delayed_refs_rsv_bg_inserts(fs_info); + + spin_lock(&fs_info->unused_bgs_lock); list_del_init(&block_group->bg_list); clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags); + btrfs_put_block_group(block_group); + spin_unlock(&fs_info->unused_bgs_lock); /* * If the block group is still unused, add it to the list of @@ -2835,8 +2893,8 @@ static u64 calculate_global_root_id(const struct btrfs_fs_info *fs_info, u64 off } struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans, - u64 type, - u64 chunk_offset, u64 size) + struct btrfs_space_info *space_info, + u64 type, u64 chunk_offset, u64 size) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_group *cache; @@ -2890,10 +2948,10 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran * assigned to our block group. We want our bg to be added to the rbtree * with its ->space_info set. */ - cache->space_info = btrfs_find_space_info(fs_info, cache->flags); + cache->space_info = space_info; ASSERT(cache->space_info); - ret = btrfs_add_block_group_cache(fs_info, cache); + ret = btrfs_add_block_group_cache(cache); if (ret) { btrfs_remove_free_space_cache(cache); btrfs_put_block_group(cache); @@ -2915,7 +2973,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran } #endif - list_add_tail(&cache->bg_list, &trans->new_bgs); + btrfs_link_bg_list(cache, &trans->new_bgs); btrfs_inc_delayed_refs_rsv_bg_inserts(fs_info); set_avail_alloc_bits(fs_info, type); @@ -2935,6 +2993,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache, bool do_chunk_alloc) { struct btrfs_fs_info *fs_info = cache->fs_info; + struct btrfs_space_info *space_info = cache->space_info; struct btrfs_trans_handle *trans; struct btrfs_root *root = btrfs_block_group_root(fs_info); u64 alloc_flags; @@ -2987,7 +3046,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache, */ alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags); if (alloc_flags != cache->flags) { - ret = btrfs_chunk_alloc(trans, alloc_flags, + ret = btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE); /* * ENOSPC is allowed here, we may have enough space @@ -3015,15 +3074,15 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache, (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM)) goto unlock_out; - alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags); - ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); + alloc_flags = btrfs_get_alloc_profile(fs_info, space_info->flags); + ret = btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE); if (ret < 0) goto out; /* * We have allocated a new chunk. We also need to activate that chunk to * grant metadata tickets for zoned filesystem. */ - ret = btrfs_zoned_activate_one_bg(fs_info, cache->space_info, true); + ret = btrfs_zoned_activate_one_bg(fs_info, space_info, true); if (ret < 0) goto out; @@ -3060,8 +3119,7 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache) (cache->alloc_offset - cache->used - cache->pinned - cache->reserved) + (cache->length - cache->zone_capacity); - btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo, - cache->zone_unusable); + btrfs_space_info_update_bytes_zone_unusable(sinfo, cache->zone_unusable); sinfo->bytes_readonly -= cache->zone_unusable; } num_bytes = cache->length - cache->reserved - @@ -3123,7 +3181,6 @@ static int update_block_group_item(struct btrfs_trans_handle *trans, cache->global_root_id); btrfs_set_stack_block_group_flags(&bgi, cache->flags); write_extent_buffer(leaf, &bgi, bi, sizeof(bgi)); - btrfs_mark_buffer_dirty(trans, leaf); fail: btrfs_release_path(path); /* @@ -3313,7 +3370,7 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans) struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_block_group *cache, *tmp; struct btrfs_transaction *cur_trans = trans->transaction; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); if (list_empty(&cur_trans->dirty_bgs) || !btrfs_test_opt(fs_info, SPACE_CACHE)) @@ -3330,7 +3387,6 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans) cache_save_setup(cache, trans, path); } - btrfs_free_path(path); return 0; } @@ -3353,7 +3409,7 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans) struct btrfs_transaction *cur_trans = trans->transaction; int ret = 0; int should_put; - struct btrfs_path *path = NULL; + BTRFS_PATH_AUTO_FREE(path); LIST_HEAD(dirty); struct list_head *io = &cur_trans->io_bgs; int loops = 0; @@ -3508,7 +3564,6 @@ out: btrfs_cleanup_dirty_bgs(cur_trans, fs_info); } - btrfs_free_path(path); return ret; } @@ -3519,7 +3574,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) struct btrfs_transaction *cur_trans = trans->transaction; int ret = 0; int should_put; - struct btrfs_path *path; + BTRFS_PATH_AUTO_FREE(path); struct list_head *io = &cur_trans->io_bgs; path = btrfs_alloc_path(); @@ -3631,7 +3686,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) btrfs_put_block_group(cache); } - btrfs_free_path(path); return ret; } @@ -3699,7 +3753,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, old_val -= num_bytes; cache->used = old_val; cache->pinned += num_bytes; - btrfs_space_info_update_bytes_pinned(info, space_info, num_bytes); + btrfs_space_info_update_bytes_pinned(space_info, num_bytes); space_info->bytes_used -= num_bytes; space_info->disk_used -= num_bytes * factor; if (READ_ONCE(space_info->periodic_reclaim)) @@ -3710,8 +3764,8 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, spin_unlock(&cache->lock); spin_unlock(&space_info->lock); - set_extent_bit(&trans->transaction->pinned_extents, bytenr, - bytenr + num_bytes - 1, EXTENT_DIRTY, NULL); + btrfs_set_extent_bit(&trans->transaction->pinned_extents, bytenr, + bytenr + num_bytes - 1, EXTENT_DIRTY, NULL); } spin_lock(&trans->transaction->dirty_bgs_lock); @@ -3781,8 +3835,7 @@ int btrfs_add_reserved_bytes(struct btrfs_block_group *cache, space_info->bytes_reserved += num_bytes; trace_btrfs_space_reservation(cache->fs_info, "space_info", space_info->flags, num_bytes, 1); - btrfs_space_info_update_bytes_may_use(cache->fs_info, - space_info, -ram_bytes); + btrfs_space_info_update_bytes_may_use(space_info, -ram_bytes); if (delalloc) cache->delalloc_bytes += num_bytes; @@ -3801,17 +3854,17 @@ out: /* * Update the block_group and space info counters. * - * @cache: The cache we are manipulating - * @num_bytes: The number of bytes in question - * @delalloc: The blocks are allocated for the delalloc write + * @cache: The cache we are manipulating. + * @num_bytes: The number of bytes in question. + * @is_delalloc: Whether the blocks are allocated for a delalloc write. * * This is called by somebody who is freeing space that was never actually used * on disk. For example if you reserve some space for a new leaf in transaction * A and before transaction A commits you free that leaf, you call this with * reserve set to 0 in order to clear the reservation. */ -void btrfs_free_reserved_bytes(struct btrfs_block_group *cache, - u64 num_bytes, int delalloc) +void btrfs_free_reserved_bytes(struct btrfs_block_group *cache, u64 num_bytes, + bool is_delalloc) { struct btrfs_space_info *space_info = cache->space_info; @@ -3825,7 +3878,7 @@ void btrfs_free_reserved_bytes(struct btrfs_block_group *cache, space_info->bytes_reserved -= num_bytes; space_info->max_extent_size = 0; - if (delalloc) + if (is_delalloc) cache->delalloc_bytes -= num_bytes; spin_unlock(&cache->lock); @@ -3844,14 +3897,14 @@ static void force_metadata_allocation(struct btrfs_fs_info *info) } } -static int should_alloc_chunk(const struct btrfs_fs_info *fs_info, - const struct btrfs_space_info *sinfo, int force) +static bool should_alloc_chunk(const struct btrfs_fs_info *fs_info, + const struct btrfs_space_info *sinfo, int force) { u64 bytes_used = btrfs_space_info_used(sinfo, false); u64 thresh; if (force == CHUNK_ALLOC_FORCE) - return 1; + return true; /* * in limited mode, we want to have some free space up to @@ -3862,22 +3915,31 @@ static int should_alloc_chunk(const struct btrfs_fs_info *fs_info, thresh = max_t(u64, SZ_64M, mult_perc(thresh, 1)); if (sinfo->total_bytes - bytes_used < thresh) - return 1; + return true; } if (bytes_used + SZ_2M < mult_perc(sinfo->total_bytes, 80)) - return 0; - return 1; + return false; + return true; } int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type) { u64 alloc_flags = btrfs_get_alloc_profile(trans->fs_info, type); + struct btrfs_space_info *space_info; - return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); + space_info = btrfs_find_space_info(trans->fs_info, type); + if (!space_info) { + DEBUG_WARN(); + return -EINVAL; + } + + return btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE); } -static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags) +static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans, + struct btrfs_space_info *space_info, + u64 flags) { struct btrfs_block_group *bg; int ret; @@ -3890,7 +3952,7 @@ static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans */ check_system_chunk(trans, flags); - bg = btrfs_create_chunk(trans, flags); + bg = btrfs_create_chunk(trans, space_info, flags); if (IS_ERR(bg)) { ret = PTR_ERR(bg); goto out; @@ -3938,8 +4000,16 @@ static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans if (ret == -ENOSPC) { const u64 sys_flags = btrfs_system_alloc_profile(trans->fs_info); struct btrfs_block_group *sys_bg; + struct btrfs_space_info *sys_space_info; - sys_bg = btrfs_create_chunk(trans, sys_flags); + sys_space_info = btrfs_find_space_info(trans->fs_info, sys_flags); + if (!sys_space_info) { + ret = -EINVAL; + btrfs_abort_transaction(trans, ret); + goto out; + } + + sys_bg = btrfs_create_chunk(trans, sys_space_info, sys_flags); if (IS_ERR(sys_bg)) { ret = PTR_ERR(sys_bg); btrfs_abort_transaction(trans, ret); @@ -4070,6 +4140,8 @@ out: * * This function, btrfs_chunk_alloc(), belongs to phase 1. * + * @space_info: specify which space_info the new chunk should belong to. + * * If @force is CHUNK_ALLOC_FORCE: * - return 1 if it successfully allocates a chunk, * - return errors including -ENOSPC otherwise. @@ -4078,11 +4150,11 @@ out: * - return 1 if it successfully allocates a chunk, * - return errors including -ENOSPC otherwise. */ -int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, +int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, + struct btrfs_space_info *space_info, u64 flags, enum btrfs_chunk_alloc_enum force) { struct btrfs_fs_info *fs_info = trans->fs_info; - struct btrfs_space_info *space_info; struct btrfs_block_group *ret_bg; bool wait_for_alloc = false; bool should_alloc = false; @@ -4121,9 +4193,6 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, if (flags & BTRFS_BLOCK_GROUP_SYSTEM) return -ENOSPC; - space_info = btrfs_find_space_info(fs_info, flags); - ASSERT(space_info); - do { spin_lock(&space_info->lock); if (force < space_info->force_alloc) @@ -4184,7 +4253,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, force_metadata_allocation(fs_info); } - ret_bg = do_chunk_alloc(trans, flags); + ret_bg = do_chunk_alloc(trans, space_info, flags); trans->allocating_chunk = false; if (IS_ERR(ret_bg)) { @@ -4260,6 +4329,10 @@ static void reserve_chunk_space(struct btrfs_trans_handle *trans, if (left < bytes) { u64 flags = btrfs_system_alloc_profile(fs_info); struct btrfs_block_group *bg; + struct btrfs_space_info *space_info; + + space_info = btrfs_find_space_info(fs_info, flags); + ASSERT(space_info); /* * Ignore failure to create system chunk. We might end up not @@ -4267,7 +4340,7 @@ static void reserve_chunk_space(struct btrfs_trans_handle *trans, * the paths we visit in the chunk tree (they were already COWed * or created in the current transaction for example). */ - bg = btrfs_create_chunk(trans, flags); + bg = btrfs_create_chunk(trans, space_info, flags); if (IS_ERR(bg)) { ret = PTR_ERR(bg); } else { @@ -4375,6 +4448,43 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info) } } +static void check_removing_space_info(struct btrfs_space_info *space_info) +{ + struct btrfs_fs_info *info = space_info->fs_info; + + if (space_info->subgroup_id == BTRFS_SUB_GROUP_PRIMARY) { + /* This is a top space_info, proceed with its children first. */ + for (int i = 0; i < BTRFS_SPACE_INFO_SUB_GROUP_MAX; i++) { + if (space_info->sub_group[i]) { + check_removing_space_info(space_info->sub_group[i]); + kfree(space_info->sub_group[i]); + space_info->sub_group[i] = NULL; + } + } + } + + /* + * Do not hide this behind enospc_debug, this is actually important and + * indicates a real bug if this happens. + */ + if (WARN_ON(space_info->bytes_pinned > 0 || space_info->bytes_may_use > 0)) + btrfs_dump_space_info(info, space_info, 0, 0); + + /* + * If there was a failure to cleanup a log tree, very likely due to an + * IO failure on a writeback attempt of one or more of its extent + * buffers, we could not do proper (and cheap) unaccounting of their + * reserved space, so don't warn on bytes_reserved > 0 in that case. + */ + if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) || + !BTRFS_FS_LOG_CLEANUP_ERROR(info)) { + if (WARN_ON(space_info->bytes_reserved > 0)) + btrfs_dump_space_info(info, space_info, 0, 0); + } + + WARN_ON(space_info->reclaim_size > 0); +} + /* * Must be called only after stopping all workers, since we could have block * group caching kthreads running, and therefore they could race with us if we @@ -4400,8 +4510,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) write_lock(&info->block_group_cache_lock); while (!list_empty(&info->caching_block_groups)) { - caching_ctl = list_entry(info->caching_block_groups.next, - struct btrfs_caching_control, list); + caching_ctl = list_first_entry(&info->caching_block_groups, + struct btrfs_caching_control, list); list_del(&caching_ctl->list); btrfs_put_caching_control(caching_ctl); } @@ -4472,32 +4582,10 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) btrfs_release_global_block_rsv(info); while (!list_empty(&info->space_info)) { - space_info = list_entry(info->space_info.next, - struct btrfs_space_info, - list); - - /* - * Do not hide this behind enospc_debug, this is actually - * important and indicates a real bug if this happens. - */ - if (WARN_ON(space_info->bytes_pinned > 0 || - space_info->bytes_may_use > 0)) - btrfs_dump_space_info(info, space_info, 0, 0); - - /* - * If there was a failure to cleanup a log tree, very likely due - * to an IO failure on a writeback attempt of one or more of its - * extent buffers, we could not do proper (and cheap) unaccounting - * of their reserved space, so don't warn on bytes_reserved > 0 in - * that case. - */ - if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) || - !BTRFS_FS_LOG_CLEANUP_ERROR(info)) { - if (WARN_ON(space_info->bytes_reserved > 0)) - btrfs_dump_space_info(info, space_info, 0, 0); - } + space_info = list_first_entry(&info->space_info, + struct btrfs_space_info, list); - WARN_ON(space_info->reclaim_size > 0); + check_removing_space_info(space_info); list_del(&space_info->list); btrfs_sysfs_remove_space_info(space_info); } |