summary refs log tree commit diff
path: root/fs/btrfs/block-group.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/block-group.c')
-rw-r--r-- fs/btrfs/block-group.c 107
1 files changed, 74 insertions, 33 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 4423d8b716a5..83a196521670 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -34,6 +34,19 @@ int btrfs_should_fragment_free_space(const struct btrfs_block_group *block_group
}
#endif
+static inline bool has_unwritten_metadata(struct btrfs_block_group *block_group)
+{
+ /* The meta_write_pointer is available only on the zoned setup. */
+ if (!btrfs_is_zoned(block_group->fs_info))
+ return false;
+ /* Block groups flagged DATA never count as having unwritten metadata. */
+ if (block_group->flags & BTRFS_BLOCK_GROUP_DATA)
+ return false;
+ /* Unwritten if start + alloc_offset lies beyond meta_write_pointer. */
+ return block_group->start + block_group->alloc_offset >
+ block_group->meta_write_pointer;
+}
+
/*
* Return target flags in extended format or 0 if restripe for this chunk_type
* is not in progress
@@ -1249,6 +1262,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
goto out;
spin_lock(&block_group->lock);
+ /*
+ * Hitting this WARN means we removed a block group with an unwritten
+ * region. It will cause "unable to find chunk map for logical" errors.
+ */
+ if (WARN_ON(has_unwritten_metadata(block_group)))
+ btrfs_warn(fs_info,
+ "block group %llu is removed before metadata write out",
+ block_group->start);
+
set_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags);
/*
@@ -1460,6 +1482,32 @@ out:
}
/*
+ * Link the block_group to the tail of a list via bg_list.
+ *
+ * @bg: The block_group to link to the list.
+ * @list: The list to link it to.
+ *
+ * Use this rather than list_add_tail() directly: it takes unused_bgs_lock and
+ * only links (and takes a reference) when bg_list is not already on a list.
+ *
+ * Returns: true if the bg was linked with a refcount bump and false otherwise.
+ */
+static bool btrfs_link_bg_list(struct btrfs_block_group *bg, struct list_head *list)
+{
+ struct btrfs_fs_info *fs_info = bg->fs_info;
+ bool added = false;
+
+ spin_lock(&fs_info->unused_bgs_lock);
+ if (list_empty(&bg->bg_list)) {
+ btrfs_get_block_group(bg);
+ list_add_tail(&bg->bg_list, list);
+ added = true;
+ }
+ spin_unlock(&fs_info->unused_bgs_lock);
+ return added;
+}
+
+/*
* Process the unused_bgs list and remove any that don't have any allocated
* space inside of them.
*/
@@ -1567,15 +1615,15 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* needing to allocate extents from the block group.
*/
used = btrfs_space_info_used(space_info, true);
- if (space_info->total_bytes - block_group->length < used &&
- block_group->zone_unusable < block_group->length) {
+ if ((space_info->total_bytes - block_group->length < used &&
+ block_group->zone_unusable < block_group->length) ||
+ has_unwritten_metadata(block_group)) {
/*
* Add a reference for the list, compensate for the ref
* drop under the "next" label for the
* fs_info->unused_bgs list.
*/
- btrfs_get_block_group(block_group);
- list_add_tail(&block_group->bg_list, &retry_list);
+ btrfs_link_bg_list(block_group, &retry_list);
trace_btrfs_skip_unused_block_group(block_group);
spin_unlock(&block_group->lock);
@@ -1598,8 +1646,10 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
ret = btrfs_zone_finish(block_group);
if (ret < 0) {
btrfs_dec_block_group_ro(block_group);
- if (ret == -EAGAIN)
+ if (ret == -EAGAIN) {
+ btrfs_link_bg_list(block_group, &retry_list);
ret = 0;
+ }
goto next;
}
@@ -1891,6 +1941,17 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
up_write(&space_info->groups_sem);
goto next;
}
+
+ /*
+ * Cache the zone_unusable value before turning the block group
+ * to read only. As soon as the block group is read only its
+ * zone_unusable value gets moved to the block group's read-only
+ * bytes and isn't available for calculations anymore. We also
+ * cache it before unlocking the block group, to prevent races
+ * (reports from KCSAN and such tools) with tasks updating it.
+ */
+ zone_unusable = bg->zone_unusable;
+
spin_unlock(&bg->lock);
spin_unlock(&space_info->lock);
@@ -1907,13 +1968,6 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
goto next;
}
- /*
- * Cache the zone_unusable value before turning the block group
- * to read only. As soon as the blog group is read only it's
- * zone_unusable value gets moved to the block group's read-only
- * bytes and isn't available for calculations anymore.
- */
- zone_unusable = bg->zone_unusable;
ret = inc_block_group_ro(bg, 0);
up_write(&space_info->groups_sem);
if (ret < 0)
@@ -1944,20 +1998,8 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
spin_unlock(&space_info->lock);
next:
- if (ret && !READ_ONCE(space_info->periodic_reclaim)) {
- /* Refcount held by the reclaim_bgs list after splice. */
- spin_lock(&fs_info->unused_bgs_lock);
- /*
- * This block group might be added to the unused list
- * during the above process. Move it back to the
- * reclaim list otherwise.
- */
- if (list_empty(&bg->bg_list)) {
- btrfs_get_block_group(bg);
- list_add_tail(&bg->bg_list, &retry_list);
- }
- spin_unlock(&fs_info->unused_bgs_lock);
- }
+ if (ret && !READ_ONCE(space_info->periodic_reclaim))
+ btrfs_link_bg_list(bg, &retry_list);
btrfs_put_block_group(bg);
mutex_unlock(&fs_info->reclaim_bgs_lock);
@@ -1997,13 +2039,8 @@ void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
- spin_lock(&fs_info->unused_bgs_lock);
- if (list_empty(&bg->bg_list)) {
- btrfs_get_block_group(bg);
+ if (btrfs_link_bg_list(bg, &fs_info->reclaim_bgs))
trace_btrfs_add_reclaim_block_group(bg);
- list_add_tail(&bg->bg_list, &fs_info->reclaim_bgs);
- }
- spin_unlock(&fs_info->unused_bgs_lock);
}
static int read_bg_from_eb(struct btrfs_fs_info *fs_info, const struct btrfs_key *key,
@@ -2776,8 +2813,12 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
/* Already aborted the transaction if it failed. */
next:
btrfs_dec_delayed_refs_rsv_bg_inserts(fs_info);
+
+ spin_lock(&fs_info->unused_bgs_lock);
list_del_init(&block_group->bg_list);
clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags);
+ btrfs_put_block_group(block_group);
+ spin_unlock(&fs_info->unused_bgs_lock);
/*
* If the block group is still unused, add it to the list of
@@ -2915,7 +2956,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
}
#endif
- list_add_tail(&cache->bg_list, &trans->new_bgs);
+ btrfs_link_bg_list(cache, &trans->new_bgs);
btrfs_inc_delayed_refs_rsv_bg_inserts(fs_info);
set_avail_alloc_bits(fs_info, type);