summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/block-group.c105
-rw-r--r--fs/btrfs/block-group.h20
-rw-r--r--fs/btrfs/extent-tree.c71
-rw-r--r--fs/btrfs/extent-tree.h3
-rw-r--r--include/trace/events/btrfs.h9
5 files changed, 155 insertions, 53 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index e90800388a41..6557b1b7f89a 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/sizes.h>
#include <linux/list_sort.h>
#include "misc.h"
#include "ctree.h"
@@ -3379,6 +3380,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
cache->space_info->disk_used -= num_bytes * factor;
reclaim = should_reclaim_block_group(cache, num_bytes);
+
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
@@ -3433,32 +3435,42 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
* reservation and return -EAGAIN, otherwise this function always succeeds.
*/
int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
- u64 ram_bytes, u64 num_bytes, int delalloc)
+ u64 ram_bytes, u64 num_bytes, int delalloc,
+ bool force_wrong_size_class)
{
struct btrfs_space_info *space_info = cache->space_info;
+ enum btrfs_block_group_size_class size_class;
int ret = 0;
spin_lock(&space_info->lock);
spin_lock(&cache->lock);
if (cache->ro) {
ret = -EAGAIN;
- } else {
- cache->reserved += num_bytes;
- space_info->bytes_reserved += num_bytes;
- trace_btrfs_space_reservation(cache->fs_info, "space_info",
- space_info->flags, num_bytes, 1);
- btrfs_space_info_update_bytes_may_use(cache->fs_info,
- space_info, -ram_bytes);
- if (delalloc)
- cache->delalloc_bytes += num_bytes;
+ goto out;
+ }
- /*
- * Compression can use less space than we reserved, so wake
- * tickets if that happens
- */
- if (num_bytes < ram_bytes)
- btrfs_try_granting_tickets(cache->fs_info, space_info);
+ if (btrfs_is_block_group_data_only(cache)) {
+ size_class = btrfs_calc_block_group_size_class(num_bytes);
+ ret = btrfs_use_block_group_size_class(cache, size_class, force_wrong_size_class);
+ if (ret)
+ goto out;
}
+ cache->reserved += num_bytes;
+ space_info->bytes_reserved += num_bytes;
+ trace_btrfs_space_reservation(cache->fs_info, "space_info",
+ space_info->flags, num_bytes, 1);
+ btrfs_space_info_update_bytes_may_use(cache->fs_info,
+ space_info, -ram_bytes);
+ if (delalloc)
+ cache->delalloc_bytes += num_bytes;
+
+ /*
+ * Compression can use less space than we reserved, so wake tickets if
+ * that happens.
+ */
+ if (num_bytes < ram_bytes)
+ btrfs_try_granting_tickets(cache->fs_info, space_info);
+out:
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
return ret;
@@ -4218,3 +4230,64 @@ void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount
bg->swap_extents -= amount;
spin_unlock(&bg->lock);
}
+
+enum btrfs_block_group_size_class btrfs_calc_block_group_size_class(u64 size)
+{
+ if (size <= SZ_128K)
+ return BTRFS_BG_SZ_SMALL;
+ if (size <= SZ_8M)
+ return BTRFS_BG_SZ_MEDIUM;
+ return BTRFS_BG_SZ_LARGE;
+}
+
+/*
+ * Handle a block group allocating an extent in a size class
+ *
+ * @bg: The block group we allocated in.
+ * @size_class: The size class of the allocation.
+ * @force_wrong_size_class: Whether we are desperate enough to allow
+ * mismatched size classes.
+ *
+ * Returns: 0 if the size class was valid for this block_group, -EAGAIN in the
+ * case of a race that leads to the wrong size class without
+ * force_wrong_size_class set.
+ *
+ * find_free_extent will skip block groups with a mismatched size class until
+ * it really needs to avoid ENOSPC. In that case it will set
+ * force_wrong_size_class. However, if a block group is newly allocated and
+ * doesn't yet have a size class, then it is possible for two allocations of
+ * different sizes to race and both try to use it. The loser is caught here and
+ * has to retry.
+ */
+int btrfs_use_block_group_size_class(struct btrfs_block_group *bg,
+ enum btrfs_block_group_size_class size_class,
+ bool force_wrong_size_class)
+{
+ ASSERT(size_class != BTRFS_BG_SZ_NONE);
+
+ /* The new allocation is in the right size class, do nothing */
+ if (bg->size_class == size_class)
+ return 0;
+ /*
+ * The new allocation is in a mismatched size class.
+ * This means one of two things:
+ *
+ * 1. Two tasks in find_free_extent for different size_classes raced
+ * and hit the same empty block_group. Make the loser try again.
+ * 2. A call to find_free_extent got desperate enough to set
+ * 'force_wrong_slab'. Don't change the size_class, but allow the
+ * allocation.
+ */
+ if (bg->size_class != BTRFS_BG_SZ_NONE) {
+ if (force_wrong_size_class)
+ return 0;
+ return -EAGAIN;
+ }
+ /*
+ * The happy new block group case: the new allocation is the first
+ * one in the block_group so we set size_class.
+ */
+ bg->size_class = size_class;
+
+ return 0;
+}
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index a02ea76fd6cf..2b9d328b589e 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -12,6 +12,17 @@ enum btrfs_disk_cache_state {
BTRFS_DC_SETUP,
};
+enum btrfs_block_group_size_class {
+ /* Unset */
+ BTRFS_BG_SZ_NONE,
+ /* 0 < size <= 128K */
+ BTRFS_BG_SZ_SMALL,
+ /* 128K < size <= 8M */
+ BTRFS_BG_SZ_MEDIUM,
+ /* 8M < size < BG_LENGTH */
+ BTRFS_BG_SZ_LARGE,
+};
+
/*
* This describes the state of the block_group for async discard. This is due
* to the two pass nature of it where extent discarding is prioritized over
@@ -233,6 +244,7 @@ struct btrfs_block_group {
struct list_head active_bg_list;
struct work_struct zone_finish_work;
struct extent_buffer *last_eb;
+ enum btrfs_block_group_size_class size_class;
};
static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
@@ -302,7 +314,8 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans);
int btrfs_update_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, bool alloc);
int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
- u64 ram_bytes, u64 num_bytes, int delalloc);
+ u64 ram_bytes, u64 num_bytes, int delalloc,
+ bool force_wrong_size_class);
void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
u64 num_bytes, int delalloc);
int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
@@ -346,4 +359,9 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *cache);
bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg);
void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount);
+enum btrfs_block_group_size_class btrfs_calc_block_group_size_class(u64 size);
+int btrfs_use_block_group_size_class(struct btrfs_block_group *bg,
+ enum btrfs_block_group_size_class size_class,
+ bool force_wrong_size_class);
+
#endif /* BTRFS_BLOCK_GROUP_H */
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 203c8cbb9983..e9be9430faa0 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3385,7 +3385,9 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
enum btrfs_loop_type {
LOOP_CACHING_NOWAIT,
LOOP_CACHING_WAIT,
+ LOOP_UNSET_SIZE_CLASS,
LOOP_ALLOC_CHUNK,
+ LOOP_WRONG_SIZE_CLASS,
LOOP_NO_EMPTY_SIZE,
};
@@ -3950,24 +3952,6 @@ static int can_allocate_chunk(struct btrfs_fs_info *fs_info,
}
}
-static int chunk_allocation_failed(struct find_free_extent_ctl *ffe_ctl)
-{
- switch (ffe_ctl->policy) {
- case BTRFS_EXTENT_ALLOC_CLUSTERED:
- /*
- * If we can't allocate a new chunk we've already looped through
- * at least once, move on to the NO_EMPTY_SIZE case.
- */
- ffe_ctl->loop = LOOP_NO_EMPTY_SIZE;
- return 0;
- case BTRFS_EXTENT_ALLOC_ZONED:
- /* Give up here */
- return -ENOSPC;
- default:
- BUG();
- }
-}
-
/*
* Return >0 means caller needs to re-search for free extent
* Return 0 means we have the needed free extent.
@@ -4001,31 +3985,28 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
* LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
* caching kthreads as we move along
* LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
+ * LOOP_UNSET_SIZE_CLASS, allow unset size class
* LOOP_ALLOC_CHUNK, force a chunk allocation and try again
* LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
* again
*/
if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
ffe_ctl->index = 0;
- if (ffe_ctl->loop == LOOP_CACHING_NOWAIT) {
- /*
- * We want to skip the LOOP_CACHING_WAIT step if we
- * don't have any uncached bgs and we've already done a
- * full search through.
- */
- if (ffe_ctl->orig_have_caching_bg || !full_search)
- ffe_ctl->loop = LOOP_CACHING_WAIT;
- else
- ffe_ctl->loop = LOOP_ALLOC_CHUNK;
- } else {
+ /*
+ * We want to skip the LOOP_CACHING_WAIT step if we don't have
+ * any uncached bgs and we've already done a full search
+ * through.
+ */
+ if (ffe_ctl->loop == LOOP_CACHING_NOWAIT &&
+ (!ffe_ctl->orig_have_caching_bg && full_search))
ffe_ctl->loop++;
- }
+ ffe_ctl->loop++;
if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
struct btrfs_trans_handle *trans;
int exist = 0;
- /*Check if allocation policy allows to create a new chunk */
+ /* Check if allocation policy allows to create a new chunk */
ret = can_allocate_chunk(fs_info, ffe_ctl);
if (ret)
return ret;
@@ -4045,8 +4026,10 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
CHUNK_ALLOC_FORCE_FOR_EXTENT);
/* Do not bail out on ENOSPC since we can do more. */
- if (ret == -ENOSPC)
- ret = chunk_allocation_failed(ffe_ctl);
+ if (ret == -ENOSPC) {
+ ret = 0;
+ ffe_ctl->loop++;
+ }
else if (ret < 0)
btrfs_abort_transaction(trans, ret);
else
@@ -4076,6 +4059,21 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
return -ENOSPC;
}
+static bool find_free_extent_check_size_class(struct find_free_extent_ctl *ffe_ctl,
+ struct btrfs_block_group *bg)
+{
+ if (ffe_ctl->policy == BTRFS_EXTENT_ALLOC_ZONED)
+ return true;
+ if (!btrfs_is_block_group_data_only(bg))
+ return true;
+ if (ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS)
+ return true;
+ if (ffe_ctl->loop >= LOOP_UNSET_SIZE_CLASS &&
+ bg->size_class == BTRFS_BG_SZ_NONE)
+ return true;
+ return ffe_ctl->size_class == bg->size_class;
+}
+
static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
struct find_free_extent_ctl *ffe_ctl,
struct btrfs_space_info *space_info,
@@ -4210,6 +4208,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
ffe_ctl->total_free_space = 0;
ffe_ctl->found_offset = 0;
ffe_ctl->policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
+ ffe_ctl->size_class = btrfs_calc_block_group_size_class(ffe_ctl->num_bytes);
if (btrfs_is_zoned(fs_info))
ffe_ctl->policy = BTRFS_EXTENT_ALLOC_ZONED;
@@ -4346,6 +4345,9 @@ have_block_group:
if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
goto loop;
+ if (!find_free_extent_check_size_class(ffe_ctl, block_group))
+ goto loop;
+
bg_ret = NULL;
ret = do_allocation(block_group, ffe_ctl, &bg_ret);
if (ret == 0) {
@@ -4380,7 +4382,8 @@ have_block_group:
ret = btrfs_add_reserved_bytes(block_group, ffe_ctl->ram_bytes,
ffe_ctl->num_bytes,
- ffe_ctl->delalloc);
+ ffe_ctl->delalloc,
+ ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS);
if (ret == -EAGAIN) {
btrfs_add_free_space_unused(block_group,
ffe_ctl->found_offset,
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
index daa5e3505886..0c958fc1b3b8 100644
--- a/fs/btrfs/extent-tree.h
+++ b/fs/btrfs/extent-tree.h
@@ -79,6 +79,9 @@ struct find_free_extent_ctl {
/* Whether or not the allocator is currently following a hint */
bool hinted;
+
+ /* Size class of block groups to prefer in early loops */
+ enum btrfs_block_group_size_class size_class;
};
enum btrfs_inline_ref_type {
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index cc2eaab28d58..75d7d22c3a27 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -1349,28 +1349,33 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent,
TP_STRUCT__entry_btrfs(
__field( u64, bg_objectid )
__field( u64, flags )
+ __field( int, bg_size_class )
__field( u64, start )
__field( u64, len )
__field( u64, loop )
__field( bool, hinted )
+ __field( int, size_class )
),
TP_fast_assign_btrfs(block_group->fs_info,
__entry->bg_objectid = block_group->start;
__entry->flags = block_group->flags;
+ __entry->bg_size_class = block_group->size_class;
__entry->start = ffe_ctl->search_start;
__entry->len = ffe_ctl->num_bytes;
__entry->loop = ffe_ctl->loop;
__entry->hinted = ffe_ctl->hinted;
+ __entry->size_class = ffe_ctl->size_class;
),
TP_printk_btrfs(
-"root=%llu(%s) block_group=%llu flags=%llu(%s) start=%llu len=%llu loop=%llu hinted=%d",
+"root=%llu(%s) block_group=%llu flags=%llu(%s) bg_size_class=%d start=%llu len=%llu loop=%llu hinted=%d size_class=%d",
show_root_type(BTRFS_EXTENT_TREE_OBJECTID),
__entry->bg_objectid,
__entry->flags, __print_flags((unsigned long)__entry->flags,
"|", BTRFS_GROUP_FLAGS),
- __entry->start, __entry->len, __entry->loop, __entry->hinted)
+ __entry->bg_size_class, __entry->start, __entry->len,
+ __entry->loop, __entry->hinted, __entry->size_class)
);
DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent,