author      Naohiro Aota <naohiro.aota@wdc.com>       2021-02-04 13:22:18 +0300
committer   David Sterba <dsterba@suse.com>           2021-02-09 04:46:08 +0300
commit      40ab3be102f0a61dbb93093f330b432324a793f1 (patch)
tree        b7b92c35cdf2845fa4da3febca37f0b288d0e359 /fs/btrfs/extent-tree.c
parent      6ab6ebb76042d3d94a7c6c447f770a28a412c68c (diff)
download    linux-40ab3be102f0a61dbb93093f330b432324a793f1.tar.xz
btrfs: zoned: extend zoned allocator to use dedicated tree-log block group
This is the first of three patches to enable the tree-log feature on zoned
filesystems.
The tree-log feature does not work on a zoned filesystem as is. Blocks for
a tree-log tree are allocated mixed with other metadata blocks and btrfs
writes and syncs the tree-log blocks to devices at the time of fsync(),
which has a different timing than a global transaction commit. As a
result, both writing tree-log blocks and writing other metadata blocks
become non-sequential writes that zoned filesystems must avoid.
Introduce a dedicated block group for tree-log blocks, so that tree-log
blocks and other metadata blocks can be separate write streams. As a
result, each write stream can now be written to devices separately.
"fs_info->treelog_bg" tracks the dedicated block group; it is assigned on
demand at tree-log block allocation time.
This commit extends the zoned block allocator to use the block group.
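To illustrate the rule the allocator enforces, here is a minimal user-space
sketch of the exclusion check, using simplified stand-in types and names
(treelog_bg is the start address of the dedicated block group, 0 meaning
none assigned yet); this is only an illustration, the real check lives in
do_allocation_zoned() in the diff below.

  /*
   * Hypothetical stand-alone sketch, not the kernel implementation: skip a
   * candidate block group when the allocation type and the dedicated
   * tree-log block group do not match.
   */
  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  /* treelog_bg: start of the dedicated tree-log block group, 0 = none yet */
  static bool skip_block_group(uint64_t treelog_bg, uint64_t candidate_start,
                               bool for_treelog)
  {
          if (!treelog_bg)
                  return false;   /* no dedicated block group assigned yet */
          if (for_treelog)
                  /* tree-log allocations may only use the dedicated group */
                  return candidate_start != treelog_bg;
          /* all other metadata must stay out of the dedicated group */
          return candidate_start == treelog_bg;
  }

  int main(void)
  {
          const uint64_t treelog_bg = 0x40000000; /* hypothetical start */

          printf("%d\n", skip_block_group(treelog_bg, 0x40000000, true));  /* 0 */
          printf("%d\n", skip_block_group(treelog_bg, 0x80000000, true));  /* 1 */
          printf("%d\n", skip_block_group(treelog_bg, 0x40000000, false)); /* 1 */
          return 0;
  }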
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--   fs/btrfs/extent-tree.c   75
1 file changed, 71 insertions(+), 4 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index cd308bf3a220..78ad31a59e59 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3497,6 +3497,9 @@ struct find_free_extent_ctl {
 	bool have_caching_bg;
 	bool orig_have_caching_bg;
 
+	/* Allocation is called for tree-log */
+	bool for_treelog;
+
 	/* RAID index, converted from flags */
 	int index;
 
@@ -3726,6 +3729,22 @@ static int do_allocation_clustered(struct btrfs_block_group *block_group,
 }
 
 /*
+ * Tree-log block group locking
+ * ============================
+ *
+ * fs_info::treelog_bg_lock protects the fs_info::treelog_bg which
+ * indicates the starting address of a block group, which is reserved only
+ * for tree-log metadata.
+ *
+ * Lock nesting
+ * ============
+ *
+ * space_info::lock
+ *   block_group::lock
+ *     fs_info::treelog_bg_lock
+ */
+
+/*
  * Simple allocator for sequential-only block group. It only allows sequential
  * allocation. No need to play with trees. This function also reserves the
  * bytes as in btrfs_add_reserved_bytes.
@@ -3734,23 +3753,54 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 			       struct find_free_extent_ctl *ffe_ctl,
 			       struct btrfs_block_group **bg_ret)
 {
+	struct btrfs_fs_info *fs_info = block_group->fs_info;
 	struct btrfs_space_info *space_info = block_group->space_info;
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 	u64 start = block_group->start;
 	u64 num_bytes = ffe_ctl->num_bytes;
 	u64 avail;
+	u64 bytenr = block_group->start;
+	u64 log_bytenr;
 	int ret = 0;
+	bool skip;
 
 	ASSERT(btrfs_is_zoned(block_group->fs_info));
 
+	/*
+	 * Do not allow non-tree-log blocks in the dedicated tree-log block
+	 * group, and vice versa.
+	 */
+	spin_lock(&fs_info->treelog_bg_lock);
+	log_bytenr = fs_info->treelog_bg;
+	skip = log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
+			      (!ffe_ctl->for_treelog && bytenr == log_bytenr));
+	spin_unlock(&fs_info->treelog_bg_lock);
+	if (skip)
+		return 1;
+
 	spin_lock(&space_info->lock);
 	spin_lock(&block_group->lock);
+	spin_lock(&fs_info->treelog_bg_lock);
+
+	ASSERT(!ffe_ctl->for_treelog ||
+	       block_group->start == fs_info->treelog_bg ||
+	       fs_info->treelog_bg == 0);
 
 	if (block_group->ro) {
 		ret = 1;
 		goto out;
 	}
 
+	/*
+	 * Do not allow currently using block group to be tree-log dedicated
+	 * block group.
+	 */
+	if (ffe_ctl->for_treelog && !fs_info->treelog_bg &&
+	    (block_group->used || block_group->reserved)) {
+		ret = 1;
+		goto out;
+	}
+
 	avail = block_group->length - block_group->alloc_offset;
 	if (avail < num_bytes) {
 		if (ffe_ctl->max_extent_size < avail) {
@@ -3765,6 +3815,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 		goto out;
 	}
 
+	if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
+		fs_info->treelog_bg = block_group->start;
+
 	ffe_ctl->found_offset = start + block_group->alloc_offset;
 	block_group->alloc_offset += num_bytes;
 	spin_lock(&ctl->tree_lock);
@@ -3779,6 +3832,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 	ffe_ctl->search_start = ffe_ctl->found_offset;
 
 out:
+	if (ret && ffe_ctl->for_treelog)
+		fs_info->treelog_bg = 0;
+	spin_unlock(&fs_info->treelog_bg_lock);
 	spin_unlock(&block_group->lock);
 	spin_unlock(&space_info->lock);
 	return ret;
@@ -4028,7 +4084,12 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
 		return prepare_allocation_clustered(fs_info, ffe_ctl,
 						    space_info, ins);
 	case BTRFS_EXTENT_ALLOC_ZONED:
-		/* Nothing to do */
+		if (ffe_ctl->for_treelog) {
+			spin_lock(&fs_info->treelog_bg_lock);
+			if (fs_info->treelog_bg)
+				ffe_ctl->hint_byte = fs_info->treelog_bg;
+			spin_unlock(&fs_info->treelog_bg_lock);
+		}
 		return 0;
 	default:
 		BUG();
@@ -4072,6 +4133,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
 	struct find_free_extent_ctl ffe_ctl = {0};
 	struct btrfs_space_info *space_info;
 	bool full_search = false;
+	bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
 
 	WARN_ON(num_bytes < fs_info->sectorsize);
 
@@ -4085,6 +4147,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
 	ffe_ctl.orig_have_caching_bg = false;
 	ffe_ctl.found_offset = 0;
 	ffe_ctl.hint_byte = hint_byte_orig;
+	ffe_ctl.for_treelog = for_treelog;
 	ffe_ctl.policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
 
 	/* For clustered allocation */
@@ -4159,8 +4222,11 @@ search:
 		struct btrfs_block_group *bg_ret;
 
 		/* If the block group is read-only, we can skip it entirely. */
-		if (unlikely(block_group->ro))
+		if (unlikely(block_group->ro)) {
+			if (for_treelog)
+				btrfs_clear_treelog_bg(block_group);
 			continue;
+		}
 
 		btrfs_grab_block_group(block_group, delalloc);
 		ffe_ctl.search_start = block_group->start;
@@ -4346,6 +4412,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
 	bool final_tried = num_bytes == min_alloc_size;
 	u64 flags;
 	int ret;
+	bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
 
 	flags = get_alloc_profile_by_root(root, is_data);
 again:
@@ -4369,8 +4436,8 @@ again:
 
 			sinfo = btrfs_find_space_info(fs_info, flags);
 			btrfs_err(fs_info,
-				  "allocation failed flags %llu, wanted %llu",
-				  flags, num_bytes);
+	"allocation failed flags %llu, wanted %llu tree-log %d",
+				  flags, num_bytes, for_treelog);
 			if (sinfo)
 				btrfs_dump_space_info(fs_info, sinfo,
 						      num_bytes, 1);