summaryrefslogtreecommitdiff
path: root/fs/btrfs/zoned.c
diff options
context:
space:
mode:
authorNaohiro Aota <naohiro.aota@wdc.com>2023-07-21 10:42:14 +0300
committerDavid Sterba <dsterba@suse.com>2023-08-21 15:54:47 +0300
commit332581bde2a419d5f12a93a1cdc2856af649a3cc (patch)
treec51f576cfcb66508650d71525e803892c5da5891 /fs/btrfs/zoned.c
parente7f1326cc24e22b38afc3acd328480a1183f9e79 (diff)
downloadlinux-332581bde2a419d5f12a93a1cdc2856af649a3cc.tar.xz
btrfs: zoned: do not zone finish data relocation block group
When multiple writes happen at once, we may need to sacrifice a currently active block group to be zone finished for a new allocation. We choose a block group with the least free space left, and zone finish it. To do the finishing, we need to send IOs for already allocated region and wait for them and on-going IOs. Otherwise, these IOs fail because the zone is already finished at the time the IO reach a device. However, if a block group dedicated to the data relocation is zone finished, there is a chance that finishing it before an ongoing write IO reaches the device. That is because there is timing gap between an allocation is done (block_group->reservations == 0, as pre-allocation is done) and an ordered extent is created when the relocation IO starts. Thus, if we finish the zone between them, we can fail the IOs. We cannot simply use "fs_info->data_reloc_bg == block_group->start" to avoid the zone finishing. Because, the data_reloc_bg may already switch to a new block group, while there are still ongoing write IOs to the old data_reloc_bg. So, this patch reworks the BLOCK_GROUP_FLAG_ZONED_DATA_RELOC bit to indicate there is a data relocation allocation and/or ongoing write to the block group. The bit is set on allocation and cleared in end_io function of the last IO for the currently allocated region. To change the timing of the bit setting also solves the issue that the bit being left even after there is no IO going on. With the current code, if the data_reloc_bg switches after the last IO to the current data_reloc_bg, the bit is set at this timing and there is no one clearing that bit. As a result, that block group is kept unallocatable for anything. Fixes: 343d8a30851c ("btrfs: zoned: prevent allocation from previous data relocation BG") Fixes: 74e91b12b115 ("btrfs: zoned: zone finish unused block group") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/zoned.c')
-rw-r--r--fs/btrfs/zoned.c16
1 files changed, 13 insertions, 3 deletions
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 52f49b0d2dee..a2f8e7440f8c 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -2091,6 +2091,10 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
* and block_group->meta_write_pointer for metadata.
*/
if (!fully_written) {
+ if (test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
+ spin_unlock(&block_group->lock);
+ return -EAGAIN;
+ }
spin_unlock(&block_group->lock);
ret = btrfs_inc_block_group_ro(block_group, false);
@@ -2119,7 +2123,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
return 0;
}
- if (block_group->reserved) {
+ if (block_group->reserved ||
+ test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
+ &block_group->runtime_flags)) {
spin_unlock(&block_group->lock);
btrfs_dec_block_group_ro(block_group);
return -EAGAIN;
@@ -2362,7 +2368,10 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica
/* All relocation extents are written. */
if (block_group->start + block_group->alloc_offset == logical + length) {
- /* Now, release this block group for further allocations. */
+ /*
+ * Now, release this block group for further allocations and
+ * zone finish.
+ */
clear_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
&block_group->runtime_flags);
}
@@ -2386,7 +2395,8 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
spin_lock(&block_group->lock);
if (block_group->reserved || block_group->alloc_offset == 0 ||
- (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) {
+ (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) ||
+ test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
spin_unlock(&block_group->lock);
continue;
}