summaryrefslogtreecommitdiff
path: root/fs/btrfs/zoned.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/zoned.c')
-rw-r--r--fs/btrfs/zoned.c346
1 files changed, 258 insertions, 88 deletions
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 4a3e02b49f29..ea662036f441 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -9,7 +9,6 @@
#include "ctree.h"
#include "volumes.h"
#include "zoned.h"
-#include "rcu-string.h"
#include "disk-io.h"
#include "block-group.h"
#include "dev-replace.h"
@@ -17,6 +16,8 @@
#include "fs.h"
#include "accessors.h"
#include "bio.h"
+#include "transaction.h"
+#include "sysfs.h"
/* Maximum number of zones to report per blkdev_report_zones() call */
#define BTRFS_REPORT_NR_ZONES 4096
@@ -42,6 +43,9 @@
/* Number of superblock log zones */
#define BTRFS_NR_SB_LOG_ZONES 2
+/* Default number of max active zones when the device has no limits. */
+#define BTRFS_DEFAULT_MAX_ACTIVE_ZONES 128
+
/*
* Minimum of active zones we need:
*
@@ -263,9 +267,9 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
ret = blkdev_report_zones(device->bdev, pos >> SECTOR_SHIFT, *nr_zones,
copy_zone_info_cb, zones);
if (ret < 0) {
- btrfs_err_in_rcu(device->fs_info,
+ btrfs_err(device->fs_info,
"zoned: failed to read zone %llu on %s (devid %llu)",
- pos, rcu_str_deref(device->name),
+ pos, rcu_dereference(device->name),
device->devid);
return ret;
}
@@ -395,16 +399,16 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
/* We reject devices with a zone size larger than 8GB */
if (zone_info->zone_size > BTRFS_MAX_ZONE_SIZE) {
- btrfs_err_in_rcu(fs_info,
+ btrfs_err(fs_info,
"zoned: %s: zone size %llu larger than supported maximum %llu",
- rcu_str_deref(device->name),
+ rcu_dereference(device->name),
zone_info->zone_size, BTRFS_MAX_ZONE_SIZE);
ret = -EINVAL;
goto out;
} else if (zone_info->zone_size < BTRFS_MIN_ZONE_SIZE) {
- btrfs_err_in_rcu(fs_info,
+ btrfs_err(fs_info,
"zoned: %s: zone size %llu smaller than supported minimum %u",
- rcu_str_deref(device->name),
+ rcu_dereference(device->name),
zone_info->zone_size, BTRFS_MIN_ZONE_SIZE);
ret = -EINVAL;
goto out;
@@ -416,11 +420,14 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
if (!IS_ALIGNED(nr_sectors, zone_sectors))
zone_info->nr_zones++;
- max_active_zones = bdev_max_active_zones(bdev);
+ max_active_zones = min_not_zero(bdev_max_active_zones(bdev),
+ bdev_max_open_zones(bdev));
+ if (!max_active_zones && zone_info->nr_zones > BTRFS_DEFAULT_MAX_ACTIVE_ZONES)
+ max_active_zones = BTRFS_DEFAULT_MAX_ACTIVE_ZONES;
if (max_active_zones && max_active_zones < BTRFS_MIN_ACTIVE_ZONES) {
- btrfs_err_in_rcu(fs_info,
+ btrfs_err(fs_info,
"zoned: %s: max active zones %u is too small, need at least %u active zones",
- rcu_str_deref(device->name), max_active_zones,
+ rcu_dereference(device->name), max_active_zones,
BTRFS_MIN_ACTIVE_ZONES);
ret = -EINVAL;
goto out;
@@ -460,9 +467,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
zone_info->zone_cache = vcalloc(zone_info->nr_zones,
sizeof(struct blk_zone));
if (!zone_info->zone_cache) {
- btrfs_err_in_rcu(device->fs_info,
+ btrfs_err(device->fs_info,
"zoned: failed to allocate zone cache for %s",
- rcu_str_deref(device->name));
+ rcu_dereference(device->name));
ret = -ENOMEM;
goto out;
}
@@ -497,9 +504,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
}
if (nreported != zone_info->nr_zones) {
- btrfs_err_in_rcu(device->fs_info,
+ btrfs_err(device->fs_info,
"inconsistent number of zones on %s (%u/%u)",
- rcu_str_deref(device->name), nreported,
+ rcu_dereference(device->name), nreported,
zone_info->nr_zones);
ret = -EIO;
goto out;
@@ -507,9 +514,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
if (max_active_zones) {
if (nactive > max_active_zones) {
- btrfs_err_in_rcu(device->fs_info,
+ btrfs_err(device->fs_info,
"zoned: %u active zones on %s exceeds max_active_zones %u",
- nactive, rcu_str_deref(device->name),
+ nactive, rcu_dereference(device->name),
max_active_zones);
ret = -EIO;
goto out;
@@ -538,7 +545,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
goto out;
if (nr_zones != BTRFS_NR_SB_LOG_ZONES) {
- btrfs_err_in_rcu(device->fs_info,
+ btrfs_err(device->fs_info,
"zoned: failed to read super block log zone info at devid %llu zone %u",
device->devid, sb_zone);
ret = -EUCLEAN;
@@ -556,7 +563,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
ret = sb_write_pointer(device->bdev,
&zone_info->sb_zones[sb_pos], &sb_wp);
if (ret != -ENOENT && ret) {
- btrfs_err_in_rcu(device->fs_info,
+ btrfs_err(device->fs_info,
"zoned: super block log zone corrupted devid %llu zone %u",
device->devid, sb_zone);
ret = -EUCLEAN;
@@ -575,9 +582,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
emulated = "emulated ";
}
- btrfs_info_in_rcu(fs_info,
+ btrfs_info(fs_info,
"%s block device %s, %u %szones of %llu bytes",
- model, rcu_str_deref(device->name), zone_info->nr_zones,
+ model, rcu_dereference(device->name), zone_info->nr_zones,
emulated, zone_info->zone_size);
return 0;
@@ -989,7 +996,7 @@ int btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
}
/* All the zones are FULL. Should not reach here. */
- ASSERT(0);
+ DEBUG_WARN("unexpected state, all zones full");
return -EIO;
}
@@ -1182,10 +1189,10 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
continue;
/* Free regions should be empty */
- btrfs_warn_in_rcu(
+ btrfs_warn(
device->fs_info,
"zoned: resetting device %s (devid %llu) zone %llu for allocation",
- rcu_str_deref(device->name), device->devid, pos >> shift);
+ rcu_dereference(device->name), device->devid, pos >> shift);
WARN_ON_ONCE(1);
ret = btrfs_reset_device_zone(device, pos, zinfo->zone_size,
@@ -1345,9 +1352,9 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx,
}
if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
- btrfs_err_in_rcu(fs_info,
+ btrfs_err(fs_info,
"zoned: unexpected conventional zone %llu on device %s (devid %llu)",
- zone.start << SECTOR_SHIFT, rcu_str_deref(device->name),
+ zone.start << SECTOR_SHIFT, rcu_dereference(device->name),
device->devid);
up_read(&dev_replace->rwsem);
return -EIO;
@@ -1358,10 +1365,10 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx,
switch (zone.cond) {
case BLK_ZONE_COND_OFFLINE:
case BLK_ZONE_COND_READONLY:
- btrfs_err_in_rcu(fs_info,
+ btrfs_err(fs_info,
"zoned: offline/readonly zone %llu on device %s (devid %llu)",
(info->physical >> device->zone_info->zone_size_shift),
- rcu_str_deref(device->name), device->devid);
+ rcu_dereference(device->name), device->devid);
info->alloc_offset = WP_MISSING_DEV;
break;
case BLK_ZONE_COND_EMPTY:
@@ -1403,7 +1410,8 @@ static int btrfs_load_block_group_single(struct btrfs_block_group *bg,
static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
- unsigned long *active)
+ unsigned long *active,
+ u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
@@ -1426,6 +1434,13 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
zone_info[1].physical);
return -EIO;
}
+
+ if (zone_info[0].alloc_offset == WP_CONVENTIONAL)
+ zone_info[0].alloc_offset = last_alloc;
+
+ if (zone_info[1].alloc_offset == WP_CONVENTIONAL)
+ zone_info[1].alloc_offset = last_alloc;
+
if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) {
btrfs_err(bg->fs_info,
"zoned: write pointer offset mismatch of zones in DUP profile");
@@ -1446,7 +1461,8 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
- unsigned long *active)
+ unsigned long *active,
+ u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
int i;
@@ -1461,10 +1477,12 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
for (i = 0; i < map->num_stripes; i++) {
- if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
- zone_info[i].alloc_offset == WP_CONVENTIONAL)
+ if (zone_info[i].alloc_offset == WP_MISSING_DEV)
continue;
+ if (zone_info[i].alloc_offset == WP_CONVENTIONAL)
+ zone_info[i].alloc_offset = last_alloc;
+
if ((zone_info[0].alloc_offset != zone_info[i].alloc_offset) &&
!btrfs_test_opt(fs_info, DEGRADED)) {
btrfs_err(fs_info,
@@ -1494,7 +1512,8 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
- unsigned long *active)
+ unsigned long *active,
+ u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
@@ -1505,10 +1524,29 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
}
for (int i = 0; i < map->num_stripes; i++) {
- if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
- zone_info[i].alloc_offset == WP_CONVENTIONAL)
+ if (zone_info[i].alloc_offset == WP_MISSING_DEV)
continue;
+ if (zone_info[i].alloc_offset == WP_CONVENTIONAL) {
+ u64 stripe_nr, full_stripe_nr;
+ u64 stripe_offset;
+ int stripe_index;
+
+ stripe_nr = div64_u64(last_alloc, map->stripe_size);
+ stripe_offset = stripe_nr * map->stripe_size;
+ full_stripe_nr = div_u64(stripe_nr, map->num_stripes);
+ div_u64_rem(stripe_nr, map->num_stripes, &stripe_index);
+
+ zone_info[i].alloc_offset =
+ full_stripe_nr * map->stripe_size;
+
+ if (stripe_index > i)
+ zone_info[i].alloc_offset += map->stripe_size;
+ else if (stripe_index == i)
+ zone_info[i].alloc_offset +=
+ (last_alloc - stripe_offset);
+ }
+
if (test_bit(0, active) != test_bit(i, active)) {
if (!btrfs_zone_activate(bg))
return -EIO;
@@ -1526,7 +1564,8 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
struct btrfs_chunk_map *map,
struct zone_info *zone_info,
- unsigned long *active)
+ unsigned long *active,
+ u64 last_alloc)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
@@ -1537,8 +1576,7 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
}
for (int i = 0; i < map->num_stripes; i++) {
- if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
- zone_info[i].alloc_offset == WP_CONVENTIONAL)
+ if (zone_info[i].alloc_offset == WP_MISSING_DEV)
continue;
if (test_bit(0, active) != test_bit(i, active)) {
@@ -1549,6 +1587,29 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
}
+ if (zone_info[i].alloc_offset == WP_CONVENTIONAL) {
+ u64 stripe_nr, full_stripe_nr;
+ u64 stripe_offset;
+ int stripe_index;
+
+ stripe_nr = div64_u64(last_alloc, map->stripe_size);
+ stripe_offset = stripe_nr * map->stripe_size;
+ full_stripe_nr = div_u64(stripe_nr,
+ map->num_stripes / map->sub_stripes);
+ div_u64_rem(stripe_nr,
+ (map->num_stripes / map->sub_stripes),
+ &stripe_index);
+
+ zone_info[i].alloc_offset =
+ full_stripe_nr * map->stripe_size;
+
+ if (stripe_index > (i / map->sub_stripes))
+ zone_info[i].alloc_offset += map->stripe_size;
+ else if (stripe_index == (i / map->sub_stripes))
+ zone_info[i].alloc_offset +=
+ (last_alloc - stripe_offset);
+ }
+
if ((i % map->sub_stripes) == 0) {
bg->zone_capacity += zone_info[i].capacity;
bg->alloc_offset += zone_info[i].alloc_offset;
@@ -1637,18 +1698,22 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
ret = btrfs_load_block_group_single(cache, &zone_info[0], active);
break;
case BTRFS_BLOCK_GROUP_DUP:
- ret = btrfs_load_block_group_dup(cache, map, zone_info, active);
+ ret = btrfs_load_block_group_dup(cache, map, zone_info, active,
+ last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID1:
case BTRFS_BLOCK_GROUP_RAID1C3:
case BTRFS_BLOCK_GROUP_RAID1C4:
- ret = btrfs_load_block_group_raid1(cache, map, zone_info, active);
+ ret = btrfs_load_block_group_raid1(cache, map, zone_info,
+ active, last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID0:
- ret = btrfs_load_block_group_raid0(cache, map, zone_info, active);
+ ret = btrfs_load_block_group_raid0(cache, map, zone_info,
+ active, last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID10:
- ret = btrfs_load_block_group_raid10(cache, map, zone_info, active);
+ ret = btrfs_load_block_group_raid10(cache, map, zone_info,
+ active, last_alloc);
break;
case BTRFS_BLOCK_GROUP_RAID5:
case BTRFS_BLOCK_GROUP_RAID6:
@@ -1797,12 +1862,12 @@ static void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered,
ordered->disk_bytenr = logical;
write_lock(&em_tree->lock);
- em = search_extent_mapping(em_tree, ordered->file_offset,
- ordered->num_bytes);
+ em = btrfs_search_extent_mapping(em_tree, ordered->file_offset,
+ ordered->num_bytes);
/* The em should be a new COW extent, thus it should not have an offset. */
ASSERT(em->offset == 0);
em->disk_bytenr = logical;
- free_extent_map(em);
+ btrfs_free_extent_map(em);
write_unlock(&em_tree->lock);
}
@@ -1812,8 +1877,8 @@ static bool btrfs_zoned_split_ordered(struct btrfs_ordered_extent *ordered,
struct btrfs_ordered_extent *new;
if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) &&
- split_extent_map(ordered->inode, ordered->file_offset,
- ordered->num_bytes, len, logical))
+ btrfs_split_extent_map(ordered->inode, ordered->file_offset,
+ ordered->num_bytes, len, logical))
return false;
new = btrfs_split_ordered_extent(ordered, len);
@@ -2110,10 +2175,15 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
goto out_unlock;
}
- /* No space left */
- if (btrfs_zoned_bg_is_full(block_group)) {
- ret = false;
- goto out_unlock;
+ if (block_group->flags & BTRFS_BLOCK_GROUP_DATA) {
+ /* The caller should check if the block group is full. */
+ if (WARN_ON_ONCE(btrfs_zoned_bg_is_full(block_group))) {
+ ret = false;
+ goto out_unlock;
+ }
+ } else {
+ /* Since it is already written, it should have been active. */
+ WARN_ON_ONCE(block_group->meta_write_pointer != block_group->start);
}
for (i = 0; i < map->num_stripes; i++) {
@@ -2171,27 +2241,15 @@ static void wait_eb_writebacks(struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
const u64 end = block_group->start + block_group->length;
- struct radix_tree_iter iter;
struct extent_buffer *eb;
- void __rcu **slot;
+ unsigned long index, start = (block_group->start >> fs_info->nodesize_bits);
rcu_read_lock();
- radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter,
- block_group->start >> fs_info->sectorsize_bits) {
- eb = radix_tree_deref_slot(slot);
- if (!eb)
- continue;
- if (radix_tree_deref_retry(eb)) {
- slot = radix_tree_iter_retry(&iter);
- continue;
- }
-
+ xa_for_each_start(&fs_info->buffer_tree, index, eb, start) {
if (eb->start < block_group->start)
continue;
if (eb->start >= end)
break;
-
- slot = radix_tree_iter_resume(slot, &iter);
rcu_read_unlock();
wait_on_extent_buffer_writeback(eb);
rcu_read_lock();
@@ -2199,6 +2257,40 @@ static void wait_eb_writebacks(struct btrfs_block_group *block_group)
rcu_read_unlock();
}
+static int call_zone_finish(struct btrfs_block_group *block_group,
+ struct btrfs_io_stripe *stripe)
+{
+ struct btrfs_device *device = stripe->dev;
+ const u64 physical = stripe->physical;
+ struct btrfs_zoned_device_info *zinfo = device->zone_info;
+ int ret;
+
+ if (!device->bdev)
+ return 0;
+
+ if (zinfo->max_active_zones == 0)
+ return 0;
+
+ if (btrfs_dev_is_sequential(device, physical)) {
+ unsigned int nofs_flags;
+
+ nofs_flags = memalloc_nofs_save();
+ ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
+ physical >> SECTOR_SHIFT,
+ zinfo->zone_size >> SECTOR_SHIFT);
+ memalloc_nofs_restore(nofs_flags);
+
+ if (ret)
+ return ret;
+ }
+
+ if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA))
+ zinfo->reserved_active_zones++;
+ btrfs_dev_clear_active_zone(device, physical);
+
+ return 0;
+}
+
static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
@@ -2283,31 +2375,12 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
down_read(&dev_replace->rwsem);
map = block_group->physical_map;
for (i = 0; i < map->num_stripes; i++) {
- struct btrfs_device *device = map->stripes[i].dev;
- const u64 physical = map->stripes[i].physical;
- struct btrfs_zoned_device_info *zinfo = device->zone_info;
- unsigned int nofs_flags;
-
- if (!device->bdev)
- continue;
-
- if (zinfo->max_active_zones == 0)
- continue;
-
- nofs_flags = memalloc_nofs_save();
- ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
- physical >> SECTOR_SHIFT,
- zinfo->zone_size >> SECTOR_SHIFT);
- memalloc_nofs_restore(nofs_flags);
+ ret = call_zone_finish(block_group, &map->stripes[i]);
if (ret) {
up_read(&dev_replace->rwsem);
return ret;
}
-
- if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA))
- zinfo->reserved_active_zones++;
- btrfs_dev_clear_active_zone(device, physical);
}
up_read(&dev_replace->rwsem);
@@ -2439,7 +2512,7 @@ void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
/* For the work */
btrfs_get_block_group(bg);
- atomic_inc(&eb->refs);
+ refcount_inc(&eb->refs);
bg->last_eb = eb;
INIT_WORK(&bg->zone_finish_work, btrfs_zone_finish_endio_workfn);
queue_work(system_unbound_wq, &bg->zone_finish_work);
@@ -2455,6 +2528,104 @@ void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg)
spin_unlock(&fs_info->relocation_bg_lock);
}
+void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
+ struct btrfs_space_info *space_info = data_sinfo;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_block_group *bg;
+ struct list_head *bg_list;
+ u64 alloc_flags;
+ bool first = true;
+ bool did_chunk_alloc = false;
+ int index;
+ int ret;
+
+ if (!btrfs_is_zoned(fs_info))
+ return;
+
+ if (fs_info->data_reloc_bg)
+ return;
+
+ if (sb_rdonly(fs_info->sb))
+ return;
+
+ alloc_flags = btrfs_get_alloc_profile(fs_info, space_info->flags);
+ index = btrfs_bg_flags_to_raid_index(alloc_flags);
+
+ /* Scan the data space_info to find empty block groups. Take the second one. */
+again:
+ bg_list = &space_info->block_groups[index];
+ list_for_each_entry(bg, bg_list, list) {
+ if (bg->alloc_offset != 0)
+ continue;
+
+ if (first) {
+ first = false;
+ continue;
+ }
+
+ if (space_info == data_sinfo) {
+ /* Migrate the block group to the data relocation space_info. */
+ struct btrfs_space_info *reloc_sinfo = data_sinfo->sub_group[0];
+ int factor;
+
+ ASSERT(reloc_sinfo->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
+ factor = btrfs_bg_type_to_factor(bg->flags);
+
+ down_write(&space_info->groups_sem);
+ list_del_init(&bg->list);
+ /* We can assume this as we choose the second empty one. */
+ ASSERT(!list_empty(&space_info->block_groups[index]));
+ up_write(&space_info->groups_sem);
+
+ spin_lock(&space_info->lock);
+ space_info->total_bytes -= bg->length;
+ space_info->disk_total -= bg->length * factor;
+ /* There is no allocation ever happened. */
+ ASSERT(bg->used == 0);
+ ASSERT(bg->zone_unusable == 0);
+ /* No super block in a block group on the zoned setup. */
+ ASSERT(bg->bytes_super == 0);
+ spin_unlock(&space_info->lock);
+
+ bg->space_info = reloc_sinfo;
+ if (reloc_sinfo->block_group_kobjs[index] == NULL)
+ btrfs_sysfs_add_block_group_type(bg);
+
+ btrfs_add_bg_to_space_info(fs_info, bg);
+ }
+
+ fs_info->data_reloc_bg = bg->start;
+ set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &bg->runtime_flags);
+ btrfs_zone_activate(bg);
+
+ return;
+ }
+
+ if (did_chunk_alloc)
+ return;
+
+ trans = btrfs_join_transaction(fs_info->tree_root);
+ if (IS_ERR(trans))
+ return;
+
+ /* Allocate new BG in the data relocation space_info. */
+ space_info = data_sinfo->sub_group[0];
+ ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC);
+ ret = btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE);
+ btrfs_end_transaction(trans);
+ if (ret == 1) {
+ /*
+ * We allocated a new block group in the data relocation space_info. We
+ * can take that one.
+ */
+ first = false;
+ did_chunk_alloc = true;
+ goto again;
+ }
+}
+
void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info)
{
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
@@ -2477,8 +2648,8 @@ bool btrfs_zoned_should_reclaim(const struct btrfs_fs_info *fs_info)
{
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *device;
+ u64 total = btrfs_super_total_bytes(fs_info->super_copy);
u64 used = 0;
- u64 total = 0;
u64 factor;
ASSERT(btrfs_is_zoned(fs_info));
@@ -2491,7 +2662,6 @@ bool btrfs_zoned_should_reclaim(const struct btrfs_fs_info *fs_info)
if (!device->bdev)
continue;
- total += device->disk_total_bytes;
used += device->bytes_used;
}
mutex_unlock(&fs_devices->device_list_mutex);
@@ -2545,7 +2715,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
spin_lock(&block_group->lock);
if (block_group->reserved || block_group->alloc_offset == 0 ||
- (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) ||
+ !(block_group->flags & BTRFS_BLOCK_GROUP_DATA) ||
test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
spin_unlock(&block_group->lock);
continue;