summaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/backref.c19
-rw-r--r--fs/btrfs/backref.h6
-rw-r--r--fs/btrfs/bio.c6
-rw-r--r--fs/btrfs/block-group.c14
-rw-r--r--fs/btrfs/block-rsv.c3
-rw-r--r--fs/btrfs/ctree.c38
-rw-r--r--fs/btrfs/disk-io.c45
-rw-r--r--fs/btrfs/file-item.c9
-rw-r--r--fs/btrfs/free-space-cache.c7
-rw-r--r--fs/btrfs/free-space-tree.c50
-rw-r--r--fs/btrfs/free-space-tree.h3
-rw-r--r--fs/btrfs/inode.c3
-rw-r--r--fs/btrfs/ioctl.c4
-rw-r--r--fs/btrfs/print-tree.c6
-rw-r--r--fs/btrfs/relocation.c2
-rw-r--r--fs/btrfs/scrub.c83
-rw-r--r--fs/btrfs/super.c12
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c1
-rw-r--r--fs/btrfs/zoned.c17
20 files changed, 252 insertions, 78 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e54f0884802a..79336fa853db 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -45,7 +45,8 @@ static int check_extent_in_eb(struct btrfs_backref_walk_ctx *ctx,
int root_count;
bool cached;
- if (!btrfs_file_extent_compression(eb, fi) &&
+ if (!ctx->ignore_extent_item_pos &&
+ !btrfs_file_extent_compression(eb, fi) &&
!btrfs_file_extent_encryption(eb, fi) &&
!btrfs_file_extent_other_encoding(eb, fi)) {
u64 data_offset;
@@ -552,7 +553,7 @@ static int add_all_parents(struct btrfs_backref_walk_ctx *ctx,
count++;
else
goto next;
- if (!ctx->ignore_extent_item_pos) {
+ if (!ctx->skip_inode_ref_list) {
ret = check_extent_in_eb(ctx, &key, eb, fi, &eie);
if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP ||
ret < 0)
@@ -564,7 +565,7 @@ static int add_all_parents(struct btrfs_backref_walk_ctx *ctx,
eie, (void **)&old, GFP_NOFS);
if (ret < 0)
break;
- if (!ret && !ctx->ignore_extent_item_pos) {
+ if (!ret && !ctx->skip_inode_ref_list) {
while (old->next)
old = old->next;
old->next = eie;
@@ -1606,7 +1607,7 @@ again:
goto out;
}
if (ref->count && ref->parent) {
- if (!ctx->ignore_extent_item_pos && !ref->inode_list &&
+ if (!ctx->skip_inode_ref_list && !ref->inode_list &&
ref->level == 0) {
struct btrfs_tree_parent_check check = { 0 };
struct extent_buffer *eb;
@@ -1647,7 +1648,7 @@ again:
(void **)&eie, GFP_NOFS);
if (ret < 0)
goto out;
- if (!ret && !ctx->ignore_extent_item_pos) {
+ if (!ret && !ctx->skip_inode_ref_list) {
/*
* We've recorded that parent, so we must extend
* its inode list here.
@@ -1743,7 +1744,7 @@ int btrfs_find_all_leafs(struct btrfs_backref_walk_ctx *ctx)
static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx)
{
const u64 orig_bytenr = ctx->bytenr;
- const bool orig_ignore_extent_item_pos = ctx->ignore_extent_item_pos;
+ const bool orig_skip_inode_ref_list = ctx->skip_inode_ref_list;
bool roots_ulist_allocated = false;
struct ulist_iterator uiter;
int ret = 0;
@@ -1764,7 +1765,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx)
roots_ulist_allocated = true;
}
- ctx->ignore_extent_item_pos = true;
+ ctx->skip_inode_ref_list = true;
ULIST_ITER_INIT(&uiter);
while (1) {
@@ -1789,7 +1790,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx)
ulist_free(ctx->refs);
ctx->refs = NULL;
ctx->bytenr = orig_bytenr;
- ctx->ignore_extent_item_pos = orig_ignore_extent_item_pos;
+ ctx->skip_inode_ref_list = orig_skip_inode_ref_list;
return ret;
}
@@ -1912,7 +1913,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
goto out_trans;
}
- walk_ctx.ignore_extent_item_pos = true;
+ walk_ctx.skip_inode_ref_list = true;
walk_ctx.trans = trans;
walk_ctx.fs_info = fs_info;
walk_ctx.refs = &ctx->refs;
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index ef6bbea3f456..1616e3e3f1e4 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -60,6 +60,12 @@ struct btrfs_backref_walk_ctx {
* @extent_item_pos is ignored.
*/
bool ignore_extent_item_pos;
+ /*
+ * If true and bytenr corresponds to a data extent, then the inode list
+ * (each member describing inode number, file offset and root) is not
+ * added to each reference added to the @refs ulist.
+ */
+ bool skip_inode_ref_list;
/* A valid transaction handle or NULL. */
struct btrfs_trans_handle *trans;
/*
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 5379c4714905..b3ad0f51e616 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -330,7 +330,7 @@ static void btrfs_end_bio_work(struct work_struct *work)
if (bbio->inode && !(bbio->bio.bi_opf & REQ_META))
btrfs_check_read_bio(bbio, bbio->bio.bi_private);
else
- bbio->end_io(bbio);
+ btrfs_orig_bbio_end_io(bbio);
}
static void btrfs_simple_end_io(struct bio *bio)
@@ -811,10 +811,6 @@ void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_
goto fail;
if (dev_replace) {
- if (btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE && btrfs_is_zoned(fs_info)) {
- bbio->bio.bi_opf &= ~REQ_OP_WRITE;
- bbio->bio.bi_opf |= REQ_OP_ZONE_APPEND;
- }
ASSERT(smap.dev == fs_info->dev_replace.srcdev);
smap.dev = fs_info->dev_replace.tgtdev;
}
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 957ad1c31c4f..590b03560265 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2818,10 +2818,20 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
}
ret = inc_block_group_ro(cache, 0);
- if (!do_chunk_alloc || ret == -ETXTBSY)
- goto unlock_out;
if (!ret)
goto out;
+ if (ret == -ETXTBSY)
+ goto unlock_out;
+
+ /*
+ * Skip chunk alloction if the bg is SYSTEM, this is to avoid system
+ * chunk allocation storm to exhaust the system chunk array. Otherwise
+ * we still want to try our best to mark the block group read-only.
+ */
+ if (!do_chunk_alloc && ret == -ENOSPC &&
+ (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM))
+ goto unlock_out;
+
alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags);
ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
if (ret < 0)
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
index 3ab707e26fa2..ac18c43fadad 100644
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -124,7 +124,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
} else {
num_bytes = 0;
}
- if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
+ if (qgroup_to_release_ret &&
+ block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
qgroup_to_release = block_rsv->qgroup_rsv_reserved -
block_rsv->qgroup_rsv_size;
block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 3c983c70028a..2ff2961b1183 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2627,6 +2627,10 @@ static bool check_sibling_keys(struct extent_buffer *left,
}
if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) {
+ btrfs_crit(left->fs_info, "left extent buffer:");
+ btrfs_print_tree(left, false);
+ btrfs_crit(left->fs_info, "right extent buffer:");
+ btrfs_print_tree(right, false);
btrfs_crit(left->fs_info,
"bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)",
left_last.objectid, left_last.type,
@@ -3215,6 +3219,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (check_sibling_keys(left, right)) {
ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
btrfs_tree_unlock(right);
free_extent_buffer(right);
return ret;
@@ -3433,6 +3438,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
if (check_sibling_keys(left, right)) {
ret = -EUCLEAN;
+ btrfs_abort_transaction(trans, ret);
goto out;
}
return __push_leaf_left(trans, path, min_data_size, empty, left,
@@ -4478,10 +4484,12 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
struct btrfs_key key;
+ struct btrfs_key orig_key;
struct btrfs_disk_key found_key;
int ret;
btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
+ orig_key = key;
if (key.offset > 0) {
key.offset--;
@@ -4498,8 +4506,36 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
+ if (ret <= 0)
return ret;
+
+ /*
+ * Previous key not found. Even if we were at slot 0 of the leaf we had
+ * before releasing the path and calling btrfs_search_slot(), we now may
+ * be in a slot pointing to the same original key - this can happen if
+ * after we released the path, one of more items were moved from a
+ * sibling leaf into the front of the leaf we had due to an insertion
+ * (see push_leaf_right()).
+ * If we hit this case and our slot is > 0 and just decrement the slot
+ * so that the caller does not process the same key again, which may or
+ * may not break the caller, depending on its logic.
+ */
+ if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
+ btrfs_item_key(path->nodes[0], &found_key, path->slots[0]);
+ ret = comp_keys(&found_key, &orig_key);
+ if (ret == 0) {
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ return 0;
+ }
+ /*
+ * At slot 0, same key as before, it means orig_key is
+ * the lowest, leftmost, key in the tree. We're done.
+ */
+ return 1;
+ }
+ }
+
btrfs_item_key(path->nodes[0], &found_key, 0);
ret = comp_keys(&found_key, &key);
/*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 59ea049fe7ee..88e6d1072a35 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -96,7 +96,7 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result)
crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
first_page_part - BTRFS_CSUM_SIZE);
- for (i = 1; i < num_pages; i++) {
+ for (i = 1; i < num_pages && INLINE_EXTENT_BUFFER_PAGES > 1; i++) {
kaddr = page_address(buf->pages[i]);
crypto_shash_update(shash, kaddr, PAGE_SIZE);
}
@@ -242,7 +242,6 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
int mirror_num)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
- u64 start = eb->start;
int i, num_pages = num_extent_pages(eb);
int ret = 0;
@@ -251,12 +250,14 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
for (i = 0; i < num_pages; i++) {
struct page *p = eb->pages[i];
+ u64 start = max_t(u64, eb->start, page_offset(p));
+ u64 end = min_t(u64, eb->start + eb->len, page_offset(p) + PAGE_SIZE);
+ u32 len = end - start;
- ret = btrfs_repair_io_failure(fs_info, 0, start, PAGE_SIZE,
- start, p, start - page_offset(p), mirror_num);
+ ret = btrfs_repair_io_failure(fs_info, 0, start, len,
+ start, p, offset_in_page(start), mirror_num);
if (ret)
break;
- start += PAGE_SIZE;
}
return ret;
@@ -3121,23 +3122,34 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
{
int ret;
const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE);
- bool clear_free_space_tree = false;
+ bool rebuild_free_space_tree = false;
if (btrfs_test_opt(fs_info, CLEAR_CACHE) &&
btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
- clear_free_space_tree = true;
+ rebuild_free_space_tree = true;
} else if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID)) {
btrfs_warn(fs_info, "free space tree is invalid");
- clear_free_space_tree = true;
+ rebuild_free_space_tree = true;
}
- if (clear_free_space_tree) {
- btrfs_info(fs_info, "clearing free space tree");
- ret = btrfs_clear_free_space_tree(fs_info);
+ if (rebuild_free_space_tree) {
+ btrfs_info(fs_info, "rebuilding free space tree");
+ ret = btrfs_rebuild_free_space_tree(fs_info);
if (ret) {
btrfs_warn(fs_info,
- "failed to clear free space tree: %d", ret);
+ "failed to rebuild free space tree: %d", ret);
+ goto out;
+ }
+ }
+
+ if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
+ !btrfs_test_opt(fs_info, FREE_SPACE_TREE)) {
+ btrfs_info(fs_info, "disabling free space tree");
+ ret = btrfs_delete_free_space_tree(fs_info);
+ if (ret) {
+ btrfs_warn(fs_info,
+ "failed to disable free space tree: %d", ret);
goto out;
}
}
@@ -4925,7 +4937,11 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
*/
inode = igrab(&btrfs_inode->vfs_inode);
if (inode) {
+ unsigned int nofs_flag;
+
+ nofs_flag = memalloc_nofs_save();
invalidate_inode_pages2(inode->i_mapping);
+ memalloc_nofs_restore(nofs_flag);
iput(inode);
}
spin_lock(&root->delalloc_lock);
@@ -5031,7 +5047,12 @@ static void btrfs_cleanup_bg_io(struct btrfs_block_group *cache)
inode = cache->io_ctl.inode;
if (inode) {
+ unsigned int nofs_flag;
+
+ nofs_flag = memalloc_nofs_save();
invalidate_inode_pages2(inode->i_mapping);
+ memalloc_nofs_restore(nofs_flag);
+
BTRFS_I(inode)->generation = 0;
cache->io_ctl.inode = NULL;
iput(inode);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 018c711a0bc8..d1cd0a692f8e 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -52,13 +52,13 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
u64 start, end, i_size;
int ret;
+ spin_lock(&inode->lock);
i_size = new_i_size ?: i_size_read(&inode->vfs_inode);
if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
inode->disk_i_size = i_size;
- return;
+ goto out_unlock;
}
- spin_lock(&inode->lock);
ret = find_contiguous_extent_bit(&inode->file_extent_tree, 0, &start,
&end, EXTENT_DIRTY);
if (!ret && start == 0)
@@ -66,6 +66,7 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz
else
i_size = 0;
inode->disk_i_size = i_size;
+out_unlock:
spin_unlock(&inode->lock);
}
@@ -791,7 +792,9 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
sums = kvzalloc(btrfs_ordered_sum_size(fs_info,
bytes_left), GFP_KERNEL);
memalloc_nofs_restore(nofs_flag);
- BUG_ON(!sums); /* -ENOMEM */
+ if (!sums)
+ return BLK_STS_RESOURCE;
+
sums->len = bytes_left;
ordered = btrfs_lookup_ordered_extent(inode,
offset);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d84cef89cdff..cf98a3c05480 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -870,15 +870,16 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
}
spin_lock(&ctl->tree_lock);
ret = link_free_space(ctl, e);
- ctl->total_bitmaps++;
- recalculate_thresholds(ctl);
- spin_unlock(&ctl->tree_lock);
if (ret) {
+ spin_unlock(&ctl->tree_lock);
btrfs_err(fs_info,
"Duplicate entries in free space cache, dumping");
kmem_cache_free(btrfs_free_space_cachep, e);
goto free_cache;
}
+ ctl->total_bitmaps++;
+ recalculate_thresholds(ctl);
+ spin_unlock(&ctl->tree_lock);
list_add_tail(&e->list, &bitmaps);
}
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 4d155a48ec59..b21da1446f2a 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1252,7 +1252,7 @@ out:
return ret;
}
-int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
+int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *tree_root = fs_info->tree_root;
@@ -1298,6 +1298,54 @@ abort:
return ret;
}
+int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_key key = {
+ .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
+ .type = BTRFS_ROOT_ITEM_KEY,
+ .offset = 0,
+ };
+ struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
+ struct rb_node *node;
+ int ret;
+
+ trans = btrfs_start_transaction(free_space_root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+ set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
+
+ ret = clear_free_space_tree(trans, free_space_root);
+ if (ret)
+ goto abort;
+
+ node = rb_first_cached(&fs_info->block_group_cache_tree);
+ while (node) {
+ struct btrfs_block_group *block_group;
+
+ block_group = rb_entry(node, struct btrfs_block_group,
+ cache_node);
+ ret = populate_free_space_tree(trans, block_group);
+ if (ret)
+ goto abort;
+ node = rb_next(node);
+ }
+
+ btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+ btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
+ clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+
+ ret = btrfs_commit_transaction(trans);
+ clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
+ return ret;
+abort:
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ return ret;
+}
+
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct btrfs_path *path)
diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h
index dc2463e4cfe3..6d5551d0ced8 100644
--- a/fs/btrfs/free-space-tree.h
+++ b/fs/btrfs/free-space-tree.h
@@ -18,7 +18,8 @@ struct btrfs_caching_control;
void set_free_space_tree_thresholds(struct btrfs_block_group *block_group);
int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info);
-int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info);
+int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info);
+int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info);
int load_free_space_tree(struct btrfs_caching_control *caching_ctl);
int add_block_group_free_space(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 57d070025c7a..19c707bc8801 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3108,6 +3108,9 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
btrfs_rewrite_logical_zoned(ordered_extent);
btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
ordered_extent->disk_num_bytes);
+ } else if (btrfs_is_data_reloc_root(inode->root)) {
+ btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
+ ordered_extent->disk_num_bytes);
}
if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 25833b4eeaf5..2fa36f694daa 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -454,7 +454,9 @@ void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
case BTRFS_EXCLOP_BALANCE_PAUSED:
spin_lock(&fs_info->super_lock);
ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE ||
- fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD);
+ fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD ||
+ fs_info->exclusive_operation == BTRFS_EXCLOP_NONE ||
+ fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
spin_unlock(&fs_info->super_lock);
break;
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index b93c96213304..497b9dbd8a13 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -151,10 +151,10 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
pr_cont("shared data backref parent %llu count %u\n",
offset, btrfs_shared_data_ref_count(eb, sref));
/*
- * offset is supposed to be a tree block which
- * must be aligned to nodesize.
+ * Offset is supposed to be a tree block which must be
+ * aligned to sectorsize.
*/
- if (!IS_ALIGNED(offset, eb->fs_info->nodesize))
+ if (!IS_ALIGNED(offset, eb->fs_info->sectorsize))
pr_info(
"\t\t\t(parent %llu not aligned to sectorsize %u)\n",
offset, eb->fs_info->sectorsize);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 09b1988d1791..59a06499c647 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3422,7 +3422,7 @@ int add_data_references(struct reloc_control *rc,
btrfs_release_path(path);
ctx.bytenr = extent_key->objectid;
- ctx.ignore_extent_item_pos = true;
+ ctx.skip_inode_ref_list = true;
ctx.fs_info = rc->extent_root->fs_info;
ret = btrfs_find_all_leafs(&ctx);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 836725a19661..50c241aba1a1 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -134,8 +134,14 @@ struct scrub_stripe {
* The errors hit during the initial read of the stripe.
*
* Would be utilized for error reporting and repair.
+ *
+ * The remaining init_nr_* records the number of errors hit, only used
+ * by error reporting.
*/
unsigned long init_error_bitmap;
+ unsigned int init_nr_io_errors;
+ unsigned int init_nr_csum_errors;
+ unsigned int init_nr_meta_errors;
/*
* The following error bitmaps are all for the current status.
@@ -1003,12 +1009,9 @@ skip:
sctx->stat.data_bytes_scrubbed += nr_data_sectors << fs_info->sectorsize_bits;
sctx->stat.tree_bytes_scrubbed += nr_meta_sectors << fs_info->sectorsize_bits;
sctx->stat.no_csum += nr_nodatacsum_sectors;
- sctx->stat.read_errors +=
- bitmap_weight(&stripe->io_error_bitmap, stripe->nr_sectors);
- sctx->stat.csum_errors +=
- bitmap_weight(&stripe->csum_error_bitmap, stripe->nr_sectors);
- sctx->stat.verify_errors +=
- bitmap_weight(&stripe->meta_error_bitmap, stripe->nr_sectors);
+ sctx->stat.read_errors += stripe->init_nr_io_errors;
+ sctx->stat.csum_errors += stripe->init_nr_csum_errors;
+ sctx->stat.verify_errors += stripe->init_nr_meta_errors;
sctx->stat.uncorrectable_errors +=
bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors);
sctx->stat.corrected_errors += nr_repaired_sectors;
@@ -1041,6 +1044,12 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
scrub_verify_one_stripe(stripe, stripe->extent_sector_bitmap);
/* Save the initial failed bitmap for later repair and report usage. */
stripe->init_error_bitmap = stripe->error_bitmap;
+ stripe->init_nr_io_errors = bitmap_weight(&stripe->io_error_bitmap,
+ stripe->nr_sectors);
+ stripe->init_nr_csum_errors = bitmap_weight(&stripe->csum_error_bitmap,
+ stripe->nr_sectors);
+ stripe->init_nr_meta_errors = bitmap_weight(&stripe->meta_error_bitmap,
+ stripe->nr_sectors);
if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors))
goto out;
@@ -1137,6 +1146,35 @@ static void scrub_write_endio(struct btrfs_bio *bbio)
wake_up(&stripe->io_wait);
}
+static void scrub_submit_write_bio(struct scrub_ctx *sctx,
+ struct scrub_stripe *stripe,
+ struct btrfs_bio *bbio, bool dev_replace)
+{
+ struct btrfs_fs_info *fs_info = sctx->fs_info;
+ u32 bio_len = bbio->bio.bi_iter.bi_size;
+ u32 bio_off = (bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT) -
+ stripe->logical;
+
+ fill_writer_pointer_gap(sctx, stripe->physical + bio_off);
+ atomic_inc(&stripe->pending_io);
+ btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace);
+ if (!btrfs_is_zoned(fs_info))
+ return;
+ /*
+ * For zoned writeback, queue depth must be 1, thus we must wait for
+ * the write to finish before the next write.
+ */
+ wait_scrub_stripe_io(stripe);
+
+ /*
+ * And also need to update the write pointer if write finished
+ * successfully.
+ */
+ if (!test_bit(bio_off >> fs_info->sectorsize_bits,
+ &stripe->write_error_bitmap))
+ sctx->write_pointer += bio_len;
+}
+
/*
* Submit the write bio(s) for the sectors specified by @write_bitmap.
*
@@ -1155,7 +1193,6 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
{
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
struct btrfs_bio *bbio = NULL;
- const bool zoned = btrfs_is_zoned(fs_info);
int sector_nr;
for_each_set_bit(sector_nr, &write_bitmap, stripe->nr_sectors) {
@@ -1168,13 +1205,7 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
/* Cannot merge with previous sector, submit the current one. */
if (bbio && sector_nr && !test_bit(sector_nr - 1, &write_bitmap)) {
- fill_writer_pointer_gap(sctx, stripe->physical +
- (sector_nr << fs_info->sectorsize_bits));
- atomic_inc(&stripe->pending_io);
- btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace);
- /* For zoned writeback, queue depth must be 1. */
- if (zoned)
- wait_scrub_stripe_io(stripe);
+ scrub_submit_write_bio(sctx, stripe, bbio, dev_replace);
bbio = NULL;
}
if (!bbio) {
@@ -1187,14 +1218,8 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
ASSERT(ret == fs_info->sectorsize);
}
- if (bbio) {
- fill_writer_pointer_gap(sctx, bbio->bio.bi_iter.bi_sector <<
- SECTOR_SHIFT);
- atomic_inc(&stripe->pending_io);
- btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace);
- if (zoned)
- wait_scrub_stripe_io(stripe);
- }
+ if (bbio)
+ scrub_submit_write_bio(sctx, stripe, bbio, dev_replace);
}
/*
@@ -1474,6 +1499,9 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe)
{
stripe->extent_sector_bitmap = 0;
stripe->init_error_bitmap = 0;
+ stripe->init_nr_io_errors = 0;
+ stripe->init_nr_csum_errors = 0;
+ stripe->init_nr_meta_errors = 0;
stripe->error_bitmap = 0;
stripe->io_error_bitmap = 0;
stripe->csum_error_bitmap = 0;
@@ -1714,7 +1742,7 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx)
break;
}
}
- } else {
+ } else if (!sctx->readonly) {
for (int i = 0; i < nr_stripes; i++) {
unsigned long repaired;
@@ -2518,13 +2546,20 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
if (ret == 0) {
ro_set = 1;
- } else if (ret == -ENOSPC && !sctx->is_dev_replace) {
+ } else if (ret == -ENOSPC && !sctx->is_dev_replace &&
+ !(cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK)) {
/*
* btrfs_inc_block_group_ro return -ENOSPC when it
* failed in creating new chunk for metadata.
* It is not a problem for scrub, because
* metadata are always cowed, and our scrub paused
* commit_transactions.
+ *
+ * For RAID56 chunks, we have to mark them read-only
+ * for scrub, as later we would use our own cache
+ * out of RAID56 realm.
+ * Thus we want the RAID56 bg to be marked RO to
+ * prevent RMW from screwing up out cache.
*/
ro_set = 0;
} else if (ret == -ETXTBSY) {
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 6cb97efee976..efeb1a9d040a 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -826,7 +826,11 @@ out:
!btrfs_test_opt(info, CLEAR_CACHE)) {
btrfs_err(info, "cannot disable free space tree");
ret = -EINVAL;
-
+ }
+ if (btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE) &&
+ !btrfs_test_opt(info, FREE_SPACE_TREE)) {
+ btrfs_err(info, "cannot disable free space tree with block-group-tree feature");
+ ret = -EINVAL;
}
if (!ret)
ret = btrfs_check_mountopts_zoned(info);
@@ -1837,6 +1841,12 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
btrfs_clear_sb_rdonly(sb);
set_bit(BTRFS_FS_OPEN, &fs_info->flags);
+
+ /*
+ * If we've gone from readonly -> read/write, we need to get
+ * our sync/async discard lists in the right state.
+ */
+ btrfs_discard_resume(fs_info);
}
out:
/*
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9b212e8c70cc..d2755d5e338b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6158,7 +6158,7 @@ static int log_delayed_deletions_incremental(struct btrfs_trans_handle *trans,
{
struct btrfs_root *log = inode->root->log_root;
const struct btrfs_delayed_item *curr;
- u64 last_range_start;
+ u64 last_range_start = 0;
u64 last_range_end = 0;
struct btrfs_key key;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 03f52e4a20aa..841e799dece5 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -395,6 +395,7 @@ void btrfs_free_device(struct btrfs_device *device)
{
WARN_ON(!list_empty(&device->post_commit_list));
rcu_string_free(device->name);
+ extent_io_tree_release(&device->alloc_state);
btrfs_destroy_dev_zone_info(device);
kfree(device);
}
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index a9b32ba6b2ce..39828af4a4e8 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -122,10 +122,9 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
int i;
for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
- u64 bytenr;
-
- bytenr = ((zones[i].start + zones[i].len)
- << SECTOR_SHIFT) - BTRFS_SUPER_INFO_SIZE;
+ u64 zone_end = (zones[i].start + zones[i].capacity) << SECTOR_SHIFT;
+ u64 bytenr = ALIGN_DOWN(zone_end, BTRFS_SUPER_INFO_SIZE) -
+ BTRFS_SUPER_INFO_SIZE;
page[i] = read_cache_page_gfp(mapping,
bytenr >> PAGE_SHIFT, GFP_NOFS);
@@ -1168,12 +1167,12 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
return -ERANGE;
/* All the zones are conventional */
- if (find_next_bit(zinfo->seq_zones, begin, end) == end)
+ if (find_next_bit(zinfo->seq_zones, end, begin) == end)
return 0;
/* All the zones are sequential and empty */
- if (find_next_zero_bit(zinfo->seq_zones, begin, end) == end &&
- find_next_zero_bit(zinfo->empty_zones, begin, end) == end)
+ if (find_next_zero_bit(zinfo->seq_zones, end, begin) == end &&
+ find_next_zero_bit(zinfo->empty_zones, end, begin) == end)
return 0;
for (pos = start; pos < start + size; pos += zinfo->zone_size) {
@@ -1610,11 +1609,11 @@ void btrfs_redirty_list_add(struct btrfs_transaction *trans,
!list_empty(&eb->release_list))
return;
+ memzero_extent_buffer(eb, 0, eb->len);
+ set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags);
set_extent_buffer_dirty(eb);
set_extent_bits_nowait(&trans->dirty_pages, eb->start,
eb->start + eb->len - 1, EXTENT_DIRTY);
- memzero_extent_buffer(eb, 0, eb->len);
- set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags);
spin_lock(&trans->releasing_ebs_lock);
list_add_tail(&eb->release_list, &trans->releasing_ebs);