Diffstat (limited to 'fs/btrfs')
-rw-r--r--  fs/btrfs/backref.c           |  19
-rw-r--r--  fs/btrfs/backref.h           |   6
-rw-r--r--  fs/btrfs/bio.c               |   6
-rw-r--r--  fs/btrfs/block-group.c       |  14
-rw-r--r--  fs/btrfs/block-rsv.c         |   3
-rw-r--r--  fs/btrfs/ctree.c             |  38
-rw-r--r--  fs/btrfs/disk-io.c           |  45
-rw-r--r--  fs/btrfs/file-item.c         |   9
-rw-r--r--  fs/btrfs/free-space-cache.c  |   7
-rw-r--r--  fs/btrfs/free-space-tree.c   |  50
-rw-r--r--  fs/btrfs/free-space-tree.h   |   3
-rw-r--r--  fs/btrfs/inode.c             |   3
-rw-r--r--  fs/btrfs/ioctl.c             |   4
-rw-r--r--  fs/btrfs/print-tree.c        |   6
-rw-r--r--  fs/btrfs/relocation.c        |   2
-rw-r--r--  fs/btrfs/scrub.c             |  83
-rw-r--r--  fs/btrfs/super.c             |  12
-rw-r--r--  fs/btrfs/tree-log.c          |   2
-rw-r--r--  fs/btrfs/volumes.c           |   1
-rw-r--r--  fs/btrfs/zoned.c             |  17
20 files changed, 252 insertions(+), 78 deletions(-)
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e54f0884802a..79336fa853db 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -45,7 +45,8 @@ static int check_extent_in_eb(struct btrfs_backref_walk_ctx *ctx,
 	int root_count;
 	bool cached;
 
-	if (!btrfs_file_extent_compression(eb, fi) &&
+	if (!ctx->ignore_extent_item_pos &&
+	    !btrfs_file_extent_compression(eb, fi) &&
 	    !btrfs_file_extent_encryption(eb, fi) &&
 	    !btrfs_file_extent_other_encoding(eb, fi)) {
 		u64 data_offset;
@@ -552,7 +553,7 @@ static int add_all_parents(struct btrfs_backref_walk_ctx *ctx,
 				count++;
 			else
 				goto next;
-			if (!ctx->ignore_extent_item_pos) {
+			if (!ctx->skip_inode_ref_list) {
 				ret = check_extent_in_eb(ctx, &key, eb, fi, &eie);
 				if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP ||
 				    ret < 0)
@@ -564,7 +565,7 @@ static int add_all_parents(struct btrfs_backref_walk_ctx *ctx,
 					  eie, (void **)&old, GFP_NOFS);
 			if (ret < 0)
 				break;
-			if (!ret && !ctx->ignore_extent_item_pos) {
+			if (!ret && !ctx->skip_inode_ref_list) {
 				while (old->next)
 					old = old->next;
 				old->next = eie;
@@ -1606,7 +1607,7 @@ again:
 			goto out;
 		}
 		if (ref->count && ref->parent) {
-			if (!ctx->ignore_extent_item_pos && !ref->inode_list &&
+			if (!ctx->skip_inode_ref_list && !ref->inode_list &&
 			    ref->level == 0) {
 				struct btrfs_tree_parent_check check = { 0 };
 				struct extent_buffer *eb;
@@ -1647,7 +1648,7 @@ again:
 						  (void **)&eie, GFP_NOFS);
 				if (ret < 0)
 					goto out;
-				if (!ret && !ctx->ignore_extent_item_pos) {
+				if (!ret && !ctx->skip_inode_ref_list) {
 					/*
 					 * We've recorded that parent, so we must extend
 					 * its inode list here.
@@ -1743,7 +1744,7 @@ int btrfs_find_all_leafs(struct btrfs_backref_walk_ctx *ctx)
 static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx)
 {
 	const u64 orig_bytenr = ctx->bytenr;
-	const bool orig_ignore_extent_item_pos = ctx->ignore_extent_item_pos;
+	const bool orig_skip_inode_ref_list = ctx->skip_inode_ref_list;
 	bool roots_ulist_allocated = false;
 	struct ulist_iterator uiter;
 	int ret = 0;
@@ -1764,7 +1765,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx)
 		roots_ulist_allocated = true;
 	}
 
-	ctx->ignore_extent_item_pos = true;
+	ctx->skip_inode_ref_list = true;
 
 	ULIST_ITER_INIT(&uiter);
 	while (1) {
@@ -1789,7 +1790,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx)
 	ulist_free(ctx->refs);
 	ctx->refs = NULL;
 	ctx->bytenr = orig_bytenr;
-	ctx->ignore_extent_item_pos = orig_ignore_extent_item_pos;
+	ctx->skip_inode_ref_list = orig_skip_inode_ref_list;
 
 	return ret;
 }
@@ -1912,7 +1913,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr,
 		goto out_trans;
 	}
 
-	walk_ctx.ignore_extent_item_pos = true;
+	walk_ctx.skip_inode_ref_list = true;
 	walk_ctx.trans = trans;
 	walk_ctx.fs_info = fs_info;
 	walk_ctx.refs = &ctx->refs;
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index ef6bbea3f456..1616e3e3f1e4 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -60,6 +60,12 @@ struct btrfs_backref_walk_ctx {
 	 * @extent_item_pos is ignored.
 	 */
 	bool ignore_extent_item_pos;
+	/*
+	 * If true and bytenr corresponds to a data extent, then the inode list
+	 * (each member describing inode number, file offset and root) is not
+	 * added to each reference added to the @refs ulist.
+	 */
+	bool skip_inode_ref_list;
 	/* A valid transaction handle or NULL. */
 	struct btrfs_trans_handle *trans;
 	/*
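The skip_inode_ref_list flag introduced above takes over the cases where callers only want the back references themselves, while ignore_extent_item_pos now strictly means "do not filter by extent item position" (note the new check in check_extent_in_eb()). A minimal caller sketch, assuming a hypothetical find_leafs_for_extent() wrapper; the context fields and the btrfs_find_all_leafs()/ulist_free() calls mirror the relocation.c hunk later in this diff:

	/* Sketch only: collect the leaves referencing a data extent without
	 * building the per-reference (inode, offset, root) lists. */
	static int find_leafs_for_extent(struct btrfs_fs_info *fs_info, u64 bytenr)
	{
		struct btrfs_backref_walk_ctx ctx = { 0 };
		int ret;

		ctx.bytenr = bytenr;
		ctx.fs_info = fs_info;
		ctx.skip_inode_ref_list = true;	/* only leaf bytenrs are needed */

		ret = btrfs_find_all_leafs(&ctx);
		if (ret < 0)
			return ret;
		/* ctx.refs now holds the leaf bytenrs and must be freed. */
		ulist_free(ctx.refs);
		return 0;
	}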
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 5379c4714905..b3ad0f51e616 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -330,7 +330,7 @@ static void btrfs_end_bio_work(struct work_struct *work)
 	if (bbio->inode && !(bbio->bio.bi_opf & REQ_META))
 		btrfs_check_read_bio(bbio, bbio->bio.bi_private);
 	else
-		bbio->end_io(bbio);
+		btrfs_orig_bbio_end_io(bbio);
 }
 
 static void btrfs_simple_end_io(struct bio *bio)
@@ -811,10 +811,6 @@ void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_
 		goto fail;
 
 	if (dev_replace) {
-		if (btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE && btrfs_is_zoned(fs_info)) {
-			bbio->bio.bi_opf &= ~REQ_OP_WRITE;
-			bbio->bio.bi_opf |= REQ_OP_ZONE_APPEND;
-		}
 		ASSERT(smap.dev == fs_info->dev_replace.srcdev);
 		smap.dev = fs_info->dev_replace.tgtdev;
 	}
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 957ad1c31c4f..590b03560265 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2818,10 +2818,20 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
 	}
 
 	ret = inc_block_group_ro(cache, 0);
-	if (!do_chunk_alloc || ret == -ETXTBSY)
-		goto unlock_out;
 	if (!ret)
 		goto out;
+	if (ret == -ETXTBSY)
+		goto unlock_out;
+
+	/*
+	 * Skip chunk allocation if the bg is SYSTEM, to avoid a system
+	 * chunk allocation storm exhausting the system chunk array.
+	 * Otherwise we still want to try our best to mark the block group read-only.
+	 */
+	if (!do_chunk_alloc && ret == -ENOSPC &&
+	    (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM))
+		goto unlock_out;
+
 	alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags);
 	ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
 	if (ret < 0)
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
index 3ab707e26fa2..ac18c43fadad 100644
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -124,7 +124,8 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
 	} else {
 		num_bytes = 0;
 	}
-	if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
+	if (qgroup_to_release_ret &&
+	    block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
 		qgroup_to_release = block_rsv->qgroup_rsv_reserved -
 				    block_rsv->qgroup_rsv_size;
 		block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
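Background on the block-group.c change, not spelled out in the hunk itself: SYSTEM chunks are also mirrored into the superblock's fixed-size chunk array, so forcing a new SYSTEM chunk allocation every time scrub fails to mark a SYSTEM block group read-only could exhaust that array. The resulting decision ladder, condensed from the hunk above (a sketch, with abbreviated comments):

	ret = inc_block_group_ro(cache, 0);
	if (!ret)
		goto out;		/* marked read-only, done */
	if (ret == -ETXTBSY)
		goto unlock_out;	/* cannot flip to RO right now, give up */
	/* -ENOSPC on a SYSTEM block group: never force a SYSTEM chunk alloc. */
	if (!do_chunk_alloc && ret == -ENOSPC &&
	    (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM))
		goto unlock_out;
	/* Anything else: force-allocate a chunk and retry. */
	ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);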
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 3c983c70028a..2ff2961b1183 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2627,6 +2627,10 @@ static bool check_sibling_keys(struct extent_buffer *left,
 	}
 
 	if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) {
+		btrfs_crit(left->fs_info, "left extent buffer:");
+		btrfs_print_tree(left, false);
+		btrfs_crit(left->fs_info, "right extent buffer:");
+		btrfs_print_tree(right, false);
 		btrfs_crit(left->fs_info,
"bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)",
 			   left_last.objectid, left_last.type,
@@ -3215,6 +3219,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
 
 	if (check_sibling_keys(left, right)) {
 		ret = -EUCLEAN;
+		btrfs_abort_transaction(trans, ret);
 		btrfs_tree_unlock(right);
 		free_extent_buffer(right);
 		return ret;
@@ -3433,6 +3438,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
 
 	if (check_sibling_keys(left, right)) {
 		ret = -EUCLEAN;
+		btrfs_abort_transaction(trans, ret);
 		goto out;
 	}
 	return __push_leaf_left(trans, path, min_data_size, empty, left,
@@ -4478,10 +4484,12 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
 {
 	struct btrfs_key key;
+	struct btrfs_key orig_key;
 	struct btrfs_disk_key found_key;
 	int ret;
 
 	btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
+	orig_key = key;
 
 	if (key.offset > 0) {
 		key.offset--;
@@ -4498,8 +4506,36 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
 
 	btrfs_release_path(path);
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-	if (ret < 0)
+	if (ret <= 0)
 		return ret;
+
+	/*
+	 * Previous key not found. Even if we were at slot 0 of the leaf we had
+	 * before releasing the path and calling btrfs_search_slot(), we now may
+	 * be in a slot pointing to the same original key - this can happen if
+	 * after we released the path, one or more items were moved from a
+	 * sibling leaf into the front of the leaf we had due to an insertion
+	 * (see push_leaf_right()).
+	 * If we hit this case and our slot is > 0, just decrement the slot
+	 * so that the caller does not process the same key again, which may or
+	 * may not break the caller, depending on its logic.
+	 */
+	if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
+		btrfs_item_key(path->nodes[0], &found_key, path->slots[0]);
+		ret = comp_keys(&found_key, &orig_key);
+		if (ret == 0) {
+			if (path->slots[0] > 0) {
+				path->slots[0]--;
+				return 0;
+			}
+			/*
+			 * At slot 0, same key as before, it means orig_key is
+			 * the lowest, leftmost, key in the tree. We're done.
+			 */
+			return 1;
+		}
+	}
+
 	btrfs_item_key(path->nodes[0], &found_key, 0);
 	ret = comp_keys(&found_key, &key);
 	/*
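The btrfs_prev_leaf() hunk closes a subtle window: after the path is released, a concurrent insertion can shift items from a sibling into the front of the old leaf (see push_leaf_right()), so the re-search may land back on the key the caller already processed. A hypothetical backwards-walk that relies on the new guarantee (process_item() is an illustrative stand-in, not a kernel helper):

	struct btrfs_key key;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	process_item(&key);			/* hypothetical consumer */
	/* btrfs_prev_leaf() returns 0 while there is something before us, 1 when done. */
	while (btrfs_prev_leaf(root, path) == 0) {
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		/* With the fix, this key is strictly before the previous one. */
		process_item(&key);
	}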
"clearing free space tree"); - ret = btrfs_clear_free_space_tree(fs_info); + if (rebuild_free_space_tree) { + btrfs_info(fs_info, "rebuilding free space tree"); + ret = btrfs_rebuild_free_space_tree(fs_info); if (ret) { btrfs_warn(fs_info, - "failed to clear free space tree: %d", ret); + "failed to rebuild free space tree: %d", ret); + goto out; + } + } + + if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && + !btrfs_test_opt(fs_info, FREE_SPACE_TREE)) { + btrfs_info(fs_info, "disabling free space tree"); + ret = btrfs_delete_free_space_tree(fs_info); + if (ret) { + btrfs_warn(fs_info, + "failed to disable free space tree: %d", ret); goto out; } } @@ -4925,7 +4937,11 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) */ inode = igrab(&btrfs_inode->vfs_inode); if (inode) { + unsigned int nofs_flag; + + nofs_flag = memalloc_nofs_save(); invalidate_inode_pages2(inode->i_mapping); + memalloc_nofs_restore(nofs_flag); iput(inode); } spin_lock(&root->delalloc_lock); @@ -5031,7 +5047,12 @@ static void btrfs_cleanup_bg_io(struct btrfs_block_group *cache) inode = cache->io_ctl.inode; if (inode) { + unsigned int nofs_flag; + + nofs_flag = memalloc_nofs_save(); invalidate_inode_pages2(inode->i_mapping); + memalloc_nofs_restore(nofs_flag); + BTRFS_I(inode)->generation = 0; cache->io_ctl.inode = NULL; iput(inode); diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 018c711a0bc8..d1cd0a692f8e 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -52,13 +52,13 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz u64 start, end, i_size; int ret; + spin_lock(&inode->lock); i_size = new_i_size ?: i_size_read(&inode->vfs_inode); if (btrfs_fs_incompat(fs_info, NO_HOLES)) { inode->disk_i_size = i_size; - return; + goto out_unlock; } - spin_lock(&inode->lock); ret = find_contiguous_extent_bit(&inode->file_extent_tree, 0, &start, &end, EXTENT_DIRTY); if (!ret && start == 0) @@ -66,6 +66,7 @@ void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_siz else i_size = 0; inode->disk_i_size = i_size; +out_unlock: spin_unlock(&inode->lock); } @@ -791,7 +792,9 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio) sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bytes_left), GFP_KERNEL); memalloc_nofs_restore(nofs_flag); - BUG_ON(!sums); /* -ENOMEM */ + if (!sums) + return BLK_STS_RESOURCE; + sums->len = bytes_left; ordered = btrfs_lookup_ordered_extent(inode, offset); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index d84cef89cdff..cf98a3c05480 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -870,15 +870,16 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, } spin_lock(&ctl->tree_lock); ret = link_free_space(ctl, e); - ctl->total_bitmaps++; - recalculate_thresholds(ctl); - spin_unlock(&ctl->tree_lock); if (ret) { + spin_unlock(&ctl->tree_lock); btrfs_err(fs_info, "Duplicate entries in free space cache, dumping"); kmem_cache_free(btrfs_free_space_cachep, e); goto free_cache; } + ctl->total_bitmaps++; + recalculate_thresholds(ctl); + spin_unlock(&ctl->tree_lock); list_add_tail(&e->list, &bitmaps); } diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 4d155a48ec59..b21da1446f2a 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1252,7 +1252,7 @@ out: return ret; } -int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info) +int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info) { 
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 4d155a48ec59..b21da1446f2a 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1252,7 +1252,7 @@ out:
 	return ret;
 }
 
-int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
+int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
 {
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *tree_root = fs_info->tree_root;
@@ -1298,6 +1298,54 @@ abort:
 	return ret;
 }
 
+int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_key key = {
+		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
+		.type = BTRFS_ROOT_ITEM_KEY,
+		.offset = 0,
+	};
+	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
+	struct rb_node *node;
+	int ret;
+
+	trans = btrfs_start_transaction(free_space_root, 1);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
+
+	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
+
+	ret = clear_free_space_tree(trans, free_space_root);
+	if (ret)
+		goto abort;
+
+	node = rb_first_cached(&fs_info->block_group_cache_tree);
+	while (node) {
+		struct btrfs_block_group *block_group;
+
+		block_group = rb_entry(node, struct btrfs_block_group,
+				       cache_node);
+		ret = populate_free_space_tree(trans, block_group);
+		if (ret)
+			goto abort;
+		node = rb_next(node);
+	}
+
+	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
+	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
+
+	ret = btrfs_commit_transaction(trans);
+	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
+	return ret;
+
+abort:
+	btrfs_abort_transaction(trans, ret);
+	btrfs_end_transaction(trans);
+	return ret;
+}
+
 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
 					struct btrfs_block_group *block_group,
 					struct btrfs_path *path)
diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h
index dc2463e4cfe3..6d5551d0ced8 100644
--- a/fs/btrfs/free-space-tree.h
+++ b/fs/btrfs/free-space-tree.h
@@ -18,7 +18,8 @@ struct btrfs_caching_control;
 
 void set_free_space_tree_thresholds(struct btrfs_block_group *block_group);
 int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info);
-int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info);
+int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info);
+int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info);
 int load_free_space_tree(struct btrfs_caching_control *caching_ctl);
 int add_block_group_free_space(struct btrfs_trans_handle *trans,
 			       struct btrfs_block_group *block_group);
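With the rename and the new function, the header now exports two entry points with distinct roles: btrfs_delete_free_space_tree() drops the tree entirely, while btrfs_rebuild_free_space_tree() clears and repopulates it within a single transaction, keeping BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED set until the commit. How the mount path chooses between them, condensed from the btrfs_start_pre_rw_mount() hunk earlier in this diff (sketch; names as above):

	if (rebuild_free_space_tree) {
		/* clear_cache was requested, or the tree is not marked valid. */
		ret = btrfs_rebuild_free_space_tree(fs_info);
	} else if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
		   !btrfs_test_opt(fs_info, FREE_SPACE_TREE)) {
		/* The tree exists on disk but the mount options disable it. */
		ret = btrfs_delete_free_space_tree(fs_info);
	}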
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 57d070025c7a..19c707bc8801 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3108,6 +3108,9 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 		btrfs_rewrite_logical_zoned(ordered_extent);
 		btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
 					ordered_extent->disk_num_bytes);
+	} else if (btrfs_is_data_reloc_root(inode->root)) {
+		btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
+					ordered_extent->disk_num_bytes);
 	}
 
 	if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 25833b4eeaf5..2fa36f694daa 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -454,7 +454,9 @@ void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
 	case BTRFS_EXCLOP_BALANCE_PAUSED:
 		spin_lock(&fs_info->super_lock);
 		ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE ||
-		       fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD);
+		       fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD ||
+		       fs_info->exclusive_operation == BTRFS_EXCLOP_NONE ||
+		       fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED);
 		fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED;
 		spin_unlock(&fs_info->super_lock);
 		break;
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index b93c96213304..497b9dbd8a13 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -151,10 +151,10 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
 			pr_cont("shared data backref parent %llu count %u\n",
 				offset, btrfs_shared_data_ref_count(eb, sref));
 			/*
-			 * offset is supposed to be a tree block which
-			 * must be aligned to nodesize.
+			 * Offset is supposed to be a tree block which must be
+			 * aligned to sectorsize.
 			 */
-			if (!IS_ALIGNED(offset, eb->fs_info->nodesize))
+			if (!IS_ALIGNED(offset, eb->fs_info->sectorsize))
 				pr_info(
 			"\t\t\t(parent %llu not aligned to sectorsize %u)\n",
 					offset, eb->fs_info->sectorsize);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 09b1988d1791..59a06499c647 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3422,7 +3422,7 @@ int add_data_references(struct reloc_control *rc,
 	btrfs_release_path(path);
 
 	ctx.bytenr = extent_key->objectid;
-	ctx.ignore_extent_item_pos = true;
+	ctx.skip_inode_ref_list = true;
 	ctx.fs_info = rc->extent_root->fs_info;
 
 	ret = btrfs_find_all_leafs(&ctx);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 836725a19661..50c241aba1a1 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -134,8 +134,14 @@ struct scrub_stripe {
 	 * The errors hit during the initial read of the stripe.
 	 *
 	 * Would be utilized for error reporting and repair.
+	 *
+	 * The init_nr_* counters record the number of errors hit, and are
+	 * only used by error reporting.
 	 */
 	unsigned long init_error_bitmap;
+	unsigned int init_nr_io_errors;
+	unsigned int init_nr_csum_errors;
+	unsigned int init_nr_meta_errors;
 
 	/*
 	 * The following error bitmaps are all for the current status.
@@ -1003,12 +1009,9 @@ skip:
 	sctx->stat.data_bytes_scrubbed += nr_data_sectors << fs_info->sectorsize_bits;
 	sctx->stat.tree_bytes_scrubbed += nr_meta_sectors << fs_info->sectorsize_bits;
 	sctx->stat.no_csum += nr_nodatacsum_sectors;
-	sctx->stat.read_errors +=
-		bitmap_weight(&stripe->io_error_bitmap, stripe->nr_sectors);
-	sctx->stat.csum_errors +=
-		bitmap_weight(&stripe->csum_error_bitmap, stripe->nr_sectors);
-	sctx->stat.verify_errors +=
-		bitmap_weight(&stripe->meta_error_bitmap, stripe->nr_sectors);
+	sctx->stat.read_errors += stripe->init_nr_io_errors;
+	sctx->stat.csum_errors += stripe->init_nr_csum_errors;
+	sctx->stat.verify_errors += stripe->init_nr_meta_errors;
 	sctx->stat.uncorrectable_errors +=
 		bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors);
 	sctx->stat.corrected_errors += nr_repaired_sectors;
@@ -1041,6 +1044,12 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
 	scrub_verify_one_stripe(stripe, stripe->extent_sector_bitmap);
 	/* Save the initial failed bitmap for later repair and report usage. */
 	stripe->init_error_bitmap = stripe->error_bitmap;
+	stripe->init_nr_io_errors = bitmap_weight(&stripe->io_error_bitmap,
+						  stripe->nr_sectors);
+	stripe->init_nr_csum_errors = bitmap_weight(&stripe->csum_error_bitmap,
+						    stripe->nr_sectors);
+	stripe->init_nr_meta_errors = bitmap_weight(&stripe->meta_error_bitmap,
+						    stripe->nr_sectors);
 
 	if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors))
 		goto out;
@@ -1137,6 +1146,35 @@ static void scrub_write_endio(struct btrfs_bio *bbio)
 		wake_up(&stripe->io_wait);
 }
 
+static void scrub_submit_write_bio(struct scrub_ctx *sctx,
+				   struct scrub_stripe *stripe,
+				   struct btrfs_bio *bbio, bool dev_replace)
+{
+	struct btrfs_fs_info *fs_info = sctx->fs_info;
+	u32 bio_len = bbio->bio.bi_iter.bi_size;
+	u32 bio_off = (bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT) -
+		      stripe->logical;
+
+	fill_writer_pointer_gap(sctx, stripe->physical + bio_off);
+	atomic_inc(&stripe->pending_io);
+	btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace);
+	if (!btrfs_is_zoned(fs_info))
+		return;
+	/*
+	 * For zoned writeback, queue depth must be 1, thus we must wait for
+	 * the write to finish before the next write.
+	 */
+	wait_scrub_stripe_io(stripe);
+
+	/*
+	 * We also need to update the write pointer if the write finished
+	 * successfully.
+	 */
+	if (!test_bit(bio_off >> fs_info->sectorsize_bits,
+		      &stripe->write_error_bitmap))
+		sctx->write_pointer += bio_len;
+}
+
 /*
  * Submit the write bio(s) for the sectors specified by @write_bitmap.
  *
@@ -1155,7 +1193,6 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
 {
 	struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
 	struct btrfs_bio *bbio = NULL;
-	const bool zoned = btrfs_is_zoned(fs_info);
 	int sector_nr;
 
 	for_each_set_bit(sector_nr, &write_bitmap, stripe->nr_sectors) {
@@ -1168,13 +1205,7 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
 
 		/* Cannot merge with previous sector, submit the current one. */
 		if (bbio && sector_nr && !test_bit(sector_nr - 1, &write_bitmap)) {
-			fill_writer_pointer_gap(sctx, stripe->physical +
-					(sector_nr << fs_info->sectorsize_bits));
-			atomic_inc(&stripe->pending_io);
-			btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace);
-			/* For zoned writeback, queue depth must be 1. */
-			if (zoned)
-				wait_scrub_stripe_io(stripe);
+			scrub_submit_write_bio(sctx, stripe, bbio, dev_replace);
 			bbio = NULL;
 		}
 		if (!bbio) {
@@ -1187,14 +1218,8 @@ static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *str
 		ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff);
 		ASSERT(ret == fs_info->sectorsize);
 	}
-	if (bbio) {
-		fill_writer_pointer_gap(sctx, bbio->bio.bi_iter.bi_sector <<
-					SECTOR_SHIFT);
-		atomic_inc(&stripe->pending_io);
-		btrfs_submit_repair_write(bbio, stripe->mirror_num, dev_replace);
-		if (zoned)
-			wait_scrub_stripe_io(stripe);
-	}
+	if (bbio)
+		scrub_submit_write_bio(sctx, stripe, bbio, dev_replace);
 }
 
 /*
@@ -1474,6 +1499,9 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe)
 {
 	stripe->extent_sector_bitmap = 0;
 	stripe->init_error_bitmap = 0;
+	stripe->init_nr_io_errors = 0;
+	stripe->init_nr_csum_errors = 0;
+	stripe->init_nr_meta_errors = 0;
 	stripe->error_bitmap = 0;
 	stripe->io_error_bitmap = 0;
 	stripe->csum_error_bitmap = 0;
@@ -1714,7 +1742,7 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx)
 				break;
 			}
 		}
-	} else {
+	} else if (!sctx->readonly) {
 		for (int i = 0; i < nr_stripes; i++) {
 			unsigned long repaired;
 
@@ -2518,13 +2546,20 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 
 		if (ret == 0) {
 			ro_set = 1;
-		} else if (ret == -ENOSPC && !sctx->is_dev_replace) {
+		} else if (ret == -ENOSPC && !sctx->is_dev_replace &&
+			   !(cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK)) {
 			/*
 			 * btrfs_inc_block_group_ro return -ENOSPC when it
 			 * failed in creating new chunk for metadata.
 			 * It is not a problem for scrub, because
 			 * metadata are always cowed, and our scrub paused
 			 * commit_transactions.
+			 *
+			 * For RAID56 chunks, we have to mark them read-only
+			 * for scrub, as later we would use our own cache
+			 * out of RAID56 realm.
+			 * Thus we want the RAID56 bg to be marked RO to
+			 * prevent RMW from screwing up our cache.
			 */
 			ro_set = 0;
 		} else if (ret == -ETXTBSY) {
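scrub_submit_write_bio() folds the two duplicated submission sites in scrub_write_sectors() into one helper and centralizes the zoned rules: a zone behaves as queue depth 1, so each repair write is waited on, and the device write pointer only advances when the write succeeded. A worked example of the offset arithmetic with assumed numbers (4 KiB sectors, a stripe at logical 1 GiB, a bio covering four sectors starting 64 KiB into the stripe; SZ_* constants from <linux/sizes.h>):

	/* Assumed values, for illustration only. */
	u64 stripe_logical = SZ_1G;				/* stripe->logical */
	u64 bi_sector = (SZ_1G + SZ_64K) >> SECTOR_SHIFT;	/* bio start sector */
	u32 bio_len = 4 * SZ_4K;				/* bi_iter.bi_size */

	u32 bio_off = (bi_sector << SECTOR_SHIFT) - stripe_logical;	/* 64 KiB */
	/* write_error_bitmap bit tested: 64 KiB >> sectorsize_bits (12) = bit 16 */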
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 6cb97efee976..efeb1a9d040a 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -826,7 +826,11 @@ out:
 	    !btrfs_test_opt(info, CLEAR_CACHE)) {
 		btrfs_err(info, "cannot disable free space tree");
 		ret = -EINVAL;
-
+	}
+	if (btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE) &&
+	    !btrfs_test_opt(info, FREE_SPACE_TREE)) {
+		btrfs_err(info, "cannot disable free space tree with block-group-tree feature");
+		ret = -EINVAL;
 	}
 	if (!ret)
 		ret = btrfs_check_mountopts_zoned(info);
@@ -1837,6 +1841,12 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 			btrfs_clear_sb_rdonly(sb);
 
 		set_bit(BTRFS_FS_OPEN, &fs_info->flags);
+
+		/*
+		 * If we've gone from readonly -> read/write, we need to get
+		 * our sync/async discard lists in the right state.
+		 */
+		btrfs_discard_resume(fs_info);
 	}
 out:
 	/*
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9b212e8c70cc..d2755d5e338b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6158,7 +6158,7 @@ static int log_delayed_deletions_incremental(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_root *log = inode->root->log_root;
 	const struct btrfs_delayed_item *curr;
-	u64 last_range_start;
+	u64 last_range_start = 0;
 	u64 last_range_end = 0;
 	struct btrfs_key key;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 03f52e4a20aa..841e799dece5 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -395,6 +395,7 @@ void btrfs_free_device(struct btrfs_device *device)
 {
 	WARN_ON(!list_empty(&device->post_commit_list));
 	rcu_string_free(device->name);
+	extent_io_tree_release(&device->alloc_state);
 	btrfs_destroy_dev_zone_info(device);
 	kfree(device);
 }
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index a9b32ba6b2ce..39828af4a4e8 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -122,10 +122,9 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
 		int i;
 
 		for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
-			u64 bytenr;
-
-			bytenr = ((zones[i].start + zones[i].len)
-				   << SECTOR_SHIFT) - BTRFS_SUPER_INFO_SIZE;
+			u64 zone_end = (zones[i].start + zones[i].capacity) << SECTOR_SHIFT;
+			u64 bytenr = ALIGN_DOWN(zone_end, BTRFS_SUPER_INFO_SIZE) -
+				     BTRFS_SUPER_INFO_SIZE;
 
 			page[i] = read_cache_page_gfp(mapping,
 					bytenr >> PAGE_SHIFT, GFP_NOFS);
@@ -1168,12 +1167,12 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
 		return -ERANGE;
 
 	/* All the zones are conventional */
-	if (find_next_bit(zinfo->seq_zones, begin, end) == end)
+	if (find_next_bit(zinfo->seq_zones, end, begin) == end)
 		return 0;
 
 	/* All the zones are sequential and empty */
-	if (find_next_zero_bit(zinfo->seq_zones, begin, end) == end &&
-	    find_next_zero_bit(zinfo->empty_zones, begin, end) == end)
+	if (find_next_zero_bit(zinfo->seq_zones, end, begin) == end &&
+	    find_next_zero_bit(zinfo->empty_zones, end, begin) == end)
 		return 0;
 
 	for (pos = start; pos < start + size; pos += zinfo->zone_size) {
@@ -1610,11 +1609,11 @@ void btrfs_redirty_list_add(struct btrfs_transaction *trans,
 	    !list_empty(&eb->release_list))
 		return;
 
+	memzero_extent_buffer(eb, 0, eb->len);
+	set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags);
 	set_extent_buffer_dirty(eb);
 	set_extent_bits_nowait(&trans->dirty_pages, eb->start,
 			       eb->start + eb->len - 1, EXTENT_DIRTY);
-	memzero_extent_buffer(eb, 0, eb->len);
-	set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags);
 
 	spin_lock(&trans->releasing_ebs_lock);
 	list_add_tail(&eb->release_list, &trans->releasing_ebs);
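The argument swap in btrfs_ensure_empty_zones() is a genuine API fix: the kernel bitmap search is find_next_bit(addr, size, offset), with the bitmap size coming before the starting offset, so the old calls searched a range of 'begin' bits starting at 'end'. The corrected emptiness test in isolation (zinfo, begin and end as in the hunk above):

	#include <linux/bitops.h>

	/*
	 * All zones in [begin, end) are conventional iff no bit is set in
	 * seq_zones over that range; find_next_bit() returns 'end' (the
	 * size argument) when nothing is found.
	 */
	if (find_next_bit(zinfo->seq_zones, end, begin) == end)
		return 0;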