From 597441b3436a43011f31ce71dc0a6c0bf5ce958a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 11 May 2023 12:45:59 -0400 Subject: btrfs: use nofs when cleaning up aborted transactions Our CI system caught a lockdep splat: ====================================================== WARNING: possible circular locking dependency detected 6.3.0-rc7+ #1167 Not tainted ------------------------------------------------------ kswapd0/46 is trying to acquire lock: ffff8c6543abd650 (sb_internal#2){++++}-{0:0}, at: btrfs_commit_inode_delayed_inode+0x5f/0x120 but task is already holding lock: ffffffffabe61b40 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0x4aa/0x7a0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (fs_reclaim){+.+.}-{0:0}: fs_reclaim_acquire+0xa5/0xe0 kmem_cache_alloc+0x31/0x2c0 alloc_extent_state+0x1d/0xd0 __clear_extent_bit+0x2e0/0x4f0 try_release_extent_mapping+0x216/0x280 btrfs_release_folio+0x2e/0x90 invalidate_inode_pages2_range+0x397/0x470 btrfs_cleanup_dirty_bgs+0x9e/0x210 btrfs_cleanup_one_transaction+0x22/0x760 btrfs_commit_transaction+0x3b7/0x13a0 create_subvol+0x59b/0x970 btrfs_mksubvol+0x435/0x4f0 __btrfs_ioctl_snap_create+0x11e/0x1b0 btrfs_ioctl_snap_create_v2+0xbf/0x140 btrfs_ioctl+0xa45/0x28f0 __x64_sys_ioctl+0x88/0xc0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc -> #0 (sb_internal#2){++++}-{0:0}: __lock_acquire+0x1435/0x21a0 lock_acquire+0xc2/0x2b0 start_transaction+0x401/0x730 btrfs_commit_inode_delayed_inode+0x5f/0x120 btrfs_evict_inode+0x292/0x3d0 evict+0xcc/0x1d0 inode_lru_isolate+0x14d/0x1e0 __list_lru_walk_one+0xbe/0x1c0 list_lru_walk_one+0x58/0x80 prune_icache_sb+0x39/0x60 super_cache_scan+0x161/0x1f0 do_shrink_slab+0x163/0x340 shrink_slab+0x1d3/0x290 shrink_node+0x300/0x720 balance_pgdat+0x35c/0x7a0 kswapd+0x205/0x410 kthread+0xf0/0x120 ret_from_fork+0x29/0x50 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(fs_reclaim); lock(sb_internal#2); lock(fs_reclaim); lock(sb_internal#2); *** DEADLOCK *** 3 locks held by kswapd0/46: #0: ffffffffabe61b40 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat+0x4aa/0x7a0 #1: ffffffffabe50270 (shrinker_rwsem){++++}-{3:3}, at: shrink_slab+0x113/0x290 #2: ffff8c6543abd0e0 (&type->s_umount_key#44){++++}-{3:3}, at: super_cache_scan+0x38/0x1f0 stack backtrace: CPU: 0 PID: 46 Comm: kswapd0 Not tainted 6.3.0-rc7+ #1167 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 Call Trace: dump_stack_lvl+0x58/0x90 check_noncircular+0xd6/0x100 ? save_trace+0x3f/0x310 ? add_lock_to_list+0x97/0x120 __lock_acquire+0x1435/0x21a0 lock_acquire+0xc2/0x2b0 ? btrfs_commit_inode_delayed_inode+0x5f/0x120 start_transaction+0x401/0x730 ? btrfs_commit_inode_delayed_inode+0x5f/0x120 btrfs_commit_inode_delayed_inode+0x5f/0x120 btrfs_evict_inode+0x292/0x3d0 ? lock_release+0x134/0x270 ? __pfx_wake_bit_function+0x10/0x10 evict+0xcc/0x1d0 inode_lru_isolate+0x14d/0x1e0 __list_lru_walk_one+0xbe/0x1c0 ? __pfx_inode_lru_isolate+0x10/0x10 ? __pfx_inode_lru_isolate+0x10/0x10 list_lru_walk_one+0x58/0x80 prune_icache_sb+0x39/0x60 super_cache_scan+0x161/0x1f0 do_shrink_slab+0x163/0x340 shrink_slab+0x1d3/0x290 shrink_node+0x300/0x720 balance_pgdat+0x35c/0x7a0 kswapd+0x205/0x410 ? __pfx_autoremove_wake_function+0x10/0x10 ? __pfx_kswapd+0x10/0x10 kthread+0xf0/0x120 ? 
__pfx_kthread+0x10/0x10 ret_from_fork+0x29/0x50 This happens because when we abort the transaction in the transaction commit path we call invalidate_inode_pages2_range on our block group cache inodes (if we have space cache v1) and any delalloc inodes we may have. The plain invalidate_inode_pages2_range() call passes through GFP_KERNEL, which makes sense in most cases, but not here. Wrap these two invalidate callees with memalloc_nofs_save/memalloc_nofs_restore to make sure we don't end up with the fs reclaim dependency under the transaction dependency. CC: stable@vger.kernel.org # 4.14+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fbf9006c6234..6de6dcf2743e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4936,7 +4936,11 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) */ inode = igrab(&btrfs_inode->vfs_inode); if (inode) { + unsigned int nofs_flag; + + nofs_flag = memalloc_nofs_save(); invalidate_inode_pages2(inode->i_mapping); + memalloc_nofs_restore(nofs_flag); iput(inode); } spin_lock(&root->delalloc_lock); @@ -5042,7 +5046,12 @@ static void btrfs_cleanup_bg_io(struct btrfs_block_group *cache) inode = cache->io_ctl.inode; if (inode) { + unsigned int nofs_flag; + + nofs_flag = memalloc_nofs_save(); invalidate_inode_pages2(inode->i_mapping); + memalloc_nofs_restore(nofs_flag); + BTRFS_I(inode)->generation = 0; cache->io_ctl.inode = NULL; iput(inode); -- cgit v1.2.3 From 5ad9b4719fc9bc4715c7e19875a962095b0577e7 Mon Sep 17 00:00:00 2001 From: pengfuyuan Date: Tue, 23 May 2023 15:09:55 +0800 Subject: btrfs: fix csum_tree_block page iteration to avoid tripping on -Werror=array-bounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling on a MIPS 64-bit machine we get these warnings: In file included from ./arch/mips/include/asm/cacheflush.h:13, from ./include/linux/cacheflush.h:5, from ./include/linux/highmem.h:8, from ./include/linux/bvec.h:10, from ./include/linux/blk_types.h:10, from ./include/linux/blkdev.h:9, from fs/btrfs/disk-io.c:7: fs/btrfs/disk-io.c: In function ‘csum_tree_block’: fs/btrfs/disk-io.c:100:34: error: array subscript 1 is above array bounds of ‘struct page *[1]’ [-Werror=array-bounds] 100 | kaddr = page_address(buf->pages[i]); | ~~~~~~~~~~^~~ ./include/linux/mm.h:2135:48: note: in definition of macro ‘page_address’ 2135 | #define page_address(page) lowmem_page_address(page) | ^~~~ cc1: all warnings being treated as errors We can check if i overflows to solve the problem. However, this doesn't make much sense, since i == 1 and num_pages == 1 doesn't execute the body of the loop. In addition, i < num_pages can also ensure that buf->pages[i] will not cross the boundary. Unfortunately, this doesn't help with the problem observed here: gcc still complains. To fix this add a compile-time condition for the extent buffer page array size limit, which would eventually lead to eliminating the whole for loop. 
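As a minimal standalone sketch of the pattern the fix relies on (NELEMS and sum_tail() are invented names, not btrfs code): with a one-element array, gcc's -Werror=array-bounds flags the arr[i] access even though the loop body can never execute, and adding a compile-time-constant term to the loop condition lets the compiler prove the body dead and drop the warning along with it.

#define NELEMS 1	/* stands in for INLINE_EXTENT_BUFFER_PAGES on 64K-page systems */

static unsigned long sum_tail(const unsigned long arr[NELEMS], int nelems)
{
	unsigned long sum = 0;
	int i;

	/*
	 * "i < nelems" alone may still trip -Werror=array-bounds for arr[1];
	 * the constant "NELEMS > 1" term makes the loop provably dead at
	 * compile time, so both the loop and the warning disappear.
	 */
	for (i = 1; i < nelems && NELEMS > 1; i++)
		sum += arr[i];

	return sum;
}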
CC: stable@vger.kernel.org # 5.10+ Signed-off-by: pengfuyuan Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6de6dcf2743e..2b1b227505f3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -96,7 +96,7 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result) crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE, first_page_part - BTRFS_CSUM_SIZE); - for (i = 1; i < num_pages; i++) { + for (i = 1; i < num_pages && INLINE_EXTENT_BUFFER_PAGES > 1; i++) { kaddr = page_address(buf->pages[i]); crypto_shash_update(shash, kaddr, PAGE_SIZE); } -- cgit v1.2.3 From 917ac77846b907dfdbd878688a9a61236ad6c51e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 26 May 2023 20:30:20 +0800 Subject: btrfs: subpage: fix a crash in metadata repair path [BUG] Test case btrfs/027 would crash with subpage (64K page size, 4K sectorsize) with the following dying messages: debug: map_length=16384 length=65536 type=metadata|raid6(0x104) assertion failed: map_length >= length, in fs/btrfs/volumes.c:8093 ------------[ cut here ]------------ kernel BUG at fs/btrfs/messages.c:259! Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 Call trace: btrfs_assertfail+0x28/0x2c [btrfs] btrfs_map_repair_block+0x150/0x2b8 [btrfs] btrfs_repair_io_failure+0xd4/0x31c [btrfs] btrfs_read_extent_buffer+0x150/0x16c [btrfs] read_tree_block+0x38/0xbc [btrfs] read_tree_root_path+0xfc/0x1bc [btrfs] btrfs_get_root_ref.part.0+0xd4/0x3a8 [btrfs] open_ctree+0xa30/0x172c [btrfs] btrfs_mount_root+0x3c4/0x4a4 [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xec vfs_kern_mount.part.0+0x90/0xd4 vfs_kern_mount+0x14/0x28 btrfs_mount+0x114/0x418 [btrfs] legacy_get_tree+0x30/0x60 vfs_get_tree+0x28/0xec path_mount+0x3e0/0xb64 __arm64_sys_mount+0x200/0x2d8 invoke_syscall+0x48/0x114 el0_svc_common.constprop.0+0x60/0x11c do_el0_svc+0x38/0x98 el0_svc+0x40/0xa8 el0t_64_sync_handler+0xf4/0x120 el0t_64_sync+0x190/0x194 Code: aa0403e2 b0fff060 91010000 959c2024 (d4210000) [CAUSE] In btrfs/027 we test RAID6 with missing devices, in this particular case, we're repairing a metadata at the end of a data stripe. But at btrfs_repair_io_failure(), we always pass a full PAGE for repair, and for subpage case this can cross stripe boundary and lead to the above BUG_ON(). This metadata repair code is always there, since the introduction of subpage support, but this can trigger BUG_ON() after the bio split ability at btrfs_map_bio(). [FIX] Instead of passing the old PAGE_SIZE, we calculate the correct length based on the eb size and page size for both regular and subpage cases. 
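To make the new length calculation concrete, here is the arithmetic with illustrative numbers (assuming a 64K page and a 16K nodesize, which matches the map_length=16384 vs length=65536 values in the splat; the addresses themselves are invented):

/* A 16K eb sitting at the end of a 64K page. */
u64 eb_start   = 0x1c000;	/* eb->start                  */
u64 eb_len     = 0x4000;	/* eb->len, 16K nodesize      */
u64 page_start = 0x10000;	/* page_offset(p)             */
u64 page_size  = 0x10000;	/* PAGE_SIZE, 64K             */

u64 start = max(eb_start, page_start);				/* 0x1c000        */
u64 end   = min(eb_start + eb_len, page_start + page_size);	/* 0x20000        */
u32 len   = end - start;					/* 0x4000 = 16384 */

/*
 * The old code passed PAGE_SIZE (65536) starting at 0x1c000, which is what
 * crossed the stripe boundary and tripped the assertion.
 */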
CC: stable@vger.kernel.org # 6.3+ Reviewed-by: Christoph Hellwig Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2b1b227505f3..88e6d1072a35 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -242,7 +242,6 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num) { struct btrfs_fs_info *fs_info = eb->fs_info; - u64 start = eb->start; int i, num_pages = num_extent_pages(eb); int ret = 0; @@ -251,12 +250,14 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, for (i = 0; i < num_pages; i++) { struct page *p = eb->pages[i]; + u64 start = max_t(u64, eb->start, page_offset(p)); + u64 end = min_t(u64, eb->start + eb->len, page_offset(p) + PAGE_SIZE); + u32 len = end - start; - ret = btrfs_repair_io_failure(fs_info, 0, start, PAGE_SIZE, - start, p, start - page_offset(p), mirror_num); + ret = btrfs_repair_io_failure(fs_info, 0, start, len, + start, p, offset_in_page(start), mirror_num); if (ret) break; - start += PAGE_SIZE; } return ret; -- cgit v1.2.3 From 745806fb4554f334e6406fa82b328562aa48f08f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sun, 11 Jun 2023 08:09:13 +0800 Subject: btrfs: do not ASSERT() on duplicated global roots [BUG] Syzbot reports a reproducible ASSERT() when using rescue=usebackuproot mount option on a corrupted fs. The full report can be found here: https://syzkaller.appspot.com/bug?extid=c4614eae20a166c25bf0 BTRFS error (device loop0: state C): failed to load root csum assertion failed: !tmp, in fs/btrfs/disk-io.c:1103 ------------[ cut here ]------------ kernel BUG at fs/btrfs/ctree.h:3664! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 3608 Comm: syz-executor356 Not tainted 6.0.0-rc7-syzkaller-00029-g3800a713b607 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/26/2022 RIP: 0010:assertfail+0x1a/0x1c fs/btrfs/ctree.h:3663 RSP: 0018:ffffc90003aaf250 EFLAGS: 00010246 RAX: 0000000000000032 RBX: 0000000000000000 RCX: f21c13f886638400 RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000 RBP: ffff888021c640a0 R08: ffffffff816bd38d R09: ffffed10173667f1 R10: ffffed10173667f1 R11: 1ffff110173667f0 R12: dffffc0000000000 R13: ffff8880229c21f7 R14: ffff888021c64060 R15: ffff8880226c0000 FS: 0000555556a73300(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055a2637d7a00 CR3: 00000000709c4000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_global_root_insert+0x1a7/0x1b0 fs/btrfs/disk-io.c:1103 load_global_roots_objectid+0x482/0x8c0 fs/btrfs/disk-io.c:2467 load_global_roots fs/btrfs/disk-io.c:2501 [inline] btrfs_read_roots fs/btrfs/disk-io.c:2528 [inline] init_tree_roots+0xccb/0x203c fs/btrfs/disk-io.c:2939 open_ctree+0x1e53/0x33df fs/btrfs/disk-io.c:3574 btrfs_fill_super+0x1c6/0x2d0 fs/btrfs/super.c:1456 btrfs_mount_root+0x885/0x9a0 fs/btrfs/super.c:1824 legacy_get_tree+0xea/0x180 fs/fs_context.c:610 vfs_get_tree+0x88/0x270 fs/super.c:1530 fc_mount fs/namespace.c:1043 [inline] vfs_kern_mount+0xc9/0x160 fs/namespace.c:1073 btrfs_mount+0x3d3/0xbb0 fs/btrfs/super.c:1884 [CAUSE] Since the introduction of global roots, we handle csum/extent/free-space-tree roots as global roots, even if no extent-tree-v2 
feature is enabled. So for regular csum/extent/fst roots, we load them into fs_info::global_root_tree rb tree. And we should not expect any conflicts in that rb tree, thus we have an ASSERT() inside btrfs_global_root_insert(). But rescue=usebackuproot can break the assumption, as we will try to load those trees again and again as long as we have bad roots and have backup roots slot remaining. So in that case we can have conflicting roots in the rb tree, and triggering the ASSERT() crash. [FIX] We can safely remove that ASSERT(), as the caller will properly put the offending root. To make further debugging easier, also add two explicit error messages: - Error message for conflicting global roots - Error message when using backup roots slot Reported-by: syzbot+a694851c6ab28cbcfb9c@syzkaller.appspotmail.com Fixes: abed4aaae4f7 ("btrfs: track the csum, extent, and free space trees in a rb tree") CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 88e6d1072a35..dabc79c1af1b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -996,13 +996,18 @@ int btrfs_global_root_insert(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; struct rb_node *tmp; + int ret = 0; write_lock(&fs_info->global_root_lock); tmp = rb_find_add(&root->rb_node, &fs_info->global_root_tree, global_root_cmp); write_unlock(&fs_info->global_root_lock); - ASSERT(!tmp); - return tmp ? -EEXIST : 0; + if (tmp) { + ret = -EEXIST; + btrfs_warn(fs_info, "global root %llu %llu already exists", + root->root_key.objectid, root->root_key.offset); + } + return ret; } void btrfs_global_root_delete(struct btrfs_root *root) @@ -2842,6 +2847,7 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info) /* We can't trust the free space cache either */ btrfs_set_opt(fs_info->mount_opt, CLEAR_CACHE); + btrfs_warn(fs_info, "try to load backup roots slot %d", i); ret = read_backup_root(fs_info, i); backup_index = ret; if (ret < 0) -- cgit v1.2.3 From 8bfec2e426e40597d594db07de4e53def38c8879 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 May 2023 09:06:15 +0200 Subject: btrfs: remove hipri_workers workqueue Now that btrfs_wq_submit_bio is never called for synchronous I/O, the hipri_workers workqueue is not used anymore and can be removed. 
Reviewed-by: Chris Mason Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/bio.c | 5 +---- fs/btrfs/disk-io.c | 6 +----- fs/btrfs/fs.h | 1 - fs/btrfs/super.c | 1 - 4 files changed, 2 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index 990a517c069b..2e2e1abd5838 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -615,10 +615,7 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio, btrfs_init_work(&async->work, run_one_async_start, run_one_async_done, run_one_async_free); - if (op_is_sync(bbio->bio.bi_opf)) - btrfs_queue_work(fs_info->hipri_workers, &async->work); - else - btrfs_queue_work(fs_info->workers, &async->work); + btrfs_queue_work(fs_info->workers, &async->work); return true; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dabc79c1af1b..f4adda2b4317 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1991,7 +1991,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) { btrfs_destroy_workqueue(fs_info->fixup_workers); btrfs_destroy_workqueue(fs_info->delalloc_workers); - btrfs_destroy_workqueue(fs_info->hipri_workers); btrfs_destroy_workqueue(fs_info->workers); if (fs_info->endio_workers) destroy_workqueue(fs_info->endio_workers); @@ -2186,9 +2185,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info) fs_info->workers = btrfs_alloc_workqueue(fs_info, "worker", flags, max_active, 16); - fs_info->hipri_workers = - btrfs_alloc_workqueue(fs_info, "worker-high", - flags | WQ_HIGHPRI, max_active, 16); fs_info->delalloc_workers = btrfs_alloc_workqueue(fs_info, "delalloc", @@ -2225,7 +2221,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info) fs_info->discard_ctl.discard_workers = alloc_workqueue("btrfs_discard", WQ_UNBOUND | WQ_FREEZABLE, 1); - if (!(fs_info->workers && fs_info->hipri_workers && + if (!(fs_info->workers && fs_info->delalloc_workers && fs_info->flush_workers && fs_info->endio_workers && fs_info->endio_meta_workers && fs_info->compressed_write_workers && diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h index 0d98fc5f6f44..840e4def18b5 100644 --- a/fs/btrfs/fs.h +++ b/fs/btrfs/fs.h @@ -543,7 +543,6 @@ struct btrfs_fs_info { * A third pool does submit_bio to avoid deadlocking with the other two. */ struct btrfs_workqueue *workers; - struct btrfs_workqueue *hipri_workers; struct btrfs_workqueue *delalloc_workers; struct btrfs_workqueue *flush_workers; struct workqueue_struct *endio_workers; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index efeb1a9d040a..8b1c1225245e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1631,7 +1631,6 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, old_pool_size, new_pool_size); btrfs_workqueue_set_max(fs_info->workers, new_pool_size); - btrfs_workqueue_set_max(fs_info->hipri_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size); workqueue_set_max_active(fs_info->endio_workers, new_pool_size); -- cgit v1.2.3 From 85d8a826c7cde17f9cca9c4debecb4538bdb6573 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Sat, 29 Apr 2023 16:07:12 -0400 Subject: btrfs: simplify btrfs_check_leaf_* helpers into a single helper We have two helpers for checking leaves, because we have an extra check for debugging in btrfs_mark_buffer_dirty(), and at that stage we may have item data that isn't consistent yet. 
However we can handle this case internally in the helper, if BTRFS_HEADER_FLAG_WRITTEN is set we know the buffer should be internally consistent, otherwise we need to skip checking the item data. Simplify this helper down a single helper and handle the item data checking logic internally to the helper. Reviewed-by: Johannes Thumshirn Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 12 +++++------- fs/btrfs/tree-checker.c | 20 +++++++------------- fs/btrfs/tree-checker.h | 13 +------------ 3 files changed, 13 insertions(+), 32 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f4adda2b4317..922755926ee9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -326,7 +326,7 @@ static int csum_one_extent_buffer(struct extent_buffer *eb) if (btrfs_header_level(eb)) ret = btrfs_check_node(eb); else - ret = btrfs_check_leaf_full(eb); + ret = btrfs_check_leaf(eb); if (ret < 0) goto error; @@ -583,7 +583,7 @@ static int validate_extent_buffer(struct extent_buffer *eb, * that we don't try and read the other copies of this block, just * return -EIO. */ - if (found_level == 0 && btrfs_check_leaf_full(eb)) { + if (found_level == 0 && btrfs_check_leaf(eb)) { set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = -EIO; } @@ -4701,12 +4701,10 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) fs_info->dirty_metadata_batch); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY /* - * Since btrfs_mark_buffer_dirty() can be called with item pointer set - * but item data not updated. - * So here we should only check item pointers, not item data. + * btrfs_check_leaf() won't check item data if we don't have WRITTEN + * set, so this will only validate the basic structure of the items. */ - if (btrfs_header_level(buf) == 0 && - btrfs_check_leaf_relaxed(buf)) { + if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(buf)) { btrfs_print_leaf(buf); ASSERT(0); } diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index e2b54793bf0c..2eff4e2f2c47 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1674,7 +1674,7 @@ static int check_leaf_item(struct extent_buffer *leaf, return ret; } -static int check_leaf(struct extent_buffer *leaf, bool check_item_data) +int btrfs_check_leaf(struct extent_buffer *leaf) { struct btrfs_fs_info *fs_info = leaf->fs_info; /* No valid key type is 0, so all key should be larger than this key */ @@ -1807,7 +1807,11 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data) return -EUCLEAN; } - if (check_item_data) { + /* + * We only want to do this if WRITTEN is set, otherwise the leaf + * may be in some intermediate state and won't appear valid. 
+ */ + if (btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_WRITTEN)) { /* * Check if the item size and content meet other * criteria @@ -1824,17 +1828,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data) return 0; } - -int btrfs_check_leaf_full(struct extent_buffer *leaf) -{ - return check_leaf(leaf, true); -} -ALLOW_ERROR_INJECTION(btrfs_check_leaf_full, ERRNO); - -int btrfs_check_leaf_relaxed(struct extent_buffer *leaf) -{ - return check_leaf(leaf, false); -} +ALLOW_ERROR_INJECTION(btrfs_check_leaf, ERRNO); int btrfs_check_node(struct extent_buffer *node) { diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h index bfb5efa4e01f..48321e8d91bb 100644 --- a/fs/btrfs/tree-checker.h +++ b/fs/btrfs/tree-checker.h @@ -40,18 +40,7 @@ struct btrfs_tree_parent_check { u8 level; }; -/* - * Comprehensive leaf checker. - * Will check not only the item pointers, but also every possible member - * in item data. - */ -int btrfs_check_leaf_full(struct extent_buffer *leaf); - -/* - * Less strict leaf checker. - * Will only check item pointers, not reading item data. - */ -int btrfs_check_leaf_relaxed(struct extent_buffer *leaf); +int btrfs_check_leaf(struct extent_buffer *leaf); int btrfs_check_node(struct extent_buffer *node); int btrfs_check_chunk_valid(struct extent_buffer *leaf, -- cgit v1.2.3 From 2cac5af16537f8eafaba5e525fbb5a93160ebaff Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Sat, 29 Apr 2023 16:07:17 -0400 Subject: btrfs: move btrfs_verify_level_key into tree-checker.c This is more a buffer validation helper, move it into the tree-checker files where it makes more sense. Reviewed-by: Johannes Thumshirn Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 58 ------------------------------------------------- fs/btrfs/disk-io.h | 2 -- fs/btrfs/tree-checker.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/tree-checker.h | 2 ++ 4 files changed, 60 insertions(+), 60 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 922755926ee9..83518ed71bfd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -180,64 +180,6 @@ int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, return 0; } -int btrfs_verify_level_key(struct extent_buffer *eb, int level, - struct btrfs_key *first_key, u64 parent_transid) -{ - struct btrfs_fs_info *fs_info = eb->fs_info; - int found_level; - struct btrfs_key found_key; - int ret; - - found_level = btrfs_header_level(eb); - if (found_level != level) { - WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), - KERN_ERR "BTRFS: tree level check failed\n"); - btrfs_err(fs_info, -"tree level mismatch detected, bytenr=%llu level expected=%u has=%u", - eb->start, level, found_level); - return -EIO; - } - - if (!first_key) - return 0; - - /* - * For live tree block (new tree blocks in current transaction), - * we need proper lock context to avoid race, which is impossible here. - * So we only checks tree blocks which is read from disk, whose - * generation <= fs_info->last_trans_committed. 
- */ - if (btrfs_header_generation(eb) > fs_info->last_trans_committed) - return 0; - - /* We have @first_key, so this @eb must have at least one item */ - if (btrfs_header_nritems(eb) == 0) { - btrfs_err(fs_info, - "invalid tree nritems, bytenr=%llu nritems=0 expect >0", - eb->start); - WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); - return -EUCLEAN; - } - - if (found_level) - btrfs_node_key_to_cpu(eb, &found_key, 0); - else - btrfs_item_key_to_cpu(eb, &found_key, 0); - ret = btrfs_comp_cpu_keys(first_key, &found_key); - - if (ret) { - WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), - KERN_ERR "BTRFS: tree first key check failed\n"); - btrfs_err(fs_info, -"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)", - eb->start, parent_transid, first_key->objectid, - first_key->type, first_key->offset, - found_key.objectid, found_key.type, - found_key.offset); - } - return ret; -} - static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num) { diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 4d5772330110..a26ab3cbb449 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -31,8 +31,6 @@ struct btrfs_tree_parent_check; void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info); void btrfs_init_fs_info(struct btrfs_fs_info *fs_info); -int btrfs_verify_level_key(struct extent_buffer *eb, int level, - struct btrfs_key *first_key, u64 parent_transid); struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, struct btrfs_tree_parent_check *check); struct extent_buffer *btrfs_find_create_tree_block( diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index bd85205a6249..9e92548a6aa2 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1963,3 +1963,61 @@ int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner) } return 0; } + +int btrfs_verify_level_key(struct extent_buffer *eb, int level, + struct btrfs_key *first_key, u64 parent_transid) +{ + struct btrfs_fs_info *fs_info = eb->fs_info; + int found_level; + struct btrfs_key found_key; + int ret; + + found_level = btrfs_header_level(eb); + if (found_level != level) { + WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), + KERN_ERR "BTRFS: tree level check failed\n"); + btrfs_err(fs_info, +"tree level mismatch detected, bytenr=%llu level expected=%u has=%u", + eb->start, level, found_level); + return -EIO; + } + + if (!first_key) + return 0; + + /* + * For live tree block (new tree blocks in current transaction), + * we need proper lock context to avoid race, which is impossible here. + * So we only checks tree blocks which is read from disk, whose + * generation <= fs_info->last_trans_committed. 
+ */ + if (btrfs_header_generation(eb) > fs_info->last_trans_committed) + return 0; + + /* We have @first_key, so this @eb must have at least one item */ + if (btrfs_header_nritems(eb) == 0) { + btrfs_err(fs_info, + "invalid tree nritems, bytenr=%llu nritems=0 expect >0", + eb->start); + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); + return -EUCLEAN; + } + + if (found_level) + btrfs_node_key_to_cpu(eb, &found_key, 0); + else + btrfs_item_key_to_cpu(eb, &found_key, 0); + ret = btrfs_comp_cpu_keys(first_key, &found_key); + + if (ret) { + WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), + KERN_ERR "BTRFS: tree first key check failed\n"); + btrfs_err(fs_info, +"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)", + eb->start, parent_transid, first_key->objectid, + first_key->type, first_key->offset, + found_key.objectid, found_key.type, + found_key.offset); + } + return ret; +} diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h index c0861ce1429b..3c2a02a72f64 100644 --- a/fs/btrfs/tree-checker.h +++ b/fs/btrfs/tree-checker.h @@ -66,5 +66,7 @@ int btrfs_check_node(struct extent_buffer *node); int btrfs_check_chunk_valid(struct extent_buffer *leaf, struct btrfs_chunk *chunk, u64 logical); int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner); +int btrfs_verify_level_key(struct extent_buffer *eb, int level, + struct btrfs_key *first_key, u64 parent_transid); #endif -- cgit v1.2.3 From f18cc97845aa4ae0e795c088c979fe1642b3b8e5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2023 07:58:38 -0700 Subject: btrfs: fix dirty_metadata_bytes for redirtied buffers dirty_metadata_bytes is decremented in both places that clear the dirty bit in a buffer, but only incremented in btrfs_mark_buffer_dirty, which means that a buffer that is redirtied using btrfs_redirty_list_add won't be added to dirty_metadata_bytes, but it will be subtracted when written out, leading an inconsistency in the counter. Move the dirty_metadata_bytes from btrfs_mark_buffer_dirty into set_extent_buffer_dirty to also account for the redirty case, and remove the now unused set_extent_buffer_dirty return value. 
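A simplified model of the imbalance, with invented names standing in for the real helpers: only the mark-dirty entry point used to add to the counter, while every path that clears the dirty bit subtracts, so a buffer redirtied behind btrfs_mark_buffer_dirty()'s back gets subtracted at writeback without ever having been added.

struct model_eb { bool dirty; long len; };

static long dirty_metadata_bytes;	/* stands in for the percpu counter */

static void model_set_dirty(struct model_eb *eb)	/* set_extent_buffer_dirty()  */
{
	eb->dirty = true;
	/* after the fix, the "dirty_metadata_bytes += eb->len" accounting lives here */
}

static void model_mark_dirty(struct model_eb *eb)	/* btrfs_mark_buffer_dirty()  */
{
	model_set_dirty(eb);
	dirty_metadata_bytes += eb->len;	/* before the fix: the only increment */
}

static void model_clear_dirty(struct model_eb *eb)	/* clear/writeback paths      */
{
	eb->dirty = false;
	dirty_metadata_bytes -= eb->len;	/* always decrements */
}

/*
 * btrfs_redirty_list_add() effectively takes the model_set_dirty() path
 * directly, so before the fix such a buffer was never counted in but was
 * still counted out, leaving the counter inconsistent.
 */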
Fixes: d3575156f662 ("btrfs: zoned: redirty released extent buffers") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Naohiro Aota Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 7 +------ fs/btrfs/extent_io.c | 7 ++++--- fs/btrfs/extent_io.h | 2 +- 3 files changed, 6 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 83518ed71bfd..112c99dde57f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4621,7 +4621,6 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { struct btrfs_fs_info *fs_info = buf->fs_info; u64 transid = btrfs_header_generation(buf); - int was_dirty; #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS /* @@ -4636,11 +4635,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) if (transid != fs_info->generation) WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, found %llu running %llu\n", buf->start, transid, fs_info->generation); - was_dirty = set_extent_buffer_dirty(buf); - if (!was_dirty) - percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, - buf->len, - fs_info->dirty_metadata_batch); + set_extent_buffer_dirty(buf); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY /* * btrfs_check_leaf() won't check item data if we don't have WRITTEN diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a1adadd5d25d..a829390632a5 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4148,7 +4148,7 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans, WARN_ON(atomic_read(&eb->refs) == 0); } -bool set_extent_buffer_dirty(struct extent_buffer *eb) +void set_extent_buffer_dirty(struct extent_buffer *eb) { int i; int num_pages; @@ -4183,13 +4183,14 @@ bool set_extent_buffer_dirty(struct extent_buffer *eb) eb->start, eb->len); if (subpage) unlock_page(eb->pages[0]); + percpu_counter_add_batch(&eb->fs_info->dirty_metadata_bytes, + eb->len, + eb->fs_info->dirty_metadata_batch); } #ifdef CONFIG_BTRFS_DEBUG for (i = 0; i < num_pages; i++) ASSERT(PageDirty(eb->pages[i])); #endif - - return was_dirty; } void clear_extent_buffer_uptodate(struct extent_buffer *eb) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 4341ad978fb8..f937654230d3 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -262,7 +262,7 @@ void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long star void extent_buffer_bitmap_clear(const struct extent_buffer *eb, unsigned long start, unsigned long pos, unsigned long len); -bool set_extent_buffer_dirty(struct extent_buffer *eb); +void set_extent_buffer_dirty(struct extent_buffer *eb); void set_extent_buffer_uptodate(struct extent_buffer *eb); void clear_extent_buffer_uptodate(struct extent_buffer *eb); int extent_buffer_under_io(const struct extent_buffer *eb); -- cgit v1.2.3 From f880fe6e0b4b127d6e2a007fe68bdf5651677ae2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 May 2023 07:58:39 -0700 Subject: btrfs: don't hold an extra reference for redirtied buffers When btrfs_redirty_list_add redirties a buffer, it also acquires an extra reference that is released on transaction commit. But this is not required as buffers that are dirty or under writeback are never freed (look for calls to extent_buffer_under_io())). Remove the extra reference and the infrastructure used to drop it again. History behind redirty logic: In the first place, it used releasing_list to hold all the to-be-released extent buffers, and decided which buffers to re-dirty at the commit time. 
Then, in a later version, the behaviour got changed to re-dirty a necessary buffer and add re-dirtied one to the list in btrfs_free_tree_block(). In short, the list was there mostly for the patch series' historical reason. Reviewed-by: Naohiro Aota Signed-off-by: Christoph Hellwig [ add Naohiro's comment regarding history ] Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 -- fs/btrfs/extent_io.c | 1 - fs/btrfs/extent_io.h | 1 - fs/btrfs/transaction.c | 9 --------- fs/btrfs/transaction.h | 3 --- fs/btrfs/zoned.c | 28 ++++------------------------ fs/btrfs/zoned.h | 2 -- 7 files changed, 4 insertions(+), 42 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 112c99dde57f..16224fd82987 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -5073,8 +5073,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, EXTENT_DIRTY); btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents); - btrfs_free_redirty_list(cur_trans); - cur_trans->state =TRANS_STATE_COMPLETED; wake_up(&cur_trans->commit_wait); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a829390632a5..d8becf1cdbc0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3557,7 +3557,6 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, init_rwsem(&eb->lock); btrfs_leak_debug_add_eb(eb); - INIT_LIST_HEAD(&eb->release_list); spin_lock_init(&eb->refs_lock); atomic_set(&eb->refs, 1); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index f937654230d3..6f3cfadd232c 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -89,7 +89,6 @@ struct extent_buffer { struct rw_semaphore lock; struct page *pages[INLINE_EXTENT_BUFFER_PAGES]; - struct list_head release_list; #ifdef CONFIG_BTRFS_DEBUG struct list_head leak_list; #endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 27c616fdfae2..fe0f00e717a8 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -374,8 +374,6 @@ loop: spin_lock_init(&cur_trans->dirty_bgs_lock); INIT_LIST_HEAD(&cur_trans->deleted_bgs); spin_lock_init(&cur_trans->dropped_roots_lock); - INIT_LIST_HEAD(&cur_trans->releasing_ebs); - spin_lock_init(&cur_trans->releasing_ebs_lock); list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(fs_info, &cur_trans->dirty_pages, IO_TREE_TRANS_DIRTY_PAGES); @@ -2482,13 +2480,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) goto scrub_continue; } - /* - * At this point, we should have written all the tree blocks allocated - * in this transaction. So it's now safe to free the redirtyied extent - * buffers. 
- */ - btrfs_free_redirty_list(cur_trans); - ret = write_all_supers(fs_info, 0); /* * the super is written, we can safely allow the tree-loggers diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index fa728ab80826..8e9fa23bd7fe 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -94,9 +94,6 @@ struct btrfs_transaction { */ atomic_t pending_ordered; wait_queue_head_t pending_wait; - - spinlock_t releasing_ebs_lock; - struct list_head releasing_ebs; }; enum { diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index dac879fe2871..bda301a55cbe 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1602,37 +1602,17 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) void btrfs_redirty_list_add(struct btrfs_transaction *trans, struct extent_buffer *eb) { - struct btrfs_fs_info *fs_info = eb->fs_info; - - if (!btrfs_is_zoned(fs_info) || - btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN) || - !list_empty(&eb->release_list)) + if (!btrfs_is_zoned(eb->fs_info) || + btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN)) return; + ASSERT(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); + memzero_extent_buffer(eb, 0, eb->len); set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags); set_extent_buffer_dirty(eb); set_extent_bits_nowait(&trans->dirty_pages, eb->start, eb->start + eb->len - 1, EXTENT_DIRTY); - - spin_lock(&trans->releasing_ebs_lock); - list_add_tail(&eb->release_list, &trans->releasing_ebs); - spin_unlock(&trans->releasing_ebs_lock); - atomic_inc(&eb->refs); -} - -void btrfs_free_redirty_list(struct btrfs_transaction *trans) -{ - spin_lock(&trans->releasing_ebs_lock); - while (!list_empty(&trans->releasing_ebs)) { - struct extent_buffer *eb; - - eb = list_first_entry(&trans->releasing_ebs, - struct extent_buffer, release_list); - list_del_init(&eb->release_list); - free_extent_buffer(eb); - } - spin_unlock(&trans->releasing_ebs_lock); } bool btrfs_use_zone_append(struct btrfs_bio *bbio) diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index c0570d35fea2..3058ef559c98 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -54,7 +54,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new); void btrfs_calc_zone_unusable(struct btrfs_block_group *cache); void btrfs_redirty_list_add(struct btrfs_transaction *trans, struct extent_buffer *eb); -void btrfs_free_redirty_list(struct btrfs_transaction *trans); bool btrfs_use_zone_append(struct btrfs_bio *bbio); void btrfs_record_physical_zoned(struct btrfs_bio *bbio); void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered); @@ -179,7 +178,6 @@ static inline void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) { } static inline void btrfs_redirty_list_add(struct btrfs_transaction *trans, struct extent_buffer *eb) { } -static inline void btrfs_free_redirty_list(struct btrfs_transaction *trans) { } static inline bool btrfs_use_zone_append(struct btrfs_bio *bbio) { -- cgit v1.2.3 From aebcc1596b5c37095385ecd930cf335254828538 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 May 2023 17:24:23 +0200 Subject: btrfs: move setting the buffer uptodate out of validate_extent_buffer Setting the buffer uptodate in a function that is named as a validation helper is a it confusing. Move the call from validate_extent_buffer to the one of its two callers that didn't already have a duplicate call to set_extent_buffer_uptodate. 
Reviewed-by: Johannes Thumshirn Reviewed-by: Qu Wenruo Reviewed-by: Josef Bacik Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 16224fd82987..41045c900c2c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -533,9 +533,7 @@ static int validate_extent_buffer(struct extent_buffer *eb, if (found_level > 0 && btrfs_check_node(eb)) ret = -EIO; - if (!ret) - set_extent_buffer_uptodate(eb); - else + if (ret) btrfs_err(fs_info, "read time tree block corruption detected on logical %llu mirror %u", eb->start, eb->read_mirror); @@ -627,6 +625,8 @@ int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio, goto err; } ret = validate_extent_buffer(eb, &bbio->parent_check); + if (!ret) + set_extent_buffer_uptodate(eb); err: if (ret) { /* -- cgit v1.2.3 From d87e6575e9d1c9d43e223c3fe858e4e453265707 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 May 2023 17:24:24 +0200 Subject: btrfs: merge verify_parent_transid and btrfs_buffer_uptodate verify_parent_transid is only called by btrfs_buffer_uptodate, which confusingly inverts the return value. Merge the two functions and reflow the parent_transid so that error handling is in a branch. Reviewed-by: Johannes Thumshirn Reviewed-by: Qu Wenruo Reviewed-by: Josef Bacik Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 45 ++++++++++++++------------------------------- 1 file changed, 14 insertions(+), 31 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 41045c900c2c..d4f7578cda00 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -110,32 +110,32 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result) * detect blocks that either didn't get written at all or got written * in the wrong place. 
*/ -static int verify_parent_transid(struct extent_io_tree *io_tree, - struct extent_buffer *eb, u64 parent_transid, - int atomic) +int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, int atomic) { + struct inode *btree_inode = eb->pages[0]->mapping->host; + struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree; struct extent_state *cached_state = NULL; - int ret; + int ret = 1; - if (!parent_transid || btrfs_header_generation(eb) == parent_transid) + if (!extent_buffer_uptodate(eb)) return 0; + if (!parent_transid || btrfs_header_generation(eb) == parent_transid) + return 1; + if (atomic) return -EAGAIN; lock_extent(io_tree, eb->start, eb->start + eb->len - 1, &cached_state); - if (extent_buffer_uptodate(eb) && - btrfs_header_generation(eb) == parent_transid) { - ret = 0; - goto out; - } - btrfs_err_rl(eb->fs_info, + if (!extent_buffer_uptodate(eb) || + btrfs_header_generation(eb) != parent_transid) { + btrfs_err_rl(eb->fs_info, "parent transid verify failed on logical %llu mirror %u wanted %llu found %llu", eb->start, eb->read_mirror, parent_transid, btrfs_header_generation(eb)); - ret = 1; - clear_extent_buffer_uptodate(eb); -out: + clear_extent_buffer_uptodate(eb); + ret = 0; + } unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, &cached_state); return ret; @@ -4600,23 +4600,6 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) btrfs_close_devices(fs_info->fs_devices); } -int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, - int atomic) -{ - int ret; - struct inode *btree_inode = buf->pages[0]->mapping->host; - - ret = extent_buffer_uptodate(buf); - if (!ret) - return ret; - - ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf, - parent_transid, atomic); - if (ret == -EAGAIN) - return ret; - return !ret; -} - void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { struct btrfs_fs_info *fs_info = buf->fs_info; -- cgit v1.2.3 From 046b562b20a5cfe205fb78c6f8a1a8b74f01d303 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 May 2023 17:24:28 +0200 Subject: btrfs: use a separate end_io handler for read_extent_buffer Now that we always use a single bio to read an extent_buffer, the buffer can be passed to the end_io handler as private data. This allows implementing a much simplified dedicated end I/O handler for metadata reads. 
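The idea, sketched here with invented types rather than the real btrfs_bio API: the submitter stashes a pointer to the object being read as the bio's private data, so the completion handler receives its context directly instead of re-deriving it from page->private or a radix tree lookup.

struct read_ctx {
	void (*end_io)(struct read_ctx *ctx);
	void *private;		/* object the read belongs to (here: the eb)  */
	bool uptodate;		/* filled in by the lower layer on completion */
};

struct model_buffer { bool uptodate; };

static void model_read_end_io(struct read_ctx *ctx)
{
	struct model_buffer *buf = ctx->private;	/* no lookup needed */

	buf->uptodate = ctx->uptodate;
}

static void model_submit_read(struct read_ctx *ctx, struct model_buffer *buf)
{
	ctx->private = buf;
	ctx->end_io = model_read_end_io;
	/* ... hand ctx to the lower layer, which invokes ctx->end_io() on completion ... */
}

The diff below does the equivalent by passing the extent_buffer as the last argument to btrfs_bio_alloc() and reading it back as bbio->private in extent_buffer_read_end_io().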
Reviewed-by: Johannes Thumshirn Reviewed-by: Qu Wenruo Reviewed-by: Josef Bacik Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 105 +-------------------------------------------------- fs/btrfs/disk-io.h | 5 +-- fs/btrfs/extent_io.c | 80 ++++++++++++++++++--------------------- 3 files changed, 41 insertions(+), 149 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d4f7578cda00..db04a957b445 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -427,8 +427,8 @@ static int check_tree_block_fsid(struct extent_buffer *eb) } /* Do basic extent buffer checks at read time */ -static int validate_extent_buffer(struct extent_buffer *eb, - struct btrfs_tree_parent_check *check) +int btrfs_validate_extent_buffer(struct extent_buffer *eb, + struct btrfs_tree_parent_check *check) { struct btrfs_fs_info *fs_info = eb->fs_info; u64 found_start; @@ -541,107 +541,6 @@ out: return ret; } -static int validate_subpage_buffer(struct page *page, u64 start, u64 end, - int mirror, struct btrfs_tree_parent_check *check) -{ - struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb); - struct extent_buffer *eb; - bool reads_done; - int ret = 0; - - ASSERT(check); - - /* - * We don't allow bio merge for subpage metadata read, so we should - * only get one eb for each endio hook. - */ - ASSERT(end == start + fs_info->nodesize - 1); - ASSERT(PagePrivate(page)); - - eb = find_extent_buffer(fs_info, start); - /* - * When we are reading one tree block, eb must have been inserted into - * the radix tree. If not, something is wrong. - */ - ASSERT(eb); - - reads_done = atomic_dec_and_test(&eb->io_pages); - /* Subpage read must finish in page read */ - ASSERT(reads_done); - - eb->read_mirror = mirror; - if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) { - ret = -EIO; - goto err; - } - ret = validate_extent_buffer(eb, check); - if (ret < 0) - goto err; - - set_extent_buffer_uptodate(eb); - - free_extent_buffer(eb); - return ret; -err: - /* - * end_bio_extent_readpage decrements io_pages in case of error, - * make sure it has something to decrement. - */ - atomic_inc(&eb->io_pages); - clear_extent_buffer_uptodate(eb); - free_extent_buffer(eb); - return ret; -} - -int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio, - struct page *page, u64 start, u64 end, - int mirror) -{ - struct extent_buffer *eb; - int ret = 0; - int reads_done; - - ASSERT(page->private); - - if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE) - return validate_subpage_buffer(page, start, end, mirror, - &bbio->parent_check); - - eb = (struct extent_buffer *)page->private; - - /* - * The pending IO might have been the only thing that kept this buffer - * in memory. 
Make sure we have a ref for all this other checks - */ - atomic_inc(&eb->refs); - - reads_done = atomic_dec_and_test(&eb->io_pages); - if (!reads_done) - goto err; - - eb->read_mirror = mirror; - if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) { - ret = -EIO; - goto err; - } - ret = validate_extent_buffer(eb, &bbio->parent_check); - if (!ret) - set_extent_buffer_uptodate(eb); -err: - if (ret) { - /* - * our io error hook is going to dec the io pages - * again, we have to make sure it has something - * to decrement - */ - atomic_inc(&eb->io_pages); - clear_extent_buffer_uptodate(eb); - } - free_extent_buffer(eb); - - return ret; -} - #ifdef CONFIG_MIGRATION static int btree_migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode) diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index a26ab3cbb449..b03767f4d7ed 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -82,9 +82,8 @@ void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info); void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info); void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); -int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio, - struct page *page, u64 start, u64 end, - int mirror); +int btrfs_validate_extent_buffer(struct extent_buffer *eb, + struct btrfs_tree_parent_check *check); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info); #endif diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ea99b2a3755e..4581ee5a0835 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -663,35 +663,6 @@ static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page) btrfs_subpage_start_reader(fs_info, page, page_offset(page), PAGE_SIZE); } -/* - * Find extent buffer for a givne bytenr. - * - * This is for end_bio_extent_readpage(), thus we can't do any unsafe locking - * in endio context. - */ -static struct extent_buffer *find_extent_buffer_readpage( - struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr) -{ - struct extent_buffer *eb; - - /* - * For regular sectorsize, we can use page->private to grab extent - * buffer - */ - if (fs_info->nodesize >= PAGE_SIZE) { - ASSERT(PagePrivate(page) && page->private); - return (struct extent_buffer *)page->private; - } - - /* For subpage case, we need to lookup buffer radix tree */ - rcu_read_lock(); - eb = radix_tree_lookup(&fs_info->buffer_radix, - bytenr >> fs_info->sectorsize_bits); - rcu_read_unlock(); - ASSERT(eb); - return eb; -} - /* * after a readpage IO is done, we need to: * clear the uptodate bits on error @@ -713,7 +684,6 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio) * larger than UINT_MAX, u32 here is enough. 
*/ u32 bio_offset = 0; - int mirror; struct bvec_iter_all iter_all; ASSERT(!bio_flagged(bio, BIO_CLONED)); @@ -753,11 +723,6 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio) end = start + bvec->bv_len - 1; len = bvec->bv_len; - mirror = bbio->mirror_num; - if (uptodate && !is_data_inode(inode) && - btrfs_validate_metadata_buffer(bbio, page, start, end, mirror)) - uptodate = false; - if (likely(uptodate)) { loff_t i_size = i_size_read(inode); pgoff_t end_index = i_size >> PAGE_SHIFT; @@ -778,13 +743,6 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio) zero_user_segment(page, zero_start, offset_in_page(end) + 1); } - } else if (!is_data_inode(inode)) { - struct extent_buffer *eb; - - eb = find_extent_buffer_readpage(fs_info, page, start); - set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); - eb->read_mirror = mirror; - atomic_dec(&eb->io_pages); } /* Update page status and unlock. */ @@ -4221,6 +4179,42 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb) } } +static void extent_buffer_read_end_io(struct btrfs_bio *bbio) +{ + struct extent_buffer *eb = bbio->private; + bool uptodate = !bbio->bio.bi_status; + struct bvec_iter_all iter_all; + struct bio_vec *bvec; + u32 bio_offset = 0; + + atomic_inc(&eb->refs); + eb->read_mirror = bbio->mirror_num; + + if (uptodate && + btrfs_validate_extent_buffer(eb, &bbio->parent_check) < 0) + uptodate = false; + + if (uptodate) { + set_extent_buffer_uptodate(eb); + } else { + clear_extent_buffer_uptodate(eb); + set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); + } + + bio_for_each_segment_all(bvec, &bbio->bio, iter_all) { + atomic_dec(&eb->io_pages); + end_page_read(bvec->bv_page, uptodate, eb->start + bio_offset, + bvec->bv_len); + bio_offset += bvec->bv_len; + } + + unlock_extent(&bbio->inode->io_tree, eb->start, + eb->start + bio_offset - 1, NULL); + free_extent_buffer(eb); + + bio_put(&bbio->bio); +} + static void __read_extent_buffer_pages(struct extent_buffer *eb, int mirror_num, struct btrfs_tree_parent_check *check) { @@ -4234,7 +4228,7 @@ static void __read_extent_buffer_pages(struct extent_buffer *eb, int mirror_num, bbio = btrfs_bio_alloc(INLINE_EXTENT_BUFFER_PAGES, REQ_OP_READ | REQ_META, eb->fs_info, - end_bio_extent_readpage, NULL); + extent_buffer_read_end_io, eb); bbio->bio.bi_iter.bi_sector = eb->start >> SECTOR_SHIFT; bbio->inode = BTRFS_I(eb->fs_info->btree_inode); bbio->file_offset = eb->start; -- cgit v1.2.3 From 31d89399dad0cf5524bd0e4e2c2827ae7004b2ca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 May 2023 17:24:35 +0200 Subject: btrfs: remove the extent_buffer lookup in btree block checksumming The checksumming of btree blocks always operates on the entire extent_buffer, and because btree blocks are always allocated contiguously on disk they are never split by btrfs_submit_bio. Simplify the checksumming code by finding the extent_buffer in the btrfs_bio private data instead of trying to search through the bio_vec. 
Reviewed-by: Johannes Thumshirn Reviewed-by: Qu Wenruo Reviewed-by: Josef Bacik Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 120 +++++++++++------------------------------------------ 1 file changed, 24 insertions(+), 96 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index db04a957b445..384c8110ef2a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -254,12 +254,34 @@ int btrfs_read_extent_buffer(struct extent_buffer *eb, return ret; } -static int csum_one_extent_buffer(struct extent_buffer *eb) +/* + * Checksum a dirty tree block before IO. + */ +blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio) { + struct extent_buffer *eb = bbio->private; struct btrfs_fs_info *fs_info = eb->fs_info; + u64 found_start = btrfs_header_bytenr(eb); u8 result[BTRFS_CSUM_SIZE]; int ret; + /* Btree blocks are always contiguous on disk. */ + if (WARN_ON_ONCE(bbio->file_offset != eb->start)) + return BLK_STS_IOERR; + if (WARN_ON_ONCE(bbio->bio.bi_iter.bi_size != eb->len)) + return BLK_STS_IOERR; + + if (test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags)) { + WARN_ON_ONCE(found_start != 0); + return BLK_STS_OK; + } + + if (WARN_ON_ONCE(found_start != eb->start)) + return BLK_STS_IOERR; + if (WARN_ON(!btrfs_page_test_uptodate(fs_info, eb->pages[0], eb->start, + eb->len))) + return BLK_STS_IOERR; + ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid, offsetof(struct btrfs_header, fsid), BTRFS_FSID_SIZE) == 0); @@ -286,8 +308,7 @@ static int csum_one_extent_buffer(struct extent_buffer *eb) goto error; } write_extent_buffer(eb, result, 0, fs_info->csum_size); - - return 0; + return BLK_STS_OK; error: btrfs_print_tree(eb, 0); @@ -301,99 +322,6 @@ error: */ WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) || btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID); - return ret; -} - -/* Checksum all dirty extent buffers in one bio_vec */ -static int csum_dirty_subpage_buffers(struct btrfs_fs_info *fs_info, - struct bio_vec *bvec) -{ - struct page *page = bvec->bv_page; - u64 bvec_start = page_offset(page) + bvec->bv_offset; - u64 cur; - int ret = 0; - - for (cur = bvec_start; cur < bvec_start + bvec->bv_len; - cur += fs_info->nodesize) { - struct extent_buffer *eb; - bool uptodate; - - eb = find_extent_buffer(fs_info, cur); - uptodate = btrfs_subpage_test_uptodate(fs_info, page, cur, - fs_info->nodesize); - - /* A dirty eb shouldn't disappear from buffer_radix */ - if (WARN_ON(!eb)) - return -EUCLEAN; - - if (WARN_ON(cur != btrfs_header_bytenr(eb))) { - free_extent_buffer(eb); - return -EUCLEAN; - } - if (WARN_ON(!uptodate)) { - free_extent_buffer(eb); - return -EUCLEAN; - } - - ret = csum_one_extent_buffer(eb); - free_extent_buffer(eb); - if (ret < 0) - return ret; - } - return ret; -} - -/* - * Checksum a dirty tree block before IO. This has extra checks to make sure - * we only fill in the checksum field in the first page of a multi-page block. - * For subpage extent buffers we need bvec to also read the offset in the page. 
- */ -static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct bio_vec *bvec) -{ - struct page *page = bvec->bv_page; - u64 start = page_offset(page); - u64 found_start; - struct extent_buffer *eb; - - if (fs_info->nodesize < PAGE_SIZE) - return csum_dirty_subpage_buffers(fs_info, bvec); - - eb = (struct extent_buffer *)page->private; - if (page != eb->pages[0]) - return 0; - - found_start = btrfs_header_bytenr(eb); - - if (test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags)) { - WARN_ON(found_start != 0); - return 0; - } - - /* - * Please do not consolidate these warnings into a single if. - * It is useful to know what went wrong. - */ - if (WARN_ON(found_start != start)) - return -EUCLEAN; - if (WARN_ON(!PageUptodate(page))) - return -EUCLEAN; - - return csum_one_extent_buffer(eb); -} - -blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio) -{ - struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info; - struct bvec_iter iter; - struct bio_vec bv; - int ret = 0; - - bio_for_each_segment(bv, &bbio->bio, iter) { - ret = csum_dirty_buffer(fs_info, &bv); - if (ret) - break; - } - return errno_to_blk_status(ret); } -- cgit v1.2.3 From 9e2aff90fc2ad2b94933762f5442fef9ee7a692e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 May 2023 17:24:39 +0200 Subject: btrfs: stop using lock_extent in btrfs_buffer_uptodate The only other place that locks extents on the btree inode is read_extent_buffer_subpage while reading in the partial page for a buffer. This means locking the extent in btrfs_buffer_uptodate does not synchronize with anything on non-subpage file systems, and on subpage file systems it only waits for a parallel read(-ahead) to finish, which seems to be counter to what the callers actually expect. Reviewed-by: Josef Bacik Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 384c8110ef2a..a3366fe546b1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -112,11 +112,6 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result) */ int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, int atomic) { - struct inode *btree_inode = eb->pages[0]->mapping->host; - struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree; - struct extent_state *cached_state = NULL; - int ret = 1; - if (!extent_buffer_uptodate(eb)) return 0; @@ -126,7 +121,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, int atom if (atomic) return -EAGAIN; - lock_extent(io_tree, eb->start, eb->start + eb->len - 1, &cached_state); if (!extent_buffer_uptodate(eb) || btrfs_header_generation(eb) != parent_transid) { btrfs_err_rl(eb->fs_info, @@ -134,11 +128,9 @@ int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, int atom eb->start, eb->read_mirror, parent_transid, btrfs_header_generation(eb)); clear_extent_buffer_uptodate(eb); - ret = 0; + return 0; } - unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, - &cached_state); - return ret; + return 1; } static bool btrfs_supported_super_csum(u16 csum_type) -- cgit v1.2.3 From 413fb1bc1d3224e508c698cd82e5ff1dc7d16d64 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Wed, 24 May 2023 20:02:39 +0800 Subject: btrfs: return bool from check_tree_block_fsid instead of int Simplify the return type of check_tree_block_fsid() from int (1 or 0) to bool. 
Its only user is interested in knowing the success or failure. Signed-off-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a3366fe546b1..8102d2aa3f62 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -317,7 +317,7 @@ error: return errno_to_blk_status(ret); } -static int check_tree_block_fsid(struct extent_buffer *eb) +static bool check_tree_block_fsid(struct extent_buffer *eb) { struct btrfs_fs_info *fs_info = eb->fs_info; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs; @@ -337,13 +337,13 @@ static int check_tree_block_fsid(struct extent_buffer *eb) metadata_uuid = fs_devices->fsid; if (!memcmp(fsid, metadata_uuid, BTRFS_FSID_SIZE)) - return 0; + return false; list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) if (!memcmp(fsid, seed_devs->fsid, BTRFS_FSID_SIZE)) - return 0; + return false; - return 1; + return true; } /* Do basic extent buffer checks at read time */ -- cgit v1.2.3 From 25984a5ae8f144e823468d39e483face534e45d1 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Wed, 24 May 2023 20:02:42 +0800 Subject: btrfs: consolidate uuid comparisons in btrfs_validate_super There are three ways the fsid is validated in btrfs_validate_super(): - verify that super_copy::fsid is the same as fs_devices::fsid - if the metadata_uuid flag is set, verify if super_copy::metadata_uuid and fs_devices::metadata_uuid are the same. - a few lines below, often missed out, verify if dev_item::fsid is the same as fs_devices::metadata_uuid. The function btrfs_validate_super() contains multiple if-statements with memcmp() to check UUIDs. This patch consolidates them into a single location. Signed-off-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8102d2aa3f62..6e0d4f99ab04 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2399,6 +2399,14 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info, ret = -EINVAL; } + if (memcmp(fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid, + BTRFS_FSID_SIZE) != 0) { + btrfs_err(fs_info, + "dev_item UUID does not match metadata fsid: %pU != %pU", + fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid); + ret = -EINVAL; + } + /* * Artificial requirement for block-group-tree to force newer features * (free-space-tree, no-holes) so the test matrix is smaller. @@ -2411,14 +2419,6 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info, ret = -EINVAL; } - if (memcmp(fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid, - BTRFS_FSID_SIZE) != 0) { - btrfs_err(fs_info, - "dev_item UUID does not match metadata fsid: %pU != %pU", - fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid); - ret = -EINVAL; - } - /* * Hint to catch really bogus numbers, bitflips or so, more exact checks are * done later -- cgit v1.2.3 From 85724171b302914bb8999b9df091fd4616a36eb7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 May 2023 10:40:18 +0200 Subject: btrfs: fix the btrfs_get_global_root return value btrfs_grab_root returns either the root or NULL, and the callers of btrfs_get_global_root expect it to return the same. But all the more recently added roots instead return an ERR_PTR, so fix this. 
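To make the expected contract concrete, here is a hypothetical caller (a sketch only, not code from the kernel tree) that relies on the "referenced root or NULL" convention described above; with a consistent return value a single NULL check is enough, whereas a mix of NULL and ERR_PTR values would force every caller to test for both:

/* Hypothetical caller, illustrating the "root or NULL" contract.
 * The helper name is made up; only btrfs_get_global_root(),
 * btrfs_put_root() and root_key are real kernel identifiers. */
static u64 example_global_root_id(struct btrfs_fs_info *fs_info, u64 objectid)
{
        struct btrfs_root *root = btrfs_get_global_root(fs_info, objectid);
        u64 id;

        if (!root)              /* the only check a caller should need */
                return 0;
        id = root->root_key.objectid;
        btrfs_put_root(root);   /* drop the reference taken by btrfs_grab_root() */
        return id;
}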
Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6e0d4f99ab04..4d347995fcaa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1183,19 +1183,13 @@ static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info, if (objectid == BTRFS_CSUM_TREE_OBJECTID) return btrfs_grab_root(btrfs_global_root(fs_info, &key)); if (objectid == BTRFS_QUOTA_TREE_OBJECTID) - return btrfs_grab_root(fs_info->quota_root) ? - fs_info->quota_root : ERR_PTR(-ENOENT); + return btrfs_grab_root(fs_info->quota_root); if (objectid == BTRFS_UUID_TREE_OBJECTID) - return btrfs_grab_root(fs_info->uuid_root) ? - fs_info->uuid_root : ERR_PTR(-ENOENT); + return btrfs_grab_root(fs_info->uuid_root); if (objectid == BTRFS_BLOCK_GROUP_TREE_OBJECTID) - return btrfs_grab_root(fs_info->block_group_root) ? - fs_info->block_group_root : ERR_PTR(-ENOENT); - if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) { - struct btrfs_root *root = btrfs_global_root(fs_info, &key); - - return btrfs_grab_root(root) ? root : ERR_PTR(-ENOENT); - } + return btrfs_grab_root(fs_info->block_group_root); + if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) + return btrfs_grab_root(btrfs_global_root(fs_info, &key)); return NULL; } -- cgit v1.2.3 From e91909aace336a182da20bcbe507995ac24f74bb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 May 2023 10:40:19 +0200 Subject: btrfs: convert btrfs_get_global_root to use a switch statement Use a switch statement instead of an endless chain of if statements to make the code a little cleaner. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4d347995fcaa..c53c82cd0885 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1172,25 +1172,28 @@ static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info, .offset = 0, }; - if (objectid == BTRFS_ROOT_TREE_OBJECTID) + switch (objectid) { + case BTRFS_ROOT_TREE_OBJECTID: return btrfs_grab_root(fs_info->tree_root); - if (objectid == BTRFS_EXTENT_TREE_OBJECTID) + case BTRFS_EXTENT_TREE_OBJECTID: return btrfs_grab_root(btrfs_global_root(fs_info, &key)); - if (objectid == BTRFS_CHUNK_TREE_OBJECTID) + case BTRFS_CHUNK_TREE_OBJECTID: return btrfs_grab_root(fs_info->chunk_root); - if (objectid == BTRFS_DEV_TREE_OBJECTID) + case BTRFS_DEV_TREE_OBJECTID: return btrfs_grab_root(fs_info->dev_root); - if (objectid == BTRFS_CSUM_TREE_OBJECTID) + case BTRFS_CSUM_TREE_OBJECTID: return btrfs_grab_root(btrfs_global_root(fs_info, &key)); - if (objectid == BTRFS_QUOTA_TREE_OBJECTID) + case BTRFS_QUOTA_TREE_OBJECTID: return btrfs_grab_root(fs_info->quota_root); - if (objectid == BTRFS_UUID_TREE_OBJECTID) + case BTRFS_UUID_TREE_OBJECTID: return btrfs_grab_root(fs_info->uuid_root); - if (objectid == BTRFS_BLOCK_GROUP_TREE_OBJECTID) + case BTRFS_BLOCK_GROUP_TREE_OBJECTID: return btrfs_grab_root(fs_info->block_group_root); - if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) + case BTRFS_FREE_SPACE_TREE_OBJECTID: return btrfs_grab_root(btrfs_global_root(fs_info, &key)); - return NULL; + default: + return NULL; + } } int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, -- cgit v1.2.3 From 25ac047c9d3df373de35ba4ee3f0c902874a1fab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 May 2023 10:40:20 +0200 Subject: btrfs: remove a pointless NULL check in btrfs_lookup_fs_root btrfs_grab_root already checks for a NULL root itself. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c53c82cd0885..06dab9d018c2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1157,8 +1157,7 @@ static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, spin_lock(&fs_info->fs_roots_radix_lock); root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)root_id); - if (root) - root = btrfs_grab_root(root); + root = btrfs_grab_root(root); spin_unlock(&fs_info->fs_roots_radix_lock); return root; } -- cgit v1.2.3 From 58e814fcacc1f652d2b794c82b7c9d96ee3c3bab Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 25 May 2023 13:33:08 -1000 Subject: btrfs: use alloc_ordered_workqueue() to create ordered workqueues BACKGROUND ========== When multiple work items are queued to a workqueue, their execution order doesn't match the queueing order. They may get executed in any order and simultaneously. When fully serialized execution - one by one in the queueing order - is needed, an ordered workqueue should be used which can be created with alloc_ordered_workqueue(). However, alloc_ordered_workqueue() was a later addition. Before it, an ordered workqueue could be obtained by creating an UNBOUND workqueue with @max_active==1. 
This originally was an implementation side-effect which was broken by 4c16bd327c74 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered"). Because there were users that depended on the ordered execution, 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") made the workqueue allocation path implicitly promote UNBOUND workqueues with @max_active==1 to ordered workqueues.

While this has worked okay, overloading the UNBOUND allocation interface this way creates other issues. It is difficult to tell whether a given workqueue actually needs to be ordered, and users that legitimately want a minimum concurrency level unexpectedly get an ordered workqueue instead. With planned UNBOUND workqueue updates to improve execution locality, and the growing prevalence of chiplet designs that can benefit from such improvements, this is not a state we want to stay in forever.

This patch series audits all call sites that create an UNBOUND workqueue with @max_active==1 and converts them to alloc_ordered_workqueue() as necessary.

BTRFS
=====

* fs_info->scrub_workers, initialized in scrub_workers_get(), was setting @max_active to 1 when @is_dev_replace is set, and it seems that the workqueue actually needs to be ordered if @is_dev_replace. Update the code so that alloc_ordered_workqueue() is used if @is_dev_replace.

* fs_info->discard_ctl.discard_workers, initialized in btrfs_init_workqueues(), was directly using alloc_workqueue() with @max_active==1. Converted to alloc_ordered_workqueue().

* fs_info->fixup_workers and fs_info->qgroup_rescan_workers, initialized in btrfs_queue_work(), use btrfs's workqueue wrapper, btrfs_workqueue, which is allocated with btrfs_alloc_workqueue(). btrfs_workqueue implements automatic @max_active adjustment, which is disabled when the specified max limit is below a certain threshold, so calling btrfs_alloc_workqueue() with @limit_active==1 yields an ordered workqueue whose @max_active won't be changed as the auto-tuning is disabled. This is rather brittle in that nothing clearly indicates that the two workqueues must be ordered, or that btrfs_alloc_workqueue() must disable auto-tuning when @limit_active==1. This patch factors out the common btrfs_workqueue init code into btrfs_init_workqueue() and adds an explicit btrfs_alloc_ordered_workqueue(). The two workqueues are converted to use the new ordered allocation interface.
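As a generic illustration of the conversion this series performs (a sketch only; "example" is a made-up workqueue name, not one of the btrfs queues):

#include <linux/workqueue.h>

/* Illustration of the before/after shape of the conversion. */
static struct workqueue_struct *example_create_ordered_wq(void)
{
        /*
         * Before: ordering was only an implicit side effect of
         * UNBOUND + max_active == 1:
         *
         *   return alloc_workqueue("example",
         *                          WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_FREEZABLE, 1);
         */

        /* After: the one-at-a-time, in-order requirement is stated explicitly. */
        return alloc_ordered_workqueue("example", WQ_MEM_RECLAIM | WQ_FREEZABLE);
}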
Signed-off-by: Tejun Heo Signed-off-by: David Sterba --- fs/btrfs/async-thread.c | 44 +++++++++++++++++++++++++++++++++++++++----- fs/btrfs/async-thread.h | 3 +++ fs/btrfs/disk-io.c | 8 +++++--- fs/btrfs/scrub.c | 6 ++++-- 4 files changed, 51 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index aac240430efe..ce083e99ef68 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -71,6 +71,16 @@ bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq) return atomic_read(&wq->pending) > wq->thresh * 2; } +static void btrfs_init_workqueue(struct btrfs_workqueue *wq, + struct btrfs_fs_info *fs_info) +{ + wq->fs_info = fs_info; + atomic_set(&wq->pending, 0); + INIT_LIST_HEAD(&wq->ordered_list); + spin_lock_init(&wq->list_lock); + spin_lock_init(&wq->thres_lock); +} + struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name, unsigned int flags, int limit_active, int thresh) @@ -80,9 +90,9 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, if (!ret) return NULL; - ret->fs_info = fs_info; + btrfs_init_workqueue(ret, fs_info); + ret->limit_active = limit_active; - atomic_set(&ret->pending, 0); if (thresh == 0) thresh = DFT_THRESHOLD; /* For low threshold, disabling threshold is a better choice */ @@ -106,9 +116,33 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, return NULL; } - INIT_LIST_HEAD(&ret->ordered_list); - spin_lock_init(&ret->list_lock); - spin_lock_init(&ret->thres_lock); + trace_btrfs_workqueue_alloc(ret, name); + return ret; +} + +struct btrfs_workqueue *btrfs_alloc_ordered_workqueue( + struct btrfs_fs_info *fs_info, const char *name, + unsigned int flags) +{ + struct btrfs_workqueue *ret; + + ret = kzalloc(sizeof(*ret), GFP_KERNEL); + if (!ret) + return NULL; + + btrfs_init_workqueue(ret, fs_info); + + /* Ordered workqueues don't allow @max_active adjustments. 
*/ + ret->limit_active = 1; + ret->current_active = 1; + ret->thresh = NO_THRESHOLD; + + ret->normal_wq = alloc_ordered_workqueue("btrfs-%s", flags, name); + if (!ret->normal_wq) { + kfree(ret); + return NULL; + } + trace_btrfs_workqueue_alloc(ret, name); return ret; } diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 6e2596ddae10..30f66c5e2e6e 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h @@ -31,6 +31,9 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, unsigned int flags, int limit_active, int thresh); +struct btrfs_workqueue *btrfs_alloc_ordered_workqueue( + struct btrfs_fs_info *fs_info, const char *name, + unsigned int flags); void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func, btrfs_func_t ordered_func, btrfs_func_t ordered_free); void btrfs_queue_work(struct btrfs_workqueue *wq, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 06dab9d018c2..1ad7037c6631 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1939,6 +1939,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info) { u32 max_active = fs_info->thread_pool_size; unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND; + unsigned int ordered_flags = WQ_MEM_RECLAIM | WQ_FREEZABLE; fs_info->workers = btrfs_alloc_workqueue(fs_info, "worker", flags, max_active, 16); @@ -1955,7 +1956,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info) btrfs_alloc_workqueue(fs_info, "cache", flags, max_active, 0); fs_info->fixup_workers = - btrfs_alloc_workqueue(fs_info, "fixup", flags, 1, 0); + btrfs_alloc_ordered_workqueue(fs_info, "fixup", ordered_flags); fs_info->endio_workers = alloc_workqueue("btrfs-endio", flags, max_active); @@ -1974,9 +1975,10 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info) btrfs_alloc_workqueue(fs_info, "delayed-meta", flags, max_active, 0); fs_info->qgroup_rescan_workers = - btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0); + btrfs_alloc_ordered_workqueue(fs_info, "qgroup-rescan", + ordered_flags); fs_info->discard_ctl.discard_workers = - alloc_workqueue("btrfs_discard", WQ_UNBOUND | WQ_FREEZABLE, 1); + alloc_ordered_workqueue("btrfs_discard", WQ_FREEZABLE); if (!(fs_info->workers && fs_info->delalloc_workers && fs_info->flush_workers && diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index d7a14458c4a7..316c5a0e3a44 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2740,8 +2740,10 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, if (refcount_inc_not_zero(&fs_info->scrub_workers_refcnt)) return 0; - scrub_workers = alloc_workqueue("btrfs-scrub", flags, - is_dev_replace ? 1 : max_active); + if (is_dev_replace) + scrub_workers = alloc_ordered_workqueue("btrfs-scrub", flags); + else + scrub_workers = alloc_workqueue("btrfs-scrub", flags, max_active); if (!scrub_workers) goto fail_scrub_workers; -- cgit v1.2.3 From 4d34ad34d7cc5390ec03b25f2a7f2fd5041cb7d8 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 29 May 2023 16:16:59 +0100 Subject: btrfs: remove pointless in_tree field from struct btrfs_delayed_ref_node The 'in_tree' field is really not needed in struct btrfs_delayed_ref_node, as we can check whether a reference is in the tree or not simply by checking its red black tree node member with RB_EMPTY_NODE(), as when we remove it from the tree we always call RB_CLEAR_NODE(). So remove that field and use RB_EMPTY_NODE(). 
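To make the idea concrete, here is a small generic sketch of the pattern (illustrative only, not btrfs code; the names are made up): the cleared rb_node itself serves as the "not linked" marker, so no separate flag is needed.

#include <linux/rbtree.h>
#include <linux/types.h>

struct example_entry {
        struct rb_node node;
};

static void example_entry_init(struct example_entry *e)
{
        /* A cleared node doubles as the "not in a tree" marker. */
        RB_CLEAR_NODE(&e->node);
}

static void example_entry_unlink(struct rb_root *root, struct example_entry *e)
{
        rb_erase(&e->node, root);
        /* Keep the invariant: cleared whenever the entry is out of the tree. */
        RB_CLEAR_NODE(&e->node);
}

static bool example_entry_is_linked(struct example_entry *e)
{
        /* Replaces a separate in_tree flag. */
        return !RB_EMPTY_NODE(&e->node);
}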
Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/delayed-ref.c | 2 -- fs/btrfs/delayed-ref.h | 3 +-- fs/btrfs/disk-io.c | 1 - fs/btrfs/extent-tree.c | 1 - 4 files changed, 1 insertion(+), 6 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index be1d18ec5cef..d6dce5792c0f 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -407,7 +407,6 @@ static inline void drop_delayed_ref(struct btrfs_delayed_ref_root *delayed_refs, RB_CLEAR_NODE(&ref->ref_node); if (!list_empty(&ref->add_list)) list_del(&ref->add_list); - ref->in_tree = 0; btrfs_put_delayed_ref(ref); atomic_dec(&delayed_refs->num_entries); } @@ -853,7 +852,6 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info, ref->num_bytes = num_bytes; ref->ref_mod = 1; ref->action = action; - ref->in_tree = 1; ref->seq = seq; ref->type = ref_type; RB_CLEAR_NODE(&ref->ref_node); diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 77b3e735772d..9b103bf27185 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -48,7 +48,6 @@ struct btrfs_delayed_ref_node { unsigned int action:8; unsigned int type:8; - unsigned int in_tree:1; }; struct btrfs_delayed_extent_op { @@ -341,7 +340,7 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) { WARN_ON(refcount_read(&ref->refs) == 0); if (refcount_dec_and_test(&ref->refs)) { - WARN_ON(ref->in_tree); + WARN_ON(!RB_EMPTY_NODE(&ref->ref_node)); switch (ref->type) { case BTRFS_TREE_BLOCK_REF_KEY: case BTRFS_SHARED_BLOCK_REF_KEY: diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1ad7037c6631..55cbbe7b0dba 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4602,7 +4602,6 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, while ((n = rb_first_cached(&head->ref_tree)) != NULL) { ref = rb_entry(n, struct btrfs_delayed_ref_node, ref_node); - ref->in_tree = 0; rb_erase_cached(&ref->ref_node, &head->ref_tree); RB_CLEAR_NODE(&ref->ref_node); if (!list_empty(&ref->add_list)) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 71aaf28fafc9..81a673506077 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1913,7 +1913,6 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans, return -EAGAIN; } - ref->in_tree = 0; rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree); RB_CLEAR_NODE(&ref->ref_node); if (!list_empty(&ref->add_list)) -- cgit v1.2.3 From 184533e3618f4d0b382c1ef3de0ce34e849005d7 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 29 May 2023 16:17:06 +0100 Subject: btrfs: remove unnecessary prototype declarations at disk-io.c We have a few static functions at disk-io.c for which we have a forward declaration of their prototype, but it's not needed because all those functions are defined before they are called, so remove them. 
Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 55cbbe7b0dba..c22f8f58c951 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -60,15 +60,6 @@ BTRFS_SUPER_FLAG_METADUMP |\ BTRFS_SUPER_FLAG_METADUMP_V2) -static void btrfs_destroy_ordered_extents(struct btrfs_root *root); -static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, - struct btrfs_fs_info *fs_info); -static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root); -static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info, - struct extent_io_tree *dirty_pages, - int mark); -static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info, - struct extent_io_tree *pinned_extents); static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info); static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info); -- cgit v1.2.3 From 99f09ce309b8307ce8dca209f936e99a7c332214 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 2 Jun 2023 12:19:42 +0100 Subject: btrfs: make btrfs_destroy_delayed_refs() return void btrfs_destroy_delayed_refs() always returns 0 and its single caller does not check its return value, as it also returns void, and so does the callers' caller and so on. This is because we are in the transaction abort path, where we have no way to deal with errors (we are in a critical situation) and all cleanup of resources works in a best effort fashion. So make btrfs_destroy_delayed_refs() return void. Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c22f8f58c951..4328c6b1120e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4562,13 +4562,12 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); } -static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, - struct btrfs_fs_info *fs_info) +static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, + struct btrfs_fs_info *fs_info) { struct rb_node *node; struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_node *ref; - int ret = 0; delayed_refs = &trans->delayed_refs; @@ -4576,7 +4575,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, if (atomic_read(&delayed_refs->num_entries) == 0) { spin_unlock(&delayed_refs->lock); btrfs_debug(fs_info, "delayed_refs has NO entry"); - return ret; + return; } while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) { @@ -4637,8 +4636,6 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, btrfs_qgroup_destroy_extent_records(trans); spin_unlock(&delayed_refs->lock); - - return ret; } static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) -- cgit v1.2.3 From efcfcbc6a36195c42d98e0ee697baba36da94dc8 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 4 Apr 2023 00:06:02 +0200 Subject: btrfs: add xxhash to fast checksum implementations The implementation of XXHASH is now CPU only but still fast enough to be considered for the synchronous checksumming, like non-generic crc32c. 
A userspace benchmark comparing it to various implementations (patched hash-speedtest from btrfs-progs):

Block size:     4096
Iterations:     1000000
Implementation: builtin
Units:          CPU cycles

        NULL-NOP: cycles:    73384294, cycles/i    73
     NULL-MEMCPY: cycles:   228033868, cycles/i   228, 61664.320 MiB/s
      CRC32C-ref: cycles: 24758559416, cycles/i 24758,   567.950 MiB/s
       CRC32C-NI: cycles:  1194350470, cycles/i  1194, 11773.433 MiB/s
  CRC32C-ADLERSW: cycles:  6150186216, cycles/i  6150,  2286.372 MiB/s
  CRC32C-ADLERHW: cycles:   626979180, cycles/i   626, 22427.453 MiB/s
      CRC32C-PCL: cycles:   466746732, cycles/i   466, 30126.699 MiB/s
          XXHASH: cycles:   860656400, cycles/i   860, 16338.188 MiB/s

The comparison covers the purely software implementation (ref), the current but outdated implementation accelerated by the crc32q instruction (NI), the optimized implementations by M. Adler (https://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software/17646775#17646775), and the best one, taken from the kernel and using the PCLMULQDQ instruction (PCL).

Reviewed-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4328c6b1120e..7513388b0567 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2011,6 +2011,9 @@ static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type) if (!strstr(crypto_shash_driver_name(csum_shash), "generic")) set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags); break; + case BTRFS_CSUM_TYPE_XXHASH: + set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags); + break; default: break; } -- cgit v1.2.3
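For anyone who wants to reproduce a rough userspace comparison, a minimal sketch using the xxHash library is given below. This is not part of the patch; it assumes libxxhash is installed (xxhash.h, linked with -lxxhash), the file name and wall-clock timing are arbitrary choices, and its MiB/s numbers are not directly comparable to the cycles-based table above.

/* xxh64-speed.c: hash a 4 KiB block one million times with XXH64.
 * Build (assumption): gcc -O2 xxh64-speed.c -lxxhash -o xxh64-speed */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <xxhash.h>

int main(void)
{
        unsigned char block[4096];
        const int iterations = 1000000;
        uint64_t sum = 0;
        struct timespec start, end;
        double secs;

        memset(block, 0xab, sizeof(block));

        clock_gettime(CLOCK_MONOTONIC, &start);
        for (int i = 0; i < iterations; i++)
                sum ^= XXH64(block, sizeof(block), 0);   /* seed 0 */
        clock_gettime(CLOCK_MONOTONIC, &end);

        secs = (end.tv_sec - start.tv_sec) +
               (end.tv_nsec - start.tv_nsec) / 1e9;
        printf("XXH64: %.3f MiB/s (xor of hashes: %llx)\n",
               iterations * (sizeof(block) / (1024.0 * 1024.0)) / secs,
               (unsigned long long)sum);
        return 0;
}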