author     Linus Torvalds <torvalds@linux-foundation.org>  2024-10-04 20:05:13 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org>  2024-10-04 20:05:13 +0300
commit     79eb2c07afbe4d165734ea61a258dd8410ec6624
tree       40144fe701942830adcece5474846015afe87361 /fs
parent     b7a838ee7e8904c14e5d6ca2d0029bbad70fb761
parent     d6e7ac65d4c106149d08a0ffba39fc516ae3d21b
download   linux-79eb2c07afbe4d165734ea61a258dd8410ec6624.tar.xz
Merge tag 'for-6.12-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:
- in incremental send, fix an invalid clone operation for a file that
  got its size decreased
- fix the __counted_by() annotation of send path cache entries; we do
  not store the terminating NUL (a standalone sketch of this pattern
  follows the list)
- fix a longstanding bug in relocation (quite hard to hit by chance):
  drop the back reference cache, which can get out of sync after a
  transaction commit
- wait for fixup workers before finishing umount
- add the missing raid-stripe-tree extent for NOCOW files; zoned mode
  cannot have NOCOW files, but RST is meant to be a standalone feature
- handle a transaction start error during relocation to avoid a
  potential NULL pointer dereference of the relocation control
  structure (reported by syzbot)
- disable module-wide rate limiting of debug level messages (a
  userspace sketch of this pattern follows the commit list below)
- minor fix to a tracepoint definition (reported by checkpatch.pl)
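As noted in the second item above, the send path cache stores names without a
terminating NUL. Below is a minimal standalone sketch of that pattern, assuming
a GCC or Clang toolchain; COUNTED_BY, struct cache_entry and cache_entry_new()
are illustrative stand-ins, not the kernel's code or API. The annotation tells
the compiler the flexible array is exactly name_len bytes, so the allocation
must not add +1 and the copy must be memcpy(), never strcpy(), which would
write one extra byte (the NUL) past the end and trip fortified bounds checking:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Fall back to a no-op where the counted_by attribute is unavailable. */
#if defined(__has_attribute)
# if __has_attribute(counted_by)
#  define COUNTED_BY(m) __attribute__((counted_by(m)))
# endif
#endif
#ifndef COUNTED_BY
# define COUNTED_BY(m)
#endif

struct cache_entry {
        int name_len;                     /* length WITHOUT a NUL terminator */
        char name[] COUNTED_BY(name_len); /* not NUL terminated */
};

static struct cache_entry *cache_entry_new(const char *src)
{
        size_t len = strlen(src);
        /* Exactly name_len bytes for the array: no +1 for a NUL. */
        struct cache_entry *e = malloc(sizeof(*e) + len);

        if (!e)
                return NULL;
        e->name_len = (int)len;
        /* strcpy() would write len + 1 bytes and overflow by one. */
        memcpy(e->name, src, len);
        return e;
}

int main(void)
{
        struct cache_entry *e = cache_entry_new("some/cached/path");

        if (!e)
                return 1;
        /* Print with an explicit length since the buffer has no NUL. */
        printf("%.*s (%d bytes)\n", e->name_len, e->name, e->name_len);
        free(e);
        return 0;
}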
* tag 'for-6.12-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
btrfs: disable rate limiting when debug enabled
btrfs: wait for fixup workers before stopping cleaner kthread during umount
btrfs: fix a NULL pointer dereference when failing to start a new transaction
btrfs: send: fix invalid clone operation for file that got its size decreased
btrfs: tracepoints: end assignment with semicolon at btrfs_qgroup_extent event class
btrfs: drop the backref cache during relocation if we commit
btrfs: also add stripe entries for NOCOW writes
btrfs: send: fix buffer overflow detection when copying path to cache entry
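The first commit in the list, "btrfs: disable rate limiting when debug
enabled", boils down to a short-circuit in front of the rate limiter. Here is
a userspace sketch of that pattern; a one-second window limiter stands in for
the kernel's __ratelimit(), and ratelimit_ok() and DEBUG_ENABLED are
illustrative names, not kernel API. When the debug option is compiled in, the
|| short-circuits and no message is ever dropped:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#ifndef DEBUG_ENABLED
#define DEBUG_ENABLED 0 /* build with -DDEBUG_ENABLED=1 to bypass limiting */
#endif

struct ratelimit {
        time_t window_start; /* start of the current one-second window */
        int burst;           /* messages allowed per window */
        int seen;            /* messages seen in the current window */
};

/* Allow at most rl->burst messages per one-second window. */
static bool ratelimit_ok(struct ratelimit *rl)
{
        time_t now = time(NULL);

        if (now != rl->window_start) {
                rl->window_start = now;
                rl->seen = 0;
        }
        return rl->seen++ < rl->burst;
}

int main(void)
{
        struct ratelimit rl = { .burst = 3 };

        for (int i = 0; i < 10; i++) {
                /* Debug builds short-circuit and never consult the limiter. */
                if (DEBUG_ENABLED || ratelimit_ok(&rl))
                        printf("message %d\n", i);
        }
        return 0;
}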
Diffstat (limited to 'fs')
 fs/btrfs/backref.c    | 12
 fs/btrfs/disk-io.c    | 11
 fs/btrfs/inode.c      |  5
 fs/btrfs/messages.c   |  3
 fs/btrfs/relocation.c | 77
 fs/btrfs/send.c       | 31
 6 files changed, 57 insertions(+), 82 deletions(-)
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e2f478ecd7fd..f8e1d5b2c512 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -3179,10 +3179,14 @@ void btrfs_backref_release_cache(struct btrfs_backref_cache *cache)
                 btrfs_backref_cleanup_node(cache, node);
         }
 
-        cache->last_trans = 0;
-
-        for (i = 0; i < BTRFS_MAX_LEVEL; i++)
-                ASSERT(list_empty(&cache->pending[i]));
+        for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
+                while (!list_empty(&cache->pending[i])) {
+                        node = list_first_entry(&cache->pending[i],
+                                                struct btrfs_backref_node,
+                                                list);
+                        btrfs_backref_cleanup_node(cache, node);
+                }
+        }
         ASSERT(list_empty(&cache->pending_edge));
         ASSERT(list_empty(&cache->useless_node));
         ASSERT(list_empty(&cache->changed));
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 831fb901683c..4ad5db619b00 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4256,6 +4256,17 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
         btrfs_cleanup_defrag_inodes(fs_info);
 
         /*
+         * Wait for any fixup workers to complete.
+         * If we don't wait for them here and they are still running by the time
+         * we call kthread_stop() against the cleaner kthread further below, we
+         * get an use-after-free on the cleaner because the fixup worker adds an
+         * inode to the list of delayed iputs and then attempts to wakeup the
+         * cleaner kthread, which was already stopped and destroyed. We parked
+         * already the cleaner, but below we run all pending delayed iputs.
+         */
+        btrfs_flush_workqueue(fs_info->fixup_workers);
+
+        /*
          * After we parked the cleaner kthread, ordered extents may have
          * completed and created new delayed iputs. If one of the async reclaim
          * tasks is running and in the RUN_DELAYED_IPUTS flush state, then we
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9122afcb712c..5618ca02934a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3111,6 +3111,11 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
                 ret = btrfs_update_inode_fallback(trans, inode);
                 if (ret) /* -ENOMEM or corruption */
                         btrfs_abort_transaction(trans, ret);
+
+                ret = btrfs_insert_raid_extent(trans, ordered_extent);
+                if (ret)
+                        btrfs_abort_transaction(trans, ret);
+
                 goto out;
         }
 
diff --git a/fs/btrfs/messages.c b/fs/btrfs/messages.c
index 77752eec125d..363fd28c0268 100644
--- a/fs/btrfs/messages.c
+++ b/fs/btrfs/messages.c
@@ -239,7 +239,8 @@ void __cold _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt,
         vaf.fmt = fmt;
         vaf.va = &args;
 
-        if (__ratelimit(ratelimit)) {
+        /* Do not ratelimit if CONFIG_BTRFS_DEBUG is enabled. */
+        if (IS_ENABLED(CONFIG_BTRFS_DEBUG) || __ratelimit(ratelimit)) {
                 if (fs_info) {
                         char statestr[STATE_STRING_BUF_LEN];
 
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index ea4ed85919ec..f3834f8d26b4 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -232,70 +232,6 @@ static struct btrfs_backref_node *walk_down_backref(
         return NULL;
 }
 
-static void update_backref_node(struct btrfs_backref_cache *cache,
-                                struct btrfs_backref_node *node, u64 bytenr)
-{
-        struct rb_node *rb_node;
-        rb_erase(&node->rb_node, &cache->rb_root);
-        node->bytenr = bytenr;
-        rb_node = rb_simple_insert(&cache->rb_root, node->bytenr, &node->rb_node);
-        if (rb_node)
-                btrfs_backref_panic(cache->fs_info, bytenr, -EEXIST);
-}
-
-/*
- * update backref cache after a transaction commit
- */
-static int update_backref_cache(struct btrfs_trans_handle *trans,
-                                struct btrfs_backref_cache *cache)
-{
-        struct btrfs_backref_node *node;
-        int level = 0;
-
-        if (cache->last_trans == 0) {
-                cache->last_trans = trans->transid;
-                return 0;
-        }
-
-        if (cache->last_trans == trans->transid)
-                return 0;
-
-        /*
-         * detached nodes are used to avoid unnecessary backref
-         * lookup. transaction commit changes the extent tree.
-         * so the detached nodes are no longer useful.
-         */
-        while (!list_empty(&cache->detached)) {
-                node = list_entry(cache->detached.next,
-                                  struct btrfs_backref_node, list);
-                btrfs_backref_cleanup_node(cache, node);
-        }
-
-        while (!list_empty(&cache->changed)) {
-                node = list_entry(cache->changed.next,
-                                  struct btrfs_backref_node, list);
-                list_del_init(&node->list);
-                BUG_ON(node->pending);
-                update_backref_node(cache, node, node->new_bytenr);
-        }
-
-        /*
-         * some nodes can be left in the pending list if there were
-         * errors during processing the pending nodes.
-         */
-        for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
-                list_for_each_entry(node, &cache->pending[level], list) {
-                        BUG_ON(!node->pending);
-                        if (node->bytenr == node->new_bytenr)
-                                continue;
-                        update_backref_node(cache, node, node->new_bytenr);
-                }
-        }
-
-        cache->last_trans = 0;
-        return 1;
-}
-
 static bool reloc_root_is_dead(const struct btrfs_root *root)
 {
         /*
@@ -551,9 +487,6 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
         struct btrfs_backref_edge *new_edge;
         struct rb_node *rb_node;
 
-        if (cache->last_trans > 0)
-                update_backref_cache(trans, cache);
-
         rb_node = rb_simple_search(&cache->rb_root, src->commit_root->start);
         if (rb_node) {
                 node = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
@@ -923,7 +856,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
         btrfs_grab_root(reloc_root);
 
         /* root->reloc_root will stay until current relocation finished */
-        if (fs_info->reloc_ctl->merge_reloc_tree &&
+        if (fs_info->reloc_ctl && fs_info->reloc_ctl->merge_reloc_tree &&
             btrfs_root_refs(root_item) == 0) {
                 set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
                 /*
@@ -3698,11 +3631,9 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
                         break;
                 }
 restart:
-                if (update_backref_cache(trans, &rc->backref_cache)) {
-                        btrfs_end_transaction(trans);
-                        trans = NULL;
-                        continue;
-                }
+                if (rc->backref_cache.last_trans != trans->transid)
+                        btrfs_backref_release_cache(&rc->backref_cache);
+                rc->backref_cache.last_trans = trans->transid;
 
                 ret = find_next_extent(rc, path, &key);
                 if (ret < 0)
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 7f48ba6c1c77..27306d98ec43 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -346,8 +346,10 @@ struct name_cache_entry {
         u64 parent_gen;
         int ret;
         int need_later_update;
+        /* Name length without NUL terminator. */
         int name_len;
-        char name[] __counted_by(name_len);
+        /* Not NUL terminated. */
+        char name[] __counted_by(name_len) __nonstring;
 };
 
 /* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */
@@ -2388,7 +2390,7 @@ out_cache:
         /*
          * Store the result of the lookup in the name cache.
          */
-        nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL);
+        nce = kmalloc(sizeof(*nce) + fs_path_len(dest), GFP_KERNEL);
         if (!nce) {
                 ret = -ENOMEM;
                 goto out;
@@ -2400,7 +2402,7 @@ out_cache:
         nce->parent_gen = *parent_gen;
         nce->name_len = fs_path_len(dest);
         nce->ret = ret;
-        strcpy(nce->name, dest->start);
+        memcpy(nce->name, dest->start, nce->name_len);
 
         if (ino < sctx->send_progress)
                 nce->need_later_update = 0;
@@ -6187,8 +6189,29 @@ static int send_write_or_clone(struct send_ctx *sctx,
         if (ret < 0)
                 return ret;
 
-        if (clone_root->offset + num_bytes == info.size)
+        if (clone_root->offset + num_bytes == info.size) {
+                /*
+                 * The final size of our file matches the end offset, but it may
+                 * be that its current size is larger, so we have to truncate it
+                 * to any value between the start offset of the range and the
+                 * final i_size, otherwise the clone operation is invalid
+                 * because it's unaligned and it ends before the current EOF.
+                 * We do this truncate to the final i_size when we finish
+                 * processing the inode, but it's too late by then. And here we
+                 * truncate to the start offset of the range because it's always
+                 * sector size aligned while if it were the final i_size it
+                 * would result in dirtying part of a page, filling part of a
+                 * page with zeroes and then having the clone operation at the
+                 * receiver trigger IO and wait for it due to the dirty page.
+                 */
+                if (sctx->parent_root != NULL) {
+                        ret = send_truncate(sctx, sctx->cur_ino,
+                                            sctx->cur_inode_gen, offset);
+                        if (ret < 0)
+                                return ret;
+                }
                 goto clone_data;
+        }
 
 write_data:
         ret = send_extent_data(sctx, path, offset, num_bytes);
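Finally, the backref.c hunk above replaces assertions of emptiness with a
drain loop, since relocation errors can leave nodes queued on the per-level
pending lists. Here is a standalone sketch of that drain-instead-of-assert
pattern, with a toy doubly linked list standing in for the kernel's list_head
API; all names are illustrative, not kernel code:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_LEVEL 8

struct list_head {
        struct list_head *next, *prev;
};

static void list_init(struct list_head *h) { h->next = h->prev = h; }
static int list_empty(const struct list_head *h) { return h->next == h; }

static void list_add(struct list_head *n, struct list_head *h)
{
        n->next = h->next;
        n->prev = h;
        h->next->prev = n;
        h->next = n;
}

static void list_del(struct list_head *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
}

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct node {
        unsigned long bytenr;
        struct list_head list;
};

/*
 * Drain every pending list instead of asserting emptiness: an error during
 * relocation may leave nodes queued, and each one must still be unlinked
 * and freed exactly once on release.
 */
static void release_cache(struct list_head pending[MAX_LEVEL])
{
        for (int i = 0; i < MAX_LEVEL; i++) {
                while (!list_empty(&pending[i])) {
                        struct node *n = container_of(pending[i].next,
                                                      struct node, list);

                        list_del(&n->list);
                        printf("cleaning up node 0x%lx at level %d\n",
                               n->bytenr, i);
                        free(n);
                }
        }
}

int main(void)
{
        struct list_head pending[MAX_LEVEL];

        for (int i = 0; i < MAX_LEVEL; i++)
                list_init(&pending[i]);

        /* Simulate nodes left behind by a failed relocation pass. */
        for (int i = 0; i < 3; i++) {
                struct node *n = malloc(sizeof(*n));

                if (!n)
                        return 1;
                n->bytenr = 0x1000 + i;
                list_add(&n->list, &pending[i]);
        }

        release_cache(pending);
        return 0;
}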