diff options
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/block-group.c | 18 | ||||
-rw-r--r-- | fs/btrfs/discard.c | 51 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 43 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 30 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 7 | ||||
-rw-r--r-- | fs/btrfs/file.c | 9 | ||||
-rw-r--r-- | fs/btrfs/free-space-tree.c | 16 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 122 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 6 | ||||
-rw-r--r-- | fs/btrfs/send.c | 6 | ||||
-rw-r--r-- | fs/btrfs/super.c | 3 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 8 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 6 | ||||
-rw-r--r-- | fs/btrfs/zoned.c | 6 |
14 files changed, 238 insertions, 93 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 0dcf7fecaf55..91440ef79a26 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1678,6 +1678,17 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) up_write(&space_info->groups_sem); goto next; } + + /* + * Cache the zone_unusable value before turning the block group + * to read only. As soon as the block group is read only its + * zone_unusable value gets moved to the block group's read-only + * bytes and isn't available for calculations anymore. We also + * cache it before unlocking the block group, to prevent races + * (reports from KCSAN and such tools) with tasks updating it. + */ + zone_unusable = bg->zone_unusable; + spin_unlock(&bg->lock); /* @@ -1693,13 +1704,6 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) goto next; } - /* - * Cache the zone_unusable value before turning the block group - * to read only. As soon as the blog group is read only it's - * zone_unusable value gets moved to the block group's read-only - * bytes and isn't available for calculations anymore. 
- */ - zone_unusable = bg->zone_unusable; ret = inc_block_group_ro(bg, 0); up_write(&space_info->groups_sem); if (ret < 0) diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c index bd9dde374e5d..a90f3cb83c70 100644 --- a/fs/btrfs/discard.c +++ b/fs/btrfs/discard.c @@ -78,8 +78,6 @@ static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl, struct btrfs_block_group *block_group) { lockdep_assert_held(&discard_ctl->lock); - if (!btrfs_run_discard_work(discard_ctl)) - return; if (list_empty(&block_group->discard_list) || block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) { @@ -102,6 +100,9 @@ static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl, if (!btrfs_is_block_group_data_only(block_group)) return; + if (!btrfs_run_discard_work(discard_ctl)) + return; + spin_lock(&discard_ctl->lock); __add_to_discard_list(discard_ctl, block_group); spin_unlock(&discard_ctl->lock); @@ -151,13 +152,7 @@ static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl, block_group->discard_eligible_time = 0; queued = !list_empty(&block_group->discard_list); list_del_init(&block_group->discard_list); - /* - * If the block group is currently running in the discard workfn, we - * don't want to deref it, since it's still being used by the workfn. - * The workfn will notice this case and deref the block group when it is - * finished. 
- */ - if (queued && !running) + if (queued) btrfs_put_block_group(block_group); spin_unlock(&discard_ctl->lock); @@ -233,6 +228,18 @@ again: block_group->used != 0) { if (btrfs_is_block_group_data_only(block_group)) { __add_to_discard_list(discard_ctl, block_group); + /* + * The block group must have been moved to other + * discard list even if discard was disabled in + * the meantime or a transaction abort happened, + * otherwise we can end up in an infinite loop, + * always jumping into the 'again' label and + * keep getting this block group over and over + * in case there are no other block groups in + * the discard lists. + */ + ASSERT(block_group->discard_index != + BTRFS_DISCARD_INDEX_UNUSED); } else { list_del_init(&block_group->discard_list); btrfs_put_block_group(block_group); @@ -243,9 +250,10 @@ again: block_group->discard_cursor = block_group->start; block_group->discard_state = BTRFS_DISCARD_EXTENTS; } - discard_ctl->block_group = block_group; } if (block_group) { + btrfs_get_block_group(block_group); + discard_ctl->block_group = block_group; *discard_state = block_group->discard_state; *discard_index = block_group->discard_index; } @@ -469,9 +477,20 @@ static void btrfs_discard_workfn(struct work_struct *work) block_group = peek_discard_list(discard_ctl, &discard_state, &discard_index, now); - if (!block_group || !btrfs_run_discard_work(discard_ctl)) + if (!block_group) + return; + if (!btrfs_run_discard_work(discard_ctl)) { + spin_lock(&discard_ctl->lock); + btrfs_put_block_group(block_group); + discard_ctl->block_group = NULL; + spin_unlock(&discard_ctl->lock); return; + } if (now < block_group->discard_eligible_time) { + spin_lock(&discard_ctl->lock); + btrfs_put_block_group(block_group); + discard_ctl->block_group = NULL; + spin_unlock(&discard_ctl->lock); btrfs_discard_schedule_work(discard_ctl, false); return; } @@ -523,15 +542,7 @@ static void btrfs_discard_workfn(struct work_struct *work) spin_lock(&discard_ctl->lock); 
discard_ctl->prev_discard = trimmed; discard_ctl->prev_discard_time = now; - /* - * If the block group was removed from the discard list while it was - * running in this workfn, then we didn't deref it, since this function - * still owned that reference. But we set the discard_ctl->block_group - * back to NULL, so we can use that condition to know that now we need - * to deref the block_group. - */ - if (discard_ctl->block_group == NULL) - btrfs_put_block_group(block_group); + btrfs_put_block_group(block_group); discard_ctl->block_group = NULL; __btrfs_discard_schedule_work(discard_ctl, now, false); spin_unlock(&discard_ctl->lock); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 30fe5ebc3650..76a261cbf39d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2481,8 +2481,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root, found = true; root = read_tree_root_path(tree_root, path, &key); if (IS_ERR(root)) { - if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) - ret = PTR_ERR(root); + ret = PTR_ERR(root); break; } set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); @@ -4642,6 +4641,14 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) btrfs_cleanup_defrag_inodes(fs_info); /* + * Handle the error fs first, as it will flush and wait for all ordered + * extents. This will generate delayed iputs, thus we want to handle + * it first. + */ + if (unlikely(BTRFS_FS_ERROR(fs_info))) + btrfs_error_commit_super(fs_info); + + /* * Wait for any fixup workers to complete. 
 * If we don't wait for them here and they are still running by the time * we call kthread_stop() against the cleaner kthread further below, we @@ -4662,6 +4669,31 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) btrfs_flush_workqueue(fs_info->delalloc_workers); /* + * We can have ordered extents getting their last reference dropped from + * the fs_info->workers queue because for async writes for data bios we + * queue a work for that queue, at btrfs_wq_submit_bio(), that runs + * run_one_async_done() which calls btrfs_bio_end_io() in case the bio + * has an error, and that latter function can do the final + * btrfs_put_ordered_extent() on the ordered extent attached to the bio, + * which adds a delayed iput for the inode. So we must flush the queue + * so that we don't have delayed iputs after committing the current + * transaction below and stopping the cleaner and transaction kthreads. + */ + btrfs_flush_workqueue(fs_info->workers); + + /* + * When finishing a compressed write bio we schedule a work queue item + * to finish an ordered extent - btrfs_finish_compressed_write_work() + * calls btrfs_finish_ordered_extent() which in turn does a call to + * btrfs_queue_ordered_fn(), and that queues the ordered extent + * completion either in the endio_write_workers work queue or in the + * fs_info->endio_freespace_worker work queue. We flush those queues + * below, so before we flush them we must flush this queue for the + * workers of compressed writes. + */ + flush_workqueue(fs_info->compressed_write_workers); + + /* * After we parked the cleaner kthread, ordered extents may have * completed and created new delayed iputs. 
If one of the async reclaim * tasks is running and in the RUN_DELAYED_IPUTS flush state, then we @@ -4718,9 +4750,6 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) btrfs_err(fs_info, "commit super ret %d", ret); } - if (BTRFS_FS_ERROR(fs_info)) - btrfs_error_commit_super(fs_info); - kthread_stop(fs_info->transaction_kthread); kthread_stop(fs_info->cleaner_kthread); @@ -4876,10 +4905,6 @@ static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info) /* cleanup FS via transaction */ btrfs_cleanup_transaction(fs_info); - mutex_lock(&fs_info->cleaner_mutex); - btrfs_run_delayed_iputs(fs_info); - mutex_unlock(&fs_info->cleaner_mutex); - down_write(&fs_info->cleanup_work_sem); up_write(&fs_info->cleanup_work_sem); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 50bc553cc73a..5395e27f9e89 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -179,6 +179,14 @@ search_again: ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); num_refs = btrfs_extent_refs(leaf, ei); + if (unlikely(num_refs == 0)) { + ret = -EUCLEAN; + btrfs_err(fs_info, + "unexpected zero reference count for extent item (%llu %u %llu)", + key.objectid, key.type, key.offset); + btrfs_abort_transaction(trans, ret); + goto out_free; + } extent_flags = btrfs_extent_flags(leaf, ei); } else { ret = -EINVAL; @@ -190,8 +198,6 @@ search_again: goto out_free; } - - BUG_ON(num_refs == 0); } else { num_refs = 0; extent_flags = 0; @@ -221,10 +227,19 @@ search_again: goto search_again; } spin_lock(&head->lock); - if (head->extent_op && head->extent_op->update_flags) + if (head->extent_op && head->extent_op->update_flags) { extent_flags |= head->extent_op->flags_to_set; - else - BUG_ON(num_refs == 0); + } else if (unlikely(num_refs == 0)) { + spin_unlock(&head->lock); + mutex_unlock(&head->mutex); + spin_unlock(&delayed_refs->lock); + ret = -EUCLEAN; + btrfs_err(fs_info, + "unexpected zero reference count for extent %llu (%s)", + bytenr, metadata ? 
"metadata" : "data"); + btrfs_abort_transaction(trans, ret); + goto out_free; + } num_refs += head->ref_mod; spin_unlock(&head->lock); @@ -5575,7 +5590,10 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, ret = btrfs_dec_ref(trans, root, eb, 1); else ret = btrfs_dec_ref(trans, root, eb, 0); - BUG_ON(ret); /* -ENOMEM */ + if (ret) { + btrfs_abort_transaction(trans, ret); + return ret; + } if (is_fstree(root->root_key.objectid)) { ret = btrfs_qgroup_trace_leaf_items(trans, eb); if (ret) { diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 72227c0b4b5a..d5552875f872 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4459,10 +4459,10 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, return eb; } -#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, u64 start) { +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS struct extent_buffer *eb, *exists = NULL; int ret; @@ -4498,8 +4498,11 @@ again: free_eb: btrfs_release_extent_buffer(eb); return exists; -} +#else + /* Stub to avoid linker error when compiled with optimizations turned off. */ + return NULL; #endif +} static struct extent_buffer *grab_extent_buffer( struct btrfs_fs_info *fs_info, struct page *page) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9e06d1a0d373..3814f09dc4ae 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2224,15 +2224,20 @@ static void btrfs_punch_hole_lock_range(struct inode *inode, * will always return true. * So here we need to do extra page alignment for * filemap_range_has_page(). + * + * And do not decrease page_lockend right now, as it can be 0. 
*/ const u64 page_lockstart = round_up(lockstart, PAGE_SIZE); - const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE) - 1; + const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE); while (1) { truncate_pagecache_range(inode, lockstart, lockend); lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend, cached_state); + /* The same page or adjacent pages. */ + if (page_lockend <= page_lockstart) + break; /* * We can't have ordered extents in the range, nor dirty/writeback * pages, because we have locked the inode's VFS lock in exclusive @@ -2244,7 +2249,7 @@ static void btrfs_punch_hole_lock_range(struct inode *inode, * we do, unlock the range and retry. */ if (!filemap_range_has_page(inode->i_mapping, page_lockstart, - page_lockend)) + page_lockend - 1)) break; unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend, diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 6a44733a95e1..14bdb241ff6b 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1098,11 +1098,21 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0); if (ret < 0) goto out_locked; - ASSERT(ret == 0); + /* + * If ret is 1 (no key found), it means this is an empty block group, + * without any extents allocated from it and there's no block group + * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree + * because we are using the block group tree feature, so block group + * items are stored in the block group tree. It also means there are no + * extents allocated for block groups with a start offset beyond this + * block group's end offset (this is the last, highest, block group). 
+ */ + if (!btrfs_fs_compat_ro(trans->fs_info, BLOCK_GROUP_TREE)) + ASSERT(ret == 0); start = block_group->start; end = block_group->start + block_group->length; - while (1) { + while (ret == 0) { btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (key.type == BTRFS_EXTENT_ITEM_KEY || @@ -1132,8 +1142,6 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, ret = btrfs_next_item(extent_root, path); if (ret < 0) goto out_locked; - if (ret) - break; } if (start < end) { ret = __add_to_free_space_tree(trans, block_group, path2, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a13ab3abef12..469a622b440b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1428,6 +1428,7 @@ out_unlock: locked_page, clear_bits, page_ops); + btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL); start += cur_alloc_size; } @@ -1441,6 +1442,7 @@ out_unlock: clear_bits |= EXTENT_CLEAR_DATA_RESV; extent_clear_unlock_delalloc(inode, start, end, locked_page, clear_bits, page_ops); + btrfs_qgroup_free_data(inode, NULL, start, end - start + 1, NULL); } return ret; } @@ -2168,13 +2170,15 @@ error: if (nocow) btrfs_dec_nocow_writers(bg); - if (ret && cur_offset < end) + if (ret && cur_offset < end) { extent_clear_unlock_delalloc(inode, cur_offset, end, locked_page, EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK); + btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL); + } btrfs_free_path(path); return ret; } @@ -4852,7 +4856,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; int err = 0; struct btrfs_trans_handle *trans; - u64 last_unlink_trans; struct fscrypt_name fname; if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) @@ -4878,6 +4881,23 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) goto out_notrans; } + /* + * Propagate the last_unlink_trans value 
of the deleted dir to its + * parent directory. This is to prevent an unrecoverable log tree in the + * case we do something like this: + * 1) create dir foo + * 2) create snapshot under dir foo + * 3) delete the snapshot + * 4) rmdir foo + * 5) mkdir foo + * 6) fsync foo or some file inside foo + * + * This is because we can't unlink other roots when replaying the dir + * deletes for directory foo. + */ + if (BTRFS_I(inode)->last_unlink_trans >= trans->transid) + BTRFS_I(dir)->last_unlink_trans = BTRFS_I(inode)->last_unlink_trans; + if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { err = btrfs_unlink_subvol(trans, dir, dentry); goto out; @@ -4887,26 +4907,13 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) if (err) goto out; - last_unlink_trans = BTRFS_I(inode)->last_unlink_trans; - /* now the directory is empty */ err = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), &fname.disk_name); if (!err) { btrfs_i_size_write(BTRFS_I(inode), 0); - /* - * Propagate the last_unlink_trans value of the deleted dir to - * its parent directory. 
This is to prevent an unrecoverable - * log tree in the case we do something like this: - * 1) create dir foo - * 2) create snapshot under dir foo - * 3) delete the snapshot - * 4) rmdir foo - * 5) mkdir foo - * 6) fsync foo or some file inside foo - */ - if (last_unlink_trans >= trans->transid) - BTRFS_I(dir)->last_unlink_trans = last_unlink_trans; + if (BTRFS_I(inode)->last_unlink_trans >= trans->transid) + btrfs_record_snapshot_destroy(trans, BTRFS_I(dir)); } out: btrfs_end_transaction(trans); @@ -9177,6 +9184,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, int ret; int ret2; bool need_abort = false; + bool logs_pinned = false; struct fscrypt_name old_fname, new_fname; struct fscrypt_str *old_name, *new_name; @@ -9305,6 +9313,31 @@ static int btrfs_rename_exchange(struct inode *old_dir, old_inode->i_ctime = ctime; new_inode->i_ctime = ctime; + if (old_ino != BTRFS_FIRST_FREE_OBJECTID && + new_ino != BTRFS_FIRST_FREE_OBJECTID) { + /* + * If we are renaming in the same directory (and it's not for + * root entries) pin the log early to prevent any concurrent + * task from logging the directory after we removed the old + * entries and before we add the new entries, otherwise that + * task can sync a log without any entry for the inodes we are + * renaming and therefore replaying that log, if a power failure + * happens after syncing the log, would result in deleting the + * inodes. + * + * If the rename affects two different directories, we want to + * make sure that there's no log commit that contains + * updates for only one of the directories but not for the + * other. + * + * If we are renaming an entry for a root, we don't care about + * log updates since we called btrfs_set_log_full_commit(). 
+ */ + btrfs_pin_log_trans(root); + btrfs_pin_log_trans(dest); + logs_pinned = true; + } + if (old_dentry->d_parent != new_dentry->d_parent) { btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), BTRFS_I(old_inode), 1); @@ -9362,30 +9395,23 @@ static int btrfs_rename_exchange(struct inode *old_dir, BTRFS_I(new_inode)->dir_index = new_idx; /* - * Now pin the logs of the roots. We do it to ensure that no other task - * can sync the logs while we are in progress with the rename, because - * that could result in an inconsistency in case any of the inodes that - * are part of this rename operation were logged before. + * Do the log updates for all inodes. + * + * If either entry is for a root we don't need to update the logs since + * we've called btrfs_set_log_full_commit() before. */ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) - btrfs_pin_log_trans(root); - if (new_ino != BTRFS_FIRST_FREE_OBJECTID) - btrfs_pin_log_trans(dest); - - /* Do the log updates for all inodes. */ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) + if (logs_pinned) { btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir), old_rename_ctx.index, new_dentry->d_parent); - if (new_ino != BTRFS_FIRST_FREE_OBJECTID) btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir), new_rename_ctx.index, old_dentry->d_parent); + } - /* Now unpin the logs. */ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) +out_fail: + if (logs_pinned) { btrfs_end_log_trans(root); - if (new_ino != BTRFS_FIRST_FREE_OBJECTID) btrfs_end_log_trans(dest); -out_fail: + } ret2 = btrfs_end_transaction(trans); ret = ret ? 
ret : ret2; out_notrans: @@ -9435,6 +9461,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, int ret2; u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); struct fscrypt_name old_fname, new_fname; + bool logs_pinned = false; if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) return -EPERM; @@ -9573,6 +9600,29 @@ static int btrfs_rename(struct user_namespace *mnt_userns, new_dir->i_ctime = old_dir->i_mtime; old_inode->i_ctime = old_dir->i_mtime; + if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { + /* + * If we are renaming in the same directory (and it's not a + * root entry) pin the log to prevent any concurrent task from + * logging the directory after we removed the old entry and + * before we add the new entry, otherwise that task can sync + * a log without any entry for the inode we are renaming and + * therefore replaying that log, if a power failure happens + * after syncing the log, would result in deleting the inode. + * + * If the rename affects two different directories, we want to + * make sure that there's no log commit that contains + * updates for only one of the directories but not for the + * other. + * + * If we are renaming an entry for a root, we don't care about + * log updates since we called btrfs_set_log_full_commit(). 
+ */ + btrfs_pin_log_trans(root); + btrfs_pin_log_trans(dest); + logs_pinned = true; + } + if (old_dentry->d_parent != new_dentry->d_parent) btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), BTRFS_I(old_inode), 1); @@ -9622,7 +9672,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns, if (old_inode->i_nlink == 1) BTRFS_I(old_inode)->dir_index = index; - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) + if (logs_pinned) btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir), rename_ctx.index, new_dentry->d_parent); @@ -9638,6 +9688,10 @@ static int btrfs_rename(struct user_namespace *mnt_userns, } } out_fail: + if (logs_pinned) { + btrfs_end_log_trans(root); + btrfs_end_log_trans(dest); + } ret2 = btrfs_end_transaction(trans); ret = ret ? ret : ret2; out_notrans: diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index d6cda0b2e925..fd6ea3fcab33 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2977,6 +2977,7 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra, int ret; ASSERT(page_index <= last_index); +again: page = find_lock_page(inode->i_mapping, page_index); if (!page) { page_cache_sync_readahead(inode->i_mapping, ra, NULL, @@ -2998,6 +2999,11 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra, ret = -EIO; goto release_page; } + if (page->mapping != inode->i_mapping) { + unlock_page(page); + put_page(page); + goto again; + } } /* diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index a2b95ccb4cf5..0735decec99b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -431,10 +431,8 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) if (p->buf_len >= len) return 0; - if (len > PATH_MAX) { - WARN_ON(1); - return -ENOMEM; - } + if (WARN_ON(len > PATH_MAX)) + return -ENAMETOOLONG; path_len = p->end - p->start; old_buf_len = p->buf_len; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index c0ff0c2fc01d..91b19d66449b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1640,8 
+1640,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) subvol_name = btrfs_get_subvol_name_from_objectid(info, BTRFS_I(d_inode(dentry))->root->root_key.objectid); if (!IS_ERR(subvol_name)) { - seq_puts(seq, ",subvol="); - seq_escape(seq, subvol_name, " \t\n\\"); + seq_show_option(seq, "subvol", subvol_name); kfree(subvol_name); } return 0; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index fdc432b3352a..982dc92bdf1d 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1085,7 +1085,9 @@ again: search_key.type = BTRFS_INODE_REF_KEY; search_key.offset = parent_objectid; ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); - if (ret == 0) { + if (ret < 0) { + return ret; + } else if (ret == 0) { struct btrfs_inode_ref *victim_ref; unsigned long ptr; unsigned long ptr_end; @@ -1158,13 +1160,13 @@ again: struct fscrypt_str victim_name; extref = (struct btrfs_inode_extref *)(base + cur_offset); + victim_name.len = btrfs_inode_extref_name_len(leaf, extref); if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) goto next; ret = read_alloc_one_name(leaf, &extref->name, - btrfs_inode_extref_name_len(leaf, extref), - &victim_name); + victim_name.len, &victim_name); if (ret) return ret; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a4177014eb8b..628238493167 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3203,6 +3203,12 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) device->bytes_used - dev_extent_len); atomic64_add(dev_extent_len, &fs_info->free_chunk_space); btrfs_clear_space_info_full(fs_info); + + if (list_empty(&device->post_commit_list)) { + list_add_tail(&device->post_commit_list, + &trans->transaction->dev_update_list); + } + mutex_unlock(&fs_info->chunk_mutex); } } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 794526ab90d2..1dff64e62047 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1909,6 +1909,9 @@ bool 
btrfs_zone_activate(struct btrfs_block_group *block_group) device = map->stripes[i].dev; physical = map->stripes[i].physical; + if (!device->bdev) + continue; + if (device->zone_info->max_active_zones == 0) continue; @@ -2052,6 +2055,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ struct btrfs_device *device = map->stripes[i].dev; const u64 physical = map->stripes[i].physical; + if (!device->bdev) + continue; + if (device->zone_info->max_active_zones == 0) continue; |