summaryrefslogtreecommitdiff
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-07-17 22:38:04 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2024-07-17 22:38:04 +0300
commita1b547f0f217cfb06af7eb4ce8488b02d83a0370 (patch)
tree2935586a4371bfb9c5abec688dc279f99504cd08 /fs/btrfs/tree-log.c
parent6706415bf9f3dcb425f4b60a08a3a6f1d94ec0e0 (diff)
parent8e7860543a94784d744c7ce34b78a2e11beefa5c (diff)
downloadlinux-a1b547f0f217cfb06af7eb4ce8488b02d83a0370.tar.xz
Merge tag 'for-6.11-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "The highlights are new logic behind background block group reclaim, automatic removal of qgroup after removing a subvolume and new 'rescue=' mount options. The rest is optimizations, cleanups and refactoring. User visible features: - dynamic block group reclaim: - tunable framework to avoid situations where eager data allocations prevent creating new metadata chunks due to lack of unallocated space - reuse sysfs knob bg_reclaim_threshold (otherwise used only in zoned mode) for a fixed value threshold - new on/off sysfs knob "dynamic_reclaim" calculating the value based on heuristics, aiming to keep spare working space for relocating chunks but not to needlessly relocate partially utilized block groups or reclaim newly allocated ones - stats are exported in sysfs per block group type, files "reclaim_*" - this may increase IO load at unexpected times but the corner case of no allocatable block groups is known to be worse - automatically remove qgroup of deleted subvolumes: - adjust qgroup removal conditions, make sure all related subvolume data are already removed, or return EBUSY, also take into account setting of sysfs drop_subtree_threshold - also works in squota mode - mount option updates: new modes of 'rescue=' that allow to mount images (read-only) that could have been partially converted by user space tools - ignoremetacsums - invalid metadata checksums are ignored - ignoresuperflags - super block flags that track conversion in progress (like UUID or checksums) Core: - size of struct btrfs_inode is now below 1024 (on a release config), improved memory packing and other secondary effects - switch tracking of open inodes from rb-tree to xarray, minor performance improvement - reduce number of empty transaction commits when there are no dirty data/metadata - memory allocation optimizations (reduced numbers, reordering out of critical sections) - extent map structure optimizations and refactoring, more sanity checks - more subpage in zoned mode preparations or fixes - general snapshot code cleanups, improvements and documentation - tree-checker updates: more file extent ram_bytes fixes, continued - raid-stripe-tree update (not backward compatible): - remove extent encoding field from the structure, can be inferred from other information - requires btrfs-progs 6.9.1 or newer - cleanups and refactoring - error message updates - error handling improvements - return type and parameter cleanups and improvements" * tag 'for-6.11-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (152 commits) btrfs: fix extent map use-after-free when adding pages to compressed bio btrfs: fix bitmap leak when loading free space cache on duplicate entry btrfs: remove the BUG_ON() inside extent_range_clear_dirty_for_io() btrfs: move extent_range_clear_dirty_for_io() into inode.c btrfs: enhance compression error messages btrfs: fix data race when accessing the last_trans field of a root btrfs: rename the extra_gfp parameter of btrfs_alloc_page_array() btrfs: remove the extra_gfp parameter from btrfs_alloc_folio_array() btrfs: introduce new "rescue=ignoresuperflags" mount option btrfs: introduce new "rescue=ignoremetacsums" mount option btrfs: output the unrecognized super block flags as hex btrfs: remove unused Opt enums btrfs: tree-checker: add extra ram_bytes and disk_num_bytes check btrfs: fix the ram_bytes assignment for truncated ordered extents btrfs: make validate_extent_map() catch ram_bytes mismatch btrfs: ignore incorrect btrfs_file_extent_item::ram_bytes btrfs: cleanup the bytenr usage inside btrfs_extent_item_to_extent_map() btrfs: fix typo in error message in btrfs_validate_super() btrfs: move the direct IO code into its own file btrfs: pass a btrfs_inode to btrfs_set_prop() ...
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r--fs/btrfs/tree-log.c74
1 files changed, 53 insertions, 21 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 0bce1d45e252..f0cf8ce26f01 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -151,7 +151,7 @@ static struct inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *root)
* attempt a transaction commit, resulting in a deadlock.
*/
nofs_flag = memalloc_nofs_save();
- inode = btrfs_iget(root->fs_info->sb, objectid, root);
+ inode = btrfs_iget(objectid, root);
memalloc_nofs_restore(nofs_flag);
return inode;
@@ -1644,7 +1644,8 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
if (ret)
goto out;
}
- BTRFS_I(inode)->index_cnt = (u64)-1;
+ if (S_ISDIR(inode->i_mode))
+ BTRFS_I(inode)->index_cnt = (u64)-1;
if (inode->i_nlink == 0) {
if (S_ISDIR(inode->i_mode)) {
@@ -2839,7 +2840,7 @@ static void wait_for_writer(struct btrfs_root *root)
finish_wait(&root->log_writer_wait, &wait);
}
-void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct inode *inode)
+void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct btrfs_inode *inode)
{
ctx->log_ret = 0;
ctx->log_transid = 0;
@@ -2858,7 +2859,7 @@ void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct inode *inode)
void btrfs_init_log_ctx_scratch_eb(struct btrfs_log_ctx *ctx)
{
- struct btrfs_inode *inode = BTRFS_I(ctx->inode);
+ struct btrfs_inode *inode = ctx->inode;
if (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) &&
!test_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags))
@@ -2876,7 +2877,7 @@ void btrfs_release_log_ctx_extents(struct btrfs_log_ctx *ctx)
struct btrfs_ordered_extent *ordered;
struct btrfs_ordered_extent *tmp;
- ASSERT(inode_is_locked(ctx->inode));
+ ASSERT(inode_is_locked(&ctx->inode->vfs_inode));
list_for_each_entry_safe(ordered, tmp, &ctx->ordered_extents, log_list) {
list_del_init(&ordered->log_list);
@@ -4253,8 +4254,10 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, bool inode_item_dropped)
{
struct btrfs_inode_item *inode_item;
+ struct btrfs_key key;
int ret;
+ btrfs_get_inode_key(inode, &key);
/*
* If we are doing a fast fsync and the inode was logged before in the
* current transaction, then we know the inode was previously logged and
@@ -4266,7 +4269,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
* already exists can also result in unnecessarily splitting a leaf.
*/
if (!inode_item_dropped && inode->logged_trans == trans->transid) {
- ret = btrfs_search_slot(trans, log, &inode->location, path, 0, 1);
+ ret = btrfs_search_slot(trans, log, &key, path, 0, 1);
ASSERT(ret <= 0);
if (ret > 0)
ret = -ENOENT;
@@ -4280,7 +4283,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
* the inode, we set BTRFS_INODE_NEEDS_FULL_SYNC on its runtime
* flags and set ->logged_trans to 0.
*/
- ret = btrfs_insert_empty_item(trans, log, path, &inode->location,
+ ret = btrfs_insert_empty_item(trans, log, path, &key,
sizeof(*inode_item));
ASSERT(ret != -EEXIST);
}
@@ -4594,6 +4597,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
{
struct btrfs_ordered_extent *ordered;
struct btrfs_root *csum_root;
+ u64 block_start;
u64 csum_offset;
u64 csum_len;
u64 mod_start = em->start;
@@ -4603,7 +4607,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
if (inode->flags & BTRFS_INODE_NODATASUM ||
(em->flags & EXTENT_FLAG_PREALLOC) ||
- em->block_start == EXTENT_MAP_HOLE)
+ em->disk_bytenr == EXTENT_MAP_HOLE)
return 0;
list_for_each_entry(ordered, &ctx->ordered_extents, log_list) {
@@ -4667,17 +4671,18 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
/* If we're compressed we have to save the entire range of csums. */
if (extent_map_is_compressed(em)) {
csum_offset = 0;
- csum_len = max(em->block_len, em->orig_block_len);
+ csum_len = em->disk_num_bytes;
} else {
csum_offset = mod_start - em->start;
csum_len = mod_len;
}
/* block start is already adjusted for the file extent offset. */
- csum_root = btrfs_csum_root(trans->fs_info, em->block_start);
- ret = btrfs_lookup_csums_list(csum_root, em->block_start + csum_offset,
- em->block_start + csum_offset +
- csum_len - 1, &ordered_sums, false);
+ block_start = extent_map_block_start(em);
+ csum_root = btrfs_csum_root(trans->fs_info, block_start);
+ ret = btrfs_lookup_csums_list(csum_root, block_start + csum_offset,
+ block_start + csum_offset + csum_len - 1,
+ &ordered_sums, false);
if (ret < 0)
return ret;
ret = 0;
@@ -4707,7 +4712,8 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
struct btrfs_key key;
enum btrfs_compression_type compress_type;
- u64 extent_offset = em->start - em->orig_start;
+ u64 extent_offset = em->offset;
+ u64 block_start = extent_map_block_start(em);
u64 block_len;
int ret;
@@ -4717,14 +4723,13 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
else
btrfs_set_stack_file_extent_type(&fi, BTRFS_FILE_EXTENT_REG);
- block_len = max(em->block_len, em->orig_block_len);
+ block_len = em->disk_num_bytes;
compress_type = extent_map_compression(em);
if (compress_type != BTRFS_COMPRESS_NONE) {
- btrfs_set_stack_file_extent_disk_bytenr(&fi, em->block_start);
+ btrfs_set_stack_file_extent_disk_bytenr(&fi, block_start);
btrfs_set_stack_file_extent_disk_num_bytes(&fi, block_len);
- } else if (em->block_start < EXTENT_MAP_LAST_BYTE) {
- btrfs_set_stack_file_extent_disk_bytenr(&fi, em->block_start -
- extent_offset);
+ } else if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) {
+ btrfs_set_stack_file_extent_disk_bytenr(&fi, block_start - extent_offset);
btrfs_set_stack_file_extent_disk_num_bytes(&fi, block_len);
}
@@ -5927,7 +5932,7 @@ again:
if (ret < 0) {
return ret;
} else if (ret > 0 &&
- other_ino != btrfs_ino(BTRFS_I(ctx->inode))) {
+ other_ino != btrfs_ino(ctx->inode)) {
if (ins_nr > 0) {
ins_nr++;
} else {
@@ -7074,6 +7079,15 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
}
/*
+ * If we're logging an inode from a subvolume created in the current
+ * transaction we must force a commit since the root is not persisted.
+ */
+ if (btrfs_root_generation(&root->root_item) == trans->transid) {
+ ret = BTRFS_LOG_FORCE_COMMIT;
+ goto end_no_trans;
+ }
+
+ /*
* Skip already logged inodes or inodes corresponding to tmpfiles
* (since logging them is pointless, a link count of 0 means they
* will never be accessible).
@@ -7454,6 +7468,24 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
}
/*
+ * Call this when creating a subvolume in a directory.
+ * Because we don't commit a transaction when creating a subvolume, we can't
+ * allow the directory pointing to the subvolume to be logged with an entry that
+ * points to an unpersisted root if we are still in the transaction used to
+ * create the subvolume, so make any attempt to log the directory to result in a
+ * full log sync.
+ * Also we don't need to worry with renames, since btrfs_rename() marks the log
+ * for full commit when renaming a subvolume.
+ */
+void btrfs_record_new_subvolume(const struct btrfs_trans_handle *trans,
+ struct btrfs_inode *dir)
+{
+ mutex_lock(&dir->log_mutex);
+ dir->last_unlink_trans = trans->transid;
+ mutex_unlock(&dir->log_mutex);
+}
+
+/*
* Update the log after adding a new name for an inode.
*
* @trans: Transaction handle.
@@ -7585,7 +7617,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
goto out;
}
- btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
+ btrfs_init_log_ctx(&ctx, inode);
ctx.logging_new_name = true;
btrfs_init_log_ctx_scratch_eb(&ctx);
/*