From 89cbf5f6b6c2a5f0ac7cb83c8b3fd97eadba0d11 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Fri, 30 Aug 2019 17:44:47 +0300 Subject: btrfs: Don't opencode btrfs_find_name_in_backref in backref_in_log Direct replacement, though note that the inside of the loop in btrfs_find_name_in_backref is organized in a slightly different way but is equvalent. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba [ add changelog ] Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 48 ++++++++++++------------------------------------ 1 file changed, 12 insertions(+), 36 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 8a6cc600bf18..1d7f22951ef2 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -945,54 +945,30 @@ static noinline int backref_in_log(struct btrfs_root *log, const char *name, int namelen) { struct btrfs_path *path; - struct btrfs_inode_ref *ref; - unsigned long ptr; - unsigned long ptr_end; - unsigned long name_ptr; - int found_name_len; - int item_size; int ret; - int match = 0; path = btrfs_alloc_path(); if (!path) return -ENOMEM; ret = btrfs_search_slot(NULL, log, key, path, 0, 0); - if (ret != 0) - goto out; - - ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); - - if (key->type == BTRFS_INODE_EXTREF_KEY) { - if (btrfs_find_name_in_ext_backref(path->nodes[0], - path->slots[0], - ref_objectid, - name, namelen)) - match = 1; - + if (ret != 0) { + ret = 0; goto out; } - item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); - ptr_end = ptr + item_size; - while (ptr < ptr_end) { - ref = (struct btrfs_inode_ref *)ptr; - found_name_len = btrfs_inode_ref_name_len(path->nodes[0], ref); - if (found_name_len == namelen) { - name_ptr = (unsigned long)(ref + 1); - ret = memcmp_extent_buffer(path->nodes[0], name, - name_ptr, namelen); - if (ret == 0) { - match = 1; - goto out; - } - } - ptr = (unsigned long)(ref + 1) + found_name_len; - } + if (key->type == BTRFS_INODE_EXTREF_KEY) + ret = !!btrfs_find_name_in_ext_backref(path->nodes[0], + path->slots[0], + ref_objectid, + name, namelen); + else + ret = !!btrfs_find_name_in_backref(path->nodes[0], + path->slots[0], + name, namelen); out: btrfs_free_path(path); - return match; + return ret; } static inline int __add_inode_ref(struct btrfs_trans_handle *trans, -- cgit v1.2.3 From d3316c8233bb05e0dd855d30aac347bb8ad76ee4 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 25 Sep 2019 14:03:03 +0300 Subject: btrfs: Properly handle backref_in_log retval This function can return a negative error value if btrfs_search_slot errors for whatever reason or if btrfs_alloc_path runs out of memory. This is currently problemattic because backref_in_log is treated by its callers as if it returns boolean. Fix this by adding proper error handling in callers. That also enables the function to return the direct error code from btrfs_search_slot. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1d7f22951ef2..24fa6560655c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -952,7 +952,9 @@ static noinline int backref_in_log(struct btrfs_root *log, return -ENOMEM; ret = btrfs_search_slot(NULL, log, key, path, 0, 0); - if (ret != 0) { + if (ret < 0) { + goto out; + } else if (ret == 1) { ret = 0; goto out; } @@ -1026,10 +1028,13 @@ again: (unsigned long)(victim_ref + 1), victim_name_len); - if (!backref_in_log(log_root, &search_key, - parent_objectid, - victim_name, - victim_name_len)) { + ret = backref_in_log(log_root, &search_key, + parent_objectid, victim_name, + victim_name_len); + if (ret < 0) { + kfree(victim_name); + return ret; + } else if (!ret) { inc_nlink(&inode->vfs_inode); btrfs_release_path(path); @@ -1091,10 +1096,12 @@ again: search_key.offset = btrfs_extref_hash(parent_objectid, victim_name, victim_name_len); - ret = 0; - if (!backref_in_log(log_root, &search_key, - parent_objectid, victim_name, - victim_name_len)) { + ret = backref_in_log(log_root, &search_key, + parent_objectid, victim_name, + victim_name_len); + if (ret < 0) { + return ret; + } else if (!ret) { ret = -ENOENT; victim_parent = read_one_inode(root, parent_objectid); @@ -1869,16 +1876,19 @@ static bool name_in_log_ref(struct btrfs_root *log_root, const u64 dirid, const u64 ino) { struct btrfs_key search_key; + int ret; search_key.objectid = ino; search_key.type = BTRFS_INODE_REF_KEY; search_key.offset = dirid; - if (backref_in_log(log_root, &search_key, dirid, name, name_len)) + ret = backref_in_log(log_root, &search_key, dirid, name, name_len); + if (ret == 1) return true; search_key.type = BTRFS_INODE_EXTREF_KEY; search_key.offset = btrfs_extref_hash(dirid, name, name_len); - if (backref_in_log(log_root, &search_key, dirid, name, name_len)) + ret = backref_in_log(log_root, &search_key, dirid, name, name_len); + if (ret == 1) return true; return false; -- cgit v1.2.3 From 725af92a62511aee68216438192041135bf14238 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Fri, 30 Aug 2019 17:44:49 +0300 Subject: btrfs: Open-code name_in_log_ref in replay_one_name That function adds unnecessary indirection between backref_in_log and the caller. Furthermore it also "downgrades" backref_in_log's return value to a boolean, when in fact it could very well be an error. Rectify the situation by simply opencoding name_in_log_ref in replay_one_name and properly handling possible return codes from backref_in_log. Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba [ update comment ] Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 54 +++++++++++++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 29 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 24fa6560655c..fa35fb890bf3 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1867,33 +1867,6 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans, return ret; } -/* - * Return true if an inode reference exists in the log for the given name, - * inode and parent inode. - */ -static bool name_in_log_ref(struct btrfs_root *log_root, - const char *name, const int name_len, - const u64 dirid, const u64 ino) -{ - struct btrfs_key search_key; - int ret; - - search_key.objectid = ino; - search_key.type = BTRFS_INODE_REF_KEY; - search_key.offset = dirid; - ret = backref_in_log(log_root, &search_key, dirid, name, name_len); - if (ret == 1) - return true; - - search_key.type = BTRFS_INODE_EXTREF_KEY; - search_key.offset = btrfs_extref_hash(dirid, name, name_len); - ret = backref_in_log(log_root, &search_key, dirid, name, name_len); - if (ret == 1) - return true; - - return false; -} - /* * take a single entry in a log directory item and replay it into * the subvolume. @@ -2010,8 +1983,31 @@ out: return ret; insert: - if (name_in_log_ref(root->log_root, name, name_len, - key->objectid, log_key.objectid)) { + /* + * Check if the inode reference exists in the log for the given name, + * inode and parent inode + */ + found_key.objectid = log_key.objectid; + found_key.type = BTRFS_INODE_REF_KEY; + found_key.offset = key->objectid; + ret = backref_in_log(root->log_root, &found_key, 0, name, name_len); + if (ret < 0) { + goto out; + } else if (ret) { + /* The dentry will be added later. */ + ret = 0; + update_size = false; + goto out; + } + + found_key.objectid = log_key.objectid; + found_key.type = BTRFS_INODE_EXTREF_KEY; + found_key.offset = key->objectid; + ret = backref_in_log(root->log_root, &found_key, key->objectid, name, + name_len); + if (ret < 0) { + goto out; + } else if (ret) { /* The dentry will be added later. */ ret = 0; update_size = false; -- cgit v1.2.3 From 4c66e0d4243bb8829f2c936e966030d967726e90 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 3 Oct 2019 19:09:35 +0200 Subject: btrfs: drop unused parameter is_new from btrfs_iget The parameter is now always set to NULL and could be dropped. The last user was get_default_root but that got reworked in 05dbe6837b60 ("Btrfs: unify subvol= and subvolid= mounting") and the parameter became unused. Reviewed-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 5 ++--- fs/btrfs/export.c | 4 ++-- fs/btrfs/file.c | 2 +- fs/btrfs/free-space-cache.c | 2 +- fs/btrfs/inode.c | 24 ++++++++++++------------ fs/btrfs/ioctl.c | 2 +- fs/btrfs/props.c | 4 ++-- fs/btrfs/relocation.c | 4 ++-- fs/btrfs/send.c | 2 +- fs/btrfs/super.c | 2 +- fs/btrfs/tree-log.c | 14 ++++++-------- 11 files changed, 31 insertions(+), 34 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 668524097a3c..b23fb083a1d5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2869,10 +2869,9 @@ int btrfs_drop_inode(struct inode *inode); int __init btrfs_init_cachep(void); void __cold btrfs_destroy_cachep(void); struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location, - struct btrfs_root *root, int *new, - struct btrfs_path *path); + struct btrfs_root *root, struct btrfs_path *path); struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, - struct btrfs_root *root, int *was_new); + struct btrfs_root *root); struct extent_map *btrfs_get_extent(struct btrfs_inode *inode, struct page *page, size_t pg_offset, u64 start, u64 end, int create); diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index ddf28ecf17f9..72e312cae69d 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -87,7 +87,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - inode = btrfs_iget(sb, &key, root, NULL); + inode = btrfs_iget(sb, &key, root); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto fail; @@ -214,7 +214,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child) key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - return d_obtain_alias(btrfs_iget(fs_info->sb, &key, root, NULL)); + return d_obtain_alias(btrfs_iget(fs_info->sb, &key, root)); fail: btrfs_free_path(path); return ERR_PTR(ret); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 25ce1b6dbda9..f9434fa3e387 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -296,7 +296,7 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, key.objectid = defrag->ino; key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); + inode = btrfs_iget(fs_info->sb, &key, inode_root); if (IS_ERR(inode)) { ret = PTR_ERR(inode); goto cleanup; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index d54dcd0ab230..85cd874e7b48 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -78,7 +78,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, * sure NOFS is set to keep us from deadlocking. */ nofs_flag = memalloc_nofs_save(); - inode = btrfs_iget_path(fs_info->sb, &location, root, NULL, path); + inode = btrfs_iget_path(fs_info->sb, &location, root, path); btrfs_release_path(path); memalloc_nofs_restore(nofs_flag); if (IS_ERR(inode)) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d3f7abf50c91..2ef5f542f58a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2672,7 +2672,7 @@ static noinline int relink_extent_backref(struct btrfs_path *path, key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - inode = btrfs_iget(fs_info->sb, &key, root, NULL); + inode = btrfs_iget(fs_info->sb, &key, root); if (IS_ERR(inode)) { srcu_read_unlock(&fs_info->subvol_srcu, index); return 0; @@ -3523,7 +3523,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) found_key.objectid = found_key.offset; found_key.type = BTRFS_INODE_ITEM_KEY; found_key.offset = 0; - inode = btrfs_iget(fs_info->sb, &found_key, root, NULL); + inode = btrfs_iget(fs_info->sb, &found_key, root); ret = PTR_ERR_OR_ZERO(inode); if (ret && ret != -ENOENT) goto out; @@ -5742,12 +5742,14 @@ static struct inode *btrfs_iget_locked(struct super_block *s, return inode; } -/* Get an inode object given its location and corresponding root. - * Returns in *is_new if the inode was read from disk +/* + * Get an inode object given its location and corresponding root. + * Path can be preallocated to prevent recursing back to iget through + * allocator. NULL is also valid but may require an additional allocation + * later. */ struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location, - struct btrfs_root *root, int *new, - struct btrfs_path *path) + struct btrfs_root *root, struct btrfs_path *path) { struct inode *inode; @@ -5762,8 +5764,6 @@ struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location, if (!ret) { inode_tree_add(inode); unlock_new_inode(inode); - if (new) - *new = 1; } else { iget_failed(inode); /* @@ -5781,9 +5781,9 @@ struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location, } struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, - struct btrfs_root *root, int *new) + struct btrfs_root *root) { - return btrfs_iget_path(s, location, root, new, NULL); + return btrfs_iget_path(s, location, root, NULL); } static struct inode *new_simple_dir(struct super_block *s, @@ -5849,7 +5849,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) return ERR_PTR(ret); if (location.type == BTRFS_INODE_ITEM_KEY) { - inode = btrfs_iget(dir->i_sb, &location, root, NULL); + inode = btrfs_iget(dir->i_sb, &location, root); if (IS_ERR(inode)) return inode; @@ -5874,7 +5874,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) else inode = new_simple_dir(dir->i_sb, &location, sub_root); } else { - inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL); + inode = btrfs_iget(dir->i_sb, &location, sub_root); } srcu_read_unlock(&fs_info->subvol_srcu, index); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 23272d9154f3..589b95eb2b80 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2462,7 +2462,7 @@ static int btrfs_search_path_in_tree_user(struct inode *inode, goto out; } - temp_inode = btrfs_iget(sb, &key2, root, NULL); + temp_inode = btrfs_iget(sb, &key2, root); if (IS_ERR(temp_inode)) { ret = PTR_ERR(temp_inode); goto out; diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index 1e664e0b59b8..aac596300c89 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -416,11 +416,11 @@ int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans, key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - parent_inode = btrfs_iget(sb, &key, parent_root, NULL); + parent_inode = btrfs_iget(sb, &key, parent_root); if (IS_ERR(parent_inode)) return PTR_ERR(parent_inode); - child_inode = btrfs_iget(sb, &key, root, NULL); + child_inode = btrfs_iget(sb, &key, root); if (IS_ERR(child_inode)) { iput(parent_inode); return PTR_ERR(child_inode); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 5cd42b66818c..df195e2bd45f 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3560,7 +3560,7 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info, key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - inode = btrfs_iget(fs_info->sb, &key, root, NULL); + inode = btrfs_iget(fs_info->sb, &key, root); if (IS_ERR(inode)) return -ENOENT; @@ -4246,7 +4246,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, key.objectid = objectid; key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - inode = btrfs_iget(fs_info->sb, &key, root, NULL); + inode = btrfs_iget(fs_info->sb, &key, root); BUG_ON(IS_ERR(inode)); BTRFS_I(inode)->index_cnt = group->key.objectid; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 123ac54af071..27e92594a81b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4779,7 +4779,7 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - inode = btrfs_iget(fs_info->sb, &key, root, NULL); + inode = btrfs_iget(fs_info->sb, &key, root); if (IS_ERR(inode)) return PTR_ERR(inode); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 843015b9a11e..c7d78ac64b83 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1219,7 +1219,7 @@ static int btrfs_fill_super(struct super_block *sb, key.objectid = BTRFS_FIRST_FREE_OBJECTID; key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - inode = btrfs_iget(sb, &key, fs_info->fs_root, NULL); + inode = btrfs_iget(sb, &key, fs_info->fs_root); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto fail_close; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index fa35fb890bf3..30a17143448d 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -559,7 +559,7 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root, key.objectid = objectid; key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); + inode = btrfs_iget(root->fs_info->sb, &key, root); if (IS_ERR(inode)) inode = NULL; return inode; @@ -4965,7 +4965,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, key.objectid = ino; key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - inode = btrfs_iget(fs_info->sb, &key, root, NULL); + inode = btrfs_iget(fs_info->sb, &key, root); /* * If the other inode that had a conflicting dir entry was * deleted in the current transaction, we need to log its parent @@ -4975,8 +4975,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, ret = PTR_ERR(inode); if (ret == -ENOENT) { key.objectid = parent; - inode = btrfs_iget(fs_info->sb, &key, root, - NULL); + inode = btrfs_iget(fs_info->sb, &key, root); if (IS_ERR(inode)) { ret = PTR_ERR(inode); } else { @@ -5681,7 +5680,7 @@ process_leaf: continue; btrfs_release_path(path); - di_inode = btrfs_iget(fs_info->sb, &di_key, root, NULL); + di_inode = btrfs_iget(fs_info->sb, &di_key, root); if (IS_ERR(di_inode)) { ret = PTR_ERR(di_inode); goto next_dir_inode; @@ -5807,8 +5806,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, cur_offset = item_size; } - dir_inode = btrfs_iget(fs_info->sb, &inode_key, - root, NULL); + dir_inode = btrfs_iget(fs_info->sb, &inode_key, root); /* * If the parent inode was deleted, return an error to * fallback to a transaction commit. This is to prevent @@ -5882,7 +5880,7 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans, search_key.objectid = found_key.offset; search_key.type = BTRFS_INODE_ITEM_KEY; search_key.offset = 0; - inode = btrfs_iget(fs_info->sb, &search_key, root, NULL); + inode = btrfs_iget(fs_info->sb, &search_key, root); if (IS_ERR(inode)) return PTR_ERR(inode); -- cgit v1.2.3 From 67439dadb03ad9da45bfccb4cdb6ef6b1a7f8da9 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 8 Oct 2019 13:28:47 +0200 Subject: btrfs: opencode extent_buffer_get The helper is trivial and we can understand what the atomic_inc on something named refs does. Reviewed-by: Johannes Thumshirn Reviewed-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 12 ++++++------ fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/extent_io.h | 5 ----- fs/btrfs/qgroup.c | 6 +++--- fs/btrfs/relocation.c | 4 ++-- fs/btrfs/tree-log.c | 2 +- 7 files changed, 14 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3a4d8e27e565..739757978686 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1100,7 +1100,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) parent_start = buf->start; - extent_buffer_get(cow); + atomic_inc(&cow->refs); ret = tree_mod_log_insert_root(root->node, cow, 1); BUG_ON(ret < 0); rcu_assign_pointer(root->node, cow); @@ -2011,7 +2011,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, /* update the path */ if (left) { if (btrfs_header_nritems(left) > orig_slot) { - extent_buffer_get(left); + atomic_inc(&left->refs); /* left was locked after cow */ path->nodes[level] = left; path->slots[level + 1] -= 1; @@ -2601,7 +2601,7 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root, } else { b = root->commit_root; - extent_buffer_get(b); + atomic_inc(&b->refs); } level = btrfs_header_level(b); /* @@ -3375,7 +3375,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, free_extent_buffer(old); add_root_to_dirty_list(root); - extent_buffer_get(c); + atomic_inc(&c->refs); path->nodes[level] = c; path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; path->slots[level] = 0; @@ -4908,7 +4908,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, root_sub_used(root, leaf->len); - extent_buffer_get(leaf); + atomic_inc(&leaf->refs); btrfs_free_tree_block(trans, root, leaf, 0, 1); free_extent_buffer_stale(leaf); } @@ -4989,7 +4989,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, * for possible call to del_ptr below */ slot = path->slots[1]; - extent_buffer_get(leaf); + atomic_inc(&leaf->refs); btrfs_set_path_blocking(path); wret = push_leaf_left(trans, root, path, 1, 1, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f54a4782d18c..43e14a17e3f4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -610,7 +610,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, /* the pending IO might have been the only thing that kept this buffer * in memory. Make sure we have a ref for all this other checks */ - extent_buffer_get(eb); + atomic_inc(&eb->refs); reads_done = atomic_dec_and_test(&eb->io_pages); if (!reads_done) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 49cb26fa7c63..9e5845548b76 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5436,7 +5436,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, btrfs_assert_tree_locked(parent); parent_level = btrfs_header_level(parent); - extent_buffer_get(parent); + atomic_inc(&parent->refs); path->nodes[parent_level] = parent; path->slots[parent_level] = btrfs_header_nritems(parent); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index e22045cef89b..a8551a1f56e2 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -230,11 +230,6 @@ static inline int num_extent_pages(const struct extent_buffer *eb) (eb->start >> PAGE_SHIFT); } -static inline void extent_buffer_get(struct extent_buffer *eb) -{ - atomic_inc(&eb->refs); -} - static inline int extent_buffer_uptodate(struct extent_buffer *eb) { return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index fde0973d893a..f414fd914ddd 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1811,7 +1811,7 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans, btrfs_item_key_to_cpu(dst_path->nodes[dst_level], &key, 0); /* For src_path */ - extent_buffer_get(src_eb); + atomic_inc(&src_eb->refs); src_path->nodes[root_level] = src_eb; src_path->slots[root_level] = dst_path->slots[root_level]; src_path->locks[root_level] = 0; @@ -2067,7 +2067,7 @@ static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans, goto out; } /* For dst_path */ - extent_buffer_get(dst_eb); + atomic_inc(&dst_eb->refs); dst_path->nodes[level] = dst_eb; dst_path->slots[level] = 0; dst_path->locks[level] = 0; @@ -2126,7 +2126,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, * walk back up the tree (adjusting slot pointers as we go) * and restart the search process. */ - extent_buffer_get(root_eb); /* For path */ + atomic_inc(&root_eb->refs); /* For path */ path->nodes[root_level] = root_eb; path->slots[root_level] = 0; path->locks[root_level] = 0; /* so release_path doesn't try to unlock */ diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index df195e2bd45f..90b80da5abe6 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2246,7 +2246,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { level = btrfs_root_level(root_item); - extent_buffer_get(reloc_root->node); + atomic_inc(&reloc_root->node->refs); path->nodes[level] = reloc_root->node; path->slots[level] = 0; } else { @@ -4688,7 +4688,7 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, node->new_bytenr != buf->start); drop_node_buffer(node); - extent_buffer_get(cow); + atomic_inc(&cow->refs); node->eb = cow; node->new_bytenr = cow->start; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 30a17143448d..6f757361db53 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2851,7 +2851,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, level = btrfs_header_level(log->node); orig_level = level; path->nodes[level] = log->node; - extent_buffer_get(log->node); + atomic_inc(&log->node->refs); path->slots[level] = 0; while (1) { -- cgit v1.2.3 From 40e046acbd2f369cfbf93c3413639c66514cec2d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 5 Dec 2019 16:58:30 +0000 Subject: Btrfs: fix missing data checksums after replaying a log tree When logging a file that has shared extents (reflinked with other files or with itself), we can end up logging multiple checksum items that cover overlapping ranges. This confuses the search for checksums at log replay time causing some checksums to never be added to the fs/subvolume tree. Consider the following example of a file that shares the same extent at offsets 0 and 256Kb: [ bytenr 13893632, offset 64Kb, len 64Kb ] 0 64Kb [ bytenr 13631488, offset 64Kb, len 192Kb ] 64Kb 256Kb [ bytenr 13893632, offset 0, len 256Kb ] 256Kb 512Kb When logging the inode, at tree-log.c:copy_items(), when processing the file extent item at offset 0, we log a checksum item covering the range 13959168 to 14024704, which corresponds to 13893632 + 64Kb and 13893632 + 64Kb + 64Kb, respectively. Later when processing the extent item at offset 256K, we log the checksums for the range from 13893632 to 14155776 (which corresponds to 13893632 + 256Kb). These checksums get merged with the checksum item for the range from 13631488 to 13893632 (13631488 + 256Kb), logged by a previous fsync. So after this we get the two following checksum items in the log tree: (...) item 6 key (EXTENT_CSUM EXTENT_CSUM 13631488) itemoff 3095 itemsize 512 range start 13631488 end 14155776 length 524288 item 7 key (EXTENT_CSUM EXTENT_CSUM 13959168) itemoff 3031 itemsize 64 range start 13959168 end 14024704 length 65536 The first one covers the range from the second one, they overlap. So far this does not cause a problem after replaying the log, because when replaying the file extent item for offset 256K, we copy all the checksums for the extent 13893632 from the log tree to the fs/subvolume tree, since searching for an checksum item for bytenr 13893632 leaves us at the first checksum item, which covers the whole range of the extent. However if we write 64Kb to file offset 256Kb for example, we will not be able to find and copy the checksums for the last 128Kb of the extent at bytenr 13893632, referenced by the file range 384Kb to 512Kb. After writing 64Kb into file offset 256Kb we get the following extent layout for our file: [ bytenr 13893632, offset 64K, len 64Kb ] 0 64Kb [ bytenr 13631488, offset 64Kb, len 192Kb ] 64Kb 256Kb [ bytenr 14155776, offset 0, len 64Kb ] 256Kb 320Kb [ bytenr 13893632, offset 64Kb, len 192Kb ] 320Kb 512Kb After fsync'ing the file, if we have a power failure and then mount the filesystem to replay the log, the following happens: 1) When replaying the file extent item for file offset 320Kb, we lookup for the checksums for the extent range from 13959168 (13893632 + 64Kb) to 14155776 (13893632 + 256Kb), through a call to btrfs_lookup_csums_range(); 2) btrfs_lookup_csums_range() finds the checksum item that starts precisely at offset 13959168 (item 7 in the log tree, shown before); 3) However that checksum item only covers 64Kb of data, and not 192Kb of data; 4) As a result only the checksums for the first 64Kb of data referenced by the file extent item are found and copied to the fs/subvolume tree. The remaining 128Kb of data, file range 384Kb to 512Kb, doesn't get the corresponding data checksums found and copied to the fs/subvolume tree. 5) After replaying the log userspace will not be able to read the file range from 384Kb to 512Kb, because the checksums are missing and resulting in an -EIO error. The following steps reproduce this scenario: $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt/sdc $ xfs_io -f -c "pwrite -S 0xa3 0 256K" /mnt/sdc/foobar $ xfs_io -c "fsync" /mnt/sdc/foobar $ xfs_io -c "pwrite -S 0xc7 256K 256K" /mnt/sdc/foobar $ xfs_io -c "reflink /mnt/sdc/foobar 320K 0 64K" /mnt/sdc/foobar $ xfs_io -c "fsync" /mnt/sdc/foobar $ xfs_io -c "pwrite -S 0xe5 256K 64K" /mnt/sdc/foobar $ xfs_io -c "fsync" /mnt/sdc/foobar $ mount /dev/sdc /mnt/sdc $ md5sum /mnt/sdc/foobar md5sum: /mnt/sdc/foobar: Input/output error $ dmesg | tail [165305.003464] BTRFS info (device sdc): no csum found for inode 257 start 401408 [165305.004014] BTRFS info (device sdc): no csum found for inode 257 start 405504 [165305.004559] BTRFS info (device sdc): no csum found for inode 257 start 409600 [165305.005101] BTRFS info (device sdc): no csum found for inode 257 start 413696 [165305.005627] BTRFS info (device sdc): no csum found for inode 257 start 417792 [165305.006134] BTRFS info (device sdc): no csum found for inode 257 start 421888 [165305.006625] BTRFS info (device sdc): no csum found for inode 257 start 425984 [165305.007278] BTRFS info (device sdc): no csum found for inode 257 start 430080 [165305.008248] BTRFS warning (device sdc): csum failed root 5 ino 257 off 393216 csum 0x1337385e expected csum 0x00000000 mirror 1 [165305.009550] BTRFS warning (device sdc): csum failed root 5 ino 257 off 393216 csum 0x1337385e expected csum 0x00000000 mirror 1 Fix this simply by deleting first any checksums, from the log tree, for the range of the extent we are logging at copy_items(). This ensures we do not get checksum items in the log tree that have overlapping ranges. This is a long time issue that has been present since we have the clone (and deduplication) ioctl, and can happen both when an extent is shared between different files and within the same file. A test case for fstests follows soon. CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 7 ++++--- fs/btrfs/file-item.c | 7 +++++-- fs/btrfs/tree-log.c | 29 ++++++++++++++++++++++++++--- 4 files changed, 36 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b2e8fd8a8e59..54efb21c2727 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2787,7 +2787,7 @@ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref( /* file-item.c */ struct btrfs_dio_private; int btrfs_del_csums(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 bytenr, u64 len); + struct btrfs_root *root, u64 bytenr, u64 len); blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst); blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 18df434bfe52..274318e9114e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1869,8 +1869,8 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans, btrfs_pin_extent(fs_info, head->bytenr, head->num_bytes, 1); if (head->is_data) { - ret = btrfs_del_csums(trans, fs_info, head->bytenr, - head->num_bytes); + ret = btrfs_del_csums(trans, fs_info->csum_root, + head->bytenr, head->num_bytes); } } @@ -3175,7 +3175,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, btrfs_release_path(path); if (is_data) { - ret = btrfs_del_csums(trans, info, bytenr, num_bytes); + ret = btrfs_del_csums(trans, info->csum_root, bytenr, + num_bytes); if (ret) { btrfs_abort_transaction(trans, ret); goto out; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 3270a40b0777..b1bfdc5c1387 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -590,9 +590,9 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info, * range of bytes. */ int btrfs_del_csums(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 bytenr, u64 len) + struct btrfs_root *root, u64 bytenr, u64 len) { - struct btrfs_root *root = fs_info->csum_root; + struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_path *path; struct btrfs_key key; u64 end_byte = bytenr + len; @@ -602,6 +602,9 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); int blocksize_bits = fs_info->sb->s_blocksize_bits; + ASSERT(root == fs_info->csum_root || + root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); + path = btrfs_alloc_path(); if (!path) return -ENOMEM; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 6f757361db53..79866f1b33d6 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -808,7 +808,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, struct btrfs_ordered_sum, list); if (!ret) - ret = btrfs_del_csums(trans, fs_info, + ret = btrfs_del_csums(trans, + fs_info->csum_root, sums->bytenr, sums->len); if (!ret) @@ -3909,6 +3910,28 @@ static int log_inode_item(struct btrfs_trans_handle *trans, return 0; } +static int log_csums(struct btrfs_trans_handle *trans, + struct btrfs_root *log_root, + struct btrfs_ordered_sum *sums) +{ + int ret; + + /* + * Due to extent cloning, we might have logged a csum item that covers a + * subrange of a cloned extent, and later we can end up logging a csum + * item for a larger subrange of the same extent or the entire range. + * This would leave csum items in the log tree that cover the same range + * and break the searches for checksums in the log tree, resulting in + * some checksums missing in the fs/subvolume tree. So just delete (or + * trim and adjust) any existing csum items in the log for this range. + */ + ret = btrfs_del_csums(trans, log_root, sums->bytenr, sums->len); + if (ret) + return ret; + + return btrfs_csum_file_blocks(trans, log_root, sums); +} + static noinline int copy_items(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_path *dst_path, @@ -4054,7 +4077,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, struct btrfs_ordered_sum, list); if (!ret) - ret = btrfs_csum_file_blocks(trans, log, sums); + ret = log_csums(trans, log, sums); list_del(&sums->list); kfree(sums); } @@ -4274,7 +4297,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans, struct btrfs_ordered_sum, list); if (!ret) - ret = btrfs_csum_file_blocks(trans, log_root, sums); + ret = log_csums(trans, log_root, sums); list_del(&sums->list); kfree(sums); } -- cgit v1.2.3 From 9bc574de590510eff899c3ca8dbaf013566b5efe Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 6 Dec 2019 09:37:17 -0500 Subject: btrfs: skip log replay on orphaned roots My fsstress modifications coupled with generic/475 uncovered a failure to mount and replay the log if we hit a orphaned root. We do not want to replay the log for an orphan root, but it's completely legitimate to have an orphaned root with a log attached. Fix this by simply skipping replaying the log. We still need to pin it's root node so that we do not overwrite it while replaying other logs, as we re-read the log root at every stage of the replay. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 79866f1b33d6..d3f115909ff0 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -6317,9 +6317,28 @@ again: wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); if (IS_ERR(wc.replay_dest)) { ret = PTR_ERR(wc.replay_dest); + + /* + * We didn't find the subvol, likely because it was + * deleted. This is ok, simply skip this log and go to + * the next one. + * + * We need to exclude the root because we can't have + * other log replays overwriting this log as we'll read + * it back in a few more times. This will keep our + * block from being modified, and we'll just bail for + * each subsequent pass. + */ + if (ret == -ENOENT) + ret = btrfs_pin_extent_for_log_replay(fs_info, + log->node->start, + log->node->len); free_extent_buffer(log->node); free_extent_buffer(log->commit_root); kfree(log); + + if (!ret) + goto next; btrfs_handle_fs_error(fs_info, ret, "Couldn't read target root for tree log recovery."); goto error; @@ -6351,7 +6370,6 @@ again: &root->highest_objectid); } - key.offset = found_key.offset - 1; wc.replay_dest->log_root = NULL; free_extent_buffer(log->node); free_extent_buffer(log->commit_root); @@ -6359,9 +6377,10 @@ again: if (ret) goto error; - +next: if (found_key.offset == 0) break; + key.offset = found_key.offset - 1; } btrfs_release_path(path); -- cgit v1.2.3