diff options
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- | fs/btrfs/tree-log.c | 1061 |
1 files changed, 549 insertions, 512 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c8d6587688b3..9f05d454b9df 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -112,7 +112,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_root *log, struct btrfs_path *path, - u64 dirid, int del_all); + u64 dirid, bool del_all); static void wait_log_commit(struct btrfs_root *root, int transid); /* @@ -138,10 +138,13 @@ static void wait_log_commit(struct btrfs_root *root, int transid); * and once to do all the other items. */ -static struct inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *root) +static struct btrfs_inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *root) { unsigned int nofs_flag; - struct inode *inode; + struct btrfs_inode *inode; + + /* Only meant to be called for subvolume roots and not for log roots. */ + ASSERT(btrfs_is_fstree(btrfs_root_id(root))); /* * We're holding a transaction handle whether we are logging or @@ -376,12 +379,12 @@ static int process_one_buffer(struct btrfs_root *log, } /* - * Item overwrite used by replay and tree logging. eb, slot and key all refer - * to the src data we are copying out. + * Item overwrite used by log replay. The given eb, slot and key all refer to + * the source data we are copying out. * - * root is the tree we are copying into, and path is a scratch - * path for use in this function (it should be released on entry and - * will be released on exit). + * The given root is for the tree we are copying into, and path is a scratch + * path for use in this function (it should be released on entry and will be + * released on exit). * * If the key is already in the destination tree the existing item is * overwritten. If the existing item isn't big enough, it is extended. @@ -401,6 +404,8 @@ static int overwrite_item(struct btrfs_trans_handle *trans, int save_old_i_size = 0; unsigned long src_ptr; unsigned long dst_ptr; + struct extent_buffer *dst_eb; + int dst_slot; bool inode_item = key->type == BTRFS_INODE_ITEM_KEY; /* @@ -420,11 +425,13 @@ static int overwrite_item(struct btrfs_trans_handle *trans, if (ret < 0) return ret; + dst_eb = path->nodes[0]; + dst_slot = path->slots[0]; + if (ret == 0) { char *src_copy; - char *dst_copy; - u32 dst_size = btrfs_item_size(path->nodes[0], - path->slots[0]); + const u32 dst_size = btrfs_item_size(dst_eb, dst_slot); + if (dst_size != item_size) goto insert; @@ -432,23 +439,16 @@ static int overwrite_item(struct btrfs_trans_handle *trans, btrfs_release_path(path); return 0; } - dst_copy = kmalloc(item_size, GFP_NOFS); src_copy = kmalloc(item_size, GFP_NOFS); - if (!dst_copy || !src_copy) { + if (!src_copy) { btrfs_release_path(path); - kfree(dst_copy); - kfree(src_copy); return -ENOMEM; } read_extent_buffer(eb, src_copy, src_ptr, item_size); + dst_ptr = btrfs_item_ptr_offset(dst_eb, dst_slot); + ret = memcmp_extent_buffer(dst_eb, src_copy, dst_ptr, item_size); - dst_ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); - read_extent_buffer(path->nodes[0], dst_copy, dst_ptr, - item_size); - ret = memcmp(dst_copy, src_copy, item_size); - - kfree(dst_copy); kfree(src_copy); /* * they have the same contents, just return, this saves @@ -470,9 +470,9 @@ static int overwrite_item(struct btrfs_trans_handle *trans, u64 nbytes; u32 mode; - item = btrfs_item_ptr(path->nodes[0], path->slots[0], + item = btrfs_item_ptr(dst_eb, dst_slot, struct btrfs_inode_item); - nbytes = btrfs_inode_nbytes(path->nodes[0], item); + nbytes = btrfs_inode_nbytes(dst_eb, item); item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); btrfs_set_inode_nbytes(eb, item, nbytes); @@ -514,11 +514,13 @@ insert: key, item_size); path->skip_release_on_error = 0; + dst_eb = path->nodes[0]; + dst_slot = path->slots[0]; + /* make sure any existing item is the correct size */ if (ret == -EEXIST || ret == -EOVERFLOW) { - u32 found_size; - found_size = btrfs_item_size(path->nodes[0], - path->slots[0]); + const u32 found_size = btrfs_item_size(dst_eb, dst_slot); + if (found_size > item_size) btrfs_truncate_item(trans, path, item_size, 1); else if (found_size < item_size) @@ -526,8 +528,7 @@ insert: } else if (ret) { return ret; } - dst_ptr = btrfs_item_ptr_offset(path->nodes[0], - path->slots[0]); + dst_ptr = btrfs_item_ptr_offset(dst_eb, dst_slot); /* don't overwrite an existing inode if the generation number * was logged as zero. This is done when the tree logging code @@ -546,7 +547,6 @@ insert: dst_item = (struct btrfs_inode_item *)dst_ptr; if (btrfs_inode_generation(eb, src_item) == 0) { - struct extent_buffer *dst_eb = path->nodes[0]; const u64 ino_size = btrfs_inode_size(eb, src_item); /* @@ -564,33 +564,30 @@ insert: } if (S_ISDIR(btrfs_inode_mode(eb, src_item)) && - S_ISDIR(btrfs_inode_mode(path->nodes[0], dst_item))) { + S_ISDIR(btrfs_inode_mode(dst_eb, dst_item))) { save_old_i_size = 1; - saved_i_size = btrfs_inode_size(path->nodes[0], - dst_item); + saved_i_size = btrfs_inode_size(dst_eb, dst_item); } } - copy_extent_buffer(path->nodes[0], eb, dst_ptr, - src_ptr, item_size); + copy_extent_buffer(dst_eb, eb, dst_ptr, src_ptr, item_size); if (save_old_i_size) { struct btrfs_inode_item *dst_item; + dst_item = (struct btrfs_inode_item *)dst_ptr; - btrfs_set_inode_size(path->nodes[0], dst_item, saved_i_size); + btrfs_set_inode_size(dst_eb, dst_item, saved_i_size); } /* make sure the generation is filled in */ if (key->type == BTRFS_INODE_ITEM_KEY) { struct btrfs_inode_item *dst_item; + dst_item = (struct btrfs_inode_item *)dst_ptr; - if (btrfs_inode_generation(path->nodes[0], dst_item) == 0) { - btrfs_set_inode_generation(path->nodes[0], dst_item, - trans->transid); - } + if (btrfs_inode_generation(dst_eb, dst_item) == 0) + btrfs_set_inode_generation(dst_eb, dst_item, trans->transid); } no_copy: - btrfs_mark_buffer_dirty(trans, path->nodes[0]); btrfs_release_path(path); return 0; } @@ -610,21 +607,6 @@ static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len, return 0; } -/* - * simple helper to read an inode off the disk from a given root - * This can only be called for subvolume roots and not for the log - */ -static noinline struct inode *read_one_inode(struct btrfs_root *root, - u64 objectid) -{ - struct inode *inode; - - inode = btrfs_iget_logging(objectid, root); - if (IS_ERR(inode)) - inode = NULL; - return inode; -} - /* replays a single extent in 'eb' at 'slot' with 'key' into the * subvolume 'root'. path is released on entry and should be released * on exit. @@ -650,7 +632,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, u64 start = key->offset; u64 nbytes = 0; struct btrfs_file_extent_item *item; - struct inode *inode = NULL; + struct btrfs_inode *inode = NULL; unsigned long size; int ret = 0; @@ -674,46 +656,38 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, extent_end = ALIGN(start + size, fs_info->sectorsize); } else { - ret = 0; - goto out; + btrfs_err(fs_info, + "unexpected extent type=%d root=%llu inode=%llu offset=%llu", + found_type, btrfs_root_id(root), key->objectid, key->offset); + return -EUCLEAN; } - inode = read_one_inode(root, key->objectid); - if (!inode) { - ret = -EIO; - goto out; - } + inode = btrfs_iget_logging(key->objectid, root); + if (IS_ERR(inode)) + return PTR_ERR(inode); /* * first check to see if we already have this extent in the * file. This must be done before the btrfs_drop_extents run * so we don't try to drop this extent. */ - ret = btrfs_lookup_file_extent(trans, root, path, - btrfs_ino(BTRFS_I(inode)), start, 0); + ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode), start, 0); if (ret == 0 && (found_type == BTRFS_FILE_EXTENT_REG || found_type == BTRFS_FILE_EXTENT_PREALLOC)) { - struct btrfs_file_extent_item cmp1; - struct btrfs_file_extent_item cmp2; - struct btrfs_file_extent_item *existing; - struct extent_buffer *leaf; - - leaf = path->nodes[0]; - existing = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); + struct btrfs_file_extent_item existing; + unsigned long ptr; - read_extent_buffer(eb, &cmp1, (unsigned long)item, - sizeof(cmp1)); - read_extent_buffer(leaf, &cmp2, (unsigned long)existing, - sizeof(cmp2)); + ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); + read_extent_buffer(path->nodes[0], &existing, ptr, sizeof(existing)); /* * we already have a pointer to this exact extent, * we don't have to do anything */ - if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) { + if (memcmp_extent_buffer(eb, &existing, (unsigned long)item, + sizeof(existing)) == 0) { btrfs_release_path(path); goto out; } @@ -724,7 +698,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, drop_args.start = start; drop_args.end = extent_end; drop_args.drop_cache = true; - ret = btrfs_drop_extents(trans, root, BTRFS_I(inode), &drop_args); + ret = btrfs_drop_extents(trans, root, inode, &drop_args); if (ret) goto out; @@ -748,8 +722,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, (unsigned long)item, sizeof(*item)); ins.objectid = btrfs_file_extent_disk_bytenr(eb, item); - ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); ins.type = BTRFS_EXTENT_ITEM_KEY; + ins.offset = btrfs_file_extent_disk_num_bytes(eb, item); offset = key->offset - btrfs_file_extent_offset(eb, item); /* @@ -874,9 +848,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, struct btrfs_ordered_sum *sums; struct btrfs_root *csum_root; - sums = list_entry(ordered_sums.next, - struct btrfs_ordered_sum, - list); + sums = list_first_entry(&ordered_sums, + struct btrfs_ordered_sum, + list); csum_root = btrfs_csum_root(fs_info, sums->logical); if (!ret) @@ -902,16 +876,15 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, goto out; } - ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), start, - extent_end - start); + ret = btrfs_inode_set_file_extent_range(inode, start, extent_end - start); if (ret) goto out; update_inode: - btrfs_update_inode_bytes(BTRFS_I(inode), nbytes, drop_args.bytes_found); - ret = btrfs_update_inode(trans, BTRFS_I(inode)); + btrfs_update_inode_bytes(inode, nbytes, drop_args.bytes_found); + ret = btrfs_update_inode(trans, inode); out: - iput(inode); + iput(&inode->vfs_inode); return ret; } @@ -948,7 +921,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, struct btrfs_dir_item *di) { struct btrfs_root *root = dir->root; - struct inode *inode; + struct btrfs_inode *inode; struct fscrypt_str name; struct extent_buffer *leaf; struct btrfs_key location; @@ -963,9 +936,10 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, btrfs_release_path(path); - inode = read_one_inode(root, location.objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(location.objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; goto out; } @@ -973,10 +947,11 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, if (ret) goto out; - ret = unlink_inode_for_log_replay(trans, dir, BTRFS_I(inode), &name); + ret = unlink_inode_for_log_replay(trans, dir, inode, &name); out: kfree(name.name); - iput(inode); + if (inode) + iput(&inode->vfs_inode); return ret; } @@ -1066,6 +1041,126 @@ out: return ret; } +static int unlink_refs_not_in_log(struct btrfs_trans_handle *trans, + struct btrfs_path *path, + struct btrfs_root *log_root, + struct btrfs_key *search_key, + struct btrfs_inode *dir, + struct btrfs_inode *inode, + u64 parent_objectid) +{ + struct extent_buffer *leaf = path->nodes[0]; + unsigned long ptr; + unsigned long ptr_end; + + /* + * Check all the names in this back reference to see if they are in the + * log. If so, we allow them to stay otherwise they must be unlinked as + * a conflict. + */ + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + ptr_end = ptr + btrfs_item_size(leaf, path->slots[0]); + while (ptr < ptr_end) { + struct fscrypt_str victim_name; + struct btrfs_inode_ref *victim_ref; + int ret; + + victim_ref = (struct btrfs_inode_ref *)ptr; + ret = read_alloc_one_name(leaf, (victim_ref + 1), + btrfs_inode_ref_name_len(leaf, victim_ref), + &victim_name); + if (ret) + return ret; + + ret = backref_in_log(log_root, search_key, parent_objectid, &victim_name); + if (ret) { + kfree(victim_name.name); + if (ret < 0) + return ret; + ptr = (unsigned long)(victim_ref + 1) + victim_name.len; + continue; + } + + inc_nlink(&inode->vfs_inode); + btrfs_release_path(path); + + ret = unlink_inode_for_log_replay(trans, dir, inode, &victim_name); + kfree(victim_name.name); + if (ret) + return ret; + return -EAGAIN; + } + + return 0; +} + +static int unlink_extrefs_not_in_log(struct btrfs_trans_handle *trans, + struct btrfs_path *path, + struct btrfs_root *root, + struct btrfs_root *log_root, + struct btrfs_key *search_key, + struct btrfs_inode *inode, + u64 inode_objectid, + u64 parent_objectid) +{ + struct extent_buffer *leaf = path->nodes[0]; + const unsigned long base = btrfs_item_ptr_offset(leaf, path->slots[0]); + const u32 item_size = btrfs_item_size(leaf, path->slots[0]); + u32 cur_offset = 0; + + while (cur_offset < item_size) { + struct btrfs_inode_extref *extref; + struct btrfs_inode *victim_parent; + struct fscrypt_str victim_name; + int ret; + + extref = (struct btrfs_inode_extref *)(base + cur_offset); + victim_name.len = btrfs_inode_extref_name_len(leaf, extref); + + if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) + goto next; + + ret = read_alloc_one_name(leaf, &extref->name, victim_name.len, + &victim_name); + if (ret) + return ret; + + search_key->objectid = inode_objectid; + search_key->type = BTRFS_INODE_EXTREF_KEY; + search_key->offset = btrfs_extref_hash(parent_objectid, + victim_name.name, + victim_name.len); + ret = backref_in_log(log_root, search_key, parent_objectid, &victim_name); + if (ret) { + kfree(victim_name.name); + if (ret < 0) + return ret; +next: + cur_offset += victim_name.len + sizeof(*extref); + continue; + } + + victim_parent = btrfs_iget_logging(parent_objectid, root); + if (IS_ERR(victim_parent)) { + kfree(victim_name.name); + return PTR_ERR(victim_parent); + } + + inc_nlink(&inode->vfs_inode); + btrfs_release_path(path); + + ret = unlink_inode_for_log_replay(trans, victim_parent, inode, + &victim_name); + iput(&victim_parent->vfs_inode); + kfree(victim_name.name); + if (ret) + return ret; + return -EAGAIN; + } + + return 0; +} + static inline int __add_inode_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -1076,7 +1171,6 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans, u64 ref_index, struct fscrypt_str *name) { int ret; - struct extent_buffer *leaf; struct btrfs_dir_item *di; struct btrfs_key search_key; struct btrfs_inode_extref *extref; @@ -1087,121 +1181,37 @@ again: search_key.type = BTRFS_INODE_REF_KEY; search_key.offset = parent_objectid; ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); - if (ret == 0) { - struct btrfs_inode_ref *victim_ref; - unsigned long ptr; - unsigned long ptr_end; - - leaf = path->nodes[0]; - - /* are we trying to overwrite a back ref for the root directory - * if so, just jump out, we're done + if (ret < 0) { + return ret; + } else if (ret == 0) { + /* + * Are we trying to overwrite a back ref for the root directory? + * If so, we're done. */ if (search_key.objectid == search_key.offset) return 1; - /* check all the names in this back reference to see - * if they are in the log. if so, we allow them to stay - * otherwise they must be unlinked as a conflict - */ - ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); - ptr_end = ptr + btrfs_item_size(leaf, path->slots[0]); - while (ptr < ptr_end) { - struct fscrypt_str victim_name; - - victim_ref = (struct btrfs_inode_ref *)ptr; - ret = read_alloc_one_name(leaf, (victim_ref + 1), - btrfs_inode_ref_name_len(leaf, victim_ref), - &victim_name); - if (ret) - return ret; - - ret = backref_in_log(log_root, &search_key, - parent_objectid, &victim_name); - if (ret < 0) { - kfree(victim_name.name); - return ret; - } else if (!ret) { - inc_nlink(&inode->vfs_inode); - btrfs_release_path(path); - - ret = unlink_inode_for_log_replay(trans, dir, inode, - &victim_name); - kfree(victim_name.name); - if (ret) - return ret; - goto again; - } - kfree(victim_name.name); - - ptr = (unsigned long)(victim_ref + 1) + victim_name.len; - } + ret = unlink_refs_not_in_log(trans, path, log_root, &search_key, + dir, inode, parent_objectid); + if (ret == -EAGAIN) + goto again; + else if (ret) + return ret; } btrfs_release_path(path); /* Same search but for extended refs */ - extref = btrfs_lookup_inode_extref(NULL, root, path, name, - inode_objectid, parent_objectid, 0, - 0); + extref = btrfs_lookup_inode_extref(root, path, name, inode_objectid, parent_objectid); if (IS_ERR(extref)) { return PTR_ERR(extref); } else if (extref) { - u32 item_size; - u32 cur_offset = 0; - unsigned long base; - struct inode *victim_parent; - - leaf = path->nodes[0]; - - item_size = btrfs_item_size(leaf, path->slots[0]); - base = btrfs_item_ptr_offset(leaf, path->slots[0]); - - while (cur_offset < item_size) { - struct fscrypt_str victim_name; - - extref = (struct btrfs_inode_extref *)(base + cur_offset); - - if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) - goto next; - - ret = read_alloc_one_name(leaf, &extref->name, - btrfs_inode_extref_name_len(leaf, extref), - &victim_name); - if (ret) - return ret; - - search_key.objectid = inode_objectid; - search_key.type = BTRFS_INODE_EXTREF_KEY; - search_key.offset = btrfs_extref_hash(parent_objectid, - victim_name.name, - victim_name.len); - ret = backref_in_log(log_root, &search_key, - parent_objectid, &victim_name); - if (ret < 0) { - kfree(victim_name.name); - return ret; - } else if (!ret) { - ret = -ENOENT; - victim_parent = read_one_inode(root, - parent_objectid); - if (victim_parent) { - inc_nlink(&inode->vfs_inode); - btrfs_release_path(path); - - ret = unlink_inode_for_log_replay(trans, - BTRFS_I(victim_parent), - inode, &victim_name); - } - iput(victim_parent); - kfree(victim_name.name); - if (ret) - return ret; - goto again; - } - kfree(victim_name.name); -next: - cur_offset += victim_name.len + sizeof(*extref); - } + ret = unlink_extrefs_not_in_log(trans, path, root, log_root, + &search_key, inode, + inode_objectid, parent_objectid); + if (ret == -EAGAIN) + goto again; + else if (ret) + return ret; } btrfs_release_path(path); @@ -1326,19 +1336,18 @@ again: ret = !!btrfs_find_name_in_backref(log_eb, log_slot, &name); if (!ret) { - struct inode *dir; + struct btrfs_inode *dir; btrfs_release_path(path); - dir = read_one_inode(root, parent_id); - if (!dir) { - ret = -ENOENT; + dir = btrfs_iget_logging(parent_id, root); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); kfree(name.name); goto out; } - ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir), - inode, &name); + ret = unlink_inode_for_log_replay(trans, dir, inode, &name); kfree(name.name); - iput(dir); + iput(&dir->vfs_inode); if (ret) goto out; goto again; @@ -1370,13 +1379,13 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, struct extent_buffer *eb, int slot, struct btrfs_key *key) { - struct inode *dir = NULL; - struct inode *inode = NULL; + struct btrfs_inode *dir = NULL; + struct btrfs_inode *inode = NULL; unsigned long ref_ptr; unsigned long ref_end; struct fscrypt_str name = { 0 }; int ret; - int log_ref_ver = 0; + const bool is_extref_item = (key->type == BTRFS_INODE_EXTREF_KEY); u64 parent_objectid; u64 inode_objectid; u64 ref_index = 0; @@ -1385,11 +1394,10 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, ref_ptr = btrfs_item_ptr_offset(eb, slot); ref_end = ref_ptr + btrfs_item_size(eb, slot); - if (key->type == BTRFS_INODE_EXTREF_KEY) { + if (is_extref_item) { struct btrfs_inode_extref *r; ref_struct_size = sizeof(struct btrfs_inode_extref); - log_ref_ver = 1; r = (struct btrfs_inode_extref *)ref_ptr; parent_objectid = btrfs_inode_extref_parent(eb, r); } else { @@ -1404,40 +1412,64 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, * copy the back ref in. The link count fixup code will take * care of the rest */ - dir = read_one_inode(root, parent_objectid); - if (!dir) { - ret = -ENOENT; + dir = btrfs_iget_logging(parent_objectid, root); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); + if (ret == -ENOENT) + ret = 0; + dir = NULL; goto out; } - inode = read_one_inode(root, inode_objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(inode_objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; goto out; } while (ref_ptr < ref_end) { - if (log_ref_ver) { + if (is_extref_item) { ret = extref_get_fields(eb, ref_ptr, &name, &ref_index, &parent_objectid); + if (ret) + goto out; /* * parent object can change from one array * item to another. */ - if (!dir) - dir = read_one_inode(root, parent_objectid); if (!dir) { - ret = -ENOENT; - goto out; + dir = btrfs_iget_logging(parent_objectid, root); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); + dir = NULL; + /* + * A new parent dir may have not been + * logged and not exist in the subvolume + * tree, see the comment above before + * the loop when getting the first + * parent dir. + */ + if (ret == -ENOENT) { + /* + * The next extref may refer to + * another parent dir that + * exists, so continue. + */ + ret = 0; + goto next; + } + goto out; + } } } else { ret = ref_get_fields(eb, ref_ptr, &name, &ref_index); + if (ret) + goto out; } - if (ret) - goto out; - ret = inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)), - btrfs_ino(BTRFS_I(inode)), ref_index, &name); + ret = inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), + ref_index, &name); if (ret < 0) { goto out; } else if (ret == 0) { @@ -1448,8 +1480,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, * overwrite any existing back reference, and we don't * want to create dangling pointers in the directory. */ - ret = __add_inode_ref(trans, root, path, log, - BTRFS_I(dir), BTRFS_I(inode), + ret = __add_inode_ref(trans, root, path, log, dir, inode, inode_objectid, parent_objectid, ref_index, &name); if (ret) { @@ -1459,22 +1490,22 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, } /* insert our name */ - ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), - &name, 0, ref_index); + ret = btrfs_add_link(trans, dir, inode, &name, 0, ref_index); if (ret) goto out; - ret = btrfs_update_inode(trans, BTRFS_I(inode)); + ret = btrfs_update_inode(trans, inode); if (ret) goto out; } /* Else, ret == 1, we already have a perfect match, we're done. */ +next: ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + name.len; kfree(name.name); name.name = NULL; - if (log_ref_ver) { - iput(dir); + if (is_extref_item && dir) { + iput(&dir->vfs_inode); dir = NULL; } } @@ -1487,8 +1518,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, * dir index entries exist for a name but there is no inode reference * item with the same name. */ - ret = unlink_old_inode_refs(trans, root, path, BTRFS_I(inode), eb, slot, - key); + ret = unlink_old_inode_refs(trans, root, path, inode, eb, slot, key); if (ret) goto out; @@ -1497,8 +1527,10 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, out: btrfs_release_path(path); kfree(name.name); - iput(dir); - iput(inode); + if (dir) + iput(&dir->vfs_inode); + if (inode) + iput(&inode->vfs_inode); return ret; } @@ -1612,25 +1644,25 @@ process_slot: * will free the inode. */ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, - struct inode *inode) + struct btrfs_inode *inode) { - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; struct btrfs_path *path; int ret; u64 nlink = 0; - u64 ino = btrfs_ino(BTRFS_I(inode)); + const u64 ino = btrfs_ino(inode); path = btrfs_alloc_path(); if (!path) return -ENOMEM; - ret = count_inode_refs(BTRFS_I(inode), path); + ret = count_inode_refs(inode, path); if (ret < 0) goto out; nlink = ret; - ret = count_inode_extrefs(BTRFS_I(inode), path); + ret = count_inode_extrefs(inode, path); if (ret < 0) goto out; @@ -1638,19 +1670,18 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, ret = 0; - if (nlink != inode->i_nlink) { - set_nlink(inode, nlink); - ret = btrfs_update_inode(trans, BTRFS_I(inode)); + if (nlink != inode->vfs_inode.i_nlink) { + set_nlink(&inode->vfs_inode, nlink); + ret = btrfs_update_inode(trans, inode); if (ret) goto out; } - if (S_ISDIR(inode->i_mode)) - BTRFS_I(inode)->index_cnt = (u64)-1; + if (S_ISDIR(inode->vfs_inode.i_mode)) + inode->index_cnt = (u64)-1; - if (inode->i_nlink == 0) { - if (S_ISDIR(inode->i_mode)) { - ret = replay_dir_deletes(trans, root, NULL, path, - ino, 1); + if (inode->vfs_inode.i_nlink == 0) { + if (S_ISDIR(inode->vfs_inode.i_mode)) { + ret = replay_dir_deletes(trans, root, NULL, path, ino, true); if (ret) goto out; } @@ -1670,12 +1701,13 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, { int ret; struct btrfs_key key; - struct inode *inode; key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; key.type = BTRFS_ORPHAN_ITEM_KEY; key.offset = (u64)-1; while (1) { + struct btrfs_inode *inode; + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) break; @@ -1697,14 +1729,14 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, break; btrfs_release_path(path); - inode = read_one_inode(root, key.offset); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(key.offset, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); break; } ret = fixup_inode_link_count(trans, inode); - iput(inode); + iput(&inode->vfs_inode); if (ret) break; @@ -1732,12 +1764,14 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, { struct btrfs_key key; int ret = 0; - struct inode *inode; + struct btrfs_inode *inode; + struct inode *vfs_inode; - inode = read_one_inode(root, objectid); - if (!inode) - return -EIO; + inode = btrfs_iget_logging(objectid, root); + if (IS_ERR(inode)) + return PTR_ERR(inode); + vfs_inode = &inode->vfs_inode; key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; key.type = BTRFS_ORPHAN_ITEM_KEY; key.offset = objectid; @@ -1746,15 +1780,15 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, btrfs_release_path(path); if (ret == 0) { - if (!inode->i_nlink) - set_nlink(inode, 1); + if (!vfs_inode->i_nlink) + set_nlink(vfs_inode, 1); else - inc_nlink(inode); - ret = btrfs_update_inode(trans, BTRFS_I(inode)); + inc_nlink(vfs_inode); + ret = btrfs_update_inode(trans, inode); } else if (ret == -EEXIST) { ret = 0; } - iput(inode); + iput(vfs_inode); return ret; } @@ -1770,27 +1804,26 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans, const struct fscrypt_str *name, struct btrfs_key *location) { - struct inode *inode; - struct inode *dir; + struct btrfs_inode *inode; + struct btrfs_inode *dir; int ret; - inode = read_one_inode(root, location->objectid); - if (!inode) - return -ENOENT; + inode = btrfs_iget_logging(location->objectid, root); + if (IS_ERR(inode)) + return PTR_ERR(inode); - dir = read_one_inode(root, dirid); - if (!dir) { - iput(inode); - return -EIO; + dir = btrfs_iget_logging(dirid, root); + if (IS_ERR(dir)) { + iput(&inode->vfs_inode); + return PTR_ERR(dir); } - ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name, - 1, index); + ret = btrfs_add_link(trans, dir, inode, name, 1, index); /* FIXME, put inode into FIXUP list */ - iput(inode); - iput(dir); + iput(&inode->vfs_inode); + iput(&dir->vfs_inode); return ret; } @@ -1852,16 +1885,16 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, bool index_dst_matches = false; struct btrfs_key log_key; struct btrfs_key search_key; - struct inode *dir; + struct btrfs_inode *dir; u8 log_flags; bool exists; int ret; bool update_size = true; bool name_added = false; - dir = read_one_inode(root, key->objectid); - if (!dir) - return -EIO; + dir = btrfs_iget_logging(key->objectid, root); + if (IS_ERR(dir)) + return PTR_ERR(dir); ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name); if (ret) @@ -1882,9 +1915,8 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, ret = PTR_ERR(dir_dst_di); goto out; } else if (dir_dst_di) { - ret = delete_conflicting_dir_entry(trans, BTRFS_I(dir), path, - dir_dst_di, &log_key, - log_flags, exists); + ret = delete_conflicting_dir_entry(trans, dir, path, dir_dst_di, + &log_key, log_flags, exists); if (ret < 0) goto out; dir_dst_matches = (ret == 1); @@ -1899,9 +1931,8 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, ret = PTR_ERR(index_dst_di); goto out; } else if (index_dst_di) { - ret = delete_conflicting_dir_entry(trans, BTRFS_I(dir), path, - index_dst_di, &log_key, - log_flags, exists); + ret = delete_conflicting_dir_entry(trans, dir, path, index_dst_di, + &log_key, log_flags, exists); if (ret < 0) goto out; index_dst_matches = (ret == 1); @@ -1956,11 +1987,11 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, out: if (!ret && update_size) { - btrfs_i_size_write(BTRFS_I(dir), dir->i_size + name.len * 2); - ret = btrfs_update_inode(trans, BTRFS_I(dir)); + btrfs_i_size_write(dir, dir->vfs_inode.i_size + name.len * 2); + ret = btrfs_update_inode(trans, dir); } kfree(name.name); - iput(dir); + iput(&dir->vfs_inode); if (!ret && name_added) ret = 1; return ret; @@ -2117,16 +2148,16 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *log, struct btrfs_path *path, struct btrfs_path *log_path, - struct inode *dir, + struct btrfs_inode *dir, struct btrfs_key *dir_key) { - struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_root *root = dir->root; int ret; struct extent_buffer *eb; int slot; struct btrfs_dir_item *di; struct fscrypt_str name = { 0 }; - struct inode *inode = NULL; + struct btrfs_inode *inode = NULL; struct btrfs_key location; /* @@ -2163,9 +2194,10 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, btrfs_dir_item_key_to_cpu(eb, di, &location); btrfs_release_path(path); btrfs_release_path(log_path); - inode = read_one_inode(root, location.objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(location.objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; goto out; } @@ -2173,9 +2205,8 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, if (ret) goto out; - inc_nlink(inode); - ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir), BTRFS_I(inode), - &name); + inc_nlink(&inode->vfs_inode); + ret = unlink_inode_for_log_replay(trans, dir, inode, &name); /* * Unlike dir item keys, dir index keys can only have one name (entry) in * them, as there are no key collisions since each key has a unique offset @@ -2185,7 +2216,8 @@ out: btrfs_release_path(path); btrfs_release_path(log_path); kfree(name.name); - iput(inode); + if (inode) + iput(&inode->vfs_inode); return ret; } @@ -2301,7 +2333,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_root *log, struct btrfs_path *path, - u64 dirid, int del_all) + u64 dirid, bool del_all) { u64 range_start; u64 range_end; @@ -2309,7 +2341,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, struct btrfs_key dir_key; struct btrfs_key found_key; struct btrfs_path *log_path; - struct inode *dir; + struct btrfs_inode *dir; dir_key.objectid = dirid; dir_key.type = BTRFS_DIR_INDEX_KEY; @@ -2317,14 +2349,17 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, if (!log_path) return -ENOMEM; - dir = read_one_inode(root, dirid); - /* it isn't an error if the inode isn't there, that can happen - * because we replay the deletes before we copy in the inode item - * from the log + dir = btrfs_iget_logging(dirid, root); + /* + * It isn't an error if the inode isn't there, that can happen because + * we replay the deletes before we copy in the inode item from the log. */ - if (!dir) { + if (IS_ERR(dir)) { btrfs_free_path(log_path); - return 0; + ret = PTR_ERR(dir); + if (ret == -ENOENT) + ret = 0; + return ret; } range_start = 0; @@ -2386,7 +2421,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, out: btrfs_release_path(path); btrfs_free_path(log_path); - iput(dir); + iput(&dir->vfs_inode); return ret; } @@ -2460,8 +2495,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, break; mode = btrfs_inode_mode(eb, inode_item); if (S_ISDIR(mode)) { - ret = replay_dir_deletes(wc->trans, - root, log, path, key.objectid, 0); + ret = replay_dir_deletes(wc->trans, root, log, path, + key.objectid, false); if (ret) break; } @@ -2480,30 +2515,28 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, */ if (S_ISREG(mode)) { struct btrfs_drop_extents_args drop_args = { 0 }; - struct inode *inode; + struct btrfs_inode *inode; u64 from; - inode = read_one_inode(root, key.objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(key.objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); break; } - from = ALIGN(i_size_read(inode), + from = ALIGN(i_size_read(&inode->vfs_inode), root->fs_info->sectorsize); drop_args.start = from; drop_args.end = (u64)-1; drop_args.drop_cache = true; - ret = btrfs_drop_extents(wc->trans, root, - BTRFS_I(inode), + ret = btrfs_drop_extents(wc->trans, root, inode, &drop_args); if (!ret) { - inode_sub_bytes(inode, + inode_sub_bytes(&inode->vfs_inode, drop_args.bytes_found); /* Update the inode's nbytes. */ - ret = btrfs_update_inode(wc->trans, - BTRFS_I(inode)); + ret = btrfs_update_inode(wc->trans, inode); } - iput(inode); + iput(&inode->vfs_inode); if (ret) break; } @@ -2538,9 +2571,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, key.type == BTRFS_INODE_EXTREF_KEY) { ret = add_inode_ref(wc->trans, root, log, path, eb, i, &key); - if (ret && ret != -ENOENT) + if (ret) break; - ret = 0; } else if (key.type == BTRFS_EXTENT_DATA_KEY) { ret = replay_one_extent(wc->trans, root, path, eb, i, &key); @@ -2739,7 +2771,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, level = btrfs_header_level(log->node); orig_level = level; path->nodes[level] = log->node; - atomic_inc(&log->node->refs); + refcount_inc(&log->node->refs); path->slots[level] = 0; while (1) { @@ -2980,9 +3012,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, } if (log_transid % 2 == 0) - mark = EXTENT_DIRTY; + mark = EXTENT_DIRTY_LOG1; else - mark = EXTENT_NEW; + mark = EXTENT_DIRTY_LOG2; /* we start IO on all the marked extents here, but we don't actually * wait for them until later. @@ -3113,7 +3145,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ret = btrfs_write_marked_extents(fs_info, &log_root_tree->dirty_log_pages, - EXTENT_DIRTY | EXTENT_NEW); + EXTENT_DIRTY_LOG1 | EXTENT_DIRTY_LOG2); blk_finish_plug(&plug); /* * As described above, -EAGAIN indicates a hole in the extents. We @@ -3133,7 +3165,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ret = btrfs_wait_tree_log_extents(log, mark); if (!ret) ret = btrfs_wait_tree_log_extents(log_root_tree, - EXTENT_NEW | EXTENT_DIRTY); + EXTENT_DIRTY_LOG1 | EXTENT_DIRTY_LOG2); if (ret) { btrfs_set_log_full_commit(trans); mutex_unlock(&log_root_tree->log_mutex); @@ -3259,9 +3291,9 @@ static void free_log_tree(struct btrfs_trans_handle *trans, */ btrfs_write_marked_extents(log->fs_info, &log->dirty_log_pages, - EXTENT_DIRTY | EXTENT_NEW); + EXTENT_DIRTY_LOG1 | EXTENT_DIRTY_LOG2); btrfs_wait_tree_log_extents(log, - EXTENT_DIRTY | EXTENT_NEW); + EXTENT_DIRTY_LOG1 | EXTENT_DIRTY_LOG2); if (trans) btrfs_abort_transaction(trans, ret); @@ -3270,8 +3302,8 @@ static void free_log_tree(struct btrfs_trans_handle *trans, } } - extent_io_tree_release(&log->dirty_log_pages); - extent_io_tree_release(&log->log_csum_range); + btrfs_extent_io_tree_release(&log->dirty_log_pages); + btrfs_extent_io_tree_release(&log->log_csum_range); btrfs_put_root(log); } @@ -3451,7 +3483,7 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans, * inode item because on log replay we update the field to reflect * all existing entries in the directory (see overwrite_item()). */ - return btrfs_delete_one_dir_name(trans, log, path, di); + return btrfs_del_item(trans, log, path); } /* @@ -3491,26 +3523,27 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, return; } - ret = join_running_log_trans(root); - if (ret) - return; - - mutex_lock(&dir->log_mutex); - path = btrfs_alloc_path(); if (!path) { - ret = -ENOMEM; - goto out_unlock; + btrfs_set_log_full_commit(trans); + return; } + ret = join_running_log_trans(root); + ASSERT(ret == 0, "join_running_log_trans() ret=%d", ret); + if (WARN_ON(ret)) + goto out; + + mutex_lock(&dir->log_mutex); + ret = del_logged_dentry(trans, root->log_root, path, btrfs_ino(dir), name, index); - btrfs_free_path(path); -out_unlock: mutex_unlock(&dir->log_mutex); if (ret < 0) btrfs_set_log_full_commit(trans); btrfs_end_log_trans(root); +out: + btrfs_free_path(path); } /* see comments for btrfs_del_dir_entries_in_log */ @@ -3520,7 +3553,6 @@ void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, u64 dirid) { struct btrfs_root *log; - u64 index; int ret; ret = inode_logged(trans, inode, NULL); @@ -3532,13 +3564,13 @@ void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, } ret = join_running_log_trans(root); - if (ret) + ASSERT(ret == 0, "join_running_log_trans() ret=%d", ret); + if (WARN_ON(ret)) return; log = root->log_root; mutex_lock(&inode->log_mutex); - ret = btrfs_del_inode_ref(trans, log, name, btrfs_ino(inode), - dirid, &index); + ret = btrfs_del_inode_ref(trans, log, name, btrfs_ino(inode), dirid, NULL); mutex_unlock(&inode->log_mutex); if (ret < 0 && ret != -ENOENT) btrfs_set_log_full_commit(trans); @@ -3561,8 +3593,8 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, struct btrfs_dir_log_item *item; key.objectid = dirid; - key.offset = first_offset; key.type = BTRFS_DIR_LOG_INDEX_KEY; + key.offset = first_offset; ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); /* * -EEXIST is fine and can happen sporadically when we are logging a @@ -3588,7 +3620,6 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, last_offset = max(last_offset, curr_end); } btrfs_set_dir_log_end(path->nodes[0], item, last_offset); - btrfs_mark_buffer_dirty(trans, path->nodes[0]); btrfs_release_path(path); return 0; } @@ -3704,7 +3735,7 @@ static int clone_leaf(struct btrfs_path *path, struct btrfs_log_ctx *ctx) * Add extra ref to scratch eb so that it is not freed when callers * release the path, so we can reuse it later if needed. */ - atomic_inc(&ctx->scratch_eb->refs); + refcount_inc(&ctx->scratch_eb->refs); return 0; } @@ -4192,44 +4223,37 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, struct inode *inode, int log_inode_only, u64 logged_isize) { - struct btrfs_map_token token; u64 flags; - btrfs_init_map_token(&token, leaf); - if (log_inode_only) { /* set the generation to zero so the recover code * can tell the difference between an logging * just to say 'this inode exists' and a logging * to say 'update this inode with these values' */ - btrfs_set_token_inode_generation(&token, item, 0); - btrfs_set_token_inode_size(&token, item, logged_isize); + btrfs_set_inode_generation(leaf, item, 0); + btrfs_set_inode_size(leaf, item, logged_isize); } else { - btrfs_set_token_inode_generation(&token, item, - BTRFS_I(inode)->generation); - btrfs_set_token_inode_size(&token, item, inode->i_size); + btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); + btrfs_set_inode_size(leaf, item, inode->i_size); } - btrfs_set_token_inode_uid(&token, item, i_uid_read(inode)); - btrfs_set_token_inode_gid(&token, item, i_gid_read(inode)); - btrfs_set_token_inode_mode(&token, item, inode->i_mode); - btrfs_set_token_inode_nlink(&token, item, inode->i_nlink); + btrfs_set_inode_uid(leaf, item, i_uid_read(inode)); + btrfs_set_inode_gid(leaf, item, i_gid_read(inode)); + btrfs_set_inode_mode(leaf, item, inode->i_mode); + btrfs_set_inode_nlink(leaf, item, inode->i_nlink); + + btrfs_set_timespec_sec(leaf, &item->atime, inode_get_atime_sec(inode)); + btrfs_set_timespec_nsec(leaf, &item->atime, inode_get_atime_nsec(inode)); - btrfs_set_token_timespec_sec(&token, &item->atime, - inode_get_atime_sec(inode)); - btrfs_set_token_timespec_nsec(&token, &item->atime, - inode_get_atime_nsec(inode)); + btrfs_set_timespec_sec(leaf, &item->mtime, inode_get_mtime_sec(inode)); + btrfs_set_timespec_nsec(leaf, &item->mtime, inode_get_mtime_nsec(inode)); - btrfs_set_token_timespec_sec(&token, &item->mtime, - inode_get_mtime_sec(inode)); - btrfs_set_token_timespec_nsec(&token, &item->mtime, - inode_get_mtime_nsec(inode)); + btrfs_set_timespec_sec(leaf, &item->ctime, inode_get_ctime_sec(inode)); + btrfs_set_timespec_nsec(leaf, &item->ctime, inode_get_ctime_nsec(inode)); - btrfs_set_token_timespec_sec(&token, &item->ctime, - inode_get_ctime_sec(inode)); - btrfs_set_token_timespec_nsec(&token, &item->ctime, - inode_get_ctime_nsec(inode)); + btrfs_set_timespec_sec(leaf, &item->otime, BTRFS_I(inode)->i_otime_sec); + btrfs_set_timespec_nsec(leaf, &item->otime, BTRFS_I(inode)->i_otime_nsec); /* * We do not need to set the nbytes field, in fact during a fast fsync @@ -4240,13 +4264,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, * inode item in subvolume tree as needed (see overwrite_item()). */ - btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode)); - btrfs_set_token_inode_transid(&token, item, trans->transid); - btrfs_set_token_inode_rdev(&token, item, inode->i_rdev); + btrfs_set_inode_sequence(leaf, item, inode_peek_iversion(inode)); + btrfs_set_inode_transid(leaf, item, trans->transid); + btrfs_set_inode_rdev(leaf, item, inode->i_rdev); flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags, BTRFS_I(inode)->ro_flags); - btrfs_set_token_inode_flags(&token, item, flags); - btrfs_set_token_inode_block_group(&token, item, 0); + btrfs_set_inode_flags(leaf, item, flags); + btrfs_set_inode_block_group(leaf, item, 0); } static int log_inode_item(struct btrfs_trans_handle *trans, @@ -4320,8 +4344,8 @@ static int log_csums(struct btrfs_trans_handle *trans, * file which happens to refer to the same extent as well. Such races * can leave checksum items in the log with overlapping ranges. */ - ret = lock_extent(&log_root->log_csum_range, sums->logical, lock_end, - &cached_state); + ret = btrfs_lock_extent(&log_root->log_csum_range, sums->logical, lock_end, + &cached_state); if (ret) return ret; /* @@ -4337,8 +4361,8 @@ static int log_csums(struct btrfs_trans_handle *trans, if (!ret) ret = btrfs_csum_file_blocks(trans, log_root, sums); - unlock_extent(&log_root->log_csum_range, sums->logical, lock_end, - &cached_state); + btrfs_unlock_extent(&log_root->log_csum_range, sums->logical, lock_end, + &cached_state); return ret; } @@ -4566,7 +4590,6 @@ copy_item: dst_index++; } - btrfs_mark_buffer_dirty(trans, dst_path->nodes[0]); btrfs_release_path(dst_path); out: kfree(ins_data); @@ -4669,7 +4692,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans, return 0; /* If we're compressed we have to save the entire range of csums. */ - if (extent_map_is_compressed(em)) { + if (btrfs_extent_map_is_compressed(em)) { csum_offset = 0; csum_len = em->disk_num_bytes; } else { @@ -4678,7 +4701,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans, } /* block start is already adjusted for the file extent offset. */ - block_start = extent_map_block_start(em); + block_start = btrfs_extent_map_block_start(em); csum_root = btrfs_csum_root(trans->fs_info, block_start); ret = btrfs_lookup_csums_list(csum_root, block_start + csum_offset, block_start + csum_offset + csum_len - 1, @@ -4688,9 +4711,9 @@ static int log_extent_csums(struct btrfs_trans_handle *trans, ret = 0; while (!list_empty(&ordered_sums)) { - struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, - struct btrfs_ordered_sum, - list); + struct btrfs_ordered_sum *sums = list_first_entry(&ordered_sums, + struct btrfs_ordered_sum, + list); if (!ret) ret = log_csums(trans, inode, log_root, sums); list_del(&sums->list); @@ -4713,7 +4736,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, struct btrfs_key key; enum btrfs_compression_type compress_type; u64 extent_offset = em->offset; - u64 block_start = extent_map_block_start(em); + u64 block_start = btrfs_extent_map_block_start(em); u64 block_len; int ret; @@ -4724,7 +4747,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, btrfs_set_stack_file_extent_type(&fi, BTRFS_FILE_EXTENT_REG); block_len = em->disk_num_bytes; - compress_type = extent_map_compression(em); + compress_type = btrfs_extent_map_compression(em); if (compress_type != BTRFS_COMPRESS_NONE) { btrfs_set_stack_file_extent_disk_bytenr(&fi, block_start); btrfs_set_stack_file_extent_disk_num_bytes(&fi, block_len); @@ -4776,7 +4799,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans, write_extent_buffer(leaf, &fi, btrfs_item_ptr_offset(leaf, path->slots[0]), sizeof(fi)); - btrfs_mark_buffer_dirty(trans, leaf); btrfs_release_path(path); @@ -4969,7 +4991,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, list_sort(NULL, &extents, extent_cmp); process: while (!list_empty(&extents)) { - em = list_entry(extents.next, struct extent_map, list); + em = list_first_entry(&extents, struct extent_map, list); list_del_init(&em->list); @@ -4978,8 +5000,8 @@ process: * private list. */ if (ret) { - clear_em_logging(inode, em); - free_extent_map(em); + btrfs_clear_em_logging(inode, em); + btrfs_free_extent_map(em); continue; } @@ -4987,8 +5009,8 @@ process: ret = log_one_extent(trans, inode, em, path, ctx); write_lock(&tree->lock); - clear_em_logging(inode, em); - free_extent_map(em); + btrfs_clear_em_logging(inode, em); + btrfs_free_extent_map(em); } WARN_ON(!list_empty(&extents)); write_unlock(&tree->lock); @@ -5485,7 +5507,6 @@ static int log_new_dir_dentries(struct btrfs_trans_handle *trans, ihold(&curr_inode->vfs_inode); while (true) { - struct inode *vfs_inode; struct btrfs_key key; struct btrfs_key found_key; u64 next_index; @@ -5501,7 +5522,7 @@ again: struct extent_buffer *leaf = path->nodes[0]; struct btrfs_dir_item *di; struct btrfs_key di_key; - struct inode *di_inode; + struct btrfs_inode *di_inode; int log_mode = LOG_INODE_EXISTS; int type; @@ -5528,17 +5549,16 @@ again: goto out; } - if (!need_log_inode(trans, BTRFS_I(di_inode))) { - btrfs_add_delayed_iput(BTRFS_I(di_inode)); + if (!need_log_inode(trans, di_inode)) { + btrfs_add_delayed_iput(di_inode); break; } ctx->log_new_dentries = false; if (type == BTRFS_FT_DIR) log_mode = LOG_INODE_ALL; - ret = btrfs_log_inode(trans, BTRFS_I(di_inode), - log_mode, ctx); - btrfs_add_delayed_iput(BTRFS_I(di_inode)); + ret = btrfs_log_inode(trans, di_inode, log_mode, ctx); + btrfs_add_delayed_iput(di_inode); if (ret) goto out; if (ctx->log_new_dentries) { @@ -5580,14 +5600,13 @@ again: kfree(dir_elem); btrfs_add_delayed_iput(curr_inode); - curr_inode = NULL; - vfs_inode = btrfs_iget_logging(ino, root); - if (IS_ERR(vfs_inode)) { - ret = PTR_ERR(vfs_inode); + curr_inode = btrfs_iget_logging(ino, root); + if (IS_ERR(curr_inode)) { + ret = PTR_ERR(curr_inode); + curr_inode = NULL; break; } - curr_inode = BTRFS_I(vfs_inode); } out: btrfs_free_path(path); @@ -5665,7 +5684,7 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans, struct btrfs_log_ctx *ctx) { struct btrfs_ino_list *ino_elem; - struct inode *inode; + struct btrfs_inode *inode; /* * It's rare to have a lot of conflicting inodes, in practice it is not @@ -5756,12 +5775,12 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans, * inode in LOG_INODE_EXISTS mode and rename operations update the log, * so that the log ends up with the new name and without the old name. */ - if (!need_log_inode(trans, BTRFS_I(inode))) { - btrfs_add_delayed_iput(BTRFS_I(inode)); + if (!need_log_inode(trans, inode)) { + btrfs_add_delayed_iput(inode); return 0; } - btrfs_add_delayed_iput(BTRFS_I(inode)); + btrfs_add_delayed_iput(inode); ino_elem = kmalloc(sizeof(*ino_elem), GFP_NOFS); if (!ino_elem) @@ -5797,7 +5816,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, */ while (!list_empty(&ctx->conflict_inodes)) { struct btrfs_ino_list *curr; - struct inode *inode; + struct btrfs_inode *inode; u64 ino; u64 parent; @@ -5833,9 +5852,8 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, * dir index key range logged for the directory. So we * must make sure the deletion is recorded. */ - ret = btrfs_log_inode(trans, BTRFS_I(inode), - LOG_INODE_ALL, ctx); - btrfs_add_delayed_iput(BTRFS_I(inode)); + ret = btrfs_log_inode(trans, inode, LOG_INODE_ALL, ctx); + btrfs_add_delayed_iput(inode); if (ret) break; continue; @@ -5851,8 +5869,8 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, * it again because if some other task logged the inode after * that, we can avoid doing it again. */ - if (!need_log_inode(trans, BTRFS_I(inode))) { - btrfs_add_delayed_iput(BTRFS_I(inode)); + if (!need_log_inode(trans, inode)) { + btrfs_add_delayed_iput(inode); continue; } @@ -5863,8 +5881,8 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, * well because during a rename we pin the log and update the * log with the new name before we unpin it. */ - ret = btrfs_log_inode(trans, BTRFS_I(inode), LOG_INODE_EXISTS, ctx); - btrfs_add_delayed_iput(BTRFS_I(inode)); + ret = btrfs_log_inode(trans, inode, LOG_INODE_EXISTS, ctx); + btrfs_add_delayed_iput(inode); if (ret) break; } @@ -6355,7 +6373,7 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans, list_for_each_entry(item, delayed_ins_list, log_list) { struct btrfs_dir_item *dir_item; - struct inode *di_inode; + struct btrfs_inode *di_inode; struct btrfs_key key; int log_mode = LOG_INODE_EXISTS; @@ -6371,8 +6389,8 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans, break; } - if (!need_log_inode(trans, BTRFS_I(di_inode))) { - btrfs_add_delayed_iput(BTRFS_I(di_inode)); + if (!need_log_inode(trans, di_inode)) { + btrfs_add_delayed_iput(di_inode); continue; } @@ -6380,12 +6398,12 @@ static int log_new_delayed_dentries(struct btrfs_trans_handle *trans, log_mode = LOG_INODE_ALL; ctx->log_new_dentries = false; - ret = btrfs_log_inode(trans, BTRFS_I(di_inode), log_mode, ctx); + ret = btrfs_log_inode(trans, di_inode, log_mode, ctx); if (!ret && ctx->log_new_dentries) - ret = log_new_dir_dentries(trans, BTRFS_I(di_inode), ctx); + ret = log_new_dir_dentries(trans, di_inode, ctx); - btrfs_add_delayed_iput(BTRFS_I(di_inode)); + btrfs_add_delayed_iput(di_inode); if (ret) break; @@ -6609,6 +6627,19 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, btrfs_log_get_delayed_items(inode, &delayed_ins_list, &delayed_del_list); + /* + * If we are fsyncing a file with 0 hard links, then commit the delayed + * inode because the last inode ref (or extref) item may still be in the + * subvolume tree and if we log it the file will still exist after a log + * replay. So commit the delayed inode to delete that last ref and we + * skip logging it. + */ + if (inode->vfs_inode.i_nlink == 0) { + ret = btrfs_commit_inode_delayed_inode(inode); + if (ret) + goto out_unlock; + } + ret = copy_inode_items_to_log(trans, inode, &min_key, &max_key, path, dst_path, logged_isize, inode_only, ctx, @@ -6793,7 +6824,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, ptr = btrfs_item_ptr_offset(leaf, slot); while (cur_offset < item_size) { struct btrfs_key inode_key; - struct inode *dir_inode; + struct btrfs_inode *dir_inode; inode_key.type = BTRFS_INODE_ITEM_KEY; inode_key.offset = 0; @@ -6842,18 +6873,16 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, goto out; } - if (!need_log_inode(trans, BTRFS_I(dir_inode))) { - btrfs_add_delayed_iput(BTRFS_I(dir_inode)); + if (!need_log_inode(trans, dir_inode)) { + btrfs_add_delayed_iput(dir_inode); continue; } ctx->log_new_dentries = false; - ret = btrfs_log_inode(trans, BTRFS_I(dir_inode), - LOG_INODE_ALL, ctx); + ret = btrfs_log_inode(trans, dir_inode, LOG_INODE_ALL, ctx); if (!ret && ctx->log_new_dentries) - ret = log_new_dir_dentries(trans, - BTRFS_I(dir_inode), ctx); - btrfs_add_delayed_iput(BTRFS_I(dir_inode)); + ret = log_new_dir_dentries(trans, dir_inode, ctx); + btrfs_add_delayed_iput(dir_inode); if (ret) goto out; } @@ -6878,7 +6907,7 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; int slot; struct btrfs_key search_key; - struct inode *inode; + struct btrfs_inode *inode; u64 ino; int ret = 0; @@ -6893,11 +6922,10 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans, if (IS_ERR(inode)) return PTR_ERR(inode); - if (BTRFS_I(inode)->generation >= trans->transid && - need_log_inode(trans, BTRFS_I(inode))) - ret = btrfs_log_inode(trans, BTRFS_I(inode), - LOG_INODE_EXISTS, ctx); - btrfs_add_delayed_iput(BTRFS_I(inode)); + if (inode->generation >= trans->transid && + need_log_inode(trans, inode)) + ret = btrfs_log_inode(trans, inode, LOG_INODE_EXISTS, ctx); + btrfs_add_delayed_iput(inode); if (ret) return ret; @@ -7065,42 +7093,29 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; int ret = 0; - bool log_dentries = false; + bool log_dentries; - if (btrfs_test_opt(fs_info, NOTREELOG)) { - ret = BTRFS_LOG_FORCE_COMMIT; - goto end_no_trans; - } + if (btrfs_test_opt(fs_info, NOTREELOG)) + return BTRFS_LOG_FORCE_COMMIT; - if (btrfs_root_refs(&root->root_item) == 0) { - ret = BTRFS_LOG_FORCE_COMMIT; - goto end_no_trans; - } + if (btrfs_root_refs(&root->root_item) == 0) + return BTRFS_LOG_FORCE_COMMIT; /* * If we're logging an inode from a subvolume created in the current * transaction we must force a commit since the root is not persisted. */ - if (btrfs_root_generation(&root->root_item) == trans->transid) { - ret = BTRFS_LOG_FORCE_COMMIT; - goto end_no_trans; - } + if (btrfs_root_generation(&root->root_item) == trans->transid) + return BTRFS_LOG_FORCE_COMMIT; - /* - * Skip already logged inodes or inodes corresponding to tmpfiles - * (since logging them is pointless, a link count of 0 means they - * will never be accessible). - */ - if ((btrfs_inode_in_log(inode, trans->transid) && - list_empty(&ctx->ordered_extents)) || - inode->vfs_inode.i_nlink == 0) { - ret = BTRFS_NO_LOG_SYNC; - goto end_no_trans; - } + /* Skip already logged inodes and without new extents. */ + if (btrfs_inode_in_log(inode, trans->transid) && + list_empty(&ctx->ordered_extents)) + return BTRFS_NO_LOG_SYNC; ret = start_log_trans(trans, root, ctx); if (ret) - goto end_no_trans; + return ret; ret = btrfs_log_inode(trans, inode, inode_only, ctx); if (ret) @@ -7119,8 +7134,11 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, goto end_trans; } - if (S_ISDIR(inode->vfs_inode.i_mode) && ctx->log_new_dentries) - log_dentries = true; + /* + * Track if we need to log dentries because ctx->log_new_dentries can + * be modified in the call chains below. + */ + log_dentries = ctx->log_new_dentries; /* * On unlink we must make sure all our current and old parent directory @@ -7175,8 +7193,6 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (log_dentries) ret = log_new_dir_dentries(trans, inode, ctx); - else - ret = 0; end_trans: if (ret < 0) { btrfs_set_log_full_commit(trans); @@ -7186,7 +7202,7 @@ end_trans: if (ret) btrfs_remove_log_ctx(root, ctx); btrfs_end_log_trans(root); -end_no_trans: + return ret; } @@ -7220,8 +7236,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) struct btrfs_path *path; struct btrfs_trans_handle *trans; struct btrfs_key key; - struct btrfs_key found_key; - struct btrfs_root *log; struct btrfs_fs_info *fs_info = log_root_tree->fs_info; struct walk_control wc = { .process_func = process_one_buffer, @@ -7251,10 +7265,13 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) again: key.objectid = BTRFS_TREE_LOG_OBJECTID; - key.offset = (u64)-1; key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; while (1) { + struct btrfs_root *log; + struct btrfs_key found_key; + ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0); if (ret < 0) { @@ -7283,6 +7300,12 @@ again: true); if (IS_ERR(wc.replay_dest)) { ret = PTR_ERR(wc.replay_dest); + wc.replay_dest = NULL; + if (ret != -ENOENT) { + btrfs_put_root(log); + btrfs_abort_transaction(trans, ret); + goto error; + } /* * We didn't find the subvol, likely because it was @@ -7295,36 +7318,36 @@ again: * block from being modified, and we'll just bail for * each subsequent pass. */ - if (ret == -ENOENT) - ret = btrfs_pin_extent_for_log_replay(trans, log->node); - btrfs_put_root(log); - - if (!ret) - goto next; - btrfs_abort_transaction(trans, ret); - goto error; + ret = btrfs_pin_extent_for_log_replay(trans, log->node); + if (ret) { + btrfs_put_root(log); + btrfs_abort_transaction(trans, ret); + goto error; + } + goto next; } wc.replay_dest->log_root = log; ret = btrfs_record_root_in_trans(trans, wc.replay_dest); - if (ret) - /* The loop needs to continue due to the root refs */ + if (ret) { btrfs_abort_transaction(trans, ret); - else - ret = walk_log_tree(trans, log, &wc); + goto next; + } - if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) { - ret = fixup_inode_link_counts(trans, wc.replay_dest, - path); - if (ret) - btrfs_abort_transaction(trans, ret); + ret = walk_log_tree(trans, log, &wc); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto next; } - if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) { + if (wc.stage == LOG_WALK_REPLAY_ALL) { struct btrfs_root *root = wc.replay_dest; - btrfs_release_path(path); - + ret = fixup_inode_link_counts(trans, wc.replay_dest, path); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto next; + } /* * We have just replayed everything, and the highest * objectid of fs roots probably has changed in case @@ -7334,17 +7357,20 @@ again: * could only happen during mount. */ ret = btrfs_init_root_free_objectid(root); - if (ret) + if (ret) { btrfs_abort_transaction(trans, ret); + goto next; + } + } +next: + if (wc.replay_dest) { + wc.replay_dest->log_root = NULL; + btrfs_put_root(wc.replay_dest); } - - wc.replay_dest->log_root = NULL; - btrfs_put_root(wc.replay_dest); btrfs_put_root(log); if (ret) goto error; -next: if (found_key.offset == 0) break; key.offset = found_key.offset - 1; @@ -7475,6 +7501,8 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, * full log sync. * Also we don't need to worry with renames, since btrfs_rename() marks the log * for full commit when renaming a subvolume. + * + * Must be called before creating the subvolume entry in its parent directory. */ void btrfs_record_new_subvolume(const struct btrfs_trans_handle *trans, struct btrfs_inode *dir) @@ -7511,6 +7539,9 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, bool log_pinned = false; int ret; + btrfs_init_log_ctx(&ctx, inode); + ctx.logging_new_name = true; + /* * this will force the logging code to walk the dentry chain * up for the file @@ -7542,6 +7573,13 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, ret = 0; /* + * Now that we know we need to update the log, allocate the scratch eb + * for the context before joining a log transaction below, as this can + * take time and therefore we could delay log commits from other tasks. + */ + btrfs_init_log_ctx_scratch_eb(&ctx); + + /* * If we are doing a rename (old_dir is not NULL) from a directory that * was previously logged, make sure that on log replay we get the old * dir entry deleted. This is needed because we will also log the new @@ -7559,6 +7597,14 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, &old_dentry->d_name, 0, &fname); if (ret) goto out; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + fscrypt_free_filename(&fname); + goto out; + } + /* * We have two inodes to update in the log, the old directory and * the inode that got renamed, so we must pin the log to prevent @@ -7572,19 +7618,13 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, * mark the log for a full commit. */ if (WARN_ON_ONCE(ret < 0)) { + btrfs_free_path(path); fscrypt_free_filename(&fname); goto out; } log_pinned = true; - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - fscrypt_free_filename(&fname); - goto out; - } - /* * Other concurrent task might be logging the old directory, * as it can be triggered when logging other inode that had or @@ -7616,9 +7656,6 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, goto out; } - btrfs_init_log_ctx(&ctx, inode); - ctx.logging_new_name = true; - btrfs_init_log_ctx_scratch_eb(&ctx); /* * We don't care about the return value. If we fail to log the new name * then we know the next attempt to sync the log will fallback to a full @@ -7627,7 +7664,6 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, * inconsistent state after a rename operation. */ btrfs_log_inode_parent(trans, inode, parent, LOG_INODE_EXISTS, &ctx); - free_extent_buffer(ctx.scratch_eb); ASSERT(list_empty(&ctx.conflict_inodes)); out: /* @@ -7640,5 +7676,6 @@ out: btrfs_set_log_full_commit(trans); if (log_pinned) btrfs_end_log_trans(root); + free_extent_buffer(ctx.scratch_eb); } |