Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- | fs/btrfs/tree-log.c | 270
1 files changed, 91 insertions, 179 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index f8220ec02036..1650dc44a5e3 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -545,12 +545,8 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root,
 	key.type = BTRFS_INODE_ITEM_KEY;
 	key.offset = 0;
 	inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
-	if (IS_ERR(inode)) {
+	if (IS_ERR(inode))
 		inode = NULL;
-	} else if (is_bad_inode(inode)) {
-		iput(inode);
-		inode = NULL;
-	}
 	return inode;
 }
 
@@ -597,7 +593,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 		if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
 			nbytes = 0;
 	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
-		size = btrfs_file_extent_inline_len(eb, slot, item);
+		size = btrfs_file_extent_ram_bytes(eb, item);
 		nbytes = btrfs_file_extent_ram_bytes(eb, item);
 		extent_end = ALIGN(start + size,
 				   fs_info->sectorsize);
@@ -685,7 +681,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 		 * as the owner of the file extent changed from log tree
 		 * (doesn't affect qgroup) to fs/file tree(affects qgroup)
 		 */
-		ret = btrfs_qgroup_trace_extent(trans, fs_info,
+		ret = btrfs_qgroup_trace_extent(trans,
 				btrfs_file_extent_disk_bytenr(eb, item),
 				btrfs_file_extent_disk_num_bytes(eb, item),
 				GFP_NOFS);
@@ -715,7 +711,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 			 * allocation tree
 			 */
			ret = btrfs_alloc_logged_file_extent(trans,
-						fs_info,
 						root->root_key.objectid,
 						key->objectid, offset, &ins);
 			if (ret)
@@ -1291,6 +1286,46 @@ again:
 	return ret;
 }
 
+static int btrfs_inode_ref_exists(struct inode *inode, struct inode *dir,
+				  const u8 ref_type, const char *name,
+				  const int namelen)
+{
+	struct btrfs_key key;
+	struct btrfs_path *path;
+	const u64 parent_id = btrfs_ino(BTRFS_I(dir));
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = btrfs_ino(BTRFS_I(inode));
+	key.type = ref_type;
+	if (key.type == BTRFS_INODE_REF_KEY)
+		key.offset = parent_id;
+	else
+		key.offset = btrfs_extref_hash(parent_id, name, namelen);
+
+	ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+	if (ret > 0) {
+		ret = 0;
+		goto out;
+	}
+	if (key.type == BTRFS_INODE_EXTREF_KEY)
+		ret = btrfs_find_name_in_ext_backref(path->nodes[0],
+				path->slots[0], parent_id,
+				name, namelen, NULL);
+	else
+		ret = btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
+						 name, namelen, NULL);
+
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
 /*
  * replay one inode back reference item found in the log tree.
  * eb, slot and key refer to the buffer and key found in the log tree.
@@ -1400,6 +1435,32 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
 			}
 		}
 
+		/*
+		 * If a reference item already exists for this inode
+		 * with the same parent and name, but different index,
+		 * drop it and the corresponding directory index entries
+		 * from the parent before adding the new reference item
+		 * and dir index entries, otherwise we would fail with
+		 * -EEXIST returned from btrfs_add_link() below.
+		 */
+		ret = btrfs_inode_ref_exists(inode, dir, key->type,
+					     name, namelen);
+		if (ret > 0) {
+			ret = btrfs_unlink_inode(trans, root,
+						 BTRFS_I(dir),
+						 BTRFS_I(inode),
+						 name, namelen);
+			/*
+			 * If we dropped the link count to 0, bump it so
+			 * that later the iput() on the inode will not
+			 * free it. We will fixup the link count later.
+			 */
+			if (!ret && inode->i_nlink == 0)
+				inc_nlink(inode);
+		}
+		if (ret < 0)
+			goto out;
+
 		/* insert our name */
 		ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
@@ -2120,7 +2181,7 @@ again:
 							     dir_key->offset,
 							     name, name_len, 0);
 		}
-		if (!log_di || (IS_ERR(log_di) && PTR_ERR(log_di) == -ENOENT)) {
+		if (!log_di || log_di == ERR_PTR(-ENOENT)) {
 			btrfs_dir_item_key_to_cpu(eb, di, &location);
 			btrfs_release_path(path);
 			btrfs_release_path(log_path);
@@ -2933,7 +2994,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	/* bail out if we need to do a full commit */
 	if (btrfs_need_log_full_commit(fs_info, trans)) {
 		ret = -EAGAIN;
-		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&root->log_mutex);
 		goto out;
 	}
@@ -2951,7 +3011,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	if (ret) {
 		blk_finish_plug(&plug);
 		btrfs_abort_transaction(trans, ret);
-		btrfs_free_logged_extents(log, log_transid);
 		btrfs_set_log_full_commit(fs_info, trans);
 		mutex_unlock(&root->log_mutex);
 		goto out;
 	}
@@ -3002,7 +3061,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 			goto out;
 		}
 		btrfs_wait_tree_log_extents(log, mark);
-		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&log_root_tree->log_mutex);
 		ret = -EAGAIN;
 		goto out;
 	}
@@ -3020,7 +3078,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	if (atomic_read(&log_root_tree->log_commit[index2])) {
 		blk_finish_plug(&plug);
 		ret = btrfs_wait_tree_log_extents(log, mark);
-		btrfs_wait_logged_extents(trans, log, log_transid);
 		wait_log_commit(log_root_tree,
 				root_log_ctx.log_transid);
 		mutex_unlock(&log_root_tree->log_mutex);
@@ -3045,7 +3102,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	if (btrfs_need_log_full_commit(fs_info, trans)) {
 		blk_finish_plug(&plug);
 		btrfs_wait_tree_log_extents(log, mark);
-		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&log_root_tree->log_mutex);
 		ret = -EAGAIN;
 		goto out_wake_log_root;
 	}
@@ -3058,7 +3114,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	if (ret) {
 		btrfs_set_log_full_commit(fs_info, trans);
 		btrfs_abort_transaction(trans, ret);
-		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&log_root_tree->log_mutex);
 		goto out_wake_log_root;
 	}
@@ -3068,11 +3123,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 					 EXTENT_NEW | EXTENT_DIRTY);
 	if (ret) {
 		btrfs_set_log_full_commit(fs_info, trans);
-		btrfs_free_logged_extents(log, log_transid);
 		mutex_unlock(&log_root_tree->log_mutex);
 		goto out_wake_log_root;
 	}
-	btrfs_wait_logged_extents(trans, log, log_transid);
 
 	btrfs_set_super_log_root(fs_info->super_for_commit,
 				 log_root_tree->node->start);
@@ -3159,14 +3212,6 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
 				  EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
 	}
 
-	/*
-	 * We may have short-circuited the log tree with the full commit logic
-	 * and left ordered extents on our list, so clear these out to keep us
-	 * from leaking inodes and memory.
-	 */
-	btrfs_free_logged_extents(log, 0);
-	btrfs_free_logged_extents(log, 1);
-
 	free_extent_buffer(log->node);
 	kfree(log);
 }
@@ -3756,7 +3801,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 			       int start_slot, int nr, int inode_only,
 			       u64 logged_isize)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	unsigned long src_offset;
 	unsigned long dst_offset;
 	struct btrfs_root *log = inode->root->log_root;
@@ -3937,9 +3982,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
 					       struct btrfs_file_extent_item);
 		if (btrfs_file_extent_type(src, extent) ==
 		    BTRFS_FILE_EXTENT_INLINE) {
-			len = btrfs_file_extent_inline_len(src,
-							   src_path->slots[0],
-							   extent);
+			len = btrfs_file_extent_ram_bytes(src, extent);
 			*last_extent = ALIGN(key.offset + len,
 					     fs_info->sectorsize);
 		} else {
@@ -4004,7 +4047,7 @@ fill_holes:
 		extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
 		if (btrfs_file_extent_type(src, extent) ==
 		    BTRFS_FILE_EXTENT_INLINE) {
-			len = btrfs_file_extent_inline_len(src, i, extent);
+			len = btrfs_file_extent_ram_bytes(src, extent);
 			extent_end = ALIGN(key.offset + len,
 					   fs_info->sectorsize);
 		} else {
@@ -4078,131 +4121,32 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)
 	return 0;
 }
 
-static int wait_ordered_extents(struct btrfs_trans_handle *trans,
-				struct inode *inode,
-				struct btrfs_root *root,
-				const struct extent_map *em,
-				const struct list_head *logged_list,
-				bool *ordered_io_error)
+static int log_extent_csums(struct btrfs_trans_handle *trans,
+			    struct btrfs_inode *inode,
+			    struct btrfs_root *log_root,
+			    const struct extent_map *em)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
-	struct btrfs_ordered_extent *ordered;
-	struct btrfs_root *log = root->log_root;
-	u64 mod_start = em->mod_start;
-	u64 mod_len = em->mod_len;
-	const bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 	u64 csum_offset;
 	u64 csum_len;
 	LIST_HEAD(ordered_sums);
 	int ret = 0;
 
-	*ordered_io_error = false;
-
-	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
+	if (inode->flags & BTRFS_INODE_NODATASUM ||
+	    test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
 	    em->block_start == EXTENT_MAP_HOLE)
 		return 0;
 
-	/*
-	 * Wait far any ordered extent that covers our extent map. If it
-	 * finishes without an error, first check and see if our csums are on
-	 * our outstanding ordered extents.
-	 */
-	list_for_each_entry(ordered, logged_list, log_list) {
-		struct btrfs_ordered_sum *sum;
-
-		if (!mod_len)
-			break;
-
-		if (ordered->file_offset + ordered->len <= mod_start ||
-		    mod_start + mod_len <= ordered->file_offset)
-			continue;
-
-		if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
-		    !test_bit(BTRFS_ORDERED_IOERR, &ordered->flags) &&
-		    !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
-			const u64 start = ordered->file_offset;
-			const u64 end = ordered->file_offset + ordered->len - 1;
-
-			WARN_ON(ordered->inode != inode);
-			filemap_fdatawrite_range(inode->i_mapping, start, end);
-		}
-
-		wait_event(ordered->wait,
-			   (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) ||
-			    test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)));
-
-		if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) {
-			/*
-			 * Clear the AS_EIO/AS_ENOSPC flags from the inode's
-			 * i_mapping flags, so that the next fsync won't get
-			 * an outdated io error too.
-			 */
-			filemap_check_errors(inode->i_mapping);
-			*ordered_io_error = true;
-			break;
-		}
-		/*
-		 * We are going to copy all the csums on this ordered extent, so
-		 * go ahead and adjust mod_start and mod_len in case this
-		 * ordered extent has already been logged.
-		 */
-		if (ordered->file_offset > mod_start) {
-			if (ordered->file_offset + ordered->len >=
-			    mod_start + mod_len)
-				mod_len = ordered->file_offset - mod_start;
-			/*
-			 * If we have this case
-			 *
-			 * |--------- logged extent ---------|
-			 *       |----- ordered extent ----|
-			 *
-			 * Just don't mess with mod_start and mod_len, we'll
-			 * just end up logging more csums than we need and it
-			 * will be ok.
-			 */
-		} else {
-			if (ordered->file_offset + ordered->len <
-			    mod_start + mod_len) {
-				mod_len = (mod_start + mod_len) -
-					(ordered->file_offset + ordered->len);
-				mod_start = ordered->file_offset +
-					ordered->len;
-			} else {
-				mod_len = 0;
-			}
-		}
-
-		if (skip_csum)
-			continue;
-
-		/*
-		 * To keep us from looping for the above case of an ordered
-		 * extent that falls inside of the logged extent.
-		 */
-		if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM,
-				     &ordered->flags))
-			continue;
-
-		list_for_each_entry(sum, &ordered->list, list) {
-			ret = btrfs_csum_file_blocks(trans, log, sum);
-			if (ret)
-				break;
-		}
-	}
-
-	if (*ordered_io_error || !mod_len || ret || skip_csum)
-		return ret;
-
+	/* If we're compressed we have to save the entire range of csums. */
 	if (em->compress_type) {
 		csum_offset = 0;
 		csum_len = max(em->block_len, em->orig_block_len);
 	} else {
-		csum_offset = mod_start - em->start;
-		csum_len = mod_len;
+		csum_offset = em->mod_start - em->start;
+		csum_len = em->mod_len;
 	}
 
 	/* block start is already adjusted for the file extent offset. */
-	ret = btrfs_lookup_csums_range(fs_info->csum_root,
+	ret = btrfs_lookup_csums_range(trans->fs_info->csum_root,
 				       em->block_start + csum_offset,
 				       em->block_start + csum_offset +
 				       csum_len - 1, &ordered_sums, 0);
@@ -4214,7 +4158,7 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans,
 						   struct btrfs_ordered_sum,
 						   list);
 		if (!ret)
-			ret = btrfs_csum_file_blocks(trans, log, sums);
+			ret = btrfs_csum_file_blocks(trans, log_root, sums);
 		list_del(&sums->list);
 		kfree(sums);
 	}
@@ -4226,7 +4170,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
 			  struct btrfs_inode *inode, struct btrfs_root *root,
 			  const struct extent_map *em,
 			  struct btrfs_path *path,
-			  const struct list_head *logged_list,
 			  struct btrfs_log_ctx *ctx)
 {
 	struct btrfs_root *log = root->log_root;
@@ -4238,18 +4181,11 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
 	u64 block_len;
 	int ret;
 	int extent_inserted = 0;
-	bool ordered_io_err = false;
 
-	ret = wait_ordered_extents(trans, &inode->vfs_inode, root, em,
-				   logged_list, &ordered_io_err);
+	ret = log_extent_csums(trans, inode, log, em);
 	if (ret)
 		return ret;
 
-	if (ordered_io_err) {
-		ctx->io_err = -EIO;
-		return ctx->io_err;
-	}
-
 	btrfs_init_map_token(&token);
 
 	ret = __btrfs_drop_extents(trans, log, &inode->vfs_inode, path,
 				   em->start,
@@ -4424,7 +4360,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 				     struct btrfs_root *root,
 				     struct btrfs_inode *inode,
 				     struct btrfs_path *path,
-				     struct list_head *logged_list,
 				     struct btrfs_log_ctx *ctx,
 				     const u64 start,
 				     const u64 end)
@@ -4480,20 +4415,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 	}
 
 	list_sort(NULL, &extents, extent_cmp);
-	btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
-	/*
-	 * Some ordered extents started by fsync might have completed
-	 * before we could collect them into the list logged_list, which
-	 * means they're gone, not in our logged_list nor in the inode's
-	 * ordered tree. We want the application/user space to know an
-	 * error happened while attempting to persist file data so that
-	 * it can take proper action. If such error happened, we leave
-	 * without writing to the log tree and the fsync must report the
-	 * file data write error and not commit the current transaction.
-	 */
-	ret = filemap_check_errors(inode->vfs_inode.i_mapping);
-	if (ret)
-		ctx->io_err = ret;
 process:
 	while (!list_empty(&extents)) {
 		em = list_entry(extents.next, struct extent_map, list);
@@ -4512,8 +4433,7 @@ process:
 
 		write_unlock(&tree->lock);
 
-		ret = log_one_extent(trans, inode, root, em, path, logged_list,
-				     ctx);
+		ret = log_one_extent(trans, inode, root, em, path, ctx);
 		write_lock(&tree->lock);
 		clear_em_logging(tree, em);
 		free_extent_map(em);
@@ -4712,9 +4632,7 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
 
 	if (btrfs_file_extent_type(leaf, extent) ==
 	    BTRFS_FILE_EXTENT_INLINE) {
-		len = btrfs_file_extent_inline_len(leaf,
-						   path->slots[0],
-						   extent);
+		len = btrfs_file_extent_ram_bytes(leaf, extent);
 		ASSERT(len == i_size ||
 		       (len == fs_info->sectorsize &&
 			btrfs_file_extent_compression(leaf, extent) !=
@@ -4898,7 +4816,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	struct btrfs_key min_key;
 	struct btrfs_key max_key;
 	struct btrfs_root *log = root->log_root;
-	LIST_HEAD(logged_list);
 	u64 last_extent = 0;
 	int err = 0;
 	int ret;
@@ -5094,8 +5011,7 @@ again:
 				 * we don't need to do more work nor fallback to
 				 * a transaction commit.
 				 */
-				if (IS_ERR(other_inode) &&
-				    PTR_ERR(other_inode) == -ENOENT) {
+				if (other_inode == ERR_PTR(-ENOENT)) {
 					goto next_key;
 				} else if (IS_ERR(other_inode)) {
 					err = PTR_ERR(other_inode);
@@ -5235,7 +5151,7 @@ log_extents:
 	}
 	if (fast_search) {
 		ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
-						&logged_list, ctx, start, end);
+						ctx, start, end);
 		if (ret) {
 			err = ret;
 			goto out_unlock;
 		}
@@ -5286,10 +5202,6 @@ log_extents:
 		inode->last_log_commit = inode->last_sub_trans;
 	spin_unlock(&inode->lock);
 out_unlock:
-	if (unlikely(err))
-		btrfs_put_logged_extents(&logged_list);
-	else
-		btrfs_submit_logged_extents(&logged_list, log);
 	mutex_unlock(&inode->log_mutex);
 
 	btrfs_free_path(path);
@@ -5585,7 +5497,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
 				 struct btrfs_inode *inode,
 				 struct btrfs_log_ctx *ctx)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 	int ret;
 	struct btrfs_path *path;
 	struct btrfs_key key;
@@ -6120,7 +6032,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
 			struct btrfs_inode *inode, struct btrfs_inode *old_dir,
 			struct dentry *parent)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
+	struct btrfs_fs_info *fs_info = trans->fs_info;
 
 	/*
 	 * this will force the logging code to walk the dentry chain