diff options
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/block-group.c | 59 |
-rw-r--r-- | fs/btrfs/ctree.c | 9 |
-rw-r--r-- | fs/btrfs/free-space-tree.c | 17 |
-rw-r--r-- | fs/btrfs/qgroup.c | 38 |
-rw-r--r-- | fs/btrfs/send.c | 359 |
-rw-r--r-- | fs/btrfs/subpage.c | 19 |
-rw-r--r-- | fs/btrfs/super.c | 13 |
-rw-r--r-- | fs/btrfs/transaction.c | 1 |
-rw-r--r-- | fs/btrfs/zoned.c | 13 |
9 files changed, 321 insertions, 207 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 7eef79ece5b3..83a196521670 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1482,6 +1482,32 @@ out: } /* + * Link the block_group to a list via bg_list. + * + * @bg: The block_group to link to the list. + * @list: The list to link it to. + * + * Use this rather than list_add_tail() directly to ensure proper respect + * to locking and refcounting. + * + * Returns: true if the bg was linked with a refcount bump and false otherwise. + */ +static bool btrfs_link_bg_list(struct btrfs_block_group *bg, struct list_head *list) +{ + struct btrfs_fs_info *fs_info = bg->fs_info; + bool added = false; + + spin_lock(&fs_info->unused_bgs_lock); + if (list_empty(&bg->bg_list)) { + btrfs_get_block_group(bg); + list_add_tail(&bg->bg_list, list); + added = true; + } + spin_unlock(&fs_info->unused_bgs_lock); + return added; +} + +/* * Process the unused_bgs list and remove any that don't have any allocated * space inside of them. */ @@ -1597,8 +1623,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * drop under the "next" label for the * fs_info->unused_bgs list. */ - btrfs_get_block_group(block_group); - list_add_tail(&block_group->bg_list, &retry_list); + btrfs_link_bg_list(block_group, &retry_list); trace_btrfs_skip_unused_block_group(block_group); spin_unlock(&block_group->lock); @@ -1621,8 +1646,10 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) ret = btrfs_zone_finish(block_group); if (ret < 0) { btrfs_dec_block_group_ro(block_group); - if (ret == -EAGAIN) + if (ret == -EAGAIN) { + btrfs_link_bg_list(block_group, &retry_list); ret = 0; + } goto next; } @@ -1971,20 +1998,8 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) spin_unlock(&space_info->lock); next: - if (ret && !READ_ONCE(space_info->periodic_reclaim)) { - /* Refcount held by the reclaim_bgs list after splice. 
*/ - spin_lock(&fs_info->unused_bgs_lock); - /* - * This block group might be added to the unused list - * during the above process. Move it back to the - * reclaim list otherwise. - */ - if (list_empty(&bg->bg_list)) { - btrfs_get_block_group(bg); - list_add_tail(&bg->bg_list, &retry_list); - } - spin_unlock(&fs_info->unused_bgs_lock); - } + if (ret && !READ_ONCE(space_info->periodic_reclaim)) + btrfs_link_bg_list(bg, &retry_list); btrfs_put_block_group(bg); mutex_unlock(&fs_info->reclaim_bgs_lock); @@ -2024,13 +2039,8 @@ void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg) { struct btrfs_fs_info *fs_info = bg->fs_info; - spin_lock(&fs_info->unused_bgs_lock); - if (list_empty(&bg->bg_list)) { - btrfs_get_block_group(bg); + if (btrfs_link_bg_list(bg, &fs_info->reclaim_bgs)) trace_btrfs_add_reclaim_block_group(bg); - list_add_tail(&bg->bg_list, &fs_info->reclaim_bgs); - } - spin_unlock(&fs_info->unused_bgs_lock); } static int read_bg_from_eb(struct btrfs_fs_info *fs_info, const struct btrfs_key *key, @@ -2807,6 +2817,7 @@ next: spin_lock(&fs_info->unused_bgs_lock); list_del_init(&block_group->bg_list); clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags); + btrfs_put_block_group(block_group); spin_unlock(&fs_info->unused_bgs_lock); /* @@ -2945,7 +2956,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran } #endif - list_add_tail(&cache->bg_list, &trans->new_bgs); + btrfs_link_bg_list(cache, &trans->new_bgs); btrfs_inc_delayed_refs_rsv_bg_inserts(fs_info); set_avail_alloc_bits(fs_info, type); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3ba15d9c3e88..81735d19feff 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -350,7 +350,14 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid); - WARN_ON(btrfs_header_generation(buf) > trans->transid); + if (unlikely(btrfs_header_generation(buf) > trans->transid)) { + btrfs_tree_unlock(cow); + 
free_extent_buffer(cow); + ret = -EUCLEAN; + btrfs_abort_transaction(trans, ret); + return ret; + } + if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) ret = btrfs_inc_ref(trans, root, cow, 1); else diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 308abbf8855b..51f286d5d00a 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1379,12 +1379,17 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans, clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags); ret = add_new_free_space_info(trans, block_group, path); - if (ret) + if (ret) { + btrfs_abort_transaction(trans, ret); return ret; + } + + ret = __add_to_free_space_tree(trans, block_group, path, + block_group->start, block_group->length); + if (ret) + btrfs_abort_transaction(trans, ret); - return __add_to_free_space_tree(trans, block_group, path, - block_group->start, - block_group->length); + return 0; } int add_block_group_free_space(struct btrfs_trans_handle *trans, @@ -1404,16 +1409,14 @@ int add_block_group_free_space(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; + btrfs_abort_transaction(trans, ret); goto out; } ret = __add_block_group_free_space(trans, block_group, path); - out: btrfs_free_path(path); mutex_unlock(&block_group->free_space_lock); - if (ret) - btrfs_abort_transaction(trans, ret); return ret; } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 6b181bf9f156..530a2bab6ada 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -226,8 +226,7 @@ static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info, return qgroup; } -static void __del_qgroup_rb(struct btrfs_fs_info *fs_info, - struct btrfs_qgroup *qgroup) +static void __del_qgroup_rb(struct btrfs_qgroup *qgroup) { struct btrfs_qgroup_list *list; @@ -258,7 +257,7 @@ static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid) return -ENOENT; rb_erase(&qgroup->node, 
&fs_info->qgroup_tree); - __del_qgroup_rb(fs_info, qgroup); + __del_qgroup_rb(qgroup); return 0; } @@ -631,22 +630,30 @@ bool btrfs_check_quota_leak(const struct btrfs_fs_info *fs_info) /* * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(), - * first two are in single-threaded paths.And for the third one, we have set - * quota_root to be null with qgroup_lock held before, so it is safe to clean - * up the in-memory structures without qgroup_lock held. + * first two are in single-threaded paths. */ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) { struct rb_node *n; struct btrfs_qgroup *qgroup; + /* + * btrfs_quota_disable() can be called concurrently with + * btrfs_qgroup_rescan() -> qgroup_rescan_zero_tracking(), so take the + * lock. + */ + spin_lock(&fs_info->qgroup_lock); while ((n = rb_first(&fs_info->qgroup_tree))) { qgroup = rb_entry(n, struct btrfs_qgroup, node); rb_erase(n, &fs_info->qgroup_tree); - __del_qgroup_rb(fs_info, qgroup); + __del_qgroup_rb(qgroup); + spin_unlock(&fs_info->qgroup_lock); btrfs_sysfs_del_one_qgroup(fs_info, qgroup); kfree(qgroup); + spin_lock(&fs_info->qgroup_lock); } + spin_unlock(&fs_info->qgroup_lock); + /* * We call btrfs_free_qgroup_config() when unmounting * filesystem and disabling quota, so we set qgroup_ulist @@ -4057,12 +4064,21 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) qgroup_rescan_zero_tracking(fs_info); mutex_lock(&fs_info->qgroup_rescan_lock); - fs_info->qgroup_rescan_running = true; - btrfs_queue_work(fs_info->qgroup_rescan_workers, - &fs_info->qgroup_rescan_work); + /* + * The rescan worker is only for full accounting qgroups, check if it's + * enabled as it is pointless to queue it otherwise. A concurrent quota + * disable may also have just cleared BTRFS_FS_QUOTA_ENABLED. 
+ */ + if (btrfs_qgroup_full_accounting(fs_info)) { + fs_info->qgroup_rescan_running = true; + btrfs_queue_work(fs_info->qgroup_rescan_workers, + &fs_info->qgroup_rescan_work); + } else { + ret = -ENOTCONN; + } mutex_unlock(&fs_info->qgroup_rescan_lock); - return 0; + return ret; } int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index c843b4aefb8a..41b7cbd07025 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4,6 +4,7 @@ */ #include <linux/bsearch.h> +#include <linux/falloc.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/sort.h> @@ -178,6 +179,7 @@ struct send_ctx { u64 cur_inode_rdev; u64 cur_inode_last_extent; u64 cur_inode_next_write_offset; + struct fs_path cur_inode_path; bool cur_inode_new; bool cur_inode_new_gen; bool cur_inode_deleted; @@ -436,6 +438,14 @@ static void fs_path_reset(struct fs_path *p) } } +static void init_path(struct fs_path *p) +{ + p->reversed = 0; + p->buf = p->inline_buf; + p->buf_len = FS_PATH_INLINE_SIZE; + fs_path_reset(p); +} + static struct fs_path *fs_path_alloc(void) { struct fs_path *p; @@ -443,10 +453,7 @@ static struct fs_path *fs_path_alloc(void) p = kmalloc(sizeof(*p), GFP_KERNEL); if (!p) return NULL; - p->reversed = 0; - p->buf = p->inline_buf; - p->buf_len = FS_PATH_INLINE_SIZE; - fs_path_reset(p); + init_path(p); return p; } @@ -471,7 +478,7 @@ static void fs_path_free(struct fs_path *p) kfree(p); } -static int fs_path_len(struct fs_path *p) +static inline int fs_path_len(const struct fs_path *p) { return p->end - p->start; } @@ -624,6 +631,14 @@ static void fs_path_unreverse(struct fs_path *p) p->reversed = 0; } +static inline bool is_current_inode_path(const struct send_ctx *sctx, + const struct fs_path *path) +{ + const struct fs_path *cur = &sctx->cur_inode_path; + + return (strncmp(path->start, cur->start, fs_path_len(cur)) == 0); +} + static struct btrfs_path *alloc_path_for_send(void) { struct btrfs_path *path; @@ -2450,6 
+2465,14 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, u64 parent_inode = 0; u64 parent_gen = 0; int stop = 0; + const bool is_cur_inode = (ino == sctx->cur_ino && gen == sctx->cur_inode_gen); + + if (is_cur_inode && fs_path_len(&sctx->cur_inode_path) > 0) { + if (dest != &sctx->cur_inode_path) + return fs_path_copy(dest, &sctx->cur_inode_path); + + return 0; + } name = fs_path_alloc(); if (!name) { @@ -2501,8 +2524,12 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, out: fs_path_free(name); - if (!ret) + if (!ret) { fs_path_unreverse(dest); + if (is_cur_inode && dest != &sctx->cur_inode_path) + ret = fs_path_copy(&sctx->cur_inode_path, dest); + } + return ret; } @@ -2597,6 +2624,47 @@ out: return ret; } +static struct fs_path *get_cur_inode_path(struct send_ctx *sctx) +{ + if (fs_path_len(&sctx->cur_inode_path) == 0) { + int ret; + + ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, + &sctx->cur_inode_path); + if (ret < 0) + return ERR_PTR(ret); + } + + return &sctx->cur_inode_path; +} + +static struct fs_path *get_path_for_command(struct send_ctx *sctx, u64 ino, u64 gen) +{ + struct fs_path *path; + int ret; + + if (ino == sctx->cur_ino && gen == sctx->cur_inode_gen) + return get_cur_inode_path(sctx); + + path = fs_path_alloc(); + if (!path) + return ERR_PTR(-ENOMEM); + + ret = get_cur_path(sctx, ino, gen, path); + if (ret < 0) { + fs_path_free(path); + return ERR_PTR(ret); + } + + return path; +} + +static void free_path_for_command(const struct send_ctx *sctx, struct fs_path *path) +{ + if (path != &sctx->cur_inode_path) + fs_path_free(path); +} + static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size) { struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; @@ -2605,17 +2673,14 @@ static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size) btrfs_debug(fs_info, "send_truncate %llu size=%llu", ino, size); - p = fs_path_alloc(); - if (!p) - return -ENOMEM; + p = 
get_path_for_command(sctx, ino, gen); + if (IS_ERR(p)) + return PTR_ERR(p); ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE); if (ret < 0) goto out; - ret = get_cur_path(sctx, ino, gen, p); - if (ret < 0) - goto out; TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size); @@ -2623,7 +2688,7 @@ static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size) tlv_put_failure: out: - fs_path_free(p); + free_path_for_command(sctx, p); return ret; } @@ -2635,17 +2700,14 @@ static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode) btrfs_debug(fs_info, "send_chmod %llu mode=%llu", ino, mode); - p = fs_path_alloc(); - if (!p) - return -ENOMEM; + p = get_path_for_command(sctx, ino, gen); + if (IS_ERR(p)) + return PTR_ERR(p); ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD); if (ret < 0) goto out; - ret = get_cur_path(sctx, ino, gen, p); - if (ret < 0) - goto out; TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode & 07777); @@ -2653,7 +2715,7 @@ static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode) tlv_put_failure: out: - fs_path_free(p); + free_path_for_command(sctx, p); return ret; } @@ -2668,17 +2730,14 @@ static int send_fileattr(struct send_ctx *sctx, u64 ino, u64 gen, u64 fileattr) btrfs_debug(fs_info, "send_fileattr %llu fileattr=%llu", ino, fileattr); - p = fs_path_alloc(); - if (!p) - return -ENOMEM; + p = get_path_for_command(sctx, ino, gen); + if (IS_ERR(p)) + return PTR_ERR(p); ret = begin_cmd(sctx, BTRFS_SEND_C_FILEATTR); if (ret < 0) goto out; - ret = get_cur_path(sctx, ino, gen, p); - if (ret < 0) - goto out; TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); TLV_PUT_U64(sctx, BTRFS_SEND_A_FILEATTR, fileattr); @@ -2686,7 +2745,7 @@ static int send_fileattr(struct send_ctx *sctx, u64 ino, u64 gen, u64 fileattr) tlv_put_failure: out: - fs_path_free(p); + free_path_for_command(sctx, p); return ret; } @@ -2699,17 +2758,14 @@ static int send_chown(struct send_ctx *sctx, u64 ino, 
u64 gen, u64 uid, u64 gid) btrfs_debug(fs_info, "send_chown %llu uid=%llu, gid=%llu", ino, uid, gid); - p = fs_path_alloc(); - if (!p) - return -ENOMEM; + p = get_path_for_command(sctx, ino, gen); + if (IS_ERR(p)) + return PTR_ERR(p); ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN); if (ret < 0) goto out; - ret = get_cur_path(sctx, ino, gen, p); - if (ret < 0) - goto out; TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid); TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid); @@ -2718,7 +2774,7 @@ static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid) tlv_put_failure: out: - fs_path_free(p); + free_path_for_command(sctx, p); return ret; } @@ -2735,9 +2791,9 @@ static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen) btrfs_debug(fs_info, "send_utimes %llu", ino); - p = fs_path_alloc(); - if (!p) - return -ENOMEM; + p = get_path_for_command(sctx, ino, gen); + if (IS_ERR(p)) + return PTR_ERR(p); path = alloc_path_for_send(); if (!path) { @@ -2762,9 +2818,6 @@ static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen) if (ret < 0) goto out; - ret = get_cur_path(sctx, ino, gen, p); - if (ret < 0) - goto out; TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, &ii->atime); TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, &ii->mtime); @@ -2776,7 +2829,7 @@ static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen) tlv_put_failure: out: - fs_path_free(p); + free_path_for_command(sctx, p); btrfs_free_path(path); return ret; } @@ -3112,6 +3165,11 @@ static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen, goto out; ret = send_rename(sctx, path, orphan); + if (ret < 0) + goto out; + + if (ino == sctx->cur_ino && gen == sctx->cur_inode_gen) + ret = fs_path_copy(&sctx->cur_inode_path, orphan); out: fs_path_free(orphan); @@ -4165,6 +4223,23 @@ out: return ret; } +static int rename_current_inode(struct send_ctx *sctx, + struct fs_path *current_path, + struct fs_path 
*new_path) +{ + int ret; + + ret = send_rename(sctx, current_path, new_path); + if (ret < 0) + return ret; + + ret = fs_path_copy(&sctx->cur_inode_path, new_path); + if (ret < 0) + return ret; + + return fs_path_copy(current_path, new_path); +} + /* * This does all the move/link/unlink/rmdir magic. */ @@ -4179,9 +4254,9 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) u64 ow_inode = 0; u64 ow_gen; u64 ow_mode; - int did_overwrite = 0; - int is_orphan = 0; u64 last_dir_ino_rm = 0; + bool did_overwrite = false; + bool is_orphan = false; bool can_rename = true; bool orphanized_dir = false; bool orphanized_ancestor = false; @@ -4223,14 +4298,14 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) if (ret < 0) goto out; if (ret) - did_overwrite = 1; + did_overwrite = true; } if (sctx->cur_inode_new || did_overwrite) { ret = gen_unique_name(sctx, sctx->cur_ino, sctx->cur_inode_gen, valid_path); if (ret < 0) goto out; - is_orphan = 1; + is_orphan = true; } else { ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, valid_path); @@ -4355,6 +4430,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) if (ret > 0) { orphanized_ancestor = true; fs_path_reset(valid_path); + fs_path_reset(&sctx->cur_inode_path); ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, valid_path); @@ -4450,13 +4526,10 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) * it depending on the inode mode. 
*/ if (is_orphan && can_rename) { - ret = send_rename(sctx, valid_path, cur->full_path); - if (ret < 0) - goto out; - is_orphan = 0; - ret = fs_path_copy(valid_path, cur->full_path); + ret = rename_current_inode(sctx, valid_path, cur->full_path); if (ret < 0) goto out; + is_orphan = false; } else if (can_rename) { if (S_ISDIR(sctx->cur_inode_mode)) { /* @@ -4464,10 +4537,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) * dirs, we always have one new and one deleted * ref. The deleted ref is ignored later. */ - ret = send_rename(sctx, valid_path, - cur->full_path); - if (!ret) - ret = fs_path_copy(valid_path, + ret = rename_current_inode(sctx, valid_path, cur->full_path); if (ret < 0) goto out; @@ -4514,7 +4584,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) sctx->cur_inode_gen, valid_path); if (ret < 0) goto out; - is_orphan = 1; + is_orphan = true; } list_for_each_entry(cur, &sctx->deleted_refs, list) { @@ -4560,6 +4630,8 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) ret = send_unlink(sctx, cur->full_path); if (ret < 0) goto out; + if (is_current_inode_path(sctx, cur->full_path)) + fs_path_reset(&sctx->cur_inode_path); } ret = dup_ref(cur, &check_dirs); if (ret < 0) @@ -4878,11 +4950,15 @@ out: } static int send_set_xattr(struct send_ctx *sctx, - struct fs_path *path, const char *name, int name_len, const char *data, int data_len) { - int ret = 0; + struct fs_path *path; + int ret; + + path = get_cur_inode_path(sctx); + if (IS_ERR(path)) + return PTR_ERR(path); ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR); if (ret < 0) @@ -4923,19 +4999,13 @@ static int __process_new_xattr(int num, struct btrfs_key *di_key, const char *name, int name_len, const char *data, int data_len, void *ctx) { - int ret; struct send_ctx *sctx = ctx; - struct fs_path *p; struct posix_acl_xattr_header dummy_acl; /* Capabilities are emitted by finish_inode_if_needed */ if (!strncmp(name, 
XATTR_NAME_CAPS, name_len)) return 0; - p = fs_path_alloc(); - if (!p) - return -ENOMEM; - /* * This hack is needed because empty acls are stored as zero byte * data in xattrs. Problem with that is, that receiving these zero byte @@ -4952,38 +5022,21 @@ static int __process_new_xattr(int num, struct btrfs_key *di_key, } } - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); - if (ret < 0) - goto out; - - ret = send_set_xattr(sctx, p, name, name_len, data, data_len); - -out: - fs_path_free(p); - return ret; + return send_set_xattr(sctx, name, name_len, data, data_len); } static int __process_deleted_xattr(int num, struct btrfs_key *di_key, const char *name, int name_len, const char *data, int data_len, void *ctx) { - int ret; struct send_ctx *sctx = ctx; struct fs_path *p; - p = fs_path_alloc(); - if (!p) - return -ENOMEM; - - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); - if (ret < 0) - goto out; - - ret = send_remove_xattr(sctx, p, name, name_len); + p = get_cur_inode_path(sctx); + if (IS_ERR(p)) + return PTR_ERR(p); -out: - fs_path_free(p); - return ret; + return send_remove_xattr(sctx, p, name, name_len); } static int process_new_xattr(struct send_ctx *sctx) @@ -5216,21 +5269,13 @@ static int process_verity(struct send_ctx *sctx) if (ret < 0) goto iput; - p = fs_path_alloc(); - if (!p) { - ret = -ENOMEM; + p = get_cur_inode_path(sctx); + if (IS_ERR(p)) { + ret = PTR_ERR(p); goto iput; } - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); - if (ret < 0) - goto free_path; ret = send_verity(sctx, p, sctx->verity_descriptor); - if (ret < 0) - goto free_path; - -free_path: - fs_path_free(p); iput: iput(inode); return ret; @@ -5352,31 +5397,25 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len) int ret = 0; struct fs_path *p; - p = fs_path_alloc(); - if (!p) - return -ENOMEM; - btrfs_debug(fs_info, "send_write offset=%llu, len=%d", offset, len); - ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); - if (ret < 
0) - goto out; + p = get_cur_inode_path(sctx); + if (IS_ERR(p)) + return PTR_ERR(p); - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); + ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); if (ret < 0) - goto out; + return ret; TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); ret = put_file_data(sctx, offset, len); if (ret < 0) - goto out; + return ret; ret = send_cmd(sctx); tlv_put_failure: -out: - fs_path_free(p); return ret; } @@ -5389,6 +5428,7 @@ static int send_clone(struct send_ctx *sctx, { int ret = 0; struct fs_path *p; + struct fs_path *cur_inode_path; u64 gen; btrfs_debug(sctx->send_root->fs_info, @@ -5396,6 +5436,10 @@ static int send_clone(struct send_ctx *sctx, offset, len, btrfs_root_id(clone_root->root), clone_root->ino, clone_root->offset); + cur_inode_path = get_cur_inode_path(sctx); + if (IS_ERR(cur_inode_path)) + return PTR_ERR(cur_inode_path); + p = fs_path_alloc(); if (!p) return -ENOMEM; @@ -5404,13 +5448,9 @@ static int send_clone(struct send_ctx *sctx, if (ret < 0) goto out; - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); - if (ret < 0) - goto out; - TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len); - TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, cur_inode_path); if (clone_root->root == sctx->send_root) { ret = get_inode_gen(sctx->send_root, clone_root->ino, &gen); @@ -5461,27 +5501,45 @@ static int send_update_extent(struct send_ctx *sctx, int ret = 0; struct fs_path *p; - p = fs_path_alloc(); - if (!p) - return -ENOMEM; + p = get_cur_inode_path(sctx); + if (IS_ERR(p)) + return PTR_ERR(p); ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT); if (ret < 0) - goto out; + return ret; + + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); + TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); + TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len); + + ret = send_cmd(sctx); - ret = get_cur_path(sctx, 
sctx->cur_ino, sctx->cur_inode_gen, p); +tlv_put_failure: + return ret; +} + +static int send_fallocate(struct send_ctx *sctx, u32 mode, u64 offset, u64 len) +{ + struct fs_path *path; + int ret; + + path = get_cur_inode_path(sctx); + if (IS_ERR(path)) + return PTR_ERR(path); + + ret = begin_cmd(sctx, BTRFS_SEND_C_FALLOCATE); if (ret < 0) - goto out; + return ret; - TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); + TLV_PUT_U32(sctx, BTRFS_SEND_A_FALLOCATE_MODE, mode); TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len); ret = send_cmd(sctx); tlv_put_failure: -out: - fs_path_free(p); return ret; } @@ -5493,6 +5551,14 @@ static int send_hole(struct send_ctx *sctx, u64 end) int ret = 0; /* + * Starting with send stream v2 we have fallocate and can use it to + * punch holes instead of sending writes full of zeroes. + */ + if (proto_cmd_ok(sctx, BTRFS_SEND_C_FALLOCATE)) + return send_fallocate(sctx, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + offset, end - offset); + + /* * A hole that starts at EOF or beyond it. 
Since we do not yet support * fallocate (for extent preallocation and hole punching), sending a * write of zeroes starting at EOF or beyond would later require issuing @@ -5510,12 +5576,10 @@ static int send_hole(struct send_ctx *sctx, u64 end) if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) return send_update_extent(sctx, offset, end - offset); - p = fs_path_alloc(); - if (!p) - return -ENOMEM; - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); - if (ret < 0) - goto tlv_put_failure; + p = get_cur_inode_path(sctx); + if (IS_ERR(p)) + return PTR_ERR(p); + while (offset < end) { u64 len = min(end - offset, read_size); @@ -5536,7 +5600,6 @@ static int send_hole(struct send_ctx *sctx, u64 end) } sctx->cur_inode_next_write_offset = offset; tlv_put_failure: - fs_path_free(p); return ret; } @@ -5559,9 +5622,9 @@ static int send_encoded_inline_extent(struct send_ctx *sctx, if (IS_ERR(inode)) return PTR_ERR(inode); - fspath = fs_path_alloc(); - if (!fspath) { - ret = -ENOMEM; + fspath = get_cur_inode_path(sctx); + if (IS_ERR(fspath)) { + ret = PTR_ERR(fspath); goto out; } @@ -5569,10 +5632,6 @@ static int send_encoded_inline_extent(struct send_ctx *sctx, if (ret < 0) goto out; - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath); - if (ret < 0) - goto out; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); ram_bytes = btrfs_file_extent_ram_bytes(leaf, ei); @@ -5601,7 +5660,6 @@ static int send_encoded_inline_extent(struct send_ctx *sctx, tlv_put_failure: out: - fs_path_free(fspath); iput(inode); return ret; } @@ -5626,9 +5684,9 @@ static int send_encoded_extent(struct send_ctx *sctx, struct btrfs_path *path, if (IS_ERR(inode)) return PTR_ERR(inode); - fspath = fs_path_alloc(); - if (!fspath) { - ret = -ENOMEM; + fspath = get_cur_inode_path(sctx); + if (IS_ERR(fspath)) { + ret = PTR_ERR(fspath); goto out; } @@ -5636,10 +5694,6 @@ static int send_encoded_extent(struct 
send_ctx *sctx, struct btrfs_path *path, if (ret < 0) goto out; - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath); - if (ret < 0) - goto out; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei); @@ -5706,7 +5760,6 @@ static int send_encoded_extent(struct send_ctx *sctx, struct btrfs_path *path, tlv_put_failure: out: - fs_path_free(fspath); iput(inode); return ret; } @@ -5836,7 +5889,6 @@ static int send_extent_data(struct send_ctx *sctx, struct btrfs_path *path, */ static int send_capabilities(struct send_ctx *sctx) { - struct fs_path *fspath = NULL; struct btrfs_path *path; struct btrfs_dir_item *di; struct extent_buffer *leaf; @@ -5862,25 +5914,19 @@ static int send_capabilities(struct send_ctx *sctx) leaf = path->nodes[0]; buf_len = btrfs_dir_data_len(leaf, di); - fspath = fs_path_alloc(); buf = kmalloc(buf_len, GFP_KERNEL); - if (!fspath || !buf) { + if (!buf) { ret = -ENOMEM; goto out; } - ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath); - if (ret < 0) - goto out; - data_ptr = (unsigned long)(di + 1) + btrfs_dir_name_len(leaf, di); read_extent_buffer(leaf, buf, data_ptr, buf_len); - ret = send_set_xattr(sctx, fspath, XATTR_NAME_CAPS, + ret = send_set_xattr(sctx, XATTR_NAME_CAPS, strlen(XATTR_NAME_CAPS), buf, buf_len); out: kfree(buf); - fs_path_free(fspath); btrfs_free_path(path); return ret; } @@ -6906,6 +6952,7 @@ static int changed_inode(struct send_ctx *sctx, sctx->cur_inode_last_extent = (u64)-1; sctx->cur_inode_next_write_offset = 0; sctx->ignore_cur_inode = false; + fs_path_reset(&sctx->cur_inode_path); /* * Set send_progress to current inode. 
This will tell all get_cur_xxx @@ -8178,6 +8225,7 @@ long btrfs_ioctl_send(struct btrfs_inode *inode, const struct btrfs_ioctl_send_a goto out; } + init_path(&sctx->cur_inode_path); INIT_LIST_HEAD(&sctx->new_refs); INIT_LIST_HEAD(&sctx->deleted_refs); @@ -8463,6 +8511,9 @@ out: btrfs_lru_cache_clear(&sctx->dir_created_cache); btrfs_lru_cache_clear(&sctx->dir_utimes_cache); + if (sctx->cur_inode_path.buf != sctx->cur_inode_path.inline_buf) + kfree(sctx->cur_inode_path.buf); + kfree(sctx); } diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index 88a01d51ab11..71a56aaac7ad 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -452,8 +452,25 @@ void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info, spin_lock_irqsave(&subpage->lock, flags); bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + + /* + * Don't clear the TOWRITE tag when starting writeback on a still-dirty + * folio. Doing so can cause WB_SYNC_ALL writepages() to overlook it, + * assume writeback is complete, and exit too early — violating sync + * ordering guarantees. 
+ */ if (!folio_test_writeback(folio)) - folio_start_writeback(folio); + __folio_start_writeback(folio, true); + if (!folio_test_dirty(folio)) { + struct address_space *mapping = folio_mapping(folio); + XA_STATE(xas, &mapping->i_pages, folio->index); + unsigned long flags; + + xas_lock_irqsave(&xas, flags); + xas_load(&xas); + xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); + xas_unlock_irqrestore(&xas, flags); + } spin_unlock_irqrestore(&subpage->lock, flags); } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 6119a06b0569..69f9d5f5cc3c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -88,6 +88,9 @@ struct btrfs_fs_context { refcount_t refs; }; +static void btrfs_emit_options(struct btrfs_fs_info *info, + struct btrfs_fs_context *old); + enum { Opt_acl, Opt_clear_cache, @@ -697,12 +700,9 @@ bool btrfs_check_options(const struct btrfs_fs_info *info, if (!test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state)) { if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) { - btrfs_info(info, "disk space caching is enabled"); btrfs_warn(info, "space cache v1 is being deprecated and will be removed in a future release, please use -o space_cache=v2"); } - if (btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE)) - btrfs_info(info, "using free-space-tree"); } return ret; @@ -979,6 +979,8 @@ static int btrfs_fill_super(struct super_block *sb, return err; } + btrfs_emit_options(fs_info, NULL); + inode = btrfs_iget(BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root); if (IS_ERR(inode)) { err = PTR_ERR(inode); @@ -1436,7 +1438,7 @@ static void btrfs_emit_options(struct btrfs_fs_info *info, { btrfs_info_if_set(info, old, NODATASUM, "setting nodatasum"); btrfs_info_if_set(info, old, DEGRADED, "allowing degraded mounts"); - btrfs_info_if_set(info, old, NODATASUM, "setting nodatasum"); + btrfs_info_if_set(info, old, NODATACOW, "setting nodatacow"); btrfs_info_if_set(info, old, SSD, "enabling ssd optimizations"); btrfs_info_if_set(info, old, SSD_SPREAD, "using spread ssd allocation scheme"); 
btrfs_info_if_set(info, old, NOBARRIER, "turning off barriers"); @@ -1458,10 +1460,11 @@ static void btrfs_emit_options(struct btrfs_fs_info *info, btrfs_info_if_set(info, old, IGNOREMETACSUMS, "ignoring meta csums"); btrfs_info_if_set(info, old, IGNORESUPERFLAGS, "ignoring unknown super block flags"); + btrfs_info_if_unset(info, old, NODATASUM, "setting datasum"); btrfs_info_if_unset(info, old, NODATACOW, "setting datacow"); btrfs_info_if_unset(info, old, SSD, "not using ssd optimizations"); btrfs_info_if_unset(info, old, SSD_SPREAD, "not using spread ssd allocation scheme"); - btrfs_info_if_unset(info, old, NOBARRIER, "turning off barriers"); + btrfs_info_if_unset(info, old, NOBARRIER, "turning on barriers"); btrfs_info_if_unset(info, old, NOTREELOG, "enabling tree log"); btrfs_info_if_unset(info, old, SPACE_CACHE, "disabling disk space caching"); btrfs_info_if_unset(info, old, FREE_SPACE_TREE, "disabling free space tree"); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index dbef80cd5a9f..1a029392eac5 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2113,6 +2113,7 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans) */ spin_lock(&fs_info->unused_bgs_lock); list_del_init(&block_group->bg_list); + btrfs_put_block_group(block_group); spin_unlock(&fs_info->unused_bgs_lock); } } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 53d8c49ec058..2fdb2987c83a 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -2092,10 +2092,15 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) goto out_unlock; } - /* No space left */ - if (btrfs_zoned_bg_is_full(block_group)) { - ret = false; - goto out_unlock; + if (block_group->flags & BTRFS_BLOCK_GROUP_DATA) { + /* The caller should check if the block group is full. */ + if (WARN_ON_ONCE(btrfs_zoned_bg_is_full(block_group))) { + ret = false; + goto out_unlock; + } + } else { + /* Since it is already written, it should have been active. 
*/ + WARN_ON_ONCE(block_group->meta_write_pointer != block_group->start); } for (i = 0; i < map->num_stripes; i++) { |