diff options
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/block-group.c | 27 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 1 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 33 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 13 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 19 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 6 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 107 | ||||
-rw-r--r-- | fs/btrfs/zoned.c | 5 |
8 files changed, 144 insertions, 67 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index dd35e29d8082..7eef79ece5b3 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -34,6 +34,19 @@ int btrfs_should_fragment_free_space(const struct btrfs_block_group *block_group } #endif +static inline bool has_unwritten_metadata(struct btrfs_block_group *block_group) +{ + /* The meta_write_pointer is available only on the zoned setup. */ + if (!btrfs_is_zoned(block_group->fs_info)) + return false; + + if (block_group->flags & BTRFS_BLOCK_GROUP_DATA) + return false; + + return block_group->start + block_group->alloc_offset > + block_group->meta_write_pointer; +} + /* * Return target flags in extended format or 0 if restripe for this chunk_type * is not in progress @@ -1249,6 +1262,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, goto out; spin_lock(&block_group->lock); + /* + * Hitting this WARN means we removed a block group with an unwritten + * region. It will cause "unable to find chunk map for logical" errors. + */ + if (WARN_ON(has_unwritten_metadata(block_group))) + btrfs_warn(fs_info, + "block group %llu is removed before metadata write out", + block_group->start); + set_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags); /* @@ -1567,8 +1589,9 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * needing to allocate extents from the block group. */ used = btrfs_space_info_used(space_info, true); - if (space_info->total_bytes - block_group->length < used && - block_group->zone_unusable < block_group->length) { + if ((space_info->total_bytes - block_group->length < used && + block_group->zone_unusable < block_group->length) || + has_unwritten_metadata(block_group)) { /* * Add a reference for the list, compensate for the ref * drop under the "next" label for the diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 29c164597401..3ba15d9c3e88 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2901,6 +2901,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, if (ret < 0) { int ret2; + btrfs_clear_buffer_dirty(trans, c); ret2 = btrfs_free_tree_block(trans, btrfs_root_id(root), c, 0, 1); if (ret2 < 0) btrfs_abort_transaction(trans, ret2); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4ceffbef3298..bb3602059906 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3654,6 +3654,21 @@ btrfs_release_block_group(struct btrfs_block_group *cache, btrfs_put_block_group(cache); } +static bool find_free_extent_check_size_class(const struct find_free_extent_ctl *ffe_ctl, + const struct btrfs_block_group *bg) +{ + if (ffe_ctl->policy == BTRFS_EXTENT_ALLOC_ZONED) + return true; + if (!btrfs_block_group_should_use_size_class(bg)) + return true; + if (ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS) + return true; + if (ffe_ctl->loop >= LOOP_UNSET_SIZE_CLASS && + bg->size_class == BTRFS_BG_SZ_NONE) + return true; + return ffe_ctl->size_class == bg->size_class; +} + /* * Helper function for find_free_extent(). * @@ -3675,7 +3690,8 @@ static int find_free_extent_clustered(struct btrfs_block_group *bg, if (!cluster_bg) goto refill_cluster; if (cluster_bg != bg && (cluster_bg->ro || - !block_group_bits(cluster_bg, ffe_ctl->flags))) + !block_group_bits(cluster_bg, ffe_ctl->flags) || + !find_free_extent_check_size_class(ffe_ctl, cluster_bg))) goto release_cluster; offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr, @@ -4231,21 +4247,6 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info, return -ENOSPC; } -static bool find_free_extent_check_size_class(struct find_free_extent_ctl *ffe_ctl, - struct btrfs_block_group *bg) -{ - if (ffe_ctl->policy == BTRFS_EXTENT_ALLOC_ZONED) - return true; - if (!btrfs_block_group_should_use_size_class(bg)) - return true; - if (ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS) - return true; - if (ffe_ctl->loop >= LOOP_UNSET_SIZE_CLASS && - bg->size_class == BTRFS_BG_SZ_NONE) - return true; - return ffe_ctl->size_class == bg->size_class; -} - static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info, struct find_free_extent_ctl *ffe_ctl, struct btrfs_space_info *space_info, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index e9f58cdeeb5f..6b181bf9f156 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1373,11 +1373,14 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) /* * We have nothing held here and no trans handle, just return the error - * if there is one. + * if there is one and set back the quota enabled bit since we didn't + * actually disable quotas. */ ret = flush_reservations(fs_info); - if (ret) + if (ret) { + set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); return ret; + } /* * 1 For the root item @@ -1489,7 +1492,6 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, u64 ref_root, struct btrfs_qgroup *src, int sign) { struct btrfs_qgroup *qgroup; - struct btrfs_qgroup *cur; LIST_HEAD(qgroup_list); u64 num_bytes = src->excl; int ret = 0; @@ -1499,7 +1501,7 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info, u64 ref_root, goto out; qgroup_iterator_add(&qgroup_list, qgroup); - list_for_each_entry(cur, &qgroup_list, iterator) { + list_for_each_entry(qgroup, &qgroup_list, iterator) { struct btrfs_qgroup_list *glist; qgroup->rfer += sign * num_bytes; @@ -1698,9 +1700,6 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) struct btrfs_qgroup *prealloc = NULL; int ret = 0; - if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED) - return 0; - mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) { ret = -ENOTCONN; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index f24a80857cd6..79eb984041dd 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -687,6 +687,25 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, if (btrfs_root_id(root) == objectid) { u64 commit_root_gen; + /* + * Relocation will wait for cleaner thread, and any half-dropped + * subvolume will be fully cleaned up at mount time. + * So here we shouldn't hit a subvolume with non-zero drop_progress. + * + * If this isn't the case, error out since it can make us attempt to + * drop references for extents that were already dropped before. + */ + if (unlikely(btrfs_disk_key_objectid(&root->root_item.drop_progress))) { + struct btrfs_key cpu_key; + + btrfs_disk_key_to_cpu(&cpu_key, &root->root_item.drop_progress); + btrfs_err(fs_info, + "cannot relocate partially dropped subvolume %llu, drop progress key (%llu %u %llu)", + objectid, cpu_key.objectid, cpu_key.type, cpu_key.offset); + ret = -EUCLEAN; + goto fail; + } + /* called by btrfs_init_reloc_root */ ret = btrfs_copy_root(trans, root, root->commit_root, &eb, BTRFS_TREE_RELOC_OBJECTID); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 24806e19c7c4..dbef80cd5a9f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1739,8 +1739,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, ret = btrfs_create_qgroup(trans, objectid); if (ret && ret != -EEXIST) { - btrfs_abort_transaction(trans, ret); - goto fail; + if (ret != -ENOTCONN || btrfs_qgroup_enabled(fs_info)) { + btrfs_abort_transaction(trans, ret); + goto fail; + } } /* diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 16b4474ded4b..31adea5b0b96 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -324,8 +324,7 @@ struct walk_control { /* * Ignore any items from the inode currently being processed. Needs - * to be set every time we find a BTRFS_INODE_ITEM_KEY and we are in - * the LOG_WALK_REPLAY_INODES stage. + * to be set every time we find a BTRFS_INODE_ITEM_KEY. */ bool ignore_cur_inode; @@ -1396,6 +1395,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, dir = btrfs_iget_logging(parent_objectid, root); if (IS_ERR(dir)) { ret = PTR_ERR(dir); + if (ret == -ENOENT) + ret = 0; dir = NULL; goto out; } @@ -1411,6 +1412,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, if (log_ref_ver) { ret = extref_get_fields(eb, ref_ptr, &name, &ref_index, &parent_objectid); + if (ret) + goto out; /* * parent object can change from one array * item to another. @@ -1420,14 +1423,30 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, if (IS_ERR(dir)) { ret = PTR_ERR(dir); dir = NULL; + /* + * A new parent dir may have not been + * logged and not exist in the subvolume + * tree, see the comment above before + * the loop when getting the first + * parent dir. + */ + if (ret == -ENOENT) { + /* + * The next extref may refer to + * another parent dir that + * exists, so continue. + */ + ret = 0; + goto next; + } goto out; } } } else { ret = ref_get_fields(eb, ref_ptr, &name, &ref_index); + if (ret) + goto out; } - if (ret) - goto out; ret = inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), ref_index, &name); @@ -1461,10 +1480,11 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, } /* Else, ret == 1, we already have a perfect match, we're done. */ +next: ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + name.len; kfree(name.name); name.name = NULL; - if (log_ref_ver) { + if (log_ref_ver && dir) { iput(&dir->vfs_inode); dir = NULL; } @@ -2426,23 +2446,30 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, nritems = btrfs_header_nritems(eb); for (i = 0; i < nritems; i++) { - btrfs_item_key_to_cpu(eb, &key, i); + struct btrfs_inode_item *inode_item; - /* inode keys are done during the first stage */ - if (key.type == BTRFS_INODE_ITEM_KEY && - wc->stage == LOG_WALK_REPLAY_INODES) { - struct btrfs_inode_item *inode_item; - u32 mode; + btrfs_item_key_to_cpu(eb, &key, i); - inode_item = btrfs_item_ptr(eb, i, - struct btrfs_inode_item); + if (key.type == BTRFS_INODE_ITEM_KEY) { + inode_item = btrfs_item_ptr(eb, i, struct btrfs_inode_item); /* - * If we have a tmpfile (O_TMPFILE) that got fsync'ed - * and never got linked before the fsync, skip it, as - * replaying it is pointless since it would be deleted - * later. We skip logging tmpfiles, but it's always - * possible we are replaying a log created with a kernel - * that used to log tmpfiles. + * An inode with no links is either: + * + * 1) A tmpfile (O_TMPFILE) that got fsync'ed and never + * got linked before the fsync, skip it, as replaying + * it is pointless since it would be deleted later. + * We skip logging tmpfiles, but it's always possible + * we are replaying a log created with a kernel that + * used to log tmpfiles; + * + * 2) A non-tmpfile which got its last link deleted + * while holding an open fd on it and later got + * fsynced through that fd. We always log the + * parent inodes when inode->last_unlink_trans is + * set to the current transaction, so ignore all the + * inode items for this inode. We will delete the + * inode when processing the parent directory with + * replay_dir_deletes(). */ if (btrfs_inode_nlink(eb, inode_item) == 0) { wc->ignore_cur_inode = true; @@ -2450,8 +2477,14 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, } else { wc->ignore_cur_inode = false; } - ret = replay_xattr_deletes(wc->trans, root, log, - path, key.objectid); + } + + /* Inode keys are done during the first stage. */ + if (key.type == BTRFS_INODE_ITEM_KEY && + wc->stage == LOG_WALK_REPLAY_INODES) { + u32 mode; + + ret = replay_xattr_deletes(wc->trans, root, log, path, key.objectid); if (ret) break; mode = btrfs_inode_mode(eb, inode_item); @@ -2532,9 +2565,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, key.type == BTRFS_INODE_EXTREF_KEY) { ret = add_inode_ref(wc->trans, root, log, path, eb, i, &key); - if (ret && ret != -ENOENT) + if (ret) break; - ret = 0; } else if (key.type == BTRFS_EXTENT_DATA_KEY) { ret = replay_one_extent(wc->trans, root, path, eb, i, &key); @@ -2555,14 +2587,14 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, /* * Correctly adjust the reserved bytes occupied by a log tree extent buffer */ -static void unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start) +static int unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start) { struct btrfs_block_group *cache; cache = btrfs_lookup_block_group(fs_info, start); if (!cache) { btrfs_err(fs_info, "unable to find block group for %llu", start); - return; + return -ENOENT; } spin_lock(&cache->space_info->lock); @@ -2573,27 +2605,22 @@ static void unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start) spin_unlock(&cache->space_info->lock); btrfs_put_block_group(cache); + + return 0; } static int clean_log_buffer(struct btrfs_trans_handle *trans, struct extent_buffer *eb) { - int ret; - btrfs_tree_lock(eb); btrfs_clear_buffer_dirty(trans, eb); wait_on_extent_buffer_writeback(eb); btrfs_tree_unlock(eb); - if (trans) { - ret = btrfs_pin_reserved_extent(trans, eb); - if (ret) - return ret; - } else { - unaccount_log_buffer(eb->fs_info, eb->start); - } + if (trans) + return btrfs_pin_reserved_extent(trans, eb); - return 0; + return unaccount_log_buffer(eb->fs_info, eb->start); } static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, @@ -4225,6 +4252,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, btrfs_set_token_timespec_nsec(&token, &item->ctime, inode_get_ctime_nsec(inode)); + btrfs_set_timespec_sec(leaf, &item->otime, BTRFS_I(inode)->i_otime_sec); + btrfs_set_timespec_nsec(leaf, &item->otime, BTRFS_I(inode)->i_otime_nsec); + /* * We do not need to set the nbytes field, in fact during a fast fsync * its value may not even be correct, since a fast fsync does not wait @@ -7295,11 +7325,14 @@ again: wc.replay_dest->log_root = log; ret = btrfs_record_root_in_trans(trans, wc.replay_dest); - if (ret) + if (ret) { /* The loop needs to continue due to the root refs */ btrfs_abort_transaction(trans, ret); - else + } else { ret = walk_log_tree(trans, log, &wc); + if (ret) + btrfs_abort_transaction(trans, ret); + } if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) { ret = fixup_inode_link_counts(trans, wc.replay_dest, diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 2603c9d60fd2..53d8c49ec058 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -2456,8 +2456,8 @@ bool btrfs_zoned_should_reclaim(const struct btrfs_fs_info *fs_info) { struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; + u64 total = btrfs_super_total_bytes(fs_info->super_copy); u64 used = 0; - u64 total = 0; u64 factor; ASSERT(btrfs_is_zoned(fs_info)); @@ -2470,7 +2470,6 @@ bool btrfs_zoned_should_reclaim(const struct btrfs_fs_info *fs_info) if (!device->bdev) continue; - total += device->disk_total_bytes; used += device->bytes_used; } mutex_unlock(&fs_devices->device_list_mutex); @@ -2524,7 +2523,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info) spin_lock(&block_group->lock); if (block_group->reserved || block_group->alloc_offset == 0 || - (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) || + !(block_group->flags & BTRFS_BLOCK_GROUP_DATA) || test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) { spin_unlock(&block_group->lock); continue; |