diff options
Diffstat (limited to 'fs/btrfs/qgroup.c')
-rw-r--r-- | fs/btrfs/qgroup.c | 140 |
1 files changed, 87 insertions, 53 deletions
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 77c54749f432..fe3046007f52 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <linux/workqueue.h> #include <linux/btrfs.h> +#include <linux/sched/mm.h> #include "ctree.h" #include "transaction.h" @@ -497,13 +498,13 @@ next2: break; } out: + btrfs_free_path(path); fs_info->qgroup_flags |= flags; if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN && ret >= 0) ret = qgroup_rescan_init(fs_info, rescan_progress, 0); - btrfs_free_path(path); if (ret < 0) { ulist_free(fs_info->qgroup_ulist); @@ -893,8 +894,6 @@ static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - path->leave_spinning = 1; - key.objectid = 0; key.offset = 0; key.type = 0; @@ -936,6 +935,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) struct btrfs_key found_key; struct btrfs_qgroup *qgroup = NULL; struct btrfs_trans_handle *trans = NULL; + struct ulist *ulist = NULL; int ret = 0; int slot; @@ -943,8 +943,8 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) if (fs_info->quota_root) goto out; - fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL); - if (!fs_info->qgroup_ulist) { + ulist = ulist_alloc(GFP_KERNEL); + if (!ulist) { ret = -ENOMEM; goto out; } @@ -952,6 +952,22 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) ret = btrfs_sysfs_add_qgroups(fs_info); if (ret < 0) goto out; + + /* + * Unlock qgroup_ioctl_lock before starting the transaction. This is to + * avoid lock acquisition inversion problems (reported by lockdep) between + * qgroup_ioctl_lock and the vfs freeze semaphores, acquired when we + * start a transaction. + * After we started the transaction lock qgroup_ioctl_lock again and + * check if someone else created the quota root in the meanwhile. If so, + * just return success and release the transaction handle. + * + * Also we don't need to worry about someone else calling + * btrfs_sysfs_add_qgroups() after we unlock and getting an error because + * that function returns 0 (success) when the sysfs entries already exist. + */ + mutex_unlock(&fs_info->qgroup_ioctl_lock); + /* * 1 for quota root item * 1 for BTRFS_QGROUP_STATUS item @@ -961,12 +977,20 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) * would be a lot of overkill. */ trans = btrfs_start_transaction(tree_root, 2); + + mutex_lock(&fs_info->qgroup_ioctl_lock); if (IS_ERR(trans)) { ret = PTR_ERR(trans); trans = NULL; goto out; } + if (fs_info->quota_root) + goto out; + + fs_info->qgroup_ulist = ulist; + ulist = NULL; + /* * initially create the quota tree */ @@ -1124,11 +1148,14 @@ out: if (ret) { ulist_free(fs_info->qgroup_ulist); fs_info->qgroup_ulist = NULL; - if (trans) - btrfs_end_transaction(trans); btrfs_sysfs_del_qgroups(fs_info); } mutex_unlock(&fs_info->qgroup_ioctl_lock); + if (ret && trans) + btrfs_end_transaction(trans); + else if (trans) + ret = btrfs_end_transaction(trans); + ulist_free(ulist); return ret; } @@ -1141,19 +1168,29 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) goto out; + mutex_unlock(&fs_info->qgroup_ioctl_lock); /* * 1 For the root item * * We should also reserve enough items for the quota tree deletion in * btrfs_clean_quota_tree but this is not done. + * + * Also, we must always start a transaction without holding the mutex + * qgroup_ioctl_lock, see btrfs_quota_enable(). */ trans = btrfs_start_transaction(fs_info->tree_root, 1); + + mutex_lock(&fs_info->qgroup_ioctl_lock); if (IS_ERR(trans)) { ret = PTR_ERR(trans); + trans = NULL; goto out; } + if (!fs_info->quota_root) + goto out; + clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); btrfs_qgroup_wait_for_completion(fs_info, false); spin_lock(&fs_info->qgroup_lock); @@ -1167,13 +1204,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) ret = btrfs_clean_quota_tree(trans, quota_root); if (ret) { btrfs_abort_transaction(trans, ret); - goto end_trans; + goto out; } ret = btrfs_del_root(trans, "a_root->root_key); if (ret) { btrfs_abort_transaction(trans, ret); - goto end_trans; + goto out; } list_del("a_root->dirty_list); @@ -1185,10 +1222,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) btrfs_put_root(quota_root); -end_trans: - ret = btrfs_end_transaction(trans); out: mutex_unlock(&fs_info->qgroup_ioctl_lock); + if (ret && trans) + btrfs_end_transaction(trans); + else if (trans) + ret = btrfs_end_transaction(trans); + return ret; } @@ -1324,13 +1364,17 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, struct btrfs_qgroup *member; struct btrfs_qgroup_list *list; struct ulist *tmp; + unsigned int nofs_flag; int ret = 0; /* Check the level of src and dst first */ if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) return -EINVAL; + /* We hold a transaction handle open, must do a NOFS allocation. */ + nofs_flag = memalloc_nofs_save(); tmp = ulist_alloc(GFP_KERNEL); + memalloc_nofs_restore(nofs_flag); if (!tmp) return -ENOMEM; @@ -1387,10 +1431,14 @@ static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, struct btrfs_qgroup_list *list; struct ulist *tmp; bool found = false; + unsigned int nofs_flag; int ret = 0; int ret2; + /* We hold a transaction handle open, must do a NOFS allocation. */ + nofs_flag = memalloc_nofs_save(); tmp = ulist_alloc(GFP_KERNEL); + memalloc_nofs_restore(nofs_flag); if (!tmp) return -ENOMEM; @@ -1894,34 +1942,22 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans, struct btrfs_key dst_key; if (src_path->nodes[cur_level] == NULL) { - struct btrfs_key first_key; struct extent_buffer *eb; int parent_slot; - u64 child_gen; - u64 child_bytenr; eb = src_path->nodes[cur_level + 1]; parent_slot = src_path->slots[cur_level + 1]; - child_bytenr = btrfs_node_blockptr(eb, parent_slot); - child_gen = btrfs_node_ptr_generation(eb, parent_slot); - btrfs_node_key_to_cpu(eb, &first_key, parent_slot); - eb = read_tree_block(fs_info, child_bytenr, child_gen, - cur_level, &first_key); + eb = btrfs_read_node_slot(eb, parent_slot); if (IS_ERR(eb)) { ret = PTR_ERR(eb); goto out; - } else if (!extent_buffer_uptodate(eb)) { - free_extent_buffer(eb); - ret = -EIO; - goto out; } src_path->nodes[cur_level] = eb; btrfs_tree_read_lock(eb); - btrfs_set_lock_blocking_read(eb); - src_path->locks[cur_level] = BTRFS_READ_LOCK_BLOCKING; + src_path->locks[cur_level] = BTRFS_READ_LOCK; } src_path->slots[cur_level] = dst_path->slots[cur_level]; @@ -2016,10 +2052,8 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans, /* Read the tree block if needed */ if (dst_path->nodes[cur_level] == NULL) { - struct btrfs_key first_key; int parent_slot; u64 child_gen; - u64 child_bytenr; /* * dst_path->nodes[root_level] must be initialized before @@ -2038,31 +2072,23 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans, */ eb = dst_path->nodes[cur_level + 1]; parent_slot = dst_path->slots[cur_level + 1]; - child_bytenr = btrfs_node_blockptr(eb, parent_slot); child_gen = btrfs_node_ptr_generation(eb, parent_slot); - btrfs_node_key_to_cpu(eb, &first_key, parent_slot); /* This node is old, no need to trace */ if (child_gen < last_snapshot) goto out; - eb = read_tree_block(fs_info, child_bytenr, child_gen, - cur_level, &first_key); + eb = btrfs_read_node_slot(eb, parent_slot); if (IS_ERR(eb)) { ret = PTR_ERR(eb); goto out; - } else if (!extent_buffer_uptodate(eb)) { - free_extent_buffer(eb); - ret = -EIO; - goto out; } dst_path->nodes[cur_level] = eb; dst_path->slots[cur_level] = 0; btrfs_tree_read_lock(eb); - btrfs_set_lock_blocking_read(eb); - dst_path->locks[cur_level] = BTRFS_READ_LOCK_BLOCKING; + dst_path->locks[cur_level] = BTRFS_READ_LOCK; need_cleanup = true; } @@ -2206,38 +2232,28 @@ walk_down: level = root_level; while (level >= 0) { if (path->nodes[level] == NULL) { - struct btrfs_key first_key; int parent_slot; - u64 child_gen; u64 child_bytenr; /* - * We need to get child blockptr/gen from parent before - * we can read it. + * We need to get child blockptr from parent before we + * can read it. */ eb = path->nodes[level + 1]; parent_slot = path->slots[level + 1]; child_bytenr = btrfs_node_blockptr(eb, parent_slot); - child_gen = btrfs_node_ptr_generation(eb, parent_slot); - btrfs_node_key_to_cpu(eb, &first_key, parent_slot); - eb = read_tree_block(fs_info, child_bytenr, child_gen, - level, &first_key); + eb = btrfs_read_node_slot(eb, parent_slot); if (IS_ERR(eb)) { ret = PTR_ERR(eb); goto out; - } else if (!extent_buffer_uptodate(eb)) { - free_extent_buffer(eb); - ret = -EIO; - goto out; } path->nodes[level] = eb; path->slots[level] = 0; btrfs_tree_read_lock(eb); - btrfs_set_lock_blocking_read(eb); - path->locks[level] = BTRFS_READ_LOCK_BLOCKING; + path->locks[level] = BTRFS_READ_LOCK; ret = btrfs_qgroup_trace_extent(trans, child_bytenr, fs_info->nodesize, @@ -3512,6 +3528,7 @@ static int try_flush_qgroup(struct btrfs_root *root) { struct btrfs_trans_handle *trans; int ret; + bool can_commit = true; /* * We don't want to run flush again and again, so if there is a running @@ -3523,6 +3540,20 @@ static int try_flush_qgroup(struct btrfs_root *root) return 0; } + /* + * If current process holds a transaction, we shouldn't flush, as we + * assume all space reservation happens before a transaction handle is + * held. + * + * But there are cases like btrfs_delayed_item_reserve_metadata() where + * we try to reserve space with one transction handle already held. + * In that case we can't commit transaction, but at least try to end it + * and hope the started data writes can free some space. + */ + if (current->journal_info && + current->journal_info != BTRFS_SEND_TRANS_STUB) + can_commit = false; + ret = btrfs_start_delalloc_snapshot(root); if (ret < 0) goto out; @@ -3534,7 +3565,10 @@ static int try_flush_qgroup(struct btrfs_root *root) goto out; } - ret = btrfs_commit_transaction(trans); + if (can_commit) + ret = btrfs_commit_transaction(trans); + else + ret = btrfs_end_transaction(trans); out: clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state); wake_up(&root->qgroup_flush_wait); @@ -4174,7 +4208,7 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, spin_unlock(&blocks->lock); /* Read out reloc subtree root */ - reloc_eb = read_tree_block(fs_info, block->reloc_bytenr, + reloc_eb = read_tree_block(fs_info, block->reloc_bytenr, 0, block->reloc_generation, block->level, &block->first_key); if (IS_ERR(reloc_eb)) { |