summaryrefslogtreecommitdiff
path: root/fs/btrfs/qgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/qgroup.c')
-rw-r--r--fs/btrfs/qgroup.c840
1 files changed, 671 insertions, 169 deletions
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b44124dd2370..9d49c586995a 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -31,13 +31,13 @@
#include "locking.h"
#include "ulist.h"
#include "backref.h"
+#include "extent_io.h"
/* TODO XXX FIXME
* - subvol delete -> delete when ref goes to 0? delete limits also?
* - reorganize keys
* - compressed
* - sync
- * - rescan
* - copy also limits on subvol creation
* - limit
* - caches fuer ulists
@@ -98,7 +98,15 @@ struct btrfs_qgroup_list {
struct btrfs_qgroup *member;
};
-/* must be called with qgroup_lock held */
+struct qgroup_rescan {
+ struct btrfs_work work;
+ struct btrfs_fs_info *fs_info;
+};
+
+static void qgroup_rescan_start(struct btrfs_fs_info *fs_info,
+ struct qgroup_rescan *qscan);
+
+/* must be called with qgroup_ioctl_lock held */
static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
u64 qgroupid)
{
@@ -298,7 +306,20 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
}
fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
ptr);
- /* FIXME read scan element */
+ fs_info->qgroup_rescan_progress.objectid =
+ btrfs_qgroup_status_rescan(l, ptr);
+ if (fs_info->qgroup_flags &
+ BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+ struct qgroup_rescan *qscan =
+ kmalloc(sizeof(*qscan), GFP_NOFS);
+ if (!qscan) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ fs_info->qgroup_rescan_progress.type = 0;
+ fs_info->qgroup_rescan_progress.offset = 0;
+ qgroup_rescan_start(fs_info, qscan);
+ }
goto next1;
}
@@ -420,8 +441,6 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
qgroup = rb_entry(n, struct btrfs_qgroup, node);
rb_erase(n, &fs_info->qgroup_tree);
- WARN_ON(!list_empty(&qgroup->dirty));
-
while (!list_empty(&qgroup->groups)) {
list = list_first_entry(&qgroup->groups,
struct btrfs_qgroup_list,
@@ -721,7 +740,8 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
- /* XXX scan */
+ btrfs_set_qgroup_status_rescan(l, ptr,
+ fs_info->qgroup_rescan_progress.objectid);
btrfs_mark_buffer_dirty(l);
@@ -783,19 +803,21 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
struct btrfs_root *quota_root;
+ struct btrfs_root *tree_root = fs_info->tree_root;
struct btrfs_path *path = NULL;
struct btrfs_qgroup_status_item *ptr;
struct extent_buffer *leaf;
struct btrfs_key key;
+ struct btrfs_key found_key;
+ struct btrfs_qgroup *qgroup = NULL;
int ret = 0;
+ int slot;
- spin_lock(&fs_info->qgroup_lock);
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
if (fs_info->quota_root) {
fs_info->pending_quota_state = 1;
- spin_unlock(&fs_info->qgroup_lock);
goto out;
}
- spin_unlock(&fs_info->qgroup_lock);
/*
* initially create the quota tree
@@ -830,10 +852,57 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
- btrfs_set_qgroup_status_scan(leaf, ptr, 0);
+ btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
btrfs_mark_buffer_dirty(leaf);
+ key.objectid = 0;
+ key.type = BTRFS_ROOT_REF_KEY;
+ key.offset = 0;
+
+ btrfs_release_path(path);
+ ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
+ if (ret > 0)
+ goto out_add_root;
+ if (ret < 0)
+ goto out_free_path;
+
+
+ while (1) {
+ slot = path->slots[0];
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, slot);
+
+ if (found_key.type == BTRFS_ROOT_REF_KEY) {
+ ret = add_qgroup_item(trans, quota_root,
+ found_key.offset);
+ if (ret)
+ goto out_free_path;
+
+ qgroup = add_qgroup_rb(fs_info, found_key.offset);
+ if (IS_ERR(qgroup)) {
+ ret = PTR_ERR(qgroup);
+ goto out_free_path;
+ }
+ }
+ ret = btrfs_next_item(tree_root, path);
+ if (ret < 0)
+ goto out_free_path;
+ if (ret)
+ break;
+ }
+
+out_add_root:
+ btrfs_release_path(path);
+ ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
+ if (ret)
+ goto out_free_path;
+
+ qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
+ if (IS_ERR(qgroup)) {
+ ret = PTR_ERR(qgroup);
+ goto out_free_path;
+ }
spin_lock(&fs_info->qgroup_lock);
fs_info->quota_root = quota_root;
fs_info->pending_quota_state = 1;
@@ -847,6 +916,7 @@ out_free_root:
kfree(quota_root);
}
out:
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
@@ -857,11 +927,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
struct btrfs_root *quota_root;
int ret = 0;
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
+ if (!fs_info->quota_root)
+ goto out;
spin_lock(&fs_info->qgroup_lock);
- if (!fs_info->quota_root) {
- spin_unlock(&fs_info->qgroup_lock);
- return 0;
- }
fs_info->quota_enabled = 0;
fs_info->pending_quota_state = 0;
quota_root = fs_info->quota_root;
@@ -869,8 +938,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
btrfs_free_qgroup_config(fs_info);
spin_unlock(&fs_info->qgroup_lock);
- if (!quota_root)
- return -EINVAL;
+ if (!quota_root) {
+ ret = -EINVAL;
+ goto out;
+ }
ret = btrfs_clean_quota_tree(trans, quota_root);
if (ret)
@@ -891,39 +962,62 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
free_extent_buffer(quota_root->commit_root);
kfree(quota_root);
out:
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
-int btrfs_quota_rescan(struct btrfs_fs_info *fs_info)
+static void qgroup_dirty(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup *qgroup)
{
- /* FIXME */
- return 0;
+ if (list_empty(&qgroup->dirty))
+ list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
}
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
struct btrfs_root *quota_root;
+ struct btrfs_qgroup *parent;
+ struct btrfs_qgroup *member;
+ struct btrfs_qgroup_list *list;
int ret = 0;
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
quota_root = fs_info->quota_root;
- if (!quota_root)
- return -EINVAL;
+ if (!quota_root) {
+ ret = -EINVAL;
+ goto out;
+ }
+ member = find_qgroup_rb(fs_info, src);
+ parent = find_qgroup_rb(fs_info, dst);
+ if (!member || !parent) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* check if such qgroup relation exist firstly */
+ list_for_each_entry(list, &member->groups, next_group) {
+ if (list->group == parent) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
ret = add_qgroup_relation_item(trans, quota_root, src, dst);
if (ret)
- return ret;
+ goto out;
ret = add_qgroup_relation_item(trans, quota_root, dst, src);
if (ret) {
del_qgroup_relation_item(trans, quota_root, src, dst);
- return ret;
+ goto out;
}
spin_lock(&fs_info->qgroup_lock);
ret = add_relation_rb(quota_root->fs_info, src, dst);
spin_unlock(&fs_info->qgroup_lock);
-
+out:
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
@@ -931,13 +1025,34 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
struct btrfs_root *quota_root;
+ struct btrfs_qgroup *parent;
+ struct btrfs_qgroup *member;
+ struct btrfs_qgroup_list *list;
int ret = 0;
int err;
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
quota_root = fs_info->quota_root;
- if (!quota_root)
- return -EINVAL;
+ if (!quota_root) {
+ ret = -EINVAL;
+ goto out;
+ }
+ member = find_qgroup_rb(fs_info, src);
+ parent = find_qgroup_rb(fs_info, dst);
+ if (!member || !parent) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* check if such qgroup relation exist firstly */
+ list_for_each_entry(list, &member->groups, next_group) {
+ if (list->group == parent)
+ goto exist;
+ }
+ ret = -ENOENT;
+ goto out;
+exist:
ret = del_qgroup_relation_item(trans, quota_root, src, dst);
err = del_qgroup_relation_item(trans, quota_root, dst, src);
if (err && !ret)
@@ -945,9 +1060,9 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
spin_lock(&fs_info->qgroup_lock);
del_relation_rb(fs_info, src, dst);
-
spin_unlock(&fs_info->qgroup_lock);
-
+out:
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
@@ -958,11 +1073,21 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_qgroup *qgroup;
int ret = 0;
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
quota_root = fs_info->quota_root;
- if (!quota_root)
- return -EINVAL;
+ if (!quota_root) {
+ ret = -EINVAL;
+ goto out;
+ }
+ qgroup = find_qgroup_rb(fs_info, qgroupid);
+ if (qgroup) {
+ ret = -EEXIST;
+ goto out;
+ }
ret = add_qgroup_item(trans, quota_root, qgroupid);
+ if (ret)
+ goto out;
spin_lock(&fs_info->qgroup_lock);
qgroup = add_qgroup_rb(fs_info, qgroupid);
@@ -970,7 +1095,8 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
if (IS_ERR(qgroup))
ret = PTR_ERR(qgroup);
-
+out:
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
@@ -981,27 +1107,32 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_qgroup *qgroup;
int ret = 0;
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
quota_root = fs_info->quota_root;
- if (!quota_root)
- return -EINVAL;
+ if (!quota_root) {
+ ret = -EINVAL;
+ goto out;
+ }
- /* check if there are no relations to this qgroup */
- spin_lock(&fs_info->qgroup_lock);
qgroup = find_qgroup_rb(fs_info, qgroupid);
- if (qgroup) {
- if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) {
- spin_unlock(&fs_info->qgroup_lock);
- return -EBUSY;
+ if (!qgroup) {
+ ret = -ENOENT;
+ goto out;
+ } else {
+ /* check if there are no relations to this qgroup */
+ if (!list_empty(&qgroup->groups) ||
+ !list_empty(&qgroup->members)) {
+ ret = -EBUSY;
+ goto out;
}
}
- spin_unlock(&fs_info->qgroup_lock);
-
ret = del_qgroup_item(trans, quota_root, qgroupid);
spin_lock(&fs_info->qgroup_lock);
del_qgroup_rb(quota_root->fs_info, qgroupid);
spin_unlock(&fs_info->qgroup_lock);
-
+out:
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
@@ -1009,13 +1140,22 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid,
struct btrfs_qgroup_limit *limit)
{
- struct btrfs_root *quota_root = fs_info->quota_root;
+ struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
int ret = 0;
- if (!quota_root)
- return -EINVAL;
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
+ quota_root = fs_info->quota_root;
+ if (!quota_root) {
+ ret = -EINVAL;
+ goto out;
+ }
+ qgroup = find_qgroup_rb(fs_info, qgroupid);
+ if (!qgroup) {
+ ret = -ENOENT;
+ goto out;
+ }
ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
limit->flags, limit->max_rfer,
limit->max_excl, limit->rsv_rfer,
@@ -1027,31 +1167,17 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
}
spin_lock(&fs_info->qgroup_lock);
-
- qgroup = find_qgroup_rb(fs_info, qgroupid);
- if (!qgroup) {
- ret = -ENOENT;
- goto unlock;
- }
qgroup->lim_flags = limit->flags;
qgroup->max_rfer = limit->max_rfer;
qgroup->max_excl = limit->max_excl;
qgroup->rsv_rfer = limit->rsv_rfer;
qgroup->rsv_excl = limit->rsv_excl;
-
-unlock:
spin_unlock(&fs_info->qgroup_lock);
-
+out:
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
-static void qgroup_dirty(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup *qgroup)
-{
- if (list_empty(&qgroup->dirty))
- list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
-}
-
/*
* btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
* the modification into a list that's later used by btrfs_end_transaction to
@@ -1075,6 +1201,144 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
return 0;
}
+static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
+ struct ulist *roots, struct ulist *tmp,
+ u64 seq)
+{
+ struct ulist_node *unode;
+ struct ulist_iterator uiter;
+ struct ulist_node *tmp_unode;
+ struct ulist_iterator tmp_uiter;
+ struct btrfs_qgroup *qg;
+ int ret;
+
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(roots, &uiter))) {
+ qg = find_qgroup_rb(fs_info, unode->val);
+ if (!qg)
+ continue;
+
+ ulist_reinit(tmp);
+ /* XXX id not needed */
+ ret = ulist_add(tmp, qg->qgroupid,
+ (u64)(uintptr_t)qg, GFP_ATOMIC);
+ if (ret < 0)
+ return ret;
+ ULIST_ITER_INIT(&tmp_uiter);
+ while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
+ struct btrfs_qgroup_list *glist;
+
+ qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
+ if (qg->refcnt < seq)
+ qg->refcnt = seq + 1;
+ else
+ ++qg->refcnt;
+
+ list_for_each_entry(glist, &qg->groups, next_group) {
+ ret = ulist_add(tmp, glist->group->qgroupid,
+ (u64)(uintptr_t)glist->group,
+ GFP_ATOMIC);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
+ struct ulist *roots, struct ulist *tmp,
+ u64 seq, int sgn, u64 num_bytes,
+ struct btrfs_qgroup *qgroup)
+{
+ struct ulist_node *unode;
+ struct ulist_iterator uiter;
+ struct btrfs_qgroup *qg;
+ struct btrfs_qgroup_list *glist;
+ int ret;
+
+ ulist_reinit(tmp);
+ ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
+ if (ret < 0)
+ return ret;
+
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(tmp, &uiter))) {
+ qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
+ if (qg->refcnt < seq) {
+ /* not visited by step 1 */
+ qg->rfer += sgn * num_bytes;
+ qg->rfer_cmpr += sgn * num_bytes;
+ if (roots->nnodes == 0) {
+ qg->excl += sgn * num_bytes;
+ qg->excl_cmpr += sgn * num_bytes;
+ }
+ qgroup_dirty(fs_info, qg);
+ }
+ WARN_ON(qg->tag >= seq);
+ qg->tag = seq;
+
+ list_for_each_entry(glist, &qg->groups, next_group) {
+ ret = ulist_add(tmp, glist->group->qgroupid,
+ (uintptr_t)glist->group, GFP_ATOMIC);
+ if (ret < 0)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
+ struct ulist *roots, struct ulist *tmp,
+ u64 seq, int sgn, u64 num_bytes)
+{
+ struct ulist_node *unode;
+ struct ulist_iterator uiter;
+ struct btrfs_qgroup *qg;
+ struct ulist_node *tmp_unode;
+ struct ulist_iterator tmp_uiter;
+ int ret;
+
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(roots, &uiter))) {
+ qg = find_qgroup_rb(fs_info, unode->val);
+ if (!qg)
+ continue;
+
+ ulist_reinit(tmp);
+ ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
+ if (ret < 0)
+ return ret;
+
+ ULIST_ITER_INIT(&tmp_uiter);
+ while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
+ struct btrfs_qgroup_list *glist;
+
+ qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
+ if (qg->tag == seq)
+ continue;
+
+ if (qg->refcnt - seq == roots->nnodes) {
+ qg->excl -= sgn * num_bytes;
+ qg->excl_cmpr -= sgn * num_bytes;
+ qgroup_dirty(fs_info, qg);
+ }
+
+ list_for_each_entry(glist, &qg->groups, next_group) {
+ ret = ulist_add(tmp, glist->group->qgroupid,
+ (uintptr_t)glist->group,
+ GFP_ATOMIC);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
/*
* btrfs_qgroup_account_ref is called for every ref that is added to or deleted
* from the fs. First, all roots referencing the extent are searched, and
@@ -1090,10 +1354,8 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *quota_root;
u64 ref_root;
struct btrfs_qgroup *qgroup;
- struct ulist_node *unode;
struct ulist *roots = NULL;
struct ulist *tmp = NULL;
- struct ulist_iterator uiter;
u64 seq;
int ret = 0;
int sgn;
@@ -1132,9 +1394,11 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
case BTRFS_ADD_DELAYED_REF:
case BTRFS_ADD_DELAYED_EXTENT:
sgn = 1;
+ seq = btrfs_tree_mod_seq_prev(node->seq);
break;
case BTRFS_DROP_DELAYED_REF:
sgn = -1;
+ seq = node->seq;
break;
case BTRFS_UPDATE_DELAYED_HEAD:
return 0;
@@ -1142,20 +1406,37 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
BUG();
}
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+ if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+ return 0;
+ }
+ }
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+
/*
* the delayed ref sequence number we pass depends on the direction of
- * the operation. for add operations, we pass (node->seq - 1) to skip
+ * the operation. for add operations, we pass
+ * tree_mod_log_prev_seq(node->seq) to skip
* the delayed ref's current sequence number, because we need the state
* of the tree before the add operation. for delete operations, we pass
* (node->seq) to include the delayed ref's current sequence number,
* because we need the state of the tree after the delete operation.
*/
- ret = btrfs_find_all_roots(trans, fs_info, node->bytenr,
- sgn > 0 ? node->seq - 1 : node->seq, &roots);
+ ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
if (ret < 0)
return ret;
+ mutex_lock(&fs_info->qgroup_rescan_lock);
spin_lock(&fs_info->qgroup_lock);
+ if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+ if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
+ ret = 0;
+ goto unlock;
+ }
+ }
+
quota_root = fs_info->quota_root;
if (!quota_root)
goto unlock;
@@ -1175,106 +1456,29 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
seq = fs_info->qgroup_seq;
fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(roots, &uiter))) {
- struct ulist_node *tmp_unode;
- struct ulist_iterator tmp_uiter;
- struct btrfs_qgroup *qg;
-
- qg = find_qgroup_rb(fs_info, unode->val);
- if (!qg)
- continue;
-
- ulist_reinit(tmp);
- /* XXX id not needed */
- ulist_add(tmp, qg->qgroupid, (u64)(uintptr_t)qg, GFP_ATOMIC);
- ULIST_ITER_INIT(&tmp_uiter);
- while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
- struct btrfs_qgroup_list *glist;
-
- qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
- if (qg->refcnt < seq)
- qg->refcnt = seq + 1;
- else
- ++qg->refcnt;
-
- list_for_each_entry(glist, &qg->groups, next_group) {
- ulist_add(tmp, glist->group->qgroupid,
- (u64)(uintptr_t)glist->group,
- GFP_ATOMIC);
- }
- }
- }
+ ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
+ if (ret)
+ goto unlock;
/*
* step 2: walk from the new root
*/
- ulist_reinit(tmp);
- ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(tmp, &uiter))) {
- struct btrfs_qgroup *qg;
- struct btrfs_qgroup_list *glist;
-
- qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
- if (qg->refcnt < seq) {
- /* not visited by step 1 */
- qg->rfer += sgn * node->num_bytes;
- qg->rfer_cmpr += sgn * node->num_bytes;
- if (roots->nnodes == 0) {
- qg->excl += sgn * node->num_bytes;
- qg->excl_cmpr += sgn * node->num_bytes;
- }
- qgroup_dirty(fs_info, qg);
- }
- WARN_ON(qg->tag >= seq);
- qg->tag = seq;
-
- list_for_each_entry(glist, &qg->groups, next_group) {
- ulist_add(tmp, glist->group->qgroupid,
- (uintptr_t)glist->group, GFP_ATOMIC);
- }
- }
+ ret = qgroup_account_ref_step2(fs_info, roots, tmp, seq, sgn,
+ node->num_bytes, qgroup);
+ if (ret)
+ goto unlock;
/*
* step 3: walk again from old refs
*/
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(roots, &uiter))) {
- struct btrfs_qgroup *qg;
- struct ulist_node *tmp_unode;
- struct ulist_iterator tmp_uiter;
-
- qg = find_qgroup_rb(fs_info, unode->val);
- if (!qg)
- continue;
-
- ulist_reinit(tmp);
- ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
- ULIST_ITER_INIT(&tmp_uiter);
- while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
- struct btrfs_qgroup_list *glist;
-
- qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
- if (qg->tag == seq)
- continue;
-
- if (qg->refcnt - seq == roots->nnodes) {
- qg->excl -= sgn * node->num_bytes;
- qg->excl_cmpr -= sgn * node->num_bytes;
- qgroup_dirty(fs_info, qg);
- }
+ ret = qgroup_account_ref_step3(fs_info, roots, tmp, seq, sgn,
+ node->num_bytes);
+ if (ret)
+ goto unlock;
- list_for_each_entry(glist, &qg->groups, next_group) {
- ulist_add(tmp, glist->group->qgroupid,
- (uintptr_t)glist->group,
- GFP_ATOMIC);
- }
- }
- }
- ret = 0;
unlock:
spin_unlock(&fs_info->qgroup_lock);
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
ulist_free(roots);
ulist_free(tmp);
@@ -1289,10 +1493,14 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
{
struct btrfs_root *quota_root = fs_info->quota_root;
int ret = 0;
+ int start_rescan_worker = 0;
if (!quota_root)
goto out;
+ if (!fs_info->quota_enabled && fs_info->pending_quota_state)
+ start_rescan_worker = 1;
+
fs_info->quota_enabled = fs_info->pending_quota_state;
spin_lock(&fs_info->qgroup_lock);
@@ -1318,6 +1526,13 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
if (ret)
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ if (!ret && start_rescan_worker) {
+ ret = btrfs_qgroup_rescan(fs_info);
+ if (ret)
+ pr_err("btrfs: start rescan quota failed: %d\n", ret);
+ ret = 0;
+ }
+
out:
return ret;
@@ -1338,12 +1553,30 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
struct btrfs_qgroup *srcgroup;
struct btrfs_qgroup *dstgroup;
u32 level_size = 0;
+ u64 nums;
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
if (!fs_info->quota_enabled)
- return 0;
+ goto out;
- if (!quota_root)
- return -EINVAL;
+ if (!quota_root) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (inherit) {
+ i_qgroups = (u64 *)(inherit + 1);
+ nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
+ 2 * inherit->num_excl_copies;
+ for (i = 0; i < nums; ++i) {
+ srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
+ if (!srcgroup) {
+ ret = -EINVAL;
+ goto out;
+ }
+ ++i_qgroups;
+ }
+ }
/*
* create a tracking group for the subvol itself
@@ -1470,6 +1703,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
unlock:
spin_unlock(&fs_info->qgroup_lock);
out:
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
@@ -1514,7 +1748,10 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
ret = -ENOMEM;
goto out;
}
- ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
+ ret = ulist_add(ulist, qgroup->qgroupid,
+ (uintptr_t)qgroup, GFP_ATOMIC);
+ if (ret < 0)
+ goto out;
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(ulist, &uiter))) {
struct btrfs_qgroup *qg;
@@ -1523,25 +1760,27 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
- qg->reserved + qg->rfer + num_bytes >
+ qg->reserved + (s64)qg->rfer + num_bytes >
qg->max_rfer) {
ret = -EDQUOT;
goto out;
}
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
- qg->reserved + qg->excl + num_bytes >
+ qg->reserved + (s64)qg->excl + num_bytes >
qg->max_excl) {
ret = -EDQUOT;
goto out;
}
list_for_each_entry(glist, &qg->groups, next_group) {
- ulist_add(ulist, glist->group->qgroupid,
- (uintptr_t)glist->group, GFP_ATOMIC);
+ ret = ulist_add(ulist, glist->group->qgroupid,
+ (uintptr_t)glist->group, GFP_ATOMIC);
+ if (ret < 0)
+ goto out;
}
}
-
+ ret = 0;
/*
* no limits exceeded, now record the reservation into all qgroups
*/
@@ -1570,6 +1809,7 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
struct ulist_node *unode;
struct ulist_iterator uiter;
u64 ref_root = root->root_key.objectid;
+ int ret = 0;
if (!is_fstree(ref_root))
return;
@@ -1592,7 +1832,10 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
btrfs_std_error(fs_info, -ENOMEM);
goto out;
}
- ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
+ ret = ulist_add(ulist, qgroup->qgroupid,
+ (uintptr_t)qgroup, GFP_ATOMIC);
+ if (ret < 0)
+ goto out;
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(ulist, &uiter))) {
struct btrfs_qgroup *qg;
@@ -1603,8 +1846,10 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
qg->reserved -= num_bytes;
list_for_each_entry(glist, &qg->groups, next_group) {
- ulist_add(ulist, glist->group->qgroupid,
- (uintptr_t)glist->group, GFP_ATOMIC);
+ ret = ulist_add(ulist, glist->group->qgroupid,
+ (uintptr_t)glist->group, GFP_ATOMIC);
+ if (ret < 0)
+ goto out;
}
}
@@ -1617,8 +1862,265 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
{
if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
return;
- printk(KERN_ERR "btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %llu\n",
+ pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n",
trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
- trans->delayed_ref_elem.seq);
+ (u32)(trans->delayed_ref_elem.seq >> 32),
+ (u32)trans->delayed_ref_elem.seq);
BUG();
}
+
+/*
+ * returns < 0 on error, 0 when more leafs are to be scanned.
+ * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
+ */
+static int
+qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path,
+ struct btrfs_trans_handle *trans, struct ulist *tmp,
+ struct extent_buffer *scratch_leaf)
+{
+ struct btrfs_key found;
+ struct btrfs_fs_info *fs_info = qscan->fs_info;
+ struct ulist *roots = NULL;
+ struct ulist_node *unode;
+ struct ulist_iterator uiter;
+ struct seq_list tree_mod_seq_elem = {};
+ u64 seq;
+ int slot;
+ int ret;
+
+ path->leave_spinning = 1;
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ ret = btrfs_search_slot_for_read(fs_info->extent_root,
+ &fs_info->qgroup_rescan_progress,
+ path, 1, 0);
+
+ pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
+ (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
+ fs_info->qgroup_rescan_progress.type,
+ (unsigned long long)fs_info->qgroup_rescan_progress.offset,
+ ret);
+
+ if (ret) {
+ /*
+ * The rescan is about to end, we will not be scanning any
+ * further blocks. We cannot unset the RESCAN flag here, because
+ * we want to commit the transaction if everything went well.
+ * To make the live accounting work in this phase, we set our
+ * scan progress pointer such that every real extent objectid
+ * will be smaller.
+ */
+ fs_info->qgroup_rescan_progress.objectid = (u64)-1;
+ btrfs_release_path(path);
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+ return ret;
+ }
+
+ btrfs_item_key_to_cpu(path->nodes[0], &found,
+ btrfs_header_nritems(path->nodes[0]) - 1);
+ fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
+
+ btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+ memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
+ slot = path->slots[0];
+ btrfs_release_path(path);
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+ for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
+ btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
+ if (found.type != BTRFS_EXTENT_ITEM_KEY)
+ continue;
+ ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
+ tree_mod_seq_elem.seq, &roots);
+ if (ret < 0)
+ goto out;
+ spin_lock(&fs_info->qgroup_lock);
+ seq = fs_info->qgroup_seq;
+ fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
+
+ ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
+ if (ret) {
+ spin_unlock(&fs_info->qgroup_lock);
+ ulist_free(roots);
+ goto out;
+ }
+
+ /*
+ * step2 of btrfs_qgroup_account_ref works from a single root,
+ * we're doing all at once here.
+ */
+ ulist_reinit(tmp);
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(roots, &uiter))) {
+ struct btrfs_qgroup *qg;
+
+ qg = find_qgroup_rb(fs_info, unode->val);
+ if (!qg)
+ continue;
+
+ ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
+ GFP_ATOMIC);
+ if (ret < 0) {
+ spin_unlock(&fs_info->qgroup_lock);
+ ulist_free(roots);
+ goto out;
+ }
+ }
+
+ /* this loop is similar to step 2 of btrfs_qgroup_account_ref */
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(tmp, &uiter))) {
+ struct btrfs_qgroup *qg;
+ struct btrfs_qgroup_list *glist;
+
+ qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
+ qg->rfer += found.offset;
+ qg->rfer_cmpr += found.offset;
+ WARN_ON(qg->tag >= seq);
+ if (qg->refcnt - seq == roots->nnodes) {
+ qg->excl += found.offset;
+ qg->excl_cmpr += found.offset;
+ }
+ qgroup_dirty(fs_info, qg);
+
+ list_for_each_entry(glist, &qg->groups, next_group) {
+ ret = ulist_add(tmp, glist->group->qgroupid,
+ (uintptr_t)glist->group,
+ GFP_ATOMIC);
+ if (ret < 0) {
+ spin_unlock(&fs_info->qgroup_lock);
+ ulist_free(roots);
+ goto out;
+ }
+ }
+ }
+
+ spin_unlock(&fs_info->qgroup_lock);
+ ulist_free(roots);
+ ret = 0;
+ }
+
+out:
+ btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+
+ return ret;
+}
+
+static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
+{
+ struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan,
+ work);
+ struct btrfs_path *path;
+ struct btrfs_trans_handle *trans = NULL;
+ struct btrfs_fs_info *fs_info = qscan->fs_info;
+ struct ulist *tmp = NULL;
+ struct extent_buffer *scratch_leaf = NULL;
+ int err = -ENOMEM;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ goto out;
+ tmp = ulist_alloc(GFP_NOFS);
+ if (!tmp)
+ goto out;
+ scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
+ if (!scratch_leaf)
+ goto out;
+
+ err = 0;
+ while (!err) {
+ trans = btrfs_start_transaction(fs_info->fs_root, 0);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ break;
+ }
+ if (!fs_info->quota_enabled) {
+ err = -EINTR;
+ } else {
+ err = qgroup_rescan_leaf(qscan, path, trans,
+ tmp, scratch_leaf);
+ }
+ if (err > 0)
+ btrfs_commit_transaction(trans, fs_info->fs_root);
+ else
+ btrfs_end_transaction(trans, fs_info->fs_root);
+ }
+
+out:
+ kfree(scratch_leaf);
+ ulist_free(tmp);
+ btrfs_free_path(path);
+ kfree(qscan);
+
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+
+ if (err == 2 &&
+ fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
+ fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ } else if (err < 0) {
+ fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ }
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+ if (err >= 0) {
+ pr_info("btrfs: qgroup scan completed%s\n",
+ err == 2 ? " (inconsistency flag cleared)" : "");
+ } else {
+ pr_err("btrfs: qgroup scan failed with %d\n", err);
+ }
+}
+
+static void
+qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan *qscan)
+{
+ memset(&qscan->work, 0, sizeof(qscan->work));
+ qscan->work.func = btrfs_qgroup_rescan_worker;
+ qscan->fs_info = fs_info;
+
+ pr_info("btrfs: qgroup scan started\n");
+ btrfs_queue_worker(&fs_info->qgroup_rescan_workers, &qscan->work);
+}
+
+int
+btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
+{
+ int ret = 0;
+ struct rb_node *n;
+ struct btrfs_qgroup *qgroup;
+ struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS);
+
+ if (!qscan)
+ return -ENOMEM;
+
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ spin_lock(&fs_info->qgroup_lock);
+ if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
+ ret = -EINPROGRESS;
+ else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
+ ret = -EINVAL;
+ if (ret) {
+ spin_unlock(&fs_info->qgroup_lock);
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+ kfree(qscan);
+ return ret;
+ }
+
+ fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+ memset(&fs_info->qgroup_rescan_progress, 0,
+ sizeof(fs_info->qgroup_rescan_progress));
+
+ /* clear all current qgroup tracking information */
+ for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
+ qgroup = rb_entry(n, struct btrfs_qgroup, node);
+ qgroup->rfer = 0;
+ qgroup->rfer_cmpr = 0;
+ qgroup->excl = 0;
+ qgroup->excl_cmpr = 0;
+ }
+ spin_unlock(&fs_info->qgroup_lock);
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+ qgroup_rescan_start(fs_info, qscan);
+
+ return 0;
+}