summary | refs | log | tree | commit | diff
path: root/fs/bcachefs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs')
-rw-r--r-- fs/bcachefs/bcachefs.h 5
-rw-r--r-- fs/bcachefs/btree_update_interior.c 13
-rw-r--r-- fs/bcachefs/btree_update_leaf.c 44
-rw-r--r-- fs/bcachefs/buckets.c 102
-rw-r--r-- fs/bcachefs/buckets.h 14
-rw-r--r-- fs/bcachefs/extents.c 5
-rw-r--r-- fs/bcachefs/replicas.c 48
-rw-r--r-- fs/bcachefs/replicas.h 2
-rw-r--r-- fs/bcachefs/super.c 4
9 files changed, 166 insertions, 71 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 0b495dd32f67..27ffecb912a3 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -635,7 +635,10 @@ struct bch_fs {
struct percpu_rw_semaphore mark_lock;
struct bch_fs_usage __percpu *usage[2];
- struct bch_fs_usage __percpu *usage_scratch;
+
+ /* single element mempool: */
+ struct mutex usage_scratch_lock;
+ struct bch_fs_usage *usage_scratch;
/*
* When we invalidate buckets, we use both the priority and the amount
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 7ccf2f935701..31c1474cd494 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -1076,8 +1076,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
mutex_lock(&c->btree_interior_update_lock);
percpu_down_read(&c->mark_lock);
- preempt_disable();
- fs_usage = bch2_fs_usage_get_scratch(c);
+ fs_usage = bch2_fs_usage_scratch_get(c);
bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key),
true, 0,
@@ -1090,7 +1089,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
fs_usage);
bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
- preempt_enable();
+ bch2_fs_usage_scratch_put(c, fs_usage);
percpu_up_read(&c->mark_lock);
mutex_unlock(&c->btree_interior_update_lock);
}
@@ -1171,8 +1170,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
mutex_lock(&c->btree_interior_update_lock);
percpu_down_read(&c->mark_lock);
- preempt_disable();
- fs_usage = bch2_fs_usage_get_scratch(c);
+ fs_usage = bch2_fs_usage_scratch_get(c);
bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
true, 0,
@@ -1193,7 +1191,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
- preempt_enable();
+ bch2_fs_usage_scratch_put(c, fs_usage);
percpu_up_read(&c->mark_lock);
mutex_unlock(&c->btree_interior_update_lock);
@@ -1987,7 +1985,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
mutex_lock(&c->btree_interior_update_lock);
percpu_down_read(&c->mark_lock);
- fs_usage = bch2_fs_usage_get_scratch(c);
+ fs_usage = bch2_fs_usage_scratch_get(c);
bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i),
true, 0,
@@ -1998,6 +1996,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
fs_usage);
bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res);
+ bch2_fs_usage_scratch_put(c, fs_usage);
percpu_up_read(&c->mark_lock);
mutex_unlock(&c->btree_interior_update_lock);
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 42fdb6c2963a..5349790547f4 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -269,8 +269,6 @@ static void btree_insert_key_leaf(struct btree_trans *trans,
int old_live_u64s = b->nr.live_u64s;
int live_u64s_added, u64s_added;
- bch2_mark_update(trans, insert);
-
if (!btree_node_is_extents(b))
bch2_insert_fixup_key(trans, insert);
else
@@ -499,11 +497,6 @@ btree_key_can_insert(struct btree_trans *trans,
if (unlikely(btree_node_fake(b)))
return BTREE_INSERT_BTREE_NODE_FULL;
- if (!bch2_bkey_replicas_marked(c,
- bkey_i_to_s_c(insert->k),
- true))
- return BTREE_INSERT_NEED_MARK_REPLICAS;
-
ret = !btree_node_is_extents(b)
? BTREE_INSERT_OK
: bch2_extent_can_insert(trans, insert, u64s);
@@ -555,6 +548,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
struct btree_insert_entry **stopped_at)
{
struct bch_fs *c = trans->c;
+ struct bch_fs_usage *fs_usage = NULL;
struct btree_insert_entry *i;
struct btree_iter *linked;
int ret;
@@ -562,12 +556,29 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
trans_for_each_update_iter(trans, i)
BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
+ trans_for_each_update_iter(trans, i) {
+ if (i->deferred ||
+ !btree_node_type_needs_gc(i->iter->btree_id))
+ continue;
+
+ if (!fs_usage) {
+ percpu_down_read(&c->mark_lock);
+ fs_usage = bch2_fs_usage_scratch_get(c);
+ }
+
+ if (!bch2_bkey_replicas_marked_locked(c,
+ bkey_i_to_s_c(i->k), true)) {
+ ret = BTREE_INSERT_NEED_MARK_REPLICAS;
+ goto out;
+ }
+ }
+
btree_trans_lock_write(c, trans);
if (race_fault()) {
ret = -EINTR;
trans_restart(" (race)");
- goto out;
+ goto out_unlock;
}
/*
@@ -577,7 +588,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
*/
ret = btree_trans_check_can_insert(trans, stopped_at);
if (ret)
- goto out;
+ goto out_unlock;
/*
* Don't get journal reservation until after we know insert will
@@ -585,7 +596,7 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
*/
ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_NONBLOCK);
if (ret)
- goto out;
+ goto out_unlock;
if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
if (journal_seq_verify(c))
@@ -610,14 +621,25 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
}
}
+ trans_for_each_update_iter(trans, i)
+ bch2_mark_update(trans, i, fs_usage);
+ if (fs_usage)
+ bch2_trans_fs_usage_apply(trans, fs_usage);
+
trans_for_each_update(trans, i)
do_btree_insert_one(trans, i);
-out:
+out_unlock:
BUG_ON(ret &&
(trans->flags & BTREE_INSERT_JOURNAL_RESERVED) &&
trans->journal_res.ref);
btree_trans_unlock_write(trans);
+out:
+ if (fs_usage) {
+ bch2_fs_usage_scratch_put(c, fs_usage);
+ percpu_up_read(&c->mark_lock);
+ }
+
bch2_journal_res_put(&c->journal, &trans->journal_res);
return ret;
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 3744d55b8495..2fbcd85d9e75 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -144,6 +144,37 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
percpu_up_write(&c->mark_lock);
}
+void bch2_fs_usage_scratch_put(struct bch_fs *c, struct bch_fs_usage *fs_usage)
+{
+ if (fs_usage == c->usage_scratch)
+ mutex_unlock(&c->usage_scratch_lock);
+ else
+ kfree(fs_usage);
+}
+
+struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *c)
+{
+ struct bch_fs_usage *ret;
+ unsigned bytes = fs_usage_u64s(c) * sizeof(u64);
+
+ ret = kzalloc(bytes, GFP_NOWAIT);
+ if (ret)
+ return ret;
+
+ if (mutex_trylock(&c->usage_scratch_lock))
+ goto out_pool;
+
+ ret = kzalloc(bytes, GFP_NOFS);
+ if (ret)
+ return ret;
+
+ mutex_lock(&c->usage_scratch_lock);
+out_pool:
+ ret = c->usage_scratch;
+ memset(ret, 0, bytes);
+ return ret;
+}
+
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca)
{
struct bch_dev_usage ret;
@@ -906,31 +937,39 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
unsigned journal_seq, unsigned flags,
bool gc)
{
+ int ret = 0;
+
+ preempt_disable();
+
if (!fs_usage || gc)
fs_usage = this_cpu_ptr(c->usage[gc]);
switch (k.k->type) {
case KEY_TYPE_alloc:
- return bch2_mark_alloc(c, k, inserting,
+ ret = bch2_mark_alloc(c, k, inserting,
fs_usage, journal_seq, flags, gc);
+ break;
case KEY_TYPE_btree_ptr:
- return bch2_mark_extent(c, k, inserting
+ ret = bch2_mark_extent(c, k, inserting
? c->opts.btree_node_size
: -c->opts.btree_node_size,
BCH_DATA_BTREE,
fs_usage, journal_seq, flags, gc);
+ break;
case KEY_TYPE_extent:
- return bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
+ ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
fs_usage, journal_seq, flags, gc);
+ break;
case KEY_TYPE_stripe:
- return bch2_mark_stripe(c, k, inserting,
+ ret = bch2_mark_stripe(c, k, inserting,
fs_usage, journal_seq, flags, gc);
+ break;
case KEY_TYPE_inode:
if (inserting)
fs_usage->nr_inodes++;
else
fs_usage->nr_inodes--;
- return 0;
+ break;
case KEY_TYPE_reservation: {
unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
@@ -940,11 +979,13 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
fs_usage->reserved += sectors;
fs_usage->persistent_reserved[replicas - 1] += sectors;
- return 0;
+ break;
}
- default:
- return 0;
}
+
+ preempt_enable();
+
+ return ret;
}
int bch2_mark_key_locked(struct bch_fs *c,
@@ -976,25 +1017,19 @@ int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
}
void bch2_mark_update(struct btree_trans *trans,
- struct btree_insert_entry *insert)
+ struct btree_insert_entry *insert,
+ struct bch_fs_usage *fs_usage)
{
struct bch_fs *c = trans->c;
struct btree_iter *iter = insert->iter;
struct btree *b = iter->l[0].b;
struct btree_node_iter node_iter = iter->l[0].iter;
- struct bch_fs_usage *fs_usage;
struct gc_pos pos = gc_pos_btree_node(b);
struct bkey_packed *_k;
- u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
- static int warned_disk_usage = 0;
if (!btree_node_type_needs_gc(iter->btree_id))
return;
- percpu_down_read(&c->mark_lock);
- preempt_disable();
- fs_usage = bch2_fs_usage_get_scratch(c);
-
if (!(trans->flags & BTREE_INSERT_NOMARK))
bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
bpos_min(insert->k->k.p, b->key.k.p).offset -
@@ -1047,16 +1082,32 @@ void bch2_mark_update(struct btree_trans *trans,
bch2_btree_node_iter_advance(&node_iter, b);
}
+}
- if (bch2_fs_usage_apply(c, fs_usage, trans->disk_res) &&
- !warned_disk_usage &&
- !xchg(&warned_disk_usage, 1)) {
- char buf[200];
+void bch2_trans_fs_usage_apply(struct btree_trans *trans,
+ struct bch_fs_usage *fs_usage)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_insert_entry *i;
+ static int warned_disk_usage = 0;
+ u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0;
+ char buf[200];
+
+ if (!bch2_fs_usage_apply(c, fs_usage, trans->disk_res) ||
+ warned_disk_usage ||
+ xchg(&warned_disk_usage, 1))
+ return;
- pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors);
+ pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors);
+
+ trans_for_each_update_iter(trans, i) {
+ struct btree_iter *iter = i->iter;
+ struct btree *b = iter->l[0].b;
+ struct btree_node_iter node_iter = iter->l[0].iter;
+ struct bkey_packed *_k;
pr_err("while inserting");
- bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(insert->k));
+ bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k));
pr_err("%s", buf);
pr_err("overlapping with");
@@ -1069,8 +1120,8 @@ void bch2_mark_update(struct btree_trans *trans,
k = bkey_disassemble(b, _k, &unpacked);
if (btree_node_is_extents(b)
- ? bkey_cmp(insert->k->k.p, bkey_start_pos(k.k)) <= 0
- : bkey_cmp(insert->k->k.p, k.k->p))
+ ? bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) <= 0
+ : bkey_cmp(i->k->k.p, k.k->p))
break;
bch2_bkey_val_to_text(&PBUF(buf), c, k);
@@ -1079,9 +1130,6 @@ void bch2_mark_update(struct btree_trans *trans,
bch2_btree_node_iter_advance(&node_iter, b);
}
}
-
- preempt_enable();
- percpu_up_read(&c->mark_lock);
}
/* Disk reservations: */
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index fc2c212392b6..e34c9d24dc38 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -219,13 +219,8 @@ static inline unsigned fs_usage_u64s(struct bch_fs *c)
READ_ONCE(c->replicas.nr);
}
-static inline struct bch_fs_usage *bch2_fs_usage_get_scratch(struct bch_fs *c)
-{
- struct bch_fs_usage *ret = this_cpu_ptr(c->usage_scratch);
-
- memset(ret, 0, fs_usage_u64s(c) * sizeof(u64));
- return ret;
-}
+void bch2_fs_usage_scratch_put(struct bch_fs *, struct bch_fs_usage *);
+struct bch_fs_usage *bch2_fs_usage_scratch_get(struct bch_fs *);
struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *);
@@ -256,10 +251,13 @@ int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c,
int bch2_mark_key(struct bch_fs *, struct bkey_s_c,
bool, s64, struct gc_pos,
struct bch_fs_usage *, u64, unsigned);
-void bch2_mark_update(struct btree_trans *, struct btree_insert_entry *);
int bch2_fs_usage_apply(struct bch_fs *, struct bch_fs_usage *,
struct disk_reservation *);
+void bch2_mark_update(struct btree_trans *, struct btree_insert_entry *,
+ struct bch_fs_usage *);
+void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage *);
+
/* disk reservations: */
void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *);
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 80531017b237..194b8d6da1bb 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -1190,11 +1190,12 @@ void bch2_insert_fixup_extent(struct btree_trans *trans,
if (s.deleting)
tmp.k.k.type = KEY_TYPE_discard;
-
+#if 0
+ /* disabled due to lock recursion - mark_lock: */
if (debug_check_bkeys(c))
bch2_bkey_debugcheck(c, iter->l[0].b,
bkey_i_to_s_c(&tmp.k));
-
+#endif
EBUG_ON(bkey_deleted(&tmp.k.k) || !tmp.k.k.size);
extent_bset_insert(c, iter, &tmp.k);
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index 72592df9afc0..b66217989b71 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -207,22 +207,29 @@ static bool __replicas_has_entry(struct bch_replicas_cpu *r,
return __replicas_entry_idx(r, search) >= 0;
}
-bool bch2_replicas_marked(struct bch_fs *c,
+static bool bch2_replicas_marked_locked(struct bch_fs *c,
struct bch_replicas_entry *search,
bool check_gc_replicas)
{
- bool marked;
-
if (!search->nr_devs)
return true;
verify_replicas_entry_sorted(search);
- percpu_down_read(&c->mark_lock);
- marked = __replicas_has_entry(&c->replicas, search) &&
+ return __replicas_has_entry(&c->replicas, search) &&
(!check_gc_replicas ||
likely((!c->replicas_gc.entries)) ||
__replicas_has_entry(&c->replicas_gc, search));
+}
+
+bool bch2_replicas_marked(struct bch_fs *c,
+ struct bch_replicas_entry *search,
+ bool check_gc_replicas)
+{
+ bool marked;
+
+ percpu_down_read(&c->mark_lock);
+ marked = bch2_replicas_marked_locked(c, search, check_gc_replicas);
percpu_up_read(&c->mark_lock);
return marked;
@@ -263,7 +270,7 @@ static int replicas_table_update(struct bch_fs *c,
struct bch_replicas_cpu *new_r)
{
struct bch_fs_usage __percpu *new_usage[2] = { NULL, NULL };
- struct bch_fs_usage __percpu *new_scratch = NULL;
+ struct bch_fs_usage *new_scratch = NULL;
unsigned bytes = sizeof(struct bch_fs_usage) +
sizeof(u64) * new_r->nr;
int ret = -ENOMEM;
@@ -273,8 +280,7 @@ static int replicas_table_update(struct bch_fs *c,
(c->usage[1] &&
!(new_usage[1] = __alloc_percpu_gfp(bytes, sizeof(u64),
GFP_NOIO))) ||
- !(new_scratch = __alloc_percpu_gfp(bytes, sizeof(u64),
- GFP_NOIO)))
+ !(new_scratch = kmalloc(bytes, GFP_NOIO)))
goto err;
if (c->usage[0])
@@ -290,7 +296,7 @@ static int replicas_table_update(struct bch_fs *c,
swap(c->replicas, *new_r);
ret = 0;
err:
- free_percpu(new_scratch);
+ kfree(new_scratch);
free_percpu(new_usage[1]);
free_percpu(new_usage[0]);
return ret;
@@ -390,9 +396,9 @@ int bch2_mark_replicas(struct bch_fs *c,
: bch2_mark_replicas_slowpath(c, r);
}
-bool bch2_bkey_replicas_marked(struct bch_fs *c,
- struct bkey_s_c k,
- bool check_gc_replicas)
+bool bch2_bkey_replicas_marked_locked(struct bch_fs *c,
+ struct bkey_s_c k,
+ bool check_gc_replicas)
{
struct bch_replicas_padded search;
struct bch_devs_list cached = bch2_bkey_cached_devs(k);
@@ -401,13 +407,27 @@ bool bch2_bkey_replicas_marked(struct bch_fs *c,
for (i = 0; i < cached.nr; i++) {
bch2_replicas_entry_cached(&search.e, cached.devs[i]);
- if (!bch2_replicas_marked(c, &search.e, check_gc_replicas))
+ if (!bch2_replicas_marked_locked(c, &search.e,
+ check_gc_replicas))
return false;
}
bkey_to_replicas(&search.e, k);
- return bch2_replicas_marked(c, &search.e, check_gc_replicas);
+ return bch2_replicas_marked_locked(c, &search.e, check_gc_replicas);
+}
+
+bool bch2_bkey_replicas_marked(struct bch_fs *c,
+ struct bkey_s_c k,
+ bool check_gc_replicas)
+{
+ bool marked;
+
+ percpu_down_read(&c->mark_lock);
+ marked = bch2_bkey_replicas_marked_locked(c, k, check_gc_replicas);
+ percpu_up_read(&c->mark_lock);
+
+ return marked;
}
int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h
index d1457c786bb5..0777e7056d55 100644
--- a/fs/bcachefs/replicas.h
+++ b/fs/bcachefs/replicas.h
@@ -26,6 +26,8 @@ bool bch2_replicas_marked(struct bch_fs *,
int bch2_mark_replicas(struct bch_fs *,
struct bch_replicas_entry *);
+bool bch2_bkey_replicas_marked_locked(struct bch_fs *,
+ struct bkey_s_c, bool);
bool bch2_bkey_replicas_marked(struct bch_fs *,
struct bkey_s_c, bool);
int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 4f627e91f041..b1eb70556f75 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -404,7 +404,7 @@ static void bch2_fs_free(struct bch_fs *c)
bch2_io_clock_exit(&c->io_clock[READ]);
bch2_fs_compress_exit(c);
percpu_free_rwsem(&c->mark_lock);
- free_percpu(c->usage_scratch);
+ kfree(c->usage_scratch);
free_percpu(c->usage[0]);
free_percpu(c->pcpu);
mempool_exit(&c->btree_iters_pool);
@@ -572,6 +572,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
mutex_init(&c->btree_reserve_cache_lock);
mutex_init(&c->btree_interior_update_lock);
+ mutex_init(&c->usage_scratch_lock);
+
mutex_init(&c->bio_bounce_pages_lock);
bio_list_init(&c->btree_write_error_list);