Diffstat (limited to 'fs/bcachefs/alloc_foreground.c')
-rw-r--r--  fs/bcachefs/alloc_foreground.c | 312
1 file changed, 114 insertions(+), 198 deletions(-)
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 372178c8d416..5a781fb4c794 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -107,14 +107,10 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
return;
}
- percpu_down_read(&c->mark_lock);
spin_lock(&ob->lock);
-
ob->valid = false;
ob->data_type = 0;
-
spin_unlock(&ob->lock);
- percpu_up_read(&c->mark_lock);
spin_lock(&c->freelist_lock);
bch2_open_bucket_hash_remove(c, ob);
@@ -156,6 +152,14 @@ static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
return ob;
}
+static inline bool is_superblock_bucket(struct bch_fs *c, struct bch_dev *ca, u64 b)
+{
+ if (c->curr_recovery_pass > BCH_RECOVERY_PASS_trans_mark_dev_sbs)
+ return false;
+
+ return bch2_is_superblock_bucket(ca, b);
+}
+
static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
{
BUG_ON(c->open_buckets_partial_nr >=
@@ -175,20 +179,6 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
closure_wake_up(&c->freelist_wait);
}
-/* _only_ for allocating the journal on a new device: */
-long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
-{
- while (ca->new_fs_bucket_idx < ca->mi.nbuckets) {
- u64 b = ca->new_fs_bucket_idx++;
-
- if (!is_superblock_bucket(ca, b) &&
- (!ca->buckets_nouse || !test_bit(b, ca->buckets_nouse)))
- return b;
- }
-
- return -1;
-}
-
static inline unsigned open_buckets_reserved(enum bch_watermark watermark)
{
switch (watermark) {
@@ -206,33 +196,44 @@ static inline unsigned open_buckets_reserved(enum bch_watermark watermark)
}
}
-static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
- u64 bucket,
- enum bch_watermark watermark,
- const struct bch_alloc_v4 *a,
- struct bucket_alloc_state *s,
- struct closure *cl)
+static inline bool may_alloc_bucket(struct bch_fs *c,
+ struct bpos bucket,
+ struct bucket_alloc_state *s)
{
- struct open_bucket *ob;
-
- if (unlikely(ca->buckets_nouse && test_bit(bucket, ca->buckets_nouse))) {
- s->skipped_nouse++;
- return NULL;
- }
-
- if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) {
+ if (bch2_bucket_is_open(c, bucket.inode, bucket.offset)) {
s->skipped_open++;
- return NULL;
+ return false;
}
- if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
- c->journal.flushed_seq_ondisk, ca->dev_idx, bucket)) {
+ u64 journal_seq_ready =
+ bch2_bucket_journal_seq_ready(&c->buckets_waiting_for_journal,
+ bucket.inode, bucket.offset);
+ if (journal_seq_ready > c->journal.flushed_seq_ondisk) {
+ if (journal_seq_ready > c->journal.flushing_seq)
+ s->need_journal_commit++;
s->skipped_need_journal_commit++;
- return NULL;
+ return false;
}
- if (bch2_bucket_nocow_is_locked(&c->nocow_locks, POS(ca->dev_idx, bucket))) {
+ if (bch2_bucket_nocow_is_locked(&c->nocow_locks, bucket)) {
s->skipped_nocow++;
+ return false;
+ }
+
+ return true;
+}
+
+static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *ca,
+ u64 bucket, u8 gen,
+ enum bch_watermark watermark,
+ struct bucket_alloc_state *s,
+ struct closure *cl)
+{
+ if (unlikely(is_superblock_bucket(c, ca, bucket)))
+ return NULL;
+
+ if (unlikely(ca->buckets_nouse && test_bit(bucket, ca->buckets_nouse))) {
+ s->skipped_nouse++;
return NULL;
}
@@ -254,14 +255,13 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
return NULL;
}
- ob = bch2_open_bucket_alloc(c);
+ struct open_bucket *ob = bch2_open_bucket_alloc(c);
spin_lock(&ob->lock);
-
ob->valid = true;
ob->sectors_free = ca->mi.bucket_size;
ob->dev = ca->dev_idx;
- ob->gen = a->gen;
+ ob->gen = gen;
ob->bucket = bucket;
spin_unlock(&ob->lock);
@@ -276,111 +276,29 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct bch_dev *
}
static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bch_dev *ca,
- enum bch_watermark watermark, u64 free_entry,
+ enum bch_watermark watermark,
struct bucket_alloc_state *s,
- struct bkey_s_c freespace_k,
+ struct btree_iter *freespace_iter,
struct closure *cl)
{
struct bch_fs *c = trans->c;
- struct btree_iter iter = { NULL };
- struct bkey_s_c k;
- struct open_bucket *ob;
- struct bch_alloc_v4 a_convert;
- const struct bch_alloc_v4 *a;
- u64 b = free_entry & ~(~0ULL << 56);
- unsigned genbits = free_entry >> 56;
- struct printbuf buf = PRINTBUF;
- int ret;
-
- if (b < ca->mi.first_bucket || b >= ca->mi.nbuckets) {
- prt_printf(&buf, "freespace btree has bucket outside allowed range %u-%llu\n"
- " freespace key ",
- ca->mi.first_bucket, ca->mi.nbuckets);
- bch2_bkey_val_to_text(&buf, c, freespace_k);
- bch2_trans_inconsistent(trans, "%s", buf.buf);
- ob = ERR_PTR(-EIO);
- goto err;
- }
+ u64 b = freespace_iter->pos.offset & ~(~0ULL << 56);
- k = bch2_bkey_get_iter(trans, &iter,
- BTREE_ID_alloc, POS(ca->dev_idx, b),
- BTREE_ITER_cached);
- ret = bkey_err(k);
- if (ret) {
- ob = ERR_PTR(ret);
- goto err;
- }
-
- a = bch2_alloc_to_v4(k, &a_convert);
-
- if (a->data_type != BCH_DATA_free) {
- if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_alloc_info) {
- ob = NULL;
- goto err;
- }
-
- prt_printf(&buf, "non free bucket in freespace btree\n"
- " freespace key ");
- bch2_bkey_val_to_text(&buf, c, freespace_k);
- prt_printf(&buf, "\n ");
- bch2_bkey_val_to_text(&buf, c, k);
- bch2_trans_inconsistent(trans, "%s", buf.buf);
- ob = ERR_PTR(-EIO);
- goto err;
- }
-
- if (genbits != (alloc_freespace_genbits(*a) >> 56) &&
- c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
- prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n"
- " freespace key ",
- genbits, alloc_freespace_genbits(*a) >> 56);
- bch2_bkey_val_to_text(&buf, c, freespace_k);
- prt_printf(&buf, "\n ");
- bch2_bkey_val_to_text(&buf, c, k);
- bch2_trans_inconsistent(trans, "%s", buf.buf);
- ob = ERR_PTR(-EIO);
- goto err;
- }
-
- if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_extents_to_backpointers) {
- struct bch_backpointer bp;
- struct bpos bp_pos = POS_MIN;
-
- ret = bch2_get_next_backpointer(trans, ca, POS(ca->dev_idx, b), -1,
- &bp_pos, &bp,
- BTREE_ITER_nopreserve);
- if (ret) {
- ob = ERR_PTR(ret);
- goto err;
- }
+ if (!may_alloc_bucket(c, POS(ca->dev_idx, b), s))
+ return NULL;
- if (!bkey_eq(bp_pos, POS_MAX)) {
- /*
- * Bucket may have data in it - we don't call
- * bc2h_trans_inconnsistent() because fsck hasn't
- * finished yet
- */
- ob = NULL;
- goto err;
- }
- }
+ u8 gen;
+ int ret = bch2_check_discard_freespace_key(trans, freespace_iter, &gen, true);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ if (ret)
+ return NULL;
- ob = __try_alloc_bucket(c, ca, b, watermark, a, s, cl);
- if (!ob)
- bch2_set_btree_iter_dontneed(&iter);
-err:
- if (iter.path)
- bch2_set_btree_iter_dontneed(&iter);
- bch2_trans_iter_exit(trans, &iter);
- printbuf_exit(&buf);
- return ob;
+ return __try_alloc_bucket(c, ca, b, gen, watermark, s, cl);
}
/*
* This path is for before the freespace btree is initialized:
- *
- * If ca->new_fs_bucket_idx is nonzero, we haven't yet marked superblock &
- * journal buckets - journal buckets will be < ca->new_fs_bucket_idx
*/
static noinline struct open_bucket *
bch2_bucket_alloc_early(struct btree_trans *trans,
@@ -389,10 +307,11 @@ bch2_bucket_alloc_early(struct btree_trans *trans,
struct bucket_alloc_state *s,
struct closure *cl)
{
+ struct bch_fs *c = trans->c;
struct btree_iter iter, citer;
struct bkey_s_c k, ck;
struct open_bucket *ob = NULL;
- u64 first_bucket = max_t(u64, ca->mi.first_bucket, ca->new_fs_bucket_idx);
+ u64 first_bucket = ca->mi.first_bucket;
u64 *dev_alloc_cursor = &ca->alloc_cursor[s->btree_bitmap];
u64 alloc_start = max(first_bucket, *dev_alloc_cursor);
u64 alloc_cursor = alloc_start;
@@ -415,10 +334,6 @@ again:
if (bkey_ge(k.k->p, POS(ca->dev_idx, ca->mi.nbuckets)))
break;
- if (ca->new_fs_bucket_idx &&
- is_superblock_bucket(ca, k.k->p.offset))
- continue;
-
if (s->btree_bitmap != BTREE_BITMAP_ANY &&
s->btree_bitmap != bch2_dev_btree_bitmap_marked_sectors(ca,
bucket_to_sector(ca, bucket), ca->mi.bucket_size)) {
@@ -452,7 +367,10 @@ again:
s->buckets_seen++;
- ob = __try_alloc_bucket(trans->c, ca, k.k->p.offset, watermark, a, s, cl);
+ ob = may_alloc_bucket(c, k.k->p, s)
+ ? __try_alloc_bucket(c, ca, k.k->p.offset, a->gen,
+ watermark, s, cl)
+ : NULL;
next:
bch2_set_btree_iter_dontneed(&citer);
bch2_trans_iter_exit(trans, &citer);
@@ -489,20 +407,21 @@ static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
u64 alloc_start = max_t(u64, ca->mi.first_bucket, READ_ONCE(*dev_alloc_cursor));
u64 alloc_cursor = alloc_start;
int ret;
-
- BUG_ON(ca->new_fs_bucket_idx);
again:
- for_each_btree_key_norestart(trans, iter, BTREE_ID_freespace,
- POS(ca->dev_idx, alloc_cursor), 0, k, ret) {
- if (k.k->p.inode != ca->dev_idx)
- break;
+ for_each_btree_key_max_norestart(trans, iter, BTREE_ID_freespace,
+ POS(ca->dev_idx, alloc_cursor),
+ POS(ca->dev_idx, U64_MAX),
+ 0, k, ret) {
+ /*
+ * peek normally doesn't trim extents - they can span iter.pos,
+ * which is not what we want here:
+ */
+ iter.k.size = iter.k.p.offset - iter.pos.offset;
- for (alloc_cursor = max(alloc_cursor, bkey_start_offset(k.k));
- alloc_cursor < k.k->p.offset;
- alloc_cursor++) {
+ while (iter.k.size) {
s->buckets_seen++;
- u64 bucket = alloc_cursor & ~(~0ULL << 56);
+ u64 bucket = iter.pos.offset & ~(~0ULL << 56);
if (s->btree_bitmap != BTREE_BITMAP_ANY &&
s->btree_bitmap != bch2_dev_btree_bitmap_marked_sectors(ca,
bucket_to_sector(ca, bucket), ca->mi.bucket_size)) {
@@ -511,32 +430,36 @@ again:
goto fail;
bucket = sector_to_bucket(ca,
- round_up(bucket_to_sector(ca, bucket) + 1,
+ round_up(bucket_to_sector(ca, bucket + 1),
1ULL << ca->mi.btree_bitmap_shift));
- u64 genbits = alloc_cursor >> 56;
- alloc_cursor = bucket | (genbits << 56);
+ alloc_cursor = bucket|(iter.pos.offset & (~0ULL << 56));
- if (alloc_cursor > k.k->p.offset)
- bch2_btree_iter_set_pos(&iter, POS(ca->dev_idx, alloc_cursor));
+ bch2_btree_iter_set_pos(&iter, POS(ca->dev_idx, alloc_cursor));
s->skipped_mi_btree_bitmap++;
- continue;
+ goto next;
}
- ob = try_alloc_bucket(trans, ca, watermark,
- alloc_cursor, s, k, cl);
+ ob = try_alloc_bucket(trans, ca, watermark, s, &iter, cl);
if (ob) {
+ if (!IS_ERR(ob))
+ *dev_alloc_cursor = iter.pos.offset;
bch2_set_btree_iter_dontneed(&iter);
break;
}
- }
+ iter.k.size--;
+ iter.pos.offset++;
+ }
+next:
if (ob || ret)
break;
}
fail:
bch2_trans_iter_exit(trans, &iter);
- if (!ob && ret)
+ BUG_ON(ob && ret);
+
+ if (ret)
ob = ERR_PTR(ret);
if (!ob && alloc_start > ca->mi.first_bucket) {
@@ -544,8 +467,6 @@ fail:
goto again;
}
- *dev_alloc_cursor = alloc_cursor;
-
return ob;
}
@@ -595,6 +516,7 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, struct bch_dev *ca,
* @watermark: how important is this allocation?
* @data_type: BCH_DATA_journal, btree, user...
* @cl: if not NULL, closure to be used to wait if buckets not available
+ * @nowait: if true, do not wait for buckets to become available
* @usage: for secondarily also returning the current device usage
*
* Returns: an open_bucket on success, or an ERR_PTR() on failure.
@@ -629,6 +551,10 @@ again:
bch2_dev_do_invalidates(ca);
if (!avail) {
+ if (watermark > BCH_WATERMARK_normal &&
+ c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations)
+ goto alloc;
+
if (cl && !waiting) {
closure_wait(&c->freelist_wait, cl);
waiting = true;
@@ -648,7 +574,7 @@ alloc:
? bch2_bucket_alloc_freelist(trans, ca, watermark, &s, cl)
: bch2_bucket_alloc_early(trans, ca, watermark, &s, cl);
- if (s.skipped_need_journal_commit * 2 > avail)
+ if (s.need_journal_commit * 2 > avail)
bch2_journal_flush_async(&c->journal, NULL);
if (!ob && s.btree_bitmap != BTREE_BITMAP_ANY) {
@@ -711,9 +637,9 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c,
unsigned i;
for_each_set_bit(i, devs->d, BCH_SB_MEMBERS_MAX)
- ret.devs[ret.nr++] = i;
+ ret.data[ret.nr++] = i;
- bubble_sort(ret.devs, ret.nr, dev_stripe_cmp);
+ bubble_sort(ret.data, ret.nr, dev_stripe_cmp);
return ret;
}
@@ -785,18 +711,13 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
struct closure *cl)
{
struct bch_fs *c = trans->c;
- struct dev_alloc_list devs_sorted =
- bch2_dev_alloc_list(c, stripe, devs_may_alloc);
int ret = -BCH_ERR_insufficient_devices;
BUG_ON(*nr_effective >= nr_replicas);
- for (unsigned i = 0; i < devs_sorted.nr; i++) {
- struct bch_dev_usage usage;
- struct open_bucket *ob;
-
- unsigned dev = devs_sorted.devs[i];
- struct bch_dev *ca = bch2_dev_tryget_noerror(c, dev);
+ struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, stripe, devs_may_alloc);
+ darray_for_each(devs_sorted, i) {
+ struct bch_dev *ca = bch2_dev_tryget_noerror(c, *i);
if (!ca)
continue;
@@ -805,8 +726,9 @@ int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
continue;
}
- ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type,
- cl, flags & BCH_WRITE_ALLOC_NOWAIT, &usage);
+ struct bch_dev_usage usage;
+ struct open_bucket *ob = bch2_bucket_alloc_trans(trans, ca, watermark, data_type,
+ cl, flags & BCH_WRITE_ALLOC_NOWAIT, &usage);
if (!IS_ERR(ob))
bch2_dev_stripe_increment_inlined(ca, stripe, &usage);
bch2_dev_put(ca);
@@ -850,10 +772,6 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
struct closure *cl)
{
struct bch_fs *c = trans->c;
- struct dev_alloc_list devs_sorted;
- struct ec_stripe_head *h;
- struct open_bucket *ob;
- unsigned i, ec_idx;
int ret = 0;
if (nr_replicas < 2)
@@ -862,34 +780,32 @@ static int bucket_alloc_from_stripe(struct btree_trans *trans,
if (ec_open_bucket(c, ptrs))
return 0;
- h = bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, watermark, cl);
+ struct ec_stripe_head *h =
+ bch2_ec_stripe_head_get(trans, target, 0, nr_replicas - 1, watermark, cl);
if (IS_ERR(h))
return PTR_ERR(h);
if (!h)
return 0;
- devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);
-
- for (i = 0; i < devs_sorted.nr; i++)
- for (ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) {
+ struct dev_alloc_list devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);
+ darray_for_each(devs_sorted, i)
+ for (unsigned ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) {
if (!h->s->blocks[ec_idx])
continue;
- ob = c->open_buckets + h->s->blocks[ec_idx];
- if (ob->dev == devs_sorted.devs[i] &&
- !test_and_set_bit(ec_idx, h->s->blocks_allocated))
- goto got_bucket;
+ struct open_bucket *ob = c->open_buckets + h->s->blocks[ec_idx];
+ if (ob->dev == *i && !test_and_set_bit(ec_idx, h->s->blocks_allocated)) {
+ ob->ec_idx = ec_idx;
+ ob->ec = h->s;
+ ec_stripe_new_get(h->s, STRIPE_REF_io);
+
+ ret = add_new_bucket(c, ptrs, devs_may_alloc,
+ nr_replicas, nr_effective,
+ have_cache, ob);
+ goto out;
+ }
}
- goto out_put_head;
-got_bucket:
- ob->ec_idx = ec_idx;
- ob->ec = h->s;
- ec_stripe_new_get(h->s, STRIPE_REF_io);
-
- ret = add_new_bucket(c, ptrs, devs_may_alloc,
- nr_replicas, nr_effective,
- have_cache, ob);
-out_put_head:
+out:
bch2_ec_stripe_head_put(c, h);
return ret;
}
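
Hedged sketch (not part of this patch): the freespace btree keys walked by bch2_bucket_alloc_freelist() and try_alloc_bucket() pack a 56-bit bucket number into the low bits of the key offset, with the allocation generation bits in the top 8 bits; that is what the repeated "& ~(~0ULL << 56)" masks and ">> 56" shifts above are unpacking. The names below (FREESPACE_BUCKET_BITS, freespace_key_offset) are illustrative only, not identifiers from bcachefs.

/* standalone illustration of the 56-bit bucket / 8-bit genbits packing */
#include <stdint.h>
#include <stdio.h>

#define FREESPACE_BUCKET_BITS	56

static uint64_t freespace_key_offset(uint64_t bucket, uint64_t genbits)
{
	/* bucket in the low 56 bits, genbits in the high 8 bits */
	return (bucket & ~(~0ULL << FREESPACE_BUCKET_BITS)) |
	       (genbits << FREESPACE_BUCKET_BITS);
}

int main(void)
{
	uint64_t offset = freespace_key_offset(1234, 3);

	/* unpack the two fields the same way the allocator code above does */
	uint64_t bucket  = offset & ~(~0ULL << FREESPACE_BUCKET_BITS);
	uint64_t genbits = offset >> FREESPACE_BUCKET_BITS;

	printf("bucket %llu genbits %llu\n",
	       (unsigned long long) bucket,
	       (unsigned long long) genbits);
	return 0;
}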