summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2020-07-07 05:33:54 +0300
committer Kent Overstreet <kent.overstreet@linux.dev> 2023-10-23 00:08:42 +0300
commit f6b94a3baa956ff10a52a545a9ad60f35e88e683 (patch)
tree b973107ffc3a29c6dd0febbb7b8c42faa7bb2e97
parent 703e2a43bf30c1d5610fa7d1a823911d96487dac (diff)
download linux-f6b94a3baa956ff10a52a545a9ad60f35e88e683.tar.xz
bcachefs: Refactor stripe creation
Prep work for the patch to update existing stripes with new data blocks. This moves allocating new stripes into ec.c, and also sets up the data structures so that we can handle only allocating some of the blocks in a stripe. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- fs/bcachefs/alloc_foreground.c 103
-rw-r--r-- fs/bcachefs/alloc_foreground.h 5
-rw-r--r-- fs/bcachefs/ec.c 205
-rw-r--r-- fs/bcachefs/ec.h 6
4 files changed, 180 insertions, 139 deletions
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 04c1c1b592bc..1675f0dfca8a 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -344,10 +344,9 @@ struct dev_alloc_list bch2_dev_alloc_list(struct bch_fs *c,
struct bch_devs_mask *devs)
{
struct dev_alloc_list ret = { .nr = 0 };
- struct bch_dev *ca;
unsigned i;
- for_each_member_device_rcu(ca, c, i, devs)
+ for_each_set_bit(i, devs->d, BCH_SB_MEMBERS_MAX)
ret.devs[ret.nr++] = i;
bubble_sort(ret.devs, ret.nr, dev_stripe_cmp);
@@ -396,16 +395,16 @@ static void add_new_bucket(struct bch_fs *c,
ob_push(c, ptrs, ob);
}
-static int bch2_bucket_alloc_set(struct bch_fs *c,
- struct open_buckets *ptrs,
- struct dev_stripe_state *stripe,
- struct bch_devs_mask *devs_may_alloc,
- unsigned nr_replicas,
- unsigned *nr_effective,
- bool *have_cache,
- enum alloc_reserve reserve,
- unsigned flags,
- struct closure *cl)
+int bch2_bucket_alloc_set(struct bch_fs *c,
+ struct open_buckets *ptrs,
+ struct dev_stripe_state *stripe,
+ struct bch_devs_mask *devs_may_alloc,
+ unsigned nr_replicas,
+ unsigned *nr_effective,
+ bool *have_cache,
+ enum alloc_reserve reserve,
+ unsigned flags,
+ struct closure *cl)
{
struct dev_alloc_list devs_sorted =
bch2_dev_alloc_list(c, stripe, devs_may_alloc);
@@ -456,74 +455,6 @@ static int bch2_bucket_alloc_set(struct bch_fs *c,
/* Allocate from stripes: */
/*
- * XXX: use a higher watermark for allocating open buckets here:
- */
-static int ec_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
-{
- struct bch_devs_mask devs;
- struct open_bucket *ob;
- unsigned i, nr_have = 0, nr_data =
- min_t(unsigned, h->nr_active_devs,
- EC_STRIPE_MAX) - h->redundancy;
- bool have_cache = true;
- int ret = 0;
-
- BUG_ON(h->blocks.nr > nr_data);
- BUG_ON(h->parity.nr > h->redundancy);
-
- devs = h->devs;
-
- open_bucket_for_each(c, &h->parity, ob, i)
- __clear_bit(ob->ptr.dev, devs.d);
- open_bucket_for_each(c, &h->blocks, ob, i)
- __clear_bit(ob->ptr.dev, devs.d);
-
- percpu_down_read(&c->mark_lock);
- rcu_read_lock();
-
- if (h->parity.nr < h->redundancy) {
- nr_have = h->parity.nr;
-
- ret = bch2_bucket_alloc_set(c, &h->parity,
- &h->parity_stripe,
- &devs,
- h->redundancy,
- &nr_have,
- &have_cache,
- RESERVE_NONE,
- 0,
- NULL);
- if (ret)
- goto err;
- }
-
- if (h->blocks.nr < nr_data) {
- nr_have = h->blocks.nr;
-
- ret = bch2_bucket_alloc_set(c, &h->blocks,
- &h->block_stripe,
- &devs,
- nr_data,
- &nr_have,
- &have_cache,
- RESERVE_NONE,
- 0,
- NULL);
- if (ret)
- goto err;
- }
-
- rcu_read_unlock();
- percpu_up_read(&c->mark_lock);
-
- return bch2_ec_stripe_new_alloc(c, h);
-err:
- rcu_read_unlock();
- percpu_up_read(&c->mark_lock);
- return -1;
-}
-
-/*
* if we can't allocate a new stripe because there are already too many
* partially filled stripes, force allocating from an existing stripe even when
* it's to a device we don't want:
@@ -555,27 +486,23 @@ static void bucket_alloc_from_stripe(struct bch_fs *c,
if (ec_open_bucket(c, ptrs))
return;
- h = bch2_ec_stripe_head_get(c, target, erasure_code, nr_replicas - 1);
+ h = bch2_ec_stripe_head_get(c, target, 0, nr_replicas - 1);
if (!h)
return;
- if (!h->s && ec_stripe_alloc(c, h))
- goto out_put_head;
-
- rcu_read_lock();
devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);
- rcu_read_unlock();
for (i = 0; i < devs_sorted.nr; i++)
open_bucket_for_each(c, &h->s->blocks, ob, ec_idx)
if (ob->ptr.dev == devs_sorted.devs[i] &&
- !test_and_set_bit(ec_idx, h->s->blocks_allocated))
+ !test_and_set_bit(h->s->data_block_idx[ec_idx],
+ h->s->blocks_allocated))
goto got_bucket;
goto out_put_head;
got_bucket:
ca = bch_dev_bkey_exists(c, ob->ptr.dev);
- ob->ec_idx = ec_idx;
+ ob->ec_idx = h->s->data_block_idx[ec_idx];
ob->ec = h->s;
add_new_bucket(c, ptrs, devs_may_alloc,
diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h
index 687f973e4b3a..17a6869bb8cd 100644
--- a/fs/bcachefs/alloc_foreground.h
+++ b/fs/bcachefs/alloc_foreground.h
@@ -92,6 +92,11 @@ static inline void bch2_open_bucket_get(struct bch_fs *c,
}
}
+int bch2_bucket_alloc_set(struct bch_fs *, struct open_buckets *,
+ struct dev_stripe_state *, struct bch_devs_mask *,
+ unsigned, unsigned *, bool *, enum alloc_reserve,
+ unsigned, struct closure *);
+
struct write_point *bch2_alloc_sectors_start(struct bch_fs *,
unsigned, unsigned,
struct write_point_specifier,
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index b1084b74778a..8d8683f8b2df 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -200,40 +200,6 @@ static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
return false;
}
-static void ec_stripe_key_init(struct bch_fs *c,
- struct bkey_i_stripe *s,
- struct open_buckets *blocks,
- struct open_buckets *parity,
- unsigned stripe_size)
-{
- struct open_bucket *ob;
- unsigned i, u64s;
-
- bkey_stripe_init(&s->k_i);
- s->v.sectors = cpu_to_le16(stripe_size);
- s->v.algorithm = 0;
- s->v.nr_blocks = parity->nr + blocks->nr;
- s->v.nr_redundant = parity->nr;
- s->v.csum_granularity_bits = ilog2(c->sb.encoded_extent_max);
- s->v.csum_type = BCH_CSUM_CRC32C;
- s->v.pad = 0;
-
- open_bucket_for_each(c, blocks, ob, i)
- s->v.ptrs[i] = ob->ptr;
-
- open_bucket_for_each(c, parity, ob, i)
- s->v.ptrs[blocks->nr + i] = ob->ptr;
-
- while ((u64s = stripe_val_u64s(&s->v)) > BKEY_VAL_U64s_MAX) {
- BUG_ON(1 << s->v.csum_granularity_bits >=
- le16_to_cpu(s->v.sectors) ||
- s->v.csum_granularity_bits == U8_MAX);
- s->v.csum_granularity_bits++;
- }
-
- set_bkey_val_u64s(&s->k, u64s);
-}
-
/* Checksumming: */
static void ec_generate_checksums(struct ec_stripe_buf *buf)
@@ -866,6 +832,8 @@ static void ec_stripe_create(struct ec_stripe_new *s)
goto err;
}
+ BUG_ON(!s->allocated);
+
if (!percpu_ref_tryget(&c->writes))
goto err;
@@ -953,6 +921,8 @@ static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
{
struct ec_stripe_new *s = h->s;
+ BUG_ON(!s->allocated && !s->err);
+
h->s = NULL;
s->pending = true;
@@ -1063,14 +1033,38 @@ static unsigned pick_blocksize(struct bch_fs *c,
return best.size;
}
-int bch2_ec_stripe_new_alloc(struct bch_fs *c, struct ec_stripe_head *h)
+static void ec_stripe_key_init(struct bch_fs *c,
+ struct bkey_i_stripe *s,
+ unsigned nr_data,
+ unsigned nr_parity,
+ unsigned stripe_size)
+{
+ unsigned u64s;
+
+ bkey_stripe_init(&s->k_i);
+ s->v.sectors = cpu_to_le16(stripe_size);
+ s->v.algorithm = 0;
+ s->v.nr_blocks = nr_data + nr_parity;
+ s->v.nr_redundant = nr_parity;
+ s->v.csum_granularity_bits = ilog2(c->sb.encoded_extent_max);
+ s->v.csum_type = BCH_CSUM_CRC32C;
+ s->v.pad = 0;
+
+ while ((u64s = stripe_val_u64s(&s->v)) > BKEY_VAL_U64s_MAX) {
+ BUG_ON(1 << s->v.csum_granularity_bits >=
+ le16_to_cpu(s->v.sectors) ||
+ s->v.csum_granularity_bits == U8_MAX);
+ s->v.csum_granularity_bits++;
+ }
+
+ set_bkey_val_u64s(&s->k, u64s);
+}
+
+static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
{
struct ec_stripe_new *s;
unsigned i;
- BUG_ON(h->parity.nr != h->redundancy);
- BUG_ON(!h->blocks.nr);
- BUG_ON(h->parity.nr + h->blocks.nr > EC_STRIPE_MAX);
lockdep_assert_held(&h->lock);
s = kzalloc(sizeof(*s), GFP_KERNEL);
@@ -1081,11 +1075,9 @@ int bch2_ec_stripe_new_alloc(struct bch_fs *c, struct ec_stripe_head *h)
atomic_set(&s->pin, 1);
s->c = c;
s->h = h;
- s->blocks = h->blocks;
- s->parity = h->parity;
-
- memset(&h->blocks, 0, sizeof(h->blocks));
- memset(&h->parity, 0, sizeof(h->parity));
+ s->nr_data = min_t(unsigned, h->nr_active_devs,
+ EC_STRIPE_MAX) - h->redundancy;
+ s->nr_parity = h->redundancy;
bch2_keylist_init(&s->keys, s->inline_keys);
@@ -1093,9 +1085,8 @@ int bch2_ec_stripe_new_alloc(struct bch_fs *c, struct ec_stripe_head *h)
s->stripe.size = h->blocksize;
memset(s->stripe.valid, 0xFF, sizeof(s->stripe.valid));
- ec_stripe_key_init(c, &s->stripe.key,
- &s->blocks, &s->parity,
- h->blocksize);
+ ec_stripe_key_init(c, &s->stripe.key, s->nr_data,
+ s->nr_parity, h->blocksize);
for (i = 0; i < s->stripe.key.v.nr_blocks; i++) {
s->stripe.data[i] = kvpmalloc(s->stripe.size << 9, GFP_KERNEL);
@@ -1153,6 +1144,7 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
{
if (h->s &&
+ h->s->allocated &&
bitmap_weight(h->s->blocks_allocated,
h->s->blocks.nr) == h->s->blocks.nr)
ec_stripe_set_pending(c, h);
@@ -1160,7 +1152,7 @@ void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
mutex_unlock(&h->lock);
}
-struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
+struct ec_stripe_head *__bch2_ec_stripe_head_get(struct bch_fs *c,
unsigned target,
unsigned algo,
unsigned redundancy)
@@ -1185,6 +1177,122 @@ found:
return h;
}
+/*
+ * XXX: use a higher watermark for allocating open buckets here:
+ */
+static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h)
+{
+ struct bch_devs_mask devs;
+ struct open_bucket *ob;
+ unsigned i, nr_have, nr_data =
+ min_t(unsigned, h->nr_active_devs,
+ EC_STRIPE_MAX) - h->redundancy;
+ bool have_cache = true;
+ int ret = 0;
+
+ devs = h->devs;
+
+ for_each_set_bit(i, h->s->blocks_allocated, EC_STRIPE_MAX) {
+ __clear_bit(h->s->stripe.key.v.ptrs[i].dev, devs.d);
+ --nr_data;
+ }
+
+ BUG_ON(h->s->blocks.nr > nr_data);
+ BUG_ON(h->s->parity.nr > h->redundancy);
+
+ open_bucket_for_each(c, &h->s->parity, ob, i)
+ __clear_bit(ob->ptr.dev, devs.d);
+ open_bucket_for_each(c, &h->s->blocks, ob, i)
+ __clear_bit(ob->ptr.dev, devs.d);
+
+ percpu_down_read(&c->mark_lock);
+ rcu_read_lock();
+
+ if (h->s->parity.nr < h->redundancy) {
+ nr_have = h->s->parity.nr;
+
+ ret = bch2_bucket_alloc_set(c, &h->s->parity,
+ &h->parity_stripe,
+ &devs,
+ h->redundancy,
+ &nr_have,
+ &have_cache,
+ RESERVE_NONE,
+ 0,
+ NULL);
+ if (ret)
+ goto err;
+ }
+
+ if (h->s->blocks.nr < nr_data) {
+ nr_have = h->s->blocks.nr;
+
+ ret = bch2_bucket_alloc_set(c, &h->s->blocks,
+ &h->block_stripe,
+ &devs,
+ nr_data,
+ &nr_have,
+ &have_cache,
+ RESERVE_NONE,
+ 0,
+ NULL);
+ if (ret)
+ goto err;
+ }
+err:
+ rcu_read_unlock();
+ percpu_up_read(&c->mark_lock);
+ return ret;
+}
+
+struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
+ unsigned target,
+ unsigned algo,
+ unsigned redundancy)
+{
+ struct closure cl;
+ struct ec_stripe_head *h;
+ struct open_bucket *ob;
+ unsigned i, data_idx = 0;
+
+ closure_init_stack(&cl);
+
+ h = __bch2_ec_stripe_head_get(c, target, algo, redundancy);
+ if (!h)
+ return NULL;
+
+ if (!h->s && ec_new_stripe_alloc(c, h)) {
+ bch2_ec_stripe_head_put(c, h);
+ return NULL;
+ }
+
+ if (!h->s->allocated) {
+ if (new_stripe_alloc_buckets(c, h)) {
+ bch2_ec_stripe_head_put(c, h);
+ h = NULL;
+ goto out;
+ }
+
+ open_bucket_for_each(c, &h->s->blocks, ob, i) {
+ data_idx = find_next_zero_bit(h->s->blocks_allocated,
+ h->s->nr_data, data_idx);
+ BUG_ON(data_idx >= h->s->nr_data);
+
+ h->s->stripe.key.v.ptrs[data_idx] = ob->ptr;
+ h->s->data_block_idx[i] = data_idx;
+ data_idx++;
+ }
+
+ open_bucket_for_each(c, &h->s->parity, ob, i)
+ h->s->stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr;
+
+ h->s->allocated = true;
+ }
+out:
+ closure_sync(&cl);
+ return h;
+}
+
void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
{
struct ec_stripe_head *h;
@@ -1195,9 +1303,6 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
list_for_each_entry(h, &c->ec_stripe_head_list, list) {
mutex_lock(&h->lock);
- bch2_open_buckets_stop_dev(c, ca, &h->blocks);
- bch2_open_buckets_stop_dev(c, ca, &h->parity);
-
if (!h->s)
goto unlock;
diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h
index 6f9354f82656..d7396885792e 100644
--- a/fs/bcachefs/ec.h
+++ b/fs/bcachefs/ec.h
@@ -92,11 +92,15 @@ struct ec_stripe_new {
atomic_t pin;
int err;
- bool pending;
+ u8 nr_data;
+ u8 nr_parity;
+ bool allocated;
+ bool pending;
unsigned long blocks_allocated[BITS_TO_LONGS(EC_STRIPE_MAX)];
struct open_buckets blocks;
+ u8 data_block_idx[EC_STRIPE_MAX];
struct open_buckets parity;
struct keylist keys;