summaryrefslogtreecommitdiff
path: root/fs/bcachefs/ec.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/ec.c')
-rw-r--r--fs/bcachefs/ec.c787
1 files changed, 319 insertions, 468 deletions
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 6094afb0c6be..c6cb26981923 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -20,12 +20,14 @@
#include "io_read.h"
#include "io_write.h"
#include "keylist.h"
+#include "lru.h"
#include "recovery.h"
#include "replicas.h"
#include "super-io.h"
#include "util.h"
#include <linux/sort.h>
+#include <linux/string_choices.h>
#ifdef __KERNEL__
@@ -103,13 +105,15 @@ struct ec_bio {
struct bch_dev *ca;
struct ec_stripe_buf *buf;
size_t idx;
+ int rw;
+ u64 submit_time;
struct bio bio;
};
/* Stripes btree keys: */
int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k,
- enum bch_validate_flags flags)
+ struct bkey_validate_context from)
{
const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
int ret = 0;
@@ -129,7 +133,7 @@ int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k,
"invalid csum granularity (%u >= 64)",
s->csum_granularity_bits);
- ret = bch2_bkey_ptrs_validate(c, k, flags);
+ ret = bch2_bkey_ptrs_validate(c, k, from);
fsck_err:
return ret;
}
@@ -297,20 +301,31 @@ static int mark_stripe_bucket(struct btree_trans *trans,
struct bpos bucket = PTR_BUCKET_POS(ca, ptr);
if (flags & BTREE_TRIGGER_transactional) {
+ struct extent_ptr_decoded p = {
+ .ptr = *ptr,
+ .crc = bch2_extent_crc_unpack(s.k, NULL),
+ };
+ struct bkey_i_backpointer bp;
+ bch2_extent_ptr_to_bp(c, BTREE_ID_stripes, 0, s.s_c, p,
+ (const union bch_extent_entry *) ptr, &bp);
+
struct bkey_i_alloc_v4 *a =
bch2_trans_start_alloc_update(trans, bucket, 0);
- ret = PTR_ERR_OR_ZERO(a) ?:
- __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &a->v, flags);
+ ret = PTR_ERR_OR_ZERO(a) ?:
+ __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &a->v, flags) ?:
+ bch2_bucket_backpointer_mod(trans, s.s_c, &bp,
+ !(flags & BTREE_TRIGGER_overwrite));
+ if (ret)
+ goto err;
}
if (flags & BTREE_TRIGGER_gc) {
- percpu_down_read(&c->mark_lock);
struct bucket *g = gc_bucket(ca, bucket.offset);
- if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s",
+ if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n%s",
ptr->dev,
(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
ret = -BCH_ERR_mark_stripe;
- goto err_unlock;
+ goto err;
}
bucket_lock(g);
@@ -318,8 +333,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags);
alloc_to_bucket(g, new);
bucket_unlock(g);
-err_unlock:
- percpu_up_read(&c->mark_lock);
+
if (!ret)
ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags);
}
@@ -367,19 +381,6 @@ static int mark_stripe_buckets(struct btree_trans *trans,
return 0;
}
-static inline void stripe_to_mem(struct stripe *m, const struct bch_stripe *s)
-{
- m->sectors = le16_to_cpu(s->sectors);
- m->algorithm = s->algorithm;
- m->nr_blocks = s->nr_blocks;
- m->nr_redundant = s->nr_redundant;
- m->disk_label = s->disk_label;
- m->blocks_nonempty = 0;
-
- for (unsigned i = 0; i < s->nr_blocks; i++)
- m->blocks_nonempty += !!stripe_blockcount_get(s, i);
-}
-
int bch2_trigger_stripe(struct btree_trans *trans,
enum btree_id btree, unsigned level,
struct bkey_s_c old, struct bkey_s _new,
@@ -400,6 +401,15 @@ int bch2_trigger_stripe(struct btree_trans *trans,
(new_s->nr_blocks != old_s->nr_blocks ||
new_s->nr_redundant != old_s->nr_redundant));
+ if (flags & BTREE_TRIGGER_transactional) {
+ int ret = bch2_lru_change(trans,
+ BCH_LRU_STRIPE_FRAGMENTATION,
+ idx,
+ stripe_lru_pos(old_s),
+ stripe_lru_pos(new_s));
+ if (ret)
+ return ret;
+ }
if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) {
/*
@@ -444,24 +454,25 @@ int bch2_trigger_stripe(struct btree_trans *trans,
if (new_s) {
s64 sectors = (u64) le16_to_cpu(new_s->sectors) * new_s->nr_redundant;
- struct disk_accounting_pos acc = {
- .type = BCH_DISK_ACCOUNTING_replicas,
- };
+ struct disk_accounting_pos acc;
+ memset(&acc, 0, sizeof(acc));
+ acc.type = BCH_DISK_ACCOUNTING_replicas;
bch2_bkey_to_replicas(&acc.replicas, new);
int ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, gc);
if (ret)
return ret;
if (gc)
- memcpy(&gc->r.e, &acc.replicas, replicas_entry_bytes(&acc.replicas));
+ unsafe_memcpy(&gc->r.e, &acc.replicas,
+ replicas_entry_bytes(&acc.replicas), "VLA");
}
if (old_s) {
s64 sectors = -((s64) le16_to_cpu(old_s->sectors)) * old_s->nr_redundant;
- struct disk_accounting_pos acc = {
- .type = BCH_DISK_ACCOUNTING_replicas,
- };
+ struct disk_accounting_pos acc;
+ memset(&acc, 0, sizeof(acc));
+ acc.type = BCH_DISK_ACCOUNTING_replicas;
bch2_bkey_to_replicas(&acc.replicas, old);
int ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, gc);
if (ret)
@@ -473,38 +484,6 @@ int bch2_trigger_stripe(struct btree_trans *trans,
return ret;
}
- if (flags & BTREE_TRIGGER_atomic) {
- struct stripe *m = genradix_ptr(&c->stripes, idx);
-
- if (!m) {
- struct printbuf buf1 = PRINTBUF;
- struct printbuf buf2 = PRINTBUF;
-
- bch2_bkey_val_to_text(&buf1, c, old);
- bch2_bkey_val_to_text(&buf2, c, new);
- bch_err_ratelimited(c, "error marking nonexistent stripe %llu while marking\n"
- "old %s\n"
- "new %s", idx, buf1.buf, buf2.buf);
- printbuf_exit(&buf2);
- printbuf_exit(&buf1);
- bch2_inconsistent_error(c);
- return -1;
- }
-
- if (!new_s) {
- bch2_stripes_heap_del(c, m, idx);
-
- memset(m, 0, sizeof(*m));
- } else {
- stripe_to_mem(m, new_s);
-
- if (!old_s)
- bch2_stripes_heap_insert(c, m, idx);
- else
- bch2_stripes_heap_update(c, m, idx);
- }
- }
-
return 0;
}
@@ -528,20 +507,14 @@ static const struct bch_extent_ptr *bkey_matches_stripe(struct bch_stripe *s,
static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
{
- switch (k.k->type) {
- case KEY_TYPE_extent: {
- struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
- const union bch_extent_entry *entry;
-
- extent_for_each_entry(e, entry)
- if (extent_entry_type(entry) ==
- BCH_EXTENT_ENTRY_stripe_ptr &&
- entry->stripe_ptr.idx == idx)
- return true;
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
- break;
- }
- }
+ bkey_extent_entry_for_each(ptrs, entry)
+ if (extent_entry_type(entry) ==
+ BCH_EXTENT_ENTRY_stripe_ptr &&
+ entry->stripe_ptr.idx == idx)
+ return true;
return false;
}
@@ -726,15 +699,17 @@ static void ec_block_endio(struct bio *bio)
struct bch_extent_ptr *ptr = &v->ptrs[ec_bio->idx];
struct bch_dev *ca = ec_bio->ca;
struct closure *cl = bio->bi_private;
+ int rw = ec_bio->rw;
+
+ bch2_account_io_completion(ca, bio_data_dir(bio),
+ ec_bio->submit_time, !bio->bi_status);
- if (bch2_dev_io_err_on(bio->bi_status, ca,
- bio_data_dir(bio)
- ? BCH_MEMBER_ERROR_write
- : BCH_MEMBER_ERROR_read,
- "erasure coding %s error: %s",
- bio_data_dir(bio) ? "write" : "read",
- bch2_blk_status_to_str(bio->bi_status)))
+ if (bio->bi_status) {
+ bch_err_dev_ratelimited(ca, "erasure coding %s error: %s",
+ str_write_read(bio_data_dir(bio)),
+ bch2_blk_status_to_str(bio->bi_status));
clear_bit(ec_bio->idx, ec_bio->buf->valid);
+ }
int stale = dev_ptr_stale(ca, ptr);
if (stale) {
@@ -746,7 +721,7 @@ static void ec_block_endio(struct bio *bio)
}
bio_put(&ec_bio->bio);
- percpu_ref_put(&ca->io_ref);
+ percpu_ref_put(&ca->io_ref[rw]);
closure_put(cl);
}
@@ -797,6 +772,8 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
ec_bio->ca = ca;
ec_bio->buf = buf;
ec_bio->idx = idx;
+ ec_bio->rw = rw;
+ ec_bio->submit_time = local_clock();
ec_bio->bio.bi_iter.bi_sector = ptr->offset + buf->offset + (offset >> 9);
ec_bio->bio.bi_end_io = ec_block_endio;
@@ -805,14 +782,14 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf,
bch2_bio_map(&ec_bio->bio, buf->data[idx] + offset, b);
closure_get(cl);
- percpu_ref_get(&ca->io_ref);
+ percpu_ref_get(&ca->io_ref[rw]);
submit_bio(&ec_bio->bio);
offset += b;
}
- percpu_ref_put(&ca->io_ref);
+ percpu_ref_put(&ca->io_ref[rw]);
}
static int get_stripe_key_trans(struct btree_trans *trans, u64 idx,
@@ -909,7 +886,7 @@ err:
bch2_bkey_val_to_text(&msgbuf, c, orig_k);
bch_err_ratelimited(c,
"error doing reconstruct read: %s\n %s", msg, msgbuf.buf);
- printbuf_exit(&msgbuf);;
+ printbuf_exit(&msgbuf);
ret = -BCH_ERR_stripe_reconstruct;
goto out;
}
@@ -918,26 +895,6 @@ err:
static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
{
- ec_stripes_heap n, *h = &c->ec_stripes_heap;
-
- if (idx >= h->size) {
- if (!init_heap(&n, max(1024UL, roundup_pow_of_two(idx + 1)), gfp))
- return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
-
- mutex_lock(&c->ec_stripes_heap_lock);
- if (n.size > h->size) {
- memcpy(n.data, h->data, h->nr * sizeof(h->data[0]));
- n.nr = h->nr;
- swap(*h, n);
- }
- mutex_unlock(&c->ec_stripes_heap_lock);
-
- free_heap(&n);
- }
-
- if (!genradix_ptr_alloc(&c->stripes, idx, gfp))
- return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
-
if (c->gc_pos.phase != GC_PHASE_not_running &&
!genradix_ptr_alloc(&c->gc_stripes, idx, gfp))
return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
@@ -1010,180 +967,50 @@ static void bch2_stripe_close(struct bch_fs *c, struct ec_stripe_new *s)
s->idx = 0;
}
-/* Heap of all existing stripes, ordered by blocks_nonempty */
-
-static u64 stripe_idx_to_delete(struct bch_fs *c)
-{
- ec_stripes_heap *h = &c->ec_stripes_heap;
-
- lockdep_assert_held(&c->ec_stripes_heap_lock);
-
- if (h->nr &&
- h->data[0].blocks_nonempty == 0 &&
- !bch2_stripe_is_open(c, h->data[0].idx))
- return h->data[0].idx;
-
- return 0;
-}
-
-static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h,
- size_t i)
-{
- struct bch_fs *c = container_of(h, struct bch_fs, ec_stripes_heap);
-
- genradix_ptr(&c->stripes, h->data[i].idx)->heap_idx = i;
-}
-
-static inline bool ec_stripes_heap_cmp(const void *l, const void *r, void __always_unused *args)
-{
- struct ec_stripe_heap_entry *_l = (struct ec_stripe_heap_entry *)l;
- struct ec_stripe_heap_entry *_r = (struct ec_stripe_heap_entry *)r;
-
- return ((_l->blocks_nonempty > _r->blocks_nonempty) <
- (_l->blocks_nonempty < _r->blocks_nonempty));
-}
-
-static inline void ec_stripes_heap_swap(void *l, void *r, void *h)
-{
- struct ec_stripe_heap_entry *_l = (struct ec_stripe_heap_entry *)l;
- struct ec_stripe_heap_entry *_r = (struct ec_stripe_heap_entry *)r;
- ec_stripes_heap *_h = (ec_stripes_heap *)h;
- size_t i = _l - _h->data;
- size_t j = _r - _h->data;
-
- swap(*_l, *_r);
-
- ec_stripes_heap_set_backpointer(_h, i);
- ec_stripes_heap_set_backpointer(_h, j);
-}
-
-static const struct min_heap_callbacks callbacks = {
- .less = ec_stripes_heap_cmp,
- .swp = ec_stripes_heap_swap,
-};
-
-static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
-{
- ec_stripes_heap *h = &c->ec_stripes_heap;
- struct stripe *m = genradix_ptr(&c->stripes, idx);
-
- BUG_ON(m->heap_idx >= h->nr);
- BUG_ON(h->data[m->heap_idx].idx != idx);
-}
-
-void bch2_stripes_heap_del(struct bch_fs *c,
- struct stripe *m, size_t idx)
-{
- mutex_lock(&c->ec_stripes_heap_lock);
- heap_verify_backpointer(c, idx);
-
- min_heap_del(&c->ec_stripes_heap, m->heap_idx, &callbacks, &c->ec_stripes_heap);
- mutex_unlock(&c->ec_stripes_heap_lock);
-}
-
-void bch2_stripes_heap_insert(struct bch_fs *c,
- struct stripe *m, size_t idx)
-{
- mutex_lock(&c->ec_stripes_heap_lock);
- BUG_ON(min_heap_full(&c->ec_stripes_heap));
-
- genradix_ptr(&c->stripes, idx)->heap_idx = c->ec_stripes_heap.nr;
- min_heap_push(&c->ec_stripes_heap, &((struct ec_stripe_heap_entry) {
- .idx = idx,
- .blocks_nonempty = m->blocks_nonempty,
- }),
- &callbacks,
- &c->ec_stripes_heap);
-
- heap_verify_backpointer(c, idx);
- mutex_unlock(&c->ec_stripes_heap_lock);
-}
-
-void bch2_stripes_heap_update(struct bch_fs *c,
- struct stripe *m, size_t idx)
-{
- ec_stripes_heap *h = &c->ec_stripes_heap;
- bool do_deletes;
- size_t i;
-
- mutex_lock(&c->ec_stripes_heap_lock);
- heap_verify_backpointer(c, idx);
-
- h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
-
- i = m->heap_idx;
- min_heap_sift_up(h, i, &callbacks, &c->ec_stripes_heap);
- min_heap_sift_down(h, i, &callbacks, &c->ec_stripes_heap);
-
- heap_verify_backpointer(c, idx);
-
- do_deletes = stripe_idx_to_delete(c) != 0;
- mutex_unlock(&c->ec_stripes_heap_lock);
-
- if (do_deletes)
- bch2_do_stripe_deletes(c);
-}
-
/* stripe deletion */
static int ec_stripe_delete(struct btree_trans *trans, u64 idx)
{
- struct bch_fs *c = trans->c;
struct btree_iter iter;
- struct bkey_s_c k;
- struct bkey_s_c_stripe s;
- int ret;
-
- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_stripes, POS(0, idx),
- BTREE_ITER_intent);
- ret = bkey_err(k);
+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter,
+ BTREE_ID_stripes, POS(0, idx),
+ BTREE_ITER_intent);
+ int ret = bkey_err(k);
if (ret)
goto err;
- if (k.k->type != KEY_TYPE_stripe) {
- bch2_fs_inconsistent(c, "attempting to delete nonexistent stripe %llu", idx);
- ret = -EINVAL;
- goto err;
- }
-
- s = bkey_s_c_to_stripe(k);
- for (unsigned i = 0; i < s.v->nr_blocks; i++)
- if (stripe_blockcount_get(s.v, i)) {
- struct printbuf buf = PRINTBUF;
-
- bch2_bkey_val_to_text(&buf, c, k);
- bch2_fs_inconsistent(c, "attempting to delete nonempty stripe %s", buf.buf);
- printbuf_exit(&buf);
- ret = -EINVAL;
- goto err;
- }
-
- ret = bch2_btree_delete_at(trans, &iter, 0);
+ /*
+ * We expect write buffer races here
+ * Important: check stripe_is_open with stripe key locked:
+ */
+ if (k.k->type == KEY_TYPE_stripe &&
+ !bch2_stripe_is_open(trans->c, idx) &&
+ stripe_lru_pos(bkey_s_c_to_stripe(k).v) == 1)
+ ret = bch2_btree_delete_at(trans, &iter, 0);
err:
bch2_trans_iter_exit(trans, &iter);
return ret;
}
+/*
+ * XXX
+ * can we kill this and delete stripes from the trigger?
+ */
static void ec_stripe_delete_work(struct work_struct *work)
{
struct bch_fs *c =
container_of(work, struct bch_fs, ec_stripe_delete_work);
- while (1) {
- mutex_lock(&c->ec_stripes_heap_lock);
- u64 idx = stripe_idx_to_delete(c);
- mutex_unlock(&c->ec_stripes_heap_lock);
-
- if (!idx)
- break;
-
- int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- ec_stripe_delete(trans, idx));
- bch_err_fn(c, ret);
- if (ret)
- break;
- }
-
+ bch2_trans_run(c,
+ bch2_btree_write_buffer_tryflush(trans) ?:
+ for_each_btree_key_max_commit(trans, lru_iter, BTREE_ID_lru,
+ lru_pos(BCH_LRU_STRIPE_FRAGMENTATION, 1, 0),
+ lru_pos(BCH_LRU_STRIPE_FRAGMENTATION, 1, LRU_TIME_MAX),
+ 0, lru_k,
+ NULL, NULL,
+ BCH_TRANS_COMMIT_no_enospc, ({
+ ec_stripe_delete(trans, lru_k.k->p.offset);
+ })));
bch2_write_ref_put(c, BCH_WRITE_REF_stripe_delete);
}
@@ -1266,11 +1093,11 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
struct bch_dev *ca,
struct bpos bucket, u8 gen,
struct ec_stripe_buf *s,
- struct bpos *bp_pos)
+ struct bkey_s_c_backpointer bp,
+ struct bkey_buf *last_flushed)
{
struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v;
struct bch_fs *c = trans->c;
- struct bch_backpointer bp;
struct btree_iter iter;
struct bkey_s_c k;
const struct bch_extent_ptr *ptr_c;
@@ -1279,33 +1106,26 @@ static int ec_stripe_update_extent(struct btree_trans *trans,
struct bkey_i *n;
int ret, dev, block;
- ret = bch2_get_next_backpointer(trans, ca, bucket, gen,
- bp_pos, &bp, BTREE_ITER_cached);
- if (ret)
- return ret;
- if (bpos_eq(*bp_pos, SPOS_MAX))
- return 0;
-
- if (bp.level) {
+ if (bp.v->level) {
struct printbuf buf = PRINTBUF;
struct btree_iter node_iter;
struct btree *b;
- b = bch2_backpointer_get_node(trans, &node_iter, *bp_pos, bp);
+ b = bch2_backpointer_get_node(trans, bp, &node_iter, last_flushed);
bch2_trans_iter_exit(trans, &node_iter);
if (!b)
return 0;
prt_printf(&buf, "found btree node in erasure coded bucket: b=%px\n", b);
- bch2_backpointer_to_text(&buf, &bp);
+ bch2_bkey_val_to_text(&buf, c, bp.s_c);
bch2_fs_inconsistent(c, "%s", buf.buf);
printbuf_exit(&buf);
- return -EIO;
+ return -BCH_ERR_erasure_coding_found_btree_node;
}
- k = bch2_backpointer_get_key(trans, &iter, *bp_pos, bp, BTREE_ITER_intent);
+ k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent, last_flushed);
ret = bkey_err(k);
if (ret)
return ret;
@@ -1364,28 +1184,39 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b
struct bch_fs *c = trans->c;
struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v;
struct bch_extent_ptr ptr = v->ptrs[block];
- struct bpos bp_pos = POS_MIN;
int ret = 0;
struct bch_dev *ca = bch2_dev_tryget(c, ptr.dev);
if (!ca)
- return -EIO;
+ return -BCH_ERR_ENOENT_dev_not_found;
struct bpos bucket_pos = PTR_BUCKET_POS(ca, &ptr);
- while (1) {
- ret = commit_do(trans, NULL, NULL,
- BCH_TRANS_COMMIT_no_check_rw|
- BCH_TRANS_COMMIT_no_enospc,
- ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s, &bp_pos));
- if (ret)
- break;
- if (bkey_eq(bp_pos, POS_MAX))
+ struct bkey_buf last_flushed;
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
+ ret = for_each_btree_key_max_commit(trans, bp_iter, BTREE_ID_backpointers,
+ bucket_pos_to_bp_start(ca, bucket_pos),
+ bucket_pos_to_bp_end(ca, bucket_pos), 0, bp_k,
+ NULL, NULL,
+ BCH_TRANS_COMMIT_no_check_rw|
+ BCH_TRANS_COMMIT_no_enospc, ({
+ if (bkey_ge(bp_k.k->p, bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket_pos), 0)))
break;
- bp_pos = bpos_nosnap_successor(bp_pos);
- }
+ if (bp_k.k->type != KEY_TYPE_backpointer)
+ continue;
+ struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);
+ if (bp.v->btree_id == BTREE_ID_stripes)
+ continue;
+
+ ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s,
+ bp, &last_flushed);
+ }));
+
+ bch2_bkey_buf_exit(&last_flushed, c);
bch2_dev_put(ca);
return ret;
}
@@ -1394,21 +1225,19 @@ static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s)
{
struct btree_trans *trans = bch2_trans_get(c);
struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v;
- unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
- int ret = 0;
+ unsigned nr_data = v->nr_blocks - v->nr_redundant;
- ret = bch2_btree_write_buffer_flush_sync(trans);
+ int ret = bch2_btree_write_buffer_flush_sync(trans);
if (ret)
goto err;
- for (i = 0; i < nr_data; i++) {
+ for (unsigned i = 0; i < nr_data; i++) {
ret = ec_stripe_update_bucket(trans, s, i);
if (ret)
break;
}
err:
bch2_trans_put(trans);
-
return ret;
}
@@ -1433,7 +1262,7 @@ static void zero_out_rest_of_ec_bucket(struct bch_fs *c,
ob->sectors_free,
GFP_KERNEL, 0);
- percpu_ref_put(&ca->io_ref);
+ percpu_ref_put(&ca->io_ref[WRITE]);
if (ret)
s->err = ret;
@@ -1474,6 +1303,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
if (s->err) {
if (!bch2_err_matches(s->err, EROFS))
bch_err(c, "error creating stripe: error writing data buckets");
+ ret = s->err;
goto err;
}
@@ -1482,6 +1312,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
if (ec_do_recov(c, &s->existing_stripe)) {
bch_err(c, "error creating stripe: error reading existing stripe");
+ ret = -BCH_ERR_ec_block_read;
goto err;
}
@@ -1507,6 +1338,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
if (ec_nr_failed(&s->new_stripe)) {
bch_err(c, "error creating stripe: error writing redundancy buckets");
+ ret = -BCH_ERR_ec_block_write;
goto err;
}
@@ -1528,6 +1360,8 @@ static void ec_stripe_create(struct ec_stripe_new *s)
if (ret)
goto err;
err:
+ trace_stripe_create(c, s->idx, ret);
+
bch2_disk_reservation_put(c, &s->res);
for (i = 0; i < v->nr_blocks; i++)
@@ -1613,11 +1447,11 @@ static void ec_stripe_new_cancel(struct bch_fs *c, struct ec_stripe_head *h, int
ec_stripe_new_set_pending(c, h);
}
-void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob)
+void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob, int err)
{
struct ec_stripe_new *s = ob->ec;
- s->err = -EIO;
+ s->err = err;
}
void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp)
@@ -1707,7 +1541,7 @@ static void ec_stripe_key_init(struct bch_fs *c,
set_bkey_val_u64s(&s->k, u64s);
}
-static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
+static struct ec_stripe_new *ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
{
struct ec_stripe_new *s;
@@ -1715,7 +1549,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s)
- return -BCH_ERR_ENOMEM_ec_new_stripe_alloc;
+ return NULL;
mutex_init(&s->lock);
closure_init(&s->iodone, NULL);
@@ -1730,10 +1564,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
ec_stripe_key_init(c, &s->new_stripe.key,
s->nr_data, s->nr_parity,
h->blocksize, h->disk_label);
-
- h->s = s;
- h->nr_created++;
- return 0;
+ return s;
}
static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h)
@@ -1878,25 +1709,26 @@ err:
return h;
}
-static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_head *h,
+static int new_stripe_alloc_buckets(struct btree_trans *trans,
+ struct ec_stripe_head *h, struct ec_stripe_new *s,
enum bch_watermark watermark, struct closure *cl)
{
struct bch_fs *c = trans->c;
struct bch_devs_mask devs = h->devs;
struct open_bucket *ob;
struct open_buckets buckets;
- struct bch_stripe *v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v;
+ struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
unsigned i, j, nr_have_parity = 0, nr_have_data = 0;
bool have_cache = true;
int ret = 0;
- BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity);
- BUG_ON(v->nr_redundant != h->s->nr_parity);
+ BUG_ON(v->nr_blocks != s->nr_data + s->nr_parity);
+ BUG_ON(v->nr_redundant != s->nr_parity);
/* * We bypass the sector allocator which normally does this: */
bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX);
- for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) {
+ for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) {
/*
* Note: we don't yet repair invalid blocks (failed/removed
* devices) when reusing stripes - we still need a codepath to
@@ -1906,21 +1738,21 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
if (v->ptrs[i].dev != BCH_SB_MEMBER_INVALID)
__clear_bit(v->ptrs[i].dev, devs.d);
- if (i < h->s->nr_data)
+ if (i < s->nr_data)
nr_have_data++;
else
nr_have_parity++;
}
- BUG_ON(nr_have_data > h->s->nr_data);
- BUG_ON(nr_have_parity > h->s->nr_parity);
+ BUG_ON(nr_have_data > s->nr_data);
+ BUG_ON(nr_have_parity > s->nr_parity);
buckets.nr = 0;
- if (nr_have_parity < h->s->nr_parity) {
+ if (nr_have_parity < s->nr_parity) {
ret = bch2_bucket_alloc_set_trans(trans, &buckets,
&h->parity_stripe,
&devs,
- h->s->nr_parity,
+ s->nr_parity,
&nr_have_parity,
&have_cache, 0,
BCH_DATA_parity,
@@ -1928,14 +1760,14 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
cl);
open_bucket_for_each(c, &buckets, ob, i) {
- j = find_next_zero_bit(h->s->blocks_gotten,
- h->s->nr_data + h->s->nr_parity,
- h->s->nr_data);
- BUG_ON(j >= h->s->nr_data + h->s->nr_parity);
+ j = find_next_zero_bit(s->blocks_gotten,
+ s->nr_data + s->nr_parity,
+ s->nr_data);
+ BUG_ON(j >= s->nr_data + s->nr_parity);
- h->s->blocks[j] = buckets.v[i];
+ s->blocks[j] = buckets.v[i];
v->ptrs[j] = bch2_ob_ptr(c, ob);
- __set_bit(j, h->s->blocks_gotten);
+ __set_bit(j, s->blocks_gotten);
}
if (ret)
@@ -1943,11 +1775,11 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
}
buckets.nr = 0;
- if (nr_have_data < h->s->nr_data) {
+ if (nr_have_data < s->nr_data) {
ret = bch2_bucket_alloc_set_trans(trans, &buckets,
&h->block_stripe,
&devs,
- h->s->nr_data,
+ s->nr_data,
&nr_have_data,
&have_cache, 0,
BCH_DATA_user,
@@ -1955,13 +1787,13 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
cl);
open_bucket_for_each(c, &buckets, ob, i) {
- j = find_next_zero_bit(h->s->blocks_gotten,
- h->s->nr_data, 0);
- BUG_ON(j >= h->s->nr_data);
+ j = find_next_zero_bit(s->blocks_gotten,
+ s->nr_data, 0);
+ BUG_ON(j >= s->nr_data);
- h->s->blocks[j] = buckets.v[i];
+ s->blocks[j] = buckets.v[i];
v->ptrs[j] = bch2_ob_ptr(c, ob);
- __set_bit(j, h->s->blocks_gotten);
+ __set_bit(j, s->blocks_gotten);
}
if (ret)
@@ -1971,109 +1803,124 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_
return 0;
}
-static s64 get_existing_stripe(struct bch_fs *c,
- struct ec_stripe_head *head)
+static int __get_existing_stripe(struct btree_trans *trans,
+ struct ec_stripe_head *head,
+ struct ec_stripe_buf *stripe,
+ u64 idx)
{
- ec_stripes_heap *h = &c->ec_stripes_heap;
- struct stripe *m;
- size_t heap_idx;
- u64 stripe_idx;
- s64 ret = -1;
-
- if (may_create_new_stripe(c))
- return -1;
+ struct bch_fs *c = trans->c;
- mutex_lock(&c->ec_stripes_heap_lock);
- for (heap_idx = 0; heap_idx < h->nr; heap_idx++) {
- /* No blocks worth reusing, stripe will just be deleted: */
- if (!h->data[heap_idx].blocks_nonempty)
- continue;
+ struct btree_iter iter;
+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter,
+ BTREE_ID_stripes, POS(0, idx), 0);
+ int ret = bkey_err(k);
+ if (ret)
+ goto err;
- stripe_idx = h->data[heap_idx].idx;
+ /* We expect write buffer races here */
+ if (k.k->type != KEY_TYPE_stripe)
+ goto out;
- m = genradix_ptr(&c->stripes, stripe_idx);
+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+ if (stripe_lru_pos(s.v) <= 1)
+ goto out;
- if (m->disk_label == head->disk_label &&
- m->algorithm == head->algo &&
- m->nr_redundant == head->redundancy &&
- m->sectors == head->blocksize &&
- m->blocks_nonempty < m->nr_blocks - m->nr_redundant &&
- bch2_try_open_stripe(c, head->s, stripe_idx)) {
- ret = stripe_idx;
- break;
- }
+ if (s.v->disk_label == head->disk_label &&
+ s.v->algorithm == head->algo &&
+ s.v->nr_redundant == head->redundancy &&
+ le16_to_cpu(s.v->sectors) == head->blocksize &&
+ bch2_try_open_stripe(c, head->s, idx)) {
+ bkey_reassemble(&stripe->key, k);
+ ret = 1;
}
- mutex_unlock(&c->ec_stripes_heap_lock);
+out:
+ bch2_set_btree_iter_dontneed(trans, &iter);
+err:
+ bch2_trans_iter_exit(trans, &iter);
return ret;
}
-static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h)
+static int init_new_stripe_from_existing(struct bch_fs *c, struct ec_stripe_new *s)
{
- struct bch_fs *c = trans->c;
- struct bch_stripe *new_v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v;
- struct bch_stripe *existing_v;
+ struct bch_stripe *new_v = &bkey_i_to_stripe(&s->new_stripe.key)->v;
+ struct bch_stripe *existing_v = &bkey_i_to_stripe(&s->existing_stripe.key)->v;
unsigned i;
- s64 idx;
- int ret;
- /*
- * If we can't allocate a new stripe, and there's no stripes with empty
- * blocks for us to reuse, that means we have to wait on copygc:
- */
- idx = get_existing_stripe(c, h);
- if (idx < 0)
- return -BCH_ERR_stripe_alloc_blocked;
-
- ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe);
- bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c,
- "reading stripe key: %s", bch2_err_str(ret));
- if (ret) {
- bch2_stripe_close(c, h->s);
- return ret;
- }
-
- existing_v = &bkey_i_to_stripe(&h->s->existing_stripe.key)->v;
-
- BUG_ON(existing_v->nr_redundant != h->s->nr_parity);
- h->s->nr_data = existing_v->nr_blocks -
+ BUG_ON(existing_v->nr_redundant != s->nr_parity);
+ s->nr_data = existing_v->nr_blocks -
existing_v->nr_redundant;
- ret = ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize);
+ int ret = ec_stripe_buf_init(&s->existing_stripe, 0, le16_to_cpu(existing_v->sectors));
if (ret) {
- bch2_stripe_close(c, h->s);
+ bch2_stripe_close(c, s);
return ret;
}
- BUG_ON(h->s->existing_stripe.size != h->blocksize);
- BUG_ON(h->s->existing_stripe.size != le16_to_cpu(existing_v->sectors));
+ BUG_ON(s->existing_stripe.size != le16_to_cpu(existing_v->sectors));
/*
* Free buckets we initially allocated - they might conflict with
* blocks from the stripe we're reusing:
*/
- for_each_set_bit(i, h->s->blocks_gotten, new_v->nr_blocks) {
- bch2_open_bucket_put(c, c->open_buckets + h->s->blocks[i]);
- h->s->blocks[i] = 0;
+ for_each_set_bit(i, s->blocks_gotten, new_v->nr_blocks) {
+ bch2_open_bucket_put(c, c->open_buckets + s->blocks[i]);
+ s->blocks[i] = 0;
}
- memset(h->s->blocks_gotten, 0, sizeof(h->s->blocks_gotten));
- memset(h->s->blocks_allocated, 0, sizeof(h->s->blocks_allocated));
+ memset(s->blocks_gotten, 0, sizeof(s->blocks_gotten));
+ memset(s->blocks_allocated, 0, sizeof(s->blocks_allocated));
- for (i = 0; i < existing_v->nr_blocks; i++) {
+ for (unsigned i = 0; i < existing_v->nr_blocks; i++) {
if (stripe_blockcount_get(existing_v, i)) {
- __set_bit(i, h->s->blocks_gotten);
- __set_bit(i, h->s->blocks_allocated);
+ __set_bit(i, s->blocks_gotten);
+ __set_bit(i, s->blocks_allocated);
}
- ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
+ ec_block_io(c, &s->existing_stripe, READ, i, &s->iodone);
}
- bkey_copy(&h->s->new_stripe.key, &h->s->existing_stripe.key);
- h->s->have_existing_stripe = true;
+ bkey_copy(&s->new_stripe.key, &s->existing_stripe.key);
+ s->have_existing_stripe = true;
return 0;
}
-static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h)
+static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h,
+ struct ec_stripe_new *s)
+{
+ struct bch_fs *c = trans->c;
+
+ /*
+ * If we can't allocate a new stripe, and there's no stripes with empty
+ * blocks for us to reuse, that means we have to wait on copygc:
+ */
+ if (may_create_new_stripe(c))
+ return -1;
+
+ struct btree_iter lru_iter;
+ struct bkey_s_c lru_k;
+ int ret = 0;
+
+ for_each_btree_key_max_norestart(trans, lru_iter, BTREE_ID_lru,
+ lru_pos(BCH_LRU_STRIPE_FRAGMENTATION, 2, 0),
+ lru_pos(BCH_LRU_STRIPE_FRAGMENTATION, 2, LRU_TIME_MAX),
+ 0, lru_k, ret) {
+ ret = __get_existing_stripe(trans, h, &s->existing_stripe, lru_k.k->p.offset);
+ if (ret)
+ break;
+ }
+ bch2_trans_iter_exit(trans, &lru_iter);
+ if (!ret)
+ ret = -BCH_ERR_stripe_alloc_blocked;
+ if (ret == 1)
+ ret = 0;
+ if (ret)
+ return ret;
+
+ return init_new_stripe_from_existing(c, s);
+}
+
+static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h,
+ struct ec_stripe_new *s)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
@@ -2082,21 +1929,25 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st
struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint));
int ret;
- if (!h->s->res.sectors) {
- ret = bch2_disk_reservation_get(c, &h->s->res,
+ if (!s->res.sectors) {
+ ret = bch2_disk_reservation_get(c, &s->res,
h->blocksize,
- h->s->nr_parity,
+ s->nr_parity,
BCH_DISK_RESERVATION_NOFAIL);
if (ret)
return ret;
}
+ /*
+ * Allocate stripe slot
+ * XXX: we're going to need a bitrange btree of free stripes
+ */
for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos,
BTREE_ITER_slots|BTREE_ITER_intent, k, ret) {
if (bkey_gt(k.k->p, POS(0, U32_MAX))) {
if (start_pos.offset) {
start_pos = min_pos;
- bch2_btree_iter_set_pos(&iter, start_pos);
+ bch2_btree_iter_set_pos(trans, &iter, start_pos);
continue;
}
@@ -2105,7 +1956,7 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st
}
if (bkey_deleted(k.k) &&
- bch2_try_open_stripe(c, h->s, k.k->p.offset))
+ bch2_try_open_stripe(c, s, k.k->p.offset))
break;
}
@@ -2116,16 +1967,16 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st
ret = ec_stripe_mem_alloc(trans, &iter);
if (ret) {
- bch2_stripe_close(c, h->s);
+ bch2_stripe_close(c, s);
goto err;
}
- h->s->new_stripe.key.k.p = iter.pos;
+ s->new_stripe.key.k.p = iter.pos;
out:
bch2_trans_iter_exit(trans, &iter);
return ret;
err:
- bch2_disk_reservation_put(c, &h->s->res);
+ bch2_disk_reservation_put(c, &s->res);
goto out;
}
@@ -2156,22 +2007,27 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
return h;
if (!h->s) {
- ret = ec_new_stripe_alloc(c, h);
- if (ret) {
+ h->s = ec_new_stripe_alloc(c, h);
+ if (!h->s) {
+ ret = -BCH_ERR_ENOMEM_ec_new_stripe_alloc;
bch_err(c, "failed to allocate new stripe");
goto err;
}
+
+ h->nr_created++;
}
- if (h->s->allocated)
+ struct ec_stripe_new *s = h->s;
+
+ if (s->allocated)
goto allocated;
- if (h->s->have_existing_stripe)
+ if (s->have_existing_stripe)
goto alloc_existing;
/* First, try to allocate a full stripe: */
- ret = new_stripe_alloc_buckets(trans, h, BCH_WATERMARK_stripe, NULL) ?:
- __bch2_ec_stripe_head_reserve(trans, h);
+ ret = new_stripe_alloc_buckets(trans, h, s, BCH_WATERMARK_stripe, NULL) ?:
+ __bch2_ec_stripe_head_reserve(trans, h, s);
if (!ret)
goto allocate_buf;
if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
@@ -2183,15 +2039,15 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans,
* existing stripe:
*/
while (1) {
- ret = __bch2_ec_stripe_head_reuse(trans, h);
+ ret = __bch2_ec_stripe_head_reuse(trans, h, s);
if (!ret)
break;
if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked)
goto err;
if (watermark == BCH_WATERMARK_copygc) {
- ret = new_stripe_alloc_buckets(trans, h, watermark, NULL) ?:
- __bch2_ec_stripe_head_reserve(trans, h);
+ ret = new_stripe_alloc_buckets(trans, h, s, watermark, NULL) ?:
+ __bch2_ec_stripe_head_reserve(trans, h, s);
if (ret)
goto err;
goto allocate_buf;
@@ -2209,19 +2065,19 @@ alloc_existing:
* Retry allocating buckets, with the watermark for this
* particular write:
*/
- ret = new_stripe_alloc_buckets(trans, h, watermark, cl);
+ ret = new_stripe_alloc_buckets(trans, h, s, watermark, cl);
if (ret)
goto err;
allocate_buf:
- ret = ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize);
+ ret = ec_stripe_buf_init(&s->new_stripe, 0, h->blocksize);
if (ret)
goto err;
- h->s->allocated = true;
+ s->allocated = true;
allocated:
- BUG_ON(!h->s->idx);
- BUG_ON(!h->s->new_stripe.data[0]);
+ BUG_ON(!s->idx);
+ BUG_ON(!s->new_stripe.data[0]);
BUG_ON(trans->restarted);
return h;
err:
@@ -2252,14 +2108,14 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_
if (ret)
return ret;
- struct disk_accounting_pos acc = {
- .type = BCH_DISK_ACCOUNTING_replicas,
- };
+ struct disk_accounting_pos acc;
s64 sectors = 0;
for (unsigned i = 0; i < s->v.nr_blocks; i++)
sectors -= stripe_blockcount_get(&s->v, i);
+ memset(&acc, 0, sizeof(acc));
+ acc.type = BCH_DISK_ACCOUNTING_replicas;
bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i));
acc.replicas.data_type = BCH_DATA_user;
ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false);
@@ -2273,6 +2129,8 @@ static int bch2_invalidate_stripe_to_dev(struct btree_trans *trans, struct bkey_
sectors = -sectors;
+ memset(&acc, 0, sizeof(acc));
+ acc.type = BCH_DISK_ACCOUNTING_replicas;
bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i));
acc.replicas.data_type = BCH_DATA_user;
ret = bch2_disk_accounting_mod(trans, &acc, &sectors, 1, false);
@@ -2286,7 +2144,7 @@ err:
int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx)
{
return bch2_trans_run(c,
- for_each_btree_key_upto_commit(trans, iter,
+ for_each_btree_key_max_commit(trans, iter,
BTREE_ID_alloc, POS(dev_idx, 0), POS(dev_idx, U64_MAX),
BTREE_ITER_intent, k,
NULL, NULL, 0, ({
@@ -2340,10 +2198,10 @@ void bch2_fs_ec_stop(struct bch_fs *c)
static bool bch2_fs_ec_flush_done(struct bch_fs *c)
{
- bool ret;
+ sched_annotate_sleep();
mutex_lock(&c->ec_stripe_new_lock);
- ret = list_empty(&c->ec_stripe_new_list);
+ bool ret = list_empty(&c->ec_stripe_new_list);
mutex_unlock(&c->ec_stripe_new_lock);
return ret;
@@ -2356,46 +2214,7 @@ void bch2_fs_ec_flush(struct bch_fs *c)
int bch2_stripes_read(struct bch_fs *c)
{
- int ret = bch2_trans_run(c,
- for_each_btree_key(trans, iter, BTREE_ID_stripes, POS_MIN,
- BTREE_ITER_prefetch, k, ({
- if (k.k->type != KEY_TYPE_stripe)
- continue;
-
- ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
- if (ret)
- break;
-
- struct stripe *m = genradix_ptr(&c->stripes, k.k->p.offset);
-
- stripe_to_mem(m, bkey_s_c_to_stripe(k).v);
-
- bch2_stripes_heap_insert(c, m, k.k->p.offset);
- 0;
- })));
- bch_err_fn(c, ret);
- return ret;
-}
-
-void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
-{
- ec_stripes_heap *h = &c->ec_stripes_heap;
- struct stripe *m;
- size_t i;
-
- mutex_lock(&c->ec_stripes_heap_lock);
- for (i = 0; i < min_t(size_t, h->nr, 50); i++) {
- m = genradix_ptr(&c->stripes, h->data[i].idx);
-
- prt_printf(out, "%zu %u/%u+%u", h->data[i].idx,
- h->data[i].blocks_nonempty,
- m->nr_blocks - m->nr_redundant,
- m->nr_redundant);
- if (bch2_stripe_is_open(c, h->data[i].idx))
- prt_str(out, " open");
- prt_newline(out);
- }
- mutex_unlock(&c->ec_stripes_heap_lock);
+ return 0;
}
static void bch2_new_stripe_to_text(struct printbuf *out, struct bch_fs *c,
@@ -2449,11 +2268,9 @@ void bch2_fs_ec_exit(struct bch_fs *c)
while (1) {
mutex_lock(&c->ec_stripe_head_lock);
- h = list_first_entry_or_null(&c->ec_stripe_head_list,
- struct ec_stripe_head, list);
- if (h)
- list_del(&h->list);
+ h = list_pop_entry(&c->ec_stripe_head_list, struct ec_stripe_head, list);
mutex_unlock(&c->ec_stripe_head_lock);
+
if (!h)
break;
@@ -2468,15 +2285,12 @@ void bch2_fs_ec_exit(struct bch_fs *c)
BUG_ON(!list_empty(&c->ec_stripe_new_list));
- free_heap(&c->ec_stripes_heap);
- genradix_free(&c->stripes);
bioset_exit(&c->ec_bioset);
}
void bch2_fs_ec_init_early(struct bch_fs *c)
{
spin_lock_init(&c->ec_stripes_new_lock);
- mutex_init(&c->ec_stripes_heap_lock);
INIT_LIST_HEAD(&c->ec_stripe_head_list);
mutex_init(&c->ec_stripe_head_lock);
@@ -2494,3 +2308,40 @@ int bch2_fs_ec_init(struct bch_fs *c)
return bioset_init(&c->ec_bioset, 1, offsetof(struct ec_bio, bio),
BIOSET_NEED_BVECS);
}
+
+static int bch2_check_stripe_to_lru_ref(struct btree_trans *trans,
+ struct bkey_s_c k,
+ struct bkey_buf *last_flushed)
+{
+ if (k.k->type != KEY_TYPE_stripe)
+ return 0;
+
+ struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+
+ u64 lru_idx = stripe_lru_pos(s.v);
+ if (lru_idx) {
+ int ret = bch2_lru_check_set(trans, BCH_LRU_STRIPE_FRAGMENTATION,
+ k.k->p.offset, lru_idx, k, last_flushed);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+int bch2_check_stripe_to_lru_refs(struct bch_fs *c)
+{
+ struct bkey_buf last_flushed;
+
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
+ int ret = bch2_trans_run(c,
+ for_each_btree_key_commit(trans, iter, BTREE_ID_stripes,
+ POS_MIN, BTREE_ITER_prefetch, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ bch2_check_stripe_to_lru_ref(trans, k, &last_flushed)));
+
+ bch2_bkey_buf_exit(&last_flushed, c);
+ bch_err_fn(c, ret);
+ return ret;
+}