diff options
Diffstat (limited to 'fs/bcachefs/ec.c')
-rw-r--r-- | fs/bcachefs/ec.c | 267 |
1 files changed, 138 insertions, 129 deletions
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 6094afb0c6be..d2a5e76e6479 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -26,6 +26,7 @@ #include "util.h" #include <linux/sort.h> +#include <linux/string_choices.h> #ifdef __KERNEL__ @@ -109,7 +110,7 @@ struct ec_bio { /* Stripes btree keys: */ int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags) + struct bkey_validate_context from) { const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; int ret = 0; @@ -129,7 +130,7 @@ int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k, "invalid csum granularity (%u >= 64)", s->csum_granularity_bits); - ret = bch2_bkey_ptrs_validate(c, k, flags); + ret = bch2_bkey_ptrs_validate(c, k, from); fsck_err: return ret; } @@ -304,13 +305,12 @@ static int mark_stripe_bucket(struct btree_trans *trans, } if (flags & BTREE_TRIGGER_gc) { - percpu_down_read(&c->mark_lock); struct bucket *g = gc_bucket(ca, bucket.offset); if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s", ptr->dev, (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { ret = -BCH_ERR_mark_stripe; - goto err_unlock; + goto err; } bucket_lock(g); @@ -318,8 +318,7 @@ static int mark_stripe_bucket(struct btree_trans *trans, ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags); alloc_to_bucket(g, new); bucket_unlock(g); -err_unlock: - percpu_up_read(&c->mark_lock); + if (!ret) ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); } @@ -732,7 +731,7 @@ static void ec_block_endio(struct bio *bio) ? BCH_MEMBER_ERROR_write : BCH_MEMBER_ERROR_read, "erasure coding %s error: %s", - bio_data_dir(bio) ? "write" : "read", + str_write_read(bio_data_dir(bio)), bch2_blk_status_to_str(bio->bi_status))) clear_bit(ec_bio->idx, ec_bio->buf->valid); @@ -909,7 +908,7 @@ err: bch2_bkey_val_to_text(&msgbuf, c, orig_k); bch_err_ratelimited(c, "error doing reconstruct read: %s\n %s", msg, msgbuf.buf); - printbuf_exit(&msgbuf);; + printbuf_exit(&msgbuf); ret = -BCH_ERR_stripe_reconstruct; goto out; } @@ -1266,11 +1265,11 @@ static int ec_stripe_update_extent(struct btree_trans *trans, struct bch_dev *ca, struct bpos bucket, u8 gen, struct ec_stripe_buf *s, - struct bpos *bp_pos) + struct bkey_s_c_backpointer bp, + struct bkey_buf *last_flushed) { struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v; struct bch_fs *c = trans->c; - struct bch_backpointer bp; struct btree_iter iter; struct bkey_s_c k; const struct bch_extent_ptr *ptr_c; @@ -1279,33 +1278,26 @@ static int ec_stripe_update_extent(struct btree_trans *trans, struct bkey_i *n; int ret, dev, block; - ret = bch2_get_next_backpointer(trans, ca, bucket, gen, - bp_pos, &bp, BTREE_ITER_cached); - if (ret) - return ret; - if (bpos_eq(*bp_pos, SPOS_MAX)) - return 0; - - if (bp.level) { + if (bp.v->level) { struct printbuf buf = PRINTBUF; struct btree_iter node_iter; struct btree *b; - b = bch2_backpointer_get_node(trans, &node_iter, *bp_pos, bp); + b = bch2_backpointer_get_node(trans, bp, &node_iter, last_flushed); bch2_trans_iter_exit(trans, &node_iter); if (!b) return 0; prt_printf(&buf, "found btree node in erasure coded bucket: b=%px\n", b); - bch2_backpointer_to_text(&buf, &bp); + bch2_bkey_val_to_text(&buf, c, bp.s_c); bch2_fs_inconsistent(c, "%s", buf.buf); printbuf_exit(&buf); return -EIO; } - k = bch2_backpointer_get_key(trans, &iter, *bp_pos, bp, BTREE_ITER_intent); + k = bch2_backpointer_get_key(trans, bp, &iter, BTREE_ITER_intent, last_flushed); ret = bkey_err(k); if (ret) return ret; @@ -1364,7 +1356,6 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b struct bch_fs *c = trans->c; struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v; struct bch_extent_ptr ptr = v->ptrs[block]; - struct bpos bp_pos = POS_MIN; int ret = 0; struct bch_dev *ca = bch2_dev_tryget(c, ptr.dev); @@ -1373,19 +1364,27 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b struct bpos bucket_pos = PTR_BUCKET_POS(ca, &ptr); - while (1) { - ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_check_rw| - BCH_TRANS_COMMIT_no_enospc, - ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s, &bp_pos)); - if (ret) - break; - if (bkey_eq(bp_pos, POS_MAX)) + struct bkey_buf last_flushed; + bch2_bkey_buf_init(&last_flushed); + bkey_init(&last_flushed.k->k); + + ret = for_each_btree_key_max_commit(trans, bp_iter, BTREE_ID_backpointers, + bucket_pos_to_bp_start(ca, bucket_pos), + bucket_pos_to_bp_end(ca, bucket_pos), 0, bp_k, + NULL, NULL, + BCH_TRANS_COMMIT_no_check_rw| + BCH_TRANS_COMMIT_no_enospc, ({ + if (bkey_ge(bp_k.k->p, bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket_pos), 0))) break; - bp_pos = bpos_nosnap_successor(bp_pos); - } + if (bp_k.k->type != KEY_TYPE_backpointer) + continue; + ec_stripe_update_extent(trans, ca, bucket_pos, ptr.gen, s, + bkey_s_c_to_backpointer(bp_k), &last_flushed); + })); + + bch2_bkey_buf_exit(&last_flushed, c); bch2_dev_put(ca); return ret; } @@ -1707,7 +1706,7 @@ static void ec_stripe_key_init(struct bch_fs *c, set_bkey_val_u64s(&s->k, u64s); } -static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) +static struct ec_stripe_new *ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) { struct ec_stripe_new *s; @@ -1715,7 +1714,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) - return -BCH_ERR_ENOMEM_ec_new_stripe_alloc; + return NULL; mutex_init(&s->lock); closure_init(&s->iodone, NULL); @@ -1730,10 +1729,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) ec_stripe_key_init(c, &s->new_stripe.key, s->nr_data, s->nr_parity, h->blocksize, h->disk_label); - - h->s = s; - h->nr_created++; - return 0; + return s; } static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h) @@ -1878,25 +1874,26 @@ err: return h; } -static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_head *h, +static int new_stripe_alloc_buckets(struct btree_trans *trans, + struct ec_stripe_head *h, struct ec_stripe_new *s, enum bch_watermark watermark, struct closure *cl) { struct bch_fs *c = trans->c; struct bch_devs_mask devs = h->devs; struct open_bucket *ob; struct open_buckets buckets; - struct bch_stripe *v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v; + struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v; unsigned i, j, nr_have_parity = 0, nr_have_data = 0; bool have_cache = true; int ret = 0; - BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity); - BUG_ON(v->nr_redundant != h->s->nr_parity); + BUG_ON(v->nr_blocks != s->nr_data + s->nr_parity); + BUG_ON(v->nr_redundant != s->nr_parity); /* * We bypass the sector allocator which normally does this: */ bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX); - for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) { + for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) { /* * Note: we don't yet repair invalid blocks (failed/removed * devices) when reusing stripes - we still need a codepath to @@ -1906,21 +1903,21 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ if (v->ptrs[i].dev != BCH_SB_MEMBER_INVALID) __clear_bit(v->ptrs[i].dev, devs.d); - if (i < h->s->nr_data) + if (i < s->nr_data) nr_have_data++; else nr_have_parity++; } - BUG_ON(nr_have_data > h->s->nr_data); - BUG_ON(nr_have_parity > h->s->nr_parity); + BUG_ON(nr_have_data > s->nr_data); + BUG_ON(nr_have_parity > s->nr_parity); buckets.nr = 0; - if (nr_have_parity < h->s->nr_parity) { + if (nr_have_parity < s->nr_parity) { ret = bch2_bucket_alloc_set_trans(trans, &buckets, &h->parity_stripe, &devs, - h->s->nr_parity, + s->nr_parity, &nr_have_parity, &have_cache, 0, BCH_DATA_parity, @@ -1928,14 +1925,14 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ cl); open_bucket_for_each(c, &buckets, ob, i) { - j = find_next_zero_bit(h->s->blocks_gotten, - h->s->nr_data + h->s->nr_parity, - h->s->nr_data); - BUG_ON(j >= h->s->nr_data + h->s->nr_parity); + j = find_next_zero_bit(s->blocks_gotten, + s->nr_data + s->nr_parity, + s->nr_data); + BUG_ON(j >= s->nr_data + s->nr_parity); - h->s->blocks[j] = buckets.v[i]; + s->blocks[j] = buckets.v[i]; v->ptrs[j] = bch2_ob_ptr(c, ob); - __set_bit(j, h->s->blocks_gotten); + __set_bit(j, s->blocks_gotten); } if (ret) @@ -1943,11 +1940,11 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ } buckets.nr = 0; - if (nr_have_data < h->s->nr_data) { + if (nr_have_data < s->nr_data) { ret = bch2_bucket_alloc_set_trans(trans, &buckets, &h->block_stripe, &devs, - h->s->nr_data, + s->nr_data, &nr_have_data, &have_cache, 0, BCH_DATA_user, @@ -1955,13 +1952,13 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ cl); open_bucket_for_each(c, &buckets, ob, i) { - j = find_next_zero_bit(h->s->blocks_gotten, - h->s->nr_data, 0); - BUG_ON(j >= h->s->nr_data); + j = find_next_zero_bit(s->blocks_gotten, + s->nr_data, 0); + BUG_ON(j >= s->nr_data); - h->s->blocks[j] = buckets.v[i]; + s->blocks[j] = buckets.v[i]; v->ptrs[j] = bch2_ob_ptr(c, ob); - __set_bit(j, h->s->blocks_gotten); + __set_bit(j, s->blocks_gotten); } if (ret) @@ -2007,73 +2004,78 @@ static s64 get_existing_stripe(struct bch_fs *c, return ret; } -static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h) +static int init_new_stripe_from_existing(struct bch_fs *c, struct ec_stripe_new *s) { - struct bch_fs *c = trans->c; - struct bch_stripe *new_v = &bkey_i_to_stripe(&h->s->new_stripe.key)->v; - struct bch_stripe *existing_v; + struct bch_stripe *new_v = &bkey_i_to_stripe(&s->new_stripe.key)->v; + struct bch_stripe *existing_v = &bkey_i_to_stripe(&s->existing_stripe.key)->v; unsigned i; - s64 idx; - int ret; - - /* - * If we can't allocate a new stripe, and there's no stripes with empty - * blocks for us to reuse, that means we have to wait on copygc: - */ - idx = get_existing_stripe(c, h); - if (idx < 0) - return -BCH_ERR_stripe_alloc_blocked; - - ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe); - bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c, - "reading stripe key: %s", bch2_err_str(ret)); - if (ret) { - bch2_stripe_close(c, h->s); - return ret; - } - existing_v = &bkey_i_to_stripe(&h->s->existing_stripe.key)->v; - - BUG_ON(existing_v->nr_redundant != h->s->nr_parity); - h->s->nr_data = existing_v->nr_blocks - + BUG_ON(existing_v->nr_redundant != s->nr_parity); + s->nr_data = existing_v->nr_blocks - existing_v->nr_redundant; - ret = ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize); + int ret = ec_stripe_buf_init(&s->existing_stripe, 0, le16_to_cpu(existing_v->sectors)); if (ret) { - bch2_stripe_close(c, h->s); + bch2_stripe_close(c, s); return ret; } - BUG_ON(h->s->existing_stripe.size != h->blocksize); - BUG_ON(h->s->existing_stripe.size != le16_to_cpu(existing_v->sectors)); + BUG_ON(s->existing_stripe.size != le16_to_cpu(existing_v->sectors)); /* * Free buckets we initially allocated - they might conflict with * blocks from the stripe we're reusing: */ - for_each_set_bit(i, h->s->blocks_gotten, new_v->nr_blocks) { - bch2_open_bucket_put(c, c->open_buckets + h->s->blocks[i]); - h->s->blocks[i] = 0; + for_each_set_bit(i, s->blocks_gotten, new_v->nr_blocks) { + bch2_open_bucket_put(c, c->open_buckets + s->blocks[i]); + s->blocks[i] = 0; } - memset(h->s->blocks_gotten, 0, sizeof(h->s->blocks_gotten)); - memset(h->s->blocks_allocated, 0, sizeof(h->s->blocks_allocated)); + memset(s->blocks_gotten, 0, sizeof(s->blocks_gotten)); + memset(s->blocks_allocated, 0, sizeof(s->blocks_allocated)); - for (i = 0; i < existing_v->nr_blocks; i++) { + for (unsigned i = 0; i < existing_v->nr_blocks; i++) { if (stripe_blockcount_get(existing_v, i)) { - __set_bit(i, h->s->blocks_gotten); - __set_bit(i, h->s->blocks_allocated); + __set_bit(i, s->blocks_gotten); + __set_bit(i, s->blocks_allocated); } - ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone); + ec_block_io(c, &s->existing_stripe, READ, i, &s->iodone); } - bkey_copy(&h->s->new_stripe.key, &h->s->existing_stripe.key); - h->s->have_existing_stripe = true; + bkey_copy(&s->new_stripe.key, &s->existing_stripe.key); + s->have_existing_stripe = true; return 0; } -static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h) +static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stripe_head *h, + struct ec_stripe_new *s) +{ + struct bch_fs *c = trans->c; + s64 idx; + int ret; + + /* + * If we can't allocate a new stripe, and there's no stripes with empty + * blocks for us to reuse, that means we have to wait on copygc: + */ + idx = get_existing_stripe(c, h); + if (idx < 0) + return -BCH_ERR_stripe_alloc_blocked; + + ret = get_stripe_key_trans(trans, idx, &s->existing_stripe); + bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c, + "reading stripe key: %s", bch2_err_str(ret)); + if (ret) { + bch2_stripe_close(c, s); + return ret; + } + + return init_new_stripe_from_existing(c, s); +} + +static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_stripe_head *h, + struct ec_stripe_new *s) { struct bch_fs *c = trans->c; struct btree_iter iter; @@ -2082,15 +2084,19 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st struct bpos start_pos = bpos_max(min_pos, POS(0, c->ec_stripe_hint)); int ret; - if (!h->s->res.sectors) { - ret = bch2_disk_reservation_get(c, &h->s->res, + if (!s->res.sectors) { + ret = bch2_disk_reservation_get(c, &s->res, h->blocksize, - h->s->nr_parity, + s->nr_parity, BCH_DISK_RESERVATION_NOFAIL); if (ret) return ret; } + /* + * Allocate stripe slot + * XXX: we're going to need a bitrange btree of free stripes + */ for_each_btree_key_norestart(trans, iter, BTREE_ID_stripes, start_pos, BTREE_ITER_slots|BTREE_ITER_intent, k, ret) { if (bkey_gt(k.k->p, POS(0, U32_MAX))) { @@ -2105,7 +2111,7 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st } if (bkey_deleted(k.k) && - bch2_try_open_stripe(c, h->s, k.k->p.offset)) + bch2_try_open_stripe(c, s, k.k->p.offset)) break; } @@ -2116,16 +2122,16 @@ static int __bch2_ec_stripe_head_reserve(struct btree_trans *trans, struct ec_st ret = ec_stripe_mem_alloc(trans, &iter); if (ret) { - bch2_stripe_close(c, h->s); + bch2_stripe_close(c, s); goto err; } - h->s->new_stripe.key.k.p = iter.pos; + s->new_stripe.key.k.p = iter.pos; out: bch2_trans_iter_exit(trans, &iter); return ret; err: - bch2_disk_reservation_put(c, &h->s->res); + bch2_disk_reservation_put(c, &s->res); goto out; } @@ -2156,22 +2162,27 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, return h; if (!h->s) { - ret = ec_new_stripe_alloc(c, h); - if (ret) { + h->s = ec_new_stripe_alloc(c, h); + if (!h->s) { + ret = -BCH_ERR_ENOMEM_ec_new_stripe_alloc; bch_err(c, "failed to allocate new stripe"); goto err; } + + h->nr_created++; } - if (h->s->allocated) + struct ec_stripe_new *s = h->s; + + if (s->allocated) goto allocated; - if (h->s->have_existing_stripe) + if (s->have_existing_stripe) goto alloc_existing; /* First, try to allocate a full stripe: */ - ret = new_stripe_alloc_buckets(trans, h, BCH_WATERMARK_stripe, NULL) ?: - __bch2_ec_stripe_head_reserve(trans, h); + ret = new_stripe_alloc_buckets(trans, h, s, BCH_WATERMARK_stripe, NULL) ?: + __bch2_ec_stripe_head_reserve(trans, h, s); if (!ret) goto allocate_buf; if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || @@ -2183,15 +2194,15 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct btree_trans *trans, * existing stripe: */ while (1) { - ret = __bch2_ec_stripe_head_reuse(trans, h); + ret = __bch2_ec_stripe_head_reuse(trans, h, s); if (!ret) break; if (waiting || !cl || ret != -BCH_ERR_stripe_alloc_blocked) goto err; if (watermark == BCH_WATERMARK_copygc) { - ret = new_stripe_alloc_buckets(trans, h, watermark, NULL) ?: - __bch2_ec_stripe_head_reserve(trans, h); + ret = new_stripe_alloc_buckets(trans, h, s, watermark, NULL) ?: + __bch2_ec_stripe_head_reserve(trans, h, s); if (ret) goto err; goto allocate_buf; @@ -2209,19 +2220,19 @@ alloc_existing: * Retry allocating buckets, with the watermark for this * particular write: */ - ret = new_stripe_alloc_buckets(trans, h, watermark, cl); + ret = new_stripe_alloc_buckets(trans, h, s, watermark, cl); if (ret) goto err; allocate_buf: - ret = ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize); + ret = ec_stripe_buf_init(&s->new_stripe, 0, h->blocksize); if (ret) goto err; - h->s->allocated = true; + s->allocated = true; allocated: - BUG_ON(!h->s->idx); - BUG_ON(!h->s->new_stripe.data[0]); + BUG_ON(!s->idx); + BUG_ON(!s->new_stripe.data[0]); BUG_ON(trans->restarted); return h; err: @@ -2286,7 +2297,7 @@ err: int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx) { return bch2_trans_run(c, - for_each_btree_key_upto_commit(trans, iter, + for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc, POS(dev_idx, 0), POS(dev_idx, U64_MAX), BTREE_ITER_intent, k, NULL, NULL, 0, ({ @@ -2449,11 +2460,9 @@ void bch2_fs_ec_exit(struct bch_fs *c) while (1) { mutex_lock(&c->ec_stripe_head_lock); - h = list_first_entry_or_null(&c->ec_stripe_head_list, - struct ec_stripe_head, list); - if (h) - list_del(&h->list); + h = list_pop_entry(&c->ec_stripe_head_list, struct ec_stripe_head, list); mutex_unlock(&c->ec_stripe_head_lock); + if (!h) break; |