author    Kent Overstreet <kent.overstreet@gmail.com>  2022-02-15 08:06:59 +0300
committer Kent Overstreet <kent.overstreet@linux.dev>  2023-10-23 00:09:24 +0300
commit    eb331fe5a4e801dc11d96ba7fbda0a91c8bd626c (patch)
tree      bb8144cd4214f0bf41f7e8f680c1a09d8d4adda7 /fs/bcachefs
parent    fcf01959eaa828b1005f8f30732949e64edb8c4d (diff)
download  linux-eb331fe5a4e801dc11d96ba7fbda0a91c8bd626c.tar.xz
bcachefs: Check for stale dirty pointer before reads
Since we retry reads when we discover we read from a pointer that went
stale, an erroneously stale dirty pointer would cause us to loop
retrying that read forever - unless we check before issuing the read,
while the btree is still locked, when we know that a dirty pointer
should never be stale.
This patch adds that check, along with printing some helpful debug info.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
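To make the failure mode concrete, here is a minimal user-space model of the retry loop described above. The types and helpers (struct ptr, pick_ptr(), do_read()) are illustrative stand-ins, not the bcachefs API: without the up-front check, a stale dirty pointer is only noticed after the I/O completes and the same pointer is retried forever; with the check, it is reported and marked failed before the read is issued, so the loop terminates.

/* Illustrative model only - not bcachefs code. */
#include <stdbool.h>
#include <stdio.h>

struct ptr {
	bool cached;	/* cached pointers may legitimately go stale */
	bool stale;
	bool failed;	/* set once we give up on this pointer */
};

/* Pick the first pointer we haven't already marked as failed. */
static struct ptr *pick_ptr(struct ptr *ptrs, int n)
{
	for (int i = 0; i < n; i++)
		if (!ptrs[i].failed)
			return &ptrs[i];
	return NULL;
}

static int do_read(struct ptr *ptrs, int n, bool check_before_read)
{
	struct ptr *p;

	while ((p = pick_ptr(ptrs, n))) {
		if (check_before_read && !p->cached && p->stale) {
			/* New behaviour: a dirty pointer must never be stale
			 * here (the btree is still locked), so report it and
			 * mark the pointer failed instead of blindly retrying
			 * the same read: */
			fprintf(stderr, "stale dirty pointer\n");
			p->failed = true;
			continue;
		}
		if (p->stale)
			continue;	/* old behaviour: staleness only seen
					 * at I/O completion -> retries forever
					 * for a stale dirty pointer */
		return 0;		/* read issued */
	}
	return -1;			/* no usable pointer left */
}

int main(void)
{
	struct ptr ptrs[] = { { .cached = false, .stale = true } };

	/* do_read(ptrs, 1, false) would never return here: */
	return do_read(ptrs, 1, true) == -1 ? 0 : 1;
}

In the actual patch the new check only runs in the retry path (BCH_READ_IN_RETRY), since @failed isn't allocated otherwise; a stale pointer hit on the first pass is still caught in bch2_read_endio() and re-enters the retry path, as the comment in the io.c hunk below notes.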
Diffstat (limited to 'fs/bcachefs')
-rw-r--r--  fs/bcachefs/fs-io.c |  2
-rw-r--r--  fs/bcachefs/io.c    | 60
-rw-r--r--  fs/bcachefs/move.c  |  6
3 files changed, 54 insertions, 14 deletions
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 5fce958bafc9..9161125aec17 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -1043,8 +1043,6 @@ retry:
 		sectors = min(sectors, k.k->size - offset_into_extent);
 
-		bch2_trans_unlock(trans);
-
 		if (readpages_iter)
 			readpage_bio_extend(readpages_iter, &rbio->bio, sectors,
 					    extent_partial_reads_expensive(k));
 
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index 218934b4e19b..914e22c5c247 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -2032,6 +2032,33 @@ err:
 	return ret;
 }
 
+static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
+						   struct bkey_s_c k,
+						   struct bch_extent_ptr ptr)
+{
+	struct bch_fs *c = trans->c;
+	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr.dev);
+	struct btree_iter iter;
+	char buf[200];
+	int ret;
+
+	bch2_bkey_val_to_text(&PBUF(buf), c, k);
+	bch2_fs_inconsistent(c, "Attempting to read from stale dirty pointer: %s", buf);
+
+	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
+			     POS(ptr.dev, PTR_BUCKET_NR(ca, &ptr)),
+			     BTREE_ITER_CACHED);
+
+	ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
+	if (ret)
+		return;
+
+	bch2_bkey_val_to_text(&PBUF(buf), c, k);
+	bch_err(c, "%s", buf);
+	bch_err(c, "memory gen: %u", *bucket_gen(ca, iter.pos.offset));
+	bch2_trans_iter_exit(trans, &iter);
+}
+
 int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
 		       struct bvec_iter iter, struct bpos read_pos,
 		       enum btree_id data_btree, struct bkey_s_c k,
@@ -2041,7 +2068,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
 	struct bch_fs *c = trans->c;
 	struct extent_ptr_decoded pick;
 	struct bch_read_bio *rbio = NULL;
-	struct bch_dev *ca;
+	struct bch_dev *ca = NULL;
 	struct promote_op *promote = NULL;
 	bool bounce = false, read_full = false, narrow_crcs = false;
 	struct bpos data_pos = bkey_start_pos(k.k);
@@ -2058,7 +2085,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
 		zero_fill_bio_iter(&orig->bio, iter);
 		goto out_read_done;
 	}
-
+retry_pick:
 	pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
 
 	/* hole or reservation - just zero fill: */
@@ -2071,8 +2098,27 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
 		goto err;
 	}
 
-	if (pick_ret > 0)
-		ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+	ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+
+	/*
+	 * Stale dirty pointers are treated as IO errors, but @failed isn't
+	 * allocated unless we're in the retry path - so if we're not in the
+	 * retry path, don't check here, it'll be caught in bch2_read_endio()
+	 * and we'll end up in the retry path:
+	 */
+	if ((flags & BCH_READ_IN_RETRY) &&
+	    !pick.ptr.cached &&
+	    unlikely(ptr_stale(ca, &pick.ptr))) {
+		read_from_stale_dirty_pointer(trans, k, pick.ptr);
+		bch2_mark_io_failure(failed, &pick);
+		goto retry_pick;
+	}
+
+	/*
+	 * Unlock the iterator while the btree node's lock is still in
+	 * cache, before doing the IO:
+	 */
+	bch2_trans_unlock(trans);
 
 	if (flags & BCH_READ_NODECODE) {
 		/*
@@ -2367,12 +2413,6 @@ retry:
 		 */
 		sectors = min(sectors, k.k->size - offset_into_extent);
 
-		/*
-		 * Unlock the iterator while the btree node's lock is still in
-		 * cache, before doing the IO:
-		 */
-		bch2_trans_unlock(&trans);
-
 		bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
 		swap(bvec_iter.bi_size, bytes);
 
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 04971bf847bf..4751d79219cb 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -752,10 +752,12 @@ static int __bch2_move_data(struct bch_fs *c,
 			BUG();
 		}
 
-		/* unlock before doing IO: */
+		/*
+		 * The iterator gets unlocked by __bch2_read_extent - need to
+		 * save a copy of @k elsewhere:
+		 */
 		bch2_bkey_buf_reassemble(&sk, c, k);
 		k = bkey_i_to_s_c(sk.k);
-		bch2_trans_unlock(&trans);
 
 		ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k,
 					data_cmd, data_opts);
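The move.c hunk also records a consequence of moving the unlock into __bch2_read_extent(): once the callee may drop the btree locks, whatever @k points at inside the btree node can become invalid, which is why the caller keeps its bch2_bkey_buf_reassemble() copy. Below is a small, self-contained sketch of that copy-before-unlock pattern in plain C with pthreads; the names (node_lock, node_buf, read_extent()) are illustrative, not the real API.

/* Illustrative model only - not bcachefs code. Build with -pthread. */
#include <pthread.h>
#include <stdio.h>
#include <string.h>

static pthread_mutex_t node_lock = PTHREAD_MUTEX_INITIALIZER;
static char node_buf[32] = "key:inode=42,offset=8";

/* Models __bch2_read_extent(): drops the lock internally before the "IO",
 * so node_buf may change or disappear while the read is in flight. */
static void read_extent(const char *key)
{
	pthread_mutex_unlock(&node_lock);
	printf("reading %s\n", key);
	pthread_mutex_lock(&node_lock);
}

int main(void)
{
	char copy[sizeof(node_buf)];

	pthread_mutex_lock(&node_lock);
	/* Like bch2_bkey_buf_reassemble(): snapshot the key while locked, */
	strcpy(copy, node_buf);
	/* so it stays valid even though read_extent() drops the lock: */
	read_extent(copy);
	pthread_mutex_unlock(&node_lock);
	return 0;
}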