author	Kent Overstreet <kent.overstreet@linux.dev>	2024-12-31 00:32:57 +0300
committer	Kent Overstreet <kent.overstreet@linux.dev>	2025-03-15 04:02:12 +0300
commit	dff6de9518848b5afa0bc6fec57e657701be67ec (patch)
tree	c69c6700e0b075ddff4fd7ed39e63d204f3b1b9a /fs/bcachefs/io_read.c
parent	7b1d6551060066a1fed2a1f83485b0ea37ca3001 (diff)
download	linux-dff6de9518848b5afa0bc6fec57e657701be67ec.tar.xz
bcachefs: Internal reads can now correct errors
Rework the read path so that BCH_READ_NODECODE reads now also self-heal
after a read error and a successful retry - prerequisite for scrub.

- __bch2_read_endio() now handles a read that's both BCH_READ_NODECODE
  and a bounce.

  Normally, we don't want a BCH_READ_NODECODE read to ever allocate a
  split bch_read_bio: we want to maintain the relationship between the
  bch_read_bio and the data_update it's embedded in.

  But correcting read errors requires allocating a split/bounce rbio
  that's embedded in a promote_op. We do still have a 1-1 relationship,
  i.e. we only allocate a single split/bounce if it's a
  BCH_READ_NODECODE, so things hopefully don't get too crazy.

- __bch2_read_extent() is now allowed to allocate the promote_op for
  rewriting after a failed read, even if it's BCH_READ_NODECODE.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
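The core of the endio change, reduced to a standalone sketch: a BCH_READ_data_update (NODECODE) read skips all decode work, but a split rbio allocated for error correction now hands the extent pointer it read back to its parent, so the subsequent self-heal rewrite targets the right pointer. The names below (toy_rbio, TOY_READ_DATA_UPDATE, the int "pick") are illustrative stand-ins, not bcachefs's real structures:

/* Toy model of the reworked decode step in __bch2_read_endio();
 * all types and flags here are stand-ins, not the kernel's. */
#include <stdbool.h>
#include <stdio.h>

#define TOY_READ_DATA_UPDATE	(1U << 0)	/* stand-in for BCH_READ_data_update */

struct toy_rbio {
	unsigned		flags;
	bool			split;
	int			pick;	/* stand-in for struct extent_ptr_decoded */
	struct toy_rbio		*parent;
};

static void toy_endio_decode(struct toy_rbio *rbio)
{
	if (!(rbio->flags & TOY_READ_DATA_UPDATE)) {
		/* normal read: decrypt/decompress/copy into the destination */
		printf("decode, pick=%d\n", rbio->pick);
	} else {
		/*
		 * NODECODE read: no decode work, but a split rbio
		 * (allocated only for error correction) propagates the
		 * extent pointer it read back to its parent, so the
		 * self-heal rewrite knows which pointer was read.
		 */
		if (rbio->split)
			rbio->parent->pick = rbio->pick;
	}
}

int main(void)
{
	struct toy_rbio parent = { .flags = TOY_READ_DATA_UPDATE, .pick = -1 };
	struct toy_rbio split = {
		.flags	= TOY_READ_DATA_UPDATE,
		.split	= true,
		.pick	= 2,		/* the pointer the retry read from */
		.parent	= &parent,
	};

	toy_endio_decode(&split);
	printf("parent pick=%d\n", parent.pick);	/* prints 2 */
	return 0;
}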
Diffstat (limited to 'fs/bcachefs/io_read.c')
-rw-r--r--	fs/bcachefs/io_read.c	108
1 file changed, 56 insertions(+), 52 deletions(-)
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index bb5d1de25aa1..18c8e54f455e 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -696,32 +696,40 @@ static void __bch2_read_endio(struct work_struct *work)
 	if (unlikely(rbio->narrow_crcs))
 		bch2_rbio_narrow_crcs(rbio);
 
-	if (rbio->flags & BCH_READ_data_update)
-		goto nodecode;
-
-	/* Adjust crc to point to subset of data we want: */
-	crc.offset += rbio->offset_into_extent;
-	crc.live_size = bvec_iter_sectors(rbio->bvec_iter);
+	if (likely(!(rbio->flags & BCH_READ_data_update))) {
+		/* Adjust crc to point to subset of data we want: */
+		crc.offset += rbio->offset_into_extent;
+		crc.live_size = bvec_iter_sectors(rbio->bvec_iter);
+
+		if (crc_is_compressed(crc)) {
+			ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
+			if (ret)
+				goto decrypt_err;
+
+			if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) &&
+			    !c->opts.no_data_io)
+				goto decompression_err;
+		} else {
+			/* don't need to decrypt the entire bio: */
+			nonce = nonce_add(nonce, crc.offset << 9);
+			bio_advance(src, crc.offset << 9);
 
-	if (crc_is_compressed(crc)) {
-		ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
-		if (ret)
-			goto decrypt_err;
+			BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
+			src->bi_iter.bi_size = dst_iter.bi_size;
 
-		if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) &&
-		    !c->opts.no_data_io)
-			goto decompression_err;
-	} else {
-		/* don't need to decrypt the entire bio: */
-		nonce = nonce_add(nonce, crc.offset << 9);
-		bio_advance(src, crc.offset << 9);
+			ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
+			if (ret)
+				goto decrypt_err;
 
-		BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
-		src->bi_iter.bi_size = dst_iter.bi_size;
+			if (rbio->bounce) {
+				struct bvec_iter src_iter = src->bi_iter;
 
-		ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
-		if (ret)
-			goto decrypt_err;
+				bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
+			}
+		}
+	} else {
+		if (rbio->split)
+			rbio->parent->pick = rbio->pick;
 
 		if (rbio->bounce) {
 			struct bvec_iter src_iter = src->bi_iter;
@@ -739,7 +747,7 @@ static void __bch2_read_endio(struct work_struct *work)
 		if (ret)
 			goto decrypt_err;
 	}
-nodecode:
+
 	if (likely(!(rbio->flags & BCH_READ_in_retry))) {
 		rbio = bch2_rbio_free(rbio);
 		bch2_rbio_done(rbio);
@@ -931,13 +939,35 @@ retry_pick:
 		goto retry_pick;
 	}
 
-	if (flags & BCH_READ_data_update) {
-		struct data_update *u = container_of(orig, struct data_update, rbio);
+	if (!(flags & BCH_READ_data_update)) {
+		if (!(flags & BCH_READ_last_fragment) ||
+		    bio_flagged(&orig->bio, BIO_CHAIN))
+			flags |= BCH_READ_must_clone;
+
+		narrow_crcs = !(flags & BCH_READ_in_retry) &&
+			bch2_can_narrow_extent_crcs(k, pick.crc);
+
+		if (narrow_crcs && (flags & BCH_READ_user_mapped))
+			flags |= BCH_READ_must_bounce;
+		EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
+
+		if (crc_is_compressed(pick.crc) ||
+		    (pick.crc.csum_type != BCH_CSUM_none &&
+		     (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
+		      (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
+		       (flags & BCH_READ_user_mapped)) ||
+		      (flags & BCH_READ_must_bounce)))) {
+			read_full = true;
+			bounce = true;
+		}
+	} else {
+		read_full = true;
 
 		/*
 		 * can happen if we retry, and the extent we were going to read
 		 * has been merged in the meantime:
 		 */
+		struct data_update *u = container_of(orig, struct data_update, rbio);
 		if (pick.crc.compressed_size > u->op.wbio.bio.bi_iter.bi_size) {
 			if (ca)
 				percpu_ref_put(&ca->io_ref);
@@ -945,29 +975,6 @@ retry_pick:
 		}
 
 		iter.bi_size = pick.crc.compressed_size << 9;
-		goto get_bio;
-	}
-
-	if (!(flags & BCH_READ_last_fragment) ||
-	    bio_flagged(&orig->bio, BIO_CHAIN))
-		flags |= BCH_READ_must_clone;
-
-	narrow_crcs = !(flags & BCH_READ_in_retry) &&
-		bch2_can_narrow_extent_crcs(k, pick.crc);
-
-	if (narrow_crcs && (flags & BCH_READ_user_mapped))
-		flags |= BCH_READ_must_bounce;
-
-	EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);
-
-	if (crc_is_compressed(pick.crc) ||
-	    (pick.crc.csum_type != BCH_CSUM_none &&
-	     (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
-	      (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
-	       (flags & BCH_READ_user_mapped)) ||
-	      (flags & BCH_READ_must_bounce)))) {
-		read_full = true;
-		bounce = true;
 	}
 
 	if (orig->opts.promote_target || have_io_error(failed))
@@ -991,7 +998,7 @@ retry_pick:
 		pick.crc.offset = 0;
 		pick.crc.live_size = bvec_iter_sectors(iter);
 	}
-get_bio:
+
 	if (rbio) {
 		/*
 		 * promote already allocated bounce rbio:
@@ -1055,9 +1062,6 @@ get_bio:
 	rbio->version = k.k->bversion;
 	INIT_WORK(&rbio->work, NULL);
 
-	if (flags & BCH_READ_data_update)
-		orig->pick = pick;
-
 	rbio->bio.bi_opf = orig->bio.bi_opf;
 	rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
 	rbio->bio.bi_end_io = bch2_read_endio;
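
For reference, the read_full/bounce predicate that the later hunks move into the !BCH_READ_data_update branch can be modeled as a standalone program. The names below (toy_crc, TOY_CSUM_*, toy_read_full_and_bounce) are stand-ins for the kernel's types, not the real API. The rule it expresses: compressed extents must be read whole, checksummed extents must be read whole to verify the checksum, and encrypted data headed for user-mapped pages must bounce through a private buffer.

/* Standalone model of the read_full/bounce decision; all names are
 * illustrative stand-ins for bcachefs's types and flags. */
#include <stdbool.h>
#include <stdio.h>

enum toy_csum { TOY_CSUM_NONE, TOY_CSUM_CRC32C, TOY_CSUM_CHACHA20 };

struct toy_crc {
	bool		compressed;		/* models crc_is_compressed() */
	enum toy_csum	csum_type;
	unsigned	uncompressed_size;	/* sectors */
};

static bool toy_csum_is_encryption(enum toy_csum t)
{
	return t == TOY_CSUM_CHACHA20;
}

/*
 * True when the requested sectors can't be read straight into the
 * caller's pages: compressed extents are read whole, checksummed
 * extents are read whole so the checksum can be verified, and
 * encrypted data can't be decrypted in place in user-mapped pages.
 */
static bool toy_read_full_and_bounce(struct toy_crc crc, unsigned iter_sectors,
				     bool user_mapped, bool must_bounce)
{
	return crc.compressed ||
		(crc.csum_type != TOY_CSUM_NONE &&
		 (iter_sectors != crc.uncompressed_size ||
		  (toy_csum_is_encryption(crc.csum_type) && user_mapped) ||
		  must_bounce));
}

int main(void)
{
	struct toy_crc crc = {
		.csum_type		= TOY_CSUM_CRC32C,
		.uncompressed_size	= 16,
	};

	/* partial read of a checksummed extent: must read it all */
	printf("%d\n", toy_read_full_and_bounce(crc, 8, false, false));	/* 1 */
	/* full-extent read: can go straight through */
	printf("%d\n", toy_read_full_and_bounce(crc, 16, false, false));	/* 0 */
	return 0;
}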