summaryrefslogtreecommitdiff
path: root/fs/bcachefs/io_read.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/io_read.c')
-rw-r--r--fs/bcachefs/io_read.c278
1 files changed, 171 insertions, 107 deletions
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index b3b934a87c6d..aa91fcf51eec 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -21,6 +21,7 @@
#include "io_read.h"
#include "io_misc.h"
#include "io_write.h"
+#include "reflink.h"
#include "subvolume.h"
#include "trace.h"
@@ -58,7 +59,7 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target)
}
rcu_read_unlock();
- return bch2_rand_range(nr * CONGESTED_MAX) < total;
+ return get_random_u32_below(nr * CONGESTED_MAX) < total;
}
#else
@@ -90,13 +91,18 @@ static const struct rhashtable_params bch_promote_params = {
.automatic_shrinking = true,
};
+static inline bool have_io_error(struct bch_io_failures *failed)
+{
+ return failed && failed->nr;
+}
+
static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
struct bpos pos,
struct bch_io_opts opts,
unsigned flags,
struct bch_io_failures *failed)
{
- if (!failed) {
+ if (!have_io_error(failed)) {
BUG_ON(!opts.promote_target);
if (!(flags & BCH_READ_MAY_PROMOTE))
@@ -223,7 +229,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
struct data_update_opts update_opts = {};
- if (!failed) {
+ if (!have_io_error(failed)) {
update_opts.target = opts.promote_target;
update_opts.extra_replicas = 1;
update_opts.write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED;
@@ -231,11 +237,11 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
update_opts.target = opts.foreground_target;
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
- unsigned i = 0;
+ unsigned ptr_bit = 1;
bkey_for_each_ptr(ptrs, ptr) {
if (bch2_dev_io_failures(failed, ptr->dev))
- update_opts.rewrite_ptrs |= BIT(i);
- i++;
+ update_opts.rewrite_ptrs |= ptr_bit;
+ ptr_bit <<= 1;
}
}
@@ -285,7 +291,7 @@ static struct promote_op *promote_alloc(struct btree_trans *trans,
* if failed != NULL we're not actually doing a promote, we're
* recovering from an io/checksum error
*/
- bool promote_full = (failed ||
+ bool promote_full = (have_io_error(failed) ||
*read_full ||
READ_ONCE(c->opts.promote_whole_extents));
/* data might have to be decompressed in the write path: */
@@ -321,6 +327,20 @@ nopromote:
/* Read */
+static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
+ struct bch_read_bio *rbio, struct bpos read_pos)
+{
+ return bch2_inum_offset_err_msg_trans(trans, out,
+ (subvol_inum) { rbio->subvol, read_pos.inode },
+ read_pos.offset << 9);
+}
+
+static void bch2_read_err_msg(struct bch_fs *c, struct printbuf *out,
+ struct bch_read_bio *rbio, struct bpos read_pos)
+{
+ bch2_trans_run(c, bch2_read_err_msg_trans(trans, out, rbio, read_pos));
+}
+
#define READ_RETRY_AVOID 1
#define READ_RETRY 2
#define READ_ERR 3
@@ -499,6 +519,29 @@ static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
}
}
+static void bch2_read_io_err(struct work_struct *work)
+{
+ struct bch_read_bio *rbio =
+ container_of(work, struct bch_read_bio, work);
+ struct bio *bio = &rbio->bio;
+ struct bch_fs *c = rbio->c;
+ struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
+ struct printbuf buf = PRINTBUF;
+
+ bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
+ prt_printf(&buf, "data read error: %s", bch2_blk_status_to_str(bio->bi_status));
+
+ if (ca) {
+ bch2_io_error(ca, BCH_MEMBER_ERROR_read);
+ bch_err_ratelimited(ca, "%s", buf.buf);
+ } else {
+ bch_err_ratelimited(c, "%s", buf.buf);
+ }
+
+ printbuf_exit(&buf);
+ bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status);
+}
+
static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
struct bch_read_bio *rbio)
{
@@ -562,6 +605,73 @@ static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
__bch2_rbio_narrow_crcs(trans, rbio));
}
+static void bch2_read_csum_err(struct work_struct *work)
+{
+ struct bch_read_bio *rbio =
+ container_of(work, struct bch_read_bio, work);
+ struct bch_fs *c = rbio->c;
+ struct bio *src = &rbio->bio;
+ struct bch_extent_crc_unpacked crc = rbio->pick.crc;
+ struct nonce nonce = extent_nonce(rbio->version, crc);
+ struct bch_csum csum = bch2_checksum_bio(c, crc.csum_type, nonce, src);
+ struct printbuf buf = PRINTBUF;
+
+ bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
+ prt_str(&buf, "data ");
+ bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum);
+
+ struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
+ if (ca) {
+ bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
+ bch_err_ratelimited(ca, "%s", buf.buf);
+ } else {
+ bch_err_ratelimited(c, "%s", buf.buf);
+ }
+
+ bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
+ printbuf_exit(&buf);
+}
+
+static void bch2_read_decompress_err(struct work_struct *work)
+{
+ struct bch_read_bio *rbio =
+ container_of(work, struct bch_read_bio, work);
+ struct bch_fs *c = rbio->c;
+ struct printbuf buf = PRINTBUF;
+
+ bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
+ prt_str(&buf, "decompression error");
+
+ struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
+ if (ca)
+ bch_err_ratelimited(ca, "%s", buf.buf);
+ else
+ bch_err_ratelimited(c, "%s", buf.buf);
+
+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
+ printbuf_exit(&buf);
+}
+
+static void bch2_read_decrypt_err(struct work_struct *work)
+{
+ struct bch_read_bio *rbio =
+ container_of(work, struct bch_read_bio, work);
+ struct bch_fs *c = rbio->c;
+ struct printbuf buf = PRINTBUF;
+
+ bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
+ prt_str(&buf, "decrypt error");
+
+ struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
+ if (ca)
+ bch_err_ratelimited(ca, "%s", buf.buf);
+ else
+ bch_err_ratelimited(c, "%s", buf.buf);
+
+ bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
+ printbuf_exit(&buf);
+}
+
/* Inner part that may run in process context */
static void __bch2_read_endio(struct work_struct *work)
{
@@ -668,33 +778,13 @@ csum_err:
goto out;
}
- struct printbuf buf = PRINTBUF;
- buf.atomic++;
- prt_str(&buf, "data ");
- bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum);
-
- struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
- if (ca) {
- bch_err_inum_offset_ratelimited(ca,
- rbio->read_pos.inode,
- rbio->read_pos.offset << 9,
- "data %s", buf.buf);
- bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
- }
- printbuf_exit(&buf);
- bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
+ bch2_rbio_punt(rbio, bch2_read_csum_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
goto out;
decompression_err:
- bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode,
- rbio->read_pos.offset << 9,
- "decompression error");
- bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
+ bch2_rbio_punt(rbio, bch2_read_decompress_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
goto out;
decrypt_err:
- bch_err_inum_offset_ratelimited(c, rbio->read_pos.inode,
- rbio->read_pos.offset << 9,
- "decrypt error");
- bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
+ bch2_rbio_punt(rbio, bch2_read_decrypt_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
goto out;
}
@@ -715,16 +805,8 @@ static void bch2_read_endio(struct bio *bio)
if (!rbio->split)
rbio->bio.bi_end_io = rbio->end_io;
- if (bio->bi_status) {
- if (ca) {
- bch_err_inum_offset_ratelimited(ca,
- rbio->read_pos.inode,
- rbio->read_pos.offset,
- "data read error: %s",
- bch2_blk_status_to_str(bio->bi_status));
- bch2_io_error(ca, BCH_MEMBER_ERROR_read);
- }
- bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status);
+ if (unlikely(bio->bi_status)) {
+ bch2_rbio_punt(rbio, bch2_read_io_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
return;
}
@@ -750,45 +832,6 @@ static void bch2_read_endio(struct bio *bio)
bch2_rbio_punt(rbio, __bch2_read_endio, context, wq);
}
-int __bch2_read_indirect_extent(struct btree_trans *trans,
- unsigned *offset_into_extent,
- struct bkey_buf *orig_k)
-{
- struct btree_iter iter;
- struct bkey_s_c k;
- u64 reflink_offset;
- int ret;
-
- reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k->k)->v.idx) +
- *offset_into_extent;
-
- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_reflink,
- POS(0, reflink_offset), 0);
- ret = bkey_err(k);
- if (ret)
- goto err;
-
- if (k.k->type != KEY_TYPE_reflink_v &&
- k.k->type != KEY_TYPE_indirect_inline_data) {
- bch_err_inum_offset_ratelimited(trans->c,
- orig_k->k->k.p.inode,
- orig_k->k->k.p.offset << 9,
- "%llu len %u points to nonexistent indirect extent %llu",
- orig_k->k->k.p.offset,
- orig_k->k->k.size,
- reflink_offset);
- bch2_inconsistent_error(trans->c);
- ret = -BCH_ERR_missing_indirect_extent;
- goto err;
- }
-
- *offset_into_extent = iter.pos.offset - bkey_start_offset(k.k);
- bch2_bkey_buf_reassemble(orig_k, trans->c, k);
-err:
- bch2_trans_iter_exit(trans, &iter);
- return ret;
-}
-
static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
struct bch_dev *ca,
struct bkey_s_c k,
@@ -868,15 +911,24 @@ retry_pick:
if (!pick_ret)
goto hole;
- if (pick_ret < 0) {
+ if (unlikely(pick_ret < 0)) {
struct printbuf buf = PRINTBUF;
+ bch2_read_err_msg_trans(trans, &buf, orig, read_pos);
+ prt_printf(&buf, "no device to read from: %s\n ", bch2_err_str(pick_ret));
bch2_bkey_val_to_text(&buf, c, k);
- bch_err_inum_offset_ratelimited(c,
- read_pos.inode, read_pos.offset << 9,
- "no device to read from: %s\n %s",
- bch2_err_str(pick_ret),
- buf.buf);
+ bch_err_ratelimited(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ goto err;
+ }
+
+ if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && !c->chacha20) {
+ struct printbuf buf = PRINTBUF;
+ bch2_read_err_msg_trans(trans, &buf, orig, read_pos);
+ prt_printf(&buf, "attempting to read encrypted data without encryption key\n ");
+ bch2_bkey_val_to_text(&buf, c, k);
+
+ bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
goto err;
}
@@ -899,12 +951,6 @@ retry_pick:
goto retry_pick;
}
- /*
- * Unlock the iterator while the btree node's lock is still in
- * cache, before doing the IO:
- */
- bch2_trans_unlock(trans);
-
if (flags & BCH_READ_NODECODE) {
/*
* can happen if we retry, and the extent we were going to read
@@ -942,7 +988,7 @@ retry_pick:
bounce = true;
}
- if (orig->opts.promote_target)// || failed)
+ if (orig->opts.promote_target || have_io_error(failed))
promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags,
&rbio, &bounce, &read_full, failed);
@@ -1061,12 +1107,25 @@ get_bio:
trace_and_count(c, read_split, &orig->bio);
}
+ /*
+ * Unlock the iterator while the btree node's lock is still in
+ * cache, before doing the IO:
+ */
+ if (!(flags & BCH_READ_IN_RETRY))
+ bch2_trans_unlock(trans);
+ else
+ bch2_trans_unlock_long(trans);
+
if (!rbio->pick.idx) {
- if (!rbio->have_ioref) {
- bch_err_inum_offset_ratelimited(c,
- read_pos.inode,
- read_pos.offset << 9,
- "no device to read from");
+ if (unlikely(!rbio->have_ioref)) {
+ struct printbuf buf = PRINTBUF;
+ bch2_read_err_msg_trans(trans, &buf, rbio, read_pos);
+ prt_printf(&buf, "no device to read from:\n ");
+ bch2_bkey_val_to_text(&buf, c, k);
+
+ bch_err_ratelimited(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+
bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
goto out;
}
@@ -1104,6 +1163,8 @@ out:
if (likely(!(flags & BCH_READ_IN_RETRY))) {
return 0;
} else {
+ bch2_trans_unlock(trans);
+
int ret;
rbio->context = RBIO_CONTEXT_UNBOUND;
@@ -1164,7 +1225,6 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
BTREE_ITER_slots);
while (1) {
- unsigned bytes, sectors, offset_into_extent;
enum btree_id data_btree = BTREE_ID_extents;
bch2_trans_begin(trans);
@@ -1184,9 +1244,9 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
if (ret)
goto err;
- offset_into_extent = iter.pos.offset -
+ s64 offset_into_extent = iter.pos.offset -
bkey_start_offset(k.k);
- sectors = k.k->size - offset_into_extent;
+ unsigned sectors = k.k->size - offset_into_extent;
bch2_bkey_buf_reassemble(&sk, c, k);
@@ -1201,9 +1261,9 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
* With indirect extents, the amount of data to read is the min
* of the original extent and the indirect extent:
*/
- sectors = min(sectors, k.k->size - offset_into_extent);
+ sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent);
- bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
+ unsigned bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
swap(bvec_iter.bi_size, bytes);
if (bvec_iter.bi_size == bytes)
@@ -1229,16 +1289,20 @@ err:
}
bch2_trans_iter_exit(trans, &iter);
- bch2_trans_put(trans);
- bch2_bkey_buf_exit(&sk, c);
if (ret) {
- bch_err_inum_offset_ratelimited(c, inum.inum,
- bvec_iter.bi_sector << 9,
- "read error %i from btree lookup", ret);
+ struct printbuf buf = PRINTBUF;
+ bch2_inum_offset_err_msg_trans(trans, &buf, inum, bvec_iter.bi_sector << 9);
+ prt_printf(&buf, "read error %i from btree lookup", ret);
+ bch_err_ratelimited(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+
rbio->bio.bi_status = BLK_STS_IOERR;
bch2_rbio_done(rbio);
}
+
+ bch2_trans_put(trans);
+ bch2_bkey_buf_exit(&sk, c);
}
void bch2_fs_io_read_exit(struct bch_fs *c)