summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@linux.dev> 2025-02-11 01:04:08 +0300
committer Kent Overstreet <kent.overstreet@linux.dev> 2025-03-15 04:02:14 +0300
commit 1ccbcd320577271c85d9a5bfbdd3394cb9baadb3 (patch)
tree 44450a7787571ff40b316edf16398047ba6ae092
parent 3faa4647a0c3fd0e27e966a8c72ab9863014d518 (diff)
download linux-1ccbcd320577271c85d9a5bfbdd3394cb9baadb3.tar.xz
bcachefs: bch2_write_op_error() now prints info about data update
A user has been seeing the "error verifying existing checksum while rewriting existing data (memory corruption?)" error. This generally indicates a hardware issue (and that may be the case here), but it might also indicate a bug, in which case we need more information to look for patterns.

Reported-by: Roland Vet <vet.roland@protonmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- fs/bcachefs/compress.c 8
-rw-r--r-- fs/bcachefs/error.c 6
-rw-r--r-- fs/bcachefs/error.h 1
-rw-r--r-- fs/bcachefs/io_write.c 92
-rw-r--r-- fs/bcachefs/io_write.h 8
5 files changed, 80 insertions, 35 deletions
diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c
index 114bf2f3879f..31467f77930f 100644
--- a/fs/bcachefs/compress.c
+++ b/fs/bcachefs/compress.c
@@ -271,8 +271,8 @@ int bch2_bio_uncompress_inplace(struct bch_write_op *op,
if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max ||
crc->compressed_size << 9 > c->opts.encoded_extent_max) {
struct printbuf buf = PRINTBUF;
- bch2_write_op_error(&buf, op);
- prt_printf(&buf, "error rewriting existing data: extent too big");
+ bch2_write_op_error(&buf, op, op->pos.offset,
+ "extent too big to decompress");
bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
return -EIO;
@@ -283,8 +283,8 @@ int bch2_bio_uncompress_inplace(struct bch_write_op *op,
if (__bio_uncompress(c, bio, data.b, *crc)) {
if (!c->opts.no_data_io) {
struct printbuf buf = PRINTBUF;
- bch2_write_op_error(&buf, op);
- prt_printf(&buf, "error rewriting existing data: decompression error");
+ bch2_write_op_error(&buf, op, op->pos.offset,
+ "decompression error");
bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
}
diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
index c8fc58fab958..3f93a5a6bbfa 100644
--- a/fs/bcachefs/error.c
+++ b/fs/bcachefs/error.c
@@ -580,3 +580,9 @@ int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *trans, struct printb
prt_printf(out, " offset %llu: ", pos.offset << 8);
return 0;
}
+
+void bch2_inum_snap_offset_err_msg(struct bch_fs *c, struct printbuf *out,
+ struct bpos pos)
+{
+ bch2_trans_do(c, bch2_inum_snap_offset_err_msg_trans(trans, out, pos));
+}
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
index 76da0e88cee8..b3cc69f29fd9 100644
--- a/fs/bcachefs/error.h
+++ b/fs/bcachefs/error.h
@@ -243,5 +243,6 @@ int bch2_inum_offset_err_msg_trans(struct btree_trans *, struct printbuf *, subv
void bch2_inum_offset_err_msg(struct bch_fs *, struct printbuf *, subvol_inum, u64);
int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *, struct printbuf *, struct bpos);
+void bch2_inum_snap_offset_err_msg(struct bch_fs *, struct printbuf *, struct bpos);
#endif /* _BCACHEFS_ERROR_H */
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index 076e39474610..738bdbfbdb14 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -396,29 +396,61 @@ static int bch2_write_index_default(struct bch_write_op *op)
/* Writes */
-static void __bch2_write_op_error(struct printbuf *out, struct bch_write_op *op,
- u64 offset)
+void bch2_write_op_error_trans(struct btree_trans *trans, struct printbuf *out,
+ struct bch_write_op *op, u64 offset, const char *fmt, ...)
{
- bch2_inum_offset_err_msg(op->c, out,
- (subvol_inum) { op->subvol, op->pos.inode, },
- offset << 9);
- prt_printf(out, "write error%s: ",
- op->flags & BCH_WRITE_move ? "(internal move)" : "");
-}
+ if (op->subvol)
+ lockrestart_do(trans,
+ bch2_inum_offset_err_msg_trans(trans, out,
+ (subvol_inum) { op->subvol, op->pos.inode, },
+ offset << 9));
+ else {
+ struct bpos pos = op->pos;
+ pos.offset = offset;
+ lockrestart_do(trans, bch2_inum_snap_offset_err_msg_trans(trans, out, pos));
+ }
-void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op)
-{
- __bch2_write_op_error(out, op, op->pos.offset);
+ prt_str(out, "write error: ");
+
+ va_list args;
+ va_start(args, fmt);
+ prt_vprintf(out, fmt, args);
+ va_end(args);
+
+ if (op->flags & BCH_WRITE_move) {
+ struct data_update *u = container_of(op, struct data_update, op);
+
+ prt_printf(out, "\n from internal move ");
+ bch2_bkey_val_to_text(out, op->c, bkey_i_to_s_c(u->k.k));
+ }
}
-static void bch2_write_op_error_trans(struct btree_trans *trans, struct printbuf *out,
- struct bch_write_op *op, u64 offset)
+void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op, u64 offset,
+ const char *fmt, ...)
{
- bch2_inum_offset_err_msg_trans(trans, out,
- (subvol_inum) { op->subvol, op->pos.inode, },
- offset << 9);
- prt_printf(out, "write error%s: ",
- op->flags & BCH_WRITE_move ? "(internal move)" : "");
+ if (op->subvol)
+ bch2_inum_offset_err_msg(op->c, out,
+ (subvol_inum) { op->subvol, op->pos.inode, },
+ offset << 9);
+ else {
+ struct bpos pos = op->pos;
+ pos.offset = offset;
+ bch2_inum_snap_offset_err_msg(op->c, out, pos);
+ }
+
+ prt_str(out, "write error: ");
+
+ va_list args;
+ va_start(args, fmt);
+ prt_vprintf(out, fmt, args);
+ va_end(args);
+
+ if (op->flags & BCH_WRITE_move) {
+ struct data_update *u = container_of(op, struct data_update, op);
+
+ prt_printf(out, "\n from internal move ");
+ bch2_bkey_val_to_text(out, op->c, bkey_i_to_s_c(u->k.k));
+ }
}
void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
@@ -561,8 +593,8 @@ static void __bch2_write_index(struct bch_write_op *op)
struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);
struct printbuf buf = PRINTBUF;
- __bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k));
- prt_printf(&buf, "btree update error: %s", bch2_err_str(ret));
+ bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k),
+ "btree update error: %s", bch2_err_str(ret));
bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
}
@@ -1114,8 +1146,8 @@ do_write:
csum_err:
{
struct printbuf buf = PRINTBUF;
- bch2_write_op_error(&buf, op);
- prt_printf(&buf, "error verifying existing checksum while rewriting existing data (memory corruption?)");
+ bch2_write_op_error(&buf, op, op->pos.offset,
+ "error verifying existing checksum while rewriting existing data (memory corruption?)");
bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
}
@@ -1211,8 +1243,8 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);
struct printbuf buf = PRINTBUF;
- bch2_write_op_error_trans(trans, &buf, op, bkey_start_offset(&insert->k));
- prt_printf(&buf, "btree update error: %s", bch2_err_str(ret));
+ bch2_write_op_error_trans(trans, &buf, op, bkey_start_offset(&insert->k),
+ "btree update error: %s", bch2_err_str(ret));
bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
}
@@ -1379,8 +1411,8 @@ err:
if (ret) {
struct printbuf buf = PRINTBUF;
- bch2_write_op_error(&buf, op);
- prt_printf(&buf, "%s(): btree lookup error: %s", __func__, bch2_err_str(ret));
+ bch2_write_op_error(&buf, op, op->pos.offset,
+ "%s(): btree lookup error: %s", __func__, bch2_err_str(ret));
bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
op->error = ret;
@@ -1502,8 +1534,8 @@ err:
if (unlikely(ret < 0)) {
if (!(op->flags & BCH_WRITE_alloc_nowait)) {
struct printbuf buf = PRINTBUF;
- bch2_write_op_error(&buf, op);
- prt_printf(&buf, "%s(): %s", __func__, bch2_err_str(ret));
+ bch2_write_op_error(&buf, op, op->pos.offset,
+ "%s(): %s", __func__, bch2_err_str(ret));
bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
}
@@ -1634,8 +1666,8 @@ CLOSURE_CALLBACK(bch2_write)
if (unlikely(bio->bi_iter.bi_size & (c->opts.block_size - 1))) {
struct printbuf buf = PRINTBUF;
- bch2_write_op_error(&buf, op);
- prt_printf(&buf, "misaligned write");
+ bch2_write_op_error(&buf, op, op->pos.offset,
+ "misaligned write");
printbuf_exit(&buf);
op->error = -EIO;
goto err;
diff --git a/fs/bcachefs/io_write.h b/fs/bcachefs/io_write.h
index 02cca52be0bd..bf942566a8eb 100644
--- a/fs/bcachefs/io_write.h
+++ b/fs/bcachefs/io_write.h
@@ -20,7 +20,13 @@ static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw
void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
enum bch_data_type, const struct bkey_i *, bool);
-void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op);
+__printf(5, 6)
+void bch2_write_op_error_trans(struct btree_trans *trans, struct printbuf *out,
+ struct bch_write_op *op, u64, const char *, ...);
+
+__printf(4, 5)
+void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op, u64,
+ const char *, ...);
#define BCH_WRITE_FLAGS() \
x(alloc_nowait) \