diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-11-14 21:00:23 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-11-14 21:00:23 +0300 |
commit | 4abcd80f23357808b0444d261ed08e5a77dbaa9a (patch) | |
tree | ac3f12ee1e2a4626f9b3e430c72b121547740e38 | |
parent | 0a9b9d17f3a781dea03baca01c835deaa07f7cc3 (diff) | |
parent | 840c2fbcc5cd33ba8fab180f09da0bb7f354ea71 (diff) | |
download | linux-4abcd80f23357808b0444d261ed08e5a77dbaa9a.tar.xz |
Merge tag 'bcachefs-2024-11-13' of git://evilpiepirate.org/bcachefs
Pull bcachefs fixes from Kent Overstreet:
"This fixes one minor regression from the btree cache fixes (in the
scan_for_btree_nodes repair path) - and the shutdown path fix is the
big one here, in terms of bugs closed:
- Assorted tiny syzbot fixes
- Shutdown path fix: "bch2_btree_write_buffer_flush_going_ro()"
The shutdown path wasn't flushing the btree write buffer, leading
to shutting down while we still had operations in flight. This
fixes a whole slew of syzbot bugs, and undoubtedly other strange
heisenbugs.
* tag 'bcachefs-2024-11-13' of git://evilpiepirate.org/bcachefs:
bcachefs: Fix assertion pop in bch2_ptr_swab()
bcachefs: Fix journal_entry_dev_usage_to_text() overrun
bcachefs: Allow for unknown key types in backpointers fsck
bcachefs: Fix assertion pop in topology repair
bcachefs: Fix hidden btree errors when reading roots
bcachefs: Fix validate_bset() repair path
bcachefs: Fix missing validation for bch_backpointer.level
bcachefs: Fix bch_member.btree_bitmap_shift validation
bcachefs: bch2_btree_write_buffer_flush_going_ro()
-rw-r--r-- | fs/bcachefs/backpointers.c | 17 | ||||
-rw-r--r-- | fs/bcachefs/btree_gc.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_io.c | 6 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 3 | ||||
-rw-r--r-- | fs/bcachefs/btree_write_buffer.c | 30 | ||||
-rw-r--r-- | fs/bcachefs/btree_write_buffer.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/extents.c | 5 | ||||
-rw-r--r-- | fs/bcachefs/journal_io.c | 3 | ||||
-rw-r--r-- | fs/bcachefs/recovery_passes.c | 6 | ||||
-rw-r--r-- | fs/bcachefs/recovery_passes_types.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/sb-errors_format.h | 6 | ||||
-rw-r--r-- | fs/bcachefs/sb-members.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/sb-members_format.h | 6 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 1 |
14 files changed, 72 insertions, 19 deletions
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 47455a85c909..654a58132a4d 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -52,6 +52,12 @@ int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, enum bch_validate_flags flags) { struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); + int ret = 0; + + bkey_fsck_err_on(bp.v->level > BTREE_MAX_DEPTH, + c, backpointer_level_bad, + "backpointer level bad: %u >= %u", + bp.v->level, BTREE_MAX_DEPTH); rcu_read_lock(); struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode); @@ -64,7 +70,6 @@ int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, struct bpos bucket = bp_pos_to_bucket(ca, bp.k->p); struct bpos bp_pos = bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset); rcu_read_unlock(); - int ret = 0; bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || !bpos_eq(bp.k->p, bp_pos), @@ -947,9 +952,13 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c) static int check_one_backpointer(struct btree_trans *trans, struct bbpos start, struct bbpos end, - struct bkey_s_c_backpointer bp, + struct bkey_s_c bp_k, struct bkey_buf *last_flushed) { + if (bp_k.k->type != KEY_TYPE_backpointer) + return 0; + + struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k); struct bch_fs *c = trans->c; struct btree_iter iter; struct bbpos pos = bp_to_bbpos(*bp.v); @@ -1004,9 +1013,7 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ progress_update_iter(trans, &progress, &iter, "backpointers_to_extents"); - check_one_backpointer(trans, start, end, - bkey_s_c_to_backpointer(k), - &last_flushed); + check_one_backpointer(trans, start, end, k, &last_flushed); })); bch2_bkey_buf_exit(&last_flushed, c); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 0ca3feeb42c8..81dcf9e512c0 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -182,7 +182,7 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max) bch2_btree_node_drop_keys_outside_node(b); mutex_lock(&c->btree_cache.lock); - bch2_btree_node_hash_remove(&c->btree_cache, b); + __bch2_btree_node_hash_remove(&c->btree_cache, b); bkey_copy(&b->key, &new->k_i); ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 6296a11ccb09..839d68802e42 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -733,11 +733,8 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, c, ca, b, i, NULL, bset_past_end_of_btree_node, "bset past end of btree node (offset %u len %u but written %zu)", - offset, sectors, ptr_written ?: btree_sectors(c))) { + offset, sectors, ptr_written ?: btree_sectors(c))) i->u64s = 0; - ret = 0; - goto out; - } btree_err_on(offset && !i->u64s, -BCH_ERR_btree_node_read_err_fixable, @@ -829,7 +826,6 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, BSET_BIG_ENDIAN(i), write, &bn->format); } -out: fsck_err: printbuf_exit(&buf2); printbuf_exit(&buf1); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 22740b605f0a..d596ef93239f 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -2398,7 +2398,8 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, if (new_hash) { mutex_lock(&c->btree_cache.lock); bch2_btree_node_hash_remove(&c->btree_cache, new_hash); - bch2_btree_node_hash_remove(&c->btree_cache, b); + + __bch2_btree_node_hash_remove(&c->btree_cache, b); bkey_copy(&b->key, new_key); ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 3f56b584f8ec..1639c60dffa0 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -277,6 +277,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) bool accounting_replay_done = test_bit(BCH_FS_accounting_replay_done, &c->flags); int ret = 0; + ret = bch2_journal_error(&c->journal); + if (ret) + return ret; + bch2_trans_unlock(trans); bch2_trans_begin(trans); @@ -491,7 +495,8 @@ static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 seq) return ret; } -static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq) +static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq, + bool *did_work) { struct bch_fs *c = trans->c; struct btree_write_buffer *wb = &c->btree_write_buffer; @@ -502,6 +507,8 @@ static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq) fetch_from_journal_err = fetch_wb_keys_from_journal(c, seq); + *did_work |= wb->inc.keys.nr || wb->flushing.keys.nr; + /* * On memory allocation failure, bch2_btree_write_buffer_flush_locked() * is not guaranteed to empty wb->inc: @@ -521,17 +528,34 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j, struct journal_entry_pin *_pin, u64 seq) { struct bch_fs *c = container_of(j, struct bch_fs, journal); + bool did_work = false; - return bch2_trans_run(c, btree_write_buffer_flush_seq(trans, seq)); + return bch2_trans_run(c, btree_write_buffer_flush_seq(trans, seq, &did_work)); } int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans) { struct bch_fs *c = trans->c; + bool did_work = false; trace_and_count(c, write_buffer_flush_sync, trans, _RET_IP_); - return btree_write_buffer_flush_seq(trans, journal_cur_seq(&c->journal)); + return btree_write_buffer_flush_seq(trans, journal_cur_seq(&c->journal), &did_work); +} + +/* + * The write buffer requires flushing when going RO: keys in the journal for the + * write buffer don't have a journal pin yet + */ +bool bch2_btree_write_buffer_flush_going_ro(struct bch_fs *c) +{ + if (bch2_journal_error(&c->journal)) + return false; + + bool did_work = false; + bch2_trans_run(c, btree_write_buffer_flush_seq(trans, + journal_cur_seq(&c->journal), &did_work)); + return did_work; } int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *trans) diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h index 725e79654216..d535cea28bde 100644 --- a/fs/bcachefs/btree_write_buffer.h +++ b/fs/bcachefs/btree_write_buffer.h @@ -21,6 +21,7 @@ static inline bool bch2_btree_write_buffer_must_wait(struct bch_fs *c) struct btree_trans; int bch2_btree_write_buffer_flush_sync(struct btree_trans *); +bool bch2_btree_write_buffer_flush_going_ro(struct bch_fs *); int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *); int bch2_btree_write_buffer_tryflush(struct btree_trans *); diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index c4e91d123849..37e3d69bec06 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -1364,7 +1364,7 @@ void bch2_ptr_swab(struct bkey_s k) for (entry = ptrs.start; entry < ptrs.end; entry = extent_entry_next(entry)) { - switch (extent_entry_type(entry)) { + switch (__extent_entry_type(entry)) { case BCH_EXTENT_ENTRY_ptr: break; case BCH_EXTENT_ENTRY_crc32: @@ -1384,6 +1384,9 @@ void bch2_ptr_swab(struct bkey_s k) break; case BCH_EXTENT_ENTRY_rebalance: break; + default: + /* Bad entry type: will be caught by validate() */ + return; } } } diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index ccaafa90f4f4..fb35dd336331 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -708,6 +708,9 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs container_of(entry, struct jset_entry_dev_usage, entry); unsigned i, nr_types = jset_entry_dev_usage_nr_types(u); + if (vstruct_bytes(entry) < sizeof(*u)) + return; + prt_printf(out, "dev=%u", le32_to_cpu(u->dev)); printbuf_indent_add(out, 2); diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 4bbeac9e0526..dff589ddc984 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -27,6 +27,12 @@ const char * const bch2_recovery_passes[] = { NULL }; +/* Fake recovery pass, so that scan_for_btree_nodes isn't 0: */ +static int bch2_recovery_pass_empty(struct bch_fs *c) +{ + return 0; +} + static int bch2_set_may_go_rw(struct bch_fs *c) { struct journal_keys *keys = &c->journal_keys; diff --git a/fs/bcachefs/recovery_passes_types.h b/fs/bcachefs/recovery_passes_types.h index 9d96c06e365c..94dc20ca2065 100644 --- a/fs/bcachefs/recovery_passes_types.h +++ b/fs/bcachefs/recovery_passes_types.h @@ -13,6 +13,7 @@ * must never change: */ #define BCH_RECOVERY_PASSES() \ + x(recovery_pass_empty, 41, PASS_SILENT) \ x(scan_for_btree_nodes, 37, 0) \ x(check_topology, 4, 0) \ x(accounting_read, 39, PASS_ALWAYS) \ diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 937275d061fe..9feb6739f77a 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -136,7 +136,9 @@ enum bch_fsck_flags { x(bucket_gens_nonzero_for_invalid_buckets, 122, FSCK_AUTOFIX) \ x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \ x(need_discard_freespace_key_bad, 124, 0) \ + x(discarding_bucket_not_in_need_discard_btree, 291, 0) \ x(backpointer_bucket_offset_wrong, 125, 0) \ + x(backpointer_level_bad, 294, 0) \ x(backpointer_to_missing_device, 126, 0) \ x(backpointer_to_missing_alloc, 127, 0) \ x(backpointer_to_missing_ptr, 128, 0) \ @@ -177,7 +179,9 @@ enum bch_fsck_flags { x(ptr_stripe_redundant, 163, 0) \ x(reservation_key_nr_replicas_invalid, 164, 0) \ x(reflink_v_refcount_wrong, 165, 0) \ + x(reflink_v_pos_bad, 292, 0) \ x(reflink_p_to_missing_reflink_v, 166, 0) \ + x(reflink_refcount_underflow, 293, 0) \ x(stripe_pos_bad, 167, 0) \ x(stripe_val_size_bad, 168, 0) \ x(stripe_csum_granularity_bad, 290, 0) \ @@ -302,7 +306,7 @@ enum bch_fsck_flags { x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ - x(MAX, 291, 0) + x(MAX, 295, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index fb08dd680dac..116131f95815 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -163,7 +163,7 @@ static int validate_member(struct printbuf *err, return -BCH_ERR_invalid_sb_members; } - if (m.btree_bitmap_shift >= 64) { + if (m.btree_bitmap_shift >= BCH_MI_BTREE_BITMAP_SHIFT_MAX) { prt_printf(err, "device %u: invalid btree_bitmap_shift %u", i, m.btree_bitmap_shift); return -BCH_ERR_invalid_sb_members; } @@ -450,7 +450,7 @@ static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, uns m->btree_bitmap_shift += resize; } - BUG_ON(m->btree_bitmap_shift > 57); + BUG_ON(m->btree_bitmap_shift >= BCH_MI_BTREE_BITMAP_SHIFT_MAX); BUG_ON(end > 64ULL << m->btree_bitmap_shift); for (unsigned bit = start >> m->btree_bitmap_shift; diff --git a/fs/bcachefs/sb-members_format.h b/fs/bcachefs/sb-members_format.h index d727d2dfda08..2adf1221a440 100644 --- a/fs/bcachefs/sb-members_format.h +++ b/fs/bcachefs/sb-members_format.h @@ -66,6 +66,12 @@ struct bch_member { }; /* + * btree_allocated_bitmap can represent sector addresses of a u64: it itself has + * 64 elements, so 64 - ilog2(64) + */ +#define BCH_MI_BTREE_BITMAP_SHIFT_MAX 58 + +/* * This limit comes from the bucket_gens array - it's a single allocation, and * kernel allocation are limited to INT_MAX */ diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 657fd3759e7b..a6ed9a0bf1c7 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -272,6 +272,7 @@ static void __bch2_fs_read_only(struct bch_fs *c) clean_passes++; if (bch2_btree_interior_updates_flush(c) || + bch2_btree_write_buffer_flush_going_ro(c) || bch2_journal_flush_all_pins(&c->journal) || bch2_btree_flush_all_writes(c) || seq != atomic64_read(&c->journal.seq)) { |