diff options
Diffstat (limited to 'fs/bcachefs/recovery.c')
-rw-r--r-- | fs/bcachefs/recovery.c | 213 |
1 files changed, 143 insertions, 70 deletions
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 3c7f941dde39..71c786cdb192 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -32,23 +32,84 @@ #include <linux/sort.h> #include <linux/stat.h> -#define QSTR(n) { { { .len = strlen(n) } }, .name = n } -void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) +int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree) { - if (btree >= BTREE_ID_NR_MAX) - return; - u64 b = BIT_ULL(btree); + int ret = 0; + + mutex_lock(&c->sb_lock); + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); if (!(c->sb.btrees_lost_data & b)) { - bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree)); + struct printbuf buf = PRINTBUF; + bch2_btree_id_to_text(&buf, btree); + bch_err(c, "flagging btree %s lost data", buf.buf); + printbuf_exit(&buf); + ext->btrees_lost_data |= cpu_to_le64(b); + } - mutex_lock(&c->sb_lock); - bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + /* Once we have runtime self healing for topology errors we won't need this: */ + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret; + + /* Btree node accounting will be off: */ + __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent); + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + +#ifdef CONFIG_BCACHEFS_DEBUG + /* + * These are much more minor, and don't need to be corrected right away, + * but in debug mode we want the next fsck run to be clean: + */ + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_lrus) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret; +#endif + + switch (btree) { + case BTREE_ID_alloc: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + + __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); + __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); + __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); + __set_bit_le64(BCH_FSCK_ERR_alloc_key_cached_sectors_wrong, ext->errors_silent); + __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent); + __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); + goto out; + case BTREE_ID_backpointers: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret; + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret; + goto out; + case BTREE_ID_need_discard: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + goto out; + case BTREE_ID_freespace: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + goto out; + case BTREE_ID_bucket_gens: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + goto out; + case BTREE_ID_lru: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret; + goto out; + case BTREE_ID_accounting: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret; + goto out; + default: + ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret; + goto out; } +out: + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + + return ret; +} + +static void kill_btree(struct bch_fs *c, enum btree_id btree) +{ + bch2_btree_id_root(c, btree)->alive = false; + bch2_shoot_down_journal_keys(c, btree, 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); } /* for -o reconstruct_alloc: */ @@ -79,6 +140,8 @@ static void bch2_reconstruct_alloc(struct bch_fs *c) __set_bit_le64(BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, ext->errors_silent); __set_bit_le64(BCH_FSCK_ERR_fs_usage_replicas_wrong, ext->errors_silent); + __set_bit_le64(BCH_FSCK_ERR_alloc_key_to_missing_lru_entry, ext->errors_silent); + __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); @@ -99,16 +162,9 @@ static void bch2_reconstruct_alloc(struct bch_fs *c) bch2_write_super(c); mutex_unlock(&c->sb_lock); - bch2_shoot_down_journal_keys(c, BTREE_ID_alloc, - 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); - bch2_shoot_down_journal_keys(c, BTREE_ID_backpointers, - 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); - bch2_shoot_down_journal_keys(c, BTREE_ID_need_discard, - 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); - bch2_shoot_down_journal_keys(c, BTREE_ID_freespace, - 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); - bch2_shoot_down_journal_keys(c, BTREE_ID_bucket_gens, - 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + for (unsigned i = 0; i < btree_id_nr_alive(c); i++) + if (btree_id_is_alloc(i)) + kill_btree(c, i); } /* @@ -354,10 +410,13 @@ int bch2_journal_replay(struct bch_fs *c) ? BCH_TRANS_COMMIT_no_journal_res|BCH_WATERMARK_reclaim : 0), bch2_journal_replay_key(trans, k)); - bch_err_msg(c, ret, "while replaying key at btree %s level %u:", - bch2_btree_id_str(k->btree_id), k->level); - if (ret) + if (ret) { + struct printbuf buf = PRINTBUF; + bch2_btree_id_level_to_text(&buf, k->btree_id, k->level); + bch_err_msg(c, ret, "while replaying key at %s:", buf.buf); + printbuf_exit(&buf); goto err; + } BUG_ON(k->btree_id != BTREE_ID_accounting && !k->overwritten); } @@ -403,7 +462,9 @@ static int journal_replay_entry_early(struct bch_fs *c, switch (entry->type) { case BCH_JSET_ENTRY_btree_root: { - struct btree_root *r; + + if (unlikely(!entry->u64s)) + return 0; if (fsck_err_on(entry->btree_id >= BTREE_ID_NR_MAX, c, invalid_btree_id, @@ -417,15 +478,11 @@ static int journal_replay_entry_early(struct bch_fs *c, return ret; } - r = bch2_btree_id_root(c, entry->btree_id); + struct btree_root *r = bch2_btree_id_root(c, entry->btree_id); - if (entry->u64s) { - r->level = entry->level; - bkey_copy(&r->key, (struct bkey_i *) entry->start); - r->error = 0; - } else { - r->error = -BCH_ERR_btree_node_read_error; - } + r->level = entry->level; + bkey_copy(&r->key, (struct bkey_i *) entry->start); + r->error = 0; r->alive = true; break; } @@ -505,6 +562,7 @@ static int journal_replay_early(struct bch_fs *c, static int read_btree_roots(struct bch_fs *c) { + struct printbuf buf = PRINTBUF; int ret = 0; for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { @@ -513,33 +571,22 @@ static int read_btree_roots(struct bch_fs *c) if (!r->alive) continue; - if (btree_id_is_alloc(i) && c->opts.reconstruct_alloc) - continue; + printbuf_reset(&buf); + bch2_btree_id_level_to_text(&buf, i, r->level); if (mustfix_fsck_err_on((ret = r->error), c, btree_root_bkey_invalid, "invalid btree root %s", - bch2_btree_id_str(i)) || + buf.buf) || mustfix_fsck_err_on((ret = r->error = bch2_btree_root_read(c, i, &r->key, r->level)), c, btree_root_read_error, - "error reading btree root %s l=%u: %s", - bch2_btree_id_str(i), r->level, bch2_err_str(ret))) { - if (btree_id_is_alloc(i)) { - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs); - c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info); + "error reading btree root %s: %s", + buf.buf, bch2_err_str(ret))) { + if (btree_id_is_alloc(i)) r->error = 0; - } else if (!(c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) { - bch_info(c, "will run btree node scan"); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes); - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); - } - ret = 0; - bch2_btree_lost_data(c, i); + ret = bch2_btree_lost_data(c, i); + BUG_ON(ret); } } @@ -553,6 +600,7 @@ static int read_btree_roots(struct bch_fs *c) } } fsck_err: + printbuf_exit(&buf); return ret; } @@ -563,6 +611,7 @@ static bool check_version_upgrade(struct bch_fs *c) bch2_latest_compatible_version(c->sb.version)); unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; unsigned new_version = 0; + bool ret = false; if (old_version < bcachefs_metadata_required_upgrade_below) { if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible || @@ -618,14 +667,32 @@ static bool check_version_upgrade(struct bch_fs *c) } bch_info(c, "%s", buf.buf); + printbuf_exit(&buf); - bch2_sb_upgrade(c, new_version); + ret = true; + } + if (new_version > c->sb.version_incompat && + c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) { + struct printbuf buf = PRINTBUF; + + prt_str(&buf, "Now allowing incompatible features up to "); + bch2_version_to_text(&buf, new_version); + prt_str(&buf, ", previously allowed up to "); + bch2_version_to_text(&buf, c->sb.version_incompat_allowed); + prt_newline(&buf); + + bch_info(c, "%s", buf.buf); printbuf_exit(&buf); - return true; + + ret = true; } - return false; + if (ret) + bch2_sb_upgrade(c, new_version, + c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible); + + return ret; } int bch2_fs_recovery(struct bch_fs *c) @@ -660,8 +727,13 @@ int bch2_fs_recovery(struct bch_fs *c) goto err; } - if (c->opts.norecovery) - c->opts.recovery_pass_last = BCH_RECOVERY_PASS_journal_replay - 1; + if (c->opts.norecovery) { + c->opts.recovery_pass_last = c->opts.recovery_pass_last + ? min(c->opts.recovery_pass_last, BCH_RECOVERY_PASS_snapshots_read) + : BCH_RECOVERY_PASS_snapshots_read; + c->opts.nochanges = true; + c->opts.read_only = true; + } mutex_lock(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); @@ -708,17 +780,20 @@ int bch2_fs_recovery(struct bch_fs *c) c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); + if (c->sb.version_upgrade_complete < bcachefs_metadata_version_autofix_errors) { + SET_BCH_SB_ERROR_ACTION(c->disk_sb.sb, BCH_ON_ERROR_fix_safe); + write_sb = true; + } + if (write_sb) bch2_write_super(c); mutex_unlock(&c->sb_lock); - if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) - c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_topology); - if (c->opts.fsck) set_bit(BCH_FS_fsck_running, &c->flags); if (c->sb.clean) set_bit(BCH_FS_clean_recovery, &c->flags); + set_bit(BCH_FS_recovery_running, &c->flags); ret = bch2_blacklist_table_initialize(c); if (ret) { @@ -807,15 +882,15 @@ use_clean: c->journal_replay_seq_start = last_seq; c->journal_replay_seq_end = blacklist_seq - 1; - if (c->opts.reconstruct_alloc) - bch2_reconstruct_alloc(c); - zero_out_btree_mem_ptr(&c->journal_keys); ret = journal_replay_early(c, clean); if (ret) goto err; + if (c->opts.reconstruct_alloc) + bch2_reconstruct_alloc(c); + /* * After an unclean shutdown, skip then next few journal sequence * numbers as they may have been referenced by btree writes that @@ -870,16 +945,17 @@ use_clean: */ set_bit(BCH_FS_may_go_rw, &c->flags); clear_bit(BCH_FS_fsck_running, &c->flags); + clear_bit(BCH_FS_recovery_running, &c->flags); /* in case we don't run journal replay, i.e. norecovery mode */ set_bit(BCH_FS_accounting_replay_done, &c->flags); + bch2_async_btree_node_rewrites_flush(c); + /* fsync if we fixed errors */ - if (test_bit(BCH_FS_errors_fixed, &c->flags) && - bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) { + if (test_bit(BCH_FS_errors_fixed, &c->flags)) { bch2_journal_flush_all_pins(&c->journal); bch2_journal_meta(&c->journal); - bch2_write_ref_put(c, BCH_WRITE_REF_fsync); } /* If we fixed errors, verify that fs is actually clean now: */ @@ -1021,7 +1097,7 @@ int bch2_fs_initialize(struct bch_fs *c) bch2_check_version_downgrade(c); if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { - bch2_sb_upgrade(c, bcachefs_metadata_version_current); + bch2_sb_upgrade(c, bcachefs_metadata_version_current, false); SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); bch2_write_super(c); } @@ -1035,7 +1111,6 @@ int bch2_fs_initialize(struct bch_fs *c) bch2_write_super(c); mutex_unlock(&c->sb_lock); - c->curr_recovery_pass = BCH_RECOVERY_PASS_NR; set_bit(BCH_FS_btree_running, &c->flags); set_bit(BCH_FS_may_go_rw, &c->flags); @@ -1076,9 +1151,6 @@ int bch2_fs_initialize(struct bch_fs *c) if (ret) goto err; - for_each_online_member(c, ca) - ca->new_fs_bucket_idx = 0; - ret = bch2_fs_freespace_init(c); if (ret) goto err; @@ -1137,6 +1209,7 @@ int bch2_fs_initialize(struct bch_fs *c) bch2_write_super(c); mutex_unlock(&c->sb_lock); + c->curr_recovery_pass = BCH_RECOVERY_PASS_NR; return 0; err: bch_err_fn(c, ret); |