Diffstat (limited to 'fs/bcachefs/recovery.c')
-rw-r--r--	fs/bcachefs/recovery.c	380
1 file changed, 271 insertions(+), 109 deletions(-)
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 3c7f941dde39..c94debb12d2f 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -13,12 +13,13 @@
#include "disk_accounting.h"
#include "errcode.h"
#include "error.h"
-#include "fs-common.h"
#include "journal_io.h"
#include "journal_reclaim.h"
#include "journal_seq_blacklist.h"
#include "logged_ops.h"
#include "move.h"
+#include "movinggc.h"
+#include "namei.h"
#include "quota.h"
#include "rebalance.h"
#include "recovery.h"
@@ -32,31 +33,96 @@
#include <linux/sort.h>
#include <linux/stat.h>
-#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
-
-void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
+int bch2_btree_lost_data(struct bch_fs *c,
+ struct printbuf *msg,
+ enum btree_id btree)
{
- if (btree >= BTREE_ID_NR_MAX)
- return;
-
u64 b = BIT_ULL(btree);
+ int ret = 0;
+
+ mutex_lock(&c->sb_lock);
+ struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
if (!(c->sb.btrees_lost_data & b)) {
- bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree));
+ prt_printf(msg, "flagging btree ");
+ bch2_btree_id_to_text(msg, btree);
+ prt_printf(msg, " lost data\n");
- mutex_lock(&c->sb_lock);
- bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b);
- bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
+ ext->btrees_lost_data |= cpu_to_le64(b);
+ }
+
+ /* Once we have runtime self healing for topology errors we won't need this: */
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret;
+
+ /* Btree node accounting will be off: */
+ __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent);
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_allocations, 0) ?: ret;
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+ /*
+ * These are much more minor, and don't need to be corrected right away,
+ * but in debug mode we want the next fsck run to be clean:
+ */
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_lrus, 0) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents, 0) ?: ret;
+#endif
+
+ switch (btree) {
+ case BTREE_ID_alloc:
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret;
+
+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent);
+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent);
+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent);
+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_cached_sectors_wrong, ext->errors_silent);
+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent);
+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent);
+ goto out;
+ case BTREE_ID_backpointers:
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_btree_backpointers, 0) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_extents_to_backpointers, 0) ?: ret;
+ goto out;
+ case BTREE_ID_need_discard:
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret;
+ goto out;
+ case BTREE_ID_freespace:
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret;
+ goto out;
+ case BTREE_ID_bucket_gens:
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret;
+ goto out;
+ case BTREE_ID_lru:
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0) ?: ret;
+ goto out;
+ case BTREE_ID_accounting:
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_allocations, 0) ?: ret;
+ goto out;
+ case BTREE_ID_snapshots:
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots, 0) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret;
+ goto out;
+ default:
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret;
+ ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret;
+ goto out;
}
+out:
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
+ return ret;
}
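
The rewritten bch2_btree_lost_data() schedules every relevant recovery pass and still returns an error if any of the requests failed, accumulating the result with the GNU "a ?: b" extension. A minimal userspace sketch of that idiom, with hypothetical pass functions standing in for the bcachefs helpers:

/* Sketch of the "ret = f() ?: ret" accumulation idiom used above.
 * Requires the GNU "a ?: b" extension (GCC/Clang); the pass functions
 * are made-up stand-ins, not the bcachefs recovery-pass helpers.
 */
#include <stdio.h>

static int pass_a(void) { return 0; }	/* succeeds */
static int pass_b(void) { return -22; }	/* fails, e.g. -EINVAL */
static int pass_c(void) { return 0; }	/* succeeds */

int main(void)
{
	int ret = 0;

	/* every pass is requested; ret ends up nonzero if any call failed */
	ret = pass_a() ?: ret;
	ret = pass_b() ?: ret;
	ret = pass_c() ?: ret;	/* later success does not clobber the error */

	printf("ret = %d\n", ret);	/* prints -22 */
	return 0;
}
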
-/* for -o reconstruct_alloc: */
-static void bch2_reconstruct_alloc(struct bch_fs *c)
+static void kill_btree(struct bch_fs *c, enum btree_id btree)
{
- bch2_journal_log_msg(c, "dropping alloc info");
- bch_info(c, "dropping and reconstructing all alloc info");
+ bch2_btree_id_root(c, btree)->alive = false;
+ bch2_shoot_down_journal_keys(c, btree, 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+}
+/* for -o reconstruct_alloc: */
+void bch2_reconstruct_alloc(struct bch_fs *c)
+{
mutex_lock(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
@@ -79,6 +145,8 @@ static void bch2_reconstruct_alloc(struct bch_fs *c)
__set_bit_le64(BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, ext->errors_silent);
__set_bit_le64(BCH_FSCK_ERR_fs_usage_replicas_wrong, ext->errors_silent);
+ __set_bit_le64(BCH_FSCK_ERR_alloc_key_to_missing_lru_entry, ext->errors_silent);
+
__set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent);
__set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent);
__set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent);
@@ -96,19 +164,14 @@ static void bch2_reconstruct_alloc(struct bch_fs *c)
c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+ c->disk_sb.sb->features[0] &= ~cpu_to_le64(BIT_ULL(BCH_FEATURE_no_alloc_info));
+
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
- bch2_shoot_down_journal_keys(c, BTREE_ID_alloc,
- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
- bch2_shoot_down_journal_keys(c, BTREE_ID_backpointers,
- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
- bch2_shoot_down_journal_keys(c, BTREE_ID_need_discard,
- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
- bch2_shoot_down_journal_keys(c, BTREE_ID_freespace,
- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
- bch2_shoot_down_journal_keys(c, BTREE_ID_bucket_gens,
- 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+ for (unsigned i = 0; i < btree_id_nr_alive(c); i++)
+ if (btree_id_is_alloc(i))
+ kill_btree(c, i);
}
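
ext->errors_silent, set via __set_bit_le64() above, is an on-disk bitmap of fsck error codes that the next fsck should fix without reporting, which avoids a flood of expected complaints once the alloc btrees are dropped and rebuilt. A rough userspace sketch of setting a bit in an array of little-endian 64-bit words; this only illustrates the word/bit layout, it is not the kernel's __set_bit_le64():

/* Illustrative only: set bit 'nr' in an array of little-endian 64-bit
 * words, the general shape of an on-disk bitmap such as errors_silent.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t cpu_to_le64_sketch(uint64_t v)
{
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	return __builtin_bswap64(v);
#else
	return v;
#endif
}

static void set_bit_le64_sketch(unsigned nr, uint64_t *map)
{
	map[nr / 64] |= cpu_to_le64_sketch(1ULL << (nr % 64));
}

int main(void)
{
	uint64_t errors_silent[4] = { 0 };

	set_bit_le64_sketch(70, errors_silent);	/* word 1, bit 6 */
	printf("word 1 = 0x%016llx\n", (unsigned long long)errors_silent[1]);
	return 0;
}
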
/*
@@ -142,7 +205,7 @@ static int bch2_journal_replay_accounting_key(struct btree_trans *trans,
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
BTREE_MAX_DEPTH, k->level,
BTREE_ITER_intent);
- int ret = bch2_btree_iter_traverse(&iter);
+ int ret = bch2_btree_iter_traverse(trans, &iter);
if (ret)
goto out;
@@ -205,16 +268,38 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
BTREE_MAX_DEPTH, k->level,
iter_flags);
- ret = bch2_btree_iter_traverse(&iter);
+ ret = bch2_btree_iter_traverse(trans, &iter);
if (ret)
goto out;
struct btree_path *path = btree_iter_path(trans, &iter);
if (unlikely(!btree_path_node(path, k->level))) {
+ struct bch_fs *c = trans->c;
+
+ CLASS(printbuf, buf)();
+ prt_str(&buf, "btree=");
+ bch2_btree_id_to_text(&buf, k->btree_id);
+ prt_printf(&buf, " level=%u ", k->level);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k->k));
+
+ if (!(c->recovery.passes_complete & (BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes)|
+ BIT_ULL(BCH_RECOVERY_PASS_check_topology)))) {
+ bch_err(c, "have key in journal replay for btree depth that does not exist, confused\n%s",
+ buf.buf);
+ ret = -EINVAL;
+ }
+
+ if (!k->allocated) {
+ bch_notice(c, "dropping key in journal replay for depth that does not exist because we're recovering from scan\n%s",
+ buf.buf);
+ k->overwritten = true;
+ goto out;
+ }
+
bch2_trans_iter_exit(trans, &iter);
bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
BTREE_MAX_DEPTH, 0, iter_flags);
- ret = bch2_btree_iter_traverse(&iter) ?:
+ ret = bch2_btree_iter_traverse(trans, &iter) ?:
bch2_btree_increase_depth(trans, iter.path, 0) ?:
-BCH_ERR_transaction_restart_nested;
goto out;
@@ -225,7 +310,12 @@ static int bch2_journal_replay_key(struct btree_trans *trans,
goto out;
if (k->k->k.type == KEY_TYPE_accounting) {
- ret = bch2_trans_update_buffered(trans, BTREE_ID_accounting, k->k);
+ struct bkey_i *n = bch2_trans_subbuf_alloc(trans, &trans->accounting, k->k->k.u64s);
+ ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ goto out;
+
+ bkey_copy(n, k->k);
goto out;
}
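
CLASS(printbuf, buf)() above relies on the kernel's scope-based cleanup support, so the buffer is released automatically on every exit from the block rather than needing an explicit printbuf_exit(). A small standalone sketch of the underlying compiler mechanism, using a hypothetical strbuf type instead of the real printbuf:

/* Sketch of the mechanism behind CLASS()/scope-based cleanup: a variable
 * declared with __attribute__((cleanup)) has its release function run
 * automatically when the enclosing scope exits (GCC/Clang extension).
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct strbuf { char *buf; };

static void strbuf_release(struct strbuf *s)
{
	free(s->buf);
	printf("released\n");
}

int main(void)
{
	{
		__attribute__((cleanup(strbuf_release))) struct strbuf s = {
			.buf = strdup("btree=extents level=1"),
		};
		printf("%s\n", s.buf);
		/* no explicit free: strbuf_release() runs when the scope ends */
	}
	return 0;
}
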
@@ -333,9 +423,9 @@ int bch2_journal_replay(struct bch_fs *c)
* Now, replay any remaining keys in the order in which they appear in
* the journal, unpinning those journal entries as we go:
*/
- sort(keys_sorted.data, keys_sorted.nr,
- sizeof(keys_sorted.data[0]),
- journal_sort_seq_cmp, NULL);
+ sort_nonatomic(keys_sorted.data, keys_sorted.nr,
+ sizeof(keys_sorted.data[0]),
+ journal_sort_seq_cmp, NULL);
darray_for_each(keys_sorted, kp) {
cond_resched();
@@ -354,10 +444,13 @@ int bch2_journal_replay(struct bch_fs *c)
? BCH_TRANS_COMMIT_no_journal_res|BCH_WATERMARK_reclaim
: 0),
bch2_journal_replay_key(trans, k));
- bch_err_msg(c, ret, "while replaying key at btree %s level %u:",
- bch2_btree_id_str(k->btree_id), k->level);
- if (ret)
+ if (ret) {
+ struct printbuf buf = PRINTBUF;
+ bch2_btree_id_level_to_text(&buf, k->btree_id, k->level);
+ bch_err_msg(c, ret, "while replaying key at %s:", buf.buf);
+ printbuf_exit(&buf);
goto err;
+ }
BUG_ON(k->btree_id != BTREE_ID_accounting && !k->overwritten);
}
@@ -370,7 +463,7 @@ int bch2_journal_replay(struct bch_fs *c)
trans = NULL;
if (!c->opts.retain_recovery_info &&
- c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay)
+ c->recovery.pass_done >= BCH_RECOVERY_PASS_journal_replay)
bch2_journal_keys_put_initial(c);
replay_now_at(j, j->replay_journal_seq_end);
@@ -403,7 +496,9 @@ static int journal_replay_entry_early(struct bch_fs *c,
switch (entry->type) {
case BCH_JSET_ENTRY_btree_root: {
- struct btree_root *r;
+
+ if (unlikely(!entry->u64s))
+ return 0;
if (fsck_err_on(entry->btree_id >= BTREE_ID_NR_MAX,
c, invalid_btree_id,
@@ -417,15 +512,11 @@ static int journal_replay_entry_early(struct bch_fs *c,
return ret;
}
- r = bch2_btree_id_root(c, entry->btree_id);
+ struct btree_root *r = bch2_btree_id_root(c, entry->btree_id);
- if (entry->u64s) {
- r->level = entry->level;
- bkey_copy(&r->key, (struct bkey_i *) entry->start);
- r->error = 0;
- } else {
- r->error = -BCH_ERR_btree_node_read_error;
- }
+ r->level = entry->level;
+ bkey_copy(&r->key, (struct bkey_i *) entry->start);
+ r->error = 0;
r->alive = true;
break;
}
@@ -505,6 +596,7 @@ static int journal_replay_early(struct bch_fs *c,
static int read_btree_roots(struct bch_fs *c)
{
+ struct printbuf buf = PRINTBUF;
int ret = 0;
for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
@@ -513,33 +605,20 @@ static int read_btree_roots(struct bch_fs *c)
if (!r->alive)
continue;
- if (btree_id_is_alloc(i) && c->opts.reconstruct_alloc)
- continue;
+ printbuf_reset(&buf);
+ bch2_btree_id_level_to_text(&buf, i, r->level);
if (mustfix_fsck_err_on((ret = r->error),
c, btree_root_bkey_invalid,
"invalid btree root %s",
- bch2_btree_id_str(i)) ||
+ buf.buf) ||
mustfix_fsck_err_on((ret = r->error = bch2_btree_root_read(c, i, &r->key, r->level)),
c, btree_root_read_error,
- "error reading btree root %s l=%u: %s",
- bch2_btree_id_str(i), r->level, bch2_err_str(ret))) {
- if (btree_id_is_alloc(i)) {
- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations);
- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info);
- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus);
- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers);
- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs);
- c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
+ "error reading btree root %s: %s",
+ buf.buf, bch2_err_str(ret))) {
+ if (btree_id_is_alloc(i))
r->error = 0;
- } else if (!(c->opts.recovery_passes & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) {
- bch_info(c, "will run btree node scan");
- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes);
- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
- }
-
ret = 0;
- bch2_btree_lost_data(c, i);
}
}
@@ -553,6 +632,7 @@ static int read_btree_roots(struct bch_fs *c)
}
}
fsck_err:
+ printbuf_exit(&buf);
return ret;
}
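
read_btree_roots() now builds its messages with a single printbuf that is reset at the top of each iteration (printbuf_reset()) and freed exactly once at the shared fsck_err exit (printbuf_exit()). A trivial userspace analog of that reuse-then-single-cleanup shape, with snprintf standing in for the printbuf helpers:

/* Userspace analog of the printbuf reuse in read_btree_roots(): one buffer,
 * refilled per iteration, freed once on the shared exit path.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	char *buf = malloc(128);
	int ret = 0;

	if (!buf)
		return 1;

	for (unsigned i = 0; i < 4; i++) {
		snprintf(buf, 128, "btree id %u level %u", i, 0u); /* "reset" + print */
		printf("checking root: %s\n", buf);
		if (i == 2) {		/* pretend a root failed to read */
			ret = -5;	/* e.g. -EIO */
			goto out;
		}
	}
out:
	free(buf);			/* single cleanup, like printbuf_exit() */
	return ret;
}
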
@@ -563,6 +643,7 @@ static bool check_version_upgrade(struct bch_fs *c)
bch2_latest_compatible_version(c->sb.version));
unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
unsigned new_version = 0;
+ bool ret = false;
if (old_version < bcachefs_metadata_required_upgrade_below) {
if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible ||
@@ -617,15 +698,33 @@ static bool check_version_upgrade(struct bch_fs *c)
bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
}
- bch_info(c, "%s", buf.buf);
+ bch_notice(c, "%s", buf.buf);
+ printbuf_exit(&buf);
- bch2_sb_upgrade(c, new_version);
+ ret = true;
+ }
+
+ if (new_version > c->sb.version_incompat_allowed &&
+ c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) {
+ struct printbuf buf = PRINTBUF;
+ prt_str(&buf, "Now allowing incompatible features up to ");
+ bch2_version_to_text(&buf, new_version);
+ prt_str(&buf, ", previously allowed up to ");
+ bch2_version_to_text(&buf, c->sb.version_incompat_allowed);
+ prt_newline(&buf);
+
+ bch_notice(c, "%s", buf.buf);
printbuf_exit(&buf);
- return true;
+
+ ret = true;
}
- return false;
+ if (ret)
+ bch2_sb_upgrade(c, new_version,
+ c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible);
+
+ return ret;
}
int bch2_fs_recovery(struct bch_fs *c)
@@ -660,8 +759,30 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
}
- if (c->opts.norecovery)
- c->opts.recovery_pass_last = BCH_RECOVERY_PASS_journal_replay - 1;
+ if (c->opts.norecovery) {
+ c->opts.recovery_pass_last = c->opts.recovery_pass_last
+ ? min(c->opts.recovery_pass_last, BCH_RECOVERY_PASS_snapshots_read)
+ : BCH_RECOVERY_PASS_snapshots_read;
+ c->opts.nochanges = true;
+ }
+
+ if (c->opts.nochanges)
+ c->opts.read_only = true;
+
+ if (c->opts.journal_rewind) {
+ bch_info(c, "rewinding journal, fsck required");
+ c->opts.fsck = true;
+ }
+
+ if (go_rw_in_recovery(c)) {
+ /*
+ * start workqueues/kworkers early - kthread creation checks for
+ * pending signals, which is _very_ annoying
+ */
+ ret = bch2_fs_init_rw(c);
+ if (ret)
+ goto err;
+ }
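
The option handling added here forms a cascade: norecovery clamps recovery_pass_last to the snapshots_read pass and implies nochanges, nochanges implies read_only, and journal_rewind forces a full fsck. A compact sketch of that normalization as a pure function; the struct, enum values, and names are illustrative stand-ins, not the bcachefs options code:

/* Illustrative sketch of the mount-option cascade; values are made up. */
#include <stdbool.h>
#include <stdio.h>

enum pass { PASS_UNSET = 0, PASS_SNAPSHOTS_READ = 7 };

struct opts {
	bool norecovery, nochanges, read_only, journal_rewind, fsck;
	int recovery_pass_last;		/* 0 == run every pass */
};

static void normalize(struct opts *o)
{
	if (o->norecovery) {
		o->recovery_pass_last = o->recovery_pass_last
			? (o->recovery_pass_last < PASS_SNAPSHOTS_READ
			   ? o->recovery_pass_last : PASS_SNAPSHOTS_READ)
			: PASS_SNAPSHOTS_READ;
		o->nochanges = true;
	}
	if (o->nochanges)
		o->read_only = true;
	if (o->journal_rewind)
		o->fsck = true;
}

int main(void)
{
	struct opts o = { .norecovery = true };

	normalize(&o);
	printf("last pass %d nochanges %d read_only %d\n",
	       o.recovery_pass_last, o.nochanges, o.read_only);
	return 0;
}
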
mutex_lock(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
@@ -708,17 +829,20 @@ int bch2_fs_recovery(struct bch_fs *c)
c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+ if (c->sb.version_upgrade_complete < bcachefs_metadata_version_autofix_errors) {
+ SET_BCH_SB_ERROR_ACTION(c->disk_sb.sb, BCH_ON_ERROR_fix_safe);
+ write_sb = true;
+ }
+
if (write_sb)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
- if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
- c->opts.recovery_passes |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
-
- if (c->opts.fsck)
- set_bit(BCH_FS_fsck_running, &c->flags);
if (c->sb.clean)
set_bit(BCH_FS_clean_recovery, &c->flags);
+ if (c->opts.fsck)
+ set_bit(BCH_FS_in_fsck, &c->flags);
+ set_bit(BCH_FS_in_recovery, &c->flags);
ret = bch2_blacklist_table_initialize(c);
if (ret) {
@@ -797,7 +921,7 @@ int bch2_fs_recovery(struct bch_fs *c)
use_clean:
if (!clean) {
bch_err(c, "no superblock clean section found");
- ret = -BCH_ERR_fsck_repair_impossible;
+ ret = bch_err_throw(c, fsck_repair_impossible);
goto err;
}
@@ -807,15 +931,44 @@ use_clean:
c->journal_replay_seq_start = last_seq;
c->journal_replay_seq_end = blacklist_seq - 1;
- if (c->opts.reconstruct_alloc)
- bch2_reconstruct_alloc(c);
-
zero_out_btree_mem_ptr(&c->journal_keys);
ret = journal_replay_early(c, clean);
if (ret)
goto err;
+ ret = bch2_fs_resize_on_mount(c);
+ if (ret) {
+ up_write(&c->state_lock);
+ goto err;
+ }
+
+ if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) {
+ bch_info(c, "filesystem is an unresized image file, mounting ro");
+ c->opts.read_only = true;
+ }
+
+ if (!c->opts.read_only &&
+ (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info))) {
+ bch_info(c, "mounting a filesystem with no alloc info read-write; will recreate");
+
+ bch2_reconstruct_alloc(c);
+ } else if (c->opts.reconstruct_alloc) {
+ bch2_journal_log_msg(c, "dropping alloc info");
+ bch_info(c, "dropping and reconstructing all alloc info");
+
+ bch2_reconstruct_alloc(c);
+ }
+
+ if (c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info)) {
+ /* We can't go RW to fix errors without alloc info */
+ if (c->opts.fix_errors == FSCK_FIX_yes ||
+ c->opts.fix_errors == FSCK_FIX_ask)
+ c->opts.fix_errors = FSCK_FIX_no;
+ if (c->opts.errors == BCH_ON_ERROR_fix_safe)
+ c->opts.errors = BCH_ON_ERROR_continue;
+ }
+
/*
* After an unclean shutdown, skip the next few journal sequence
* numbers as they may have been referenced by btree writes that
@@ -824,7 +977,7 @@ use_clean:
* journal sequence numbers:
*/
if (!c->sb.clean)
- journal_seq += 8;
+ journal_seq += JOURNAL_BUF_NR * 4;
if (blacklist_seq != journal_seq) {
ret = bch2_journal_log_msg(c, "blacklisting entries %llu-%llu",
@@ -839,7 +992,7 @@ use_clean:
ret = bch2_journal_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu",
journal_seq, last_seq, blacklist_seq - 1) ?:
- bch2_fs_journal_start(&c->journal, journal_seq);
+ bch2_fs_journal_start(&c->journal, last_seq, journal_seq);
if (ret)
goto err;
@@ -857,8 +1010,10 @@ use_clean:
set_bit(BCH_FS_btree_running, &c->flags);
ret = bch2_sb_set_upgrade_extra(c);
+ if (ret)
+ goto err;
- ret = bch2_run_recovery_passes(c);
+ ret = bch2_run_recovery_passes(c, 0);
if (ret)
goto err;
@@ -869,17 +1024,17 @@ use_clean:
* multithreaded use:
*/
set_bit(BCH_FS_may_go_rw, &c->flags);
- clear_bit(BCH_FS_fsck_running, &c->flags);
+ clear_bit(BCH_FS_in_fsck, &c->flags);
/* in case we don't run journal replay, i.e. norecovery mode */
set_bit(BCH_FS_accounting_replay_done, &c->flags);
+ bch2_async_btree_node_rewrites_flush(c);
+
/* fsync if we fixed errors */
- if (test_bit(BCH_FS_errors_fixed, &c->flags) &&
- bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync)) {
+ if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
bch2_journal_flush_all_pins(&c->journal);
bch2_journal_meta(&c->journal);
- bch2_write_ref_put(c, BCH_WRITE_REF_fsync);
}
/* If we fixed errors, verify that fs is actually clean now: */
@@ -892,9 +1047,8 @@ use_clean:
bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean");
clear_bit(BCH_FS_errors_fixed, &c->flags);
- c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info;
-
- ret = bch2_run_recovery_passes(c);
+ ret = bch2_run_recovery_passes(c,
+ BCH_RECOVERY_PASS_check_alloc_info);
if (ret)
goto err;
@@ -938,7 +1092,7 @@ use_clean:
if (c->opts.fsck &&
!test_bit(BCH_FS_error, &c->flags) &&
- c->recovery_pass_done == BCH_RECOVERY_PASS_NR - 1 &&
+ c->recovery.pass_done == BCH_RECOVERY_PASS_NR - 1 &&
ext->btrees_lost_data) {
ext->btrees_lost_data = 0;
write_sb = true;
@@ -981,13 +1135,6 @@ use_clean:
out:
bch2_flush_fsck_errs(c);
- if (!c->opts.retain_recovery_info) {
- bch2_journal_keys_put_initial(c);
- bch2_find_btree_nodes_exit(&c->found_btree_nodes);
- }
- if (!IS_ERR(clean))
- kfree(clean);
-
if (!ret &&
test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags) &&
!c->opts.nochanges) {
@@ -996,11 +1143,23 @@ out:
}
bch_err_fn(c, ret);
+final_out:
+ if (!IS_ERR(clean))
+ kfree(clean);
return ret;
err:
fsck_err:
- bch2_fs_emergency_read_only(c);
- goto out;
+ {
+ struct printbuf buf = PRINTBUF;
+ bch2_log_msg_start(c, &buf);
+
+ prt_printf(&buf, "error in recovery: %s\n", bch2_err_str(ret));
+ bch2_fs_emergency_read_only2(c, &buf);
+
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+ }
+ goto final_out;
}
int bch2_fs_initialize(struct bch_fs *c)
@@ -1021,7 +1180,7 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_check_version_downgrade(c);
if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) {
- bch2_sb_upgrade(c, bcachefs_metadata_version_current);
+ bch2_sb_upgrade(c, bcachefs_metadata_version_current, false);
SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
bch2_write_super(c);
}
@@ -1035,7 +1194,6 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
- c->curr_recovery_pass = BCH_RECOVERY_PASS_NR;
set_bit(BCH_FS_btree_running, &c->flags);
set_bit(BCH_FS_may_go_rw, &c->flags);
@@ -1050,14 +1208,17 @@ int bch2_fs_initialize(struct bch_fs *c)
* journal_res_get() will crash if called before this has
* set up the journal.pin FIFO and journal.cur pointer:
*/
- bch2_fs_journal_start(&c->journal, 1);
- set_bit(BCH_FS_accounting_replay_done, &c->flags);
- bch2_journal_set_replay_done(&c->journal);
+ ret = bch2_fs_journal_start(&c->journal, 1, 1);
+ if (ret)
+ goto err;
ret = bch2_fs_read_write_early(c);
if (ret)
goto err;
+ set_bit(BCH_FS_accounting_replay_done, &c->flags);
+ bch2_journal_set_replay_done(&c->journal);
+
for_each_member_device(c, ca) {
ret = bch2_dev_usage_init(ca, false);
if (ret) {
@@ -1076,9 +1237,6 @@ int bch2_fs_initialize(struct bch_fs *c)
if (ret)
goto err;
- for_each_online_member(c, ca)
- ca->new_fs_bucket_idx = 0;
-
ret = bch2_fs_freespace_init(c);
if (ret)
goto err;
@@ -1117,7 +1275,10 @@ int bch2_fs_initialize(struct bch_fs *c)
if (ret)
goto err;
- c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1;
+ c->recovery.pass_done = BCH_RECOVERY_PASS_NR - 1;
+
+ bch2_copygc_wakeup(c);
+ bch2_rebalance_wakeup(c);
if (enabled_qtypes(c)) {
ret = bch2_fs_quota_read(c);
@@ -1137,6 +1298,7 @@ int bch2_fs_initialize(struct bch_fs *c)
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
+ c->recovery.curr_pass = BCH_RECOVERY_PASS_NR;
return 0;
err:
bch_err_fn(c, ret);