diff options
-rw-r--r-- | fs/bcachefs/btree_journal_iter.c | 60 | ||||
-rw-r--r-- | fs/bcachefs/btree_journal_iter_types.h | 5 | ||||
-rw-r--r-- | fs/bcachefs/journal_io.c | 21 | ||||
-rw-r--r-- | fs/bcachefs/opts.h | 5 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 5 |
5 files changed, 69 insertions, 27 deletions
diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index de996c848e43..a41fabd06332 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -641,10 +641,11 @@ static int journal_sort_key_cmp(const void *_l, const void *_r) { const struct journal_key *l = _l; const struct journal_key *r = _r; + int rewind = l->rewind && r->rewind ? -1 : 1; return journal_key_cmp(l, r) ?: - cmp_int(l->journal_seq, r->journal_seq) ?: - cmp_int(l->journal_offset, r->journal_offset); + ((cmp_int(l->journal_seq, r->journal_seq) ?: + cmp_int(l->journal_offset, r->journal_offset)) * rewind); } void bch2_journal_keys_put(struct bch_fs *c) @@ -713,6 +714,8 @@ int bch2_journal_keys_sort(struct bch_fs *c) struct journal_keys *keys = &c->journal_keys; size_t nr_read = 0; + u64 rewind_seq = c->opts.journal_rewind ?: U64_MAX; + genradix_for_each(&c->journal_entries, iter, _i) { i = *_i; @@ -721,28 +724,43 @@ int bch2_journal_keys_sort(struct bch_fs *c) cond_resched(); - for_each_jset_key(k, entry, &i->j) { - struct journal_key n = (struct journal_key) { - .btree_id = entry->btree_id, - .level = entry->level, - .k = k, - .journal_seq = le64_to_cpu(i->j.seq), - .journal_offset = k->_data - i->j._data, - }; - - if (darray_push(keys, n)) { - __journal_keys_sort(keys); - - if (keys->nr * 8 > keys->size * 7) { - bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu keys at seq %llu", - keys->nr, keys->size, nr_read, le64_to_cpu(i->j.seq)); - return bch_err_throw(c, ENOMEM_journal_keys_sort); + vstruct_for_each(&i->j, entry) { + bool rewind = !entry->level && + !btree_id_is_alloc(entry->btree_id) && + le64_to_cpu(i->j.seq) >= rewind_seq; + + if (entry->type != (rewind + ? BCH_JSET_ENTRY_overwrite + : BCH_JSET_ENTRY_btree_keys)) + continue; + + if (!rewind && le64_to_cpu(i->j.seq) < c->journal_replay_seq_start) + continue; + + jset_entry_for_each_key(entry, k) { + struct journal_key n = (struct journal_key) { + .btree_id = entry->btree_id, + .level = entry->level, + .rewind = rewind, + .k = k, + .journal_seq = le64_to_cpu(i->j.seq), + .journal_offset = k->_data - i->j._data, + }; + + if (darray_push(keys, n)) { + __journal_keys_sort(keys); + + if (keys->nr * 8 > keys->size * 7) { + bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu keys at seq %llu", + keys->nr, keys->size, nr_read, le64_to_cpu(i->j.seq)); + return bch_err_throw(c, ENOMEM_journal_keys_sort); + } + + BUG_ON(darray_push(keys, n)); } - BUG_ON(darray_push(keys, n)); + nr_read++; } - - nr_read++; } } diff --git a/fs/bcachefs/btree_journal_iter_types.h b/fs/bcachefs/btree_journal_iter_types.h index 8b773823704f..86aacb254fb2 100644 --- a/fs/bcachefs/btree_journal_iter_types.h +++ b/fs/bcachefs/btree_journal_iter_types.h @@ -11,8 +11,9 @@ struct journal_key { u32 journal_offset; enum btree_id btree_id:8; unsigned level:8; - bool allocated; - bool overwritten; + bool allocated:1; + bool overwritten:1; + bool rewind:1; struct journal_key_range_overwritten __rcu * overwritten_range; struct bkey_i *k; diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 0b15d71a8d2d..afbf12e8f0c5 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -160,6 +160,9 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, struct printbuf buf = PRINTBUF; int ret = JOURNAL_ENTRY_ADD_OK; + if (last_seq && c->opts.journal_rewind) + last_seq = min(last_seq, c->opts.journal_rewind); + if (!c->journal.oldest_seq_found_ondisk || le64_to_cpu(j->seq) < c->journal.oldest_seq_found_ondisk) c->journal.oldest_seq_found_ondisk = le64_to_cpu(j->seq); @@ -1430,11 +1433,21 @@ int bch2_journal_read(struct bch_fs *c, printbuf_reset(&buf); prt_printf(&buf, "journal read done, replaying entries %llu-%llu", *last_seq, *blacklist_seq - 1); + + /* + * Drop blacklisted entries and entries older than last_seq (or start of + * journal rewind: + */ + u64 drop_before = *last_seq; + if (c->opts.journal_rewind) { + drop_before = min(drop_before, c->opts.journal_rewind); + prt_printf(&buf, " (rewinding from %llu)", c->opts.journal_rewind); + } + + *last_seq = drop_before; if (*start_seq != *blacklist_seq) prt_printf(&buf, " (unflushed %llu-%llu)", *blacklist_seq, *start_seq - 1); bch_info(c, "%s", buf.buf); - - /* Drop blacklisted entries and entries older than last_seq: */ genradix_for_each(&c->journal_entries, radix_iter, _i) { i = *_i; @@ -1442,7 +1455,7 @@ int bch2_journal_read(struct bch_fs *c, continue; seq = le64_to_cpu(i->j.seq); - if (seq < *last_seq) { + if (seq < drop_before) { journal_replay_free(c, i, false); continue; } @@ -1455,7 +1468,7 @@ int bch2_journal_read(struct bch_fs *c, } } - ret = bch2_journal_check_for_missing(c, *last_seq, *blacklist_seq - 1); + ret = bch2_journal_check_for_missing(c, drop_before, *blacklist_seq - 1); if (ret) goto err; diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 2a02606254b3..b0a76bd6d6f5 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -379,6 +379,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH2_NO_SB_OPT, false, \ NULL, "Exit recovery immediately prior to journal replay")\ + x(journal_rewind, u64, \ + OPT_FS|OPT_MOUNT, \ + OPT_UINT(0, U64_MAX), \ + BCH2_NO_SB_OPT, 0, \ + NULL, "Rewind journal") \ x(recovery_passes, u64, \ OPT_FS|OPT_MOUNT, \ OPT_BITFIELD(bch2_recovery_passes), \ diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 820249e9c5ea..37f2cc1ec2f8 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -757,6 +757,11 @@ int bch2_fs_recovery(struct bch_fs *c) if (c->opts.nochanges) c->opts.read_only = true; + if (c->opts.journal_rewind) { + bch_info(c, "rewinding journal, fsck required"); + c->opts.fsck = true; + } + mutex_lock(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); bool write_sb = false; |