diff options
Diffstat (limited to 'fs/bcachefs/disk_accounting.c')
-rw-r--r-- | fs/bcachefs/disk_accounting.c | 167 |
1 files changed, 141 insertions, 26 deletions
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index e972e2bca546..07eb8fa1b026 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -134,6 +134,10 @@ int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k, void *end = &acc_k + 1; int ret = 0; + bkey_fsck_err_on(bversion_zero(k.k->bversion), + c, accounting_key_version_0, + "accounting key with version=0"); + switch (acc_k.type) { case BCH_DISK_ACCOUNTING_nr_inodes: end = field_end(acc_k, nr_inodes); @@ -238,6 +242,14 @@ void bch2_accounting_swab(struct bkey_s k) *p = swab64(*p); } +static inline void __accounting_to_replicas(struct bch_replicas_entry_v1 *r, + struct disk_accounting_pos acc) +{ + unsafe_memcpy(r, &acc.replicas, + replicas_entry_bytes(&acc.replicas), + "variable length struct"); +} + static inline bool accounting_to_replicas(struct bch_replicas_entry_v1 *r, struct bpos p) { struct disk_accounting_pos acc_k; @@ -245,9 +257,7 @@ static inline bool accounting_to_replicas(struct bch_replicas_entry_v1 *r, struc switch (acc_k.type) { case BCH_DISK_ACCOUNTING_replicas: - unsafe_memcpy(r, &acc_k.replicas, - replicas_entry_bytes(&acc_k.replicas), - "variable length struct"); + __accounting_to_replicas(r, acc_k); return true; default: return false; @@ -291,7 +301,7 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun struct accounting_mem_entry n = { .pos = a.k->p, - .version = a.k->version, + .bversion = a.k->bversion, .nr_counters = bch2_accounting_counters(a.k), .v[0] = __alloc_percpu_gfp(n.nr_counters * sizeof(u64), sizeof(u64), GFP_KERNEL), @@ -319,11 +329,13 @@ err: return -BCH_ERR_ENOMEM_disk_accounting; } -int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc) +int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a, + enum bch_accounting_mode mode) { struct bch_replicas_padded r; - if (accounting_to_replicas(&r.e, a.k->p) && + if (mode != BCH_ACCOUNTING_read && + accounting_to_replicas(&r.e, a.k->p) && !bch2_replicas_marked_locked(c, &r.e)) return -BCH_ERR_btree_insert_need_mark_replicas; @@ -566,7 +578,9 @@ int bch2_gc_accounting_done(struct bch_fs *c) struct { __BKEY_PADDED(k, BCH_ACCOUNTING_MAX_COUNTERS); } k_i; accounting_key_init(&k_i.k, &acc_k, src_v, nr); - bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false, false); + bch2_accounting_mem_mod_locked(trans, + bkey_i_to_s_c_accounting(&k_i.k), + BCH_ACCOUNTING_normal); preempt_disable(); struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); @@ -589,31 +603,90 @@ fsck_err: static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; if (k.k->type != KEY_TYPE_accounting) return 0; percpu_down_read(&c->mark_lock); - int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), false, true); + int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), + BCH_ACCOUNTING_read); percpu_up_read(&c->mark_lock); + return ret; +} - if (bch2_accounting_key_is_zero(bkey_s_c_to_accounting(k)) && - ret == -BCH_ERR_btree_insert_need_mark_replicas) - ret = 0; +static int bch2_disk_accounting_validate_late(struct btree_trans *trans, + struct disk_accounting_pos acc, + u64 *v, unsigned nr) +{ + struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; + int ret = 0, invalid_dev = -1; + + switch (acc.type) { + case BCH_DISK_ACCOUNTING_replicas: { + struct bch_replicas_padded r; + __accounting_to_replicas(&r.e, acc); + + for (unsigned i = 0; i < r.e.nr_devs; i++) + if (r.e.devs[i] != BCH_SB_MEMBER_INVALID && + !bch2_dev_exists(c, r.e.devs[i])) { + invalid_dev = r.e.devs[i]; + goto invalid_device; + } - struct disk_accounting_pos acc; - bpos_to_disk_accounting_pos(&acc, k.k->p); + /* + * All replicas entry checks except for invalid device are done + * in bch2_accounting_validate + */ + BUG_ON(bch2_replicas_entry_validate(&r.e, c, &buf)); + + if (fsck_err_on(!bch2_replicas_marked_locked(c, &r.e), + trans, accounting_replicas_not_marked, + "accounting not marked in superblock replicas\n %s", + (printbuf_reset(&buf), + bch2_accounting_key_to_text(&buf, &acc), + buf.buf))) { + /* + * We're not RW yet and still single threaded, dropping + * and retaking lock is ok: + */ + percpu_up_write(&c->mark_lock); + ret = bch2_mark_replicas(c, &r.e); + if (ret) + goto fsck_err; + percpu_down_write(&c->mark_lock); + } + break; + } + + case BCH_DISK_ACCOUNTING_dev_data_type: + if (!bch2_dev_exists(c, acc.dev_data_type.dev)) { + invalid_dev = acc.dev_data_type.dev; + goto invalid_device; + } + break; + } - if (fsck_err_on(ret == -BCH_ERR_btree_insert_need_mark_replicas, - trans, accounting_replicas_not_marked, - "accounting not marked in superblock replicas\n %s", - (bch2_accounting_key_to_text(&buf, &acc), - buf.buf))) - ret = bch2_accounting_update_sb_one(c, k.k->p); fsck_err: printbuf_exit(&buf); return ret; +invalid_device: + if (fsck_err(trans, accounting_to_invalid_device, + "accounting entry points to invalid device %i\n %s", + invalid_dev, + (printbuf_reset(&buf), + bch2_accounting_key_to_text(&buf, &acc), + buf.buf))) { + for (unsigned i = 0; i < nr; i++) + v[i] = -v[i]; + + ret = commit_do(trans, NULL, NULL, 0, + bch2_disk_accounting_mod(trans, &acc, v, nr, false)) ?: + -BCH_ERR_remove_disk_accounting_entry; + } else { + ret = -BCH_ERR_remove_disk_accounting_entry; + } + goto fsck_err; } /* @@ -624,6 +697,7 @@ int bch2_accounting_read(struct bch_fs *c) { struct bch_accounting_mem *acc = &c->accounting; struct btree_trans *trans = bch2_trans_get(c); + struct printbuf buf = PRINTBUF; int ret = for_each_btree_key(trans, iter, BTREE_ID_accounting, POS_MIN, @@ -647,7 +721,7 @@ int bch2_accounting_read(struct bch_fs *c) accounting_pos_cmp, &k.k->p); bool applied = idx < acc->k.nr && - bversion_cmp(acc->k.data[idx].version, k.k->version) >= 0; + bversion_cmp(acc->k.data[idx].bversion, k.k->bversion) >= 0; if (applied) continue; @@ -655,7 +729,7 @@ int bch2_accounting_read(struct bch_fs *c) if (i + 1 < &darray_top(*keys) && i[1].k->k.type == KEY_TYPE_accounting && !journal_key_cmp(i, i + 1)) { - BUG_ON(bversion_cmp(i[0].k->k.version, i[1].k->k.version) >= 0); + WARN_ON(bversion_cmp(i[0].k->k.bversion, i[1].k->k.bversion) >= 0); i[1].journal_seq = i[0].journal_seq; @@ -673,7 +747,44 @@ int bch2_accounting_read(struct bch_fs *c) } keys->gap = keys->nr = dst - keys->data; - percpu_down_read(&c->mark_lock); + percpu_down_write(&c->mark_lock); + unsigned i = 0; + while (i < acc->k.nr) { + unsigned idx = inorder_to_eytzinger0(i, acc->k.nr); + + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, acc->k.data[idx].pos); + + u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; + bch2_accounting_mem_read_counters(acc, idx, v, ARRAY_SIZE(v), false); + + /* + * If the entry counters are zeroed, it should be treated as + * nonexistent - it might point to an invalid device. + * + * Remove it, so that if it's re-added it gets re-marked in the + * superblock: + */ + ret = bch2_is_zero(v, sizeof(v[0]) * acc->k.data[idx].nr_counters) + ? -BCH_ERR_remove_disk_accounting_entry + : bch2_disk_accounting_validate_late(trans, acc_k, + v, acc->k.data[idx].nr_counters); + + if (ret == -BCH_ERR_remove_disk_accounting_entry) { + free_percpu(acc->k.data[idx].v[0]); + free_percpu(acc->k.data[idx].v[1]); + darray_remove_item(&acc->k, &acc->k.data[idx]); + eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), + accounting_pos_cmp, NULL); + ret = 0; + continue; + } + + if (ret) + goto fsck_err; + i++; + } + preempt_disable(); struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage); @@ -709,8 +820,10 @@ int bch2_accounting_read(struct bch_fs *c) } } preempt_enable(); - percpu_up_read(&c->mark_lock); +fsck_err: + percpu_up_write(&c->mark_lock); err: + printbuf_exit(&buf); bch2_trans_put(trans); bch_err_fn(c, ret); return ret; @@ -743,8 +856,10 @@ int bch2_dev_usage_init(struct bch_dev *ca, bool gc) }; u64 v[3] = { ca->mi.nbuckets - ca->mi.first_bucket, 0, 0 }; - int ret = bch2_trans_do(c, NULL, NULL, 0, - bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), gc)); + int ret = bch2_trans_do(c, ({ + bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), gc) ?: + (!gc ? bch2_trans_commit(trans, NULL, NULL, 0) : 0); + })); bch_err_fn(c, ret); return ret; } |