diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2019-03-30 05:22:45 +0300 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-23 00:08:20 +0300 |
commit | a1d58243f943f5933e65e18e504333ac9eccb679 (patch) | |
tree | e9f6753f95985fdb60eaa6279cd3d599d6bb6eac | |
parent | cccf4e6df36ffb4752b4c83efd0723281e629693 (diff) | |
download | linux-a1d58243f943f5933e65e18e504333ac9eccb679.tar.xz |
bcachefs: add ability to run gc on metadata only
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- | fs/bcachefs/alloc_background.c | 3 | ||||
-rw-r--r-- | fs/bcachefs/btree_gc.c | 97 | ||||
-rw-r--r-- | fs/bcachefs/btree_gc.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/buckets.c | 40 | ||||
-rw-r--r-- | fs/bcachefs/buckets.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/buckets_types.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/sysfs.c | 2 |
8 files changed, 88 insertions, 61 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index b5f5c223e008..c6a909bdfc02 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -292,8 +292,7 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list) } percpu_down_write(&c->mark_lock); - for_each_member_device(ca, c, i) - bch2_dev_usage_from_buckets(c, ca); + bch2_dev_usage_from_buckets(c); percpu_up_write(&c->mark_lock); mutex_lock(&c->bucket_clock[READ].lock); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 4119f48281fb..c572391c4dad 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -204,7 +204,7 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, } static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, - bool initial) + bool initial, bool metadata_only) { struct btree_trans trans; struct btree_iter *iter; @@ -224,7 +224,9 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, * and on startup, we have to read every btree node (XXX: only if it was * an unclean shutdown) */ - if (initial || expensive_debug_checks(c)) + if (metadata_only) + depth = 1; + else if (initial || expensive_debug_checks(c)) depth = 0; btree_node_range_checks_init(&r, depth); @@ -280,7 +282,7 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r) } static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal, - bool initial) + bool initial, bool metadata_only) { enum btree_id ids[BTREE_ID_NR]; u8 max_stale; @@ -294,11 +296,12 @@ static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal, enum btree_id id = ids[i]; enum btree_node_type type = __btree_node_type(0, id); - int ret = bch2_gc_btree(c, id, initial); + int ret = bch2_gc_btree(c, id, initial, metadata_only); if (ret) return ret; - if (journal && btree_node_type_needs_gc(type)) { + if (journal && !metadata_only && + btree_node_type_needs_gc(type)) { struct bkey_i *k, *n; struct jset_entry *j; struct journal_replay *r; @@ -476,11 +479,13 @@ static void bch2_gc_free(struct bch_fs *c) c->usage[1] = NULL; } -static int bch2_gc_done(struct bch_fs *c, bool initial) +static int bch2_gc_done(struct bch_fs *c, + bool initial, bool metadata_only) { struct bch_dev *ca; - bool verify = !initial || - (c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)); + bool verify = !metadata_only && + (!initial || + (c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO))); unsigned i; int ret = 0; @@ -515,7 +520,7 @@ static int bch2_gc_done(struct bch_fs *c, bool initial) #define copy_fs_field(_f, _msg, ...) \ copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__) - { + if (!metadata_only) { struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0); struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0); struct stripe *dst, *src; @@ -567,26 +572,7 @@ static int bch2_gc_done(struct bch_fs *c, bool initial) } }; - for_each_member_device(ca, c, i) { - unsigned nr = sizeof(struct bch_dev_usage) / sizeof(u64); - struct bch_dev_usage *dst = (void *) - bch2_acc_percpu_u64s((void *) ca->usage[0], nr); - struct bch_dev_usage *src = (void *) - bch2_acc_percpu_u64s((void *) ca->usage[1], nr); - unsigned b; - - for (b = 0; b < BCH_DATA_NR; b++) - copy_dev_field(buckets[b], "buckets[%s]", - bch2_data_types[b]); - copy_dev_field(buckets_alloc, "buckets_alloc"); - copy_dev_field(buckets_ec, "buckets_ec"); - copy_dev_field(buckets_unavailable, "buckets_unavailable"); - - for (b = 0; b < BCH_DATA_NR; b++) - copy_dev_field(sectors[b], "sectors[%s]", - bch2_data_types[b]); - copy_dev_field(sectors_fragmented, "sectors_fragmented"); - } + bch2_dev_usage_from_buckets(c); { unsigned nr = fs_usage_u64s(c); @@ -596,20 +582,29 @@ static int bch2_gc_done(struct bch_fs *c, bool initial) bch2_acc_percpu_u64s((void *) c->usage[1], nr); copy_fs_field(hidden, "hidden"); - copy_fs_field(data, "data"); - copy_fs_field(cached, "cached"); - copy_fs_field(reserved, "reserved"); - copy_fs_field(nr_inodes, "nr_inodes"); + copy_fs_field(btree, "btree"); - for (i = 0; i < BCH_REPLICAS_MAX; i++) - copy_fs_field(persistent_reserved[i], - "persistent_reserved[%i]", i); + if (!metadata_only) { + copy_fs_field(data, "data"); + copy_fs_field(cached, "cached"); + copy_fs_field(reserved, "reserved"); + copy_fs_field(nr_inodes,"nr_inodes"); + + for (i = 0; i < BCH_REPLICAS_MAX; i++) + copy_fs_field(persistent_reserved[i], + "persistent_reserved[%i]", i); + } for (i = 0; i < c->replicas.nr; i++) { struct bch_replicas_entry *e = cpu_replicas_entry(&c->replicas, i); char buf[80]; + if (metadata_only && + (e->data_type == BCH_DATA_USER || + e->data_type == BCH_DATA_CACHED)) + continue; + bch2_replicas_entry_to_text(&PBUF(buf), e); copy_fs_field(replicas[i], "%s", buf); @@ -625,7 +620,8 @@ fsck_err: return ret; } -static int bch2_gc_start(struct bch_fs *c) +static int bch2_gc_start(struct bch_fs *c, + bool metadata_only) { struct bch_dev *ca; unsigned i; @@ -671,10 +667,18 @@ static int bch2_gc_start(struct bch_fs *c) dst->nbuckets = src->nbuckets; for (b = 0; b < src->nbuckets; b++) { - dst->b[b]._mark.gen = - dst->b[b].oldest_gen = - src->b[b].mark.gen; - dst->b[b].gen_valid = src->b[b].gen_valid; + struct bucket *d = &dst->b[b]; + struct bucket *s = &src->b[b]; + + d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen; + d->gen_valid = s->gen_valid; + + if (metadata_only && + (s->mark.data_type == BCH_DATA_USER || + s->mark.data_type == BCH_DATA_CACHED)) { + d->_mark = s->mark; + d->_mark.owned_by_allocator = 0; + } } }; @@ -699,7 +703,8 @@ static int bch2_gc_start(struct bch_fs *c) * move around - if references move backwards in the ordering GC * uses, GC could skip past them */ -int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial) +int bch2_gc(struct bch_fs *c, struct list_head *journal, + bool initial, bool metadata_only) { struct bch_dev *ca; u64 start_time = local_clock(); @@ -711,7 +716,7 @@ int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial) down_write(&c->gc_lock); again: percpu_down_write(&c->mark_lock); - ret = bch2_gc_start(c); + ret = bch2_gc_start(c, metadata_only); percpu_up_write(&c->mark_lock); if (ret) @@ -719,7 +724,7 @@ again: bch2_mark_superblocks(c); - ret = bch2_gc_btrees(c, journal, initial); + ret = bch2_gc_btrees(c, journal, initial, metadata_only); if (ret) goto out; @@ -753,7 +758,7 @@ out: percpu_down_write(&c->mark_lock); if (!ret) - ret = bch2_gc_done(c, initial); + ret = bch2_gc_done(c, initial, metadata_only); /* Indicates that gc is no longer in progress: */ __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING)); @@ -1155,7 +1160,7 @@ static int bch2_gc_thread(void *arg) last = atomic_long_read(&clock->now); last_kick = atomic_read(&c->kick_gc); - ret = bch2_gc(c, NULL, false); + ret = bch2_gc(c, NULL, false, false); if (ret) bch_err(c, "btree gc failed: %i", ret); diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h index 9eb2b0527a92..b7982e64b235 100644 --- a/fs/bcachefs/btree_gc.h +++ b/fs/bcachefs/btree_gc.h @@ -5,7 +5,7 @@ #include "btree_types.h" void bch2_coalesce(struct bch_fs *); -int bch2_gc(struct bch_fs *, struct list_head *, bool); +int bch2_gc(struct bch_fs *, struct list_head *, bool, bool); void bch2_gc_thread_stop(struct bch_fs *); int bch2_gc_thread_start(struct bch_fs *); void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned); diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 495ef4732602..4fe66ee1f745 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -132,6 +132,8 @@ void bch2_fs_usage_initialize(struct bch_fs *c) switch (e->data_type) { case BCH_DATA_BTREE: + usage->btree += usage->replicas[i]; + break; case BCH_DATA_USER: usage->data += usage->replicas[i]; break; @@ -226,6 +228,7 @@ static u64 avail_factor(u64 r) u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage) { return min(fs_usage->hidden + + fs_usage->btree + fs_usage->data + reserve_factor(fs_usage->reserved + fs_usage->online_reserved), @@ -241,7 +244,8 @@ __bch2_fs_usage_read_short(struct bch_fs *c) ret.capacity = c->capacity - percpu_u64_get(&c->usage[0]->hidden); - data = percpu_u64_get(&c->usage[0]->data); + data = percpu_u64_get(&c->usage[0]->data) + + percpu_u64_get(&c->usage[0]->btree); reserved = percpu_u64_get(&c->usage[0]->reserved) + percpu_u64_get(&c->usage[0]->online_reserved); @@ -386,12 +390,17 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca, bch2_wake_allocator(ca); } -void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca) +void bch2_dev_usage_from_buckets(struct bch_fs *c) { + struct bch_dev *ca; struct bucket_mark old = { .v.counter = 0 }; struct bch_fs_usage *fs_usage; struct bucket_array *buckets; struct bucket *g; + unsigned i; + int cpu; + + percpu_u64_set(&c->usage[0]->hidden, 0); /* * This is only called during startup, before there's any multithreaded @@ -401,11 +410,17 @@ void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca) fs_usage = this_cpu_ptr(c->usage[0]); preempt_enable(); - buckets = bucket_array(ca); + for_each_member_device(ca, c, i) { + for_each_possible_cpu(cpu) + memset(per_cpu_ptr(ca->usage[0], cpu), 0, + sizeof(*ca->usage[0])); + + buckets = bucket_array(ca); - for_each_bucket(g, buckets) - if (g->mark.data_type) - bch2_dev_usage_update(c, ca, fs_usage, old, g->mark, false); + for_each_bucket(g, buckets) + bch2_dev_usage_update(c, ca, fs_usage, + old, g->mark, false); + } } #define bucket_data_cmpxchg(c, ca, fs_usage, g, new, expr) \ @@ -426,10 +441,17 @@ static inline void update_replicas(struct bch_fs *c, BUG_ON(idx < 0); BUG_ON(!sectors); - if (r->data_type == BCH_DATA_CACHED) - fs_usage->cached += sectors; - else + switch (r->data_type) { + case BCH_DATA_BTREE: + fs_usage->btree += sectors; + break; + case BCH_DATA_USER: fs_usage->data += sectors; + break; + case BCH_DATA_CACHED: + fs_usage->cached += sectors; + break; + } fs_usage->replicas[idx] += sectors; } diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 6af8b418b1e3..095015f17f76 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -174,7 +174,7 @@ static inline bool bucket_needs_journal_commit(struct bucket_mark m, struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *); -void bch2_dev_usage_from_buckets(struct bch_fs *, struct bch_dev *); +void bch2_dev_usage_from_buckets(struct bch_fs *); static inline u64 __dev_buckets_available(struct bch_dev *ca, struct bch_dev_usage stats) diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index 348d062dd744..a98493dd2ba8 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -70,6 +70,7 @@ struct bch_fs_usage { u64 gc_start[0]; u64 hidden; + u64 btree; u64 data; u64 cached; u64 reserved; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 0fa952fa1053..67b4dda9cfeb 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -361,7 +361,7 @@ int bch2_fs_recovery(struct bch_fs *c) test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) { bch_verbose(c, "starting mark and sweep:"); err = "error in recovery"; - ret = bch2_gc(c, &journal, true); + ret = bch2_gc(c, &journal, true, false); if (ret) goto err; bch_verbose(c, "mark and sweep done"); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 1354dd33874c..59503ad0006c 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -497,7 +497,7 @@ STORE(__bch2_fs) bch2_coalesce(c); if (attr == &sysfs_trigger_gc) - bch2_gc(c, NULL, false); + bch2_gc(c, NULL, false, false); if (attr == &sysfs_trigger_alloc_write) { bool wrote; |