summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2019-03-30 05:22:45 +0300
committer Kent Overstreet <kent.overstreet@linux.dev> 2023-10-23 00:08:20 +0300
commit a1d58243f943f5933e65e18e504333ac9eccb679 (patch)
tree e9f6753f95985fdb60eaa6279cd3d599d6bb6eac
parent cccf4e6df36ffb4752b4c83efd0723281e629693 (diff)
download linux-a1d58243f943f5933e65e18e504333ac9eccb679.tar.xz
bcachefs: add ability to run gc on metadata only
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- fs/bcachefs/alloc_background.c | 3
-rw-r--r-- fs/bcachefs/btree_gc.c | 97
-rw-r--r-- fs/bcachefs/btree_gc.h | 2
-rw-r--r-- fs/bcachefs/buckets.c | 40
-rw-r--r-- fs/bcachefs/buckets.h | 2
-rw-r--r-- fs/bcachefs/buckets_types.h | 1
-rw-r--r-- fs/bcachefs/recovery.c | 2
-rw-r--r-- fs/bcachefs/sysfs.c | 2
8 files changed, 88 insertions, 61 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index b5f5c223e008..c6a909bdfc02 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -292,8 +292,7 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
}
percpu_down_write(&c->mark_lock);
- for_each_member_device(ca, c, i)
- bch2_dev_usage_from_buckets(c, ca);
+ bch2_dev_usage_from_buckets(c);
percpu_up_write(&c->mark_lock);
mutex_lock(&c->bucket_clock[READ].lock);
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 4119f48281fb..c572391c4dad 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -204,7 +204,7 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b,
}
static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
- bool initial)
+ bool initial, bool metadata_only)
{
struct btree_trans trans;
struct btree_iter *iter;
@@ -224,7 +224,9 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
* and on startup, we have to read every btree node (XXX: only if it was
* an unclean shutdown)
*/
- if (initial || expensive_debug_checks(c))
+ if (metadata_only)
+ depth = 1;
+ else if (initial || expensive_debug_checks(c))
depth = 0;
btree_node_range_checks_init(&r, depth);
@@ -280,7 +282,7 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
}
static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
- bool initial)
+ bool initial, bool metadata_only)
{
enum btree_id ids[BTREE_ID_NR];
u8 max_stale;
@@ -294,11 +296,12 @@ static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal,
enum btree_id id = ids[i];
enum btree_node_type type = __btree_node_type(0, id);
- int ret = bch2_gc_btree(c, id, initial);
+ int ret = bch2_gc_btree(c, id, initial, metadata_only);
if (ret)
return ret;
- if (journal && btree_node_type_needs_gc(type)) {
+ if (journal && !metadata_only &&
+ btree_node_type_needs_gc(type)) {
struct bkey_i *k, *n;
struct jset_entry *j;
struct journal_replay *r;
@@ -476,11 +479,13 @@ static void bch2_gc_free(struct bch_fs *c)
c->usage[1] = NULL;
}
-static int bch2_gc_done(struct bch_fs *c, bool initial)
+static int bch2_gc_done(struct bch_fs *c,
+ bool initial, bool metadata_only)
{
struct bch_dev *ca;
- bool verify = !initial ||
- (c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO));
+ bool verify = !metadata_only &&
+ (!initial ||
+ (c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)));
unsigned i;
int ret = 0;
@@ -515,7 +520,7 @@ static int bch2_gc_done(struct bch_fs *c, bool initial)
#define copy_fs_field(_f, _msg, ...) \
copy_field(_f, "fs has wrong " _msg, ##__VA_ARGS__)
- {
+ if (!metadata_only) {
struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
struct stripe *dst, *src;
@@ -567,26 +572,7 @@ static int bch2_gc_done(struct bch_fs *c, bool initial)
}
};
- for_each_member_device(ca, c, i) {
- unsigned nr = sizeof(struct bch_dev_usage) / sizeof(u64);
- struct bch_dev_usage *dst = (void *)
- bch2_acc_percpu_u64s((void *) ca->usage[0], nr);
- struct bch_dev_usage *src = (void *)
- bch2_acc_percpu_u64s((void *) ca->usage[1], nr);
- unsigned b;
-
- for (b = 0; b < BCH_DATA_NR; b++)
- copy_dev_field(buckets[b], "buckets[%s]",
- bch2_data_types[b]);
- copy_dev_field(buckets_alloc, "buckets_alloc");
- copy_dev_field(buckets_ec, "buckets_ec");
- copy_dev_field(buckets_unavailable, "buckets_unavailable");
-
- for (b = 0; b < BCH_DATA_NR; b++)
- copy_dev_field(sectors[b], "sectors[%s]",
- bch2_data_types[b]);
- copy_dev_field(sectors_fragmented, "sectors_fragmented");
- }
+ bch2_dev_usage_from_buckets(c);
{
unsigned nr = fs_usage_u64s(c);
@@ -596,20 +582,29 @@ static int bch2_gc_done(struct bch_fs *c, bool initial)
bch2_acc_percpu_u64s((void *) c->usage[1], nr);
copy_fs_field(hidden, "hidden");
- copy_fs_field(data, "data");
- copy_fs_field(cached, "cached");
- copy_fs_field(reserved, "reserved");
- copy_fs_field(nr_inodes, "nr_inodes");
+ copy_fs_field(btree, "btree");
- for (i = 0; i < BCH_REPLICAS_MAX; i++)
- copy_fs_field(persistent_reserved[i],
- "persistent_reserved[%i]", i);
+ if (!metadata_only) {
+ copy_fs_field(data, "data");
+ copy_fs_field(cached, "cached");
+ copy_fs_field(reserved, "reserved");
+ copy_fs_field(nr_inodes,"nr_inodes");
+
+ for (i = 0; i < BCH_REPLICAS_MAX; i++)
+ copy_fs_field(persistent_reserved[i],
+ "persistent_reserved[%i]", i);
+ }
for (i = 0; i < c->replicas.nr; i++) {
struct bch_replicas_entry *e =
cpu_replicas_entry(&c->replicas, i);
char buf[80];
+ if (metadata_only &&
+ (e->data_type == BCH_DATA_USER ||
+ e->data_type == BCH_DATA_CACHED))
+ continue;
+
bch2_replicas_entry_to_text(&PBUF(buf), e);
copy_fs_field(replicas[i], "%s", buf);
@@ -625,7 +620,8 @@ fsck_err:
return ret;
}
-static int bch2_gc_start(struct bch_fs *c)
+static int bch2_gc_start(struct bch_fs *c,
+ bool metadata_only)
{
struct bch_dev *ca;
unsigned i;
@@ -671,10 +667,18 @@ static int bch2_gc_start(struct bch_fs *c)
dst->nbuckets = src->nbuckets;
for (b = 0; b < src->nbuckets; b++) {
- dst->b[b]._mark.gen =
- dst->b[b].oldest_gen =
- src->b[b].mark.gen;
- dst->b[b].gen_valid = src->b[b].gen_valid;
+ struct bucket *d = &dst->b[b];
+ struct bucket *s = &src->b[b];
+
+ d->_mark.gen = dst->b[b].oldest_gen = s->mark.gen;
+ d->gen_valid = s->gen_valid;
+
+ if (metadata_only &&
+ (s->mark.data_type == BCH_DATA_USER ||
+ s->mark.data_type == BCH_DATA_CACHED)) {
+ d->_mark = s->mark;
+ d->_mark.owned_by_allocator = 0;
+ }
}
};
@@ -699,7 +703,8 @@ static int bch2_gc_start(struct bch_fs *c)
* move around - if references move backwards in the ordering GC
* uses, GC could skip past them
*/
-int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial)
+int bch2_gc(struct bch_fs *c, struct list_head *journal,
+ bool initial, bool metadata_only)
{
struct bch_dev *ca;
u64 start_time = local_clock();
@@ -711,7 +716,7 @@ int bch2_gc(struct bch_fs *c, struct list_head *journal, bool initial)
down_write(&c->gc_lock);
again:
percpu_down_write(&c->mark_lock);
- ret = bch2_gc_start(c);
+ ret = bch2_gc_start(c, metadata_only);
percpu_up_write(&c->mark_lock);
if (ret)
@@ -719,7 +724,7 @@ again:
bch2_mark_superblocks(c);
- ret = bch2_gc_btrees(c, journal, initial);
+ ret = bch2_gc_btrees(c, journal, initial, metadata_only);
if (ret)
goto out;
@@ -753,7 +758,7 @@ out:
percpu_down_write(&c->mark_lock);
if (!ret)
- ret = bch2_gc_done(c, initial);
+ ret = bch2_gc_done(c, initial, metadata_only);
/* Indicates that gc is no longer in progress: */
__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
@@ -1155,7 +1160,7 @@ static int bch2_gc_thread(void *arg)
last = atomic_long_read(&clock->now);
last_kick = atomic_read(&c->kick_gc);
- ret = bch2_gc(c, NULL, false);
+ ret = bch2_gc(c, NULL, false, false);
if (ret)
bch_err(c, "btree gc failed: %i", ret);
diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h
index 9eb2b0527a92..b7982e64b235 100644
--- a/fs/bcachefs/btree_gc.h
+++ b/fs/bcachefs/btree_gc.h
@@ -5,7 +5,7 @@
#include "btree_types.h"
void bch2_coalesce(struct bch_fs *);
-int bch2_gc(struct bch_fs *, struct list_head *, bool);
+int bch2_gc(struct bch_fs *, struct list_head *, bool, bool);
void bch2_gc_thread_stop(struct bch_fs *);
int bch2_gc_thread_start(struct bch_fs *);
void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned);
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 495ef4732602..4fe66ee1f745 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -132,6 +132,8 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
switch (e->data_type) {
case BCH_DATA_BTREE:
+ usage->btree += usage->replicas[i];
+ break;
case BCH_DATA_USER:
usage->data += usage->replicas[i];
break;
@@ -226,6 +228,7 @@ static u64 avail_factor(u64 r)
u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage)
{
return min(fs_usage->hidden +
+ fs_usage->btree +
fs_usage->data +
reserve_factor(fs_usage->reserved +
fs_usage->online_reserved),
@@ -241,7 +244,8 @@ __bch2_fs_usage_read_short(struct bch_fs *c)
ret.capacity = c->capacity -
percpu_u64_get(&c->usage[0]->hidden);
- data = percpu_u64_get(&c->usage[0]->data);
+ data = percpu_u64_get(&c->usage[0]->data) +
+ percpu_u64_get(&c->usage[0]->btree);
reserved = percpu_u64_get(&c->usage[0]->reserved) +
percpu_u64_get(&c->usage[0]->online_reserved);
@@ -386,12 +390,17 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
bch2_wake_allocator(ca);
}
-void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca)
+void bch2_dev_usage_from_buckets(struct bch_fs *c)
{
+ struct bch_dev *ca;
struct bucket_mark old = { .v.counter = 0 };
struct bch_fs_usage *fs_usage;
struct bucket_array *buckets;
struct bucket *g;
+ unsigned i;
+ int cpu;
+
+ percpu_u64_set(&c->usage[0]->hidden, 0);
/*
* This is only called during startup, before there's any multithreaded
@@ -401,11 +410,17 @@ void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca)
fs_usage = this_cpu_ptr(c->usage[0]);
preempt_enable();
- buckets = bucket_array(ca);
+ for_each_member_device(ca, c, i) {
+ for_each_possible_cpu(cpu)
+ memset(per_cpu_ptr(ca->usage[0], cpu), 0,
+ sizeof(*ca->usage[0]));
+
+ buckets = bucket_array(ca);
- for_each_bucket(g, buckets)
- if (g->mark.data_type)
- bch2_dev_usage_update(c, ca, fs_usage, old, g->mark, false);
+ for_each_bucket(g, buckets)
+ bch2_dev_usage_update(c, ca, fs_usage,
+ old, g->mark, false);
+ }
}
#define bucket_data_cmpxchg(c, ca, fs_usage, g, new, expr) \
@@ -426,10 +441,17 @@ static inline void update_replicas(struct bch_fs *c,
BUG_ON(idx < 0);
BUG_ON(!sectors);
- if (r->data_type == BCH_DATA_CACHED)
- fs_usage->cached += sectors;
- else
+ switch (r->data_type) {
+ case BCH_DATA_BTREE:
+ fs_usage->btree += sectors;
+ break;
+ case BCH_DATA_USER:
fs_usage->data += sectors;
+ break;
+ case BCH_DATA_CACHED:
+ fs_usage->cached += sectors;
+ break;
+ }
fs_usage->replicas[idx] += sectors;
}
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 6af8b418b1e3..095015f17f76 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -174,7 +174,7 @@ static inline bool bucket_needs_journal_commit(struct bucket_mark m,
struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *);
-void bch2_dev_usage_from_buckets(struct bch_fs *, struct bch_dev *);
+void bch2_dev_usage_from_buckets(struct bch_fs *);
static inline u64 __dev_buckets_available(struct bch_dev *ca,
struct bch_dev_usage stats)
diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h
index 348d062dd744..a98493dd2ba8 100644
--- a/fs/bcachefs/buckets_types.h
+++ b/fs/bcachefs/buckets_types.h
@@ -70,6 +70,7 @@ struct bch_fs_usage {
u64 gc_start[0];
u64 hidden;
+ u64 btree;
u64 data;
u64 cached;
u64 reserved;
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 0fa952fa1053..67b4dda9cfeb 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -361,7 +361,7 @@ int bch2_fs_recovery(struct bch_fs *c)
test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
bch_verbose(c, "starting mark and sweep:");
err = "error in recovery";
- ret = bch2_gc(c, &journal, true);
+ ret = bch2_gc(c, &journal, true, false);
if (ret)
goto err;
bch_verbose(c, "mark and sweep done");
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 1354dd33874c..59503ad0006c 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -497,7 +497,7 @@ STORE(__bch2_fs)
bch2_coalesce(c);
if (attr == &sysfs_trigger_gc)
- bch2_gc(c, NULL, false);
+ bch2_gc(c, NULL, false, false);
if (attr == &sysfs_trigger_alloc_write) {
bool wrote;