summaryrefslogtreecommitdiff
path: root/fs/bcachefs/movinggc.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-01-11 03:34:17 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2024-01-11 03:34:17 +0300
commit999a36b52b1b11b2ca0590756e4f8cf21f2d9182 (patch)
tree1b51ea332b5258e51fc9209168f66246bf7b3875 /fs/bcachefs/movinggc.c
parent84e9a2d5517bf62edda74f382757aa173b8e45fd (diff)
parent169de41985f53320580f3d347534966ea83343ca (diff)
downloadlinux-999a36b52b1b11b2ca0590756e4f8cf21f2d9182.tar.xz
Merge tag 'bcachefs-2024-01-10' of https://evilpiepirate.org/git/bcachefs
Pull bcachefs updates from Kent Overstreet: - btree write buffer rewrite: instead of adding keys to the btree write buffer at transaction commit time, we now journal them with a different journal entry type and copy them from the journal to the write buffer just prior to journal write. This reduces the number of atomic operations on shared cachelines in the transaction commit path and is a signicant performance improvement on some workloads: multithreaded 4k random writes went from ~650k iops to ~850k iops. - Bring back optimistic spinning for six locks: the new implementation doesn't use osq locks; instead we add to the lock waitlist as normal, and then spin on the lock_acquired bit in the waitlist entry, _not_ the lock itself. - New ioctls: - BCH_IOCTL_DEV_USAGE_V2, which allows for new data types - BCH_IOCTL_OFFLINE_FSCK, which runs the kernel implementation of fsck but without mounting: useful for transparently using the kernel version of fsck from 'bcachefs fsck' when the kernel version is a better match for the on disk filesystem. - BCH_IOCTL_ONLINE_FSCK: online fsck. Not all passes are supported yet, but the passes that are supported are fully featured - errors may be corrected as normal. The new ioctls use the new 'thread_with_file' abstraction for kicking off a kthread that's tied to a file descriptor returned to userspace via the ioctl. - btree_paths within a btree_trans are now dynamically growable, instead of being limited to 64. This is important for the check_directory_structure phase of fsck, and also fixes some issues we were having with btree path overflow in the reflink btree. - Trigger refactoring; prep work for the upcoming disk space accounting rewrite - Numerous bugfixes :) * tag 'bcachefs-2024-01-10' of https://evilpiepirate.org/git/bcachefs: (226 commits) bcachefs: eytzinger0_find() search should be const bcachefs: move "ptrs not changing" optimization to bch2_trigger_extent() bcachefs: fix simulateously upgrading & downgrading bcachefs: Restart recovery passes more reliably bcachefs: bch2_dump_bset() doesn't choke on u64s == 0 bcachefs: improve checksum error messages bcachefs: improve validate_bset_keys() bcachefs: print sb magic when relevant bcachefs: __bch2_sb_field_to_text() bcachefs: %pg is banished bcachefs: Improve would_deadlock trace event bcachefs: fsck_err()s don't need to manually check c->sb.version anymore bcachefs: Upgrades now specify errors to fix, like downgrades bcachefs: no thread_with_file in userspace bcachefs: Don't autofix errors we can't fix bcachefs: add missing bch2_latency_acct() call bcachefs: increase max_active on io_complete_wq bcachefs: add time_stats for btree_node_read_done() bcachefs: don't clear accessed bit in btree node fill bcachefs: Add an option to control btree node prefetching ...
Diffstat (limited to 'fs/bcachefs/movinggc.c')
-rw-r--r--fs/bcachefs/movinggc.c49
1 files changed, 27 insertions, 22 deletions
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index a84e79f79e5e..69e06a84dad4 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -91,7 +91,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
a = bch2_alloc_to_v4(k, &_a);
b->k.gen = a->gen;
- b->sectors = a->dirty_sectors;
+ b->sectors = bch2_bucket_sectors_dirty(*a);
ret = data_type_movable(a->data_type) &&
a->fragmentation_lru &&
@@ -145,20 +145,21 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
{
struct btree_trans *trans = ctxt->trans;
struct bch_fs *c = trans->c;
- struct btree_iter iter;
- struct bkey_s_c k;
size_t nr_to_get = max_t(size_t, 16U, buckets_in_flight->nr / 4);
size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0;
int ret;
move_buckets_wait(ctxt, buckets_in_flight, false);
- ret = bch2_btree_write_buffer_flush(trans);
- if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_flush()",
+ ret = bch2_btree_write_buffer_tryflush(trans);
+ if (bch2_err_matches(ret, EROFS))
+ return ret;
+
+ if (bch2_fs_fatal_err_on(ret, c, "%s: error %s from bch2_btree_write_buffer_tryflush()",
__func__, bch2_err_str(ret)))
return ret;
- ret = for_each_btree_key2_upto(trans, iter, BTREE_ID_lru,
+ ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru,
lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0),
lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX),
0, k, ({
@@ -167,15 +168,23 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
saw++;
- if (!bch2_bucket_is_movable(trans, &b, lru_pos_time(k.k->p)))
+ ret2 = bch2_bucket_is_movable(trans, &b, lru_pos_time(k.k->p));
+ if (ret2 < 0)
+ goto err;
+
+ if (!ret2)
not_movable++;
else if (bucket_in_flight(buckets_in_flight, b.k))
in_flight++;
else {
- ret2 = darray_push(buckets, b) ?: buckets->nr >= nr_to_get;
- if (ret2 >= 0)
- sectors += b.sectors;
+ ret2 = darray_push(buckets, b);
+ if (ret2)
+ goto err;
+ sectors += b.sectors;
}
+
+ ret2 = buckets->nr >= nr_to_get;
+err:
ret2;
}));
@@ -198,7 +207,6 @@ static int bch2_copygc(struct moving_context *ctxt,
};
move_buckets buckets = { 0 };
struct move_bucket_in_flight *f;
- struct move_bucket *i;
u64 moved = atomic64_read(&ctxt->stats->sectors_moved);
int ret = 0;
@@ -221,7 +229,7 @@ static int bch2_copygc(struct moving_context *ctxt,
break;
}
- ret = __bch2_evacuate_bucket(ctxt, f, f->bucket.k.bucket,
+ ret = bch2_evacuate_bucket(ctxt, f, f->bucket.k.bucket,
f->bucket.k.gen, data_opts);
if (ret)
goto err;
@@ -259,19 +267,16 @@ err:
*/
unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
{
- struct bch_dev *ca;
- unsigned dev_idx;
s64 wait = S64_MAX, fragmented_allowed, fragmented;
- unsigned i;
- for_each_rw_member(ca, c, dev_idx) {
+ for_each_rw_member(c, ca) {
struct bch_dev_usage usage = bch2_dev_usage_read(ca);
fragmented_allowed = ((__dev_buckets_available(ca, usage, BCH_WATERMARK_stripe) *
ca->mi.bucket_size) >> 1);
fragmented = 0;
- for (i = 0; i < BCH_DATA_NR; i++)
+ for (unsigned i = 0; i < BCH_DATA_NR; i++)
if (data_type_movable(i))
fragmented += usage.d[i].fragmented;
@@ -313,9 +318,9 @@ static int bch2_copygc_thread(void *arg)
if (!buckets)
return -ENOMEM;
ret = rhashtable_init(&buckets->table, &bch_move_bucket_params);
+ bch_err_msg(c, ret, "allocating copygc buckets in flight");
if (ret) {
kfree(buckets);
- bch_err_msg(c, ret, "allocating copygc buckets in flight");
return ret;
}
@@ -334,7 +339,8 @@ static int bch2_copygc_thread(void *arg)
if (!c->copy_gc_enabled) {
move_buckets_wait(&ctxt, buckets, true);
- kthread_wait_freezable(c->copy_gc_enabled);
+ kthread_wait_freezable(c->copy_gc_enabled ||
+ kthread_should_stop());
}
if (unlikely(freezing(current))) {
@@ -411,10 +417,9 @@ int bch2_copygc_start(struct bch_fs *c)
t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name);
ret = PTR_ERR_OR_ZERO(t);
- if (ret) {
- bch_err_msg(c, ret, "creating copygc thread");
+ bch_err_msg(c, ret, "creating copygc thread");
+ if (ret)
return ret;
- }
get_task_struct(t);