author		Linus Torvalds <torvalds@linux-foundation.org>	2024-09-23 20:05:41 +0300
committer	Linus Torvalds <torvalds@linux-foundation.org>	2024-09-23 20:05:41 +0300
commit		b3f391fddf3cfaadda59ec8da8fd17f4520bbf42 (patch)
tree		42a950e8661c3f4a3ec2754190323a56a94f71c6 /fs/bcachefs/replicas.c
parent		f8ffbc365f703d74ecca8ca787318d05bbee2bf7 (diff)
parent		025c55a4c7f11ea38521c6e797f3192ad8768c93 (diff)
Merge tag 'bcachefs-2024-09-21' of git://evilpiepirate.org/bcachefs
Pull bcachefs updates from Kent Overstreet:

 - rcu_pending, btree key cache rework: this solves lock contention in
   the key cache, eliminating the biggest source of the srcu lock hold
   time warnings, and drastically improving performance on some
   metadata-heavy workloads; on multithreaded creates we're now 3-4x
   faster than xfs.

 - We're now using an rhashtable instead of the system inode hash
   table; this is another significant performance improvement on
   multithreaded metadata workloads, eliminating more lock contention.

 - for_each_btree_key_in_subvolume_upto(): new helper for iterating
   over keys within a specific subvolume, eliminating a lot of open
   coded "subvolume_get_snapshot()" and also fixing another source of
   srcu lock time warnings, by running each loop iteration in its own
   transaction (as the existing for_each_btree_key() does).

 - More work on btree_trans locking asserts; we now assert that we
   don't hold btree node locks when trans->locked is false, which is
   important because we don't use lockdep for tracking individual
   btree node locks.

 - Some cleanups and improvements in the bset.c btree node lookup
   code, from Alan.

 - Rework of btree node pinning, which we use in backpointers fsck.
   The old hacky implementation, where the shrinker just skipped over
   nodes in the pinned range, was causing OOMs; instead we now use
   another shrinker with a much higher seeks number for pinned nodes
   (see the sketch after the commit list below).

 - Rebalance now uses BCH_WRITE_ONLY_SPECIFIED_DEVS; this fixes an
   issue where rebalance would sometimes fall back to allocating from
   the full filesystem, which is not what we want when it's trying to
   move data to a specific target.

 - Use __GFP_ACCOUNT, GFP_RECLAIMABLE for btree node and key cache
   allocations.

 - Idmap mounts are now supported (Hongbo Li)

 - Rename whiteouts are now supported (Hongbo Li)

 - Erasure coding can now handle devices being marked as failed, or
   forcibly removed. We still need the evacuate path for erasure
   coding, but it's getting very close to ready for people to start
   using.

* tag 'bcachefs-2024-09-21' of git://evilpiepirate.org/bcachefs: (99 commits)
  bcachefs: return err ptr instead of null in read sb clean
  bcachefs: Remove duplicated include in backpointers.c
  bcachefs: Don't drop devices with stripe pointers
  bcachefs: bch2_ec_stripe_head_get() now checks for change in rw devices
  bcachefs: bch_fs.rw_devs_change_count
  bcachefs: bch2_dev_remove_stripes()
  bcachefs: bch2_trigger_ptr() calculates sectors even when no device
  bcachefs: improve error messages in bch2_ec_read_extent()
  bcachefs: improve error message on too few devices for ec
  bcachefs: improve bch2_new_stripe_to_text()
  bcachefs: ec_stripe_head.nr_created
  bcachefs: bch_stripe.disk_label
  bcachefs: stripe_to_mem()
  bcachefs: EIO errcode cleanup
  bcachefs: Rework btree node pinning
  bcachefs: split up btree cache counters for live, freeable
  bcachefs: btree cache counters should be size_t
  bcachefs: Don't count "skipped access bit" as touched in btree cache scan
  bcachefs: Failed devices no longer require mounting in degraded mode
  bcachefs: bch2_dev_rcu_noerror()
  ...
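A minimal sketch of the two-shrinker idea from the pinning bullet above, written against the current kernel shrinker API. This is not the bcachefs code (the real implementation lives in fs/bcachefs/btree_cache.c); the names pinned_cache, pinned_count, and pinned_scan, and the seeks multiplier, are hypothetical:

/*
 * Sketch only, not the bcachefs implementation: pinned btree nodes get
 * their own shrinker whose "seeks" value is much higher than DEFAULT_SEEKS,
 * so reclaim strongly prefers unpinned nodes, instead of the old behaviour
 * where one shrinker silently skipped the pinned range (which could drive
 * the system into OOM). All names below are hypothetical.
 */
#include <linux/atomic.h>
#include <linux/shrinker.h>

struct pinned_cache {
	atomic_long_t	 nr_pinned;	/* nodes currently pinned */
	struct shrinker	*shrink;
};

static unsigned long pinned_count(struct shrinker *s, struct shrink_control *sc)
{
	struct pinned_cache *pc = s->private_data;
	unsigned long nr = atomic_long_read(&pc->nr_pinned);

	return nr ?: SHRINK_EMPTY;
}

static unsigned long pinned_scan(struct shrinker *s, struct shrink_control *sc)
{
	/* walk the pinned nodes and free up to sc->nr_to_scan of them here */
	return SHRINK_STOP;
}

static int pinned_cache_shrinker_init(struct pinned_cache *pc)
{
	struct shrinker *s = shrinker_alloc(0, "btree-node-pinned");

	if (!s)
		return -ENOMEM;

	s->count_objects = pinned_count;
	s->scan_objects	 = pinned_scan;
	s->seeks	 = DEFAULT_SEEKS * 8;	/* arbitrary for the sketch: pinned
						 * nodes are costly to repopulate */
	s->private_data	 = pc;

	pc->shrink = s;
	shrinker_register(s);
	return 0;
}

Registering a separate shrinker lets the VM's pressure accounting see the pinned nodes (weighted by the high seeks value) rather than having them vanish from reclaim entirely.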
Diffstat (limited to 'fs/bcachefs/replicas.c')
-rw-r--r--	fs/bcachefs/replicas.c	10
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index 1f34c92a6d11..998c0bd06802 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -123,7 +123,7 @@ static void extent_to_replicas(struct bkey_s_c k,
 			continue;
 
 		if (!p.has_ec)
-			r->devs[r->nr_devs++] = p.ptr.dev;
+			replicas_entry_add_dev(r, p.ptr.dev);
 		else
 			r->nr_required = 0;
 	}
@@ -140,7 +140,7 @@ static void stripe_to_replicas(struct bkey_s_c k,
 	for (ptr = s.v->ptrs;
 	     ptr < s.v->ptrs + s.v->nr_blocks;
 	     ptr++)
-		r->devs[r->nr_devs++] = ptr->dev;
+		replicas_entry_add_dev(r, ptr->dev);
 }
 
 void bch2_bkey_to_replicas(struct bch_replicas_entry_v1 *e,
@@ -181,7 +181,7 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e,
 	e->nr_required = 1;
 
 	darray_for_each(devs, i)
-		e->devs[e->nr_devs++] = *i;
+		replicas_entry_add_dev(e, *i);
 
 	bch2_replicas_entry_sort(e);
 }
@@ -795,12 +795,12 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
 		for (unsigned i = 0; i < e->nr_devs; i++) {
 			nr_online += test_bit(e->devs[i], devs.d);
 
-			struct bch_dev *ca = bch2_dev_rcu(c, e->devs[i]);
+			struct bch_dev *ca = bch2_dev_rcu_noerror(c, e->devs[i]);
 			nr_failed += !ca || ca->mi.state == BCH_MEMBER_STATE_failed;
 		}
 		rcu_read_unlock();
 
-		if (nr_failed == e->nr_devs)
+		if (nr_online + nr_failed == e->nr_devs)
 			continue;
 
 		if (nr_online < e->nr_required)
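Two notes on the hunks above. First, replicas_entry_add_dev() replaces the open-coded append r->devs[r->nr_devs++] = dev; a minimal sketch consistent with the pattern being replaced (the real helper in the bcachefs tree may additionally bounds-check nr_devs):

/* Sketch inferred from the pattern this diff replaces; not verbatim bcachefs code. */
static inline void replicas_entry_add_dev(struct bch_replicas_entry_v1 *e,
					  unsigned dev)
{
	e->devs[e->nr_devs++] = dev;
}

Second, the bch2_have_enough_devs() change skips a replicas entry only when every one of its devices is accounted for as online or marked failed, rather than only when all have failed; together with the switch to bch2_dev_rcu_noerror(), this lines up with the "Failed devices no longer require mounting in degraded mode" commit in the merge log above.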