summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-09-23 20:05:41 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2024-09-23 20:05:41 +0300
commitb3f391fddf3cfaadda59ec8da8fd17f4520bbf42 (patch)
tree42a950e8661c3f4a3ec2754190323a56a94f71c6 /include
parentf8ffbc365f703d74ecca8ca787318d05bbee2bf7 (diff)
parent025c55a4c7f11ea38521c6e797f3192ad8768c93 (diff)
downloadlinux-b3f391fddf3cfaadda59ec8da8fd17f4520bbf42.tar.xz
Merge tag 'bcachefs-2024-09-21' of git://evilpiepirate.org/bcachefs
Pull bcachefs updates from Kent Overstreet: - rcu_pending, btree key cache rework: this solves lock contenting in the key cache, eliminating the biggest source of the srcu lock hold time warnings, and drastically improving performance on some metadata heavy workloads - on multithreaded creates we're now 3-4x faster than xfs. - We're now using an rhashtable instead of the system inode hash table; this is another significant performance improvement on multithreaded metadata workloads, eliminating more lock contention. - for_each_btree_key_in_subvolume_upto(): new helper for iterating over keys within a specific subvolume, eliminating a lot of open coded "subvolume_get_snapshot()" and also fixing another source of srcu lock time warnings, by running each loop iteration in its own transaction (as the existing for_each_btree_key() does). - More work on btree_trans locking asserts; we now assert that we don't hold btree node locks when trans->locked is false, which is important because we don't use lockdep for tracking individual btree node locks. - Some cleanups and improvements in the bset.c btree node lookup code, from Alan. - Rework of btree node pinning, which we use in backpointers fsck. The old hacky implementation, where the shrinker just skipped over nodes in the pinned range, was causing OOMs; instead we now use another shrinker with a much higher seeks number for pinned nodes. - Rebalance now uses BCH_WRITE_ONLY_SPECIFIED_DEVS; this fixes an issue where rebalance would sometimes fall back to allocating from the full filesystem, which is not what we want when it's trying to move data to a specific target. - Use __GFP_ACCOUNT, GFP_RECLAIMABLE for btree node, key cache allocations. - Idmap mounts are now supported (Hongbo Li) - Rename whiteouts are now supported (Hongbo Li) - Erasure coding can now handle devices being marked as failed, or forcibly removed. We still need the evacuate path for erasure coding, but it's getting very close to ready for people to start using. * tag 'bcachefs-2024-09-21' of git://evilpiepirate.org/bcachefs: (99 commits) bcachefs: return err ptr instead of null in read sb clean bcachefs: Remove duplicated include in backpointers.c bcachefs: Don't drop devices with stripe pointers bcachefs: bch2_ec_stripe_head_get() now checks for change in rw devices bcachefs: bch_fs.rw_devs_change_count bcachefs: bch2_dev_remove_stripes() bcachefs: bch2_trigger_ptr() calculates sectors even when no device bcachefs: improve error messages in bch2_ec_read_extent() bcachefs: improve error message on too few devices for ec bcachefs: improve bch2_new_stripe_to_text() bcachefs: ec_stripe_head.nr_created bcachefs: bch_stripe.disk_label bcachefs: stripe_to_mem() bcachefs: EIO errcode cleanup bcachefs: Rework btree node pinning bcachefs: split up btree cache counters for live, freeable bcachefs: btree cache counters should be size_t bcachefs: Don't count "skipped access bit" as touched in btree cache scan bcachefs: Failed devices no longer require mounting in degraded mode bcachefs: bch2_dev_rcu_noerror() ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/fs.h9
-rw-r--r--include/linux/generic-radix-tree.h105
2 files changed, 111 insertions, 3 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6b8df574729c..776298fbfcb4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3148,7 +3148,14 @@ static inline bool is_zero_ino(ino_t ino)
return (u32)ino == 0;
}
-extern void __iget(struct inode * inode);
+/*
+ * inode->i_lock must be held
+ */
+static inline void __iget(struct inode *inode)
+{
+ atomic_inc(&inode->i_count);
+}
+
extern void iget_failed(struct inode *);
extern void clear_inode(struct inode *);
extern void __destroy_inode(struct inode *);
diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h
index f3512fddf3d7..5b51c3d582d6 100644
--- a/include/linux/generic-radix-tree.h
+++ b/include/linux/generic-radix-tree.h
@@ -41,6 +41,7 @@
#include <linux/limits.h>
#include <linux/log2.h>
#include <linux/math.h>
+#include <linux/slab.h>
#include <linux/types.h>
struct genradix_root;
@@ -48,10 +49,63 @@ struct genradix_root;
#define GENRADIX_NODE_SHIFT 9
#define GENRADIX_NODE_SIZE (1U << GENRADIX_NODE_SHIFT)
+#define GENRADIX_ARY (GENRADIX_NODE_SIZE / sizeof(struct genradix_node *))
+#define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY)
+
+/* depth that's needed for a genradix that can address up to ULONG_MAX: */
+#define GENRADIX_MAX_DEPTH \
+ DIV_ROUND_UP(BITS_PER_LONG - GENRADIX_NODE_SHIFT, GENRADIX_ARY_SHIFT)
+
+#define GENRADIX_DEPTH_MASK \
+ ((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1))
+
+static inline int genradix_depth_shift(unsigned depth)
+{
+ return GENRADIX_NODE_SHIFT + GENRADIX_ARY_SHIFT * depth;
+}
+
+/*
+ * Returns size (of data, in bytes) that a tree of a given depth holds:
+ */
+static inline size_t genradix_depth_size(unsigned depth)
+{
+ return 1UL << genradix_depth_shift(depth);
+}
+
+static inline unsigned genradix_root_to_depth(struct genradix_root *r)
+{
+ return (unsigned long) r & GENRADIX_DEPTH_MASK;
+}
+
+static inline struct genradix_node *genradix_root_to_node(struct genradix_root *r)
+{
+ return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK);
+}
+
struct __genradix {
struct genradix_root *root;
};
+struct genradix_node {
+ union {
+ /* Interior node: */
+ struct genradix_node *children[GENRADIX_ARY];
+
+ /* Leaf: */
+ u8 data[GENRADIX_NODE_SIZE];
+ };
+};
+
+static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask)
+{
+ return kzalloc(GENRADIX_NODE_SIZE, gfp_mask);
+}
+
+static inline void genradix_free_node(struct genradix_node *node)
+{
+ kfree(node);
+}
+
/*
* NOTE: currently, sizeof(_type) must not be larger than GENRADIX_NODE_SIZE:
*/
@@ -128,6 +182,30 @@ static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
#define __genradix_idx_to_offset(_radix, _idx) \
__idx_to_offset(_idx, __genradix_obj_size(_radix))
+static inline void *__genradix_ptr_inlined(struct __genradix *radix, size_t offset)
+{
+ struct genradix_root *r = READ_ONCE(radix->root);
+ struct genradix_node *n = genradix_root_to_node(r);
+ unsigned level = genradix_root_to_depth(r);
+ unsigned shift = genradix_depth_shift(level);
+
+ if (unlikely(ilog2(offset) >= genradix_depth_shift(level)))
+ return NULL;
+
+ while (n && shift > GENRADIX_NODE_SHIFT) {
+ shift -= GENRADIX_ARY_SHIFT;
+ n = n->children[offset >> shift];
+ offset &= (1UL << shift) - 1;
+ }
+
+ return n ? &n->data[offset] : NULL;
+}
+
+#define genradix_ptr_inlined(_radix, _idx) \
+ (__genradix_cast(_radix) \
+ __genradix_ptr_inlined(&(_radix)->tree, \
+ __genradix_idx_to_offset(_radix, _idx)))
+
void *__genradix_ptr(struct __genradix *, size_t);
/**
@@ -142,7 +220,24 @@ void *__genradix_ptr(struct __genradix *, size_t);
__genradix_ptr(&(_radix)->tree, \
__genradix_idx_to_offset(_radix, _idx)))
-void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t);
+void *__genradix_ptr_alloc(struct __genradix *, size_t,
+ struct genradix_node **, gfp_t);
+
+#define genradix_ptr_alloc_inlined(_radix, _idx, _gfp) \
+ (__genradix_cast(_radix) \
+ (__genradix_ptr_inlined(&(_radix)->tree, \
+ __genradix_idx_to_offset(_radix, _idx)) ?: \
+ __genradix_ptr_alloc(&(_radix)->tree, \
+ __genradix_idx_to_offset(_radix, _idx), \
+ NULL, _gfp)))
+
+#define genradix_ptr_alloc_preallocated_inlined(_radix, _idx, _new_node, _gfp)\
+ (__genradix_cast(_radix) \
+ (__genradix_ptr_inlined(&(_radix)->tree, \
+ __genradix_idx_to_offset(_radix, _idx)) ?: \
+ __genradix_ptr_alloc(&(_radix)->tree, \
+ __genradix_idx_to_offset(_radix, _idx), \
+ _new_node, _gfp)))
/**
* genradix_ptr_alloc - get a pointer to a genradix entry, allocating it
@@ -157,7 +252,13 @@ void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t);
(__genradix_cast(_radix) \
__genradix_ptr_alloc(&(_radix)->tree, \
__genradix_idx_to_offset(_radix, _idx), \
- _gfp))
+ NULL, _gfp))
+
+#define genradix_ptr_alloc_preallocated(_radix, _idx, _new_node, _gfp)\
+ (__genradix_cast(_radix) \
+ __genradix_ptr_alloc(&(_radix)->tree, \
+ __genradix_idx_to_offset(_radix, _idx), \
+ _new_node, _gfp))
struct genradix_iter {
size_t offset;