From 94d709be8c0dc875dfc9ebb64d3b8093d0790c15 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 16 Feb 2026 14:31:57 +0100 Subject: xattr: add rcu_head and rhash_head to struct simple_xattr In preparation for converting simple_xattrs from rbtree to rhashtable, add rhash_head and rcu_head members to struct simple_xattr. The rhashtable implementation will use rhash_head for hash table linkage and RCU-based lockless reads, requiring that replaced or removed xattr entries be freed via call_rcu() rather than immediately. Add simple_xattr_free_rcu() which schedules RCU-deferred freeing of an xattr entry. This will be used by callers of simple_xattr_set() once they switch to the rhashtable-based xattr store. No functional changes. Link: https://patch.msgid.link/20260216-work-xattr-socket-v1-1-c2efa4f74cb7@kernel.org Acked-by: Darrick J. Wong Signed-off-by: Christian Brauner --- include/linux/xattr.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 296b5ee5c979..fdbd2095414a 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -112,6 +113,8 @@ struct simple_xattrs { struct simple_xattr { struct rb_node rb_node; + struct rhash_head hash_node; + struct rcu_head rcu; char *name; size_t size; char value[] __counted_by(size); @@ -122,6 +125,7 @@ void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space); size_t simple_xattr_space(const char *name, size_t size); struct simple_xattr *simple_xattr_alloc(const void *value, size_t size); void simple_xattr_free(struct simple_xattr *xattr); +void simple_xattr_free_rcu(struct simple_xattr *xattr); int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, void *buffer, size_t size); struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs, -- cgit v1.2.3 From b32c4a213698ab351b44da2fd1b2a5976c7fa033 Mon Sep 17 00:00:00 2001 
From: Christian Brauner Date: Mon, 16 Feb 2026 14:31:58 +0100 Subject: xattr: add rhashtable-based simple_xattr infrastructure Add rhashtable support to the simple_xattr subsystem while keeping the existing rbtree code fully functional. This allows consumers to be migrated one at a time without breaking any intermediate build. struct simple_xattrs gains a dispatch flag and a union holding either the rbtree (rb_root + rwlock) or rhashtable state: struct simple_xattrs { bool use_rhashtable; union { struct { struct rb_root rb_root; rwlock_t lock; }; struct rhashtable ht; }; }; simple_xattrs_init() continues to set up the rbtree path for existing embedded-struct callers. Add simple_xattrs_alloc() which dynamically allocates a simple_xattrs and initializes the rhashtable path. This is the entry point for consumers switching to pointer-based lazy allocation. The five core functions (get, set, list, add, free) dispatch based on the use_rhashtable flag. Existing callers continue to use the rbtree path unchanged. As each consumer is converted it will switch to simple_xattrs_alloc() and the rhashtable path. Once all consumers are converted a follow-up patch will remove the rbtree code. Link: https://patch.msgid.link/20260216-work-xattr-socket-v1-2-c2efa4f74cb7@kernel.org Acked-by: Darrick J. Wong Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/xattr.c | 439 ++++++++++++++++++++++++++++++++++++++------------ include/linux/xattr.h | 25 ++- mm/shmem.c | 2 +- 3 files changed, 357 insertions(+), 109 deletions(-) (limited to 'include') diff --git a/fs/xattr.c b/fs/xattr.c index 9cbb1917bcb2..1d98ea459b7b 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -1228,22 +1229,25 @@ void simple_xattr_free_rcu(struct simple_xattr *xattr) * Allocate a new xattr object and initialize respective members. The caller is * responsible for handling the name of the xattr. * - * Return: On success a new xattr object is returned. 
On failure NULL is - * returned. + * Return: New xattr object on success, NULL if @value is NULL, ERR_PTR on + * failure. */ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size) { struct simple_xattr *new_xattr; size_t len; + if (!value) + return NULL; + /* wrap around? */ len = sizeof(*new_xattr) + size; if (len < sizeof(*new_xattr)) - return NULL; + return ERR_PTR(-ENOMEM); new_xattr = kvmalloc(len, GFP_KERNEL_ACCOUNT); if (!new_xattr) - return NULL; + return ERR_PTR(-ENOMEM); new_xattr->size = size; memcpy(new_xattr->value, value, size); @@ -1287,6 +1291,33 @@ static int rbtree_simple_xattr_node_cmp(struct rb_node *new_node, return rbtree_simple_xattr_cmp(xattr->name, node); } +static u32 simple_xattr_hashfn(const void *data, u32 len, u32 seed) +{ + const char *name = data; + return jhash(name, strlen(name), seed); +} + +static u32 simple_xattr_obj_hashfn(const void *obj, u32 len, u32 seed) +{ + const struct simple_xattr *xattr = obj; + return jhash(xattr->name, strlen(xattr->name), seed); +} + +static int simple_xattr_obj_cmpfn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct simple_xattr *xattr = obj; + return strcmp(xattr->name, arg->key); +} + +static const struct rhashtable_params simple_xattr_params = { + .head_offset = offsetof(struct simple_xattr, hash_node), + .hashfn = simple_xattr_hashfn, + .obj_hashfn = simple_xattr_obj_hashfn, + .obj_cmpfn = simple_xattr_obj_cmpfn, + .automatic_shrinking = true, +}; + /** * simple_xattr_get - get an xattr object * @xattrs: the header of the xattr object @@ -1306,22 +1337,41 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, void *buffer, size_t size) { struct simple_xattr *xattr = NULL; - struct rb_node *rbp; int ret = -ENODATA; - read_lock(&xattrs->lock); - rbp = rb_find(name, &xattrs->rb_root, rbtree_simple_xattr_cmp); - if (rbp) { - xattr = rb_entry(rbp, struct simple_xattr, rb_node); - ret = xattr->size; - if (buffer) { - if (size < xattr->size) - 
ret = -ERANGE; - else - memcpy(buffer, xattr->value, xattr->size); + if (xattrs->use_rhashtable) { + guard(rcu)(); + xattr = rhashtable_lookup(&xattrs->ht, name, + simple_xattr_params); + if (xattr) { + ret = xattr->size; + if (buffer) { + if (size < xattr->size) + ret = -ERANGE; + else + memcpy(buffer, xattr->value, + xattr->size); + } + } + } else { + struct rb_node *rbp; + + read_lock(&xattrs->lock); + rbp = rb_find(name, &xattrs->rb_root, + rbtree_simple_xattr_cmp); + if (rbp) { + xattr = rb_entry(rbp, struct simple_xattr, rb_node); + ret = xattr->size; + if (buffer) { + if (size < xattr->size) + ret = -ERANGE; + else + memcpy(buffer, xattr->value, + xattr->size); + } } + read_unlock(&xattrs->lock); } - read_unlock(&xattrs->lock); return ret; } @@ -1355,78 +1405,134 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs, const char *name, const void *value, size_t size, int flags) { - struct simple_xattr *old_xattr = NULL, *new_xattr = NULL; - struct rb_node *parent = NULL, **rbp; - int err = 0, ret; + struct simple_xattr *old_xattr = NULL; + int err = 0; - /* value == NULL means remove */ - if (value) { - new_xattr = simple_xattr_alloc(value, size); - if (!new_xattr) - return ERR_PTR(-ENOMEM); + CLASS(simple_xattr, new_xattr)(value, size); + if (IS_ERR(new_xattr)) + return new_xattr; + if (new_xattr) { new_xattr->name = kstrdup(name, GFP_KERNEL_ACCOUNT); - if (!new_xattr->name) { - simple_xattr_free(new_xattr); + if (!new_xattr->name) return ERR_PTR(-ENOMEM); - } } - write_lock(&xattrs->lock); - rbp = &xattrs->rb_root.rb_node; - while (*rbp) { - parent = *rbp; - ret = rbtree_simple_xattr_cmp(name, *rbp); - if (ret < 0) - rbp = &(*rbp)->rb_left; - else if (ret > 0) - rbp = &(*rbp)->rb_right; - else - old_xattr = rb_entry(*rbp, struct simple_xattr, rb_node); - if (old_xattr) - break; - } + if (xattrs->use_rhashtable) { + /* + * Lookup is safe without RCU here since writes are + * serialized by the caller. 
+ */ + old_xattr = rhashtable_lookup_fast(&xattrs->ht, name, + simple_xattr_params); + + if (old_xattr) { + /* Fail if XATTR_CREATE is requested and the xattr exists. */ + if (flags & XATTR_CREATE) + return ERR_PTR(-EEXIST); + + if (new_xattr) { + err = rhashtable_replace_fast(&xattrs->ht, + &old_xattr->hash_node, + &new_xattr->hash_node, + simple_xattr_params); + if (err) + return ERR_PTR(err); + } else { + err = rhashtable_remove_fast(&xattrs->ht, + &old_xattr->hash_node, + simple_xattr_params); + if (err) + return ERR_PTR(err); + } + } else { + /* Fail if XATTR_REPLACE is requested but no xattr is found. */ + if (flags & XATTR_REPLACE) + return ERR_PTR(-ENODATA); + + /* + * If XATTR_CREATE or no flags are specified together + * with a new value simply insert it. + */ + if (new_xattr) { + err = rhashtable_insert_fast(&xattrs->ht, + &new_xattr->hash_node, + simple_xattr_params); + if (err) + return ERR_PTR(err); + } - if (old_xattr) { - /* Fail if XATTR_CREATE is requested and the xattr exists. */ - if (flags & XATTR_CREATE) { - err = -EEXIST; - goto out_unlock; + /* + * If XATTR_CREATE or no flags are specified and + * neither an old or new xattr exist then we don't + * need to do anything. + */ } - - if (new_xattr) - rb_replace_node(&old_xattr->rb_node, - &new_xattr->rb_node, &xattrs->rb_root); - else - rb_erase(&old_xattr->rb_node, &xattrs->rb_root); } else { - /* Fail if XATTR_REPLACE is requested but no xattr is found. */ - if (flags & XATTR_REPLACE) { - err = -ENODATA; - goto out_unlock; - } + struct rb_node *parent = NULL, **rbp; + int ret; - /* - * If XATTR_CREATE or no flags are specified together with a - * new value simply insert it. 
- */ - if (new_xattr) { - rb_link_node(&new_xattr->rb_node, parent, rbp); - rb_insert_color(&new_xattr->rb_node, &xattrs->rb_root); + write_lock(&xattrs->lock); + rbp = &xattrs->rb_root.rb_node; + while (*rbp) { + parent = *rbp; + ret = rbtree_simple_xattr_cmp(name, *rbp); + if (ret < 0) + rbp = &(*rbp)->rb_left; + else if (ret > 0) + rbp = &(*rbp)->rb_right; + else + old_xattr = rb_entry(*rbp, struct simple_xattr, + rb_node); + if (old_xattr) + break; } - /* - * If XATTR_CREATE or no flags are specified and neither an - * old or new xattr exist then we don't need to do anything. - */ - } + if (old_xattr) { + /* Fail if XATTR_CREATE is requested and the xattr exists. */ + if (flags & XATTR_CREATE) { + err = -EEXIST; + goto out_unlock; + } + + if (new_xattr) + rb_replace_node(&old_xattr->rb_node, + &new_xattr->rb_node, + &xattrs->rb_root); + else + rb_erase(&old_xattr->rb_node, + &xattrs->rb_root); + } else { + /* Fail if XATTR_REPLACE is requested but no xattr is found. */ + if (flags & XATTR_REPLACE) { + err = -ENODATA; + goto out_unlock; + } + + /* + * If XATTR_CREATE or no flags are specified together + * with a new value simply insert it. + */ + if (new_xattr) { + rb_link_node(&new_xattr->rb_node, parent, rbp); + rb_insert_color(&new_xattr->rb_node, + &xattrs->rb_root); + } + + /* + * If XATTR_CREATE or no flags are specified and + * neither an old or new xattr exist then we don't + * need to do anything. 
+ */ + } out_unlock: - write_unlock(&xattrs->lock); - if (!err) - return old_xattr; - simple_xattr_free(new_xattr); - return ERR_PTR(err); + write_unlock(&xattrs->lock); + if (err) + return ERR_PTR(err); + } + retain_and_null_ptr(new_xattr); + return old_xattr; } static bool xattr_is_trusted(const char *name) @@ -1467,7 +1573,6 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, { bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); struct simple_xattr *xattr; - struct rb_node *rbp; ssize_t remaining_size = size; int err = 0; @@ -1487,23 +1592,62 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, remaining_size -= err; err = 0; - read_lock(&xattrs->lock); - for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) { - xattr = rb_entry(rbp, struct simple_xattr, rb_node); + if (!xattrs) + return size - remaining_size; - /* skip "trusted." attributes for unprivileged callers */ - if (!trusted && xattr_is_trusted(xattr->name)) - continue; + if (xattrs->use_rhashtable) { + struct rhashtable_iter iter; - /* skip MAC labels; these are provided by LSM above */ - if (xattr_is_maclabel(xattr->name)) - continue; + rhashtable_walk_enter(&xattrs->ht, &iter); + rhashtable_walk_start(&iter); - err = xattr_list_one(&buffer, &remaining_size, xattr->name); - if (err) - break; + while ((xattr = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(xattr)) { + if (PTR_ERR(xattr) == -EAGAIN) + continue; + err = PTR_ERR(xattr); + break; + } + + /* skip "trusted." 
attributes for unprivileged callers */ + if (!trusted && xattr_is_trusted(xattr->name)) + continue; + + /* skip MAC labels; these are provided by LSM above */ + if (xattr_is_maclabel(xattr->name)) + continue; + + err = xattr_list_one(&buffer, &remaining_size, + xattr->name); + if (err) + break; + } + + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); + } else { + struct rb_node *rbp; + + read_lock(&xattrs->lock); + for (rbp = rb_first(&xattrs->rb_root); rbp; + rbp = rb_next(rbp)) { + xattr = rb_entry(rbp, struct simple_xattr, rb_node); + + /* skip "trusted." attributes for unprivileged callers */ + if (!trusted && xattr_is_trusted(xattr->name)) + continue; + + /* skip MAC labels; these are provided by LSM above */ + if (xattr_is_maclabel(xattr->name)) + continue; + + err = xattr_list_one(&buffer, &remaining_size, + xattr->name); + if (err) + break; + } + read_unlock(&xattrs->lock); } - read_unlock(&xattrs->lock); return err ? err : size - remaining_size; } @@ -1536,9 +1680,16 @@ static bool rbtree_simple_xattr_less(struct rb_node *new_node, void simple_xattr_add(struct simple_xattrs *xattrs, struct simple_xattr *new_xattr) { - write_lock(&xattrs->lock); - rb_add(&new_xattr->rb_node, &xattrs->rb_root, rbtree_simple_xattr_less); - write_unlock(&xattrs->lock); + if (xattrs->use_rhashtable) { + WARN_ON(rhashtable_insert_fast(&xattrs->ht, + &new_xattr->hash_node, + simple_xattr_params)); + } else { + write_lock(&xattrs->lock); + rb_add(&new_xattr->rb_node, &xattrs->rb_root, + rbtree_simple_xattr_less); + write_unlock(&xattrs->lock); + } } /** @@ -1549,10 +1700,80 @@ void simple_xattr_add(struct simple_xattrs *xattrs, */ void simple_xattrs_init(struct simple_xattrs *xattrs) { + xattrs->use_rhashtable = false; xattrs->rb_root = RB_ROOT; rwlock_init(&xattrs->lock); } +/** + * simple_xattrs_alloc - allocate and initialize a new xattr header + * + * Dynamically allocate a simple_xattrs header and initialize the + * underlying rhashtable. 
This is intended for consumers that want + * rhashtable-based xattr storage. + * + * Return: On success a new simple_xattrs is returned. On failure an + * ERR_PTR is returned. + */ +struct simple_xattrs *simple_xattrs_alloc(void) +{ + struct simple_xattrs *xattrs __free(kfree) = NULL; + + xattrs = kzalloc(sizeof(*xattrs), GFP_KERNEL); + if (!xattrs) + return ERR_PTR(-ENOMEM); + + xattrs->use_rhashtable = true; + if (rhashtable_init(&xattrs->ht, &simple_xattr_params)) + return ERR_PTR(-ENOMEM); + + return no_free_ptr(xattrs); +} + +/** + * simple_xattrs_lazy_alloc - get or allocate xattrs for a set operation + * @xattrsp: pointer to the xattrs pointer (may point to NULL) + * @value: value being set (NULL means remove) + * @flags: xattr set flags + * + * For lazily-allocated xattrs on the write path. If no xattrs exist yet + * and this is a remove operation, returns the appropriate result without + * allocating. Otherwise ensures xattrs is allocated and published with + * store-release semantics. + * + * Return: On success a valid pointer to the xattrs is returned. On + * failure or early-exit an ERR_PTR or NULL is returned. Callers should + * check with IS_ERR_OR_NULL() and propagate with PTR_ERR() which + * correctly returns 0 for the NULL no-op case. + */ +struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp, + const void *value, int flags) +{ + struct simple_xattrs *xattrs; + + xattrs = READ_ONCE(*xattrsp); + if (xattrs) + return xattrs; + + if (!value) + return (flags & XATTR_REPLACE) ? 
ERR_PTR(-ENODATA) : NULL; + + xattrs = simple_xattrs_alloc(); + if (!IS_ERR(xattrs)) + smp_store_release(xattrsp, xattrs); + return xattrs; +} + +static void simple_xattr_ht_free(void *ptr, void *arg) +{ + struct simple_xattr *xattr = ptr; + size_t *freed_space = arg; + + if (freed_space) + *freed_space += simple_xattr_space(xattr->name, xattr->size); + simple_xattr_free(xattr); +} + /** * simple_xattrs_free - free xattrs * @xattrs: xattr header whose xattrs to destroy @@ -1563,22 +1784,28 @@ void simple_xattrs_init(struct simple_xattrs *xattrs) */ void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space) { - struct rb_node *rbp; - if (freed_space) *freed_space = 0; - rbp = rb_first(&xattrs->rb_root); - while (rbp) { - struct simple_xattr *xattr; - struct rb_node *rbp_next; - - rbp_next = rb_next(rbp); - xattr = rb_entry(rbp, struct simple_xattr, rb_node); - rb_erase(&xattr->rb_node, &xattrs->rb_root); - if (freed_space) - *freed_space += simple_xattr_space(xattr->name, - xattr->size); - simple_xattr_free(xattr); - rbp = rbp_next; + + if (xattrs->use_rhashtable) { + rhashtable_free_and_destroy(&xattrs->ht, + simple_xattr_ht_free, freed_space); + } else { + struct rb_node *rbp; + + rbp = rb_first(&xattrs->rb_root); + while (rbp) { + struct simple_xattr *xattr; + struct rb_node *rbp_next; + + rbp_next = rb_next(rbp); + xattr = rb_entry(rbp, struct simple_xattr, rb_node); + rb_erase(&xattr->rb_node, &xattrs->rb_root); + if (freed_space) + *freed_space += simple_xattr_space(xattr->name, + xattr->size); + simple_xattr_free(xattr); + rbp = rbp_next; + } } } diff --git a/include/linux/xattr.h b/include/linux/xattr.h index fdbd2095414a..832a44358661 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -107,8 +107,14 @@ static inline const char *xattr_prefix(const struct xattr_handler *handler) } struct simple_xattrs { - struct rb_root rb_root; - rwlock_t lock; + bool use_rhashtable; + union { + struct { + struct rb_root rb_root; + rwlock_t 
lock; + }; + struct rhashtable ht; + }; }; struct simple_xattr { @@ -121,6 +127,9 @@ struct simple_xattr { }; void simple_xattrs_init(struct simple_xattrs *xattrs); +struct simple_xattrs *simple_xattrs_alloc(void); +struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp, + const void *value, int flags); void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space); size_t simple_xattr_space(const char *name, size_t size); struct simple_xattr *simple_xattr_alloc(const void *value, size_t size); @@ -137,4 +146,16 @@ void simple_xattr_add(struct simple_xattrs *xattrs, struct simple_xattr *new_xattr); int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name); +DEFINE_CLASS(simple_xattr, + struct simple_xattr *, + if (!IS_ERR_OR_NULL(_T)) simple_xattr_free(_T), + simple_xattr_alloc(value, size), + const void *value, size_t size) + +DEFINE_CLASS(simple_xattrs, + struct simple_xattrs *, + if (!IS_ERR_OR_NULL(_T)) { simple_xattrs_free(_T, NULL); kfree(_T); }, + simple_xattrs_alloc(), + void) + #endif /* _LINUX_XATTR_H */ diff --git a/mm/shmem.c b/mm/shmem.c index b40f3cd48961..35c2f8748668 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -4278,7 +4278,7 @@ static int shmem_initxattrs(struct inode *inode, for (xattr = xattr_array; xattr->name != NULL; xattr++) { new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len); - if (!new_xattr) + if (IS_ERR(new_xattr)) break; len = strlen(xattr->name) + 1; -- cgit v1.2.3 From 52b364fed6e1578e551fee20c76fecb3fc0e10ed Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 16 Feb 2026 14:31:59 +0100 Subject: shmem: adapt to rhashtable-based simple_xattrs with lazy allocation Adapt tmpfs/shmem to use the rhashtable-based xattr path and switch from an embedded struct to pointer-based lazy allocation. Change shmem_inode_info.xattrs from embedded 'struct simple_xattrs' to a pointer 'struct simple_xattrs *', initialized to NULL. 
This avoids the rhashtable overhead for every tmpfs inode, which helps when a lot of inodes exist. The xattr store is allocated on first use: - shmem_initxattrs(): Allocates via simple_xattrs_alloc() when security modules set initial xattrs during inode creation. - shmem_xattr_handler_set(): Allocates on first setxattr, with a short-circuit for removal when no xattrs are stored yet. All read paths (shmem_xattr_handler_get, shmem_listxattr) check for NULL xattrs pointer and return -ENODATA or 0 respectively. Replaced xattr entries are freed via simple_xattr_free_rcu() to allow concurrent RCU readers to finish. shmem_evict_inode() conditionally frees the xattr store only when allocated. Also change simple_xattr_add() from void to int to propagate rhashtable insertion failures. shmem_initxattrs() is the only caller. Link: https://patch.msgid.link/20260216-work-xattr-socket-v1-3-c2efa4f74cb7@kernel.org Acked-by: Darrick J. Wong Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/xattr.c | 26 +++++++++++++------------- include/linux/shmem_fs.h | 2 +- include/linux/xattr.h | 4 ++-- mm/shmem.c | 44 +++++++++++++++++++++++++++++++------------- 4 files changed, 47 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/fs/xattr.c b/fs/xattr.c index 1d98ea459b7b..eb45ae0fd17f 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -1677,19 +1677,19 @@ static bool rbtree_simple_xattr_less(struct rb_node *new_node, * of matching xattrs is wanted. Should only be called during inode * initialization when a few distinct initial xattrs are supposed to be set. 
*/ -void simple_xattr_add(struct simple_xattrs *xattrs, - struct simple_xattr *new_xattr) -{ - if (xattrs->use_rhashtable) { - WARN_ON(rhashtable_insert_fast(&xattrs->ht, - &new_xattr->hash_node, - simple_xattr_params)); - } else { - write_lock(&xattrs->lock); - rb_add(&new_xattr->rb_node, &xattrs->rb_root, - rbtree_simple_xattr_less); - write_unlock(&xattrs->lock); - } +int simple_xattr_add(struct simple_xattrs *xattrs, + struct simple_xattr *new_xattr) +{ + if (xattrs->use_rhashtable) + return rhashtable_insert_fast(&xattrs->ht, + &new_xattr->hash_node, + simple_xattr_params); + + write_lock(&xattrs->lock); + rb_add(&new_xattr->rb_node, &xattrs->rb_root, + rbtree_simple_xattr_less); + write_unlock(&xattrs->lock); + return 0; } /** diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index a8273b32e041..f6a2d3402d76 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -48,7 +48,7 @@ struct shmem_inode_info { }; struct timespec64 i_crtime; /* file creation time */ struct shared_policy policy; /* NUMA memory alloc policy */ - struct simple_xattrs xattrs; /* list of xattrs */ + struct simple_xattrs *xattrs; /* list of xattrs */ pgoff_t fallocend; /* highest fallocate endindex */ unsigned int fsflags; /* for FS_IOC_[SG]ETFLAGS */ atomic_t stop_eviction; /* hold when working on inode */ diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 832a44358661..6e619e185e90 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -142,8 +142,8 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs, size_t size, int flags); ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer, size_t size); -void simple_xattr_add(struct simple_xattrs *xattrs, - struct simple_xattr *new_xattr); +int simple_xattr_add(struct simple_xattrs *xattrs, + struct simple_xattr *new_xattr); int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name); DEFINE_CLASS(simple_xattr, diff --git 
a/mm/shmem.c b/mm/shmem.c index 35c2f8748668..0b0e577e880a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1425,7 +1425,10 @@ static void shmem_evict_inode(struct inode *inode) } } - simple_xattrs_free(&info->xattrs, sbinfo->max_inodes ? &freed : NULL); + if (info->xattrs) { + simple_xattrs_free(info->xattrs, sbinfo->max_inodes ? &freed : NULL); + kfree(info->xattrs); + } shmem_free_inode(inode->i_sb, freed); WARN_ON(inode->i_blocks); clear_inode(inode); @@ -3101,7 +3104,6 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap, shmem_set_inode_flags(inode, info->fsflags, NULL); INIT_LIST_HEAD(&info->shrinklist); INIT_LIST_HEAD(&info->swaplist); - simple_xattrs_init(&info->xattrs); cache_no_acl(inode); if (sbinfo->noswap) mapping_set_unevictable(inode->i_mapping); @@ -4255,10 +4257,13 @@ static int shmem_initxattrs(struct inode *inode, struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); const struct xattr *xattr; - struct simple_xattr *new_xattr; size_t ispace = 0; size_t len; + CLASS(simple_xattrs, xattrs)(); + if (IS_ERR(xattrs)) + return PTR_ERR(xattrs); + if (sbinfo->max_inodes) { for (xattr = xattr_array; xattr->name != NULL; xattr++) { ispace += simple_xattr_space(xattr->name, @@ -4277,24 +4282,24 @@ static int shmem_initxattrs(struct inode *inode, } for (xattr = xattr_array; xattr->name != NULL; xattr++) { - new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len); + CLASS(simple_xattr, new_xattr)(xattr->value, xattr->value_len); if (IS_ERR(new_xattr)) break; len = strlen(xattr->name) + 1; new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len, GFP_KERNEL_ACCOUNT); - if (!new_xattr->name) { - kvfree(new_xattr); + if (!new_xattr->name) break; - } memcpy(new_xattr->name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN, xattr->name, len); - simple_xattr_add(&info->xattrs, new_xattr); + if (simple_xattr_add(xattrs, new_xattr)) + break; + 
retain_and_null_ptr(new_xattr); } if (xattr->name != NULL) { @@ -4303,10 +4308,10 @@ static int shmem_initxattrs(struct inode *inode, sbinfo->free_ispace += ispace; raw_spin_unlock(&sbinfo->stat_lock); } - simple_xattrs_free(&info->xattrs, NULL); return -ENOMEM; } + smp_store_release(&info->xattrs, no_free_ptr(xattrs)); return 0; } @@ -4315,9 +4320,14 @@ static int shmem_xattr_handler_get(const struct xattr_handler *handler, const char *name, void *buffer, size_t size) { struct shmem_inode_info *info = SHMEM_I(inode); + struct simple_xattrs *xattrs; + + xattrs = READ_ONCE(info->xattrs); + if (!xattrs) + return -ENODATA; name = xattr_full_name(handler, name); - return simple_xattr_get(&info->xattrs, name, buffer, size); + return simple_xattr_get(xattrs, name, buffer, size); } static int shmem_xattr_handler_set(const struct xattr_handler *handler, @@ -4328,10 +4338,16 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler, { struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); + struct simple_xattrs *xattrs; struct simple_xattr *old_xattr; size_t ispace = 0; name = xattr_full_name(handler, name); + + xattrs = simple_xattrs_lazy_alloc(&info->xattrs, value, flags); + if (IS_ERR_OR_NULL(xattrs)) + return PTR_ERR(xattrs); + if (value && sbinfo->max_inodes) { ispace = simple_xattr_space(name, size); raw_spin_lock(&sbinfo->stat_lock); @@ -4344,13 +4360,13 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler, return -ENOSPC; } - old_xattr = simple_xattr_set(&info->xattrs, name, value, size, flags); + old_xattr = simple_xattr_set(xattrs, name, value, size, flags); if (!IS_ERR(old_xattr)) { ispace = 0; if (old_xattr && sbinfo->max_inodes) ispace = simple_xattr_space(old_xattr->name, old_xattr->size); - simple_xattr_free(old_xattr); + simple_xattr_free_rcu(old_xattr); old_xattr = NULL; inode_set_ctime_current(inode); inode_inc_iversion(inode); @@ -4391,7 +4407,9 @@ static const struct 
xattr_handler * const shmem_xattr_handlers[] = { static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) { struct shmem_inode_info *info = SHMEM_I(d_inode(dentry)); - return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size); + + return simple_xattr_list(d_inode(dentry), READ_ONCE(info->xattrs), + buffer, size); } #endif /* CONFIG_TMPFS_XATTR */ -- cgit v1.2.3 From 25ab7b6f34c74ea555b4489b57f7219612991433 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 16 Feb 2026 14:32:02 +0100 Subject: xattr: remove rbtree-based simple_xattr infrastructure Now that all consumers (shmem, kernfs, pidfs) have been converted to use the rhashtable-based simple_xattrs with pointer-based lazy allocation, remove the legacy rbtree code path. The rhashtable implementation provides O(1) average-case lookup with RCU-based lockless reads, replacing the O(log n) rbtree with reader-writer spinlock contention. Link: https://patch.msgid.link/20260216-work-xattr-socket-v1-6-c2efa4f74cb7@kernel.org Acked-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/xattr.c | 387 +++++++++++++------------------------------------- include/linux/xattr.h | 12 +- 2 files changed, 103 insertions(+), 296 deletions(-) (limited to 'include') diff --git a/fs/xattr.c b/fs/xattr.c index eb45ae0fd17f..64803097e1dc 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -1200,20 +1200,18 @@ void simple_xattr_free(struct simple_xattr *xattr) static void simple_xattr_rcu_free(struct rcu_head *head) { - struct simple_xattr *xattr; + struct simple_xattr *xattr = container_of(head, struct simple_xattr, rcu); - xattr = container_of(head, struct simple_xattr, rcu); simple_xattr_free(xattr); } /** - * simple_xattr_free_rcu - free an xattr object after an RCU grace period + * simple_xattr_free_rcu - free an xattr object with RCU delay * @xattr: the xattr object * - * Schedule RCU-deferred freeing of an xattr entry. 
This is used by - * rhashtable-based callers of simple_xattr_set() that replace or remove - * an existing entry while concurrent RCU readers may still be accessing - * it. + * Free the xattr object after an RCU grace period. This must be used when + * the xattr was removed from a data structure that concurrent RCU readers + * may still be traversing. Can handle @xattr being NULL. */ void simple_xattr_free_rcu(struct simple_xattr *xattr) { @@ -1254,43 +1252,6 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size) return new_xattr; } -/** - * rbtree_simple_xattr_cmp - compare xattr name with current rbtree xattr entry - * @key: xattr name - * @node: current node - * - * Compare the xattr name with the xattr name attached to @node in the rbtree. - * - * Return: Negative value if continuing left, positive if continuing right, 0 - * if the xattr attached to @node matches @key. - */ -static int rbtree_simple_xattr_cmp(const void *key, const struct rb_node *node) -{ - const char *xattr_name = key; - const struct simple_xattr *xattr; - - xattr = rb_entry(node, struct simple_xattr, rb_node); - return strcmp(xattr->name, xattr_name); -} - -/** - * rbtree_simple_xattr_node_cmp - compare two xattr rbtree nodes - * @new_node: new node - * @node: current node - * - * Compare the xattr attached to @new_node with the xattr attached to @node. - * - * Return: Negative value if continuing left, positive if continuing right, 0 - * if the xattr attached to @new_node matches the xattr attached to @node. 
- */ -static int rbtree_simple_xattr_node_cmp(struct rb_node *new_node, - const struct rb_node *node) -{ - struct simple_xattr *xattr; - xattr = rb_entry(new_node, struct simple_xattr, rb_node); - return rbtree_simple_xattr_cmp(xattr->name, node); -} - static u32 simple_xattr_hashfn(const void *data, u32 len, u32 seed) { const char *name = data; @@ -1336,41 +1297,19 @@ static const struct rhashtable_params simple_xattr_params = { int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, void *buffer, size_t size) { - struct simple_xattr *xattr = NULL; + struct simple_xattr *xattr; int ret = -ENODATA; - if (xattrs->use_rhashtable) { - guard(rcu)(); - xattr = rhashtable_lookup(&xattrs->ht, name, - simple_xattr_params); - if (xattr) { - ret = xattr->size; - if (buffer) { - if (size < xattr->size) - ret = -ERANGE; - else - memcpy(buffer, xattr->value, - xattr->size); - } - } - } else { - struct rb_node *rbp; - - read_lock(&xattrs->lock); - rbp = rb_find(name, &xattrs->rb_root, - rbtree_simple_xattr_cmp); - if (rbp) { - xattr = rb_entry(rbp, struct simple_xattr, rb_node); - ret = xattr->size; - if (buffer) { - if (size < xattr->size) - ret = -ERANGE; - else - memcpy(buffer, xattr->value, - xattr->size); - } + guard(rcu)(); + xattr = rhashtable_lookup(&xattrs->ht, name, simple_xattr_params); + if (xattr) { + ret = xattr->size; + if (buffer) { + if (size < xattr->size) + ret = -ERANGE; + else + memcpy(buffer, xattr->value, xattr->size); } - read_unlock(&xattrs->lock); } return ret; } @@ -1398,6 +1337,11 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, * nothing if XATTR_CREATE is specified in @flags or @flags is zero. For * XATTR_REPLACE we fail as mentioned above. * + * Note: Callers must externally serialize writes. All current callers hold + * the inode lock for write operations. 
The lookup->replace/remove sequence + * is not atomic with respect to the rhashtable's per-bucket locking, but + * is safe because writes are serialized by the caller. + * * Return: On success, the removed or replaced xattr is returned, to be freed * by the caller; or NULL if none. On failure a negative error code is returned. */ @@ -1406,7 +1350,7 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs, size_t size, int flags) { struct simple_xattr *old_xattr = NULL; - int err = 0; + int err; CLASS(simple_xattr, new_xattr)(value, size); if (IS_ERR(new_xattr)) @@ -1418,119 +1362,52 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs, return ERR_PTR(-ENOMEM); } - if (xattrs->use_rhashtable) { - /* - * Lookup is safe without RCU here since writes are - * serialized by the caller. - */ - old_xattr = rhashtable_lookup_fast(&xattrs->ht, name, - simple_xattr_params); - - if (old_xattr) { - /* Fail if XATTR_CREATE is requested and the xattr exists. */ - if (flags & XATTR_CREATE) - return ERR_PTR(-EEXIST); - - if (new_xattr) { - err = rhashtable_replace_fast(&xattrs->ht, - &old_xattr->hash_node, - &new_xattr->hash_node, - simple_xattr_params); - if (err) - return ERR_PTR(err); - } else { - err = rhashtable_remove_fast(&xattrs->ht, - &old_xattr->hash_node, - simple_xattr_params); - if (err) - return ERR_PTR(err); - } - } else { - /* Fail if XATTR_REPLACE is requested but no xattr is found. */ - if (flags & XATTR_REPLACE) - return ERR_PTR(-ENODATA); - - /* - * If XATTR_CREATE or no flags are specified together - * with a new value simply insert it. - */ - if (new_xattr) { - err = rhashtable_insert_fast(&xattrs->ht, - &new_xattr->hash_node, - simple_xattr_params); - if (err) - return ERR_PTR(err); - } - - /* - * If XATTR_CREATE or no flags are specified and - * neither an old or new xattr exist then we don't - * need to do anything. 
- */ - } - } else { - struct rb_node *parent = NULL, **rbp; - int ret; - - write_lock(&xattrs->lock); - rbp = &xattrs->rb_root.rb_node; - while (*rbp) { - parent = *rbp; - ret = rbtree_simple_xattr_cmp(name, *rbp); - if (ret < 0) - rbp = &(*rbp)->rb_left; - else if (ret > 0) - rbp = &(*rbp)->rb_right; - else - old_xattr = rb_entry(*rbp, struct simple_xattr, - rb_node); - if (old_xattr) - break; - } + /* Lookup is safe without RCU here since writes are serialized. */ + old_xattr = rhashtable_lookup_fast(&xattrs->ht, name, + simple_xattr_params); - if (old_xattr) { - /* Fail if XATTR_CREATE is requested and the xattr exists. */ - if (flags & XATTR_CREATE) { - err = -EEXIST; - goto out_unlock; - } + if (old_xattr) { + /* Fail if XATTR_CREATE is requested and the xattr exists. */ + if (flags & XATTR_CREATE) + return ERR_PTR(-EEXIST); - if (new_xattr) - rb_replace_node(&old_xattr->rb_node, - &new_xattr->rb_node, - &xattrs->rb_root); - else - rb_erase(&old_xattr->rb_node, - &xattrs->rb_root); + if (new_xattr) { + err = rhashtable_replace_fast(&xattrs->ht, + &old_xattr->hash_node, + &new_xattr->hash_node, + simple_xattr_params); + if (err) + return ERR_PTR(err); } else { - /* Fail if XATTR_REPLACE is requested but no xattr is found. */ - if (flags & XATTR_REPLACE) { - err = -ENODATA; - goto out_unlock; - } - - /* - * If XATTR_CREATE or no flags are specified together - * with a new value simply insert it. - */ - if (new_xattr) { - rb_link_node(&new_xattr->rb_node, parent, rbp); - rb_insert_color(&new_xattr->rb_node, - &xattrs->rb_root); - } + err = rhashtable_remove_fast(&xattrs->ht, + &old_xattr->hash_node, + simple_xattr_params); + if (err) + return ERR_PTR(err); + } + } else { + /* Fail if XATTR_REPLACE is requested but no xattr is found. */ + if (flags & XATTR_REPLACE) + return ERR_PTR(-ENODATA); - /* - * If XATTR_CREATE or no flags are specified and - * neither an old or new xattr exist then we don't - * need to do anything. 
- */ + /* + * If XATTR_CREATE or no flags are specified together with a + * new value simply insert it. + */ + if (new_xattr) { + err = rhashtable_insert_fast(&xattrs->ht, + &new_xattr->hash_node, + simple_xattr_params); + if (err) + return ERR_PTR(err); } -out_unlock: - write_unlock(&xattrs->lock); - if (err) - return ERR_PTR(err); + /* + * If XATTR_CREATE or no flags are specified and neither an old + * nor a new xattr exists then we don't need to do anything. + */ } + retain_and_null_ptr(new_xattr); return old_xattr; } @@ -1572,6 +1449,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer, size_t size) { bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); + struct rhashtable_iter iter; struct simple_xattr *xattr; ssize_t remaining_size = size; int err = 0; @@ -1595,77 +1473,34 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, if (!xattrs) return size - remaining_size; - if (xattrs->use_rhashtable) { - struct rhashtable_iter iter; - - rhashtable_walk_enter(&xattrs->ht, &iter); - rhashtable_walk_start(&iter); - - while ((xattr = rhashtable_walk_next(&iter)) != NULL) { - if (IS_ERR(xattr)) { - if (PTR_ERR(xattr) == -EAGAIN) - continue; - err = PTR_ERR(xattr); - break; - } - - /* skip "trusted." 
attributes for unprivileged callers */ - if (!trusted && xattr_is_trusted(xattr->name)) - continue; + rhashtable_walk_enter(&xattrs->ht, &iter); + rhashtable_walk_start(&iter); - /* skip MAC labels; these are provided by LSM above */ - if (xattr_is_maclabel(xattr->name)) + while ((xattr = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(xattr)) { + if (PTR_ERR(xattr) == -EAGAIN) continue; - - err = xattr_list_one(&buffer, &remaining_size, - xattr->name); - if (err) - break; + err = PTR_ERR(xattr); + break; } - rhashtable_walk_stop(&iter); - rhashtable_walk_exit(&iter); - } else { - struct rb_node *rbp; - - read_lock(&xattrs->lock); - for (rbp = rb_first(&xattrs->rb_root); rbp; - rbp = rb_next(rbp)) { - xattr = rb_entry(rbp, struct simple_xattr, rb_node); - - /* skip "trusted." attributes for unprivileged callers */ - if (!trusted && xattr_is_trusted(xattr->name)) - continue; + /* skip "trusted." attributes for unprivileged callers */ + if (!trusted && xattr_is_trusted(xattr->name)) + continue; - /* skip MAC labels; these are provided by LSM above */ - if (xattr_is_maclabel(xattr->name)) - continue; + /* skip MAC labels; these are provided by LSM above */ + if (xattr_is_maclabel(xattr->name)) + continue; - err = xattr_list_one(&buffer, &remaining_size, - xattr->name); - if (err) - break; - } - read_unlock(&xattrs->lock); + err = xattr_list_one(&buffer, &remaining_size, xattr->name); + if (err) + break; } - return err ? err : size - remaining_size; -} + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); -/** - * rbtree_simple_xattr_less - compare two xattr rbtree nodes - * @new_node: new node - * @node: current node - * - * Compare the xattr attached to @new_node with the xattr attached to @node. - * Note that this function technically tolerates duplicate entries. - * - * Return: True if insertion point in the rbtree is found. 
- */ -static bool rbtree_simple_xattr_less(struct rb_node *new_node, - const struct rb_node *node) -{ - return rbtree_simple_xattr_node_cmp(new_node, node) < 0; + return err ? err : size - remaining_size; } /** @@ -1676,33 +1511,29 @@ static bool rbtree_simple_xattr_less(struct rb_node *new_node, * Add an xattr object to @xattrs. This assumes no replacement or removal * of matching xattrs is wanted. Should only be called during inode * initialization when a few distinct initial xattrs are supposed to be set. + * + * Return: On success zero is returned. On failure a negative error code is + * returned. */ int simple_xattr_add(struct simple_xattrs *xattrs, struct simple_xattr *new_xattr) { - if (xattrs->use_rhashtable) - return rhashtable_insert_fast(&xattrs->ht, - &new_xattr->hash_node, - simple_xattr_params); - - write_lock(&xattrs->lock); - rb_add(&new_xattr->rb_node, &xattrs->rb_root, - rbtree_simple_xattr_less); - write_unlock(&xattrs->lock); - return 0; + return rhashtable_insert_fast(&xattrs->ht, &new_xattr->hash_node, + simple_xattr_params); } /** * simple_xattrs_init - initialize new xattr header * @xattrs: header to initialize * - * Initialize relevant fields of a an xattr header. + * Initialize the rhashtable used to store xattr objects. + * + * Return: On success zero is returned. On failure a negative error code is + * returned. */ -void simple_xattrs_init(struct simple_xattrs *xattrs) +int simple_xattrs_init(struct simple_xattrs *xattrs) { - xattrs->use_rhashtable = false; - xattrs->rb_root = RB_ROOT; - rwlock_init(&xattrs->lock); + return rhashtable_init(&xattrs->ht, &simple_xattr_params); } /** @@ -1710,7 +1541,8 @@ void simple_xattrs_init(struct simple_xattrs *xattrs) * * Dynamically allocate a simple_xattrs header and initialize the * underlying rhashtable. This is intended for consumers that want - * rhashtable-based xattr storage. 
+ * to lazily allocate xattr storage only when the first xattr is set, + * avoiding the per-inode rhashtable overhead when no xattrs are used. * * Return: On success a new simple_xattrs is returned. On failure an * ERR_PTR is returned. @@ -1718,14 +1550,15 @@ void simple_xattrs_init(struct simple_xattrs *xattrs) struct simple_xattrs *simple_xattrs_alloc(void) { struct simple_xattrs *xattrs __free(kfree) = NULL; + int ret; xattrs = kzalloc(sizeof(*xattrs), GFP_KERNEL); if (!xattrs) return ERR_PTR(-ENOMEM); - xattrs->use_rhashtable = true; - if (rhashtable_init(&xattrs->ht, &simple_xattr_params)) - return ERR_PTR(-ENOMEM); + ret = simple_xattrs_init(xattrs); + if (ret) + return ERR_PTR(ret); return no_free_ptr(xattrs); } @@ -1784,28 +1617,10 @@ static void simple_xattr_ht_free(void *ptr, void *arg) */ void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space) { + might_sleep(); + if (freed_space) *freed_space = 0; - - if (xattrs->use_rhashtable) { - rhashtable_free_and_destroy(&xattrs->ht, - simple_xattr_ht_free, freed_space); - } else { - struct rb_node *rbp; - - rbp = rb_first(&xattrs->rb_root); - while (rbp) { - struct simple_xattr *xattr; - struct rb_node *rbp_next; - - rbp_next = rb_next(rbp); - xattr = rb_entry(rbp, struct simple_xattr, rb_node); - rb_erase(&xattr->rb_node, &xattrs->rb_root); - if (freed_space) - *freed_space += simple_xattr_space(xattr->name, - xattr->size); - simple_xattr_free(xattr); - rbp = rbp_next; - } - } + rhashtable_free_and_destroy(&xattrs->ht, simple_xattr_ht_free, + freed_space); } diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 6e619e185e90..3b5a5fd684eb 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -107,18 +107,10 @@ static inline const char *xattr_prefix(const struct xattr_handler *handler) } struct simple_xattrs { - bool use_rhashtable; - union { - struct { - struct rb_root rb_root; - rwlock_t lock; - }; - struct rhashtable ht; - }; + struct rhashtable ht; }; struct 
simple_xattr { - struct rb_node rb_node; struct rhash_head hash_node; struct rcu_head rcu; char *name; @@ -126,7 +118,7 @@ struct simple_xattr { char value[] __counted_by(size); }; -void simple_xattrs_init(struct simple_xattrs *xattrs); +int simple_xattrs_init(struct simple_xattrs *xattrs); struct simple_xattrs *simple_xattrs_alloc(void); struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp, const void *value, int flags); -- cgit v1.2.3 From 4fbe9e78bb415dd632ff63a9f620af0be58ef820 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 16 Feb 2026 14:32:05 +0100 Subject: xattr: move user limits for xattrs to generic infra Link: https://patch.msgid.link/20260216-work-xattr-socket-v1-9-c2efa4f74cb7@kernel.org Acked-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/kernfs/inode.c | 75 ++------------------------------------------- fs/kernfs/kernfs-internal.h | 3 +- fs/xattr.c | 65 +++++++++++++++++++++++++++++++++++++++ include/linux/kernfs.h | 2 -- include/linux/xattr.h | 18 +++++++++++ 5 files changed, 87 insertions(+), 76 deletions(-) (limited to 'include') diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index dfc3315b5afc..1de10500842d 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -45,8 +45,7 @@ static struct kernfs_iattrs *__kernfs_iattrs(struct kernfs_node *kn, bool alloc) ret->ia_mtime = ret->ia_atime; ret->ia_ctime = ret->ia_atime; - atomic_set(&ret->nr_user_xattrs, 0); - atomic_set(&ret->user_xattr_size, 0); + simple_xattr_limits_init(&ret->xattr_limits); /* If someone raced us, recognize it. 
*/ if (!try_cmpxchg(&kn->iattr, &attr, ret)) @@ -355,69 +354,6 @@ static int kernfs_vfs_xattr_set(const struct xattr_handler *handler, return kernfs_xattr_set(kn, name, value, size, flags); } -static int kernfs_vfs_user_xattr_add(struct kernfs_node *kn, - const char *full_name, - struct simple_xattrs *xattrs, - const void *value, size_t size, int flags) -{ - struct kernfs_iattrs *attr = kernfs_iattrs_noalloc(kn); - atomic_t *sz = &attr->user_xattr_size; - atomic_t *nr = &attr->nr_user_xattrs; - struct simple_xattr *old_xattr; - int ret; - - if (atomic_inc_return(nr) > KERNFS_MAX_USER_XATTRS) { - ret = -ENOSPC; - goto dec_count_out; - } - - if (atomic_add_return(size, sz) > KERNFS_USER_XATTR_SIZE_LIMIT) { - ret = -ENOSPC; - goto dec_size_out; - } - - old_xattr = simple_xattr_set(xattrs, full_name, value, size, flags); - if (!old_xattr) - return 0; - - if (IS_ERR(old_xattr)) { - ret = PTR_ERR(old_xattr); - goto dec_size_out; - } - - ret = 0; - size = old_xattr->size; - simple_xattr_free_rcu(old_xattr); -dec_size_out: - atomic_sub(size, sz); -dec_count_out: - atomic_dec(nr); - return ret; -} - -static int kernfs_vfs_user_xattr_rm(struct kernfs_node *kn, - const char *full_name, - struct simple_xattrs *xattrs, - const void *value, size_t size, int flags) -{ - struct kernfs_iattrs *attr = kernfs_iattrs_noalloc(kn); - atomic_t *sz = &attr->user_xattr_size; - atomic_t *nr = &attr->nr_user_xattrs; - struct simple_xattr *old_xattr; - - old_xattr = simple_xattr_set(xattrs, full_name, value, size, flags); - if (!old_xattr) - return 0; - - if (IS_ERR(old_xattr)) - return PTR_ERR(old_xattr); - - atomic_sub(old_xattr->size, sz); - atomic_dec(nr); - simple_xattr_free_rcu(old_xattr); - return 0; -} - static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler, struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, @@ -440,13 +376,8 @@ static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler, if (IS_ERR_OR_NULL(xattrs)) return 
PTR_ERR(xattrs); - if (value) - return kernfs_vfs_user_xattr_add(kn, full_name, xattrs, - value, size, flags); - else - return kernfs_vfs_user_xattr_rm(kn, full_name, xattrs, - value, size, flags); - + return simple_xattr_set_limited(xattrs, &attrs->xattr_limits, + full_name, value, size, flags); } static const struct xattr_handler kernfs_trusted_xattr_handler = { diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 1324ed8c0661..1d3831e3a270 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -27,8 +27,7 @@ struct kernfs_iattrs { struct timespec64 ia_ctime; struct simple_xattrs *xattrs; - atomic_t nr_user_xattrs; - atomic_t user_xattr_size; + struct simple_xattr_limits xattr_limits; }; struct kernfs_root { diff --git a/fs/xattr.c b/fs/xattr.c index 328ed7558dfc..5e559b1c651f 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -1427,6 +1427,71 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs, return old_xattr; } +static inline void simple_xattr_limits_dec(struct simple_xattr_limits *limits, + size_t size) +{ + atomic_sub(size, &limits->xattr_size); + atomic_dec(&limits->nr_xattrs); +} + +static inline int simple_xattr_limits_inc(struct simple_xattr_limits *limits, + size_t size) +{ + if (atomic_inc_return(&limits->nr_xattrs) > SIMPLE_XATTR_MAX_NR) { + atomic_dec(&limits->nr_xattrs); + return -ENOSPC; + } + + if (atomic_add_return(size, &limits->xattr_size) <= SIMPLE_XATTR_MAX_SIZE) + return 0; + + simple_xattr_limits_dec(limits, size); + return -ENOSPC; +} + +/** + * simple_xattr_set_limited - set an xattr with per-inode user.* limits + * @xattrs: the header of the xattr object + * @limits: per-inode limit counters for user.* xattrs + * @name: the name of the xattr to set or remove + * @value: the value to store (NULL to remove) + * @size: the size of @value + * @flags: XATTR_CREATE, XATTR_REPLACE, or 0 + * + * Like simple_xattr_set(), but enforces per-inode count and total value size + * limits for 
user.* xattrs. Uses speculative pre-increment of the atomic + * counters to avoid races without requiring external locks. + * + * Return: On success zero is returned. On failure a negative error code is + * returned. + */ +int simple_xattr_set_limited(struct simple_xattrs *xattrs, + struct simple_xattr_limits *limits, + const char *name, const void *value, + size_t size, int flags) +{ + struct simple_xattr *old_xattr; + int ret; + + if (value) { + ret = simple_xattr_limits_inc(limits, size); + if (ret) + return ret; + } + + old_xattr = simple_xattr_set(xattrs, name, value, size, flags); + if (IS_ERR(old_xattr)) { + if (value) + simple_xattr_limits_dec(limits, size); + return PTR_ERR(old_xattr); + } + if (old_xattr) { + simple_xattr_limits_dec(limits, old_xattr->size); + simple_xattr_free_rcu(old_xattr); + } + return 0; +} + static bool xattr_is_trusted(const char *name) { return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index b5a5f32fdfd1..d8f57f0af5e4 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -99,8 +99,6 @@ enum kernfs_node_type { #define KERNFS_TYPE_MASK 0x000f #define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK -#define KERNFS_MAX_USER_XATTRS 128 -#define KERNFS_USER_XATTR_SIZE_LIMIT (128 << 10) enum kernfs_node_flag { KERNFS_ACTIVATED = 0x0010, diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 3b5a5fd684eb..8b6601367eae 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -118,6 +118,20 @@ struct simple_xattr { char value[] __counted_by(size); }; +#define SIMPLE_XATTR_MAX_NR 128 +#define SIMPLE_XATTR_MAX_SIZE (128 << 10) + +struct simple_xattr_limits { + atomic_t nr_xattrs; /* current user.* xattr count */ + atomic_t xattr_size; /* current total user.* value bytes */ +}; + +static inline void simple_xattr_limits_init(struct simple_xattr_limits *limits) +{ + atomic_set(&limits->nr_xattrs, 0); + atomic_set(&limits->xattr_size, 0); +} + 
int simple_xattrs_init(struct simple_xattrs *xattrs); struct simple_xattrs *simple_xattrs_alloc(void); struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp, @@ -132,6 +146,10 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs, const char *name, const void *value, size_t size, int flags); +int simple_xattr_set_limited(struct simple_xattrs *xattrs, + struct simple_xattr_limits *limits, + const char *name, const void *value, + size_t size, int flags); ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer, size_t size); int simple_xattr_add(struct simple_xattrs *xattrs, -- cgit v1.2.3