summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Brauner <brauner@kernel.org>2026-02-23 15:06:04 +0300
committerChristian Brauner <brauner@kernel.org>2026-03-02 13:06:43 +0300
commit98779186aa0b3367489a87c6d8bc0911f577444e (patch)
tree337031c82c583f1e4b6d29ee72bf9fea921eb0a9
parent6de23f81a5e08be8fbf5e8d7e9febc72a5b5f27f (diff)
parent0f1f4e4e1503e605f2189bfa10290d7b8920187c (diff)
downloadlinux-98779186aa0b3367489a87c6d8bc0911f577444e.tar.xz
Merge patch series "xattr: rework simple xattrs and support user.* xattrs on sockets"
Christian Brauner <brauner@kernel.org> says: This reworks the simple_xattr infrastructure and adds support for user.* extended attributes on sockets. The simple_xattr subsystem currently uses an rbtree protected by a reader-writer spinlock. This series replaces the rbtree with an rhashtable giving O(1) average-case lookup with RCU-based lockless reads. This sped up concurrent access patterns on tmpfs quite a bit and it's an overall easy enough conversion to do and gets rid of rwlock_t. The conversion is done incrementally: a new rhashtable path is added alongside the existing rbtree, consumers are migrated one at a time (shmem, kernfs, pidfs), and then the rbtree code is removed. All three consumers switch from embedded structs to pointer-based lazy allocation so the rhashtable overhead is only paid for inodes that actually use xattrs. With this infrastructure in place the series adds support for user.* xattrs on sockets. Path-based AF_UNIX sockets inherit xattr support from the underlying filesystem (e.g. tmpfs) but sockets in sockfs - that is everything created via socket() including abstract namespace AF_UNIX sockets - had no xattr support at all. The xattr_permission() checks are reworked to allow user.* xattrs on S_IFSOCK inodes. Sockfs sockets get per-inode limits of 128 xattrs and 128KB total value size matching the limits already in use for kernfs. The practical motivation comes from several directions. systemd and GNOME are expanding their use of Varlink as an IPC mechanism. For D-Bus there are tools like dbus-monitor that can observe IPC traffic across the system but this only works because D-Bus has a central broker. For Varlink there is no broker and there is currently no way to identify which sockets speak Varlink. With user.* xattrs on sockets a service can label its socket with the IPC protocol it speaks (e.g., user.varlink=1) and an eBPF program can then selectively capture traffic on those sockets. 
Enumerating bound sockets via netlink combined with these xattr labels gives a way to discover all Varlink IPC entrypoints for debugging and introspection. Similarly, systemd-journald wants to use xattrs on the /dev/log socket for protocol negotiation to indicate whether RFC 5424 structured syslog is supported or whether only the legacy RFC 3164 format should be used. In containers these labels are particularly useful as high-privilege or more complicated solutions for socket identification aren't available. The series comes with comprehensive selftests covering path-based AF_UNIX sockets, sockfs socket operations, per-inode limit enforcement, and xattr operations across multiple address families (AF_INET, AF_INET6, AF_NETLINK, AF_PACKET). * patches from https://patch.msgid.link/20260216-work-xattr-socket-v1-0-c2efa4f74cb7@kernel.org: selftests/xattr: test xattrs on various socket families selftests/xattr: sockfs socket xattr tests selftests/xattr: path-based AF_UNIX socket xattr tests xattr: support extended attributes on sockets xattr,net: support limited amount of extended attributes on sockfs sockets xattr: move user limits for xattrs to generic infra xattr: switch xattr_permission() to switch statement xattr: add xattr_permission_error() xattr: remove rbtree-based simple_xattr infrastructure pidfs: adapt to rhashtable-based simple_xattrs kernfs: adapt to rhashtable-based simple_xattrs with lazy allocation shmem: adapt to rhashtable-based simple_xattrs with lazy allocation xattr: add rhashtable-based simple_xattr infrastructure xattr: add rcu_head and rhash_head to struct simple_xattr Link: https://patch.msgid.link/20260216-work-xattr-socket-v1-0-c2efa4f74cb7@kernel.org Signed-off-by: Christian Brauner <brauner@kernel.org>
-rw-r--r--fs/kernfs/dir.c15
-rw-r--r--fs/kernfs/inode.c99
-rw-r--r--fs/kernfs/kernfs-internal.h5
-rw-r--r--fs/pidfs.c64
-rw-r--r--fs/xattr.c423
-rw-r--r--include/linux/kernfs.h2
-rw-r--r--include/linux/shmem_fs.h2
-rw-r--r--include/linux/xattr.h47
-rw-r--r--mm/shmem.c46
-rw-r--r--net/socket.c119
-rw-r--r--tools/testing/selftests/filesystems/xattr/.gitignore3
-rw-r--r--tools/testing/selftests/filesystems/xattr/Makefile6
-rw-r--r--tools/testing/selftests/filesystems/xattr/xattr_socket_test.c470
-rw-r--r--tools/testing/selftests/filesystems/xattr/xattr_socket_types_test.c177
-rw-r--r--tools/testing/selftests/filesystems/xattr/xattr_sockfs_test.c363
15 files changed, 1546 insertions, 295 deletions
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 8d40c4b1db9f..62d83cc1ef4d 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -547,10 +547,8 @@ static void kernfs_free_rcu(struct rcu_head *rcu)
/* If the whole node goes away, then name can't be used outside */
kfree_const(rcu_access_pointer(kn->name));
- if (kn->iattr) {
- simple_xattrs_free(&kn->iattr->xattrs, NULL);
+ if (kn->iattr)
kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
- }
kmem_cache_free(kernfs_node_cache, kn);
}
@@ -584,6 +582,12 @@ void kernfs_put(struct kernfs_node *kn)
if (kernfs_type(kn) == KERNFS_LINK)
kernfs_put(kn->symlink.target_kn);
+ if (kn->iattr && kn->iattr->xattrs) {
+ simple_xattrs_free(kn->iattr->xattrs, NULL);
+ kfree(kn->iattr->xattrs);
+ kn->iattr->xattrs = NULL;
+ }
+
spin_lock(&root->kernfs_idr_lock);
idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
spin_unlock(&root->kernfs_idr_lock);
@@ -682,7 +686,10 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
err_out4:
if (kn->iattr) {
- simple_xattrs_free(&kn->iattr->xattrs, NULL);
+ if (kn->iattr->xattrs) {
+ simple_xattrs_free(kn->iattr->xattrs, NULL);
+ kfree(kn->iattr->xattrs);
+ }
kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
}
err_out3:
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index a36aaee98dce..1de10500842d 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -45,9 +45,7 @@ static struct kernfs_iattrs *__kernfs_iattrs(struct kernfs_node *kn, bool alloc)
ret->ia_mtime = ret->ia_atime;
ret->ia_ctime = ret->ia_atime;
- simple_xattrs_init(&ret->xattrs);
- atomic_set(&ret->nr_user_xattrs, 0);
- atomic_set(&ret->user_xattr_size, 0);
+ simple_xattr_limits_init(&ret->xattr_limits);
/* If someone raced us, recognize it. */
if (!try_cmpxchg(&kn->iattr, &attr, ret))
@@ -146,7 +144,8 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size)
if (!attrs)
return -ENOMEM;
- return simple_xattr_list(d_inode(dentry), &attrs->xattrs, buf, size);
+ return simple_xattr_list(d_inode(dentry), READ_ONCE(attrs->xattrs),
+ buf, size);
}
static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
@@ -298,27 +297,38 @@ int kernfs_xattr_get(struct kernfs_node *kn, const char *name,
void *value, size_t size)
{
struct kernfs_iattrs *attrs = kernfs_iattrs_noalloc(kn);
+ struct simple_xattrs *xattrs;
+
if (!attrs)
return -ENODATA;
- return simple_xattr_get(&attrs->xattrs, name, value, size);
+ xattrs = READ_ONCE(attrs->xattrs);
+ if (!xattrs)
+ return -ENODATA;
+
+ return simple_xattr_get(xattrs, name, value, size);
}
int kernfs_xattr_set(struct kernfs_node *kn, const char *name,
const void *value, size_t size, int flags)
{
struct simple_xattr *old_xattr;
+ struct simple_xattrs *xattrs;
struct kernfs_iattrs *attrs;
attrs = kernfs_iattrs(kn);
if (!attrs)
return -ENOMEM;
- old_xattr = simple_xattr_set(&attrs->xattrs, name, value, size, flags);
+ xattrs = simple_xattrs_lazy_alloc(&attrs->xattrs, value, flags);
+ if (IS_ERR_OR_NULL(xattrs))
+ return PTR_ERR(xattrs);
+
+ old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
if (IS_ERR(old_xattr))
return PTR_ERR(old_xattr);
- simple_xattr_free(old_xattr);
+ simple_xattr_free_rcu(old_xattr);
return 0;
}
@@ -344,69 +354,6 @@ static int kernfs_vfs_xattr_set(const struct xattr_handler *handler,
return kernfs_xattr_set(kn, name, value, size, flags);
}
-static int kernfs_vfs_user_xattr_add(struct kernfs_node *kn,
- const char *full_name,
- struct simple_xattrs *xattrs,
- const void *value, size_t size, int flags)
-{
- struct kernfs_iattrs *attr = kernfs_iattrs_noalloc(kn);
- atomic_t *sz = &attr->user_xattr_size;
- atomic_t *nr = &attr->nr_user_xattrs;
- struct simple_xattr *old_xattr;
- int ret;
-
- if (atomic_inc_return(nr) > KERNFS_MAX_USER_XATTRS) {
- ret = -ENOSPC;
- goto dec_count_out;
- }
-
- if (atomic_add_return(size, sz) > KERNFS_USER_XATTR_SIZE_LIMIT) {
- ret = -ENOSPC;
- goto dec_size_out;
- }
-
- old_xattr = simple_xattr_set(xattrs, full_name, value, size, flags);
- if (!old_xattr)
- return 0;
-
- if (IS_ERR(old_xattr)) {
- ret = PTR_ERR(old_xattr);
- goto dec_size_out;
- }
-
- ret = 0;
- size = old_xattr->size;
- simple_xattr_free(old_xattr);
-dec_size_out:
- atomic_sub(size, sz);
-dec_count_out:
- atomic_dec(nr);
- return ret;
-}
-
-static int kernfs_vfs_user_xattr_rm(struct kernfs_node *kn,
- const char *full_name,
- struct simple_xattrs *xattrs,
- const void *value, size_t size, int flags)
-{
- struct kernfs_iattrs *attr = kernfs_iattrs_noalloc(kn);
- atomic_t *sz = &attr->user_xattr_size;
- atomic_t *nr = &attr->nr_user_xattrs;
- struct simple_xattr *old_xattr;
-
- old_xattr = simple_xattr_set(xattrs, full_name, value, size, flags);
- if (!old_xattr)
- return 0;
-
- if (IS_ERR(old_xattr))
- return PTR_ERR(old_xattr);
-
- atomic_sub(old_xattr->size, sz);
- atomic_dec(nr);
- simple_xattr_free(old_xattr);
- return 0;
-}
-
static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
struct mnt_idmap *idmap,
struct dentry *unused, struct inode *inode,
@@ -415,6 +362,7 @@ static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
{
const char *full_name = xattr_full_name(handler, suffix);
struct kernfs_node *kn = inode->i_private;
+ struct simple_xattrs *xattrs;
struct kernfs_iattrs *attrs;
if (!(kernfs_root(kn)->flags & KERNFS_ROOT_SUPPORT_USER_XATTR))
@@ -424,13 +372,12 @@ static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
if (!attrs)
return -ENOMEM;
- if (value)
- return kernfs_vfs_user_xattr_add(kn, full_name, &attrs->xattrs,
- value, size, flags);
- else
- return kernfs_vfs_user_xattr_rm(kn, full_name, &attrs->xattrs,
- value, size, flags);
+ xattrs = simple_xattrs_lazy_alloc(&attrs->xattrs, value, flags);
+ if (IS_ERR_OR_NULL(xattrs))
+ return PTR_ERR(xattrs);
+ return simple_xattr_set_limited(xattrs, &attrs->xattr_limits,
+ full_name, value, size, flags);
}
static const struct xattr_handler kernfs_trusted_xattr_handler = {
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 6061b6f70d2a..1d3831e3a270 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -26,9 +26,8 @@ struct kernfs_iattrs {
struct timespec64 ia_mtime;
struct timespec64 ia_ctime;
- struct simple_xattrs xattrs;
- atomic_t nr_user_xattrs;
- atomic_t user_xattr_size;
+ struct simple_xattrs *xattrs;
+ struct simple_xattr_limits xattr_limits;
};
struct kernfs_root {
diff --git a/fs/pidfs.c b/fs/pidfs.c
index 318253344b5c..bae8e5d32a1c 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -22,6 +22,7 @@
#include <net/net_namespace.h>
#include <linux/coredump.h>
#include <linux/rhashtable.h>
+#include <linux/llist.h>
#include <linux/xattr.h>
#include <linux/cookie.h>
@@ -31,7 +32,6 @@
#define PIDFS_PID_DEAD ERR_PTR(-ESRCH)
static struct kmem_cache *pidfs_attr_cachep __ro_after_init;
-static struct kmem_cache *pidfs_xattr_cachep __ro_after_init;
static struct path pidfs_root_path = {};
@@ -46,9 +46,8 @@ enum pidfs_attr_mask_bits {
PIDFS_ATTR_BIT_COREDUMP = 1,
};
-struct pidfs_attr {
+struct pidfs_anon_attr {
unsigned long attr_mask;
- struct simple_xattrs *xattrs;
struct /* exit info */ {
__u64 cgroupid;
__s32 exit_code;
@@ -93,6 +92,13 @@ static const struct rhashtable_params pidfs_ino_ht_params = {
* inode number and the inode generation number to compare or
* use file handles.
*/
+struct pidfs_attr {
+ struct simple_xattrs *xattrs;
+ union {
+ struct pidfs_anon_attr;
+ struct llist_node pidfs_llist;
+ };
+};
#if BITS_PER_LONG == 32
@@ -178,10 +184,30 @@ void pidfs_remove_pid(struct pid *pid)
pidfs_ino_ht_params);
}
+static LLIST_HEAD(pidfs_free_list);
+
+static void pidfs_free_attr_work(struct work_struct *work)
+{
+ struct pidfs_attr *attr, *next;
+ struct llist_node *head;
+
+ head = llist_del_all(&pidfs_free_list);
+ llist_for_each_entry_safe(attr, next, head, pidfs_llist) {
+ struct simple_xattrs *xattrs = attr->xattrs;
+
+ if (xattrs) {
+ simple_xattrs_free(xattrs, NULL);
+ kfree(xattrs);
+ }
+ kfree(attr);
+ }
+}
+
+static DECLARE_WORK(pidfs_free_work, pidfs_free_attr_work);
+
void pidfs_free_pid(struct pid *pid)
{
- struct pidfs_attr *attr __free(kfree) = no_free_ptr(pid->attr);
- struct simple_xattrs *xattrs __free(kfree) = NULL;
+ struct pidfs_attr *attr = pid->attr;
/*
* Any dentry must've been wiped from the pid by now.
@@ -200,9 +226,10 @@ void pidfs_free_pid(struct pid *pid)
if (IS_ERR(attr))
return;
- xattrs = no_free_ptr(attr->xattrs);
- if (xattrs)
- simple_xattrs_free(xattrs, NULL);
+ if (likely(!attr->xattrs))
+ kfree(attr);
+ else if (llist_add(&attr->pidfs_llist, &pidfs_free_list))
+ schedule_work(&pidfs_free_work);
}
#ifdef CONFIG_PROC_FS
@@ -1011,7 +1038,7 @@ static int pidfs_xattr_get(const struct xattr_handler *handler,
xattrs = READ_ONCE(attr->xattrs);
if (!xattrs)
- return 0;
+ return -ENODATA;
name = xattr_full_name(handler, suffix);
return simple_xattr_get(xattrs, name, value, size);
@@ -1031,22 +1058,16 @@ static int pidfs_xattr_set(const struct xattr_handler *handler,
/* Ensure we're the only one to set @attr->xattrs. */
WARN_ON_ONCE(!inode_is_locked(inode));
- xattrs = READ_ONCE(attr->xattrs);
- if (!xattrs) {
- xattrs = kmem_cache_zalloc(pidfs_xattr_cachep, GFP_KERNEL);
- if (!xattrs)
- return -ENOMEM;
-
- simple_xattrs_init(xattrs);
- smp_store_release(&pid->attr->xattrs, xattrs);
- }
+ xattrs = simple_xattrs_lazy_alloc(&attr->xattrs, value, flags);
+ if (IS_ERR_OR_NULL(xattrs))
+ return PTR_ERR(xattrs);
name = xattr_full_name(handler, suffix);
old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
if (IS_ERR(old_xattr))
return PTR_ERR(old_xattr);
- simple_xattr_free(old_xattr);
+ simple_xattr_free_rcu(old_xattr);
return 0;
}
@@ -1124,11 +1145,6 @@ void __init pidfs_init(void)
(SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT |
SLAB_ACCOUNT | SLAB_PANIC), NULL);
- pidfs_xattr_cachep = kmem_cache_create("pidfs_xattr_cache",
- sizeof(struct simple_xattrs), 0,
- (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT |
- SLAB_ACCOUNT | SLAB_PANIC), NULL);
-
pidfs_mnt = kern_mount(&pidfs_type);
if (IS_ERR(pidfs_mnt))
panic("Failed to mount pidfs pseudo filesystem");
diff --git a/fs/xattr.c b/fs/xattr.c
index 3e49e612e1ba..09ecbaaa1660 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -22,6 +22,7 @@
#include <linux/audit.h>
#include <linux/vmalloc.h>
#include <linux/posix_acl_xattr.h>
+#include <linux/rhashtable.h>
#include <linux/uaccess.h>
@@ -105,6 +106,13 @@ int may_write_xattr(struct mnt_idmap *idmap, struct inode *inode)
return 0;
}
+static inline int xattr_permission_error(int mask)
+{
+ if (mask & MAY_WRITE)
+ return -EPERM;
+ return -ENODATA;
+}
+
/*
* Check permissions for extended attribute access. This is a bit complicated
* because different namespaces have very different rules.
@@ -134,7 +142,7 @@ xattr_permission(struct mnt_idmap *idmap, struct inode *inode,
*/
if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
if (!capable(CAP_SYS_ADMIN))
- return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
+ return xattr_permission_error(mask);
return 0;
}
@@ -144,12 +152,22 @@ xattr_permission(struct mnt_idmap *idmap, struct inode *inode,
* privileged users can write attributes.
*/
if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
- if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
- return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
- if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
- (mask & MAY_WRITE) &&
- !inode_owner_or_capable(idmap, inode))
+ switch (inode->i_mode & S_IFMT) {
+ case S_IFREG:
+ break;
+ case S_IFDIR:
+ if (!(inode->i_mode & S_ISVTX))
+ break;
+ if (!(mask & MAY_WRITE))
+ break;
+ if (inode_owner_or_capable(idmap, inode))
+ break;
return -EPERM;
+ case S_IFSOCK:
+ break;
+ default:
+ return xattr_permission_error(mask);
+ }
}
return inode_permission(idmap, inode, mask);
@@ -1197,6 +1215,27 @@ void simple_xattr_free(struct simple_xattr *xattr)
kvfree(xattr);
}
+static void simple_xattr_rcu_free(struct rcu_head *head)
+{
+ struct simple_xattr *xattr = container_of(head, struct simple_xattr, rcu);
+
+ simple_xattr_free(xattr);
+}
+
+/**
+ * simple_xattr_free_rcu - free an xattr object with RCU delay
+ * @xattr: the xattr object
+ *
+ * Free the xattr object after an RCU grace period. This must be used when
+ * the xattr was removed from a data structure that concurrent RCU readers
+ * may still be traversing. Can handle @xattr being NULL.
+ */
+void simple_xattr_free_rcu(struct simple_xattr *xattr)
+{
+ if (xattr)
+ call_rcu(&xattr->rcu, simple_xattr_rcu_free);
+}
+
/**
* simple_xattr_alloc - allocate new xattr object
* @value: value of the xattr object
@@ -1205,65 +1244,58 @@ void simple_xattr_free(struct simple_xattr *xattr)
* Allocate a new xattr object and initialize respective members. The caller is
* responsible for handling the name of the xattr.
*
- * Return: On success a new xattr object is returned. On failure NULL is
- * returned.
+ * Return: New xattr object on success, NULL if @value is NULL, ERR_PTR on
+ * failure.
*/
struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
{
struct simple_xattr *new_xattr;
size_t len;
+ if (!value)
+ return NULL;
+
/* wrap around? */
len = sizeof(*new_xattr) + size;
if (len < sizeof(*new_xattr))
- return NULL;
+ return ERR_PTR(-ENOMEM);
new_xattr = kvmalloc(len, GFP_KERNEL_ACCOUNT);
if (!new_xattr)
- return NULL;
+ return ERR_PTR(-ENOMEM);
new_xattr->size = size;
memcpy(new_xattr->value, value, size);
return new_xattr;
}
-/**
- * rbtree_simple_xattr_cmp - compare xattr name with current rbtree xattr entry
- * @key: xattr name
- * @node: current node
- *
- * Compare the xattr name with the xattr name attached to @node in the rbtree.
- *
- * Return: Negative value if continuing left, positive if continuing right, 0
- * if the xattr attached to @node matches @key.
- */
-static int rbtree_simple_xattr_cmp(const void *key, const struct rb_node *node)
+static u32 simple_xattr_hashfn(const void *data, u32 len, u32 seed)
{
- const char *xattr_name = key;
- const struct simple_xattr *xattr;
+ const char *name = data;
+ return jhash(name, strlen(name), seed);
+}
- xattr = rb_entry(node, struct simple_xattr, rb_node);
- return strcmp(xattr->name, xattr_name);
+static u32 simple_xattr_obj_hashfn(const void *obj, u32 len, u32 seed)
+{
+ const struct simple_xattr *xattr = obj;
+ return jhash(xattr->name, strlen(xattr->name), seed);
}
-/**
- * rbtree_simple_xattr_node_cmp - compare two xattr rbtree nodes
- * @new_node: new node
- * @node: current node
- *
- * Compare the xattr attached to @new_node with the xattr attached to @node.
- *
- * Return: Negative value if continuing left, positive if continuing right, 0
- * if the xattr attached to @new_node matches the xattr attached to @node.
- */
-static int rbtree_simple_xattr_node_cmp(struct rb_node *new_node,
- const struct rb_node *node)
+static int simple_xattr_obj_cmpfn(struct rhashtable_compare_arg *arg,
+ const void *obj)
{
- struct simple_xattr *xattr;
- xattr = rb_entry(new_node, struct simple_xattr, rb_node);
- return rbtree_simple_xattr_cmp(xattr->name, node);
+ const struct simple_xattr *xattr = obj;
+ return strcmp(xattr->name, arg->key);
}
+static const struct rhashtable_params simple_xattr_params = {
+ .head_offset = offsetof(struct simple_xattr, hash_node),
+ .hashfn = simple_xattr_hashfn,
+ .obj_hashfn = simple_xattr_obj_hashfn,
+ .obj_cmpfn = simple_xattr_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+
/**
* simple_xattr_get - get an xattr object
* @xattrs: the header of the xattr object
@@ -1282,14 +1314,12 @@ static int rbtree_simple_xattr_node_cmp(struct rb_node *new_node,
int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
void *buffer, size_t size)
{
- struct simple_xattr *xattr = NULL;
- struct rb_node *rbp;
+ struct simple_xattr *xattr;
int ret = -ENODATA;
- read_lock(&xattrs->lock);
- rbp = rb_find(name, &xattrs->rb_root, rbtree_simple_xattr_cmp);
- if (rbp) {
- xattr = rb_entry(rbp, struct simple_xattr, rb_node);
+ guard(rcu)();
+ xattr = rhashtable_lookup(&xattrs->ht, name, simple_xattr_params);
+ if (xattr) {
ret = xattr->size;
if (buffer) {
if (size < xattr->size)
@@ -1298,7 +1328,6 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
memcpy(buffer, xattr->value, xattr->size);
}
}
- read_unlock(&xattrs->lock);
return ret;
}
@@ -1325,6 +1354,11 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
* nothing if XATTR_CREATE is specified in @flags or @flags is zero. For
* XATTR_REPLACE we fail as mentioned above.
*
+ * Note: Callers must externally serialize writes. All current callers hold
+ * the inode lock for write operations. The lookup->replace/remove sequence
+ * is not atomic with respect to the rhashtable's per-bucket locking, but
+ * is safe because writes are serialized by the caller.
+ *
* Return: On success, the removed or replaced xattr is returned, to be freed
* by the caller; or NULL if none. On failure a negative error code is returned.
*/
@@ -1332,64 +1366,57 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
const char *name, const void *value,
size_t size, int flags)
{
- struct simple_xattr *old_xattr = NULL, *new_xattr = NULL;
- struct rb_node *parent = NULL, **rbp;
- int err = 0, ret;
+ struct simple_xattr *old_xattr = NULL;
+ int err;
- /* value == NULL means remove */
- if (value) {
- new_xattr = simple_xattr_alloc(value, size);
- if (!new_xattr)
- return ERR_PTR(-ENOMEM);
+ CLASS(simple_xattr, new_xattr)(value, size);
+ if (IS_ERR(new_xattr))
+ return new_xattr;
+ if (new_xattr) {
new_xattr->name = kstrdup(name, GFP_KERNEL_ACCOUNT);
- if (!new_xattr->name) {
- simple_xattr_free(new_xattr);
+ if (!new_xattr->name)
return ERR_PTR(-ENOMEM);
- }
}
- write_lock(&xattrs->lock);
- rbp = &xattrs->rb_root.rb_node;
- while (*rbp) {
- parent = *rbp;
- ret = rbtree_simple_xattr_cmp(name, *rbp);
- if (ret < 0)
- rbp = &(*rbp)->rb_left;
- else if (ret > 0)
- rbp = &(*rbp)->rb_right;
- else
- old_xattr = rb_entry(*rbp, struct simple_xattr, rb_node);
- if (old_xattr)
- break;
- }
+ /* Lookup is safe without RCU here since writes are serialized. */
+ old_xattr = rhashtable_lookup_fast(&xattrs->ht, name,
+ simple_xattr_params);
if (old_xattr) {
/* Fail if XATTR_CREATE is requested and the xattr exists. */
- if (flags & XATTR_CREATE) {
- err = -EEXIST;
- goto out_unlock;
- }
+ if (flags & XATTR_CREATE)
+ return ERR_PTR(-EEXIST);
- if (new_xattr)
- rb_replace_node(&old_xattr->rb_node,
- &new_xattr->rb_node, &xattrs->rb_root);
- else
- rb_erase(&old_xattr->rb_node, &xattrs->rb_root);
+ if (new_xattr) {
+ err = rhashtable_replace_fast(&xattrs->ht,
+ &old_xattr->hash_node,
+ &new_xattr->hash_node,
+ simple_xattr_params);
+ if (err)
+ return ERR_PTR(err);
+ } else {
+ err = rhashtable_remove_fast(&xattrs->ht,
+ &old_xattr->hash_node,
+ simple_xattr_params);
+ if (err)
+ return ERR_PTR(err);
+ }
} else {
/* Fail if XATTR_REPLACE is requested but no xattr is found. */
- if (flags & XATTR_REPLACE) {
- err = -ENODATA;
- goto out_unlock;
- }
+ if (flags & XATTR_REPLACE)
+ return ERR_PTR(-ENODATA);
/*
* If XATTR_CREATE or no flags are specified together with a
* new value simply insert it.
*/
if (new_xattr) {
- rb_link_node(&new_xattr->rb_node, parent, rbp);
- rb_insert_color(&new_xattr->rb_node, &xattrs->rb_root);
+ err = rhashtable_insert_fast(&xattrs->ht,
+ &new_xattr->hash_node,
+ simple_xattr_params);
+ if (err)
+ return ERR_PTR(err);
}
/*
@@ -1398,12 +1425,73 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
*/
}
-out_unlock:
- write_unlock(&xattrs->lock);
- if (!err)
- return old_xattr;
- simple_xattr_free(new_xattr);
- return ERR_PTR(err);
+ retain_and_null_ptr(new_xattr);
+ return old_xattr;
+}
+
+static inline void simple_xattr_limits_dec(struct simple_xattr_limits *limits,
+ size_t size)
+{
+ atomic_sub(size, &limits->xattr_size);
+ atomic_dec(&limits->nr_xattrs);
+}
+
+static inline int simple_xattr_limits_inc(struct simple_xattr_limits *limits,
+ size_t size)
+{
+ if (atomic_inc_return(&limits->nr_xattrs) > SIMPLE_XATTR_MAX_NR) {
+ atomic_dec(&limits->nr_xattrs);
+ return -ENOSPC;
+ }
+
+ if (atomic_add_return(size, &limits->xattr_size) <= SIMPLE_XATTR_MAX_SIZE)
+ return 0;
+
+ simple_xattr_limits_dec(limits, size);
+ return -ENOSPC;
+}
+
+/**
+ * simple_xattr_set_limited - set an xattr with per-inode user.* limits
+ * @xattrs: the header of the xattr object
+ * @limits: per-inode limit counters for user.* xattrs
+ * @name: the name of the xattr to set or remove
+ * @value: the value to store (NULL to remove)
+ * @size: the size of @value
+ * @flags: XATTR_CREATE, XATTR_REPLACE, or 0
+ *
+ * Like simple_xattr_set(), but enforces per-inode count and total value size
+ * limits for user.* xattrs. Uses speculative pre-increment of the atomic
+ * counters to avoid races without requiring external locks.
+ *
+ * Return: On success zero is returned. On failure a negative error code is
+ * returned.
+ */
+int simple_xattr_set_limited(struct simple_xattrs *xattrs,
+ struct simple_xattr_limits *limits,
+ const char *name, const void *value,
+ size_t size, int flags)
+{
+ struct simple_xattr *old_xattr;
+ int ret;
+
+ if (value) {
+ ret = simple_xattr_limits_inc(limits, size);
+ if (ret)
+ return ret;
+ }
+
+ old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
+ if (IS_ERR(old_xattr)) {
+ if (value)
+ simple_xattr_limits_dec(limits, size);
+ return PTR_ERR(old_xattr);
+ }
+ if (old_xattr) {
+ simple_xattr_limits_dec(limits, old_xattr->size);
+ simple_xattr_free_rcu(old_xattr);
+ }
+ return 0;
}
static bool xattr_is_trusted(const char *name)
@@ -1443,8 +1531,8 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
char *buffer, size_t size)
{
bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
+ struct rhashtable_iter iter;
struct simple_xattr *xattr;
- struct rb_node *rbp;
ssize_t remaining_size = size;
int err = 0;
@@ -1464,9 +1552,19 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
remaining_size -= err;
err = 0;
- read_lock(&xattrs->lock);
- for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) {
- xattr = rb_entry(rbp, struct simple_xattr, rb_node);
+ if (!xattrs)
+ return size - remaining_size;
+
+ rhashtable_walk_enter(&xattrs->ht, &iter);
+ rhashtable_walk_start(&iter);
+
+ while ((xattr = rhashtable_walk_next(&iter)) != NULL) {
+ if (IS_ERR(xattr)) {
+ if (PTR_ERR(xattr) == -EAGAIN)
+ continue;
+ err = PTR_ERR(xattr);
+ break;
+ }
/* skip "trusted." attributes for unprivileged callers */
if (!trusted && xattr_is_trusted(xattr->name))
@@ -1480,25 +1578,11 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
if (err)
break;
}
- read_unlock(&xattrs->lock);
- return err ? err : size - remaining_size;
-}
+ rhashtable_walk_stop(&iter);
+ rhashtable_walk_exit(&iter);
-/**
- * rbtree_simple_xattr_less - compare two xattr rbtree nodes
- * @new_node: new node
- * @node: current node
- *
- * Compare the xattr attached to @new_node with the xattr attached to @node.
- * Note that this function technically tolerates duplicate entries.
- *
- * Return: True if insertion point in the rbtree is found.
- */
-static bool rbtree_simple_xattr_less(struct rb_node *new_node,
- const struct rb_node *node)
-{
- return rbtree_simple_xattr_node_cmp(new_node, node) < 0;
+ return err ? err : size - remaining_size;
}
/**
@@ -1509,25 +1593,100 @@ static bool rbtree_simple_xattr_less(struct rb_node *new_node,
* Add an xattr object to @xattrs. This assumes no replacement or removal
* of matching xattrs is wanted. Should only be called during inode
* initialization when a few distinct initial xattrs are supposed to be set.
+ *
+ * Return: On success zero is returned. On failure a negative error code is
+ * returned.
*/
-void simple_xattr_add(struct simple_xattrs *xattrs,
- struct simple_xattr *new_xattr)
+int simple_xattr_add(struct simple_xattrs *xattrs,
+ struct simple_xattr *new_xattr)
{
- write_lock(&xattrs->lock);
- rb_add(&new_xattr->rb_node, &xattrs->rb_root, rbtree_simple_xattr_less);
- write_unlock(&xattrs->lock);
+ return rhashtable_insert_fast(&xattrs->ht, &new_xattr->hash_node,
+ simple_xattr_params);
}
/**
* simple_xattrs_init - initialize new xattr header
* @xattrs: header to initialize
*
- * Initialize relevant fields of a an xattr header.
+ * Initialize the rhashtable used to store xattr objects.
+ *
+ * Return: On success zero is returned. On failure a negative error code is
+ * returned.
+ */
+int simple_xattrs_init(struct simple_xattrs *xattrs)
+{
+ return rhashtable_init(&xattrs->ht, &simple_xattr_params);
+}
+
+/**
+ * simple_xattrs_alloc - allocate and initialize a new xattr header
+ *
+ * Dynamically allocate a simple_xattrs header and initialize the
+ * underlying rhashtable. This is intended for consumers that want
+ * to lazily allocate xattr storage only when the first xattr is set,
+ * avoiding the per-inode rhashtable overhead when no xattrs are used.
+ *
+ * Return: On success a new simple_xattrs is returned. On failure an
+ * ERR_PTR is returned.
+ */
+struct simple_xattrs *simple_xattrs_alloc(void)
+{
+ struct simple_xattrs *xattrs __free(kfree) = NULL;
+ int ret;
+
+ xattrs = kzalloc(sizeof(*xattrs), GFP_KERNEL);
+ if (!xattrs)
+ return ERR_PTR(-ENOMEM);
+
+ ret = simple_xattrs_init(xattrs);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return no_free_ptr(xattrs);
+}
+
+/**
+ * simple_xattrs_lazy_alloc - get or allocate xattrs for a set operation
+ * @xattrsp: pointer to the xattrs pointer (may point to NULL)
+ * @value: value being set (NULL means remove)
+ * @flags: xattr set flags
+ *
+ * For lazily-allocated xattrs on the write path. If no xattrs exist yet
+ * and this is a remove operation, returns the appropriate result without
+ * allocating. Otherwise ensures xattrs is allocated and published with
+ * store-release semantics.
+ *
+ * Return: On success a valid pointer to the xattrs is returned. On
+ * failure or early-exit an ERR_PTR or NULL is returned. Callers should
+ * check with IS_ERR_OR_NULL() and propagate with PTR_ERR() which
+ * correctly returns 0 for the NULL no-op case.
*/
-void simple_xattrs_init(struct simple_xattrs *xattrs)
+struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp,
+ const void *value, int flags)
{
- xattrs->rb_root = RB_ROOT;
- rwlock_init(&xattrs->lock);
+ struct simple_xattrs *xattrs;
+
+ xattrs = READ_ONCE(*xattrsp);
+ if (xattrs)
+ return xattrs;
+
+ if (!value)
+ return (flags & XATTR_REPLACE) ? ERR_PTR(-ENODATA) : NULL;
+
+ xattrs = simple_xattrs_alloc();
+ if (!IS_ERR(xattrs))
+ smp_store_release(xattrsp, xattrs);
+ return xattrs;
+}
+
+static void simple_xattr_ht_free(void *ptr, void *arg)
+{
+ struct simple_xattr *xattr = ptr;
+ size_t *freed_space = arg;
+
+ if (freed_space)
+ *freed_space += simple_xattr_space(xattr->name, xattr->size);
+ simple_xattr_free(xattr);
}
/**
@@ -1540,22 +1699,10 @@ void simple_xattrs_init(struct simple_xattrs *xattrs)
*/
void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space)
{
- struct rb_node *rbp;
+ might_sleep();
if (freed_space)
*freed_space = 0;
- rbp = rb_first(&xattrs->rb_root);
- while (rbp) {
- struct simple_xattr *xattr;
- struct rb_node *rbp_next;
-
- rbp_next = rb_next(rbp);
- xattr = rb_entry(rbp, struct simple_xattr, rb_node);
- rb_erase(&xattr->rb_node, &xattrs->rb_root);
- if (freed_space)
- *freed_space += simple_xattr_space(xattr->name,
- xattr->size);
- simple_xattr_free(xattr);
- rbp = rbp_next;
- }
+ rhashtable_free_and_destroy(&xattrs->ht, simple_xattr_ht_free,
+ freed_space);
}
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index b5a5f32fdfd1..d8f57f0af5e4 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -99,8 +99,6 @@ enum kernfs_node_type {
#define KERNFS_TYPE_MASK 0x000f
#define KERNFS_FLAG_MASK ~KERNFS_TYPE_MASK
-#define KERNFS_MAX_USER_XATTRS 128
-#define KERNFS_USER_XATTR_SIZE_LIMIT (128 << 10)
enum kernfs_node_flag {
KERNFS_ACTIVATED = 0x0010,
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index a8273b32e041..f6a2d3402d76 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -48,7 +48,7 @@ struct shmem_inode_info {
};
struct timespec64 i_crtime; /* file creation time */
struct shared_policy policy; /* NUMA memory alloc policy */
- struct simple_xattrs xattrs; /* list of xattrs */
+ struct simple_xattrs *xattrs; /* list of xattrs */
pgoff_t fallocend; /* highest fallocate endindex */
unsigned int fsflags; /* for FS_IOC_[SG]ETFLAGS */
atomic_t stop_eviction; /* hold when working on inode */
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 296b5ee5c979..8b6601367eae 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -16,6 +16,7 @@
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
+#include <linux/rhashtable-types.h>
#include <linux/user_namespace.h>
#include <uapi/linux/xattr.h>
@@ -106,31 +107,65 @@ static inline const char *xattr_prefix(const struct xattr_handler *handler)
}
struct simple_xattrs {
- struct rb_root rb_root;
- rwlock_t lock;
+ struct rhashtable ht;
};
struct simple_xattr {
- struct rb_node rb_node;
+ struct rhash_head hash_node;
+ struct rcu_head rcu;
char *name;
size_t size;
char value[] __counted_by(size);
};
-void simple_xattrs_init(struct simple_xattrs *xattrs);
+#define SIMPLE_XATTR_MAX_NR 128
+#define SIMPLE_XATTR_MAX_SIZE (128 << 10)
+
+struct simple_xattr_limits {
+ atomic_t nr_xattrs; /* current user.* xattr count */
+ atomic_t xattr_size; /* current total user.* value bytes */
+};
+
+static inline void simple_xattr_limits_init(struct simple_xattr_limits *limits)
+{
+ atomic_set(&limits->nr_xattrs, 0);
+ atomic_set(&limits->xattr_size, 0);
+}
+
+int simple_xattrs_init(struct simple_xattrs *xattrs);
+struct simple_xattrs *simple_xattrs_alloc(void);
+struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp,
+ const void *value, int flags);
void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space);
size_t simple_xattr_space(const char *name, size_t size);
struct simple_xattr *simple_xattr_alloc(const void *value, size_t size);
void simple_xattr_free(struct simple_xattr *xattr);
+void simple_xattr_free_rcu(struct simple_xattr *xattr);
int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
void *buffer, size_t size);
struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
const char *name, const void *value,
size_t size, int flags);
+int simple_xattr_set_limited(struct simple_xattrs *xattrs,
+ struct simple_xattr_limits *limits,
+ const char *name, const void *value,
+ size_t size, int flags);
ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
char *buffer, size_t size);
-void simple_xattr_add(struct simple_xattrs *xattrs,
- struct simple_xattr *new_xattr);
+int simple_xattr_add(struct simple_xattrs *xattrs,
+ struct simple_xattr *new_xattr);
int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name);
+DEFINE_CLASS(simple_xattr,
+ struct simple_xattr *,
+ if (!IS_ERR_OR_NULL(_T)) simple_xattr_free(_T),
+ simple_xattr_alloc(value, size),
+ const void *value, size_t size)
+
+DEFINE_CLASS(simple_xattrs,
+ struct simple_xattrs *,
+ if (!IS_ERR_OR_NULL(_T)) { simple_xattrs_free(_T, NULL); kfree(_T); },
+ simple_xattrs_alloc(),
+ void)
+
#endif /* _LINUX_XATTR_H */
diff --git a/mm/shmem.c b/mm/shmem.c
index b40f3cd48961..0b0e577e880a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1425,7 +1425,10 @@ static void shmem_evict_inode(struct inode *inode)
}
}
- simple_xattrs_free(&info->xattrs, sbinfo->max_inodes ? &freed : NULL);
+ if (info->xattrs) {
+ simple_xattrs_free(info->xattrs, sbinfo->max_inodes ? &freed : NULL);
+ kfree(info->xattrs);
+ }
shmem_free_inode(inode->i_sb, freed);
WARN_ON(inode->i_blocks);
clear_inode(inode);
@@ -3101,7 +3104,6 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
shmem_set_inode_flags(inode, info->fsflags, NULL);
INIT_LIST_HEAD(&info->shrinklist);
INIT_LIST_HEAD(&info->swaplist);
- simple_xattrs_init(&info->xattrs);
cache_no_acl(inode);
if (sbinfo->noswap)
mapping_set_unevictable(inode->i_mapping);
@@ -4255,10 +4257,13 @@ static int shmem_initxattrs(struct inode *inode,
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
const struct xattr *xattr;
- struct simple_xattr *new_xattr;
size_t ispace = 0;
size_t len;
+ CLASS(simple_xattrs, xattrs)();
+ if (IS_ERR(xattrs))
+ return PTR_ERR(xattrs);
+
if (sbinfo->max_inodes) {
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
ispace += simple_xattr_space(xattr->name,
@@ -4277,24 +4282,24 @@ static int shmem_initxattrs(struct inode *inode,
}
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
- new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
- if (!new_xattr)
+ CLASS(simple_xattr, new_xattr)(xattr->value, xattr->value_len);
+ if (IS_ERR(new_xattr))
break;
len = strlen(xattr->name) + 1;
new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
GFP_KERNEL_ACCOUNT);
- if (!new_xattr->name) {
- kvfree(new_xattr);
+ if (!new_xattr->name)
break;
- }
memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
XATTR_SECURITY_PREFIX_LEN);
memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
xattr->name, len);
- simple_xattr_add(&info->xattrs, new_xattr);
+ if (simple_xattr_add(xattrs, new_xattr))
+ break;
+ retain_and_null_ptr(new_xattr);
}
if (xattr->name != NULL) {
@@ -4303,10 +4308,10 @@ static int shmem_initxattrs(struct inode *inode,
sbinfo->free_ispace += ispace;
raw_spin_unlock(&sbinfo->stat_lock);
}
- simple_xattrs_free(&info->xattrs, NULL);
return -ENOMEM;
}
+ smp_store_release(&info->xattrs, no_free_ptr(xattrs));
return 0;
}
@@ -4315,9 +4320,14 @@ static int shmem_xattr_handler_get(const struct xattr_handler *handler,
const char *name, void *buffer, size_t size)
{
struct shmem_inode_info *info = SHMEM_I(inode);
+ struct simple_xattrs *xattrs;
+
+ xattrs = READ_ONCE(info->xattrs);
+ if (!xattrs)
+ return -ENODATA;
name = xattr_full_name(handler, name);
- return simple_xattr_get(&info->xattrs, name, buffer, size);
+ return simple_xattr_get(xattrs, name, buffer, size);
}
static int shmem_xattr_handler_set(const struct xattr_handler *handler,
@@ -4328,10 +4338,16 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
{
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ struct simple_xattrs *xattrs;
struct simple_xattr *old_xattr;
size_t ispace = 0;
name = xattr_full_name(handler, name);
+
+ xattrs = simple_xattrs_lazy_alloc(&info->xattrs, value, flags);
+ if (IS_ERR_OR_NULL(xattrs))
+ return PTR_ERR(xattrs);
+
if (value && sbinfo->max_inodes) {
ispace = simple_xattr_space(name, size);
raw_spin_lock(&sbinfo->stat_lock);
@@ -4344,13 +4360,13 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
return -ENOSPC;
}
- old_xattr = simple_xattr_set(&info->xattrs, name, value, size, flags);
+ old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
if (!IS_ERR(old_xattr)) {
ispace = 0;
if (old_xattr && sbinfo->max_inodes)
ispace = simple_xattr_space(old_xattr->name,
old_xattr->size);
- simple_xattr_free(old_xattr);
+ simple_xattr_free_rcu(old_xattr);
old_xattr = NULL;
inode_set_ctime_current(inode);
inode_inc_iversion(inode);
@@ -4391,7 +4407,9 @@ static const struct xattr_handler * const shmem_xattr_handlers[] = {
static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
- return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size);
+
+ return simple_xattr_list(d_inode(dentry), READ_ONCE(info->xattrs),
+ buffer, size);
}
#endif /* CONFIG_TMPFS_XATTR */
diff --git a/net/socket.c b/net/socket.c
index 136b98c54fb3..7aa94fce7a8b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -315,45 +315,70 @@ efault_end:
static struct kmem_cache *sock_inode_cachep __ro_after_init;
+struct sockfs_inode {
+ struct simple_xattrs *xattrs;
+ struct simple_xattr_limits xattr_limits;
+ struct socket_alloc;
+};
+
+static struct sockfs_inode *SOCKFS_I(struct inode *inode)
+{
+ return container_of(inode, struct sockfs_inode, vfs_inode);
+}
+
static struct inode *sock_alloc_inode(struct super_block *sb)
{
- struct socket_alloc *ei;
+ struct sockfs_inode *si;
- ei = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
- if (!ei)
+ si = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
+ if (!si)
return NULL;
- init_waitqueue_head(&ei->socket.wq.wait);
- ei->socket.wq.fasync_list = NULL;
- ei->socket.wq.flags = 0;
+ si->xattrs = NULL;
+ simple_xattr_limits_init(&si->xattr_limits);
+
+ init_waitqueue_head(&si->socket.wq.wait);
+ si->socket.wq.fasync_list = NULL;
+ si->socket.wq.flags = 0;
+
+ si->socket.state = SS_UNCONNECTED;
+ si->socket.flags = 0;
+ si->socket.ops = NULL;
+ si->socket.sk = NULL;
+ si->socket.file = NULL;
- ei->socket.state = SS_UNCONNECTED;
- ei->socket.flags = 0;
- ei->socket.ops = NULL;
- ei->socket.sk = NULL;
- ei->socket.file = NULL;
+ return &si->vfs_inode;
+}
+
+static void sock_evict_inode(struct inode *inode)
+{
+ struct sockfs_inode *si = SOCKFS_I(inode);
+ struct simple_xattrs *xattrs = si->xattrs;
- return &ei->vfs_inode;
+ if (xattrs) {
+ simple_xattrs_free(xattrs, NULL);
+ kfree(xattrs);
+ }
+ clear_inode(inode);
}
static void sock_free_inode(struct inode *inode)
{
- struct socket_alloc *ei;
+ struct sockfs_inode *si = SOCKFS_I(inode);
- ei = container_of(inode, struct socket_alloc, vfs_inode);
- kmem_cache_free(sock_inode_cachep, ei);
+ kmem_cache_free(sock_inode_cachep, si);
}
static void init_once(void *foo)
{
- struct socket_alloc *ei = (struct socket_alloc *)foo;
+ struct sockfs_inode *si = (struct sockfs_inode *)foo;
- inode_init_once(&ei->vfs_inode);
+ inode_init_once(&si->vfs_inode);
}
static void init_inodecache(void)
{
sock_inode_cachep = kmem_cache_create("sock_inode_cache",
- sizeof(struct socket_alloc),
+ sizeof(struct sockfs_inode),
0,
(SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
@@ -365,6 +390,7 @@ static void init_inodecache(void)
static const struct super_operations sockfs_ops = {
.alloc_inode = sock_alloc_inode,
.free_inode = sock_free_inode,
+ .evict_inode = sock_evict_inode,
.statfs = simple_statfs,
};
@@ -417,9 +443,48 @@ static const struct xattr_handler sockfs_security_xattr_handler = {
.set = sockfs_security_xattr_set,
};
+static int sockfs_user_xattr_get(const struct xattr_handler *handler,
+ struct dentry *dentry, struct inode *inode,
+ const char *suffix, void *value, size_t size)
+{
+ const char *name = xattr_full_name(handler, suffix);
+ struct simple_xattrs *xattrs;
+
+ xattrs = READ_ONCE(SOCKFS_I(inode)->xattrs);
+ if (!xattrs)
+ return -ENODATA;
+
+ return simple_xattr_get(xattrs, name, value, size);
+}
+
+static int sockfs_user_xattr_set(const struct xattr_handler *handler,
+ struct mnt_idmap *idmap,
+ struct dentry *dentry, struct inode *inode,
+ const char *suffix, const void *value,
+ size_t size, int flags)
+{
+ const char *name = xattr_full_name(handler, suffix);
+ struct sockfs_inode *si = SOCKFS_I(inode);
+ struct simple_xattrs *xattrs;
+
+ xattrs = simple_xattrs_lazy_alloc(&si->xattrs, value, flags);
+ if (IS_ERR_OR_NULL(xattrs))
+ return PTR_ERR(xattrs);
+
+ return simple_xattr_set_limited(xattrs, &si->xattr_limits,
+ name, value, size, flags);
+}
+
+static const struct xattr_handler sockfs_user_xattr_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .get = sockfs_user_xattr_get,
+ .set = sockfs_user_xattr_set,
+};
+
static const struct xattr_handler * const sockfs_xattr_handlers[] = {
&sockfs_xattr_handler,
&sockfs_security_xattr_handler,
+ &sockfs_user_xattr_handler,
NULL
};
@@ -572,26 +637,26 @@ EXPORT_SYMBOL(sockfd_lookup);
static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
size_t size)
{
- ssize_t len;
- ssize_t used = 0;
+ struct sockfs_inode *si = SOCKFS_I(d_inode(dentry));
+ ssize_t len, used;
- len = security_inode_listsecurity(d_inode(dentry), buffer, size);
+ len = simple_xattr_list(d_inode(dentry), READ_ONCE(si->xattrs),
+ buffer, size);
if (len < 0)
return len;
- used += len;
+
+ used = len;
if (buffer) {
- if (size < used)
- return -ERANGE;
buffer += len;
+ size -= len;
}
- len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
+ len = XATTR_NAME_SOCKPROTONAME_LEN + 1;
used += len;
if (buffer) {
- if (size < used)
+ if (size < len)
return -ERANGE;
memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
- buffer += len;
}
return used;
diff --git a/tools/testing/selftests/filesystems/xattr/.gitignore b/tools/testing/selftests/filesystems/xattr/.gitignore
new file mode 100644
index 000000000000..092d14094c0f
--- /dev/null
+++ b/tools/testing/selftests/filesystems/xattr/.gitignore
@@ -0,0 +1,3 @@
+xattr_socket_test
+xattr_sockfs_test
+xattr_socket_types_test
diff --git a/tools/testing/selftests/filesystems/xattr/Makefile b/tools/testing/selftests/filesystems/xattr/Makefile
new file mode 100644
index 000000000000..95364ffb10e9
--- /dev/null
+++ b/tools/testing/selftests/filesystems/xattr/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += $(KHDR_INCLUDES)
+TEST_GEN_PROGS := xattr_socket_test xattr_sockfs_test xattr_socket_types_test
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/xattr/xattr_socket_test.c b/tools/testing/selftests/filesystems/xattr/xattr_socket_test.c
new file mode 100644
index 000000000000..fac0a4c6bc05
--- /dev/null
+++ b/tools/testing/selftests/filesystems/xattr/xattr_socket_test.c
@@ -0,0 +1,470 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2026 Christian Brauner <brauner@kernel.org>
+/*
+ * Test extended attributes on path-based Unix domain sockets.
+ *
+ * Path-based Unix domain sockets are bound to a filesystem path and their
+ * inodes live on the underlying filesystem (e.g. tmpfs). These tests verify
+ * that user.* and trusted.* xattr operations work correctly on them using
+ * path-based syscalls (setxattr, getxattr, etc.).
+ *
+ * Covers SOCK_STREAM, SOCK_DGRAM, and SOCK_SEQPACKET socket types.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "../../kselftest_harness.h"
+
+#define TEST_XATTR_NAME "user.testattr"
+#define TEST_XATTR_VALUE "testvalue"
+#define TEST_XATTR_VALUE2 "newvalue"
+
+/*
+ * Fixture for path-based Unix domain socket tests.
+ * Creates a socket of the variant's type bound to a path in /tmp (typically tmpfs).
+ */
+FIXTURE(xattr_socket)
+{
+ char socket_path[PATH_MAX];
+ int sockfd;
+};
+
+FIXTURE_VARIANT(xattr_socket)
+{
+ int sock_type;
+ const char *name;
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket, stream) {
+ .sock_type = SOCK_STREAM,
+ .name = "stream",
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket, dgram) {
+ .sock_type = SOCK_DGRAM,
+ .name = "dgram",
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket, seqpacket) {
+ .sock_type = SOCK_SEQPACKET,
+ .name = "seqpacket",
+};
+
+FIXTURE_SETUP(xattr_socket)
+{
+ struct sockaddr_un addr;
+ int ret;
+
+ self->sockfd = -1;
+
+ snprintf(self->socket_path, sizeof(self->socket_path),
+ "/tmp/xattr_socket_test_%s.%d", variant->name, getpid());
+ unlink(self->socket_path);
+
+ self->sockfd = socket(AF_UNIX, variant->sock_type, 0);
+ ASSERT_GE(self->sockfd, 0) {
+ TH_LOG("Failed to create socket: %s", strerror(errno));
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_UNIX;
+ strncpy(addr.sun_path, self->socket_path, sizeof(addr.sun_path) - 1);
+
+ ret = bind(self->sockfd, (struct sockaddr *)&addr, sizeof(addr));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("Failed to bind socket to %s: %s",
+ self->socket_path, strerror(errno));
+ }
+}
+
+FIXTURE_TEARDOWN(xattr_socket)
+{
+ if (self->sockfd >= 0)
+ close(self->sockfd);
+ unlink(self->socket_path);
+}
+
+TEST_F(xattr_socket, set_user_xattr)
+{
+ int ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("setxattr failed: %s (errno=%d)", strerror(errno), errno);
+ }
+}
+
+TEST_F(xattr_socket, get_user_xattr)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("setxattr failed: %s", strerror(errno));
+ }
+
+ memset(buf, 0, sizeof(buf));
+ ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)) {
+ TH_LOG("getxattr returned %zd, expected %zu: %s",
+ ret, strlen(TEST_XATTR_VALUE), strerror(errno));
+ }
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE);
+}
+
+TEST_F(xattr_socket, list_user_xattr)
+{
+ char list[1024];
+ ssize_t ret;
+ bool found = false;
+ char *ptr;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("setxattr failed: %s", strerror(errno));
+ }
+
+ memset(list, 0, sizeof(list));
+ ret = listxattr(self->socket_path, list, sizeof(list));
+ ASSERT_GT(ret, 0) {
+ TH_LOG("listxattr failed: %s", strerror(errno));
+ }
+
+ for (ptr = list; ptr < list + ret; ptr += strlen(ptr) + 1) {
+ if (strcmp(ptr, TEST_XATTR_NAME) == 0) {
+ found = true;
+ break;
+ }
+ }
+ ASSERT_TRUE(found) {
+ TH_LOG("xattr %s not found in list", TEST_XATTR_NAME);
+ }
+}
+
+TEST_F(xattr_socket, remove_user_xattr)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("setxattr failed: %s", strerror(errno));
+ }
+
+ ret = removexattr(self->socket_path, TEST_XATTR_NAME);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("removexattr failed: %s", strerror(errno));
+ }
+
+ ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA) {
+ TH_LOG("Expected ENODATA, got %s", strerror(errno));
+ }
+}
+
+/*
+ * Test that xattrs persist after the socket fd is closed.
+ * The xattr is on the filesystem inode, not the socket fd.
+ */
+TEST_F(xattr_socket, xattr_persistence)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("setxattr failed: %s", strerror(errno));
+ }
+
+ close(self->sockfd);
+ self->sockfd = -1;
+
+ memset(buf, 0, sizeof(buf));
+ ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)) {
+ TH_LOG("getxattr after close failed: %s", strerror(errno));
+ }
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE);
+}
+
+TEST_F(xattr_socket, update_user_xattr)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2), 0);
+ ASSERT_EQ(ret, 0);
+
+ memset(buf, 0, sizeof(buf));
+ ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE2));
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE2);
+}
+
+TEST_F(xattr_socket, xattr_create_flag)
+{
+ int ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2), XATTR_CREATE);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EEXIST);
+}
+
+TEST_F(xattr_socket, xattr_replace_flag)
+{
+ int ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), XATTR_REPLACE);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA);
+}
+
+TEST_F(xattr_socket, multiple_xattrs)
+{
+ char buf[256];
+ ssize_t ret;
+ int i;
+ char name[64], value[64];
+ const int num_xattrs = 5;
+
+ for (i = 0; i < num_xattrs; i++) {
+ snprintf(name, sizeof(name), "user.test%d", i);
+ snprintf(value, sizeof(value), "value%d", i);
+ ret = setxattr(self->socket_path, name, value, strlen(value), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("setxattr %s failed: %s", name, strerror(errno));
+ }
+ }
+
+ for (i = 0; i < num_xattrs; i++) {
+ snprintf(name, sizeof(name), "user.test%d", i);
+ snprintf(value, sizeof(value), "value%d", i);
+ memset(buf, 0, sizeof(buf));
+ ret = getxattr(self->socket_path, name, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(value));
+ ASSERT_STREQ(buf, value);
+ }
+}
+
+TEST_F(xattr_socket, xattr_empty_value)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME, "", 0, 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_F(xattr_socket, xattr_get_size)
+{
+ ssize_t ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = getxattr(self->socket_path, TEST_XATTR_NAME, NULL, 0);
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE));
+}
+
+TEST_F(xattr_socket, xattr_buffer_too_small)
+{
+ char buf[2];
+ ssize_t ret;
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = getxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ERANGE);
+}
+
+TEST_F(xattr_socket, xattr_nonexistent)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = getxattr(self->socket_path, "user.nonexistent", buf, sizeof(buf));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA);
+}
+
+TEST_F(xattr_socket, remove_nonexistent_xattr)
+{
+ int ret;
+
+ ret = removexattr(self->socket_path, "user.nonexistent");
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA);
+}
+
+TEST_F(xattr_socket, large_xattr_value)
+{
+ char large_value[4096];
+ char read_buf[4096];
+ ssize_t ret;
+
+ memset(large_value, 'A', sizeof(large_value));
+
+ ret = setxattr(self->socket_path, TEST_XATTR_NAME,
+ large_value, sizeof(large_value), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("setxattr with large value failed: %s", strerror(errno));
+ }
+
+ memset(read_buf, 0, sizeof(read_buf));
+ ret = getxattr(self->socket_path, TEST_XATTR_NAME,
+ read_buf, sizeof(read_buf));
+ ASSERT_EQ(ret, (ssize_t)sizeof(large_value));
+ ASSERT_EQ(memcmp(large_value, read_buf, sizeof(large_value)), 0);
+}
+
+/*
+ * Test lsetxattr/lgetxattr (don't follow symlinks).
+ * Socket files aren't symlinks, so this should work the same.
+ */
+TEST_F(xattr_socket, lsetxattr_lgetxattr)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = lsetxattr(self->socket_path, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("lsetxattr failed: %s", strerror(errno));
+ }
+
+ memset(buf, 0, sizeof(buf));
+ ret = lgetxattr(self->socket_path, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE));
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE);
+}
+
+/*
+ * Fixture for trusted.* xattr tests.
+ * These require CAP_SYS_ADMIN.
+ */
+FIXTURE(xattr_socket_trusted)
+{
+ char socket_path[PATH_MAX];
+ int sockfd;
+};
+
+FIXTURE_VARIANT(xattr_socket_trusted)
+{
+ int sock_type;
+ const char *name;
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket_trusted, stream) {
+ .sock_type = SOCK_STREAM,
+ .name = "stream",
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket_trusted, dgram) {
+ .sock_type = SOCK_DGRAM,
+ .name = "dgram",
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket_trusted, seqpacket) {
+ .sock_type = SOCK_SEQPACKET,
+ .name = "seqpacket",
+};
+
+FIXTURE_SETUP(xattr_socket_trusted)
+{
+ struct sockaddr_un addr;
+ int ret;
+
+ self->sockfd = -1;
+
+ snprintf(self->socket_path, sizeof(self->socket_path),
+ "/tmp/xattr_socket_trusted_%s.%d", variant->name, getpid());
+ unlink(self->socket_path);
+
+ self->sockfd = socket(AF_UNIX, variant->sock_type, 0);
+ ASSERT_GE(self->sockfd, 0);
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_UNIX;
+ strncpy(addr.sun_path, self->socket_path, sizeof(addr.sun_path) - 1);
+
+ ret = bind(self->sockfd, (struct sockaddr *)&addr, sizeof(addr));
+ ASSERT_EQ(ret, 0);
+}
+
+FIXTURE_TEARDOWN(xattr_socket_trusted)
+{
+ if (self->sockfd >= 0)
+ close(self->sockfd);
+ unlink(self->socket_path);
+}
+
+TEST_F(xattr_socket_trusted, set_trusted_xattr)
+{
+ char buf[256];
+ ssize_t len;
+ int ret;
+
+ ret = setxattr(self->socket_path, "trusted.testattr",
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ if (ret == -1 && errno == EPERM)
+ SKIP(return, "Need CAP_SYS_ADMIN for trusted.* xattrs");
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("setxattr trusted.testattr failed: %s", strerror(errno));
+ }
+
+ memset(buf, 0, sizeof(buf));
+ len = getxattr(self->socket_path, "trusted.testattr",
+ buf, sizeof(buf));
+ ASSERT_EQ(len, (ssize_t)strlen(TEST_XATTR_VALUE));
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE);
+}
+
+TEST_F(xattr_socket_trusted, get_trusted_xattr_unprivileged)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = getxattr(self->socket_path, "trusted.testattr", buf, sizeof(buf));
+ ASSERT_EQ(ret, -1);
+ ASSERT_TRUE(errno == ENODATA || errno == EPERM) {
+ TH_LOG("Expected ENODATA or EPERM, got %s", strerror(errno));
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/xattr/xattr_socket_types_test.c b/tools/testing/selftests/filesystems/xattr/xattr_socket_types_test.c
new file mode 100644
index 000000000000..bfabe91b2ed1
--- /dev/null
+++ b/tools/testing/selftests/filesystems/xattr/xattr_socket_types_test.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2026 Christian Brauner <brauner@kernel.org>
+/*
+ * Test user.* xattrs on various socket families.
+ *
+ * All socket types use sockfs for their inodes, so user.* xattrs should
+ * work on any socket regardless of address family. This tests AF_INET,
+ * AF_INET6, AF_NETLINK, AF_PACKET, and abstract namespace AF_UNIX sockets.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/xattr.h>
+#include <linux/netlink.h>
+#include <unistd.h>
+
+#include "../../kselftest_harness.h"
+
+#define TEST_XATTR_NAME "user.testattr"
+#define TEST_XATTR_VALUE "testvalue"
+
+FIXTURE(xattr_socket_types)
+{
+ int sockfd;
+};
+
+FIXTURE_VARIANT(xattr_socket_types)
+{
+ int family;
+ int type;
+ int protocol;
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket_types, inet) {
+ .family = AF_INET,
+ .type = SOCK_STREAM,
+ .protocol = 0,
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket_types, inet6) {
+ .family = AF_INET6,
+ .type = SOCK_STREAM,
+ .protocol = 0,
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket_types, netlink) {
+ .family = AF_NETLINK,
+ .type = SOCK_RAW,
+ .protocol = NETLINK_USERSOCK,
+};
+
+FIXTURE_VARIANT_ADD(xattr_socket_types, packet) {
+ .family = AF_PACKET,
+ .type = SOCK_DGRAM,
+ .protocol = 0,
+};
+
+FIXTURE_SETUP(xattr_socket_types)
+{
+ self->sockfd = socket(variant->family, variant->type,
+ variant->protocol);
+ if (self->sockfd < 0 &&
+ (errno == EAFNOSUPPORT || errno == EPERM || errno == EACCES))
+ SKIP(return, "socket(%d, %d, %d) not available: %s",
+ variant->family, variant->type, variant->protocol,
+ strerror(errno));
+ ASSERT_GE(self->sockfd, 0) {
+ TH_LOG("Failed to create socket(%d, %d, %d): %s",
+ variant->family, variant->type, variant->protocol,
+ strerror(errno));
+ }
+}
+
+FIXTURE_TEARDOWN(xattr_socket_types)
+{
+ if (self->sockfd >= 0)
+ close(self->sockfd);
+}
+
+TEST_F(xattr_socket_types, set_get_list_remove)
+{
+ char buf[256], list[4096], *ptr;
+ ssize_t ret;
+ bool found;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("fsetxattr failed: %s", strerror(errno));
+ }
+
+ memset(buf, 0, sizeof(buf));
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE));
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE);
+
+ memset(list, 0, sizeof(list));
+ ret = flistxattr(self->sockfd, list, sizeof(list));
+ ASSERT_GT(ret, 0);
+ found = false;
+ for (ptr = list; ptr < list + ret; ptr += strlen(ptr) + 1) {
+ if (strcmp(ptr, TEST_XATTR_NAME) == 0)
+ found = true;
+ }
+ ASSERT_TRUE(found);
+
+ ret = fremovexattr(self->sockfd, TEST_XATTR_NAME);
+ ASSERT_EQ(ret, 0);
+
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA);
+}
+
+/*
+ * Test abstract namespace AF_UNIX socket.
+ * Abstract sockets don't have a filesystem path; their inodes live in
+ * sockfs so user.* xattrs should work via fsetxattr/fgetxattr.
+ */
+FIXTURE(xattr_abstract)
+{
+ int sockfd;
+};
+
+FIXTURE_SETUP(xattr_abstract)
+{
+ struct sockaddr_un addr;
+ char name[64];
+ int ret, len;
+
+ self->sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_GE(self->sockfd, 0);
+
+ len = snprintf(name, sizeof(name), "xattr_test_abstract_%d", getpid());
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sun_family = AF_UNIX;
+ addr.sun_path[0] = '\0';
+ memcpy(&addr.sun_path[1], name, len);
+
+ ret = bind(self->sockfd, (struct sockaddr *)&addr,
+ offsetof(struct sockaddr_un, sun_path) + 1 + len);
+ ASSERT_EQ(ret, 0);
+}
+
+FIXTURE_TEARDOWN(xattr_abstract)
+{
+ if (self->sockfd >= 0)
+ close(self->sockfd);
+}
+
+TEST_F(xattr_abstract, set_get)
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("fsetxattr on abstract socket failed: %s",
+ strerror(errno));
+ }
+
+ memset(buf, 0, sizeof(buf));
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE));
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/xattr/xattr_sockfs_test.c b/tools/testing/selftests/filesystems/xattr/xattr_sockfs_test.c
new file mode 100644
index 000000000000..b4824b01a86d
--- /dev/null
+++ b/tools/testing/selftests/filesystems/xattr/xattr_sockfs_test.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2026 Christian Brauner <brauner@kernel.org>
+/*
+ * Test extended attributes on sockfs sockets.
+ *
+ * Sockets created via socket() have their inodes in sockfs, which supports
+ * user.* xattrs with per-inode limits: up to 128 xattrs and 128KB total
+ * value size. These tests verify xattr operations via fsetxattr/fgetxattr/
+ * flistxattr/fremovexattr on the socket fd, as well as limit enforcement.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <unistd.h>
+
+#include "../../kselftest_harness.h"
+
+#define TEST_XATTR_NAME "user.testattr" /* xattr name shared by most tests */
+#define TEST_XATTR_VALUE "testvalue" /* initial value */
+#define TEST_XATTR_VALUE2 "newvalue" /* replacement value; length differs from TEST_XATTR_VALUE */
+
+/* Per-inode limits for user.* xattrs on sockfs (from include/linux/xattr.h) */
+#define SIMPLE_XATTR_MAX_NR 128 /* maximum number of xattrs */
+#define SIMPLE_XATTR_MAX_SIZE (128 << 10) /* 128 KB */
+
+#ifndef XATTR_SIZE_MAX
+#define XATTR_SIZE_MAX 65536 /* fallback (64 KB) when the included headers do not define it */
+#endif
+
+/*
+ * Fixture for sockfs socket xattr tests.
+ * Creates an AF_UNIX socket (lives in sockfs, not bound to any path).
+ */
+FIXTURE(xattr_sockfs)
+{
+ int sockfd; /* AF_UNIX stream socket created by FIXTURE_SETUP */
+};
+
+FIXTURE_SETUP(xattr_sockfs) /* give every test a fresh socket */
+{
+ self->sockfd = socket(AF_UNIX, SOCK_STREAM, 0); /* the fixture never binds or connects it */
+ ASSERT_GE(self->sockfd, 0) {
+ TH_LOG("Failed to create socket: %s", strerror(errno));
+ }
+}
+
+FIXTURE_TEARDOWN(xattr_sockfs) /* release the socket opened in setup */
+{
+ if (self->sockfd >= 0) /* only close a valid descriptor */
+ close(self->sockfd);
+}
+
+TEST_F(xattr_sockfs, set_get_user_xattr) /* set a user.* xattr, then read it back */
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); /* value is passed without its trailing NUL */
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("fsetxattr failed: %s", strerror(errno));
+ }
+
+ memset(buf, 0, sizeof(buf)); /* zero-fill so the value read back is NUL-terminated for ASSERT_STREQ */
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE)) {
+ TH_LOG("fgetxattr returned %zd: %s", ret, strerror(errno));
+ }
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE);
+}
+
+/*
+ * Test listing xattrs on a sockfs socket.
+ * Should include user.* xattrs and system.sockprotoname.
+ */
+TEST_F(xattr_sockfs, list_user_xattr)
+{
+ char list[4096]; /* receives the name list written by flistxattr() */
+ ssize_t ret;
+ char *ptr; /* cursor over the names in list */
+ bool found_user = false;
+ bool found_proto = false;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); /* make sure one user.* name exists to be listed */
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("fsetxattr failed: %s", strerror(errno));
+ }
+
+ memset(list, 0, sizeof(list));
+ ret = flistxattr(self->sockfd, list, sizeof(list)); /* ret: number of bytes used in list */
+ ASSERT_GT(ret, 0) {
+ TH_LOG("flistxattr failed: %s", strerror(errno));
+ }
+
+ for (ptr = list; ptr < list + ret; ptr += strlen(ptr) + 1) { /* step over one NUL-terminated name at a time */
+ if (strcmp(ptr, TEST_XATTR_NAME) == 0)
+ found_user = true;
+ if (strcmp(ptr, "system.sockprotoname") == 0)
+ found_proto = true;
+ }
+ ASSERT_TRUE(found_user) {
+ TH_LOG("user xattr not found in list");
+ }
+ ASSERT_TRUE(found_proto) {
+ TH_LOG("system.sockprotoname not found in list");
+ }
+}
+
+TEST_F(xattr_sockfs, remove_user_xattr) /* a removed xattr must read back as ENODATA */
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); /* create the xattr that will be removed */
+ ASSERT_EQ(ret, 0);
+
+ ret = fremovexattr(self->sockfd, TEST_XATTR_NAME);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("fremovexattr failed: %s", strerror(errno));
+ }
+
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf)); /* the name must be gone now */
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA);
+}
+
+TEST_F(xattr_sockfs, update_user_xattr) /* a second set with flags 0 replaces the stored value */
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); /* initial value */
+ ASSERT_EQ(ret, 0);
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2), 0); /* same name, new value */
+ ASSERT_EQ(ret, 0);
+
+ memset(buf, 0, sizeof(buf)); /* zero-fill so the value read back is NUL-terminated for ASSERT_STREQ */
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE2)); /* length of the new value, not the old one */
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE2);
+}
+
+TEST_F(xattr_sockfs, xattr_create_flag) /* XATTR_CREATE on an existing name must fail with EEXIST */
+{
+ int ret;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0); /* create the name first */
+ ASSERT_EQ(ret, 0);
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2),
+ XATTR_CREATE); /* create-only request for a name that already exists */
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EEXIST);
+}
+
+TEST_F(xattr_sockfs, xattr_replace_flag) /* XATTR_REPLACE on a missing name must fail with ENODATA */
+{
+ int ret;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE),
+ XATTR_REPLACE); /* nothing was set on this socket beforehand */
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA);
+}
+
+TEST_F(xattr_sockfs, get_nonexistent) /* reading a name that was never set must fail with ENODATA */
+{
+ char buf[256];
+ ssize_t ret;
+
+ ret = fgetxattr(self->sockfd, "user.nonexistent", buf, sizeof(buf));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA);
+}
+
+TEST_F(xattr_sockfs, empty_value) /* a zero-length value is accepted and reports size 0 */
+{
+ ssize_t ret;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME, "", 0, 0); /* size 0: the name exists with an empty value */
+ ASSERT_EQ(ret, 0);
+
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, NULL, 0); /* size query: NULL buffer, size 0 */
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_F(xattr_sockfs, get_size) /* a NULL/0 query must return the stored value length */
+{
+ ssize_t ret;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, NULL, 0); /* size query: NULL buffer, size 0 */
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE));
+}
+
+TEST_F(xattr_sockfs, buffer_too_small) /* a buffer shorter than the value must fail with ERANGE */
+{
+ char buf[2]; /* deliberately shorter than TEST_XATTR_VALUE */
+ ssize_t ret;
+
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0);
+
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf)); /* value does not fit in 2 bytes */
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ERANGE);
+}
+
+/*
+ * Test maximum number of user.* xattrs per socket.
+ * The kernel enforces SIMPLE_XATTR_MAX_NR (128), so the 129th should
+ * fail with ENOSPC.
+ */
+TEST_F(xattr_sockfs, max_nr_xattrs)
+{
+ char name[32]; /* holds "user.testNNN" */
+ int i, ret;
+
+ for (i = 0; i < SIMPLE_XATTR_MAX_NR; i++) { /* fill every slot with a distinct name */
+ snprintf(name, sizeof(name), "user.test%03d", i);
+ ret = fsetxattr(self->sockfd, name, "v", 1, 0); /* 1-byte values keep the total size small */
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("fsetxattr %s failed at i=%d: %s",
+ name, i, strerror(errno));
+ }
+ }
+
+ ret = fsetxattr(self->sockfd, "user.overflow", "v", 1, 0); /* one name beyond the count limit */
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENOSPC) {
+ TH_LOG("Expected ENOSPC for xattr %d, got %s",
+ SIMPLE_XATTR_MAX_NR + 1, strerror(errno));
+ }
+}
+
+/*
+ * Test maximum total value size for user.* xattrs.
+ * The kernel enforces SIMPLE_XATTR_MAX_SIZE (128KB). Individual xattr
+ * values are limited to XATTR_SIZE_MAX (64KB) by the VFS, so we need
+ * at least two xattrs to hit the total limit.
+ */
+TEST_F(xattr_sockfs, max_xattr_size)
+{
+ char *value; /* heap buffer of XATTR_SIZE_MAX bytes, reused for both large xattrs */
+ int ret;
+
+ value = malloc(XATTR_SIZE_MAX);
+ ASSERT_NE(value, NULL);
+ memset(value, 'A', XATTR_SIZE_MAX); /* contents are arbitrary; only the size matters here */
+
+ /* First 64KB xattr - total = 64KB */
+ ret = fsetxattr(self->sockfd, "user.big1", value, XATTR_SIZE_MAX, 0);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("first large xattr failed: %s", strerror(errno));
+ }
+
+ /* Second 64KB xattr - total = 128KB (exactly at limit) */
+ ret = fsetxattr(self->sockfd, "user.big2", value, XATTR_SIZE_MAX, 0);
+ free(value); /* freed before asserting on ret; the buffer is not used again */
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("second large xattr failed: %s", strerror(errno));
+ }
+
+ /* Third xattr with 1 byte - total > 128KB, should fail */
+ ret = fsetxattr(self->sockfd, "user.big3", "v", 1, 0);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENOSPC) {
+ TH_LOG("Expected ENOSPC when exceeding size limit, got %s",
+ strerror(errno));
+ }
+}
+
+/*
+ * Test that removing an xattr frees limit space, allowing re-addition.
+ */
+TEST_F(xattr_sockfs, limit_remove_readd)
+{
+ char name[32]; /* holds "user.testNNN" */
+ int i, ret;
+
+ /* Fill up to the maximum count */
+ for (i = 0; i < SIMPLE_XATTR_MAX_NR; i++) {
+ snprintf(name, sizeof(name), "user.test%03d", i);
+ ret = fsetxattr(self->sockfd, name, "v", 1, 0);
+ ASSERT_EQ(ret, 0);
+ }
+
+ /* Verify we're at the limit */
+ ret = fsetxattr(self->sockfd, "user.overflow", "v", 1, 0);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENOSPC);
+
+ /* Remove one xattr */
+ ret = fremovexattr(self->sockfd, "user.test000"); /* the name written by the loop at i == 0 */
+ ASSERT_EQ(ret, 0);
+
+ /* Now we should be able to add one more */
+ ret = fsetxattr(self->sockfd, "user.newattr", "v", 1, 0); /* a new name, not the one just removed */
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("re-add after remove failed: %s", strerror(errno));
+ }
+}
+
+/*
+ * Test that xattrs are tracked per socket inode (no limit is reached here).
+ */
+TEST_F(xattr_sockfs, limits_per_inode)
+{
+ char buf[256];
+ int sock2; /* second socket, owned by this test rather than the fixture */
+ ssize_t ret;
+
+ sock2 = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_GE(sock2, 0);
+
+ /* Set xattr on first socket */
+ ret = fsetxattr(self->sockfd, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE, strlen(TEST_XATTR_VALUE), 0);
+ ASSERT_EQ(ret, 0);
+
+ /* First socket's xattr should not be visible on second socket */
+ ret = fgetxattr(sock2, TEST_XATTR_NAME, NULL, 0);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, ENODATA);
+
+ /* Second socket should independently accept xattrs */
+ ret = fsetxattr(sock2, TEST_XATTR_NAME,
+ TEST_XATTR_VALUE2, strlen(TEST_XATTR_VALUE2), 0); /* same name, different value than on the first socket */
+ ASSERT_EQ(ret, 0);
+
+ /* Verify each socket has its own value */
+ memset(buf, 0, sizeof(buf)); /* zero-fill so the value read back is NUL-terminated for ASSERT_STREQ */
+ ret = fgetxattr(self->sockfd, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE));
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE);
+
+ memset(buf, 0, sizeof(buf)); /* clear the previous value before reusing buf */
+ ret = fgetxattr(sock2, TEST_XATTR_NAME, buf, sizeof(buf));
+ ASSERT_EQ(ret, (ssize_t)strlen(TEST_XATTR_VALUE2));
+ ASSERT_STREQ(buf, TEST_XATTR_VALUE2);
+
+ close(sock2); /* NOTE(review): presumably skipped when an ASSERT above aborts the test - confirm */
+}
+
+TEST_HARNESS_MAIN