summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2026-06-15 01:24:54 +0300
committer Linus Torvalds <torvalds@linux-foundation.org> 2026-06-15 01:24:54 +0300
commit ff8747aacaff8266dd751b8a8648fb728dcc3b21 (patch)
tree 2eb6f46a6c2f904de59b37d9edf5c2f1d2386010
parent ec5d1ae94e99d8831427d00973da5620c7fb4368 (diff)
parent 9722955b54307e9070994f2382ec06af3d7405e0 (diff)
download linux-ff8747aacaff8266dd751b8a8648fb728dcc3b21.tar.xz
Merge tag 'vfs-7.2-rc1.xattr' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull simple_xattr updates from Christian Brauner:

"This reworks the simple xattr api to make it more efficient and easier to use for all consumers.

The simple_xattr hash table moves from the inode into a per-superblock cache, removing the per-inode overhead for the common case of few or no xattrs. The interface now passes struct simple_xattrs ** so lazy allocation is handled internally instead of by every caller, kernfs xattr operations on kernfs nodes shared between multiple superblocks are properly serialized, and tmpfs constructs "security.foo" xattr names with kasprintf() instead of kmalloc() plus two memcpy()s.

A follow-up fix links kernfs nodes to their parent before the LSM init hook runs: with the per-sb cache kernfs_xattr_set() computes the cache via kernfs_root(kn), which faulted on a freshly allocated node when selinux_kernfs_init_security() called into it - reproducible as a NULL pointer dereference on the first cgroup mkdir on SELinux-enabled systems.

On top of this bpffs gains support for trusted.* and security.* xattrs so that user space and BPF LSM programs can attach metadata - for example a content hash or a security label - to pinned objects and directories and inspect it uniformly like on other filesystems. The store is in-memory and non-persistent, living only for the lifetime of the mount like everything else in bpffs"

* tag 'vfs-7.2-rc1.xattr' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  bpf: Add simple xattr support to bpffs
  kernfs: link kn to its parent before the LSM init hook
  simple_xattr: use per-sb cache
  simple_xattr: change interface to pass struct simple_xattrs **
  tmpfs: simplify constructing "security.foo" xattr names
  kernfs: fix xattr race condition with multiple superblocks
-rw-r--r-- fs/kernfs/dir.c 22
-rw-r--r-- fs/kernfs/file.c 13
-rw-r--r-- fs/kernfs/inode.c 36
-rw-r--r-- fs/kernfs/kernfs-internal.h 24
-rw-r--r-- fs/kernfs/mount.c 2
-rw-r--r-- fs/pidfs.c 45
-rw-r--r-- fs/xattr.c 278
-rw-r--r-- include/linux/bpf.h 3
-rw-r--r-- include/linux/kernfs.h 11
-rw-r--r-- include/linux/shmem_fs.h 3
-rw-r--r-- include/linux/xattr.h 39
-rw-r--r-- kernel/bpf/inode.c 256
-rw-r--r-- mm/shmem.c 50
-rw-r--r-- net/socket.c 30
14 files changed, 526 insertions, 286 deletions
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 4f9ade82b08a..6d47b8469642 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -605,11 +605,8 @@ void kernfs_put(struct kernfs_node *kn)
if (kernfs_type(kn) == KERNFS_LINK)
kernfs_put(kn->symlink.target_kn);
- if (kn->iattr && kn->iattr->xattrs) {
- simple_xattrs_free(kn->iattr->xattrs, NULL);
- kfree(kn->iattr->xattrs);
- kn->iattr->xattrs = NULL;
- }
+ if (kn->iattr)
+ simple_xattrs_free(&root->xa_cache, &kn->iattr->xattrs, NULL);
spin_lock(&root->kernfs_idr_lock);
idr_remove(&root->ino_idr, (u32)kernfs_ino(kn));
@@ -624,6 +621,7 @@ void kernfs_put(struct kernfs_node *kn)
} else {
/* just released the root kn, free @root too */
idr_destroy(&root->ino_idr);
+ simple_xattr_cache_cleanup(&root->xa_cache);
kfree_rcu(root, rcu);
}
}
@@ -700,6 +698,9 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
}
if (parent) {
+ kernfs_get(parent);
+ rcu_assign_pointer(kn->__parent, parent);
+
ret = security_kernfs_init_security(parent, kn);
if (ret)
goto err_out4;
@@ -708,11 +709,10 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
return kn;
err_out4:
+ RCU_INIT_POINTER(kn->__parent, NULL);
+ kernfs_put(parent);
if (kn->iattr) {
- if (kn->iattr->xattrs) {
- simple_xattrs_free(kn->iattr->xattrs, NULL);
- kfree(kn->iattr->xattrs);
- }
+ simple_xattrs_free(&root->xa_cache, &kn->iattr->xattrs, NULL);
kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
}
err_out3:
@@ -747,10 +747,6 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
kn = __kernfs_new_node(kernfs_root(parent), parent,
name, mode, uid, gid, flags);
- if (kn) {
- kernfs_get(parent);
- rcu_assign_pointer(kn->__parent, parent);
- }
return kn;
}
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 1163aa769738..8e0e90c93372 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -40,22 +40,15 @@ struct kernfs_open_node {
static DEFINE_SPINLOCK(kernfs_notify_lock);
static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;
+/* Compatibility wrappers - use the common hashed node lock */
static inline struct mutex *kernfs_open_file_mutex_ptr(struct kernfs_node *kn)
{
- int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);
-
- return &kernfs_locks->open_file_mutex[idx];
+ return kernfs_node_lock_ptr(kn);
}
static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn)
{
- struct mutex *lock;
-
- lock = kernfs_open_file_mutex_ptr(kn);
-
- mutex_lock(lock);
-
- return lock;
+ return kernfs_node_lock(kn);
}
/**
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index 38b28aa7cd02..2cb20294aaf5 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -37,6 +37,7 @@ static struct kernfs_iattrs *__kernfs_iattrs(struct kernfs_node *kn, bool alloc)
if (!ret)
return NULL;
+ INIT_LIST_HEAD_RCU(&ret->xattrs);
/* assign default attributes */
ret->ia_uid = GLOBAL_ROOT_UID;
ret->ia_gid = GLOBAL_ROOT_GID;
@@ -144,8 +145,7 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size)
if (!attrs)
return -ENOMEM;
- return simple_xattr_list(d_inode(dentry), READ_ONCE(attrs->xattrs),
- buf, size);
+ return simple_xattr_list(d_inode(dentry), &attrs->xattrs, buf, size);
}
static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
@@ -297,34 +297,35 @@ int kernfs_xattr_get(struct kernfs_node *kn, const char *name,
void *value, size_t size)
{
struct kernfs_iattrs *attrs = kernfs_iattrs_noalloc(kn);
- struct simple_xattrs *xattrs;
+ struct simple_xattr_cache *cache = &kernfs_root(kn)->xa_cache;
if (!attrs)
return -ENODATA;
- xattrs = READ_ONCE(attrs->xattrs);
- if (!xattrs)
- return -ENODATA;
-
- return simple_xattr_get(xattrs, name, value, size);
+ return simple_xattr_get(cache, &attrs->xattrs, name, value, size);
}
int kernfs_xattr_set(struct kernfs_node *kn, const char *name,
const void *value, size_t size, int flags)
{
struct simple_xattr *old_xattr;
- struct simple_xattrs *xattrs;
struct kernfs_iattrs *attrs;
+ struct simple_xattr_cache *cache = &kernfs_root(kn)->xa_cache;
attrs = kernfs_iattrs(kn);
if (!attrs)
return -ENOMEM;
- xattrs = simple_xattrs_lazy_alloc(&attrs->xattrs, value, flags);
- if (IS_ERR_OR_NULL(xattrs))
- return PTR_ERR(xattrs);
+ /*
+ * Protect xattr modifications with the hashed per-node mutex.
+ * Multiple superblocks (with different namespaces) can share the same
+ * kernfs_node, so inode locking alone is insufficient. The hashed mutex
+ * ensures serialization of concurrent xattr operations on the same node,
+ * including the lazy allocation of the xattrs structure itself.
+ */
+ CLASS(kernfs_node_lock, lock)(kn);
- old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
+ old_xattr = simple_xattr_set(cache, &attrs->xattrs, name, value, size, flags);
if (IS_ERR(old_xattr))
return PTR_ERR(old_xattr);
@@ -362,7 +363,6 @@ static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
{
const char *full_name = xattr_full_name(handler, suffix);
struct kernfs_node *kn = inode->i_private;
- struct simple_xattrs *xattrs;
struct kernfs_iattrs *attrs;
if (!(kernfs_root(kn)->flags & KERNFS_ROOT_SUPPORT_USER_XATTR))
@@ -372,11 +372,11 @@ static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
if (!attrs)
return -ENOMEM;
- xattrs = simple_xattrs_lazy_alloc(&attrs->xattrs, value, flags);
- if (IS_ERR_OR_NULL(xattrs))
- return PTR_ERR(xattrs);
+ /* See comment in kernfs_xattr_set() about locking. */
+ CLASS(kernfs_node_lock, lock)(kn);
- return simple_xattr_set_limited(xattrs, &attrs->xattr_limits,
+ return simple_xattr_set_limited(&kernfs_root(kn)->xa_cache,
+ &attrs->xattrs, &attrs->xattr_limits,
full_name, value, size, flags);
}
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 8d8912f50b05..aa784b540b36 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -26,7 +26,7 @@ struct kernfs_iattrs {
struct timespec64 ia_mtime;
struct timespec64 ia_ctime;
- struct simple_xattrs *xattrs;
+ struct list_head xattrs;
struct simple_xattr_limits xattr_limits;
};
@@ -54,6 +54,8 @@ struct kernfs_root {
rwlock_t kernfs_rename_lock;
struct rcu_head rcu;
+
+ struct simple_xattr_cache xa_cache;
};
/* +1 to avoid triggering overflow warning when negating it */
@@ -211,4 +213,24 @@ extern const struct inode_operations kernfs_symlink_iops;
* kernfs locks
*/
extern struct kernfs_global_locks *kernfs_locks;
+
+/* Hashed mutex helpers - protect per-node data structures */
+static inline struct mutex *kernfs_node_lock_ptr(struct kernfs_node *kn)
+{
+ int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);
+
+ return &kernfs_locks->node_mutex[idx];
+}
+
+static inline struct mutex *kernfs_node_lock(struct kernfs_node *kn)
+{
+ struct mutex *lock = kernfs_node_lock_ptr(kn);
+
+ mutex_lock(lock);
+ return lock;
+}
+
+DEFINE_CLASS(kernfs_node_lock, struct mutex *,
+ mutex_unlock(_T), kernfs_node_lock(kn), struct kernfs_node *kn)
+
#endif /* __KERNFS_INTERNAL_H */
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 6e3217b6e481..f183a96778b9 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -446,7 +446,7 @@ static void __init kernfs_mutex_init(void)
int count;
for (count = 0; count < NR_KERNFS_LOCKS; count++)
- mutex_init(&kernfs_locks->open_file_mutex[count]);
+ mutex_init(&kernfs_locks->node_mutex[count]);
}
static void __init kernfs_lock_init(void)
diff --git a/fs/pidfs.c b/fs/pidfs.c
index b2ff950a096e..ae307b713dbd 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -37,6 +37,8 @@ static struct kmem_cache *pidfs_attr_cachep __ro_after_init;
static struct path pidfs_root_path = {};
+static struct simple_xattr_cache pidfs_xa_cache;
+
void pidfs_get_root(struct path *path)
{
*path = pidfs_root_path;
@@ -96,7 +98,7 @@ static const struct rhashtable_params pidfs_ino_ht_params = {
* use file handles.
*/
struct pidfs_attr {
- struct simple_xattrs *xattrs;
+ struct list_head xattrs;
union {
struct pidfs_anon_attr;
struct llist_node pidfs_llist;
@@ -196,12 +198,7 @@ static void pidfs_free_attr_work(struct work_struct *work)
head = llist_del_all(&pidfs_free_list);
llist_for_each_entry_safe(attr, next, head, pidfs_llist) {
- struct simple_xattrs *xattrs = attr->xattrs;
-
- if (xattrs) {
- simple_xattrs_free(xattrs, NULL);
- kfree(xattrs);
- }
+ simple_xattrs_free(&pidfs_xa_cache, &attr->xattrs, NULL);
kfree(attr);
}
}
@@ -229,7 +226,7 @@ void pidfs_free_pid(struct pid *pid)
if (IS_ERR(attr))
return;
- if (likely(!attr->xattrs))
+ if (likely(list_empty(&attr->xattrs)))
kfree(attr);
else if (llist_add(&attr->pidfs_llist, &pidfs_free_list))
schedule_work(&pidfs_free_work);
@@ -810,14 +807,8 @@ static ssize_t pidfs_listxattr(struct dentry *dentry, char *buf, size_t size)
{
struct inode *inode = d_inode(dentry);
struct pid *pid = inode->i_private;
- struct pidfs_attr *attr = pid->attr;
- struct simple_xattrs *xattrs;
-
- xattrs = READ_ONCE(attr->xattrs);
- if (!xattrs)
- return 0;
- return simple_xattr_list(inode, xattrs, buf, size);
+ return simple_xattr_list(inode, &pid->attr->xattrs, buf, size);
}
static const struct inode_operations pidfs_inode_operations = {
@@ -1013,6 +1004,8 @@ int pidfs_register_pid(struct pid *pid)
if (!new_attr)
return -ENOMEM;
+ INIT_LIST_HEAD_RCU(&new_attr->xattrs);
+
/* Synchronize with pidfs_exit(). */
guard(spinlock_irq)(&pid->wait_pidfd.lock);
@@ -1052,16 +1045,9 @@ static int pidfs_xattr_get(const struct xattr_handler *handler,
const char *suffix, void *value, size_t size)
{
struct pid *pid = inode->i_private;
- struct pidfs_attr *attr = pid->attr;
- const char *name;
- struct simple_xattrs *xattrs;
-
- xattrs = READ_ONCE(attr->xattrs);
- if (!xattrs)
- return -ENODATA;
+ const char *name = xattr_full_name(handler, suffix);
- name = xattr_full_name(handler, suffix);
- return simple_xattr_get(xattrs, name, value, size);
+ return simple_xattr_get(&pidfs_xa_cache, &pid->attr->xattrs, name, value, size);
}
static int pidfs_xattr_set(const struct xattr_handler *handler,
@@ -1070,20 +1056,13 @@ static int pidfs_xattr_set(const struct xattr_handler *handler,
const void *value, size_t size, int flags)
{
struct pid *pid = inode->i_private;
- struct pidfs_attr *attr = pid->attr;
- const char *name;
- struct simple_xattrs *xattrs;
+ const char *name = xattr_full_name(handler, suffix);
struct simple_xattr *old_xattr;
/* Ensure we're the only one to set @attr->xattrs. */
WARN_ON_ONCE(!inode_is_locked(inode));
- xattrs = simple_xattrs_lazy_alloc(&attr->xattrs, value, flags);
- if (IS_ERR_OR_NULL(xattrs))
- return PTR_ERR(xattrs);
-
- name = xattr_full_name(handler, suffix);
- old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
+ old_xattr = simple_xattr_set(&pidfs_xa_cache, &pid->attr->xattrs, name, value, size, flags);
if (IS_ERR(old_xattr))
return PTR_ERR(old_xattr);
diff --git a/fs/xattr.c b/fs/xattr.c
index efdcf2a48585..e1fc688158eb 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -28,6 +28,11 @@
#include "internal.h"
+struct sx_key {
+ const struct list_head *parent;
+ const char *name;
+};
+
static const char *
strcmp_prefix(const char *a, const char *a_prefix)
{
@@ -1269,23 +1274,32 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
return new_xattr;
}
+static u32 sx_hashfn(const char *name, const struct list_head *parent, u32 seed)
+{
+ return jhash(name, strlen(name), jhash(&parent, sizeof(parent), seed));
+}
+
static u32 simple_xattr_hashfn(const void *data, u32 len, u32 seed)
{
- const char *name = data;
- return jhash(name, strlen(name), seed);
+ const struct sx_key *key = data;
+
+ return sx_hashfn(key->name, key->parent, seed);
}
static u32 simple_xattr_obj_hashfn(const void *obj, u32 len, u32 seed)
{
const struct simple_xattr *xattr = obj;
- return jhash(xattr->name, strlen(xattr->name), seed);
+
+ return sx_hashfn(xattr->name, xattr->parent, seed);
}
static int simple_xattr_obj_cmpfn(struct rhashtable_compare_arg *arg,
const void *obj)
{
const struct simple_xattr *xattr = obj;
- return strcmp(xattr->name, arg->key);
+ const struct sx_key *key = arg->key;
+
+ return xattr->parent != key->parent || strcmp(xattr->name, key->name);
}
static const struct rhashtable_params simple_xattr_params = {
@@ -1298,6 +1312,7 @@ static const struct rhashtable_params simple_xattr_params = {
/**
* simple_xattr_get - get an xattr object
+ * @cache: anchor for the hash table
* @xattrs: the header of the xattr object
* @name: the name of the xattr to retrieve
* @buffer: the buffer to store the value into
@@ -1311,14 +1326,19 @@ static const struct rhashtable_params simple_xattr_params = {
* Return: On success the length of the xattr value is returned. On error a
* negative error code is returned.
*/
-int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
- void *buffer, size_t size)
+int simple_xattr_get(struct simple_xattr_cache *cache, struct list_head *xattrs,
+ const char *name, void *buffer, size_t size)
{
struct simple_xattr *xattr;
+ struct sx_key key = { .parent = xattrs, .name = name };
+ struct rhashtable *ht = READ_ONCE(cache->ht);
int ret = -ENODATA;
+ if (!ht)
+ return ret;
+
guard(rcu)();
- xattr = rhashtable_lookup(&xattrs->ht, name, simple_xattr_params);
+ xattr = rhashtable_lookup(ht, &key, simple_xattr_params);
if (xattr) {
ret = xattr->size;
if (buffer) {
@@ -1331,8 +1351,45 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
return ret;
}
+static struct rhashtable *simple_xattrs_lazy_alloc(struct simple_xattr_cache *cache,
+ const void *value, int flags)
+{
+ struct rhashtable *oldht, *ht = READ_ONCE(cache->ht);
+ int err;
+
+ if (unlikely(!ht)) {
+ if (!value)
+ return (flags & XATTR_REPLACE) ? ERR_PTR(-ENODATA) : NULL;
+
+ ht = kzalloc_obj(*ht);
+ if (!ht)
+ return ERR_PTR(-ENOMEM);
+
+ err = rhashtable_init(ht, &simple_xattr_params);
+ if (err) {
+ kfree(ht);
+ return ERR_PTR(err);
+ }
+
+ /*
+ * Provides release semantics on success, so that use of a
+ * non-NULL READ_ONCE(cache->ht) will be ordered relative to the
+ * above initialization, due to implicit address dependency.
+ */
+ oldht = cmpxchg_release(&cache->ht, NULL, ht);
+ if (oldht) {
+ /* Race lost */
+ rhashtable_destroy(ht);
+ kfree(ht);
+ ht = oldht;
+ }
+ }
+ return ht;
+}
+
/**
* simple_xattr_set - set an xattr object
+ * @cache: anchor for the hash table
* @xattrs: the header of the xattr object
* @name: the name of the xattr to retrieve
* @value: the value to store along the xattr
@@ -1362,45 +1419,58 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
* Return: On success, the removed or replaced xattr is returned, to be freed
* by the caller; or NULL if none. On failure a negative error code is returned.
*/
-struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
+struct simple_xattr *simple_xattr_set(struct simple_xattr_cache *cache, struct list_head *xattrs,
const char *name, const void *value,
size_t size, int flags)
{
+ struct sx_key key = { .parent = xattrs, .name = name };
struct simple_xattr *old_xattr = NULL;
+ struct rhashtable *ht;
int err;
+ ht = simple_xattrs_lazy_alloc(cache, value, flags);
+ if (IS_ERR_OR_NULL(ht))
+ return ERR_CAST(ht);
+
CLASS(simple_xattr, new_xattr)(value, size);
if (IS_ERR(new_xattr))
return new_xattr;
if (new_xattr) {
+ new_xattr->parent = xattrs;
new_xattr->name = kstrdup(name, GFP_KERNEL_ACCOUNT);
if (!new_xattr->name)
return ERR_PTR(-ENOMEM);
}
- /* Lookup is safe without RCU here since writes are serialized. */
- old_xattr = rhashtable_lookup_fast(&xattrs->ht, name,
- simple_xattr_params);
-
+ /*
+ * Hash table lookup/replace/remove will grab RCU read lock themselves.
+ * This makes sure that hash table lookup is safe against concurrent
+ * modification on another inode.
+ */
+ old_xattr = rhashtable_lookup_fast(ht, &key, simple_xattr_params);
if (old_xattr) {
/* Fail if XATTR_CREATE is requested and the xattr exists. */
if (flags & XATTR_CREATE)
return ERR_PTR(-EEXIST);
if (new_xattr) {
- err = rhashtable_replace_fast(&xattrs->ht,
+ err = rhashtable_replace_fast(ht,
&old_xattr->hash_node,
&new_xattr->hash_node,
simple_xattr_params);
if (err)
return ERR_PTR(err);
+
+ list_replace_rcu(&old_xattr->node, &new_xattr->node);
} else {
- err = rhashtable_remove_fast(&xattrs->ht,
+ err = rhashtable_remove_fast(ht,
&old_xattr->hash_node,
simple_xattr_params);
if (err)
return ERR_PTR(err);
+
+ list_del_rcu(&old_xattr->node);
}
} else {
/* Fail if XATTR_REPLACE is requested but no xattr is found. */
@@ -1412,11 +1482,13 @@ struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
* new value simply insert it.
*/
if (new_xattr) {
- err = rhashtable_insert_fast(&xattrs->ht,
+ err = rhashtable_insert_fast(ht,
&new_xattr->hash_node,
simple_xattr_params);
if (err)
return ERR_PTR(err);
+
+ list_add_tail_rcu(&new_xattr->node, xattrs);
}
/*
@@ -1453,6 +1525,7 @@ static inline int simple_xattr_limits_inc(struct simple_xattr_limits *limits,
/**
* simple_xattr_set_limited - set an xattr with per-inode user.* limits
+ * @cache: anchor for the hash table
* @xattrs: the header of the xattr object
* @limits: per-inode limit counters for user.* xattrs
* @name: the name of the xattr to set or remove
@@ -1467,7 +1540,7 @@ static inline int simple_xattr_limits_inc(struct simple_xattr_limits *limits,
* Return: On success zero is returned. On failure a negative error code is
* returned.
*/
-int simple_xattr_set_limited(struct simple_xattrs *xattrs,
+int simple_xattr_set_limited(struct simple_xattr_cache *cache, struct list_head *xattrs,
struct simple_xattr_limits *limits,
const char *name, const void *value,
size_t size, int flags)
@@ -1481,7 +1554,7 @@ int simple_xattr_set_limited(struct simple_xattrs *xattrs,
return ret;
}
- old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
+ old_xattr = simple_xattr_set(cache, xattrs, name, value, size, flags);
if (IS_ERR(old_xattr)) {
if (value)
simple_xattr_limits_dec(limits, size);
@@ -1527,11 +1600,10 @@ static bool xattr_is_maclabel(const char *name)
* Return: On success the required size or the size of the copied xattrs is
* returned. On error a negative error code is returned.
*/
-ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
+ssize_t simple_xattr_list(struct inode *inode, struct list_head *xattrs,
char *buffer, size_t size)
{
bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
- struct rhashtable_iter iter;
struct simple_xattr *xattr;
ssize_t remaining_size = size;
int err = 0;
@@ -1555,17 +1627,8 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
if (!xattrs)
return size - remaining_size;
- rhashtable_walk_enter(&xattrs->ht, &iter);
- rhashtable_walk_start(&iter);
-
- while ((xattr = rhashtable_walk_next(&iter)) != NULL) {
- if (IS_ERR(xattr)) {
- if (PTR_ERR(xattr) == -EAGAIN)
- continue;
- err = PTR_ERR(xattr);
- break;
- }
-
+ rcu_read_lock();
+ list_for_each_entry_rcu(xattr, xattrs, node) {
/* skip "trusted." attributes for unprivileged callers */
if (!trusted && xattr_is_trusted(xattr->name))
continue;
@@ -1578,15 +1641,14 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
if (err)
break;
}
-
- rhashtable_walk_stop(&iter);
- rhashtable_walk_exit(&iter);
+ rcu_read_unlock();
return err ? err : size - remaining_size;
}
/**
* simple_xattr_add - add xattr objects
+ * @cache: anchor for the hash table
* @xattrs: the header of the xattr object
* @new_xattr: the xattr object to add
*
@@ -1597,112 +1659,100 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
* Return: On success zero is returned. On failure a negative error code is
* returned.
*/
-int simple_xattr_add(struct simple_xattrs *xattrs,
+int simple_xattr_add(struct simple_xattr_cache *cache, struct list_head *xattrs,
struct simple_xattr *new_xattr)
{
- return rhashtable_insert_fast(&xattrs->ht, &new_xattr->hash_node,
- simple_xattr_params);
+ struct rhashtable *ht;
+ int err;
+
+ ht = simple_xattrs_lazy_alloc(cache, new_xattr->value, 0);
+ if (IS_ERR(ht))
+ return PTR_ERR(ht);
+
+ new_xattr->parent = xattrs;
+ err = rhashtable_insert_fast(ht, &new_xattr->hash_node, simple_xattr_params);
+ if (err)
+ return err;
+
+ list_add_tail_rcu(&new_xattr->node, xattrs);
+ return 0;
}
/**
- * simple_xattrs_init - initialize new xattr header
- * @xattrs: header to initialize
+ * simple_xattr_add_limited - add an xattr object, charging per-inode limits
+ * @cache: anchor for the hash table
+ * @xattrs: the header of the xattr object
+ * @limits: per-inode limit counters
+ * @new_xattr: the xattr object to add
*
- * Initialize the rhashtable used to store xattr objects.
+ * Like simple_xattr_add(), but also accounts @new_xattr against @limits so
+ * that a later removal or replacement of it through simple_xattr_set_limited()
+ * decrements counters that were actually incremented, rather than underflowing
+ * them. Use this instead of simple_xattr_add() when seeding initial xattrs
+ * that share a namespace with the limited set/remove path.
*
* Return: On success zero is returned. On failure a negative error code is
* returned.
*/
-int simple_xattrs_init(struct simple_xattrs *xattrs)
-{
- return rhashtable_init(&xattrs->ht, &simple_xattr_params);
-}
-
-/**
- * simple_xattrs_alloc - allocate and initialize a new xattr header
- *
- * Dynamically allocate a simple_xattrs header and initialize the
- * underlying rhashtable. This is intended for consumers that want
- * to lazily allocate xattr storage only when the first xattr is set,
- * avoiding the per-inode rhashtable overhead when no xattrs are used.
- *
- * Return: On success a new simple_xattrs is returned. On failure an
- * ERR_PTR is returned.
- */
-struct simple_xattrs *simple_xattrs_alloc(void)
+int simple_xattr_add_limited(struct simple_xattr_cache *cache,
+ struct list_head *xattrs,
+ struct simple_xattr_limits *limits,
+ struct simple_xattr *new_xattr)
{
- struct simple_xattrs *xattrs __free(kfree) = NULL;
- int ret;
-
- xattrs = kzalloc(sizeof(*xattrs), GFP_KERNEL);
- if (!xattrs)
- return ERR_PTR(-ENOMEM);
+ int err;
- ret = simple_xattrs_init(xattrs);
- if (ret)
- return ERR_PTR(ret);
+ err = simple_xattr_limits_inc(limits, new_xattr->size);
+ if (err)
+ return err;
- return no_free_ptr(xattrs);
+ err = simple_xattr_add(cache, xattrs, new_xattr);
+ if (err)
+ simple_xattr_limits_dec(limits, new_xattr->size);
+ return err;
}
/**
- * simple_xattrs_lazy_alloc - get or allocate xattrs for a set operation
- * @xattrsp: pointer to the xattrs pointer (may point to NULL)
- * @value: value being set (NULL means remove)
- * @flags: xattr set flags
- *
- * For lazily-allocated xattrs on the write path. If no xattrs exist yet
- * and this is a remove operation, returns the appropriate result without
- * allocating. Otherwise ensures xattrs is allocated and published with
- * store-release semantics.
+ * simple_xattrs_free - free xattrs
+ * @cache: anchor for the hash table
+ * @xattrs: xattr header whose xattrs to destroy
+ * @freed_space: approximate number of bytes of memory freed from @xattrs
*
- * Return: On success a valid pointer to the xattrs is returned. On
- * failure or early-exit an ERR_PTR or NULL is returned. Callers should
- * check with IS_ERR_OR_NULL() and propagate with PTR_ERR() which
- * correctly returns 0 for the NULL no-op case.
+ * Destroy all xattrs in @xattrs. When this is called no one can hold a
+ * reference to any of the xattrs anymore.
*/
-struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp,
- const void *value, int flags)
+void simple_xattrs_free(struct simple_xattr_cache *cache, struct list_head *xattrs,
+ size_t *freed_space)
{
- struct simple_xattrs *xattrs;
-
- xattrs = READ_ONCE(*xattrsp);
- if (xattrs)
- return xattrs;
-
- if (!value)
- return (flags & XATTR_REPLACE) ? ERR_PTR(-ENODATA) : NULL;
-
- xattrs = simple_xattrs_alloc();
- if (!IS_ERR(xattrs))
- smp_store_release(xattrsp, xattrs);
- return xattrs;
-}
+ if (freed_space)
+ *freed_space = 0;
-static void simple_xattr_ht_free(void *ptr, void *arg)
-{
- struct simple_xattr *xattr = ptr;
- size_t *freed_space = arg;
+ while (!list_empty(xattrs)) {
+ struct simple_xattr *xattr = list_first_entry(xattrs, typeof(*xattr), node);
- if (freed_space)
- *freed_space += simple_xattr_space(xattr->name, xattr->size);
- simple_xattr_free(xattr);
+ rhashtable_remove_fast(cache->ht, &xattr->hash_node, simple_xattr_params);
+ list_del(&xattr->node);
+ if (freed_space)
+ *freed_space += simple_xattr_space(xattr->name, xattr->size);
+ /*
+ * Free with RCU, since the xattr might still get accessed by
+ * the hash compare function
+ */
+ simple_xattr_free_rcu(xattr);
+ }
}
/**
- * simple_xattrs_free - free xattrs
- * @xattrs: xattr header whose xattrs to destroy
- * @freed_space: approximate number of bytes of memory freed from @xattrs
+ * simple_xattr_cache_cleanup - free the cache
+ * @cache: anchor for the hash table
*
- * Destroy all xattrs in @xattr. When this is called no one can hold a
- * reference to any of the xattrs anymore.
+ * Destroy the cache table, which was lazily allocated on adding the first xattr.
*/
-void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space)
+void simple_xattr_cache_cleanup(struct simple_xattr_cache *cache)
{
- might_sleep();
-
- if (freed_space)
- *freed_space = 0;
- rhashtable_free_and_destroy(&xattrs->ht, simple_xattr_ht_free,
- freed_space);
+ if (cache->ht) {
+ WARN_ON(atomic_read(&cache->ht->nelems));
+ rhashtable_destroy(cache->ht);
+ kfree(cache->ht);
+ cache->ht = NULL;
+ }
}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cd191c5fdb0a..64efc3fdb716 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -31,6 +31,7 @@
#include <linux/static_call.h>
#include <linux/memcontrol.h>
#include <linux/cfi.h>
+#include <linux/xattr.h>
#include <asm/rqspinlock.h>
struct bpf_verifier_env;
@@ -1918,6 +1919,8 @@ struct bpf_mount_opts {
u64 delegate_maps;
u64 delegate_progs;
u64 delegate_attachs;
+
+ struct simple_xattr_cache xa_cache;
};
struct bpf_token {
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index e21b2f7f4159..351a5101c862 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -76,20 +76,25 @@ struct kernfs_iattrs;
* kernfs_open_file.
*
* kernfs_open_files are chained at kernfs_open_node->files, which is
- * protected by kernfs_global_locks.open_file_mutex[i].
+ * protected by kernfs_global_locks.node_mutex[i].
*
* To reduce possible contention in sysfs access, arising due to single
- * locks, use an array of locks (e.g. open_file_mutex) and use kernfs_node
+ * locks, use an array of locks (e.g. node_mutex) and use kernfs_node
* object address as hash keys to get the index of these locks.
*
* Hashed mutexes are safe to use here because operations using these don't
* rely on global exclusion.
*
+ * The hashed mutex array protects per-node data: the kernfs_open_node for
+ * open file management, and kernfs_node xattr operations (necessary because
+ * multiple superblocks with different namespaces can share the same
+ * kernfs_node, making per-inode locking insufficient).
+ *
* In future we intend to replace other global locks with hashed ones as well.
* kernfs_global_locks acts as a holder for all such hash tables.
*/
struct kernfs_global_locks {
- struct mutex open_file_mutex[NR_KERNFS_LOCKS];
+ struct mutex node_mutex[NR_KERNFS_LOCKS];
};
enum kernfs_node_type {
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 93a0ba872ebe..69b0177da156 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -48,7 +48,7 @@ struct shmem_inode_info {
};
struct timespec64 i_crtime; /* file creation time */
struct shared_policy policy; /* NUMA memory alloc policy */
- struct simple_xattrs *xattrs; /* list of xattrs */
+ struct list_head xattrs; /* list of xattrs */
pgoff_t fallocend; /* highest fallocate endindex */
unsigned int fsflags; /* for FS_IOC_[SG]ETFLAGS */
atomic_t stop_eviction; /* hold when working on inode */
@@ -89,6 +89,7 @@ struct shmem_sb_info {
struct list_head shrinklist; /* List of shinkable inodes */
unsigned long shrinklist_len; /* Length of shrinklist */
struct shmem_quota_limits qlimits; /* Default quota limits */
+ struct simple_xattr_cache xa_cache;
};
static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index 8b6601367eae..54ac3cbc133f 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -106,12 +106,14 @@ static inline const char *xattr_prefix(const struct xattr_handler *handler)
return handler->prefix ?: handler->name;
}
-struct simple_xattrs {
- struct rhashtable ht;
+struct simple_xattr_cache {
+ struct rhashtable *ht;
};
struct simple_xattr {
struct rhash_head hash_node;
+ struct list_head *parent;
+ struct list_head node;
struct rcu_head rcu;
char *name;
size_t size;
@@ -132,40 +134,39 @@ static inline void simple_xattr_limits_init(struct simple_xattr_limits *limits)
atomic_set(&limits->xattr_size, 0);
}
-int simple_xattrs_init(struct simple_xattrs *xattrs);
-struct simple_xattrs *simple_xattrs_alloc(void);
-struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp,
- const void *value, int flags);
-void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space);
+void simple_xattrs_free(struct simple_xattr_cache *cache, struct list_head *xattrs,
+ size_t *freed_space);
size_t simple_xattr_space(const char *name, size_t size);
struct simple_xattr *simple_xattr_alloc(const void *value, size_t size);
void simple_xattr_free(struct simple_xattr *xattr);
void simple_xattr_free_rcu(struct simple_xattr *xattr);
-int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
- void *buffer, size_t size);
-struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
+int simple_xattr_get(struct simple_xattr_cache *cache, struct list_head *xattrs,
+ const char *name, void *buffer, size_t size);
+struct simple_xattr *simple_xattr_set(struct simple_xattr_cache *cache,
+ struct list_head *xattrs,
const char *name, const void *value,
size_t size, int flags);
-int simple_xattr_set_limited(struct simple_xattrs *xattrs,
+int simple_xattr_set_limited(struct simple_xattr_cache *cache,
+ struct list_head *xattrs,
struct simple_xattr_limits *limits,
const char *name, const void *value,
size_t size, int flags);
-ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
+ssize_t simple_xattr_list(struct inode *inode, struct list_head *xattrs,
char *buffer, size_t size);
-int simple_xattr_add(struct simple_xattrs *xattrs,
+int simple_xattr_add(struct simple_xattr_cache *cache, struct list_head *xattrs,
struct simple_xattr *new_xattr);
+int simple_xattr_add_limited(struct simple_xattr_cache *cache,
+ struct list_head *xattrs,
+ struct simple_xattr_limits *limits,
+ struct simple_xattr *new_xattr);
int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name);
+void simple_xattr_cache_cleanup(struct simple_xattr_cache *cache);
+
DEFINE_CLASS(simple_xattr,
struct simple_xattr *,
if (!IS_ERR_OR_NULL(_T)) simple_xattr_free(_T),
simple_xattr_alloc(value, size),
const void *value, size_t size)
-DEFINE_CLASS(simple_xattrs,
- struct simple_xattrs *,
- if (!IS_ERR_OR_NULL(_T)) { simple_xattrs_free(_T, NULL); kfree(_T); },
- simple_xattrs_alloc(),
- void)
-
#endif /* _LINUX_XATTR_H */
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 25c06a011825..c3f79b5a2f8c 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -21,6 +21,9 @@
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/kstrtox.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
+
#include "preload/bpf_preload.h"
enum bpf_type {
@@ -30,6 +33,23 @@ enum bpf_type {
BPF_TYPE_LINK,
};
+/* bpffs inode: the VFS inode together with its xattr list and xattr limits. */
+struct bpf_fs_inode {
+ struct list_head xattrs; /* list head handed to the simple_xattr_*() helpers */
+ struct simple_xattr_limits xlimits; /* limits passed along when security.* xattrs are stored */
+ struct inode vfs_inode; /* embedded VFS inode; BPF_FS_I() maps it back */
+};
+
+/* Map a VFS inode back to the bpf_fs_inode that embeds it. */
+static inline struct bpf_fs_inode *BPF_FS_I(struct inode *inode)
+{
+ return container_of(inode, struct bpf_fs_inode, vfs_inode);
+}
+
+static struct kmem_cache *bpf_fs_inode_cachep __ro_after_init;
+
+static int bpf_fs_initxattrs(struct inode *inode,
+ const struct xattr *xattr_array, void *fs_info);
+static ssize_t bpf_fs_listxattr(struct dentry *dentry, char *buf, size_t size);
+
static void *bpf_any_get(void *raw, enum bpf_type type)
{
switch (type) {
@@ -94,10 +114,17 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
}
static const struct inode_operations bpf_dir_iops;
+static const struct inode_operations bpf_symlink_iops;
-static const struct inode_operations bpf_prog_iops = { };
-static const struct inode_operations bpf_map_iops = { };
-static const struct inode_operations bpf_link_iops = { };
+/*
+ * Inode operations for prog, map and link inodes. Each table provides only
+ * ->listxattr, which all three share.
+ */
+static const struct inode_operations bpf_prog_iops = {
+ .listxattr = bpf_fs_listxattr,
+};
+static const struct inode_operations bpf_map_iops = {
+ .listxattr = bpf_fs_listxattr,
+};
+static const struct inode_operations bpf_link_iops = {
+ .listxattr = bpf_fs_listxattr,
+};
struct inode *bpf_get_inode(struct super_block *sb,
const struct inode *dir,
@@ -153,11 +180,19 @@ static struct dentry *bpf_mkdir(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct inode *inode;
+ int ret;
inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR);
if (IS_ERR(inode))
return ERR_CAST(inode);
+ ret = security_inode_init_security(inode, dir, &dentry->d_name,
+ bpf_fs_initxattrs, NULL);
+ if (ret && ret != -EOPNOTSUPP) {
+ iput(inode);
+ return ERR_PTR(ret);
+ }
+
inode->i_op = &bpf_dir_iops;
inode->i_fop = &simple_dir_operations;
@@ -330,10 +365,20 @@ static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
const struct file_operations *fops)
{
struct inode *dir = dentry->d_parent->d_inode;
- struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode);
+ struct inode *inode;
+ int ret;
+
+ inode = bpf_get_inode(dir->i_sb, dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
+ ret = security_inode_init_security(inode, dir, &dentry->d_name,
+ bpf_fs_initxattrs, NULL);
+ if (ret && ret != -EOPNOTSUPP) {
+ iput(inode);
+ return ret;
+ }
+
inode->i_op = iops;
inode->i_fop = fops;
inode->i_private = raw;
@@ -382,9 +427,11 @@ bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
static int bpf_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *target)
{
- char *link = kstrdup(target, GFP_USER | __GFP_NOWARN);
struct inode *inode;
+ char *link;
+ int ret;
+ link = kstrdup(target, GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (!link)
return -ENOMEM;
@@ -394,13 +441,25 @@ static int bpf_symlink(struct mnt_idmap *idmap, struct inode *dir,
return PTR_ERR(inode);
}
- inode->i_op = &simple_symlink_inode_operations;
+ inode->i_op = &bpf_symlink_iops;
inode->i_link = link;
+ ret = security_inode_init_security(inode, dir, &dentry->d_name,
+ bpf_fs_initxattrs, NULL);
+ if (ret && ret != -EOPNOTSUPP) {
+ iput(inode);
+ return ret;
+ }
+
bpf_dentry_finalize(dentry, inode, dir);
return 0;
}
+/* Symlink inode operations: simple_get_link() plus the bpffs xattr listing. */
+static const struct inode_operations bpf_symlink_iops = {
+ .get_link = simple_get_link,
+ .listxattr = bpf_fs_listxattr,
+};
+
static const struct inode_operations bpf_dir_iops = {
.lookup = bpf_lookup,
.mkdir = bpf_mkdir,
@@ -409,6 +468,7 @@ static const struct inode_operations bpf_dir_iops = {
.rename = simple_rename,
.link = simple_link,
.unlink = simple_unlink,
+ .listxattr = bpf_fs_listxattr,
};
/* pin iterator link into bpffs */
@@ -762,22 +822,147 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root)
return 0;
}
+/*
+ * ->alloc_inode: allocate a bpf_fs_inode from bpf_fs_inode_cachep and
+ * initialise its xattr list and xattr limits.
+ *
+ * Returns the embedded VFS inode, or NULL if the allocation fails.
+ */
+static struct inode *bpf_fs_alloc_inode(struct super_block *sb)
+{
+ struct bpf_fs_inode *bi;
+
+ bi = alloc_inode_sb(sb, bpf_fs_inode_cachep, GFP_KERNEL);
+ if (!bi)
+ return NULL;
+ INIT_LIST_HEAD_RCU(&bi->xattrs); /* start with an empty xattr list */
+ simple_xattr_limits_init(&bi->xlimits);
+ return &bi->vfs_inode;
+}
+
+/*
+ * ->destroy_inode: hand i_private to bpf_any_put() when bpf_inode_type()
+ * recognises the inode, then free the inode's xattrs. The symlink target and
+ * the inode memory itself are freed in bpf_free_inode().
+ */
static void bpf_destroy_inode(struct inode *inode)
{
+ struct bpf_mount_opts *opts = inode->i_sb->s_fs_info;
+ struct bpf_fs_inode *bi = BPF_FS_I(inode);
enum bpf_type type;
- if (S_ISLNK(inode->i_mode))
- kfree(inode->i_link);
if (!bpf_inode_type(inode, &type))
bpf_any_put(inode->i_private, type);
- free_inode_nonrcu(inode);
+ simple_xattrs_free(&opts->xa_cache, &bi->xattrs, NULL); /* NULL: no freed_space out-parameter supplied */
+}
+
+/*
+ * ->free_inode: free the symlink target (symlinks only) and return the
+ * bpf_fs_inode to bpf_fs_inode_cachep.
+ */
+static void bpf_free_inode(struct inode *inode)
+{
+ if (S_ISLNK(inode->i_mode))
+ kfree(inode->i_link); /* i_link is the kstrdup()ed target set in bpf_symlink() */
+ kmem_cache_free(bpf_fs_inode_cachep, BPF_FS_I(inode));
+}
+
+/*
+ * xattr handler ->get: look the attribute up in this inode's xattr list,
+ * using the xattr cache kept in the superblock's mount options.
+ *
+ * @name arrives without the handler prefix; xattr_full_name() is used to
+ * obtain the name that is passed on. Returns whatever simple_xattr_get()
+ * returns.
+ */
+static int bpf_fs_xattr_get(const struct xattr_handler *handler,
+ struct dentry *unused, struct inode *inode,
+ const char *name, void *value, size_t size)
+{
+ struct bpf_mount_opts *opts = inode->i_sb->s_fs_info;
+ struct bpf_fs_inode *bi = BPF_FS_I(inode);
+
+ name = xattr_full_name(handler, name);
+ return simple_xattr_get(&opts->xa_cache, &bi->xattrs, name, value, size);
+}
+
+/* Values stored in xattr_handler.flags; bpf_fs_xattr_set() switches on them. */
+enum {
+ BPF_FS_XATTR_UNSPEC, /* not assigned to either bpffs handler */
+ BPF_FS_XATTR_SECURITY, /* used by bpf_fs_security_xattr_handler */
+ BPF_FS_XATTR_TRUSTED, /* used by bpf_fs_trusted_xattr_handler */
+};
+
+/*
+ * xattr handler ->set, shared by the trusted and security handlers.
+ *
+ * Security attributes go through simple_xattr_set_limited() together with
+ * the inode's xlimits. Trusted attributes use simple_xattr_set(); the
+ * simple_xattr it returns on success (held in 'old') is handed to
+ * simple_xattr_free_rcu(). On success the inode's ctime is set to the
+ * current time.
+ *
+ * Returns 0 on success, the helper's error code otherwise, or -EINVAL when
+ * handler->flags matches neither case.
+ */
+static int bpf_fs_xattr_set(const struct xattr_handler *handler,
+ struct mnt_idmap *idmap, struct dentry *unused,
+ struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct bpf_mount_opts *opts = inode->i_sb->s_fs_info;
+ struct bpf_fs_inode *bi = BPF_FS_I(inode);
+ struct simple_xattr *old;
+ int err = -EINVAL; /* stays -EINVAL if no case below matches */
+
+ name = xattr_full_name(handler, name);
+ switch (handler->flags) {
+ case BPF_FS_XATTR_SECURITY:
+ err = simple_xattr_set_limited(&opts->xa_cache, &bi->xattrs,
+ &bi->xlimits, name, value, size,
+ flags);
+ break;
+ case BPF_FS_XATTR_TRUSTED:
+ old = simple_xattr_set(&opts->xa_cache, &bi->xattrs, name,
+ value, size, flags);
+ err = IS_ERR(old) ? PTR_ERR(old) : 0; /* an ERR_PTR return carries the error */
+ if (!err)
+ simple_xattr_free_rcu(old);
+ break;
+ }
+ if (err)
+ return err;
+ inode_set_ctime_current(inode);
+ return 0;
+}
+
+/* Handler for names carrying XATTR_TRUSTED_PREFIX. */
+static const struct xattr_handler bpf_fs_trusted_xattr_handler = {
+ .prefix = XATTR_TRUSTED_PREFIX,
+ .flags = BPF_FS_XATTR_TRUSTED,
+ .get = bpf_fs_xattr_get,
+ .set = bpf_fs_xattr_set,
+};
+
+/* Handler for names carrying XATTR_SECURITY_PREFIX. */
+static const struct xattr_handler bpf_fs_security_xattr_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .flags = BPF_FS_XATTR_SECURITY,
+ .get = bpf_fs_xattr_get,
+ .set = bpf_fs_xattr_set,
+};
+
+/* NULL-terminated handler table installed as sb->s_xattr in bpf_fill_super(). */
+static const struct xattr_handler * const bpf_fs_xattr_handlers[] = {
+ &bpf_fs_trusted_xattr_handler,
+ &bpf_fs_security_xattr_handler,
+ NULL,
+};
+
+/*
+ * ->listxattr shared by the bpffs inode_operations tables: list the xattrs
+ * kept on the dentry's inode. Returns whatever simple_xattr_list() returns.
+ */
+static ssize_t bpf_fs_listxattr(struct dentry *dentry, char *buf, size_t size)
+{
+ struct inode *inode = d_inode(dentry);
+
+ return simple_xattr_list(inode, &BPF_FS_I(inode)->xattrs, buf, size);
+}
+
+/*
+ * Callback passed to security_inode_init_security(): store every entry of
+ * @xattr_array on @inode, with XATTR_SECURITY_PREFIX prepended to its name.
+ *
+ * The array ends at the first entry whose name is NULL. @fs_info is unused.
+ * Returns 0 on success or a negative error code. On an early return the
+ * CLASS(simple_xattr) cleanup frees the attribute being built; attributes
+ * added by earlier iterations are not removed here.
+ */
+static int bpf_fs_initxattrs(struct inode *inode,
+ const struct xattr *xattr_array, void *fs_info)
+{
+ struct bpf_mount_opts *opts = inode->i_sb->s_fs_info;
+ struct bpf_fs_inode *bi = BPF_FS_I(inode);
+ const struct xattr *xattr;
+ int err;
+
+ for (xattr = xattr_array; xattr->name != NULL; xattr++) {
+ CLASS(simple_xattr, new_xattr)(xattr->value, xattr->value_len);
+ if (IS_ERR(new_xattr))
+ return PTR_ERR(new_xattr);
+
+ new_xattr->name = kasprintf(GFP_KERNEL_ACCOUNT,
+ XATTR_SECURITY_PREFIX "%s",
+ xattr->name);
+ if (!new_xattr->name)
+ return -ENOMEM;
+
+ err = simple_xattr_add_limited(&opts->xa_cache, &bi->xattrs,
+ &bi->xlimits, new_xattr);
+ if (err)
+ return err;
+
+ retain_and_null_ptr(new_xattr); /* added successfully: disarm the scope-exit free */
+ }
+ return 0;
}
const struct super_operations bpf_super_ops = {
.statfs = simple_statfs,
.drop_inode = inode_just_drop,
.show_options = bpf_show_options,
+ .alloc_inode = bpf_fs_alloc_inode, /* inodes come from bpf_fs_inode_cachep */
.destroy_inode = bpf_destroy_inode,
+ .free_inode = bpf_free_inode, /* frees i_link and the bpf_fs_inode itself */
};
enum {
@@ -996,25 +1181,38 @@ out:
static int bpf_fill_super(struct super_block *sb, struct fs_context *fc)
{
- static const struct tree_descr bpf_rfiles[] = { { "" } };
struct bpf_mount_opts *opts = sb->s_fs_info;
struct inode *inode;
- int ret;
/* Mounting an instance of BPF FS requires privileges */
if (fc->user_ns != &init_user_ns && !capable(CAP_SYS_ADMIN))
return -EPERM;
- ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
- if (ret)
- return ret;
-
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
+ sb->s_magic = BPF_FS_MAGIC;
sb->s_op = &bpf_super_ops;
+ sb->s_xattr = bpf_fs_xattr_handlers;
+ sb->s_iflags |= SB_I_NOEXEC;
+ sb->s_iflags |= SB_I_NODEV;
+ sb->s_time_gran = 1;
+
+ inode = bpf_get_inode(sb, NULL, S_IFDIR | 0777);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ inode->i_ino = 1;
+ inode->i_op = &bpf_dir_iops;
+ inode->i_fop = &simple_dir_operations;
+ set_nlink(inode, 2);
+
+ sb->s_root = d_make_root(inode);
+ if (!sb->s_root)
+ return -ENOMEM;
- inode = sb->s_root->d_inode;
+ inode = d_inode(sb->s_root);
inode->i_uid = opts->uid;
inode->i_gid = opts->gid;
- inode->i_op = &bpf_dir_iops;
inode->i_mode &= ~S_IALLUGO;
populate_bpffs(sb->s_root);
inode->i_mode |= S_ISVTX | opts->mode;
@@ -1068,6 +1266,7 @@ static void bpf_kill_super(struct super_block *sb)
struct bpf_mount_opts *opts = sb->s_fs_info;
kill_anon_super(sb);
+ simple_xattr_cache_cleanup(&opts->xa_cache);
kfree(opts);
}
@@ -1080,18 +1279,37 @@ static struct file_system_type bpf_fs_type = {
.fs_flags = FS_USERNS_MOUNT,
};
+/*
+ * Constructor given to kmem_cache_create() for bpf_fs_inode_cachep: runs
+ * inode_init_once() on the embedded VFS inode. @foo is the bpf_fs_inode.
+ */
+static void bpf_fs_inode_init_once(void *foo)
+{
+ struct bpf_fs_inode *bi = foo;
+
+ inode_init_once(&bi->vfs_inode);
+}
+
static int __init bpf_init(void)
{
int ret;
+ /* Create the slab cache that bpf_fs_alloc_inode() allocates from. */
+ bpf_fs_inode_cachep = kmem_cache_create("bpf_fs_inode_cache",
+ sizeof(struct bpf_fs_inode),
+ 0, SLAB_ACCOUNT,
+ bpf_fs_inode_init_once);
+ if (!bpf_fs_inode_cachep)
+ return -ENOMEM;
+
ret = sysfs_create_mount_point(fs_kobj, "bpf");
if (ret)
- return ret;
+ goto out_cache;
ret = register_filesystem(&bpf_fs_type);
- if (ret)
+ if (ret) {
sysfs_remove_mount_point(fs_kobj, "bpf");
+ goto out_cache;
+ }
+ return 0;
+out_cache: /* failure path: destroy the inode cache created above */
+ kmem_cache_destroy(bpf_fs_inode_cachep);
return ret;
}
fs_initcall(bpf_init);
diff --git a/mm/shmem.c b/mm/shmem.c
index 3b5dc21b323c..7b1ea9fb598f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1425,10 +1425,8 @@ static void shmem_evict_inode(struct inode *inode)
}
}
- if (info->xattrs) {
- simple_xattrs_free(info->xattrs, sbinfo->max_inodes ? &freed : NULL);
- kfree(info->xattrs);
- }
+ simple_xattrs_free(&sbinfo->xa_cache, &info->xattrs, sbinfo->max_inodes ? &freed : NULL);
+
shmem_free_inode(inode->i_sb, freed);
WARN_ON(inode->i_blocks);
clear_inode(inode);
@@ -3086,6 +3084,7 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
inode->i_generation = get_random_u32();
info = SHMEM_I(inode);
memset(info, 0, (char *)inode - (char *)info);
+ INIT_LIST_HEAD_RCU(&info->xattrs);
spin_lock_init(&info->lock);
atomic_set(&info->stop_eviction, 0);
info->seals = F_SEAL_SEAL;
@@ -4232,11 +4231,6 @@ static int shmem_initxattrs(struct inode *inode,
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
const struct xattr *xattr;
size_t ispace = 0;
- size_t len;
-
- CLASS(simple_xattrs, xattrs)();
- if (IS_ERR(xattrs))
- return PTR_ERR(xattrs);
if (sbinfo->max_inodes) {
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
@@ -4260,19 +4254,16 @@ static int shmem_initxattrs(struct inode *inode,
if (IS_ERR(new_xattr))
break;
- len = strlen(xattr->name) + 1;
- new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
- GFP_KERNEL_ACCOUNT);
+ new_xattr->name = kasprintf(GFP_KERNEL_ACCOUNT,
+ XATTR_SECURITY_PREFIX "%s", xattr->name);
if (!new_xattr->name)
break;
- memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
- XATTR_SECURITY_PREFIX_LEN);
- memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN,
- xattr->name, len);
-
- if (simple_xattr_add(xattrs, new_xattr))
+ if (simple_xattr_add(&sbinfo->xa_cache, &info->xattrs, new_xattr))
break;
+
+ if (sbinfo->max_inodes)
+ ispace -= simple_xattr_space(new_xattr->name, new_xattr->size);
retain_and_null_ptr(new_xattr);
}
@@ -4284,8 +4275,8 @@ static int shmem_initxattrs(struct inode *inode,
}
return -ENOMEM;
}
+ WARN_ON(ispace);
- smp_store_release(&info->xattrs, no_free_ptr(xattrs));
return 0;
}
@@ -4293,15 +4284,11 @@ static int shmem_xattr_handler_get(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
const char *name, void *buffer, size_t size)
{
+ struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
struct shmem_inode_info *info = SHMEM_I(inode);
- struct simple_xattrs *xattrs;
-
- xattrs = READ_ONCE(info->xattrs);
- if (!xattrs)
- return -ENODATA;
name = xattr_full_name(handler, name);
- return simple_xattr_get(xattrs, name, buffer, size);
+ return simple_xattr_get(&sbinfo->xa_cache, &info->xattrs, name, buffer, size);
}
static int shmem_xattr_handler_set(const struct xattr_handler *handler,
@@ -4312,16 +4299,11 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
{
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
- struct simple_xattrs *xattrs;
struct simple_xattr *old_xattr;
size_t ispace = 0;
name = xattr_full_name(handler, name);
- xattrs = simple_xattrs_lazy_alloc(&info->xattrs, value, flags);
- if (IS_ERR_OR_NULL(xattrs))
- return PTR_ERR(xattrs);
-
if (value && sbinfo->max_inodes) {
ispace = simple_xattr_space(name, size);
raw_spin_lock(&sbinfo->stat_lock);
@@ -4334,7 +4316,7 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
return -ENOSPC;
}
- old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
+ old_xattr = simple_xattr_set(&sbinfo->xa_cache, &info->xattrs, name, value, size, flags);
if (!IS_ERR(old_xattr)) {
ispace = 0;
if (old_xattr && sbinfo->max_inodes)
@@ -4382,8 +4364,7 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
- return simple_xattr_list(d_inode(dentry), READ_ONCE(info->xattrs),
- buffer, size);
+ return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size);
}
#endif /* CONFIG_TMPFS_XATTR */
@@ -4984,6 +4965,9 @@ static void shmem_put_super(struct super_block *sb)
free_percpu(sbinfo->ino_batch);
percpu_counter_destroy(&sbinfo->used_blocks);
mpol_put(sbinfo->mpol);
+#ifdef CONFIG_TMPFS_XATTR
+ simple_xattr_cache_cleanup(&sbinfo->xa_cache);
+#endif
kfree(sbinfo);
sb->s_fs_info = NULL;
}
diff --git a/net/socket.c b/net/socket.c
index c2698a1441a7..f51bdcbaa43f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -310,8 +310,10 @@ efault_end:
static struct kmem_cache *sock_inode_cachep __ro_after_init;
+static struct simple_xattr_cache sockfs_xa_cache;
+
struct sockfs_inode {
- struct simple_xattrs *xattrs;
+ struct list_head xattrs; /* list head handed to the simple_xattr_*() helpers with sockfs_xa_cache */
struct simple_xattr_limits xattr_limits;
struct socket_alloc;
};
@@ -328,7 +330,7 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
si = alloc_inode_sb(sb, sock_inode_cachep, GFP_KERNEL);
if (!si)
return NULL;
- si->xattrs = NULL;
+ INIT_LIST_HEAD_RCU(&si->xattrs);
simple_xattr_limits_init(&si->xattr_limits);
init_waitqueue_head(&si->socket.wq.wait);
@@ -347,12 +349,8 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
static void sock_evict_inode(struct inode *inode)
{
struct sockfs_inode *si = SOCKFS_I(inode);
- struct simple_xattrs *xattrs = si->xattrs;
- if (xattrs) {
- simple_xattrs_free(xattrs, NULL);
- kfree(xattrs);
- }
+ simple_xattrs_free(&sockfs_xa_cache, &si->xattrs, NULL); /* called unconditionally; NULL: no freed_space out-parameter supplied */
clear_inode(inode);
}
@@ -443,13 +441,9 @@ static int sockfs_user_xattr_get(const struct xattr_handler *handler,
const char *suffix, void *value, size_t size)
{
const char *name = xattr_full_name(handler, suffix);
- struct simple_xattrs *xattrs;
-
- xattrs = READ_ONCE(SOCKFS_I(inode)->xattrs);
- if (!xattrs)
- return -ENODATA;
+ struct sockfs_inode *si = SOCKFS_I(inode);
- return simple_xattr_get(xattrs, name, value, size);
+ return simple_xattr_get(&sockfs_xa_cache, &si->xattrs, name, value, size);
}
static int sockfs_user_xattr_set(const struct xattr_handler *handler,
@@ -460,13 +454,8 @@ static int sockfs_user_xattr_set(const struct xattr_handler *handler,
{
const char *name = xattr_full_name(handler, suffix);
struct sockfs_inode *si = SOCKFS_I(inode);
- struct simple_xattrs *xattrs;
-
- xattrs = simple_xattrs_lazy_alloc(&si->xattrs, value, flags);
- if (IS_ERR_OR_NULL(xattrs))
- return PTR_ERR(xattrs);
- return simple_xattr_set_limited(xattrs, &si->xattr_limits,
+ return simple_xattr_set_limited(&sockfs_xa_cache, &si->xattrs, &si->xattr_limits,
name, value, size, flags);
}
@@ -635,8 +624,7 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
struct sockfs_inode *si = SOCKFS_I(d_inode(dentry));
ssize_t len, used;
- len = simple_xattr_list(d_inode(dentry), READ_ONCE(si->xattrs),
- buffer, size);
+ len = simple_xattr_list(d_inode(dentry), &si->xattrs, buffer, size);
if (len < 0)
return len;