From 854e8df2ce6b02c8be40d6f26bd8aa700b375bb2 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 23 Oct 2025 10:21:42 +0200 Subject: fs/pipe: stop duplicating union pipe_index declaration Now that we build with -fms-extensions, union pipe_index can be included as an anonymous member in struct pipe_inode_info, avoiding the duplication. Signed-off-by: Rasmus Villemoes Link: https://patch.msgid.link/20251023082142.2104456-1-linux@rasmusvillemoes.dk Signed-off-by: Nathan Chancellor Signed-off-by: Christian Brauner --- include/linux/pipe_fs_i.h | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 9d42d473d201..7f6a92ac9704 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -44,11 +44,11 @@ typedef unsigned int pipe_index_t; typedef unsigned short pipe_index_t; #endif -/* - * We have to declare this outside 'struct pipe_inode_info', - * but then we can't use 'union pipe_index' for an anonymous - * union, so we end up having to duplicate this declaration - * below. Annoying. 
+/** + * union pipe_index - pipe indices + * @head: The point of buffer production + * @tail: The point of buffer consumption + * @head_tail: unsigned long union of @head and @tail */ union pipe_index { unsigned long head_tail; @@ -63,9 +63,7 @@ union pipe_index { * @mutex: mutex protecting the whole thing * @rd_wait: reader wait point in case of empty pipe * @wr_wait: writer wait point in case of full pipe - * @head: The point of buffer production - * @tail: The point of buffer consumption - * @head_tail: unsigned long union of @head and @tail + * @pipe_index: the pipe indices * @note_loss: The next read() should insert a data-lost message * @max_usage: The maximum number of slots that may be used in the ring * @ring_size: total number of buffers (should be a power of 2) @@ -87,14 +85,7 @@ struct pipe_inode_info { struct mutex mutex; wait_queue_head_t rd_wait, wr_wait; - /* This has to match the 'union pipe_index' above */ - union { - unsigned long head_tail; - struct { - pipe_index_t head; - pipe_index_t tail; - }; - }; + union pipe_index; unsigned int max_usage; unsigned int ring_size; -- cgit v1.2.3 From e631df89cd5d638a9d7c152dd9b0a92643efab3e Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Fri, 7 Nov 2025 15:21:47 +0100 Subject: fs: speed up path lookup with cheaper handling of MAY_EXEC The generic inode_permission() routine does work which is known to be of no significance for lookup. There are checks for MAY_WRITE, while the requested permission is MAY_EXEC. Additionally devcgroup_inode_permission() is called to check for devices, but it is an invariant the inode is a directory. Absent a ->permission func, execution lands in generic_permission() which checks upfront if the requested permission is granted for everyone. We can elide the branches which are guaranteed to be false and cut straight to the check if everyone happens to be allowed MAY_EXEC on the inode (which holds true most of the time). 
Moreover, filesystems which provide their own ->permission routine can take advantage of the optimization by setting the IOP_FASTPERM_MAY_EXEC flag on their inodes, which they can legitimately do if their MAY_EXEC handling matches generic_permission(). As a simple benchmark, as part of compilation gcc issues access(2) on numerous long paths, for example /usr/lib/gcc/x86_64-linux-gnu/12/crtendS.o Issuing access(2) on it in a loop on ext4 on Sapphire Rapids (ops/s): before: 3797556 after: 3987789 (+5%) Note: this depends on the not-yet-landed ext4 patch to mark inodes with cache_no_acl() Signed-off-by: Mateusz Guzik Link: https://patch.msgid.link/20251107142149.989998-2-mjguzik@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/namei.c | 43 +++++++++++++++++++++++++++++++++++++++++-- include/linux/fs.h | 13 +++++++------ 2 files changed, 48 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 1d4d17f24fb2..94cb52b01022 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -540,6 +540,9 @@ static inline int do_inode_permission(struct mnt_idmap *idmap, * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * * Separate out file-system wide checks from inode-specific permission checks. + * + * Note: lookup_inode_permission_may_exec() does not call here. If you add + * MAY_EXEC checks, adjust it. */ static int sb_permission(struct super_block *sb, struct inode *inode, int mask) { @@ -602,6 +605,42 @@ int inode_permission(struct mnt_idmap *idmap, } EXPORT_SYMBOL(inode_permission); +/* + * lookup_inode_permission_may_exec - Check traversal right for given inode + * + * This is a special case routine for may_lookup() making assumptions specific + * to path traversal. Use inode_permission() if you are doing something else. 
+ * + * Work is shaved off compared to inode_permission() as follows: + * - we know for a fact there is no MAY_WRITE to worry about + * - it is an invariant the inode is a directory + * + * Since majority of real-world traversal happens on inodes which grant it for + * everyone, we check it upfront and only resort to more expensive work if it + * fails. + * + * Filesystems which have their own ->permission hook and consequently miss out + * on IOP_FASTPERM can still get the optimization if they set IOP_FASTPERM_MAY_EXEC + * on their directory inodes. + */ +static __always_inline int lookup_inode_permission_may_exec(struct mnt_idmap *idmap, + struct inode *inode, int mask) +{ + /* Lookup already checked this to return -ENOTDIR */ + VFS_BUG_ON_INODE(!S_ISDIR(inode->i_mode), inode); + VFS_BUG_ON((mask & ~MAY_NOT_BLOCK) != 0); + + mask |= MAY_EXEC; + + if (unlikely(!(inode->i_opflags & (IOP_FASTPERM | IOP_FASTPERM_MAY_EXEC)))) + return inode_permission(idmap, inode, mask); + + if (unlikely(((inode->i_mode & 0111) != 0111) || !no_acl_inode(inode))) + return inode_permission(idmap, inode, mask); + + return security_inode_permission(inode, mask); +} + /** * path_get - get a reference to a path * @path: path to get the reference to @@ -1855,7 +1894,7 @@ static inline int may_lookup(struct mnt_idmap *idmap, int err, mask; mask = nd->flags & LOOKUP_RCU ? 
MAY_NOT_BLOCK : 0; - err = inode_permission(idmap, nd->inode, mask | MAY_EXEC); + err = lookup_inode_permission_may_exec(idmap, nd->inode, mask); if (likely(!err)) return 0; @@ -1870,7 +1909,7 @@ static inline int may_lookup(struct mnt_idmap *idmap, if (err != -ECHILD) // hard error return err; - return inode_permission(idmap, nd->inode, MAY_EXEC); + return lookup_inode_permission_may_exec(idmap, nd->inode, 0); } static int reserve_stack(struct nameidata *nd, struct path *link) diff --git a/include/linux/fs.h b/include/linux/fs.h index c895146c1444..ff69734b9fde 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -659,13 +659,14 @@ is_uncached_acl(struct posix_acl *acl) return (long)acl & 1; } -#define IOP_FASTPERM 0x0001 -#define IOP_LOOKUP 0x0002 -#define IOP_NOFOLLOW 0x0004 -#define IOP_XATTR 0x0008 +#define IOP_FASTPERM 0x0001 +#define IOP_LOOKUP 0x0002 +#define IOP_NOFOLLOW 0x0004 +#define IOP_XATTR 0x0008 #define IOP_DEFAULT_READLINK 0x0010 -#define IOP_MGTIME 0x0020 -#define IOP_CACHED_LINK 0x0040 +#define IOP_MGTIME 0x0020 +#define IOP_CACHED_LINK 0x0040 +#define IOP_FASTPERM_MAY_EXEC 0x0080 /* * Inode state bits. Protected by inode->i_lock -- cgit v1.2.3 From 54ca9e913e22e364292a484783efc4fcdb6fdc51 Mon Sep 17 00:00:00 2001 From: Askar Safin Date: Thu, 20 Nov 2025 19:51:40 +0000 Subject: include/linux/fs.h: trivial fix: regualr -> regular Trivial fix. 
Signed-off-by: Askar Safin Link: https://patch.msgid.link/20251120195140.571608-1-safinaskar@gmail.com Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ff69734b9fde..e02700b4e36b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3102,7 +3102,7 @@ static inline bool inode_wrong_type(const struct inode *inode, umode_t mode) * file_start_write - get write access to a superblock for regular file io * @file: the file we want to write to * - * This is a variant of sb_start_write() which is a noop on non-regualr file. + * This is a variant of sb_start_write() which is a noop on non-regular file. * Should be matched with a call to file_end_write(). */ static inline void file_start_write(struct file *file) -- cgit v1.2.3