From 4be9f3cc582a24b08f6580f65fa48a4d70332ab5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Nov 2025 09:12:43 -0500 Subject: filelock: rework the __break_lease API to use flags Currently __break_lease takes both a type and an openmode. With the addition of directory leases, that makes less sense. Declare a set of LEASE_BREAK_* flags that can be used to control how lease breaks work instead of requiring a type and an openmode. Reviewed-by: Jan Kara Reviewed-by: NeilBrown Signed-off-by: Jeff Layton Link: https://patch.msgid.link/20251111-dir-deleg-ro-v6-2-52f3feebb2f2@kernel.org Signed-off-by: Christian Brauner --- include/linux/filelock.h | 52 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/filelock.h b/include/linux/filelock.h index c2ce8ba05d06..47da6aa28d8d 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -212,7 +212,14 @@ int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); void locks_init_lease(struct file_lease *); void locks_free_lease(struct file_lease *fl); struct file_lease *locks_alloc_lease(void); -int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); + +#define LEASE_BREAK_LEASE BIT(0) // break leases and delegations +#define LEASE_BREAK_DELEG BIT(1) // break delegations only +#define LEASE_BREAK_LAYOUT BIT(2) // break layouts only +#define LEASE_BREAK_NONBLOCK BIT(3) // non-blocking break +#define LEASE_BREAK_OPEN_RDONLY BIT(4) // readonly open event + +int __break_lease(struct inode *inode, unsigned int flags); void lease_get_mtime(struct inode *, struct timespec64 *time); int generic_setlease(struct file *, int, struct file_lease **, void **priv); int kernel_setlease(struct file *, int, struct file_lease **, void **); @@ -367,7 +374,7 @@ static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *f return -ENOLCK; } -static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) +static inline int __break_lease(struct inode *inode, unsigned int flags) { return 0; } @@ -428,6 +435,17 @@ static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) } #ifdef CONFIG_FILE_LOCKING +static inline unsigned int openmode_to_lease_flags(unsigned int mode) +{ + unsigned int flags = 0; + + if ((mode & O_ACCMODE) == O_RDONLY) + flags |= LEASE_BREAK_OPEN_RDONLY; + if (mode & O_NONBLOCK) + flags |= LEASE_BREAK_NONBLOCK; + return flags; +} + static inline int break_lease(struct inode *inode, unsigned int mode) { struct file_lock_context *flctx; @@ -443,11 +461,11 @@ static inline int break_lease(struct inode *inode, unsigned int mode) return 0; smp_mb(); if (!list_empty_careful(&flctx->flc_lease)) - return __break_lease(inode, mode, FL_LEASE); + return __break_lease(inode, LEASE_BREAK_LEASE | openmode_to_lease_flags(mode)); return 0; } -static inline int break_deleg(struct inode *inode, unsigned int mode) +static inline int break_deleg(struct inode *inode, unsigned int flags) { struct file_lock_context *flctx; @@ -461,8 +479,10 @@ static inline int break_deleg(struct inode *inode, unsigned int mode) if (!flctx) return 0; smp_mb(); - if (!list_empty_careful(&flctx->flc_lease)) - return __break_lease(inode, mode, FL_DELEG); + if (!list_empty_careful(&flctx->flc_lease)) { + flags |= LEASE_BREAK_DELEG; + return __break_lease(inode, flags); + } return 0; } @@ -470,7 +490,7 @@ static inline int try_break_deleg(struct inode *inode, struct inode **delegated_ { int ret; - ret = break_deleg(inode, O_WRONLY|O_NONBLOCK); + ret = break_deleg(inode, LEASE_BREAK_NONBLOCK); if (ret == -EWOULDBLOCK && delegated_inode) { *delegated_inode = inode; ihold(inode); @@ -482,7 +502,7 @@ static inline int break_deleg_wait(struct inode **delegated_inode) { int ret; - ret = break_deleg(*delegated_inode, O_WRONLY); + ret = break_deleg(*delegated_inode, 0); iput(*delegated_inode); *delegated_inode = NULL; return ret; @@ -491,20 +511,24 @@ static inline int break_deleg_wait(struct inode **delegated_inode) static inline int break_layout(struct inode *inode, bool wait) { smp_mb(); - if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) - return __break_lease(inode, - wait ? O_WRONLY : O_WRONLY | O_NONBLOCK, - FL_LAYOUT); + if (inode->i_flctx && !list_empty_careful(&inode->i_flctx->flc_lease)) { + unsigned int flags = LEASE_BREAK_LAYOUT; + + if (!wait) + flags |= LEASE_BREAK_NONBLOCK; + + return __break_lease(inode, flags); + } return 0; } #else /* !CONFIG_FILE_LOCKING */ -static inline int break_lease(struct inode *inode, unsigned int mode) +static inline int break_lease(struct inode *inode, bool wait) { return 0; } -static inline int break_deleg(struct inode *inode, unsigned int mode) +static inline int break_deleg(struct inode *inode, unsigned int flags) { return 0; } -- cgit v1.2.3 From 6976ed2dd0d59086d16d853ac9b21776be68aaad Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Nov 2025 09:12:44 -0500 Subject: filelock: add struct delegated_inode The current API requires a pointer to an inode pointer. It's easy for callers to get this wrong. Add a new delegated_inode structure and use that to pass back any inode that needs to be waited on. Reviewed-by: Jan Kara Reviewed-by: NeilBrown Signed-off-by: Jeff Layton Link: https://patch.msgid.link/20251111-dir-deleg-ro-v6-3-52f3feebb2f2@kernel.org Signed-off-by: Christian Brauner --- fs/attr.c | 2 +- fs/namei.c | 18 +++++++++--------- fs/open.c | 8 ++++---- fs/posix_acl.c | 8 ++++---- fs/utimes.c | 4 ++-- fs/xattr.c | 12 ++++++------ include/linux/filelock.h | 36 +++++++++++++++++++++++++++--------- include/linux/fs.h | 9 +++++---- include/linux/xattr.h | 4 ++-- 9 files changed, 60 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/fs/attr.c b/fs/attr.c index 795f231d00e8..b9ec6b47bab2 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -415,7 +415,7 @@ EXPORT_SYMBOL(may_setattr); * performed on the raw inode simply pass @nop_mnt_idmap. */ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry, - struct iattr *attr, struct inode **delegated_inode) + struct iattr *attr, struct delegated_inode *delegated_inode) { struct inode *inode = dentry->d_inode; umode_t mode = inode->i_mode; diff --git a/fs/namei.c b/fs/namei.c index 7377020a2cba..bf42f146f847 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4648,7 +4648,7 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname) * raw inode simply pass @nop_mnt_idmap. */ int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, struct inode **delegated_inode) + struct dentry *dentry, struct delegated_inode *delegated_inode) { struct inode *target = dentry->d_inode; int error = may_delete(idmap, dir, dentry, 0); @@ -4706,7 +4706,7 @@ int do_unlinkat(int dfd, struct filename *name) struct qstr last; int type; struct inode *inode = NULL; - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; unsigned int lookup_flags = 0; retry: error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type); @@ -4743,7 +4743,7 @@ exit3: if (inode) iput(inode); /* truncate the inode here */ inode = NULL; - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; @@ -4892,7 +4892,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn */ int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap, struct inode *dir, struct dentry *new_dentry, - struct inode **delegated_inode) + struct delegated_inode *delegated_inode) { struct inode *inode = old_dentry->d_inode; unsigned max_links = dir->i_sb->s_max_links; @@ -4968,7 +4968,7 @@ int do_linkat(int olddfd, struct filename *old, int newdfd, struct mnt_idmap *idmap; struct dentry *new_dentry; struct path old_path, new_path; - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; int how = 0; int error; @@ -5012,7 +5012,7 @@ retry: new_dentry, &delegated_inode); out_dput: end_creating_path(&new_path, new_dentry); - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) { path_put(&old_path); @@ -5098,7 +5098,7 @@ int vfs_rename(struct renamedata *rd) struct inode *new_dir = d_inode(rd->new_parent); struct dentry *old_dentry = rd->old_dentry; struct dentry *new_dentry = rd->new_dentry; - struct inode **delegated_inode = rd->delegated_inode; + struct delegated_inode *delegated_inode = rd->delegated_inode; unsigned int flags = rd->flags; bool is_dir = d_is_dir(old_dentry); struct inode *source = old_dentry->d_inode; @@ -5261,7 +5261,7 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, struct path old_path, new_path; struct qstr old_last, new_last; int old_type, new_type; - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET | LOOKUP_CREATE; bool should_retry = false; @@ -5369,7 +5369,7 @@ exit4: exit3: unlock_rename(new_path.dentry, old_path.dentry); exit_lock_rename: - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; diff --git a/fs/open.c b/fs/open.c index 3d64372ecc67..fdaa6f08f6f4 100644 --- a/fs/open.c +++ b/fs/open.c @@ -631,7 +631,7 @@ out: int chmod_common(const struct path *path, umode_t mode) { struct inode *inode = path->dentry->d_inode; - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; struct iattr newattrs; int error; @@ -651,7 +651,7 @@ retry_deleg: &newattrs, &delegated_inode); out_unlock: inode_unlock(inode); - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; @@ -756,7 +756,7 @@ int chown_common(const struct path *path, uid_t user, gid_t group) struct mnt_idmap *idmap; struct user_namespace *fs_userns; struct inode *inode = path->dentry->d_inode; - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; int error; struct iattr newattrs; kuid_t uid; @@ -791,7 +791,7 @@ retry_deleg: error = notify_change(idmap, path->dentry, &newattrs, &delegated_inode); inode_unlock(inode); - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 4050942ab52f..768f027c1428 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -1091,7 +1091,7 @@ int vfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, int acl_type; int error; struct inode *inode = d_inode(dentry); - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; acl_type = posix_acl_type(acl_name); if (acl_type < 0) @@ -1141,7 +1141,7 @@ retry_deleg: out_inode_unlock: inode_unlock(inode); - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; @@ -1212,7 +1212,7 @@ int vfs_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, int acl_type; int error; struct inode *inode = d_inode(dentry); - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; acl_type = posix_acl_type(acl_name); if (acl_type < 0) @@ -1249,7 +1249,7 @@ retry_deleg: out_inode_unlock: inode_unlock(inode); - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; diff --git a/fs/utimes.c b/fs/utimes.c index c7c7958e57b2..bf9f45bdef54 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -22,7 +22,7 @@ int vfs_utimes(const struct path *path, struct timespec64 *times) int error; struct iattr newattrs; struct inode *inode = path->dentry->d_inode; - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; if (times) { if (!nsec_valid(times[0].tv_nsec) || @@ -66,7 +66,7 @@ retry_deleg: error = notify_change(mnt_idmap(path->mnt), path->dentry, &newattrs, &delegated_inode); inode_unlock(inode); - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; diff --git a/fs/xattr.c b/fs/xattr.c index 8851a5ef34f5..32d445fb60aa 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -274,7 +274,7 @@ int __vfs_setxattr_noperm(struct mnt_idmap *idmap, int __vfs_setxattr_locked(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, - int flags, struct inode **delegated_inode) + int flags, struct delegated_inode *delegated_inode) { struct inode *inode = dentry->d_inode; int error; @@ -305,7 +305,7 @@ vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { struct inode *inode = dentry->d_inode; - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; const void *orig_value = value; int error; @@ -322,7 +322,7 @@ retry_deleg: flags, &delegated_inode); inode_unlock(inode); - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; @@ -533,7 +533,7 @@ EXPORT_SYMBOL(__vfs_removexattr); int __vfs_removexattr_locked(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, - struct inode **delegated_inode) + struct delegated_inode *delegated_inode) { struct inode *inode = dentry->d_inode; int error; @@ -567,7 +567,7 @@ vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name) { struct inode *inode = dentry->d_inode; - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; int error; retry_deleg: @@ -576,7 +576,7 @@ retry_deleg: name, &delegated_inode); inode_unlock(inode); - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; diff --git a/include/linux/filelock.h b/include/linux/filelock.h index 47da6aa28d8d..208d108df2d7 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -486,25 +486,35 @@ static inline int break_deleg(struct inode *inode, unsigned int flags) return 0; } -static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode) +struct delegated_inode { + struct inode *di_inode; +}; + +static inline bool is_delegated(struct delegated_inode *di) +{ + return di->di_inode; +} + +static inline int try_break_deleg(struct inode *inode, + struct delegated_inode *di) { int ret; ret = break_deleg(inode, LEASE_BREAK_NONBLOCK); - if (ret == -EWOULDBLOCK && delegated_inode) { - *delegated_inode = inode; + if (ret == -EWOULDBLOCK && di) { + di->di_inode = inode; ihold(inode); } return ret; } -static inline int break_deleg_wait(struct inode **delegated_inode) +static inline int break_deleg_wait(struct delegated_inode *di) { int ret; - ret = break_deleg(*delegated_inode, 0); - iput(*delegated_inode); - *delegated_inode = NULL; + ret = break_deleg(di->di_inode, 0); + iput(di->di_inode); + di->di_inode = NULL; return ret; } @@ -523,6 +533,13 @@ static inline int break_layout(struct inode *inode, bool wait) } #else /* !CONFIG_FILE_LOCKING */ +struct delegated_inode { }; + +static inline bool is_delegated(struct delegated_inode *di) +{ + return false; +} + static inline int break_lease(struct inode *inode, bool wait) { return 0; @@ -533,12 +550,13 @@ static inline int break_deleg(struct inode *inode, unsigned int flags) return 0; } -static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode) +static inline int try_break_deleg(struct inode *inode, + struct delegated_inode *delegated_inode) { return 0; } -static inline int break_deleg_wait(struct inode **delegated_inode) +static inline int break_deleg_wait(struct delegated_inode *delegated_inode) { BUG(); return 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index c895146c1444..909a88e3979d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -80,6 +80,7 @@ struct fs_context; struct fs_parameter_spec; struct file_kattr; struct iomap_ops; +struct delegated_inode; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -2119,10 +2120,10 @@ int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, int vfs_symlink(struct mnt_idmap *, struct inode *, struct dentry *, const char *); int vfs_link(struct dentry *, struct mnt_idmap *, struct inode *, - struct dentry *, struct inode **); + struct dentry *, struct delegated_inode *); int vfs_rmdir(struct mnt_idmap *, struct inode *, struct dentry *); int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *, - struct inode **); + struct delegated_inode *); /** * struct renamedata - contains all information required for renaming @@ -2140,7 +2141,7 @@ struct renamedata { struct dentry *old_dentry; struct dentry *new_parent; struct dentry *new_dentry; - struct inode **delegated_inode; + struct delegated_inode *delegated_inode; unsigned int flags; } __randomize_layout; @@ -3071,7 +3072,7 @@ static inline int bmap(struct inode *inode, sector_t *block) #endif int notify_change(struct mnt_idmap *, struct dentry *, - struct iattr *, struct inode **); + struct iattr *, struct delegated_inode *); int inode_permission(struct mnt_idmap *, struct inode *, int); int generic_permission(struct mnt_idmap *, struct inode *, int); static inline int file_permission(struct file *file, int mask) diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 86b0d47984a1..64e9afe7d647 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -85,12 +85,12 @@ int __vfs_setxattr_noperm(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int); int __vfs_setxattr_locked(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int, - struct inode **); + struct delegated_inode *); int vfs_setxattr(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int); int __vfs_removexattr(struct mnt_idmap *, struct dentry *, const char *); int __vfs_removexattr_locked(struct mnt_idmap *, struct dentry *, - const char *, struct inode **); + const char *, struct delegated_inode *); int vfs_removexattr(struct mnt_idmap *, struct dentry *, const char *); ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); -- cgit v1.2.3 From e12d203b8c880061c0bf0339cad51e5851a33442 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Nov 2025 09:12:47 -0500 Subject: vfs: allow mkdir to wait for delegation break on parent In order to add directory delegation support, we need to break delegations on the parent whenever there is going to be a change in the directory. Add a new delegated_inode parameter to vfs_mkdir. All of the existing callers set that to NULL for now, except for do_mkdirat which will properly block until the lease is gone. Reviewed-by: Jan Kara Reviewed-by: NeilBrown Signed-off-by: Jeff Layton Link: https://patch.msgid.link/20251111-dir-deleg-ro-v6-6-52f3feebb2f2@kernel.org Signed-off-by: Christian Brauner --- drivers/base/devtmpfs.c | 2 +- fs/cachefiles/namei.c | 2 +- fs/ecryptfs/inode.c | 2 +- fs/init.c | 2 +- fs/namei.c | 24 ++++++++++++++++++------ fs/nfsd/nfs4recover.c | 2 +- fs/nfsd/vfs.c | 2 +- fs/overlayfs/overlayfs.h | 2 +- fs/smb/server/vfs.c | 2 +- fs/xfs/scrub/orphanage.c | 2 +- include/linux/fs.h | 2 +- 11 files changed, 28 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 9d4e46ad8352..0e79621cb0f7 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -180,7 +180,7 @@ static int dev_mkdir(const char *name, umode_t mode) if (IS_ERR(dentry)) return PTR_ERR(dentry); - dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode); + dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode, NULL); if (!IS_ERR(dentry)) /* mark as kernel-created inode */ d_inode(dentry)->i_private = &thread; diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index d1edb2ac3837..50c0f9c76d1f 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -130,7 +130,7 @@ retry: goto mkdir_error; ret = cachefiles_inject_write_error(); if (ret == 0) - subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700); + subdir = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), subdir, 0700, NULL); else subdir = ERR_PTR(ret); if (IS_ERR(subdir)) { diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index ed1394da8d6b..35830b3144f8 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -508,7 +508,7 @@ static struct dentry *ecryptfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, goto out; lower_dentry = vfs_mkdir(&nop_mnt_idmap, lower_dir, - lower_dentry, mode); + lower_dentry, mode, NULL); rc = PTR_ERR(lower_dentry); if (IS_ERR(lower_dentry)) goto out; diff --git a/fs/init.c b/fs/init.c index 07f592ccdba8..895f8a09a71a 100644 --- a/fs/init.c +++ b/fs/init.c @@ -233,7 +233,7 @@ int __init init_mkdir(const char *pathname, umode_t mode) error = security_path_mkdir(&path, dentry, mode); if (!error) { dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, - dentry, mode); + dentry, mode, NULL); if (IS_ERR(dentry)) error = PTR_ERR(dentry); } diff --git a/fs/namei.c b/fs/namei.c index 5bcf3e93d350..76c0587d991f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4407,10 +4407,11 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d /** * vfs_mkdir - create directory returning correct dentry if possible - * @idmap: idmap of the mount the inode was found from - * @dir: inode of the parent directory - * @dentry: dentry of the child directory - * @mode: mode of the child directory + * @idmap: idmap of the mount the inode was found from + * @dir: inode of the parent directory + * @dentry: dentry of the child directory + * @mode: mode of the child directory + * @delegated_inode: returns parent inode, if the inode is delegated. * * Create a directory. * @@ -4427,7 +4428,8 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d * In case of an error the dentry is dput() and an ERR_PTR() is returned. */ struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode) + struct dentry *dentry, umode_t mode, + struct delegated_inode *delegated_inode) { int error; unsigned max_links = dir->i_sb->s_max_links; @@ -4450,6 +4452,10 @@ struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, if (max_links && dir->i_nlink >= max_links) goto err; + error = try_break_deleg(dir, delegated_inode); + if (error) + goto err; + de = dir->i_op->mkdir(idmap, dir, dentry, mode); error = PTR_ERR(de); if (IS_ERR(de)) @@ -4473,6 +4479,7 @@ int do_mkdirat(int dfd, struct filename *name, umode_t mode) struct path path; int error; unsigned int lookup_flags = LOOKUP_DIRECTORY; + struct delegated_inode delegated_inode = { }; retry: dentry = filename_create(dfd, name, &path, lookup_flags); @@ -4484,11 +4491,16 @@ retry: mode_strip_umask(path.dentry->d_inode, mode)); if (!error) { dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, - dentry, mode); + dentry, mode, &delegated_inode); if (IS_ERR(dentry)) error = PTR_ERR(dentry); } end_creating_path(&path, dentry); + if (is_delegated(&delegated_inode)) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry; + } if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index e2b9472e5c78..1f56834b2072 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -213,7 +213,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) * as well be forgiving and just succeed silently. */ goto out_put; - dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU); + dentry = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, 0700, NULL); if (IS_ERR(dentry)) status = PTR_ERR(dentry); out_put: diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 9cb20d4aeab1..97aef140cbf5 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1558,7 +1558,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_check_ignore_resizing(iap); break; case S_IFDIR: - dchild = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode); + dchild = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode, NULL); if (IS_ERR(dchild)) { host_err = PTR_ERR(dchild); } else if (d_is_negative(dchild)) { diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index c8fd5951fc5e..0f65f9a5d54d 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -248,7 +248,7 @@ static inline struct dentry *ovl_do_mkdir(struct ovl_fs *ofs, { struct dentry *ret; - ret = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); + ret = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, NULL); pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(ret)); return ret; } diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 891ed2dc2b73..3d2190f26623 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -230,7 +230,7 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode) idmap = mnt_idmap(path.mnt); mode |= S_IFDIR; d = dentry; - dentry = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode); + dentry = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode, NULL); if (IS_ERR(dentry)) err = PTR_ERR(dentry); else if (d_is_negative(dentry)) diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c index 9c12cb844231..91c9d07b97f3 100644 --- a/fs/xfs/scrub/orphanage.c +++ b/fs/xfs/scrub/orphanage.c @@ -167,7 +167,7 @@ xrep_orphanage_create( */ if (d_really_is_negative(orphanage_dentry)) { orphanage_dentry = vfs_mkdir(&nop_mnt_idmap, root_inode, - orphanage_dentry, 0750); + orphanage_dentry, 0750, NULL); error = PTR_ERR(orphanage_dentry); if (IS_ERR(orphanage_dentry)) goto out_unlock_root; diff --git a/include/linux/fs.h b/include/linux/fs.h index 909a88e3979d..20bb4c8a4e8e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2114,7 +2114,7 @@ bool inode_owner_or_capable(struct mnt_idmap *idmap, int vfs_create(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, bool); struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *, - struct dentry *, umode_t); + struct dentry *, umode_t, struct delegated_inode *); int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); int vfs_symlink(struct mnt_idmap *, struct inode *, -- cgit v1.2.3 From 4fa76319cd0cc97ca54ff71c94814dc5b1983ad2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Nov 2025 09:12:48 -0500 Subject: vfs: allow rmdir to wait for delegation break on parent In order to add directory delegation support, we need to break delegations on the parent whenever there is going to be a change in the directory. Add a delegated_inode struct to vfs_rmdir() and populate that pointer with the parent inode if it's non-NULL. Most existing in-kernel callers pass in a NULL pointer. Reviewed-by: Jan Kara Reviewed-by: NeilBrown Signed-off-by: Jeff Layton Link: https://patch.msgid.link/20251111-dir-deleg-ro-v6-7-52f3feebb2f2@kernel.org Signed-off-by: Christian Brauner --- drivers/base/devtmpfs.c | 2 +- fs/ecryptfs/inode.c | 2 +- fs/namei.c | 22 +++++++++++++++++----- fs/nfsd/nfs4recover.c | 4 ++-- fs/nfsd/vfs.c | 2 +- fs/overlayfs/overlayfs.h | 2 +- fs/smb/server/vfs.c | 4 ++-- include/linux/fs.h | 3 ++- 8 files changed, 27 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 0e79621cb0f7..104025104ef7 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -261,7 +261,7 @@ static int dev_rmdir(const char *name) return PTR_ERR(dentry); if (d_inode(dentry)->i_private == &thread) err = vfs_rmdir(&nop_mnt_idmap, d_inode(parent.dentry), - dentry); + dentry, NULL); else err = -EPERM; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 35830b3144f8..88631291b325 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -540,7 +540,7 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) if (d_unhashed(lower_dentry)) rc = -EINVAL; else - rc = vfs_rmdir(&nop_mnt_idmap, lower_dir, lower_dentry); + rc = vfs_rmdir(&nop_mnt_idmap, lower_dir, lower_dentry, NULL); } if (!rc) { clear_nlink(d_inode(dentry)); diff --git a/fs/namei.c b/fs/namei.c index 76c0587d991f..9e0393a92091 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4522,9 +4522,10 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) /** * vfs_rmdir - remove directory - * @idmap: idmap of the mount the inode was found from - * @dir: inode of the parent directory - * @dentry: dentry of the child directory + * @idmap: idmap of the mount the inode was found from + * @dir: inode of the parent directory + * @dentry: dentry of the child directory + * @delegated_inode: returns parent inode, if it's delegated. * * Remove a directory. * @@ -4535,7 +4536,7 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) * raw inode simply pass @nop_mnt_idmap. */ int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry) + struct dentry *dentry, struct delegated_inode *delegated_inode) { int error = may_delete(idmap, dir, dentry, 1); @@ -4557,6 +4558,10 @@ int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir, if (error) goto out; + error = try_break_deleg(dir, delegated_inode); + if (error) + goto out; + error = dir->i_op->rmdir(dir, dentry); if (error) goto out; @@ -4583,6 +4588,7 @@ int do_rmdir(int dfd, struct filename *name) struct qstr last; int type; unsigned int lookup_flags = 0; + struct delegated_inode delegated_inode = { }; retry: error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type); if (error) @@ -4612,7 +4618,8 @@ retry: error = security_path_rmdir(&path, dentry); if (error) goto exit4; - error = vfs_rmdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry); + error = vfs_rmdir(mnt_idmap(path.mnt), path.dentry->d_inode, + dentry, &delegated_inode); exit4: dput(dentry); exit3: @@ -4620,6 +4627,11 @@ exit3: mnt_drop_write(path.mnt); exit2: path_put(&path); + if (is_delegated(&delegated_inode)) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry; + } if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 1f56834b2072..30bae93931d9 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -337,7 +337,7 @@ nfsd4_unlink_clid_dir(char *name, struct nfsd_net *nn) status = -ENOENT; if (d_really_is_negative(dentry)) goto out; - status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry); + status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry, NULL); out: dput(dentry); out_unlock: @@ -427,7 +427,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) if (nfs4_has_reclaimed_state(name, nn)) goto out_free; - status = vfs_rmdir(&nop_mnt_idmap, d_inode(parent), child); + status = vfs_rmdir(&nop_mnt_idmap, d_inode(parent), child, NULL); if (status) printk("failed to remove client recovery directory %pd\n", child); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 97aef140cbf5..c400ea94ff2e 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -2108,7 +2108,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, break; } } else { - host_err = vfs_rmdir(&nop_mnt_idmap, dirp, rdentry); + host_err = vfs_rmdir(&nop_mnt_idmap, dirp, rdentry, NULL); } fh_fill_post_attrs(fhp); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 0f65f9a5d54d..d215d7349489 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -206,7 +206,7 @@ static inline int ovl_do_notify_change(struct ovl_fs *ofs, static inline int ovl_do_rmdir(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry) { - int err = vfs_rmdir(ovl_upper_mnt_idmap(ofs), dir, dentry); + int err = vfs_rmdir(ovl_upper_mnt_idmap(ofs), dir, dentry, NULL); pr_debug("rmdir(%pd2) = %i\n", dentry, err); return err; diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 3d2190f26623..c5f0f3170d58 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -609,7 +609,7 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path) idmap = mnt_idmap(path->mnt); if (S_ISDIR(d_inode(path->dentry)->i_mode)) { - err = vfs_rmdir(idmap, d_inode(parent), path->dentry); + err = vfs_rmdir(idmap, d_inode(parent), path->dentry, NULL); if (err && err != -ENOTEMPTY) ksmbd_debug(VFS, "rmdir failed, err %d\n", err); } else { @@ -1090,7 +1090,7 @@ int ksmbd_vfs_unlink(struct file *filp) dget(dentry); if (S_ISDIR(d_inode(dentry)->i_mode)) - err = vfs_rmdir(idmap, d_inode(dir), dentry); + err = vfs_rmdir(idmap, d_inode(dir), dentry, NULL); else err = vfs_unlink(idmap, d_inode(dir), dentry, NULL); diff --git a/include/linux/fs.h b/include/linux/fs.h index 20bb4c8a4e8e..12873214e1c7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2121,7 +2121,8 @@ int vfs_symlink(struct mnt_idmap *, struct inode *, struct dentry *, const char *); int vfs_link(struct dentry *, struct mnt_idmap *, struct inode *, struct dentry *, struct delegated_inode *); -int vfs_rmdir(struct mnt_idmap *, struct inode *, struct dentry *); +int vfs_rmdir(struct mnt_idmap *, struct inode *, struct dentry *, + struct delegated_inode *); int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *, struct delegated_inode *); -- cgit v1.2.3 From 85bbffcad7307e2ca6136be657cc21b0e1c42241 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Nov 2025 09:12:50 -0500 Subject: vfs: clean up argument list for vfs_create() As Neil points out: "I would be in favour of dropping the "dir" arg because it is always d_inode(dentry->d_parent) which is stable." ...and... "Also *every* caller of vfs_create() passes ".excl = true". So maybe we don't need that arg at all." Drop both arguments from vfs_create() and fix up the callers. Reviewed-by: Jan Kara Reviewed-by: NeilBrown Signed-off-by: Jeff Layton Link: https://patch.msgid.link/20251111-dir-deleg-ro-v6-9-52f3feebb2f2@kernel.org Signed-off-by: Christian Brauner --- fs/ecryptfs/inode.c | 3 +-- fs/namei.c | 11 ++++------- fs/nfsd/nfs3proc.c | 2 +- fs/nfsd/vfs.c | 3 +-- fs/open.c | 4 +--- fs/overlayfs/overlayfs.h | 2 +- fs/smb/server/vfs.c | 3 +-- include/linux/fs.h | 3 +-- 8 files changed, 11 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 88631291b325..d109e3763a88 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -188,8 +188,7 @@ ecryptfs_do_create(struct inode *directory_inode, rc = lock_parent(ecryptfs_dentry, &lower_dentry, &lower_dir); if (!rc) - rc = vfs_create(&nop_mnt_idmap, lower_dir, - lower_dentry, mode, true); + rc = vfs_create(&nop_mnt_idmap, lower_dentry, mode); if (rc) { printk(KERN_ERR "%s: Failure to create dentry in lower fs; " "rc = [%d]\n", __func__, rc); diff --git a/fs/namei.c b/fs/namei.c index f439429bdfa2..9586c6aba6ea 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3461,10 +3461,8 @@ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap, /** * vfs_create - create new file * @idmap: idmap of the mount the inode was found from - * @dir: inode of the parent directory * @dentry: dentry of the child file * @mode: mode of the child file - * @want_excl: whether the file must not yet exist * * Create a new file. * @@ -3474,9 +3472,9 @@ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap, * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ -int vfs_create(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode, bool want_excl) +int vfs_create(struct mnt_idmap *idmap, struct dentry *dentry, umode_t mode) { + struct inode *dir = d_inode(dentry->d_parent); int error; error = may_create(idmap, dir, dentry); @@ -3490,7 +3488,7 @@ int vfs_create(struct mnt_idmap *idmap, struct inode *dir, error = security_inode_create(dir, dentry, mode); if (error) return error; - error = dir->i_op->create(idmap, dir, dentry, mode, want_excl); + error = dir->i_op->create(idmap, dir, dentry, mode, true); if (!error) fsnotify_create(dir, dentry); return error; @@ -4383,8 +4381,7 @@ retry: idmap = mnt_idmap(path.mnt); switch (mode & S_IFMT) { case 0: case S_IFREG: - error = vfs_create(idmap, path.dentry->d_inode, - dentry, mode, true); + error = vfs_create(idmap, dentry, mode); if (!error) security_path_post_mknod(idmap, dentry); break; diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index b6d03e1ef5f7..30ea7ffa2aff 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -344,7 +344,7 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, status = fh_fill_pre_attrs(fhp); if (status != nfs_ok) goto out; - host_err = vfs_create(&nop_mnt_idmap, inode, child, iap->ia_mode, true); + host_err = vfs_create(&nop_mnt_idmap, child, iap->ia_mode); if (host_err < 0) { status = nfserrno(host_err); goto out; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c400ea94ff2e..464fd54675f3 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1552,8 +1552,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, err = 0; switch (type) { case S_IFREG: - host_err = vfs_create(&nop_mnt_idmap, dirp, dchild, - iap->ia_mode, true); + host_err = vfs_create(&nop_mnt_idmap, dchild, iap->ia_mode); if (!host_err) nfsd_check_ignore_resizing(iap); break; diff --git a/fs/open.c b/fs/open.c index fdaa6f08f6f4..e440f58e3ce8 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1171,9 +1171,7 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode, if (IS_ERR(f)) return f; - error = vfs_create(mnt_idmap(path->mnt), - d_inode(path->dentry->d_parent), - path->dentry, mode, true); + error = vfs_create(mnt_idmap(path->mnt), path->dentry, mode); if (!error) error = vfs_open(path, f); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index d215d7349489..2bdc434941eb 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -235,7 +235,7 @@ static inline int ovl_do_create(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, umode_t mode) { - int err = vfs_create(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, true); + int err = vfs_create(ovl_upper_mnt_idmap(ofs), dentry, mode); pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err); return err; diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index c5f0f3170d58..83ece2de4b23 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -188,8 +188,7 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode) } mode |= S_IFREG; - err = vfs_create(mnt_idmap(path.mnt), d_inode(path.dentry), - dentry, mode, true); + err = vfs_create(mnt_idmap(path.mnt), dentry, mode); if (!err) { ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(dentry)); diff --git a/include/linux/fs.h b/include/linux/fs.h index 12873214e1c7..21876ef1fec9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2111,8 +2111,7 @@ bool inode_owner_or_capable(struct mnt_idmap *idmap, /* * VFS helper functions.. */ -int vfs_create(struct mnt_idmap *, struct inode *, - struct dentry *, umode_t, bool); +int vfs_create(struct mnt_idmap *, struct dentry *, umode_t); struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, struct delegated_inode *); int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, -- cgit v1.2.3 From c826229c6a82fe1fe7b7752692f87a881eb4b545 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Nov 2025 09:12:51 -0500 Subject: vfs: make vfs_create break delegations on parent directory In order to add directory delegation support, we need to break delegations on the parent whenever there is going to be a change in the directory. Add a delegated_inode parameter to vfs_create. Most callers are converted to pass in NULL, but do_mknodat() is changed to wait for a delegation break if there is one. Reviewed-by: Jan Kara Reviewed-by: NeilBrown Signed-off-by: Jeff Layton Link: https://patch.msgid.link/20251111-dir-deleg-ro-v6-10-52f3feebb2f2@kernel.org Signed-off-by: Christian Brauner --- fs/ecryptfs/inode.c | 2 +- fs/namei.c | 15 +++++++++++++-- fs/nfsd/nfs3proc.c | 2 +- fs/nfsd/vfs.c | 2 +- fs/open.c | 2 +- fs/overlayfs/overlayfs.h | 2 +- fs/smb/server/vfs.c | 2 +- include/linux/fs.h | 3 ++- 8 files changed, 21 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index d109e3763a88..3341f00dd087 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -188,7 +188,7 @@ ecryptfs_do_create(struct inode *directory_inode, rc = lock_parent(ecryptfs_dentry, &lower_dentry, &lower_dir); if (!rc) - rc = vfs_create(&nop_mnt_idmap, lower_dentry, mode); + rc = vfs_create(&nop_mnt_idmap, lower_dentry, mode, NULL); if (rc) { printk(KERN_ERR "%s: Failure to create dentry in lower fs; " "rc = [%d]\n", __func__, rc); diff --git a/fs/namei.c b/fs/namei.c index 9586c6aba6ea..b20f053374a5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3463,6 +3463,7 @@ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap, * @idmap: idmap of the mount the inode was found from * @dentry: dentry of the child file * @mode: mode of the child file + * @di: returns parent inode, if the inode is delegated. * * Create a new file. * @@ -3472,7 +3473,8 @@ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap, * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ -int vfs_create(struct mnt_idmap *idmap, struct dentry *dentry, umode_t mode) +int vfs_create(struct mnt_idmap *idmap, struct dentry *dentry, umode_t mode, + struct delegated_inode *di) { struct inode *dir = d_inode(dentry->d_parent); int error; @@ -3486,6 +3488,9 @@ int vfs_create(struct mnt_idmap *idmap, struct dentry *dentry, umode_t mode) mode = vfs_prepare_mode(idmap, dir, mode, S_IALLUGO, S_IFREG); error = security_inode_create(dir, dentry, mode); + if (error) + return error; + error = try_break_deleg(dir, di); if (error) return error; error = dir->i_op->create(idmap, dir, dentry, mode, true); @@ -4358,6 +4363,7 @@ static int may_mknod(umode_t mode) static int do_mknodat(int dfd, struct filename *name, umode_t mode, unsigned int dev) { + struct delegated_inode di = { }; struct mnt_idmap *idmap; struct dentry *dentry; struct path path; @@ -4381,7 +4387,7 @@ retry: idmap = mnt_idmap(path.mnt); switch (mode & S_IFMT) { case 0: case S_IFREG: - error = vfs_create(idmap, dentry, mode); + error = vfs_create(idmap, dentry, mode, &di); if (!error) security_path_post_mknod(idmap, dentry); break; @@ -4396,6 +4402,11 @@ retry: } out2: end_creating_path(&path, dentry); + if (is_delegated(&di)) { + error = break_deleg_wait(&di); + if (!error) + goto retry; + } if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 30ea7ffa2aff..2cb972b5ed99 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -344,7 +344,7 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, status = fh_fill_pre_attrs(fhp); if (status != nfs_ok) goto out; - host_err = vfs_create(&nop_mnt_idmap, child, iap->ia_mode); + host_err = vfs_create(&nop_mnt_idmap, child, iap->ia_mode, NULL); if (host_err < 0) { status = nfserrno(host_err); goto out; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 464fd54675f3..de5f46f8c6d3 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1552,7 +1552,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, err = 0; switch (type) { case S_IFREG: - host_err = vfs_create(&nop_mnt_idmap, dchild, iap->ia_mode); + host_err = vfs_create(&nop_mnt_idmap, dchild, iap->ia_mode, NULL); if (!host_err) nfsd_check_ignore_resizing(iap); break; diff --git a/fs/open.c b/fs/open.c index e440f58e3ce8..92cf2e11781b 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1171,7 +1171,7 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode, if (IS_ERR(f)) return f; - error = vfs_create(mnt_idmap(path->mnt), path->dentry, mode); + error = vfs_create(mnt_idmap(path->mnt), path->dentry, mode, NULL); if (!error) error = vfs_open(path, f); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 2bdc434941eb..e30441cc9c63 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -235,7 +235,7 @@ static inline int ovl_do_create(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, umode_t mode) { - int err = vfs_create(ovl_upper_mnt_idmap(ofs), dentry, mode); + int err = vfs_create(ovl_upper_mnt_idmap(ofs), dentry, mode, NULL); pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err); return err; diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 83ece2de4b23..3747851b61c8 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -188,7 +188,7 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode) } mode |= S_IFREG; - err = vfs_create(mnt_idmap(path.mnt), dentry, mode); + err = vfs_create(mnt_idmap(path.mnt), dentry, mode, NULL); if (!err) { ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(dentry)); diff --git a/include/linux/fs.h b/include/linux/fs.h index 21876ef1fec9..83b05aec4e10 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2111,7 +2111,8 @@ bool inode_owner_or_capable(struct mnt_idmap *idmap, /* * VFS helper functions.. */ -int vfs_create(struct mnt_idmap *, struct dentry *, umode_t); +int vfs_create(struct mnt_idmap *, struct dentry *, umode_t, + struct delegated_inode *); struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, struct delegated_inode *); int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, -- cgit v1.2.3 From e8960c1b2ee9ba75d65492b8e90e851d11e5f215 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Nov 2025 09:12:52 -0500 Subject: vfs: make vfs_mknod break delegations on parent directory In order to add directory delegation support, we need to break delegations on the parent whenever there is going to be a change in the directory. Add a new delegated_inode pointer to vfs_mknod() and have the appropriate callers wait when there is an outstanding delegation. All other callers just set the pointer to NULL. Reviewed-by: Jan Kara Reviewed-by: NeilBrown Signed-off-by: Jeff Layton Link: https://patch.msgid.link/20251111-dir-deleg-ro-v6-11-52f3feebb2f2@kernel.org Signed-off-by: Christian Brauner --- drivers/base/devtmpfs.c | 2 +- fs/ecryptfs/inode.c | 2 +- fs/init.c | 2 +- fs/namei.c | 23 +++++++++++++++-------- fs/nfsd/vfs.c | 2 +- fs/overlayfs/overlayfs.h | 2 +- include/linux/fs.h | 4 ++-- net/unix/af_unix.c | 2 +- 8 files changed, 23 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 104025104ef7..2f576ecf1832 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -231,7 +231,7 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid, return PTR_ERR(dentry); err = vfs_mknod(&nop_mnt_idmap, d_inode(path.dentry), dentry, mode, - dev->devt); + dev->devt, NULL); if (!err) { struct iattr newattrs; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 3341f00dd087..83f06452476d 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -564,7 +564,7 @@ ecryptfs_mknod(struct mnt_idmap *idmap, struct inode *dir, rc = lock_parent(dentry, &lower_dentry, &lower_dir); if (!rc) rc = vfs_mknod(&nop_mnt_idmap, lower_dir, - lower_dentry, mode, dev); + lower_dentry, mode, dev, NULL); if (rc || d_really_is_negative(lower_dentry)) goto out; rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb); diff --git a/fs/init.c b/fs/init.c index 895f8a09a71a..4f02260dd65b 100644 --- a/fs/init.c +++ b/fs/init.c @@ -157,7 +157,7 @@ int __init init_mknod(const char *filename, umode_t mode, unsigned int dev) error = security_path_mknod(&path, dentry, mode, dev); if (!error) error = vfs_mknod(mnt_idmap(path.mnt), path.dentry->d_inode, - dentry, mode, new_decode_dev(dev)); + dentry, mode, new_decode_dev(dev), NULL); end_creating_path(&path, dentry); return error; } diff --git a/fs/namei.c b/fs/namei.c index b20f053374a5..e9616134390f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4295,13 +4295,15 @@ inline struct dentry *start_creating_user_path( } EXPORT_SYMBOL(start_creating_user_path); + /** * vfs_mknod - create device node or file - * @idmap: idmap of the mount the inode was found from - * @dir: inode of the parent directory - * @dentry: dentry of the child device node - * @mode: mode of the child device node - * @dev: device number of device to create + * @idmap: idmap of the mount the inode was found from + * @dir: inode of the parent directory + * @dentry: dentry of the child device node + * @mode: mode of the child device node + * @dev: device number of device to create + * @delegated_inode: returns parent inode, if the inode is delegated. * * Create a device node or file. * @@ -4312,7 +4314,8 @@ EXPORT_SYMBOL(start_creating_user_path); * raw inode simply pass @nop_mnt_idmap. */ int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode, dev_t dev) + struct dentry *dentry, umode_t mode, dev_t dev, + struct delegated_inode *delegated_inode) { bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV; int error = may_create(idmap, dir, dentry); @@ -4336,6 +4339,10 @@ int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir, if (error) return error; + error = try_break_deleg(dir, delegated_inode); + if (error) + return error; + error = dir->i_op->mknod(idmap, dir, dentry, mode, dev); if (!error) fsnotify_create(dir, dentry); @@ -4393,11 +4400,11 @@ retry: break; case S_IFCHR: case S_IFBLK: error = vfs_mknod(idmap, path.dentry->d_inode, - dentry, mode, new_decode_dev(dev)); + dentry, mode, new_decode_dev(dev), &di); break; case S_IFIFO: case S_IFSOCK: error = vfs_mknod(idmap, path.dentry->d_inode, - dentry, mode, 0); + dentry, mode, 0, &di); break; } out2: diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index de5f46f8c6d3..6684935007b1 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1573,7 +1573,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, case S_IFIFO: case S_IFSOCK: host_err = vfs_mknod(&nop_mnt_idmap, dirp, dchild, - iap->ia_mode, rdev); + iap->ia_mode, rdev, NULL); break; default: printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n", diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index e30441cc9c63..afd95721f76e 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -257,7 +257,7 @@ static inline int ovl_do_mknod(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { - int err = vfs_mknod(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, dev); + int err = vfs_mknod(ovl_upper_mnt_idmap(ofs), dir, dentry, mode, dev, NULL); pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", dentry, mode, dev, err); return err; diff --git a/include/linux/fs.h b/include/linux/fs.h index 83b05aec4e10..1a5d86cfafaa 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2116,7 +2116,7 @@ int vfs_create(struct mnt_idmap *, struct dentry *, umode_t, struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, struct delegated_inode *); int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, - umode_t, dev_t); + umode_t, dev_t, struct delegated_inode *); int vfs_symlink(struct mnt_idmap *, struct inode *, struct dentry *, const char *); int vfs_link(struct dentry *, struct mnt_idmap *, struct inode *, @@ -2152,7 +2152,7 @@ static inline int vfs_whiteout(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry) { return vfs_mknod(idmap, dir, dentry, S_IFCHR | WHITEOUT_MODE, - WHITEOUT_DEV); + WHITEOUT_DEV, NULL); } struct file *kernel_tmpfile_open(struct mnt_idmap *idmap, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 768098dec231..db1fd8d6a84c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1399,7 +1399,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr, idmap = mnt_idmap(parent.mnt); err = security_path_mknod(&parent, dentry, mode, 0); if (!err) - err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0); + err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0, NULL); if (err) goto out_path; err = mutex_lock_interruptible(&u->bindlock); -- cgit v1.2.3 From 92bf53577f01aad988f7f39f69163b41f94cfb7d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Nov 2025 09:12:53 -0500 Subject: vfs: make vfs_symlink break delegations on parent dir In order to add directory delegation support, we must break delegations on the parent on any change to the directory. Add a delegated_inode parameter to vfs_symlink() and have it break the delegation. do_symlinkat() can then wait on the delegation break before proceeding. Reviewed-by: Jan Kara Reviewed-by: NeilBrown Signed-off-by: Jeff Layton Link: https://patch.msgid.link/20251111-dir-deleg-ro-v6-12-52f3feebb2f2@kernel.org Signed-off-by: Christian Brauner --- fs/ecryptfs/inode.c | 2 +- fs/init.c | 2 +- fs/namei.c | 16 ++++++++++++++-- fs/nfsd/vfs.c | 2 +- fs/overlayfs/overlayfs.h | 2 +- include/linux/fs.h | 2 +- 6 files changed, 19 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 83f06452476d..ba15e7359dfa 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -479,7 +479,7 @@ static int ecryptfs_symlink(struct mnt_idmap *idmap, if (rc) goto out_lock; rc = vfs_symlink(&nop_mnt_idmap, lower_dir, lower_dentry, - encoded_symname); + encoded_symname, NULL); kfree(encoded_symname); if (rc || d_really_is_negative(lower_dentry)) goto out_lock; diff --git a/fs/init.c b/fs/init.c index 4f02260dd65b..e0f5429c0a49 100644 --- a/fs/init.c +++ b/fs/init.c @@ -209,7 +209,7 @@ int __init init_symlink(const char *oldname, const char *newname) error = security_path_symlink(&path, dentry, oldname); if (!error) error = vfs_symlink(mnt_idmap(path.mnt), path.dentry->d_inode, - dentry, oldname); + dentry, oldname, NULL); end_creating_path(&path, dentry); return error; } diff --git a/fs/namei.c b/fs/namei.c index e9616134390f..d5ab28947b2b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4845,6 +4845,7 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname) * @dir: inode of the parent directory * @dentry: dentry of the child symlink file * @oldname: name of the file to link to + * @delegated_inode: returns victim inode, if the inode is delegated. * * Create a symlink. * @@ -4855,7 +4856,8 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname) * raw inode simply pass @nop_mnt_idmap. */ int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, const char *oldname) + struct dentry *dentry, const char *oldname, + struct delegated_inode *delegated_inode) { int error; @@ -4870,6 +4872,10 @@ int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir, if (error) return error; + error = try_break_deleg(dir, delegated_inode); + if (error) + return error; + error = dir->i_op->symlink(idmap, dir, dentry, oldname); if (!error) fsnotify_create(dir, dentry); @@ -4883,6 +4889,7 @@ int do_symlinkat(struct filename *from, int newdfd, struct filename *to) struct dentry *dentry; struct path path; unsigned int lookup_flags = 0; + struct delegated_inode delegated_inode = { }; if (IS_ERR(from)) { error = PTR_ERR(from); @@ -4897,8 +4904,13 @@ retry: error = security_path_symlink(&path, dentry, from->name); if (!error) error = vfs_symlink(mnt_idmap(path.mnt), path.dentry->d_inode, - dentry, from->name); + dentry, from->name, &delegated_inode); end_creating_path(&path, dentry); + if (is_delegated(&delegated_inode)) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry; + } if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 6684935007b1..28710da4cce7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1742,7 +1742,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, err = fh_fill_pre_attrs(fhp); if (err != nfs_ok) goto out_unlock; - host_err = vfs_symlink(&nop_mnt_idmap, d_inode(dentry), dnew, path); + host_err = vfs_symlink(&nop_mnt_idmap, d_inode(dentry), dnew, path, NULL); err = nfserrno(host_err); cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); if (!err) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index afd95721f76e..5065961bd370 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -267,7 +267,7 @@ static inline int ovl_do_symlink(struct ovl_fs *ofs, struct inode *dir, struct dentry *dentry, const char *oldname) { - int err = vfs_symlink(ovl_upper_mnt_idmap(ofs), dir, dentry, oldname); + int err = vfs_symlink(ovl_upper_mnt_idmap(ofs), dir, dentry, oldname, NULL); pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err); return err; diff --git a/include/linux/fs.h b/include/linux/fs.h index 1a5d86cfafaa..64323e618724 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2118,7 +2118,7 @@ struct dentry *vfs_mkdir(struct mnt_idmap *, struct inode *, int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t, struct delegated_inode *); int vfs_symlink(struct mnt_idmap *, struct inode *, - struct dentry *, const char *); + struct dentry *, const char *, struct delegated_inode *); int vfs_link(struct dentry *, struct mnt_idmap *, struct inode *, struct dentry *, struct delegated_inode *); int vfs_rmdir(struct mnt_idmap *, struct inode *, struct dentry *, -- cgit v1.2.3 From 1602bad16d7df82faca6d7c70821117684a66f49 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 11 Nov 2025 09:12:58 -0500 Subject: vfs: expose delegation support to userland Now that support for recallable directory delegations is available, expose this functionality to userland with new F_SETDELEG and F_GETDELEG commands for fcntl(). Note that this also allows userland to request a FL_DELEG type lease on files too. Userland applications that do will get signalled when there are metadata changes in addition to just data changes (which is a limitation of FL_LEASE leases). These commands accept a new "struct delegation" argument that contains a flags field for future expansion. Signed-off-by: Jeff Layton Link: https://patch.msgid.link/20251111-dir-deleg-ro-v6-17-52f3feebb2f2@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/fcntl.c | 13 +++++++++++++ fs/locks.c | 45 ++++++++++++++++++++++++++++++++++++++++----- include/linux/filelock.h | 12 ++++++++++++ include/uapi/linux/fcntl.h | 11 +++++++++++ 4 files changed, 76 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/fcntl.c b/fs/fcntl.c index 72f8433d9109..f93dbca08435 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -445,6 +445,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, struct file *filp) { void __user *argp = (void __user *)arg; + struct delegation deleg; int argi = (int)arg; struct flock flock; long err = -EINVAL; @@ -550,6 +551,18 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, case F_SET_RW_HINT: err = fcntl_set_rw_hint(filp, arg); break; + case F_GETDELEG: + if (copy_from_user(&deleg, argp, sizeof(deleg))) + return -EFAULT; + err = fcntl_getdeleg(filp, &deleg); + if (!err && copy_to_user(argp, &deleg, sizeof(deleg))) + return -EFAULT; + break; + case F_SETDELEG: + if (copy_from_user(&deleg, argp, sizeof(deleg))) + return -EFAULT; + err = fcntl_setdeleg(fd, filp, &deleg); + break; default: break; } diff --git a/fs/locks.c b/fs/locks.c index dd290a87f58e..7f4ccc7974bc 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1703,7 +1703,7 @@ EXPORT_SYMBOL(lease_get_mtime); * XXX: sfr & willy disagree over whether F_INPROGRESS * should be returned to userspace. */ -int fcntl_getlease(struct file *filp) +static int __fcntl_getlease(struct file *filp, unsigned int flavor) { struct file_lease *fl; struct inode *inode = file_inode(filp); @@ -1719,7 +1719,8 @@ int fcntl_getlease(struct file *filp) list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) { if (fl->c.flc_file != filp) continue; - type = target_leasetype(fl); + if (fl->c.flc_flags & flavor) + type = target_leasetype(fl); break; } spin_unlock(&ctx->flc_lock); @@ -1730,6 +1731,19 @@ int fcntl_getlease(struct file *filp) return type; } +int fcntl_getlease(struct file *filp) +{ + return __fcntl_getlease(filp, FL_LEASE); +} + +int fcntl_getdeleg(struct file *filp, struct delegation *deleg) +{ + if (deleg->d_flags != 0 || deleg->__pad != 0) + return -EINVAL; + deleg->d_type = __fcntl_getlease(filp, FL_DELEG); + return 0; +} + /** * check_conflicting_open - see if the given file points to an inode that has * an existing open that would conflict with the @@ -2039,13 +2053,13 @@ vfs_setlease(struct file *filp, int arg, struct file_lease **lease, void **priv) } EXPORT_SYMBOL_GPL(vfs_setlease); -static int do_fcntl_add_lease(unsigned int fd, struct file *filp, int arg) +static int do_fcntl_add_lease(unsigned int fd, struct file *filp, unsigned int flavor, int arg) { struct file_lease *fl; struct fasync_struct *new; int error; - fl = lease_alloc(filp, FL_LEASE, arg); + fl = lease_alloc(filp, flavor, arg); if (IS_ERR(fl)) return PTR_ERR(fl); @@ -2081,7 +2095,28 @@ int fcntl_setlease(unsigned int fd, struct file *filp, int arg) if (arg == F_UNLCK) return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp); - return do_fcntl_add_lease(fd, filp, arg); + return do_fcntl_add_lease(fd, filp, FL_LEASE, arg); +} + +/** + * fcntl_setdeleg - sets a delegation on an open file + * @fd: open file descriptor + * @filp: file pointer + * @deleg: delegation request from userland + * + * Call this fcntl to establish a delegation on the file. + * Note that you also need to call %F_SETSIG to + * receive a signal when the lease is broken. + */ +int fcntl_setdeleg(unsigned int fd, struct file *filp, struct delegation *deleg) +{ + /* For now, no flags are supported */ + if (deleg->d_flags != 0 || deleg->__pad != 0) + return -EINVAL; + + if (deleg->d_type == F_UNLCK) + return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp); + return do_fcntl_add_lease(fd, filp, FL_DELEG, deleg->d_type); } /** diff --git a/include/linux/filelock.h b/include/linux/filelock.h index 208d108df2d7..54b824c05299 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -159,6 +159,8 @@ int fcntl_setlk64(unsigned int, struct file *, unsigned int, int fcntl_setlease(unsigned int fd, struct file *filp, int arg); int fcntl_getlease(struct file *filp); +int fcntl_setdeleg(unsigned int fd, struct file *filp, struct delegation *deleg); +int fcntl_getdeleg(struct file *filp, struct delegation *deleg); static inline bool lock_is_unlock(struct file_lock *fl) { @@ -278,6 +280,16 @@ static inline int fcntl_getlease(struct file *filp) return F_UNLCK; } +static inline int fcntl_setdeleg(unsigned int fd, struct file *filp, struct delegation *deleg) +{ + return -EINVAL; +} + +static inline int fcntl_getdeleg(struct file *filp, struct delegation *deleg) +{ + return -EINVAL; +} + static inline bool lock_is_unlock(struct file_lock *fl) { return false; diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 3741ea1b73d8..008fac15e573 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -79,6 +79,17 @@ */ #define RWF_WRITE_LIFE_NOT_SET RWH_WRITE_LIFE_NOT_SET +/* Set/Get delegations */ +#define F_GETDELEG (F_LINUX_SPECIFIC_BASE + 15) +#define F_SETDELEG (F_LINUX_SPECIFIC_BASE + 16) + +/* Argument structure for F_GETDELEG and F_SETDELEG */ +struct delegation { + uint32_t d_flags; /* Must be 0 */ + uint16_t d_type; /* F_RDLCK, F_WRLCK, F_UNLCK */ + uint16_t __pad; /* Must be 0 */ +}; + /* * Types of directory notifications that may be requested. */ -- cgit v1.2.3