From c5bc1b3ff35ae321d018d0c4ba66b062e4fb1e05 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 16 Sep 2022 09:37:51 -0400 Subject: fs: uninline inode_query_iversion Reviewed-by: NeilBrown Reviewed-by: Jan Kara Reviewed-by: Christian Brauner Signed-off-by: Jeff Layton --- include/linux/iversion.h | 38 ++------------------------------------ 1 file changed, 2 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iversion.h b/include/linux/iversion.h index e27bd4f55d84..6755d8b4f20b 100644 --- a/include/linux/iversion.h +++ b/include/linux/iversion.h @@ -234,42 +234,6 @@ inode_peek_iversion(const struct inode *inode) return inode_peek_iversion_raw(inode) >> I_VERSION_QUERIED_SHIFT; } -/** - * inode_query_iversion - read i_version for later use - * @inode: inode from which i_version should be read - * - * Read the inode i_version counter. This should be used by callers that wish - * to store the returned i_version for later comparison. This will guarantee - * that a later query of the i_version will result in a different value if - * anything has changed. - * - * In this implementation, we fetch the current value, set the QUERIED flag and - * then try to swap it into place with a cmpxchg, if it wasn't already set. If - * that fails, we try again with the newly fetched value from the cmpxchg. - */ -static inline u64 -inode_query_iversion(struct inode *inode) -{ - u64 cur, new; - - cur = inode_peek_iversion_raw(inode); - do { - /* If flag is already set, then no need to swap */ - if (cur & I_VERSION_QUERIED) { - /* - * This barrier (and the implicit barrier in the - * cmpxchg below) pairs with the barrier in - * inode_maybe_inc_iversion(). - */ - smp_mb(); - break; - } - - new = cur | I_VERSION_QUERIED; - } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new)); - return cur >> I_VERSION_QUERIED_SHIFT; -} - /* * For filesystems without any sort of change attribute, the best we can * do is fake one up from the ctime: @@ -283,6 +247,8 @@ static inline u64 time_to_chattr(struct timespec64 *t) return chattr; } +u64 inode_query_iversion(struct inode *inode); + /** * inode_eq_iversion_raw - check whether the raw i_version counter has changed * @inode: inode to check -- cgit v1.2.3 From a3bb710383cbca2a0f1f4e5a1c7ef8dde14eff95 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 22 Aug 2022 09:04:04 -0400 Subject: fs: clarify when the i_version counter must be updated The i_version field in the kernel has had different semantics over the decades, but NFSv4 has certain expectations. Update the comments in iversion.h to describe when the i_version must change. Cc: Colin Walters Cc: NeilBrown Cc: Trond Myklebust Cc: Dave Chinner Reviewed-by: Christian Brauner Signed-off-by: Jeff Layton --- include/linux/iversion.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iversion.h b/include/linux/iversion.h index 6755d8b4f20b..f174ff1b59ee 100644 --- a/include/linux/iversion.h +++ b/include/linux/iversion.h @@ -9,8 +9,26 @@ * --------------------------- * The change attribute (i_version) is mandated by NFSv4 and is mostly for * knfsd, but is also used for other purposes (e.g. IMA). The i_version must - * appear different to observers if there was a change to the inode's data or - * metadata since it was last queried. + * appear larger to observers if there was an explicit change to the inode's + * data or metadata since it was last queried. + * + * An explicit change is one that would ordinarily result in a change to the + * inode status change time (aka ctime). i_version must appear to change, even + * if the ctime does not (since the whole point is to avoid missing updates due + * to timestamp granularity). If POSIX or other relevant spec mandates that the + * ctime must change due to an operation, then the i_version counter must be + * incremented as well. + * + * Making the i_version update completely atomic with the operation itself would + * be prohibitively expensive. Traditionally the kernel has updated the times on + * directories after an operation that changes its contents. For regular files, + * the ctime is usually updated before the data is copied into the cache for a + * write. This means that there is a window of time when an observer can + * associate a new timestamp with old file contents. Since the purpose of the + * i_version is to allow for better cache coherency, the i_version must always + * be updated after the results of the operation are visible. Updating it before + * and after a change is also permitted. (Note that no filesystems currently do + * this. Fixing that is a work-in-progress). * * Observers see the i_version as a 64-bit number that never decreases. If it * remains the same since it was last checked, then nothing has changed in the -- cgit v1.2.3 From a1175d6b1bdaf4f74eda47ab18eb44194f9cb796 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sun, 4 Dec 2016 09:29:46 -0500 Subject: vfs: plumb i_version handling into struct kstat The NFS server has a lot of special handling for different types of change attribute access, depending on the underlying filesystem. In most cases, it's doing a getattr anyway and then fetching that value after the fact. Rather that do that, add a new STATX_CHANGE_COOKIE flag that is a kernel-only symbol (for now). If requested and getattr can implement it, it can fill out this field. For IS_I_VERSION inodes, add a generic implementation in vfs_getattr_nosec. Take care to mask STATX_CHANGE_COOKIE off in requests from userland and in the result mask. Since not all filesystems can give the same guarantees of monotonicity, claim a STATX_ATTR_CHANGE_MONOTONIC flag that filesystems can set to indicate that they offer an i_version value that can never go backward. Eventually if we decide to make the i_version available to userland, we can just designate a field for it in struct statx, and move the STATX_CHANGE_COOKIE definition to the uapi header. Reviewed-by: NeilBrown Reviewed-by: Jan Kara Signed-off-by: Jeff Layton --- fs/stat.c | 17 +++++++++++++++-- include/linux/stat.h | 9 +++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/stat.c b/fs/stat.c index d6cc74ca8486..f43afe0081fe 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -122,6 +123,11 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT | STATX_ATTR_DAX); + if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) { + stat->result_mask |= STATX_CHANGE_COOKIE; + stat->change_cookie = inode_query_iversion(inode); + } + mnt_userns = mnt_user_ns(path->mnt); if (inode->i_op->getattr) return inode->i_op->getattr(mnt_userns, path, stat, @@ -602,9 +608,11 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer) memset(&tmp, 0, sizeof(tmp)); - tmp.stx_mask = stat->result_mask; + /* STATX_CHANGE_COOKIE is kernel-only for now */ + tmp.stx_mask = stat->result_mask & ~STATX_CHANGE_COOKIE; tmp.stx_blksize = stat->blksize; - tmp.stx_attributes = stat->attributes; + /* STATX_ATTR_CHANGE_MONOTONIC is kernel-only for now */ + tmp.stx_attributes = stat->attributes & ~STATX_ATTR_CHANGE_MONOTONIC; tmp.stx_nlink = stat->nlink; tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid); tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid); @@ -643,6 +651,11 @@ int do_statx(int dfd, struct filename *filename, unsigned int flags, if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) return -EINVAL; + /* STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests + * from userland. + */ + mask &= ~STATX_CHANGE_COOKIE; + error = vfs_statx(dfd, filename, flags, &stat, mask); if (error) return error; diff --git a/include/linux/stat.h b/include/linux/stat.h index ff277ced50e9..52150570d37a 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -52,6 +52,15 @@ struct kstat { u64 mnt_id; u32 dio_mem_align; u32 dio_offset_align; + u64 change_cookie; }; +/* These definitions are internal to the kernel for now. Mainly used by nfsd. */ + +/* mask values */ +#define STATX_CHANGE_COOKIE 0x40000000U /* Want/got stx_change_attr */ + +/* file attribute values */ +#define STATX_ATTR_CHANGE_MONOTONIC 0x8000000000000000ULL /* version monotonically increases */ + #endif -- cgit v1.2.3 From 58a033c9a3e003e048a0431a296e58c6b363b02b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 5 Oct 2022 06:32:23 -0400 Subject: nfsd: remove fetch_iversion export operation Now that the i_version counter is reported in struct kstat, there is no need for this export operation. Acked-by: Chuck Lever Reviewed-by: NeilBrown Signed-off-by: Jeff Layton --- fs/nfs/export.c | 7 ------- fs/nfsd/nfsfh.c | 3 --- include/linux/exportfs.h | 1 - 3 files changed, 11 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/export.c b/fs/nfs/export.c index 01596f2d0a1e..1a9d5aa51dfb 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -145,17 +145,10 @@ out: return parent; } -static u64 nfs_fetch_iversion(struct inode *inode) -{ - nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE); - return inode_peek_iversion_raw(inode); -} - const struct export_operations nfs_export_ops = { .encode_fh = nfs_encode_fh, .fh_to_dentry = nfs_fh_to_dentry, .get_parent = nfs_get_parent, - .fetch_iversion = nfs_fetch_iversion, .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK| EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS| EXPORT_OP_NOATOMIC_ATTR, diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 3a01c8601712..76ea268dc420 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -778,11 +778,8 @@ u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode) { u64 chattr; - if (inode->i_sb->s_export_op->fetch_iversion) - return inode->i_sb->s_export_op->fetch_iversion(inode); if (stat->result_mask & STATX_CHANGE_COOKIE) { chattr = stat->change_cookie; - if (S_ISREG(inode->i_mode) && !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) { chattr += (u64)stat->ctime.tv_sec << 30; diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index fe848901fcc3..9f4d4bcbf251 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -213,7 +213,6 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); - u64 (*fetch_iversion)(struct inode *); #define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ -- cgit v1.2.3