diff options
Diffstat (limited to 'fs/inode.c')
-rw-r--r-- | fs/inode.c | 236 |
1 files changed, 177 insertions, 59 deletions
diff --git a/fs/inode.c b/fs/inode.c index 8fefb69e1f84..35fd688168c5 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -16,7 +16,6 @@ #include <linux/fsnotify.h> #include <linux/mount.h> #include <linux/posix_acl.h> -#include <linux/prefetch.h> #include <linux/buffer_head.h> /* for inode_has_buffers */ #include <linux/ratelimit.h> #include <linux/list_lru.h> @@ -752,16 +751,11 @@ EXPORT_SYMBOL_GPL(evict_inodes); /** * invalidate_inodes - attempt to free all inodes on a superblock * @sb: superblock to operate on - * @kill_dirty: flag to guide handling of dirty inodes * - * Attempts to free all inodes for a given superblock. If there were any - * busy inodes return a non-zero value, else zero. - * If @kill_dirty is set, discard dirty inodes too, otherwise treat - * them as busy. + * Attempts to free all inodes (including dirty inodes) for a given superblock. */ -int invalidate_inodes(struct super_block *sb, bool kill_dirty) +void invalidate_inodes(struct super_block *sb) { - int busy = 0; struct inode *inode, *next; LIST_HEAD(dispose); @@ -773,14 +767,8 @@ again: spin_unlock(&inode->i_lock); continue; } - if (inode->i_state & I_DIRTY_ALL && !kill_dirty) { - spin_unlock(&inode->i_lock); - busy = 1; - continue; - } if (atomic_read(&inode->i_count)) { spin_unlock(&inode->i_lock); - busy = 1; continue; } @@ -798,8 +786,6 @@ again: spin_unlock(&sb->s_inode_list_lock); dispose_list(&dispose); - - return busy; } /* @@ -1041,8 +1027,6 @@ struct inode *new_inode(struct super_block *sb) { struct inode *inode; - spin_lock_prefetch(&sb->s_inode_list_lock); - inode = new_inode_pseudo(sb); if (inode) inode_sb_list_add(inode); @@ -1853,6 +1837,7 @@ EXPORT_SYMBOL(bmap); static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, struct timespec64 now) { + struct timespec64 ctime; if (!(mnt->mnt_flags & MNT_RELATIME)) return 1; @@ -1864,7 +1849,8 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, /* * Is ctime younger than or equal to atime? If yes, update atime: */ - if (timespec64_compare(&inode->i_ctime, &inode->i_atime) >= 0) + ctime = inode_get_ctime(inode); + if (timespec64_compare(&ctime, &inode->i_atime) >= 0) return 1; /* @@ -1879,29 +1865,76 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, return 0; } -int generic_update_time(struct inode *inode, struct timespec64 *time, int flags) +/** + * inode_update_timestamps - update the timestamps on the inode + * @inode: inode to be updated + * @flags: S_* flags that needed to be updated + * + * The update_time function is called when an inode's timestamps need to be + * updated for a read or write operation. This function handles updating the + * actual timestamps. It's up to the caller to ensure that the inode is marked + * dirty appropriately. + * + * In the case where any of S_MTIME, S_CTIME, or S_VERSION need to be updated, + * attempt to update all three of them. S_ATIME updates can be handled + * independently of the rest. + * + * Returns a set of S_* flags indicating which values changed. + */ +int inode_update_timestamps(struct inode *inode, int flags) { - int dirty_flags = 0; + int updated = 0; + struct timespec64 now; - if (flags & (S_ATIME | S_CTIME | S_MTIME)) { - if (flags & S_ATIME) - inode->i_atime = *time; - if (flags & S_CTIME) - inode->i_ctime = *time; - if (flags & S_MTIME) - inode->i_mtime = *time; - - if (inode->i_sb->s_flags & SB_LAZYTIME) - dirty_flags |= I_DIRTY_TIME; - else - dirty_flags |= I_DIRTY_SYNC; + if (flags & (S_MTIME|S_CTIME|S_VERSION)) { + struct timespec64 ctime = inode_get_ctime(inode); + + now = inode_set_ctime_current(inode); + if (!timespec64_equal(&now, &ctime)) + updated |= S_CTIME; + if (!timespec64_equal(&now, &inode->i_mtime)) { + inode->i_mtime = now; + updated |= S_MTIME; + } + if (IS_I_VERSION(inode) && inode_maybe_inc_iversion(inode, updated)) + updated |= S_VERSION; + } else { + now = current_time(inode); } - if ((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false)) - dirty_flags |= I_DIRTY_SYNC; + if (flags & S_ATIME) { + if (!timespec64_equal(&now, &inode->i_atime)) { + inode->i_atime = now; + updated |= S_ATIME; + } + } + return updated; +} +EXPORT_SYMBOL(inode_update_timestamps); +/** + * generic_update_time - update the timestamps on the inode + * @inode: inode to be updated + * @flags: S_* flags that needed to be updated + * + * The update_time function is called when an inode's timestamps need to be + * updated for a read or write operation. In the case where any of S_MTIME, S_CTIME, + * or S_VERSION need to be updated we attempt to update all three of them. S_ATIME + * updates can be handled done independently of the rest. + * + * Returns a S_* mask indicating which fields were updated. + */ +int generic_update_time(struct inode *inode, int flags) +{ + int updated = inode_update_timestamps(inode, flags); + int dirty_flags = 0; + + if (updated & (S_ATIME|S_MTIME|S_CTIME)) + dirty_flags = inode->i_sb->s_flags & SB_LAZYTIME ? I_DIRTY_TIME : I_DIRTY_SYNC; + if (updated & S_VERSION) + dirty_flags |= I_DIRTY_SYNC; __mark_inode_dirty(inode, dirty_flags); - return 0; + return updated; } EXPORT_SYMBOL(generic_update_time); @@ -1909,11 +1942,12 @@ EXPORT_SYMBOL(generic_update_time); * This does the actual work of updating an inodes time or version. Must have * had called mnt_want_write() before calling this. */ -int inode_update_time(struct inode *inode, struct timespec64 *time, int flags) +int inode_update_time(struct inode *inode, int flags) { if (inode->i_op->update_time) - return inode->i_op->update_time(inode, time, flags); - return generic_update_time(inode, time, flags); + return inode->i_op->update_time(inode, flags); + generic_update_time(inode, flags); + return 0; } EXPORT_SYMBOL(inode_update_time); @@ -1965,7 +1999,6 @@ void touch_atime(const struct path *path) { struct vfsmount *mnt = path->mnt; struct inode *inode = d_inode(path->dentry); - struct timespec64 now; if (!atime_needs_update(path, inode)) return; @@ -1984,8 +2017,7 @@ void touch_atime(const struct path *path) * We may also fail on filesystems that have the ability to make parts * of the fs read only, e.g. subvolumes in Btrfs. */ - now = current_time(inode); - inode_update_time(inode, &now, S_ATIME); + inode_update_time(inode, S_ATIME); __mnt_drop_write(mnt); skip_update: sb_end_write(inode->i_sb); @@ -2070,18 +2102,63 @@ int file_remove_privs(struct file *file) } EXPORT_SYMBOL(file_remove_privs); -static int inode_needs_update_time(struct inode *inode, struct timespec64 *now) +/** + * current_mgtime - Return FS time (possibly fine-grained) + * @inode: inode. + * + * Return the current time truncated to the time granularity supported by + * the fs, as suitable for a ctime/mtime change. If the ctime is flagged + * as having been QUERIED, get a fine-grained timestamp. + */ +struct timespec64 current_mgtime(struct inode *inode) +{ + struct timespec64 now, ctime; + atomic_long_t *pnsec = (atomic_long_t *)&inode->__i_ctime.tv_nsec; + long nsec = atomic_long_read(pnsec); + + if (nsec & I_CTIME_QUERIED) { + ktime_get_real_ts64(&now); + return timestamp_truncate(now, inode); + } + + ktime_get_coarse_real_ts64(&now); + now = timestamp_truncate(now, inode); + + /* + * If we've recently fetched a fine-grained timestamp + * then the coarse-grained one may still be earlier than the + * existing ctime. Just keep the existing value if so. + */ + ctime = inode_get_ctime(inode); + if (timespec64_compare(&ctime, &now) > 0) + now = ctime; + + return now; +} +EXPORT_SYMBOL(current_mgtime); + +static struct timespec64 current_ctime(struct inode *inode) +{ + if (is_mgtime(inode)) + return current_mgtime(inode); + return current_time(inode); +} + +static int inode_needs_update_time(struct inode *inode) { int sync_it = 0; + struct timespec64 now = current_ctime(inode); + struct timespec64 ctime; /* First try to exhaust all avenues to not sync */ if (IS_NOCMTIME(inode)) return 0; - if (!timespec64_equal(&inode->i_mtime, now)) + if (!timespec64_equal(&inode->i_mtime, &now)) sync_it = S_MTIME; - if (!timespec64_equal(&inode->i_ctime, now)) + ctime = inode_get_ctime(inode); + if (!timespec64_equal(&ctime, &now)) sync_it |= S_CTIME; if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode)) @@ -2090,15 +2167,14 @@ static int inode_needs_update_time(struct inode *inode, struct timespec64 *now) return sync_it; } -static int __file_update_time(struct file *file, struct timespec64 *now, - int sync_mode) +static int __file_update_time(struct file *file, int sync_mode) { int ret = 0; struct inode *inode = file_inode(file); /* try to update time settings */ if (!__mnt_want_write_file(file)) { - ret = inode_update_time(inode, now, sync_mode); + ret = inode_update_time(inode, sync_mode); __mnt_drop_write_file(file); } @@ -2123,13 +2199,12 @@ int file_update_time(struct file *file) { int ret; struct inode *inode = file_inode(file); - struct timespec64 now = current_time(inode); - ret = inode_needs_update_time(inode, &now); + ret = inode_needs_update_time(inode); if (ret <= 0) return ret; - return __file_update_time(file, &now, ret); + return __file_update_time(file, ret); } EXPORT_SYMBOL(file_update_time); @@ -2152,7 +2227,6 @@ static int file_modified_flags(struct file *file, int flags) { int ret; struct inode *inode = file_inode(file); - struct timespec64 now = current_time(inode); /* * Clear the security bits if the process is not being run by root. @@ -2165,13 +2239,13 @@ static int file_modified_flags(struct file *file, int flags) if (unlikely(file->f_mode & FMODE_NOCMTIME)) return 0; - ret = inode_needs_update_time(inode, &now); + ret = inode_needs_update_time(inode); if (ret <= 0) return ret; if (flags & IOCB_NOWAIT) return -EAGAIN; - return __file_update_time(file, &now, ret); + return __file_update_time(file, ret); } /** @@ -2491,15 +2565,59 @@ struct timespec64 current_time(struct inode *inode) struct timespec64 now; ktime_get_coarse_real_ts64(&now); + return timestamp_truncate(now, inode); +} +EXPORT_SYMBOL(current_time); - if (unlikely(!inode->i_sb)) { - WARN(1, "current_time() called with uninitialized super_block in the inode"); +/** + * inode_set_ctime_current - set the ctime to current_time + * @inode: inode + * + * Set the inode->i_ctime to the current value for the inode. Returns + * the current value that was assigned to i_ctime. + */ +struct timespec64 inode_set_ctime_current(struct inode *inode) +{ + struct timespec64 now; + struct timespec64 ctime; + + ctime.tv_nsec = READ_ONCE(inode->__i_ctime.tv_nsec); + if (!(ctime.tv_nsec & I_CTIME_QUERIED)) { + now = current_time(inode); + + /* Just copy it into place if it's not multigrain */ + if (!is_mgtime(inode)) { + inode_set_ctime_to_ts(inode, now); + return now; + } + + /* + * If we've recently updated with a fine-grained timestamp, + * then the coarse-grained one may still be earlier than the + * existing ctime. Just keep the existing value if so. + */ + ctime.tv_sec = inode->__i_ctime.tv_sec; + if (timespec64_compare(&ctime, &now) > 0) + return ctime; + + /* + * Ctime updates are usually protected by the inode_lock, but + * we can still race with someone setting the QUERIED flag. + * Try to swap the new nsec value into place. If it's changed + * in the interim, then just go with a fine-grained timestamp. + */ + if (cmpxchg(&inode->__i_ctime.tv_nsec, ctime.tv_nsec, + now.tv_nsec) != ctime.tv_nsec) + goto fine_grained; + inode->__i_ctime.tv_sec = now.tv_sec; return now; } - - return timestamp_truncate(now, inode); +fine_grained: + ktime_get_real_ts64(&now); + inode_set_ctime_to_ts(inode, timestamp_truncate(now, inode)); + return now; } -EXPORT_SYMBOL(current_time); +EXPORT_SYMBOL(inode_set_ctime_current); /** * in_group_or_capable - check whether caller is CAP_FSETID privileged |