diff options
Diffstat (limited to 'fs/inode.c')
-rw-r--r-- | fs/inode.c | 214 |
1 files changed, 162 insertions, 52 deletions
diff --git a/fs/inode.c b/fs/inode.c index bd4da9c5207e..6462276dfdf0 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -422,6 +422,7 @@ void inode_init_once(struct inode *inode) INIT_LIST_HEAD(&inode->i_io_list); INIT_LIST_HEAD(&inode->i_wb_list); INIT_LIST_HEAD(&inode->i_lru); + INIT_LIST_HEAD(&inode->i_sb_list); __address_space_init_once(&inode->i_data); i_size_ordered_init(inode); } @@ -604,7 +605,7 @@ void clear_inode(struct inode *inode) { /* * We have to cycle the i_pages lock here because reclaim can be in the - * process of removing the last page (in __delete_from_page_cache()) + * process of removing the last page (in __filemap_remove_folio()) * and we must not free the mapping under it. */ xa_lock_irq(&inode->i_data.i_pages); @@ -1021,7 +1022,6 @@ struct inode *new_inode_pseudo(struct super_block *sb) spin_lock(&inode->i_lock); inode->i_state = 0; spin_unlock(&inode->i_lock); - INIT_LIST_HEAD(&inode->i_sb_list); } return inode; } @@ -1165,7 +1165,6 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval, { struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); struct inode *old; - bool creating = inode->i_state & I_CREATING; again: spin_lock(&inode_hash_lock); @@ -1199,7 +1198,12 @@ again: inode->i_state |= I_NEW; hlist_add_head_rcu(&inode->i_hash, head); spin_unlock(&inode->i_lock); - if (!creating) + + /* + * Add inode to the sb list if it's not already. It has I_NEW at this + * point, so it should be safe to test i_sb_list locklessly. + */ + if (list_empty(&inode->i_sb_list)) inode_sb_list_add(inode); unlock: spin_unlock(&inode_hash_lock); @@ -2010,67 +2014,57 @@ static int __remove_privs(struct user_namespace *mnt_userns, return notify_change(mnt_userns, dentry, &newattrs, NULL); } -/* - * Remove special file priviledges (suid, capabilities) when file is written - * to or truncated. - */ -int file_remove_privs(struct file *file) +static int __file_remove_privs(struct file *file, unsigned int flags) { struct dentry *dentry = file_dentry(file); struct inode *inode = file_inode(file); + int error; int kill; - int error = 0; - /* - * Fast path for nothing security related. - * As well for non-regular files, e.g. blkdev inodes. - * For example, blkdev_write_iter() might get here - * trying to remove privs which it is not allowed to. - */ if (IS_NOSEC(inode) || !S_ISREG(inode->i_mode)) return 0; kill = dentry_needs_remove_privs(dentry); - if (kill < 0) + if (kill <= 0) return kill; - if (kill) - error = __remove_privs(file_mnt_user_ns(file), dentry, kill); + + if (flags & IOCB_NOWAIT) + return -EAGAIN; + + error = __remove_privs(file_mnt_user_ns(file), dentry, kill); if (!error) inode_has_no_xattr(inode); return error; } -EXPORT_SYMBOL(file_remove_privs); /** - * file_update_time - update mtime and ctime time - * @file: file accessed + * file_remove_privs - remove special file privileges (suid, capabilities) + * @file: file to remove privileges from + * + * When file is modified by a write or truncation ensure that special + * file privileges are removed. * - * Update the mtime and ctime members of an inode and mark the inode - * for writeback. Note that this function is meant exclusively for - * usage in the file write path of filesystems, and filesystems may - * choose to explicitly ignore update via this function with the - * S_NOCMTIME inode flag, e.g. for network filesystem where these - * timestamps are handled by the server. This can return an error for - * file systems who need to allocate space in order to update an inode. + * Return: 0 on success, negative errno on failure. */ +int file_remove_privs(struct file *file) +{ + return __file_remove_privs(file, 0); +} +EXPORT_SYMBOL(file_remove_privs); -int file_update_time(struct file *file) +static int inode_needs_update_time(struct inode *inode, struct timespec64 *now) { - struct inode *inode = file_inode(file); - struct timespec64 now; int sync_it = 0; - int ret; /* First try to exhaust all avenues to not sync */ if (IS_NOCMTIME(inode)) return 0; - now = current_time(inode); - if (!timespec64_equal(&inode->i_mtime, &now)) + if (!timespec64_equal(&inode->i_mtime, now)) sync_it = S_MTIME; - if (!timespec64_equal(&inode->i_ctime, &now)) + if (!timespec64_equal(&inode->i_ctime, now)) sync_it |= S_CTIME; if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode)) @@ -2079,37 +2073,127 @@ int file_update_time(struct file *file) if (!sync_it) return 0; - /* Finally allowed to write? Takes lock. */ - if (__mnt_want_write_file(file)) - return 0; + return sync_it; +} - ret = inode_update_time(inode, &now, sync_it); - __mnt_drop_write_file(file); +static int __file_update_time(struct file *file, struct timespec64 *now, + int sync_mode) +{ + int ret = 0; + struct inode *inode = file_inode(file); + + /* try to update time settings */ + if (!__mnt_want_write_file(file)) { + ret = inode_update_time(inode, now, sync_mode); + __mnt_drop_write_file(file); + } return ret; } + +/** + * file_update_time - update mtime and ctime time + * @file: file accessed + * + * Update the mtime and ctime members of an inode and mark the inode for + * writeback. Note that this function is meant exclusively for usage in + * the file write path of filesystems, and filesystems may choose to + * explicitly ignore updates via this function with the _NOCMTIME inode + * flag, e.g. for network filesystem where these imestamps are handled + * by the server. This can return an error for file systems who need to + * allocate space in order to update an inode. + * + * Return: 0 on success, negative errno on failure. + */ +int file_update_time(struct file *file) +{ + int ret; + struct inode *inode = file_inode(file); + struct timespec64 now = current_time(inode); + + ret = inode_needs_update_time(inode, &now); + if (ret <= 0) + return ret; + + return __file_update_time(file, &now, ret); +} EXPORT_SYMBOL(file_update_time); -/* Caller must hold the file's inode lock */ -int file_modified(struct file *file) +/** + * file_modified_flags - handle mandated vfs changes when modifying a file + * @file: file that was modified + * @flags: kiocb flags + * + * When file has been modified ensure that special + * file privileges are removed and time settings are updated. + * + * If IOCB_NOWAIT is set, special file privileges will not be removed and + * time settings will not be updated. It will return -EAGAIN. + * + * Context: Caller must hold the file's inode lock. + * + * Return: 0 on success, negative errno on failure. + */ +static int file_modified_flags(struct file *file, int flags) { - int err; + int ret; + struct inode *inode = file_inode(file); + struct timespec64 now = current_time(inode); /* * Clear the security bits if the process is not being run by root. * This keeps people from modifying setuid and setgid binaries. */ - err = file_remove_privs(file); - if (err) - return err; + ret = __file_remove_privs(file, flags); + if (ret) + return ret; if (unlikely(file->f_mode & FMODE_NOCMTIME)) return 0; - return file_update_time(file); + ret = inode_needs_update_time(inode, &now); + if (ret <= 0) + return ret; + if (flags & IOCB_NOWAIT) + return -EAGAIN; + + return __file_update_time(file, &now, ret); +} + +/** + * file_modified - handle mandated vfs changes when modifying a file + * @file: file that was modified + * + * When file has been modified ensure that special + * file privileges are removed and time settings are updated. + * + * Context: Caller must hold the file's inode lock. + * + * Return: 0 on success, negative errno on failure. + */ +int file_modified(struct file *file) +{ + return file_modified_flags(file, 0); } EXPORT_SYMBOL(file_modified); +/** + * kiocb_modified - handle mandated vfs changes when modifying a file + * @iocb: iocb that was modified + * + * When file has been modified ensure that special + * file privileges are removed and time settings are updated. + * + * Context: Caller must hold the file's inode lock. + * + * Return: 0 on success, negative errno on failure. + */ +int kiocb_modified(struct kiocb *iocb) +{ + return file_modified_flags(iocb->ki_filp, iocb->ki_flags); +} +EXPORT_SYMBOL_GPL(kiocb_modified); + int inode_needs_sync(struct inode *inode) { if (IS_SYNC(inode)) @@ -2246,10 +2330,6 @@ void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode, /* Directories are special, and always inherit S_ISGID */ if (S_ISDIR(mode)) mode |= S_ISGID; - else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) && - !in_group_p(i_gid_into_mnt(mnt_userns, dir)) && - !capable_wrt_inode_uidgid(mnt_userns, dir, CAP_FSETID)) - mode &= ~S_ISGID; } else inode_fsgid_set(inode, mnt_userns); inode->i_mode = mode; @@ -2405,3 +2485,33 @@ struct timespec64 current_time(struct inode *inode) return timestamp_truncate(now, inode); } EXPORT_SYMBOL(current_time); + +/** + * mode_strip_sgid - handle the sgid bit for non-directories + * @mnt_userns: User namespace of the mount the inode was created from + * @dir: parent directory inode + * @mode: mode of the file to be created in @dir + * + * If the @mode of the new file has both the S_ISGID and S_IXGRP bit + * raised and @dir has the S_ISGID bit raised ensure that the caller is + * either in the group of the parent directory or they have CAP_FSETID + * in their user namespace and are privileged over the parent directory. + * In all other cases, strip the S_ISGID bit from @mode. + * + * Return: the new mode to use for the file + */ +umode_t mode_strip_sgid(struct user_namespace *mnt_userns, + const struct inode *dir, umode_t mode) +{ + if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP)) + return mode; + if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID)) + return mode; + if (in_group_p(i_gid_into_mnt(mnt_userns, dir))) + return mode; + if (capable_wrt_inode_uidgid(mnt_userns, dir, CAP_FSETID)) + return mode; + + return mode & ~S_ISGID; +} +EXPORT_SYMBOL(mode_strip_sgid); |