From 0ecee66990644c3482209ff7c12faa7bc40449b1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 16 Oct 2019 19:48:14 -0700 Subject: fs/namespace.c: fix use-after-free of mount in mnt_warn_timestamp_expiry() After do_add_mount() returns success, the caller doesn't hold a reference to the 'struct mount' anymore. So it's invalid to access it in mnt_warn_timestamp_expiry(). Fix it by calling mnt_warn_timestamp_expiry() before do_add_mount() rather than after, and adjusting the warning message accordingly. Reported-by: syzbot+da4f525235510683d855@syzkaller.appspotmail.com Fixes: f8b92ba67c5d ("mount: Add mount warning for impending timestamp expiry") Signed-off-by: Eric Biggers Signed-off-by: Al Viro --- fs/namespace.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index fe0e9e1410fe..2adfe7b166a3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2478,8 +2478,10 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount * time64_to_tm(sb->s_time_max, 0, &tm); - pr_warn("Mounted %s file system at %s supports timestamps until %04ld (0x%llx)\n", - sb->s_type->name, mntpath, + pr_warn("%s filesystem being %s at %s supports timestamps until %04ld (0x%llx)\n", + sb->s_type->name, + is_mounted(mnt) ? "remounted" : "mounted", + mntpath, tm.tm_year+1900, (unsigned long long)sb->s_time_max); free_page((unsigned long)buf); @@ -2764,14 +2766,11 @@ static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint, if (IS_ERR(mnt)) return PTR_ERR(mnt); - error = do_add_mount(real_mount(mnt), mountpoint, mnt_flags); - if (error < 0) { - mntput(mnt); - return error; - } - mnt_warn_timestamp_expiry(mountpoint, mnt); + error = do_add_mount(real_mount(mnt), mountpoint, mnt_flags); + if (error < 0) + mntput(mnt); return error; } -- cgit v1.2.3 From 97eba80fcca754856d09e048f469db22773bec68 Mon Sep 17 00:00:00 2001 From: Guillem Jover Date: Wed, 21 Aug 2019 05:38:20 +0200 Subject: aio: Fix io_pgetevents() struct __compat_aio_sigset layout This type is used to pass the sigset_t from userland to the kernel, but it was using the kernel native pointer type for the member representing the compat userland pointer to the userland sigset_t. This messes up the layout, and makes the kernel eat up both the userland pointer and the size members into the kernel pointer, and then reads garbage into the kernel sigsetsize. Which makes the sigset_t size consistency check fail, and consequently the syscall always returns -EINVAL. This breaks both libaio and strace on 32-bit userland running on 64-bit kernels. And there are apparently no users in the wild of the current broken layout (at least according to codesearch.debian.org and a brief check over github.com search). So it looks safe to fix this directly in the kernel, instead of either letting userland deal with this permanently with the additional overhead or trying to make the syscall infer what layout userland used, even though this is also being worked around in libaio to temporarily cope with kernels that have not yet been fixed. We use a proper compat_uptr_t instead of a compat_sigset_t pointer. Fixes: 7a074e96dee6 ("aio: implement io_pgetevents") Signed-off-by: Guillem Jover Signed-off-by: Al Viro --- fs/aio.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 01e0fb9ae45a..0d9a559d488c 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -2179,7 +2179,7 @@ SYSCALL_DEFINE5(io_getevents_time32, __u32, ctx_id, #ifdef CONFIG_COMPAT struct __compat_aio_sigset { - compat_sigset_t __user *sigmask; + compat_uptr_t sigmask; compat_size_t sigsetsize; }; @@ -2193,7 +2193,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, struct old_timespec32 __user *, timeout, const struct __compat_aio_sigset __user *, usig) { - struct __compat_aio_sigset ksig = { NULL, }; + struct __compat_aio_sigset ksig = { 0, }; struct timespec64 t; bool interrupted; int ret; @@ -2204,7 +2204,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents, if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) return -EFAULT; - ret = set_compat_user_sigmask(ksig.sigmask, ksig.sigsetsize); + ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize); if (ret) return ret; @@ -2228,7 +2228,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, struct __kernel_timespec __user *, timeout, const struct __compat_aio_sigset __user *, usig) { - struct __compat_aio_sigset ksig = { NULL, }; + struct __compat_aio_sigset ksig = { 0, }; struct timespec64 t; bool interrupted; int ret; @@ -2239,7 +2239,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64, if (usig && copy_from_user(&ksig, usig, sizeof(ksig))) return -EFAULT; - ret = set_compat_user_sigmask(ksig.sigmask, ksig.sigsetsize); + ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize); if (ret) return ret; -- cgit v1.2.3 From 03ad0d703df75c43f78bd72e16124b5b94a95188 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 25 Oct 2019 00:03:11 -0400 Subject: autofs: fix a leak in autofs_expire_indirect() if the second call of should_expire() in there ends up grabbing and returning a new reference to dentry, we need to drop it before continuing. Signed-off-by: Al Viro --- fs/autofs/expire.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c index 2866fabf497f..91f5787dae7c 100644 --- a/fs/autofs/expire.c +++ b/fs/autofs/expire.c @@ -459,9 +459,10 @@ static struct dentry *autofs_expire_indirect(struct super_block *sb, */ how &= ~AUTOFS_EXP_LEAVES; found = should_expire(expired, mnt, timeout, how); - if (!found || found != expired) - /* Something has changed, continue */ + if (found != expired) { // something has changed, continue + dput(found); goto next; + } if (expired != dentry) dput(dentry); -- cgit v1.2.3 From 630faf81b3e61bcc90dc6d8b497800657d2752a5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Nov 2019 11:53:27 -0500 Subject: cgroup: don't put ERR_PTR() into fc->root the caller of ->get_tree() expects NULL left there on error... Reported-by: Thibaut Sautereau Signed-off-by: Al Viro --- kernel/cgroup/cgroup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 080561bb8a4b..ef4242e5d4bc 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2119,11 +2119,12 @@ int cgroup_do_get_tree(struct fs_context *fc) nsdentry = kernfs_node_dentry(cgrp->kn, sb); dput(fc->root); - fc->root = nsdentry; if (IS_ERR(nsdentry)) { - ret = PTR_ERR(nsdentry); deactivate_locked_super(sb); + ret = PTR_ERR(nsdentry); + nsdentry = NULL; } + fc->root = nsdentry; } if (!ctx->kfc.new_sb_created) -- cgit v1.2.3 From a2ece088882666e1dc7113744ac912eb161e3f87 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 Nov 2019 22:08:29 -0500 Subject: exportfs_decode_fh(): negative pinned may become positive without the parent locked Signed-off-by: Al Viro --- fs/exportfs/expfs.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 09bc68708d28..2dd55b172d57 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -519,26 +519,33 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, * inode is actually connected to the parent. */ err = exportfs_get_name(mnt, target_dir, nbuf, result); - if (!err) { - inode_lock(target_dir->d_inode); - nresult = lookup_one_len(nbuf, target_dir, - strlen(nbuf)); - inode_unlock(target_dir->d_inode); - if (!IS_ERR(nresult)) { - if (nresult->d_inode) { - dput(result); - result = nresult; - } else - dput(nresult); - } + if (err) { + dput(target_dir); + goto err_result; } + inode_lock(target_dir->d_inode); + nresult = lookup_one_len(nbuf, target_dir, strlen(nbuf)); + if (!IS_ERR(nresult)) { + if (unlikely(nresult->d_inode != result->d_inode)) { + dput(nresult); + nresult = ERR_PTR(-ESTALE); + } + } + inode_unlock(target_dir->d_inode); /* * At this point we are done with the parent, but it's pinned * by the child dentry anyway. */ dput(target_dir); + if (IS_ERR(nresult)) { + err = PTR_ERR(nresult); + goto err_result; + } + dput(result); + result = nresult; + /* * And finally make sure the dentry is actually acceptable * to NFSD. -- cgit v1.2.3 From 69924b89687a2923e88cc42144aea27868913d0e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 2 Nov 2019 13:11:41 -0400 Subject: audit_get_nd(): don't unlock parent too early if the child has been negative and just went positive under us, we want coherent d_is_positive() and ->d_inode. Don't unlock the parent until we'd done that work... Signed-off-by: Al Viro --- kernel/audit_watch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 1f31c2f1e6fc..4508d5e0cf69 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -351,12 +351,12 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent) struct dentry *d = kern_path_locked(watch->path, parent); if (IS_ERR(d)) return PTR_ERR(d); - inode_unlock(d_backing_inode(parent->dentry)); if (d_is_positive(d)) { /* update watch filter fields */ watch->dev = d->d_sb->s_dev; watch->ino = d_backing_inode(d)->i_ino; } + inode_unlock(d_backing_inode(parent->dentry)); dput(d); return 0; } -- cgit v1.2.3 From bcf0d9d4b76976f892154efdfc509b256fd898e8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 3 Nov 2019 12:07:15 -0500 Subject: ecryptfs: fix unlink and rmdir in face of underlying fs modifications A problem similar to the one caught in commit 74dd7c97ea2a ("ecryptfs_rename(): verify that lower dentries are still OK after lock_rename()") exists for unlink/rmdir as well. Instead of playing with dget_parent() of underlying dentry of victim and hoping it's the same as underlying dentry of our directory, do the following: * find the underlying dentry of victim * find the underlying directory of victim's parent (stable since the victim is ecryptfs dentry and inode of its parent is held exclusive by the caller). * lock the inode of dentry underlying the victim's parent * check that underlying dentry of victim is still hashed and has the right parent - it can be moved, but it can't be moved to/from the directory we are holding exclusive. So while ->d_parent itself might not be stable, the result of comparison is. If the check passes, everything is fine - underlying directory is locked, underlying victim is still a child of that directory and we can go ahead and feed them to vfs_unlink(). As in the current mainline we need to pin the underlying dentry of victim, so that it wouldn't go negative under us, but that's the only temporary reference that needs to be grabbed there. Underlying dentry of parent won't go away (it's pinned by the parent, which is held by caller), so there's no need to grab it. The same problem (with the same solution) exists for rmdir. Moreover, rename gets simpler and more robust with the same "don't bother with dget_parent()" approach. Fixes: 74dd7c97ea2 "ecryptfs_rename(): verify that lower dentries are still OK after lock_rename()" Signed-off-by: Al Viro --- fs/ecryptfs/inode.c | 65 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 18426f4855f1..a905d5f4f3b0 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -128,13 +128,20 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry, struct inode *inode) { struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); - struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); struct dentry *lower_dir_dentry; + struct inode *lower_dir_inode; int rc; - dget(lower_dentry); - lower_dir_dentry = lock_parent(lower_dentry); - rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL); + lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent); + lower_dir_inode = d_inode(lower_dir_dentry); + inode_lock_nested(lower_dir_inode, I_MUTEX_PARENT); + dget(lower_dentry); // don't even try to make the lower negative + if (lower_dentry->d_parent != lower_dir_dentry) + rc = -EINVAL; + else if (d_unhashed(lower_dentry)) + rc = -EINVAL; + else + rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL); if (rc) { printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); goto out_unlock; @@ -142,10 +149,11 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry, fsstack_copy_attr_times(dir, lower_dir_inode); set_nlink(inode, ecryptfs_inode_to_lower(inode)->i_nlink); inode->i_ctime = dir->i_ctime; - d_drop(dentry); out_unlock: - unlock_dir(lower_dir_dentry); dput(lower_dentry); + inode_unlock(lower_dir_inode); + if (!rc) + d_drop(dentry); return rc; } @@ -512,22 +520,30 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) { struct dentry *lower_dentry; struct dentry *lower_dir_dentry; + struct inode *lower_dir_inode; int rc; lower_dentry = ecryptfs_dentry_to_lower(dentry); - dget(dentry); - lower_dir_dentry = lock_parent(lower_dentry); - dget(lower_dentry); - rc = vfs_rmdir(d_inode(lower_dir_dentry), lower_dentry); - dput(lower_dentry); - if (!rc && d_really_is_positive(dentry)) + lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent); + lower_dir_inode = d_inode(lower_dir_dentry); + + inode_lock_nested(lower_dir_inode, I_MUTEX_PARENT); + dget(lower_dentry); // don't even try to make the lower negative + if (lower_dentry->d_parent != lower_dir_dentry) + rc = -EINVAL; + else if (d_unhashed(lower_dentry)) + rc = -EINVAL; + else + rc = vfs_rmdir(lower_dir_inode, lower_dentry); + if (!rc) { clear_nlink(d_inode(dentry)); - fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry)); - set_nlink(dir, d_inode(lower_dir_dentry)->i_nlink); - unlock_dir(lower_dir_dentry); + fsstack_copy_attr_times(dir, lower_dir_inode); + set_nlink(dir, lower_dir_inode->i_nlink); + } + dput(lower_dentry); + inode_unlock(lower_dir_inode); if (!rc) d_drop(dentry); - dput(dentry); return rc; } @@ -565,20 +581,22 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *lower_new_dentry; struct dentry *lower_old_dir_dentry; struct dentry *lower_new_dir_dentry; - struct dentry *trap = NULL; + struct dentry *trap; struct inode *target_inode; if (flags) return -EINVAL; + lower_old_dir_dentry = ecryptfs_dentry_to_lower(old_dentry->d_parent); + lower_new_dir_dentry = ecryptfs_dentry_to_lower(new_dentry->d_parent); + lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); - dget(lower_old_dentry); - dget(lower_new_dentry); - lower_old_dir_dentry = dget_parent(lower_old_dentry); - lower_new_dir_dentry = dget_parent(lower_new_dentry); + target_inode = d_inode(new_dentry); + trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); + dget(lower_new_dentry); rc = -EINVAL; if (lower_old_dentry->d_parent != lower_old_dir_dentry) goto out_lock; @@ -606,11 +624,8 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (new_dir != old_dir) fsstack_copy_attr_all(old_dir, d_inode(lower_old_dir_dentry)); out_lock: - unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); - dput(lower_new_dir_dentry); - dput(lower_old_dir_dentry); dput(lower_new_dentry); - dput(lower_old_dentry); + unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); return rc; } -- cgit v1.2.3 From e72b9dd6a5f17d0fb51f16f8685f3004361e83d0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 3 Nov 2019 13:45:04 -0500 Subject: ecryptfs_lookup_interpose(): lower_dentry->d_inode is not stable lower_dentry can't go from positive to negative (we have it pinned), but it *can* go from negative to positive. So fetching ->d_inode into a local variable, doing a blocking allocation, checking that now ->d_inode is non-NULL and feeding the value we'd fetched earlier to a function that won't accept NULL is not a good idea. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- fs/ecryptfs/inode.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index a905d5f4f3b0..3c2298721359 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -319,7 +319,7 @@ static int ecryptfs_i_size_read(struct dentry *dentry, struct inode *inode) static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry, struct dentry *lower_dentry) { - struct inode *inode, *lower_inode = d_inode(lower_dentry); + struct inode *inode, *lower_inode; struct ecryptfs_dentry_info *dentry_info; struct vfsmount *lower_mnt; int rc = 0; @@ -339,7 +339,15 @@ static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry, dentry_info->lower_path.mnt = lower_mnt; dentry_info->lower_path.dentry = lower_dentry; - if (d_really_is_negative(lower_dentry)) { + /* + * negative dentry can go positive under us here - its parent is not + * locked. That's OK and that could happen just as we return from + * ecryptfs_lookup() anyway. Just need to be careful and fetch + * ->d_inode only once - it's not stable here. + */ + lower_inode = READ_ONCE(lower_dentry->d_inode); + + if (!lower_inode) { /* We want to add because we couldn't find in lower */ d_add(dentry, NULL); return NULL; -- cgit v1.2.3 From 762c69685ff7ad5ad7fee0656671e20a0c9c864d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 3 Nov 2019 13:55:43 -0500 Subject: ecryptfs_lookup_interpose(): lower_dentry->d_parent is not stable either We need to get the underlying dentry of parent; sure, absent the races it is the parent of underlying dentry, but there's nothing to prevent losing a timeslice to preemtion in the middle of evaluation of lower_dentry->d_parent->d_inode, having another process move lower_dentry around and have its (ex)parent not pinned anymore and freed on memory pressure. Then we regain CPU and try to fetch ->d_inode from memory that is freed by that point. dentry->d_parent *is* stable here - it's an argument of ->lookup() and we are guaranteed that it won't be moved anywhere until we feed it to d_add/d_splice_alias. So we safely go that way to get to its underlying dentry. Cc: stable@vger.kernel.org # since 2009 or so Signed-off-by: Al Viro --- fs/ecryptfs/inode.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 3c2298721359..e23752d9a79f 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -319,9 +319,9 @@ static int ecryptfs_i_size_read(struct dentry *dentry, struct inode *inode) static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry, struct dentry *lower_dentry) { + struct path *path = ecryptfs_dentry_to_lower_path(dentry->d_parent); struct inode *inode, *lower_inode; struct ecryptfs_dentry_info *dentry_info; - struct vfsmount *lower_mnt; int rc = 0; dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL); @@ -330,13 +330,12 @@ static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry, return ERR_PTR(-ENOMEM); } - lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); fsstack_copy_attr_atime(d_inode(dentry->d_parent), - d_inode(lower_dentry->d_parent)); + d_inode(path->dentry)); BUG_ON(!d_count(lower_dentry)); ecryptfs_set_dentry_private(dentry, dentry_info); - dentry_info->lower_path.mnt = lower_mnt; + dentry_info->lower_path.mnt = mntget(path->mnt); dentry_info->lower_path.dentry = lower_dentry; /* -- cgit v1.2.3