diff options
author | Eric W. Biederman <ebiederm@xmission.com> | 2020-03-25 18:07:09 +0300 |
---|---|---|
committer | Eric W. Biederman <ebiederm@xmission.com> | 2020-03-25 18:07:09 +0300 |
commit | 4b871ce26ab2c758ea90b8dd659e4267aae9e943 (patch) | |
tree | 1daa6f09d76a01c93cb59372e85e017a833ca5c9 /fs/exec.c | |
parent | a0d4a141750df51135499f96c355c4d76add5505 (diff) | |
parent | 501f9328bf5c6b5e4863da4b50e0e86792de3aa9 (diff) | |
download | linux-4b871ce26ab2c758ea90b8dd659e4267aae9e943.tar.xz |
Merged 'Infrastructure to allow fixing exec deadlocks' from Bernd Edlinger
This is an infrastructure change that makes way for fixing this issue.
Each patch was already posted previously so this is just a cleanup of
the original mailing list thread(s) which got out of control by now.
Everything started here:
https://lore.kernel.org/lkml/AM6PR03MB5170B06F3A2B75EFB98D071AE4E60@AM6PR03MB5170.eurprd03.prod.outlook.com/
I added reviewed-by tags from the mailing list threads, except when
withdrawn.
It took a lot longer than expected to collect everything from the
mailinglist threads, since several commit messages have been infected
with typos, and they got fixed without a new patch version.
- Correct the point of no return.
- Add two new mutexes to replace cred_guard_mutex.
- Fix each use of cred_guard_mutex.
- Update documentation.
- Add a test case.
-- EWB Removed the last 2 patches they need more work
Bernd Edlinger (9):
exec: Fix a deadlock in strace
selftests/ptrace: add test cases for dead-locks
mm: docs: Fix a comment in process_vm_rw_core
kernel: doc: remove outdated comment cred.c
kernel/kcmp.c: Use new infrastructure to fix deadlocks in execve
proc: Use new infrastructure to fix deadlocks in execve
proc: io_accounting: Use new infrastructure to fix deadlocks in execve
perf: Use new infrastructure to fix deadlocks in execve
pidfd: Use new infrastructure to fix deadlocks in execve
Eric W. Biederman (5):
exec: Only compute current once in flush_old_exec
exec: Factor unshare_sighand out of de_thread and call it separately
exec: Move cleanup of posix timers on exec out of de_thread
exec: Move exec_mmap right after de_thread in flush_old_exec
exec: Add exec_update_mutex to replace cred_guard_mutex
fs/exec.c | 78 +++++++++++++++++++---------
fs/proc/base.c | 10 ++--
include/linux/binfmts.h | 8 ++-
include/linux/sched/signal.h | 9 +++-
init/init_task.c | 1 +
kernel/cred.c | 2 -
kernel/events/core.c | 12 ++---
kernel/fork.c | 5 +-
kernel/kcmp.c | 8 +--
kernel/pid.c | 4 +-
mm/process_vm_access.c | 2 +-
tools/testing/selftests/ptrace/Makefile | 4 +-
tools/testing/selftests/ptrace/vmaccess.c | 86 +++++++++++++++++++++++++++++++
13 files changed, 179 insertions(+), 50 deletions(-)
Signed-off-by: Bernd Edlinger <bernd.edlinger@hotmail.de>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Diffstat (limited to 'fs/exec.c')
-rw-r--r-- | fs/exec.c | 78 |
1 files changed, 54 insertions, 24 deletions
diff --git a/fs/exec.c b/fs/exec.c index db17be51b112..0e46ec57fe0a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1010,16 +1010,26 @@ ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len) } EXPORT_SYMBOL(read_code); +/* + * Maps the mm_struct mm into the current task struct. + * On success, this function returns with the mutex + * exec_update_mutex locked. + */ static int exec_mmap(struct mm_struct *mm) { struct task_struct *tsk; struct mm_struct *old_mm, *active_mm; + int ret; /* Notify parent that we're no longer interested in the old VM */ tsk = current; old_mm = current->mm; exec_mm_release(tsk, old_mm); + ret = mutex_lock_killable(&tsk->signal->exec_update_mutex); + if (ret) + return ret; + if (old_mm) { sync_mm_rss(old_mm); /* @@ -1031,9 +1041,11 @@ static int exec_mmap(struct mm_struct *mm) down_read(&old_mm->mmap_sem); if (unlikely(old_mm->core_state)) { up_read(&old_mm->mmap_sem); + mutex_unlock(&tsk->signal->exec_update_mutex); return -EINTR; } } + task_lock(tsk); active_mm = tsk->active_mm; membarrier_exec_mmap(mm); @@ -1189,10 +1201,22 @@ no_thread_group: /* we have changed execution domain */ tsk->exit_signal = SIGCHLD; -#ifdef CONFIG_POSIX_TIMERS - exit_itimers(sig); - flush_itimer_signals(); -#endif + BUG_ON(!thread_group_leader(tsk)); + return 0; + +killed: + /* protects against exit_notify() and __exit_signal() */ + read_lock(&tasklist_lock); + sig->group_exit_task = NULL; + sig->notify_count = 0; + read_unlock(&tasklist_lock); + return -EAGAIN; +} + + +static int unshare_sighand(struct task_struct *me) +{ + struct sighand_struct *oldsighand = me->sighand; if (refcount_read(&oldsighand->count) != 1) { struct sighand_struct *newsighand; @@ -1210,23 +1234,13 @@ no_thread_group: write_lock_irq(&tasklist_lock); spin_lock(&oldsighand->siglock); - rcu_assign_pointer(tsk->sighand, newsighand); + rcu_assign_pointer(me->sighand, newsighand); spin_unlock(&oldsighand->siglock); write_unlock_irq(&tasklist_lock); __cleanup_sighand(oldsighand); } - - BUG_ON(!thread_group_leader(tsk)); return 0; - -killed: - /* protects against exit_notify() and __exit_signal() */ - read_lock(&tasklist_lock); - sig->group_exit_task = NULL; - sig->notify_count = 0; - read_unlock(&tasklist_lock); - return -EAGAIN; } char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk) @@ -1260,13 +1274,13 @@ void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec) */ int flush_old_exec(struct linux_binprm * bprm) { + struct task_struct *me = current; int retval; /* - * Make sure we have a private signal table and that - * we are unassociated from the previous thread group. + * Make this the only thread in the thread group. */ - retval = de_thread(current); + retval = de_thread(me); if (retval) goto out; @@ -1286,18 +1300,31 @@ int flush_old_exec(struct linux_binprm * bprm) goto out; /* - * After clearing bprm->mm (to mark that current is using the - * prepared mm now), we have nothing left of the original + * After setting bprm->called_exec_mmap (to mark that current is + * using the prepared mm now), we have nothing left of the original * process. If anything from here on returns an error, the check * in search_binary_handler() will SEGV current. */ + bprm->called_exec_mmap = 1; bprm->mm = NULL; +#ifdef CONFIG_POSIX_TIMERS + exit_itimers(me->signal); + flush_itimer_signals(); +#endif + + /* + * Make the signal table private. + */ + retval = unshare_sighand(me); + if (retval) + goto out; + set_fs(USER_DS); - current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | + me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | PF_NOFREEZE | PF_NO_SETAFFINITY); flush_thread(); - current->personality &= ~bprm->per_clear; + me->personality &= ~bprm->per_clear; /* * We have to apply CLOEXEC before we change whether the process is @@ -1305,7 +1332,7 @@ int flush_old_exec(struct linux_binprm * bprm) * trying to access the should-be-closed file descriptors of a process * undergoing exec(2). */ - do_close_on_exec(current->files); + do_close_on_exec(me->files); return 0; out: @@ -1424,6 +1451,8 @@ static void free_bprm(struct linux_binprm *bprm) { free_arg_pages(bprm); if (bprm->cred) { + if (bprm->called_exec_mmap) + mutex_unlock(¤t->signal->exec_update_mutex); mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } @@ -1473,6 +1502,7 @@ void install_exec_creds(struct linux_binprm *bprm) * credentials; any time after this it may be unlocked. */ security_bprm_committed_creds(bprm); + mutex_unlock(¤t->signal->exec_update_mutex); mutex_unlock(¤t->signal->cred_guard_mutex); } EXPORT_SYMBOL(install_exec_creds); @@ -1664,7 +1694,7 @@ int search_binary_handler(struct linux_binprm *bprm) read_lock(&binfmt_lock); put_binfmt(fmt); - if (retval < 0 && !bprm->mm) { + if (retval < 0 && bprm->called_exec_mmap) { /* we got to flush_old_exec() and failed after it */ read_unlock(&binfmt_lock); force_sigsegv(SIGSEGV); |