diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-12 19:42:29 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-12 19:42:29 +0300 |
commit | 7fc035058eab3a485060374d78012708524ca133 (patch) | |
tree | aaa4331d386601977618d2b413dd05097319aff7 | |
parent | 667161ba0a1c5badc5c40fc45cf62a6d62883710 (diff) | |
parent | 6a46bf558803dd2b959ca7435a5c143efe837217 (diff) | |
download | linux-7fc035058eab3a485060374d78012708524ca133.tar.xz |
Merge tag 'execve-v6.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux
Pull execve updates from Kees Cook:
"Most are small refactorings and bug fixes, but three things stand out:
switching timens (which got reverted before) looks solid now,
FOLL_FORCE has been removed (no failures seen yet across several weeks
in -next), and some whitespace cleanups (which are long overdue).
- Add timens support (when switching mm). This version has survived
in -next for the entire cycle (Andrei Vagin)
- Various small bug fixes, refactoring, and readability improvements
(Bernd Edlinger, Rolf Eike Beer, Bo Liu, Li Zetao Liu Shixin)
- Remove FOLL_FORCE for stack setup (Kees Cook)
- Whitespace cleanups (Rolf Eike Beer, Kees Cook)"
* tag 'execve-v6.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
binfmt_misc: fix shift-out-of-bounds in check_special_flags
binfmt: Fix error return code in load_elf_fdpic_binary()
exec: Remove FOLL_FORCE for stack setup
binfmt_elf: replace IS_ERR() with IS_ERR_VALUE()
binfmt_elf: simplify error handling in load_elf_phdrs()
binfmt_elf: fix documented return value for load_elf_phdrs()
exec: simplify initial stack size expansion
binfmt: Fix whitespace issues
exec: Add comments on check_unsafe_exec() fs counting
ELF uapi: add spaces before '{'
selftests/timens: add a test for vfork+exit
fs/exec: switch timens when a task gets a new mm
-rw-r--r-- | fs/binfmt_elf.c | 32 | ||||
-rw-r--r-- | fs/binfmt_elf_fdpic.c | 7 | ||||
-rw-r--r-- | fs/binfmt_misc.c | 8 | ||||
-rw-r--r-- | fs/exec.c | 34 | ||||
-rw-r--r-- | include/linux/nsproxy.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/elf.h | 14 | ||||
-rw-r--r-- | kernel/fork.c | 9 | ||||
-rw-r--r-- | kernel/nsproxy.c | 23 | ||||
-rw-r--r-- | tools/testing/selftests/timens/.gitignore | 1 | ||||
-rw-r--r-- | tools/testing/selftests/timens/Makefile | 2 | ||||
-rw-r--r-- | tools/testing/selftests/timens/vfork_exec.c | 139 |
11 files changed, 215 insertions, 55 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 6a11025e5850..d9af34f816a9 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -248,7 +248,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, } while (0) #ifdef ARCH_DLINFO - /* + /* * ARCH_DLINFO must come first so PPC can do its special alignment of * AUXV. * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in @@ -456,13 +456,13 @@ static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr) * * Loads ELF program headers from the binary file elf_file, which has the ELF * header pointed to by elf_ex, into a newly allocated array. The caller is - * responsible for freeing the allocated data. Returns an ERR_PTR upon failure. + * responsible for freeing the allocated data. Returns NULL upon failure. */ static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex, struct file *elf_file) { struct elf_phdr *elf_phdata = NULL; - int retval, err = -1; + int retval = -1; unsigned int size; /* @@ -484,15 +484,9 @@ static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex, /* Read in the program headers */ retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff); - if (retval < 0) { - err = retval; - goto out; - } - /* Success! */ - err = 0; out: - if (err) { + if (retval) { kfree(elf_phdata); elf_phdata = NULL; } @@ -1020,7 +1014,7 @@ out_free_interp: executable_stack); if (retval < 0) goto out_free_dentry; - + elf_bss = 0; elf_brk = 0; @@ -1043,7 +1037,7 @@ out_free_interp: if (unlikely (elf_brk > elf_bss)) { unsigned long nbyte; - + /* There was a PT_LOAD segment with p_memsz > p_filesz before this one. Map anonymous pages, if needed, and clear the area. */ @@ -1166,7 +1160,7 @@ out_free_interp: error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags, total_size); if (BAD_ADDR(error)) { - retval = IS_ERR((void *)error) ? + retval = IS_ERR_VALUE(error) ? PTR_ERR((void*)error) : -EINVAL; goto out_free_dentry; } @@ -1251,7 +1245,7 @@ out_free_interp: interpreter, load_bias, interp_elf_phdata, &arch_state); - if (!IS_ERR((void *)elf_entry)) { + if (!IS_ERR_VALUE(elf_entry)) { /* * load_elf_interp() returns relocation * adjustment @@ -1260,7 +1254,7 @@ out_free_interp: elf_entry += interp_elf_ex->e_entry; } if (BAD_ADDR(elf_entry)) { - retval = IS_ERR((void *)elf_entry) ? + retval = IS_ERR_VALUE(elf_entry) ? (int)elf_entry : -EINVAL; goto out_free_dentry; } @@ -1521,7 +1515,7 @@ static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset) phdr->p_align = 0; } -static void fill_note(struct memelfnote *note, const char *name, int type, +static void fill_note(struct memelfnote *note, const char *name, int type, unsigned int sz, void *data) { note->name = name; @@ -2004,8 +1998,8 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t) t->num_notes = 0; fill_prstatus(&t->prstatus.common, p, signr); - elf_core_copy_task_regs(p, &t->prstatus.pr_reg); - + elf_core_copy_task_regs(p, &t->prstatus.pr_reg); + fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus), &(t->prstatus)); t->num_notes++; @@ -2295,7 +2289,7 @@ static int elf_core_dump(struct coredump_params *cprm) if (!elf_core_write_extra_phdrs(cprm, offset)) goto end_coredump; - /* write out the notes section */ + /* write out the notes section */ if (!write_note_info(&info, cprm)) goto end_coredump; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 08d0c8797828..096e3520a0b1 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -434,8 +434,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) current->mm->start_stack = current->mm->start_brk + stack_size; #endif - if (create_elf_fdpic_tables(bprm, current->mm, - &exec_params, &interp_params) < 0) + retval = create_elf_fdpic_tables(bprm, current->mm, &exec_params, + &interp_params); + if (retval < 0) goto error; kdebug("- start_code %lx", current->mm->start_code); @@ -1603,7 +1604,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) if (!elf_core_write_extra_phdrs(cprm, offset)) goto end_coredump; - /* write out the notes section */ + /* write out the notes section */ if (!writenote(thread_list->notes, cprm)) goto end_coredump; if (!writenote(&psinfo_note, cprm)) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index e1eae7ea823a..bb202ad369d5 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -44,10 +44,10 @@ static LIST_HEAD(entries); static int enabled = 1; enum {Enabled, Magic}; -#define MISC_FMT_PRESERVE_ARGV0 (1 << 31) -#define MISC_FMT_OPEN_BINARY (1 << 30) -#define MISC_FMT_CREDENTIALS (1 << 29) -#define MISC_FMT_OPEN_FILE (1 << 28) +#define MISC_FMT_PRESERVE_ARGV0 (1UL << 31) +#define MISC_FMT_OPEN_BINARY (1UL << 30) +#define MISC_FMT_CREDENTIALS (1UL << 29) +#define MISC_FMT_OPEN_FILE (1UL << 28) typedef struct { struct list_head list; diff --git a/fs/exec.c b/fs/exec.c index a0b1f0337a62..089a743f636b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -64,6 +64,7 @@ #include <linux/io_uring.h> #include <linux/syscall_user_dispatch.h> #include <linux/coredump.h> +#include <linux/time_namespace.h> #include <linux/uaccess.h> #include <asm/mmu_context.h> @@ -171,7 +172,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) exit: fput(file); out: - return error; + return error; } #endif /* #ifdef CONFIG_USELIB */ @@ -199,7 +200,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, { struct page *page; int ret; - unsigned int gup_flags = FOLL_FORCE; + unsigned int gup_flags = 0; #ifdef CONFIG_STACK_GROWSUP if (write) { @@ -842,16 +843,13 @@ int setup_arg_pages(struct linux_binprm *bprm, * will align it up. */ rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK; + + stack_expand = min(rlim_stack, stack_size + stack_expand); + #ifdef CONFIG_STACK_GROWSUP - if (stack_size + stack_expand > rlim_stack) - stack_base = vma->vm_start + rlim_stack; - else - stack_base = vma->vm_end + stack_expand; + stack_base = vma->vm_start + stack_expand; #else - if (stack_size + stack_expand > rlim_stack) - stack_base = vma->vm_end - rlim_stack; - else - stack_base = vma->vm_start - stack_expand; + stack_base = vma->vm_end - stack_expand; #endif current->mm->start_stack = bprm->p; ret = expand_stack(vma, stack_base); @@ -1297,6 +1295,10 @@ int begin_new_exec(struct linux_binprm * bprm) bprm->mm = NULL; + retval = exec_task_namespaces(); + if (retval) + goto out_unlock; + #ifdef CONFIG_POSIX_TIMERS spin_lock_irq(&me->sighand->siglock); posix_cpu_timers_exit(me); @@ -1568,6 +1570,12 @@ static void check_unsafe_exec(struct linux_binprm *bprm) if (task_no_new_privs(current)) bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS; + /* + * If another task is sharing our fs, we cannot safely + * suid exec because the differently privileged task + * will be able to manipulate the current directory, etc. + * It would be nice to force an unshare instead... + */ t = p; n_fs = 1; spin_lock(&p->fs->lock); @@ -1748,6 +1756,7 @@ static int search_binary_handler(struct linux_binprm *bprm) return retval; } +/* binfmt handlers will call back into begin_new_exec() on success. */ static int exec_binprm(struct linux_binprm *bprm) { pid_t old_pid, old_vpid; @@ -1806,6 +1815,11 @@ static int bprm_execve(struct linux_binprm *bprm, if (retval) return retval; + /* + * Check for unsafe execution states before exec_binprm(), which + * will call back into begin_new_exec(), into bprm_creds_from_file(), + * where setuid-ness is evaluated. + */ check_unsafe_exec(bprm); current->in_execve = 1; diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index cdb171efc7cb..fee881cded01 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -94,6 +94,7 @@ static inline struct cred *nsset_cred(struct nsset *set) int copy_namespaces(unsigned long flags, struct task_struct *tsk); void exit_task_namespaces(struct task_struct *tsk); void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new); +int exec_task_namespaces(void); void free_nsproxy(struct nsproxy *ns); int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **, struct cred *, struct fs_struct *); diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index c7b056af9ef0..4c6a8fa5e7ed 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -91,7 +91,7 @@ typedef __s64 Elf64_Sxword; #define DT_INIT 12 #define DT_FINI 13 #define DT_SONAME 14 -#define DT_RPATH 15 +#define DT_RPATH 15 #define DT_SYMBOLIC 16 #define DT_REL 17 #define DT_RELSZ 18 @@ -140,9 +140,9 @@ typedef __s64 Elf64_Sxword; #define ELF64_ST_BIND(x) ELF_ST_BIND(x) #define ELF64_ST_TYPE(x) ELF_ST_TYPE(x) -typedef struct dynamic{ +typedef struct dynamic { Elf32_Sword d_tag; - union{ + union { Elf32_Sword d_val; Elf32_Addr d_ptr; } d_un; @@ -173,7 +173,7 @@ typedef struct elf64_rel { Elf64_Xword r_info; /* index and type of relocation */ } Elf64_Rel; -typedef struct elf32_rela{ +typedef struct elf32_rela { Elf32_Addr r_offset; Elf32_Word r_info; Elf32_Sword r_addend; @@ -185,7 +185,7 @@ typedef struct elf64_rela { Elf64_Sxword r_addend; /* Constant addend used to compute value */ } Elf64_Rela; -typedef struct elf32_sym{ +typedef struct elf32_sym { Elf32_Word st_name; Elf32_Addr st_value; Elf32_Word st_size; @@ -206,7 +206,7 @@ typedef struct elf64_sym { #define EI_NIDENT 16 -typedef struct elf32_hdr{ +typedef struct elf32_hdr { unsigned char e_ident[EI_NIDENT]; Elf32_Half e_type; Elf32_Half e_machine; @@ -246,7 +246,7 @@ typedef struct elf64_hdr { #define PF_W 0x2 #define PF_X 0x1 -typedef struct elf32_phdr{ +typedef struct elf32_phdr { Elf32_Word p_type; Elf32_Off p_offset; Elf32_Addr p_vaddr; diff --git a/kernel/fork.c b/kernel/fork.c index 844dfdc8c639..cfb09ca1b1bc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2046,15 +2046,6 @@ static __latent_entropy struct task_struct *copy_process( return ERR_PTR(-EINVAL); } - /* - * If the new process will be in a different time namespace - * do not allow it to share VM or a thread group with the forking task. - */ - if (clone_flags & (CLONE_THREAD | CLONE_VM)) { - if (nsp->time_ns != nsp->time_ns_for_children) - return ERR_PTR(-EINVAL); - } - if (clone_flags & CLONE_PIDFD) { /* * - CLONE_DETACHED is blocked so that we can potentially diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index eec72ca962e2..a487ff24129b 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -157,7 +157,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWCGROUP | CLONE_NEWTIME)))) { - if (likely(old_ns->time_ns_for_children == old_ns->time_ns)) { + if ((flags & CLONE_VM) || + likely(old_ns->time_ns_for_children == old_ns->time_ns)) { get_nsproxy(old_ns); return 0; } @@ -179,7 +180,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) if (IS_ERR(new_ns)) return PTR_ERR(new_ns); - timens_on_fork(new_ns, tsk); + if ((flags & CLONE_VM) == 0) + timens_on_fork(new_ns, tsk); tsk->nsproxy = new_ns; return 0; @@ -254,6 +256,23 @@ void exit_task_namespaces(struct task_struct *p) switch_task_namespaces(p, NULL); } +int exec_task_namespaces(void) +{ + struct task_struct *tsk = current; + struct nsproxy *new; + + if (tsk->nsproxy->time_ns_for_children == tsk->nsproxy->time_ns) + return 0; + + new = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs); + if (IS_ERR(new)) + return PTR_ERR(new); + + timens_on_fork(new, tsk); + switch_task_namespaces(tsk, new); + return 0; +} + static int check_setns_flags(unsigned long flags) { if (!flags || (flags & ~(CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | diff --git a/tools/testing/selftests/timens/.gitignore b/tools/testing/selftests/timens/.gitignore index fe1eb8271b35..cae8dca0fbff 100644 --- a/tools/testing/selftests/timens/.gitignore +++ b/tools/testing/selftests/timens/.gitignore @@ -8,3 +8,4 @@ procfs timens timer timerfd +vfork_exec diff --git a/tools/testing/selftests/timens/Makefile b/tools/testing/selftests/timens/Makefile index 3a5936cc10ab..f0d51d4d2c87 100644 --- a/tools/testing/selftests/timens/Makefile +++ b/tools/testing/selftests/timens/Makefile @@ -1,4 +1,4 @@ -TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex +TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex vfork_exec TEST_GEN_PROGS_EXTENDED := gettime_perf CFLAGS := -Wall -Werror -pthread diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c new file mode 100644 index 000000000000..beb7614941fb --- /dev/null +++ b/tools/testing/selftests/timens/vfork_exec.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <stdbool.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <time.h> +#include <unistd.h> +#include <string.h> +#include <pthread.h> + +#include "log.h" +#include "timens.h" + +#define OFFSET (36000) + +struct thread_args { + char *tst_name; + struct timespec *now; +}; + +static void *tcheck(void *_args) +{ + struct thread_args *args = _args; + struct timespec *now = args->now, tst; + int i; + + for (i = 0; i < 2; i++) { + _gettime(CLOCK_MONOTONIC, &tst, i); + if (abs(tst.tv_sec - now->tv_sec) > 5) { + pr_fail("%s: in-thread: unexpected value: %ld (%ld)\n", + args->tst_name, tst.tv_sec, now->tv_sec); + return (void *)1UL; + } + } + return NULL; +} + +static int check_in_thread(char *tst_name, struct timespec *now) +{ + struct thread_args args = { + .tst_name = tst_name, + .now = now, + }; + pthread_t th; + void *retval; + + if (pthread_create(&th, NULL, tcheck, &args)) + return pr_perror("thread"); + if (pthread_join(th, &retval)) + return pr_perror("pthread_join"); + return !(retval == NULL); +} + +static int check(char *tst_name, struct timespec *now) +{ + struct timespec tst; + int i; + + for (i = 0; i < 2; i++) { + _gettime(CLOCK_MONOTONIC, &tst, i); + if (abs(tst.tv_sec - now->tv_sec) > 5) + return pr_fail("%s: unexpected value: %ld (%ld)\n", + tst_name, tst.tv_sec, now->tv_sec); + } + if (check_in_thread(tst_name, now)) + return 1; + ksft_test_result_pass("%s\n", tst_name); + return 0; +} + +int main(int argc, char *argv[]) +{ + struct timespec now; + int status; + pid_t pid; + + if (argc > 1) { + char *endptr; + + ksft_cnt.ksft_pass = 1; + now.tv_sec = strtoul(argv[1], &endptr, 0); + if (*endptr != 0) + return pr_perror("strtoul"); + + return check("child after exec", &now); + } + + nscheck(); + + ksft_set_plan(4); + + clock_gettime(CLOCK_MONOTONIC, &now); + + if (unshare_timens()) + return 1; + + if (_settime(CLOCK_MONOTONIC, OFFSET)) + return 1; + + if (check("parent before vfork", &now)) + return 1; + + pid = vfork(); + if (pid < 0) + return pr_perror("fork"); + + if (pid == 0) { + char now_str[64]; + char *cargv[] = {"exec", now_str, NULL}; + char *cenv[] = {NULL}; + + /* Check for proper vvar offsets after execve. */ + snprintf(now_str, sizeof(now_str), "%ld", now.tv_sec + OFFSET); + execve("/proc/self/exe", cargv, cenv); + pr_perror("execve"); + _exit(1); + } + + if (waitpid(pid, &status, 0) != pid) + return pr_perror("waitpid"); + + if (status) + ksft_exit_fail(); + ksft_inc_pass_cnt(); + ksft_test_result_pass("wait for child\n"); + + /* Check that we are still in the source timens. */ + if (check("parent after vfork", &now)) + return 1; + + ksft_exit_pass(); + return 0; +} |