diff options
Diffstat (limited to 'fs/binfmt_elf.c')
-rw-r--r-- | fs/binfmt_elf.c | 391 |
1 files changed, 197 insertions, 194 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index fb2c8d14327a..47335a0f4a61 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -110,38 +110,19 @@ static struct linux_binfmt elf_format = { #define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE)) -static int set_brk(unsigned long start, unsigned long end, int prot) -{ - start = ELF_PAGEALIGN(start); - end = ELF_PAGEALIGN(end); - if (end > start) { - /* - * Map the last of the bss segment. - * If the header is requesting these pages to be - * executable, honour that (ppc32 needs this). - */ - int error = vm_brk_flags(start, end - start, - prot & PROT_EXEC ? VM_EXEC : 0); - if (error) - return error; - } - current->mm->start_brk = current->mm->brk = end; - return 0; -} - -/* We need to explicitly zero any fractional pages - after the data section (i.e. bss). This would - contain the junk from the file that should not - be in memory +/* + * We need to explicitly zero any trailing portion of the page that follows + * p_filesz when it ends before the page ends (e.g. bss), otherwise this + * memory will contain the junk from the file that should not be present. */ -static int padzero(unsigned long elf_bss) +static int padzero(unsigned long address) { unsigned long nbyte; - nbyte = ELF_PAGEOFFSET(elf_bss); + nbyte = ELF_PAGEOFFSET(address); if (nbyte) { nbyte = ELF_MIN_ALIGN - nbyte; - if (clear_user((void __user *) elf_bss, nbyte)) + if (clear_user((void __user *)address, nbyte)) return -EFAULT; } return 0; @@ -367,6 +348,11 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, return 0; } +/* + * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset" + * into memory at "addr". (Note that p_filesz is rounded up to the + * next page, so any extra bytes from the file must be wiped.) + */ static unsigned long elf_map(struct file *filep, unsigned long addr, const struct elf_phdr *eppnt, int prot, int type, unsigned long total_size) @@ -406,6 +392,60 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, return(map_addr); } +/* + * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset" + * into memory at "addr". Memory from "p_filesz" through "p_memsz" + * rounded up to the next page is zeroed. + */ +static unsigned long elf_load(struct file *filep, unsigned long addr, + const struct elf_phdr *eppnt, int prot, int type, + unsigned long total_size) +{ + unsigned long zero_start, zero_end; + unsigned long map_addr; + + if (eppnt->p_filesz) { + map_addr = elf_map(filep, addr, eppnt, prot, type, total_size); + if (BAD_ADDR(map_addr)) + return map_addr; + if (eppnt->p_memsz > eppnt->p_filesz) { + zero_start = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) + + eppnt->p_filesz; + zero_end = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) + + eppnt->p_memsz; + + /* + * Zero the end of the last mapped page but ignore + * any errors if the segment isn't writable. + */ + if (padzero(zero_start) && (prot & PROT_WRITE)) + return -EFAULT; + } + } else { + map_addr = zero_start = ELF_PAGESTART(addr); + zero_end = zero_start + ELF_PAGEOFFSET(eppnt->p_vaddr) + + eppnt->p_memsz; + } + if (eppnt->p_memsz > eppnt->p_filesz) { + /* + * Map the last of the segment. + * If the header is requesting these pages to be + * executable, honour that (ppc32 needs this). + */ + int error; + + zero_start = ELF_PAGEALIGN(zero_start); + zero_end = ELF_PAGEALIGN(zero_end); + + error = vm_brk_flags(zero_start, zero_end - zero_start, + prot & PROT_EXEC ? VM_EXEC : 0); + if (error) + map_addr = error; + } + return map_addr; +} + + static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr) { elf_addr_t min_addr = -1; @@ -596,8 +636,6 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, struct elf_phdr *eppnt; unsigned long load_addr = 0; int load_addr_set = 0; - unsigned long last_bss = 0, elf_bss = 0; - int bss_prot = 0; unsigned long error = ~0UL; unsigned long total_size; int i; @@ -634,7 +672,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, else if (no_base && interp_elf_ex->e_type == ET_DYN) load_addr = -vaddr; - map_addr = elf_map(interpreter, load_addr + vaddr, + map_addr = elf_load(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type, total_size); total_size = 0; error = map_addr; @@ -660,51 +698,9 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, error = -ENOMEM; goto out; } - - /* - * Find the end of the file mapping for this phdr, and - * keep track of the largest address we see for this. - */ - k = load_addr + eppnt->p_vaddr + eppnt->p_filesz; - if (k > elf_bss) - elf_bss = k; - - /* - * Do the same thing for the memory mapping - between - * elf_bss and last_bss is the bss section. - */ - k = load_addr + eppnt->p_vaddr + eppnt->p_memsz; - if (k > last_bss) { - last_bss = k; - bss_prot = elf_prot; - } } } - /* - * Now fill out the bss section: first pad the last page from - * the file up to the page boundary, and zero it from elf_bss - * up to the end of the page. - */ - if (padzero(elf_bss)) { - error = -EFAULT; - goto out; - } - /* - * Next, align both the file and mem bss up to the page size, - * since this is where elf_bss was just zeroed up to, and where - * last_bss will end after the vm_brk_flags() below. - */ - elf_bss = ELF_PAGEALIGN(elf_bss); - last_bss = ELF_PAGEALIGN(last_bss); - /* Finally, if there is still more bss to allocate, do it. */ - if (last_bss > elf_bss) { - error = vm_brk_flags(elf_bss, last_bss - elf_bss, - bss_prot & PROT_EXEC ? VM_EXEC : 0); - if (error) - goto out; - } - error = load_addr; out: return error; @@ -828,8 +824,8 @@ static int load_elf_binary(struct linux_binprm *bprm) unsigned long error; struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL; struct elf_phdr *elf_property_phdata = NULL; - unsigned long elf_bss, elf_brk; - int bss_prot = 0; + unsigned long elf_brk; + bool brk_moved = false; int retval, i; unsigned long elf_entry; unsigned long e_entry; @@ -1021,7 +1017,6 @@ out_free_interp: if (retval < 0) goto out_free_dentry; - elf_bss = 0; elf_brk = 0; start_code = ~0UL; @@ -1041,33 +1036,6 @@ out_free_interp: if (elf_ppnt->p_type != PT_LOAD) continue; - if (unlikely (elf_brk > elf_bss)) { - unsigned long nbyte; - - /* There was a PT_LOAD segment with p_memsz > p_filesz - before this one. Map anonymous pages, if needed, - and clear the area. */ - retval = set_brk(elf_bss + load_bias, - elf_brk + load_bias, - bss_prot); - if (retval) - goto out_free_dentry; - nbyte = ELF_PAGEOFFSET(elf_bss); - if (nbyte) { - nbyte = ELF_MIN_ALIGN - nbyte; - if (nbyte > elf_brk - elf_bss) - nbyte = elf_brk - elf_bss; - if (clear_user((void __user *)elf_bss + - load_bias, nbyte)) { - /* - * This bss-zeroing can fail if the ELF - * file specifies odd protections. So - * we don't check the return value - */ - } - } - } - elf_prot = make_prot(elf_ppnt->p_flags, &arch_state, !!interpreter, false); @@ -1095,15 +1063,49 @@ out_free_interp: * Header for ET_DYN binaries to calculate the * randomization (load_bias) for all the LOAD * Program Headers. + */ + + /* + * Calculate the entire size of the ELF mapping + * (total_size), used for the initial mapping, + * due to load_addr_set which is set to true later + * once the initial mapping is performed. + * + * Note that this is only sensible when the LOAD + * segments are contiguous (or overlapping). If + * used for LOADs that are far apart, this would + * cause the holes between LOADs to be mapped, + * running the risk of having the mapping fail, + * as it would be larger than the ELF file itself. + * + * As a result, only ET_DYN does this, since + * some ET_EXEC (e.g. ia64) may have large virtual + * memory holes between LOADs. + * + */ + total_size = total_mapping_size(elf_phdata, + elf_ex->e_phnum); + if (!total_size) { + retval = -EINVAL; + goto out_free_dentry; + } + + /* Calculate any requested alignment. */ + alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum); + + /** + * DOC: PIE handling * - * There are effectively two types of ET_DYN - * binaries: programs (i.e. PIE: ET_DYN with INTERP) - * and loaders (ET_DYN without INTERP, since they - * _are_ the ELF interpreter). The loaders must - * be loaded away from programs since the program - * may otherwise collide with the loader (especially - * for ET_EXEC which does not have a randomized - * position). For example to handle invocations of + * There are effectively two types of ET_DYN ELF + * binaries: programs (i.e. PIE: ET_DYN with + * PT_INTERP) and loaders (i.e. static PIE: ET_DYN + * without PT_INTERP, usually the ELF interpreter + * itself). Loaders must be loaded away from programs + * since the program may otherwise collide with the + * loader (especially for ET_EXEC which does not have + * a randomized position). + * + * For example, to handle invocations of * "./ld.so someprog" to test out a new version of * the loader, the subsequent program that the * loader loads must avoid the loader itself, so @@ -1116,17 +1118,49 @@ out_free_interp: * ELF_ET_DYN_BASE and loaders are loaded into the * independently randomized mmap region (0 load_bias * without MAP_FIXED nor MAP_FIXED_NOREPLACE). + * + * See below for "brk" handling details, which is + * also affected by program vs loader and ASLR. */ if (interpreter) { + /* On ET_DYN with PT_INTERP, we do the ASLR. */ load_bias = ELF_ET_DYN_BASE; if (current->flags & PF_RANDOMIZE) load_bias += arch_mmap_rnd(); - alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum); + /* Adjust alignment as requested. */ if (alignment) load_bias &= ~(alignment - 1); elf_flags |= MAP_FIXED_NOREPLACE; - } else - load_bias = 0; + } else { + /* + * For ET_DYN without PT_INTERP, we rely on + * the architectures's (potentially ASLR) mmap + * base address (via a load_bias of 0). + * + * When a large alignment is requested, we + * must do the allocation at address "0" right + * now to discover where things will load so + * that we can adjust the resulting alignment. + * In this case (load_bias != 0), we can use + * MAP_FIXED_NOREPLACE to make sure the mapping + * doesn't collide with anything. + */ + if (alignment > ELF_MIN_ALIGN) { + load_bias = elf_load(bprm->file, 0, elf_ppnt, + elf_prot, elf_flags, total_size); + if (BAD_ADDR(load_bias)) { + retval = IS_ERR_VALUE(load_bias) ? + PTR_ERR((void*)load_bias) : -EINVAL; + goto out_free_dentry; + } + vm_munmap(load_bias, total_size); + /* Adjust alignment as requested. */ + if (alignment) + load_bias &= ~(alignment - 1); + elf_flags |= MAP_FIXED_NOREPLACE; + } else + load_bias = 0; + } /* * Since load_bias is used for all subsequent loading @@ -1136,34 +1170,9 @@ out_free_interp: * is then page aligned. */ load_bias = ELF_PAGESTART(load_bias - vaddr); - - /* - * Calculate the entire size of the ELF mapping - * (total_size), used for the initial mapping, - * due to load_addr_set which is set to true later - * once the initial mapping is performed. - * - * Note that this is only sensible when the LOAD - * segments are contiguous (or overlapping). If - * used for LOADs that are far apart, this would - * cause the holes between LOADs to be mapped, - * running the risk of having the mapping fail, - * as it would be larger than the ELF file itself. - * - * As a result, only ET_DYN does this, since - * some ET_EXEC (e.g. ia64) may have large virtual - * memory holes between LOADs. - * - */ - total_size = total_mapping_size(elf_phdata, - elf_ex->e_phnum); - if (!total_size) { - retval = -EINVAL; - goto out_free_dentry; - } } - error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, + error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags, total_size); if (BAD_ADDR(error)) { retval = IS_ERR_VALUE(error) ? @@ -1211,41 +1220,23 @@ out_free_interp: k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz; - if (k > elf_bss) - elf_bss = k; if ((elf_ppnt->p_flags & PF_X) && end_code < k) end_code = k; if (end_data < k) end_data = k; k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz; - if (k > elf_brk) { - bss_prot = elf_prot; + if (k > elf_brk) elf_brk = k; - } } e_entry = elf_ex->e_entry + load_bias; phdr_addr += load_bias; - elf_bss += load_bias; elf_brk += load_bias; start_code += load_bias; end_code += load_bias; start_data += load_bias; end_data += load_bias; - /* Calling set_brk effectively mmaps the pages that we need - * for the bss and break sections. We must do this before - * mapping in the interpreter, to make sure it doesn't wind - * up getting placed where the bss needs to go. - */ - retval = set_brk(elf_bss, elf_brk, bss_prot); - if (retval) - goto out_free_dentry; - if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) { - retval = -EFAULT; /* Nobody gets to see this, but.. */ - goto out_free_dentry; - } - if (interpreter) { elf_entry = load_elf_interp(interp_elf_ex, interpreter, @@ -1301,24 +1292,44 @@ out_free_interp: mm->end_data = end_data; mm->start_stack = bprm->p; - if ((current->flags & PF_RANDOMIZE) && (snapshot_randomize_va_space > 1)) { + /** + * DOC: "brk" handling + * + * For architectures with ELF randomization, when executing a + * loader directly (i.e. static PIE: ET_DYN without PT_INTERP), + * move the brk area out of the mmap region and into the unused + * ELF_ET_DYN_BASE region. Since "brk" grows up it may collide + * early with the stack growing down or other regions being put + * into the mmap region by the kernel (e.g. vdso). + * + * In the CONFIG_COMPAT_BRK case, though, everything is turned + * off because we're not allowed to move the brk at all. + */ + if (!IS_ENABLED(CONFIG_COMPAT_BRK) && + IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) && + elf_ex->e_type == ET_DYN && !interpreter) { + elf_brk = ELF_ET_DYN_BASE; + /* This counts as moving the brk, so let brk(2) know. */ + brk_moved = true; + } + mm->start_brk = mm->brk = ELF_PAGEALIGN(elf_brk); + + if ((current->flags & PF_RANDOMIZE) && snapshot_randomize_va_space > 1) { /* - * For architectures with ELF randomization, when executing - * a loader directly (i.e. no interpreter listed in ELF - * headers), move the brk area out of the mmap region - * (since it grows up, and may collide early with the stack - * growing down), and into the unused ELF_ET_DYN_BASE region. + * If we didn't move the brk to ELF_ET_DYN_BASE (above), + * leave a gap between .bss and brk. */ - if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) && - elf_ex->e_type == ET_DYN && !interpreter) { - mm->brk = mm->start_brk = ELF_ET_DYN_BASE; - } + if (!brk_moved) + mm->brk = mm->start_brk = mm->brk + PAGE_SIZE; mm->brk = mm->start_brk = arch_randomize_brk(mm); + brk_moved = true; + } + #ifdef compat_brk_randomized + if (brk_moved) current->brk_randomized = 1; #endif - } if (current->personality & MMAP_PAGE_ZERO) { /* Why this, you ask??? Well SVr4 maps page 0 as read-only, @@ -1327,6 +1338,11 @@ out_free_interp: emulate the SVr4 behavior. Sigh. */ error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE, 0); + + retval = do_mseal(0, PAGE_SIZE, 0); + if (retval) + pr_warn_ratelimited("pid=%d, couldn't seal address 0, ret=%d.\n", + task_pid_nr(current), retval); } regs = current_pt_regs(); @@ -1370,7 +1386,6 @@ static int load_elf_library(struct file *file) { struct elf_phdr *elf_phdata; struct elf_phdr *eppnt; - unsigned long elf_bss, bss, len; int retval, error, i, j; struct elfhdr elf_ex; @@ -1415,30 +1430,15 @@ static int load_elf_library(struct file *file) eppnt++; /* Now use mmap to map the library into memory. */ - error = vm_mmap(file, - ELF_PAGESTART(eppnt->p_vaddr), - (eppnt->p_filesz + - ELF_PAGEOFFSET(eppnt->p_vaddr)), + error = elf_load(file, ELF_PAGESTART(eppnt->p_vaddr), + eppnt, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED_NOREPLACE | MAP_PRIVATE, - (eppnt->p_offset - - ELF_PAGEOFFSET(eppnt->p_vaddr))); - if (error != ELF_PAGESTART(eppnt->p_vaddr)) - goto out_free_ph; + 0); - elf_bss = eppnt->p_vaddr + eppnt->p_filesz; - if (padzero(elf_bss)) { - error = -EFAULT; + if (error != ELF_PAGESTART(eppnt->p_vaddr)) goto out_free_ph; - } - len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr); - bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr); - if (bss > len) { - error = vm_brk(len, bss - len); - if (error) - goto out_free_ph; - } error = 0; out_free_ph: @@ -1630,7 +1630,6 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata, fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata); } -#define MAX_FILE_NOTE_SIZE (4*1024*1024) /* * Format of NT_FILE note: * @@ -1658,8 +1657,12 @@ static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm names_ofs = (2 + 3 * count) * sizeof(data[0]); alloc: - if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */ + /* paranoia check */ + if (size >= core_file_note_size_limit) { + pr_warn_once("coredump Note size too large: %u (does kernel.core_file_note_size_limit sysctl need adjustment?\n", + size); return -EINVAL; + } size = round_up(size, PAGE_SIZE); /* * "size" can be 0 here legitimately. @@ -1994,7 +1997,7 @@ static void free_note_info(struct elf_note_info *info) threads = t->next; WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus); for (i = 1; i < info->thread_notes; ++i) - kfree(t->notes[i].data); + kvfree(t->notes[i].data); kfree(t); } kfree(info->psinfo.data); @@ -2066,7 +2069,7 @@ static int elf_core_dump(struct coredump_params *cprm) { size_t sz = info.size; - /* For cell spufs */ + /* For cell spufs and x86 xstate */ sz += elf_coredump_extra_notes_size(); phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL); @@ -2130,7 +2133,7 @@ static int elf_core_dump(struct coredump_params *cprm) if (!write_note_info(&info, cprm)) goto end_coredump; - /* For cell spufs */ + /* For cell spufs and x86 xstate */ if (elf_coredump_extra_notes_write(cprm)) goto end_coredump; @@ -2177,5 +2180,5 @@ core_initcall(init_elf_binfmt); module_exit(exit_elf_binfmt); #ifdef CONFIG_BINFMT_ELF_KUNIT_TEST -#include "binfmt_elf_test.c" +#include "tests/binfmt_elf_kunit.c" #endif |