From 60592fb6b67c653beaa2e7acad9a9d7aa0b71dff Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 12 May 2023 02:25:28 +0000 Subject: coredump, vmcore: Set p_align to 4 for PT_NOTE Tools like readelf/llvm-readelf use p_align to parse a PT_NOTE program header as an array of 4-byte entries or 8-byte entries. Currently, there are workarounds[1] in place for Linux to treat p_align==0 as 4. However, it would be more appropriate to set the correct alignment so that tools do not have to rely on guesswork. FreeBSD coredumps set p_align to 4 as well. [1]: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=82ed9683ec099d8205dc499ac84febc975235af6 [2]: https://reviews.llvm.org/D150022 Signed-off-by: Fangrui Song Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20230512022528.3430327-1-maskray@google.com --- fs/binfmt_elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/binfmt_elf.c') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 1033fbdfdbec..44b4c42ab8e8 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1517,7 +1517,7 @@ static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset) phdr->p_filesz = sz; phdr->p_memsz = 0; phdr->p_flags = 0; - phdr->p_align = 0; + phdr->p_align = 4; } static void fill_note(struct memelfnote *note, const char *name, int type, -- cgit v1.2.3 From aa88054b70905069d1cf706aa5e9a3418d1d341d Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Fri, 23 Jun 2023 08:56:44 +0300 Subject: binfmt_elf: fix comment typo s/reset/regset/ Signed-off-by: Baruch Siach Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/0b2967c4a4141875c493e835d5a6f8f2d19ae2d6.1687499804.git.baruch@tkos.co.il --- fs/binfmt_elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/binfmt_elf.c') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 44b4c42ab8e8..983ce34115d5 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1773,7 +1773,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t, /* * NT_PRSTATUS is the one special case, because the regset data * goes into the pr_reg field inside the note contents, rather - * than being the whole note contents. We fill the reset in here. + * than being the whole note contents. We fill the regset in here. * We assume that regset 0 is NT_PRSTATUS. */ fill_prstatus(&t->prstatus.common, t->task, signr); -- cgit v1.2.3 From f440fa1ac955e2898893f9301568435eb5cdfc4b Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 16 Jun 2023 15:58:54 -0700 Subject: mm: make find_extend_vma() fail if write lock not held Make calls to extend_vma() and find_extend_vma() fail if the write lock is required. To avoid making this a flag-day event, this still allows the old read-locking case for the trivial situations, and passes in a flag to say "is it write-locked". That way write-lockers can say "yes, I'm being careful", and legacy users will continue to work in all the common cases until they have been fully converted to the new world order. Co-Developed-by: Matthew Wilcox (Oracle) Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Liam R. Howlett Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 6 +++--- fs/exec.c | 5 +++-- include/linux/mm.h | 10 +++++++--- mm/memory.c | 2 +- mm/mmap.c | 50 +++++++++++++++++++++++++++++++++----------------- mm/nommu.c | 3 ++- 6 files changed, 49 insertions(+), 27 deletions(-) (limited to 'fs/binfmt_elf.c') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 1033fbdfdbec..869c3aa0e455 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -320,10 +320,10 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, * Grow the stack manually; some architectures have a limit on how * far ahead a user-space access may be in order to grow the stack. */ - if (mmap_read_lock_killable(mm)) + if (mmap_write_lock_killable(mm)) return -EINTR; - vma = find_extend_vma(mm, bprm->p); - mmap_read_unlock(mm); + vma = find_extend_vma_locked(mm, bprm->p, true); + mmap_write_unlock(mm); if (!vma) return -EFAULT; diff --git a/fs/exec.c b/fs/exec.c index a466e797c8e2..a61eb256e5e4 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -205,7 +205,8 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, #ifdef CONFIG_STACK_GROWSUP if (write) { - ret = expand_downwards(bprm->vma, pos); + /* We claim to hold the lock - nobody to race with */ + ret = expand_downwards(bprm->vma, pos, true); if (ret < 0) return NULL; } @@ -853,7 +854,7 @@ int setup_arg_pages(struct linux_binprm *bprm, stack_base = vma->vm_end - stack_expand; #endif current->mm->start_stack = bprm->p; - ret = expand_stack(vma, stack_base); + ret = expand_stack_locked(vma, stack_base, true); if (ret) ret = -EFAULT; diff --git a/include/linux/mm.h b/include/linux/mm.h index 570cf906fbcc..01a016521b60 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3192,11 +3192,13 @@ extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf); extern unsigned long stack_guard_gap; /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ -extern int expand_stack(struct vm_area_struct *vma, unsigned long address); +int expand_stack_locked(struct vm_area_struct *vma, unsigned long address, + bool write_locked); +#define expand_stack(vma,addr) expand_stack_locked(vma,addr,false) /* CONFIG_STACK_GROWSUP still needs to grow downwards at some places */ -extern int expand_downwards(struct vm_area_struct *vma, - unsigned long address); +int expand_downwards(struct vm_area_struct *vma, unsigned long address, + bool write_locked); #if VM_GROWSUP extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); #else @@ -3297,6 +3299,8 @@ unsigned long change_prot_numa(struct vm_area_struct *vma, #endif struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); +struct vm_area_struct *find_extend_vma_locked(struct mm_struct *, + unsigned long addr, bool write_locked); int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr, diff --git a/mm/memory.c b/mm/memory.c index 1dff248805bf..a81f5d0997ad 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5368,7 +5368,7 @@ struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm, goto fail; } - if (expand_stack(vma, addr)) + if (expand_stack_locked(vma, addr, true)) goto fail; success: diff --git a/mm/mmap.c b/mm/mmap.c index 6d120bf1d0bc..2c44ac108a3c 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1935,7 +1935,8 @@ static int acct_stack_growth(struct vm_area_struct *vma, * PA-RISC uses this for its stack; IA64 for its Register Backing Store. * vma is the last one with address > vma->vm_end. Have to extend vma. */ -int expand_upwards(struct vm_area_struct *vma, unsigned long address) +int expand_upwards(struct vm_area_struct *vma, unsigned long address, + bool write_locked) { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *next; @@ -1959,6 +1960,8 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) if (gap_addr < address || gap_addr > TASK_SIZE) gap_addr = TASK_SIZE; + if (!write_locked) + return -EAGAIN; next = find_vma_intersection(mm, vma->vm_end, gap_addr); if (next && vma_is_accessible(next)) { if (!(next->vm_flags & VM_GROWSUP)) @@ -2028,7 +2031,8 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) /* * vma is the first one with address < vma->vm_start. Have to extend vma. */ -int expand_downwards(struct vm_area_struct *vma, unsigned long address) +int expand_downwards(struct vm_area_struct *vma, unsigned long address, + bool write_locked) { struct mm_struct *mm = vma->vm_mm; MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_start); @@ -2042,10 +2046,13 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address) /* Enforce stack_guard_gap */ prev = mas_prev(&mas, 0); /* Check that both stack segments have the same anon_vma? */ - if (prev && !(prev->vm_flags & VM_GROWSDOWN) && - vma_is_accessible(prev)) { - if (address - prev->vm_end < stack_guard_gap) + if (prev) { + if (!(prev->vm_flags & VM_GROWSDOWN) && + vma_is_accessible(prev) && + (address - prev->vm_end < stack_guard_gap)) return -ENOMEM; + if (!write_locked && (prev->vm_end == address)) + return -EAGAIN; } if (mas_preallocate(&mas, GFP_KERNEL)) @@ -2124,13 +2131,14 @@ static int __init cmdline_parse_stack_guard_gap(char *p) __setup("stack_guard_gap=", cmdline_parse_stack_guard_gap); #ifdef CONFIG_STACK_GROWSUP -int expand_stack(struct vm_area_struct *vma, unsigned long address) +int expand_stack_locked(struct vm_area_struct *vma, unsigned long address, + bool write_locked) { - return expand_upwards(vma, address); + return expand_upwards(vma, address, write_locked); } -struct vm_area_struct * -find_extend_vma(struct mm_struct *mm, unsigned long addr) +struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, + unsigned long addr, bool write_locked) { struct vm_area_struct *vma, *prev; @@ -2138,20 +2146,25 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) vma = find_vma_prev(mm, addr, &prev); if (vma && (vma->vm_start <= addr)) return vma; - if (!prev || expand_stack(prev, addr)) + if (!prev) + return NULL; + if (expand_stack_locked(prev, addr, write_locked)) return NULL; if (prev->vm_flags & VM_LOCKED) populate_vma_page_range(prev, addr, prev->vm_end, NULL); return prev; } #else -int expand_stack(struct vm_area_struct *vma, unsigned long address) +int expand_stack_locked(struct vm_area_struct *vma, unsigned long address, + bool write_locked) { - return expand_downwards(vma, address); + if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) + return -EINVAL; + return expand_downwards(vma, address, write_locked); } -struct vm_area_struct * -find_extend_vma(struct mm_struct *mm, unsigned long addr) +struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, + unsigned long addr, bool write_locked) { struct vm_area_struct *vma; unsigned long start; @@ -2162,10 +2175,8 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) return NULL; if (vma->vm_start <= addr) return vma; - if (!(vma->vm_flags & VM_GROWSDOWN)) - return NULL; start = vma->vm_start; - if (expand_stack(vma, addr)) + if (expand_stack_locked(vma, addr, write_locked)) return NULL; if (vma->vm_flags & VM_LOCKED) populate_vma_page_range(vma, addr, start, NULL); @@ -2173,6 +2184,11 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) } #endif +struct vm_area_struct *find_extend_vma(struct mm_struct *mm, + unsigned long addr) +{ + return find_extend_vma_locked(mm, addr, false); +} EXPORT_SYMBOL_GPL(find_extend_vma); /* diff --git a/mm/nommu.c b/mm/nommu.c index f670d9979a26..f476c9ed36b3 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -643,7 +643,8 @@ struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) * expand a stack to a given address * - not supported under NOMMU conditions */ -int expand_stack(struct vm_area_struct *vma, unsigned long address) +int expand_stack_locked(struct vm_area_struct *vma, unsigned long address, + bool write_locked) { return -ENOMEM; } -- cgit v1.2.3 From 8d7071af890768438c14db6172cc8f9f4d04e184 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 24 Jun 2023 13:45:51 -0700 Subject: mm: always expand the stack with the mmap write lock held This finishes the job of always holding the mmap write lock when extending the user stack vma, and removes the 'write_locked' argument from the vm helper functions again. For some cases, we just avoid expanding the stack at all: drivers and page pinning really shouldn't be extending any stacks. Let's see if any strange users really wanted that. It's worth noting that architectures that weren't converted to the new lock_mm_and_find_vma() helper function are left using the legacy "expand_stack()" function, but it has been changed to drop the mmap_lock and take it for writing while expanding the vma. This makes it fairly straightforward to convert the remaining architectures. As a result of dropping and re-taking the lock, the calling conventions for this function have also changed, since the old vma may no longer be valid. So it will now return the new vma if successful, and NULL - and the lock dropped - if the area could not be extended. Tested-by: Vegard Nossum Tested-by: John Paul Adrian Glaubitz # ia64 Tested-by: Frank Scheiner # ia64 Signed-off-by: Linus Torvalds --- arch/ia64/mm/fault.c | 36 +++---------- arch/m68k/mm/fault.c | 9 ++-- arch/microblaze/mm/fault.c | 5 +- arch/openrisc/mm/fault.c | 5 +- arch/parisc/mm/fault.c | 23 ++++---- arch/s390/mm/fault.c | 5 +- arch/sparc/mm/fault_64.c | 8 +-- arch/um/kernel/trap.c | 11 ++-- drivers/iommu/amd/iommu_v2.c | 4 +- drivers/iommu/iommu-sva.c | 2 +- fs/binfmt_elf.c | 2 +- fs/exec.c | 4 +- include/linux/mm.h | 16 ++---- mm/gup.c | 6 +-- mm/memory.c | 10 +++- mm/mmap.c | 121 ++++++++++++++++++++++++++++++++++--------- mm/nommu.c | 18 +++---- 17 files changed, 169 insertions(+), 116 deletions(-) (limited to 'fs/binfmt_elf.c') diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 85c4d9ac8686..5458b52b4009 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -110,10 +110,12 @@ retry: * register backing store that needs to expand upwards, in * this case vma will be null, but prev_vma will ne non-null */ - if (( !vma && prev_vma ) || (address < vma->vm_start) ) - goto check_expansion; + if (( !vma && prev_vma ) || (address < vma->vm_start) ) { + vma = expand_stack(mm, address); + if (!vma) + goto bad_area_nosemaphore; + } - good_area: code = SEGV_ACCERR; /* OK, we've got a good vm_area for this memory area. Check the access permissions: */ @@ -177,35 +179,9 @@ retry: mmap_read_unlock(mm); return; - check_expansion: - if (!(prev_vma && (prev_vma->vm_flags & VM_GROWSUP) && (address == prev_vma->vm_end))) { - if (!vma) - goto bad_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; - if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start) - || REGION_OFFSET(address) >= RGN_MAP_LIMIT) - goto bad_area; - if (expand_stack(vma, address)) - goto bad_area; - } else { - vma = prev_vma; - if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start) - || REGION_OFFSET(address) >= RGN_MAP_LIMIT) - goto bad_area; - /* - * Since the register backing store is accessed sequentially, - * we disallow growing it by more than a page at a time. - */ - if (address > vma->vm_end + PAGE_SIZE - sizeof(long)) - goto bad_area; - if (expand_upwards(vma, address)) - goto bad_area; - } - goto good_area; - bad_area: mmap_read_unlock(mm); + bad_area_nosemaphore: if ((isr & IA64_ISR_SP) || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) { diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 228128e45c67..c290c5c0cfb9 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -105,8 +105,9 @@ retry: if (address + 256 < rdusp()) goto map_err; } - if (expand_stack(vma, address)) - goto map_err; + vma = expand_stack(mm, address); + if (!vma) + goto map_err_nosemaphore; /* * Ok, we have a good vm_area for this memory access, so @@ -196,10 +197,12 @@ bus_err: goto send_sig; map_err: + mmap_read_unlock(mm); +map_err_nosemaphore: current->thread.signo = SIGSEGV; current->thread.code = SEGV_MAPERR; current->thread.faddr = address; - goto send_sig; + return send_fault_sig(regs); acc_err: current->thread.signo = SIGSEGV; diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index 687714db6f4d..d3c3c33b73a6 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -192,8 +192,9 @@ retry: && (kernel_mode(regs) || !store_updates_sp(regs))) goto bad_area; } - if (expand_stack(vma, address)) - goto bad_area; + vma = expand_stack(mm, address); + if (!vma) + goto bad_area_nosemaphore; good_area: code = SEGV_ACCERR; diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index 6734fee3134f..a9dcd4381d1a 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -127,8 +127,9 @@ retry: if (address + PAGE_SIZE < regs->sp) goto bad_area; } - if (expand_stack(vma, address)) - goto bad_area; + vma = expand_stack(mm, address); + if (!vma) + goto bad_area_nosemaphore; /* * Ok, we have a good vm_area for this memory access, so diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 6941fdbf2517..6e894afa4249 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -288,15 +288,19 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, retry: mmap_read_lock(mm); vma = find_vma_prev(mm, address, &prev_vma); - if (!vma || address < vma->vm_start) - goto check_expansion; + if (!vma || address < vma->vm_start) { + if (!prev || !(prev->vm_flags & VM_GROWSUP)) + goto bad_area; + vma = expand_stack(mm, address); + if (!vma) + goto bad_area_nosemaphore; + } + /* * Ok, we have a good vm_area for this memory access. We still need to * check the access permissions. */ -good_area: - if ((vma->vm_flags & acc_type) != acc_type) goto bad_area; @@ -347,17 +351,13 @@ good_area: mmap_read_unlock(mm); return; -check_expansion: - vma = prev_vma; - if (vma && (expand_stack(vma, address) == 0)) - goto good_area; - /* * Something tried to access memory that isn't in our memory map.. */ bad_area: mmap_read_unlock(mm); +bad_area_nosemaphore: if (user_mode(regs)) { int signo, si_code; @@ -449,7 +449,7 @@ handle_nadtlb_fault(struct pt_regs *regs) { unsigned long insn = regs->iir; int breg, treg, xreg, val = 0; - struct vm_area_struct *vma, *prev_vma; + struct vm_area_struct *vma; struct task_struct *tsk; struct mm_struct *mm; unsigned long address; @@ -485,7 +485,7 @@ handle_nadtlb_fault(struct pt_regs *regs) /* Search for VMA */ address = regs->ior; mmap_read_lock(mm); - vma = find_vma_prev(mm, address, &prev_vma); + vma = vma_lookup(mm, address); mmap_read_unlock(mm); /* @@ -494,7 +494,6 @@ handle_nadtlb_fault(struct pt_regs *regs) */ acc_type = (insn & 0x40) ? VM_WRITE : VM_READ; if (vma - && address >= vma->vm_start && (vma->vm_flags & acc_type) == acc_type) val = 1; } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index b65144c392b0..dbe8394234e2 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -457,8 +457,9 @@ retry: if (unlikely(vma->vm_start > address)) { if (!(vma->vm_flags & VM_GROWSDOWN)) goto out_up; - if (expand_stack(vma, address)) - goto out_up; + vma = expand_stack(mm, address); + if (!vma) + goto out; } /* diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index d91305de694c..69ff07bc6c07 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -383,8 +383,9 @@ continue_fault: goto bad_area; } } - if (expand_stack(vma, address)) - goto bad_area; + vma = expand_stack(mm, address); + if (!vma) + goto bad_area_nosemaphore; /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. @@ -487,8 +488,9 @@ exit_exception: * Fix it, but check if it's kernel or user first.. */ bad_area: - insn = get_fault_insn(regs, insn); mmap_read_unlock(mm); +bad_area_nosemaphore: + insn = get_fault_insn(regs, insn); handle_kernel_fault: do_kernel_fault(regs, si_code, fault_code, insn, address); diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index d3ce21c4ca32..6d8ae86ae978 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -47,14 +47,15 @@ retry: vma = find_vma(mm, address); if (!vma) goto out; - else if (vma->vm_start <= address) + if (vma->vm_start <= address) goto good_area; - else if (!(vma->vm_flags & VM_GROWSDOWN)) + if (!(vma->vm_flags & VM_GROWSDOWN)) goto out; - else if (is_user && !ARCH_IS_STACKGROW(address)) - goto out; - else if (expand_stack(vma, address)) + if (is_user && !ARCH_IS_STACKGROW(address)) goto out; + vma = expand_stack(mm, address); + if (!vma) + goto out_nosemaphore; good_area: *code_out = SEGV_ACCERR; diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 864e4ffb6aa9..261352a23271 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -485,8 +485,8 @@ static void do_fault(struct work_struct *work) flags |= FAULT_FLAG_REMOTE; mmap_read_lock(mm); - vma = find_extend_vma(mm, address); - if (!vma || address < vma->vm_start) + vma = vma_lookup(mm, address); + if (!vma) /* failed to get a vma in the right range */ goto out; diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 9821bc44f5ac..3ebd4b6586b3 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -175,7 +175,7 @@ iommu_sva_handle_iopf(struct iommu_fault *fault, void *data) mmap_read_lock(mm); - vma = find_extend_vma(mm, prm->addr); + vma = vma_lookup(mm, prm->addr); if (!vma) /* Unmapped area */ goto out_put_mm; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 869c3aa0e455..befa93582ed7 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -322,7 +322,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec, */ if (mmap_write_lock_killable(mm)) return -EINTR; - vma = find_extend_vma_locked(mm, bprm->p, true); + vma = find_extend_vma_locked(mm, bprm->p); mmap_write_unlock(mm); if (!vma) return -EFAULT; diff --git a/fs/exec.c b/fs/exec.c index 66e3e22ffb8a..b84b4fee0f82 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -211,7 +211,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, */ if (write && pos < vma->vm_start) { mmap_write_lock(mm); - ret = expand_downwards(vma, pos, true); + ret = expand_downwards(vma, pos); if (unlikely(ret < 0)) { mmap_write_unlock(mm); return NULL; @@ -859,7 +859,7 @@ int setup_arg_pages(struct linux_binprm *bprm, stack_base = vma->vm_end - stack_expand; #endif current->mm->start_stack = bprm->p; - ret = expand_stack_locked(vma, stack_base, true); + ret = expand_stack_locked(vma, stack_base); if (ret) ret = -EFAULT; diff --git a/include/linux/mm.h b/include/linux/mm.h index 01a016521b60..4a9533efbd5d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3192,18 +3192,11 @@ extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf); extern unsigned long stack_guard_gap; /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ -int expand_stack_locked(struct vm_area_struct *vma, unsigned long address, - bool write_locked); -#define expand_stack(vma,addr) expand_stack_locked(vma,addr,false) +int expand_stack_locked(struct vm_area_struct *vma, unsigned long address); +struct vm_area_struct *expand_stack(struct mm_struct * mm, unsigned long addr); /* CONFIG_STACK_GROWSUP still needs to grow downwards at some places */ -int expand_downwards(struct vm_area_struct *vma, unsigned long address, - bool write_locked); -#if VM_GROWSUP -extern int expand_upwards(struct vm_area_struct *vma, unsigned long address); -#else - #define expand_upwards(vma, address) (0) -#endif +int expand_downwards(struct vm_area_struct *vma, unsigned long address); /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); @@ -3298,9 +3291,8 @@ unsigned long change_prot_numa(struct vm_area_struct *vma, unsigned long start, unsigned long end); #endif -struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); struct vm_area_struct *find_extend_vma_locked(struct mm_struct *, - unsigned long addr, bool write_locked); + unsigned long addr); int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr, diff --git a/mm/gup.c b/mm/gup.c index bbe416236593..e6cdfee4451f 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1096,7 +1096,7 @@ static long __get_user_pages(struct mm_struct *mm, /* first iteration or cross vma bound */ if (!vma || start >= vma->vm_end) { - vma = find_extend_vma(mm, start); + vma = vma_lookup(mm, start); if (!vma && in_gate_area(mm, start)) { ret = get_gate_page(mm, start & PAGE_MASK, gup_flags, &vma, @@ -1265,8 +1265,8 @@ int fixup_user_fault(struct mm_struct *mm, fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; retry: - vma = find_extend_vma(mm, address); - if (!vma || address < vma->vm_start) + vma = vma_lookup(mm, address); + if (!vma) return -EFAULT; if (!vma_permits_fault(vma, fault_flags)) diff --git a/mm/memory.c b/mm/memory.c index a81f5d0997ad..5ce82a76201d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5368,7 +5368,7 @@ struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm, goto fail; } - if (expand_stack_locked(vma, addr, true)) + if (expand_stack_locked(vma, addr)) goto fail; success: @@ -5713,6 +5713,14 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, if (mmap_read_lock_killable(mm)) return 0; + /* We might need to expand the stack to access it */ + vma = vma_lookup(mm, addr); + if (!vma) { + vma = expand_stack(mm, addr); + if (!vma) + return 0; + } + /* ignore errors, just check how much was successfully transferred */ while (len) { int bytes, ret, offset; diff --git a/mm/mmap.c b/mm/mmap.c index 2c44ac108a3c..bc510361acec 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1935,8 +1935,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, * PA-RISC uses this for its stack; IA64 for its Register Backing Store. * vma is the last one with address > vma->vm_end. Have to extend vma. */ -int expand_upwards(struct vm_area_struct *vma, unsigned long address, - bool write_locked) +static int expand_upwards(struct vm_area_struct *vma, unsigned long address) { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *next; @@ -1960,8 +1959,6 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address, if (gap_addr < address || gap_addr > TASK_SIZE) gap_addr = TASK_SIZE; - if (!write_locked) - return -EAGAIN; next = find_vma_intersection(mm, vma->vm_end, gap_addr); if (next && vma_is_accessible(next)) { if (!(next->vm_flags & VM_GROWSUP)) @@ -2030,15 +2027,18 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address, /* * vma is the first one with address < vma->vm_start. Have to extend vma. + * mmap_lock held for writing. */ -int expand_downwards(struct vm_area_struct *vma, unsigned long address, - bool write_locked) +int expand_downwards(struct vm_area_struct *vma, unsigned long address) { struct mm_struct *mm = vma->vm_mm; MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_start); struct vm_area_struct *prev; int error = 0; + if (!(vma->vm_flags & VM_GROWSDOWN)) + return -EFAULT; + address &= PAGE_MASK; if (address < mmap_min_addr || address < FIRST_USER_ADDRESS) return -EPERM; @@ -2051,8 +2051,6 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address, vma_is_accessible(prev) && (address - prev->vm_end < stack_guard_gap)) return -ENOMEM; - if (!write_locked && (prev->vm_end == address)) - return -EAGAIN; } if (mas_preallocate(&mas, GFP_KERNEL)) @@ -2131,14 +2129,12 @@ static int __init cmdline_parse_stack_guard_gap(char *p) __setup("stack_guard_gap=", cmdline_parse_stack_guard_gap); #ifdef CONFIG_STACK_GROWSUP -int expand_stack_locked(struct vm_area_struct *vma, unsigned long address, - bool write_locked) +int expand_stack_locked(struct vm_area_struct *vma, unsigned long address) { - return expand_upwards(vma, address, write_locked); + return expand_upwards(vma, address); } -struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, - unsigned long addr, bool write_locked) +struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, unsigned long addr) { struct vm_area_struct *vma, *prev; @@ -2148,23 +2144,21 @@ struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, return vma; if (!prev) return NULL; - if (expand_stack_locked(prev, addr, write_locked)) + if (expand_stack_locked(prev, addr)) return NULL; if (prev->vm_flags & VM_LOCKED) populate_vma_page_range(prev, addr, prev->vm_end, NULL); return prev; } #else -int expand_stack_locked(struct vm_area_struct *vma, unsigned long address, - bool write_locked) +int expand_stack_locked(struct vm_area_struct *vma, unsigned long address) { if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) return -EINVAL; - return expand_downwards(vma, address, write_locked); + return expand_downwards(vma, address); } -struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, - unsigned long addr, bool write_locked) +struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, unsigned long addr) { struct vm_area_struct *vma; unsigned long start; @@ -2176,7 +2170,7 @@ struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, if (vma->vm_start <= addr) return vma; start = vma->vm_start; - if (expand_stack_locked(vma, addr, write_locked)) + if (expand_stack_locked(vma, addr)) return NULL; if (vma->vm_flags & VM_LOCKED) populate_vma_page_range(vma, addr, start, NULL); @@ -2184,12 +2178,91 @@ struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, } #endif -struct vm_area_struct *find_extend_vma(struct mm_struct *mm, - unsigned long addr) +/* + * IA64 has some horrid mapping rules: it can expand both up and down, + * but with various special rules. + * + * We'll get rid of this architecture eventually, so the ugliness is + * temporary. + */ +#ifdef CONFIG_IA64 +static inline bool vma_expand_ok(struct vm_area_struct *vma, unsigned long addr) +{ + return REGION_NUMBER(addr) == REGION_NUMBER(vma->vm_start) && + REGION_OFFSET(addr) < RGN_MAP_LIMIT; +} + +/* + * IA64 stacks grow down, but there's a special register backing store + * that can grow up. Only sequentially, though, so the new address must + * match vm_end. + */ +static inline int vma_expand_up(struct vm_area_struct *vma, unsigned long addr) +{ + if (!vma_expand_ok(vma, addr)) + return -EFAULT; + if (vma->vm_end != (addr & PAGE_MASK)) + return -EFAULT; + return expand_upwards(vma, addr); +} + +static inline bool vma_expand_down(struct vm_area_struct *vma, unsigned long addr) +{ + if (!vma_expand_ok(vma, addr)) + return -EFAULT; + return expand_downwards(vma, addr); +} + +#elif defined(CONFIG_STACK_GROWSUP) + +#define vma_expand_up(vma,addr) expand_upwards(vma, addr) +#define vma_expand_down(vma, addr) (-EFAULT) + +#else + +#define vma_expand_up(vma,addr) (-EFAULT) +#define vma_expand_down(vma, addr) expand_downwards(vma, addr) + +#endif + +/* + * expand_stack(): legacy interface for page faulting. Don't use unless + * you have to. + * + * This is called with the mm locked for reading, drops the lock, takes + * the lock for writing, tries to look up a vma again, expands it if + * necessary, and downgrades the lock to reading again. + * + * If no vma is found or it can't be expanded, it returns NULL and has + * dropped the lock. + */ +struct vm_area_struct *expand_stack(struct mm_struct *mm, unsigned long addr) { - return find_extend_vma_locked(mm, addr, false); + struct vm_area_struct *vma, *prev; + + mmap_read_unlock(mm); + if (mmap_write_lock_killable(mm)) + return NULL; + + vma = find_vma_prev(mm, addr, &prev); + if (vma && vma->vm_start <= addr) + goto success; + + if (prev && !vma_expand_up(prev, addr)) { + vma = prev; + goto success; + } + + if (vma && !vma_expand_down(vma, addr)) + goto success; + + mmap_write_unlock(mm); + return NULL; + +success: + mmap_write_downgrade(mm); + return vma; } -EXPORT_SYMBOL_GPL(find_extend_vma); /* * Ok - we have the memory areas we should free on a maple tree so release them, diff --git a/mm/nommu.c b/mm/nommu.c index f476c9ed36b3..37d0b03143f1 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -630,25 +630,21 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) } EXPORT_SYMBOL(find_vma); -/* - * find a VMA - * - we don't extend stack VMAs under NOMMU conditions - */ -struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) -{ - return find_vma(mm, addr); -} - /* * expand a stack to a given address * - not supported under NOMMU conditions */ -int expand_stack_locked(struct vm_area_struct *vma, unsigned long address, - bool write_locked) +int expand_stack_locked(struct vm_area_struct *vma, unsigned long addr) { return -ENOMEM; } +struct vm_area_struct *expand_stack(struct mm_struct *mm, unsigned long addr) +{ + mmap_read_unlock(mm); + return NULL; +} + /* * look up the first VMA exactly that exactly matches addr * - should be called with mm->mmap_lock at least held readlocked -- cgit v1.2.3