-rw-r--r--  arch/alpha/Kconfig             |   1
-rw-r--r--  arch/alpha/mm/fault.c          |  13
-rw-r--r--  arch/arc/Kconfig               |   1
-rw-r--r--  arch/arc/mm/fault.c            |  11
-rw-r--r--  arch/arm/Kconfig               |   1
-rw-r--r--  arch/arm/mm/fault.c            |  63
-rw-r--r--  arch/arm64/Kconfig             |   1
-rw-r--r--  arch/arm64/mm/fault.c          |  47
-rw-r--r--  arch/csky/Kconfig              |   1
-rw-r--r--  arch/csky/mm/fault.c           |  22
-rw-r--r--  arch/hexagon/Kconfig           |   1
-rw-r--r--  arch/hexagon/mm/vm_fault.c     |  18
-rw-r--r--  arch/ia64/mm/fault.c           |  36
-rw-r--r--  arch/loongarch/Kconfig         |   1
-rw-r--r--  arch/loongarch/mm/fault.c      |  16
-rw-r--r--  arch/m68k/mm/fault.c           |   9
-rw-r--r--  arch/microblaze/mm/fault.c     |   5
-rw-r--r--  arch/mips/Kconfig              |   1
-rw-r--r--  arch/mips/mm/fault.c           |  12
-rw-r--r--  arch/nios2/Kconfig             |   1
-rw-r--r--  arch/nios2/mm/fault.c          |  17
-rw-r--r--  arch/openrisc/mm/fault.c       |   5
-rw-r--r--  arch/parisc/mm/fault.c         |  23
-rw-r--r--  arch/powerpc/Kconfig           |   1
-rw-r--r--  arch/powerpc/mm/copro_fault.c  |  14
-rw-r--r--  arch/powerpc/mm/fault.c        |  39
-rw-r--r--  arch/riscv/Kconfig             |   1
-rw-r--r--  arch/riscv/mm/fault.c          |  31
-rw-r--r--  arch/s390/mm/fault.c           |   5
-rw-r--r--  arch/sh/Kconfig                |   1
-rw-r--r--  arch/sh/mm/fault.c             |  17
-rw-r--r--  arch/sparc/Kconfig             |   1
-rw-r--r--  arch/sparc/mm/fault_32.c       |  32
-rw-r--r--  arch/sparc/mm/fault_64.c       |   8
-rw-r--r--  arch/um/kernel/trap.c          |  11
-rw-r--r--  arch/x86/Kconfig               |   1
-rw-r--r--  arch/x86/mm/fault.c            |  52
-rw-r--r--  arch/xtensa/Kconfig            |   1
-rw-r--r--  arch/xtensa/mm/fault.c         |  14
-rw-r--r--  drivers/iommu/amd/iommu_v2.c   |   4
-rw-r--r--  drivers/iommu/iommu-sva.c      |   2
-rw-r--r--  fs/binfmt_elf.c                |   6
-rw-r--r--  fs/exec.c                      |  38
-rw-r--r--  include/linux/mm.h             |  16
-rw-r--r--  mm/Kconfig                     |   4
-rw-r--r--  mm/gup.c                       |  14
-rw-r--r--  mm/memory.c                    | 150
-rw-r--r--  mm/mmap.c                      | 121
-rw-r--r--  mm/nommu.c                     |  17

49 files changed, 439 insertions, 468 deletions
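
Nearly every architecture hunk below makes the same conversion: the open-coded mmap_read_lock() + find_vma() + VM_GROWSDOWN check + expand_stack() sequence in the page-fault handler is replaced by a single call to the new lock_mm_and_find_vma() helper (added in the mm/memory.c hunk), and the error path gains a "nosemaphore" label because the helper returns NULL with the mmap_lock already dropped. The following is a condensed, schematic sketch of the converted handler shape; it is not any one architecture's code verbatim, and the signal-delivery and retry bookkeeping are deliberately simplified:

/* Schematic post-conversion fault handler (simplified sketch, not from the patch). */
static void arch_do_page_fault(struct pt_regs *regs, unsigned long address)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	vm_fault_t fault;

retry:
	/* Takes mmap_lock for reading, looks up the VMA and, if the address
	 * falls below a VM_GROWSDOWN vma, expands the stack under the write
	 * lock.  Returns NULL with the lock already dropped on failure. */
	vma = lock_mm_and_find_vma(mm, address, regs);
	if (unlikely(!vma))
		goto bad_area_nosemaphore;

	/* permission checks against vma->vm_flags go here ... */

	fault = handle_mm_fault(vma, address, FAULT_FLAG_DEFAULT, regs);
	if (fault & VM_FAULT_RETRY)
		goto retry;	/* the lock was dropped by handle_mm_fault() */

	mmap_read_unlock(mm);
	return;

bad_area_nosemaphore:
	/* deliver SIGSEGV / do kernel exception fixup; mmap_lock is NOT held */
	force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)address);
}
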
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index a5c2b1aa46b0..d6968d090d49 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -30,6 +30,7 @@ config ALPHA
select HAS_IOPORT
select HAVE_ARCH_AUDITSYSCALL
select HAVE_MOD_ARCH_SPECIFIC
+ select LOCK_MM_AND_FIND_VMA
select MODULES_USE_ELF_RELA
select ODD_RT_SIGACTION
select OLD_SIGSUSPEND
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index 7b01ae4f3bc6..8c9850437e67 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -119,20 +119,12 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
flags |= FAULT_FLAG_USER;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
- mmap_read_lock(mm);
- vma = find_vma(mm, address);
+ vma = lock_mm_and_find_vma(mm, address, regs);
if (!vma)
- goto bad_area;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (expand_stack(vma, address))
- goto bad_area;
+ goto bad_area_nosemaphore;
/* Ok, we have a good vm_area for this memory access, so
we can handle it. */
- good_area:
si_code = SEGV_ACCERR;
if (cause < 0) {
if (!(vma->vm_flags & VM_EXEC))
@@ -192,6 +184,7 @@ retry:
bad_area:
mmap_read_unlock(mm);
+ bad_area_nosemaphore:
if (user_mode(regs))
goto do_sigsegv;
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index ab6d701365bb..96cf8720bb93 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -41,6 +41,7 @@ config ARC
select HAVE_PERF_EVENTS
select HAVE_SYSCALL_TRACEPOINTS
select IRQ_DOMAIN
+ select LOCK_MM_AND_FIND_VMA
select MODULES_USE_ELF_RELA
select OF
select OF_EARLY_FLATTREE
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 5ca59a482632..f59e722d147f 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -113,15 +113,9 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
- mmap_read_lock(mm);
-
- vma = find_vma(mm, address);
+ vma = lock_mm_and_find_vma(mm, address, regs);
if (!vma)
- goto bad_area;
- if (unlikely(address < vma->vm_start)) {
- if (!(vma->vm_flags & VM_GROWSDOWN) || expand_stack(vma, address))
- goto bad_area;
- }
+ goto bad_area_nosemaphore;
/*
* vm_area is good, now check permissions for this memory access
@@ -161,6 +155,7 @@ retry:
bad_area:
mmap_read_unlock(mm);
+bad_area_nosemaphore:
/*
* Major/minor page fault accounting
* (in case of retry we only land here once)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index cef741bb03b3..d60b73d93e03 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -127,6 +127,7 @@ config ARM
select HAVE_VIRT_CPU_ACCOUNTING_GEN
select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
select IRQ_FORCED_THREADING
+ select LOCK_MM_AND_FIND_VMA
select MODULES_USE_ELF_REL
select NEED_DMA_MAP_STATE
select OF_EARLY_FLATTREE if OF
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 83598649a094..fef62e4a9edd 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -235,37 +235,11 @@ static inline bool is_permission_fault(unsigned int fsr)
return false;
}
-static vm_fault_t __kprobes
-__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int flags,
- unsigned long vma_flags, struct pt_regs *regs)
-{
- struct vm_area_struct *vma = find_vma(mm, addr);
- if (unlikely(!vma))
- return VM_FAULT_BADMAP;
-
- if (unlikely(vma->vm_start > addr)) {
- if (!(vma->vm_flags & VM_GROWSDOWN))
- return VM_FAULT_BADMAP;
- if (addr < FIRST_USER_ADDRESS)
- return VM_FAULT_BADMAP;
- if (expand_stack(vma, addr))
- return VM_FAULT_BADMAP;
- }
-
- /*
- * ok, we have a good vm_area for this memory access, check the
- * permissions on the VMA allow for the fault which occurred.
- */
- if (!(vma->vm_flags & vma_flags))
- return VM_FAULT_BADACCESS;
-
- return handle_mm_fault(vma, addr & PAGE_MASK, flags, regs);
-}
-
static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
int sig, code;
vm_fault_t fault;
unsigned int flags = FAULT_FLAG_DEFAULT;
@@ -304,31 +278,21 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
- /*
- * As per x86, we may deadlock here. However, since the kernel only
- * validly references user space from well defined areas of the code,
- * we can bug out early if this is from code which shouldn't.
- */
- if (!mmap_read_trylock(mm)) {
- if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc))
- goto no_context;
retry:
- mmap_read_lock(mm);
- } else {
- /*
- * The above down_read_trylock() might have succeeded in
- * which case, we'll have missed the might_sleep() from
- * down_read()
- */
- might_sleep();
-#ifdef CONFIG_DEBUG_VM
- if (!user_mode(regs) &&
- !search_exception_tables(regs->ARM_pc))
- goto no_context;
-#endif
+ vma = lock_mm_and_find_vma(mm, addr, regs);
+ if (unlikely(!vma)) {
+ fault = VM_FAULT_BADMAP;
+ goto bad_area;
}
- fault = __do_page_fault(mm, addr, flags, vm_flags, regs);
+ /*
+ * ok, we have a good vm_area for this memory access, check the
+ * permissions on the VMA allow for the fault which occurred.
+ */
+ if (!(vma->vm_flags & vm_flags))
+ fault = VM_FAULT_BADACCESS;
+ else
+ fault = handle_mm_fault(vma, addr & PAGE_MASK, flags, regs);
/* If we need to retry but a fatal signal is pending, handle the
* signal first. We do not need to release the mmap_lock because
@@ -359,6 +323,7 @@ retry:
if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
return 0;
+bad_area:
/*
* If we are in kernel mode at this point, we
* have no context to handle this fault with.
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3ae5c03ded0e..595028bd9160 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -231,6 +231,7 @@ config ARM64
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
select KASAN_VMALLOC if KASAN
+ select LOCK_MM_AND_FIND_VMA
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE
select NEED_SG_DMA_LENGTH
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index c60100791bcc..935f0a8911f9 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -497,27 +497,14 @@ static void do_bad_area(unsigned long far, unsigned long esr,
#define VM_FAULT_BADMAP ((__force vm_fault_t)0x010000)
#define VM_FAULT_BADACCESS ((__force vm_fault_t)0x020000)
-static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
+static vm_fault_t __do_page_fault(struct mm_struct *mm,
+ struct vm_area_struct *vma, unsigned long addr,
unsigned int mm_flags, unsigned long vm_flags,
struct pt_regs *regs)
{
- struct vm_area_struct *vma = find_vma(mm, addr);
-
- if (unlikely(!vma))
- return VM_FAULT_BADMAP;
-
/*
* Ok, we have a good vm_area for this memory access, so we can handle
* it.
- */
- if (unlikely(vma->vm_start > addr)) {
- if (!(vma->vm_flags & VM_GROWSDOWN))
- return VM_FAULT_BADMAP;
- if (expand_stack(vma, addr))
- return VM_FAULT_BADMAP;
- }
-
- /*
* Check that the permissions on the VMA allow for the fault which
* occurred.
*/
@@ -631,31 +618,15 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
}
lock_mmap:
#endif /* CONFIG_PER_VMA_LOCK */
- /*
- * As per x86, we may deadlock here. However, since the kernel only
- * validly references user space from well defined areas of the code,
- * we can bug out early if this is from code which shouldn't.
- */
- if (!mmap_read_trylock(mm)) {
- if (!user_mode(regs) && !search_exception_tables(regs->pc))
- goto no_context;
+
retry:
- mmap_read_lock(mm);
- } else {
- /*
- * The above mmap_read_trylock() might have succeeded in which
- * case, we'll have missed the might_sleep() from down_read().
- */
- might_sleep();
-#ifdef CONFIG_DEBUG_VM
- if (!user_mode(regs) && !search_exception_tables(regs->pc)) {
- mmap_read_unlock(mm);
- goto no_context;
- }
-#endif
+ vma = lock_mm_and_find_vma(mm, addr, regs);
+ if (unlikely(!vma)) {
+ fault = VM_FAULT_BADMAP;
+ goto done;
}
- fault = __do_page_fault(mm, addr, mm_flags, vm_flags, regs);
+ fault = __do_page_fault(mm, vma, addr, mm_flags, vm_flags, regs);
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
@@ -674,9 +645,7 @@ retry:
}
mmap_read_unlock(mm);
-#ifdef CONFIG_PER_VMA_LOCK
done:
-#endif
/*
* Handle the "normal" (no error) case first.
*/
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 95f1e9bfd1c7..cf2a6fd7dff8 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -97,6 +97,7 @@ config CSKY
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
+ select LOCK_MM_AND_FIND_VMA
select MAY_HAVE_SPARSE_IRQ
select MODULES_USE_ELF_RELA if MODULES
select OF
diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c
index e15f736cca4b..ae9781b7d92e 100644
--- a/arch/csky/mm/fault.c
+++ b/arch/csky/mm/fault.c
@@ -97,13 +97,12 @@ static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_f
BUG();
}
-static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
+static inline void bad_area_nosemaphore(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
{
/*
* Something tried to access memory that isn't in our memory map.
* Fix it, but check if it's kernel or user first.
*/
- mmap_read_unlock(mm);
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
do_trap(regs, SIGSEGV, code, addr);
@@ -238,20 +237,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
if (is_write(regs))
flags |= FAULT_FLAG_WRITE;
retry:
- mmap_read_lock(mm);
- vma = find_vma(mm, addr);
+ vma = lock_mm_and_find_vma(mm, addr, regs);
if (unlikely(!vma)) {
- bad_area(regs, mm, code, addr);
- return;
- }
- if (likely(vma->vm_start <= addr))
- goto good_area;
- if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
- bad_area(regs, mm, code, addr);
- return;
- }
- if (unlikely(expand_stack(vma, addr))) {
- bad_area(regs, mm, code, addr);
+ bad_area_nosemaphore(regs, mm, code, addr);
return;
}
@@ -259,11 +247,11 @@ retry:
* Ok, we have a good vm_area for this memory access, so
* we can handle it.
*/
-good_area:
code = SEGV_ACCERR;
if (unlikely(access_error(regs, vma))) {
- bad_area(regs, mm, code, addr);
+ mmap_read_unlock(mm);
+ bad_area_nosemaphore(regs, mm, code, addr);
return;
}
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 54eadf265178..6726f4941015 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -28,6 +28,7 @@ config HEXAGON
select GENERIC_SMP_IDLE_THREAD
select STACKTRACE_SUPPORT
select GENERIC_CLOCKEVENTS_BROADCAST
+ select LOCK_MM_AND_FIND_VMA
select MODULES_USE_ELF_RELA
select GENERIC_CPU_DEVICES
select ARCH_WANT_LD_ORPHAN_WARN
diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c
index 4b578d02fd01..7295ea3f8cc8 100644
--- a/arch/hexagon/mm/vm_fault.c
+++ b/arch/hexagon/mm/vm_fault.c
@@ -57,21 +57,10 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
- mmap_read_lock(mm);
- vma = find_vma(mm, address);
- if (!vma)
- goto bad_area;
+ vma = lock_mm_and_find_vma(mm, address, regs);
+ if (unlikely(!vma))
+ goto bad_area_nosemaphore;
- if (vma->vm_start <= address)
- goto good_area;
-
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
-
- if (expand_stack(vma, address))
- goto bad_area;
-
-good_area:
/* Address space is OK. Now check access rights. */
si_code = SEGV_ACCERR;
@@ -143,6 +132,7 @@ good_area:
bad_area:
mmap_read_unlock(mm);
+bad_area_nosemaphore:
if (user_mode(regs)) {
force_sig_fault(SIGSEGV, si_code, (void __user *)address);
return;
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 85c4d9ac8686..5458b52b4009 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -110,10 +110,12 @@ retry:
* register backing store that needs to expand upwards, in
* this case vma will be null, but prev_vma will be non-null
*/
- if (( !vma && prev_vma ) || (address < vma->vm_start) )
- goto check_expansion;
+ if (( !vma && prev_vma ) || (address < vma->vm_start) ) {
+ vma = expand_stack(mm, address);
+ if (!vma)
+ goto bad_area_nosemaphore;
+ }
- good_area:
code = SEGV_ACCERR;
/* OK, we've got a good vm_area for this memory area. Check the access permissions: */
@@ -177,35 +179,9 @@ retry:
mmap_read_unlock(mm);
return;
- check_expansion:
- if (!(prev_vma && (prev_vma->vm_flags & VM_GROWSUP) && (address == prev_vma->vm_end))) {
- if (!vma)
- goto bad_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start)
- || REGION_OFFSET(address) >= RGN_MAP_LIMIT)
- goto bad_area;
- if (expand_stack(vma, address))
- goto bad_area;
- } else {
- vma = prev_vma;
- if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start)
- || REGION_OFFSET(address) >= RGN_MAP_LIMIT)
- goto bad_area;
- /*
- * Since the register backing store is accessed sequentially,
- * we disallow growing it by more than a page at a time.
- */
- if (address > vma->vm_end + PAGE_SIZE - sizeof(long))
- goto bad_area;
- if (expand_upwards(vma, address))
- goto bad_area;
- }
- goto good_area;
-
bad_area:
mmap_read_unlock(mm);
+ bad_area_nosemaphore:
if ((isr & IA64_ISR_SP)
|| ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH))
{
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index cbab4f9ca15c..32b9da4622ce 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -131,6 +131,7 @@ config LOONGARCH
select HAVE_VIRT_CPU_ACCOUNTING_GEN if !SMP
select IRQ_FORCED_THREADING
select IRQ_LOONGARCH_CPU
+ select LOCK_MM_AND_FIND_VMA
select MMU_GATHER_MERGE_VMAS if MMU
select MODULES_USE_ELF_RELA if MODULES
select NEED_PER_CPU_EMBED_FIRST_CHUNK
diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
index 449087bd589d..da5b6d518cdb 100644
--- a/arch/loongarch/mm/fault.c
+++ b/arch/loongarch/mm/fault.c
@@ -169,22 +169,18 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
- mmap_read_lock(mm);
- vma = find_vma(mm, address);
- if (!vma)
- goto bad_area;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (!expand_stack(vma, address))
- goto good_area;
+ vma = lock_mm_and_find_vma(mm, address, regs);
+ if (unlikely(!vma))
+ goto bad_area_nosemaphore;
+ goto good_area;
+
/*
* Something tried to access memory that isn't in our memory map..
* Fix it, but check if it's kernel or user first..
*/
bad_area:
mmap_read_unlock(mm);
+bad_area_nosemaphore:
do_sigsegv(regs, write, address, si_code);
return;
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index 228128e45c67..c290c5c0cfb9 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -105,8 +105,9 @@ retry:
if (address + 256 < rdusp())
goto map_err;
}
- if (expand_stack(vma, address))
- goto map_err;
+ vma = expand_stack(mm, address);
+ if (!vma)
+ goto map_err_nosemaphore;
/*
* Ok, we have a good vm_area for this memory access, so
@@ -196,10 +197,12 @@ bus_err:
goto send_sig;
map_err:
+ mmap_read_unlock(mm);
+map_err_nosemaphore:
current->thread.signo = SIGSEGV;
current->thread.code = SEGV_MAPERR;
current->thread.faddr = address;
- goto send_sig;
+ return send_fault_sig(regs);
acc_err:
current->thread.signo = SIGSEGV;
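
The m68k conversion above (like the microblaze, openrisc, s390, sparc64, ia64 and um hunks nearby) shows the second pattern in this series: expand_stack() now takes the mm rather than a vma. It drops the read lock, re-takes mmap_lock for writing, expands the mapping, downgrades back to a read lock and returns the resulting vma; on failure it returns NULL with the lock already dropped, so callers must branch to a "nosemaphore" error path instead of unlocking again. A minimal caller sketch of that contract, condensed from the m68k hunk (helper name and error handling are illustrative only):

/* Sketch of the new expand_stack(mm, addr) calling convention. */
static struct vm_area_struct *fault_lookup_vma(struct mm_struct *mm,
					       unsigned long address)
{
	struct vm_area_struct *vma;

	mmap_read_lock(mm);
	vma = find_vma(mm, address);
	if (!vma) {
		mmap_read_unlock(mm);	/* plain lookup failure: we still hold the lock */
		return NULL;
	}
	if (vma->vm_start > address) {
		/* expand_stack() re-takes mmap_lock for writing internally and
		 * returns with it held for reading again on success.  On failure
		 * it returns NULL and the lock has ALREADY been dropped. */
		vma = expand_stack(mm, address);
	}
	return vma;	/* non-NULL: caller still holds mmap_lock for reading */
}
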
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
index 687714db6f4d..d3c3c33b73a6 100644
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -192,8 +192,9 @@ retry:
&& (kernel_mode(regs) || !store_updates_sp(regs)))
goto bad_area;
}
- if (expand_stack(vma, address))
- goto bad_area;
+ vma = expand_stack(mm, address);
+ if (!vma)
+ goto bad_area_nosemaphore;
good_area:
code = SEGV_ACCERR;
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index ada18f3be229..d49d5fc40021 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -92,6 +92,7 @@ config MIPS
select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP
select IRQ_FORCED_THREADING
select ISA if EISA
+ select LOCK_MM_AND_FIND_VMA
select MODULES_USE_ELF_REL if MODULES
select MODULES_USE_ELF_RELA if MODULES && 64BIT
select PERF_USE_VMALLOC
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index a27045f5a556..d7878208bd3f 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -99,21 +99,13 @@ static void __do_page_fault(struct pt_regs *regs, unsigned long write,
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
- mmap_read_lock(mm);
- vma = find_vma(mm, address);
+ vma = lock_mm_and_find_vma(mm, address, regs);
if (!vma)
- goto bad_area;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (expand_stack(vma, address))
- goto bad_area;
+ goto bad_area_nosemaphore;
/*
* Ok, we have a good vm_area for this memory access, so
* we can handle it..
*/
-good_area:
si_code = SEGV_ACCERR;
if (write) {
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index e5936417d3cd..d54464021a61 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -16,6 +16,7 @@ config NIOS2
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_KGDB
select IRQ_DOMAIN
+ select LOCK_MM_AND_FIND_VMA
select MODULES_USE_ELF_RELA
select OF
select OF_EARLY_FLATTREE
diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c
index ca64eccea551..e3fa9c15181d 100644
--- a/arch/nios2/mm/fault.c
+++ b/arch/nios2/mm/fault.c
@@ -86,27 +86,14 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause,
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
- if (!mmap_read_trylock(mm)) {
- if (!user_mode(regs) && !search_exception_tables(regs->ea))
- goto bad_area_nosemaphore;
retry:
- mmap_read_lock(mm);
- }
-
- vma = find_vma(mm, address);
+ vma = lock_mm_and_find_vma(mm, address, regs);
if (!vma)
- goto bad_area;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (expand_stack(vma, address))
- goto bad_area;
+ goto bad_area_nosemaphore;
/*
* Ok, we have a good vm_area for this memory access, so
* we can handle it..
*/
-good_area:
code = SEGV_ACCERR;
switch (cause) {
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index 6734fee3134f..a9dcd4381d1a 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -127,8 +127,9 @@ retry:
if (address + PAGE_SIZE < regs->sp)
goto bad_area;
}
- if (expand_stack(vma, address))
- goto bad_area;
+ vma = expand_stack(mm, address);
+ if (!vma)
+ goto bad_area_nosemaphore;
/*
* Ok, we have a good vm_area for this memory access, so
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 6941fdbf2517..6e894afa4249 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -288,15 +288,19 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
retry:
mmap_read_lock(mm);
vma = find_vma_prev(mm, address, &prev_vma);
- if (!vma || address < vma->vm_start)
- goto check_expansion;
+ if (!vma || address < vma->vm_start) {
+ if (!prev_vma || !(prev_vma->vm_flags & VM_GROWSUP))
+ goto bad_area;
+ vma = expand_stack(mm, address);
+ if (!vma)
+ goto bad_area_nosemaphore;
+ }
+
/*
* Ok, we have a good vm_area for this memory access. We still need to
* check the access permissions.
*/
-good_area:
-
if ((vma->vm_flags & acc_type) != acc_type)
goto bad_area;
@@ -347,17 +351,13 @@ good_area:
mmap_read_unlock(mm);
return;
-check_expansion:
- vma = prev_vma;
- if (vma && (expand_stack(vma, address) == 0))
- goto good_area;
-
/*
* Something tried to access memory that isn't in our memory map..
*/
bad_area:
mmap_read_unlock(mm);
+bad_area_nosemaphore:
if (user_mode(regs)) {
int signo, si_code;
@@ -449,7 +449,7 @@ handle_nadtlb_fault(struct pt_regs *regs)
{
unsigned long insn = regs->iir;
int breg, treg, xreg, val = 0;
- struct vm_area_struct *vma, *prev_vma;
+ struct vm_area_struct *vma;
struct task_struct *tsk;
struct mm_struct *mm;
unsigned long address;
@@ -485,7 +485,7 @@ handle_nadtlb_fault(struct pt_regs *regs)
/* Search for VMA */
address = regs->ior;
mmap_read_lock(mm);
- vma = find_vma_prev(mm, address, &prev_vma);
+ vma = vma_lookup(mm, address);
mmap_read_unlock(mm);
/*
@@ -494,7 +494,6 @@ handle_nadtlb_fault(struct pt_regs *regs)
*/
acc_type = (insn & 0x40) ? VM_WRITE : VM_READ;
if (vma
- && address >= vma->vm_start
&& (vma->vm_flags & acc_type) == acc_type)
val = 1;
}
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8eb6c5e9e4f8..395044b705a1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -277,6 +277,7 @@ config PPC
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
select KASAN_VMALLOC if KASAN && MODULES
+ select LOCK_MM_AND_FIND_VMA
select MMU_GATHER_PAGE_SIZE
select MMU_GATHER_RCU_TABLE_FREE
select MMU_GATHER_MERGE_VMAS
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index 7c507fb48182..f49fd873df8d 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -33,19 +33,11 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
if (mm->pgd == NULL)
return -EFAULT;
- mmap_read_lock(mm);
- ret = -EFAULT;
- vma = find_vma(mm, ea);
+ vma = lock_mm_and_find_vma(mm, ea, NULL);
if (!vma)
- goto out_unlock;
-
- if (ea < vma->vm_start) {
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto out_unlock;
- if (expand_stack(vma, ea))
- goto out_unlock;
- }
+ return -EFAULT;
+ ret = -EFAULT;
is_write = dsisr & DSISR_ISSTORE;
if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 531177a4ee08..5bfdf6ecfa96 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -84,11 +84,6 @@ static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
return __bad_area_nosemaphore(regs, address, si_code);
}
-static noinline int bad_area(struct pt_regs *regs, unsigned long address)
-{
- return __bad_area(regs, address, SEGV_MAPERR);
-}
-
static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address,
struct vm_area_struct *vma)
{
@@ -515,40 +510,12 @@ lock_mmap:
* we will deadlock attempting to validate the fault against the
* address space. Luckily the kernel only validly references user
* space from well defined areas of code, which are listed in the
- * exceptions table.
- *
- * As the vast majority of faults will be valid we will only perform
- * the source reference check when there is a possibility of a deadlock.
- * Attempt to lock the address space, if we cannot we then validate the
- * source. If this is invalid we can skip the address space check,
- * thus avoiding the deadlock.
+ * exceptions table. lock_mm_and_find_vma() handles that logic.
*/
- if (unlikely(!mmap_read_trylock(mm))) {
- if (!is_user && !search_exception_tables(regs->nip))
- return bad_area_nosemaphore(regs, address);
-
retry:
- mmap_read_lock(mm);
- } else {
- /*
- * The above down_read_trylock() might have succeeded in
- * which case we'll have missed the might_sleep() from
- * down_read():
- */
- might_sleep();
- }
-
- vma = find_vma(mm, address);
+ vma = lock_mm_and_find_vma(mm, address, regs);
if (unlikely(!vma))
- return bad_area(regs, address);
-
- if (unlikely(vma->vm_start > address)) {
- if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
- return bad_area(regs, address);
-
- if (unlikely(expand_stack(vma, address)))
- return bad_area(regs, address);
- }
+ return bad_area_nosemaphore(regs, address);
if (unlikely(access_pkey_error(is_write, is_exec,
(error_code & DSISR_KEYFAULT), vma)))
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index c69572fbe613..a08917f681af 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -127,6 +127,7 @@ config RISCV
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
select KASAN_VMALLOC if KASAN
+ select LOCK_MM_AND_FIND_VMA
select MODULES_USE_ELF_RELA if MODULES
select MODULE_SECTIONS if MODULES
select OF
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 8685f85a7474..35a84ec69a9f 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -84,13 +84,13 @@ static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_f
BUG();
}
-static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
+static inline void
+bad_area_nosemaphore(struct pt_regs *regs, int code, unsigned long addr)
{
/*
* Something tried to access memory that isn't in our memory map.
* Fix it, but check if it's kernel or user first.
*/
- mmap_read_unlock(mm);
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs)) {
do_trap(regs, SIGSEGV, code, addr);
@@ -100,6 +100,15 @@ static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code
no_context(regs, addr);
}
+static inline void
+bad_area(struct pt_regs *regs, struct mm_struct *mm, int code,
+ unsigned long addr)
+{
+ mmap_read_unlock(mm);
+
+ bad_area_nosemaphore(regs, code, addr);
+}
+
static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
{
pgd_t *pgd, *pgd_k;
@@ -287,23 +296,10 @@ void handle_page_fault(struct pt_regs *regs)
else if (cause == EXC_INST_PAGE_FAULT)
flags |= FAULT_FLAG_INSTRUCTION;
retry:
- mmap_read_lock(mm);
- vma = find_vma(mm, addr);
+ vma = lock_mm_and_find_vma(mm, addr, regs);
if (unlikely(!vma)) {
tsk->thread.bad_cause = cause;
- bad_area(regs, mm, code, addr);
- return;
- }
- if (likely(vma->vm_start <= addr))
- goto good_area;
- if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
- tsk->thread.bad_cause = cause;
- bad_area(regs, mm, code, addr);
- return;
- }
- if (unlikely(expand_stack(vma, addr))) {
- tsk->thread.bad_cause = cause;
- bad_area(regs, mm, code, addr);
+ bad_area_nosemaphore(regs, code, addr);
return;
}
@@ -311,7 +307,6 @@ retry:
* Ok, we have a good vm_area for this memory access, so
* we can handle it.
*/
-good_area:
code = SEGV_ACCERR;
if (unlikely(access_error(cause, vma))) {
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index b65144c392b0..dbe8394234e2 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -457,8 +457,9 @@ retry:
if (unlikely(vma->vm_start > address)) {
if (!(vma->vm_flags & VM_GROWSDOWN))
goto out_up;
- if (expand_stack(vma, address))
- goto out_up;
+ vma = expand_stack(mm, address);
+ if (!vma)
+ goto out;
}
/*
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index e339745f62a1..2b3ce4fd3956 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -60,6 +60,7 @@ config SUPERH
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select IRQ_FORCED_THREADING
+ select LOCK_MM_AND_FIND_VMA
select MODULES_USE_ELF_RELA
select NEED_SG_DMA_LENGTH
select NO_DMA if !MMU && !DMA_COHERENT
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index acd2f5e50bfc..06e6b4952924 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -439,21 +439,9 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
}
retry:
- mmap_read_lock(mm);
-
- vma = find_vma(mm, address);
+ vma = lock_mm_and_find_vma(mm, address, regs);
if (unlikely(!vma)) {
- bad_area(regs, error_code, address);
- return;
- }
- if (likely(vma->vm_start <= address))
- goto good_area;
- if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
- bad_area(regs, error_code, address);
- return;
- }
- if (unlikely(expand_stack(vma, address))) {
- bad_area(regs, error_code, address);
+ bad_area_nosemaphore(regs, error_code, address);
return;
}
@@ -461,7 +449,6 @@ retry:
* Ok, we have a good vm_area for this memory access, so
* we can handle it..
*/
-good_area:
if (unlikely(access_error(error_code, vma))) {
bad_area_access_error(regs, error_code, address);
return;
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 6197b87f2b3b..49849790e66d 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -58,6 +58,7 @@ config SPARC32
select DMA_DIRECT_REMAP
select GENERIC_ATOMIC64
select HAVE_UID16
+ select LOCK_MM_AND_FIND_VMA
select OLD_SIGACTION
select ZONE_DMA
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 179295b14664..a3ccc0267bc2 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -143,28 +143,19 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
if (pagefault_disabled() || !mm)
goto no_context;
+ if (!from_user && address >= PAGE_OFFSET)
+ goto no_context;
+
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
- mmap_read_lock(mm);
-
- if (!from_user && address >= PAGE_OFFSET)
- goto bad_area;
-
- vma = find_vma(mm, address);
+ vma = lock_mm_and_find_vma(mm, address, regs);
if (!vma)
- goto bad_area;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (expand_stack(vma, address))
- goto bad_area;
+ goto bad_area_nosemaphore;
/*
* Ok, we have a good vm_area for this memory access, so
* we can handle it..
*/
-good_area:
code = SEGV_ACCERR;
if (write) {
if (!(vma->vm_flags & VM_WRITE))
@@ -321,17 +312,9 @@ static void force_user_fault(unsigned long address, int write)
code = SEGV_MAPERR;
- mmap_read_lock(mm);
- vma = find_vma(mm, address);
+ vma = lock_mm_and_find_vma(mm, address, NULL);
if (!vma)
- goto bad_area;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (expand_stack(vma, address))
- goto bad_area;
-good_area:
+ goto bad_area_nosemaphore;
code = SEGV_ACCERR;
if (write) {
if (!(vma->vm_flags & VM_WRITE))
@@ -350,6 +333,7 @@ good_area:
return;
bad_area:
mmap_read_unlock(mm);
+bad_area_nosemaphore:
__do_fault_siginfo(code, SIGSEGV, tsk->thread.kregs, address);
return;
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index d8a407fbe350..e326caf708c6 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -386,8 +386,9 @@ continue_fault:
goto bad_area;
}
}
- if (expand_stack(vma, address))
- goto bad_area;
+ vma = expand_stack(mm, address);
+ if (!vma)
+ goto bad_area_nosemaphore;
/*
* Ok, we have a good vm_area for this memory access, so
* we can handle it..
@@ -490,8 +491,9 @@ exit_exception:
* Fix it, but check if it's kernel or user first..
*/
bad_area:
- insn = get_fault_insn(regs, insn);
mmap_read_unlock(mm);
+bad_area_nosemaphore:
+ insn = get_fault_insn(regs, insn);
handle_kernel_fault:
do_kernel_fault(regs, si_code, fault_code, insn, address);
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index d3ce21c4ca32..6d8ae86ae978 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -47,14 +47,15 @@ retry:
vma = find_vma(mm, address);
if (!vma)
goto out;
- else if (vma->vm_start <= address)
+ if (vma->vm_start <= address)
goto good_area;
- else if (!(vma->vm_flags & VM_GROWSDOWN))
+ if (!(vma->vm_flags & VM_GROWSDOWN))
goto out;
- else if (is_user && !ARCH_IS_STACKGROW(address))
- goto out;
- else if (expand_stack(vma, address))
+ if (is_user && !ARCH_IS_STACKGROW(address))
goto out;
+ vma = expand_stack(mm, address);
+ if (!vma)
+ goto out_nosemaphore;
good_area:
*code_out = SEGV_ACCERR;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d5c69145a73c..b3ebf58cf77e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -279,6 +279,7 @@ config X86
select HOTPLUG_SMT if SMP
select HOTPLUG_SPLIT_STARTUP if SMP && X86_32
select IRQ_FORCED_THREADING
+ select LOCK_MM_AND_FIND_VMA
select NEED_PER_CPU_EMBED_FIRST_CHUNK
select NEED_PER_CPU_PAGE_FIRST_CHUNK
select NEED_SG_DMA_LENGTH
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e4399983c50c..e8711b2cafaf 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -880,12 +880,6 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
__bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
}
-static noinline void
-bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
-{
- __bad_area(regs, error_code, address, 0, SEGV_MAPERR);
-}
-
static inline bool bad_area_access_from_pkeys(unsigned long error_code,
struct vm_area_struct *vma)
{
@@ -1366,51 +1360,10 @@ void do_user_addr_fault(struct pt_regs *regs,
lock_mmap:
#endif /* CONFIG_PER_VMA_LOCK */
- /*
- * Kernel-mode access to the user address space should only occur
- * on well-defined single instructions listed in the exception
- * tables. But, an erroneous kernel fault occurring outside one of
- * those areas which also holds mmap_lock might deadlock attempting
- * to validate the fault against the address space.
- *
- * Only do the expensive exception table search when we might be at
- * risk of a deadlock. This happens if we
- * 1. Failed to acquire mmap_lock, and
- * 2. The access did not originate in userspace.
- */
- if (unlikely(!mmap_read_trylock(mm))) {
- if (!user_mode(regs) && !search_exception_tables(regs->ip)) {
- /*
- * Fault from code in kernel from
- * which we do not expect faults.
- */
- bad_area_nosemaphore(regs, error_code, address);
- return;
- }
retry:
- mmap_read_lock(mm);
- } else {
- /*
- * The above down_read_trylock() might have succeeded in
- * which case we'll have missed the might_sleep() from
- * down_read():
- */
- might_sleep();
- }
-
- vma = find_vma(mm, address);
+ vma = lock_mm_and_find_vma(mm, address, regs);
if (unlikely(!vma)) {
- bad_area(regs, error_code, address);
- return;
- }
- if (likely(vma->vm_start <= address))
- goto good_area;
- if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
- bad_area(regs, error_code, address);
- return;
- }
- if (unlikely(expand_stack(vma, address))) {
- bad_area(regs, error_code, address);
+ bad_area_nosemaphore(regs, error_code, address);
return;
}
@@ -1418,7 +1371,6 @@ retry:
* Ok, we have a good vm_area for this memory access, so
* we can handle it..
*/
-good_area:
if (unlikely(access_error(error_code, vma))) {
bad_area_access_error(regs, error_code, address, vma);
return;
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index c1bcfc2a8581..2a51a466779f 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -49,6 +49,7 @@ config XTENSA
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING_GEN
select IRQ_DOMAIN
+ select LOCK_MM_AND_FIND_VMA
select MODULES_USE_ELF_RELA
select PERF_USE_VMALLOC
select TRACE_IRQFLAGS_SUPPORT
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index faf7cf35a0ee..d1eb8d6c5b82 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -130,23 +130,14 @@ void do_page_fault(struct pt_regs *regs)
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
- mmap_read_lock(mm);
- vma = find_vma(mm, address);
-
+ vma = lock_mm_and_find_vma(mm, address, regs);
if (!vma)
- goto bad_area;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (expand_stack(vma, address))
- goto bad_area;
+ goto bad_area_nosemaphore;
/* Ok, we have a good vm_area for this memory access, so
* we can handle it..
*/
-good_area:
code = SEGV_ACCERR;
if (is_write) {
@@ -205,6 +196,7 @@ good_area:
*/
bad_area:
mmap_read_unlock(mm);
+bad_area_nosemaphore:
if (user_mode(regs)) {
force_sig_fault(SIGSEGV, code, (void *) address);
return;
diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
index 864e4ffb6aa9..261352a23271 100644
--- a/drivers/iommu/amd/iommu_v2.c
+++ b/drivers/iommu/amd/iommu_v2.c
@@ -485,8 +485,8 @@ static void do_fault(struct work_struct *work)
flags |= FAULT_FLAG_REMOTE;
mmap_read_lock(mm);
- vma = find_extend_vma(mm, address);
- if (!vma || address < vma->vm_start)
+ vma = vma_lookup(mm, address);
+ if (!vma)
/* failed to get a vma in the right range */
goto out;
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
index 9821bc44f5ac..3ebd4b6586b3 100644
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -175,7 +175,7 @@ iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
mmap_read_lock(mm);
- vma = find_extend_vma(mm, prm->addr);
+ vma = vma_lookup(mm, prm->addr);
if (!vma)
/* Unmapped area */
goto out_put_mm;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 983ce34115d5..7b3d2d491407 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -320,10 +320,10 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
* Grow the stack manually; some architectures have a limit on how
* far ahead a user-space access may be in order to grow the stack.
*/
- if (mmap_read_lock_killable(mm))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
- vma = find_extend_vma(mm, bprm->p);
- mmap_read_unlock(mm);
+ vma = find_extend_vma_locked(mm, bprm->p);
+ mmap_write_unlock(mm);
if (!vma)
return -EFAULT;
diff --git a/fs/exec.c b/fs/exec.c
index 25c65b64544b..80b8611c416d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -200,33 +200,39 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
int write)
{
struct page *page;
+ struct vm_area_struct *vma = bprm->vma;
+ struct mm_struct *mm = bprm->mm;
int ret;
- unsigned int gup_flags = 0;
-#ifdef CONFIG_STACK_GROWSUP
- if (write) {
- ret = expand_downwards(bprm->vma, pos);
- if (ret < 0)
+ /*
+ * Avoid relying on expanding the stack down in GUP (which
+ * does not work for STACK_GROWSUP anyway), and just do it
+ * by hand ahead of time.
+ */
+ if (write && pos < vma->vm_start) {
+ mmap_write_lock(mm);
+ ret = expand_downwards(vma, pos);
+ if (unlikely(ret < 0)) {
+ mmap_write_unlock(mm);
return NULL;
- }
-#endif
-
- if (write)
- gup_flags |= FOLL_WRITE;
+ }
+ mmap_write_downgrade(mm);
+ } else
+ mmap_read_lock(mm);
/*
* We are doing an exec(). 'current' is the process
- * doing the exec and bprm->mm is the new process's mm.
+ * doing the exec and 'mm' is the new process's mm.
*/
- mmap_read_lock(bprm->mm);
- ret = get_user_pages_remote(bprm->mm, pos, 1, gup_flags,
+ ret = get_user_pages_remote(mm, pos, 1,
+ write ? FOLL_WRITE : 0,
&page, NULL);
- mmap_read_unlock(bprm->mm);
+ mmap_read_unlock(mm);
if (ret <= 0)
return NULL;
if (write)
- acct_arg_size(bprm, vma_pages(bprm->vma));
+ acct_arg_size(bprm, vma_pages(vma));
return page;
}
@@ -853,7 +859,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
stack_base = vma->vm_end - stack_expand;
#endif
current->mm->start_stack = bprm->p;
- ret = expand_stack(vma, stack_base);
+ ret = expand_stack_locked(vma, stack_base);
if (ret)
ret = -EFAULT;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index eef34f6a0351..39aa409e84d5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2334,6 +2334,8 @@ void unmap_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t nr, bool even_cows);
void unmap_mapping_range(struct address_space *mapping,
loff_t const holebegin, loff_t const holelen, int even_cows);
+struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm,
+ unsigned long address, struct pt_regs *regs);
#else
static inline vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
unsigned long address, unsigned int flags,
@@ -3228,16 +3230,11 @@ extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
extern unsigned long stack_guard_gap;
/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
-extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
+int expand_stack_locked(struct vm_area_struct *vma, unsigned long address);
+struct vm_area_struct *expand_stack(struct mm_struct * mm, unsigned long addr);
/* CONFIG_STACK_GROWSUP still needs to grow downwards at some places */
-extern int expand_downwards(struct vm_area_struct *vma,
- unsigned long address);
-#if VM_GROWSUP
-extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
-#else
- #define expand_upwards(vma, address) (0)
-#endif
+int expand_downwards(struct vm_area_struct *vma, unsigned long address);
/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
@@ -3332,7 +3329,8 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
#endif
-struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
+struct vm_area_struct *find_extend_vma_locked(struct mm_struct *,
+ unsigned long addr);
int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t);
int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr,
diff --git a/mm/Kconfig b/mm/Kconfig
index 12f32f8d26bf..e00df648c91d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1222,6 +1222,10 @@ config PER_VMA_LOCK
This feature allows locking each virtual memory area separately when
handling page faults instead of taking mmap_lock.
+config LOCK_MM_AND_FIND_VMA
+ bool
+ depends on !STACK_GROWSUP
+
source "mm/damon/Kconfig"
endmenu
diff --git a/mm/gup.c b/mm/gup.c
index 48c1659314b0..ef29641671c7 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1168,7 +1168,11 @@ static long __get_user_pages(struct mm_struct *mm,
/* first iteration or cross vma bound */
if (!vma || start >= vma->vm_end) {
- vma = find_extend_vma(mm, start);
+ vma = find_vma(mm, start);
+ if (vma && (start < vma->vm_start)) {
+ WARN_ON_ONCE(vma->vm_flags & VM_GROWSDOWN);
+ vma = NULL;
+ }
if (!vma && in_gate_area(mm, start)) {
ret = get_gate_page(mm, start & PAGE_MASK,
gup_flags, &vma,
@@ -1333,9 +1337,13 @@ int fixup_user_fault(struct mm_struct *mm,
fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
retry:
- vma = find_extend_vma(mm, address);
- if (!vma || address < vma->vm_start)
+ vma = find_vma(mm, address);
+ if (!vma)
+ return -EFAULT;
+ if (address < vma->vm_start ) {
+ WARN_ON_ONCE(vma->vm_flags & VM_GROWSDOWN);
return -EFAULT;
+ }
if (!vma_permits_fault(vma, fault_flags))
return -EFAULT;
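
With the gup.c change above, get_user_pages() no longer expands stacks implicitly: find_extend_vma() is gone and a lookup that lands below a VM_GROWSDOWN vma now simply fails (with a one-time warning). Callers that legitimately need stack growth, such as get_arg_page() in fs/exec.c or __access_remote_vm() in the mm/memory.c hunk below, must expand the stack themselves before calling GUP. A hedged sketch of what such a caller looks like; the helper name get_user_page_expanding() is made up for illustration and is not part of the patch:

/* Illustrative only: pre-expand the stack, then do the GUP call. */
static struct page *get_user_page_expanding(struct mm_struct *mm,
					    unsigned long addr)
{
	struct page *page = NULL;

	mmap_read_lock(mm);
	if (!vma_lookup(mm, addr)) {
		/* Returns NULL with mmap_lock already dropped if expansion fails. */
		if (!expand_stack(mm, addr))
			return NULL;
	}
	/* mmap_lock is held for reading again at this point. */
	if (get_user_pages_remote(mm, addr, 1, FOLL_WRITE, &page, NULL) <= 0)
		page = NULL;
	mmap_read_unlock(mm);
	return page;
}
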
diff --git a/mm/memory.c b/mm/memory.c
index 58029fd5adda..d8a9a770b1f1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5245,6 +5245,125 @@ out:
}
EXPORT_SYMBOL_GPL(handle_mm_fault);
+#ifdef CONFIG_LOCK_MM_AND_FIND_VMA
+#include <linux/extable.h>
+
+static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs)
+{
+ /* Even if this succeeds, make it clear we *might* have slept */
+ if (likely(mmap_read_trylock(mm))) {
+ might_sleep();
+ return true;
+ }
+
+ if (regs && !user_mode(regs)) {
+ unsigned long ip = instruction_pointer(regs);
+ if (!search_exception_tables(ip))
+ return false;
+ }
+
+ return !mmap_read_lock_killable(mm);
+}
+
+static inline bool mmap_upgrade_trylock(struct mm_struct *mm)
+{
+ /*
+ * We don't have this operation yet.
+ *
+ * It should be easy enough to do: it's basically a
+ * atomic_long_try_cmpxchg_acquire()
+ * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but
+ * it also needs the proper lockdep magic etc.
+ */
+ return false;
+}
+
+static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs)
+{
+ mmap_read_unlock(mm);
+ if (regs && !user_mode(regs)) {
+ unsigned long ip = instruction_pointer(regs);
+ if (!search_exception_tables(ip))
+ return false;
+ }
+ return !mmap_write_lock_killable(mm);
+}
+
+/*
+ * Helper for page fault handling.
+ *
+ * This is kind of equivalent to "mmap_read_lock()" followed
+ * by "find_extend_vma()", except it's a lot more careful about
+ * the locking (and will drop the lock on failure).
+ *
+ * For example, if we have a kernel bug that causes a page
+ * fault, we don't want to just use mmap_read_lock() to get
+ * the mm lock, because that would deadlock if the bug were
+ * to happen while we're holding the mm lock for writing.
+ *
+ * So this checks the exception tables on kernel faults in
+ * order to only do this all for instructions that are actually
+ * expected to fault.
+ *
+ * We can also actually take the mm lock for writing if we
+ * need to extend the vma, which helps the VM layer a lot.
+ */
+struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm,
+ unsigned long addr, struct pt_regs *regs)
+{
+ struct vm_area_struct *vma;
+
+ if (!get_mmap_lock_carefully(mm, regs))
+ return NULL;
+
+ vma = find_vma(mm, addr);
+ if (likely(vma && (vma->vm_start <= addr)))
+ return vma;
+
+ /*
+ * Well, dang. We might still be successful, but only
+ * if we can extend a vma to do so.
+ */
+ if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) {
+ mmap_read_unlock(mm);
+ return NULL;
+ }
+
+ /*
+ * We can try to upgrade the mmap lock atomically,
+ * in which case we can continue to use the vma
+ * we already looked up.
+ *
+ * Otherwise we'll have to drop the mmap lock and
+ * re-take it, and also look up the vma again,
+ * re-checking it.
+ */
+ if (!mmap_upgrade_trylock(mm)) {
+ if (!upgrade_mmap_lock_carefully(mm, regs))
+ return NULL;
+
+ vma = find_vma(mm, addr);
+ if (!vma)
+ goto fail;
+ if (vma->vm_start <= addr)
+ goto success;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto fail;
+ }
+
+ if (expand_stack_locked(vma, addr))
+ goto fail;
+
+success:
+ mmap_write_downgrade(mm);
+ return vma;
+
+fail:
+ mmap_write_unlock(mm);
+ return NULL;
+}
+#endif
+
#ifdef CONFIG_PER_VMA_LOCK
/*
* Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be
@@ -5584,25 +5703,32 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf,
gup_flags, &vma);
if (IS_ERR_OR_NULL(page)) {
-#ifndef CONFIG_HAVE_IOREMAP_PROT
- break;
-#else
- int res = 0;
+ /* We might need to expand the stack to access it */
+ vma = vma_lookup(mm, addr);
+ if (!vma) {
+ vma = expand_stack(mm, addr);
+
+ /* mmap_lock was dropped on failure */
+ if (!vma)
+ return buf - old_buf;
+
+ /* Try again if stack expansion worked */
+ continue;
+ }
+
/*
* Check if this is a VM_IO | VM_PFNMAP VMA, which
* we can access using slightly different code.
*/
- vma = vma_lookup(mm, addr);
- if (!vma)
- break;
+ bytes = 0;
+#ifdef CONFIG_HAVE_IOREMAP_PROT
if (vma->vm_ops && vma->vm_ops->access)
- res = vma->vm_ops->access(vma, addr, buf,
- len, write);
- if (res <= 0)
- break;
- bytes = res;
+ bytes = vma->vm_ops->access(vma, addr, buf,
+ len, write);
#endif
+ if (bytes <= 0)
+ break;
} else {
bytes = len;
offset = addr & (PAGE_SIZE-1);
diff --git a/mm/mmap.c b/mm/mmap.c
index 8f1000bc35df..9b5188b65800 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1948,7 +1948,7 @@ static int acct_stack_growth(struct vm_area_struct *vma,
* PA-RISC uses this for its stack; IA64 for its Register Backing Store.
* vma is the last one with address > vma->vm_end. Have to extend vma.
*/
-int expand_upwards(struct vm_area_struct *vma, unsigned long address)
+static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
{
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *next;
@@ -2040,6 +2040,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
/*
* vma is the first one with address < vma->vm_start. Have to extend vma.
+ * mmap_lock held for writing.
*/
int expand_downwards(struct vm_area_struct *vma, unsigned long address)
{
@@ -2048,16 +2049,20 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
struct vm_area_struct *prev;
int error = 0;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ return -EFAULT;
+
address &= PAGE_MASK;
- if (address < mmap_min_addr)
+ if (address < mmap_min_addr || address < FIRST_USER_ADDRESS)
return -EPERM;
/* Enforce stack_guard_gap */
prev = mas_prev(&mas, 0);
/* Check that both stack segments have the same anon_vma? */
- if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
- vma_is_accessible(prev)) {
- if (address - prev->vm_end < stack_guard_gap)
+ if (prev) {
+ if (!(prev->vm_flags & VM_GROWSDOWN) &&
+ vma_is_accessible(prev) &&
+ (address - prev->vm_end < stack_guard_gap))
return -ENOMEM;
}
@@ -2137,13 +2142,12 @@ static int __init cmdline_parse_stack_guard_gap(char *p)
__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
#ifdef CONFIG_STACK_GROWSUP
-int expand_stack(struct vm_area_struct *vma, unsigned long address)
+int expand_stack_locked(struct vm_area_struct *vma, unsigned long address)
{
return expand_upwards(vma, address);
}
-struct vm_area_struct *
-find_extend_vma(struct mm_struct *mm, unsigned long addr)
+struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, unsigned long addr)
{
struct vm_area_struct *vma, *prev;
@@ -2151,20 +2155,23 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
vma = find_vma_prev(mm, addr, &prev);
if (vma && (vma->vm_start <= addr))
return vma;
- if (!prev || expand_stack(prev, addr))
+ if (!prev)
+ return NULL;
+ if (expand_stack_locked(prev, addr))
return NULL;
if (prev->vm_flags & VM_LOCKED)
populate_vma_page_range(prev, addr, prev->vm_end, NULL);
return prev;
}
#else
-int expand_stack(struct vm_area_struct *vma, unsigned long address)
+int expand_stack_locked(struct vm_area_struct *vma, unsigned long address)
{
+ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
+ return -EINVAL;
return expand_downwards(vma, address);
}
-struct vm_area_struct *
-find_extend_vma(struct mm_struct *mm, unsigned long addr)
+struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, unsigned long addr)
{
struct vm_area_struct *vma;
unsigned long start;
@@ -2175,10 +2182,8 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
return NULL;
if (vma->vm_start <= addr)
return vma;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- return NULL;
start = vma->vm_start;
- if (expand_stack(vma, addr))
+ if (expand_stack_locked(vma, addr))
return NULL;
if (vma->vm_flags & VM_LOCKED)
populate_vma_page_range(vma, addr, start, NULL);
@@ -2186,7 +2191,91 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
}
#endif
-EXPORT_SYMBOL_GPL(find_extend_vma);
+/*
+ * IA64 has some horrid mapping rules: it can expand both up and down,
+ * but with various special rules.
+ *
+ * We'll get rid of this architecture eventually, so the ugliness is
+ * temporary.
+ */
+#ifdef CONFIG_IA64
+static inline bool vma_expand_ok(struct vm_area_struct *vma, unsigned long addr)
+{
+ return REGION_NUMBER(addr) == REGION_NUMBER(vma->vm_start) &&
+ REGION_OFFSET(addr) < RGN_MAP_LIMIT;
+}
+
+/*
+ * IA64 stacks grow down, but there's a special register backing store
+ * that can grow up. Only sequentially, though, so the new address must
+ * match vm_end.
+ */
+static inline int vma_expand_up(struct vm_area_struct *vma, unsigned long addr)
+{
+ if (!vma_expand_ok(vma, addr))
+ return -EFAULT;
+ if (vma->vm_end != (addr & PAGE_MASK))
+ return -EFAULT;
+ return expand_upwards(vma, addr);
+}
+
+static inline bool vma_expand_down(struct vm_area_struct *vma, unsigned long addr)
+{
+ if (!vma_expand_ok(vma, addr))
+ return -EFAULT;
+ return expand_downwards(vma, addr);
+}
+
+#elif defined(CONFIG_STACK_GROWSUP)
+
+#define vma_expand_up(vma,addr) expand_upwards(vma, addr)
+#define vma_expand_down(vma, addr) (-EFAULT)
+
+#else
+
+#define vma_expand_up(vma,addr) (-EFAULT)
+#define vma_expand_down(vma, addr) expand_downwards(vma, addr)
+
+#endif
+
+/*
+ * expand_stack(): legacy interface for page faulting. Don't use unless
+ * you have to.
+ *
+ * This is called with the mm locked for reading, drops the lock, takes
+ * the lock for writing, tries to look up a vma again, expands it if
+ * necessary, and downgrades the lock to reading again.
+ *
+ * If no vma is found or it can't be expanded, it returns NULL and has
+ * dropped the lock.
+ */
+struct vm_area_struct *expand_stack(struct mm_struct *mm, unsigned long addr)
+{
+ struct vm_area_struct *vma, *prev;
+
+ mmap_read_unlock(mm);
+ if (mmap_write_lock_killable(mm))
+ return NULL;
+
+ vma = find_vma_prev(mm, addr, &prev);
+ if (vma && vma->vm_start <= addr)
+ goto success;
+
+ if (prev && !vma_expand_up(prev, addr)) {
+ vma = prev;
+ goto success;
+ }
+
+ if (vma && !vma_expand_down(vma, addr))
+ goto success;
+
+ mmap_write_unlock(mm);
+ return NULL;
+
+success:
+ mmap_write_downgrade(mm);
+ return vma;
+}
/*
* Ok - we have the memory areas we should free on a maple tree so release them,
diff --git a/mm/nommu.c b/mm/nommu.c
index f670d9979a26..37d0b03143f1 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -631,23 +631,20 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
EXPORT_SYMBOL(find_vma);
/*
- * find a VMA
- * - we don't extend stack VMAs under NOMMU conditions
- */
-struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
-{
- return find_vma(mm, addr);
-}
-
-/*
* expand a stack to a given address
* - not supported under NOMMU conditions
*/
-int expand_stack(struct vm_area_struct *vma, unsigned long address)
+int expand_stack_locked(struct vm_area_struct *vma, unsigned long addr)
{
return -ENOMEM;
}
+struct vm_area_struct *expand_stack(struct mm_struct *mm, unsigned long addr)
+{
+ mmap_read_unlock(mm);
+ return NULL;
+}
+
/*
* look up the first VMA that exactly matches addr
* - should be called with mm->mmap_lock at least held readlocked