diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 4 | ||||
-rw-r--r-- | mm/gup.c | 14 | ||||
-rw-r--r-- | mm/memory.c | 150 | ||||
-rw-r--r-- | mm/mmap.c | 121 | ||||
-rw-r--r-- | mm/nommu.c | 17 |
5 files changed, 265 insertions, 41 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 12f32f8d26bf..e00df648c91d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1222,6 +1222,10 @@ config PER_VMA_LOCK This feature allows locking each virtual memory area separately when handling page faults instead of taking mmap_lock. +config LOCK_MM_AND_FIND_VMA + bool + depends on !STACK_GROWSUP + source "mm/damon/Kconfig" endmenu @@ -1168,7 +1168,11 @@ static long __get_user_pages(struct mm_struct *mm, /* first iteration or cross vma bound */ if (!vma || start >= vma->vm_end) { - vma = find_extend_vma(mm, start); + vma = find_vma(mm, start); + if (vma && (start < vma->vm_start)) { + WARN_ON_ONCE(vma->vm_flags & VM_GROWSDOWN); + vma = NULL; + } if (!vma && in_gate_area(mm, start)) { ret = get_gate_page(mm, start & PAGE_MASK, gup_flags, &vma, @@ -1333,9 +1337,13 @@ int fixup_user_fault(struct mm_struct *mm, fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; retry: - vma = find_extend_vma(mm, address); - if (!vma || address < vma->vm_start) + vma = find_vma(mm, address); + if (!vma) + return -EFAULT; + if (address < vma->vm_start ) { + WARN_ON_ONCE(vma->vm_flags & VM_GROWSDOWN); return -EFAULT; + } if (!vma_permits_fault(vma, fault_flags)) return -EFAULT; diff --git a/mm/memory.c b/mm/memory.c index 58029fd5adda..d8a9a770b1f1 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5245,6 +5245,125 @@ out: } EXPORT_SYMBOL_GPL(handle_mm_fault); +#ifdef CONFIG_LOCK_MM_AND_FIND_VMA +#include <linux/extable.h> + +static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs) +{ + /* Even if this succeeds, make it clear we *might* have slept */ + if (likely(mmap_read_trylock(mm))) { + might_sleep(); + return true; + } + + if (regs && !user_mode(regs)) { + unsigned long ip = instruction_pointer(regs); + if (!search_exception_tables(ip)) + return false; + } + + return !mmap_read_lock_killable(mm); +} + +static inline bool mmap_upgrade_trylock(struct mm_struct *mm) +{ + /* + * We don't have this operation yet. + * + * It should be easy enough to do: it's basically a + * atomic_long_try_cmpxchg_acquire() + * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but + * it also needs the proper lockdep magic etc. + */ + return false; +} + +static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs) +{ + mmap_read_unlock(mm); + if (regs && !user_mode(regs)) { + unsigned long ip = instruction_pointer(regs); + if (!search_exception_tables(ip)) + return false; + } + return !mmap_write_lock_killable(mm); +} + +/* + * Helper for page fault handling. + * + * This is kind of equivalend to "mmap_read_lock()" followed + * by "find_extend_vma()", except it's a lot more careful about + * the locking (and will drop the lock on failure). + * + * For example, if we have a kernel bug that causes a page + * fault, we don't want to just use mmap_read_lock() to get + * the mm lock, because that would deadlock if the bug were + * to happen while we're holding the mm lock for writing. + * + * So this checks the exception tables on kernel faults in + * order to only do this all for instructions that are actually + * expected to fault. + * + * We can also actually take the mm lock for writing if we + * need to extend the vma, which helps the VM layer a lot. + */ +struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm, + unsigned long addr, struct pt_regs *regs) +{ + struct vm_area_struct *vma; + + if (!get_mmap_lock_carefully(mm, regs)) + return NULL; + + vma = find_vma(mm, addr); + if (likely(vma && (vma->vm_start <= addr))) + return vma; + + /* + * Well, dang. We might still be successful, but only + * if we can extend a vma to do so. + */ + if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) { + mmap_read_unlock(mm); + return NULL; + } + + /* + * We can try to upgrade the mmap lock atomically, + * in which case we can continue to use the vma + * we already looked up. + * + * Otherwise we'll have to drop the mmap lock and + * re-take it, and also look up the vma again, + * re-checking it. + */ + if (!mmap_upgrade_trylock(mm)) { + if (!upgrade_mmap_lock_carefully(mm, regs)) + return NULL; + + vma = find_vma(mm, addr); + if (!vma) + goto fail; + if (vma->vm_start <= addr) + goto success; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto fail; + } + + if (expand_stack_locked(vma, addr)) + goto fail; + +success: + mmap_write_downgrade(mm); + return vma; + +fail: + mmap_write_unlock(mm); + return NULL; +} +#endif + #ifdef CONFIG_PER_VMA_LOCK /* * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be @@ -5584,25 +5703,32 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, gup_flags, &vma); if (IS_ERR_OR_NULL(page)) { -#ifndef CONFIG_HAVE_IOREMAP_PROT - break; -#else - int res = 0; + /* We might need to expand the stack to access it */ + vma = vma_lookup(mm, addr); + if (!vma) { + vma = expand_stack(mm, addr); + + /* mmap_lock was dropped on failure */ + if (!vma) + return buf - old_buf; + + /* Try again if stack expansion worked */ + continue; + } + /* * Check if this is a VM_IO | VM_PFNMAP VMA, which * we can access using slightly different code. */ - vma = vma_lookup(mm, addr); - if (!vma) - break; + bytes = 0; +#ifdef CONFIG_HAVE_IOREMAP_PROT if (vma->vm_ops && vma->vm_ops->access) - res = vma->vm_ops->access(vma, addr, buf, - len, write); - if (res <= 0) - break; - bytes = res; + bytes = vma->vm_ops->access(vma, addr, buf, + len, write); #endif + if (bytes <= 0) + break; } else { bytes = len; offset = addr & (PAGE_SIZE-1); diff --git a/mm/mmap.c b/mm/mmap.c index 8f1000bc35df..9b5188b65800 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1948,7 +1948,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, * PA-RISC uses this for its stack; IA64 for its Register Backing Store. * vma is the last one with address > vma->vm_end. Have to extend vma. */ -int expand_upwards(struct vm_area_struct *vma, unsigned long address) +static int expand_upwards(struct vm_area_struct *vma, unsigned long address) { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *next; @@ -2040,6 +2040,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) /* * vma is the first one with address < vma->vm_start. Have to extend vma. + * mmap_lock held for writing. */ int expand_downwards(struct vm_area_struct *vma, unsigned long address) { @@ -2048,16 +2049,20 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address) struct vm_area_struct *prev; int error = 0; + if (!(vma->vm_flags & VM_GROWSDOWN)) + return -EFAULT; + address &= PAGE_MASK; - if (address < mmap_min_addr) + if (address < mmap_min_addr || address < FIRST_USER_ADDRESS) return -EPERM; /* Enforce stack_guard_gap */ prev = mas_prev(&mas, 0); /* Check that both stack segments have the same anon_vma? */ - if (prev && !(prev->vm_flags & VM_GROWSDOWN) && - vma_is_accessible(prev)) { - if (address - prev->vm_end < stack_guard_gap) + if (prev) { + if (!(prev->vm_flags & VM_GROWSDOWN) && + vma_is_accessible(prev) && + (address - prev->vm_end < stack_guard_gap)) return -ENOMEM; } @@ -2137,13 +2142,12 @@ static int __init cmdline_parse_stack_guard_gap(char *p) __setup("stack_guard_gap=", cmdline_parse_stack_guard_gap); #ifdef CONFIG_STACK_GROWSUP -int expand_stack(struct vm_area_struct *vma, unsigned long address) +int expand_stack_locked(struct vm_area_struct *vma, unsigned long address) { return expand_upwards(vma, address); } -struct vm_area_struct * -find_extend_vma(struct mm_struct *mm, unsigned long addr) +struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, unsigned long addr) { struct vm_area_struct *vma, *prev; @@ -2151,20 +2155,23 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) vma = find_vma_prev(mm, addr, &prev); if (vma && (vma->vm_start <= addr)) return vma; - if (!prev || expand_stack(prev, addr)) + if (!prev) + return NULL; + if (expand_stack_locked(prev, addr)) return NULL; if (prev->vm_flags & VM_LOCKED) populate_vma_page_range(prev, addr, prev->vm_end, NULL); return prev; } #else -int expand_stack(struct vm_area_struct *vma, unsigned long address) +int expand_stack_locked(struct vm_area_struct *vma, unsigned long address) { + if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) + return -EINVAL; return expand_downwards(vma, address); } -struct vm_area_struct * -find_extend_vma(struct mm_struct *mm, unsigned long addr) +struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, unsigned long addr) { struct vm_area_struct *vma; unsigned long start; @@ -2175,10 +2182,8 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) return NULL; if (vma->vm_start <= addr) return vma; - if (!(vma->vm_flags & VM_GROWSDOWN)) - return NULL; start = vma->vm_start; - if (expand_stack(vma, addr)) + if (expand_stack_locked(vma, addr)) return NULL; if (vma->vm_flags & VM_LOCKED) populate_vma_page_range(vma, addr, start, NULL); @@ -2186,7 +2191,91 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr) } #endif -EXPORT_SYMBOL_GPL(find_extend_vma); +/* + * IA64 has some horrid mapping rules: it can expand both up and down, + * but with various special rules. + * + * We'll get rid of this architecture eventually, so the ugliness is + * temporary. + */ +#ifdef CONFIG_IA64 +static inline bool vma_expand_ok(struct vm_area_struct *vma, unsigned long addr) +{ + return REGION_NUMBER(addr) == REGION_NUMBER(vma->vm_start) && + REGION_OFFSET(addr) < RGN_MAP_LIMIT; +} + +/* + * IA64 stacks grow down, but there's a special register backing store + * that can grow up. Only sequentially, though, so the new address must + * match vm_end. + */ +static inline int vma_expand_up(struct vm_area_struct *vma, unsigned long addr) +{ + if (!vma_expand_ok(vma, addr)) + return -EFAULT; + if (vma->vm_end != (addr & PAGE_MASK)) + return -EFAULT; + return expand_upwards(vma, addr); +} + +static inline bool vma_expand_down(struct vm_area_struct *vma, unsigned long addr) +{ + if (!vma_expand_ok(vma, addr)) + return -EFAULT; + return expand_downwards(vma, addr); +} + +#elif defined(CONFIG_STACK_GROWSUP) + +#define vma_expand_up(vma,addr) expand_upwards(vma, addr) +#define vma_expand_down(vma, addr) (-EFAULT) + +#else + +#define vma_expand_up(vma,addr) (-EFAULT) +#define vma_expand_down(vma, addr) expand_downwards(vma, addr) + +#endif + +/* + * expand_stack(): legacy interface for page faulting. Don't use unless + * you have to. + * + * This is called with the mm locked for reading, drops the lock, takes + * the lock for writing, tries to look up a vma again, expands it if + * necessary, and downgrades the lock to reading again. + * + * If no vma is found or it can't be expanded, it returns NULL and has + * dropped the lock. + */ +struct vm_area_struct *expand_stack(struct mm_struct *mm, unsigned long addr) +{ + struct vm_area_struct *vma, *prev; + + mmap_read_unlock(mm); + if (mmap_write_lock_killable(mm)) + return NULL; + + vma = find_vma_prev(mm, addr, &prev); + if (vma && vma->vm_start <= addr) + goto success; + + if (prev && !vma_expand_up(prev, addr)) { + vma = prev; + goto success; + } + + if (vma && !vma_expand_down(vma, addr)) + goto success; + + mmap_write_unlock(mm); + return NULL; + +success: + mmap_write_downgrade(mm); + return vma; +} /* * Ok - we have the memory areas we should free on a maple tree so release them, diff --git a/mm/nommu.c b/mm/nommu.c index f670d9979a26..37d0b03143f1 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -631,23 +631,20 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) EXPORT_SYMBOL(find_vma); /* - * find a VMA - * - we don't extend stack VMAs under NOMMU conditions - */ -struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr) -{ - return find_vma(mm, addr); -} - -/* * expand a stack to a given address * - not supported under NOMMU conditions */ -int expand_stack(struct vm_area_struct *vma, unsigned long address) +int expand_stack_locked(struct vm_area_struct *vma, unsigned long addr) { return -ENOMEM; } +struct vm_area_struct *expand_stack(struct mm_struct *mm, unsigned long addr) +{ + mmap_read_unlock(mm); + return NULL; +} + /* * look up the first VMA exactly that exactly matches addr * - should be called with mm->mmap_lock at least held readlocked |