diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 4 | ||||
-rw-r--r-- | mm/memory.c | 121 |
2 files changed, 125 insertions, 0 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 7672a22647b4..e3454087fd31 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1206,6 +1206,10 @@ config PER_VMA_LOCK This feature allows locking each virtual memory area separately when handling page faults instead of taking mmap_lock. +config LOCK_MM_AND_FIND_VMA + bool + depends on !STACK_GROWSUP + source "mm/damon/Kconfig" endmenu diff --git a/mm/memory.c b/mm/memory.c index f69fbc251198..1a427097b71f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5262,6 +5262,127 @@ out: } EXPORT_SYMBOL_GPL(handle_mm_fault); +#ifdef CONFIG_LOCK_MM_AND_FIND_VMA +#include <linux/extable.h> + +static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs) +{ + /* Even if this succeeds, make it clear we *might* have slept */ + if (likely(mmap_read_trylock(mm))) { + might_sleep(); + return true; + } + + if (regs && !user_mode(regs)) { + unsigned long ip = instruction_pointer(regs); + if (!search_exception_tables(ip)) + return false; + } + + mmap_read_lock(mm); + return true; +} + +static inline bool mmap_upgrade_trylock(struct mm_struct *mm) +{ + /* + * We don't have this operation yet. + * + * It should be easy enough to do: it's basically a + * atomic_long_try_cmpxchg_acquire() + * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but + * it also needs the proper lockdep magic etc. + */ + return false; +} + +static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs) +{ + mmap_read_unlock(mm); + if (regs && !user_mode(regs)) { + unsigned long ip = instruction_pointer(regs); + if (!search_exception_tables(ip)) + return false; + } + mmap_write_lock(mm); + return true; +} + +/* + * Helper for page fault handling. + * + * This is kind of equivalend to "mmap_read_lock()" followed + * by "find_extend_vma()", except it's a lot more careful about + * the locking (and will drop the lock on failure). + * + * For example, if we have a kernel bug that causes a page + * fault, we don't want to just use mmap_read_lock() to get + * the mm lock, because that would deadlock if the bug were + * to happen while we're holding the mm lock for writing. + * + * So this checks the exception tables on kernel faults in + * order to only do this all for instructions that are actually + * expected to fault. + * + * We can also actually take the mm lock for writing if we + * need to extend the vma, which helps the VM layer a lot. + */ +struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm, + unsigned long addr, struct pt_regs *regs) +{ + struct vm_area_struct *vma; + + if (!get_mmap_lock_carefully(mm, regs)) + return NULL; + + vma = find_vma(mm, addr); + if (likely(vma && (vma->vm_start <= addr))) + return vma; + + /* + * Well, dang. We might still be successful, but only + * if we can extend a vma to do so. + */ + if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) { + mmap_read_unlock(mm); + return NULL; + } + + /* + * We can try to upgrade the mmap lock atomically, + * in which case we can continue to use the vma + * we already looked up. + * + * Otherwise we'll have to drop the mmap lock and + * re-take it, and also look up the vma again, + * re-checking it. + */ + if (!mmap_upgrade_trylock(mm)) { + if (!upgrade_mmap_lock_carefully(mm, regs)) + return NULL; + + vma = find_vma(mm, addr); + if (!vma) + goto fail; + if (vma->vm_start <= addr) + goto success; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto fail; + } + + if (expand_stack(vma, addr)) + goto fail; + +success: + mmap_write_downgrade(mm); + return vma; + +fail: + mmap_write_unlock(mm); + return NULL; +} +#endif + #ifdef CONFIG_PER_VMA_LOCK /* * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be |