From e78c3496790ee8a36522a838b59b388e8a709e65 Mon Sep 17 00:00:00 2001
From: Rik van Riel
Date: Sat, 16 Aug 2014 13:40:10 -0400
Subject: time, signal: Protect resource use statistics with seqlock

Both times() and clock_gettime(CLOCK_PROCESS_CPUTIME_ID) have
scalability issues on large systems, due to both functions being
serialized with a lock.

The lock protects against reporting a wrong value, due to a thread in
the task group exiting, its statistics reporting up to the signal
struct, and that exited task's statistics being counted twice (or not
at all).

Protecting that with a lock results in times() and clock_gettime()
being completely serialized on large systems.

This can be fixed by using a seqlock around the events that gather and
propagate statistics. As an additional benefit, the protection code can
be moved into thread_group_cputime(), slightly simplifying the calling
functions.

In the case of posix_cpu_clock_get_task() things can be simplified a
lot, because the calling function already ensures that the task sticks
around, and the rest is now taken care of in thread_group_cputime().

This way the statistics reporting code can run lockless.

Signed-off-by: Rik van Riel
Signed-off-by: Peter Zijlstra (Intel)
Cc: Alex Thorlton
Cc: Andrew Morton
Cc: Daeseok Youn
Cc: David Rientjes
Cc: Dongsheng Yang
Cc: Geert Uytterhoeven
Cc: Guillaume Morin
Cc: Ionut Alexa
Cc: Kees Cook
Cc: Linus Torvalds
Cc: Li Zefan
Cc: Michal Hocko
Cc: Michal Schmidt
Cc: Oleg Nesterov
Cc: Vladimir Davydov
Cc: umgwanakikbuti@gmail.com
Cc: fweisbec@gmail.com
Cc: srao@redhat.com
Cc: lwoodman@redhat.com
Cc: atheurer@redhat.com
Link: http://lkml.kernel.org/r/20140816134010.26a9b572@annuminas.surriel.com
Signed-off-by: Ingo Molnar
---
 kernel/fork.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/fork.c')

diff --git a/kernel/fork.c b/kernel/fork.c
index 0cf9cdb6e491..9387ae8ab048 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1068,6 +1068,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->curr_target = tsk;
 	init_sigpending(&sig->shared_pending);
 	INIT_LIST_HEAD(&sig->posix_timers);
+	seqlock_init(&sig->stats_lock);

 	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	sig->real_timer.function = it_real_fn;
--
cgit v1.2.3


From d4311ff1a8da48d609db9500f121c15580dfeeb7 Mon Sep 17 00:00:00 2001
From: Aaron Tomlin
Date: Fri, 12 Sep 2014 14:16:17 +0100
Subject: init/main.c: Give init_task a canary

Tasks get their end of stack set to STACK_END_MAGIC with the aim of
catching stack overruns. Currently this feature does not apply to
init_task. This patch removes this restriction.

Note that a similar patch was posted by Prarit Bhargava some time ago
but was never merged:

  http://marc.info/?l=linux-kernel&m=127144305403241&w=2

Signed-off-by: Aaron Tomlin
Signed-off-by: Peter Zijlstra (Intel)
Acked-by: Oleg Nesterov
Acked-by: Michael Ellerman
Cc: aneesh.kumar@linux.vnet.ibm.com
Cc: dzickus@redhat.com
Cc: bmr@redhat.com
Cc: jcastillo@redhat.com
Cc: jgh@redhat.com
Cc: minchan@kernel.org
Cc: tglx@linutronix.de
Cc: hannes@cmpxchg.org
Cc: Alex Thorlton
Cc: Andrew Morton
Cc: Benjamin Herrenschmidt
Cc: Daeseok Youn
Cc: David Rientjes
Cc: Fabian Frederick
Cc: Geert Uytterhoeven
Cc: Jiri Olsa
Cc: Kees Cook
Cc: Kirill A. Shutemov
Cc: Linus Torvalds
Cc: Masami Hiramatsu
Cc: Michael Opdenacker
Cc: Paul Mackerras
Cc: Prarit Bhargava
Cc: Rik van Riel
Cc: Rusty Russell
Cc: Seiji Aguchi
Cc: Steven Rostedt
Cc: Vladimir Davydov
Cc: Yasuaki Ishimatsu
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1410527779-8133-2-git-send-email-atomlin@redhat.com
Signed-off-by: Ingo Molnar
---
 arch/powerpc/mm/fault.c    |  3 +--
 arch/x86/mm/fault.c        |  3 +--
 include/linux/sched.h      |  2 ++
 init/main.c                |  1 +
 kernel/fork.c              | 12 +++++++++---
 kernel/trace/trace_stack.c |  4 +---
 6 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 51ab9e7e6c39..35d0760c3fa4 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -30,7 +30,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
@@ -538,7 +537,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 		regs->nip);

 	stackend = end_of_stack(current);
-	if (current != &init_task && *stackend != STACK_END_MAGIC)
+	if (*stackend != STACK_END_MAGIC)
 		printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");

 	die("Kernel access of bad area", regs, sig);

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a24194681513..bc23a7043c65 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -3,7 +3,6 @@
 *  Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
 *  Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
 */
-#include	/* STACK_END_MAGIC */
 #include	/* test_thread_flag(), ... */
 #include	/* oops_begin/end, ... */
 #include	/* search_exception_table */
@@ -710,7 +709,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	show_fault_oops(regs, error_code, address);

 	stackend = end_of_stack(tsk);
-	if (tsk != &init_task && *stackend != STACK_END_MAGIC)
+	if (*stackend != STACK_END_MAGIC)
 		printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");

 	tsk->thread.cr2 = address;

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 82ff3d6efb19..118dca7d5a28 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -57,6 +57,7 @@ struct sched_param {
 #include
 #include
 #include
+#include
 #include
@@ -2638,6 +2639,7 @@ static inline unsigned long stack_not_used(struct task_struct *p)
 	return (unsigned long)n - (unsigned long)end_of_stack(p);
 }
 #endif
+extern void set_task_stack_end_magic(struct task_struct *tsk);

 /* set thread flags in other task's structures
  * - see asm/thread_info.h for TIF_xxxx flags available

diff --git a/init/main.c b/init/main.c
index bb1aed928f21..5fc3fc7bd475 100644
--- a/init/main.c
+++ b/init/main.c
@@ -508,6 +508,7 @@ asmlinkage __visible void __init start_kernel(void)
 	 * lockdep hash:
 	 */
 	lockdep_init();
+	set_task_stack_end_magic(&init_task);
 	smp_setup_processor_id();
 	debug_objects_early_init();

diff --git a/kernel/fork.c b/kernel/fork.c
index 9387ae8ab048..ad64248c4b18 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -294,11 +294,18 @@ int __weak arch_dup_task_struct(struct task_struct *dst,
 	return 0;
 }

+void set_task_stack_end_magic(struct task_struct *tsk)
+{
+	unsigned long *stackend;
+
+	stackend = end_of_stack(tsk);
+	*stackend = STACK_END_MAGIC;	/* for overflow detection */
+}
+
 static struct task_struct *dup_task_struct(struct task_struct *orig)
 {
 	struct task_struct *tsk;
 	struct thread_info *ti;
-	unsigned long *stackend;
 	int node = tsk_fork_get_node(orig);
 	int err;
@@ -328,8 +335,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	setup_thread_stack(tsk, orig);
 	clear_user_return_notifier(tsk);
 	clear_tsk_need_resched(tsk);
-	stackend = end_of_stack(tsk);
-	*stackend = STACK_END_MAGIC;	/* for overflow detection */
+	set_task_stack_end_magic(tsk);

 #ifdef CONFIG_CC_STACKPROTECTOR
 	tsk->stack_canary = get_random_int();

diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 8a4e5cb66a4c..1636e41828c2 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -13,7 +13,6 @@
 #include
 #include
 #include
-#include
 #include
@@ -171,8 +170,7 @@ check_stack(unsigned long ip, unsigned long *stack)
 		i++;
 	}

-	if ((current != &init_task &&
-		*(end_of_stack(current)) != STACK_END_MAGIC)) {
+	if (*end_of_stack(current) != STACK_END_MAGIC) {
 		print_max_stack();
 		BUG();
 	}
--
cgit v1.2.3


From 96dad67ff244e797c4bc3e4f7f0fdaa0cfdf0a7d Mon Sep 17 00:00:00 2001
From: Sasha Levin
Date: Thu, 9 Oct 2014 15:28:39 -0700
Subject: mm: use VM_BUG_ON_MM where possible

Dump the contents of the relevant struct mm_struct when we hit the bug
condition.

Signed-off-by: Sasha Levin
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/fork.c    | 3 +--
 kernel/sys.c     | 2 +-
 mm/huge_memory.c | 2 +-
 mm/mlock.c       | 2 +-
 mm/mmap.c        | 7 ++++---
 mm/pagewalk.c    | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/kernel/fork.c b/kernel/fork.c
index a91e47d86de2..8c162d102740 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -601,9 +601,8 @@ static void check_mm(struct mm_struct *mm)
 			printk(KERN_ALERT "BUG: Bad rss-counter state "
 					  "mm:%p idx:%d val:%ld\n", mm, i, x);
 	}
-
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
-	VM_BUG_ON(mm->pmd_huge_pte);
+	VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
 #endif
 }

diff --git a/kernel/sys.c b/kernel/sys.c
index f7030b060018..df692fbf1e79 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1634,7 +1634,7 @@ static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd)
 	struct inode *inode;
 	int err;

-	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
+	VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);

 	exe = fdget(fd);
 	if (!exe.file)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c13148cc745f..74c78aa8bc2f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2048,7 +2048,7 @@ int __khugepaged_enter(struct mm_struct *mm)
 		return -ENOMEM;

 	/* __khugepaged_exit() must not run from under us */
-	VM_BUG_ON(khugepaged_test_exit(mm));
+	VM_BUG_ON_MM(khugepaged_test_exit(mm), mm);
 	if (unlikely(test_and_set_bit(MMF_VM_HUGEPAGE, &mm->flags))) {
 		free_mm_slot(mm_slot);
 		return 0;

diff --git a/mm/mlock.c b/mm/mlock.c
index d5d09d0786ec..03aa8512723b 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -235,7 +235,7 @@ long __mlock_vma_pages_range(struct vm_area_struct *vma,
 	VM_BUG_ON(end & ~PAGE_MASK);
 	VM_BUG_ON_VMA(start < vma->vm_start, vma);
 	VM_BUG_ON_VMA(end > vma->vm_end, vma);
-	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
+	VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);

 	gup_flags = FOLL_TOUCH | FOLL_MLOCK;
 	/*

diff --git a/mm/mmap.c b/mm/mmap.c
index c9bc285df255..16d19b48e2ad 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -410,8 +410,9 @@ static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
 	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
 		struct vm_area_struct *vma;
 		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
-		BUG_ON(vma != ignore &&
-		       vma->rb_subtree_gap != vma_compute_subtree_gap(vma));
+		VM_BUG_ON_VMA(vma != ignore &&
+			vma->rb_subtree_gap != vma_compute_subtree_gap(vma),
+			vma);
 	}
 }
@@ -448,7 +449,7 @@ static void validate_mm(struct mm_struct *mm)
 		pr_emerg("map_count %d rb %d\n", mm->map_count, i);
 		bug = 1;
 	}
-	BUG_ON(bug);
+	VM_BUG_ON_MM(bug, mm);
 }
 #else
 #define validate_mm_rb(root, ignore) do { } while (0)

diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 2beeabf502c5..ad83195521f2 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -177,7 +177,7 @@ int walk_page_range(unsigned long addr, unsigned long end,
 	if (!walk->mm)
 		return -EINVAL;

-	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
+	VM_BUG_ON_MM(!rwsem_is_locked(&walk->mm->mmap_sem), walk->mm);

 	pgd = pgd_offset(walk->mm, addr);
 	do {
--
cgit v1.2.3


From 392809b25833548ccfc55e61b76c8451a5073216 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Sun, 28 Sep 2014 23:44:18 +0200
Subject: signal: Document the RCU protection of ->sighand

__cleanup_sighand() frees sighand without an RCU grace period. This is
correct, but it looks "obviously buggy" and constantly confuses
readers; add comments to explain how this works.

Signed-off-by: Oleg Nesterov
Reviewed-by: Steven Rostedt
Reviewed-by: Rik van Riel
Signed-off-by: Paul E. McKenney
Reviewed-by: Pranith Kumar
---
 kernel/fork.c   |  5 ++++-
 kernel/signal.c | 12 +++++++++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/kernel/fork.c b/kernel/fork.c
index 9b7d746d6d62..9ca84189cfc2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1022,11 +1022,14 @@ void __cleanup_sighand(struct sighand_struct *sighand)
 {
 	if (atomic_dec_and_test(&sighand->count)) {
 		signalfd_cleanup(sighand);
+		/*
+		 * sighand_cachep is SLAB_DESTROY_BY_RCU so we can free it
+		 * without an RCU grace period, see __lock_task_sighand().
+		 */
 		kmem_cache_free(sighand_cachep, sighand);
 	}
 }

-
 /*
  * Initialize POSIX timer handling for a thread group.
  */

diff --git a/kernel/signal.c b/kernel/signal.c
index 54820984a872..19e35135fc60 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1275,7 +1275,17 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
 			local_irq_restore(*flags);
 			break;
 		}
-
+		/*
+		 * This sighand can be already freed and even reused, but
+		 * we rely on SLAB_DESTROY_BY_RCU and sighand_ctor() which
+		 * initializes ->siglock: this slab can't go away, it has
+		 * the same object type, ->siglock can't be reinitialized.
+		 *
+		 * We need to ensure that tsk->sighand is still the same
+		 * after we take the lock, we can race with de_thread() or
+		 * __exit_signal(). In the latter case the next iteration
+		 * must see ->sighand == NULL.
+		 */
 		spin_lock(&sighand->siglock);
 		if (likely(sighand == tsk->sighand)) {
 			rcu_read_unlock();
--
cgit v1.2.3


From 83cde9e8ba95d180eaefefe834958fbf7008cf39 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso
Date: Fri, 12 Dec 2014 16:54:21 -0800
Subject: mm: use new helper functions around the i_mmap_mutex

Convert all open-coded mutex_lock/unlock calls to the
i_mmap_[lock/unlock]_write() helpers.

Signed-off-by: Davidlohr Bueso
Acked-by: Rik van Riel
Acked-by: "Kirill A. Shutemov"
Shutemov" Acked-by: Hugh Dickins Cc: Oleg Nesterov Acked-by: Peter Zijlstra (Intel) Cc: Srikar Dronamraju Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 4 ++-- kernel/events/uprobes.c | 4 ++-- kernel/fork.c | 4 ++-- mm/filemap_xip.c | 4 ++-- mm/fremap.c | 4 ++-- mm/hugetlb.c | 12 ++++++------ mm/memory-failure.c | 4 ++-- mm/memory.c | 8 ++++---- mm/mmap.c | 14 +++++++------- mm/mremap.c | 4 ++-- mm/nommu.c | 14 +++++++------- mm/rmap.c | 4 ++-- 12 files changed, 40 insertions(+), 40 deletions(-) (limited to 'kernel/fork.c') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 1e2872b25343..a082709aa427 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -412,10 +412,10 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) pgoff = offset >> PAGE_SHIFT; i_size_write(inode, offset); - mutex_lock(&mapping->i_mmap_mutex); + i_mmap_lock_write(mapping); if (!RB_EMPTY_ROOT(&mapping->i_mmap)) hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); - mutex_unlock(&mapping->i_mmap_mutex); + i_mmap_unlock_write(mapping); truncate_hugepages(inode, offset); return 0; } diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ed8f2cde34c5..aac81bf9df09 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -724,7 +724,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register) int more = 0; again: - mutex_lock(&mapping->i_mmap_mutex); + i_mmap_lock_write(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { if (!valid_vma(vma, is_register)) continue; @@ -755,7 +755,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register) info->mm = vma->vm_mm; info->vaddr = offset_to_vaddr(vma, offset); } - mutex_unlock(&mapping->i_mmap_mutex); + i_mmap_unlock_write(mapping); if (!more) goto out; diff --git a/kernel/fork.c b/kernel/fork.c index 9ca84189cfc2..4dc2ddade9f1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -433,7 +433,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) get_file(file); if (tmp->vm_flags & VM_DENYWRITE) atomic_dec(&inode->i_writecount); - mutex_lock(&mapping->i_mmap_mutex); + i_mmap_lock_write(mapping); if (tmp->vm_flags & VM_SHARED) atomic_inc(&mapping->i_mmap_writable); flush_dcache_mmap_lock(mapping); @@ -445,7 +445,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) vma_interval_tree_insert_after(tmp, mpnt, &mapping->i_mmap); flush_dcache_mmap_unlock(mapping); - mutex_unlock(&mapping->i_mmap_mutex); + i_mmap_unlock_write(mapping); } /* diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index d8d9fe3f685c..bad746bde4a2 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -182,7 +182,7 @@ __xip_unmap (struct address_space * mapping, return; retry: - mutex_lock(&mapping->i_mmap_mutex); + i_mmap_lock_write(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { mm = vma->vm_mm; address = vma->vm_start + @@ -202,7 +202,7 @@ retry: page_cache_release(page); } } - mutex_unlock(&mapping->i_mmap_mutex); + i_mmap_unlock_write(mapping); if (locked) { mutex_unlock(&xip_sparse_mutex); diff --git a/mm/fremap.c b/mm/fremap.c index 72b8fa361433..11ef7ec40d13 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -238,13 +238,13 @@ get_write_lock: } goto out_freed; } - mutex_lock(&mapping->i_mmap_mutex); + i_mmap_lock_write(mapping); flush_dcache_mmap_lock(mapping); vma->vm_flags |= VM_NONLINEAR; vma_interval_tree_remove(vma, &mapping->i_mmap); 
 		vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
 		flush_dcache_mmap_unlock(mapping);
-		mutex_unlock(&mapping->i_mmap_mutex);
+		i_mmap_unlock_write(mapping);
 	}

 	if (vma->vm_flags & VM_LOCKED) {

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 919b86a2164d..ffe19304cc09 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2774,7 +2774,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * this mapping should be shared between all the VMAs,
 	 * __unmap_hugepage_range() is called as the lock is already held
 	 */
-	mutex_lock(&mapping->i_mmap_mutex);
+	i_mmap_lock_write(mapping);
 	vma_interval_tree_foreach(iter_vma, &mapping->i_mmap, pgoff, pgoff) {
 		/* Do not unmap the current VMA */
 		if (iter_vma == vma)
@@ -2791,7 +2791,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 			unmap_hugepage_range(iter_vma, address,
 					     address + huge_page_size(h), page);
 	}
-	mutex_unlock(&mapping->i_mmap_mutex);
+	i_mmap_unlock_write(mapping);
 }

 /*
@@ -3348,7 +3348,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 	flush_cache_range(vma, address, end);

 	mmu_notifier_invalidate_range_start(mm, start, end);
-	mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
+	i_mmap_lock_write(vma->vm_file->f_mapping);
 	for (; address < end; address += huge_page_size(h)) {
 		spinlock_t *ptl;
 		ptep = huge_pte_offset(mm, address);
@@ -3376,7 +3376,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 	 * and that page table be reused and filled with junk.
 	 */
 	flush_tlb_range(vma, start, end);
-	mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
+	i_mmap_unlock_write(vma->vm_file->f_mapping);
 	mmu_notifier_invalidate_range_end(mm, start, end);

 	return pages << h->order;
@@ -3544,7 +3544,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	if (!vma_shareable(vma, addr))
 		return (pte_t *)pmd_alloc(mm, pud, addr);

-	mutex_lock(&mapping->i_mmap_mutex);
+	i_mmap_lock_write(mapping);
 	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
 		if (svma == vma)
 			continue;
@@ -3572,7 +3572,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	spin_unlock(ptl);
out:
 	pte = (pte_t *)pmd_alloc(mm, pud, addr);
-	mutex_unlock(&mapping->i_mmap_mutex);
+	i_mmap_unlock_write(mapping);
 	return pte;
 }

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index e5ee0ca7ae85..5e2b26dab8dc 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -466,7 +466,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
 	struct task_struct *tsk;
 	struct address_space *mapping = page->mapping;

-	mutex_lock(&mapping->i_mmap_mutex);
+	i_mmap_lock_write(mapping);
 	read_lock(&tasklist_lock);
 	for_each_process(tsk) {
 		pgoff_t pgoff = page_to_pgoff(page);
@@ -488,7 +488,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
 		}
 	}
 	read_unlock(&tasklist_lock);
-	mutex_unlock(&mapping->i_mmap_mutex);
+	i_mmap_unlock_write(mapping);
 }

 /*

diff --git a/mm/memory.c b/mm/memory.c
index 4b5a282e1107..039fab699a1a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1326,9 +1326,9 @@ static void unmap_single_vma(struct mmu_gather *tlb,
 			 * safe to do nothing in this case.
 			 */
 			if (vma->vm_file) {
-				mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex);
+				i_mmap_lock_write(vma->vm_file->f_mapping);
 				__unmap_hugepage_range_final(tlb, vma, start, end, NULL);
-				mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex);
+				i_mmap_unlock_write(vma->vm_file->f_mapping);
 			}
 		} else
 			unmap_page_range(tlb, vma, start, end, details);
@@ -2377,12 +2377,12 @@ void unmap_mapping_range(struct address_space *mapping,
 		details.last_index = ULONG_MAX;

-	mutex_lock(&mapping->i_mmap_mutex);
+	i_mmap_lock_write(mapping);
 	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
 		unmap_mapping_range_tree(&mapping->i_mmap, &details);
 	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
 		unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
-	mutex_unlock(&mapping->i_mmap_mutex);
+	i_mmap_unlock_write(mapping);
 }
 EXPORT_SYMBOL(unmap_mapping_range);

diff --git a/mm/mmap.c b/mm/mmap.c
index b6c0a77fc1c8..ecd6ecf48778 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -260,9 +260,9 @@ void unlink_file_vma(struct vm_area_struct *vma)

 	if (file) {
 		struct address_space *mapping = file->f_mapping;
-		mutex_lock(&mapping->i_mmap_mutex);
+		i_mmap_lock_write(mapping);
 		__remove_shared_vm_struct(vma, file, mapping);
-		mutex_unlock(&mapping->i_mmap_mutex);
+		i_mmap_unlock_write(mapping);
 	}
 }
@@ -674,14 +674,14 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,

 	if (vma->vm_file) {
 		mapping = vma->vm_file->f_mapping;
-		mutex_lock(&mapping->i_mmap_mutex);
+		i_mmap_lock_write(mapping);
 	}

 	__vma_link(mm, vma, prev, rb_link, rb_parent);
 	__vma_link_file(vma);

 	if (mapping)
-		mutex_unlock(&mapping->i_mmap_mutex);
+		i_mmap_unlock_write(mapping);

 	mm->map_count++;
 	validate_mm(mm);
@@ -796,7 +796,7 @@ again:			remove_next = 1 + (end > next->vm_end);
 							next->vm_end);
 	}

-	mutex_lock(&mapping->i_mmap_mutex);
+	i_mmap_lock_write(mapping);
 	if (insert) {
 		/*
 		 * Put into interval tree now, so instantiated pages
@@ -883,7 +883,7 @@ again:			remove_next = 1 + (end > next->vm_end);
 		anon_vma_unlock_write(anon_vma);
 	}
 	if (mapping)
-		mutex_unlock(&mapping->i_mmap_mutex);
+		i_mmap_unlock_write(mapping);

 	if (root) {
 		uprobe_mmap(vma);
@@ -3182,7 +3182,7 @@ static void vm_unlock_mapping(struct address_space *mapping)
 		 * AS_MM_ALL_LOCKS can't change to 0 from under us
 		 * because we hold the mm_all_locks_mutex.
 		 */
-		mutex_unlock(&mapping->i_mmap_mutex);
+		i_mmap_unlock_write(mapping);
 		if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
 					&mapping->flags))
 			BUG();

diff --git a/mm/mremap.c b/mm/mremap.c
index b147f66f4c40..426b448d6447 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -119,7 +119,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	if (need_rmap_locks) {
 		if (vma->vm_file) {
 			mapping = vma->vm_file->f_mapping;
-			mutex_lock(&mapping->i_mmap_mutex);
+			i_mmap_lock_write(mapping);
 		}
 		if (vma->anon_vma) {
 			anon_vma = vma->anon_vma;
@@ -156,7 +156,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	if (anon_vma)
 		anon_vma_unlock_write(anon_vma);
 	if (mapping)
-		mutex_unlock(&mapping->i_mmap_mutex);
+		i_mmap_unlock_write(mapping);
 }

 #define LATENCY_LIMIT	(64 * PAGE_SIZE)

diff --git a/mm/nommu.c b/mm/nommu.c
index bd1808e194a7..52a576553581 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -722,11 +722,11 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
 	if (vma->vm_file) {
 		mapping = vma->vm_file->f_mapping;

-		mutex_lock(&mapping->i_mmap_mutex);
+		i_mmap_lock_write(mapping);
 		flush_dcache_mmap_lock(mapping);
 		vma_interval_tree_insert(vma, &mapping->i_mmap);
 		flush_dcache_mmap_unlock(mapping);
-		mutex_unlock(&mapping->i_mmap_mutex);
+		i_mmap_unlock_write(mapping);
 	}

 	/* add the VMA to the tree */
@@ -795,11 +795,11 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
 	if (vma->vm_file) {
 		mapping = vma->vm_file->f_mapping;

-		mutex_lock(&mapping->i_mmap_mutex);
+		i_mmap_lock_write(mapping);
 		flush_dcache_mmap_lock(mapping);
 		vma_interval_tree_remove(vma, &mapping->i_mmap);
 		flush_dcache_mmap_unlock(mapping);
-		mutex_unlock(&mapping->i_mmap_mutex);
+		i_mmap_unlock_write(mapping);
 	}

 	/* remove from the MM's tree and list */
@@ -2094,14 +2094,14 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
 	high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;

 	down_write(&nommu_region_sem);
-	mutex_lock(&inode->i_mapping->i_mmap_mutex);
+	i_mmap_lock_write(inode->i_mapping);

 	/* search for VMAs that fall within the dead zone */
 	vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, low, high) {
 		/* found one - only interested if it's shared out of the page
 		 * cache */
 		if (vma->vm_flags & VM_SHARED) {
-			mutex_unlock(&inode->i_mapping->i_mmap_mutex);
+			i_mmap_unlock_write(inode->i_mapping);
 			up_write(&nommu_region_sem);
 			return -ETXTBSY; /* not quite true, but near enough */
 		}
@@ -2129,7 +2129,7 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
 		}
 	}

-	mutex_unlock(&inode->i_mapping->i_mmap_mutex);
+	i_mmap_unlock_write(inode->i_mapping);
 	up_write(&nommu_region_sem);
 	return 0;
 }

diff --git a/mm/rmap.c b/mm/rmap.c
index 45eba36fd673..bea03f6bec61 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1690,7 +1690,7 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)

 	if (!mapping)
 		return ret;
-	mutex_lock(&mapping->i_mmap_mutex);
+	i_mmap_lock_write(mapping);
 	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long address = vma_address(page, vma);
@@ -1713,7 +1713,7 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
 		ret = rwc->file_nonlinear(page, mapping, rwc->arg);

 done:
-	mutex_unlock(&mapping->i_mmap_mutex);
+	i_mmap_unlock_write(mapping);
 	return ret;
 }
--
cgit v1.2.3
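
A note on the seqlock pattern in "time, signal: Protect resource use
statistics with seqlock" above: the writer increments a sequence counter
before and after each update, while readers sample the counter, copy the
data, and retry if the counter changed (or was odd) in the meantime, so
readers never block each other. The sketch below shows only that core
discipline; the sig->stats_lock field comes from the patch, but the helper
names and the reduced field set are illustrative, and the real
thread_group_cputime() has additional handling for interrupts and for
falling back to the lock under sustained contention.

	/* Writer side: an exiting thread folds its counters into the
	 * signal struct; readers running concurrently will retry. */
	static void account_exited_thread(struct signal_struct *sig,
					  struct task_struct *t)
	{
		write_seqlock_irq(&sig->stats_lock);
		sig->utime += t->utime;
		sig->stime += t->stime;
		write_sequnlock_irq(&sig->stats_lock);
	}

	/* Reader side: no mutual exclusion, just retry on a torn read. */
	static void read_group_cputime(struct signal_struct *sig,
				       cputime_t *utime, cputime_t *stime)
	{
		unsigned int seq;

		do {
			seq = read_seqbegin(&sig->stats_lock);
			*utime = sig->utime;
			*stime = sig->stime;
		} while (read_seqretry(&sig->stats_lock, seq));
	}

Because readers write no shared state, times() and clock_gettime() calls
from many CPUs stop bouncing a lock cache line between them; only the
comparatively rare exit path takes the lock for writing.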
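
The canary in "init/main.c: Give init_task a canary" relies on
end_of_stack() returning the address of the lowest word of a task's stack
area, i.e. the word a growing stack clobbers first. init_task's stack is
statically allocated and never passes through dup_task_struct(), which is
why start_kernel() must now plant the magic value itself, and why the
checkers can drop their "current != &init_task" special cases. In outline
(STACK_END_MAGIC is the real constant from include/linux/magic.h; the two
statements mirror the hunks above):

	#define STACK_END_MAGIC		0x57AC6E9D

	/* At task creation, and now once for init_task in start_kernel(): */
	*end_of_stack(tsk) = STACK_END_MAGIC;

	/* In the oops/fault paths, now valid for every task without exception: */
	if (*end_of_stack(current) != STACK_END_MAGIC)
		printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");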
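
VM_BUG_ON_MM() from "mm: use VM_BUG_ON_MM where possible" differs from a
bare BUG_ON() in that it dumps the mm_struct before dying, so the
resulting oops carries the state needed to understand the failed
assertion. The real macro lives in include/linux/mmdebug.h and is active
only under CONFIG_DEBUG_VM; the following is a plausible reduction of its
shape, not a verbatim copy:

	#ifdef CONFIG_DEBUG_VM
	#define VM_BUG_ON_MM(cond, mm)					\
		do {							\
			if (unlikely(cond)) {				\
				dump_mm(mm);	/* report first... */	\
				BUG();		/* ...then die */	\
			}						\
		} while (0)
	#else
	#define VM_BUG_ON_MM(cond, mm) BUILD_BUG_ON_INVALID(cond)
	#endif

The disabled variant still references its argument via
BUILD_BUG_ON_INVALID(), so !CONFIG_DEBUG_VM builds catch type errors in
the condition without generating any code.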
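
The comments added in "signal: Document the RCU protection of ->sighand"
describe the general SLAB_DESTROY_BY_RCU discipline: objects may be freed
and reused immediately, but their slab pages keep the same object type for
an RCU grace period, so a reader may take the lock embedded in a
possibly-recycled object and must then re-check the object's identity
under that lock. Reduced to its core, the loop in __lock_task_sighand()
looks roughly like this (a simplified rendering, with the irq handling of
the real function trimmed):

	struct sighand_struct *lock_sighand_sketch(struct task_struct *tsk)
	{
		struct sighand_struct *sighand;

		for (;;) {
			rcu_read_lock();
			sighand = rcu_dereference(tsk->sighand);
			if (!sighand) {
				/* task is past __exit_signal() */
				rcu_read_unlock();
				return NULL;
			}
			/*
			 * sighand may already be freed and reused, but the
			 * slab can't change type under rcu_read_lock() and
			 * the ctor keeps ->siglock always initialized.
			 */
			spin_lock(&sighand->siglock);
			if (likely(sighand == tsk->sighand)) {
				rcu_read_unlock();
				return sighand;	/* identity confirmed, lock held */
			}
			/* lost a race with de_thread()/__exit_signal(): retry */
			spin_unlock(&sighand->siglock);
			rcu_read_unlock();
		}
	}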
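
The helpers behind "mm: use new helper functions around the i_mmap_mutex"
are deliberately trivial. At this point they are presumably nothing more
than inline wrappers over the existing mutex, roughly as sketched below
(the actual definitions live in include/linux/fs.h). Funneling dozens of
call sites through two functions means that a later change of the lock's
type, and the read-side variants the _write suffix anticipates, become a
local edit rather than another tree-wide sweep:

	static inline void i_mmap_lock_write(struct address_space *mapping)
	{
		mutex_lock(&mapping->i_mmap_mutex);
	}

	static inline void i_mmap_unlock_write(struct address_space *mapping)
	{
		mutex_unlock(&mapping->i_mmap_mutex);
	}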