diff options
author | Suren Baghdasaryan <surenb@google.com> | 2023-02-27 20:36:31 +0300 |
---|---|---|
committer | Andrew Morton <akpm@linux-foundation.org> | 2023-04-06 06:03:02 +0300 |
commit | 0d2ebf9c3f7822e7ba3e4792ea3b6b19aa2da34a (patch) | |
tree | 238c51ee2bbbe47ded831d364d90b56ad8b9b7a3 | |
parent | 70d4cbc80c88251de0a5b3e8df3275901f1fa99a (diff) | |
download | linux-0d2ebf9c3f7822e7ba3e4792ea3b6b19aa2da34a.tar.xz |
mm/mmap: free vm_area_struct without call_rcu in exit_mmap
call_rcu() can take a long time when callback offloading is enabled. Its
use in the vm_area_free can cause regressions in the exit path when
multiple VMAs are being freed.
Because exit_mmap() is called only after the last mm user drops its
refcount, the page fault handlers can't be racing with it. Any other
possible user like oom-reaper or process_mrelease are already synchronized
using mmap_lock. Therefore exit_mmap() can free VMAs directly, without
the use of call_rcu().
Expose __vm_area_free() and use it from exit_mmap() to avoid possible
call_rcu() floods and performance regressions caused by it.
Link: https://lkml.kernel.org/r/20230227173632.3292573-33-surenb@google.com
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-rw-r--r-- | include/linux/mm.h | 2 | ||||
-rw-r--r-- | kernel/fork.c | 2 | ||||
-rw-r--r-- | mm/mmap.c | 11 |
3 files changed, 10 insertions, 5 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h index 1876825d6700..f31c75dc190e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -257,6 +257,8 @@ void setup_initial_init_mm(void *start_code, void *end_code, struct vm_area_struct *vm_area_alloc(struct mm_struct *); struct vm_area_struct *vm_area_dup(struct vm_area_struct *); void vm_area_free(struct vm_area_struct *); +/* Use only if VMA has no other users */ +void __vm_area_free(struct vm_area_struct *vma); #ifndef CONFIG_MMU extern struct rb_root nommu_region_tree; diff --git a/kernel/fork.c b/kernel/fork.c index 76fcc08984d4..f86f7e917954 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -480,7 +480,7 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) return new; } -static void __vm_area_free(struct vm_area_struct *vma) +void __vm_area_free(struct vm_area_struct *vma) { free_anon_vma_name(vma); kmem_cache_free(vm_area_cachep, vma); diff --git a/mm/mmap.c b/mm/mmap.c index b42f58591b9a..511f656eb423 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -133,7 +133,7 @@ void unlink_file_vma(struct vm_area_struct *vma) /* * Close a vm structure and free it. */ -static void remove_vma(struct vm_area_struct *vma) +static void remove_vma(struct vm_area_struct *vma, bool unreachable) { might_sleep(); if (vma->vm_ops && vma->vm_ops->close) @@ -141,7 +141,10 @@ static void remove_vma(struct vm_area_struct *vma) if (vma->vm_file) fput(vma->vm_file); mpol_put(vma_policy(vma)); - vm_area_free(vma); + if (unreachable) + __vm_area_free(vma); + else + vm_area_free(vma); } static inline struct vm_area_struct *vma_prev_limit(struct vma_iterator *vmi, @@ -2145,7 +2148,7 @@ static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas) if (vma->vm_flags & VM_ACCOUNT) nr_accounted += nrpages; vm_stat_account(mm, vma->vm_flags, -nrpages); - remove_vma(vma); + remove_vma(vma, false); } vm_unacct_memory(nr_accounted); validate_mm(mm); @@ -3078,7 +3081,7 @@ void exit_mmap(struct mm_struct *mm) do { if (vma->vm_flags & VM_ACCOUNT) nr_accounted += vma_pages(vma); - remove_vma(vma); + remove_vma(vma, true); count++; cond_resched(); } while ((vma = mas_find(&mas, ULONG_MAX)) != NULL); |