summary | refs | log | tree | commit | diff
path: root/mm/mmap.c
diff options
context:
space:
mode:
author: Liam R. Howlett <Liam.Howlett@Oracle.com> 2022-09-06 22:49:06 +0300
committer: Andrew Morton <akpm@linux-foundation.org> 2022-09-27 05:46:26 +0300
commit: 763ecb035029f500d7e6dc99acd1ad299b7726a1 (patch)
tree: 764e1b696d8d67f71d649282e7048187d1ef8570 /mm/mmap.c
parent: 78ba531ff3ec2a444001853d8636ff39ed11ca28 (diff)
download: linux-763ecb035029f500d7e6dc99acd1ad299b7726a1.tar.xz
mm: remove the vma linked list
Replace any vm_next use with vma_find().

Update free_pgtables(), unmap_vmas(), and zap_page_range() to use the maple tree.

Use the new free_pgtables() and unmap_vmas() in do_mas_align_munmap(). At the same time, alter the loop to be more compact.

Now that free_pgtables() and unmap_vmas() take a maple tree as an argument, rearrange do_mas_align_munmap() to use the new tree to hold the vmas to remove.

Remove __vma_link_list() and __vma_unlink_list() as they are exclusively used to update the linked list.

Drop linked list update from __insert_vm_struct().

Rework validation of tree as it was depending on the linked list.

[yang.lee@linux.alibaba.com: fix one kernel-doc comment]
  Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=1949
  Link: https://lkml.kernel.org/r/20220824021918.94116-1-yang.lee@linux.alibaba.com
Link: https://lkml.kernel.org/r/20220906194824.2110408-69-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Tested-by: Yu Zhao <yuzhao@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: SeongJae Park <sj@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/mmap.c')
-rw-r--r-- mm/mmap.c 469
1 files changed, 187 insertions, 282 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
index aabd4f986ccf..4441f7ed197a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -75,9 +75,10 @@ int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
static bool ignore_rlimit_data;
core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
-static void unmap_region(struct mm_struct *mm,
+static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
struct vm_area_struct *vma, struct vm_area_struct *prev,
- unsigned long start, unsigned long end);
+ struct vm_area_struct *next, unsigned long start,
+ unsigned long end);
static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
@@ -130,12 +131,10 @@ void unlink_file_vma(struct vm_area_struct *vma)
}
/*
- * Close a vm structure and free it, returning the next.
+ * Close a vm structure and free it.
*/
-static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
+static void remove_vma(struct vm_area_struct *vma)
{
- struct vm_area_struct *next = vma->vm_next;
-
might_sleep();
if (vma->vm_ops && vma->vm_ops->close)
vma->vm_ops->close(vma);
@@ -143,7 +142,6 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
fput(vma->vm_file);
mpol_put(vma_policy(vma));
vm_area_free(vma);
- return next;
}
/*
@@ -168,8 +166,7 @@ static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
unsigned long newbrk, unsigned long oldbrk,
struct list_head *uf);
static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *brkvma,
- unsigned long addr, unsigned long request,
- unsigned long flags);
+ unsigned long addr, unsigned long request, unsigned long flags);
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
unsigned long newbrk, oldbrk, origbrk;
@@ -238,7 +235,6 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
* before calling do_brk_munmap().
*/
mm->brk = brk;
- mas.last = oldbrk - 1;
ret = do_brk_munmap(&mas, brkvma, newbrk, oldbrk, &uf);
if (ret == 1) {
downgraded = true;
@@ -293,44 +289,21 @@ extern void mt_dump(const struct maple_tree *mt);
static void validate_mm_mt(struct mm_struct *mm)
{
struct maple_tree *mt = &mm->mm_mt;
- struct vm_area_struct *vma_mt, *vma = mm->mmap;
+ struct vm_area_struct *vma_mt;
MA_STATE(mas, mt, 0, 0);
mt_validate(&mm->mm_mt);
mas_for_each(&mas, vma_mt, ULONG_MAX) {
- if (xa_is_zero(vma_mt))
- continue;
-
- if (!vma)
- break;
-
- if ((vma != vma_mt) ||
- (vma->vm_start != vma_mt->vm_start) ||
- (vma->vm_end != vma_mt->vm_end) ||
- (vma->vm_start != mas.index) ||
- (vma->vm_end - 1 != mas.last)) {
+ if ((vma_mt->vm_start != mas.index) ||
+ (vma_mt->vm_end - 1 != mas.last)) {
pr_emerg("issue in %s\n", current->comm);
dump_stack();
dump_vma(vma_mt);
- pr_emerg("and vm_next\n");
- dump_vma(vma->vm_next);
pr_emerg("mt piv: %p %lu - %lu\n", vma_mt,
mas.index, mas.last);
pr_emerg("mt vma: %p %lu - %lu\n", vma_mt,
vma_mt->vm_start, vma_mt->vm_end);
- if (vma->vm_prev) {
- pr_emerg("ll prev: %p %lu - %lu\n",
- vma->vm_prev, vma->vm_prev->vm_start,
- vma->vm_prev->vm_end);
- }
- pr_emerg("ll vma: %p %lu - %lu\n", vma,
- vma->vm_start, vma->vm_end);
- if (vma->vm_next) {
- pr_emerg("ll next: %p %lu - %lu\n",
- vma->vm_next, vma->vm_next->vm_start,
- vma->vm_next->vm_end);
- }
mt_dump(mas.tree);
if (vma_mt->vm_end != mas.last + 1) {
@@ -347,23 +320,19 @@ static void validate_mm_mt(struct mm_struct *mm)
}
VM_BUG_ON_MM(vma_mt->vm_start != mas.index, mm);
}
- VM_BUG_ON(vma != vma_mt);
- vma = vma->vm_next;
-
}
- VM_BUG_ON(vma);
}
static void validate_mm(struct mm_struct *mm)
{
int bug = 0;
int i = 0;
- unsigned long highest_address = 0;
- struct vm_area_struct *vma = mm->mmap;
+ struct vm_area_struct *vma;
+ MA_STATE(mas, &mm->mm_mt, 0, 0);
validate_mm_mt(mm);
- while (vma) {
+ mas_for_each(&mas, vma, ULONG_MAX) {
#ifdef CONFIG_DEBUG_VM_RB
struct anon_vma *anon_vma = vma->anon_vma;
struct anon_vma_chain *avc;
@@ -375,18 +344,10 @@ static void validate_mm(struct mm_struct *mm)
anon_vma_unlock_read(anon_vma);
}
#endif
-
- highest_address = vm_end_gap(vma);
- vma = vma->vm_next;
i++;
}
if (i != mm->map_count) {
- pr_emerg("map_count %d vm_next %d\n", mm->map_count, i);
- bug = 1;
- }
- if (highest_address != mm->highest_vm_end) {
- pr_emerg("mm->highest_vm_end %lx, found %lx\n",
- mm->highest_vm_end, highest_address);
+ pr_emerg("map_count %d mas_for_each %d\n", mm->map_count, i);
bug = 1;
}
VM_BUG_ON_MM(bug, mm);
@@ -446,29 +407,13 @@ bool range_has_overlap(struct mm_struct *mm, unsigned long start,
struct vm_area_struct *existing;
MA_STATE(mas, &mm->mm_mt, start, start);
+ rcu_read_lock();
existing = mas_find(&mas, end - 1);
*pprev = mas_prev(&mas, 0);
+ rcu_read_unlock();
return existing ? true : false;
}
-/*
- * __vma_next() - Get the next VMA.
- * @mm: The mm_struct.
- * @vma: The current vma.
- *
- * If @vma is NULL, return the first vma in the mm.
- *
- * Returns: The next VMA after @vma.
- */
-static inline struct vm_area_struct *__vma_next(struct mm_struct *mm,
- struct vm_area_struct *vma)
-{
- if (!vma)
- return mm->mmap;
-
- return vma->vm_next;
-}
-
static unsigned long count_vma_pages_range(struct mm_struct *mm,
unsigned long addr, unsigned long end)
{
@@ -553,8 +498,7 @@ static inline void vma_mas_szero(struct ma_state *mas, unsigned long start,
mas_store_prealloc(mas, NULL);
}
-static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
- struct vm_area_struct *prev)
+static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
{
MA_STATE(mas, &mm->mm_mt, 0, 0);
struct address_space *mapping = NULL;
@@ -568,7 +512,6 @@ static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
}
vma_mas_store(vma, &mas);
- __vma_link_list(mm, vma, prev);
__vma_link_file(vma);
if (mapping)
@@ -580,22 +523,6 @@ static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
}
/*
- * Helper for vma_adjust() in the split_vma insert case: insert a vma into the
- * mm's list and the mm tree. It has already been inserted into the interval tree.
- */
-static void __insert_vm_struct(struct mm_struct *mm, struct ma_state *mas,
- struct vm_area_struct *vma, unsigned long location)
-{
- struct vm_area_struct *prev;
-
- mas_set(mas, location);
- prev = mas_prev(mas, 0);
- vma_mas_store(vma, mas);
- __vma_link_list(mm, vma, prev);
- mm->map_count++;
-}
-
-/*
* vma_expand - Expand an existing VMA
*
* @mas: The maple state
@@ -675,15 +602,8 @@ inline int vma_expand(struct ma_state *mas, struct vm_area_struct *vma,
}
/* Expanding over the next vma */
- if (remove_next) {
- /* Remove from mm linked list - also updates highest_vm_end */
- __vma_unlink_list(mm, next);
-
- if (file)
- __remove_shared_vm_struct(next, file, mapping);
-
- } else if (!next) {
- mm->highest_vm_end = vm_end_gap(vma);
+ if (remove_next && file) {
+ __remove_shared_vm_struct(next, file, mapping);
}
if (anon_vma) {
@@ -738,7 +658,6 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
int remove_next = 0;
MA_STATE(mas, &mm->mm_mt, 0, 0);
struct vm_area_struct *exporter = NULL, *importer = NULL;
- unsigned long ll_prev = vma->vm_start; /* linked list prev. */
if (next && !insert) {
if (end >= next->vm_end) {
@@ -773,7 +692,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
next_next = find_vma(mm, next->vm_end);
VM_WARN_ON(remove_next == 2 &&
- end != next->vm_next->vm_end);
+ end != next_next->vm_end);
}
exporter = next;
@@ -784,7 +703,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
* next, if the vma overlaps with it.
*/
if (remove_next == 2 && !next->anon_vma)
- exporter = next->vm_next;
+ exporter = next_next;
} else if (end > next->vm_start) {
/*
@@ -879,17 +798,14 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
if (vma->vm_end > end) {
if (!insert || (insert->vm_start != end)) {
vma_mas_szero(&mas, end, vma->vm_end);
+ mas_reset(&mas);
VM_WARN_ON(insert &&
insert->vm_end < vma->vm_end);
- } else if (insert->vm_start == end) {
- ll_prev = vma->vm_end;
}
} else {
vma_changed = true;
}
vma->vm_end = end;
- if (!next)
- mm->highest_vm_end = vm_end_gap(vma);
}
if (vma_changed)
@@ -909,29 +825,19 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
flush_dcache_mmap_unlock(mapping);
}
- if (remove_next) {
- /*
- * vma_merge has merged next into vma, and needs
- * us to remove next before dropping the locks.
- * Since we have expanded over this vma, the maple tree will
- * have overwritten by storing the value
- */
- __vma_unlink_list(mm, next);
+ if (remove_next && file) {
+ __remove_shared_vm_struct(next, file, mapping);
if (remove_next == 2)
- __vma_unlink_list(mm, next_next);
-
- if (file) {
- __remove_shared_vm_struct(next, file, mapping);
- if (remove_next == 2)
- __remove_shared_vm_struct(next_next, file, mapping);
- }
+ __remove_shared_vm_struct(next_next, file, mapping);
} else if (insert) {
/*
* split_vma has split insert from vma, and needs
* us to insert it before dropping the locks
* (it may either follow vma or precede it).
*/
- __insert_vm_struct(mm, &mas, insert, ll_prev);
+ mas_reset(&mas);
+ vma_mas_store(insert, &mas);
+ mm->map_count++;
}
if (anon_vma) {
@@ -965,54 +871,12 @@ again:
/*
* In mprotect's case 6 (see comments on vma_merge),
- * we must remove another next too. It would clutter
- * up the code too much to do both in one go.
+ * we must remove next_next too.
*/
- if (remove_next != 3) {
- /*
- * If "next" was removed and vma->vm_end was
- * expanded (up) over it, in turn
- * "next->vm_prev->vm_end" changed and the
- * "vma->vm_next" gap must be updated.
- */
- next = next_next;
- } else {
- /*
- * For the scope of the comment "next" and
- * "vma" considered pre-swap(): if "vma" was
- * removed, next->vm_start was expanded (down)
- * over it and the "next" gap must be updated.
- * Because of the swap() the post-swap() "vma"
- * actually points to pre-swap() "next"
- * (post-swap() "next" as opposed is now a
- * dangling pointer).
- */
- next = vma;
- }
if (remove_next == 2) {
remove_next = 1;
+ next = next_next;
goto again;
- } else if (!next) {
- /*
- * If remove_next == 2 we obviously can't
- * reach this path.
- *
- * If remove_next == 3 we can't reach this
- * path because pre-swap() next is always not
- * NULL. pre-swap() "next" is not being
- * removed and its next->vm_end is not altered
- * (and furthermore "end" already matches
- * next->vm_end in remove_next == 3).
- *
- * We reach this only in the remove_next == 1
- * case if the "next" vma that was removed was
- * the highest vma of the mm. However in such
- * case next->vm_end == "end" and the extended
- * "vma" has vma->vm_end == next->vm_end so
- * mm->highest_vm_end doesn't need any update
- * in remove_next == 1 case.
- */
- VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
}
}
if (insert && file)
@@ -1020,6 +884,7 @@ again:
mas_destroy(&mas);
validate_mm(mm);
+
return 0;
}
@@ -1179,10 +1044,10 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
if (vm_flags & VM_SPECIAL)
return NULL;
- next = __vma_next(mm, prev);
+ next = find_vma(mm, prev ? prev->vm_end : 0);
area = next;
if (area && area->vm_end == end) /* cases 6, 7, 8 */
- next = next->vm_next;
+ next = find_vma(mm, next->vm_end);
/* verify some invariant that must be enforced by the caller */
VM_WARN_ON(prev && addr <= prev->vm_start);
@@ -1316,18 +1181,24 @@ static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_
*/
struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
{
+ MA_STATE(mas, &vma->vm_mm->mm_mt, vma->vm_end, vma->vm_end);
struct anon_vma *anon_vma = NULL;
+ struct vm_area_struct *prev, *next;
/* Try next first. */
- if (vma->vm_next) {
- anon_vma = reusable_anon_vma(vma->vm_next, vma, vma->vm_next);
+ next = mas_walk(&mas);
+ if (next) {
+ anon_vma = reusable_anon_vma(next, vma, next);
if (anon_vma)
return anon_vma;
}
+ prev = mas_prev(&mas, 0);
+ VM_BUG_ON_VMA(prev != vma, vma);
+ prev = mas_prev(&mas, 0);
/* Try prev next. */
- if (vma->vm_prev)
- anon_vma = reusable_anon_vma(vma->vm_prev, vma->vm_prev, vma);
+ if (prev)
+ anon_vma = reusable_anon_vma(prev, prev, vma);
/*
* We might reach here with anon_vma == NULL if we can't find
@@ -2101,8 +1972,8 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
if (gap_addr < address || gap_addr > TASK_SIZE)
gap_addr = TASK_SIZE;
- next = vma->vm_next;
- if (next && next->vm_start < gap_addr && vma_is_accessible(next)) {
+ next = find_vma_intersection(mm, vma->vm_end, gap_addr);
+ if (next && vma_is_accessible(next)) {
if (!(next->vm_flags & VM_GROWSUP))
return -ENOMEM;
/* Check that both stack segments have the same anon_vma? */
@@ -2153,8 +2024,6 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
/* Overwrite old entry in mtree. */
vma_mas_store(vma, &mas);
anon_vma_interval_tree_post_update_vma(vma);
- if (!vma->vm_next)
- mm->highest_vm_end = vm_end_gap(vma);
spin_unlock(&mm->page_table_lock);
perf_event_mmap(vma);
@@ -2174,16 +2043,16 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
int expand_downwards(struct vm_area_struct *vma, unsigned long address)
{
struct mm_struct *mm = vma->vm_mm;
+ MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_start);
struct vm_area_struct *prev;
int error = 0;
- MA_STATE(mas, &mm->mm_mt, 0, 0);
address &= PAGE_MASK;
if (address < mmap_min_addr)
return -EPERM;
/* Enforce stack_guard_gap */
- prev = vma->vm_prev;
+ prev = mas_prev(&mas, 0);
/* Check that both stack segments have the same anon_vma? */
if (prev && !(prev->vm_flags & VM_GROWSDOWN) &&
vma_is_accessible(prev)) {
@@ -2318,25 +2187,26 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
EXPORT_SYMBOL_GPL(find_extend_vma);
/*
- * Ok - we have the memory areas we should free on the vma list,
- * so release them, and do the vma updates.
+ * Ok - we have the memory areas we should free on a maple tree so release them,
+ * and do the vma updates.
*
* Called with the mm semaphore held.
*/
-static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
+static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas)
{
unsigned long nr_accounted = 0;
+ struct vm_area_struct *vma;
/* Update high watermark before we lower total_vm */
update_hiwater_vm(mm);
- do {
+ mas_for_each(mas, vma, ULONG_MAX) {
long nrpages = vma_pages(vma);
if (vma->vm_flags & VM_ACCOUNT)
nr_accounted += nrpages;
vm_stat_account(mm, vma->vm_flags, -nrpages);
- vma = remove_vma(vma);
- } while (vma);
+ remove_vma(vma);
+ }
vm_unacct_memory(nr_accounted);
validate_mm(mm);
}
@@ -2346,18 +2216,18 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
*
* Called with the mm semaphore held.
*/
-static void unmap_region(struct mm_struct *mm,
+static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
struct vm_area_struct *vma, struct vm_area_struct *prev,
+ struct vm_area_struct *next,
unsigned long start, unsigned long end)
{
- struct vm_area_struct *next = __vma_next(mm, prev);
struct mmu_gather tlb;
lru_add_drain();
tlb_gather_mmu(&tlb, mm);
update_hiwater_rss(mm);
- unmap_vmas(&tlb, vma, start, end);
- free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
+ unmap_vmas(&tlb, mt, vma, start, end);
+ free_pgtables(&tlb, mt, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
next ? next->vm_start : USER_PGTABLES_CEILING);
tlb_finish_mmu(&tlb);
}
@@ -2444,24 +2314,17 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
return __split_vma(mm, vma, addr, new_below);
}
-static inline int
-unlock_range(struct vm_area_struct *start, struct vm_area_struct **tail,
- unsigned long limit)
+static inline int munmap_sidetree(struct vm_area_struct *vma,
+ struct ma_state *mas_detach)
{
- struct mm_struct *mm = start->vm_mm;
- struct vm_area_struct *tmp = start;
- int count = 0;
-
- while (tmp && tmp->vm_start < limit) {
- *tail = tmp;
- count++;
- if (tmp->vm_flags & VM_LOCKED)
- mm->locked_vm -= vma_pages(tmp);
+ mas_set_range(mas_detach, vma->vm_start, vma->vm_end - 1);
+ if (mas_store_gfp(mas_detach, vma, GFP_KERNEL))
+ return -ENOMEM;
- tmp = tmp->vm_next;
- }
+ if (vma->vm_flags & VM_LOCKED)
+ vma->vm_mm->locked_vm -= vma_pages(vma);
- return count;
+ return 0;
}
/*
@@ -2481,9 +2344,13 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
struct mm_struct *mm, unsigned long start,
unsigned long end, struct list_head *uf, bool downgrade)
{
- struct vm_area_struct *prev, *last;
+ struct vm_area_struct *prev, *next = NULL;
+ struct maple_tree mt_detach;
+ int count = 0;
int error = -ENOMEM;
- /* we have start < vma->vm_end */
+ MA_STATE(mas_detach, &mt_detach, 0, 0);
+ mt_init_flags(&mt_detach, MT_FLAGS_LOCK_EXTERN);
+ mt_set_external_lock(&mt_detach, &mm->mmap_lock);
if (mas_preallocate(mas, vma, GFP_KERNEL))
return -ENOMEM;
@@ -2496,6 +2363,8 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
* unmapped vm_area_struct will remain in use: so lower split_vma
* places tmp vma above, and higher split_vma places tmp vma below.
*/
+
+ /* Does it split the first one? */
if (start > vma->vm_start) {
/*
@@ -2506,35 +2375,60 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
goto map_count_exceeded;
+ /*
+ * mas_pause() is not needed since mas->index needs to be set
+ * differently than vma->vm_end anyways.
+ */
error = __split_vma(mm, vma, start, 0);
if (error)
- goto split_failed;
+ goto start_split_failed;
- prev = vma;
- vma = __vma_next(mm, prev);
- mas->index = start;
- mas_reset(mas);
- } else {
- prev = vma->vm_prev;
+ mas_set(mas, start);
+ vma = mas_walk(mas);
}
- if (vma->vm_end >= end)
- last = vma;
- else
- last = find_vma_intersection(mm, end - 1, end);
+ prev = mas_prev(mas, 0);
+ if (unlikely((!prev)))
+ mas_set(mas, start);
+
+ /*
+ * Detach a range of VMAs from the mm. Using next as a temp variable as
+ * it is always overwritten.
+ */
+ mas_for_each(mas, next, end - 1) {
+ /* Does it split the end? */
+ if (next->vm_end > end) {
+ struct vm_area_struct *split;
+
+ error = __split_vma(mm, next, end, 1);
+ if (error)
+ goto end_split_failed;
- /* Does it split the last one? */
- if (last && end < last->vm_end) {
- error = __split_vma(mm, last, end, 1);
+ mas_set(mas, end);
+ split = mas_prev(mas, 0);
+ error = munmap_sidetree(split, &mas_detach);
+ if (error)
+ goto munmap_sidetree_failed;
+ count++;
+ if (vma == next)
+ vma = split;
+ break;
+ }
+ error = munmap_sidetree(next, &mas_detach);
if (error)
- goto split_failed;
+ goto munmap_sidetree_failed;
- if (vma == last)
- vma = __vma_next(mm, prev);
- mas_reset(mas);
+ count++;
+#ifdef CONFIG_DEBUG_VM_MAPLE_TREE
+ BUG_ON(next->vm_start < start);
+ BUG_ON(next->vm_start > end);
+#endif
}
+ if (!next)
+ next = mas_next(mas, ULONG_MAX);
+
if (unlikely(uf)) {
/*
* If userfaultfd_unmap_prep returns an error the vmas
@@ -2551,35 +2445,36 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
goto userfaultfd_error;
}
- /*
- * unlock any mlock()ed ranges before detaching vmas, count the number
- * of VMAs to be dropped, and return the tail entry of the affected
- * area.
- */
- mm->map_count -= unlock_range(vma, &last, end);
- /* Drop removed area from the tree */
+ /* Point of no return */
+ mas_set_range(mas, start, end - 1);
+#if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
+ /* Make sure no VMAs are about to be lost. */
+ {
+ MA_STATE(test, &mt_detach, start, end - 1);
+ struct vm_area_struct *vma_mas, *vma_test;
+ int test_count = 0;
+
+ rcu_read_lock();
+ vma_test = mas_find(&test, end - 1);
+ mas_for_each(mas, vma_mas, end - 1) {
+ BUG_ON(vma_mas != vma_test);
+ test_count++;
+ vma_test = mas_next(&test, end - 1);
+ }
+ rcu_read_unlock();
+ BUG_ON(count != test_count);
+ mas_set_range(mas, start, end - 1);
+ }
+#endif
mas_store_prealloc(mas, NULL);
-
- /* Detach vmas from the MM linked list */
- vma->vm_prev = NULL;
- if (prev)
- prev->vm_next = last->vm_next;
- else
- mm->mmap = last->vm_next;
-
- if (last->vm_next) {
- last->vm_next->vm_prev = prev;
- last->vm_next = NULL;
- } else
- mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
-
+ mm->map_count -= count;
/*
* Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
* VM_GROWSUP VMA. Such VMAs can change their size under
* down_read(mmap_lock) and collide with the VMA we are about to unmap.
*/
if (downgrade) {
- if (last && (last->vm_flags & VM_GROWSDOWN))
+ if (next && (next->vm_flags & VM_GROWSDOWN))
downgrade = false;
else if (prev && (prev->vm_flags & VM_GROWSUP))
downgrade = false;
@@ -2587,18 +2482,22 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
mmap_write_downgrade(mm);
}
- unmap_region(mm, vma, prev, start, end);
-
- /* Fix up all other VM information */
- remove_vma_list(mm, vma);
+ unmap_region(mm, &mt_detach, vma, prev, next, start, end);
+ /* Statistics and freeing VMAs */
+ mas_set(&mas_detach, start);
+ remove_mt(mm, &mas_detach);
+ __mt_destroy(&mt_detach);
validate_mm(mm);
return downgrade ? 1 : 0;
-map_count_exceeded:
-split_failed:
userfaultfd_error:
+munmap_sidetree_failed:
+end_split_failed:
+ __mt_destroy(&mt_detach);
+start_split_failed:
+map_count_exceeded:
mas_destroy(mas);
return error;
}
@@ -2833,7 +2732,6 @@ cannot_expand:
i_mmap_lock_write(vma->vm_file->f_mapping);
vma_mas_store(vma, &mas);
- __vma_link_list(mm, vma, prev);
mm->map_count++;
if (vma->vm_file) {
if (vma->vm_flags & VM_SHARED)
@@ -2891,7 +2789,7 @@ unmap_and_free_vma:
vma->vm_file = NULL;
/* Undo any partial mapping done by a device driver. */
- unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
+ unmap_region(mm, mas.tree, vma, prev, next, vma->vm_start, vma->vm_end);
if (vm_flags & VM_SHARED)
mapping_unmap_writable(file->f_mapping);
free_vma:
@@ -2979,11 +2877,12 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
goto out;
if (start + size > vma->vm_end) {
- struct vm_area_struct *next;
+ VMA_ITERATOR(vmi, mm, vma->vm_end);
+ struct vm_area_struct *next, *prev = vma;
- for (next = vma->vm_next; next; next = next->vm_next) {
+ for_each_vma_range(vmi, next, start + size) {
/* hole between vmas ? */
- if (next->vm_start != next->vm_prev->vm_end)
+ if (next->vm_start != prev->vm_end)
goto out;
if (next->vm_file != vma->vm_file)
@@ -2992,8 +2891,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
if (next->vm_flags != vma->vm_flags)
goto out;
- if (start + size <= next->vm_end)
- break;
+ prev = next;
}
if (!next)
@@ -3060,11 +2958,9 @@ static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
* do some brk-specific accounting here.
*/
static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
- unsigned long addr, unsigned long len,
- unsigned long flags)
+ unsigned long addr, unsigned long len, unsigned long flags)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *prev = NULL;
validate_mm_mt(mm);
/*
@@ -3107,7 +3003,6 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
khugepaged_enter_vma(vma, flags);
goto out;
}
- prev = vma;
/* create a vma struct for an anonymous mapping */
vma = vm_area_alloc(mm);
@@ -3124,10 +3019,6 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
if (mas_store_gfp(mas, vma, GFP_KERNEL))
goto mas_store_fail;
- if (!prev)
- prev = mas_prev(mas, 0);
-
- __vma_link_list(mm, vma, prev);
mm->map_count++;
out:
perf_event_mmap(vma);
@@ -3136,7 +3027,7 @@ out:
if (flags & VM_LOCKED)
mm->locked_vm += (len >> PAGE_SHIFT);
vma->vm_flags |= VM_SOFTDIRTY;
- validate_mm_mt(mm);
+ validate_mm(mm);
return 0;
mas_store_fail:
@@ -3217,6 +3108,8 @@ void exit_mmap(struct mm_struct *mm)
struct mmu_gather tlb;
struct vm_area_struct *vma;
unsigned long nr_accounted = 0;
+ MA_STATE(mas, &mm->mm_mt, 0, 0);
+ int count = 0;
/* mm's last user has gone, and its about to be pulled down */
mmu_notifier_release(mm);
@@ -3241,7 +3134,7 @@ void exit_mmap(struct mm_struct *mm)
mmap_write_lock(mm);
arch_exit_mmap(mm);
- vma = mm->mmap;
+ vma = mas_find(&mas, ULONG_MAX);
if (!vma) {
/* Can happen if dup_mmap() received an OOM */
mmap_write_unlock(mm);
@@ -3252,22 +3145,29 @@ void exit_mmap(struct mm_struct *mm)
flush_cache_mm(mm);
tlb_gather_mmu_fullmm(&tlb, mm);
/* update_hiwater_rss(mm) here? but nobody should be looking */
- /* Use -1 here to ensure all VMAs in the mm are unmapped */
- unmap_vmas(&tlb, vma, 0, -1);
- free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
+ /* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */
+ unmap_vmas(&tlb, &mm->mm_mt, vma, 0, ULONG_MAX);
+ free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS,
+ USER_PGTABLES_CEILING);
tlb_finish_mmu(&tlb);
- /* Walk the list again, actually closing and freeing it. */
- while (vma) {
+ /*
+ * Walk the list again, actually closing and freeing it, with preemption
+ * enabled, without holding any MM locks besides the unreachable
+ * mmap_write_lock.
+ */
+ do {
if (vma->vm_flags & VM_ACCOUNT)
nr_accounted += vma_pages(vma);
- vma = remove_vma(vma);
+ remove_vma(vma);
+ count++;
cond_resched();
- }
+ } while ((vma = mas_find(&mas, ULONG_MAX)) != NULL);
+
+ BUG_ON(count != mm->map_count);
trace_exit_mmap(mm);
__mt_destroy(&mm->mm_mt);
- mm->mmap = NULL;
mmap_write_unlock(mm);
vm_unacct_memory(nr_accounted);
}
@@ -3306,7 +3206,7 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
}
- if (vma_link(mm, vma, prev)) {
+ if (vma_link(mm, vma)) {
vm_unacct_memory(charged);
return -ENOMEM;
}
@@ -3338,7 +3238,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
faulted_in_anon_vma = false;
}
- if (range_has_overlap(mm, addr, addr + len, &prev))
+ new_vma = find_vma_prev(mm, addr, &prev);
+ if (new_vma && new_vma->vm_start < addr + len)
return NULL; /* should never get here */
new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
@@ -3381,7 +3282,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
get_file(new_vma->vm_file);
if (new_vma->vm_ops && new_vma->vm_ops->open)
new_vma->vm_ops->open(new_vma);
- if (vma_link(mm, new_vma, prev))
+ if (vma_link(mm, new_vma))
goto out_vma_link;
*need_rmap_locks = false;
}
@@ -3686,12 +3587,13 @@ int mm_take_all_locks(struct mm_struct *mm)
{
struct vm_area_struct *vma;
struct anon_vma_chain *avc;
+ MA_STATE(mas, &mm->mm_mt, 0, 0);
mmap_assert_write_locked(mm);
mutex_lock(&mm_all_locks_mutex);
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ mas_for_each(&mas, vma, ULONG_MAX) {
if (signal_pending(current))
goto out_unlock;
if (vma->vm_file && vma->vm_file->f_mapping &&
@@ -3699,7 +3601,8 @@ int mm_take_all_locks(struct mm_struct *mm)
vm_lock_mapping(mm, vma->vm_file->f_mapping);
}
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ mas_set(&mas, 0);
+ mas_for_each(&mas, vma, ULONG_MAX) {
if (signal_pending(current))
goto out_unlock;
if (vma->vm_file && vma->vm_file->f_mapping &&
@@ -3707,7 +3610,8 @@ int mm_take_all_locks(struct mm_struct *mm)
vm_lock_mapping(mm, vma->vm_file->f_mapping);
}
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ mas_set(&mas, 0);
+ mas_for_each(&mas, vma, ULONG_MAX) {
if (signal_pending(current))
goto out_unlock;
if (vma->anon_vma)
@@ -3766,11 +3670,12 @@ void mm_drop_all_locks(struct mm_struct *mm)
{
struct vm_area_struct *vma;
struct anon_vma_chain *avc;
+ MA_STATE(mas, &mm->mm_mt, 0, 0);
mmap_assert_write_locked(mm);
BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ mas_for_each(&mas, vma, ULONG_MAX) {
if (vma->anon_vma)
list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
vm_unlock_anon_vma(avc->anon_vma);