summaryrefslogtreecommitdiff
path: root/mm/mmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/mmap.c')
-rw-r--r--mm/mmap.c1100
1 files changed, 522 insertions, 578 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
index 425a9349e610..ff68a67a2a7c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -78,7 +78,7 @@ core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
struct vm_area_struct *vma, struct vm_area_struct *prev,
struct vm_area_struct *next, unsigned long start,
- unsigned long end);
+ unsigned long end, bool mm_wr_locked);
static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
{
@@ -144,6 +144,24 @@ static void remove_vma(struct vm_area_struct *vma)
vm_area_free(vma);
}
+static inline struct vm_area_struct *vma_prev_limit(struct vma_iterator *vmi,
+ unsigned long min)
+{
+ return mas_prev(&vmi->mas, min);
+}
+
+static inline int vma_iter_clear_gfp(struct vma_iterator *vmi,
+ unsigned long start, unsigned long end, gfp_t gfp)
+{
+ vmi->mas.index = start;
+ vmi->mas.last = end - 1;
+ mas_store_gfp(&vmi->mas, NULL, gfp);
+ if (unlikely(mas_is_err(&vmi->mas)))
+ return -ENOMEM;
+
+ return 0;
+}
+
/*
* check_brk_limits() - Use platform specific check of range & verify mlock
* limits.
@@ -162,10 +180,7 @@ static int check_brk_limits(unsigned long addr, unsigned long len)
return mlock_future_check(current->mm, current->mm->def_flags, len);
}
-static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
- unsigned long newbrk, unsigned long oldbrk,
- struct list_head *uf);
-static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *brkvma,
+static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *brkvma,
unsigned long addr, unsigned long request, unsigned long flags);
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
@@ -176,7 +191,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
bool populate;
bool downgraded = false;
LIST_HEAD(uf);
- MA_STATE(mas, &mm->mm_mt, 0, 0);
+ struct vma_iterator vmi;
if (mmap_write_lock_killable(mm))
return -EINTR;
@@ -218,23 +233,23 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
/*
* Always allow shrinking brk.
- * do_brk_munmap() may downgrade mmap_lock to read.
+ * do_vma_munmap() may downgrade mmap_lock to read.
*/
if (brk <= mm->brk) {
int ret;
/* Search one past newbrk */
- mas_set(&mas, newbrk);
- brkvma = mas_find(&mas, oldbrk);
+ vma_iter_init(&vmi, mm, newbrk);
+ brkvma = vma_find(&vmi, oldbrk);
if (!brkvma || brkvma->vm_start >= oldbrk)
goto out; /* mapping intersects with an existing non-brk vma. */
/*
* mm->brk must be protected by write mmap_lock.
- * do_brk_munmap() may downgrade the lock, so update it
- * before calling do_brk_munmap().
+ * do_vma_munmap() may downgrade the lock, so update it
+ * before calling do_vma_munmap().
*/
mm->brk = brk;
- ret = do_brk_munmap(&mas, brkvma, newbrk, oldbrk, &uf);
+ ret = do_vma_munmap(&vmi, brkvma, newbrk, oldbrk, &uf, true);
if (ret == 1) {
downgraded = true;
goto success;
@@ -252,14 +267,14 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
* Only check if the next VMA is within the stack_guard_gap of the
* expansion area
*/
- mas_set(&mas, oldbrk);
- next = mas_find(&mas, newbrk - 1 + PAGE_SIZE + stack_guard_gap);
+ vma_iter_init(&vmi, mm, oldbrk);
+ next = vma_find(&vmi, newbrk + PAGE_SIZE + stack_guard_gap);
if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
goto out;
- brkvma = mas_prev(&mas, mm->start_brk);
+ brkvma = vma_prev_limit(&vmi, mm->start_brk);
/* Ok, looks good - let it rip. */
- if (do_brk_flags(&mas, brkvma, oldbrk, newbrk - oldbrk, 0) < 0)
+ if (do_brk_flags(&vmi, brkvma, oldbrk, newbrk - oldbrk, 0) < 0)
goto out;
mm->brk = brk;
@@ -417,85 +432,218 @@ static void __vma_link_file(struct vm_area_struct *vma,
flush_dcache_mmap_unlock(mapping);
}
+static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+ VMA_ITERATOR(vmi, mm, 0);
+ struct address_space *mapping = NULL;
+
+ if (vma_iter_prealloc(&vmi))
+ return -ENOMEM;
+
+ if (vma->vm_file) {
+ mapping = vma->vm_file->f_mapping;
+ i_mmap_lock_write(mapping);
+ }
+
+ vma_iter_store(&vmi, vma);
+
+ if (mapping) {
+ __vma_link_file(vma, mapping);
+ i_mmap_unlock_write(mapping);
+ }
+
+ mm->map_count++;
+ validate_mm(mm);
+ return 0;
+}
+
/*
- * vma_mas_store() - Store a VMA in the maple tree.
- * @vma: The vm_area_struct
- * @mas: The maple state
- *
- * Efficient way to store a VMA in the maple tree when the @mas has already
- * walked to the correct location.
- *
- * Note: the end address is inclusive in the maple tree.
+ * init_multi_vma_prep() - Initializer for struct vma_prepare
+ * @vp: The vma_prepare struct
+ * @vma: The vma that will be altered once locked
+ * @next: The next vma if it is to be adjusted
+ * @remove: The first vma to be removed
+ * @remove2: The second vma to be removed
*/
-void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas)
+static inline void init_multi_vma_prep(struct vma_prepare *vp,
+ struct vm_area_struct *vma, struct vm_area_struct *next,
+ struct vm_area_struct *remove, struct vm_area_struct *remove2)
{
- trace_vma_store(mas->tree, vma);
- mas_set_range(mas, vma->vm_start, vma->vm_end - 1);
- mas_store_prealloc(mas, vma);
+ memset(vp, 0, sizeof(struct vma_prepare));
+ vp->vma = vma;
+ vp->anon_vma = vma->anon_vma;
+ vp->remove = remove;
+ vp->remove2 = remove2;
+ vp->adj_next = next;
+ if (!vp->anon_vma && next)
+ vp->anon_vma = next->anon_vma;
+
+ vp->file = vma->vm_file;
+ if (vp->file)
+ vp->mapping = vma->vm_file->f_mapping;
+
}
/*
- * vma_mas_remove() - Remove a VMA from the maple tree.
- * @vma: The vm_area_struct
- * @mas: The maple state
- *
- * Efficient way to remove a VMA from the maple tree when the @mas has already
- * been established and points to the correct location.
- * Note: the end address is inclusive in the maple tree.
+ * init_vma_prep() - Initializer wrapper for vma_prepare struct
+ * @vp: The vma_prepare struct
+ * @vma: The vma that will be altered once locked
*/
-void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas)
+static inline void init_vma_prep(struct vma_prepare *vp,
+ struct vm_area_struct *vma)
{
- trace_vma_mas_szero(mas->tree, vma->vm_start, vma->vm_end - 1);
- mas->index = vma->vm_start;
- mas->last = vma->vm_end - 1;
- mas_store_prealloc(mas, NULL);
+ init_multi_vma_prep(vp, vma, NULL, NULL, NULL);
}
+
/*
- * vma_mas_szero() - Set a given range to zero. Used when modifying a
- * vm_area_struct start or end.
- *
- * @mas: The maple tree ma_state
- * @start: The start address to zero
- * @end: The end address to zero.
+ * vma_prepare() - Helper function for handling locking VMAs prior to altering
+ * @vp: The initialized vma_prepare struct
*/
-static inline void vma_mas_szero(struct ma_state *mas, unsigned long start,
- unsigned long end)
+static inline void vma_prepare(struct vma_prepare *vp)
{
- trace_vma_mas_szero(mas->tree, start, end - 1);
- mas_set_range(mas, start, end - 1);
- mas_store_prealloc(mas, NULL);
+ if (vp->file) {
+ uprobe_munmap(vp->vma, vp->vma->vm_start, vp->vma->vm_end);
+
+ if (vp->adj_next)
+ uprobe_munmap(vp->adj_next, vp->adj_next->vm_start,
+ vp->adj_next->vm_end);
+
+ i_mmap_lock_write(vp->mapping);
+ if (vp->insert && vp->insert->vm_file) {
+ /*
+ * Put into interval tree now, so instantiated pages
+ * are visible to arm/parisc __flush_dcache_page
+ * throughout; but we cannot insert into address
+ * space until vma start or end is updated.
+ */
+ __vma_link_file(vp->insert,
+ vp->insert->vm_file->f_mapping);
+ }
+ }
+
+ if (vp->anon_vma) {
+ anon_vma_lock_write(vp->anon_vma);
+ anon_vma_interval_tree_pre_update_vma(vp->vma);
+ if (vp->adj_next)
+ anon_vma_interval_tree_pre_update_vma(vp->adj_next);
+ }
+
+ if (vp->file) {
+ flush_dcache_mmap_lock(vp->mapping);
+ vma_interval_tree_remove(vp->vma, &vp->mapping->i_mmap);
+ if (vp->adj_next)
+ vma_interval_tree_remove(vp->adj_next,
+ &vp->mapping->i_mmap);
+ }
+
}
-static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
-{
- MA_STATE(mas, &mm->mm_mt, 0, 0);
- struct address_space *mapping = NULL;
+/*
+ * vma_complete- Helper function for handling the unlocking after altering VMAs,
+ * or for inserting a VMA.
+ *
+ * @vp: The vma_prepare struct
+ * @vmi: The vma iterator
+ * @mm: The mm_struct
+ */
+static inline void vma_complete(struct vma_prepare *vp,
+ struct vma_iterator *vmi, struct mm_struct *mm)
+{
+ if (vp->file) {
+ if (vp->adj_next)
+ vma_interval_tree_insert(vp->adj_next,
+ &vp->mapping->i_mmap);
+ vma_interval_tree_insert(vp->vma, &vp->mapping->i_mmap);
+ flush_dcache_mmap_unlock(vp->mapping);
+ }
+
+ if (vp->remove && vp->file) {
+ __remove_shared_vm_struct(vp->remove, vp->file, vp->mapping);
+ if (vp->remove2)
+ __remove_shared_vm_struct(vp->remove2, vp->file,
+ vp->mapping);
+ } else if (vp->insert) {
+ /*
+ * split_vma has split insert from vma, and needs
+ * us to insert it before dropping the locks
+ * (it may either follow vma or precede it).
+ */
+ vma_iter_store(vmi, vp->insert);
+ mm->map_count++;
+ }
- if (mas_preallocate(&mas, vma, GFP_KERNEL))
- return -ENOMEM;
+ if (vp->anon_vma) {
+ anon_vma_interval_tree_post_update_vma(vp->vma);
+ if (vp->adj_next)
+ anon_vma_interval_tree_post_update_vma(vp->adj_next);
+ anon_vma_unlock_write(vp->anon_vma);
+ }
- if (vma->vm_file) {
- mapping = vma->vm_file->f_mapping;
- i_mmap_lock_write(mapping);
+ if (vp->file) {
+ i_mmap_unlock_write(vp->mapping);
+ uprobe_mmap(vp->vma);
+
+ if (vp->adj_next)
+ uprobe_mmap(vp->adj_next);
}
- vma_mas_store(vma, &mas);
+ if (vp->remove) {
+again:
+ if (vp->file) {
+ uprobe_munmap(vp->remove, vp->remove->vm_start,
+ vp->remove->vm_end);
+ fput(vp->file);
+ }
+ if (vp->remove->anon_vma)
+ anon_vma_merge(vp->vma, vp->remove);
+ mm->map_count--;
+ mpol_put(vma_policy(vp->remove));
+ if (!vp->remove2)
+ WARN_ON_ONCE(vp->vma->vm_end < vp->remove->vm_end);
+ vm_area_free(vp->remove);
- if (mapping) {
- __vma_link_file(vma, mapping);
- i_mmap_unlock_write(mapping);
+ /*
+ * In mprotect's case 6 (see comments on vma_merge),
+ * we must remove the one after next as well.
+ */
+ if (vp->remove2) {
+ vp->remove = vp->remove2;
+ vp->remove2 = NULL;
+ goto again;
+ }
+ }
+ if (vp->insert && vp->file)
+ uprobe_mmap(vp->insert);
+}
+
+/*
+ * dup_anon_vma() - Helper function to duplicate anon_vma
+ * @dst: The destination VMA
+ * @src: The source VMA
+ *
+ * Returns: 0 on success.
+ */
+static inline int dup_anon_vma(struct vm_area_struct *dst,
+ struct vm_area_struct *src)
+{
+ /*
+ * Easily overlooked: when mprotect shifts the boundary, make sure the
+ * expanding vma has anon_vma set if the shrinking vma had, to cover any
+ * anon pages imported.
+ */
+ if (src->anon_vma && !dst->anon_vma) {
+ dst->anon_vma = src->anon_vma;
+ return anon_vma_clone(dst, src);
}
- mm->map_count++;
- validate_mm(mm);
return 0;
}
/*
* vma_expand - Expand an existing VMA
*
- * @mas: The maple state
+ * @vmi: The vma iterator
* @vma: The vma to expand
* @start: The start of the vma
* @end: The exclusive end of the vma
@@ -509,96 +657,46 @@ static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
*
* Returns: 0 on success
*/
-inline int vma_expand(struct ma_state *mas, struct vm_area_struct *vma,
- unsigned long start, unsigned long end, pgoff_t pgoff,
- struct vm_area_struct *next)
+int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end, pgoff_t pgoff,
+ struct vm_area_struct *next)
{
- struct mm_struct *mm = vma->vm_mm;
- struct address_space *mapping = NULL;
- struct rb_root_cached *root = NULL;
- struct anon_vma *anon_vma = vma->anon_vma;
- struct file *file = vma->vm_file;
bool remove_next = false;
+ struct vma_prepare vp;
if (next && (vma != next) && (end == next->vm_end)) {
- remove_next = true;
- if (next->anon_vma && !vma->anon_vma) {
- int error;
+ int ret;
- anon_vma = next->anon_vma;
- vma->anon_vma = anon_vma;
- error = anon_vma_clone(vma, next);
- if (error)
- return error;
- }
+ remove_next = true;
+ ret = dup_anon_vma(vma, next);
+ if (ret)
+ return ret;
}
+ init_multi_vma_prep(&vp, vma, NULL, remove_next ? next : NULL, NULL);
/* Not merging but overwriting any part of next is not handled. */
- VM_BUG_ON(next && !remove_next && next != vma && end > next->vm_start);
+ VM_WARN_ON(next && !vp.remove &&
+ next != vma && end > next->vm_start);
/* Only handles expanding */
- VM_BUG_ON(vma->vm_start < start || vma->vm_end > end);
+ VM_WARN_ON(vma->vm_start < start || vma->vm_end > end);
- if (mas_preallocate(mas, vma, GFP_KERNEL))
+ if (vma_iter_prealloc(vmi))
goto nomem;
vma_adjust_trans_huge(vma, start, end, 0);
+ /* VMA iterator points to previous, so set to start if necessary */
+ if (vma_iter_addr(vmi) != start)
+ vma_iter_set(vmi, start);
- if (file) {
- mapping = file->f_mapping;
- root = &mapping->i_mmap;
- uprobe_munmap(vma, vma->vm_start, vma->vm_end);
- i_mmap_lock_write(mapping);
- }
-
- if (anon_vma) {
- anon_vma_lock_write(anon_vma);
- anon_vma_interval_tree_pre_update_vma(vma);
- }
-
- if (file) {
- flush_dcache_mmap_lock(mapping);
- vma_interval_tree_remove(vma, root);
- }
-
+ vma_prepare(&vp);
vma->vm_start = start;
vma->vm_end = end;
vma->vm_pgoff = pgoff;
/* Note: mas must be pointing to the expanding VMA */
- vma_mas_store(vma, mas);
-
- if (file) {
- vma_interval_tree_insert(vma, root);
- flush_dcache_mmap_unlock(mapping);
- }
-
- /* Expanding over the next vma */
- if (remove_next && file) {
- __remove_shared_vm_struct(next, file, mapping);
- }
-
- if (anon_vma) {
- anon_vma_interval_tree_post_update_vma(vma);
- anon_vma_unlock_write(anon_vma);
- }
-
- if (file) {
- i_mmap_unlock_write(mapping);
- uprobe_mmap(vma);
- }
-
- if (remove_next) {
- if (file) {
- uprobe_munmap(next, next->vm_start, next->vm_end);
- fput(file);
- }
- if (next->anon_vma)
- anon_vma_merge(vma, next);
- mm->map_count--;
- mpol_put(vma_policy(next));
- vm_area_free(next);
- }
+ vma_iter_store(vmi, vma);
- validate_mm(mm);
+ vma_complete(&vp, vmi, vma->vm_mm);
+ validate_mm(vma->vm_mm);
return 0;
nomem:
@@ -606,256 +704,39 @@ nomem:
}
/*
- * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
- * is already present in an i_mmap tree without adjusting the tree.
- * The following helper function should be used when such adjustments
- * are necessary. The "insert" vma (if any) is to be inserted
- * before we drop the necessary locks.
+ * vma_shrink() - Reduce an existing VMAs memory area
+ * @vmi: The vma iterator
+ * @vma: The VMA to modify
+ * @start: The new start
+ * @end: The new end
+ *
+ * Returns: 0 on success, -ENOMEM otherwise
*/
-int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
- struct vm_area_struct *expand)
+int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end, pgoff_t pgoff)
{
- struct mm_struct *mm = vma->vm_mm;
- struct vm_area_struct *next_next = NULL; /* uninit var warning */
- struct vm_area_struct *next = find_vma(mm, vma->vm_end);
- struct vm_area_struct *orig_vma = vma;
- struct address_space *mapping = NULL;
- struct rb_root_cached *root = NULL;
- struct anon_vma *anon_vma = NULL;
- struct file *file = vma->vm_file;
- bool vma_changed = false;
- long adjust_next = 0;
- int remove_next = 0;
- MA_STATE(mas, &mm->mm_mt, 0, 0);
- struct vm_area_struct *exporter = NULL, *importer = NULL;
-
- if (next && !insert) {
- if (end >= next->vm_end) {
- /*
- * vma expands, overlapping all the next, and
- * perhaps the one after too (mprotect case 6).
- * The only other cases that gets here are
- * case 1, case 7 and case 8.
- */
- if (next == expand) {
- /*
- * The only case where we don't expand "vma"
- * and we expand "next" instead is case 8.
- */
- VM_WARN_ON(end != next->vm_end);
- /*
- * remove_next == 3 means we're
- * removing "vma" and that to do so we
- * swapped "vma" and "next".
- */
- remove_next = 3;
- VM_WARN_ON(file != next->vm_file);
- swap(vma, next);
- } else {
- VM_WARN_ON(expand != vma);
- /*
- * case 1, 6, 7, remove_next == 2 is case 6,
- * remove_next == 1 is case 1 or 7.
- */
- remove_next = 1 + (end > next->vm_end);
- if (remove_next == 2)
- next_next = find_vma(mm, next->vm_end);
-
- VM_WARN_ON(remove_next == 2 &&
- end != next_next->vm_end);
- }
-
- exporter = next;
- importer = vma;
+ struct vma_prepare vp;
- /*
- * If next doesn't have anon_vma, import from vma after
- * next, if the vma overlaps with it.
- */
- if (remove_next == 2 && !next->anon_vma)
- exporter = next_next;
-
- } else if (end > next->vm_start) {
- /*
- * vma expands, overlapping part of the next:
- * mprotect case 5 shifting the boundary up.
- */
- adjust_next = (end - next->vm_start);
- exporter = next;
- importer = vma;
- VM_WARN_ON(expand != importer);
- } else if (end < vma->vm_end) {
- /*
- * vma shrinks, and !insert tells it's not
- * split_vma inserting another: so it must be
- * mprotect case 4 shifting the boundary down.
- */
- adjust_next = -(vma->vm_end - end);
- exporter = vma;
- importer = next;
- VM_WARN_ON(expand != importer);
- }
+ WARN_ON((vma->vm_start != start) && (vma->vm_end != end));
- /*
- * Easily overlooked: when mprotect shifts the boundary,
- * make sure the expanding vma has anon_vma set if the
- * shrinking vma had, to cover any anon pages imported.
- */
- if (exporter && exporter->anon_vma && !importer->anon_vma) {
- int error;
-
- importer->anon_vma = exporter->anon_vma;
- error = anon_vma_clone(importer, exporter);
- if (error)
- return error;
- }
- }
-
- if (mas_preallocate(&mas, vma, GFP_KERNEL))
+ if (vma_iter_prealloc(vmi))
return -ENOMEM;
- vma_adjust_trans_huge(orig_vma, start, end, adjust_next);
- if (file) {
- mapping = file->f_mapping;
- root = &mapping->i_mmap;
- uprobe_munmap(vma, vma->vm_start, vma->vm_end);
-
- if (adjust_next)
- uprobe_munmap(next, next->vm_start, next->vm_end);
-
- i_mmap_lock_write(mapping);
- if (insert && insert->vm_file) {
- /*
- * Put into interval tree now, so instantiated pages
- * are visible to arm/parisc __flush_dcache_page
- * throughout; but we cannot insert into address
- * space until vma start or end is updated.
- */
- __vma_link_file(insert, insert->vm_file->f_mapping);
- }
- }
-
- anon_vma = vma->anon_vma;
- if (!anon_vma && adjust_next)
- anon_vma = next->anon_vma;
- if (anon_vma) {
- VM_WARN_ON(adjust_next && next->anon_vma &&
- anon_vma != next->anon_vma);
- anon_vma_lock_write(anon_vma);
- anon_vma_interval_tree_pre_update_vma(vma);
- if (adjust_next)
- anon_vma_interval_tree_pre_update_vma(next);
- }
-
- if (file) {
- flush_dcache_mmap_lock(mapping);
- vma_interval_tree_remove(vma, root);
- if (adjust_next)
- vma_interval_tree_remove(next, root);
- }
+ init_vma_prep(&vp, vma);
+ vma_adjust_trans_huge(vma, start, end, 0);
+ vma_prepare(&vp);
- if (start != vma->vm_start) {
- if ((vma->vm_start < start) &&
- (!insert || (insert->vm_end != start))) {
- vma_mas_szero(&mas, vma->vm_start, start);
- VM_WARN_ON(insert && insert->vm_start > vma->vm_start);
- } else {
- vma_changed = true;
- }
- vma->vm_start = start;
- }
- if (end != vma->vm_end) {
- if (vma->vm_end > end) {
- if (!insert || (insert->vm_start != end)) {
- vma_mas_szero(&mas, end, vma->vm_end);
- mas_reset(&mas);
- VM_WARN_ON(insert &&
- insert->vm_end < vma->vm_end);
- }
- } else {
- vma_changed = true;
- }
- vma->vm_end = end;
- }
+ if (vma->vm_start < start)
+ vma_iter_clear(vmi, vma->vm_start, start);
- if (vma_changed)
- vma_mas_store(vma, &mas);
+ if (vma->vm_end > end)
+ vma_iter_clear(vmi, end, vma->vm_end);
+ vma->vm_start = start;
+ vma->vm_end = end;
vma->vm_pgoff = pgoff;
- if (adjust_next) {
- next->vm_start += adjust_next;
- next->vm_pgoff += adjust_next >> PAGE_SHIFT;
- vma_mas_store(next, &mas);
- }
-
- if (file) {
- if (adjust_next)
- vma_interval_tree_insert(next, root);
- vma_interval_tree_insert(vma, root);
- flush_dcache_mmap_unlock(mapping);
- }
-
- if (remove_next && file) {
- __remove_shared_vm_struct(next, file, mapping);
- if (remove_next == 2)
- __remove_shared_vm_struct(next_next, file, mapping);
- } else if (insert) {
- /*
- * split_vma has split insert from vma, and needs
- * us to insert it before dropping the locks
- * (it may either follow vma or precede it).
- */
- mas_reset(&mas);
- vma_mas_store(insert, &mas);
- mm->map_count++;
- }
-
- if (anon_vma) {
- anon_vma_interval_tree_post_update_vma(vma);
- if (adjust_next)
- anon_vma_interval_tree_post_update_vma(next);
- anon_vma_unlock_write(anon_vma);
- }
-
- if (file) {
- i_mmap_unlock_write(mapping);
- uprobe_mmap(vma);
-
- if (adjust_next)
- uprobe_mmap(next);
- }
-
- if (remove_next) {
-again:
- if (file) {
- uprobe_munmap(next, next->vm_start, next->vm_end);
- fput(file);
- }
- if (next->anon_vma)
- anon_vma_merge(vma, next);
- mm->map_count--;
- mpol_put(vma_policy(next));
- if (remove_next != 2)
- BUG_ON(vma->vm_end < next->vm_end);
- vm_area_free(next);
-
- /*
- * In mprotect's case 6 (see comments on vma_merge),
- * we must remove next_next too.
- */
- if (remove_next == 2) {
- remove_next = 1;
- next = next_next;
- goto again;
- }
- }
- if (insert && file)
- uprobe_mmap(insert);
-
- mas_destroy(&mas);
- validate_mm(mm);
-
+ vma_complete(&vp, vmi, vma->vm_mm);
+ validate_mm(vma->vm_mm);
return 0;
}
@@ -864,9 +745,9 @@ again:
* per-vma resources, so we don't attempt to merge those.
*/
static inline int is_mergeable_vma(struct vm_area_struct *vma,
- struct file *file, unsigned long vm_flags,
- struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
- struct anon_vma_name *anon_name)
+ struct file *file, unsigned long vm_flags,
+ struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
+ struct anon_vma_name *anon_name)
{
/*
* VM_SOFTDIRTY should not prevent from VMA merging, if we
@@ -985,7 +866,7 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
* It is important for case 8 that the vma NNNN overlapping the
* region AAAA is never going to extended over XXXX. Instead XXXX must
* be extended in region AAAA and NNNN must be removed. This way in
- * all cases where vma_merge succeeds, the moment vma_adjust drops the
+ * all cases where vma_merge succeeds, the moment vma_merge drops the
* rmap_locks, the properties of the merged vma will be already
* correct for the whole merged range. Some of those properties like
* vm_page_prot/vm_flags may be accessed by rmap_walks and they must
@@ -995,8 +876,14 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
* or other rmap walkers (if working on addresses beyond the "end"
* parameter) may establish ptes with the wrong permissions of NNNN
* instead of the right permissions of XXXX.
+ *
+ * In the code below:
+ * PPPP is represented by *prev
+ * NNNN is represented by *mid (and possibly equal to *next)
+ * XXXX is represented by *next or not represented at all.
+ * AAAA is not represented - it will be merged or the function will return NULL
*/
-struct vm_area_struct *vma_merge(struct mm_struct *mm,
+struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
struct vm_area_struct *prev, unsigned long addr,
unsigned long end, unsigned long vm_flags,
struct anon_vma *anon_vma, struct file *file,
@@ -1005,11 +892,19 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
struct anon_vma_name *anon_name)
{
pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
- struct vm_area_struct *mid, *next, *res;
+ pgoff_t vma_pgoff;
+ struct vm_area_struct *mid, *next, *res = NULL;
+ struct vm_area_struct *vma, *adjust, *remove, *remove2;
int err = -1;
bool merge_prev = false;
bool merge_next = false;
+ bool vma_expanded = false;
+ struct vma_prepare vp;
+ unsigned long vma_end = end;
+ long adj_next = 0;
+ unsigned long vma_start = addr;
+ validate_mm(mm);
/*
* We later require that vma->vm_flags == vm_flags,
* so this tests vma->vm_flags & VM_SPECIAL, too.
@@ -1027,13 +922,18 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
VM_WARN_ON(mid && end > mid->vm_end);
VM_WARN_ON(addr >= end);
- /* Can we merge the predecessor? */
- if (prev && prev->vm_end == addr &&
- mpol_equal(vma_policy(prev), policy) &&
- can_vma_merge_after(prev, vm_flags,
- anon_vma, file, pgoff,
- vm_userfaultfd_ctx, anon_name)) {
- merge_prev = true;
+ if (prev) {
+ res = prev;
+ vma = prev;
+ vma_start = prev->vm_start;
+ vma_pgoff = prev->vm_pgoff;
+ /* Can we merge the predecessor? */
+ if (prev->vm_end == addr && mpol_equal(vma_policy(prev), policy)
+ && can_vma_merge_after(prev, vm_flags, anon_vma, file,
+ pgoff, vm_userfaultfd_ctx, anon_name)) {
+ merge_prev = true;
+ vma_prev(vmi);
+ }
}
/* Can we merge the successor? */
if (next && end == next->vm_start &&
@@ -1043,34 +943,87 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
vm_userfaultfd_ctx, anon_name)) {
merge_next = true;
}
+
+ remove = remove2 = adjust = NULL;
/* Can we merge both the predecessor and the successor? */
if (merge_prev && merge_next &&
- is_mergeable_anon_vma(prev->anon_vma,
- next->anon_vma, NULL)) { /* cases 1, 6 */
- err = __vma_adjust(prev, prev->vm_start,
- next->vm_end, prev->vm_pgoff, NULL,
- prev);
- res = prev;
- } else if (merge_prev) { /* cases 2, 5, 7 */
- err = __vma_adjust(prev, prev->vm_start,
- end, prev->vm_pgoff, NULL, prev);
- res = prev;
+ is_mergeable_anon_vma(prev->anon_vma, next->anon_vma, NULL)) {
+ remove = mid; /* case 1 */
+ vma_end = next->vm_end;
+ err = dup_anon_vma(res, remove);
+ if (mid != next) { /* case 6 */
+ remove2 = next;
+ if (!remove->anon_vma)
+ err = dup_anon_vma(res, remove2);
+ }
+ } else if (merge_prev) {
+ err = 0; /* case 2 */
+ if (mid && end > mid->vm_start) {
+ err = dup_anon_vma(res, mid);
+ if (end == mid->vm_end) { /* case 7 */
+ remove = mid;
+ } else { /* case 5 */
+ adjust = mid;
+ adj_next = (end - mid->vm_start);
+ }
+ }
} else if (merge_next) {
- if (prev && addr < prev->vm_end) /* case 4 */
- err = __vma_adjust(prev, prev->vm_start,
- addr, prev->vm_pgoff, NULL, next);
- else /* cases 3, 8 */
- err = __vma_adjust(mid, addr, next->vm_end,
- next->vm_pgoff - pglen, NULL, next);
res = next;
+ if (prev && addr < prev->vm_end) { /* case 4 */
+ vma_end = addr;
+ adjust = mid;
+ adj_next = -(vma->vm_end - addr);
+ err = dup_anon_vma(adjust, prev);
+ } else {
+ vma = next; /* case 3 */
+ vma_start = addr;
+ vma_end = next->vm_end;
+ vma_pgoff = mid->vm_pgoff;
+ err = 0;
+ if (mid != next) { /* case 8 */
+ remove = mid;
+ err = dup_anon_vma(res, remove);
+ }
+ }
}
- /*
- * Cannot merge with predecessor or successor or error in __vma_adjust?
- */
+ /* Cannot merge or error in anon_vma clone */
if (err)
return NULL;
+
+ if (vma_iter_prealloc(vmi))
+ return NULL;
+
+ vma_adjust_trans_huge(vma, vma_start, vma_end, adj_next);
+ init_multi_vma_prep(&vp, vma, adjust, remove, remove2);
+ VM_WARN_ON(vp.anon_vma && adjust && adjust->anon_vma &&
+ vp.anon_vma != adjust->anon_vma);
+
+ vma_prepare(&vp);
+ if (vma_start < vma->vm_start || vma_end > vma->vm_end)
+ vma_expanded = true;
+
+ vma->vm_start = vma_start;
+ vma->vm_end = vma_end;
+ vma->vm_pgoff = vma_pgoff;
+
+ if (vma_expanded)
+ vma_iter_store(vmi, vma);
+
+ if (adj_next) {
+ adjust->vm_start += adj_next;
+ adjust->vm_pgoff += adj_next >> PAGE_SHIFT;
+ if (adj_next < 0) {
+ WARN_ON(vma_expanded);
+ vma_iter_store(vmi, next);
+ }
+ }
+
+ vma_complete(&vp, vmi, mm);
+ vma_iter_free(vmi);
+ validate_mm(mm);
khugepaged_enter_vma(res, vm_flags);
+
return res;
}
@@ -1558,8 +1511,8 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
* the correct alignment and offset, all from @info. Note: current->mm is used
* for the search.
*
- * @info: The unmapped area information including the range (low_limit -
- * hight_limit), the alignment offset and mask.
+ * @info: The unmapped area information including the range [low_limit -
+ * high_limit), the alignment offset and mask.
*
* Return: A memory address or -ENOMEM.
*/
@@ -1585,11 +1538,11 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
/**
* unmapped_area_topdown() - Find an area between the low_limit and the
- * high_limit with * the correct alignment and offset at the highest available
+ * high_limit with the correct alignment and offset at the highest available
* address, all from @info. Note: current->mm is used for the search.
*
- * @info: The unmapped area information including the range (low_limit -
- * hight_limit), the alignment offset and mask.
+ * @info: The unmapped area information including the range [low_limit -
+ * high_limit), the alignment offset and mask.
*
* Return: A memory address or -ENOMEM.
*/
@@ -1938,7 +1891,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
/* Check that both stack segments have the same anon_vma? */
}
- if (mas_preallocate(&mas, vma, GFP_KERNEL))
+ if (mas_preallocate(&mas, GFP_KERNEL))
return -ENOMEM;
/* We must make sure the anon_vma is allocated. */
@@ -1981,7 +1934,8 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
anon_vma_interval_tree_pre_update_vma(vma);
vma->vm_end = address;
/* Overwrite old entry in mtree. */
- vma_mas_store(vma, &mas);
+ mas_set_range(&mas, vma->vm_start, address - 1);
+ mas_store_prealloc(&mas, vma);
anon_vma_interval_tree_post_update_vma(vma);
spin_unlock(&mm->page_table_lock);
@@ -2019,7 +1973,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
return -ENOMEM;
}
- if (mas_preallocate(&mas, vma, GFP_KERNEL))
+ if (mas_preallocate(&mas, GFP_KERNEL))
return -ENOMEM;
/* We must make sure the anon_vma is allocated. */
@@ -2063,7 +2017,8 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
vma->vm_start = address;
vma->vm_pgoff -= grow;
/* Overwrite old entry in mtree. */
- vma_mas_store(vma, &mas);
+ mas_set_range(&mas, address, vma->vm_end - 1);
+ mas_store_prealloc(&mas, vma);
anon_vma_interval_tree_post_update_vma(vma);
spin_unlock(&mm->page_table_lock);
@@ -2178,14 +2133,14 @@ static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas)
static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
struct vm_area_struct *vma, struct vm_area_struct *prev,
struct vm_area_struct *next,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long end, bool mm_wr_locked)
{
struct mmu_gather tlb;
lru_add_drain();
tlb_gather_mmu(&tlb, mm);
update_hiwater_rss(mm);
- unmap_vmas(&tlb, mt, vma, start, end);
+ unmap_vmas(&tlb, mt, vma, start, end, mm_wr_locked);
free_pgtables(&tlb, mt, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
next ? next->vm_start : USER_PGTABLES_CEILING);
tlb_finish_mmu(&tlb);
@@ -2194,13 +2149,19 @@ static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
/*
* __split_vma() bypasses sysctl_max_map_count checking. We use this where it
* has already been checked or doesn't make sense to fail.
+ * VMA Iterator will point to the end VMA.
*/
-int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long addr, int new_below)
{
+ struct vma_prepare vp;
struct vm_area_struct *new;
int err;
- validate_mm_mt(mm);
+
+ validate_mm_mt(vma->vm_mm);
+
+ WARN_ON(vma->vm_start >= addr);
+ WARN_ON(vma->vm_end <= addr);
if (vma->vm_ops && vma->vm_ops->may_split) {
err = vma->vm_ops->may_split(vma, addr);
@@ -2212,16 +2173,20 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
if (!new)
return -ENOMEM;
- if (new_below)
+ err = -ENOMEM;
+ if (vma_iter_prealloc(vmi))
+ goto out_free_vma;
+
+ if (new_below) {
new->vm_end = addr;
- else {
+ } else {
new->vm_start = addr;
new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
}
err = vma_dup_policy(vma, new);
if (err)
- goto out_free_vma;
+ goto out_free_vmi;
err = anon_vma_clone(new, vma);
if (err)
@@ -2233,30 +2198,34 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
if (new->vm_ops && new->vm_ops->open)
new->vm_ops->open(new);
- if (new_below)
- err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
- ((addr - new->vm_start) >> PAGE_SHIFT), new);
- else
- err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+ vma_adjust_trans_huge(vma, vma->vm_start, addr, 0);
+ init_vma_prep(&vp, vma);
+ vp.insert = new;
+ vma_prepare(&vp);
+
+ if (new_below) {
+ vma->vm_start = addr;
+ vma->vm_pgoff += (addr - new->vm_start) >> PAGE_SHIFT;
+ } else {
+ vma->vm_end = addr;
+ }
+
+ /* vma_complete stores the new vma */
+ vma_complete(&vp, vmi, vma->vm_mm);
/* Success. */
- if (!err)
- return 0;
+ if (new_below)
+ vma_next(vmi);
+ validate_mm_mt(vma->vm_mm);
+ return 0;
- /* Avoid vm accounting in close() operation */
- new->vm_start = new->vm_end;
- new->vm_pgoff = 0;
- /* Clean everything up if vma_adjust failed. */
- if (new->vm_ops && new->vm_ops->close)
- new->vm_ops->close(new);
- if (new->vm_file)
- fput(new->vm_file);
- unlink_anon_vmas(new);
- out_free_mpol:
+out_free_mpol:
mpol_put(vma_policy(new));
- out_free_vma:
+out_free_vmi:
+ vma_iter_free(vmi);
+out_free_vma:
vm_area_free(new);
- validate_mm_mt(mm);
+ validate_mm_mt(vma->vm_mm);
return err;
}
@@ -2264,13 +2233,13 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
* Split a vma into two pieces at address 'addr', a new vma is allocated
* either for the first part or the tail.
*/
-int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long addr, int new_below)
{
- if (mm->map_count >= sysctl_max_map_count)
+ if (vma->vm_mm->map_count >= sysctl_max_map_count)
return -ENOMEM;
- return __split_vma(mm, vma, addr, new_below);
+ return __split_vma(vmi, vma, addr, new_below);
}
static inline int munmap_sidetree(struct vm_area_struct *vma,
@@ -2287,8 +2256,8 @@ static inline int munmap_sidetree(struct vm_area_struct *vma,
}
/*
- * do_mas_align_munmap() - munmap the aligned region from @start to @end.
- * @mas: The maple_state, ideally set up to alter the correct tree location.
+ * do_vmi_align_munmap() - munmap the aligned region from @start to @end.
+ * @vmi: The vma iterator
* @vma: The starting vm_area_struct
* @mm: The mm_struct
* @start: The aligned start address to munmap.
@@ -2299,7 +2268,7 @@ static inline int munmap_sidetree(struct vm_area_struct *vma,
* If @downgrade is true, check return code for potential release of the lock.
*/
static int
-do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
+do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
struct mm_struct *mm, unsigned long start,
unsigned long end, struct list_head *uf, bool downgrade)
{
@@ -2308,13 +2277,9 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
int count = 0;
int error = -ENOMEM;
MA_STATE(mas_detach, &mt_detach, 0, 0);
- mt_init_flags(&mt_detach, MT_FLAGS_LOCK_EXTERN);
+ mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
mt_set_external_lock(&mt_detach, &mm->mmap_lock);
- if (mas_preallocate(mas, vma, GFP_KERNEL))
- return -ENOMEM;
-
- mas->last = end - 1;
/*
* If we need to split any vma, do it now to save pain later.
*
@@ -2334,45 +2299,27 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
goto map_count_exceeded;
- /*
- * mas_pause() is not needed since mas->index needs to be set
- * differently than vma->vm_end anyways.
- */
- error = __split_vma(mm, vma, start, 0);
+ error = __split_vma(vmi, vma, start, 0);
if (error)
goto start_split_failed;
- mas_set(mas, start);
- vma = mas_walk(mas);
+ vma = vma_iter_load(vmi);
}
- prev = mas_prev(mas, 0);
+ prev = vma_prev(vmi);
if (unlikely((!prev)))
- mas_set(mas, start);
+ vma_iter_set(vmi, start);
/*
* Detach a range of VMAs from the mm. Using next as a temp variable as
* it is always overwritten.
*/
- mas_for_each(mas, next, end - 1) {
+ for_each_vma_range(*vmi, next, end) {
/* Does it split the end? */
if (next->vm_end > end) {
- struct vm_area_struct *split;
-
- error = __split_vma(mm, next, end, 1);
+ error = __split_vma(vmi, next, end, 0);
if (error)
goto end_split_failed;
-
- mas_set(mas, end);
- split = mas_prev(mas, 0);
- error = munmap_sidetree(split, &mas_detach);
- if (error)
- goto munmap_sidetree_failed;
-
- count++;
- if (vma == next)
- vma = split;
- break;
}
error = munmap_sidetree(next, &mas_detach);
if (error)
@@ -2385,9 +2332,7 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
#endif
}
- if (!next)
- next = mas_next(mas, ULONG_MAX);
-
+ next = vma_next(vmi);
if (unlikely(uf)) {
/*
* If userfaultfd_unmap_prep returns an error the vmas
@@ -2404,8 +2349,6 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
goto userfaultfd_error;
}
- /* Point of no return */
- mas_set_range(mas, start, end - 1);
#if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
/* Make sure no VMAs are about to be lost. */
{
@@ -2413,19 +2356,23 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
struct vm_area_struct *vma_mas, *vma_test;
int test_count = 0;
+ vma_iter_set(vmi, start);
rcu_read_lock();
vma_test = mas_find(&test, end - 1);
- mas_for_each(mas, vma_mas, end - 1) {
+ for_each_vma_range(*vmi, vma_mas, end) {
BUG_ON(vma_mas != vma_test);
test_count++;
vma_test = mas_next(&test, end - 1);
}
rcu_read_unlock();
BUG_ON(count != test_count);
- mas_set_range(mas, start, end - 1);
}
#endif
- mas_store_prealloc(mas, NULL);
+ /* Point of no return */
+ vma_iter_set(vmi, start);
+ if (vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL))
+ return -ENOMEM;
+
mm->map_count -= count;
/*
* Do not downgrade mmap_lock if we are next to VM_GROWSDOWN or
@@ -2441,7 +2388,11 @@ do_mas_align_munmap(struct ma_state *mas, struct vm_area_struct *vma,
mmap_write_downgrade(mm);
}
- unmap_region(mm, &mt_detach, vma, prev, next, start, end);
+ /*
+ * We can free page tables without write-locking mmap_lock because VMAs
+ * were isolated before we downgraded mmap_lock.
+ */
+ unmap_region(mm, &mt_detach, vma, prev, next, start, end, !downgrade);
/* Statistics and freeing VMAs */
mas_set(&mas_detach, start);
remove_mt(mm, &mas_detach);
@@ -2457,13 +2408,12 @@ end_split_failed:
__mt_destroy(&mt_detach);
start_split_failed:
map_count_exceeded:
- mas_destroy(mas);
return error;
}
/*
- * do_mas_munmap() - munmap a given range.
- * @mas: The maple state
+ * do_vmi_munmap() - munmap a given range.
+ * @vmi: The vma iterator
* @mm: The mm_struct
* @start: The start address to munmap
* @len: The length of the range to munmap
@@ -2477,7 +2427,7 @@ map_count_exceeded:
*
* Returns: -EINVAL on failure, 1 on success and unlock, 0 otherwise.
*/
-int do_mas_munmap(struct ma_state *mas, struct mm_struct *mm,
+int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
unsigned long start, size_t len, struct list_head *uf,
bool downgrade)
{
@@ -2495,11 +2445,11 @@ int do_mas_munmap(struct ma_state *mas, struct mm_struct *mm,
arch_unmap(mm, start, end);
/* Find the first overlapping VMA */
- vma = mas_find(mas, end - 1);
+ vma = vma_find(vmi, end);
if (!vma)
return 0;
- return do_mas_align_munmap(mas, vma, mm, start, end, uf, downgrade);
+ return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, downgrade);
}
/* do_munmap() - Wrapper function for non-maple tree aware do_munmap() calls.
@@ -2511,9 +2461,9 @@ int do_mas_munmap(struct ma_state *mas, struct mm_struct *mm,
int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
struct list_head *uf)
{
- MA_STATE(mas, &mm->mm_mt, start, start);
+ VMA_ITERATOR(vmi, mm, start);
- return do_mas_munmap(&mas, mm, start, len, uf, false);
+ return do_vmi_munmap(&vmi, mm, start, len, uf, false);
}
unsigned long mmap_region(struct file *file, unsigned long addr,
@@ -2529,7 +2479,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long merge_start = addr, merge_end = end;
pgoff_t vm_pgoff;
int error;
- MA_STATE(mas, &mm->mm_mt, addr, end - 1);
+ VMA_ITERATOR(vmi, mm, addr);
/* Check against address space limit. */
if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
@@ -2547,7 +2497,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
}
/* Unmap any existing mapping in the area */
- if (do_mas_munmap(&mas, mm, addr, len, uf, false))
+ if (do_vmi_munmap(&vmi, mm, addr, len, uf, false))
return -ENOMEM;
/*
@@ -2560,8 +2510,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
vm_flags |= VM_ACCOUNT;
}
- next = mas_next(&mas, ULONG_MAX);
- prev = mas_prev(&mas, 0);
+ next = vma_next(&vmi);
+ prev = vma_prev(&vmi);
if (vm_flags & VM_SPECIAL)
goto cannot_expand;
@@ -2589,13 +2539,11 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
/* Actually expand, if possible */
if (vma &&
- !vma_expand(&mas, vma, merge_start, merge_end, vm_pgoff, next)) {
+ !vma_expand(&vmi, vma, merge_start, merge_end, vm_pgoff, next)) {
khugepaged_enter_vma(vma, vm_flags);
goto expanded;
}
- mas.index = addr;
- mas.last = end - 1;
cannot_expand:
/*
* Determine the object being mapped and call the appropriate
@@ -2608,9 +2556,10 @@ cannot_expand:
goto unacct_error;
}
+ vma_iter_set(&vmi, addr);
vma->vm_start = addr;
vma->vm_end = end;
- vma->vm_flags = vm_flags;
+ vm_flags_init(vma, vm_flags);
vma->vm_page_prot = vm_get_page_prot(vm_flags);
vma->vm_pgoff = pgoff;
@@ -2630,19 +2579,20 @@ cannot_expand:
* Expansion is handled above, merging is handled below.
* Drivers should not alter the address of the VMA.
*/
- if (WARN_ON((addr != vma->vm_start))) {
- error = -EINVAL;
+ error = -EINVAL;
+ if (WARN_ON((addr != vma->vm_start)))
goto close_and_free_vma;
- }
- mas_reset(&mas);
+ vma_iter_set(&vmi, addr);
/*
* If vm_flags changed after call_mmap(), we should try merge
* vma again as we may succeed this time.
*/
if (unlikely(vm_flags != vma->vm_flags && prev)) {
- merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
- NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
+ merge = vma_merge(&vmi, mm, prev, vma->vm_start,
+ vma->vm_end, vma->vm_flags, NULL,
+ vma->vm_file, vma->vm_pgoff, NULL,
+ NULL_VM_UFFD_CTX, NULL);
if (merge) {
/*
* ->mmap() can change vma->vm_file and fput
@@ -2669,31 +2619,24 @@ cannot_expand:
vma_set_anonymous(vma);
}
- /* Allow architectures to sanity-check the vm_flags */
- if (!arch_validate_flags(vma->vm_flags)) {
- error = -EINVAL;
- if (file)
- goto close_and_free_vma;
- else if (vma->vm_file)
- goto unmap_and_free_vma;
- else
- goto free_vma;
+ if (map_deny_write_exec(vma, vma->vm_flags)) {
+ error = -EACCES;
+ goto close_and_free_vma;
}
- if (mas_preallocate(&mas, vma, GFP_KERNEL)) {
- error = -ENOMEM;
- if (file)
- goto close_and_free_vma;
- else if (vma->vm_file)
- goto unmap_and_free_vma;
- else
- goto free_vma;
- }
+ /* Allow architectures to sanity-check the vm_flags */
+ error = -EINVAL;
+ if (!arch_validate_flags(vma->vm_flags))
+ goto close_and_free_vma;
+
+ error = -ENOMEM;
+ if (vma_iter_prealloc(&vmi))
+ goto close_and_free_vma;
if (vma->vm_file)
i_mmap_lock_write(vma->vm_file->f_mapping);
- vma_mas_store(vma, &mas);
+ vma_iter_store(&vmi, vma);
mm->map_count++;
if (vma->vm_file) {
if (vma->vm_flags & VM_SHARED)
@@ -2724,7 +2667,7 @@ expanded:
if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
is_vm_hugetlb_page(vma) ||
vma == get_gate_vma(current->mm))
- vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
+ vm_flags_clear(vma, VM_LOCKED_MASK);
else
mm->locked_vm += (len >> PAGE_SHIFT);
}
@@ -2739,7 +2682,7 @@ expanded:
* then new mapped in-place (which must be aimed as
* a completely new data area).
*/
- vma->vm_flags |= VM_SOFTDIRTY;
+ vm_flags_set(vma, VM_SOFTDIRTY);
vma_set_page_prot(vma);
@@ -2747,14 +2690,18 @@ expanded:
return addr;
close_and_free_vma:
- if (vma->vm_ops && vma->vm_ops->close)
+ if (file && vma->vm_ops && vma->vm_ops->close)
vma->vm_ops->close(vma);
+
+ if (file || vma->vm_file) {
unmap_and_free_vma:
- fput(vma->vm_file);
- vma->vm_file = NULL;
+ fput(vma->vm_file);
+ vma->vm_file = NULL;
- /* Undo any partial mapping done by a device driver. */
- unmap_region(mm, mas.tree, vma, prev, next, vma->vm_start, vma->vm_end);
+ /* Undo any partial mapping done by a device driver. */
+ unmap_region(mm, &mm->mm_mt, vma, prev, next, vma->vm_start,
+ vma->vm_end, true);
+ }
if (file && (vm_flags & VM_SHARED))
mapping_unmap_writable(file->f_mapping);
free_vma:
@@ -2771,12 +2718,12 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
int ret;
struct mm_struct *mm = current->mm;
LIST_HEAD(uf);
- MA_STATE(mas, &mm->mm_mt, start, start);
+ VMA_ITERATOR(vmi, mm, start);
if (mmap_write_lock_killable(mm))
return -EINTR;
- ret = do_mas_munmap(&mas, mm, start, len, &uf, downgrade);
+ ret = do_vmi_munmap(&vmi, mm, start, len, &uf, downgrade);
/*
* Returning 1 indicates mmap_lock is downgraded.
* But 1 is not legal return value of vm_munmap() and munmap(), reset
@@ -2889,33 +2836,34 @@ out:
}
/*
- * brk_munmap() - Unmap a parital vma.
- * @mas: The maple tree state.
- * @vma: The vma to be modified
- * @newbrk: the start of the address to unmap
- * @oldbrk: The end of the address to unmap
+ * do_vma_munmap() - Unmap a full or partial vma.
+ * @vmi: The vma iterator pointing at the vma
+ * @vma: The first vma to be munmapped
+ * @start: the start of the address to unmap
+ * @end: The end of the address to unmap
* @uf: The userfaultfd list_head
+ * @downgrade: Attempt to downgrade or not
*
- * Returns: 1 on success.
- * unmaps a partial VMA mapping. Does not handle alignment, downgrades lock if
- * possible.
+ * Returns: 0 on success and not downgraded, 1 on success and downgraded.
+ * unmaps a VMA mapping when the vma iterator is already in position.
+ * Does not handle alignment.
*/
-static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
- unsigned long newbrk, unsigned long oldbrk,
- struct list_head *uf)
+int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ struct list_head *uf, bool downgrade)
{
struct mm_struct *mm = vma->vm_mm;
int ret;
- arch_unmap(mm, newbrk, oldbrk);
- ret = do_mas_align_munmap(mas, vma, mm, newbrk, oldbrk, uf, true);
+ arch_unmap(mm, start, end);
+ ret = do_vmi_align_munmap(vmi, vma, mm, start, end, uf, downgrade);
validate_mm_mt(mm);
return ret;
}
/*
* do_brk_flags() - Increase the brk vma if the flags match.
- * @mas: The maple tree state.
+ * @vmi: The vma iterator
* @addr: The start address
* @len: The length of the increase
* @vma: The vma,
@@ -2925,10 +2873,11 @@ static int do_brk_munmap(struct ma_state *mas, struct vm_area_struct *vma,
* do not match then create a new anonymous VMA. Eventually we may be able to
* do some brk-specific accounting here.
*/
-static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
+static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long addr, unsigned long len, unsigned long flags)
{
struct mm_struct *mm = current->mm;
+ struct vma_prepare vp;
validate_mm_mt(mm);
/*
@@ -2952,23 +2901,17 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
if (vma && vma->vm_end == addr && !vma_policy(vma) &&
can_vma_merge_after(vma, flags, NULL, NULL,
addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) {
- mas_set_range(mas, vma->vm_start, addr + len - 1);
- if (mas_preallocate(mas, vma, GFP_KERNEL))
+ if (vma_iter_prealloc(vmi))
goto unacct_fail;
vma_adjust_trans_huge(vma, vma->vm_start, addr + len, 0);
- if (vma->anon_vma) {
- anon_vma_lock_write(vma->anon_vma);
- anon_vma_interval_tree_pre_update_vma(vma);
- }
+ init_vma_prep(&vp, vma);
+ vma_prepare(&vp);
vma->vm_end = addr + len;
- vma->vm_flags |= VM_SOFTDIRTY;
- mas_store_prealloc(mas, vma);
+ vm_flags_set(vma, VM_SOFTDIRTY);
+ vma_iter_store(vmi, vma);
- if (vma->anon_vma) {
- anon_vma_interval_tree_post_update_vma(vma);
- anon_vma_unlock_write(vma->anon_vma);
- }
+ vma_complete(&vp, vmi, mm);
khugepaged_enter_vma(vma, flags);
goto out;
}
@@ -2982,10 +2925,9 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
vma->vm_start = addr;
vma->vm_end = addr + len;
vma->vm_pgoff = addr >> PAGE_SHIFT;
- vma->vm_flags = flags;
+ vm_flags_init(vma, flags);
vma->vm_page_prot = vm_get_page_prot(flags);
- mas_set_range(mas, vma->vm_start, addr + len - 1);
- if (mas_store_gfp(mas, vma, GFP_KERNEL))
+ if (vma_iter_store_gfp(vmi, vma, GFP_KERNEL))
goto mas_store_fail;
mm->map_count++;
@@ -2995,7 +2937,7 @@ out:
mm->data_vm += len >> PAGE_SHIFT;
if (flags & VM_LOCKED)
mm->locked_vm += (len >> PAGE_SHIFT);
- vma->vm_flags |= VM_SOFTDIRTY;
+ vm_flags_set(vma, VM_SOFTDIRTY);
validate_mm(mm);
return 0;
@@ -3014,7 +2956,7 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
int ret;
bool populate;
LIST_HEAD(uf);
- MA_STATE(mas, &mm->mm_mt, addr, addr);
+ VMA_ITERATOR(vmi, mm, addr);
len = PAGE_ALIGN(request);
if (len < request)
@@ -3033,12 +2975,12 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
if (ret)
goto limits_failed;
- ret = do_mas_munmap(&mas, mm, addr, len, &uf, 0);
+ ret = do_vmi_munmap(&vmi, mm, addr, len, &uf, 0);
if (ret)
goto munmap_failed;
- vma = mas_prev(&mas, 0);
- ret = do_brk_flags(&mas, vma, addr, len, flags);
+ vma = vma_prev(&vmi);
+ ret = do_brk_flags(&vmi, vma, addr, len, flags);
populate = ((mm->def_flags & VM_LOCKED) != 0);
mmap_write_unlock(mm);
userfaultfd_unmap_complete(mm, &uf);
@@ -3086,7 +3028,7 @@ void exit_mmap(struct mm_struct *mm)
tlb_gather_mmu_fullmm(&tlb, mm);
/* update_hiwater_rss(mm) here? but nobody should be looking */
/* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */
- unmap_vmas(&tlb, &mm->mm_mt, vma, 0, ULONG_MAX);
+ unmap_vmas(&tlb, &mm->mm_mt, vma, 0, ULONG_MAX, false);
mmap_read_unlock(mm);
/*
@@ -3095,6 +3037,7 @@ void exit_mmap(struct mm_struct *mm)
*/
set_bit(MMF_OOM_SKIP, &mm->flags);
mmap_write_lock(mm);
+ mt_clear_in_rcu(&mm->mm_mt);
free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS,
USER_PGTABLES_CEILING);
tlb_finish_mmu(&tlb);
@@ -3174,6 +3117,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *new_vma, *prev;
bool faulted_in_anon_vma = true;
+ VMA_ITERATOR(vmi, mm, addr);
validate_mm_mt(mm);
/*
@@ -3189,7 +3133,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
if (new_vma && new_vma->vm_start < addr + len)
return NULL; /* should never get here */
- new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
+ new_vma = vma_merge(&vmi, mm, prev, addr, addr + len, vma->vm_flags,
vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
vma->vm_userfaultfd_ctx, anon_vma_name(vma));
if (new_vma) {
@@ -3394,8 +3338,8 @@ static struct vm_area_struct *__install_special_mapping(
vma->vm_start = addr;
vma->vm_end = addr + len;
- vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
- vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
+ vm_flags_init(vma, (vm_flags | mm->def_flags |
+ VM_DONTEXPAND | VM_SOFTDIRTY) & ~VM_LOCKED_MASK);
vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
vma->vm_ops = ops;