diff options
| author | Liam R. Howlett <Liam.Howlett@oracle.com> | 2026-01-21 19:49:44 +0300 |
|---|---|---|
| committer | Andrew Morton <akpm@linux-foundation.org> | 2026-02-13 02:42:55 +0300 |
| commit | 0df5a8d3948da979b8ab811a692b34635e1b146d (patch) | |
| tree | 7372f0529280a14c79fa1c3af0fa7557d83b94c6 /mm | |
| parent | 5b6626a76a81fdf54abb33da0bbb061d830e9821 (diff) | |
| download | linux-0df5a8d3948da979b8ab811a692b34635e1b146d.tar.xz | |
mm/vma: use unmap_desc in exit_mmap() and vms_clear_ptes()
Convert vms_clear_ptes() to use unmap_desc to call unmap_vmas() instead of
the large argument list. The UNMAP_STATE() cannot be used because the vma
iterator in the vms does not point to the correct maple state
(mas_detach), and the tree_end will be set incorrectly. Setting up the
arguments manually avoids setting the struct up incorrectly and doing
extra work to get the correct pagetable range.
exit_mmap() also calls unmap_vmas() with many arguments. Using the
unmap_all_init() function to set the unmap descriptor for all vmas makes
this a bit easier to read.
Update to the vma test code is necessary to ensure testing continues to
function.
No functional changes intended.
Link: https://lkml.kernel.org/r/20260121164946.2093480-10-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Chris Li <chrisl@kernel.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Kairui Song <kasong@tencent.com>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: SeongJae Park <sj@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/internal.h | 3 | ||||
| -rw-r--r-- | mm/memory.c | 20 | ||||
| -rw-r--r-- | mm/mmap.c | 4 | ||||
| -rw-r--r-- | mm/vma.c | 27 | ||||
| -rw-r--r-- | mm/vma.h | 14 |
5 files changed, 50 insertions, 18 deletions
diff --git a/mm/internal.h b/mm/internal.h index 2a0e42e36b48..0f3ad8665d95 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -197,6 +197,9 @@ static inline void vma_close(struct vm_area_struct *vma) } } +/* unmap_vmas is in mm/memory.c */ +void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap); + #ifdef CONFIG_MMU static inline void get_anon_vma(struct anon_vma *anon_vma) diff --git a/mm/memory.c b/mm/memory.c index 6033cf6c93de..d68f8f082b1c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2144,11 +2144,7 @@ static void unmap_single_vma(struct mmu_gather *tlb, /** * unmap_vmas - unmap a range of memory covered by a list of vma's * @tlb: address of the caller's struct mmu_gather - * @mas: the maple state - * @vma: the starting vma - * @start_addr: virtual address at which to start unmapping - * @end_addr: virtual address at which to end unmapping - * @tree_end: The maximum index to check + * @unmap: The unmap_desc * * Unmap all pages in the vma list. * @@ -2161,10 +2157,9 @@ static void unmap_single_vma(struct mmu_gather *tlb, * ensure that any thus-far unmapped pages are flushed before unmap_vmas() * drops the lock and schedules. */ -void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, - struct vm_area_struct *vma, unsigned long start_addr, - unsigned long end_addr, unsigned long tree_end) +void unmap_vmas(struct mmu_gather *tlb, struct unmap_desc *unmap) { + struct vm_area_struct *vma; struct mmu_notifier_range range; struct zap_details details = { .zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP, @@ -2172,16 +2167,17 @@ void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, .even_cows = true, }; + vma = unmap->first; mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm, - start_addr, end_addr); + unmap->vma_start, unmap->vma_end); mmu_notifier_invalidate_range_start(&range); do { - unsigned long start = start_addr; - unsigned long end = end_addr; + unsigned long start = unmap->vma_start; + unsigned long end = unmap->vma_end; hugetlb_zap_begin(vma, &start, &end); unmap_single_vma(tlb, vma, start, end, &details); hugetlb_zap_end(vma, &details); - vma = mas_find(mas, tree_end - 1); + vma = mas_find(unmap->mas, unmap->tree_end - 1); } while (vma); mmu_notifier_invalidate_range_end(&range); } diff --git a/mm/mmap.c b/mm/mmap.c index 4500e61a0d5e..042b6b4b6ab8 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1277,6 +1277,7 @@ void exit_mmap(struct mm_struct *mm) struct vm_area_struct *vma; unsigned long nr_accounted = 0; VMA_ITERATOR(vmi, mm, 0); + struct unmap_desc unmap; /* mm's last user has gone, and its about to be pulled down */ mmu_notifier_release(mm); @@ -1292,11 +1293,12 @@ void exit_mmap(struct mm_struct *mm) goto destroy; } + unmap_all_init(&unmap, &vmi, vma); flush_cache_mm(mm); tlb_gather_mmu_fullmm(&tlb, mm); /* update_hiwater_rss(mm) here? but nobody should be looking */ /* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */ - unmap_vmas(&tlb, &vmi.mas, vma, 0, ULONG_MAX, ULONG_MAX); + unmap_vmas(&tlb, &unmap); mmap_read_unlock(mm); /* @@ -480,8 +480,7 @@ void unmap_region(struct unmap_desc *unmap) tlb_gather_mmu(&tlb, mm); update_hiwater_rss(mm); - unmap_vmas(&tlb, mas, unmap->first, unmap->vma_start, unmap->vma_end, - unmap->vma_end); + unmap_vmas(&tlb, unmap); mas_set(mas, unmap->tree_reset); free_pgtables(&tlb, mas, unmap->first, unmap->pg_start, unmap->pg_end, unmap->tree_end, unmap->mm_wr_locked); @@ -1257,6 +1256,26 @@ static inline void vms_clear_ptes(struct vma_munmap_struct *vms, struct ma_state *mas_detach, bool mm_wr_locked) { struct mmu_gather tlb; + struct unmap_desc unmap = { + .mas = mas_detach, + .first = vms->vma, + /* start and end may be different if there is no prev or next vma. */ + .pg_start = vms->unmap_start, + .pg_end = vms->unmap_end, + .vma_start = vms->start, + .vma_end = vms->end, + /* + * The tree limits and reset differ from the normal case since it's a + * side-tree + */ + .tree_reset = 1, + .tree_end = vms->vma_count, + /* + * We can free page tables without write-locking mmap_lock because VMAs + * were isolated before we downgraded mmap_lock. + */ + .mm_wr_locked = mm_wr_locked, + }; if (!vms->clear_ptes) /* Nothing to do */ return; @@ -1268,9 +1287,7 @@ static inline void vms_clear_ptes(struct vma_munmap_struct *vms, mas_set(mas_detach, 1); tlb_gather_mmu(&tlb, vms->vma->vm_mm); update_hiwater_rss(vms->vma->vm_mm); - unmap_vmas(&tlb, mas_detach, vms->vma, vms->start, vms->end, - vms->vma_count); - + unmap_vmas(&tlb, &unmap); mas_set(mas_detach, 1); /* start and end may be different if there is no prev or next vma. */ free_pgtables(&tlb, mas_detach, vms->vma, vms->unmap_start, @@ -167,6 +167,20 @@ struct unmap_desc { bool mm_wr_locked; /* If the mmap write lock is held */ }; +static inline void unmap_all_init(struct unmap_desc *unmap, + struct vma_iterator *vmi, struct vm_area_struct *vma) +{ + unmap->mas = &vmi->mas; + unmap->first = vma; + unmap->pg_start = FIRST_USER_ADDRESS; + unmap->pg_end = USER_PGTABLES_CEILING; + unmap->vma_start = 0; + unmap->vma_end = ULONG_MAX; + unmap->tree_end = ULONG_MAX; + unmap->tree_reset = vma->vm_end; + unmap->mm_wr_locked = false; +} + #define UNMAP_STATE(name, _vmi, _vma, _vma_start, _vma_end, _prev, _next) \ struct unmap_desc name = { \ .mas = &(_vmi)->mas, \ |
