diff options
Diffstat (limited to 'mm/mlock.c')
-rw-r--r-- | mm/mlock.c | 95 |
1 files changed, 72 insertions, 23 deletions
diff --git a/mm/mlock.c b/mm/mlock.c index 06bdfab83b58..086546ac5766 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -305,6 +305,62 @@ void munlock_folio(struct folio *folio) local_unlock(&mlock_fbatch.lock); } +static inline unsigned int folio_mlock_step(struct folio *folio, + pte_t *pte, unsigned long addr, unsigned long end) +{ + unsigned int count, i, nr = folio_nr_pages(folio); + unsigned long pfn = folio_pfn(folio); + pte_t ptent = ptep_get(pte); + + if (!folio_test_large(folio)) + return 1; + + count = pfn + nr - pte_pfn(ptent); + count = min_t(unsigned int, count, (end - addr) >> PAGE_SHIFT); + + for (i = 0; i < count; i++, pte++) { + pte_t entry = ptep_get(pte); + + if (!pte_present(entry)) + break; + if (pte_pfn(entry) - pfn >= nr) + break; + } + + return i; +} + +static inline bool allow_mlock_munlock(struct folio *folio, + struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned int step) +{ + /* + * For unlock, allow munlock large folio which is partially + * mapped to VMA. As it's possible that large folio is + * mlocked and VMA is split later. + * + * During memory pressure, such kind of large folio can + * be split. And the pages are not in VM_LOCKed VMA + * can be reclaimed. + */ + if (!(vma->vm_flags & VM_LOCKED)) + return true; + + /* folio_within_range() cannot take KSM, but any small folio is OK */ + if (!folio_test_large(folio)) + return true; + + /* folio not in range [start, end), skip mlock */ + if (!folio_within_range(folio, vma, start, end)) + return false; + + /* folio is not fully mapped, skip mlock */ + if (step != folio_nr_pages(folio)) + return false; + + return true; +} + static int mlock_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) @@ -314,6 +370,8 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr, pte_t *start_pte, *pte; pte_t ptent; struct folio *folio; + unsigned int step = 1; + unsigned long start = addr; ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { @@ -334,6 +392,7 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr, walk->action = ACTION_AGAIN; return 0; } + for (pte = start_pte; addr != end; pte++, addr += PAGE_SIZE) { ptent = ptep_get(pte); if (!pte_present(ptent)) @@ -341,12 +400,19 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr, folio = vm_normal_folio(vma, addr, ptent); if (!folio || folio_is_zone_device(folio)) continue; - if (folio_test_large(folio)) - continue; + + step = folio_mlock_step(folio, pte, addr, end); + if (!allow_mlock_munlock(folio, vma, start, end, step)) + goto next_entry; + if (vma->vm_flags & VM_LOCKED) mlock_folio(folio); else munlock_folio(folio); + +next_entry: + pte += step - 1; + addr += (step - 1) << PAGE_SHIFT; } pte_unmap(start_pte); out: @@ -414,7 +480,6 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long end, vm_flags_t newflags) { struct mm_struct *mm = vma->vm_mm; - pgoff_t pgoff; int nr_pages; int ret = 0; vm_flags_t oldflags = vma->vm_flags; @@ -425,28 +490,12 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */ goto out; - pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); - *prev = vma_merge(vmi, mm, *prev, start, end, newflags, - vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, anon_vma_name(vma)); - if (*prev) { - vma = *prev; - goto success; - } - - if (start != vma->vm_start) { - ret = split_vma(vmi, vma, start, 1); - if (ret) - goto out; - } - - if (end != vma->vm_end) { - ret = split_vma(vmi, vma, end, 0); - if (ret) - goto out; + vma = vma_modify_flags(vmi, *prev, vma, start, end, newflags); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto out; } -success: /* * Keep track of amount of locked VM. */ |