diff options
Diffstat (limited to 'mm/mlock.c')
-rw-r--r-- | mm/mlock.c | 52 |
1 files changed, 38 insertions, 14 deletions
diff --git a/mm/mlock.c b/mm/mlock.c index 67ba6da7d0e3..192e6eebe4f2 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -133,7 +133,10 @@ static void __munlock_isolation_failed(struct page *page) /** * munlock_vma_page - munlock a vma page - * @page - page to be unlocked + * @page - page to be unlocked, either a normal page or THP page head + * + * returns the size of the page as a page mask (0 for normal page, + * HPAGE_PMD_NR - 1 for THP head page) * * called from munlock()/munmap() path with page supposedly on the LRU. * When we munlock a page, because the vma where we found the page is being @@ -148,21 +151,30 @@ static void __munlock_isolation_failed(struct page *page) */ unsigned int munlock_vma_page(struct page *page) { - unsigned int page_mask = 0; + unsigned int nr_pages; BUG_ON(!PageLocked(page)); if (TestClearPageMlocked(page)) { - unsigned int nr_pages = hpage_nr_pages(page); + nr_pages = hpage_nr_pages(page); mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); - page_mask = nr_pages - 1; if (!isolate_lru_page(page)) __munlock_isolated_page(page); else __munlock_isolation_failed(page); + } else { + nr_pages = hpage_nr_pages(page); } - return page_mask; + /* + * Regardless of the original PageMlocked flag, we determine nr_pages + * after touching the flag. This leaves a possible race with a THP page + * split, such that a whole THP page was munlocked, but nr_pages == 1. + * Returning a smaller mask due to that is OK, the worst that can + * happen is subsequent useless scanning of the former tail pages. + * The NR_MLOCK accounting can however become broken. + */ + return nr_pages - 1; } /** @@ -286,10 +298,12 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) { int i; int nr = pagevec_count(pvec); - int delta_munlocked = -nr; + int delta_munlocked; struct pagevec pvec_putback; int pgrescued = 0; + pagevec_init(&pvec_putback, 0); + /* Phase 1: page isolation */ spin_lock_irq(&zone->lru_lock); for (i = 0; i < nr; i++) { @@ -318,18 +332,21 @@ skip_munlock: /* * We won't be munlocking this page in the next phase * but we still need to release the follow_page_mask() - * pin. + * pin. We cannot do it under lru_lock however. If it's + * the last pin, __page_cache_release would deadlock. */ + pagevec_add(&pvec_putback, pvec->pages[i]); pvec->pages[i] = NULL; - put_page(page); - delta_munlocked++; } } + delta_munlocked = -nr + pagevec_count(&pvec_putback); __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); spin_unlock_irq(&zone->lru_lock); + /* Now we can release pins of pages that we are not munlocking */ + pagevec_release(&pvec_putback); + /* Phase 2: page munlock */ - pagevec_init(&pvec_putback, 0); for (i = 0; i < nr; i++) { struct page *page = pvec->pages[i]; @@ -379,10 +396,14 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec, /* * Initialize pte walk starting at the already pinned page where we - * are sure that there is a pte. + * are sure that there is a pte, as it was pinned under the same + * mmap_sem write op. */ pte = get_locked_pte(vma->vm_mm, start, &ptl); - end = min(end, pmd_addr_end(start, end)); + /* Make sure we do not cross the page table boundary */ + end = pgd_addr_end(start, end); + end = pud_addr_end(start, end); + end = pmd_addr_end(start, end); /* The page next to the pinned page is the first we will try to get */ start += PAGE_SIZE; @@ -436,7 +457,8 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, while (start < end) { struct page *page = NULL; - unsigned int page_mask, page_increm; + unsigned int page_mask; + unsigned long page_increm; struct pagevec pvec; struct zone *zone; int zoneid; @@ -486,7 +508,9 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, goto next; } } - page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask); + /* It's a bug to munlock in the middle of a THP page */ + VM_BUG_ON((start >> PAGE_SHIFT) & page_mask); + page_increm = 1 + page_mask; start += page_increm * PAGE_SIZE; next: cond_resched(); |