Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--  mm/vmscan.c  207
1 file changed, 107 insertions(+), 100 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 242368592ea7..257cba79a96d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1539,9 +1539,9 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
*
* returns 0 on success, -ve errno on failure.
*/
-int __isolate_lru_page(struct page *page, isolate_mode_t mode)
+int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
{
- int ret = -EINVAL;
+ int ret = -EBUSY;
/* Only take pages on the LRU. */
if (!PageLRU(page))
@@ -1551,8 +1551,6 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
return ret;
- ret = -EBUSY;
-
/*
* To minimise LRU disruption, the caller can indicate that it only
* wants to isolate pages it will be able to operate on without
@@ -1593,20 +1591,9 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
return ret;
- if (likely(get_page_unless_zero(page))) {
- /*
- * Be careful not to clear PageLRU until after we're
- * sure the page is not being freed elsewhere -- the
- * page release code relies on it.
- */
- ClearPageLRU(page);
- ret = 0;
- }
-
- return ret;
+ return 0;
}
-
/*
* Update LRU sizes after isolating pages. The LRU size updates must
* be complete before mem_cgroup_update_lru_size due to a sanity check.
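With the reference grab and PageLRU clearing moved out of __isolate_lru_page_prepare(), every caller follows the same two-step protocol: validate under the isolation mode, then claim the page. A minimal sketch of that protocol (it mirrors the isolate_lru_pages() hunk further down; the surrounding caller is hypothetical):

	if (__isolate_lru_page_prepare(page, mode) != 0)
		return -EBUSY;	/* not eligible under this isolate_mode_t */
	if (unlikely(!get_page_unless_zero(page)))
		return -EBUSY;	/* page is already being freed elsewhere */
	if (!TestClearPageLRU(page)) {
		/* lost the race with another isolation path */
		put_page(page);
		return -EBUSY;
	}
	/* success: page is off the LRU and we hold a reference to it */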
@@ -1626,14 +1613,16 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
}
/**
- * pgdat->lru_lock is heavily contended. Some of the functions that
+ * Isolate pages from the lruvec to fill the @dst list, scanning up to @nr_to_scan pages.
+ *
+ * lruvec->lru_lock is heavily contended. Some of the functions that
* shrink the lists perform better by taking out a batch of pages
* and working on them outside the LRU lock.
*
* For pagecache intensive workloads, this function is the hottest
* spot in the kernel (apart from copy_*_user functions).
*
- * Appropriate locks must be held before calling this function.
+ * lruvec->lru_lock must be held before calling this function.
*
* @nr_to_scan: The number of eligible pages to look through on the list.
* @lruvec: The LRU vector to pull pages from.
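The batching described above keeps the hot lock held only while pages are pulled off the list. A condensed sketch of the caller side, as it appears in shrink_inactive_list() later in this patch (sc and lru come from the surrounding reclaim context):

	LIST_HEAD(page_list);
	unsigned long nr_taken, nr_scanned;

	spin_lock_irq(&lruvec->lru_lock);
	nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list,
				     &nr_scanned, sc, lru);
	spin_unlock_irq(&lruvec->lru_lock);

	/* reclaim work on page_list proceeds without the contended lock */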
@@ -1666,8 +1655,6 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
page = lru_to_page(src);
prefetchw_prev_lru_page(page, src, flags);
- VM_BUG_ON_PAGE(!PageLRU(page), page);
-
nr_pages = compound_nr(page);
total_scan += nr_pages;
@@ -1688,20 +1675,34 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
* only when the page is being freed somewhere else.
*/
scan += nr_pages;
- switch (__isolate_lru_page(page, mode)) {
+ switch (__isolate_lru_page_prepare(page, mode)) {
case 0:
+ /*
+ * Be careful not to clear PageLRU until after we're
+ * sure the page is not being freed elsewhere -- the
+ * page release code relies on it.
+ */
+ if (unlikely(!get_page_unless_zero(page)))
+ goto busy;
+
+ if (!TestClearPageLRU(page)) {
+ /*
+				 * This page may be in another isolation path,
+ * but we still hold lru_lock.
+ */
+ put_page(page);
+ goto busy;
+ }
+
nr_taken += nr_pages;
nr_zone_taken[page_zonenum(page)] += nr_pages;
list_move(&page->lru, dst);
break;
- case -EBUSY:
+ default:
+busy:
/* else it is being freed elsewhere */
list_move(&page->lru, src);
- continue;
-
- default:
- BUG();
}
}
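The correctness of this path hinges on TestClearPageLRU() being an atomic test-and-clear: of two racing isolators, exactly one observes the flag set and clears it, so each page can be claimed at most once. An illustrative fragment (hypothetical, not part of the patch):

	if (TestClearPageLRU(page)) {
		/* this path owns the isolation; PageLRU is now clear */
	} else {
		/* another isolator claimed the page first; back off */
	}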
@@ -1764,21 +1765,16 @@ int isolate_lru_page(struct page *page)
VM_BUG_ON_PAGE(!page_count(page), page);
WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");
- if (PageLRU(page)) {
- pg_data_t *pgdat = page_pgdat(page);
+ if (TestClearPageLRU(page)) {
struct lruvec *lruvec;
- spin_lock_irq(&pgdat->lru_lock);
- lruvec = mem_cgroup_page_lruvec(page, pgdat);
- if (PageLRU(page)) {
- int lru = page_lru(page);
- get_page(page);
- ClearPageLRU(page);
- del_page_from_lru_list(page, lruvec, lru);
- ret = 0;
- }
- spin_unlock_irq(&pgdat->lru_lock);
+ get_page(page);
+ lruvec = lock_page_lruvec_irq(page);
+ del_page_from_lru_list(page, lruvec, page_lru(page));
+ unlock_page_lruvec_irq(lruvec);
+ ret = 0;
}
+
return ret;
}
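A minimal usage sketch for the rewritten helper (the caller and its work are hypothetical): on success the page has been removed from its LRU list with an extra reference held, and putback_lru_page() later re-adds it and drops that reference:

	if (isolate_lru_page(page) == 0) {
		/* page is off the LRU; we hold the isolation reference */
		process_isolated_page(page);	/* hypothetical work */
		putback_lru_page(page);		/* re-add to LRU, drop ref */
	}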
@@ -1820,29 +1816,14 @@ static int too_many_isolated(struct pglist_data *pgdat, int file,
}
/*
- * This moves pages from @list to corresponding LRU list.
- *
- * We move them the other way if the page is referenced by one or more
- * processes, from rmap.
- *
- * If the pages are mostly unmapped, the processing is fast and it is
- * appropriate to hold zone_lru_lock across the whole operation. But if
- * the pages are mapped, the processing is slow (page_referenced()) so we
- * should drop zone_lru_lock around each page. It's impossible to balance
- * this, so instead we remove the pages from the LRU while processing them.
- * It is safe to rely on PG_active against the non-LRU pages in here because
- * nobody will play with that bit on a non-LRU page.
- *
- * The downside is that we have to touch page->_refcount against each page.
- * But we had to alter page->flags anyway.
+ * move_pages_to_lru() moves pages from the private @list to the appropriate LRU lists.
+ * On return, @list is reused as a list of pages to be freed by the caller.
*
* Returns the number of pages moved to the given lruvec.
*/
-
static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
struct list_head *list)
{
- struct pglist_data *pgdat = lruvec_pgdat(lruvec);
int nr_pages, nr_moved = 0;
LIST_HEAD(pages_to_free);
struct page *page;
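Because @list is reused for the pages whose last reference dropped, the caller frees them in one batch afterwards. A sketch of that pattern, matching the shrink_inactive_list() hunk below:

	spin_lock_irq(&lruvec->lru_lock);
	move_pages_to_lru(lruvec, &page_list);
	spin_unlock_irq(&lruvec->lru_lock);

	/* whatever remains on page_list is dead; free it in bulk */
	mem_cgroup_uncharge_list(&page_list);
	free_unref_page_list(&page_list);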
@@ -1851,38 +1832,54 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
while (!list_empty(list)) {
page = lru_to_page(list);
VM_BUG_ON_PAGE(PageLRU(page), page);
+ list_del(&page->lru);
if (unlikely(!page_evictable(page))) {
- list_del(&page->lru);
- spin_unlock_irq(&pgdat->lru_lock);
+ spin_unlock_irq(&lruvec->lru_lock);
putback_lru_page(page);
- spin_lock_irq(&pgdat->lru_lock);
+ spin_lock_irq(&lruvec->lru_lock);
continue;
}
- lruvec = mem_cgroup_page_lruvec(page, pgdat);
+ /*
+ * The SetPageLRU needs to be kept here for list integrity.
+ * Otherwise:
+	 *  #0 move_pages_to_lru              #1 release_pages
+	 *  if !put_page_testzero
+	 *                                    if (put_page_testzero())
+	 *                                        !PageLRU //skip lru_lock
+	 *  SetPageLRU()
+	 *  list_add(&page->lru,)
+	 *                                    list_add(&page->lru,)
+ */
SetPageLRU(page);
- lru = page_lru(page);
- nr_pages = thp_nr_pages(page);
- update_lru_size(lruvec, lru, page_zonenum(page), nr_pages);
- list_move(&page->lru, &lruvec->lists[lru]);
-
- if (put_page_testzero(page)) {
+ if (unlikely(put_page_testzero(page))) {
__ClearPageLRU(page);
__ClearPageActive(page);
- del_page_from_lru_list(page, lruvec, lru);
if (unlikely(PageCompound(page))) {
- spin_unlock_irq(&pgdat->lru_lock);
+ spin_unlock_irq(&lruvec->lru_lock);
destroy_compound_page(page);
- spin_lock_irq(&pgdat->lru_lock);
+ spin_lock_irq(&lruvec->lru_lock);
} else
list_add(&page->lru, &pages_to_free);
- } else {
- nr_moved += nr_pages;
- if (PageActive(page))
- workingset_age_nonresident(lruvec, nr_pages);
+
+ continue;
}
+
+ /*
+ * All pages were isolated from the same lruvec (and isolation
+ * inhibits memcg migration).
+ */
+ VM_BUG_ON_PAGE(!lruvec_holds_page_lru_lock(page, lruvec), page);
+ lru = page_lru(page);
+ nr_pages = thp_nr_pages(page);
+
+ update_lru_size(lruvec, lru, page_zonenum(page), nr_pages);
+ list_add(&page->lru, &lruvec->lists[lru]);
+ nr_moved += nr_pages;
+ if (PageActive(page))
+ workingset_age_nonresident(lruvec, nr_pages);
}
/*
@@ -1939,7 +1936,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
lru_add_drain();
- spin_lock_irq(&pgdat->lru_lock);
+ spin_lock_irq(&lruvec->lru_lock);
nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list,
&nr_scanned, sc, lru);
@@ -1951,27 +1948,25 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
__count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned);
__count_vm_events(PGSCAN_ANON + file, nr_scanned);
- spin_unlock_irq(&pgdat->lru_lock);
+ spin_unlock_irq(&lruvec->lru_lock);
if (nr_taken == 0)
return 0;
nr_reclaimed = shrink_page_list(&page_list, pgdat, sc, &stat, false);
- spin_lock_irq(&pgdat->lru_lock);
-
+ spin_lock_irq(&lruvec->lru_lock);
move_pages_to_lru(lruvec, &page_list);
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
- lru_note_cost(lruvec, file, stat.nr_pageout);
item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT;
if (!cgroup_reclaim(sc))
__count_vm_events(item, nr_reclaimed);
__count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed);
__count_vm_events(PGSTEAL_ANON + file, nr_reclaimed);
+ spin_unlock_irq(&lruvec->lru_lock);
- spin_unlock_irq(&pgdat->lru_lock);
-
+ lru_note_cost(lruvec, file, stat.nr_pageout);
mem_cgroup_uncharge_list(&page_list);
free_unref_page_list(&page_list);
@@ -2003,6 +1998,23 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
return nr_reclaimed;
}
+/*
+ * shrink_active_list() moves pages from the active LRU to the inactive LRU.
+ *
+ * We move them the other way if the page is referenced by one or more
+ * processes.
+ *
+ * If the pages are mostly unmapped, the processing is fast and it is
+ * appropriate to hold lru_lock across the whole operation. But if
+ * the pages are mapped, the processing is slow (page_referenced()), so
+ * we should drop lru_lock around each page. It's impossible to balance
+ * this, so instead we remove the pages from the LRU while processing them.
+ * It is safe to rely on PG_active against the non-LRU pages in here because
+ * nobody will play with that bit on a non-LRU page.
+ *
+ * The downside is that we have to touch page->_refcount against each page.
+ * But we had to alter page->flags anyway.
+ */
static void shrink_active_list(unsigned long nr_to_scan,
struct lruvec *lruvec,
struct scan_control *sc,
@@ -2022,7 +2034,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
lru_add_drain();
- spin_lock_irq(&pgdat->lru_lock);
+ spin_lock_irq(&lruvec->lru_lock);
nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
&nr_scanned, sc, lru);
@@ -2033,7 +2045,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
__count_vm_events(PGREFILL, nr_scanned);
__count_memcg_events(lruvec_memcg(lruvec), PGREFILL, nr_scanned);
- spin_unlock_irq(&pgdat->lru_lock);
+ spin_unlock_irq(&lruvec->lru_lock);
while (!list_empty(&l_hold)) {
cond_resched();
@@ -2079,7 +2091,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
/*
* Move pages back to the lru list.
*/
- spin_lock_irq(&pgdat->lru_lock);
+ spin_lock_irq(&lruvec->lru_lock);
nr_activate = move_pages_to_lru(lruvec, &l_active);
nr_deactivate = move_pages_to_lru(lruvec, &l_inactive);
@@ -2090,7 +2102,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
__count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, nr_deactivate);
__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
- spin_unlock_irq(&pgdat->lru_lock);
+ spin_unlock_irq(&lruvec->lru_lock);
mem_cgroup_uncharge_list(&l_active);
free_unref_page_list(&l_active);
@@ -2678,10 +2690,10 @@ again:
/*
* Determine the scan balance between anon and file LRUs.
*/
- spin_lock_irq(&pgdat->lru_lock);
+ spin_lock_irq(&target_lruvec->lru_lock);
sc->anon_cost = target_lruvec->anon_cost;
sc->file_cost = target_lruvec->file_cost;
- spin_unlock_irq(&pgdat->lru_lock);
+ spin_unlock_irq(&target_lruvec->lru_lock);
/*
* Target desirable inactive:active list ratios for the anon
@@ -4257,15 +4269,13 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
*/
void check_move_unevictable_pages(struct pagevec *pvec)
{
- struct lruvec *lruvec;
- struct pglist_data *pgdat = NULL;
+ struct lruvec *lruvec = NULL;
int pgscanned = 0;
int pgrescued = 0;
int i;
for (i = 0; i < pvec->nr; i++) {
struct page *page = pvec->pages[i];
- struct pglist_data *pagepgdat = page_pgdat(page);
int nr_pages;
if (PageTransTail(page))
@@ -4274,18 +4284,12 @@ void check_move_unevictable_pages(struct pagevec *pvec)
nr_pages = thp_nr_pages(page);
pgscanned += nr_pages;
- if (pagepgdat != pgdat) {
- if (pgdat)
- spin_unlock_irq(&pgdat->lru_lock);
- pgdat = pagepgdat;
- spin_lock_irq(&pgdat->lru_lock);
- }
- lruvec = mem_cgroup_page_lruvec(page, pgdat);
-
- if (!PageLRU(page) || !PageUnevictable(page))
+		/* block memcg migration while the page moves between LRU lists */
+ if (!TestClearPageLRU(page))
continue;
- if (page_evictable(page)) {
+ lruvec = relock_page_lruvec_irq(page, lruvec);
+ if (page_evictable(page) && PageUnevictable(page)) {
enum lru_list lru = page_lru_base_type(page);
VM_BUG_ON_PAGE(PageActive(page), page);
@@ -4294,12 +4298,15 @@ void check_move_unevictable_pages(struct pagevec *pvec)
add_page_to_lru_list(page, lruvec, lru);
pgrescued += nr_pages;
}
+ SetPageLRU(page);
}
- if (pgdat) {
+ if (lruvec) {
__count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
__count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
- spin_unlock_irq(&pgdat->lru_lock);
+ unlock_page_lruvec_irq(lruvec);
+ } else if (pgscanned) {
+ count_vm_events(UNEVICTABLE_PGSCANNED, pgscanned);
}
}
EXPORT_SYMBOL_GPL(check_move_unevictable_pages);
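For reference, the per-page relock idiom used in check_move_unevictable_pages() generalizes to any pagevec walk: start with a NULL lruvec, let relock_page_lruvec_irq() release the previously held lruvec lock (if any) and take the one covering the current page, then unlock once at the end. A condensed sketch under those assumptions:

	struct lruvec *lruvec = NULL;
	int i;

	for (i = 0; i < pvec->nr; i++) {
		struct page *page = pvec->pages[i];

		lruvec = relock_page_lruvec_irq(page, lruvec);
		/* ... operate on page under its lruvec's lru_lock ... */
	}
	if (lruvec)
		unlock_page_lruvec_irq(lruvec);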