diff options
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- | mm/huge_memory.c | 123 |
1 files changed, 95 insertions, 28 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index de1f15969e27..73fc517c08d2 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -496,11 +496,25 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) return pmd; } -static inline struct list_head *page_deferred_list(struct page *page) +#ifdef CONFIG_MEMCG +static inline struct deferred_split *get_deferred_split_queue(struct page *page) { - /* ->lru in the tail pages is occupied by compound_head. */ - return &page[2].deferred_list; + struct mem_cgroup *memcg = compound_head(page)->mem_cgroup; + struct pglist_data *pgdat = NODE_DATA(page_to_nid(page)); + + if (memcg) + return &memcg->deferred_split_queue; + else + return &pgdat->deferred_split_queue; +} +#else +static inline struct deferred_split *get_deferred_split_queue(struct page *page) +{ + struct pglist_data *pgdat = NODE_DATA(page_to_nid(page)); + + return &pgdat->deferred_split_queue; } +#endif void prep_transhuge_page(struct page *page) { @@ -2497,6 +2511,8 @@ static void __split_huge_page(struct page *page, struct list_head *list, struct page *head = compound_head(page); pg_data_t *pgdat = page_pgdat(head); struct lruvec *lruvec; + struct address_space *swap_cache = NULL; + unsigned long offset = 0; int i; lruvec = mem_cgroup_page_lruvec(head, pgdat); @@ -2504,6 +2520,14 @@ static void __split_huge_page(struct page *page, struct list_head *list, /* complete memcg works before add pages to LRU */ mem_cgroup_split_huge_fixup(head); + if (PageAnon(head) && PageSwapCache(head)) { + swp_entry_t entry = { .val = page_private(head) }; + + offset = swp_offset(entry); + swap_cache = swap_address_space(entry); + xa_lock(&swap_cache->i_pages); + } + for (i = HPAGE_PMD_NR - 1; i >= 1; i--) { __split_huge_page_tail(head, i, lruvec, list); /* Some pages can be beyond i_size: drop them from page cache */ @@ -2513,6 +2537,12 @@ static void __split_huge_page(struct page *page, struct list_head *list, if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head)) shmem_uncharge(head->mapping->host, 1); put_page(head + i); + } else if (!PageAnon(page)) { + __xa_store(&head->mapping->i_pages, head[i].index, + head + i, 0); + } else if (swap_cache) { + __xa_store(&swap_cache->i_pages, offset + i, + head + i, 0); } } @@ -2523,10 +2553,12 @@ static void __split_huge_page(struct page *page, struct list_head *list, /* See comment in __split_huge_page_tail() */ if (PageAnon(head)) { /* Additional pin to swap cache */ - if (PageSwapCache(head)) + if (PageSwapCache(head)) { page_ref_add(head, 2); - else + xa_unlock(&swap_cache->i_pages); + } else { page_ref_inc(head); + } } else { /* Additional pin to page cache */ page_ref_add(head, 2); @@ -2673,6 +2705,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) { struct page *head = compound_head(page); struct pglist_data *pgdata = NODE_DATA(page_to_nid(head)); + struct deferred_split *ds_queue = get_deferred_split_queue(page); struct anon_vma *anon_vma = NULL; struct address_space *mapping = NULL; int count, mapcount, extra_pins, ret; @@ -2759,17 +2792,17 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) } /* Prevent deferred_split_scan() touching ->_refcount */ - spin_lock(&pgdata->split_queue_lock); + spin_lock(&ds_queue->split_queue_lock); count = page_count(head); mapcount = total_mapcount(head); if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) { if (!list_empty(page_deferred_list(head))) { - pgdata->split_queue_len--; + ds_queue->split_queue_len--; list_del(page_deferred_list(head)); } if (mapping) __dec_node_page_state(page, NR_SHMEM_THPS); - spin_unlock(&pgdata->split_queue_lock); + spin_unlock(&ds_queue->split_queue_lock); __split_huge_page(page, list, end, flags); if (PageSwapCache(head)) { swp_entry_t entry = { .val = page_private(head) }; @@ -2786,7 +2819,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) dump_page(page, "total_mapcount(head) > 0"); BUG(); } - spin_unlock(&pgdata->split_queue_lock); + spin_unlock(&ds_queue->split_queue_lock); fail: if (mapping) xa_unlock(&mapping->i_pages); spin_unlock_irqrestore(&pgdata->lru_lock, flags); @@ -2808,53 +2841,86 @@ out: void free_transhuge_page(struct page *page) { - struct pglist_data *pgdata = NODE_DATA(page_to_nid(page)); + struct deferred_split *ds_queue = get_deferred_split_queue(page); unsigned long flags; - spin_lock_irqsave(&pgdata->split_queue_lock, flags); + spin_lock_irqsave(&ds_queue->split_queue_lock, flags); if (!list_empty(page_deferred_list(page))) { - pgdata->split_queue_len--; + ds_queue->split_queue_len--; list_del(page_deferred_list(page)); } - spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); + spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); free_compound_page(page); } void deferred_split_huge_page(struct page *page) { - struct pglist_data *pgdata = NODE_DATA(page_to_nid(page)); + struct deferred_split *ds_queue = get_deferred_split_queue(page); +#ifdef CONFIG_MEMCG + struct mem_cgroup *memcg = compound_head(page)->mem_cgroup; +#endif unsigned long flags; VM_BUG_ON_PAGE(!PageTransHuge(page), page); - spin_lock_irqsave(&pgdata->split_queue_lock, flags); + /* + * The try_to_unmap() in page reclaim path might reach here too, + * this may cause a race condition to corrupt deferred split queue. + * And, if page reclaim is already handling the same page, it is + * unnecessary to handle it again in shrinker. + * + * Check PageSwapCache to determine if the page is being + * handled by page reclaim since THP swap would add the page into + * swap cache before calling try_to_unmap(). + */ + if (PageSwapCache(page)) + return; + + spin_lock_irqsave(&ds_queue->split_queue_lock, flags); if (list_empty(page_deferred_list(page))) { count_vm_event(THP_DEFERRED_SPLIT_PAGE); - list_add_tail(page_deferred_list(page), &pgdata->split_queue); - pgdata->split_queue_len++; + list_add_tail(page_deferred_list(page), &ds_queue->split_queue); + ds_queue->split_queue_len++; +#ifdef CONFIG_MEMCG + if (memcg) + memcg_set_shrinker_bit(memcg, page_to_nid(page), + deferred_split_shrinker.id); +#endif } - spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); + spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); } static unsigned long deferred_split_count(struct shrinker *shrink, struct shrink_control *sc) { struct pglist_data *pgdata = NODE_DATA(sc->nid); - return READ_ONCE(pgdata->split_queue_len); + struct deferred_split *ds_queue = &pgdata->deferred_split_queue; + +#ifdef CONFIG_MEMCG + if (sc->memcg) + ds_queue = &sc->memcg->deferred_split_queue; +#endif + return READ_ONCE(ds_queue->split_queue_len); } static unsigned long deferred_split_scan(struct shrinker *shrink, struct shrink_control *sc) { struct pglist_data *pgdata = NODE_DATA(sc->nid); + struct deferred_split *ds_queue = &pgdata->deferred_split_queue; unsigned long flags; LIST_HEAD(list), *pos, *next; struct page *page; int split = 0; - spin_lock_irqsave(&pgdata->split_queue_lock, flags); +#ifdef CONFIG_MEMCG + if (sc->memcg) + ds_queue = &sc->memcg->deferred_split_queue; +#endif + + spin_lock_irqsave(&ds_queue->split_queue_lock, flags); /* Take pin on all head pages to avoid freeing them under us */ - list_for_each_safe(pos, next, &pgdata->split_queue) { + list_for_each_safe(pos, next, &ds_queue->split_queue) { page = list_entry((void *)pos, struct page, mapping); page = compound_head(page); if (get_page_unless_zero(page)) { @@ -2862,12 +2928,12 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, } else { /* We lost race with put_compound_page() */ list_del_init(page_deferred_list(page)); - pgdata->split_queue_len--; + ds_queue->split_queue_len--; } if (!--sc->nr_to_scan) break; } - spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); + spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); list_for_each_safe(pos, next, &list) { page = list_entry((void *)pos, struct page, mapping); @@ -2881,15 +2947,15 @@ next: put_page(page); } - spin_lock_irqsave(&pgdata->split_queue_lock, flags); - list_splice_tail(&list, &pgdata->split_queue); - spin_unlock_irqrestore(&pgdata->split_queue_lock, flags); + spin_lock_irqsave(&ds_queue->split_queue_lock, flags); + list_splice_tail(&list, &ds_queue->split_queue); + spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); /* * Stop shrinker if we didn't split any page, but the queue is empty. * This can happen if pages were freed under us. */ - if (!split && list_empty(&pgdata->split_queue)) + if (!split && list_empty(&ds_queue->split_queue)) return SHRINK_STOP; return split; } @@ -2898,7 +2964,8 @@ static struct shrinker deferred_split_shrinker = { .count_objects = deferred_split_count, .scan_objects = deferred_split_scan, .seeks = DEFAULT_SEEKS, - .flags = SHRINKER_NUMA_AWARE, + .flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE | + SHRINKER_NONSLAB, }; #ifdef CONFIG_DEBUG_FS |