diff options
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 246 |
1 file changed, 161 insertions, 85 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3c4eb750a199..114c56c3685d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -74,6 +74,7 @@ #include <asm/div64.h> #include "internal.h" #include "shuffle.h" +#include "page_reporting.h" /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */ static DEFINE_MUTEX(pcp_batch_high_lock); @@ -95,7 +96,6 @@ DEFINE_STATIC_KEY_TRUE(vm_numa_stat_key); */ DEFINE_PER_CPU(int, _numa_mem_); /* Kernel "local memory" node */ EXPORT_PER_CPU_SYMBOL(_numa_mem_); -int _node_numa_mem_[MAX_NUMNODES]; #endif /* work_structs for global per-cpu drains */ @@ -689,6 +689,8 @@ void prep_compound_page(struct page *page, unsigned int order) set_compound_head(p, page); } atomic_set(compound_mapcount_ptr(page), -1); + if (hpage_pincount_available(page)) + atomic_set(compound_pincount_ptr(page), 0); } #ifdef CONFIG_DEBUG_PAGEALLOC @@ -791,32 +793,25 @@ static inline void set_page_order(struct page *page, unsigned int order) * * For recording page's order, we use page_private(page). */ -static inline int page_is_buddy(struct page *page, struct page *buddy, +static inline bool page_is_buddy(struct page *page, struct page *buddy, unsigned int order) { - if (page_is_guard(buddy) && page_order(buddy) == order) { - if (page_zone_id(page) != page_zone_id(buddy)) - return 0; - - VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy); + if (!page_is_guard(buddy) && !PageBuddy(buddy)) + return false; - return 1; - } + if (page_order(buddy) != order) + return false; - if (PageBuddy(buddy) && page_order(buddy) == order) { - /* - * zone check is done late to avoid uselessly - * calculating zone/node ids for pages that could - * never merge. - */ - if (page_zone_id(page) != page_zone_id(buddy)) - return 0; + /* + * zone check is done late to avoid uselessly calculating + * zone/node ids for pages that could never merge. 
+ */ + if (page_zone_id(page) != page_zone_id(buddy)) + return false; - VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy); + VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy); - return 1; - } - return 0; + return true; } #ifdef CONFIG_COMPACTION @@ -870,6 +865,78 @@ compaction_capture(struct capture_control *capc, struct page *page, } #endif /* CONFIG_COMPACTION */ +/* Used for pages not on another list */ +static inline void add_to_free_list(struct page *page, struct zone *zone, + unsigned int order, int migratetype) +{ + struct free_area *area = &zone->free_area[order]; + + list_add(&page->lru, &area->free_list[migratetype]); + area->nr_free++; +} + +/* Used for pages not on another list */ +static inline void add_to_free_list_tail(struct page *page, struct zone *zone, + unsigned int order, int migratetype) +{ + struct free_area *area = &zone->free_area[order]; + + list_add_tail(&page->lru, &area->free_list[migratetype]); + area->nr_free++; +} + +/* Used for pages which are on another list */ +static inline void move_to_free_list(struct page *page, struct zone *zone, + unsigned int order, int migratetype) +{ + struct free_area *area = &zone->free_area[order]; + + list_move(&page->lru, &area->free_list[migratetype]); +} + +static inline void del_page_from_free_list(struct page *page, struct zone *zone, + unsigned int order) +{ + /* clear reported state and update reported page count */ + if (page_reported(page)) + __ClearPageReported(page); + + list_del(&page->lru); + __ClearPageBuddy(page); + set_page_private(page, 0); + zone->free_area[order].nr_free--; +} + +/* + * If this is not the largest possible page, check if the buddy + * of the next-highest order is free. If it is, it's possible + * that pages are being freed that will coalesce soon. 
In case, + * that is happening, add the free page to the tail of the list + * so it's less likely to be used soon and more likely to be merged + * as a higher order page + */ +static inline bool +buddy_merge_likely(unsigned long pfn, unsigned long buddy_pfn, + struct page *page, unsigned int order) +{ + struct page *higher_page, *higher_buddy; + unsigned long combined_pfn; + + if (order >= MAX_ORDER - 2) + return false; + + if (!pfn_valid_within(buddy_pfn)) + return false; + + combined_pfn = buddy_pfn & pfn; + higher_page = page + (combined_pfn - pfn); + buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1); + higher_buddy = higher_page + (buddy_pfn - combined_pfn); + + return pfn_valid_within(buddy_pfn) && + page_is_buddy(higher_page, higher_buddy, order + 1); +} + /* * Freeing function for a buddy system allocator. * @@ -897,13 +964,14 @@ compaction_capture(struct capture_control *capc, struct page *page, static inline void __free_one_page(struct page *page, unsigned long pfn, struct zone *zone, unsigned int order, - int migratetype) + int migratetype, bool report) { - unsigned long combined_pfn; + struct capture_control *capc = task_capc(zone); unsigned long uninitialized_var(buddy_pfn); - struct page *buddy; + unsigned long combined_pfn; unsigned int max_order; - struct capture_control *capc = task_capc(zone); + struct page *buddy; + bool to_tail; max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1); @@ -938,7 +1006,7 @@ continue_merging: if (page_is_guard(buddy)) clear_page_guard(zone, buddy, order, migratetype); else - del_page_from_free_area(buddy, &zone->free_area[order]); + del_page_from_free_list(buddy, zone, order); combined_pfn = buddy_pfn & pfn; page = page + (combined_pfn - pfn); pfn = combined_pfn; @@ -972,35 +1040,19 @@ continue_merging: done_merging: set_page_order(page, order); - /* - * If this is not the largest possible page, check if the buddy - * of the next-highest order is free. 
If it is, it's possible - * that pages are being freed that will coalesce soon. In case, - * that is happening, add the free page to the tail of the list - * so it's less likely to be used soon and more likely to be merged - * as a higher order page - */ - if ((order < MAX_ORDER-2) && pfn_valid_within(buddy_pfn) - && !is_shuffle_order(order)) { - struct page *higher_page, *higher_buddy; - combined_pfn = buddy_pfn & pfn; - higher_page = page + (combined_pfn - pfn); - buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1); - higher_buddy = higher_page + (buddy_pfn - combined_pfn); - if (pfn_valid_within(buddy_pfn) && - page_is_buddy(higher_page, higher_buddy, order + 1)) { - add_to_free_area_tail(page, &zone->free_area[order], - migratetype); - return; - } - } - if (is_shuffle_order(order)) - add_to_free_area_random(page, &zone->free_area[order], - migratetype); + to_tail = shuffle_pick_tail(); else - add_to_free_area(page, &zone->free_area[order], migratetype); + to_tail = buddy_merge_likely(pfn, buddy_pfn, page, order); + if (to_tail) + add_to_free_list_tail(page, zone, order, migratetype); + else + add_to_free_list(page, zone, order, migratetype); + + /* Notify page reporting subsystem of freed page */ + if (report) + page_reporting_notify_free(order); } /* @@ -1152,7 +1204,7 @@ static __always_inline bool free_pages_prepare(struct page *page, if (PageMappingFlags(page)) page->mapping = NULL; if (memcg_kmem_enabled() && PageKmemcg(page)) - __memcg_kmem_uncharge(page, order); + __memcg_kmem_uncharge_page(page, order); if (check_free) bad += free_pages_check(page); if (bad) @@ -1317,7 +1369,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, if (unlikely(isolated_pageblocks)) mt = get_pageblock_migratetype(page); - __free_one_page(page, page_to_pfn(page), zone, 0, mt); + __free_one_page(page, page_to_pfn(page), zone, 0, mt, true); trace_mm_page_pcpu_drain(page, 0, mt); } spin_unlock(&zone->lock); @@ -1333,7 +1385,7 @@ static void free_one_page(struct 
zone *zone, is_migrate_isolate(migratetype))) { migratetype = get_pfnblock_migratetype(page, pfn); } - __free_one_page(page, pfn, zone, order, migratetype); + __free_one_page(page, pfn, zone, order, migratetype, true); spin_unlock(&zone->lock); } @@ -2014,13 +2066,11 @@ void __init init_cma_reserved_pageblock(struct page *page) * -- nyc */ static inline void expand(struct zone *zone, struct page *page, - int low, int high, struct free_area *area, - int migratetype) + int low, int high, int migratetype) { unsigned long size = 1 << high; while (high > low) { - area--; high--; size >>= 1; VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]); @@ -2034,7 +2084,7 @@ static inline void expand(struct zone *zone, struct page *page, if (set_page_guard(zone, &page[size], high, migratetype)) continue; - add_to_free_area(&page[size], area, migratetype); + add_to_free_list(&page[size], zone, high, migratetype); set_page_order(&page[size], high); } } @@ -2192,8 +2242,8 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, page = get_page_from_free_area(area, migratetype); if (!page) continue; - del_page_from_free_area(page, area); - expand(zone, page, order, current_order, area, migratetype); + del_page_from_free_list(page, zone, current_order); + expand(zone, page, order, current_order, migratetype); set_pcppage_migratetype(page, migratetype); return page; } @@ -2267,7 +2317,7 @@ static int move_freepages(struct zone *zone, VM_BUG_ON_PAGE(page_zone(page) != zone, page); order = page_order(page); - move_to_free_area(page, &zone->free_area[order], migratetype); + move_to_free_list(page, zone, order, migratetype); page += 1 << order; pages_moved += 1 << order; } @@ -2383,7 +2433,6 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, unsigned int alloc_flags, int start_type, bool whole_block) { unsigned int current_order = page_order(page); - struct free_area *area; int free_pages, movable_pages, alike_pages; int old_block_type; @@ 
-2454,8 +2503,7 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, return; single_page: - area = &zone->free_area[current_order]; - move_to_free_area(page, area, start_type); + move_to_free_list(page, zone, current_order, start_type); } /* @@ -3126,7 +3174,6 @@ EXPORT_SYMBOL_GPL(split_page); int __isolate_free_page(struct page *page, unsigned int order) { - struct free_area *area = &page_zone(page)->free_area[order]; unsigned long watermark; struct zone *zone; int mt; @@ -3152,7 +3199,7 @@ int __isolate_free_page(struct page *page, unsigned int order) /* Remove page from free list */ - del_page_from_free_area(page, area); + del_page_from_free_list(page, zone, order); /* * Set the pageblock if the isolated page is at least half of a @@ -3173,6 +3220,25 @@ int __isolate_free_page(struct page *page, unsigned int order) return 1UL << order; } +/** + * __putback_isolated_page - Return a now-isolated page back where we got it + * @page: Page that was isolated + * @order: Order of the isolated page + * + * This function is meant to return a page pulled from the free lists via + * __isolate_free_page back to the free lists they were pulled from. + */ +void __putback_isolated_page(struct page *page, unsigned int order, int mt) +{ + struct zone *zone = page_zone(page); + + /* zone lock should be held when this function is called */ + lockdep_assert_held(&zone->lock); + + /* Return isolated page to tail of freelist. 
*/ + __free_one_page(page, page_to_pfn(page), zone, order, mt, false); +} + /* * Update NUMA hit/miss statistics * @@ -3459,8 +3525,7 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, return true; } #endif - if (alloc_harder && - !list_empty(&area->free_list[MIGRATE_HIGHATOMIC])) + if (alloc_harder && !free_area_empty(area, MIGRATE_HIGHATOMIC)) return true; } return false; @@ -3535,10 +3600,13 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone) static inline unsigned int alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask) { - unsigned int alloc_flags = 0; + unsigned int alloc_flags; - if (gfp_mask & __GFP_KSWAPD_RECLAIM) - alloc_flags |= ALLOC_KSWAPD; + /* + * __GFP_KSWAPD_RECLAIM is assumed to be the same as ALLOC_KSWAPD + * to save a branch. + */ + alloc_flags = (__force int) (gfp_mask & __GFP_KSWAPD_RECLAIM); #ifdef CONFIG_ZONE_DMA32 if (!zone) @@ -4174,8 +4242,13 @@ gfp_to_alloc_flags(gfp_t gfp_mask) { unsigned int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET; - /* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */ + /* + * __GFP_HIGH is assumed to be the same as ALLOC_HIGH + * and __GFP_KSWAPD_RECLAIM is assumed to be the same as ALLOC_KSWAPD + * to save two branches. + */ BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH); + BUILD_BUG_ON(__GFP_KSWAPD_RECLAIM != (__force gfp_t) ALLOC_KSWAPD); /* * The caller may dip into page reserves a bit more if the caller @@ -4183,7 +4256,8 @@ gfp_to_alloc_flags(gfp_t gfp_mask) * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will * set both ALLOC_HARDER (__GFP_ATOMIC) and ALLOC_HIGH (__GFP_HIGH). 
*/ - alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH); + alloc_flags |= (__force int) + (gfp_mask & (__GFP_HIGH | __GFP_KSWAPD_RECLAIM)); if (gfp_mask & __GFP_ATOMIC) { /* @@ -4200,9 +4274,6 @@ gfp_to_alloc_flags(gfp_t gfp_mask) } else if (unlikely(rt_task(current)) && !in_interrupt()) alloc_flags |= ALLOC_HARDER; - if (gfp_mask & __GFP_KSWAPD_RECLAIM) - alloc_flags |= ALLOC_KSWAPD; - #ifdef CONFIG_CMA if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE) alloc_flags |= ALLOC_CMA; @@ -4745,14 +4816,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, * Restore the original nodemask if it was potentially replaced with * &cpuset_current_mems_allowed to optimize the fast-path attempt. */ - if (unlikely(ac.nodemask != nodemask)) - ac.nodemask = nodemask; + ac.nodemask = nodemask; page = __alloc_pages_slowpath(alloc_mask, order, &ac); out: if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page && - unlikely(__memcg_kmem_charge(page, gfp_mask, order) != 0)) { + unlikely(__memcg_kmem_charge_page(page, gfp_mask, order) != 0)) { __free_pages(page, order); page = NULL; } @@ -7867,8 +7937,8 @@ int __meminit init_per_zone_wmark_min(void) min_free_kbytes = new_min_free_kbytes; if (min_free_kbytes < 128) min_free_kbytes = 128; - if (min_free_kbytes > 65536) - min_free_kbytes = 65536; + if (min_free_kbytes > 262144) + min_free_kbytes = 262144; } else { pr_warn("min_free_kbytes is not updated to %d because user defined value %d is preferred\n", new_min_free_kbytes, user_min_free_kbytes); @@ -8253,15 +8323,20 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, /* * Hugepages are not in LRU lists, but they're movable. + * THPs are on the LRU, but need to be counted as #small pages. * We need not scan over tail pages because we don't * handle each tail page individually in migration. 
*/ - if (PageHuge(page)) { + if (PageHuge(page) || PageTransCompound(page)) { struct page *head = compound_head(page); unsigned int skip_pages; - if (!hugepage_migration_supported(page_hstate(head))) + if (PageHuge(page)) { + if (!hugepage_migration_supported(page_hstate(head))) + return page; + } else if (!PageLRU(head) && !__PageMovable(head)) { return page; + } skip_pages = compound_nr(head) - (page - head); iter += skip_pages - 1; @@ -8402,6 +8477,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, .ignore_skip_hint = true, .no_set_skip_hint = true, .gfp_mask = current_gfp_context(gfp_mask), + .alloc_contig = true, }; INIT_LIST_HEAD(&cc.migratepages); @@ -8709,7 +8785,7 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) BUG_ON(!PageBuddy(page)); order = page_order(page); offlined_pages += 1 << order; - del_page_from_free_area(page, &zone->free_area[order]); + del_page_from_free_list(page, zone, order); pfn += (1 << order); } spin_unlock_irqrestore(&zone->lock, flags); |