diff options
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 241 |
1 files changed, 157 insertions, 84 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e0ff3a811ec5..23f5066bd4a5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -78,6 +78,34 @@ #include "shuffle.h" #include "page_reporting.h" +/* Free Page Internal flags: for internal, non-pcp variants of free_pages(). */ +typedef int __bitwise fpi_t; + +/* No special request */ +#define FPI_NONE ((__force fpi_t)0) + +/* + * Skip free page reporting notification for the (possibly merged) page. + * This does not hinder free page reporting from grabbing the page, + * reporting it and marking it "reported" - it only skips notifying + * the free page reporting infrastructure about a newly freed page. For + * example, used when temporarily pulling a page from a freelist and + * putting it back unmodified. + */ +#define FPI_SKIP_REPORT_NOTIFY ((__force fpi_t)BIT(0)) + +/* + * Place the (possibly merged) page to the tail of the freelist. Will ignore + * page shuffling (relevant code - e.g., memory onlining - is expected to + * shuffle the whole zone). + * + * Note: No code should rely on this flag for correctness - it's purely + * to allow for optimizations when handing back either fresh pages + * (memory onlining) or untouched pages (page isolation, free page + * reporting). + */ +#define FPI_TO_TAIL ((__force fpi_t)BIT(1)) + /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */ static DEFINE_MUTEX(pcp_batch_high_lock); #define MIN_PERCPU_PAGELIST_FRACTION (8) @@ -247,7 +275,8 @@ bool pm_suspended_storage(void) unsigned int pageblock_order __read_mostly; #endif -static void __free_pages_ok(struct page *page, unsigned int order); +static void __free_pages_ok(struct page *page, unsigned int order, + fpi_t fpi_flags); /* * results with 256, 32 in the lowmem_reserve sysctl: @@ -659,7 +688,7 @@ out: void free_compound_page(struct page *page) { mem_cgroup_uncharge(page); - __free_pages_ok(page, compound_order(page)); + __free_pages_ok(page, compound_order(page), FPI_NONE); } void prep_compound_page(struct page *page, unsigned int order) @@ -763,7 +792,7 @@ static inline void clear_page_guard(struct zone *zone, struct page *page, unsigned int order, int migratetype) {} #endif -static inline void set_page_order(struct page *page, unsigned int order) +static inline void set_buddy_order(struct page *page, unsigned int order) { set_page_private(page, order); __SetPageBuddy(page); @@ -788,7 +817,7 @@ static inline bool page_is_buddy(struct page *page, struct page *buddy, if (!page_is_guard(buddy) && !PageBuddy(buddy)) return false; - if (page_order(buddy) != order) + if (buddy_order(buddy) != order) return false; /* @@ -873,13 +902,17 @@ static inline void add_to_free_list_tail(struct page *page, struct zone *zone, area->nr_free++; } -/* Used for pages which are on another list */ +/* + * Used for pages which are on another list. Move the pages to the tail + * of the list - so the moved pages won't immediately be considered for + * allocation again (e.g., optimization for memory onlining). + */ static inline void move_to_free_list(struct page *page, struct zone *zone, unsigned int order, int migratetype) { struct free_area *area = &zone->free_area[order]; - list_move(&page->lru, &area->free_list[migratetype]); + list_move_tail(&page->lru, &area->free_list[migratetype]); } static inline void del_page_from_free_list(struct page *page, struct zone *zone, @@ -952,7 +985,7 @@ buddy_merge_likely(unsigned long pfn, unsigned long buddy_pfn, static inline void __free_one_page(struct page *page, unsigned long pfn, struct zone *zone, unsigned int order, - int migratetype, bool report) + int migratetype, fpi_t fpi_flags) { struct capture_control *capc = task_capc(zone); unsigned long buddy_pfn; @@ -1026,9 +1059,11 @@ continue_merging: } done_merging: - set_page_order(page, order); + set_buddy_order(page, order); - if (is_shuffle_order(order)) + if (fpi_flags & FPI_TO_TAIL) + to_tail = true; + else if (is_shuffle_order(order)) to_tail = shuffle_pick_tail(); else to_tail = buddy_merge_likely(pfn, buddy_pfn, page, order); @@ -1039,7 +1074,7 @@ done_merging: add_to_free_list(page, zone, order, migratetype); /* Notify page reporting subsystem of freed page */ - if (report) + if (!(fpi_flags & FPI_SKIP_REPORT_NOTIFY)) page_reporting_notify_free(order); } @@ -1174,6 +1209,17 @@ static __always_inline bool free_pages_prepare(struct page *page, trace_mm_page_free(page, order); + if (unlikely(PageHWPoison(page)) && !order) { + /* + * Do not let hwpoison pages hit pcplists/buddy + * Untie memcg state and reset page's owner + */ + if (memcg_kmem_enabled() && PageKmemcg(page)) + __memcg_kmem_uncharge_page(page, order); + reset_page_owner(page, order); + return false; + } + /* * Check tail pages before head page information is cleared to * avoid checking PageCompound for order-0 pages. @@ -1369,7 +1415,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, if (unlikely(isolated_pageblocks)) mt = get_pageblock_migratetype(page); - __free_one_page(page, page_to_pfn(page), zone, 0, mt, true); + __free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE); trace_mm_page_pcpu_drain(page, 0, mt); } spin_unlock(&zone->lock); @@ -1378,14 +1424,14 @@ static void free_pcppages_bulk(struct zone *zone, int count, static void free_one_page(struct zone *zone, struct page *page, unsigned long pfn, unsigned int order, - int migratetype) + int migratetype, fpi_t fpi_flags) { spin_lock(&zone->lock); if (unlikely(has_isolate_pageblock(zone) || is_migrate_isolate(migratetype))) { migratetype = get_pfnblock_migratetype(page, pfn); } - __free_one_page(page, pfn, zone, order, migratetype, true); + __free_one_page(page, pfn, zone, order, migratetype, fpi_flags); spin_unlock(&zone->lock); } @@ -1463,7 +1509,8 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end) } } -static void __free_pages_ok(struct page *page, unsigned int order) +static void __free_pages_ok(struct page *page, unsigned int order, + fpi_t fpi_flags) { unsigned long flags; int migratetype; @@ -1475,7 +1522,8 @@ static void __free_pages_ok(struct page *page, unsigned int order) migratetype = get_pfnblock_migratetype(page, pfn); local_irq_save(flags); __count_vm_events(PGFREE, 1 << order); - free_one_page(page_zone(page), page, pfn, order, migratetype); + free_one_page(page_zone(page), page, pfn, order, migratetype, + fpi_flags); local_irq_restore(flags); } @@ -1485,6 +1533,11 @@ void __free_pages_core(struct page *page, unsigned int order) struct page *p = page; unsigned int loop; + /* + * When initializing the memmap, __init_single_page() sets the refcount + * of all pages to 1 ("allocated"/"not free"). We have to set the + * refcount of all involved pages to 0. + */ prefetchw(p); for (loop = 0; loop < (nr_pages - 1); loop++, p++) { prefetchw(p + 1); @@ -1495,8 +1548,12 @@ void __free_pages_core(struct page *page, unsigned int order) set_page_count(p, 0); atomic_long_add(nr_pages, &page_zone(page)->managed_pages); - set_page_refcounted(page); - __free_pages(page, order); + + /* + * Bypass PCP and place fresh pages right to the tail, primarily + * relevant for memory onlining. + */ + __free_pages_ok(page, order, FPI_TO_TAIL); } #ifdef CONFIG_NEED_MULTIPLE_NODES @@ -2121,7 +2178,7 @@ static inline void expand(struct zone *zone, struct page *page, continue; add_to_free_list(&page[size], zone, high, migratetype); - set_page_order(&page[size], high); + set_buddy_order(&page[size], high); } } @@ -2299,7 +2356,7 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone, #endif /* - * Move the free pages in a range to the free lists of the requested type. + * Move the free pages in a range to the freelist tail of the requested type. * Note that start_page and end_pages are not aligned on a pageblock * boundary. If alignment is required, use move_freepages_block() */ @@ -2335,7 +2392,7 @@ static int move_freepages(struct zone *zone, VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page); VM_BUG_ON_PAGE(page_zone(page) != zone, page); - order = page_order(page); + order = buddy_order(page); move_to_free_list(page, zone, order, migratetype); page += 1 << order; pages_moved += 1 << order; @@ -2459,7 +2516,7 @@ static inline void boost_watermark(struct zone *zone) static void steal_suitable_fallback(struct zone *zone, struct page *page, unsigned int alloc_flags, int start_type, bool whole_block) { - unsigned int current_order = page_order(page); + unsigned int current_order = buddy_order(page); int free_pages, movable_pages, alike_pages; int old_block_type; @@ -3123,7 +3180,8 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn) */ if (migratetype >= MIGRATE_PCPTYPES) { if (unlikely(is_migrate_isolate(migratetype))) { - free_one_page(zone, page, pfn, 0, migratetype); + free_one_page(zone, page, pfn, 0, migratetype, + FPI_NONE); return; } migratetype = MIGRATE_MOVABLE; @@ -3209,7 +3267,7 @@ void split_page(struct page *page, unsigned int order) for (i = 1; i < (1 << order); i++) set_page_refcounted(page + i); - split_page_owner(page, order); + split_page_owner(page, 1 << order); } EXPORT_SYMBOL_GPL(split_page); @@ -3278,7 +3336,8 @@ void __putback_isolated_page(struct page *page, unsigned int order, int mt) lockdep_assert_held(&zone->lock); /* Return isolated page to tail of freelist. */ - __free_one_page(page, page_to_pfn(page), zone, order, mt, false); + __free_one_page(page, page_to_pfn(page), zone, order, mt, + FPI_SKIP_REPORT_NOTIFY | FPI_TO_TAIL); } /* @@ -4945,7 +5004,7 @@ static inline void free_the_page(struct page *page, unsigned int order) if (order == 0) /* Via pcp? */ free_unref_page(page); else - __free_pages_ok(page, order); + __free_pages_ok(page, order, FPI_NONE); } void __free_pages(struct page *page, unsigned int order) @@ -5979,10 +6038,15 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn) * Initially all pages are reserved - free ones are freed * up by memblock_free_all() once the early boot process is * done. Non-atomic initialization, single-pass. + * + * All aligned pageblocks are initialized to the specified migratetype + * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related + * zone stats (e.g., nr_isolate_pageblock) are touched. */ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, - unsigned long start_pfn, enum meminit_context context, - struct vmem_altmap *altmap) + unsigned long start_pfn, + enum meminit_context context, + struct vmem_altmap *altmap, int migratetype) { unsigned long pfn, end_pfn = start_pfn + size; struct page *page; @@ -6026,19 +6090,12 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, __SetPageReserved(page); /* - * Mark the block movable so that blocks are reserved for - * movable at startup. This will force kernel allocations - * to reserve their blocks rather than leaking throughout - * the address space during boot when many long-lived - * kernel allocations are made. - * - * bitmap is created for zone's valid pfn range. but memmap - * can be created for invalid pages (for alignment) - * check here not to call set_pageblock_migratetype() against - * pfn out of zone. + * Usually, we want to mark the pageblock MIGRATE_MOVABLE, + * such that unmovable allocations won't be scattered all + * over the place during system boot. */ - if (!(pfn & (pageblock_nr_pages - 1))) { - set_pageblock_migratetype(page, MIGRATE_MOVABLE); + if (IS_ALIGNED(pfn, pageblock_nr_pages)) { + set_pageblock_migratetype(page, migratetype); cond_resched(); } pfn++; @@ -6100,15 +6157,10 @@ void __ref memmap_init_zone_device(struct zone *zone, * the address space during boot when many long-lived * kernel allocations are made. * - * bitmap is created for zone's valid pfn range. but memmap - * can be created for invalid pages (for alignment) - * check here not to call set_pageblock_migratetype() against - * pfn out of zone. - * * Please note that MEMINIT_HOTPLUG path doesn't clear memmap * because this is done early in section_activate() */ - if (!(pfn & (pageblock_nr_pages - 1))) { + if (IS_ALIGNED(pfn, pageblock_nr_pages)) { set_pageblock_migratetype(page, MIGRATE_MOVABLE); cond_resched(); } @@ -6143,7 +6195,7 @@ void __meminit __weak memmap_init(unsigned long size, int nid, if (end_pfn > start_pfn) { size = end_pfn - start_pfn; memmap_init_zone(size, nid, zone, start_pfn, - MEMINIT_EARLY, NULL); + MEMINIT_EARLY, NULL, MIGRATE_MOVABLE); } } } @@ -8292,7 +8344,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, */ if (!page_ref_count(page)) { if (PageBuddy(page)) - iter += (1 << page_order(page)) - 1; + iter += (1 << buddy_order(page)) - 1; continue; } @@ -8457,7 +8509,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, ret = start_isolate_page_range(pfn_max_align_down(start), pfn_max_align_up(end), migratetype, 0); - if (ret < 0) + if (ret) return ret; /* @@ -8505,7 +8557,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, } if (outer_start != start) { - order = page_order(pfn_to_page(outer_start)); + order = buddy_order(pfn_to_page(outer_start)); /* * outer_start page could be small order buddy page and @@ -8693,35 +8745,21 @@ void zone_pcp_reset(struct zone *zone) #ifdef CONFIG_MEMORY_HOTREMOVE /* - * All pages in the range must be in a single zone and isolated - * before calling this. + * All pages in the range must be in a single zone, must not contain holes, + * must span full sections, and must be isolated before calling this function. */ -unsigned long -__offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) +void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) { + unsigned long pfn = start_pfn; struct page *page; struct zone *zone; unsigned int order; - unsigned long pfn; unsigned long flags; - unsigned long offlined_pages = 0; - - /* find the first valid pfn */ - for (pfn = start_pfn; pfn < end_pfn; pfn++) - if (pfn_valid(pfn)) - break; - if (pfn == end_pfn) - return offlined_pages; offline_mem_sections(pfn, end_pfn); zone = page_zone(pfn_to_page(pfn)); spin_lock_irqsave(&zone->lock, flags); - pfn = start_pfn; while (pfn < end_pfn) { - if (!pfn_valid(pfn)) { - pfn++; - continue; - } page = pfn_to_page(pfn); /* * The HWPoisoned page may be not in buddy system, and @@ -8729,7 +8767,6 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) */ if (unlikely(!PageBuddy(page) && PageHWPoison(page))) { pfn++; - offlined_pages++; continue; } /* @@ -8740,20 +8777,16 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) BUG_ON(page_count(page)); BUG_ON(PageBuddy(page)); pfn++; - offlined_pages++; continue; } BUG_ON(page_count(page)); BUG_ON(!PageBuddy(page)); - order = page_order(page); - offlined_pages += 1 << order; + order = buddy_order(page); del_page_from_free_list(page, zone, order); pfn += (1 << order); } spin_unlock_irqrestore(&zone->lock, flags); - - return offlined_pages; } #endif @@ -8768,7 +8801,7 @@ bool is_free_buddy_page(struct page *page) for (order = 0; order < MAX_ORDER; order++) { struct page *page_head = page - (pfn & ((1 << order) - 1)); - if (PageBuddy(page_head) && page_order(page_head) >= order) + if (PageBuddy(page_head) && buddy_order(page_head) >= order) break; } spin_unlock_irqrestore(&zone->lock, flags); @@ -8778,30 +8811,70 @@ bool is_free_buddy_page(struct page *page) #ifdef CONFIG_MEMORY_FAILURE /* - * Set PG_hwpoison flag if a given page is confirmed to be a free page. This - * test is performed under the zone lock to prevent a race against page - * allocation. + * Break down a higher-order page in sub-pages, and keep our target out of + * buddy allocator. */ -bool set_hwpoison_free_buddy_page(struct page *page) +static void break_down_buddy_pages(struct zone *zone, struct page *page, + struct page *target, int low, int high, + int migratetype) +{ + unsigned long size = 1 << high; + struct page *current_buddy, *next_page; + + while (high > low) { + high--; + size >>= 1; + + if (target >= &page[size]) { + next_page = page + size; + current_buddy = page; + } else { + next_page = page; + current_buddy = page + size; + } + + if (set_page_guard(zone, current_buddy, high, migratetype)) + continue; + + if (current_buddy != target) { + add_to_free_list(current_buddy, zone, high, migratetype); + set_buddy_order(current_buddy, high); + page = next_page; + } + } +} + +/* + * Take a page that will be marked as poisoned off the buddy allocator. + */ +bool take_page_off_buddy(struct page *page) { struct zone *zone = page_zone(page); unsigned long pfn = page_to_pfn(page); unsigned long flags; unsigned int order; - bool hwpoisoned = false; + bool ret = false; spin_lock_irqsave(&zone->lock, flags); for (order = 0; order < MAX_ORDER; order++) { struct page *page_head = page - (pfn & ((1 << order) - 1)); + int page_order = buddy_order(page_head); - if (PageBuddy(page_head) && page_order(page_head) >= order) { - if (!TestSetPageHWPoison(page)) - hwpoisoned = true; + if (PageBuddy(page_head) && page_order >= order) { + unsigned long pfn_head = page_to_pfn(page_head); + int migratetype = get_pfnblock_migratetype(page_head, + pfn_head); + + del_page_from_free_list(page_head, zone, page_order); + break_down_buddy_pages(zone, page_head, page, 0, + page_order, migratetype); + ret = true; break; } + if (page_count(page_head) > 0) + break; } spin_unlock_irqrestore(&zone->lock, flags); - - return hwpoisoned; + return ret; } #endif |