diff options
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 150 |
1 files changed, 55 insertions, 95 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7d3460c7a480..452459836b71 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -284,17 +284,6 @@ const char * const migratetype_names[MIGRATE_TYPES] = { #endif }; -static compound_page_dtor * const compound_page_dtors[NR_COMPOUND_DTORS] = { - [NULL_COMPOUND_DTOR] = NULL, - [COMPOUND_PAGE_DTOR] = free_compound_page, -#ifdef CONFIG_HUGETLB_PAGE - [HUGETLB_PAGE_DTOR] = free_huge_page, -#endif -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - [TRANSHUGE_PAGE_DTOR] = free_transhuge_page, -#endif -}; - int min_free_kbytes = 1024; int user_min_free_kbytes = -1; static int watermark_boost_factor __read_mostly = 15000; @@ -371,10 +360,16 @@ static inline int pfn_to_bitidx(const struct page *page, unsigned long pfn) return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS; } -static __always_inline -unsigned long __get_pfnblock_flags_mask(const struct page *page, - unsigned long pfn, - unsigned long mask) +/** + * get_pfnblock_flags_mask - Return the requested group of flags for the pageblock_nr_pages block of pages + * @page: The page within the block of interest + * @pfn: The target page frame number + * @mask: mask of bits that the caller is interested in + * + * Return: pageblock_bits flags + */ +unsigned long get_pfnblock_flags_mask(const struct page *page, + unsigned long pfn, unsigned long mask) { unsigned long *bitmap; unsigned long bitidx, word_bitidx; @@ -393,24 +388,10 @@ unsigned long __get_pfnblock_flags_mask(const struct page *page, return (word >> bitidx) & mask; } -/** - * get_pfnblock_flags_mask - Return the requested group of flags for the pageblock_nr_pages block of pages - * @page: The page within the block of interest - * @pfn: The target page frame number - * @mask: mask of bits that the caller is interested in - * - * Return: pageblock_bits flags - */ -unsigned long get_pfnblock_flags_mask(const struct page *page, - unsigned long pfn, unsigned long mask) -{ - return __get_pfnblock_flags_mask(page, pfn, mask); -} - static __always_inline int get_pfnblock_migratetype(const struct page *page, unsigned long pfn) { - return __get_pfnblock_flags_mask(page, pfn, MIGRATETYPE_MASK); + return get_pfnblock_flags_mask(page, pfn, MIGRATETYPE_MASK); } /** @@ -459,7 +440,7 @@ void set_pageblock_migratetype(struct page *page, int migratetype) #ifdef CONFIG_DEBUG_VM static int page_outside_zone_boundaries(struct zone *zone, struct page *page) { - int ret = 0; + int ret; unsigned seq; unsigned long pfn = page_to_pfn(page); unsigned long sp, start_pfn; @@ -468,8 +449,7 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page) seq = zone_span_seqbegin(zone); start_pfn = zone->zone_start_pfn; sp = zone->spanned_pages; - if (!zone_spans_pfn(zone, pfn)) - ret = 1; + ret = !zone_spans_pfn(zone, pfn); } while (zone_span_seqretry(zone, seq)); if (ret) @@ -539,8 +519,6 @@ out: static inline unsigned int order_to_pindex(int migratetype, int order) { - int base = order; - #ifdef CONFIG_TRANSPARENT_HUGEPAGE if (order > PAGE_ALLOC_COSTLY_ORDER) { VM_BUG_ON(order != pageblock_order); @@ -550,7 +528,7 @@ static inline unsigned int order_to_pindex(int migratetype, int order) VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER); #endif - return (MIGRATE_PCPTYPES * base) + migratetype; + return (MIGRATE_PCPTYPES * order) + migratetype; } static inline int pindex_to_order(unsigned int pindex) @@ -594,19 +572,10 @@ static inline void free_the_page(struct page *page, unsigned int order) * The remaining PAGE_SIZE pages are called "tail pages". PageTail() is encoded * in bit 0 of page->compound_head. The rest of bits is pointer to head page. * - * The first tail page's ->compound_dtor holds the offset in array of compound - * page destructors. See compound_page_dtors. - * * The first tail page's ->compound_order holds the order of allocation. * This usage means that zero-order pages may not be compound. */ -void free_compound_page(struct page *page) -{ - mem_cgroup_uncharge(page_folio(page)); - free_the_page(page, compound_order(page)); -} - void prep_compound_page(struct page *page, unsigned int order) { int i; @@ -621,10 +590,16 @@ void prep_compound_page(struct page *page, unsigned int order) void destroy_large_folio(struct folio *folio) { - enum compound_dtor_id dtor = folio->_folio_dtor; + if (folio_test_hugetlb(folio)) { + free_huge_folio(folio); + return; + } + + if (folio_test_large_rmappable(folio)) + folio_undo_large_rmappable(folio); - VM_BUG_ON_FOLIO(dtor >= NR_COMPOUND_DTORS, folio); - compound_page_dtors[dtor](&folio->page); + mem_cgroup_uncharge(folio); + free_the_page(&folio->page, folio_order(folio)); } static inline void set_buddy_order(struct page *page, unsigned int order) @@ -824,7 +799,7 @@ static inline void __free_one_page(struct page *page, * pageblock isolation could cause incorrect freepage or CMA * accounting or HIGHATOMIC accounting. */ - int buddy_mt = get_pageblock_migratetype(buddy); + int buddy_mt = get_pfnblock_migratetype(buddy, buddy_pfn); if (migratetype != buddy_mt && (!migratetype_is_mergeable(migratetype) || @@ -900,7 +875,7 @@ int split_free_page(struct page *free_page, goto out; } - mt = get_pageblock_migratetype(free_page); + mt = get_pfnblock_migratetype(free_page, free_page_pfn); if (likely(!is_migrate_isolate(mt))) __mod_zone_freepage_state(zone, -(1UL << order), mt); @@ -1132,7 +1107,7 @@ static __always_inline bool free_pages_prepare(struct page *page, VM_BUG_ON_PAGE(compound && compound_order(page) != order, page); if (compound) - ClearPageHasHWPoisoned(page); + page[1].flags &= ~PAGE_FLAGS_SECOND; for (i = 1; i < (1 << order); i++) { if (compound) bad += free_tail_page_prepare(page, page + i); @@ -1210,8 +1185,6 @@ static void free_pcppages_bulk(struct zone *zone, int count, int pindex) { unsigned long flags; - int min_pindex = 0; - int max_pindex = NR_PCP_LISTS - 1; unsigned int order; bool isolated_pageblocks; struct page *page; @@ -1234,17 +1207,10 @@ static void free_pcppages_bulk(struct zone *zone, int count, /* Remove pages from lists in a round-robin fashion. */ do { - if (++pindex > max_pindex) - pindex = min_pindex; + if (++pindex > NR_PCP_LISTS - 1) + pindex = 0; list = &pcp->lists[pindex]; - if (!list_empty(list)) - break; - - if (pindex == max_pindex) - max_pindex--; - if (pindex == min_pindex) - min_pindex++; - } while (1); + } while (list_empty(list)); order = pindex_to_order(pindex); nr_pages = 1 << order; @@ -1834,6 +1800,10 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, free_pages = move_freepages_block(zone, page, start_type, &movable_pages); + /* moving whole block can fail due to zone boundary conditions */ + if (!free_pages) + goto single_page; + /* * Determine how many pages are compatible with our allocation. * For movable allocation, it's the number of movable pages which @@ -1855,14 +1825,9 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, else alike_pages = 0; } - - /* moving whole block can fail due to zone boundary conditions */ - if (!free_pages) - goto single_page; - /* * If a sufficient number of pages in the block are either free or of - * comparable migratability as our allocation, claim the whole block. + * compatible migratability as our allocation, claim the whole block. */ if (free_pages + alike_pages >= (1 << (pageblock_order-1)) || page_group_by_mobility_disabled) @@ -1912,8 +1877,7 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, * Reserve a pageblock for exclusive use of high-order atomic allocations if * there are no empty page blocks that contain a page with a suitable order */ -static void reserve_highatomic_pageblock(struct page *page, struct zone *zone, - unsigned int alloc_order) +static void reserve_highatomic_pageblock(struct page *page, struct zone *zone) { int mt; unsigned long max_managed, flags; @@ -2353,10 +2317,10 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn, return true; } -static int nr_pcp_free(struct per_cpu_pages *pcp, int high, int batch, - bool free_high) +static int nr_pcp_free(struct per_cpu_pages *pcp, int high, bool free_high) { int min_nr_free, max_nr_free; + int batch = READ_ONCE(pcp->batch); /* Free everything if batch freeing high-order pages. */ if (unlikely(free_high)) @@ -2423,9 +2387,7 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp, high = nr_pcp_high(pcp, zone, free_high); if (pcp->count >= high) { - int batch = READ_ONCE(pcp->batch); - - free_pcppages_bulk(zone, nr_pcp_free(pcp, high, batch, free_high), pcp, pindex); + free_pcppages_bulk(zone, nr_pcp_free(pcp, high, free_high), pcp, pindex); } } @@ -3225,7 +3187,7 @@ try_this_zone: * if the pageblock should be reserved for the future */ if (unlikely(alloc_flags & ALLOC_HIGHATOMIC)) - reserve_highatomic_pageblock(page, zone, order); + reserve_highatomic_pageblock(page, zone); return page; } else { @@ -4508,10 +4470,11 @@ struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid, { struct page *page = __alloc_pages(gfp | __GFP_COMP, order, preferred_nid, nodemask); + struct folio *folio = (struct folio *)page; - if (page && order > 1) - prep_transhuge_page(page); - return (struct folio *)page; + if (folio && order > 1) + folio_prep_large_rmappable(folio); + return folio; } EXPORT_SYMBOL(__folio_alloc); @@ -5139,19 +5102,17 @@ static void __build_all_zonelists(void *data) unsigned long flags; /* - * Explicitly disable this CPU's interrupts before taking seqlock - * to prevent any IRQ handler from calling into the page allocator - * (e.g. GFP_ATOMIC) that could hit zonelist_iter_begin and livelock. + * The zonelist_update_seq must be acquired with irqsave because the + * reader can be invoked from IRQ with GFP_ATOMIC. */ - local_irq_save(flags); + write_seqlock_irqsave(&zonelist_update_seq, flags); /* - * Explicitly disable this CPU's synchronous printk() before taking - * seqlock to prevent any printk() from trying to hold port->lock, for + * Also disable synchronous printk() to prevent any printk() from + * trying to hold port->lock, for * tty_insert_flip_string_and_push_buffer() on other CPU might be * calling kmalloc(GFP_ATOMIC | __GFP_NOWARN) with port->lock held. */ printk_deferred_enter(); - write_seqlock(&zonelist_update_seq); #ifdef CONFIG_NUMA memset(node_load, 0, sizeof(node_load)); @@ -5188,9 +5149,8 @@ static void __build_all_zonelists(void *data) #endif } - write_sequnlock(&zonelist_update_seq); printk_deferred_exit(); - local_irq_restore(flags); + write_sequnlock_irqrestore(&zonelist_update_seq, flags); } static noinline void __init @@ -5694,9 +5654,9 @@ static void __setup_per_zone_wmarks(void) struct zone *zone; unsigned long flags; - /* Calculate total number of !ZONE_HIGHMEM pages */ + /* Calculate total number of !ZONE_HIGHMEM and !ZONE_MOVABLE pages */ for_each_zone(zone) { - if (!is_highmem(zone)) + if (!is_highmem(zone) && zone_idx(zone) != ZONE_MOVABLE) lowmem_pages += zone_managed_pages(zone); } @@ -5706,15 +5666,15 @@ static void __setup_per_zone_wmarks(void) spin_lock_irqsave(&zone->lock, flags); tmp = (u64)pages_min * zone_managed_pages(zone); do_div(tmp, lowmem_pages); - if (is_highmem(zone)) { + if (is_highmem(zone) || zone_idx(zone) == ZONE_MOVABLE) { /* * __GFP_HIGH and PF_MEMALLOC allocations usually don't - * need highmem pages, so cap pages_min to a small - * value here. + * need highmem and movable zones pages, so cap pages_min + * to a small value here. * * The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN) * deltas control async page reclaim, and so should - * not be capped for highmem. + * not be capped for highmem and movable zones. */ unsigned long min_pages; |