diff options
Diffstat (limited to 'mm/mm_init.c')
-rw-r--r-- | mm/mm_init.c | 161 |
1 files changed, 109 insertions, 52 deletions
diff --git a/mm/mm_init.c b/mm/mm_init.c index 7f7f9c677854..a1963c3322af 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -259,6 +259,8 @@ static int __init cmdline_parse_core(char *p, unsigned long *core, return 0; } +bool mirrored_kernelcore __initdata_memblock; + /* * kernelcore=size sets the amount of memory for use for allocations that * cannot be reclaimed or migrated. @@ -644,10 +646,8 @@ static inline void pgdat_set_deferred_range(pg_data_t *pgdat) } /* Returns true if the struct page for the pfn is initialised */ -static inline bool __meminit early_page_initialised(unsigned long pfn) +static inline bool __meminit early_page_initialised(unsigned long pfn, int nid) { - int nid = early_pfn_to_nid(pfn); - if (node_online(nid) && pfn >= NODE_DATA(nid)->first_deferred_pfn) return false; @@ -693,15 +693,14 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn) return false; } -static void __meminit init_reserved_page(unsigned long pfn) +static void __meminit init_reserved_page(unsigned long pfn, int nid) { pg_data_t *pgdat; - int nid, zid; + int zid; - if (early_page_initialised(pfn)) + if (early_page_initialised(pfn, nid)) return; - nid = early_pfn_to_nid(pfn); pgdat = NODE_DATA(nid); for (zid = 0; zid < MAX_NR_ZONES; zid++) { @@ -715,7 +714,7 @@ static void __meminit init_reserved_page(unsigned long pfn) #else static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {} -static inline bool early_page_initialised(unsigned long pfn) +static inline bool early_page_initialised(unsigned long pfn, int nid) { return true; } @@ -725,7 +724,7 @@ static inline bool defer_init(int nid, unsigned long pfn, unsigned long end_pfn) return false; } -static inline void init_reserved_page(unsigned long pfn) +static inline void init_reserved_page(unsigned long pfn, int nid) { } #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ @@ -736,7 +735,8 @@ static inline void init_reserved_page(unsigned long pfn) * marks the pages PageReserved. The remaining valid pages are later * sent to the buddy page allocator. */ -void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end) +void __meminit reserve_bootmem_region(phys_addr_t start, + phys_addr_t end, int nid) { unsigned long start_pfn = PFN_DOWN(start); unsigned long end_pfn = PFN_UP(end); @@ -745,7 +745,7 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end) if (pfn_valid(start_pfn)) { struct page *page = pfn_to_page(start_pfn); - init_reserved_page(start_pfn); + init_reserved_page(start_pfn, nid); /* Avoid false-positive PageTail() */ INIT_LIST_HEAD(&page->lru); @@ -1166,24 +1166,15 @@ unsigned long __init absent_pages_in_range(unsigned long start_pfn, /* Return the number of page frames in holes in a zone on a node */ static unsigned long __init zone_absent_pages_in_node(int nid, unsigned long zone_type, - unsigned long node_start_pfn, - unsigned long node_end_pfn) + unsigned long zone_start_pfn, + unsigned long zone_end_pfn) { - unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type]; - unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type]; - unsigned long zone_start_pfn, zone_end_pfn; unsigned long nr_absent; - /* When hotadd a new node from cpu_up(), the node should be empty */ - if (!node_start_pfn && !node_end_pfn) + /* zone is empty, we don't have any absent pages */ + if (zone_start_pfn == zone_end_pfn) return 0; - zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high); - zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high); - - adjust_zone_range_for_zone_movable(nid, zone_type, - node_start_pfn, node_end_pfn, - &zone_start_pfn, &zone_end_pfn); nr_absent = __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); /* @@ -1227,9 +1218,6 @@ static unsigned long __init zone_spanned_pages_in_node(int nid, { unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type]; unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type]; - /* When hotadd a new node from cpu_up(), the node should be empty */ - if (!node_start_pfn && !node_end_pfn) - return 0; /* Get the start and end of the zone */ *zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high); @@ -1250,6 +1238,24 @@ static unsigned long __init zone_spanned_pages_in_node(int nid, return *zone_end_pfn - *zone_start_pfn; } +static void __init reset_memoryless_node_totalpages(struct pglist_data *pgdat) +{ + struct zone *z; + + for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) { + z->zone_start_pfn = 0; + z->spanned_pages = 0; + z->present_pages = 0; +#if defined(CONFIG_MEMORY_HOTPLUG) + z->present_early_pages = 0; +#endif + } + + pgdat->node_spanned_pages = 0; + pgdat->node_present_pages = 0; + pr_debug("On node %d totalpages: 0\n", pgdat->node_id); +} + static void __init calculate_node_totalpages(struct pglist_data *pgdat, unsigned long node_start_pfn, unsigned long node_end_pfn) @@ -1261,7 +1267,7 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat, struct zone *zone = pgdat->node_zones + i; unsigned long zone_start_pfn, zone_end_pfn; unsigned long spanned, absent; - unsigned long size, real_size; + unsigned long real_size; spanned = zone_spanned_pages_in_node(pgdat->node_id, i, node_start_pfn, @@ -1269,23 +1275,22 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat, &zone_start_pfn, &zone_end_pfn); absent = zone_absent_pages_in_node(pgdat->node_id, i, - node_start_pfn, - node_end_pfn); + zone_start_pfn, + zone_end_pfn); - size = spanned; - real_size = size - absent; + real_size = spanned - absent; - if (size) + if (spanned) zone->zone_start_pfn = zone_start_pfn; else zone->zone_start_pfn = 0; - zone->spanned_pages = size; + zone->spanned_pages = spanned; zone->present_pages = real_size; #if defined(CONFIG_MEMORY_HOTPLUG) zone->present_early_pages = real_size; #endif - totalpages += size; + totalpages += spanned; realtotalpages += real_size; } @@ -1375,6 +1380,10 @@ static void __meminit zone_init_free_lists(struct zone *zone) INIT_LIST_HEAD(&zone->free_area[order].free_list[t]); zone->free_area[order].nr_free = 0; } + +#ifdef CONFIG_UNACCEPTED_MEMORY + INIT_LIST_HEAD(&zone->unaccepted_pages); +#endif } void __meminit init_currently_empty_zone(struct zone *zone, @@ -1502,6 +1511,8 @@ void __ref free_area_init_core_hotplug(struct pglist_data *pgdat) pgdat->kswapd_order = 0; pgdat->kswapd_highest_zoneidx = 0; pgdat->node_start_pfn = 0; + pgdat->node_present_pages = 0; + for_each_online_cpu(cpu) { struct per_cpu_nodestat *p; @@ -1509,8 +1520,17 @@ void __ref free_area_init_core_hotplug(struct pglist_data *pgdat) memset(p, 0, sizeof(*p)); } - for (z = 0; z < MAX_NR_ZONES; z++) - zone_init_internals(&pgdat->node_zones[z], z, nid, 0); + /* + * When memory is hot-added, all the memory is in offline state. So + * clear all zones' present_pages and managed_pages because they will + * be updated in online_pages() and offline_pages(). + */ + for (z = 0; z < MAX_NR_ZONES; z++) { + struct zone *zone = pgdat->node_zones + z; + + zone->present_pages = 0; + zone_init_internals(zone, z, nid, 0); + } } #endif @@ -1578,7 +1598,6 @@ static void __init free_area_init_core(struct pglist_data *pgdat) if (!size) continue; - set_pageblock_order(); setup_usemap(zone); init_currently_empty_zone(zone, zone->zone_start_pfn, size); } @@ -1702,11 +1721,13 @@ static void __init free_area_init_node(int nid) pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid, (u64)start_pfn << PAGE_SHIFT, end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0); + + calculate_node_totalpages(pgdat, start_pfn, end_pfn); } else { pr_info("Initmem setup node %d as memoryless\n", nid); - } - calculate_node_totalpages(pgdat, start_pfn, end_pfn); + reset_memoryless_node_totalpages(pgdat); + } alloc_node_mem_map(pgdat); pgdat_set_deferred_range(pgdat); @@ -1716,7 +1737,7 @@ static void __init free_area_init_node(int nid) } /* Any regular or high memory on that node ? */ -static void check_for_memory(pg_data_t *pgdat, int nid) +static void check_for_memory(pg_data_t *pgdat) { enum zone_type zone_type; @@ -1724,9 +1745,9 @@ static void check_for_memory(pg_data_t *pgdat, int nid) struct zone *zone = &pgdat->node_zones[zone_type]; if (populated_zone(zone)) { if (IS_ENABLED(CONFIG_HIGHMEM)) - node_set_state(nid, N_HIGH_MEMORY); + node_set_state(pgdat->node_id, N_HIGH_MEMORY); if (zone_type <= ZONE_NORMAL) - node_set_state(nid, N_NORMAL_MEMORY); + node_set_state(pgdat->node_id, N_NORMAL_MEMORY); break; } } @@ -1745,11 +1766,6 @@ void __init setup_nr_node_ids(void) } #endif -static void __init free_area_init_memoryless_node(int nid) -{ - free_area_init_node(nid); -} - /* * Some architectures, e.g. ARC may have ZONE_HIGHMEM below ZONE_NORMAL. For * such cases we allow max_zone_pfn sorted in the descending order @@ -1848,6 +1864,8 @@ void __init free_area_init(unsigned long *max_zone_pfn) /* Initialise every node */ mminit_verify_pageflags_layout(); setup_nr_node_ids(); + set_pageblock_order(); + for_each_node(nid) { pg_data_t *pgdat; @@ -1860,7 +1878,7 @@ void __init free_area_init(unsigned long *max_zone_pfn) panic("Cannot allocate %zuB for node %d.\n", sizeof(*pgdat), nid); arch_refresh_nodedata(nid, pgdat); - free_area_init_memoryless_node(nid); + free_area_init_node(nid); /* * We do not want to confuse userspace by sysfs @@ -1881,7 +1899,7 @@ void __init free_area_init(unsigned long *max_zone_pfn) /* Any memory on that node */ if (pgdat->node_present_pages) node_set_state(nid, N_MEMORY); - check_for_memory(pgdat, nid); + check_for_memory(pgdat); } memmap_init(); @@ -1960,6 +1978,9 @@ static void __init deferred_free_range(unsigned long pfn, return; } + /* Accept chunks smaller than MAX_ORDER upfront */ + accept_memory(PFN_PHYS(pfn), PFN_PHYS(pfn + nr_pages)); + for (i = 0; i < nr_pages; i++, page++, pfn++) { if (pageblock_aligned(pfn)) set_pageblock_migratetype(page, MIGRATE_MOVABLE); @@ -2328,6 +2349,28 @@ void __init init_cma_reserved_pageblock(struct page *page) } #endif +void set_zone_contiguous(struct zone *zone) +{ + unsigned long block_start_pfn = zone->zone_start_pfn; + unsigned long block_end_pfn; + + block_end_pfn = pageblock_end_pfn(block_start_pfn); + for (; block_start_pfn < zone_end_pfn(zone); + block_start_pfn = block_end_pfn, + block_end_pfn += pageblock_nr_pages) { + + block_end_pfn = min(block_end_pfn, zone_end_pfn(zone)); + + if (!__pageblock_pfn_to_page(block_start_pfn, + block_end_pfn, zone)) + return; + cond_resched(); + } + + /* We confirm that there is no hole */ + zone->contiguous = true; +} + void __init page_alloc_init_late(void) { struct zone *zone; @@ -2368,6 +2411,8 @@ void __init page_alloc_init_late(void) /* Initialize page ext after all struct pages are initialized. */ if (deferred_struct_pages) page_ext_init(); + + page_alloc_sysctl_init(); } #ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES @@ -2532,8 +2577,14 @@ void __init set_dma_reserve(unsigned long new_dma_reserve) void __init memblock_free_pages(struct page *page, unsigned long pfn, unsigned int order) { - if (!early_page_initialised(pfn)) - return; + + if (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) { + int nid = early_pfn_to_nid(pfn); + + if (!early_page_initialised(pfn, nid)) + return; + } + if (!kmsan_memblock_free_pages(page, order)) { /* KMSAN will take care of these pages. */ return; @@ -2541,6 +2592,12 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn, __free_pages_core(page, order); } +DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, init_on_alloc); +EXPORT_SYMBOL(init_on_alloc); + +DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free); +EXPORT_SYMBOL(init_on_free); + static bool _init_on_alloc_enabled_early __read_mostly = IS_ENABLED(CONFIG_INIT_ON_ALLOC_DEFAULT_ON); static int __init early_init_on_alloc(char *buf) |