From f29d8e9c0191a2a02500945db505e5c89159c3f4 Mon Sep 17 00:00:00 2001
From: David Hildenbrand
Date: Fri, 28 Dec 2018 00:35:36 -0800
Subject: mm/memory_hotplug: drop "online" parameter from add_memory_resource()

Userspace should always be in charge of how to online memory and
whether memory should be onlined automatically in the kernel. Let's
drop the parameter used to override this: XEN passes memhp_auto_online,
just like add_memory(), so we can use that directly internally.

Link: http://lkml.kernel.org/r/20181123123740.27652-1-david@redhat.com
Signed-off-by: David Hildenbrand
Acked-by: Michal Hocko
Reviewed-by: Oscar Salvador
Acked-by: Juergen Gross
Cc: Boris Ostrovsky
Cc: Stefano Stabellini
Cc: Dan Williams
Cc: Pavel Tatashin
Cc: David Hildenbrand
Cc: Joonsoo Kim
Cc: Arun KS
Cc: Mathieu Malaterre
Cc: Stephen Rothwell
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/memory_hotplug.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux/memory_hotplug.h')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index ffd9cd10fcf3..7383a7a76d69 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -326,7 +326,7 @@ extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
 		void *arg, int (*func)(struct memory_block *, void *));
 extern int __add_memory(int nid, u64 start, u64 size);
 extern int add_memory(int nid, u64 start, u64 size);
-extern int add_memory_resource(int nid, struct resource *resource, bool online);
+extern int add_memory_resource(int nid, struct resource *resource);
 extern int arch_add_memory(int nid, u64 start, u64 size,
 		struct vmem_altmap *altmap, bool want_memblock);
 extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
--
cgit v1.2.3
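For orientation, here is a minimal, hypothetical sketch of what a caller looks like after this change; the wrapper name is illustrative, not from the patch. Onlining policy now comes solely from memhp_auto_online, which userspace controls via /sys/devices/system/memory/auto_online_blocks:

	/* Sketch only: the old call site had to pick a policy itself. */
	static int example_add_region(int nid, struct resource *res)
	{
		/* before: add_memory_resource(nid, res, memhp_auto_online); */
		return add_memory_resource(nid, res); /* policy applied internally */
	}
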
Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/mm/init.c | 2 +- arch/powerpc/mm/mem.c | 3 ++- arch/s390/mm/init.c | 2 +- arch/sh/mm/init.c | 2 +- arch/x86/mm/init_32.c | 2 +- arch/x86/mm/init_64.c | 3 ++- include/linux/memory_hotplug.h | 4 ++-- kernel/memremap.c | 5 ++++- mm/memory_hotplug.c | 2 +- 9 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include/linux/memory_hotplug.h') diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index d5e12ff1d73c..904fe55e10fc 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -661,7 +661,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) +int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 20394e52fe27..33cc6f676fa6 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -139,7 +139,8 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap * } #ifdef CONFIG_MEMORY_HOTREMOVE -int __meminit arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) +int __meminit arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 50388190b393..3e82f66d5c61 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -242,7 +242,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) +int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { /* * There is no hardware or firmware interface which could trigger a diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index c8c13c777162..a8e5c0e00fca 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -443,7 +443,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) +int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 49ecf5ecf6d3..85c94f9a87f8 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -860,7 +860,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) +int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 484c1b92f078..bccff68e3267 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1141,7 +1141,8 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end) remove_pagetable(start, end, true, NULL); } -int __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) +int __ref arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = start >> 
From 4e0d2e7ef14d9e1c900dac909db45263822b824f Mon Sep 17 00:00:00 2001
From: Wei Yang
Date: Fri, 28 Dec 2018 00:37:06 -0800
Subject: mm, sparse: pass nid instead of pgdat to sparse_add_one_section()

The only information sparse_add_one_section() needs from the pgdat is
the node id, which it uses to allocate memory on the proper node, so it
is not necessary to pass the pgdat itself.

This patch changes the prototype of sparse_add_one_section() to take
the node id directly. This avoids the misleading impression that
sparse_add_one_section() would touch the pgdat.
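The conversion is safe because the pgdat was only ever consulted for its node id; a brief sketch of the equivalence (NODE_DATA() is the standard pgdat accessor):

	/* NODE_DATA(nid)->node_id == nid by definition, so passing nid
	 * directly loses no information: */
	ret = sparse_add_one_section(nid, phys_start_pfn, altmap);	/* was NODE_DATA(nid) */
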
Link: http://lkml.kernel.org/r/20181204085657.20472-2-richard.weiyang@gmail.com
Signed-off-by: Wei Yang
Reviewed-by: David Hildenbrand
Acked-by: Michal Hocko
Cc: Dave Hansen
Cc: Oscar Salvador
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/memory_hotplug.h | 4 ++--
 mm/memory_hotplug.c            | 2 +-
 mm/sparse.c                    | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux/memory_hotplug.h')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 9e4d9b9b93ea..8ed6e09a5c0c 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -333,8 +333,8 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
 		unsigned long nr_pages, struct vmem_altmap *altmap);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern bool is_memblock_offlined(struct memory_block *mem);
-extern int sparse_add_one_section(struct pglist_data *pgdat,
-		unsigned long start_pfn, struct vmem_altmap *altmap);
+extern int sparse_add_one_section(int nid, unsigned long start_pfn,
+		struct vmem_altmap *altmap);
 extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
 		unsigned long map_offset, struct vmem_altmap *altmap);
 extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 0718cf7427b2..5f15f9c04c4a 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -253,7 +253,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
 	if (pfn_valid(phys_start_pfn))
 		return -EEXIST;
 
-	ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap);
+	ret = sparse_add_one_section(nid, phys_start_pfn, altmap);
 	if (ret < 0)
 		return ret;
 
diff --git a/mm/sparse.c b/mm/sparse.c
index 7323a03fbc39..7ea5dc6c6b19 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -678,8 +678,8 @@ static void free_map_bootmem(struct page *memmap)
  * set.  If this is <=0, then that means that the passed-in
  * map was not consumed and must be freed.
  */
-int __meminit sparse_add_one_section(struct pglist_data *pgdat,
-		unsigned long start_pfn, struct vmem_altmap *altmap)
+int __meminit sparse_add_one_section(int nid, unsigned long start_pfn,
+		struct vmem_altmap *altmap)
 {
 	unsigned long section_nr = pfn_to_section_nr(start_pfn);
 	struct mem_section *ms;
@@ -691,11 +691,11 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
 	 * no locking for this, because it does its own
 	 * plus, it does a kmalloc
 	 */
-	ret = sparse_index_init(section_nr, pgdat->node_id);
+	ret = sparse_index_init(section_nr, nid);
 	if (ret < 0 && ret != -EEXIST)
 		return ret;
 	ret = 0;
-	memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, altmap);
+	memmap = kmalloc_section_memmap(section_nr, nid, altmap);
 	if (!memmap)
 		return -ENOMEM;
 	usemap = __kmalloc_section_usemap();
--
cgit v1.2.3

From 0614ce9776b037b6a08a9adcbfcc382c0053b178 Mon Sep 17 00:00:00 2001
From: Wei Yang
Date: Fri, 28 Dec 2018 00:38:13 -0800
Subject: include/linux/memory_hotplug.h: remove duplicate declaration of offline_pages()

offline_pages() is already declared in this file.  Just remove the
duplicated one.
Link: http://lkml.kernel.org/r/20181205031357.24769-1-richard.weiyang@gmail.com
Signed-off-by: Wei Yang
Reviewed-by: David Hildenbrand
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/memory_hotplug.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux/memory_hotplug.h')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 8ed6e09a5c0c..07da5c6c5ba0 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -331,7 +331,6 @@ extern int arch_add_memory(int nid, u64 start, u64 size,
 		struct vmem_altmap *altmap, bool want_memblock);
 extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
 		unsigned long nr_pages, struct vmem_altmap *altmap);
-extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern bool is_memblock_offlined(struct memory_block *mem);
 extern int sparse_add_one_section(int nid, unsigned long start_pfn,
 		struct vmem_altmap *altmap);
--
cgit v1.2.3

From b13bc35193d9e7a8c050a24928ca5c9e7c9a009b Mon Sep 17 00:00:00 2001
From: Qian Cai
Date: Fri, 1 Feb 2019 14:20:51 -0800
Subject: mm/hotplug: invalid PFNs from pfn_to_online_page()

On an arm64 ThunderX2 server, the first kmemleak scan would crash [1]
with CONFIG_DEBUG_VM_PGFLAGS=y because page_to_nid() found a pfn that
is not directly mapped (MEMBLOCK_NOMAP), and hence its page->flags is
uninitialized.

This is due to commit 9f1eb38e0e11 ("mm, kmemleak: little optimization
while scanning"), which started to use pfn_to_online_page() instead of
pfn_valid().  However, in the CONFIG_MEMORY_HOTPLUG=y case,
pfn_to_online_page() does not call memblock_is_map_memory() while
pfn_valid() does.

Historically, commit 68709f45385a ("arm64: only consider memblocks with
NOMAP cleared for linear mapping") caused pages marked as nomap to no
longer be reassigned to the new zone in memmap_init_zone() by calling
__init_single_page().

Commit 2d070eab2e82 ("mm: consider zone which is not fully populated to
have holes") introduced pfn_to_online_page(), which was designed to
return a valid pfn only, but it is clearly broken on arm64.

Therefore, let pfn_to_online_page() call pfn_valid_within(), so it can
handle nomap thanks to commit f52bb98f5ade ("arm64: mm: always enable
CONFIG_HOLES_IN_ZONE"), while it will be optimized away on
architectures that have no HOLES_IN_ZONE.
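For context, a hedged sketch of how a pfn walker such as kmemleak is expected to use the fixed macro; the consumer function is hypothetical:

	/* With this fix the macro also rejects pfns inside section holes
	 * (pfn_valid_within()), so a non-NULL result is guaranteed to have
	 * initialized page->flags. */
	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		struct page *page = pfn_to_online_page(pfn);

		if (!page)
			continue;	/* offline, in a hole, or out of range */
		scan_page(page);	/* hypothetical consumer */
	}
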
[1]
 Unable to handle kernel NULL pointer dereference at virtual address 0000000000000006
 Mem abort info:
   ESR = 0x96000005
   Exception class = DABT (current EL), IL = 32 bits
   SET = 0, FnV = 0
   EA = 0, S1PTW = 0
 Data abort info:
   ISV = 0, ISS = 0x00000005
   CM = 0, WnR = 0
 Internal error: Oops: 96000005 [#1] SMP
 CPU: 60 PID: 1408 Comm: kmemleak Not tainted 5.0.0-rc2+ #8
 pstate: 60400009 (nZCv daif +PAN -UAO)
 pc : page_mapping+0x24/0x144
 lr : __dump_page+0x34/0x3dc
 sp : ffff00003a5cfd10
 x29: ffff00003a5cfd10 x28: 000000000000802f
 x27: 0000000000000000 x26: 0000000000277d00
 x25: ffff000010791f56 x24: ffff7fe000000000
 x23: ffff000010772f8b x22: ffff00001125f670
 x21: ffff000011311000 x20: ffff000010772f8b
 x19: fffffffffffffffe x18: 0000000000000000
 x17: 0000000000000000 x16: 0000000000000000
 x15: 0000000000000000 x14: ffff802698b19600
 x13: ffff802698b1a200 x12: ffff802698b16f00
 x11: ffff802698b1a400 x10: 0000000000001400
 x9 : 0000000000000001 x8 : ffff00001121a000
 x7 : 0000000000000000 x6 : ffff0000102c53b8
 x5 : 0000000000000000 x4 : 0000000000000003
 x3 : 0000000000000100 x2 : 0000000000000000
 x1 : ffff000010772f8b x0 : ffffffffffffffff
 Process kmemleak (pid: 1408, stack limit = 0x(____ptrval____))
 Call trace:
  page_mapping+0x24/0x144
  __dump_page+0x34/0x3dc
  dump_page+0x28/0x4c
  kmemleak_scan+0x4ac/0x680
  kmemleak_scan_thread+0xb4/0xdc
  kthread+0x12c/0x13c
  ret_from_fork+0x10/0x18
 Code: d503201f f9400660 36000040 d1000413 (f9400661)
 ---[ end trace 4d4bd7f573490c8e ]---
 Kernel panic - not syncing: Fatal exception
 SMP: stopping secondary CPUs
 Kernel Offset: disabled
 CPU features: 0x002,20000c38
 Memory Limit: none
 ---[ end Kernel panic - not syncing: Fatal exception ]---

Link: http://lkml.kernel.org/r/20190122132916.28360-1-cai@lca.pw
Fixes: 9f1eb38e0e11 ("mm, kmemleak: little optimization while scanning")
Signed-off-by: Qian Cai
Acked-by: Michal Hocko
Cc: Oscar Salvador
Cc: Catalin Marinas
Cc: Vlastimil Babka
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 include/linux/memory_hotplug.h | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/linux/memory_hotplug.h')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 07da5c6c5ba0..368267c1b71b 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -21,14 +21,16 @@ struct vmem_altmap;
  * walkers which rely on the fully initialized page->flags and others
  * should use this rather than pfn_valid && pfn_to_page
  */
-#define pfn_to_online_page(pfn)				\
-({							\
-	struct page *___page = NULL;			\
-	unsigned long ___nr = pfn_to_section_nr(pfn);	\
-							\
-	if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr))\
-		___page = pfn_to_page(pfn);		\
-	___page;					\
+#define pfn_to_online_page(pfn)					   \
+({								   \
+	struct page *___page = NULL;				   \
+	unsigned long ___pfn = pfn;				   \
+	unsigned long ___nr = pfn_to_section_nr(___pfn);	   \
+								   \
+	if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr) && \
+	    pfn_valid_within(___pfn))				   \
+		___page = pfn_to_page(___pfn);			   \
+	___page;						   \
 })
 
 /*
--
cgit v1.2.3

From 357b4da50a62e2fd70eacee21cdbd22d4c7a7b60 Mon Sep 17 00:00:00 2001
From: Juergen Gross
Date: Thu, 14 Feb 2019 11:42:39 +0100
Subject: x86: respect memory size limiting via mem= parameter

When limiting the memory size via the kernel parameter "mem=", this
should be respected even for memory made accessible via a PCI card.
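A hedged sketch of the resulting behavior in the hot-add path; the check mirrors register_memory_resource() above and the error handling shown is illustrative:

	/* With mem=4G, parse_memopt() sets max_mem_size = 4ULL << 30, and a
	 * later hot-add whose end exceeds the limit is refused: */
	struct resource *res = register_memory_resource(start, size);

	if (IS_ERR(res))
		return PTR_ERR(res);	/* -E2BIG when start + size > max_mem_size */
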
Today this kind of memory won't be made usable in the initial memory
setup, as the memory won't be visible in the E820 map, but it might be
added later when adding PCI devices due to corresponding ACPI table
entries.

Not respecting "mem=" can be corrected by adding a global max_mem_size
variable, set by parse_memopt(), which will result in rejecting the
addition of memory areas that would push the memory size above the
allowed limit.

Signed-off-by: Juergen Gross
Acked-by: Ingo Molnar
Reviewed-by: William Kucharski
Signed-off-by: Juergen Gross
---
 arch/x86/kernel/e820.c         | 5 +++++
 include/linux/memory_hotplug.h | 2 ++
 mm/memory_hotplug.c            | 6 ++++++
 3 files changed, 13 insertions(+)

(limited to 'include/linux/memory_hotplug.h')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 50895c2f937d..e67513e2cbbb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include <linux/memory_hotplug.h>
 
 #include
 #include
@@ -881,6 +882,10 @@ static int __init parse_memopt(char *p)
 
 	e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1);
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+	max_mem_size = mem_size;
+#endif
+
 	return 0;
 }
 early_param("mem", parse_memopt);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 368267c1b71b..cfd12078172a 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -100,6 +100,8 @@ extern void __online_page_free(struct page *page);
 
 extern int try_online_node(int nid);
 
+extern u64 max_mem_size;
+
 extern bool memhp_auto_online;
 /* If movable_node boot option specified */
 extern bool movable_node_enabled;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 124e794867c5..519f9db063ff 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -96,10 +96,16 @@ void mem_hotplug_done(void)
 	cpus_read_unlock();
 }
 
+u64 max_mem_size = U64_MAX;
+
 /* add this memory to iomem resource */
 static struct resource *register_memory_resource(u64 start, u64 size)
 {
 	struct resource *res, *conflict;
+
+	if (start + size > max_mem_size)
+		return ERR_PTR(-E2BIG);
+
 	res = kzalloc(sizeof(struct resource), GFP_KERNEL);
 	if (!res)
 		return ERR_PTR(-ENOMEM);
--
cgit v1.2.3

From a9cd410a3d296846a8125aa43d97a573a354c472 Mon Sep 17 00:00:00 2001
From: Arun KS
Date: Tue, 5 Mar 2019 15:42:14 -0800
Subject: mm/page_alloc.c: memory hotplug: free pages as higher order

When pages are freed at a higher order, the time the buddy allocator
spends coalescing them can be reduced.  With a section size of 256MB,
the hot-add latency of a single section improves from 50-60 ms to less
than 1 ms, improving hot-add latency by a factor of 60.

Modify the external providers of the online callback to align with
this change.

[arunks@codeaurora.org: v11]
  Link: http://lkml.kernel.org/r/1547792588-18032-1-git-send-email-arunks@codeaurora.org
[akpm@linux-foundation.org: remove unused local, per Arun]
[akpm@linux-foundation.org: avoid return of void-returning __free_pages_core(), per Oscar]
[akpm@linux-foundation.org: fix it for mm-convert-totalram_pages-and-totalhigh_pages-variables-to-atomic.patch]
[arunks@codeaurora.org: v8]
  Link: http://lkml.kernel.org/r/1547032395-24582-1-git-send-email-arunks@codeaurora.org
[arunks@codeaurora.org: v9]
  Link: http://lkml.kernel.org/r/1547098543-26452-1-git-send-email-arunks@codeaurora.org
Link: http://lkml.kernel.org/r/1538727006-5727-1-git-send-email-arunks@codeaurora.org
Signed-off-by: Arun KS
Reviewed-by: Andrew Morton
Acked-by: Michal Hocko
Reviewed-by: Oscar Salvador
Reviewed-by: Alexander Duyck
Cc: K. Y. Srinivasan
Cc: Haiyang Zhang
Cc: Stephen Hemminger
Cc: Boris Ostrovsky
Cc: Juergen Gross
Cc: Dan Williams
Cc: Vlastimil Babka
Cc: Joonsoo Kim
Cc: Greg Kroah-Hartman
Cc: Mathieu Malaterre
Cc: "Kirill A. Shutemov"
Cc: Souptick Joarder
Cc: Mel Gorman
Cc: Aaron Lu
Cc: Srivatsa Vaddagiri
Cc: Vinayak Menon
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 drivers/hv/hv_balloon.c        |  7 ++++---
 drivers/xen/balloon.c          | 15 ++++++++++-----
 include/linux/memory_hotplug.h |  2 +-
 mm/internal.h                  |  1 +
 mm/memory_hotplug.c            | 37 +++++++++++++++++++++++++------------
 mm/page_alloc.c                |  8 ++++----
 6 files changed, 45 insertions(+), 25 deletions(-)

(limited to 'include/linux/memory_hotplug.h')

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 7c6349a50ef1..a50b7624b2a3 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -771,7 +771,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 	}
 }
 
-static void hv_online_page(struct page *pg)
+static void hv_online_page(struct page *pg, unsigned int order)
 {
 	struct hv_hotadd_state *has;
 	unsigned long flags;
@@ -780,10 +780,11 @@ static void hv_online_page(struct page *pg)
 	spin_lock_irqsave(&dm_device.ha_lock, flags);
 	list_for_each_entry(has, &dm_device.ha_region_list, list) {
 		/* The page belongs to a different HAS. */
-		if ((pfn < has->start_pfn) || (pfn >= has->end_pfn))
+		if ((pfn < has->start_pfn) ||
+				(pfn + (1UL << order) > has->end_pfn))
 			continue;
 
-		hv_page_online_one(has, pg);
+		hv_bring_pgs_online(has, pfn, 1UL << order);
 		break;
 	}
 	spin_unlock_irqrestore(&dm_device.ha_lock, flags);
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index ceb5048de9a7..d107447c47de 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -369,14 +369,19 @@ static enum bp_state reserve_additional_memory(void)
 	return BP_ECANCELED;
 }
 
-static void xen_online_page(struct page *page)
+static void xen_online_page(struct page *page, unsigned int order)
 {
-	__online_page_set_limits(page);
+	unsigned long i, size = (1 << order);
+	unsigned long start_pfn = page_to_pfn(page);
+	struct page *p;
 
+	pr_debug("Online %lu pages starting at pfn 0x%lx\n", size, start_pfn);
 	mutex_lock(&balloon_mutex);
-
-	__balloon_append(page);
-
+	for (i = 0; i < size; i++) {
+		p = pfn_to_page(start_pfn + i);
+		__online_page_set_limits(p);
+		__balloon_append(p);
+	}
 	mutex_unlock(&balloon_mutex);
 }
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 368267c1b71b..52869d6d38b3 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -89,7 +89,7 @@ extern int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
 	unsigned long *valid_start, unsigned long *valid_end);
 extern void __offline_isolated_pages(unsigned long, unsigned long);
 
-typedef void (*online_page_callback_t)(struct page *page);
+typedef void (*online_page_callback_t)(struct page *page, unsigned int order);
 
 extern int set_online_page_callback(online_page_callback_t callback);
 extern int restore_online_page_callback(online_page_callback_t callback);
diff --git a/mm/internal.h b/mm/internal.h
index f4a7bb02decf..536bc2a839b9 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -163,6 +163,7 @@ static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
 extern int __isolate_free_page(struct page *page, unsigned int order);
 extern void memblock_free_pages(struct page *page, unsigned long pfn,
 					unsigned int order);
+extern void __free_pages_core(struct page *page, unsigned int order);
 extern void prep_compound_page(struct page *page, unsigned int order);
 extern void post_alloc_hook(struct page *page, unsigned int order,
 					gfp_t gfp_flags);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 1ad28323fb9f..4f07c8ddfdd7 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -47,7 +47,7 @@
  * and restore_online_page_callback() for generic callback restore.
  */
 
-static void generic_online_page(struct page *page);
+static void generic_online_page(struct page *page, unsigned int order);
 
 static online_page_callback_t online_page_callback = generic_online_page;
 static DEFINE_MUTEX(online_page_callback_lock);
@@ -656,26 +656,39 @@ void __online_page_free(struct page *page)
 }
 EXPORT_SYMBOL_GPL(__online_page_free);
 
-static void generic_online_page(struct page *page)
+static void generic_online_page(struct page *page, unsigned int order)
 {
-	__online_page_set_limits(page);
-	__online_page_increment_counters(page);
-	__online_page_free(page);
+	__free_pages_core(page, order);
+	totalram_pages_add(1UL << order);
+#ifdef CONFIG_HIGHMEM
+	if (PageHighMem(page))
+		totalhigh_pages_add(1UL << order);
+#endif
+}
+
+static int online_pages_blocks(unsigned long start, unsigned long nr_pages)
+{
+	unsigned long end = start + nr_pages;
+	int order, onlined_pages = 0;
+
+	while (start < end) {
+		order = min(MAX_ORDER - 1,
+			get_order(PFN_PHYS(end) - PFN_PHYS(start)));
+		(*online_page_callback)(pfn_to_page(start), order);
+
+		onlined_pages += (1UL << order);
+		start += (1UL << order);
+	}
+	return onlined_pages;
 }
 
 static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
 			void *arg)
 {
-	unsigned long i;
 	unsigned long onlined_pages = *(unsigned long *)arg;
-	struct page *page;
 
 	if (PageReserved(pfn_to_page(start_pfn)))
-		for (i = 0; i < nr_pages; i++) {
-			page = pfn_to_page(start_pfn + i);
-			(*online_page_callback)(page);
-			onlined_pages++;
-		}
+		onlined_pages += online_pages_blocks(start_pfn, nr_pages);
 
 	online_mem_sections(start_pfn, start_pfn + nr_pages);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 10d0f2ed9f69..5361bd078493 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1303,7 +1303,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	local_irq_restore(flags);
 }
 
-static void __init __free_pages_boot_core(struct page *page, unsigned int order)
+void __free_pages_core(struct page *page, unsigned int order)
 {
 	unsigned int nr_pages = 1 << order;
 	struct page *p = page;
@@ -1382,7 +1382,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn,
 {
 	if (early_page_uninitialised(pfn))
 		return;
-	return __free_pages_boot_core(page, order);
+	__free_pages_core(page, order);
 }
 
 /*
@@ -1472,14 +1472,14 @@ static void __init deferred_free_range(unsigned long pfn,
 	if (nr_pages == pageblock_nr_pages &&
 	    (pfn & (pageblock_nr_pages - 1)) == 0) {
 		set_pageblock_migratetype(page, MIGRATE_MOVABLE);
-		__free_pages_boot_core(page, pageblock_order);
+		__free_pages_core(page, pageblock_order);
 		return;
 	}
 
 	for (i = 0; i < nr_pages; i++, page++, pfn++) {
 		if ((pfn & (pageblock_nr_pages - 1)) == 0)
 			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
-		__free_pages_boot_core(page, 0);
+		__free_pages_core(page, 0);
 	}
 }
--
cgit v1.2.3
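As a worked example of the saving: onlining one 256MB section with 4KB pages means nr_pages = 65536, and with MAX_ORDER - 1 = 10 the loop in online_pages_blocks() issues 64 callbacks of order 10 (1024 pages each) instead of 65536 single-page calls. Below is a self-contained sketch of the chunking arithmetic, assuming a section-aligned range; it rounds the block size down to fit the tail (equivalent for aligned input) and is illustrative only, not kernel code:

	static unsigned long chunked_calls(unsigned long nr_pages,
					   unsigned int max_order)
	{
		unsigned long calls = 0, step;

		while (nr_pages) {
			step = 1UL << max_order;
			while (step > nr_pages)	/* shrink to fit the tail */
				step >>= 1;
			nr_pages -= step;	/* one callback covers 'step' pages */
			calls++;
		}
		return calls;	/* 64 for nr_pages == 65536, max_order == 10 */
	}
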