From 4c4b7f9ba9486c565aead99a198ceeef73ae81f6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 18 Jul 2019 15:57:06 -0700 Subject: mm/memory_hotplug: remove memory block devices before arch_remove_memory() Let's factor out removing of memory block devices, which is only necessary for memory added via add_memory() and friends that created memory block devices. Remove the devices before calling arch_remove_memory(). This finishes factoring out memory block device handling from arch_add_memory() and arch_remove_memory(). Link: http://lkml.kernel.org/r/20190527111152.16324-10-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Dan Williams Acked-by: Michal Hocko Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: David Hildenbrand Cc: "mike.travis@hpe.com" Cc: Andrew Banman Cc: Ingo Molnar Cc: Alex Deucher Cc: "David S. Miller" Cc: Mark Brown Cc: Chris Wilson Cc: Oscar Salvador Cc: Jonathan Cameron Cc: Arun KS Cc: Mathieu Malaterre Cc: Andy Lutomirski Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Baoquan He Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Chintan Pandya Cc: Christophe Leroy Cc: Dave Hansen Cc: Fenghua Yu Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Joonsoo Kim Cc: Jun Yao Cc: "Kirill A. Shutemov" Cc: Logan Gunthorpe Cc: Mark Rutland Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Paul Mackerras Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Qian Cai Cc: Rich Felker Cc: Rob Herring Cc: Robin Murphy Cc: Thomas Gleixner Cc: Tony Luck Cc: Vasily Gorbik Cc: Wei Yang Cc: Will Deacon Cc: Yoshinori Sato Cc: Yu Zhao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/base/node.c') diff --git a/drivers/base/node.c b/drivers/base/node.c index aa878fbcf705..0b0f38c2c7cd 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -802,9 +802,10 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg) return 0; } -/* unregister memory section under all nodes that it spans */ -int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, - unsigned long phys_index) +/* + * Unregister memory block device under all nodes that it spans. + */ +int unregister_memory_block_under_nodes(struct memory_block *mem_blk) { NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL); unsigned long pfn, sect_start_pfn, sect_end_pfn; @@ -817,8 +818,8 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, return -ENOMEM; nodes_clear(*unlinked_nodes); - sect_start_pfn = section_nr_to_pfn(phys_index); - sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; + sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); + sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr); for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { int nid; -- cgit v1.2.3 From a31b264c2b415b29660da0bc2ba291a98629ce51 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 18 Jul 2019 15:57:12 -0700 Subject: mm/memory_hotplug: make unregister_memory_block_under_nodes() never fail We really don't want anything during memory hotunplug to fail. We always pass a valid memory block device, that check can go. Avoid allocating memory and eventually failing. As we are always called under lock, we can use a static piece of memory. This avoids having to put the structure onto the stack, having to guess about the stack size of callers. Patch inspired by a patch from Oscar Salvador. In the future, there might be no need to iterate over nodes at all. mem->nid should tell us exactly what to remove. Memory block devices with mixed nodes (added during boot) should properly fenced off and never removed. Link: http://lkml.kernel.org/r/20190527111152.16324-11-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Wei Yang Reviewed-by: Oscar Salvador Acked-by: Michal Hocko Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Alex Deucher Cc: "David S. Miller" Cc: Mark Brown Cc: Chris Wilson Cc: David Hildenbrand Cc: Jonathan Cameron Cc: Andrew Banman Cc: Andy Lutomirski Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Arun KS Cc: Baoquan He Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Chintan Pandya Cc: Christophe Leroy Cc: Dan Williams Cc: Dave Hansen Cc: Fenghua Yu Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Joonsoo Kim Cc: Jun Yao Cc: "Kirill A. Shutemov" Cc: Logan Gunthorpe Cc: Mark Rutland Cc: Masahiro Yamada Cc: Mathieu Malaterre Cc: Michael Ellerman Cc: Mike Rapoport Cc: "mike.travis@hpe.com" Cc: Nicholas Piggin Cc: Paul Mackerras Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Qian Cai Cc: Rich Felker Cc: Rob Herring Cc: Robin Murphy Cc: Thomas Gleixner Cc: Tony Luck Cc: Vasily Gorbik Cc: Will Deacon Cc: Yoshinori Sato Cc: Yu Zhao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 18 +++++------------- include/linux/node.h | 5 ++--- 2 files changed, 7 insertions(+), 16 deletions(-) (limited to 'drivers/base/node.c') diff --git a/drivers/base/node.c b/drivers/base/node.c index 0b0f38c2c7cd..beec80649b33 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -804,20 +804,14 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg) /* * Unregister memory block device under all nodes that it spans. + * Has to be called with mem_sysfs_mutex held (due to unlinked_nodes). */ -int unregister_memory_block_under_nodes(struct memory_block *mem_blk) +void unregister_memory_block_under_nodes(struct memory_block *mem_blk) { - NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL); unsigned long pfn, sect_start_pfn, sect_end_pfn; + static nodemask_t unlinked_nodes; - if (!mem_blk) { - NODEMASK_FREE(unlinked_nodes); - return -EFAULT; - } - if (!unlinked_nodes) - return -ENOMEM; - nodes_clear(*unlinked_nodes); - + nodes_clear(unlinked_nodes); sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr); for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { @@ -828,15 +822,13 @@ int unregister_memory_block_under_nodes(struct memory_block *mem_blk) continue; if (!node_online(nid)) continue; - if (node_test_and_set(nid, *unlinked_nodes)) + if (node_test_and_set(nid, unlinked_nodes)) continue; sysfs_remove_link(&node_devices[nid]->dev.kobj, kobject_name(&mem_blk->dev.kobj)); sysfs_remove_link(&mem_blk->dev.kobj, kobject_name(&node_devices[nid]->dev.kobj)); } - NODEMASK_FREE(unlinked_nodes); - return 0; } int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn) diff --git a/include/linux/node.h b/include/linux/node.h index 02a29e71b175..548c226966a2 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -139,7 +139,7 @@ extern int register_cpu_under_node(unsigned int cpu, unsigned int nid); extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); extern int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg); -extern int unregister_memory_block_under_nodes(struct memory_block *mem_blk); +extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk); extern int register_memory_node_under_compute_node(unsigned int mem_nid, unsigned int cpu_nid, @@ -175,9 +175,8 @@ static inline int register_mem_sect_under_node(struct memory_block *mem_blk, { return 0; } -static inline int unregister_memory_block_under_nodes(struct memory_block *mem_blk) +static inline void unregister_memory_block_under_nodes(struct memory_block *mem_blk) { - return 0; } static inline void register_hugetlbfs_with_node(node_registration_func_t reg, -- cgit v1.2.3 From 8d595c4c0f768f19db043d378b22e98405f9fd47 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 18 Jul 2019 15:57:43 -0700 Subject: mm: make register_mem_sect_under_node() static It is only used internally. Link: http://lkml.kernel.org/r/20190614100114.311-4-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Andrew Morton Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Keith Busch Cc: Oscar Salvador Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 3 ++- include/linux/node.h | 7 ------- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'drivers/base/node.c') diff --git a/drivers/base/node.c b/drivers/base/node.c index beec80649b33..27391f1e8f60 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -753,7 +753,8 @@ static int __ref get_nid_for_pfn(unsigned long pfn) } /* register memory section under specified node if it spans that node */ -int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg) +static int register_mem_sect_under_node(struct memory_block *mem_blk, + void *arg) { int ret, nid = *(int *)arg; unsigned long pfn, sect_start_pfn, sect_end_pfn; diff --git a/include/linux/node.h b/include/linux/node.h index 548c226966a2..4866f32a02d8 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -137,8 +137,6 @@ static inline int register_one_node(int nid) extern void unregister_one_node(int nid); extern int register_cpu_under_node(unsigned int cpu, unsigned int nid); extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); -extern int register_mem_sect_under_node(struct memory_block *mem_blk, - void *arg); extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk); extern int register_memory_node_under_compute_node(unsigned int mem_nid, @@ -170,11 +168,6 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) { return 0; } -static inline int register_mem_sect_under_node(struct memory_block *mem_blk, - void *arg) -{ - return 0; -} static inline void unregister_memory_block_under_nodes(struct memory_block *mem_blk) { } -- cgit v1.2.3 From fbcf73ce65827c3d8935f38b832a43153a0c78d1 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 18 Jul 2019 15:57:46 -0700 Subject: mm/memory_hotplug: rename walk_memory_range() and pass start+size instead of pfns walk_memory_range() was once used to iterate over sections. Now, it iterates over memory blocks. Rename the function, fixup the documentation. Also, pass start+size instead of PFNs, which is what most callers already have at hand. (we'll rework link_mem_sections() most probably soon) Follow-up patches will rework, simplify, and move walk_memory_blocks() to drivers/base/memory.c. Note: walk_memory_blocks() only works correctly right now if the start_pfn is aligned to a section start. This is the case right now, but we'll generalize the function in a follow up patch so the semantics match the documentation. [akpm@linux-foundation.org: remove unused variable] Link: http://lkml.kernel.org/r/20190614100114.311-5-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Andrew Morton Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: Greg Kroah-Hartman Cc: David Hildenbrand Cc: Rashmica Gupta Cc: Pavel Tatashin Cc: Anshuman Khandual Cc: Michael Neuling Cc: Thomas Gleixner Cc: Oscar Salvador Cc: Michal Hocko Cc: Wei Yang Cc: Juergen Gross Cc: Qian Cai Cc: Arun KS Cc: Nick Desaulniers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/powernv/memtrace.c | 23 +++++++++++------------ drivers/acpi/acpi_memhotplug.c | 19 ++++--------------- drivers/base/node.c | 5 +++-- include/linux/memory_hotplug.h | 2 +- mm/memory_hotplug.c | 24 +++++++++++++----------- 5 files changed, 32 insertions(+), 41 deletions(-) (limited to 'drivers/base/node.c') diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 5e53c1392d3b..eb2e75dac369 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -70,23 +70,23 @@ static int change_memblock_state(struct memory_block *mem, void *arg) /* called with device_hotplug_lock held */ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) { - u64 end_pfn = start_pfn + nr_pages - 1; + const unsigned long start = PFN_PHYS(start_pfn); + const unsigned long size = PFN_PHYS(nr_pages); - if (walk_memory_range(start_pfn, end_pfn, NULL, - check_memblock_online)) + if (walk_memory_blocks(start, size, NULL, check_memblock_online)) return false; - walk_memory_range(start_pfn, end_pfn, (void *)MEM_GOING_OFFLINE, - change_memblock_state); + walk_memory_blocks(start, size, (void *)MEM_GOING_OFFLINE, + change_memblock_state); if (offline_pages(start_pfn, nr_pages)) { - walk_memory_range(start_pfn, end_pfn, (void *)MEM_ONLINE, - change_memblock_state); + walk_memory_blocks(start, size, (void *)MEM_ONLINE, + change_memblock_state); return false; } - walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE, - change_memblock_state); + walk_memory_blocks(start, size, (void *)MEM_OFFLINE, + change_memblock_state); return true; @@ -242,9 +242,8 @@ static int memtrace_online(void) */ if (!memhp_auto_online) { lock_device_hotplug(); - walk_memory_range(PFN_DOWN(ent->start), - PFN_UP(ent->start + ent->size - 1), - NULL, online_mem_block); + walk_memory_blocks(ent->start, ent->size, NULL, + online_mem_block); unlock_device_hotplug(); } diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index db013dc21c02..e294f44a7850 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -155,16 +155,6 @@ static int acpi_memory_check_device(struct acpi_memory_device *mem_device) return 0; } -static unsigned long acpi_meminfo_start_pfn(struct acpi_memory_info *info) -{ - return PFN_DOWN(info->start_addr); -} - -static unsigned long acpi_meminfo_end_pfn(struct acpi_memory_info *info) -{ - return PFN_UP(info->start_addr + info->length-1); -} - static int acpi_bind_memblk(struct memory_block *mem, void *arg) { return acpi_bind_one(&mem->dev, arg); @@ -173,9 +163,8 @@ static int acpi_bind_memblk(struct memory_block *mem, void *arg) static int acpi_bind_memory_blocks(struct acpi_memory_info *info, struct acpi_device *adev) { - return walk_memory_range(acpi_meminfo_start_pfn(info), - acpi_meminfo_end_pfn(info), adev, - acpi_bind_memblk); + return walk_memory_blocks(info->start_addr, info->length, adev, + acpi_bind_memblk); } static int acpi_unbind_memblk(struct memory_block *mem, void *arg) @@ -186,8 +175,8 @@ static int acpi_unbind_memblk(struct memory_block *mem, void *arg) static void acpi_unbind_memory_blocks(struct acpi_memory_info *info) { - walk_memory_range(acpi_meminfo_start_pfn(info), - acpi_meminfo_end_pfn(info), NULL, acpi_unbind_memblk); + walk_memory_blocks(info->start_addr, info->length, NULL, + acpi_unbind_memblk); } static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) diff --git a/drivers/base/node.c b/drivers/base/node.c index 27391f1e8f60..75b7e6f6535b 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -834,8 +834,9 @@ void unregister_memory_block_under_nodes(struct memory_block *mem_blk) int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn) { - return walk_memory_range(start_pfn, end_pfn, (void *)&nid, - register_mem_sect_under_node); + return walk_memory_blocks(PFN_PHYS(start_pfn), + PFN_PHYS(end_pfn - start_pfn), (void *)&nid, + register_mem_sect_under_node); } #ifdef CONFIG_HUGETLBFS diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 79e0add6a597..d9fffc34949f 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -340,7 +340,7 @@ static inline void __remove_memory(int nid, u64 start, u64 size) {} #endif /* CONFIG_MEMORY_HOTREMOVE */ extern void __ref free_area_init_core_hotplug(int nid); -extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, +extern int walk_memory_blocks(unsigned long start, unsigned long size, void *arg, int (*func)(struct memory_block *, void *)); extern int __add_memory(int nid, u64 start, u64 size); extern int add_memory(int nid, u64 start, u64 size); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index d1d0ceaaca88..b3ef84e408fa 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1124,8 +1124,7 @@ int __ref add_memory_resource(int nid, struct resource *res) /* online pages if requested */ if (memhp_auto_online) - walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), - NULL, online_memory_block); + walk_memory_blocks(start, size, NULL, online_memory_block); return ret; error: @@ -1663,20 +1662,24 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages) #endif /* CONFIG_MEMORY_HOTREMOVE */ /** - * walk_memory_range - walks through all mem sections in [start_pfn, end_pfn) - * @start_pfn: start pfn of the memory range - * @end_pfn: end pfn of the memory range + * walk_memory_blocks - walk through all present memory blocks overlapped + * by the range [start, start + size) + * + * @start: start address of the memory range + * @size: size of the memory range * @arg: argument passed to func - * @func: callback for each memory section walked + * @func: callback for each memory block walked * - * This function walks through all present mem sections in range - * [start_pfn, end_pfn) and call func on each mem section. + * This function walks through all present memory blocks overlapped by the + * range [start, start + size), calling func on each memory block. * * Returns the return value of func. */ -int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, +int walk_memory_blocks(unsigned long start, unsigned long size, void *arg, int (*func)(struct memory_block *, void *)) { + const unsigned long start_pfn = PFN_DOWN(start); + const unsigned long end_pfn = PFN_UP(start + size - 1); struct memory_block *mem = NULL; struct mem_section *section; unsigned long pfn, section_nr; @@ -1822,8 +1825,7 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) * whether all memory blocks in question are offline and return error * if this is not the case. */ - rc = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL, - check_memblock_offlined_cb); + rc = walk_memory_blocks(start, size, NULL, check_memblock_offlined_cb); if (rc) goto done; -- cgit v1.2.3