From 514caf23a70fd697fa2ece238b2cd8dcc73fb16f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 14:27:13 +0200 Subject: memremap: replace the altmap_valid field with a PGMAP_ALTMAP_VALID flag Add a flags field to struct dev_pagemap to replace the altmap_valid boolean to be a little more extensible. Also add a pgmap_altmap() helper to find the optional altmap and clean up the code using the altmap using it. Signed-off-by: Christoph Hellwig Reviewed-by: Ira Weiny Reviewed-by: Dan Williams Tested-by: Dan Williams Signed-off-by: Jason Gunthorpe --- mm/memory_hotplug.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e096c987d261..6166ba5a15f3 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -557,10 +557,8 @@ void __remove_pages(struct zone *zone, unsigned long phys_start_pfn, int sections_to_remove; /* In the ZONE_DEVICE case device driver owns the memory region */ - if (is_dev_zone(zone)) { - if (altmap) - map_offset = vmem_altmap_offset(altmap); - } + if (is_dev_zone(zone)) + map_offset = vmem_altmap_offset(altmap); clear_zone_contiguous(zone); -- cgit v1.2.3 From eca499ab3749a4537dee77ffead47a1a2c0dee19 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Tue, 16 Jul 2019 16:30:31 -0700 Subject: mm/hotplug: make remove_memory() interface usable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Presently the remove_memory() interface is inherently broken. It tries to remove memory but panics if some memory is not offline. The problem is that it is impossible to ensure that all memory blocks are offline as this function also takes lock_device_hotplug that is required to change memory state via sysfs. So, between calling this function and offlining all memory blocks there is always a window when lock_device_hotplug is released, and therefore, there is always a chance for a panic during this window. Make this interface to return an error if memory removal fails. This way it is safe to call this function without panicking machine, and also makes it symmetric to add_memory() which already returns an error. Link: http://lkml.kernel.org/r/20190517215438.6487-3-pasha.tatashin@soleen.com Signed-off-by: Pavel Tatashin Reviewed-by: David Hildenbrand Acked-by: Michal Hocko Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: Dave Hansen Cc: Dave Jiang Cc: Fengguang Wu Cc: Huang Ying Cc: James Morris Cc: Jérôme Glisse Cc: Keith Busch Cc: Ross Zwisler Cc: Sasha Levin Cc: Takashi Iwai Cc: Tom Lendacky Cc: Vishal Verma Cc: Yaowei Bai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 8 ++++-- mm/memory_hotplug.c | 64 ++++++++++++++++++++++++++++-------------- 2 files changed, 49 insertions(+), 23 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index ae892eef8b82..988fde33cd7f 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -324,7 +324,7 @@ static inline void pgdat_resize_init(struct pglist_data *pgdat) {} extern bool is_mem_section_removable(unsigned long pfn, unsigned long nr_pages); extern void try_offline_node(int nid); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); -extern void remove_memory(int nid, u64 start, u64 size); +extern int remove_memory(int nid, u64 start, u64 size); extern void __remove_memory(int nid, u64 start, u64 size); #else @@ -341,7 +341,11 @@ static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages) return -EINVAL; } -static inline void remove_memory(int nid, u64 start, u64 size) {} +static inline int remove_memory(int nid, u64 start, u64 size) +{ + return -EBUSY; +} + static inline void __remove_memory(int nid, u64 start, u64 size) {} #endif /* CONFIG_MEMORY_HOTREMOVE */ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6166ba5a15f3..4ebe696138e8 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1734,9 +1734,10 @@ static int check_memblock_offlined_cb(struct memory_block *mem, void *arg) endpa = PFN_PHYS(section_nr_to_pfn(mem->end_section_nr + 1))-1; pr_warn("removing memory fails, because memory [%pa-%pa] is onlined\n", &beginpa, &endpa); - } - return ret; + return -EBUSY; + } + return 0; } static int check_cpu_on_node(pg_data_t *pgdat) @@ -1819,19 +1820,9 @@ static void __release_memory_resource(resource_size_t start, } } -/** - * remove_memory - * @nid: the node ID - * @start: physical address of the region to remove - * @size: size of the region to remove - * - * NOTE: The caller must call lock_device_hotplug() to serialize hotplug - * and online/offline operations before this call, as required by - * try_offline_node(). - */ -void __ref __remove_memory(int nid, u64 start, u64 size) +static int __ref try_remove_memory(int nid, u64 start, u64 size) { - int ret; + int rc = 0; BUG_ON(check_hotplug_memory_range(start, size)); @@ -1839,13 +1830,13 @@ void __ref __remove_memory(int nid, u64 start, u64 size) /* * All memory blocks must be offlined before removing memory. Check - * whether all memory blocks in question are offline and trigger a BUG() + * whether all memory blocks in question are offline and return error * if this is not the case. */ - ret = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL, - check_memblock_offlined_cb); - if (ret) - BUG(); + rc = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL, + check_memblock_offlined_cb); + if (rc) + goto done; /* remove memmap entry */ firmware_map_remove(start, start + size, "System RAM"); @@ -1857,14 +1848,45 @@ void __ref __remove_memory(int nid, u64 start, u64 size) try_offline_node(nid); +done: mem_hotplug_done(); + return rc; } -void remove_memory(int nid, u64 start, u64 size) +/** + * remove_memory + * @nid: the node ID + * @start: physical address of the region to remove + * @size: size of the region to remove + * + * NOTE: The caller must call lock_device_hotplug() to serialize hotplug + * and online/offline operations before this call, as required by + * try_offline_node(). + */ +void __remove_memory(int nid, u64 start, u64 size) +{ + + /* + * trigger BUG() is some memory is not offlined prior to calling this + * function + */ + if (try_remove_memory(nid, start, size)) + BUG(); +} + +/* + * Remove memory if every memory block is offline, otherwise return -EBUSY is + * some memory is not offline + */ +int remove_memory(int nid, u64 start, u64 size) { + int rc; + lock_device_hotplug(); - __remove_memory(nid, start, size); + rc = try_remove_memory(nid, start, size); unlock_device_hotplug(); + + return rc; } EXPORT_SYMBOL_GPL(remove_memory); #endif /* CONFIG_MEMORY_HOTREMOVE */ -- cgit v1.2.3 From cec3ebd083d4e8d161d0b18894c78e3311bcd026 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 18 Jul 2019 15:56:25 -0700 Subject: mm/memory_hotplug: simplify and fix check_hotplug_memory_range() Patch series "mm/memory_hotplug: Factor out memory block devicehandling", v3. We only want memory block devices for memory to be onlined/offlined (add/remove from the buddy). This is required so user space can online/offline memory and kdump gets notified about newly onlined memory. Let's factor out creation/removal of memory block devices. This helps to further cleanup arch_add_memory/arch_remove_memory() and to make implementation of new features easier - especially sub-section memory hot add from Dan. Anshuman Khandual is currently working on arch_remove_memory(). I added a temporary solution via "arm64/mm: Add temporary arch_remove_memory() implementation", that is sufficient as a firsts tep in the context of this series. (we don't cleanup page tables in case anything goes wrong already) Did a quick sanity test with DIMM plug/unplug, making sure all devices and sysfs links properly get added/removed. Compile tested on s390x and x86-64. This patch (of 11): By converting start and size to page granularity, we actually ignore unaligned parts within a page instead of properly bailing out with an error. Link: http://lkml.kernel.org/r/20190527111152.16324-2-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Dan Williams Reviewed-by: Wei Yang Reviewed-by: Pavel Tatashin Reviewed-by: Oscar Salvador Acked-by: Michal Hocko Cc: David Hildenbrand Cc: Qian Cai Cc: Arun KS Cc: Mathieu Malaterre Cc: Alex Deucher Cc: Andrew Banman Cc: Andy Lutomirski Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Baoquan He Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Chintan Pandya Cc: Christophe Leroy Cc: Chris Wilson Cc: Dave Hansen Cc: "David S. Miller" Cc: Fenghua Yu Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jonathan Cameron Cc: Joonsoo Kim Cc: Jun Yao Cc: "Kirill A. Shutemov" Cc: Logan Gunthorpe Cc: Mark Brown Cc: Mark Rutland Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Mike Rapoport Cc: "mike.travis@hpe.com" Cc: Nicholas Piggin Cc: Paul Mackerras Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Rich Felker Cc: Rob Herring Cc: Robin Murphy Cc: Thomas Gleixner Cc: Tony Luck Cc: Vasily Gorbik Cc: Will Deacon Cc: Yoshinori Sato Cc: Yu Zhao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 4ebe696138e8..a8c25fd85ee3 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1049,16 +1049,11 @@ int try_online_node(int nid) static int check_hotplug_memory_range(u64 start, u64 size) { - unsigned long block_sz = memory_block_size_bytes(); - u64 block_nr_pages = block_sz >> PAGE_SHIFT; - u64 nr_pages = size >> PAGE_SHIFT; - u64 start_pfn = PFN_DOWN(start); - /* memory range must be block size aligned */ - if (!nr_pages || !IS_ALIGNED(start_pfn, block_nr_pages) || - !IS_ALIGNED(nr_pages, block_nr_pages)) { + if (!size || !IS_ALIGNED(start, memory_block_size_bytes()) || + !IS_ALIGNED(size, memory_block_size_bytes())) { pr_err("Block size [%#lx] unaligned hotplug range: start %#llx, size %#llx", - block_sz, start, size); + memory_block_size_bytes(), start, size); return -EINVAL; } -- cgit v1.2.3 From 80ec922dbd87fd38d15719c86a94457204648aeb Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 18 Jul 2019 15:56:51 -0700 Subject: mm/memory_hotplug: allow arch_remove_memory() without CONFIG_MEMORY_HOTREMOVE We want to improve error handling while adding memory by allowing to use arch_remove_memory() and __remove_pages() even if CONFIG_MEMORY_HOTREMOVE is not set to e.g., implement something like: arch_add_memory() rc = do_something(); if (rc) { arch_remove_memory(); } We won't get rid of CONFIG_MEMORY_HOTREMOVE for now, as it will require quite some dependencies for memory offlining. Link: http://lkml.kernel.org/r/20190527111152.16324-7-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Pavel Tatashin Cc: Tony Luck Cc: Fenghua Yu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Heiko Carstens Cc: Yoshinori Sato Cc: Rich Felker Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Michal Hocko Cc: David Hildenbrand Cc: Oscar Salvador Cc: "Kirill A. Shutemov" Cc: Alex Deucher Cc: "David S. Miller" Cc: Mark Brown Cc: Chris Wilson Cc: Christophe Leroy Cc: Nicholas Piggin Cc: Vasily Gorbik Cc: Rob Herring Cc: Masahiro Yamada Cc: "mike.travis@hpe.com" Cc: Andrew Banman Cc: Arun KS Cc: Qian Cai Cc: Mathieu Malaterre Cc: Baoquan He Cc: Logan Gunthorpe Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Chintan Pandya Cc: Dan Williams Cc: Ingo Molnar Cc: Jonathan Cameron Cc: Joonsoo Kim Cc: Jun Yao Cc: Mark Rutland Cc: Mike Rapoport Cc: Oscar Salvador Cc: Robin Murphy Cc: Wei Yang Cc: Will Deacon Cc: Yu Zhao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/mm/mmu.c | 2 -- arch/ia64/mm/init.c | 2 -- arch/powerpc/mm/mem.c | 2 -- arch/s390/mm/init.c | 2 -- arch/sh/mm/init.c | 2 -- arch/x86/mm/init_32.c | 2 -- arch/x86/mm/init_64.c | 2 -- drivers/base/memory.c | 2 -- include/linux/memory.h | 2 -- include/linux/memory_hotplug.h | 2 -- mm/memory_hotplug.c | 2 -- mm/sparse.c | 6 ------ 12 files changed, 28 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index a21fa7e1167d..750a69dde39b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1074,7 +1074,6 @@ int arch_add_memory(int nid, u64 start, u64 size, return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, restrictions); } -#ifdef CONFIG_MEMORY_HOTREMOVE void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { @@ -1093,4 +1092,3 @@ void arch_remove_memory(int nid, u64 start, u64 size, __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif -#endif diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index d28e29103bdb..aae75fd7b810 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -681,7 +681,6 @@ int arch_add_memory(int nid, u64 start, u64 size, return ret; } -#ifdef CONFIG_MEMORY_HOTREMOVE void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { @@ -693,4 +692,3 @@ void arch_remove_memory(int nid, u64 start, u64 size, __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif -#endif diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 26a8da3723bb..9259337d7374 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -125,7 +125,6 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, return __add_pages(nid, start_pfn, nr_pages, restrictions); } -#ifdef CONFIG_MEMORY_HOTREMOVE void __ref arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { @@ -151,7 +150,6 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, pr_warn("Hash collision while resizing HPT\n"); } #endif -#endif /* CONFIG_MEMORY_HOTPLUG */ #ifndef CONFIG_NEED_MULTIPLE_NODES void __init mem_topology_setup(void) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 5b1ec2f532e0..4e5bbe328594 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -286,7 +286,6 @@ int arch_add_memory(int nid, u64 start, u64 size, return rc; } -#ifdef CONFIG_MEMORY_HOTREMOVE void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { @@ -298,5 +297,4 @@ void arch_remove_memory(int nid, u64 start, u64 size, __remove_pages(zone, start_pfn, nr_pages, altmap); vmem_remove_mapping(start, size); } -#endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 13c6a6bb5fd9..dfdbaa50946e 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -429,7 +429,6 @@ int memory_add_physaddr_to_nid(u64 addr) EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif -#ifdef CONFIG_MEMORY_HOTREMOVE void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { @@ -440,5 +439,4 @@ void arch_remove_memory(int nid, u64 start, u64 size, zone = page_zone(pfn_to_page(start_pfn)); __remove_pages(zone, start_pfn, nr_pages, altmap); } -#endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index f265a4316179..4068abb9427f 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -860,7 +860,6 @@ int arch_add_memory(int nid, u64 start, u64 size, return __add_pages(nid, start_pfn, nr_pages, restrictions); } -#ifdef CONFIG_MEMORY_HOTREMOVE void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { @@ -872,7 +871,6 @@ void arch_remove_memory(int nid, u64 start, u64 size, __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif -#endif int kernel_set_to_readonly __read_mostly; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 08bbf648827b..5a289a2ab108 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1198,7 +1198,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end, remove_pagetable(start, end, false, altmap); } -#ifdef CONFIG_MEMORY_HOTREMOVE static void __meminit kernel_physical_mapping_remove(unsigned long start, unsigned long end) { @@ -1219,7 +1218,6 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, __remove_pages(zone, start_pfn, nr_pages, altmap); kernel_physical_mapping_remove(start, start + size); } -#endif #endif /* CONFIG_MEMORY_HOTPLUG */ static struct kcore_list kcore_vsyscall; diff --git a/drivers/base/memory.c b/drivers/base/memory.c index e0aa7f9abb36..92459d6f12be 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -723,7 +723,6 @@ out: return ret; } -#ifdef CONFIG_MEMORY_HOTREMOVE static void unregister_memory(struct memory_block *memory) { @@ -762,7 +761,6 @@ void unregister_memory_section(struct mem_section *section) out_unlock: mutex_unlock(&mem_sysfs_mutex); } -#endif /* CONFIG_MEMORY_HOTREMOVE */ /* return true if the memory block is offlined, otherwise, return false */ bool is_memblock_offlined(struct memory_block *mem) diff --git a/include/linux/memory.h b/include/linux/memory.h index e1dc1bb2b787..474c7c60c8f2 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -112,9 +112,7 @@ extern void unregister_memory_notifier(struct notifier_block *nb); extern int register_memory_isolate_notifier(struct notifier_block *nb); extern void unregister_memory_isolate_notifier(struct notifier_block *nb); int hotplug_memory_register(int nid, struct mem_section *section); -#ifdef CONFIG_MEMORY_HOTREMOVE extern void unregister_memory_section(struct mem_section *); -#endif extern int memory_dev_init(void); extern int memory_notify(unsigned long val, void *v); extern int memory_isolate_notify(unsigned long val, void *v); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 988fde33cd7f..87bf9c4a889e 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -123,12 +123,10 @@ static inline bool movable_node_is_enabled(void) return movable_node_enabled; } -#ifdef CONFIG_MEMORY_HOTREMOVE extern void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap); extern void __remove_pages(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); -#endif /* CONFIG_MEMORY_HOTREMOVE */ /* * Do we want sysfs memblock files created. This will allow userspace to online diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a8c25fd85ee3..bc11888d5d7e 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -318,7 +318,6 @@ out: return err; } -#ifdef CONFIG_MEMORY_HOTREMOVE /* find the smallest valid pfn in the range [start_pfn, end_pfn) */ static unsigned long find_smallest_section_pfn(int nid, struct zone *zone, unsigned long start_pfn, @@ -580,7 +579,6 @@ void __remove_pages(struct zone *zone, unsigned long phys_start_pfn, set_zone_contiguous(zone); } -#endif /* CONFIG_MEMORY_HOTREMOVE */ int set_online_page_callback(online_page_callback_t callback) { diff --git a/mm/sparse.c b/mm/sparse.c index fd13166949b5..d1d5e05f5b8d 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -604,7 +604,6 @@ static void __kfree_section_memmap(struct page *memmap, vmemmap_free(start, end, altmap); } -#ifdef CONFIG_MEMORY_HOTREMOVE static void free_map_bootmem(struct page *memmap) { unsigned long start = (unsigned long)memmap; @@ -612,7 +611,6 @@ static void free_map_bootmem(struct page *memmap) vmemmap_free(start, end, NULL); } -#endif /* CONFIG_MEMORY_HOTREMOVE */ #else static struct page *__kmalloc_section_memmap(void) { @@ -651,7 +649,6 @@ static void __kfree_section_memmap(struct page *memmap, get_order(sizeof(struct page) * PAGES_PER_SECTION)); } -#ifdef CONFIG_MEMORY_HOTREMOVE static void free_map_bootmem(struct page *memmap) { unsigned long maps_section_nr, removing_section_nr, i; @@ -681,7 +678,6 @@ static void free_map_bootmem(struct page *memmap) put_page_bootmem(page); } } -#endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_SPARSEMEM_VMEMMAP */ /** @@ -746,7 +742,6 @@ out: return ret; } -#ifdef CONFIG_MEMORY_HOTREMOVE #ifdef CONFIG_MEMORY_FAILURE static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) { @@ -823,5 +818,4 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, PAGES_PER_SECTION - map_offset); free_section_usemap(memmap, usemap, altmap); } -#endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_MEMORY_HOTPLUG */ -- cgit v1.2.3 From db051a0dac13db24d58470d75cee0ce7c6b031a1 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 18 Jul 2019 15:56:56 -0700 Subject: mm/memory_hotplug: create memory block devices after arch_add_memory() Only memory to be added to the buddy and to be onlined/offlined by user space using /sys/devices/system/memory/... needs (and should have!) memory block devices. Factor out creation of memory block devices. Create all devices after arch_add_memory() succeeded. We can later drop the want_memblock parameter, because it is now effectively stale. Only after memory block devices have been added, memory can be onlined by user space. This implies, that memory is not visible to user space at all before arch_add_memory() succeeded. While at it - use WARN_ON_ONCE instead of BUG_ON in moved unregister_memory() - introduce find_memory_block_by_id() to search via block id - Use find_memory_block_by_id() in init_memory_block() to catch duplicates Link: http://lkml.kernel.org/r/20190527111152.16324-8-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Pavel Tatashin Acked-by: Michal Hocko Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: David Hildenbrand Cc: "mike.travis@hpe.com" Cc: Ingo Molnar Cc: Andrew Banman Cc: Oscar Salvador Cc: Qian Cai Cc: Wei Yang Cc: Arun KS Cc: Mathieu Malaterre Cc: Alex Deucher Cc: Andy Lutomirski Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Baoquan He Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Chintan Pandya Cc: Christophe Leroy Cc: Chris Wilson Cc: Dan Williams Cc: Dave Hansen Cc: "David S. Miller" Cc: Fenghua Yu Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Jonathan Cameron Cc: Joonsoo Kim Cc: Jun Yao Cc: "Kirill A. Shutemov" Cc: Logan Gunthorpe Cc: Mark Brown Cc: Mark Rutland Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rich Felker Cc: Rob Herring Cc: Robin Murphy Cc: Thomas Gleixner Cc: Tony Luck Cc: Vasily Gorbik Cc: Will Deacon Cc: Yoshinori Sato Cc: Yu Zhao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 82 +++++++++++++++++++++++++++++++++----------------- include/linux/memory.h | 2 +- mm/memory_hotplug.c | 15 ++++----- 3 files changed, 63 insertions(+), 36 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 92459d6f12be..18a30c3ac0ef 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -39,6 +39,11 @@ static inline int base_memory_block_id(int section_nr) return section_nr / sections_per_block; } +static inline int pfn_to_block_id(unsigned long pfn) +{ + return base_memory_block_id(pfn_to_section_nr(pfn)); +} + static int memory_subsys_online(struct device *dev); static int memory_subsys_offline(struct device *dev); @@ -582,10 +587,9 @@ int __weak arch_get_memory_phys_device(unsigned long start_pfn) * A reference for the returned object is held and the reference for the * hinted object is released. */ -struct memory_block *find_memory_block_hinted(struct mem_section *section, - struct memory_block *hint) +static struct memory_block *find_memory_block_by_id(int block_id, + struct memory_block *hint) { - int block_id = base_memory_block_id(__section_nr(section)); struct device *hintdev = hint ? &hint->dev : NULL; struct device *dev; @@ -597,6 +601,14 @@ struct memory_block *find_memory_block_hinted(struct mem_section *section, return to_memory_block(dev); } +struct memory_block *find_memory_block_hinted(struct mem_section *section, + struct memory_block *hint) +{ + int block_id = base_memory_block_id(__section_nr(section)); + + return find_memory_block_by_id(block_id, hint); +} + /* * For now, we have a linear search to go find the appropriate * memory_block corresponding to a particular phys_index. If @@ -658,6 +670,11 @@ static int init_memory_block(struct memory_block **memory, int block_id, unsigned long start_pfn; int ret = 0; + mem = find_memory_block_by_id(block_id, NULL); + if (mem) { + put_device(&mem->dev); + return -EEXIST; + } mem = kzalloc(sizeof(*mem), GFP_KERNEL); if (!mem) return -ENOMEM; @@ -695,44 +712,53 @@ static int add_memory_block(int base_section_nr) return 0; } +static void unregister_memory(struct memory_block *memory) +{ + if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys)) + return; + + /* drop the ref. we got via find_memory_block() */ + put_device(&memory->dev); + device_unregister(&memory->dev); +} + /* - * need an interface for the VM to add new memory regions, - * but without onlining it. + * Create memory block devices for the given memory area. Start and size + * have to be aligned to memory block granularity. Memory block devices + * will be initialized as offline. */ -int hotplug_memory_register(int nid, struct mem_section *section) +int create_memory_block_devices(unsigned long start, unsigned long size) { - int block_id = base_memory_block_id(__section_nr(section)); - int ret = 0; + const int start_block_id = pfn_to_block_id(PFN_DOWN(start)); + int end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); struct memory_block *mem; + unsigned long block_id; + int ret = 0; - mutex_lock(&mem_sysfs_mutex); + if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || + !IS_ALIGNED(size, memory_block_size_bytes()))) + return -EINVAL; - mem = find_memory_block(section); - if (mem) { - mem->section_count++; - put_device(&mem->dev); - } else { + mutex_lock(&mem_sysfs_mutex); + for (block_id = start_block_id; block_id != end_block_id; block_id++) { ret = init_memory_block(&mem, block_id, MEM_OFFLINE); if (ret) - goto out; - mem->section_count++; + break; + mem->section_count = sections_per_block; + } + if (ret) { + end_block_id = block_id; + for (block_id = start_block_id; block_id != end_block_id; + block_id++) { + mem = find_memory_block_by_id(block_id, NULL); + mem->section_count = 0; + unregister_memory(mem); + } } - -out: mutex_unlock(&mem_sysfs_mutex); return ret; } -static void -unregister_memory(struct memory_block *memory) -{ - BUG_ON(memory->dev.bus != &memory_subsys); - - /* drop the ref. we got via find_memory_block() */ - put_device(&memory->dev); - device_unregister(&memory->dev); -} - void unregister_memory_section(struct mem_section *section) { struct memory_block *mem; diff --git a/include/linux/memory.h b/include/linux/memory.h index 474c7c60c8f2..db3e8567f900 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -111,7 +111,7 @@ extern int register_memory_notifier(struct notifier_block *nb); extern void unregister_memory_notifier(struct notifier_block *nb); extern int register_memory_isolate_notifier(struct notifier_block *nb); extern void unregister_memory_isolate_notifier(struct notifier_block *nb); -int hotplug_memory_register(int nid, struct mem_section *section); +int create_memory_block_devices(unsigned long start, unsigned long size); extern void unregister_memory_section(struct mem_section *); extern int memory_dev_init(void); extern int memory_notify(unsigned long val, void *v); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index bc11888d5d7e..78291526eb4d 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -259,13 +259,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, return -EEXIST; ret = sparse_add_one_section(nid, phys_start_pfn, altmap); - if (ret < 0) - return ret; - - if (!want_memblock) - return 0; - - return hotplug_memory_register(nid, __pfn_to_section(phys_start_pfn)); + return ret < 0 ? ret : 0; } /* @@ -1105,6 +1099,13 @@ int __ref add_memory_resource(int nid, struct resource *res) if (ret < 0) goto error; + /* create memory block devices after memory was added */ + ret = create_memory_block_devices(start, size); + if (ret) { + arch_remove_memory(nid, start, size, NULL); + goto error; + } + if (new_node) { /* If sysfs file of new node can't be created, cpu on the node * can't be hot-added. There is no rollback way now. -- cgit v1.2.3 From 05f800a0bd08e14606ac63e0a5c63ed6880acaab Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 18 Jul 2019 15:57:01 -0700 Subject: mm/memory_hotplug: drop MHP_MEMBLOCK_API No longer needed, the callers of arch_add_memory() can handle this manually. Link: http://lkml.kernel.org/r/20190527111152.16324-9-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Wei Yang Acked-by: Michal Hocko Cc: David Hildenbrand Cc: Oscar Salvador Cc: Pavel Tatashin Cc: Joonsoo Kim Cc: Qian Cai Cc: Arun KS Cc: Mathieu Malaterre Cc: Mike Rapoport Cc: Alex Deucher Cc: Andrew Banman Cc: Andy Lutomirski Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Baoquan He Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Chintan Pandya Cc: Christophe Leroy Cc: Chris Wilson Cc: Dan Williams Cc: Dave Hansen Cc: "David S. Miller" Cc: Fenghua Yu Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jonathan Cameron Cc: Jun Yao Cc: "Kirill A. Shutemov" Cc: Logan Gunthorpe Cc: Mark Brown Cc: Mark Rutland Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Mike Rapoport Cc: "mike.travis@hpe.com" Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Paul Mackerras Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Rich Felker Cc: Rob Herring Cc: Robin Murphy Cc: Thomas Gleixner Cc: Tony Luck Cc: Vasily Gorbik Cc: Will Deacon Cc: Yoshinori Sato Cc: Yu Zhao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 8 -------- mm/memory_hotplug.c | 9 +++------ 2 files changed, 3 insertions(+), 14 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 87bf9c4a889e..36c514b80cf1 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -128,14 +128,6 @@ extern void arch_remove_memory(int nid, u64 start, u64 size, extern void __remove_pages(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); -/* - * Do we want sysfs memblock files created. This will allow userspace to online - * and offline memory explicitly. Lack of this bit means that the caller has to - * call move_pfn_range_to_zone to finish the initialization. - */ - -#define MHP_MEMBLOCK_API (1<<0) - /* reasonably generic interface to expand the physical pages */ extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, struct mhp_restrictions *restrictions); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 78291526eb4d..fb9dc3fa1138 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -251,7 +251,7 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat) #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, - struct vmem_altmap *altmap, bool want_memblock) + struct vmem_altmap *altmap) { int ret; @@ -294,8 +294,7 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn, } for (i = start_sec; i <= end_sec; i++) { - err = __add_section(nid, section_nr_to_pfn(i), altmap, - restrictions->flags & MHP_MEMBLOCK_API); + err = __add_section(nid, section_nr_to_pfn(i), altmap); /* * EEXIST is finally dealt with by ioresource collision @@ -1065,9 +1064,7 @@ static int online_memory_block(struct memory_block *mem, void *arg) */ int __ref add_memory_resource(int nid, struct resource *res) { - struct mhp_restrictions restrictions = { - .flags = MHP_MEMBLOCK_API, - }; + struct mhp_restrictions restrictions = {}; u64 start, size; bool new_node = false; int ret; -- cgit v1.2.3 From 4c4b7f9ba9486c565aead99a198ceeef73ae81f6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 18 Jul 2019 15:57:06 -0700 Subject: mm/memory_hotplug: remove memory block devices before arch_remove_memory() Let's factor out removing of memory block devices, which is only necessary for memory added via add_memory() and friends that created memory block devices. Remove the devices before calling arch_remove_memory(). This finishes factoring out memory block device handling from arch_add_memory() and arch_remove_memory(). Link: http://lkml.kernel.org/r/20190527111152.16324-10-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Dan Williams Acked-by: Michal Hocko Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: David Hildenbrand Cc: "mike.travis@hpe.com" Cc: Andrew Banman Cc: Ingo Molnar Cc: Alex Deucher Cc: "David S. Miller" Cc: Mark Brown Cc: Chris Wilson Cc: Oscar Salvador Cc: Jonathan Cameron Cc: Arun KS Cc: Mathieu Malaterre Cc: Andy Lutomirski Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Baoquan He Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Chintan Pandya Cc: Christophe Leroy Cc: Dave Hansen Cc: Fenghua Yu Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Joonsoo Kim Cc: Jun Yao Cc: "Kirill A. Shutemov" Cc: Logan Gunthorpe Cc: Mark Rutland Cc: Masahiro Yamada Cc: Michael Ellerman Cc: Mike Rapoport Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Paul Mackerras Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Qian Cai Cc: Rich Felker Cc: Rob Herring Cc: Robin Murphy Cc: Thomas Gleixner Cc: Tony Luck Cc: Vasily Gorbik Cc: Wei Yang Cc: Will Deacon Cc: Yoshinori Sato Cc: Yu Zhao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 37 ++++++++++++++++++------------------- drivers/base/node.c | 11 ++++++----- include/linux/memory.h | 2 +- include/linux/node.h | 6 ++---- mm/memory_hotplug.c | 5 +++-- 5 files changed, 30 insertions(+), 31 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 18a30c3ac0ef..826dd76f662e 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -759,32 +759,31 @@ int create_memory_block_devices(unsigned long start, unsigned long size) return ret; } -void unregister_memory_section(struct mem_section *section) +/* + * Remove memory block devices for the given memory area. Start and size + * have to be aligned to memory block granularity. Memory block devices + * have to be offline. + */ +void remove_memory_block_devices(unsigned long start, unsigned long size) { + const int start_block_id = pfn_to_block_id(PFN_DOWN(start)); + const int end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); struct memory_block *mem; + int block_id; - if (WARN_ON_ONCE(!present_section(section))) + if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || + !IS_ALIGNED(size, memory_block_size_bytes()))) return; mutex_lock(&mem_sysfs_mutex); - - /* - * Some users of the memory hotplug do not want/need memblock to - * track all sections. Skip over those. - */ - mem = find_memory_block(section); - if (!mem) - goto out_unlock; - - unregister_mem_sect_under_nodes(mem, __section_nr(section)); - - mem->section_count--; - if (mem->section_count == 0) + for (block_id = start_block_id; block_id != end_block_id; block_id++) { + mem = find_memory_block_by_id(block_id, NULL); + if (WARN_ON_ONCE(!mem)) + continue; + mem->section_count = 0; + unregister_memory_block_under_nodes(mem); unregister_memory(mem); - else - put_device(&mem->dev); - -out_unlock: + } mutex_unlock(&mem_sysfs_mutex); } diff --git a/drivers/base/node.c b/drivers/base/node.c index aa878fbcf705..0b0f38c2c7cd 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -802,9 +802,10 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg) return 0; } -/* unregister memory section under all nodes that it spans */ -int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, - unsigned long phys_index) +/* + * Unregister memory block device under all nodes that it spans. + */ +int unregister_memory_block_under_nodes(struct memory_block *mem_blk) { NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL); unsigned long pfn, sect_start_pfn, sect_end_pfn; @@ -817,8 +818,8 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, return -ENOMEM; nodes_clear(*unlinked_nodes); - sect_start_pfn = section_nr_to_pfn(phys_index); - sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; + sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); + sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr); for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { int nid; diff --git a/include/linux/memory.h b/include/linux/memory.h index db3e8567f900..f26a5417ec5d 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -112,7 +112,7 @@ extern void unregister_memory_notifier(struct notifier_block *nb); extern int register_memory_isolate_notifier(struct notifier_block *nb); extern void unregister_memory_isolate_notifier(struct notifier_block *nb); int create_memory_block_devices(unsigned long start, unsigned long size); -extern void unregister_memory_section(struct mem_section *); +void remove_memory_block_devices(unsigned long start, unsigned long size); extern int memory_dev_init(void); extern int memory_notify(unsigned long val, void *v); extern int memory_isolate_notify(unsigned long val, void *v); diff --git a/include/linux/node.h b/include/linux/node.h index 1a557c589ecb..02a29e71b175 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -139,8 +139,7 @@ extern int register_cpu_under_node(unsigned int cpu, unsigned int nid); extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); extern int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg); -extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, - unsigned long phys_index); +extern int unregister_memory_block_under_nodes(struct memory_block *mem_blk); extern int register_memory_node_under_compute_node(unsigned int mem_nid, unsigned int cpu_nid, @@ -176,8 +175,7 @@ static inline int register_mem_sect_under_node(struct memory_block *mem_blk, { return 0; } -static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, - unsigned long phys_index) +static inline int unregister_memory_block_under_nodes(struct memory_block *mem_blk) { return 0; } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index fb9dc3fa1138..37c861e7a717 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -520,8 +520,6 @@ static void __remove_section(struct zone *zone, struct mem_section *ms, if (WARN_ON_ONCE(!valid_section(ms))) return; - unregister_memory_section(ms); - scn_nr = __section_nr(ms); start_pfn = section_nr_to_pfn((unsigned long)scn_nr); __remove_zone(zone, start_pfn); @@ -1834,6 +1832,9 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) memblock_free(start, size); memblock_remove(start, size); + /* remove memory block devices before removing memory */ + remove_memory_block_devices(start, size); + arch_remove_memory(nid, start, size, NULL); __release_memory_resource(start, size); -- cgit v1.2.3 From b9bf8d342d9b443c0d19aa57883d8ddb38d965de Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 18 Jul 2019 15:57:17 -0700 Subject: mm/memory_hotplug: remove "zone" parameter from sparse_remove_one_section The parameter is unused, so let's drop it. Memory removal paths should never care about zones. This is the job of memory offlining and will require more refactorings. Link: http://lkml.kernel.org/r/20190527111152.16324-12-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Dan Williams Reviewed-by: Wei Yang Reviewed-by: Oscar Salvador Acked-by: Michal Hocko Cc: Alex Deucher Cc: Andrew Banman Cc: Andy Lutomirski Cc: Anshuman Khandual Cc: Ard Biesheuvel Cc: Arun KS Cc: Baoquan He Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Chintan Pandya Cc: Christophe Leroy Cc: Chris Wilson Cc: Dave Hansen Cc: "David S. Miller" Cc: Fenghua Yu Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jonathan Cameron Cc: Joonsoo Kim Cc: Jun Yao Cc: "Kirill A. Shutemov" Cc: Logan Gunthorpe Cc: Mark Brown Cc: Mark Rutland Cc: Masahiro Yamada Cc: Mathieu Malaterre Cc: Michael Ellerman Cc: Mike Rapoport Cc: "mike.travis@hpe.com" Cc: Nicholas Piggin Cc: Paul Mackerras Cc: Pavel Tatashin Cc: Peter Zijlstra Cc: Qian Cai Cc: "Rafael J. Wysocki" Cc: Rich Felker Cc: Rob Herring Cc: Robin Murphy Cc: Thomas Gleixner Cc: Tony Luck Cc: Vasily Gorbik Cc: Will Deacon Cc: Yoshinori Sato Cc: Yu Zhao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 2 +- mm/memory_hotplug.c | 2 +- mm/sparse.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 36c514b80cf1..79e0add6a597 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -350,7 +350,7 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, extern bool is_memblock_offlined(struct memory_block *mem); extern int sparse_add_one_section(int nid, unsigned long start_pfn, struct vmem_altmap *altmap); -extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, +extern void sparse_remove_one_section(struct mem_section *ms, unsigned long map_offset, struct vmem_altmap *altmap); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 37c861e7a717..d1d0ceaaca88 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -524,7 +524,7 @@ static void __remove_section(struct zone *zone, struct mem_section *ms, start_pfn = section_nr_to_pfn((unsigned long)scn_nr); __remove_zone(zone, start_pfn); - sparse_remove_one_section(zone, ms, map_offset, altmap); + sparse_remove_one_section(ms, map_offset, altmap); } /** diff --git a/mm/sparse.c b/mm/sparse.c index d1d5e05f5b8d..1552c855d62a 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -800,8 +800,8 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap, free_map_bootmem(memmap); } -void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset, struct vmem_altmap *altmap) +void sparse_remove_one_section(struct mem_section *ms, unsigned long map_offset, + struct vmem_altmap *altmap) { struct page *memmap = NULL; unsigned long *usemap = NULL; -- cgit v1.2.3 From fbcf73ce65827c3d8935f38b832a43153a0c78d1 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 18 Jul 2019 15:57:46 -0700 Subject: mm/memory_hotplug: rename walk_memory_range() and pass start+size instead of pfns walk_memory_range() was once used to iterate over sections. Now, it iterates over memory blocks. Rename the function, fixup the documentation. Also, pass start+size instead of PFNs, which is what most callers already have at hand. (we'll rework link_mem_sections() most probably soon) Follow-up patches will rework, simplify, and move walk_memory_blocks() to drivers/base/memory.c. Note: walk_memory_blocks() only works correctly right now if the start_pfn is aligned to a section start. This is the case right now, but we'll generalize the function in a follow up patch so the semantics match the documentation. [akpm@linux-foundation.org: remove unused variable] Link: http://lkml.kernel.org/r/20190614100114.311-5-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Andrew Morton Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: "Rafael J. Wysocki" Cc: Len Brown Cc: Greg Kroah-Hartman Cc: David Hildenbrand Cc: Rashmica Gupta Cc: Pavel Tatashin Cc: Anshuman Khandual Cc: Michael Neuling Cc: Thomas Gleixner Cc: Oscar Salvador Cc: Michal Hocko Cc: Wei Yang Cc: Juergen Gross Cc: Qian Cai Cc: Arun KS Cc: Nick Desaulniers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/platforms/powernv/memtrace.c | 23 +++++++++++------------ drivers/acpi/acpi_memhotplug.c | 19 ++++--------------- drivers/base/node.c | 5 +++-- include/linux/memory_hotplug.h | 2 +- mm/memory_hotplug.c | 24 +++++++++++++----------- 5 files changed, 32 insertions(+), 41 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 5e53c1392d3b..eb2e75dac369 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -70,23 +70,23 @@ static int change_memblock_state(struct memory_block *mem, void *arg) /* called with device_hotplug_lock held */ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) { - u64 end_pfn = start_pfn + nr_pages - 1; + const unsigned long start = PFN_PHYS(start_pfn); + const unsigned long size = PFN_PHYS(nr_pages); - if (walk_memory_range(start_pfn, end_pfn, NULL, - check_memblock_online)) + if (walk_memory_blocks(start, size, NULL, check_memblock_online)) return false; - walk_memory_range(start_pfn, end_pfn, (void *)MEM_GOING_OFFLINE, - change_memblock_state); + walk_memory_blocks(start, size, (void *)MEM_GOING_OFFLINE, + change_memblock_state); if (offline_pages(start_pfn, nr_pages)) { - walk_memory_range(start_pfn, end_pfn, (void *)MEM_ONLINE, - change_memblock_state); + walk_memory_blocks(start, size, (void *)MEM_ONLINE, + change_memblock_state); return false; } - walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE, - change_memblock_state); + walk_memory_blocks(start, size, (void *)MEM_OFFLINE, + change_memblock_state); return true; @@ -242,9 +242,8 @@ static int memtrace_online(void) */ if (!memhp_auto_online) { lock_device_hotplug(); - walk_memory_range(PFN_DOWN(ent->start), - PFN_UP(ent->start + ent->size - 1), - NULL, online_mem_block); + walk_memory_blocks(ent->start, ent->size, NULL, + online_mem_block); unlock_device_hotplug(); } diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index db013dc21c02..e294f44a7850 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -155,16 +155,6 @@ static int acpi_memory_check_device(struct acpi_memory_device *mem_device) return 0; } -static unsigned long acpi_meminfo_start_pfn(struct acpi_memory_info *info) -{ - return PFN_DOWN(info->start_addr); -} - -static unsigned long acpi_meminfo_end_pfn(struct acpi_memory_info *info) -{ - return PFN_UP(info->start_addr + info->length-1); -} - static int acpi_bind_memblk(struct memory_block *mem, void *arg) { return acpi_bind_one(&mem->dev, arg); @@ -173,9 +163,8 @@ static int acpi_bind_memblk(struct memory_block *mem, void *arg) static int acpi_bind_memory_blocks(struct acpi_memory_info *info, struct acpi_device *adev) { - return walk_memory_range(acpi_meminfo_start_pfn(info), - acpi_meminfo_end_pfn(info), adev, - acpi_bind_memblk); + return walk_memory_blocks(info->start_addr, info->length, adev, + acpi_bind_memblk); } static int acpi_unbind_memblk(struct memory_block *mem, void *arg) @@ -186,8 +175,8 @@ static int acpi_unbind_memblk(struct memory_block *mem, void *arg) static void acpi_unbind_memory_blocks(struct acpi_memory_info *info) { - walk_memory_range(acpi_meminfo_start_pfn(info), - acpi_meminfo_end_pfn(info), NULL, acpi_unbind_memblk); + walk_memory_blocks(info->start_addr, info->length, NULL, + acpi_unbind_memblk); } static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) diff --git a/drivers/base/node.c b/drivers/base/node.c index 27391f1e8f60..75b7e6f6535b 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -834,8 +834,9 @@ void unregister_memory_block_under_nodes(struct memory_block *mem_blk) int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn) { - return walk_memory_range(start_pfn, end_pfn, (void *)&nid, - register_mem_sect_under_node); + return walk_memory_blocks(PFN_PHYS(start_pfn), + PFN_PHYS(end_pfn - start_pfn), (void *)&nid, + register_mem_sect_under_node); } #ifdef CONFIG_HUGETLBFS diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 79e0add6a597..d9fffc34949f 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -340,7 +340,7 @@ static inline void __remove_memory(int nid, u64 start, u64 size) {} #endif /* CONFIG_MEMORY_HOTREMOVE */ extern void __ref free_area_init_core_hotplug(int nid); -extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, +extern int walk_memory_blocks(unsigned long start, unsigned long size, void *arg, int (*func)(struct memory_block *, void *)); extern int __add_memory(int nid, u64 start, u64 size); extern int add_memory(int nid, u64 start, u64 size); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index d1d0ceaaca88..b3ef84e408fa 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1124,8 +1124,7 @@ int __ref add_memory_resource(int nid, struct resource *res) /* online pages if requested */ if (memhp_auto_online) - walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), - NULL, online_memory_block); + walk_memory_blocks(start, size, NULL, online_memory_block); return ret; error: @@ -1663,20 +1662,24 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages) #endif /* CONFIG_MEMORY_HOTREMOVE */ /** - * walk_memory_range - walks through all mem sections in [start_pfn, end_pfn) - * @start_pfn: start pfn of the memory range - * @end_pfn: end pfn of the memory range + * walk_memory_blocks - walk through all present memory blocks overlapped + * by the range [start, start + size) + * + * @start: start address of the memory range + * @size: size of the memory range * @arg: argument passed to func - * @func: callback for each memory section walked + * @func: callback for each memory block walked * - * This function walks through all present mem sections in range - * [start_pfn, end_pfn) and call func on each mem section. + * This function walks through all present memory blocks overlapped by the + * range [start, start + size), calling func on each memory block. * * Returns the return value of func. */ -int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, +int walk_memory_blocks(unsigned long start, unsigned long size, void *arg, int (*func)(struct memory_block *, void *)) { + const unsigned long start_pfn = PFN_DOWN(start); + const unsigned long end_pfn = PFN_UP(start + size - 1); struct memory_block *mem = NULL; struct mem_section *section; unsigned long pfn, section_nr; @@ -1822,8 +1825,7 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) * whether all memory blocks in question are offline and return error * if this is not the case. */ - rc = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL, - check_memblock_offlined_cb); + rc = walk_memory_blocks(start, size, NULL, check_memblock_offlined_cb); if (rc) goto done; -- cgit v1.2.3 From ea8846411ad686ff626e00bb2c3821b3db2ab56a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 18 Jul 2019 15:57:50 -0700 Subject: mm/memory_hotplug: move and simplify walk_memory_blocks() Let's move walk_memory_blocks() to the place where memory block logic resides and simplify it. While at it, add a type for the callback function. Link: http://lkml.kernel.org/r/20190614100114.311-6-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Andrew Morton Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: David Hildenbrand Cc: Stephen Rothwell Cc: Pavel Tatashin Cc: Andrew Banman Cc: Mike Travis Cc: Oscar Salvador Cc: Michal Hocko Cc: Wei Yang Cc: Arun KS Cc: Qian Cai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 42 ++++++++++++++++++++++++++++++++ include/linux/memory.h | 3 +++ include/linux/memory_hotplug.h | 2 -- mm/memory_hotplug.c | 55 ------------------------------------------ 4 files changed, 45 insertions(+), 57 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c54e80fd25a8..0204384b4d1d 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -44,6 +44,11 @@ static inline unsigned long pfn_to_block_id(unsigned long pfn) return base_memory_block_id(pfn_to_section_nr(pfn)); } +static inline unsigned long phys_to_block_id(unsigned long phys) +{ + return pfn_to_block_id(PFN_DOWN(phys)); +} + static int memory_subsys_online(struct device *dev); static int memory_subsys_offline(struct device *dev); @@ -851,3 +856,40 @@ out: printk(KERN_ERR "%s() failed: %d\n", __func__, ret); return ret; } + +/** + * walk_memory_blocks - walk through all present memory blocks overlapped + * by the range [start, start + size) + * + * @start: start address of the memory range + * @size: size of the memory range + * @arg: argument passed to func + * @func: callback for each memory section walked + * + * This function walks through all present memory blocks overlapped by the + * range [start, start + size), calling func on each memory block. + * + * In case func() returns an error, walking is aborted and the error is + * returned. + */ +int walk_memory_blocks(unsigned long start, unsigned long size, + void *arg, walk_memory_blocks_func_t func) +{ + const unsigned long start_block_id = phys_to_block_id(start); + const unsigned long end_block_id = phys_to_block_id(start + size - 1); + struct memory_block *mem; + unsigned long block_id; + int ret = 0; + + for (block_id = start_block_id; block_id <= end_block_id; block_id++) { + mem = find_memory_block_by_id(block_id, NULL); + if (!mem) + continue; + + ret = func(mem, arg); + put_device(&mem->dev); + if (ret) + break; + } + return ret; +} diff --git a/include/linux/memory.h b/include/linux/memory.h index f26a5417ec5d..b3b388775a30 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -119,6 +119,9 @@ extern int memory_isolate_notify(unsigned long val, void *v); extern struct memory_block *find_memory_block_hinted(struct mem_section *, struct memory_block *); extern struct memory_block *find_memory_block(struct mem_section *); +typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *); +extern int walk_memory_blocks(unsigned long start, unsigned long size, + void *arg, walk_memory_blocks_func_t func); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<= mem->start_section_nr) && - (section_nr <= mem->end_section_nr)) - continue; - - mem = find_memory_block_hinted(section, mem); - if (!mem) - continue; - - ret = func(mem, arg); - if (ret) { - kobject_put(&mem->dev.kobj); - return ret; - } - } - - if (mem) - kobject_put(&mem->dev.kobj); - - return 0; -} - -#ifdef CONFIG_MEMORY_HOTREMOVE static int check_memblock_offlined_cb(struct memory_block *mem, void *arg) { int ret = !is_memblock_offlined(mem); -- cgit v1.2.3 From f1eca35a0dc7cb3cdb00c88c8c5e5138a65face0 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 18 Jul 2019 15:57:57 -0700 Subject: mm/sparsemem: introduce struct mem_section_usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mm: Sub-section memory hotplug support", v10. The memory hotplug section is an arbitrary / convenient unit for memory hotplug. 'Section-size' units have bled into the user interface ('memblock' sysfs) and can not be changed without breaking existing userspace. The section-size constraint, while mostly benign for typical memory hotplug, has and continues to wreak havoc with 'device-memory' use cases, persistent memory (pmem) in particular. Recall that pmem uses devm_memremap_pages(), and subsequently arch_add_memory(), to allocate a 'struct page' memmap for pmem. However, it does not use the 'bottom half' of memory hotplug, i.e. never marks pmem pages online and never exposes the userspace memblock interface for pmem. This leaves an opening to redress the section-size constraint. To date, the libnvdimm subsystem has attempted to inject padding to satisfy the internal constraints of arch_add_memory(). Beyond complicating the code, leading to bugs [2], wasting memory, and limiting configuration flexibility, the padding hack is broken when the platform changes this physical memory alignment of pmem from one boot to the next. Device failure (intermittent or permanent) and physical reconfiguration are events that can cause the platform firmware to change the physical placement of pmem on a subsequent boot, and device failure is an everyday event in a data-center. It turns out that sections are only a hard requirement of the user-facing interface for memory hotplug and with a bit more infrastructure sub-section arch_add_memory() support can be added for kernel internal usages like devm_memremap_pages(). Here is an analysis of the current design assumptions in the current code and how they are addressed in the new implementation: Current design assumptions: - Sections that describe boot memory (early sections) are never unplugged / removed. - pfn_valid(), in the CONFIG_SPARSEMEM_VMEMMAP=y, case devolves to a valid_section() check - __add_pages() and helper routines assume all operations occur in PAGES_PER_SECTION units. - The memblock sysfs interface only comprehends full sections New design assumptions: - Sections are instrumented with a sub-section bitmask to track (on x86) individual 2MB sub-divisions of a 128MB section. - Partially populated early sections can be extended with additional sub-sections, and those sub-sections can be removed with arch_remove_memory(). With this in place we no longer lose usable memory capacity to padding. - pfn_valid() is updated to look deeper than valid_section() to also check the active-sub-section mask. This indication is in the same cacheline as the valid_section() so the performance impact is expected to be negligible. So far the lkp robot has not reported any regressions. - Outside of the core vmemmap population routines which are replaced, other helper routines like shrink_{zone,pgdat}_span() are updated to handle the smaller granularity. Core memory hotplug routines that deal with online memory are not touched. - The existing memblock sysfs user api guarantees / assumptions are not touched since this capability is limited to !online !memblock-sysfs-accessible sections. Meanwhile the issue reports continue to roll in from users that do not understand when and how the 128MB constraint will bite them. The current implementation relied on being able to support at least one misaligned namespace, but that immediately falls over on any moderately complex namespace creation attempt. Beyond the initial problem of 'System RAM' colliding with pmem, and the unsolvable problem of physical alignment changes, Linux is now being exposed to platforms that collide pmem ranges with other pmem ranges by default [3]. In short, devm_memremap_pages() has pushed the venerable section-size constraint past the breaking point, and the simplicity of section-aligned arch_add_memory() is no longer tenable. These patches are exposed to the kbuild robot on a subsection-v10 branch [4], and a preview of the unit test for this functionality is available on the 'subsection-pending' branch of ndctl [5]. [2]: https://lore.kernel.org/r/155000671719.348031.2347363160141119237.stgit@dwillia2-desk3.amr.corp.intel.com [3]: https://github.com/pmem/ndctl/issues/76 [4]: https://git.kernel.org/pub/scm/linux/kernel/git/djbw/nvdimm.git/log/?h=subsection-v10 [5]: https://github.com/pmem/ndctl/commit/7c59b4867e1c This patch (of 13): Towards enabling memory hotplug to track partial population of a section, introduce 'struct mem_section_usage'. A pointer to a 'struct mem_section_usage' instance replaces the existing pointer to a 'pageblock_flags' bitmap. Effectively it adds one more 'unsigned long' beyond the 'pageblock_flags' (usemap) allocation to house a new 'subsection_map' bitmap. The new bitmap enables the memory hot{plug,remove} implementation to act on incremental sub-divisions of a section. SUBSECTION_SHIFT is defined as global constant instead of per-architecture value like SECTION_SIZE_BITS in order to allow cross-arch compatibility of subsection users. Specifically a common subsection size allows for the possibility that persistent memory namespace configurations be made compatible across architectures. The primary motivation for this functionality is to support platforms that mix "System RAM" and "Persistent Memory" within a single section, or multiple PMEM ranges with different mapping lifetimes within a single section. The section restriction for hotplug has caused an ongoing saga of hacks and bugs for devm_memremap_pages() users. Beyond the fixups to teach existing paths how to retrieve the 'usemap' from a section, and updates to usemap allocation path, there are no expected behavior changes. Link: http://lkml.kernel.org/r/156092349845.979959.73333291612799019.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Reviewed-by: Oscar Salvador Reviewed-by: Wei Yang Tested-by: Aneesh Kumar K.V [ppc64] Cc: Michal Hocko Cc: Vlastimil Babka Cc: Logan Gunthorpe Cc: Pavel Tatashin Cc: David Hildenbrand Cc: Jérôme Glisse Cc: Mike Rapoport Cc: Jane Chu Cc: Pavel Tatashin Cc: Jonathan Corbet Cc: Qian Cai Cc: Logan Gunthorpe Cc: Toshi Kani Cc: Jeff Moyer Cc: Michal Hocko Cc: Vlastimil Babka Cc: Jason Gunthorpe Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 28 +++++++++++++++-- mm/memory_hotplug.c | 18 ++++++----- mm/page_alloc.c | 2 +- mm/sparse.c | 81 +++++++++++++++++++++++++------------------------- 4 files changed, 76 insertions(+), 53 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 298d1c3e4c2e..2520336bdfd1 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1160,6 +1160,24 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec) #define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK) #define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK) +#define SUBSECTION_SHIFT 21 + +#define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT) +#define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT) +#define PAGE_SUBSECTION_MASK (~(PAGES_PER_SUBSECTION-1)) + +#if SUBSECTION_SHIFT > SECTION_SIZE_BITS +#error Subsection size exceeds section size +#else +#define SUBSECTIONS_PER_SECTION (1UL << (SECTION_SIZE_BITS - SUBSECTION_SHIFT)) +#endif + +struct mem_section_usage { + DECLARE_BITMAP(subsection_map, SUBSECTIONS_PER_SECTION); + /* See declaration of similar field in struct zone */ + unsigned long pageblock_flags[0]; +}; + struct page; struct page_ext; struct mem_section { @@ -1177,8 +1195,7 @@ struct mem_section { */ unsigned long section_mem_map; - /* See declaration of similar field in struct zone */ - unsigned long *pageblock_flags; + struct mem_section_usage *usage; #ifdef CONFIG_PAGE_EXTENSION /* * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use @@ -1209,6 +1226,11 @@ extern struct mem_section **mem_section; extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]; #endif +static inline unsigned long *section_to_usemap(struct mem_section *ms) +{ + return ms->usage->pageblock_flags; +} + static inline struct mem_section *__nr_to_section(unsigned long nr) { #ifdef CONFIG_SPARSEMEM_EXTREME @@ -1220,7 +1242,7 @@ static inline struct mem_section *__nr_to_section(unsigned long nr) return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; } extern unsigned long __section_nr(struct mem_section *ms); -extern unsigned long usemap_size(void); +extern size_t mem_section_usage_size(void); /* * We use the lower bits of the mem_map pointer to store diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index fafee5f13ef2..cf9d979a6498 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -166,9 +166,10 @@ void put_page_bootmem(struct page *page) #ifndef CONFIG_SPARSEMEM_VMEMMAP static void register_page_bootmem_info_section(unsigned long start_pfn) { - unsigned long *usemap, mapsize, section_nr, i; + unsigned long mapsize, section_nr, i; struct mem_section *ms; struct page *page, *memmap; + struct mem_section_usage *usage; section_nr = pfn_to_section_nr(start_pfn); ms = __nr_to_section(section_nr); @@ -188,10 +189,10 @@ static void register_page_bootmem_info_section(unsigned long start_pfn) for (i = 0; i < mapsize; i++, page++) get_page_bootmem(section_nr, page, SECTION_INFO); - usemap = ms->pageblock_flags; - page = virt_to_page(usemap); + usage = ms->usage; + page = virt_to_page(usage); - mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT; + mapsize = PAGE_ALIGN(mem_section_usage_size()) >> PAGE_SHIFT; for (i = 0; i < mapsize; i++, page++) get_page_bootmem(section_nr, page, MIX_SECTION_INFO); @@ -200,9 +201,10 @@ static void register_page_bootmem_info_section(unsigned long start_pfn) #else /* CONFIG_SPARSEMEM_VMEMMAP */ static void register_page_bootmem_info_section(unsigned long start_pfn) { - unsigned long *usemap, mapsize, section_nr, i; + unsigned long mapsize, section_nr, i; struct mem_section *ms; struct page *page, *memmap; + struct mem_section_usage *usage; section_nr = pfn_to_section_nr(start_pfn); ms = __nr_to_section(section_nr); @@ -211,10 +213,10 @@ static void register_page_bootmem_info_section(unsigned long start_pfn) register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION); - usemap = ms->pageblock_flags; - page = virt_to_page(usemap); + usage = ms->usage; + page = virt_to_page(usage); - mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT; + mapsize = PAGE_ALIGN(mem_section_usage_size()) >> PAGE_SHIFT; for (i = 0; i < mapsize; i++, page++) get_page_bootmem(section_nr, page, MIX_SECTION_INFO); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e515bfcf7f28..be78bafbfe3a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -450,7 +450,7 @@ static inline unsigned long *get_pageblock_bitmap(struct page *page, unsigned long pfn) { #ifdef CONFIG_SPARSEMEM - return __pfn_to_section(pfn)->pageblock_flags; + return section_to_usemap(__pfn_to_section(pfn)); #else return page_zone(page)->pageblock_flags; #endif /* CONFIG_SPARSEMEM */ diff --git a/mm/sparse.c b/mm/sparse.c index b29534cea8c0..41bef8e1f65c 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -288,33 +288,31 @@ struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pn static void __meminit sparse_init_one_section(struct mem_section *ms, unsigned long pnum, struct page *mem_map, - unsigned long *pageblock_bitmap) + struct mem_section_usage *usage) { ms->section_mem_map &= ~SECTION_MAP_MASK; ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) | SECTION_HAS_MEM_MAP; - ms->pageblock_flags = pageblock_bitmap; + ms->usage = usage; } -unsigned long usemap_size(void) +static unsigned long usemap_size(void) { return BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) * sizeof(unsigned long); } -#ifdef CONFIG_MEMORY_HOTPLUG -static unsigned long *__kmalloc_section_usemap(void) +size_t mem_section_usage_size(void) { - return kmalloc(usemap_size(), GFP_KERNEL); + return sizeof(struct mem_section_usage) + usemap_size(); } -#endif /* CONFIG_MEMORY_HOTPLUG */ #ifdef CONFIG_MEMORY_HOTREMOVE -static unsigned long * __init +static struct mem_section_usage * __init sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, unsigned long size) { + struct mem_section_usage *usage; unsigned long goal, limit; - unsigned long *p; int nid; /* * A page may contain usemaps for other sections preventing the @@ -330,15 +328,16 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, limit = goal + (1UL << PA_SECTION_SHIFT); nid = early_pfn_to_nid(goal >> PAGE_SHIFT); again: - p = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid); - if (!p && limit) { + usage = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid); + if (!usage && limit) { limit = 0; goto again; } - return p; + return usage; } -static void __init check_usemap_section_nr(int nid, unsigned long *usemap) +static void __init check_usemap_section_nr(int nid, + struct mem_section_usage *usage) { unsigned long usemap_snr, pgdat_snr; static unsigned long old_usemap_snr; @@ -352,7 +351,7 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) old_pgdat_snr = NR_MEM_SECTIONS; } - usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT); + usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT); pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); if (usemap_snr == pgdat_snr) return; @@ -380,14 +379,15 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) usemap_snr, pgdat_snr, nid); } #else -static unsigned long * __init +static struct mem_section_usage * __init sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, unsigned long size) { return memblock_alloc_node(size, SMP_CACHE_BYTES, pgdat->node_id); } -static void __init check_usemap_section_nr(int nid, unsigned long *usemap) +static void __init check_usemap_section_nr(int nid, + struct mem_section_usage *usage) { } #endif /* CONFIG_MEMORY_HOTREMOVE */ @@ -474,14 +474,13 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin, unsigned long pnum_end, unsigned long map_count) { - unsigned long pnum, usemap_longs, *usemap; + struct mem_section_usage *usage; + unsigned long pnum; struct page *map; - usemap_longs = BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS); - usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid), - usemap_size() * - map_count); - if (!usemap) { + usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid), + mem_section_usage_size() * map_count); + if (!usage) { pr_err("%s: node[%d] usemap allocation failed", __func__, nid); goto failed; } @@ -497,9 +496,9 @@ static void __init sparse_init_nid(int nid, unsigned long pnum_begin, pnum_begin = pnum; goto failed; } - check_usemap_section_nr(nid, usemap); - sparse_init_one_section(__nr_to_section(pnum), pnum, map, usemap); - usemap += usemap_longs; + check_usemap_section_nr(nid, usage); + sparse_init_one_section(__nr_to_section(pnum), pnum, map, usage); + usage = (void *) usage + mem_section_usage_size(); } sparse_buffer_fini(); return; @@ -697,9 +696,9 @@ int __meminit sparse_add_one_section(int nid, unsigned long start_pfn, struct vmem_altmap *altmap) { unsigned long section_nr = pfn_to_section_nr(start_pfn); + struct mem_section_usage *usage; struct mem_section *ms; struct page *memmap; - unsigned long *usemap; int ret; /* @@ -713,8 +712,8 @@ int __meminit sparse_add_one_section(int nid, unsigned long start_pfn, memmap = kmalloc_section_memmap(section_nr, nid, altmap); if (!memmap) return -ENOMEM; - usemap = __kmalloc_section_usemap(); - if (!usemap) { + usage = kzalloc(mem_section_usage_size(), GFP_KERNEL); + if (!usage) { __kfree_section_memmap(memmap, altmap); return -ENOMEM; } @@ -733,11 +732,11 @@ int __meminit sparse_add_one_section(int nid, unsigned long start_pfn, set_section_nid(section_nr, nid); section_mark_present(ms); - sparse_init_one_section(ms, section_nr, memmap, usemap); + sparse_init_one_section(ms, section_nr, memmap, usage); out: if (ret < 0) { - kfree(usemap); + kfree(usage); __kfree_section_memmap(memmap, altmap); } return ret; @@ -773,20 +772,20 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) } #endif -static void free_section_usemap(struct page *memmap, unsigned long *usemap, - struct vmem_altmap *altmap) +static void free_section_usage(struct page *memmap, + struct mem_section_usage *usage, struct vmem_altmap *altmap) { - struct page *usemap_page; + struct page *usage_page; - if (!usemap) + if (!usage) return; - usemap_page = virt_to_page(usemap); + usage_page = virt_to_page(usage); /* * Check to see if allocation came from hot-plug-add */ - if (PageSlab(usemap_page) || PageCompound(usemap_page)) { - kfree(usemap); + if (PageSlab(usage_page) || PageCompound(usage_page)) { + kfree(usage); if (memmap) __kfree_section_memmap(memmap, altmap); return; @@ -805,18 +804,18 @@ void sparse_remove_one_section(struct mem_section *ms, unsigned long map_offset, struct vmem_altmap *altmap) { struct page *memmap = NULL; - unsigned long *usemap = NULL; + struct mem_section_usage *usage = NULL; if (ms->section_mem_map) { - usemap = ms->pageblock_flags; + usage = ms->usage; memmap = sparse_decode_mem_map(ms->section_mem_map, __section_nr(ms)); ms->section_mem_map = 0; - ms->pageblock_flags = NULL; + ms->usage = NULL; } clear_hwpoisoned_pages(memmap + map_offset, PAGES_PER_SECTION - map_offset); - free_section_usemap(memmap, usemap, altmap); + free_section_usage(memmap, usage, altmap); } #endif /* CONFIG_MEMORY_HOTPLUG */ -- cgit v1.2.3 From 49ba3c6b37b38b58251c27864f551908c583e99d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 18 Jul 2019 15:58:07 -0700 Subject: mm/hotplug: prepare shrink_{zone, pgdat}_span for sub-section removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sub-section hotplug support reduces the unit of operation of hotplug from section-sized-units (PAGES_PER_SECTION) to sub-section-sized units (PAGES_PER_SUBSECTION). Teach shrink_{zone,pgdat}_span() to consider PAGES_PER_SUBSECTION boundaries as the points where pfn_valid(), not valid_section(), can toggle. [osalvador@suse.de: fix shrink_{zone,node}_span] Link: http://lkml.kernel.org/r/20190717090725.23618-3-osalvador@suse.de Link: http://lkml.kernel.org/r/156092351496.979959.12703722803097017492.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Signed-off-by: Oscar Salvador Reviewed-by: Pavel Tatashin Reviewed-by: Oscar Salvador Tested-by: Aneesh Kumar K.V [ppc64] Cc: Michal Hocko Cc: Vlastimil Babka Cc: Logan Gunthorpe Cc: David Hildenbrand Cc: Jane Chu Cc: Jeff Moyer Cc: Jérôme Glisse Cc: Jonathan Corbet Cc: Mike Rapoport Cc: Toshi Kani Cc: Wei Yang Cc: Jason Gunthorpe Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index cf9d979a6498..85467914ad23 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -318,12 +318,8 @@ static unsigned long find_smallest_section_pfn(int nid, struct zone *zone, unsigned long start_pfn, unsigned long end_pfn) { - struct mem_section *ms; - - for (; start_pfn < end_pfn; start_pfn += PAGES_PER_SECTION) { - ms = __pfn_to_section(start_pfn); - - if (unlikely(!valid_section(ms))) + for (; start_pfn < end_pfn; start_pfn += PAGES_PER_SUBSECTION) { + if (unlikely(!pfn_valid(start_pfn))) continue; if (unlikely(pfn_to_nid(start_pfn) != nid)) @@ -343,15 +339,12 @@ static unsigned long find_biggest_section_pfn(int nid, struct zone *zone, unsigned long start_pfn, unsigned long end_pfn) { - struct mem_section *ms; unsigned long pfn; /* pfn is the end pfn of a memory section. */ pfn = end_pfn - 1; - for (; pfn >= start_pfn; pfn -= PAGES_PER_SECTION) { - ms = __pfn_to_section(pfn); - - if (unlikely(!valid_section(ms))) + for (; pfn >= start_pfn; pfn -= PAGES_PER_SUBSECTION) { + if (unlikely(!pfn_valid(pfn))) continue; if (unlikely(pfn_to_nid(pfn) != nid)) @@ -373,7 +366,6 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn, unsigned long z = zone_end_pfn(zone); /* zone_end_pfn namespace clash */ unsigned long zone_end_pfn = z; unsigned long pfn; - struct mem_section *ms; int nid = zone_to_nid(zone); zone_span_writelock(zone); @@ -410,17 +402,15 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn, * it check the zone has only hole or not. */ pfn = zone_start_pfn; - for (; pfn < zone_end_pfn; pfn += PAGES_PER_SECTION) { - ms = __pfn_to_section(pfn); - - if (unlikely(!valid_section(ms))) + for (; pfn < zone_end_pfn; pfn += PAGES_PER_SUBSECTION) { + if (unlikely(!pfn_valid(pfn))) continue; if (page_zone(pfn_to_page(pfn)) != zone) continue; - /* If the section is current section, it continues the loop */ - if (start_pfn == pfn) + /* Skip range to be removed */ + if (pfn >= start_pfn && pfn < end_pfn) continue; /* If we find valid section, we have nothing to do */ @@ -441,7 +431,6 @@ static void shrink_pgdat_span(struct pglist_data *pgdat, unsigned long p = pgdat_end_pfn(pgdat); /* pgdat_end_pfn namespace clash */ unsigned long pgdat_end_pfn = p; unsigned long pfn; - struct mem_section *ms; int nid = pgdat->node_id; if (pgdat_start_pfn == start_pfn) { @@ -478,17 +467,15 @@ static void shrink_pgdat_span(struct pglist_data *pgdat, * has only hole or not. */ pfn = pgdat_start_pfn; - for (; pfn < pgdat_end_pfn; pfn += PAGES_PER_SECTION) { - ms = __pfn_to_section(pfn); - - if (unlikely(!valid_section(ms))) + for (; pfn < pgdat_end_pfn; pfn += PAGES_PER_SUBSECTION) { + if (unlikely(!pfn_valid(pfn))) continue; if (pfn_to_nid(pfn) != nid) continue; - /* If the section is current section, it continues the loop */ - if (start_pfn == pfn) + /* Skip range to be removed */ + if (pfn >= start_pfn && pfn < end_pfn) continue; /* If we find valid section, we have nothing to do */ -- cgit v1.2.3 From 96da4350000973ef9310a10d077d65bbc017f093 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 18 Jul 2019 15:58:15 -0700 Subject: mm/hotplug: kill is_dev_zone() usage in __remove_pages() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The zone type check was a leftover from the cleanup that plumbed altmap through the memory hotplug path, i.e. commit da024512a1fa "mm: pass the vmem_altmap to arch_remove_memory and __remove_pages". Link: http://lkml.kernel.org/r/156092352642.979959.6664333788149363039.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Reviewed-by: David Hildenbrand Reviewed-by: Oscar Salvador Tested-by: Aneesh Kumar K.V [ppc64] Cc: Michal Hocko Cc: Logan Gunthorpe Cc: Pavel Tatashin Cc: Jane Chu Cc: Jeff Moyer Cc: Jérôme Glisse Cc: Jonathan Corbet Cc: Mike Rapoport Cc: Toshi Kani Cc: Vlastimil Babka Cc: Wei Yang Cc: Jason Gunthorpe Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 85467914ad23..11220044b01a 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -535,9 +535,7 @@ void __remove_pages(struct zone *zone, unsigned long phys_start_pfn, unsigned long map_offset = 0; int sections_to_remove; - /* In the ZONE_DEVICE case device driver owns the memory region */ - if (is_dev_zone(zone)) - map_offset = vmem_altmap_offset(altmap); + map_offset = vmem_altmap_offset(altmap); clear_zone_contiguous(zone); -- cgit v1.2.3 From 7ea6216049ff9cf250a6722cd766d99c8d1424e5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 18 Jul 2019 15:58:22 -0700 Subject: mm/sparsemem: prepare for sub-section ranges MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare the memory hot-{add,remove} paths for handling sub-section ranges by plumbing the starting page frame and number of pages being handled through arch_{add,remove}_memory() to sparse_{add,remove}_one_section(). This is simply plumbing, small cleanups, and some identifier renames. No intended functional changes. Link: http://lkml.kernel.org/r/156092353780.979959.9713046515562743194.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Reviewed-by: Pavel Tatashin Tested-by: Aneesh Kumar K.V [ppc64] Reviewed-by: Oscar Salvador Cc: Michal Hocko Cc: Vlastimil Babka Cc: Logan Gunthorpe Cc: David Hildenbrand Cc: Jane Chu Cc: Jeff Moyer Cc: Jérôme Glisse Cc: Jonathan Corbet Cc: Mike Rapoport Cc: Toshi Kani Cc: Wei Yang Cc: Jason Gunthorpe Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 5 +- mm/memory_hotplug.c | 114 +++++++++++++++++++++++++---------------- mm/sparse.c | 16 +++--- 3 files changed, 81 insertions(+), 54 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 475aff8efbf8..2d636a7491a4 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -346,9 +346,10 @@ extern int add_memory_resource(int nid, struct resource *resource); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); extern bool is_memblock_offlined(struct memory_block *mem); -extern int sparse_add_one_section(int nid, unsigned long start_pfn, - struct vmem_altmap *altmap); +extern int sparse_add_section(int nid, unsigned long pfn, + unsigned long nr_pages, struct vmem_altmap *altmap); extern void sparse_remove_one_section(struct mem_section *ms, + unsigned long pfn, unsigned long nr_pages, unsigned long map_offset, struct vmem_altmap *altmap); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 11220044b01a..3fbb2cfab126 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -252,51 +252,84 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat) } #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */ -static int __meminit __add_section(int nid, unsigned long phys_start_pfn, - struct vmem_altmap *altmap) +static int __meminit __add_section(int nid, unsigned long pfn, + unsigned long nr_pages, struct vmem_altmap *altmap) { int ret; - if (pfn_valid(phys_start_pfn)) + if (pfn_valid(pfn)) return -EEXIST; - ret = sparse_add_one_section(nid, phys_start_pfn, altmap); + ret = sparse_add_section(nid, pfn, nr_pages, altmap); return ret < 0 ? ret : 0; } +static int check_pfn_span(unsigned long pfn, unsigned long nr_pages, + const char *reason) +{ + /* + * Disallow all operations smaller than a sub-section and only + * allow operations smaller than a section for + * SPARSEMEM_VMEMMAP. Note that check_hotplug_memory_range() + * enforces a larger memory_block_size_bytes() granularity for + * memory that will be marked online, so this check should only + * fire for direct arch_{add,remove}_memory() users outside of + * add_memory_resource(). + */ + unsigned long min_align; + + if (IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) + min_align = PAGES_PER_SUBSECTION; + else + min_align = PAGES_PER_SECTION; + if (!IS_ALIGNED(pfn, min_align) + || !IS_ALIGNED(nr_pages, min_align)) { + WARN(1, "Misaligned __%s_pages start: %#lx end: #%lx\n", + reason, pfn, pfn + nr_pages - 1); + return -EINVAL; + } + return 0; +} + /* * Reasonably generic function for adding memory. It is * expected that archs that support memory hotplug will * call this function after deciding the zone to which to * add the new pages. */ -int __ref __add_pages(int nid, unsigned long phys_start_pfn, - unsigned long nr_pages, struct mhp_restrictions *restrictions) +int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages, + struct mhp_restrictions *restrictions) { unsigned long i; - int err = 0; - int start_sec, end_sec; + int start_sec, end_sec, err; struct vmem_altmap *altmap = restrictions->altmap; - /* during initialize mem_map, align hot-added range to section */ - start_sec = pfn_to_section_nr(phys_start_pfn); - end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); - if (altmap) { /* * Validate altmap is within bounds of the total request */ - if (altmap->base_pfn != phys_start_pfn + if (altmap->base_pfn != pfn || vmem_altmap_offset(altmap) > nr_pages) { pr_warn_once("memory add fail, invalid altmap\n"); - err = -EINVAL; - goto out; + return -EINVAL; } altmap->alloc = 0; } + err = check_pfn_span(pfn, nr_pages, "add"); + if (err) + return err; + + start_sec = pfn_to_section_nr(pfn); + end_sec = pfn_to_section_nr(pfn + nr_pages - 1); for (i = start_sec; i <= end_sec; i++) { - err = __add_section(nid, section_nr_to_pfn(i), altmap); + unsigned long pfns; + + pfns = min(nr_pages, PAGES_PER_SECTION + - (pfn & ~PAGE_SECTION_MASK)); + err = __add_section(nid, pfn, pfns, altmap); + pfn += pfns; + nr_pages -= pfns; /* * EEXIST is finally dealt with by ioresource collision @@ -309,7 +342,6 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn, cond_resched(); } vmemmap_populate_print_last(); -out: return err; } @@ -487,10 +519,10 @@ static void shrink_pgdat_span(struct pglist_data *pgdat, pgdat->node_spanned_pages = 0; } -static void __remove_zone(struct zone *zone, unsigned long start_pfn) +static void __remove_zone(struct zone *zone, unsigned long start_pfn, + unsigned long nr_pages) { struct pglist_data *pgdat = zone->zone_pgdat; - int nr_pages = PAGES_PER_SECTION; unsigned long flags; pgdat_resize_lock(zone->zone_pgdat, &flags); @@ -499,27 +531,23 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn) pgdat_resize_unlock(zone->zone_pgdat, &flags); } -static void __remove_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset, - struct vmem_altmap *altmap) +static void __remove_section(struct zone *zone, unsigned long pfn, + unsigned long nr_pages, unsigned long map_offset, + struct vmem_altmap *altmap) { - unsigned long start_pfn; - int scn_nr; + struct mem_section *ms = __nr_to_section(pfn_to_section_nr(pfn)); if (WARN_ON_ONCE(!valid_section(ms))) return; - scn_nr = __section_nr(ms); - start_pfn = section_nr_to_pfn((unsigned long)scn_nr); - __remove_zone(zone, start_pfn); - - sparse_remove_one_section(ms, map_offset, altmap); + __remove_zone(zone, pfn, nr_pages); + sparse_remove_one_section(ms, pfn, nr_pages, map_offset, altmap); } /** * __remove_pages() - remove sections of pages from a zone * @zone: zone from which pages need to be removed - * @phys_start_pfn: starting pageframe (must be aligned to start of a section) + * @pfn: starting pageframe (must be aligned to start of a section) * @nr_pages: number of pages to remove (must be multiple of section size) * @altmap: alternative device page map or %NULL if default memmap is used * @@ -528,30 +556,30 @@ static void __remove_section(struct zone *zone, struct mem_section *ms, * sure that pages are marked reserved and zones are adjust properly by * calling offline_pages(). */ -void __remove_pages(struct zone *zone, unsigned long phys_start_pfn, +void __remove_pages(struct zone *zone, unsigned long pfn, unsigned long nr_pages, struct vmem_altmap *altmap) { - unsigned long i; unsigned long map_offset = 0; - int sections_to_remove; + int i, start_sec, end_sec; map_offset = vmem_altmap_offset(altmap); clear_zone_contiguous(zone); - /* - * We can only remove entire sections - */ - BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK); - BUG_ON(nr_pages % PAGES_PER_SECTION); + if (check_pfn_span(pfn, nr_pages, "remove")) + return; - sections_to_remove = nr_pages / PAGES_PER_SECTION; - for (i = 0; i < sections_to_remove; i++) { - unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION; + start_sec = pfn_to_section_nr(pfn); + end_sec = pfn_to_section_nr(pfn + nr_pages - 1); + for (i = start_sec; i <= end_sec; i++) { + unsigned long pfns; cond_resched(); - __remove_section(zone, __pfn_to_section(pfn), map_offset, - altmap); + pfns = min(nr_pages, PAGES_PER_SECTION + - (pfn & ~PAGE_SECTION_MASK)); + __remove_section(zone, pfn, pfns, map_offset, altmap); + pfn += pfns; + nr_pages -= pfns; map_offset = 0; } diff --git a/mm/sparse.c b/mm/sparse.c index 6b01022e23a9..41579b66fff1 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -728,8 +728,8 @@ static void free_map_bootmem(struct page *memmap) * * -EEXIST - Section has been present. * * -ENOMEM - Out of memory. */ -int __meminit sparse_add_one_section(int nid, unsigned long start_pfn, - struct vmem_altmap *altmap) +int __meminit sparse_add_section(int nid, unsigned long start_pfn, + unsigned long nr_pages, struct vmem_altmap *altmap) { unsigned long section_nr = pfn_to_section_nr(start_pfn); struct mem_section_usage *usage; @@ -835,8 +835,9 @@ static void free_section_usage(struct mem_section *ms, struct page *memmap, free_map_bootmem(memmap); } -void sparse_remove_one_section(struct mem_section *ms, unsigned long map_offset, - struct vmem_altmap *altmap) +void sparse_remove_one_section(struct mem_section *ms, unsigned long pfn, + unsigned long nr_pages, unsigned long map_offset, + struct vmem_altmap *altmap) { struct page *memmap = NULL; struct mem_section_usage *usage = NULL; @@ -849,10 +850,7 @@ void sparse_remove_one_section(struct mem_section *ms, unsigned long map_offset, ms->usage = NULL; } - clear_hwpoisoned_pages(memmap + map_offset, - PAGES_PER_SECTION - map_offset); - free_section_usage(ms, memmap, usage, - section_nr_to_pfn(__section_nr(ms)), - PAGES_PER_SECTION, altmap); + clear_hwpoisoned_pages(memmap + map_offset, nr_pages - map_offset); + free_section_usage(ms, memmap, usage, pfn, nr_pages, altmap); } #endif /* CONFIG_MEMORY_HOTPLUG */ -- cgit v1.2.3 From ba72b4c8cf60e452cf6f0258ed9ee697957b7dfd Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 18 Jul 2019 15:58:26 -0700 Subject: mm/sparsemem: support sub-section hotplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The libnvdimm sub-system has suffered a series of hacks and broken workarounds for the memory-hotplug implementation's awkward section-aligned (128MB) granularity. For example the following backtrace is emitted when attempting arch_add_memory() with physical address ranges that intersect 'System RAM' (RAM) with 'Persistent Memory' (PMEM) within a given section: # cat /proc/iomem | grep -A1 -B1 Persistent\ Memory 100000000-1ffffffff : System RAM 200000000-303ffffff : Persistent Memory (legacy) 304000000-43fffffff : System RAM 440000000-23ffffffff : Persistent Memory 2400000000-43bfffffff : Persistent Memory 2400000000-43bfffffff : namespace2.0 WARNING: CPU: 38 PID: 928 at arch/x86/mm/init_64.c:850 add_pages+0x5c/0x60 [..] RIP: 0010:add_pages+0x5c/0x60 [..] Call Trace: devm_memremap_pages+0x460/0x6e0 pmem_attach_disk+0x29e/0x680 [nd_pmem] ? nd_dax_probe+0xfc/0x120 [libnvdimm] nvdimm_bus_probe+0x66/0x160 [libnvdimm] It was discovered that the problem goes beyond RAM vs PMEM collisions as some platform produce PMEM vs PMEM collisions within a given section. The libnvdimm workaround for that case revealed that the libnvdimm section-alignment-padding implementation has been broken for a long while. A fix for that long-standing breakage introduces as many problems as it solves as it would require a backward-incompatible change to the namespace metadata interpretation. Instead of that dubious route [1], address the root problem in the memory-hotplug implementation. Note that EEXIST is no longer treated as success as that is how sparse_add_section() reports subsection collisions, it was also obviated by recent changes to perform the request_region() for 'System RAM' before arch_add_memory() in the add_memory() sequence. [1] https://lore.kernel.org/r/155000671719.348031.2347363160141119237.stgit@dwillia2-desk3.amr.corp.intel.com [osalvador@suse.de: fix deactivate_section for early sections] Link: http://lkml.kernel.org/r/20190715081549.32577-2-osalvador@suse.de Link: http://lkml.kernel.org/r/156092354368.979959.6232443923440952359.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Signed-off-by: Oscar Salvador Tested-by: Aneesh Kumar K.V [ppc64] Reviewed-by: Oscar Salvador Cc: Michal Hocko Cc: Vlastimil Babka Cc: Logan Gunthorpe Cc: Pavel Tatashin Cc: David Hildenbrand Cc: Jane Chu Cc: Jeff Moyer Cc: Jérôme Glisse Cc: Jonathan Corbet Cc: Mike Rapoport Cc: Toshi Kani Cc: Wei Yang Cc: Jason Gunthorpe Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 2 +- mm/memory_hotplug.c | 27 +----- mm/page_alloc.c | 2 +- mm/sparse.c | 206 +++++++++++++++++++++++++++-------------- 4 files changed, 141 insertions(+), 96 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 2d636a7491a4..f46ea71b4ffd 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -348,7 +348,7 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, extern bool is_memblock_offlined(struct memory_block *mem); extern int sparse_add_section(int nid, unsigned long pfn, unsigned long nr_pages, struct vmem_altmap *altmap); -extern void sparse_remove_one_section(struct mem_section *ms, +extern void sparse_remove_section(struct mem_section *ms, unsigned long pfn, unsigned long nr_pages, unsigned long map_offset, struct vmem_altmap *altmap); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 3fbb2cfab126..aafb71594ee3 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -252,18 +252,6 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat) } #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */ -static int __meminit __add_section(int nid, unsigned long pfn, - unsigned long nr_pages, struct vmem_altmap *altmap) -{ - int ret; - - if (pfn_valid(pfn)) - return -EEXIST; - - ret = sparse_add_section(nid, pfn, nr_pages, altmap); - return ret < 0 ? ret : 0; -} - static int check_pfn_span(unsigned long pfn, unsigned long nr_pages, const char *reason) { @@ -327,18 +315,11 @@ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages, pfns = min(nr_pages, PAGES_PER_SECTION - (pfn & ~PAGE_SECTION_MASK)); - err = __add_section(nid, pfn, pfns, altmap); + err = sparse_add_section(nid, pfn, pfns, altmap); + if (err) + break; pfn += pfns; nr_pages -= pfns; - - /* - * EEXIST is finally dealt with by ioresource collision - * check. see add_memory() => register_memory_resource() - * Warning will be printed if there is collision. - */ - if (err && (err != -EEXIST)) - break; - err = 0; cond_resched(); } vmemmap_populate_print_last(); @@ -541,7 +522,7 @@ static void __remove_section(struct zone *zone, unsigned long pfn, return; __remove_zone(zone, pfn, nr_pages); - sparse_remove_one_section(ms, pfn, nr_pages, map_offset, altmap); + sparse_remove_section(ms, pfn, nr_pages, map_offset, altmap); } /** diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2c74367a8eba..272c6de1bf4e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5974,7 +5974,7 @@ void __ref memmap_init_zone_device(struct zone *zone, * pfn out of zone. * * Please note that MEMMAP_HOTPLUG path doesn't clear memmap - * because this is done early in sparse_add_one_section + * because this is done early in section_activate() */ if (!(pfn & (pageblock_nr_pages - 1))) { set_pageblock_migratetype(page, MIGRATE_MOVABLE); diff --git a/mm/sparse.c b/mm/sparse.c index 41579b66fff1..a205a2ac66a4 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -83,8 +83,15 @@ static int __meminit sparse_index_init(unsigned long section_nr, int nid) unsigned long root = SECTION_NR_TO_ROOT(section_nr); struct mem_section *section; + /* + * An existing section is possible in the sub-section hotplug + * case. First hot-add instantiates, follow-on hot-add reuses + * the existing section. + * + * The mem_hotplug_lock resolves the apparent race below. + */ if (mem_section[root]) - return -EEXIST; + return 0; section = sparse_index_alloc(nid); if (!section) @@ -715,10 +722,120 @@ static void free_map_bootmem(struct page *memmap) } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ +static void section_deactivate(unsigned long pfn, unsigned long nr_pages, + struct vmem_altmap *altmap) +{ + DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 }; + DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 }; + struct mem_section *ms = __pfn_to_section(pfn); + bool section_is_early = early_section(ms); + struct page *memmap = NULL; + unsigned long *subsection_map = ms->usage + ? &ms->usage->subsection_map[0] : NULL; + + subsection_mask_set(map, pfn, nr_pages); + if (subsection_map) + bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION); + + if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION), + "section already deactivated (%#lx + %ld)\n", + pfn, nr_pages)) + return; + + /* + * There are 3 cases to handle across two configurations + * (SPARSEMEM_VMEMMAP={y,n}): + * + * 1/ deactivation of a partial hot-added section (only possible + * in the SPARSEMEM_VMEMMAP=y case). + * a/ section was present at memory init + * b/ section was hot-added post memory init + * 2/ deactivation of a complete hot-added section + * 3/ deactivation of a complete section from memory init + * + * For 1/, when subsection_map does not empty we will not be + * freeing the usage map, but still need to free the vmemmap + * range. + * + * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified + */ + bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION); + if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) { + unsigned long section_nr = pfn_to_section_nr(pfn); + + if (!section_is_early) { + kfree(ms->usage); + ms->usage = NULL; + } + memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); + ms->section_mem_map = sparse_encode_mem_map(NULL, section_nr); + } + + if (section_is_early && memmap) + free_map_bootmem(memmap); + else + depopulate_section_memmap(pfn, nr_pages, altmap); +} + +static struct page * __meminit section_activate(int nid, unsigned long pfn, + unsigned long nr_pages, struct vmem_altmap *altmap) +{ + DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 }; + struct mem_section *ms = __pfn_to_section(pfn); + struct mem_section_usage *usage = NULL; + unsigned long *subsection_map; + struct page *memmap; + int rc = 0; + + subsection_mask_set(map, pfn, nr_pages); + + if (!ms->usage) { + usage = kzalloc(mem_section_usage_size(), GFP_KERNEL); + if (!usage) + return ERR_PTR(-ENOMEM); + ms->usage = usage; + } + subsection_map = &ms->usage->subsection_map[0]; + + if (bitmap_empty(map, SUBSECTIONS_PER_SECTION)) + rc = -EINVAL; + else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION)) + rc = -EEXIST; + else + bitmap_or(subsection_map, map, subsection_map, + SUBSECTIONS_PER_SECTION); + + if (rc) { + if (usage) + ms->usage = NULL; + kfree(usage); + return ERR_PTR(rc); + } + + /* + * The early init code does not consider partially populated + * initial sections, it simply assumes that memory will never be + * referenced. If we hot-add memory into such a section then we + * do not need to populate the memmap and can simply reuse what + * is already there. + */ + if (nr_pages < PAGES_PER_SECTION && early_section(ms)) + return pfn_to_page(pfn); + + memmap = populate_section_memmap(pfn, nr_pages, nid, altmap); + if (!memmap) { + section_deactivate(pfn, nr_pages, altmap); + return ERR_PTR(-ENOMEM); + } + + return memmap; +} + /** - * sparse_add_one_section - add a memory section + * sparse_add_section - add a memory section, or populate an existing one * @nid: The node to add section on * @start_pfn: start pfn of the memory range + * @nr_pages: number of pfns to add in the section * @altmap: device page map * * This is only intended for hotplug. @@ -732,51 +849,34 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap) { unsigned long section_nr = pfn_to_section_nr(start_pfn); - struct mem_section_usage *usage; struct mem_section *ms; struct page *memmap; int ret; - /* - * no locking for this, because it does its own - * plus, it does a kmalloc - */ ret = sparse_index_init(section_nr, nid); - if (ret < 0 && ret != -EEXIST) + if (ret < 0) return ret; - ret = 0; - memmap = populate_section_memmap(start_pfn, PAGES_PER_SECTION, nid, - altmap); - if (!memmap) - return -ENOMEM; - usage = kzalloc(mem_section_usage_size(), GFP_KERNEL); - if (!usage) { - depopulate_section_memmap(start_pfn, PAGES_PER_SECTION, altmap); - return -ENOMEM; - } - ms = __pfn_to_section(start_pfn); - if (ms->section_mem_map & SECTION_MARKED_PRESENT) { - ret = -EEXIST; - goto out; - } + memmap = section_activate(nid, start_pfn, nr_pages, altmap); + if (IS_ERR(memmap)) + return PTR_ERR(memmap); /* * Poison uninitialized struct pages in order to catch invalid flags * combinations. */ - page_init_poison(memmap, sizeof(struct page) * PAGES_PER_SECTION); + page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages); + ms = __pfn_to_section(start_pfn); set_section_nid(section_nr, nid); section_mark_present(ms); - sparse_init_one_section(ms, section_nr, memmap, usage, 0); -out: - if (ret < 0) { - kfree(usage); - depopulate_section_memmap(start_pfn, PAGES_PER_SECTION, altmap); - } - return ret; + /* Align memmap to section boundary in the subsection case */ + if (section_nr_to_pfn(section_nr) != start_pfn) + memmap = pfn_to_kaddr(section_nr_to_pfn(section_nr)); + sparse_init_one_section(ms, section_nr, memmap, ms->usage, 0); + + return 0; } #ifdef CONFIG_MEMORY_FAILURE @@ -809,48 +909,12 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) } #endif -static void free_section_usage(struct mem_section *ms, struct page *memmap, - struct mem_section_usage *usage, unsigned long pfn, - unsigned long nr_pages, struct vmem_altmap *altmap) -{ - if (!usage) - return; - - /* - * Check to see if allocation came from hot-plug-add - */ - if (!early_section(ms)) { - kfree(usage); - if (memmap) - depopulate_section_memmap(pfn, nr_pages, altmap); - return; - } - - /* - * The usemap came from bootmem. This is packed with other usemaps - * on the section which has pgdat at boot time. Just keep it as is now. - */ - - if (memmap) - free_map_bootmem(memmap); -} - -void sparse_remove_one_section(struct mem_section *ms, unsigned long pfn, +void sparse_remove_section(struct mem_section *ms, unsigned long pfn, unsigned long nr_pages, unsigned long map_offset, struct vmem_altmap *altmap) { - struct page *memmap = NULL; - struct mem_section_usage *usage = NULL; - - if (ms->section_mem_map) { - usage = ms->usage; - memmap = sparse_decode_mem_map(ms->section_mem_map, - __section_nr(ms)); - ms->section_mem_map = 0; - ms->usage = NULL; - } - - clear_hwpoisoned_pages(memmap + map_offset, nr_pages - map_offset); - free_section_usage(ms, memmap, usage, pfn, nr_pages, altmap); + clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset, + nr_pages - map_offset); + section_deactivate(pfn, nr_pages, altmap); } #endif /* CONFIG_MEMORY_HOTPLUG */ -- cgit v1.2.3 From 9a845030427c7a2879a7d635cc7c0e5f79ec962d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 18 Jul 2019 15:58:43 -0700 Subject: mm/sparsemem: cleanup 'section number' data types David points out that there is a mixture of 'int' and 'unsigned long' usage for section number data types. Update the memory hotplug path to use 'unsigned long' consistently for section numbers. [akpm@linux-foundation.org: fix printk format] Link: http://lkml.kernel.org/r/156107543656.1329419.11505835211949439815.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Reported-by: David Hildenbrand Reviewed-by: David Hildenbrand Cc: Michal Hocko Cc: Oscar Salvador Cc: Jason Gunthorpe Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 10 +++++----- mm/sparse.c | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index aafb71594ee3..2a9bbddb0e55 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -288,8 +288,8 @@ static int check_pfn_span(unsigned long pfn, unsigned long nr_pages, int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages, struct mhp_restrictions *restrictions) { - unsigned long i; - int start_sec, end_sec, err; + int err; + unsigned long nr, start_sec, end_sec; struct vmem_altmap *altmap = restrictions->altmap; if (altmap) { @@ -310,7 +310,7 @@ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages, start_sec = pfn_to_section_nr(pfn); end_sec = pfn_to_section_nr(pfn + nr_pages - 1); - for (i = start_sec; i <= end_sec; i++) { + for (nr = start_sec; nr <= end_sec; nr++) { unsigned long pfns; pfns = min(nr_pages, PAGES_PER_SECTION @@ -541,7 +541,7 @@ void __remove_pages(struct zone *zone, unsigned long pfn, unsigned long nr_pages, struct vmem_altmap *altmap) { unsigned long map_offset = 0; - int i, start_sec, end_sec; + unsigned long nr, start_sec, end_sec; map_offset = vmem_altmap_offset(altmap); @@ -552,7 +552,7 @@ void __remove_pages(struct zone *zone, unsigned long pfn, start_sec = pfn_to_section_nr(pfn); end_sec = pfn_to_section_nr(pfn + nr_pages - 1); - for (i = start_sec; i <= end_sec; i++) { + for (nr = start_sec; nr <= end_sec; nr++) { unsigned long pfns; cond_resched(); diff --git a/mm/sparse.c b/mm/sparse.c index a205a2ac66a4..72f010d9bff5 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -229,21 +229,21 @@ void subsection_mask_set(unsigned long *map, unsigned long pfn, void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages) { int end_sec = pfn_to_section_nr(pfn + nr_pages - 1); - int i, start_sec = pfn_to_section_nr(pfn); + unsigned long nr, start_sec = pfn_to_section_nr(pfn); if (!nr_pages) return; - for (i = start_sec; i <= end_sec; i++) { + for (nr = start_sec; nr <= end_sec; nr++) { struct mem_section *ms; unsigned long pfns; pfns = min(nr_pages, PAGES_PER_SECTION - (pfn & ~PAGE_SECTION_MASK)); - ms = __nr_to_section(i); + ms = __nr_to_section(nr); subsection_mask_set(ms->usage->subsection_map, pfn, pfns); - pr_debug("%s: sec: %d pfns: %ld set(%d, %d)\n", __func__, i, + pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr, pfns, subsection_map_index(pfn), subsection_map_index(pfn + pfns - 1)); -- cgit v1.2.3