diff options
Diffstat (limited to 'mm/memory_hotplug.c')
-rw-r--r-- | mm/memory_hotplug.c | 192 |
1 files changed, 154 insertions, 38 deletions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 3f231cf1b410..1b03f4ec6fd2 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -41,17 +41,83 @@ #include "internal.h" #include "shuffle.h" +enum { + MEMMAP_ON_MEMORY_DISABLE = 0, + MEMMAP_ON_MEMORY_ENABLE, + MEMMAP_ON_MEMORY_FORCE, +}; + +static int memmap_mode __read_mostly = MEMMAP_ON_MEMORY_DISABLE; + +static inline unsigned long memory_block_memmap_size(void) +{ + return PHYS_PFN(memory_block_size_bytes()) * sizeof(struct page); +} + +static inline unsigned long memory_block_memmap_on_memory_pages(void) +{ + unsigned long nr_pages = PFN_UP(memory_block_memmap_size()); + + /* + * In "forced" memmap_on_memory mode, we add extra pages to align the + * vmemmap size to cover full pageblocks. That way, we can add memory + * even if the vmemmap size is not properly aligned, however, we might waste + * memory. + */ + if (memmap_mode == MEMMAP_ON_MEMORY_FORCE) + return pageblock_align(nr_pages); + return nr_pages; +} + #ifdef CONFIG_MHP_MEMMAP_ON_MEMORY /* * memory_hotplug.memmap_on_memory parameter */ -static bool memmap_on_memory __ro_after_init; -module_param(memmap_on_memory, bool, 0444); -MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug"); +static int set_memmap_mode(const char *val, const struct kernel_param *kp) +{ + int ret, mode; + bool enabled; + + if (sysfs_streq(val, "force") || sysfs_streq(val, "FORCE")) { + mode = MEMMAP_ON_MEMORY_FORCE; + } else { + ret = kstrtobool(val, &enabled); + if (ret < 0) + return ret; + if (enabled) + mode = MEMMAP_ON_MEMORY_ENABLE; + else + mode = MEMMAP_ON_MEMORY_DISABLE; + } + *((int *)kp->arg) = mode; + if (mode == MEMMAP_ON_MEMORY_FORCE) { + unsigned long memmap_pages = memory_block_memmap_on_memory_pages(); + + pr_info_once("Memory hotplug will waste %ld pages in each memory block\n", + memmap_pages - PFN_UP(memory_block_memmap_size())); + } + return 0; +} + +static int get_memmap_mode(char *buffer, const struct kernel_param *kp) +{ + if (*((int *)kp->arg) == MEMMAP_ON_MEMORY_FORCE) + return sprintf(buffer, "force\n"); + return param_get_bool(buffer, kp); +} + +static const struct kernel_param_ops memmap_mode_ops = { + .set = set_memmap_mode, + .get = get_memmap_mode, +}; +module_param_cb(memmap_on_memory, &memmap_mode_ops, &memmap_mode, 0444); +MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug\n" + "With value \"force\" it could result in memory wastage due " + "to memmap size limitations (Y/N/force)"); static inline bool mhp_memmap_on_memory(void) { - return memmap_on_memory; + return memmap_mode != MEMMAP_ON_MEMORY_DISABLE; } #else static inline bool mhp_memmap_on_memory(void) @@ -1247,11 +1313,22 @@ static int online_memory_block(struct memory_block *mem, void *arg) return device_online(&mem->dev); } -bool mhp_supports_memmap_on_memory(unsigned long size) +#ifndef arch_supports_memmap_on_memory +static inline bool arch_supports_memmap_on_memory(unsigned long vmemmap_size) +{ + /* + * As default, we want the vmemmap to span a complete PMD such that we + * can map the vmemmap using a single PMD if supported by the + * architecture. + */ + return IS_ALIGNED(vmemmap_size, PMD_SIZE); +} +#endif + +static bool mhp_supports_memmap_on_memory(unsigned long size) { - unsigned long nr_vmemmap_pages = size / PAGE_SIZE; - unsigned long vmemmap_size = nr_vmemmap_pages * sizeof(struct page); - unsigned long remaining_size = size - vmemmap_size; + unsigned long vmemmap_size = memory_block_memmap_size(); + unsigned long memmap_pages = memory_block_memmap_on_memory_pages(); /* * Besides having arch support and the feature enabled at runtime, we @@ -1279,10 +1356,28 @@ bool mhp_supports_memmap_on_memory(unsigned long size) * altmap as an alternative source of memory, and we do not exactly * populate a single PMD. */ - return mhp_memmap_on_memory() && - size == memory_block_size_bytes() && - IS_ALIGNED(vmemmap_size, PMD_SIZE) && - IS_ALIGNED(remaining_size, (pageblock_nr_pages << PAGE_SHIFT)); + if (!mhp_memmap_on_memory() || size != memory_block_size_bytes()) + return false; + + /* + * Make sure the vmemmap allocation is fully contained + * so that we always allocate vmemmap memory from altmap area. + */ + if (!IS_ALIGNED(vmemmap_size, PAGE_SIZE)) + return false; + + /* + * start pfn should be pageblock_nr_pages aligned for correctly + * setting migrate types + */ + if (!pageblock_aligned(memmap_pages)) + return false; + + if (memmap_pages == PHYS_PFN(memory_block_size_bytes())) + /* No effective hotplugged memory doesn't make sense. */ + return false; + + return arch_supports_memmap_on_memory(vmemmap_size); } /* @@ -1295,7 +1390,10 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) { struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) }; enum memblock_flags memblock_flags = MEMBLOCK_NONE; - struct vmem_altmap mhp_altmap = {}; + struct vmem_altmap mhp_altmap = { + .base_pfn = PHYS_PFN(res->start), + .end_pfn = PHYS_PFN(res->end), + }; struct memory_group *group = NULL; u64 start, size; bool new_node = false; @@ -1339,26 +1437,29 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) * Self hosted memmap array */ if (mhp_flags & MHP_MEMMAP_ON_MEMORY) { - if (!mhp_supports_memmap_on_memory(size)) { - ret = -EINVAL; - goto error; + if (mhp_supports_memmap_on_memory(size)) { + mhp_altmap.free = memory_block_memmap_on_memory_pages(); + params.altmap = kmalloc(sizeof(struct vmem_altmap), GFP_KERNEL); + if (!params.altmap) { + ret = -ENOMEM; + goto error; + } + + memcpy(params.altmap, &mhp_altmap, sizeof(mhp_altmap)); } - mhp_altmap.free = PHYS_PFN(size); - mhp_altmap.base_pfn = PHYS_PFN(start); - params.altmap = &mhp_altmap; + /* fallback to not using altmap */ } /* call arch's memory hotadd */ ret = arch_add_memory(nid, start, size, ¶ms); if (ret < 0) - goto error; + goto error_free; /* create memory block devices after memory was added */ - ret = create_memory_block_devices(start, size, mhp_altmap.alloc, - group); + ret = create_memory_block_devices(start, size, params.altmap, group); if (ret) { arch_remove_memory(start, size, NULL); - goto error; + goto error_free; } if (new_node) { @@ -1395,6 +1496,8 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) walk_memory_blocks(start, size, NULL, online_memory_block); return ret; +error_free: + kfree(params.altmap); error: if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) memblock_remove(start, size); @@ -1843,6 +1946,11 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, do { pfn = start_pfn; do { + /* + * Historically we always checked for any signal and + * can't limit it to fatal signals without eventually + * breaking user space. + */ if (signal_pending(current)) { ret = -EINTR; reason = "signal backoff"; @@ -1956,12 +2064,18 @@ static int check_memblock_offlined_cb(struct memory_block *mem, void *arg) return 0; } -static int get_nr_vmemmap_pages_cb(struct memory_block *mem, void *arg) +static int test_has_altmap_cb(struct memory_block *mem, void *arg) { + struct memory_block **mem_ptr = (struct memory_block **)arg; /* - * If not set, continue with the next block. + * return the memblock if we have altmap + * and break callback. */ - return mem->nr_vmemmap_pages; + if (mem->altmap) { + *mem_ptr = mem; + return 1; + } + return 0; } static int check_cpu_on_node(int nid) @@ -2036,10 +2150,9 @@ EXPORT_SYMBOL(try_offline_node); static int __ref try_remove_memory(u64 start, u64 size) { - struct vmem_altmap mhp_altmap = {}; - struct vmem_altmap *altmap = NULL; - unsigned long nr_vmemmap_pages; + struct memory_block *mem; int rc = 0, nid = NUMA_NO_NODE; + struct vmem_altmap *altmap = NULL; BUG_ON(check_hotplug_memory_range(start, size)); @@ -2061,23 +2174,20 @@ static int __ref try_remove_memory(u64 start, u64 size) * the same granularity it was added - a single memory block. */ if (mhp_memmap_on_memory()) { - nr_vmemmap_pages = walk_memory_blocks(start, size, NULL, - get_nr_vmemmap_pages_cb); - if (nr_vmemmap_pages) { + rc = walk_memory_blocks(start, size, &mem, test_has_altmap_cb); + if (rc) { if (size != memory_block_size_bytes()) { pr_warn("Refuse to remove %#llx - %#llx," "wrong granularity\n", start, start + size); return -EINVAL; } - + altmap = mem->altmap; /* - * Let remove_pmd_table->free_hugepage_table do the - * right thing if we used vmem_altmap when hot-adding - * the range. + * Mark altmap NULL so that we can add a debug + * check on memblock free. */ - mhp_altmap.alloc = nr_vmemmap_pages; - altmap = &mhp_altmap; + mem->altmap = NULL; } } @@ -2094,6 +2204,12 @@ static int __ref try_remove_memory(u64 start, u64 size) arch_remove_memory(start, size, altmap); + /* Verify that all vmemmap pages have actually been freed. */ + if (altmap) { + WARN(altmap->alloc, "Altmap not fully unmapped"); + kfree(altmap); + } + if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) { memblock_phys_free(start, size); memblock_remove(start, size); |