diff options
-rw-r--r-- | Documentation/kernel-parameters.txt | 29 | ||||
-rw-r--r-- | arch/x86/mm/srat.c | 71 | ||||
-rw-r--r-- | include/linux/mm.h | 2 | ||||
-rw-r--r-- | mm/page_alloc.c | 22 |
4 files changed, 113 insertions, 11 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 722a74161246..766087781ecd 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1640,15 +1640,30 @@ bytes respectively. Such letter suffixes can also be entirely omitted. that the amount of memory usable for all allocations is not too small. + movablemem_map=acpi + [KNL,X86,IA-64,PPC] This parameter is similar to + memmap except it specifies the memory map of + ZONE_MOVABLE. + This option informs the kernel to use Hot Pluggable bit + in flags from SRAT from ACPI BIOS to determine which + memory devices could be hotplugged. The corresponding + memory ranges will be set as ZONE_MOVABLE. + NOTE: Whatever node the kernel resides in will always + be un-hotpluggable. + movablemem_map=nn[KMG]@ss[KMG] [KNL,X86,IA-64,PPC] This parameter is similar to memmap except it specifies the memory map of ZONE_MOVABLE. - If more areas are all within one node, then from - lowest ss to the end of the node will be ZONE_MOVABLE. - If an area covers two or more nodes, the area from - ss to the end of the 1st node will be ZONE_MOVABLE, - and all the rest nodes will only have ZONE_MOVABLE. + If user specifies memory ranges, the info in SRAT will + be ignored. And it works like the following: + - If more ranges are all within one node, then from + lowest ss to the end of the node will be ZONE_MOVABLE. + - If a range is within a node, then from ss to the end + of the node will be ZONE_MOVABLE. + - If a range covers two or more nodes, then from ss to + the end of the 1st node will be ZONE_MOVABLE, and all + the rest nodes will only have ZONE_MOVABLE. If memmap is specified at the same time, the movablemem_map will be limited within the memmap areas. If kernelcore or movablecore is also specified, @@ -1656,6 +1671,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. satisfied. 
So the administrator should be careful that the amount of movablemem_map areas are not too large. Otherwise kernel won't have enough memory to start. + NOTE: We don't stop users specifying the node the + kernel resides in as hotpluggable so that this + option can be used as a workaround of firmware + bugs. MTD_Partition= [MTD] Format: <name>,<region-number>,<size>,<offset> diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 3e90039e52e0..79836d01f789 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -142,16 +142,72 @@ static inline int save_add_info(void) {return 0;} #endif #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP -static void __init handle_movablemem(int node, u64 start, u64 end) +static void __init +handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable) { - int overlap; + int overlap, i; unsigned long start_pfn, end_pfn; start_pfn = PFN_DOWN(start); end_pfn = PFN_UP(end); /* - * For movablecore_map=nn[KMG]@ss[KMG]: + * For movablemem_map=acpi: + * + * SRAT: |_____| |_____| |_________| |_________| ...... + * node id: 0 1 1 2 + * hotpluggable: n y y n + * movablemem_map: |_____| |_________| + * + * Using movablemem_map, we can prevent memblock from allocating memory + * on ZONE_MOVABLE at boot time. + * + * Before parsing SRAT, memblock has already reserved some memory ranges + * for other purposes, such as for kernel image. We cannot prevent + * kernel from using this memory, so we need to exclude this memory + * even if it is hotpluggable. + * Furthermore, to ensure the kernel has enough memory to boot, we make + * all the memory on the node which the kernel resides in + * un-hotpluggable. + */ + if (hotpluggable && movablemem_map.acpi) { + /* Exclude ranges reserved by memblock. 
*/ + struct memblock_type *rgn = &memblock.reserved; + + for (i = 0; i < rgn->cnt; i++) { + if (end <= rgn->regions[i].base || + start >= rgn->regions[i].base + + rgn->regions[i].size) + continue; + + /* + * If the memory range overlaps the memory reserved by + * memblock, then the kernel resides in this node. + */ + node_set(node, movablemem_map.numa_nodes_kernel); + + goto out; + } + + /* + * If the kernel resides in this node, then the whole node + * should not be hotpluggable. + */ + if (node_isset(node, movablemem_map.numa_nodes_kernel)) + goto out; + + insert_movablemem_map(start_pfn, end_pfn); + + /* + * numa_nodes_hotplug nodemask represents which nodes are put + * into movablemem_map.map[]. + */ + node_set(node, movablemem_map.numa_nodes_hotplug); + goto out; + } + + /* + * For movablemem_map=nn[KMG]@ss[KMG]: * * SRAT: |_____| |_____| |_________| |_________| ...... * node id: 0 1 1 2 @@ -160,6 +216,8 @@ static void __init handle_movablemem(int node, u64 start, u64 end) * * Using movablemem_map, we can prevent memblock from allocating memory * on ZONE_MOVABLE at boot time. + * + * NOTE: In this case, SRAT info will be ignored. */ overlap = movablemem_map_overlap(start_pfn, end_pfn); if (overlap >= 0) { @@ -187,9 +245,12 @@ static void __init handle_movablemem(int node, u64 start, u64 end) */ insert_movablemem_map(start_pfn, end_pfn); } +out: + return; } #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ -static inline void handle_movablemem(int node, u64 start, u64 end) +static inline void +handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable) { } #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ @@ -234,7 +295,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) (unsigned long long) start, (unsigned long long) end - 1, hotpluggable ? 
"Hot Pluggable": ""); - handle_movablemem(node, start, end); + handle_movablemem(node, start, end, hotpluggable); return 0; out_err_bad_srat: diff --git a/include/linux/mm.h b/include/linux/mm.h index 4d7377a1d084..72a42c0fa633 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1366,9 +1366,11 @@ struct movablemem_entry { }; struct movablemem_map { + bool acpi; /* true if using SRAT info */ int nr_map; struct movablemem_entry map[MOVABLEMEM_MAP_MAX]; nodemask_t numa_nodes_hotplug; /* on which nodes we specify memory */ + nodemask_t numa_nodes_kernel; /* on which nodes kernel resides in */ }; extern void __init insert_movablemem_map(unsigned long start_pfn, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7ea9a003ad57..a7381be21320 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -203,7 +203,10 @@ static unsigned long __meminitdata dma_reserve; #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP /* Movable memory ranges, will also be used by memblock subsystem. */ -struct movablemem_map movablemem_map; +struct movablemem_map movablemem_map = { + .acpi = false, + .nr_map = 0, +}; static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES]; static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES]; @@ -5314,6 +5317,23 @@ static int __init cmdline_parse_movablemem_map(char *p) if (!p) goto err; + if (!strcmp(p, "acpi")) + movablemem_map.acpi = true; + + /* + * If the user decides to use info from BIOS, all the other user specified + * ranges will be ignored. + */ + if (movablemem_map.acpi) { + if (movablemem_map.nr_map) { + memset(movablemem_map.map, 0, + sizeof(struct movablemem_entry) + * movablemem_map.nr_map); + movablemem_map.nr_map = 0; + } + return 0; + } + oldp = p; mem_size = memparse(p, &p); if (p == oldp) |