diff options
author | Dave Hansen <haveblue@us.ibm.com> | 2005-06-23 11:07:39 +0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-23 20:45:01 +0400 |
commit | 6f167ec721108c9282d54424516a12c805e3c306 (patch) | |
tree | f7094a2524611ede76b32e4cc3c07987b7b0e275 | |
parent | c2ebaa425e6630adcbf757b004d257dd4204925b (diff) | |
download | linux-6f167ec721108c9282d54424516a12c805e3c306.tar.xz |
[PATCH] sparsemem base: simple NUMA remap space allocator
Introduce a simple allocator for the NUMA remap space. This space is very
scarce and is used for structures which are best allocated node-local.
This mechanism is also used on non-NUMA ia64 systems with a vmem_map to keep
the pgdat->node_mem_map initialized in a consistent place for all
architectures.
Issues:
o alloc_remap takes a node_id where we might expect a pgdat. This was intended
to allow us to allocate the pgdats themselves using this mechanism, which we do
not yet do. We could have alloc_remap_node() and alloc_remap_nid() for this purpose.
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | arch/i386/Kconfig | 5 | ||||
-rw-r--r-- | arch/i386/mm/discontig.c | 59 | ||||
-rw-r--r-- | include/linux/bootmem.h | 9 | ||||
-rw-r--r-- | mm/page_alloc.c | 6 |
4 files changed, 50 insertions, 29 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index dfd904f6883b..35ca3a17ed20 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -803,6 +803,11 @@ config NEED_NODE_MEMMAP_SIZE depends on DISCONTIGMEM default y +config HAVE_ARCH_ALLOC_REMAP + bool + depends on NUMA + default y + config HIGHPTE bool "Allocate 3rd-level pagetables from highmem" depends on HIGHMEM4G || HIGHMEM64G diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 85d2fcbe1079..dcc71f969b01 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -108,6 +108,9 @@ unsigned long node_remap_offset[MAX_NUMNODES]; void *node_remap_start_vaddr[MAX_NUMNODES]; void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); +void *node_remap_end_vaddr[MAX_NUMNODES]; +void *node_remap_alloc_vaddr[MAX_NUMNODES]; + /* * FLAT - support for basic PC memory model with discontig enabled, essentially * a single node with all available processors in it with a flat @@ -178,6 +181,21 @@ static void __init allocate_pgdat(int nid) } } +void *alloc_remap(int nid, unsigned long size) +{ + void *allocation = node_remap_alloc_vaddr[nid]; + + size = ALIGN(size, L1_CACHE_BYTES); + + if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) + return 0; + + node_remap_alloc_vaddr[nid] += size; + memset(allocation, 0, size); + + return allocation; +} + void __init remap_numa_kva(void) { void *vaddr; @@ -185,8 +203,6 @@ void __init remap_numa_kva(void) int node; for_each_online_node(node) { - if (node == 0) - continue; for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); set_pmd_pfn((ulong) vaddr, @@ -202,11 +218,6 @@ static unsigned long calculate_numa_remap_pages(void) unsigned long size, reserve_pages = 0; for_each_online_node(nid) { - if (nid == 0) - continue; - if (!node_remap_size[nid]) - continue; - /* * The acpi/srat node info can show hot-add memroy zones * where memory could be added but not 
currently present. @@ -226,8 +237,8 @@ static unsigned long calculate_numa_remap_pages(void) printk("Reserving %ld pages of KVA for lmem_map of node %d\n", size, nid); node_remap_size[nid] = size; - reserve_pages += size; node_remap_offset[nid] = reserve_pages; + reserve_pages += size; printk("Shrinking node %d from %ld pages to %ld pages\n", nid, node_end_pfn[nid], node_end_pfn[nid] - size); node_end_pfn[nid] -= size; @@ -280,12 +291,18 @@ unsigned long __init setup_memory(void) (ulong) pfn_to_kaddr(max_low_pfn)); for_each_online_node(nid) { node_remap_start_vaddr[nid] = pfn_to_kaddr( - (highstart_pfn + reserve_pages) - node_remap_offset[nid]); + highstart_pfn + node_remap_offset[nid]); + /* Init the node remap allocator */ + node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + + (node_remap_size[nid] * PAGE_SIZE); + node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + + ALIGN(sizeof(pg_data_t), PAGE_SIZE); + allocate_pgdat(nid); printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, (ulong) node_remap_start_vaddr[nid], - (ulong) pfn_to_kaddr(highstart_pfn + reserve_pages - - node_remap_offset[nid] + node_remap_size[nid])); + (ulong) pfn_to_kaddr(highstart_pfn + + node_remap_offset[nid] + node_remap_size[nid])); } printk("High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); @@ -348,23 +365,9 @@ void __init zone_sizes_init(void) } zholes_size = get_zholes_size(nid); - /* - * We let the lmem_map for node 0 be allocated from the - * normal bootmem allocator, but other nodes come from the - * remapped KVA area - mbligh - */ - if (!nid) - free_area_init_node(nid, NODE_DATA(nid), - zones_size, start, zholes_size); - else { - unsigned long lmem_map; - lmem_map = (unsigned long)node_remap_start_vaddr[nid]; - lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1; - lmem_map &= PAGE_MASK; - NODE_DATA(nid)->node_mem_map = (struct page *)lmem_map; - free_area_init_node(nid, NODE_DATA(nid), zones_size, - start, zholes_size); - } + + 
free_area_init_node(nid, NODE_DATA(nid), zones_size, start, + zholes_size); } return; } diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 0dd8ca1a3d5a..500f451ce0c0 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -67,6 +67,15 @@ extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ +#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP +extern void *alloc_remap(int nid, unsigned long size); +#else +static inline void *alloc_remap(int nid, unsigned long size) +{ + return NULL; +} +#endif + extern unsigned long __initdata nr_kernel_pages; extern unsigned long __initdata nr_all_pages; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 559336de9687..bf1dd8819097 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1936,6 +1936,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat, static void __init alloc_node_mem_map(struct pglist_data *pgdat) { unsigned long size; + struct page *map; /* Skip empty nodes */ if (!pgdat->node_spanned_pages) @@ -1944,7 +1945,10 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat) /* ia64 gets its own node_mem_map, before this, without bootmem */ if (!pgdat->node_mem_map) { size = (pgdat->node_spanned_pages + 1) * sizeof(struct page); - pgdat->node_mem_map = alloc_bootmem_node(pgdat, size); + map = alloc_remap(pgdat->node_id, size); + if (!map) + map = alloc_bootmem_node(pgdat, size); + pgdat->node_mem_map = map; } #ifndef CONFIG_DISCONTIGMEM /* |