From b3d9ed3fd872fc074286674ae8595ee880938bbf Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 8 Sep 2015 15:00:59 -0700 Subject: sparc32: do not include swap.h from pgtable_32.h "memcg: export struct mem_cgroup" will add includes into linux/memcontrol.h which lead to further header dependency issues as reported by Guenter Roeck: In file included from include/linux/highmem.h:7:0, from include/linux/bio.h:23, from include/linux/writeback.h:192, from include/linux/memcontrol.h:30, from include/linux/swap.h:8, from ./arch/sparc/include/asm/pgtable_32.h:17, from ./arch/sparc/include/asm/pgtable.h:6, from arch/sparc/kernel/traps_32.c:23: include/linux/mm.h: In function 'is_vmalloc_addr': include/linux/mm.h:371:17: error: 'VMALLOC_START' undeclared (first use in this function) include/linux/mm.h:371:17: note: each undeclared identifier is reported only once for each function it appears in include/linux/mm.h:371:41: error: 'VMALLOC_END' undeclared (first use in this function) include/linux/mm.h: In function 'maybe_mkwrite': include/linux/mm.h:556:3: error: implicit declaration of function 'pte_mkwrite' The issue is that pgtable_32.h depends on swap.h to get swap_entry_t but that goes all the way down to linux/mm.h which wants to have VMALLOC_* which is defined later in pgtable_32.h, though. swap_entry_t is defined in include/mm_types.h so it should be sufficient to include this header without more dependencies. 
Signed-off-by: Michal Hocko Reported-by: Guenter Roeck Tested-by: Guenter Roeck Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/include/asm/pgtable_32.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index f06b36a00a3b..91b963a887b7 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include -- cgit v1.2.3 From 95cf82ecc1fcb44df1768162343cc8eb88083b86 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Tue, 8 Sep 2015 15:02:03 -0700 Subject: mem-hotplug: handle node hole when initializing numa_meminfo. When parsing SRAT, all memory ranges are added into numa_meminfo. In numa_init(), before entering numa_cleanup_meminfo(), all possible memory ranges are in numa_meminfo. And numa_cleanup_meminfo() removes all ranges over max_pfn or empty. But, this only works if the nodes are continuous. Let's have a look at the following example: We have an SRAT like this: SRAT: Node 0 PXM 0 [mem 0x00000000-0x5fffffff] SRAT: Node 0 PXM 0 [mem 0x100000000-0x1ffffffffff] SRAT: Node 1 PXM 1 [mem 0x20000000000-0x3ffffffffff] SRAT: Node 4 PXM 2 [mem 0x40000000000-0x5ffffffffff] hotplug SRAT: Node 5 PXM 3 [mem 0x60000000000-0x7ffffffffff] hotplug SRAT: Node 2 PXM 4 [mem 0x80000000000-0x9ffffffffff] hotplug SRAT: Node 3 PXM 5 [mem 0xa0000000000-0xbffffffffff] hotplug SRAT: Node 6 PXM 6 [mem 0xc0000000000-0xdffffffffff] hotplug SRAT: Node 7 PXM 7 [mem 0xe0000000000-0xfffffffffff] hotplug On boot, only node 0,1,2,3 exist. And the numa_meminfo will look like this: numa_meminfo.nr_blks = 9 1. on node 0: [0, 60000000] 2. on node 0: [100000000, 20000000000] 3. on node 1: [20000000000, 40000000000] 4. on node 4: [40000000000, 60000000000] 5. on node 5: [60000000000, 80000000000] 6. on node 2: [80000000000, a0000000000] 7. 
on node 3: [a0000000000, a0800000000] 8. on node 6: [c0000000000, a0800000000] 9. on node 7: [e0000000000, a0800000000] And numa_cleanup_meminfo() will merge 1 and 2, and remove 8,9 because the end address is over max_pfn, which is a0800000000. But 4 and 5 are not removed because their end addresses are less than max_pfn. But in fact, node 4 and 5 don't exist. In a word, numa_cleanup_meminfo() is not able to handle holes between nodes. Since memory ranges in node 4 and 5 are in numa_meminfo, in numa_register_memblks(), node 4 and 5 will be mistakenly set to online. If you run lscpu, it will show: NUMA node0 CPU(s): 0-14,128-142 NUMA node1 CPU(s): 15-29,143-157 NUMA node2 CPU(s): NUMA node3 CPU(s): NUMA node4 CPU(s): 62-76,190-204 NUMA node5 CPU(s): 78-92,206-220 In this patch, we use memblock_overlaps_region() to check if ranges in numa_meminfo overlap with ranges in memblock.memory. Since memblock.memory contains all available memory at boot time, if they overlap, it means the ranges exist. If not, then remove them from numa_meminfo. 
After this patch, lscpu will show: NUMA node0 CPU(s): 0-14,128-142 NUMA node1 CPU(s): 15-29,143-157 NUMA node4 CPU(s): 62-76,190-204 NUMA node5 CPU(s): 78-92,206-220 Signed-off-by: Tang Chen Reviewed-by: Yasuaki Ishimatsu Cc: Thomas Gleixner Cc: Tejun Heo Cc: Luiz Capitulino Cc: Xishi Qiu Cc: Will Deacon Cc: Vladimir Murzin Cc: Fabian Frederick Cc: Alexander Kuleshov Cc: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/numa.c | 6 ++++-- include/linux/memblock.h | 2 ++ mm/memblock.c | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 4053bb58bf92..c3b3f653ed0c 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -246,8 +246,10 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi) bi->start = max(bi->start, low); bi->end = min(bi->end, high); - /* and there's no empty block */ - if (bi->start >= bi->end) + /* and there's no empty or non-exist block */ + if (bi->start >= bi->end || + !memblock_overlaps_region(&memblock.memory, + bi->start, bi->end - bi->start)) numa_remove_memblk_from(i--, mi); } diff --git a/include/linux/memblock.h b/include/linux/memblock.h index d312ae3b51fc..c518eb589260 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -77,6 +77,8 @@ int memblock_remove(phys_addr_t base, phys_addr_t size); int memblock_free(phys_addr_t base, phys_addr_t size); int memblock_reserve(phys_addr_t base, phys_addr_t size); void memblock_trim_memory(phys_addr_t align); +bool memblock_overlaps_region(struct memblock_type *type, + phys_addr_t base, phys_addr_t size); int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); diff --git a/mm/memblock.c b/mm/memblock.c index 08a5126338db..509255223688 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -91,7 +91,7 @@ static unsigned long 
__init_memblock memblock_addrs_overlap(phys_addr_t base1, p return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); } -static bool __init_memblock memblock_overlaps_region(struct memblock_type *type, +bool __init_memblock memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) { unsigned long i; -- cgit v1.2.3 From 1570f0d7ab425c1e0905715bf9cc98b2a82e723f Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Tue, 8 Sep 2015 15:03:04 -0700 Subject: arm64: support initrd outside kernel linear map The use of mem= could leave part or all of the initrd outside of the kernel linear map. This will lead to an error when unpacking the initrd and a probable failure to boot. This patch catches that situation and relocates the initrd to be fully within the linear map. Signed-off-by: Mark Salter Acked-by: Will Deacon Cc: Catalin Marinas Cc: Arnd Bergmann Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Russell King Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/kernel/setup.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 888478881243..6bab21f84a9f 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -339,6 +339,67 @@ static void __init request_standard_resources(void) } } +#ifdef CONFIG_BLK_DEV_INITRD +/* + * Relocate initrd if it is not completely within the linear mapping. + * This would be the case if mem= cuts out all or part of it. 
+ */ +static void __init relocate_initrd(void) +{ + phys_addr_t orig_start = __virt_to_phys(initrd_start); + phys_addr_t orig_end = __virt_to_phys(initrd_end); + phys_addr_t ram_end = memblock_end_of_DRAM(); + phys_addr_t new_start; + unsigned long size, to_free = 0; + void *dest; + + if (orig_end <= ram_end) + return; + + /* + * Any of the original initrd which overlaps the linear map should + * be freed after relocating. + */ + if (orig_start < ram_end) + to_free = ram_end - orig_start; + + size = orig_end - orig_start; + + /* initrd needs to be relocated completely inside linear mapping */ + new_start = memblock_find_in_range(0, PFN_PHYS(max_pfn), + size, PAGE_SIZE); + if (!new_start) + panic("Cannot relocate initrd of size %ld\n", size); + memblock_reserve(new_start, size); + + initrd_start = __phys_to_virt(new_start); + initrd_end = initrd_start + size; + + pr_info("Moving initrd from [%llx-%llx] to [%llx-%llx]\n", + orig_start, orig_start + size - 1, + new_start, new_start + size - 1); + + dest = (void *)initrd_start; + + if (to_free) { + memcpy(dest, (void *)__phys_to_virt(orig_start), to_free); + dest += to_free; + } + + copy_from_early_mem(dest, orig_start + to_free, size - to_free); + + if (to_free) { + pr_info("Freeing original RAMDISK from [%llx-%llx]\n", + orig_start, orig_start + to_free - 1); + memblock_free(orig_start, to_free); + } +} +#else +static inline void __init relocate_initrd(void) +{ +} +#endif + u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; void __init setup_arch(char **cmdline_p) @@ -372,6 +433,7 @@ void __init setup_arch(char **cmdline_p) acpi_boot_table_init(); paging_init(); + relocate_initrd(); request_standard_resources(); early_ioremap_reset(); -- cgit v1.2.3 From 5dd2c4bded8776ee93c8f38b739fea531095067f Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Tue, 8 Sep 2015 15:03:07 -0700 Subject: x86: use generic early mem copy The early_ioremap library now has a generic copy_from_early_mem() function. 
Use the generic copy function for x86 relocate_initrd(). [akpm@linux-foundation.org: remove MAX_MAP_CHUNK define, per Yinghai Lu] Signed-off-by: Mark Salter Cc: Catalin Marinas Cc: Will Deacon Cc: Arnd Bergmann Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Russell King Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/setup.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b143c2d04420..baadbf90a7c5 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -317,15 +317,12 @@ static u64 __init get_ramdisk_size(void) return ramdisk_size; } -#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) static void __init relocate_initrd(void) { /* Assume only end is not page aligned */ u64 ramdisk_image = get_ramdisk_image(); u64 ramdisk_size = get_ramdisk_size(); u64 area_size = PAGE_ALIGN(ramdisk_size); - unsigned long slop, clen, mapaddr; - char *p, *q; /* We need to move the initrd down into directly mapped mem */ relocated_ramdisk = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), @@ -343,25 +340,8 @@ static void __init relocate_initrd(void) printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n", relocated_ramdisk, relocated_ramdisk + ramdisk_size - 1); - q = (char *)initrd_start; - - /* Copy the initrd */ - while (ramdisk_size) { - slop = ramdisk_image & ~PAGE_MASK; - clen = ramdisk_size; - if (clen > MAX_MAP_CHUNK-slop) - clen = MAX_MAP_CHUNK-slop; - mapaddr = ramdisk_image & PAGE_MASK; - p = early_memremap(mapaddr, clen+slop); - memcpy(q, p+slop, clen); - early_memunmap(p, clen+slop); - q += clen; - ramdisk_image += clen; - ramdisk_size -= clen; - } + copy_from_early_mem((void *)initrd_start, ramdisk_image, ramdisk_size); - ramdisk_image = get_ramdisk_image(); - ramdisk_size = get_ramdisk_size(); printk(KERN_INFO "Move RAMDISK from 
[mem %#010llx-%#010llx] to" " [mem %#010llx-%#010llx]\n", ramdisk_image, ramdisk_image + ramdisk_size - 1, -- cgit v1.2.3 From 96db800f5d73cd5c49461253d45766e094f0f8c2 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Tue, 8 Sep 2015 15:03:50 -0700 Subject: mm: rename alloc_pages_exact_node() to __alloc_pages_node() alloc_pages_exact_node() was introduced in commit 6484eb3e2a81 ("page allocator: do not check NUMA node ID when the caller knows the node is valid") as an optimized variant of alloc_pages_node(), that doesn't fall back to the current node for nid == NUMA_NO_NODE. Unfortunately the name of the function can easily suggest that the allocation is restricted to the given node and fails otherwise. In truth, the node is only preferred, unless __GFP_THISNODE is passed among the gfp flags. The misleading name has led to mistakes in the past, see for example commits 5265047ac301 ("mm, thp: really limit transparent hugepage allocation to local node") and b360edb43f8e ("mm, mempolicy: migrate_to_node should only migrate to node"). Another issue with the name is that there's a family of alloc_pages_exact*() functions where 'exact' means exact size (instead of page order), which leads to more confusion. To prevent further mistakes, this patch effectively renames alloc_pages_exact_node() to __alloc_pages_node() to better convey that it's an optimized variant of alloc_pages_node() not intended for general usage. Both functions get described in comments. It has also been considered to provide a convenience function for allocations restricted to a node, but the major opinion seems to be that __GFP_THISNODE already provides that functionality and we shouldn't duplicate the API needlessly. The number of users would be small anyway. Existing callers of alloc_pages_exact_node() are simply converted to call __alloc_pages_node(), with the exception of sba_alloc_coherent() which open-codes the check for NUMA_NO_NODE, so it is converted to use alloc_pages_node() instead. 
This means it no longer performs some VM_BUG_ON checks, and since the current check for nid in alloc_pages_node() uses a 'nid < 0' comparison (which includes NUMA_NO_NODE), it may hide wrong values which would be previously exposed. Both differences will be rectified by the next patch. To sum up, this patch makes no functional changes, except temporarily hiding potentially buggy callers. Restricting the checks in alloc_pages_node() is left for the next patch which can in turn expose more existing buggy callers. Signed-off-by: Vlastimil Babka Acked-by: Johannes Weiner Acked-by: Robin Holt Acked-by: Michal Hocko Acked-by: Christoph Lameter Acked-by: Michael Ellerman Cc: Mel Gorman Cc: David Rientjes Cc: Greg Thelen Cc: Aneesh Kumar K.V Cc: Pekka Enberg Cc: Joonsoo Kim Cc: Naoya Horiguchi Cc: Tony Luck Cc: Fenghua Yu Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Gleb Natapov Cc: Paolo Bonzini Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Cliff Whickman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/hp/common/sba_iommu.c | 6 +----- arch/ia64/kernel/uncached.c | 2 +- arch/ia64/sn/pci/pci_dma.c | 2 +- arch/powerpc/platforms/cell/ras.c | 2 +- arch/x86/kvm/vmx.c | 2 +- drivers/misc/sgi-xp/xpc_uv.c | 2 +- include/linux/gfp.h | 23 +++++++++++++++-------- kernel/profile.c | 8 ++++---- mm/filemap.c | 2 +- mm/huge_memory.c | 2 +- mm/hugetlb.c | 4 ++-- mm/memory-failure.c | 2 +- mm/mempolicy.c | 4 ++-- mm/migrate.c | 4 ++-- mm/page_alloc.c | 2 -- mm/slab.c | 2 +- mm/slob.c | 4 ++-- mm/slub.c | 2 +- 18 files changed, 38 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 344387a55406..a6d6190c9d24 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -1140,13 +1140,9 @@ sba_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, #ifdef CONFIG_NUMA { - int node = ioc->node; struct page 
*page; - if (node == NUMA_NO_NODE) - node = numa_node_id(); - - page = alloc_pages_exact_node(node, flags, get_order(size)); + page = alloc_pages_node(ioc->node, flags, get_order(size)); if (unlikely(!page)) return NULL; diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index 20e8a9b21d75..f3976da36721 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -97,7 +97,7 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid) /* attempt to allocate a granule's worth of cached memory pages */ - page = alloc_pages_exact_node(nid, + page = __alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, IA64_GRANULE_SHIFT-PAGE_SHIFT); if (!page) { diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index d0853e8e8623..8f59907007cb 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -92,7 +92,7 @@ static void *sn_dma_alloc_coherent(struct device *dev, size_t size, */ node = pcibus_to_node(pdev->bus); if (likely(node >=0)) { - struct page *p = alloc_pages_exact_node(node, + struct page *p = __alloc_pages_node(node, flags, get_order(size)); if (likely(p)) diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c index e865d748179b..2d4f60c0119a 100644 --- a/arch/powerpc/platforms/cell/ras.c +++ b/arch/powerpc/platforms/cell/ras.c @@ -123,7 +123,7 @@ static int __init cbe_ptcal_enable_on_node(int nid, int order) area->nid = nid; area->order = order; - area->pages = alloc_pages_exact_node(area->nid, + area->pages = __alloc_pages_node(area->nid, GFP_KERNEL|__GFP_THISNODE, area->order); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4a4eec30cc08..148ea2016022 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3150,7 +3150,7 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) struct page *pages; struct vmcs *vmcs; - pages = alloc_pages_exact_node(node, GFP_KERNEL, vmcs_config.order); + pages = __alloc_pages_node(node, GFP_KERNEL, 
vmcs_config.order); if (!pages) return NULL; vmcs = page_address(pages); diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c index 95c894482fdd..340b44d9e8cf 100644 --- a/drivers/misc/sgi-xp/xpc_uv.c +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -239,7 +239,7 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name, mq->mmr_blade = uv_cpu_to_blade_id(cpu); nid = cpu_to_node(cpu); - page = alloc_pages_exact_node(nid, + page = __alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, pg_order); if (page == NULL) { diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 3bd64b115999..d2c142bc872e 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -303,20 +303,28 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order, return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL); } -static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, - unsigned int order) +/* + * Allocate pages, preferring the node given as nid. The node must be valid and + * online. For more general interface, see alloc_pages_node(). + */ +static inline struct page * +__alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { - /* Unknown node is current node */ - if (nid < 0) - nid = numa_node_id(); + VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES || !node_online(nid)); return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask)); } -static inline struct page *alloc_pages_exact_node(int nid, gfp_t gfp_mask, +/* + * Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE, + * prefer the current CPU's node. 
+ */ +static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { - VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES || !node_online(nid)); + /* Unknown node is current node */ + if (nid < 0) + nid = numa_node_id(); return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask)); } @@ -357,7 +365,6 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask); void *alloc_pages_exact(size_t size, gfp_t gfp_mask); void free_pages_exact(void *virt, size_t size); -/* This is different from alloc_pages_exact_node !!! */ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); #define __get_free_page(gfp_mask) \ diff --git a/kernel/profile.c b/kernel/profile.c index a7bcd28d6e9f..99513e1160e5 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -339,7 +339,7 @@ static int profile_cpu_callback(struct notifier_block *info, node = cpu_to_mem(cpu); per_cpu(cpu_profile_flip, cpu) = 0; if (!per_cpu(cpu_profile_hits, cpu)[1]) { - page = alloc_pages_exact_node(node, + page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); if (!page) @@ -347,7 +347,7 @@ static int profile_cpu_callback(struct notifier_block *info, per_cpu(cpu_profile_hits, cpu)[1] = page_address(page); } if (!per_cpu(cpu_profile_hits, cpu)[0]) { - page = alloc_pages_exact_node(node, + page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); if (!page) @@ -543,14 +543,14 @@ static int create_hash_tables(void) int node = cpu_to_mem(cpu); struct page *page; - page = alloc_pages_exact_node(node, + page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, 0); if (!page) goto out_cleanup; per_cpu(cpu_profile_hits, cpu)[1] = (struct profile_hit *)page_address(page); - page = alloc_pages_exact_node(node, + page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, 0); if (!page) diff --git a/mm/filemap.c b/mm/filemap.c index 30d69c0c5a38..72940fb38666 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -674,7 +674,7 @@ struct page 
*__page_cache_alloc(gfp_t gfp) do { cpuset_mems_cookie = read_mems_allowed_begin(); n = cpuset_mem_spread_node(); - page = alloc_pages_exact_node(n, gfp, 0); + page = __alloc_pages_node(n, gfp, 0); } while (!page && read_mems_allowed_retry(cpuset_mems_cookie)); return page; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 71a4822c832b..883f613ada7e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2414,7 +2414,7 @@ khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm, */ up_read(&mm->mmap_sem); - *hpage = alloc_pages_exact_node(node, gfp, HPAGE_PMD_ORDER); + *hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER); if (unlikely(!*hpage)) { count_vm_event(THP_COLLAPSE_ALLOC_FAILED); *hpage = ERR_PTR(-ENOMEM); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index cd1280c487ff..999fb0aef8f1 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1331,7 +1331,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid) { struct page *page; - page = alloc_pages_exact_node(nid, + page = __alloc_pages_node(nid, htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE| __GFP_REPEAT|__GFP_NOWARN, huge_page_order(h)); @@ -1483,7 +1483,7 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid) __GFP_REPEAT|__GFP_NOWARN, huge_page_order(h)); else - page = alloc_pages_exact_node(nid, + page = __alloc_pages_node(nid, htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE| __GFP_REPEAT|__GFP_NOWARN, huge_page_order(h)); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index bba2d7c2c9ce..eeda6485e76c 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1521,7 +1521,7 @@ static struct page *new_page(struct page *p, unsigned long private, int **x) return alloc_huge_page_node(page_hstate(compound_head(p)), nid); else - return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0); + return __alloc_pages_node(nid, GFP_HIGHUSER_MOVABLE, 0); } /* diff --git a/mm/mempolicy.c b/mm/mempolicy.c index d6f2caee28c0..87a177917cb2 100644 --- 
a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -942,7 +942,7 @@ static struct page *new_node_page(struct page *page, unsigned long node, int **x return alloc_huge_page_node(page_hstate(compound_head(page)), node); else - return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE | + return __alloc_pages_node(node, GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0); } @@ -1998,7 +1998,7 @@ retry_cpuset: nmask = policy_nodemask(gfp, pol); if (!nmask || node_isset(hpage_node, *nmask)) { mpol_cond_put(pol); - page = alloc_pages_exact_node(hpage_node, + page = __alloc_pages_node(hpage_node, gfp | __GFP_THISNODE, order); goto out; } diff --git a/mm/migrate.c b/mm/migrate.c index 918defbdda0e..02ce25df16c2 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1195,7 +1195,7 @@ static struct page *new_page_node(struct page *p, unsigned long private, return alloc_huge_page_node(page_hstate(compound_head(p)), pm->node); else - return alloc_pages_exact_node(pm->node, + return __alloc_pages_node(pm->node, GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0); } @@ -1555,7 +1555,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page, int nid = (int) data; struct page *newpage; - newpage = alloc_pages_exact_node(nid, + newpage = __alloc_pages_node(nid, (GFP_HIGHUSER_MOVABLE | __GFP_THISNODE | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 252665d553b4..bdaa0cf8fd41 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3511,8 +3511,6 @@ EXPORT_SYMBOL(alloc_pages_exact); * * Like alloc_pages_exact(), but try to allocate on node nid first before falling * back. - * Note this is not alloc_pages_exact_node() which allocates on a specific node, - * but is not exact. 
*/ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) { diff --git a/mm/slab.c b/mm/slab.c index 60c936938b84..c77ebe6cc87c 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1595,7 +1595,7 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, if (memcg_charge_slab(cachep, flags, cachep->gfporder)) return NULL; - page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder); + page = __alloc_pages_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder); if (!page) { memcg_uncharge_slab(cachep, cachep->gfporder); slab_out_of_memory(cachep, flags, nodeid); diff --git a/mm/slob.c b/mm/slob.c index 165bbd3cd606..0d7e5df74d1f 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -45,7 +45,7 @@ * NUMA support in SLOB is fairly simplistic, pushing most of the real * logic down to the page allocator, and simply doing the node accounting * on the upper levels. In the event that a node id is explicitly - * provided, alloc_pages_exact_node() with the specified node id is used + * provided, __alloc_pages_node() with the specified node id is used * instead. The common case (or when the node id isn't explicitly provided) * will default to the current node, as per numa_node_id(). * @@ -193,7 +193,7 @@ static void *slob_new_pages(gfp_t gfp, int order, int node) #ifdef CONFIG_NUMA if (node != NUMA_NO_NODE) - page = alloc_pages_exact_node(node, gfp, order); + page = __alloc_pages_node(node, gfp, order); else #endif page = alloc_pages(gfp, order); diff --git a/mm/slub.c b/mm/slub.c index 084184e706c6..f614b5dc396b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1334,7 +1334,7 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s, if (node == NUMA_NO_NODE) page = alloc_pages(flags, order); else - page = alloc_pages_exact_node(node, flags, order); + page = __alloc_pages_node(node, flags, order); if (!page) memcg_uncharge_slab(s, order); -- cgit v1.2.3