From 60a8f428320918458a9a21052777eada68eebfd8 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:39:17 -0800 Subject: x86, mm: Move after_bootmem to mm_internal.h It is only used in arch/x86/mm/init*.c Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-41-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index bcaab4e6fe91..64d5271a3d36 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1355,7 +1355,6 @@ extern void __init mmap_init(void); extern void show_mem(unsigned int flags); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); -extern int after_bootmem; extern __printf(3, 4) void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...); -- cgit v1.2.3 From 595ad9af8584908ea5fb698b836169d05b99f186 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:20:09 -0800 Subject: memblock: Add memblock_mem_size() Use it to get the memory size below limit_pfn; it replaces the local version used by x86 reserve_initrd(). -v2: remove an unneeded cast, as pointed out by HPA. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-29-git-send-email-yinghai@kernel.org Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/setup.c | 16 +--------------- include/linux/memblock.h | 1 + mm/memblock.c | 17 +++++++++++++++++ 3 files changed, 19 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b80bee10982f..bbe8cdf7515e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -363,20 +363,6 @@ static void __init relocate_initrd(void) ramdisk_here, ramdisk_here + ramdisk_size - 1); } -static u64 __init get_mem_size(unsigned long limit_pfn) -{ - int i; - u64 mapped_pages = 0; - unsigned long start_pfn, end_pfn; - - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { - start_pfn = min_t(unsigned long, start_pfn, limit_pfn); - end_pfn = min_t(unsigned long, end_pfn, limit_pfn); - mapped_pages += end_pfn - start_pfn; - } - - return mapped_pages << PAGE_SHIFT; -} static void __init early_reserve_initrd(void) { /* Assume only end is not page aligned */ @@ -404,7 +390,7 @@ static void __init reserve_initrd(void) initrd_start = 0; - mapped_size = get_mem_size(max_pfn_mapped); + mapped_size = memblock_mem_size(max_pfn_mapped); if (ramdisk_size >= (mapped_size>>1)) panic("initrd too large to handle, " "disabling initrd (%lld needed, %lld available)\n", diff --git a/include/linux/memblock.h b/include/linux/memblock.h index d452ee191066..f388203db7e8 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -155,6 +155,7 @@ phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr); phys_addr_t memblock_phys_mem_size(void); +phys_addr_t memblock_mem_size(unsigned long limit_pfn); phys_addr_t memblock_start_of_DRAM(void); phys_addr_t memblock_end_of_DRAM(void); void memblock_enforce_memory_limit(phys_addr_t memory_limit); diff --git a/mm/memblock.c b/mm/memblock.c index 88adc8afb610..b8d9147e5c08 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -828,6 
+828,23 @@ phys_addr_t __init memblock_phys_mem_size(void) return memblock.memory.total_size; } +phys_addr_t __init memblock_mem_size(unsigned long limit_pfn) +{ + unsigned long pages = 0; + struct memblock_region *r; + unsigned long start_pfn, end_pfn; + + for_each_memblock(memory, r) { + start_pfn = memblock_region_memory_base_pfn(r); + end_pfn = memblock_region_memory_end_pfn(r); + start_pfn = min_t(unsigned long, start_pfn, limit_pfn); + end_pfn = min_t(unsigned long, end_pfn, limit_pfn); + pages += end_pfn - start_pfn; + } + + return (phys_addr_t)pages << PAGE_SHIFT; +} + /* lowest address */ phys_addr_t __init_memblock memblock_start_of_DRAM(void) { -- cgit v1.2.3 From 0212f9159694be61c6bc52e925fa76643e0c1abf Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:20:11 -0800 Subject: x86: Add Crash kernel low reservation During the kdump kernel's boot stage, it needs to find low RAM for the swiotlb buffer when the system does not support Intel IOMMU/DMAR remapping. kexec-tools appends memmap=exactmap and the range from /proc/iomem named "Crash kernel", and that range is above 4G for 64-bit after boot protocol 2.12. We need to add another range in /proc/iomem, like "Crash kernel low", so kexec-tools can find that info and append it to the kdump kernel command line. Try to reserve some memory under 4G if the normal "Crash kernel" is above 4G. The user can specify the size with crashkernel_low=XX[KMG]. -v2: fix a warning found by Fengguang's test robot. -v3: move the get_mem_size change out to another patch, to solve a compile warning found by Borislav Petkov. -v4: the user must specify crashkernel_low if the system does not support an Intel or AMD IOMMU. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-31-git-send-email-yinghai@kernel.org Cc: Eric Biederman Cc: Rob Landley Signed-off-by: H. 
Peter Anvin --- Documentation/kernel-parameters.txt | 3 +++ arch/x86/kernel/setup.c | 42 +++++++++++++++++++++++++++++++++++-- include/linux/kexec.h | 3 +++ kernel/kexec.c | 34 +++++++++++++++++++++++++----- 4 files changed, 75 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 363e348bff9b..da0e0773ca96 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -594,6 +594,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. is selected automatically. Check Documentation/kdump/kdump.txt for further details. + crashkernel_low=size[KMG] + [KNL, x86] parts under 4G. + crashkernel=range1:size1[,range2:size2,...][@offset] [KNL] Same as above, but depends on the memory in the running system. The syntax of range is diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4778ddeedc8a..5dc47c3e537b 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -508,8 +508,44 @@ static void __init memblock_x86_reserve_range_setup_data(void) # define CRASH_KERNEL_ADDR_MAX MAXMEM #endif +static void __init reserve_crashkernel_low(void) +{ +#ifdef CONFIG_X86_64 + const unsigned long long alignment = 16<<20; /* 16M */ + unsigned long long low_base = 0, low_size = 0; + unsigned long total_low_mem; + unsigned long long base; + int ret; + + total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); + ret = parse_crashkernel_low(boot_command_line, total_low_mem, + &low_size, &base); + if (ret != 0 || low_size <= 0) + return; + + low_base = memblock_find_in_range(low_size, (1ULL<<32), + low_size, alignment); + + if (!low_base) { + pr_info("crashkernel low reservation failed - No suitable area found.\n"); + + return; + } + + memblock_reserve(low_base, low_size); + pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n", + (unsigned long)(low_size >> 20), + (unsigned long)(low_base 
>> 20), + (unsigned long)(total_low_mem >> 20)); + crashk_low_res.start = low_base; + crashk_low_res.end = low_base + low_size - 1; + insert_resource(&iomem_resource, &crashk_low_res); +#endif +} + static void __init reserve_crashkernel(void) { + const unsigned long long alignment = 16<<20; /* 16M */ unsigned long long total_mem; unsigned long long crash_size, crash_base; int ret; @@ -523,8 +559,6 @@ static void __init reserve_crashkernel(void) /* 0 means: find the address automatically */ if (crash_base <= 0) { - const unsigned long long alignment = 16<<20; /* 16M */ - /* * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX */ @@ -535,6 +569,7 @@ static void __init reserve_crashkernel(void) pr_info("crashkernel reservation failed - No suitable area found.\n"); return; } + } else { unsigned long long start; @@ -556,6 +591,9 @@ static void __init reserve_crashkernel(void) crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; insert_resource(&iomem_resource, &crashk_res); + + if (crash_base >= (1ULL<<32)) + reserve_crashkernel_low(); } #else static void __init reserve_crashkernel(void) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index d0b8458a703a..d2e6927bbaae 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -191,6 +191,7 @@ extern struct kimage *kexec_crash_image; /* Location of a reserved region to hold the crash kernel. 
*/ extern struct resource crashk_res; +extern struct resource crashk_low_res; typedef u32 note_buf_t[KEXEC_NOTE_BYTES/4]; extern note_buf_t __percpu *crash_notes; extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; @@ -199,6 +200,8 @@ extern size_t vmcoreinfo_max_size; int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base); +int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, + unsigned long long *crash_size, unsigned long long *crash_base); int crash_shrink_memory(unsigned long new_size); size_t crash_get_memory_size(void); void crash_free_reserved_phys_range(unsigned long begin, unsigned long end); diff --git a/kernel/kexec.c b/kernel/kexec.c index 5e4bd7864c5d..2436ffcec91f 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -54,6 +54,12 @@ struct resource crashk_res = { .end = 0, .flags = IORESOURCE_BUSY | IORESOURCE_MEM }; +struct resource crashk_low_res = { + .name = "Crash kernel low", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; int kexec_should_crash(struct task_struct *p) { @@ -1369,10 +1375,11 @@ static int __init parse_crashkernel_simple(char *cmdline, * That function is the entry point for command line parsing and should be * called from the arch-specific code. 
*/ -int __init parse_crashkernel(char *cmdline, +static int __init __parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, - unsigned long long *crash_base) + unsigned long long *crash_base, + const char *name) { char *p = cmdline, *ck_cmdline = NULL; char *first_colon, *first_space; @@ -1382,16 +1389,16 @@ int __init parse_crashkernel(char *cmdline, *crash_base = 0; /* find crashkernel and use the last one if there are more */ - p = strstr(p, "crashkernel="); + p = strstr(p, name); while (p) { ck_cmdline = p; - p = strstr(p+1, "crashkernel="); + p = strstr(p+1, name); } if (!ck_cmdline) return -EINVAL; - ck_cmdline += 12; /* strlen("crashkernel=") */ + ck_cmdline += strlen(name); /* * if the commandline contains a ':', then that's the extended @@ -1409,6 +1416,23 @@ int __init parse_crashkernel(char *cmdline, return 0; } +int __init parse_crashkernel(char *cmdline, + unsigned long long system_ram, + unsigned long long *crash_size, + unsigned long long *crash_base) +{ + return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, + "crashkernel="); +} + +int __init parse_crashkernel_low(char *cmdline, + unsigned long long system_ram, + unsigned long long *crash_size, + unsigned long long *crash_base) +{ + return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base, + "crashkernel_low="); +} static void update_vmcoreinfo_note(void) { -- cgit v1.2.3 From 38fa4175e60d98fb1c9815fb14f8057576dade73 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:20:15 -0800 Subject: mm: Add alloc_bootmem_low_pages_nopanic() We don't need to panic in some case, like for swiotlb preallocating. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-35-git-send-email-yinghai@kernel.org Signed-off-by: H. 
Peter Anvin --- include/linux/bootmem.h | 5 +++++ mm/bootmem.c | 8 ++++++++ mm/nobootmem.c | 8 ++++++++ 3 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 3f778c27f825..3cd16ba82f15 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -99,6 +99,9 @@ void *___alloc_bootmem_node_nopanic(pg_data_t *pgdat, extern void *__alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal); +void *__alloc_bootmem_low_nopanic(unsigned long size, + unsigned long align, + unsigned long goal); extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, @@ -132,6 +135,8 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, #define alloc_bootmem_low(x) \ __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) +#define alloc_bootmem_low_pages_nopanic(x) \ + __alloc_bootmem_low_nopanic(x, PAGE_SIZE, 0) #define alloc_bootmem_low_pages(x) \ __alloc_bootmem_low(x, PAGE_SIZE, 0) #define alloc_bootmem_low_pages_node(pgdat, x) \ diff --git a/mm/bootmem.c b/mm/bootmem.c index b93376c39b61..2b0bcb019ec2 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -833,6 +833,14 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT); } +void * __init __alloc_bootmem_low_nopanic(unsigned long size, + unsigned long align, + unsigned long goal) +{ + return ___alloc_bootmem_nopanic(size, align, goal, + ARCH_LOW_ADDRESS_LIMIT); +} + /** * __alloc_bootmem_low_node - allocate low boot memory from a specific node * @pgdat: node to allocate from diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 03d152a76acf..5e07d36e381e 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -391,6 +391,14 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT); } +void * __init __alloc_bootmem_low_nopanic(unsigned long 
size, + unsigned long align, + unsigned long goal) +{ + return ___alloc_bootmem_nopanic(size, align, goal, + ARCH_LOW_ADDRESS_LIMIT); +} + /** * __alloc_bootmem_low_node - allocate low boot memory from a specific node * @pgdat: node to allocate from -- cgit v1.2.3 From ac2cbab21f318e19bc176a7f38a120cec835220f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:20:16 -0800 Subject: x86: Don't panic if we cannot alloc buffer for swiotlb Normal boot path on a system with IOMMU support: the swiotlb buffer is allocated early, and then we try to initialize the IOMMU; if the Intel or AMD IOMMU can be set up properly, the swiotlb buffer is freed. The early allocation uses bootmem, and could panic when we try to use kdump with a buffer above 4G only, or with memmap limiting memory under 4G. For example: memmap=4095M$1M to remove memory under 4G. According to Eric, add a _nopanic version and no_iotlb_memory, so that map single fails later if swiotlb is still needed. -v2: don't pass nopanic, and use the -ENOMEM return value, according to Eric. Panic early instead of using swiotlb_full to panic, according to Eric/Konrad. -v3: make swiotlb_init not panic, but this will affect: arm64, ia64, powerpc, tile, unicore32, x86. -v4: clean up swiotlb_init by removing swiotlb_init_with_default_size. Suggested-by: Eric W. Biederman Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-36-git-send-email-yinghai@kernel.org Reviewed-and-tested-by: Konrad Rzeszutek Wilk Cc: Joerg Roedel Cc: Ralf Baechle Cc: Jeremy Fitzhardinge Cc: Kyungmin Park Cc: Marek Szyprowski Cc: Arnd Bergmann Cc: Andrzej Pietrasiewicz Cc: linux-mips@linux-mips.org Cc: xen-devel@lists.xensource.com Cc: virtualization@lists.linux-foundation.org Cc: Shuah Khan Signed-off-by: H. 
Peter Anvin --- arch/mips/cavium-octeon/dma-octeon.c | 3 ++- drivers/xen/swiotlb-xen.c | 4 ++- include/linux/swiotlb.h | 2 +- lib/swiotlb.c | 47 ++++++++++++++++++++++-------------- 4 files changed, 35 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c index 41dd00884975..02f244475207 100644 --- a/arch/mips/cavium-octeon/dma-octeon.c +++ b/arch/mips/cavium-octeon/dma-octeon.c @@ -317,7 +317,8 @@ void __init plat_swiotlb_setup(void) octeon_swiotlb = alloc_bootmem_low_pages(swiotlbsize); - swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1); + if (swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1) == -ENOMEM) + panic("Cannot allocate SWIOTLB buffer"); mips_dma_map_ops = &octeon_linear_dma_map_ops.dma_map_ops; } diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index af47e7594460..1d94316f0ea4 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -231,7 +231,9 @@ retry: } start_dma_addr = xen_virt_to_bus(xen_io_tlb_start); if (early) { - swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose); + if (swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, + verbose)) + panic("Cannot allocate SWIOTLB buffer"); rc = 0; } else rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs); diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 071d62c214a6..2de42f9401d2 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -23,7 +23,7 @@ extern int swiotlb_force; #define IO_TLB_SHIFT 11 extern void swiotlb_init(int verbose); -extern void swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); +int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); extern unsigned long swiotlb_nr_tbl(void); extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 196b06984dec..bfe02b8fc55b 100644 --- 
a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -122,11 +122,18 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, return phys_to_dma(hwdev, virt_to_phys(address)); } +static bool no_iotlb_memory; + void swiotlb_print_info(void) { unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; unsigned char *vstart, *vend; + if (no_iotlb_memory) { + pr_warn("software IO TLB: No low mem\n"); + return; + } + vstart = phys_to_virt(io_tlb_start); vend = phys_to_virt(io_tlb_end); @@ -136,7 +143,7 @@ void swiotlb_print_info(void) bytes >> 20, vstart, vend - 1); } -void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) +int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) { void *v_overflow_buffer; unsigned long i, bytes; @@ -150,9 +157,10 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) /* * Get the overflow emergency buffer */ - v_overflow_buffer = alloc_bootmem_low_pages(PAGE_ALIGN(io_tlb_overflow)); + v_overflow_buffer = alloc_bootmem_low_pages_nopanic( + PAGE_ALIGN(io_tlb_overflow)); if (!v_overflow_buffer) - panic("Cannot allocate SWIOTLB overflow buffer!\n"); + return -ENOMEM; io_tlb_overflow_buffer = __pa(v_overflow_buffer); @@ -169,15 +177,19 @@ void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) if (verbose) swiotlb_print_info(); + + return 0; } /* * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. 
*/ -static void __init -swiotlb_init_with_default_size(size_t default_size, int verbose) +void __init +swiotlb_init(int verbose) { + /* default to 64MB */ + size_t default_size = 64UL<<20; unsigned char *vstart; unsigned long bytes; @@ -188,20 +200,16 @@ swiotlb_init_with_default_size(size_t default_size, int verbose) bytes = io_tlb_nslabs << IO_TLB_SHIFT; - /* - * Get IO TLB memory from the low pages - */ - vstart = alloc_bootmem_low_pages(PAGE_ALIGN(bytes)); - if (!vstart) - panic("Cannot allocate SWIOTLB buffer"); - - swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose); -} + /* Get IO TLB memory from the low pages */ + vstart = alloc_bootmem_low_pages_nopanic(PAGE_ALIGN(bytes)); + if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) + return; -void __init -swiotlb_init(int verbose) -{ - swiotlb_init_with_default_size(64 * (1<<20), verbose); /* default to 64MB */ + if (io_tlb_start) + free_bootmem(io_tlb_start, + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + pr_warn("Cannot allocate SWIOTLB buffer"); + no_iotlb_memory = true; } /* @@ -405,6 +413,9 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, unsigned long offset_slots; unsigned long max_slots; + if (no_iotlb_memory) + panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); + mask = dma_get_seg_boundary(hwdev); tbl_dma_addr &= mask; -- cgit v1.2.3