Diffstat (limited to 'arch/arm64/mm/mmu.c')
-rw-r--r-- | arch/arm64/mm/mmu.c | 149
1 file changed, 117 insertions, 32 deletions
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index ca692a815731..ae0c3d023824 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -464,20 +464,35 @@ void __init mark_linear_text_alias_ro(void)
 	/*
 	 * Remove the write permissions from the linear alias of .text/.rodata
 	 */
-	update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text),
-			    (unsigned long)__init_begin - (unsigned long)_text,
+	update_mapping_prot(__pa_symbol(_stext), (unsigned long)lm_alias(_stext),
+			    (unsigned long)__init_begin - (unsigned long)_stext,
 			    PAGE_KERNEL_RO);
 }
 
+static bool crash_mem_map __initdata;
+
+static int __init enable_crash_mem_map(char *arg)
+{
+	/*
+	 * Proper parameter parsing is done by reserve_crashkernel(). We only
+	 * need to know if the linear map has to avoid block mappings so that
+	 * the crashkernel reservations can be unmapped later.
+	 */
+	crash_mem_map = true;
+
+	return 0;
+}
+early_param("crashkernel", enable_crash_mem_map);
+
 static void __init map_mem(pgd_t *pgdp)
 {
-	phys_addr_t kernel_start = __pa_symbol(_text);
+	phys_addr_t kernel_start = __pa_symbol(_stext);
 	phys_addr_t kernel_end = __pa_symbol(__init_begin);
 	phys_addr_t start, end;
 	int flags = 0;
 	u64 i;
 
-	if (rodata_full || debug_pagealloc_enabled())
+	if (rodata_full || crash_mem_map || debug_pagealloc_enabled())
 		flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
 	/*
@@ -487,11 +502,6 @@ static void __init map_mem(pgd_t *pgdp)
 	 * the following for-loop
 	 */
 	memblock_mark_nomap(kernel_start, kernel_end - kernel_start);
-#ifdef CONFIG_KEXEC_CORE
-	if (crashk_res.end)
-		memblock_mark_nomap(crashk_res.start,
-				    resource_size(&crashk_res));
-#endif
 
 	/* map all the memory banks */
 	for_each_mem_range(i, &start, &end) {
@@ -506,7 +516,7 @@ static void __init map_mem(pgd_t *pgdp)
 	}
 
 	/*
-	 * Map the linear alias of the [_text, __init_begin) interval
+	 * Map the linear alias of the [_stext, __init_begin) interval
 	 * as non-executable now, and remove the write permission in
 	 * mark_linear_text_alias_ro() below (which will be called after
 	 * alternative patching has completed). This makes the contents
@@ -518,21 +528,6 @@ static void __init map_mem(pgd_t *pgdp)
 	__map_memblock(pgdp, kernel_start, kernel_end,
 		       PAGE_KERNEL, NO_CONT_MAPPINGS);
 	memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
-
-#ifdef CONFIG_KEXEC_CORE
-	/*
-	 * Use page-level mappings here so that we can shrink the region
-	 * in page granularity and put back unused memory to buddy system
-	 * through /sys/kernel/kexec_crash_size interface.
-	 */
-	if (crashk_res.end) {
-		__map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
-			       PAGE_KERNEL,
-			       NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
-		memblock_clear_nomap(crashk_res.start,
-				     resource_size(&crashk_res));
-	}
-#endif
 }
 
 void mark_rodata_ro(void)
@@ -665,7 +660,7 @@ static void __init map_kernel(pgd_t *pgdp)
 	 * Only rodata will be remapped with different permissions later on,
 	 * all other segments are allowed to use contiguous mappings.
 	 */
-	map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
+	map_kernel_segment(pgdp, _stext, _etext, text_prot, &vmlinux_text, 0,
 			   VM_NO_GUARD);
 	map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
 			   &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
@@ -1132,8 +1127,11 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 			void *p = NULL;
 
 			p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
-			if (!p)
-				return -ENOMEM;
+			if (!p) {
+				if (vmemmap_populate_basepages(addr, next, node, altmap))
+					return -ENOMEM;
+				continue;
+			}
 
 			pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
 		} else
@@ -1510,13 +1508,43 @@ static int prevent_bootmem_remove_notifier(struct notifier_block *nb,
 	unsigned long end_pfn = arg->start_pfn + arg->nr_pages;
 	unsigned long pfn = arg->start_pfn;
 
-	if (action != MEM_GOING_OFFLINE)
+	if ((action != MEM_GOING_OFFLINE) && (action != MEM_OFFLINE))
 		return NOTIFY_OK;
 
 	for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+		unsigned long start = PFN_PHYS(pfn);
+		unsigned long end = start + (1UL << PA_SECTION_SHIFT);
+
 		ms = __pfn_to_section(pfn);
-		if (early_section(ms))
+		if (!early_section(ms))
+			continue;
+
+		if (action == MEM_GOING_OFFLINE) {
+			/*
+			 * Boot memory removal is not supported. Prevent
+			 * it via blocking any attempted offline request
+			 * for the boot memory and just report it.
+			 */
+			pr_warn("Boot memory [%lx %lx] offlining attempted\n", start, end);
 			return NOTIFY_BAD;
+		} else if (action == MEM_OFFLINE) {
+			/*
+			 * This should have never happened. Boot memory
+			 * offlining should have been prevented by this
+			 * very notifier. Probably some memory removal
+			 * procedure might have changed which would then
+			 * require further debug.
+			 */
+			pr_err("Boot memory [%lx %lx] offlined\n", start, end);
+
+			/*
+			 * Core memory hotplug does not process a return
+			 * code from the notifier for MEM_OFFLINE events.
+			 * The error condition has been reported. Return
+			 * from here as if ignored.
+			 */
+			return NOTIFY_DONE;
+		}
 	}
 	return NOTIFY_OK;
 }
@@ -1525,9 +1553,66 @@ static struct notifier_block prevent_bootmem_remove_nb = {
 	.notifier_call = prevent_bootmem_remove_notifier,
 };
 
+/*
+ * This ensures that boot memory sections on the platform are online
+ * from early boot. Memory sections could not be prevented from being
+ * offlined, unless for some reason they are not online to begin with.
+ * This helps validate the basic assumption on which the above memory
+ * event notifier works to prevent boot memory section offlining and
+ * its possible removal.
+ */
+static void validate_bootmem_online(void)
+{
+	phys_addr_t start, end, addr;
+	struct mem_section *ms;
+	u64 i;
+
+	/*
+	 * Scanning across all memblock might be expensive
+	 * on some big memory systems. Hence enable this
+	 * validation only with DEBUG_VM.
+	 */
+	if (!IS_ENABLED(CONFIG_DEBUG_VM))
+		return;
+
+	for_each_mem_range(i, &start, &end) {
+		for (addr = start; addr < end; addr += (1UL << PA_SECTION_SHIFT)) {
+			ms = __pfn_to_section(PHYS_PFN(addr));
+
+			/*
+			 * All memory ranges in the system at this point
+			 * should have been marked as early sections.
+			 */
+			WARN_ON(!early_section(ms));
+
+			/*
+			 * Memory notifier mechanism here to prevent boot
+			 * memory offlining depends on the fact that each
+			 * early section memory on the system is initially
+			 * online. Otherwise a given memory section which
+			 * is already offline will be overlooked and can
+			 * be removed completely. Call out such sections.
+			 */
+			if (!online_section(ms))
+				pr_err("Boot memory [%llx %llx] is offline, can be removed\n",
+					addr, addr + (1UL << PA_SECTION_SHIFT));
+		}
+	}
+}
+
 static int __init prevent_bootmem_remove_init(void)
 {
-	return register_memory_notifier(&prevent_bootmem_remove_nb);
+	int ret = 0;
+
+	if (!IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
+		return ret;
+
+	validate_bootmem_online();
+	ret = register_memory_notifier(&prevent_bootmem_remove_nb);
+	if (ret)
+		pr_err("%s: Notifier registration failed %d\n", __func__, ret);
+
+	return ret;
 }
-device_initcall(prevent_bootmem_remove_init);
+early_initcall(prevent_bootmem_remove_init);
 #endif
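The prevent_bootmem_remove_notifier() change splits handling across two hotplug events: at MEM_GOING_OFFLINE a NOTIFY_BAD return still vetoes the offline, while at MEM_OFFLINE core hotplug ignores the return code, so the notifier can only report the condition. A minimal userspace sketch of that decision flow follows; the NOTIFY_* values, section geometry, and the early_pfn() predicate are simplified stand-ins for the kernel's definitions, not the real API.

/*
 * Minimal userspace sketch of the decision flow in
 * prevent_bootmem_remove_notifier(). All names below (NOTIFY_* codes,
 * section size, the early_pfn() predicate) are simplified stand-ins
 * for the kernel's own definitions.
 */
#include <stdbool.h>
#include <stdio.h>

#define MEM_GOING_OFFLINE	1
#define MEM_OFFLINE		2

#define NOTIFY_OK		0	/* no objection */
#define NOTIFY_DONE		1	/* handled; return code is ignored */
#define NOTIFY_BAD		2	/* veto the transition */

#define SECTION_SHIFT		27UL	/* stand-in for PA_SECTION_SHIFT */
#define PAGE_SHIFT		12UL
#define PAGES_PER_SECTION	(1UL << (SECTION_SHIFT - PAGE_SHIFT))

/* Hypothetical: treat the first four sections as boot (early) memory. */
static bool early_pfn(unsigned long pfn)
{
	return pfn < 4 * PAGES_PER_SECTION;
}

static int mock_notifier(unsigned long action, unsigned long start_pfn,
			 unsigned long nr_pages)
{
	unsigned long end_pfn = start_pfn + nr_pages;
	unsigned long pfn;

	if (action != MEM_GOING_OFFLINE && action != MEM_OFFLINE)
		return NOTIFY_OK;

	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
		if (!early_pfn(pfn))
			continue;	/* hot-added memory may be offlined */

		if (action == MEM_GOING_OFFLINE) {
			/* Veto while the return code is still honored. */
			printf("blocking offline of boot section, pfn %lx\n", pfn);
			return NOTIFY_BAD;
		}
		/* MEM_OFFLINE: too late to veto; report and move on. */
		printf("boot section at pfn %lx already offline\n", pfn);
		return NOTIFY_DONE;
	}
	return NOTIFY_OK;
}

int main(void)
{
	/* Offlining boot memory is vetoed (NOTIFY_BAD == 2)... */
	printf("-> %d\n", mock_notifier(MEM_GOING_OFFLINE, 0, PAGES_PER_SECTION));
	/* ...while hot-added memory beyond it passes (NOTIFY_OK == 0). */
	printf("-> %d\n", mock_notifier(MEM_GOING_OFFLINE,
					8 * PAGES_PER_SECTION, PAGES_PER_SECTION));
	return 0;
}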
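The vmemmap_populate() hunk turns an allocation failure from an immediate -ENOMEM into a per-chunk fallback from a PMD-sized block to base pages, so a fragmented allocator no longer fails the whole hotplug. A rough sketch of that pattern, with malloc() standing in for vmemmap_alloc_block_buf() and the base-page path, and all page-table writes elided:

/*
 * Sketch only: mirrors the "try one big block, else fall back to base
 * pages for just this chunk" structure of the vmemmap_populate() change.
 * malloc() is a stand-in for the kernel allocators; nothing is mapped.
 */
#include <stdio.h>
#include <stdlib.h>

#define PMD_SIZE	(2UL << 20)	/* 2 MiB, as on arm64 with 4K pages */
#define PAGE_SIZE	(4UL << 10)

/* Stand-in for vmemmap_populate_basepages() over [addr, next). */
static int populate_basepages(unsigned long addr, unsigned long next)
{
	for (unsigned long a = addr; a < next; a += PAGE_SIZE) {
		void *q = malloc(PAGE_SIZE);
		if (!q)
			return -1;	/* truly out of memory */
		free(q);		/* a real caller would map q here */
	}
	return 0;
}

static int populate_range(unsigned long start, unsigned long end)
{
	for (unsigned long addr = start; addr < end; addr += PMD_SIZE) {
		unsigned long next = addr + PMD_SIZE;
		void *p = malloc(PMD_SIZE);	/* try the section-sized buffer */

		if (!p) {
			/* Old behavior: fail here. New: base pages instead. */
			if (populate_basepages(addr, next))
				return -1;
			continue;	/* this chunk done via base pages */
		}
		free(p);		/* a real caller would pmd_set_huge() */
	}
	return 0;
}

int main(void)
{
	printf("populate: %d\n", populate_range(0, 8 * PMD_SIZE));
	return 0;
}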