diff options
-rw-r--r-- | arch/x86/include/asm/efi.h | 1 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi.c | 39 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_32.c | 5 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_64.c | 97 |
4 files changed, 102 insertions, 40 deletions
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 347eeacb06a8..8fd9e637629a 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -136,6 +136,7 @@ extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); +extern int __init efi_alloc_page_tables(void); extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init old_map_region(efi_memory_desc_t *md); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index ad285404ea7f..3c1f3cd7b2ba 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -869,7 +869,7 @@ static void __init kexec_enter_virtual_mode(void) * This function will switch the EFI runtime services to virtual mode. * Essentially, we look through the EFI memmap and map every region that * has the runtime attribute bit set in its memory descriptor into the - * ->trampoline_pgd page table using a top-down VA allocation scheme. + * efi_pgd page table. * * The old method which used to update that memory descriptor with the * virtual address obtained from ioremap() is still supported when the @@ -879,8 +879,8 @@ static void __init kexec_enter_virtual_mode(void) * * The new method does a pagetable switch in a preemption-safe manner * so that we're in a different address space when calling a runtime - * function. For function arguments passing we do copy the PGDs of the - * kernel page table into ->trampoline_pgd prior to each call. + * function. For function arguments passing we do copy the PUDs of the + * kernel page table into efi_pgd prior to each call. * * Specially for kexec boot, efi runtime maps in previous kernel should * be passed in via setup_data. In that case runtime ranges will be mapped @@ -895,6 +895,12 @@ static void __init __efi_enter_virtual_mode(void) efi.systab = NULL; + if (efi_alloc_page_tables()) { + pr_err("Failed to allocate EFI page tables\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return; + } + efi_merge_regions(); new_memmap = efi_map_regions(&count, &pg_shift); if (!new_memmap) { @@ -954,28 +960,11 @@ static void __init __efi_enter_virtual_mode(void) efi_runtime_mkexec(); /* - * We mapped the descriptor array into the EFI pagetable above but we're - * not unmapping it here. Here's why: - * - * We're copying select PGDs from the kernel page table to the EFI page - * table and when we do so and make changes to those PGDs like unmapping - * stuff from them, those changes appear in the kernel page table and we - * go boom. - * - * From setup_real_mode(): - * - * ... - * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; - * - * In this particular case, our allocation is in PGD 0 of the EFI page - * table but we've copied that PGD from PGD[272] of the EFI page table: - * - * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272 - * - * where the direct memory mapping in kernel space is. - * - * new_memmap's VA comes from that direct mapping and thus clearing it, - * it would get cleared in the kernel page table too. + * We mapped the descriptor array into the EFI pagetable above + * but we're not unmapping it here because if we're running in + * EFI mixed mode we need all of memory to be accessible when + * we pass parameters to the EFI runtime services in the + * thunking code. * * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift); */ diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index ed5b67338294..58d669bc8250 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -38,6 +38,11 @@ * say 0 - 3G. */ +int __init efi_alloc_page_tables(void) +{ + return 0; +} + void efi_sync_low_kernel_mappings(void) {} void __init efi_dump_pagetable(void) {} int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index b19cdac959b2..4897f518760f 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -40,6 +40,7 @@ #include <asm/fixmap.h> #include <asm/realmode.h> #include <asm/time.h> +#include <asm/pgalloc.h> /* * We allocate runtime services regions bottom-up, starting from -4G, i.e. @@ -121,22 +122,92 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) early_code_mapping_set_exec(0); } +static pgd_t *efi_pgd; + +/* + * We need our own copy of the higher levels of the page tables + * because we want to avoid inserting EFI region mappings (EFI_VA_END + * to EFI_VA_START) into the standard kernel page tables. Everything + * else can be shared, see efi_sync_low_kernel_mappings(). + */ +int __init efi_alloc_page_tables(void) +{ + pgd_t *pgd; + pud_t *pud; + gfp_t gfp_mask; + + if (efi_enabled(EFI_OLD_MEMMAP)) + return 0; + + gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO; + efi_pgd = (pgd_t *)__get_free_page(gfp_mask); + if (!efi_pgd) + return -ENOMEM; + + pgd = efi_pgd + pgd_index(EFI_VA_END); + + pud = pud_alloc_one(NULL, 0); + if (!pud) { + free_page((unsigned long)efi_pgd); + return -ENOMEM; + } + + pgd_populate(NULL, pgd, pud); + + return 0; +} + /* * Add low kernel mappings for passing arguments to EFI functions. */ void efi_sync_low_kernel_mappings(void) { - unsigned num_pgds; - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + unsigned num_entries; + pgd_t *pgd_k, *pgd_efi; + pud_t *pud_k, *pud_efi; if (efi_enabled(EFI_OLD_MEMMAP)) return; - num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET); + /* + * We can share all PGD entries apart from the one entry that + * covers the EFI runtime mapping space. + * + * Make sure the EFI runtime region mappings are guaranteed to + * only span a single PGD entry and that the entry also maps + * other important kernel regions. + */ + BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); + BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != + (EFI_VA_END & PGDIR_MASK)); + + pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); + pgd_k = pgd_offset_k(PAGE_OFFSET); + + num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET); + memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries); - memcpy(pgd + pgd_index(PAGE_OFFSET), - init_mm.pgd + pgd_index(PAGE_OFFSET), - sizeof(pgd_t) * num_pgds); + /* + * We share all the PUD entries apart from those that map the + * EFI regions. Copy around them. + */ + BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0); + BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0); + + pgd_efi = efi_pgd + pgd_index(EFI_VA_END); + pud_efi = pud_offset(pgd_efi, 0); + + pgd_k = pgd_offset_k(EFI_VA_END); + pud_k = pud_offset(pgd_k, 0); + + num_entries = pud_index(EFI_VA_END); + memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + + pud_efi = pud_offset(pgd_efi, EFI_VA_START); + pud_k = pud_offset(pgd_k, EFI_VA_START); + + num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START); + memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); } int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) @@ -150,8 +221,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) if (efi_enabled(EFI_OLD_MEMMAP)) return 0; - efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd; - pgd = __va(efi_scratch.efi_pgt); + efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd); + pgd = efi_pgd; /* * It can happen that the physical address of new_memmap lands in memory @@ -216,16 +287,14 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) { - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); - - kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages); + kernel_unmap_pages_in_pgd(efi_pgd, pa_memmap, num_pages); } static void __init __map_region(efi_memory_desc_t *md, u64 va) { - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); unsigned long flags = 0; unsigned long pfn; + pgd_t *pgd = efi_pgd; if (!(md->attribute & EFI_MEMORY_WB)) flags |= _PAGE_PCD; @@ -334,9 +403,7 @@ void __init efi_runtime_mkexec(void) void __init efi_dump_pagetable(void) { #ifdef CONFIG_EFI_PGT_DUMP - pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); - - ptdump_walk_pgd_level(NULL, pgd); + ptdump_walk_pgd_level(NULL, efi_pgd); #endif } |