diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-01-28 20:03:40 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-01-28 20:03:40 +0300 |
commit | 634cd4b6afe15dca8df02bcba242b9b0c5e9b5a5 (patch) | |
tree | 4865e4ec64a0575614a81d88f523269c04e17f09 /arch/x86/platform | |
parent | d99391ec2b42d827d92003dcdcb96fadac9d862b (diff) | |
parent | ac6119e7f25b842fc061e8aec88c4f32d3bc28ef (diff) | |
download | linux-634cd4b6afe15dca8df02bcba242b9b0c5e9b5a5.tar.xz |
Merge branch 'efi-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull EFI updates from Ingo Molnar:
"The main changes in this cycle were:
- Cleanup of the GOP [graphics output] handling code in the EFI stub
- Complete refactoring of the mixed mode handling in the x86 EFI stub
- Overhaul of the x86 EFI boot/runtime code
- Increase robustness for mixed mode code
- Add the ability to disable DMA at the root port level in the EFI
stub
- Get rid of RWX mappings in the EFI memory map and page tables,
where possible
- Move the support code for the old EFI memory mapping style into its
only user, the SGI UV1+ support code.
- plus misc fixes, updates, smaller cleanups.
... and due to interactions with the RWX changes, another round of PAT
cleanups make a guest appearance via the EFI tree - with no side
effects intended"
* 'efi-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (75 commits)
efi/x86: Disable instrumentation in the EFI runtime handling code
efi/libstub/x86: Fix EFI server boot failure
efi/x86: Disallow efi=old_map in mixed mode
x86/boot/compressed: Relax sed symbol type regex for LLVM ld.lld
efi/x86: avoid KASAN false positives when accessing the 1: 1 mapping
efi: Fix handling of multiple efi_fake_mem= entries
efi: Fix efi_memmap_alloc() leaks
efi: Add tracking for dynamically allocated memmaps
efi: Add a flags parameter to efi_memory_map
efi: Fix comment for efi_mem_type() wrt absent physical addresses
efi/arm: Defer probe of PCIe backed efifb on DT systems
efi/x86: Limit EFI old memory map to SGI UV machines
efi/x86: Avoid RWX mappings for all of DRAM
efi/x86: Don't map the entire kernel text RW for mixed mode
x86/mm: Fix NX bit clearing issue in kernel_map_pages_in_pgd
efi/libstub/x86: Fix unused-variable warning
efi/libstub/x86: Use mandatory 16-byte stack alignment in mixed mode
efi/libstub/x86: Use const attribute for efi_is_64bit()
efi: Allow disabling PCI busmastering on bridges during boot
efi/x86: Allow translating 64-bit arguments for mixed mode calls
...
Diffstat (limited to 'arch/x86/platform')
-rw-r--r-- | arch/x86/platform/efi/Makefile | 3 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi.c | 398 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_32.c | 22 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_64.c | 317 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_stub_32.S | 109 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_stub_64.S | 43 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_thunk_64.S | 121 | ||||
-rw-r--r-- | arch/x86/platform/efi/quirks.c | 46 | ||||
-rw-r--r-- | arch/x86/platform/uv/bios_uv.c | 169 |
9 files changed, 501 insertions, 727 deletions
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index fe29f3f5d384..84b09c230cbd 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y -OBJECT_FILES_NON_STANDARD_efi_stub_$(BITS).o := y +KASAN_SANITIZE := n +GCOV_PROFILE := n obj-$(CONFIG_EFI) += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 38d44f36d5ed..59f7f6d60cf6 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -54,8 +54,8 @@ #include <asm/x86_init.h> #include <asm/uv/uv.h> -static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; +static u64 efi_systab_phys __initdata; static efi_config_table_type_t arch_tables[] __initdata = { #ifdef CONFIG_X86_UV @@ -97,32 +97,6 @@ static int __init setup_add_efi_memmap(char *arg) } early_param("add_efi_memmap", setup_add_efi_memmap); -static efi_status_t __init phys_efi_set_virtual_address_map( - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) -{ - efi_status_t status; - unsigned long flags; - pgd_t *save_pgd; - - save_pgd = efi_call_phys_prolog(); - if (!save_pgd) - return EFI_ABORTED; - - /* Disable interrupts around EFI calls: */ - local_irq_save(flags); - status = efi_call_phys(efi_phys.set_virtual_address_map, - memory_map_size, descriptor_size, - descriptor_version, virtual_map); - local_irq_restore(flags); - - efi_call_phys_epilog(save_pgd); - - return status; -} - void __init efi_find_mirror(void) { efi_memory_desc_t *md; @@ -330,10 +304,16 @@ static void __init efi_clean_memmap(void) } if (n_removal > 0) { - u64 size = efi.memmap.nr_map - n_removal; + struct efi_memory_map_data data = { + .phys_map = efi.memmap.phys_map, + .desc_version = efi.memmap.desc_version, + .desc_size = efi.memmap.desc_size, + .size = data.desc_size * (efi.memmap.nr_map - n_removal), + .flags = 0, + }; pr_warn("Removing %d invalid memory map entries.\n", n_removal); - efi_memmap_install(efi.memmap.phys_map, size); + efi_memmap_install(&data); } } @@ -353,89 +333,90 @@ void __init efi_print_memmap(void) } } -static int __init efi_systab_init(void *phys) +static int __init efi_systab_init(u64 phys) { + int size = efi_enabled(EFI_64BIT) ? sizeof(efi_system_table_64_t) + : sizeof(efi_system_table_32_t); + bool over4g = false; + void *p; + + p = early_memremap_ro(phys, size); + if (p == NULL) { + pr_err("Couldn't map the system table!\n"); + return -ENOMEM; + } + if (efi_enabled(EFI_64BIT)) { - efi_system_table_64_t *systab64; - struct efi_setup_data *data = NULL; - u64 tmp = 0; + const efi_system_table_64_t *systab64 = p; + + efi_systab.hdr = systab64->hdr; + efi_systab.fw_vendor = systab64->fw_vendor; + efi_systab.fw_revision = systab64->fw_revision; + efi_systab.con_in_handle = systab64->con_in_handle; + efi_systab.con_in = systab64->con_in; + efi_systab.con_out_handle = systab64->con_out_handle; + efi_systab.con_out = (void *)(unsigned long)systab64->con_out; + efi_systab.stderr_handle = systab64->stderr_handle; + efi_systab.stderr = systab64->stderr; + efi_systab.runtime = (void *)(unsigned long)systab64->runtime; + efi_systab.boottime = (void *)(unsigned long)systab64->boottime; + efi_systab.nr_tables = systab64->nr_tables; + efi_systab.tables = systab64->tables; + + over4g = systab64->con_in_handle > U32_MAX || + systab64->con_in > U32_MAX || + systab64->con_out_handle > U32_MAX || + systab64->con_out > U32_MAX || + systab64->stderr_handle > U32_MAX || + systab64->stderr > U32_MAX || + systab64->boottime > U32_MAX; if (efi_setup) { - data = early_memremap(efi_setup, sizeof(*data)); - if (!data) + struct efi_setup_data *data; + + data = early_memremap_ro(efi_setup, sizeof(*data)); + if (!data) { + early_memunmap(p, size); return -ENOMEM; - } - systab64 = early_memremap((unsigned long)phys, - sizeof(*systab64)); - if (systab64 == NULL) { - pr_err("Couldn't map the system table!\n"); - if (data) - early_memunmap(data, sizeof(*data)); - return -ENOMEM; - } + } + + efi_systab.fw_vendor = (unsigned long)data->fw_vendor; + efi_systab.runtime = (void *)(unsigned long)data->runtime; + efi_systab.tables = (unsigned long)data->tables; + + over4g |= data->fw_vendor > U32_MAX || + data->runtime > U32_MAX || + data->tables > U32_MAX; - efi_systab.hdr = systab64->hdr; - efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor : - systab64->fw_vendor; - tmp |= data ? data->fw_vendor : systab64->fw_vendor; - efi_systab.fw_revision = systab64->fw_revision; - efi_systab.con_in_handle = systab64->con_in_handle; - tmp |= systab64->con_in_handle; - efi_systab.con_in = systab64->con_in; - tmp |= systab64->con_in; - efi_systab.con_out_handle = systab64->con_out_handle; - tmp |= systab64->con_out_handle; - efi_systab.con_out = systab64->con_out; - tmp |= systab64->con_out; - efi_systab.stderr_handle = systab64->stderr_handle; - tmp |= systab64->stderr_handle; - efi_systab.stderr = systab64->stderr; - tmp |= systab64->stderr; - efi_systab.runtime = data ? - (void *)(unsigned long)data->runtime : - (void *)(unsigned long)systab64->runtime; - tmp |= data ? data->runtime : systab64->runtime; - efi_systab.boottime = (void *)(unsigned long)systab64->boottime; - tmp |= systab64->boottime; - efi_systab.nr_tables = systab64->nr_tables; - efi_systab.tables = data ? (unsigned long)data->tables : - systab64->tables; - tmp |= data ? data->tables : systab64->tables; - - early_memunmap(systab64, sizeof(*systab64)); - if (data) early_memunmap(data, sizeof(*data)); -#ifdef CONFIG_X86_32 - if (tmp >> 32) { - pr_err("EFI data located above 4GB, disabling EFI.\n"); - return -EINVAL; + } else { + over4g |= systab64->fw_vendor > U32_MAX || + systab64->runtime > U32_MAX || + systab64->tables > U32_MAX; } -#endif } else { - efi_system_table_32_t *systab32; - - systab32 = early_memremap((unsigned long)phys, - sizeof(*systab32)); - if (systab32 == NULL) { - pr_err("Couldn't map the system table!\n"); - return -ENOMEM; - } - - efi_systab.hdr = systab32->hdr; - efi_systab.fw_vendor = systab32->fw_vendor; - efi_systab.fw_revision = systab32->fw_revision; - efi_systab.con_in_handle = systab32->con_in_handle; - efi_systab.con_in = systab32->con_in; - efi_systab.con_out_handle = systab32->con_out_handle; - efi_systab.con_out = systab32->con_out; - efi_systab.stderr_handle = systab32->stderr_handle; - efi_systab.stderr = systab32->stderr; - efi_systab.runtime = (void *)(unsigned long)systab32->runtime; - efi_systab.boottime = (void *)(unsigned long)systab32->boottime; - efi_systab.nr_tables = systab32->nr_tables; - efi_systab.tables = systab32->tables; - - early_memunmap(systab32, sizeof(*systab32)); + const efi_system_table_32_t *systab32 = p; + + efi_systab.hdr = systab32->hdr; + efi_systab.fw_vendor = systab32->fw_vendor; + efi_systab.fw_revision = systab32->fw_revision; + efi_systab.con_in_handle = systab32->con_in_handle; + efi_systab.con_in = systab32->con_in; + efi_systab.con_out_handle = systab32->con_out_handle; + efi_systab.con_out = (void *)(unsigned long)systab32->con_out; + efi_systab.stderr_handle = systab32->stderr_handle; + efi_systab.stderr = systab32->stderr; + efi_systab.runtime = (void *)(unsigned long)systab32->runtime; + efi_systab.boottime = (void *)(unsigned long)systab32->boottime; + efi_systab.nr_tables = systab32->nr_tables; + efi_systab.tables = systab32->tables; + } + + early_memunmap(p, size); + + if (IS_ENABLED(CONFIG_X86_32) && over4g) { + pr_err("EFI data located above 4GB, disabling EFI.\n"); + return -EINVAL; } efi.systab = &efi_systab; @@ -455,108 +436,23 @@ static int __init efi_systab_init(void *phys) return 0; } -static int __init efi_runtime_init32(void) -{ - efi_runtime_services_32_t *runtime; - - runtime = early_memremap((unsigned long)efi.systab->runtime, - sizeof(efi_runtime_services_32_t)); - if (!runtime) { - pr_err("Could not map the runtime service table!\n"); - return -ENOMEM; - } - - /* - * We will only need *early* access to the SetVirtualAddressMap - * EFI runtime service. All other runtime services will be called - * via the virtual mapping. - */ - efi_phys.set_virtual_address_map = - (efi_set_virtual_address_map_t *) - (unsigned long)runtime->set_virtual_address_map; - early_memunmap(runtime, sizeof(efi_runtime_services_32_t)); - - return 0; -} - -static int __init efi_runtime_init64(void) -{ - efi_runtime_services_64_t *runtime; - - runtime = early_memremap((unsigned long)efi.systab->runtime, - sizeof(efi_runtime_services_64_t)); - if (!runtime) { - pr_err("Could not map the runtime service table!\n"); - return -ENOMEM; - } - - /* - * We will only need *early* access to the SetVirtualAddressMap - * EFI runtime service. All other runtime services will be called - * via the virtual mapping. - */ - efi_phys.set_virtual_address_map = - (efi_set_virtual_address_map_t *) - (unsigned long)runtime->set_virtual_address_map; - early_memunmap(runtime, sizeof(efi_runtime_services_64_t)); - - return 0; -} - -static int __init efi_runtime_init(void) -{ - int rv; - - /* - * Check out the runtime services table. We need to map - * the runtime services table so that we can grab the physical - * address of several of the EFI runtime functions, needed to - * set the firmware into virtual mode. - * - * When EFI_PARAVIRT is in force then we could not map runtime - * service memory region because we do not have direct access to it. - * However, runtime services are available through proxy functions - * (e.g. in case of Xen dom0 EFI implementation they call special - * hypercall which executes relevant EFI functions) and that is why - * they are always enabled. - */ - - if (!efi_enabled(EFI_PARAVIRT)) { - if (efi_enabled(EFI_64BIT)) - rv = efi_runtime_init64(); - else - rv = efi_runtime_init32(); - - if (rv) - return rv; - } - - set_bit(EFI_RUNTIME_SERVICES, &efi.flags); - - return 0; -} - void __init efi_init(void) { efi_char16_t *c16; char vendor[100] = "unknown"; int i = 0; - void *tmp; -#ifdef CONFIG_X86_32 - if (boot_params.efi_info.efi_systab_hi || - boot_params.efi_info.efi_memmap_hi) { + if (IS_ENABLED(CONFIG_X86_32) && + (boot_params.efi_info.efi_systab_hi || + boot_params.efi_info.efi_memmap_hi)) { pr_info("Table located above 4GB, disabling EFI.\n"); return; } - efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; -#else - efi_phys.systab = (efi_system_table_t *) - (boot_params.efi_info.efi_systab | - ((__u64)boot_params.efi_info.efi_systab_hi<<32)); -#endif - if (efi_systab_init(efi_phys.systab)) + efi_systab_phys = boot_params.efi_info.efi_systab | + ((__u64)boot_params.efi_info.efi_systab_hi << 32); + + if (efi_systab_init(efi_systab_phys)) return; efi.config_table = (unsigned long)efi.systab->tables; @@ -566,14 +462,16 @@ void __init efi_init(void) /* * Show what we know for posterity */ - c16 = tmp = early_memremap(efi.systab->fw_vendor, 2); + c16 = early_memremap_ro(efi.systab->fw_vendor, + sizeof(vendor) * sizeof(efi_char16_t)); if (c16) { - for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) - vendor[i] = *c16++; + for (i = 0; i < sizeof(vendor) - 1 && c16[i]; ++i) + vendor[i] = c16[i]; vendor[i] = '\0'; - } else + early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t)); + } else { pr_err("Could not map the firmware vendor!\n"); - early_memunmap(tmp, 2); + } pr_info("EFI v%u.%.02u by %s\n", efi.systab->hdr.revision >> 16, @@ -592,19 +490,21 @@ void __init efi_init(void) if (!efi_runtime_supported()) pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); - else { - if (efi_runtime_disabled() || efi_runtime_init()) { - efi_memmap_unmap(); - return; - } + + if (!efi_runtime_supported() || efi_runtime_disabled()) { + efi_memmap_unmap(); + return; } + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); efi_clean_memmap(); if (efi_enabled(EFI_DBG)) efi_print_memmap(); } +#if defined(CONFIG_X86_32) || defined(CONFIG_X86_UV) + void __init efi_set_executable(efi_memory_desc_t *md, bool executable) { u64 addr, npages; @@ -669,6 +569,8 @@ void __init old_map_region(efi_memory_desc_t *md) (unsigned long long)md->phys_addr); } +#endif + /* Merge contiguous regions of the same type and attribute */ static void __init efi_merge_regions(void) { @@ -707,7 +609,7 @@ static void __init get_systab_virt_addr(efi_memory_desc_t *md) size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; - systab = (u64)(unsigned long)efi_phys.systab; + systab = efi_systab_phys; if (md->phys_addr <= systab && systab < end) { systab += md->virt_addr - md->phys_addr; efi.systab = (efi_system_table_t *)(unsigned long)systab; @@ -767,7 +669,7 @@ static inline void *efi_map_next_entry_reverse(void *entry) */ static void *efi_map_next_entry(void *entry) { - if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) { + if (!efi_have_uv1_memmap() && efi_enabled(EFI_64BIT)) { /* * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE * config table feature requires us to map all entries @@ -828,7 +730,7 @@ static bool should_map_region(efi_memory_desc_t *md) * Map all of RAM so that we can access arguments in the 1:1 * mapping when making EFI runtime calls. */ - if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_is_native()) { + if (efi_is_mixed()) { if (md->type == EFI_CONVENTIONAL_MEMORY || md->type == EFI_LOADER_DATA || md->type == EFI_LOADER_CODE) @@ -899,11 +801,11 @@ static void __init kexec_enter_virtual_mode(void) /* * We don't do virtual mode, since we don't do runtime services, on - * non-native EFI. With efi=old_map, we don't do runtime services in + * non-native EFI. With the UV1 memmap, we don't do runtime services in * kexec kernel because in the initial boot something else might * have been mapped at these virtual addresses. */ - if (!efi_is_native() || efi_enabled(EFI_OLD_MEMMAP)) { + if (efi_is_mixed() || efi_have_uv1_memmap()) { efi_memmap_unmap(); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return; @@ -958,11 +860,6 @@ static void __init kexec_enter_virtual_mode(void) efi.runtime_version = efi_systab.hdr.revision; efi_native_runtime_setup(); - - efi.set_virtual_address_map = NULL; - - if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) - runtime_code_page_mkexec(); #endif } @@ -974,9 +871,9 @@ static void __init kexec_enter_virtual_mode(void) * * The old method which used to update that memory descriptor with the * virtual address obtained from ioremap() is still supported when the - * kernel is booted with efi=old_map on its command line. Same old - * method enabled the runtime services to be called without having to - * thunk back into physical mode for every invocation. + * kernel is booted on SG1 UV1 hardware. Same old method enabled the + * runtime services to be called without having to thunk back into + * physical mode for every invocation. * * The new method does a pagetable switch in a preemption-safe manner * so that we're in a different address space when calling a runtime @@ -999,16 +896,14 @@ static void __init __efi_enter_virtual_mode(void) if (efi_alloc_page_tables()) { pr_err("Failed to allocate EFI page tables\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } efi_merge_regions(); new_memmap = efi_map_regions(&count, &pg_shift); if (!new_memmap) { pr_err("Error reallocating memory, EFI runtime non-functional!\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } pa = __pa(new_memmap); @@ -1022,8 +917,7 @@ static void __init __efi_enter_virtual_mode(void) if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) { pr_err("Failed to remap late EFI memory map\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } if (efi_enabled(EFI_DBG)) { @@ -1031,34 +925,22 @@ static void __init __efi_enter_virtual_mode(void) efi_print_memmap(); } - BUG_ON(!efi.systab); + if (WARN_ON(!efi.systab)) + goto err; - if (efi_setup_page_tables(pa, 1 << pg_shift)) { - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; - } + if (efi_setup_page_tables(pa, 1 << pg_shift)) + goto err; efi_sync_low_kernel_mappings(); - if (efi_is_native()) { - status = phys_efi_set_virtual_address_map( - efi.memmap.desc_size * count, - efi.memmap.desc_size, - efi.memmap.desc_version, - (efi_memory_desc_t *)pa); - } else { - status = efi_thunk_set_virtual_address_map( - efi_phys.set_virtual_address_map, - efi.memmap.desc_size * count, - efi.memmap.desc_size, - efi.memmap.desc_version, - (efi_memory_desc_t *)pa); - } - + status = efi_set_virtual_address_map(efi.memmap.desc_size * count, + efi.memmap.desc_size, + efi.memmap.desc_version, + (efi_memory_desc_t *)pa); if (status != EFI_SUCCESS) { - pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", - status); - panic("EFI call to SetVirtualAddressMap() failed!"); + pr_err("Unable to switch EFI into virtual mode (status=%lx)!\n", + status); + goto err; } efi_free_boot_services(); @@ -1071,13 +953,11 @@ static void __init __efi_enter_virtual_mode(void) */ efi.runtime_version = efi_systab.hdr.revision; - if (efi_is_native()) + if (!efi_is_mixed()) efi_native_runtime_setup(); else efi_thunk_runtime_setup(); - efi.set_virtual_address_map = NULL; - /* * Apply more restrictive page table mapping attributes now that * SVAM() has been called and the firmware has performed all @@ -1087,6 +967,10 @@ static void __init __efi_enter_virtual_mode(void) /* clean DUMMY object */ efi_delete_dummy_variable(); + return; + +err: + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); } void __init efi_enter_virtual_mode(void) @@ -1102,20 +986,6 @@ void __init efi_enter_virtual_mode(void) efi_dump_pagetable(); } -static int __init arch_parse_efi_cmdline(char *str) -{ - if (!str) { - pr_warn("need at least one option\n"); - return -EINVAL; - } - - if (parse_option_str(str, "old_map")) - set_bit(EFI_OLD_MEMMAP, &efi.flags); - - return 0; -} -early_param("efi", arch_parse_efi_cmdline); - bool efi_is_table_address(unsigned long phys_addr) { unsigned int i; diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 9959657127f4..71dddd1620f9 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -66,9 +66,17 @@ void __init efi_map_region(efi_memory_desc_t *md) void __init efi_map_region_fixed(efi_memory_desc_t *md) {} void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} -pgd_t * __init efi_call_phys_prolog(void) +efi_status_t efi_call_svam(efi_set_virtual_address_map_t *__efiapi *, + u32, u32, u32, void *); + +efi_status_t __init efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) { struct desc_ptr gdt_descr; + efi_status_t status; + unsigned long flags; pgd_t *save_pgd; /* Current pgd is swapper_pg_dir, we'll restore it later: */ @@ -80,14 +88,18 @@ pgd_t * __init efi_call_phys_prolog(void) gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); - return save_pgd; -} + /* Disable interrupts around EFI calls: */ + local_irq_save(flags); + status = efi_call_svam(&efi.systab->runtime->set_virtual_address_map, + memory_map_size, descriptor_size, + descriptor_version, virtual_map); + local_irq_restore(flags); -void __init efi_call_phys_epilog(pgd_t *save_pgd) -{ load_fixmap_gdt(0); load_cr3(save_pgd); __flush_tlb_all(); + + return status; } void __init efi_runtime_update_mappings(void) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 08ce8177c3af..e2accfe636bd 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -57,142 +57,6 @@ static u64 efi_va = EFI_VA_START; struct efi_scratch efi_scratch; -static void __init early_code_mapping_set_exec(int executable) -{ - efi_memory_desc_t *md; - - if (!(__supported_pte_mask & _PAGE_NX)) - return; - - /* Make EFI service code area executable */ - for_each_efi_memory_desc(md) { - if (md->type == EFI_RUNTIME_SERVICES_CODE || - md->type == EFI_BOOT_SERVICES_CODE) - efi_set_executable(md, executable); - } -} - -pgd_t * __init efi_call_phys_prolog(void) -{ - unsigned long vaddr, addr_pgd, addr_p4d, addr_pud; - pgd_t *save_pgd, *pgd_k, *pgd_efi; - p4d_t *p4d, *p4d_k, *p4d_efi; - pud_t *pud; - - int pgd; - int n_pgds, i, j; - - if (!efi_enabled(EFI_OLD_MEMMAP)) { - efi_switch_mm(&efi_mm); - return efi_mm.pgd; - } - - early_code_mapping_set_exec(1); - - n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); - save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL); - if (!save_pgd) - return NULL; - - /* - * Build 1:1 identity mapping for efi=old_map usage. Note that - * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while - * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical - * address X, the pud_index(X) != pud_index(__va(X)), we can only copy - * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping. - * This means here we can only reuse the PMD tables of the direct mapping. - */ - for (pgd = 0; pgd < n_pgds; pgd++) { - addr_pgd = (unsigned long)(pgd * PGDIR_SIZE); - vaddr = (unsigned long)__va(pgd * PGDIR_SIZE); - pgd_efi = pgd_offset_k(addr_pgd); - save_pgd[pgd] = *pgd_efi; - - p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd); - if (!p4d) { - pr_err("Failed to allocate p4d table!\n"); - goto out; - } - - for (i = 0; i < PTRS_PER_P4D; i++) { - addr_p4d = addr_pgd + i * P4D_SIZE; - p4d_efi = p4d + p4d_index(addr_p4d); - - pud = pud_alloc(&init_mm, p4d_efi, addr_p4d); - if (!pud) { - pr_err("Failed to allocate pud table!\n"); - goto out; - } - - for (j = 0; j < PTRS_PER_PUD; j++) { - addr_pud = addr_p4d + j * PUD_SIZE; - - if (addr_pud > (max_pfn << PAGE_SHIFT)) - break; - - vaddr = (unsigned long)__va(addr_pud); - - pgd_k = pgd_offset_k(vaddr); - p4d_k = p4d_offset(pgd_k, vaddr); - pud[j] = *pud_offset(p4d_k, vaddr); - } - } - pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX; - } - - __flush_tlb_all(); - return save_pgd; -out: - efi_call_phys_epilog(save_pgd); - return NULL; -} - -void __init efi_call_phys_epilog(pgd_t *save_pgd) -{ - /* - * After the lock is released, the original page table is restored. - */ - int pgd_idx, i; - int nr_pgds; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - - if (!efi_enabled(EFI_OLD_MEMMAP)) { - efi_switch_mm(efi_scratch.prev_mm); - return; - } - - nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); - - for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) { - pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE); - set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); - - if (!pgd_present(*pgd)) - continue; - - for (i = 0; i < PTRS_PER_P4D; i++) { - p4d = p4d_offset(pgd, - pgd_idx * PGDIR_SIZE + i * P4D_SIZE); - - if (!p4d_present(*p4d)) - continue; - - pud = (pud_t *)p4d_page_vaddr(*p4d); - pud_free(&init_mm, pud); - } - - p4d = (p4d_t *)pgd_page_vaddr(*pgd); - p4d_free(&init_mm, p4d); - } - - kfree(save_pgd); - - __flush_tlb_all(); - early_code_mapping_set_exec(0); -} - EXPORT_SYMBOL_GPL(efi_mm); /* @@ -211,7 +75,7 @@ int __init efi_alloc_page_tables(void) pud_t *pud; gfp_t gfp_mask; - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return 0; gfp_mask = GFP_KERNEL | __GFP_ZERO; @@ -252,7 +116,7 @@ void efi_sync_low_kernel_mappings(void) pud_t *pud_k, *pud_efi; pgd_t *efi_pgd = efi_mm.pgd; - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return; /* @@ -346,7 +210,7 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) unsigned npages; pgd_t *pgd = efi_mm.pgd; - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return 0; /* @@ -373,10 +237,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * as trim_bios_range() will reserve the first page and isolate it away * from memory allocators anyway. */ - pf = _PAGE_RW; - if (sev_active()) - pf |= _PAGE_ENC; - if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, pf)) { pr_err("Failed to create 1:1 mapping for the first page!\n"); return 1; @@ -388,21 +248,22 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) * text and allocate a new stack because we can't rely on the * stack pointer being < 4GB. */ - if (!IS_ENABLED(CONFIG_EFI_MIXED) || efi_is_native()) + if (!efi_is_mixed()) return 0; page = alloc_page(GFP_KERNEL|__GFP_DMA32); - if (!page) - panic("Unable to allocate EFI runtime stack < 4GB\n"); + if (!page) { + pr_err("Unable to allocate EFI runtime stack < 4GB\n"); + return 1; + } - efi_scratch.phys_stack = virt_to_phys(page_address(page)); - efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */ + efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */ - npages = (_etext - _text) >> PAGE_SHIFT; + npages = (__end_rodata_aligned - _text) >> PAGE_SHIFT; text = __pa(_text); pfn = text >> PAGE_SHIFT; - pf = _PAGE_RW | _PAGE_ENC; + pf = _PAGE_ENC; if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) { pr_err("Failed to map kernel text 1:1\n"); return 1; @@ -417,6 +278,22 @@ static void __init __map_region(efi_memory_desc_t *md, u64 va) unsigned long pfn; pgd_t *pgd = efi_mm.pgd; + /* + * EFI_RUNTIME_SERVICES_CODE regions typically cover PE/COFF + * executable images in memory that consist of both R-X and + * RW- sections, so we cannot apply read-only or non-exec + * permissions just yet. However, modern EFI systems provide + * a memory attributes table that describes those sections + * with the appropriate restricted permissions, which are + * applied in efi_runtime_update_mappings() below. All other + * regions can be mapped non-executable at this point, with + * the exception of boot services code regions, but those will + * be unmapped again entirely in efi_free_boot_services(). + */ + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_RUNTIME_SERVICES_CODE) + flags |= _PAGE_NX; + if (!(md->attribute & EFI_MEMORY_WB)) flags |= _PAGE_PCD; @@ -434,7 +311,7 @@ void __init efi_map_region(efi_memory_desc_t *md) unsigned long size = md->num_pages << PAGE_SHIFT; u64 pa = md->phys_addr; - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return old_map_region(md); /* @@ -449,7 +326,7 @@ void __init efi_map_region(efi_memory_desc_t *md) * booting in EFI mixed mode, because even though we may be * running a 64-bit kernel, the firmware may only be 32-bit. */ - if (!efi_is_native () && IS_ENABLED(CONFIG_EFI_MIXED)) { + if (efi_is_mixed()) { md->virt_addr = md->phys_addr; return; } @@ -491,26 +368,6 @@ void __init efi_map_region_fixed(efi_memory_desc_t *md) __map_region(md, md->virt_addr); } -void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, - u32 type, u64 attribute) -{ - unsigned long last_map_pfn; - - if (type == EFI_MEMORY_MAPPED_IO) - return ioremap(phys_addr, size); - - last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); - if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { - unsigned long top = last_map_pfn << PAGE_SHIFT; - efi_ioremap(top, size - (top - phys_addr), type, attribute); - } - - if (!(attribute & EFI_MEMORY_WB)) - efi_memory_uc((u64)(unsigned long)__va(phys_addr), size); - - return (void __iomem *)__va(phys_addr); -} - void __init parse_efi_setup(u64 phys_addr, u32 data_len) { efi_setup = phys_addr + sizeof(struct setup_data); @@ -559,7 +416,7 @@ void __init efi_runtime_update_mappings(void) { efi_memory_desc_t *md; - if (efi_enabled(EFI_OLD_MEMMAP)) { + if (efi_have_uv1_memmap()) { if (__supported_pte_mask & _PAGE_NX) runtime_code_page_mkexec(); return; @@ -613,7 +470,7 @@ void __init efi_runtime_update_mappings(void) void __init efi_dump_pagetable(void) { #ifdef CONFIG_EFI_PGT_DUMP - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) ptdump_walk_pgd_level(NULL, swapper_pg_dir); else ptdump_walk_pgd_level(NULL, efi_mm.pgd); @@ -634,63 +491,74 @@ void efi_switch_mm(struct mm_struct *mm) switch_mm(efi_scratch.prev_mm, mm, NULL); } -#ifdef CONFIG_EFI_MIXED -extern efi_status_t efi64_thunk(u32, ...); - static DEFINE_SPINLOCK(efi_runtime_lock); -#define runtime_service32(func) \ -({ \ - u32 table = (u32)(unsigned long)efi.systab; \ - u32 *rt, *___f; \ - \ - rt = (u32 *)(table + offsetof(efi_system_table_32_t, runtime)); \ - ___f = (u32 *)(*rt + offsetof(efi_runtime_services_32_t, func)); \ - *___f; \ +/* + * DS and ES contain user values. We need to save them. + * The 32-bit EFI code needs a valid DS, ES, and SS. There's no + * need to save the old SS: __KERNEL_DS is always acceptable. + */ +#define __efi_thunk(func, ...) \ +({ \ + efi_runtime_services_32_t *__rt; \ + unsigned short __ds, __es; \ + efi_status_t ____s; \ + \ + __rt = (void *)(unsigned long)efi.systab->mixed_mode.runtime; \ + \ + savesegment(ds, __ds); \ + savesegment(es, __es); \ + \ + loadsegment(ss, __KERNEL_DS); \ + loadsegment(ds, __KERNEL_DS); \ + loadsegment(es, __KERNEL_DS); \ + \ + ____s = efi64_thunk(__rt->func, __VA_ARGS__); \ + \ + loadsegment(ds, __ds); \ + loadsegment(es, __es); \ + \ + ____s ^= (____s & BIT(31)) | (____s & BIT_ULL(31)) << 32; \ + ____s; \ }) /* * Switch to the EFI page tables early so that we can access the 1:1 * runtime services mappings which are not mapped in any other page - * tables. This function must be called before runtime_service32(). + * tables. * * Also, disable interrupts because the IDT points to 64-bit handlers, * which aren't going to function correctly when we switch to 32-bit. */ -#define efi_thunk(f, ...) \ +#define efi_thunk(func...) \ ({ \ efi_status_t __s; \ - u32 __func; \ \ arch_efi_call_virt_setup(); \ \ - __func = runtime_service32(f); \ - __s = efi64_thunk(__func, __VA_ARGS__); \ + __s = __efi_thunk(func); \ \ arch_efi_call_virt_teardown(); \ \ __s; \ }) -efi_status_t efi_thunk_set_virtual_address_map( - void *phys_set_virtual_address_map, - unsigned long memory_map_size, - unsigned long descriptor_size, - u32 descriptor_version, - efi_memory_desc_t *virtual_map) +static efi_status_t __init __no_sanitize_address +efi_thunk_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) { efi_status_t status; unsigned long flags; - u32 func; efi_sync_low_kernel_mappings(); local_irq_save(flags); efi_switch_mm(&efi_mm); - func = (u32)(unsigned long)phys_set_virtual_address_map; - status = efi64_thunk(func, memory_map_size, descriptor_size, - descriptor_version, virtual_map); + status = __efi_thunk(set_virtual_address_map, memory_map_size, + descriptor_size, descriptor_version, virtual_map); efi_switch_mm(efi_scratch.prev_mm); local_irq_restore(flags); @@ -993,8 +861,11 @@ efi_thunk_query_capsule_caps(efi_capsule_header_t **capsules, return EFI_UNSUPPORTED; } -void efi_thunk_runtime_setup(void) +void __init efi_thunk_runtime_setup(void) { + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return; + efi.get_time = efi_thunk_get_time; efi.set_time = efi_thunk_set_time; efi.get_wakeup_time = efi_thunk_get_wakeup_time; @@ -1010,4 +881,46 @@ void efi_thunk_runtime_setup(void) efi.update_capsule = efi_thunk_update_capsule; efi.query_capsule_caps = efi_thunk_query_capsule_caps; } -#endif /* CONFIG_EFI_MIXED */ + +efi_status_t __init __no_sanitize_address +efi_set_virtual_address_map(unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) +{ + efi_status_t status; + unsigned long flags; + pgd_t *save_pgd = NULL; + + if (efi_is_mixed()) + return efi_thunk_set_virtual_address_map(memory_map_size, + descriptor_size, + descriptor_version, + virtual_map); + + if (efi_have_uv1_memmap()) { + save_pgd = efi_uv1_memmap_phys_prolog(); + if (!save_pgd) + return EFI_ABORTED; + } else { + efi_switch_mm(&efi_mm); + } + + kernel_fpu_begin(); + + /* Disable interrupts around EFI calls: */ + local_irq_save(flags); + status = efi_call(efi.systab->runtime->set_virtual_address_map, + memory_map_size, descriptor_size, + descriptor_version, virtual_map); + local_irq_restore(flags); + + kernel_fpu_end(); + + if (save_pgd) + efi_uv1_memmap_phys_epilog(save_pgd); + else + efi_switch_mm(efi_scratch.prev_mm); + + return status; +} diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S index eed8b5b441f8..75c46e7a809f 100644 --- a/arch/x86/platform/efi/efi_stub_32.S +++ b/arch/x86/platform/efi/efi_stub_32.S @@ -7,118 +7,43 @@ */ #include <linux/linkage.h> +#include <linux/init.h> #include <asm/page_types.h> -/* - * efi_call_phys(void *, ...) is a function with variable parameters. - * All the callers of this function assure that all the parameters are 4-bytes. - */ - -/* - * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. - * So we'd better save all of them at the beginning of this function and restore - * at the end no matter how many we use, because we can not assure EFI runtime - * service functions will comply with gcc calling convention, too. - */ + __INIT +SYM_FUNC_START(efi_call_svam) + push 8(%esp) + push 8(%esp) + push %ecx + push %edx -.text -SYM_FUNC_START(efi_call_phys) /* - * 0. The function can only be called in Linux kernel. So CS has been - * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found - * the values of these registers are the same. And, the corresponding - * GDT entries are identical. So I will do nothing about segment reg - * and GDT, but change GDT base register in prolog and epilog. - */ - - /* - * 1. Now I am running with EIP = <physical address> + PAGE_OFFSET. - * But to make it smoothly switch from virtual mode to flat mode. - * The mapping of lower virtual memory has been created in prolog and - * epilog. + * Switch to the flat mapped alias of this routine, by jumping to the + * address of label '1' after subtracting PAGE_OFFSET from it. */ movl $1f, %edx subl $__PAGE_OFFSET, %edx jmp *%edx 1: - /* - * 2. Now on the top of stack is the return - * address in the caller of efi_call_phys(), then parameter 1, - * parameter 2, ..., param n. To make things easy, we save the return - * address of efi_call_phys in a global variable. - */ - popl %edx - movl %edx, saved_return_addr - /* get the function pointer into ECX*/ - popl %ecx - movl %ecx, efi_rt_function_ptr - movl $2f, %edx - subl $__PAGE_OFFSET, %edx - pushl %edx - - /* - * 3. Clear PG bit in %CR0. - */ + /* disable paging */ movl %cr0, %edx andl $0x7fffffff, %edx movl %edx, %cr0 - jmp 1f -1: - /* - * 4. Adjust stack pointer. - */ + /* convert the stack pointer to a flat mapped address */ subl $__PAGE_OFFSET, %esp - /* - * 5. Call the physical function. - */ - jmp *%ecx + /* call the EFI routine */ + call *(%eax) -2: - /* - * 6. After EFI runtime service returns, control will return to - * following instruction. We'd better readjust stack pointer first. - */ - addl $__PAGE_OFFSET, %esp + /* convert ESP back to a kernel VA, and pop the outgoing args */ + addl $__PAGE_OFFSET + 16, %esp - /* - * 7. Restore PG bit - */ + /* re-enable paging */ movl %cr0, %edx orl $0x80000000, %edx movl %edx, %cr0 - jmp 1f -1: - /* - * 8. Now restore the virtual mode from flat mode by - * adding EIP with PAGE_OFFSET. - */ - movl $1f, %edx - jmp *%edx -1: - - /* - * 9. Balance the stack. And because EAX contain the return value, - * we'd better not clobber it. - */ - leal efi_rt_function_ptr, %edx - movl (%edx), %ecx - pushl %ecx - /* - * 10. Push the saved return address onto the stack and return. - */ - leal saved_return_addr, %edx - movl (%edx), %ecx - pushl %ecx ret -SYM_FUNC_END(efi_call_phys) -.previous - -.data -saved_return_addr: - .long 0 -efi_rt_function_ptr: - .long 0 +SYM_FUNC_END(efi_call_svam) diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index b1d2313fe3bf..15da118f04f0 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -8,41 +8,12 @@ */ #include <linux/linkage.h> -#include <asm/segment.h> -#include <asm/msr.h> -#include <asm/processor-flags.h> -#include <asm/page_types.h> +#include <asm/nospec-branch.h> -#define SAVE_XMM \ - mov %rsp, %rax; \ - subq $0x70, %rsp; \ - and $~0xf, %rsp; \ - mov %rax, (%rsp); \ - mov %cr0, %rax; \ - clts; \ - mov %rax, 0x8(%rsp); \ - movaps %xmm0, 0x60(%rsp); \ - movaps %xmm1, 0x50(%rsp); \ - movaps %xmm2, 0x40(%rsp); \ - movaps %xmm3, 0x30(%rsp); \ - movaps %xmm4, 0x20(%rsp); \ - movaps %xmm5, 0x10(%rsp) - -#define RESTORE_XMM \ - movaps 0x60(%rsp), %xmm0; \ - movaps 0x50(%rsp), %xmm1; \ - movaps 0x40(%rsp), %xmm2; \ - movaps 0x30(%rsp), %xmm3; \ - movaps 0x20(%rsp), %xmm4; \ - movaps 0x10(%rsp), %xmm5; \ - mov 0x8(%rsp), %rsi; \ - mov %rsi, %cr0; \ - mov (%rsp), %rsp - -SYM_FUNC_START(efi_call) +SYM_FUNC_START(__efi_call) pushq %rbp movq %rsp, %rbp - SAVE_XMM + and $~0xf, %rsp mov 16(%rbp), %rax subq $48, %rsp mov %r9, 32(%rsp) @@ -50,9 +21,7 @@ SYM_FUNC_START(efi_call) mov %r8, %r9 mov %rcx, %r8 mov %rsi, %rcx - call *%rdi - addq $48, %rsp - RESTORE_XMM - popq %rbp + CALL_NOSPEC %rdi + leave ret -SYM_FUNC_END(efi_call) +SYM_FUNC_END(__efi_call) diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 3189f1394701..26f0da238c1c 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -25,15 +25,16 @@ .text .code64 -SYM_FUNC_START(efi64_thunk) +SYM_CODE_START(__efi64_thunk) push %rbp push %rbx /* * Switch to 1:1 mapped 32-bit stack pointer. */ - movq %rsp, efi_saved_sp(%rip) + movq %rsp, %rax movq efi_scratch(%rip), %rsp + push %rax /* * Calculate the physical address of the kernel text. @@ -41,113 +42,31 @@ SYM_FUNC_START(efi64_thunk) movq $__START_KERNEL_map, %rax subq phys_base(%rip), %rax - /* - * Push some physical addresses onto the stack. This is easier - * to do now in a code64 section while the assembler can address - * 64-bit values. Note that all the addresses on the stack are - * 32-bit. - */ - subq $16, %rsp - leaq efi_exit32(%rip), %rbx - subq %rax, %rbx - movl %ebx, 8(%rsp) - - leaq __efi64_thunk(%rip), %rbx + leaq 1f(%rip), %rbp + leaq 2f(%rip), %rbx + subq %rax, %rbp subq %rax, %rbx - call *%rbx - - movq efi_saved_sp(%rip), %rsp - pop %rbx - pop %rbp - retq -SYM_FUNC_END(efi64_thunk) -/* - * We run this function from the 1:1 mapping. - * - * This function must be invoked with a 1:1 mapped stack. - */ -SYM_FUNC_START_LOCAL(__efi64_thunk) - movl %ds, %eax - push %rax - movl %es, %eax - push %rax - movl %ss, %eax - push %rax - - subq $32, %rsp - movl %esi, 0x0(%rsp) - movl %edx, 0x4(%rsp) - movl %ecx, 0x8(%rsp) - movq %r8, %rsi - movl %esi, 0xc(%rsp) - movq %r9, %rsi - movl %esi, 0x10(%rsp) - - leaq 1f(%rip), %rbx - movq %rbx, func_rt_ptr(%rip) + subq $28, %rsp + movl %ebx, 0x0(%rsp) /* return address */ + movl %esi, 0x4(%rsp) + movl %edx, 0x8(%rsp) + movl %ecx, 0xc(%rsp) + movl %r8d, 0x10(%rsp) + movl %r9d, 0x14(%rsp) /* Switch to 32-bit descriptor */ pushq $__KERNEL32_CS - leaq efi_enter32(%rip), %rax - pushq %rax + pushq %rdi /* EFI runtime service address */ lretq -1: addq $32, %rsp - +1: movq 24(%rsp), %rsp pop %rbx - movl %ebx, %ss - pop %rbx - movl %ebx, %es - pop %rbx - movl %ebx, %ds - - /* - * Convert 32-bit status code into 64-bit. - */ - test %rax, %rax - jz 1f - movl %eax, %ecx - andl $0x0fffffff, %ecx - andl $0xf0000000, %eax - shl $32, %rax - or %rcx, %rax -1: - ret -SYM_FUNC_END(__efi64_thunk) - -SYM_FUNC_START_LOCAL(efi_exit32) - movq func_rt_ptr(%rip), %rax - push %rax - mov %rdi, %rax - ret -SYM_FUNC_END(efi_exit32) + pop %rbp + retq .code32 -/* - * EFI service pointer must be in %edi. - * - * The stack should represent the 32-bit calling convention. - */ -SYM_FUNC_START_LOCAL(efi_enter32) - movl $__KERNEL_DS, %eax - movl %eax, %ds - movl %eax, %es - movl %eax, %ss - - call *%edi - - /* We must preserve return value */ - movl %eax, %edi - - movl 72(%esp), %eax - pushl $__KERNEL_CS - pushl %eax - +2: pushl $__KERNEL_CS + pushl %ebp lret -SYM_FUNC_END(efi_enter32) - - .data - .balign 8 -func_rt_ptr: .quad 0 -efi_saved_sp: .quad 0 +SYM_CODE_END(__efi64_thunk) diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index ab91218d27bd..88d32c06cffa 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -244,7 +244,7 @@ EXPORT_SYMBOL_GPL(efi_query_variable_store); */ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) { - phys_addr_t new_phys, new_size; + struct efi_memory_map_data data = { 0 }; struct efi_mem_range mr; efi_memory_desc_t md; int num_entries; @@ -272,24 +272,21 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) num_entries = efi_memmap_split_count(&md, &mr.range); num_entries += efi.memmap.nr_map; - new_size = efi.memmap.desc_size * num_entries; - - new_phys = efi_memmap_alloc(num_entries); - if (!new_phys) { + if (efi_memmap_alloc(num_entries, &data) != 0) { pr_err("Could not allocate boot services memmap\n"); return; } - new = early_memremap(new_phys, new_size); + new = early_memremap(data.phys_map, data.size); if (!new) { pr_err("Failed to map new boot services memmap\n"); return; } efi_memmap_insert(&efi.memmap, new, &mr); - early_memunmap(new, new_size); + early_memunmap(new, data.size); - efi_memmap_install(new_phys, num_entries); + efi_memmap_install(&data); e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); e820__update_table(e820_table); } @@ -385,10 +382,10 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md) /* * To Do: Remove this check after adding functionality to unmap EFI boot - * services code/data regions from direct mapping area because - * "efi=old_map" maps EFI regions in swapper_pg_dir. + * services code/data regions from direct mapping area because the UV1 + * memory map maps EFI regions in swapper_pg_dir. */ - if (efi_enabled(EFI_OLD_MEMMAP)) + if (efi_have_uv1_memmap()) return; /* @@ -396,7 +393,7 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md) * EFI runtime calls, hence don't unmap EFI boot services code/data * regions. */ - if (!efi_is_native()) + if (efi_is_mixed()) return; if (kernel_unmap_pages_in_pgd(pgd, pa, md->num_pages)) @@ -408,7 +405,7 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md) void __init efi_free_boot_services(void) { - phys_addr_t new_phys, new_size; + struct efi_memory_map_data data = { 0 }; efi_memory_desc_t *md; int num_entries = 0; void *new, *new_md; @@ -463,14 +460,12 @@ void __init efi_free_boot_services(void) if (!num_entries) return; - new_size = efi.memmap.desc_size * num_entries; - new_phys = efi_memmap_alloc(num_entries); - if (!new_phys) { + if (efi_memmap_alloc(num_entries, &data) != 0) { pr_err("Failed to allocate new EFI memmap\n"); return; } - new = memremap(new_phys, new_size, MEMREMAP_WB); + new = memremap(data.phys_map, data.size, MEMREMAP_WB); if (!new) { pr_err("Failed to map new EFI memmap\n"); return; @@ -494,7 +489,7 @@ void __init efi_free_boot_services(void) memunmap(new); - if (efi_memmap_install(new_phys, num_entries)) { + if (efi_memmap_install(&data) != 0) { pr_err("Could not install new EFI memmap\n"); return; } @@ -559,7 +554,7 @@ out: return ret; } -static const struct dmi_system_id sgi_uv1_dmi[] = { +static const struct dmi_system_id sgi_uv1_dmi[] __initconst = { { NULL, "SGI UV1", { DMI_MATCH(DMI_PRODUCT_NAME, "Stoutland Platform"), DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"), @@ -582,8 +577,15 @@ void __init efi_apply_memmap_quirks(void) } /* UV2+ BIOS has a fix for this issue. UV1 still needs the quirk. */ - if (dmi_check_system(sgi_uv1_dmi)) - set_bit(EFI_OLD_MEMMAP, &efi.flags); + if (dmi_check_system(sgi_uv1_dmi)) { + if (IS_ENABLED(CONFIG_X86_UV)) { + set_bit(EFI_UV1_MEMMAP, &efi.flags); + } else { + pr_warn("EFI runtime disabled, needs CONFIG_X86_UV=y on UV1\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + efi_memmap_unmap(); + } + } } /* @@ -721,7 +723,7 @@ void efi_recover_from_page_fault(unsigned long phys_addr) /* * Make sure that an efi runtime service caused the page fault. * "efi_mm" cannot be used to check if the page fault had occurred - * in the firmware context because efi=old_map doesn't use efi_pgd. + * in the firmware context because the UV1 memmap doesn't use efi_pgd. */ if (efi_rts_work.efi_rts_id == EFI_NONE) return; diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index ece9cb9c1189..607f58147311 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -31,13 +31,16 @@ static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, return BIOS_STATUS_UNIMPLEMENTED; /* - * If EFI_OLD_MEMMAP is set, we need to fall back to using our old EFI + * If EFI_UV1_MEMMAP is set, we need to fall back to using our old EFI * callback method, which uses efi_call() directly, with the kernel page tables: */ - if (unlikely(efi_enabled(EFI_OLD_MEMMAP))) + if (unlikely(efi_enabled(EFI_UV1_MEMMAP))) { + kernel_fpu_begin(); ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5); - else + kernel_fpu_end(); + } else { ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5); + } return ret; } @@ -214,3 +217,163 @@ int uv_bios_init(void) pr_info("UV: UVsystab: Revision:%x\n", uv_systab->revision); return 0; } + +static void __init early_code_mapping_set_exec(int executable) +{ + efi_memory_desc_t *md; + + if (!(__supported_pte_mask & _PAGE_NX)) + return; + + /* Make EFI service code area executable */ + for_each_efi_memory_desc(md) { + if (md->type == EFI_RUNTIME_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_CODE) + efi_set_executable(md, executable); + } +} + +void __init efi_uv1_memmap_phys_epilog(pgd_t *save_pgd) +{ + /* + * After the lock is released, the original page table is restored. + */ + int pgd_idx, i; + int nr_pgds; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + + nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); + + for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) { + pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE); + set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); + + if (!pgd_present(*pgd)) + continue; + + for (i = 0; i < PTRS_PER_P4D; i++) { + p4d = p4d_offset(pgd, + pgd_idx * PGDIR_SIZE + i * P4D_SIZE); + + if (!p4d_present(*p4d)) + continue; + + pud = (pud_t *)p4d_page_vaddr(*p4d); + pud_free(&init_mm, pud); + } + + p4d = (p4d_t *)pgd_page_vaddr(*pgd); + p4d_free(&init_mm, p4d); + } + + kfree(save_pgd); + + __flush_tlb_all(); + early_code_mapping_set_exec(0); +} + +pgd_t * __init efi_uv1_memmap_phys_prolog(void) +{ + unsigned long vaddr, addr_pgd, addr_p4d, addr_pud; + pgd_t *save_pgd, *pgd_k, *pgd_efi; + p4d_t *p4d, *p4d_k, *p4d_efi; + pud_t *pud; + + int pgd; + int n_pgds, i, j; + + early_code_mapping_set_exec(1); + + n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); + save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL); + if (!save_pgd) + return NULL; + + /* + * Build 1:1 identity mapping for UV1 memmap usage. Note that + * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while + * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical + * address X, the pud_index(X) != pud_index(__va(X)), we can only copy + * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping. + * This means here we can only reuse the PMD tables of the direct mapping. + */ + for (pgd = 0; pgd < n_pgds; pgd++) { + addr_pgd = (unsigned long)(pgd * PGDIR_SIZE); + vaddr = (unsigned long)__va(pgd * PGDIR_SIZE); + pgd_efi = pgd_offset_k(addr_pgd); + save_pgd[pgd] = *pgd_efi; + + p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd); + if (!p4d) { + pr_err("Failed to allocate p4d table!\n"); + goto out; + } + + for (i = 0; i < PTRS_PER_P4D; i++) { + addr_p4d = addr_pgd + i * P4D_SIZE; + p4d_efi = p4d + p4d_index(addr_p4d); + + pud = pud_alloc(&init_mm, p4d_efi, addr_p4d); + if (!pud) { + pr_err("Failed to allocate pud table!\n"); + goto out; + } + + for (j = 0; j < PTRS_PER_PUD; j++) { + addr_pud = addr_p4d + j * PUD_SIZE; + + if (addr_pud > (max_pfn << PAGE_SHIFT)) + break; + + vaddr = (unsigned long)__va(addr_pud); + + pgd_k = pgd_offset_k(vaddr); + p4d_k = p4d_offset(pgd_k, vaddr); + pud[j] = *pud_offset(p4d_k, vaddr); + } + } + pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX; + } + + __flush_tlb_all(); + return save_pgd; +out: + efi_uv1_memmap_phys_epilog(save_pgd); + return NULL; +} + +void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, + u32 type, u64 attribute) +{ + unsigned long last_map_pfn; + + if (type == EFI_MEMORY_MAPPED_IO) + return ioremap(phys_addr, size); + + last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); + if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { + unsigned long top = last_map_pfn << PAGE_SHIFT; + efi_ioremap(top, size - (top - phys_addr), type, attribute); + } + + if (!(attribute & EFI_MEMORY_WB)) + efi_memory_uc((u64)(unsigned long)__va(phys_addr), size); + + return (void __iomem *)__va(phys_addr); +} + +static int __init arch_parse_efi_cmdline(char *str) +{ + if (!str) { + pr_warn("need at least one option\n"); + return -EINVAL; + } + + if (!efi_is_mixed() && parse_option_str(str, "old_map")) + set_bit(EFI_UV1_MEMMAP, &efi.flags); + + return 0; +} +early_param("efi", arch_parse_efi_cmdline); |