From 96738c69a7fcdbf0d7c9df0c8a27660011e82a7b Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 13 Jan 2015 15:25:00 +0000 Subject: x86/efi: Avoid triple faults during EFI mixed mode calls Andy pointed out that if an NMI or MCE is received while we're in the middle of an EFI mixed mode call a triple fault will occur. This can happen, for example, when issuing an EFI mixed mode call while running perf. The reason for the triple fault is that we execute the mixed mode call in 32-bit mode with paging disabled but with 64-bit kernel IDT handlers installed throughout the call. At Andy's suggestion, stop playing the games we currently do at runtime, such as disabling paging and installing a 32-bit GDT for __KERNEL_CS. We can simply switch to the __KERNEL32_CS descriptor before invoking firmware services, and run in compatibility mode. This way, if an NMI/MCE does occur the kernel IDT handler will execute correctly, since it'll jump to __KERNEL_CS automatically. However, this change is only possible post-ExitBootServices(). Before then the firmware "owns" the machine and expects for its 32-bit IDT handlers to be left intact to service interrupts, etc. So, we now need to distinguish between early boot and runtime invocations of EFI services. During early boot, we need to restore the GDT that the firmware expects to be present. We can only jump to the __KERNEL32_CS code segment for mixed mode calls after ExitBootServices() has been invoked. A liberal sprinkling of comments in the thunking code should make the differences in early and late environments more apparent. Reported-by: Andy Lutomirski Tested-by: Borislav Petkov Cc: Signed-off-by: Matt Fleming --- arch/x86/boot/compressed/Makefile | 1 + arch/x86/boot/compressed/efi_stub_64.S | 25 ---- arch/x86/boot/compressed/efi_thunk_64.S | 196 ++++++++++++++++++++++++++++++++ 3 files changed, 197 insertions(+), 25 deletions(-) create mode 100644 arch/x86/boot/compressed/efi_thunk_64.S (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index ad754b4411f7..8bd44e8ee6e2 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -49,6 +49,7 @@ $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \ $(objtree)/drivers/firmware/efi/libstub/lib.a +vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o $(obj)/vmlinux: $(vmlinux-objs-y) FORCE $(call if_changed,ld) diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S index 7ff3632806b1..99494dff2113 100644 --- a/arch/x86/boot/compressed/efi_stub_64.S +++ b/arch/x86/boot/compressed/efi_stub_64.S @@ -3,28 +3,3 @@ #include #include "../../platform/efi/efi_stub_64.S" - -#ifdef CONFIG_EFI_MIXED - .code64 - .text -ENTRY(efi64_thunk) - push %rbp - push %rbx - - subq $16, %rsp - leaq efi_exit32(%rip), %rax - movl %eax, 8(%rsp) - leaq efi_gdt64(%rip), %rax - movl %eax, 4(%rsp) - movl %eax, 2(%rax) /* Fixup the gdt base address */ - leaq efi32_boot_gdt(%rip), %rax - movl %eax, (%rsp) - - call __efi64_thunk - - addq $16, %rsp - pop %rbx - pop %rbp - ret -ENDPROC(efi64_thunk) -#endif /* CONFIG_EFI_MIXED */ diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S new file mode 100644 index 000000000000..630384a4c14a --- /dev/null +++ b/arch/x86/boot/compressed/efi_thunk_64.S @@ -0,0 +1,196 @@ +/* + * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming + * + * Early support for invoking 32-bit EFI services from a 64-bit kernel. + * + * Because this thunking occurs before ExitBootServices() we have to + * restore the firmware's 32-bit GDT before we make EFI serivce calls, + * since the firmware's 32-bit IDT is still currently installed and it + * needs to be able to service interrupts. + * + * On the plus side, we don't have to worry about mangling 64-bit + * addresses into 32-bits because we're executing with an identify + * mapped pagetable and haven't transitioned to 64-bit virtual addresses + * yet. + */ + +#include +#include +#include +#include +#include + + .code64 + .text +ENTRY(efi64_thunk) + push %rbp + push %rbx + + subq $8, %rsp + leaq efi_exit32(%rip), %rax + movl %eax, 4(%rsp) + leaq efi_gdt64(%rip), %rax + movl %eax, (%rsp) + movl %eax, 2(%rax) /* Fixup the gdt base address */ + + movl %ds, %eax + push %rax + movl %es, %eax + push %rax + movl %ss, %eax + push %rax + + /* + * Convert x86-64 ABI params to i386 ABI + */ + subq $32, %rsp + movl %esi, 0x0(%rsp) + movl %edx, 0x4(%rsp) + movl %ecx, 0x8(%rsp) + movq %r8, %rsi + movl %esi, 0xc(%rsp) + movq %r9, %rsi + movl %esi, 0x10(%rsp) + + sgdt save_gdt(%rip) + + leaq 1f(%rip), %rbx + movq %rbx, func_rt_ptr(%rip) + + /* + * Switch to gdt with 32-bit segments. This is the firmware GDT + * that was installed when the kernel started executing. This + * pointer was saved at the EFI stub entry point in head_64.S. + */ + leaq efi32_boot_gdt(%rip), %rax + lgdt (%rax) + + pushq $__KERNEL_CS + leaq efi_enter32(%rip), %rax + pushq %rax + lretq + +1: addq $32, %rsp + + lgdt save_gdt(%rip) + + pop %rbx + movl %ebx, %ss + pop %rbx + movl %ebx, %es + pop %rbx + movl %ebx, %ds + + /* + * Convert 32-bit status code into 64-bit. + */ + test %rax, %rax + jz 1f + movl %eax, %ecx + andl $0x0fffffff, %ecx + andl $0xf0000000, %eax + shl $32, %rax + or %rcx, %rax +1: + addq $8, %rsp + pop %rbx + pop %rbp + ret +ENDPROC(efi64_thunk) + +ENTRY(efi_exit32) + movq func_rt_ptr(%rip), %rax + push %rax + mov %rdi, %rax + ret +ENDPROC(efi_exit32) + + .code32 +/* + * EFI service pointer must be in %edi. + * + * The stack should represent the 32-bit calling convention. + */ +ENTRY(efi_enter32) + movl $__KERNEL_DS, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %ss + + /* Reload pgtables */ + movl %cr3, %eax + movl %eax, %cr3 + + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + + /* Disable long mode via EFER */ + movl $MSR_EFER, %ecx + rdmsr + btrl $_EFER_LME, %eax + wrmsr + + call *%edi + + /* We must preserve return value */ + movl %eax, %edi + + /* + * Some firmware will return with interrupts enabled. Be sure to + * disable them before we switch GDTs. + */ + cli + + movl 56(%esp), %eax + movl %eax, 2(%eax) + lgdtl (%eax) + + movl %cr4, %eax + btsl $(X86_CR4_PAE_BIT), %eax + movl %eax, %cr4 + + movl %cr3, %eax + movl %eax, %cr3 + + movl $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + wrmsr + + xorl %eax, %eax + lldt %ax + + movl 60(%esp), %eax + pushl $__KERNEL_CS + pushl %eax + + /* Enable paging */ + movl %cr0, %eax + btsl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + lret +ENDPROC(efi_enter32) + + .data + .balign 8 + .global efi32_boot_gdt +efi32_boot_gdt: .word 0 + .quad 0 + +save_gdt: .word 0 + .quad 0 +func_rt_ptr: .quad 0 + + .global efi_gdt64 +efi_gdt64: + .word efi_gdt64_end - efi_gdt64 + .long 0 /* Filled out by user */ + .word 0 + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x00af9a000000ffff /* __KERNEL_CS */ + .quad 0x00cf92000000ffff /* __KERNEL_DS */ + .quad 0x0080890000000000 /* TS descriptor */ + .quad 0x0000000000000000 /* TS continued */ +efi_gdt64_end: -- cgit v1.2.3 From f47233c2d34f243ecdaac179c3408a39ff9216a7 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 13 Feb 2015 16:04:55 +0100 Subject: x86/mm/ASLR: Propagate base load address calculation Commit: e2b32e678513 ("x86, kaslr: randomize module base load address") makes the base address for module to be unconditionally randomized in case when CONFIG_RANDOMIZE_BASE is defined and "nokaslr" option isn't present on the commandline. This is not consistent with how choose_kernel_location() decides whether it will randomize kernel load base. Namely, CONFIG_HIBERNATION disables kASLR (unless "kaslr" option is explicitly specified on kernel commandline), which makes the state space larger than what module loader is looking at. IOW CONFIG_HIBERNATION && CONFIG_RANDOMIZE_BASE is a valid config option, kASLR wouldn't be applied by default in that case, but module loader is not aware of that. Instead of fixing the logic in module.c, this patch takes more generic aproach. It introduces a new bootparam setup data_type SETUP_KASLR and uses that to pass the information whether kaslr has been applied during kernel decompression, and sets a global 'kaslr_enabled' variable accordingly, so that any kernel code (module loading, livepatching, ...) can make decisions based on its value. x86 module loader is converted to make use of this flag. Signed-off-by: Jiri Kosina Acked-by: Kees Cook Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/alpine.LNX.2.00.1502101411280.10719@pobox.suse.cz [ Always dump correct kaslr status when panicking ] Signed-off-by: Borislav Petkov --- arch/x86/boot/compressed/aslr.c | 34 +++++++++++++++++++++++++++++++++- arch/x86/boot/compressed/misc.c | 3 ++- arch/x86/boot/compressed/misc.h | 6 ++++-- arch/x86/include/asm/page_types.h | 3 +++ arch/x86/include/uapi/asm/bootparam.h | 1 + arch/x86/kernel/module.c | 11 ++--------- arch/x86/kernel/setup.c | 22 ++++++++++++++++++---- 7 files changed, 63 insertions(+), 17 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index bb1376381985..7083c16cccba 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -14,6 +14,13 @@ static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; +struct kaslr_setup_data { + __u64 next; + __u32 type; + __u32 len; + __u8 data[1]; +} kaslr_setup_data; + #define I8254_PORT_CONTROL 0x43 #define I8254_PORT_COUNTER0 0x40 #define I8254_CMD_READBACK 0xC0 @@ -295,7 +302,29 @@ static unsigned long find_random_addr(unsigned long minimum, return slots_fetch_random(); } -unsigned char *choose_kernel_location(unsigned char *input, +static void add_kaslr_setup_data(struct boot_params *params, __u8 enabled) +{ + struct setup_data *data; + + kaslr_setup_data.type = SETUP_KASLR; + kaslr_setup_data.len = 1; + kaslr_setup_data.next = 0; + kaslr_setup_data.data[0] = enabled; + + data = (struct setup_data *)(unsigned long)params->hdr.setup_data; + + while (data && data->next) + data = (struct setup_data *)(unsigned long)data->next; + + if (data) + data->next = (unsigned long)&kaslr_setup_data; + else + params->hdr.setup_data = (unsigned long)&kaslr_setup_data; + +} + +unsigned char *choose_kernel_location(struct boot_params *params, + unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size) @@ -306,14 +335,17 @@ unsigned char *choose_kernel_location(unsigned char *input, #ifdef CONFIG_HIBERNATION if (!cmdline_find_option_bool("kaslr")) { debug_putstr("KASLR disabled by default...\n"); + add_kaslr_setup_data(params, 0); goto out; } #else if (cmdline_find_option_bool("nokaslr")) { debug_putstr("KASLR disabled by cmdline...\n"); + add_kaslr_setup_data(params, 0); goto out; } #endif + add_kaslr_setup_data(params, 1); /* Record the various known unsafe memory ranges. */ mem_avoid_init((unsigned long)input, input_size, diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index a950864a64da..5903089c818f 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -401,7 +401,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, * the entire decompressed kernel plus relocation table, or the * entire decompressed kernel plus .bss and .brk sections. */ - output = choose_kernel_location(input_data, input_len, output, + output = choose_kernel_location(real_mode, input_data, input_len, + output, output_len > run_size ? output_len : run_size); diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 24e3e569a13c..6d6730743024 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -56,7 +56,8 @@ int cmdline_find_option_bool(const char *option); #if CONFIG_RANDOMIZE_BASE /* aslr.c */ -unsigned char *choose_kernel_location(unsigned char *input, +unsigned char *choose_kernel_location(struct boot_params *params, + unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size); @@ -64,7 +65,8 @@ unsigned char *choose_kernel_location(unsigned char *input, bool has_cpuflag(int flag); #else static inline -unsigned char *choose_kernel_location(unsigned char *input, +unsigned char *choose_kernel_location(struct boot_params *params, + unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size) diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index f97fbe3abb67..3d43ce36eaba 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -3,6 +3,7 @@ #include #include +#include /* PAGE_SHIFT determines the page size */ #define PAGE_SHIFT 12 @@ -51,6 +52,8 @@ extern int devmem_is_allowed(unsigned long pagenr); extern unsigned long max_low_pfn_mapped; extern unsigned long max_pfn_mapped; +extern bool kaslr_enabled; + static inline phys_addr_t get_max_mapped(void) { return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT; diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 225b0988043a..44e6dd7e36a2 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -7,6 +7,7 @@ #define SETUP_DTB 2 #define SETUP_PCI 3 #define SETUP_EFI 4 +#define SETUP_KASLR 5 /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e69f9882bf95..c3c59a3a14ad 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -32,6 +32,7 @@ #include #include +#include #if 0 #define DEBUGP(fmt, ...) \ @@ -46,21 +47,13 @@ do { \ #ifdef CONFIG_RANDOMIZE_BASE static unsigned long module_load_offset; -static int randomize_modules = 1; /* Mutex protects the module_load_offset. */ static DEFINE_MUTEX(module_kaslr_mutex); -static int __init parse_nokaslr(char *p) -{ - randomize_modules = 0; - return 0; -} -early_param("nokaslr", parse_nokaslr); - static unsigned long int get_module_load_offset(void) { - if (randomize_modules) { + if (kaslr_enabled) { mutex_lock(&module_kaslr_mutex); /* * Calculate the module_load_offset the first time this diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ab4734e5411d..16b6043cb073 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -121,6 +121,8 @@ unsigned long max_low_pfn_mapped; unsigned long max_pfn_mapped; +bool __read_mostly kaslr_enabled = false; + #ifdef CONFIG_DMI RESERVE_BRK(dmi_alloc, 65536); #endif @@ -424,6 +426,11 @@ static void __init reserve_initrd(void) } #endif /* CONFIG_BLK_DEV_INITRD */ +static void __init parse_kaslr_setup(u64 pa_data, u32 data_len) +{ + kaslr_enabled = (bool)(pa_data + sizeof(struct setup_data)); +} + static void __init parse_setup_data(void) { struct setup_data *data; @@ -451,6 +458,9 @@ static void __init parse_setup_data(void) case SETUP_EFI: parse_efi_setup(pa_data, data_len); break; + case SETUP_KASLR: + parse_kaslr_setup(pa_data, data_len); + break; default: break; } @@ -833,10 +843,14 @@ static void __init trim_low_memory_range(void) static int dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) { - pr_emerg("Kernel Offset: 0x%lx from 0x%lx " - "(relocation range: 0x%lx-0x%lx)\n", - (unsigned long)&_text - __START_KERNEL, __START_KERNEL, - __START_KERNEL_map, MODULES_VADDR-1); + if (kaslr_enabled) + pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n", + (unsigned long)&_text - __START_KERNEL, + __START_KERNEL, + __START_KERNEL_map, + MODULES_VADDR-1); + else + pr_emerg("Kernel Offset: disabled\n"); return 0; } -- cgit v1.2.3