From 4188f063e3694ccbf2a2044cf17cc325f91e458f Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 25 Jun 2018 14:38:08 +0200 Subject: x86/mm: Get rid of KERN_CONT in show_fault_oops() KERN_CONT leads to split lines in kernel output and complicates useful changes to printk like printing context before each line. Only acceptable use of continuations is basically boot-time testing. Get rid of it. Signed-off-by: Dmitry Vyukov Reviewed-by: Sergey Senozhatsky Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180625123808.227417-1-dvyukov@gmail.com [ Removed unnecessary parentheses and prettified the printk statement. ] Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9a84a0d08727..ee85766e6329 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -671,13 +671,9 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, printk(smep_warning, from_kuid(&init_user_ns, current_uid())); } - printk(KERN_ALERT "BUG: unable to handle kernel "); - if (address < PAGE_SIZE) - printk(KERN_CONT "NULL pointer dereference"); - else - printk(KERN_CONT "paging request"); - - printk(KERN_CONT " at %px\n", (void *) address); + pr_alert("BUG: unable to handle kernel %s at %px\n", + address < PAGE_SIZE ? "NULL pointer dereference" : "paging request", + (void *)address); dump_pagetable(address); } -- cgit v1.2.3 From 236f0cd286b67291796362feeac4f342900cfa82 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 25 Jun 2018 04:21:59 -0600 Subject: x86/entry/32: Add explicit 'l' instruction suffix Omitting suffixes from instructions in AT&T mode is bad practice when operand size cannot be determined by the assembler from register operands, and is likely going to be warned about by upstream GAS in the future (mine does already). Add the single missing 'l' suffix here. Signed-off-by: Jan Beulich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/5B30C24702000078001CD6A6@prv1-mh.provo.novell.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 2582881d19ce..c371bfee137a 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -477,7 +477,7 @@ ENTRY(entry_SYSENTER_32) * whereas POPF does not.) */ addl $PT_EFLAGS-PT_DS, %esp /* point esp at pt_regs->flags */ - btr $X86_EFLAGS_IF_BIT, (%esp) + btrl $X86_EFLAGS_IF_BIT, (%esp) popfl /* -- cgit v1.2.3 From 0e311d237d7f3022b7dafb639b42541bfb42fe94 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 25 Jun 2018 13:24:27 +0300 Subject: x86/mm: Don't free P4D table when it is folded at runtime When the P4D page table layer is folded at runtime, the p4d_free() should do nothing, the same as in . It seems this bug should cause double-free in efi_call_phys_epilog(), but I don't know how to trigger that code path, so I can't confirm that by testing. Signed-off-by: Andrey Ryabinin Reviewed-by: Kirill A. Shutemov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org # 4.17 Fixes: 98219dda2ab5 ("x86/mm: Fold p4d page table layer at runtime") Link: http://lkml.kernel.org/r/20180625102427.15015-1-aryabinin@virtuozzo.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgalloc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index ada6410fd2ec..fbd578daa66e 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -184,6 +184,9 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr) static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) { + if (!pgtable_l5_enabled()) + return; + BUG_ON((unsigned long)p4d & (PAGE_SIZE-1)); free_page((unsigned long)p4d); } -- cgit v1.2.3 From 22cd978e598618e82c3c3348d2069184f6884182 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Jun 2018 22:45:52 -0700 Subject: x86/entry/64/compat: Fix "x86/entry/64/compat: Preserve r8-r11 in int $0x80" Commit: 8bb2610bc496 ("x86/entry/64/compat: Preserve r8-r11 in int $0x80") was busted: my original patch had a minor conflict with some of the nospec changes, but "git apply" is very clever and silently accepted the patch by making the same changes to a different function in the same file. There was obviously a huge offset, but "git apply" for some reason doesn't feel any need to say so. Move the changes to the correct function. Now the test_syscall_vdso_32 selftests passes. If anyone cares to observe the original problem, try applying the patch at: https://lore.kernel.org/lkml/d4c4d9985fbe64f8c9e19291886453914b48caee.1523975710.git.luto@kernel.org/raw to the kernel at 316d097c4cd4e7f2ef50c40cff2db266593c4ec4: - "git am" and "git apply" accept the patch without any complaints at all - "patch -p1" at least prints out a message about the huge offset. Reported-by: zhijianx.li@intel.com Signed-off-by: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org #v4.17+ Fixes: 8bb2610bc496 ("x86/entry/64/compat: Preserve r8-r11 in int $0x80") Link: http://lkml.kernel.org/r/6012b922485401bc42676e804171ded262fc2ef2.1530078306.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64_compat.S | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 9de7f1e1dede..7d0df78db727 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -84,13 +84,13 @@ ENTRY(entry_SYSENTER_compat) pushq %rdx /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ - pushq %r8 /* pt_regs->r8 */ + pushq $0 /* pt_regs->r8 = 0 */ xorl %r8d, %r8d /* nospec r8 */ - pushq %r9 /* pt_regs->r9 */ + pushq $0 /* pt_regs->r9 = 0 */ xorl %r9d, %r9d /* nospec r9 */ - pushq %r10 /* pt_regs->r10 */ + pushq $0 /* pt_regs->r10 = 0 */ xorl %r10d, %r10d /* nospec r10 */ - pushq %r11 /* pt_regs->r11 */ + pushq $0 /* pt_regs->r11 = 0 */ xorl %r11d, %r11d /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ xorl %ebx, %ebx /* nospec rbx */ @@ -374,13 +374,13 @@ ENTRY(entry_INT80_compat) pushq %rcx /* pt_regs->cx */ xorl %ecx, %ecx /* nospec cx */ pushq $-ENOSYS /* pt_regs->ax */ - pushq $0 /* pt_regs->r8 = 0 */ + pushq %r8 /* pt_regs->r8 */ xorl %r8d, %r8d /* nospec r8 */ - pushq $0 /* pt_regs->r9 = 0 */ + pushq %r9 /* pt_regs->r9 */ xorl %r9d, %r9d /* nospec r9 */ - pushq $0 /* pt_regs->r10 = 0 */ + pushq %r10 /* pt_regs->r10*/ xorl %r10d, %r10d /* nospec r10 */ - pushq $0 /* pt_regs->r11 = 0 */ + pushq %r11 /* pt_regs->r11 */ xorl %r11d, %r11d /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ xorl %ebx, %ebx /* nospec rbx */ -- cgit v1.2.3 From cfe19577047e74cdac5826adbdc2337d8437f8fb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 25 Jun 2018 15:08:52 +0300 Subject: x86/efi: Fix efi_call_phys_epilog() with CONFIG_X86_5LEVEL=y Open-coded page table entry checks don't work correctly when we fold the page table level at runtime. pgd_present() on 4-level paging machine always returns true, but open-coded version of the check may return false-negative result and we silently skip the rest of the loop body in efi_call_phys_epilog(). Replace open-coded checks with proper helpers. Signed-off-by: Kirill A. Shutemov Acked-by: Ard Biesheuvel Cc: Andrey Ryabinin Cc: Baoquan He Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org # v4.12+ Fixes: 94133e46a0f5 ("x86/efi: Correct EFI identity mapping under 'efi=old_map' when KASLR is enabled") Link: http://lkml.kernel.org/r/20180625120852.18300-1-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index e01f7ceb9e7a..77873ce700ae 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -166,14 +166,14 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE); set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); - if (!(pgd_val(*pgd) & _PAGE_PRESENT)) + if (!pgd_present(*pgd)) continue; for (i = 0; i < PTRS_PER_P4D; i++) { p4d = p4d_offset(pgd, pgd_idx * PGDIR_SIZE + i * P4D_SIZE); - if (!(p4d_val(*p4d) & _PAGE_PRESENT)) + if (!p4d_present(*p4d)) continue; pud = (pud_t *)p4d_page_vaddr(*p4d); -- cgit v1.2.3 From b8c1e4293a5d1dfd19ab7b0984bfce8191940500 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 26 Jun 2018 13:03:41 +0300 Subject: x86/mm: Drop unneeded __always_inline for p4d page table helpers This reverts the following commits: 1ea66554d3b0 ("x86/mm: Mark p4d_offset() __always_inline") 046c0dbec023 ("x86: Mark native_set_p4d() as __always_inline") p4d_offset(), native_set_p4d() and native_p4d_clear() were marked __always_inline in attempt to move __pgtable_l5_enabled into __initdata section. It was required as KASAN initialization code is a user of USE_EARLY_PGTABLE_L5, so all pgtable_l5_enabled() translated to __pgtable_l5_enabled there. This includes pgtable_l5_enabled() called from inline p4d helpers. If compiler would decided to not inline these p4d helpers, but leave them standalone, we end up with section mismatch. We don't need __always_inline here anymore. __pgtable_l5_enabled moved back to be __ro_after_init. See the following commit: 51be13351517 ("Revert "x86/mm: Mark __pgtable_l5_enabled __initdata"") Signed-off-by: Kirill A. Shutemov Cc: Arnd Bergmann Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180626100341.49910-1-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 2 +- arch/x86/include/asm/pgtable_64.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 99ecde23c3ec..5715647fc4fe 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -898,7 +898,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) #define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd)) /* to find an entry in a page-table-directory. */ -static __always_inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) { if (!pgtable_l5_enabled()) return (p4d_t *)pgd; diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 0fdcd21dadbd..3c5385f9a88f 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -216,7 +216,7 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) } #endif -static __always_inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) +static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { pgd_t pgd; @@ -230,7 +230,7 @@ static __always_inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) *p4dp = native_make_p4d(native_pgd_val(pgd)); } -static __always_inline void native_p4d_clear(p4d_t *p4d) +static inline void native_p4d_clear(p4d_t *p4d) { native_set_p4d(p4d, native_make_p4d(0)); } -- cgit v1.2.3 From d79d0d8ad0cb3d782b41631dfeac8eb05e414bcd Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 27 Jun 2018 11:07:15 +0200 Subject: x86/mm: Clean up the printk()s in show_fault_oops() - Remove 'nx_warning' and 'smep_warning', which are just pointless obfuscation. - Also convert to pr_crit(). Suggested-by: Joe Perches Signed-off-by: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180627090715.28076-1-dvyukov@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ee85766e6329..2aafa6ab6103 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -641,11 +641,6 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address) return 0; } -static const char nx_warning[] = KERN_CRIT -"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n"; -static const char smep_warning[] = KERN_CRIT -"unable to execute userspace code (SMEP?) (uid: %d)\n"; - static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address) @@ -664,11 +659,13 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, pte = lookup_address_in_pgd(pgd, address, &level); if (pte && pte_present(*pte) && !pte_exec(*pte)) - printk(nx_warning, from_kuid(&init_user_ns, current_uid())); + pr_crit("kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", + from_kuid(&init_user_ns, current_uid())); if (pte && pte_present(*pte) && pte_exec(*pte) && (pgd_flags(*pgd) & _PAGE_USER) && (__read_cr4() & X86_CR4_SMEP)) - printk(smep_warning, from_kuid(&init_user_ns, current_uid())); + pr_crit("unable to execute userspace code (SMEP?) (uid: %d)\n", + from_kuid(&init_user_ns, current_uid())); } pr_alert("BUG: unable to handle kernel %s at %px\n", -- cgit v1.2.3 From 124049decbb121ec32742c94fb5d9d6bed8f24d8 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 27 Jun 2018 23:26:13 -0700 Subject: x86/e820: put !E820_TYPE_RAM regions into memblock.reserved There is a kernel panic that is triggered when reading /proc/kpageflags on the kernel booted with kernel parameter 'memmap=nn[KMG]!ss[KMG]': BUG: unable to handle kernel paging request at fffffffffffffffe PGD 9b20e067 P4D 9b20e067 PUD 9b210067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 2 PID: 1728 Comm: page-types Not tainted 4.17.0-rc6-mm1-v4.17-rc6-180605-0816-00236-g2dfb086ef02c+ #160 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.fc28 04/01/2014 RIP: 0010:stable_page_flags+0x27/0x3c0 Code: 00 00 00 0f 1f 44 00 00 48 85 ff 0f 84 a0 03 00 00 41 54 55 49 89 fc 53 48 8b 57 08 48 8b 2f 48 8d 42 ff 83 e2 01 48 0f 44 c7 <48> 8b 00 f6 c4 01 0f 84 10 03 00 00 31 db 49 8b 54 24 08 4c 89 e7 RSP: 0018:ffffbbd44111fde0 EFLAGS: 00010202 RAX: fffffffffffffffe RBX: 00007fffffffeff9 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000202 RDI: ffffed1182fff5c0 RBP: ffffffffffffffff R08: 0000000000000001 R09: 0000000000000001 R10: ffffbbd44111fed8 R11: 0000000000000000 R12: ffffed1182fff5c0 R13: 00000000000bffd7 R14: 0000000002fff5c0 R15: ffffbbd44111ff10 FS: 00007efc4335a500(0000) GS:ffff93a5bfc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffffffffffffffe CR3: 00000000b2a58000 CR4: 00000000001406e0 Call Trace: kpageflags_read+0xc7/0x120 proc_reg_read+0x3c/0x60 __vfs_read+0x36/0x170 vfs_read+0x89/0x130 ksys_pread64+0x71/0x90 do_syscall_64+0x5b/0x160 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7efc42e75e23 Code: 09 00 ba 9f 01 00 00 e8 ab 81 f4 ff 66 2e 0f 1f 84 00 00 00 00 00 90 83 3d 29 0a 2d 00 00 75 13 49 89 ca b8 11 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 db d3 01 00 48 89 04 24 According to kernel bisection, this problem became visible due to commit f7f99100d8d9 ("mm: stop zeroing memory during allocation in vmemmap") which changes how struct pages are initialized. Memblock layout affects the pfn ranges covered by node/zone. Consider that we have a VM with 2 NUMA nodes and each node has 4GB memory, and the default (no memmap= given) memblock layout is like below: MEMBLOCK configuration: memory size = 0x00000001fff75c00 reserved size = 0x000000000300c000 memory.cnt = 0x4 memory[0x0] [0x0000000000001000-0x000000000009efff], 0x000000000009e000 bytes on node 0 flags: 0x0 memory[0x1] [0x0000000000100000-0x00000000bffd6fff], 0x00000000bfed7000 bytes on node 0 flags: 0x0 memory[0x2] [0x0000000100000000-0x000000013fffffff], 0x0000000040000000 bytes on node 0 flags: 0x0 memory[0x3] [0x0000000140000000-0x000000023fffffff], 0x0000000100000000 bytes on node 1 flags: 0x0 ... If you give memmap=1G!4G (so it just covers memory[0x2]), the range [0x100000000-0x13fffffff] is gone: MEMBLOCK configuration: memory size = 0x00000001bff75c00 reserved size = 0x000000000300c000 memory.cnt = 0x3 memory[0x0] [0x0000000000001000-0x000000000009efff], 0x000000000009e000 bytes on node 0 flags: 0x0 memory[0x1] [0x0000000000100000-0x00000000bffd6fff], 0x00000000bfed7000 bytes on node 0 flags: 0x0 memory[0x2] [0x0000000140000000-0x000000023fffffff], 0x0000000100000000 bytes on node 1 flags: 0x0 ... This causes shrinking node 0's pfn range because it is calculated by the address range of memblock.memory. So some of struct pages in the gap range are left uninitialized. We have a function zero_resv_unavail() which does zeroing the struct pages within the reserved unavailable range (i.e. memblock.memory && !memblock.reserved). This patch utilizes it to cover all unavailable ranges by putting them into memblock.reserved. Link: http://lkml.kernel.org/r/20180615072947.GB23273@hori1.linux.bs1.fc.nec.co.jp Fixes: f7f99100d8d9 ("mm: stop zeroing memory during allocation in vmemmap") Signed-off-by: Naoya Horiguchi Tested-by: Oscar Salvador Tested-by: "Herton R. Krzesinski" Acked-by: Michal Hocko Reviewed-by: Pavel Tatashin Cc: Steven Sistare Cc: Daniel Jordan Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/e820.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index d1f25c831447..c88c23c658c1 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1248,6 +1248,7 @@ void __init e820__memblock_setup(void) { int i; u64 end; + u64 addr = 0; /* * The bootstrap memblock region count maximum is 128 entries @@ -1264,13 +1265,21 @@ void __init e820__memblock_setup(void) struct e820_entry *entry = &e820_table->entries[i]; end = entry->addr + entry->size; + if (addr < entry->addr) + memblock_reserve(addr, entry->addr - addr); + addr = end; if (end != (resource_size_t)end) continue; + /* + * all !E820_TYPE_RAM ranges (including gap ranges) are put + * into memblock.reserved to make sure that struct pages in + * such regions are not left uninitialized after bootup. + */ if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN) - continue; - - memblock_add(entry->addr, entry->size); + memblock_reserve(entry->addr, entry->size); + else + memblock_add(entry->addr, entry->size); } /* Throw away partial pages: */ -- cgit v1.2.3 From 221e00d1fce976d8a04ff591a0150caf84e176f8 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 23 Jun 2018 12:36:22 +0200 Subject: crypto: x86 - Add missing RETs Add explicit RETs to the tail calls of AEGIS and MORUS crypto algorithms otherwise they run into INT3 padding due to ("x86/asm: Pad assembly functions with INT3 instructions") leading to spurious debug exceptions. Mike Galbraith took care of all the remaining callsites. Signed-off-by: Borislav Petkov Acked-by: Ondrej Mosnacek Signed-off-by: Herbert Xu --- arch/x86/crypto/aegis128-aesni-asm.S | 1 + arch/x86/crypto/aegis128l-aesni-asm.S | 1 + arch/x86/crypto/aegis256-aesni-asm.S | 1 + arch/x86/crypto/morus1280-avx2-asm.S | 1 + arch/x86/crypto/morus1280-sse2-asm.S | 1 + arch/x86/crypto/morus640-sse2-asm.S | 1 + 6 files changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S index 9254e0b6cc06..717bf0776421 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -535,6 +535,7 @@ ENTRY(crypto_aegis128_aesni_enc_tail) movdqu STATE3, 0x40(STATEP) FRAME_END + ret ENDPROC(crypto_aegis128_aesni_enc_tail) .macro decrypt_block a s0 s1 s2 s3 s4 i diff --git a/arch/x86/crypto/aegis128l-aesni-asm.S b/arch/x86/crypto/aegis128l-aesni-asm.S index 9263c344f2c7..4eda2b8db9e1 100644 --- a/arch/x86/crypto/aegis128l-aesni-asm.S +++ b/arch/x86/crypto/aegis128l-aesni-asm.S @@ -645,6 +645,7 @@ ENTRY(crypto_aegis128l_aesni_enc_tail) state_store0 FRAME_END + ret ENDPROC(crypto_aegis128l_aesni_enc_tail) /* diff --git a/arch/x86/crypto/aegis256-aesni-asm.S b/arch/x86/crypto/aegis256-aesni-asm.S index 1d977d515bf9..32aae8397268 100644 --- a/arch/x86/crypto/aegis256-aesni-asm.S +++ b/arch/x86/crypto/aegis256-aesni-asm.S @@ -543,6 +543,7 @@ ENTRY(crypto_aegis256_aesni_enc_tail) state_store0 FRAME_END + ret ENDPROC(crypto_aegis256_aesni_enc_tail) /* diff --git a/arch/x86/crypto/morus1280-avx2-asm.S b/arch/x86/crypto/morus1280-avx2-asm.S index 37d422e77931..07653d4582a6 100644 --- a/arch/x86/crypto/morus1280-avx2-asm.S +++ b/arch/x86/crypto/morus1280-avx2-asm.S @@ -453,6 +453,7 @@ ENTRY(crypto_morus1280_avx2_enc_tail) vmovdqu STATE4, (4 * 32)(%rdi) FRAME_END + ret ENDPROC(crypto_morus1280_avx2_enc_tail) /* diff --git a/arch/x86/crypto/morus1280-sse2-asm.S b/arch/x86/crypto/morus1280-sse2-asm.S index 1fe637c7be9d..bd1aa1b60869 100644 --- a/arch/x86/crypto/morus1280-sse2-asm.S +++ b/arch/x86/crypto/morus1280-sse2-asm.S @@ -652,6 +652,7 @@ ENTRY(crypto_morus1280_sse2_enc_tail) movdqu STATE4_HI, (9 * 16)(%rdi) FRAME_END + ret ENDPROC(crypto_morus1280_sse2_enc_tail) /* diff --git a/arch/x86/crypto/morus640-sse2-asm.S b/arch/x86/crypto/morus640-sse2-asm.S index 71c72a0a0862..efa02816d921 100644 --- a/arch/x86/crypto/morus640-sse2-asm.S +++ b/arch/x86/crypto/morus640-sse2-asm.S @@ -437,6 +437,7 @@ ENTRY(crypto_morus640_sse2_enc_tail) movdqu STATE4, (4 * 16)(%rdi) FRAME_END + ret ENDPROC(crypto_morus640_sse2_enc_tail) /* -- cgit v1.2.3 From 4fb5f58e8d191f7c81637ad81284e4848afb4244 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Mon, 2 Jul 2018 23:49:54 -0700 Subject: x86/mm/32: Initialize the CR4 shadow before __flush_tlb_all() On 32-bit kernels, __flush_tlb_all() may have read the CR4 shadow before the initialization of CR4 shadow in cpu_init(). Fix it by adding an explicit cr4_init_shadow() call into start_secondary() which is the first function called on non-boot SMP CPUs - ahead of the __flush_tlb_all() call. ( This is somewhat of a layering violation, but start_secondary() does CR4 bootstrap in the PCID case anyway. ) Signed-off-by: Zhenzhong Duan Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: "H. Peter Anvin" Link: http://lkml.kernel.org/r/b07b6ae9-4b57-4b40-b9bc-50c2c67f1d91@default Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c2f7d1d2a5c3..db9656e13ea0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -221,6 +221,11 @@ static void notrace start_secondary(void *unused) #ifdef CONFIG_X86_32 /* switch away from the initial page table */ load_cr3(swapper_pg_dir); + /* + * Initialize the CR4 shadow before doing anything that could + * try to read it. + */ + cr4_init_shadow(); __flush_tlb_all(); #endif load_current_idt(); -- cgit v1.2.3 From 845d382bb15c6e7dc5026c0ff919c5b13fc7e11b Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 2 Jul 2018 16:35:53 -0500 Subject: x86/bugs: Update when to check for the LS_CFG SSBD mitigation If either the X86_FEATURE_AMD_SSBD or X86_FEATURE_VIRT_SSBD features are present, then there is no need to perform the check for the LS_CFG SSBD mitigation support. Signed-off-by: Tom Lendacky Cc: Borislav Petkov Cc: David Woodhouse Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180702213553.29202.21089.stgit@tlendack-t1.amdoffice.net Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 082d7875cef8..38915fbfae73 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -543,7 +543,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) nodes_per_socket = ((value >> 3) & 7) + 1; } - if (c->x86 >= 0x15 && c->x86 <= 0x17) { + if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && + !boot_cpu_has(X86_FEATURE_VIRT_SSBD) && + c->x86 >= 0x15 && c->x86 <= 0x17) { unsigned int bit; switch (c->x86) { -- cgit v1.2.3 From 612bc3b3d4be749f73a513a17d9b3ee1330d3487 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 2 Jul 2018 16:36:02 -0500 Subject: x86/bugs: Fix the AMD SSBD usage of the SPEC_CTRL MSR On AMD, the presence of the MSR_SPEC_CTRL feature does not imply that the SSBD mitigation support should use the SPEC_CTRL MSR. Other features could have caused the MSR_SPEC_CTRL feature to be set, while a different SSBD mitigation option is in place. Update the SSBD support to check for the actual SSBD features that will use the SPEC_CTRL MSR. Signed-off-by: Tom Lendacky Cc: Borislav Petkov Cc: David Woodhouse Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 6ac2f49edb1e ("x86/bugs: Add AMD's SPEC_CTRL MSR usage") Link: http://lkml.kernel.org/r/20180702213602.29202.33151.stgit@tlendack-t1.amdoffice.net Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/bugs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 404df26b7de8..5c0ea39311fe 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -155,7 +155,8 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) hostval |= ssbd_tif_to_spec_ctrl(ti->flags); if (hostval != guestval) { @@ -533,9 +534,10 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may * use a completely different MSR and bit dependent on family. */ - if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) && + !static_cpu_has(X86_FEATURE_AMD_SSBD)) { x86_amd_ssb_disable(); - else { + } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); -- cgit v1.2.3 From 0e2e160033283e20f688d8bad5b89460cc5bfcc4 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 21 Jun 2018 09:23:23 -0700 Subject: x86/asm: Add _ASM_ARG* constants for argument registers to i386 and x86-64 uses different registers for arguments; make them available so we don't have to #ifdef in the actual code. Native size and specified size (q, l, w, b) versions are provided. Signed-off-by: H. Peter Anvin Signed-off-by: Nick Desaulniers Reviewed-by: Sedat Dilek Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@redhat.com Cc: akataria@vmware.com Cc: akpm@linux-foundation.org Cc: andrea.parri@amarulasolutions.com Cc: ard.biesheuvel@linaro.org Cc: arnd@arndb.de Cc: aryabinin@virtuozzo.com Cc: astrachan@google.com Cc: boris.ostrovsky@oracle.com Cc: brijesh.singh@amd.com Cc: caoj.fnst@cn.fujitsu.com Cc: geert@linux-m68k.org Cc: ghackmann@google.com Cc: gregkh@linuxfoundation.org Cc: jan.kiszka@siemens.com Cc: jarkko.sakkinen@linux.intel.com Cc: joe@perches.com Cc: jpoimboe@redhat.com Cc: keescook@google.com Cc: kirill.shutemov@linux.intel.com Cc: kstewart@linuxfoundation.org Cc: linux-efi@vger.kernel.org Cc: linux-kbuild@vger.kernel.org Cc: manojgupta@google.com Cc: mawilcox@microsoft.com Cc: michal.lkml@markovi.net Cc: mjg59@google.com Cc: mka@chromium.org Cc: pombredanne@nexb.com Cc: rientjes@google.com Cc: rostedt@goodmis.org Cc: thomas.lendacky@amd.com Cc: tstellar@redhat.com Cc: tweek@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: yamada.masahiro@socionext.com Link: http://lkml.kernel.org/r/20180621162324.36656-3-ndesaulniers@google.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/asm.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 219faaec51df..990770f9e76b 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -46,6 +46,65 @@ #define _ASM_SI __ASM_REG(si) #define _ASM_DI __ASM_REG(di) +#ifndef __x86_64__ +/* 32 bit */ + +#define _ASM_ARG1 _ASM_AX +#define _ASM_ARG2 _ASM_DX +#define _ASM_ARG3 _ASM_CX + +#define _ASM_ARG1L eax +#define _ASM_ARG2L edx +#define _ASM_ARG3L ecx + +#define _ASM_ARG1W ax +#define _ASM_ARG2W dx +#define _ASM_ARG3W cx + +#define _ASM_ARG1B al +#define _ASM_ARG2B dl +#define _ASM_ARG3B cl + +#else +/* 64 bit */ + +#define _ASM_ARG1 _ASM_DI +#define _ASM_ARG2 _ASM_SI +#define _ASM_ARG3 _ASM_DX +#define _ASM_ARG4 _ASM_CX +#define _ASM_ARG5 r8 +#define _ASM_ARG6 r9 + +#define _ASM_ARG1Q rdi +#define _ASM_ARG2Q rsi +#define _ASM_ARG3Q rdx +#define _ASM_ARG4Q rcx +#define _ASM_ARG5Q r8 +#define _ASM_ARG6Q r9 + +#define _ASM_ARG1L edi +#define _ASM_ARG2L esi +#define _ASM_ARG3L edx +#define _ASM_ARG4L ecx +#define _ASM_ARG5L r8d +#define _ASM_ARG6L r9d + +#define _ASM_ARG1W di +#define _ASM_ARG2W si +#define _ASM_ARG3W dx +#define _ASM_ARG4W cx +#define _ASM_ARG5W r8w +#define _ASM_ARG6W r9w + +#define _ASM_ARG1B dil +#define _ASM_ARG2B sil +#define _ASM_ARG3B dl +#define _ASM_ARG4B cl +#define _ASM_ARG5B r8b +#define _ASM_ARG6B r9b + +#endif + /* * Macros to generate condition code outputs from inline assembly, * The output operand must be type "bool". -- cgit v1.2.3 From d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 21 Jun 2018 09:23:24 -0700 Subject: x86/paravirt: Make native_save_fl() extern inline native_save_fl() is marked static inline, but by using it as a function pointer in arch/x86/kernel/paravirt.c, it MUST be outlined. paravirt's use of native_save_fl() also requires that no GPRs other than %rax are clobbered. Compilers have different heuristics which they use to emit stack guard code, the emittance of which can break paravirt's callee saved assumption by clobbering %rcx. Marking a function definition extern inline means that if this version cannot be inlined, then the out-of-line version will be preferred. By having the out-of-line version be implemented in assembly, it cannot be instrumented with a stack protector, which might violate custom calling conventions that code like paravirt rely on. The semantics of extern inline has changed since gnu89. This means that folks using GCC versions >= 5.1 may see symbol redefinition errors at link time for subdirs that override KBUILD_CFLAGS (making the C standard used implicit) regardless of this patch. This has been cleaned up earlier in the patch set, but is left as a note in the commit message for future travelers. Reports: https://lkml.org/lkml/2018/5/7/534 https://github.com/ClangBuiltLinux/linux/issues/16 Discussion: https://bugs.llvm.org/show_bug.cgi?id=37512 https://lkml.org/lkml/2018/5/24/1371 Thanks to the many folks that participated in the discussion. Debugged-by: Alistair Strachan Debugged-by: Matthias Kaehlcke Suggested-by: Arnd Bergmann Suggested-by: H. Peter Anvin Suggested-by: Tom Stellar Reported-by: Sedat Dilek Tested-by: Sedat Dilek Signed-off-by: Nick Desaulniers Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@redhat.com Cc: akataria@vmware.com Cc: akpm@linux-foundation.org Cc: andrea.parri@amarulasolutions.com Cc: ard.biesheuvel@linaro.org Cc: aryabinin@virtuozzo.com Cc: astrachan@google.com Cc: boris.ostrovsky@oracle.com Cc: brijesh.singh@amd.com Cc: caoj.fnst@cn.fujitsu.com Cc: geert@linux-m68k.org Cc: ghackmann@google.com Cc: gregkh@linuxfoundation.org Cc: jan.kiszka@siemens.com Cc: jarkko.sakkinen@linux.intel.com Cc: joe@perches.com Cc: jpoimboe@redhat.com Cc: keescook@google.com Cc: kirill.shutemov@linux.intel.com Cc: kstewart@linuxfoundation.org Cc: linux-efi@vger.kernel.org Cc: linux-kbuild@vger.kernel.org Cc: manojgupta@google.com Cc: mawilcox@microsoft.com Cc: michal.lkml@markovi.net Cc: mjg59@google.com Cc: mka@chromium.org Cc: pombredanne@nexb.com Cc: rientjes@google.com Cc: rostedt@goodmis.org Cc: thomas.lendacky@amd.com Cc: tweek@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: yamada.masahiro@socionext.com Link: http://lkml.kernel.org/r/20180621162324.36656-4-ndesaulniers@google.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irqflags.h | 2 +- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/irqflags.S | 26 ++++++++++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/irqflags.S (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 89f08955fff7..c4fc17220df9 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -13,7 +13,7 @@ * Interrupt control: */ -static inline unsigned long native_save_fl(void) +extern inline unsigned long native_save_fl(void) { unsigned long flags; diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 02d6f5cf4e70..8824d01c0c35 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -61,6 +61,7 @@ obj-y += alternative.o i8253.o hw_breakpoint.o obj-y += tsc.o tsc_msr.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o +obj-y += irqflags.o obj-y += process.o obj-y += fpu/ diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S new file mode 100644 index 000000000000..ddeeaac8adda --- /dev/null +++ b/arch/x86/kernel/irqflags.S @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include + +/* + * unsigned long native_save_fl(void) + */ +ENTRY(native_save_fl) + pushf + pop %_ASM_AX + ret +ENDPROC(native_save_fl) +EXPORT_SYMBOL(native_save_fl) + +/* + * void native_restore_fl(unsigned long flags) + * %eax/%rdi: flags + */ +ENTRY(native_restore_fl) + push %_ASM_ARG1 + popf + ret +ENDPROC(native_restore_fl) +EXPORT_SYMBOL(native_restore_fl) -- cgit v1.2.3 From 1268ed0c474a5c8f165ef386f3310521b5e00e27 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Tue, 3 Jul 2018 16:01:55 -0700 Subject: x86/hyper-v: Fix the circular dependency in IPI enlightenment The IPI hypercalls depend on being able to map the Linux notion of CPU ID to the hypervisor's notion of the CPU ID. The array hv_vp_index[] provides this mapping. Code for populating this array depends on the IPI functionality. Break this circular dependency. [ tglx: Use a proper define instead of '-1' with a u32 variable as pointed out by Vitaly ] Fixes: 68bb7bfb7985 ("X86/Hyper-V: Enable IPI enlightenments") Signed-off-by: K. Y. Srinivasan Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Cc: gregkh@linuxfoundation.org Cc: devel@linuxdriverproject.org Cc: olaf@aepfle.de Cc: apw@canonical.com Cc: jasowang@redhat.com Cc: hpa@zytor.com Cc: sthemmin@microsoft.com Cc: Michael.H.Kelley@microsoft.com Cc: vkuznets@redhat.com Link: https://lkml.kernel.org/r/20180703230155.15160-1-kys@linuxonhyperv.com --- arch/x86/hyperv/hv_apic.c | 5 +++++ arch/x86/hyperv/hv_init.c | 5 ++++- arch/x86/include/asm/mshyperv.h | 5 ++++- 3 files changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index f68855499391..402338365651 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -114,6 +114,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector) ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K; nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask); } + if (nr_bank < 0) + goto ipi_mask_ex_done; if (!nr_bank) ipi_arg->vp_set.format = HV_GENERIC_SET_ALL; @@ -158,6 +160,9 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector) for_each_cpu(cur_cpu, mask) { vcpu = hv_cpu_number_to_vp_number(cur_cpu); + if (vcpu == VP_INVAL) + goto ipi_mask_done; + /* * This particular version of the IPI hypercall can * only target upto 64 CPUs. diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 4c431e1c1eff..1ff420217298 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -265,7 +265,7 @@ void __init hyperv_init(void) { u64 guest_id, required_msrs; union hv_x64_msr_hypercall_contents hypercall_msr; - int cpuhp; + int cpuhp, i; if (x86_hyper_type != X86_HYPER_MS_HYPERV) return; @@ -293,6 +293,9 @@ void __init hyperv_init(void) if (!hv_vp_index) return; + for (i = 0; i < num_possible_cpus(); i++) + hv_vp_index[i] = VP_INVAL; + hv_vp_assist_page = kcalloc(num_possible_cpus(), sizeof(*hv_vp_assist_page), GFP_KERNEL); if (!hv_vp_assist_page) { diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 3cd14311edfa..5a7375ed5f7c 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -9,6 +9,8 @@ #include #include +#define VP_INVAL U32_MAX + struct ms_hyperv_info { u32 features; u32 misc_features; @@ -20,7 +22,6 @@ struct ms_hyperv_info { extern struct ms_hyperv_info ms_hyperv; - /* * Generate the guest ID. */ @@ -281,6 +282,8 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset, */ for_each_cpu(cpu, cpus) { vcpu = hv_cpu_number_to_vp_number(cpu); + if (vcpu == VP_INVAL) + return -1; vcpu_bank = vcpu / 64; vcpu_offset = vcpu % 64; __set_bit(vcpu_offset, (unsigned long *) -- cgit v1.2.3 From 15279df6f26cf2013d713904b4a0c957ae8abb96 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 6 Jul 2018 23:50:03 +0200 Subject: x86/mtrr: Don't copy out-of-bounds data in mtrr_write Don't access the provided buffer out of bounds - this can cause a kernel out-of-bounds read when invoked through sys_splice() or other things that use kernel_write()/__kernel_write(). Fixes: 7f8ec5a4f01a ("x86/mtrr: Convert to use strncpy_from_user() helper") Signed-off-by: Jann Horn Signed-off-by: Thomas Gleixner Cc: Andy Shevchenko Cc: "H. Peter Anvin" Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180706215003.156702-1-jannh@google.com --- arch/x86/kernel/cpu/mtrr/if.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 4021d3859499..40eee6cc4124 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -106,7 +106,8 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) memset(line, 0, LINE_SIZE); - length = strncpy_from_user(line, buf, LINE_SIZE - 1); + len = min_t(size_t, len, LINE_SIZE - 1); + length = strncpy_from_user(line, buf, len); if (length < 0) return length; -- cgit v1.2.3 From e296701800f30d260a66f8aa1971b5b1bc3d2f81 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 11 Jul 2018 11:02:35 +0200 Subject: efi/x86: Fix mixed mode reboot loop by removing pointless call to PciIo->Attributes() Hans de Goede reported that his mixed EFI mode Bay Trail tablet would not boot at all any more, but enter a reboot loop without any logs printed by the kernel. Unbreak 64-bit Linux/x86 on 32-bit UEFI: When it was first introduced, the EFI stub code that copies the contents of PCI option ROMs originally only intended to do so if the EFI_PCI_IO_ATTRIBUTE_EMBEDDED_ROM attribute was *not* set. The reason was that the UEFI spec permits PCI option ROM images to be provided by the platform directly, rather than via the ROM BAR, and in this case, the OS can only access them at runtime if they are preserved at boot time by copying them from the areas described by PciIo->RomImage and PciIo->RomSize. However, it implemented this check erroneously, as can be seen in commit: dd5fc854de5fd ("EFI: Stash ROMs if they're not in the PCI BAR") which introduced: if (!attributes & EFI_PCI_IO_ATTRIBUTE_EMBEDDED_ROM) continue; and given that the numeric value of EFI_PCI_IO_ATTRIBUTE_EMBEDDED_ROM is 0x4000, this condition never becomes true, and so the option ROMs were copied unconditionally. This was spotted and 'fixed' by commit: 886d751a2ea99a160 ("x86, efi: correct precedence of operators in setup_efi_pci") but inadvertently inverted the logic at the same time, defeating the purpose of the code, since it now only preserves option ROM images that can be read from the ROM BAR as well. Unsurprisingly, this broke some systems, and so the check was removed entirely in the following commit: 739701888f5d ("x86, efi: remove attribute check from setup_efi_pci") It is debatable whether this check should have been included in the first place, since the option ROM image provided to the UEFI driver by the firmware may be different from the one that is actually present in the card's flash ROM, and so whatever PciIo->RomImage points at should be preferred regardless of whether the attribute is set. As this was the only use of the attributes field, we can remove the call to PciIo->Attributes() entirely, which is especially nice because its prototype involves uint64_t type by-value arguments which the EFI mixed mode has trouble dealing with. Any mixed mode system with PCI is likely to be affected. Tested-by: Wilfried Klaebe Tested-by: Hans de Goede Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20180711090235.9327-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index e57665b4ba1c..e98522ea6f09 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -114,18 +114,12 @@ __setup_efi_pci(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) struct pci_setup_rom *rom = NULL; efi_status_t status; unsigned long size; - uint64_t attributes, romsize; + uint64_t romsize; void *romimage; - status = efi_call_proto(efi_pci_io_protocol, attributes, pci, - EfiPciIoAttributeOperationGet, 0ULL, - &attributes); - if (status != EFI_SUCCESS) - return status; - /* - * Some firmware images contain EFI function pointers at the place where the - * romimage and romsize fields are supposed to be. Typically the EFI + * Some firmware images contain EFI function pointers at the place where + * the romimage and romsize fields are supposed to be. Typically the EFI * code is mapped at high addresses, translating to an unrealistically * large romsize. The UEFI spec limits the size of option ROMs to 16 * MiB so we reject any ROMs over 16 MiB in size to catch this. -- cgit v1.2.3 From e69b5d308da72cbf4e7911c3979f9a46d28532af Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 2 Jul 2018 12:00:18 +0200 Subject: xen: remove global bit from __default_kernel_pte_mask for pv guests When removing the global bit from __supported_pte_mask do the same for __default_kernel_pte_mask in order to avoid the WARN_ONCE() in check_pgprot() when setting a kernel pte before having called init_mem_mapping(). Cc: # 4.17 Reported-by: Michael Young Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 8d4e2e1ae60b..4816b6f82a9a 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1229,6 +1229,7 @@ asmlinkage __visible void __init xen_start_kernel(void) /* Prevent unwanted bits from being set in PTEs. */ __supported_pte_mask &= ~_PAGE_GLOBAL; + __default_kernel_pte_mask &= ~_PAGE_GLOBAL; /* * Prevent page tables from being allocated in highmem, even -- cgit v1.2.3 From 0ce0bba4e5e0eb9b753bb821785de5d23c494392 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 12 Jul 2018 17:40:34 +0200 Subject: xen: setup pv irq ops vector earlier Setting pv_irq_ops for Xen PV domains should be done as early as possible in order to support e.g. very early printk() usage. The same applies to xen_vcpu_info_reset(0), as it is needed for the pv irq ops. Move the call of xen_setup_machphys_mapping() after initializing the pv functions as it contains a WARN_ON(), too. Remove the no longer necessary conditional in xen_init_irq_ops() from PVH V1 times to make clear this is a PV only function. Cc: # 4.14 Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 24 +++++++++++------------- arch/x86/xen/irq.c | 4 +--- 2 files changed, 12 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 4816b6f82a9a..439a94bf89ad 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1207,12 +1207,20 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_setup_features(); - xen_setup_machphys_mapping(); - /* Install Xen paravirt ops */ pv_info = xen_info; pv_init_ops.patch = paravirt_patch_default; pv_cpu_ops = xen_cpu_ops; + xen_init_irq_ops(); + + /* + * Setup xen_vcpu early because it is needed for + * local_irq_disable(), irqs_disabled(), e.g. in printk(). + * + * Don't do the full vcpu_info placement stuff until we have + * the cpu_possible_mask and a non-dummy shared_info. + */ + xen_vcpu_info_reset(0); x86_platform.get_nmi_reason = xen_get_nmi_reason; @@ -1225,6 +1233,7 @@ asmlinkage __visible void __init xen_start_kernel(void) * Set up some pagetable state before starting to set any ptes. */ + xen_setup_machphys_mapping(); xen_init_mmu_ops(); /* Prevent unwanted bits from being set in PTEs. */ @@ -1250,20 +1259,9 @@ asmlinkage __visible void __init xen_start_kernel(void) get_cpu_cap(&boot_cpu_data); x86_configure_nx(); - xen_init_irq_ops(); - /* Let's presume PV guests always boot on vCPU with id 0. */ per_cpu(xen_vcpu_id, 0) = 0; - /* - * Setup xen_vcpu early because idt_setup_early_handler needs it for - * local_irq_disable(), irqs_disabled(). - * - * Don't do the full vcpu_info placement stuff until we have - * the cpu_possible_mask and a non-dummy shared_info. - */ - xen_vcpu_info_reset(0); - idt_setup_early_handler(); xen_init_capabilities(); diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 74179852e46c..7515a19fd324 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -128,8 +128,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = { void __init xen_init_irq_ops(void) { - /* For PVH we use default pv_irq_ops settings. */ - if (!xen_feature(XENFEAT_hvm_callback_vector)) - pv_irq_ops = xen_irq_ops; + pv_irq_ops = xen_irq_ops; x86_init.irqs.intr_init = xen_init_IRQ; } -- cgit v1.2.3 From fa8cbda88db12e632a8987c94b66f5caf25bcec4 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Fri, 13 Jul 2018 16:59:09 -0700 Subject: x86/purgatory: add missing FORCE to Makefile target - Build the kernel without the fix - Add some flag to the purgatories KBUILD_CFLAGS,I used -fno-asynchronous-unwind-tables - Re-build the kernel When you look at makes output you see that sha256.o is not re-build in the last step. Also readelf -S still shows the .eh_frame section for sha256.o. With the fix sha256.o is rebuilt in the last step. Without FORCE make does not detect changes only made to the command line options. So object files might not be re-built even when they should be. Fix this by adding FORCE where it is missing. Link: http://lkml.kernel.org/r/20180704110044.29279-2-prudo@linux.ibm.com Fixes: df6f2801f511 ("kernel/kexec_file.c: move purgatories sha256 to common code") Signed-off-by: Philipp Rudo Acked-by: Dave Young Cc: Ingo Molnar Cc: Thomas Gleixner Cc: [4.17+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/purgatory/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 2e9ee023e6bc..81a8e33115ad 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -6,7 +6,7 @@ purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string targets += $(purgatory-y) PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) -$(obj)/sha256.o: $(srctree)/lib/sha256.c +$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE $(call if_changed_rule,cc_o_c) LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib -- cgit v1.2.3