From 39380b80d72723282f0ea1d1bbf2294eae45013e Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 8 Jul 2016 11:38:28 +0200 Subject: x86/mm/pat, /dev/mem: Remove superfluous error message Currently it's possible for broken (or malicious) userspace to flood a kernel log indefinitely with messages a-la Program dmidecode tried to access /dev/mem between f0000->100000 because range_is_allowed() is case of CONFIG_STRICT_DEVMEM being turned on dumps this information each and every time devmem_is_allowed() fails. Reportedly userspace that is able to trigger contignuous flow of these messages exists. It would be possible to rate limit this message, but that'd have a questionable value; the administrator wouldn't get information about all the failing accessess, so then the information would be both superfluous and incomplete at the same time :) Returning EPERM (which is what is actually happening) is enough indication for userspace what has happened; no need to log this particular error as some sort of special condition. Signed-off-by: Jiri Kosina Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Link: http://lkml.kernel.org/r/alpine.LNX.2.00.1607081137020.24757@cbobk.fhfr.pm Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index fb0604f11eec..db00e3e2f3dc 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -755,11 +755,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) return 1; while (cursor < to) { - if (!devmem_is_allowed(pfn)) { - pr_info("x86/PAT: Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n", - current->comm, from, to - 1); + if (!devmem_is_allowed(pfn)) return 0; - } cursor += PAGE_SIZE; pfn++; } -- cgit v1.2.3 From dcb32d9913b7ed527b135a7e221f8d14b67bb952 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 7 Jul 2016 17:19:15 -0700 Subject: x86/mm: Use pte_none() to test for empty PTE The page table manipulation code seems to have grown a couple of sites that are looking for empty PTEs. Just in case one of these entries got a stray bit set, use pte_none() instead of checking for a zero pte_val(). The use pte_same() makes me a bit nervous. If we were doing a pte_same() check against two cleared entries and one of them had a stray bit set, it might fail the pte_same() check. But, I don't think we ever _do_ pte_same() for cleared entries. It is almost entirely used for checking for races in fault-in paths. Signed-off-by: Dave Hansen Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: dave.hansen@intel.com Cc: linux-mm@kvack.org Cc: mhocko@suse.com Link: http://lkml.kernel.org/r/20160708001915.813703D9@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 12 ++++++------ arch/x86/mm/pageattr.c | 2 +- arch/x86/mm/pgtable_32.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bce2e5d9edd4..bb88fbc0a288 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -354,7 +354,7 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, * pagetable pages as RO. So assume someone who pre-setup * these mappings are more intelligent. */ - if (pte_val(*pte)) { + if (!pte_none(*pte)) { if (!after_bootmem) pages++; continue; @@ -396,7 +396,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, continue; } - if (pmd_val(*pmd)) { + if (!pmd_none(*pmd)) { if (!pmd_large(*pmd)) { spin_lock(&init_mm.page_table_lock); pte = (pte_t *)pmd_page_vaddr(*pmd); @@ -470,7 +470,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, continue; } - if (pud_val(*pud)) { + if (!pud_none(*pud)) { if (!pud_large(*pud)) { pmd = pmd_offset(pud, 0); last_map_addr = phys_pmd_init(pmd, addr, end, @@ -673,7 +673,7 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) for (i = 0; i < PTRS_PER_PTE; i++) { pte = pte_start + i; - if (pte_val(*pte)) + if (!pte_none(*pte)) return; } @@ -691,7 +691,7 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) for (i = 0; i < PTRS_PER_PMD; i++) { pmd = pmd_start + i; - if (pmd_val(*pmd)) + if (!pmd_none(*pmd)) return; } @@ -710,7 +710,7 @@ static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd) for (i = 0; i < PTRS_PER_PUD; i++) { pud = pud_start + i; - if (pud_val(*pud)) + if (!pud_none(*pud)) return false; } diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7a1f7bbf4105..75142159b0a5 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1185,7 +1185,7 @@ repeat: return __cpa_process_fault(cpa, address, primary); old_pte = *kpte; - if (!pte_val(old_pte)) + if (pte_none(old_pte)) return __cpa_process_fault(cpa, address, primary); if (level == PG_LEVEL_4K) { diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 75cc0978d45d..e67ae0e6c59d 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -47,7 +47,7 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval) return; } pte = pte_offset_kernel(pmd, vaddr); - if (pte_val(pteval)) + if (!pte_none(pteval)) set_pte_at(&init_mm, vaddr, pte, pteval); else pte_clear(&init_mm, vaddr, pte); -- cgit v1.2.3 From af2cf278ef4f9289f88504c3e03cb12f76027575 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Jul 2016 13:22:49 -0700 Subject: x86/mm/hotplug: Don't remove PGD entries in remove_pagetable() So when memory hotplug removes a piece of physical memory from pagetable mappings, it also frees the underlying PGD entry. This complicates PGD management, so don't do this. We can keep the PGD mapped and the PUD table all clear - it's only a single 4K page per 512 GB of memory hotplugged. Signed-off-by: Ingo Molnar Signed-off-by: Andy Lutomirski Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Waiman Long Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/064ff6c7275734537f969e876f6cd0baa954d2cc.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 27 --------------------------- 1 file changed, 27 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bb88fbc0a288..e14f87057c3f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -702,27 +702,6 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) spin_unlock(&init_mm.page_table_lock); } -/* Return true if pgd is changed, otherwise return false. */ -static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd) -{ - pud_t *pud; - int i; - - for (i = 0; i < PTRS_PER_PUD; i++) { - pud = pud_start + i; - if (!pud_none(*pud)) - return false; - } - - /* free a pud table */ - free_pagetable(pgd_page(*pgd), 0); - spin_lock(&init_mm.page_table_lock); - pgd_clear(pgd); - spin_unlock(&init_mm.page_table_lock); - - return true; -} - static void __meminit remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, bool direct) @@ -913,7 +892,6 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct) unsigned long addr; pgd_t *pgd; pud_t *pud; - bool pgd_changed = false; for (addr = start; addr < end; addr = next) { next = pgd_addr_end(addr, end); @@ -924,13 +902,8 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct) pud = (pud_t *)pgd_page_vaddr(*pgd); remove_pud_table(pud, addr, next, direct); - if (free_pud_table(pud, pgd)) - pgd_changed = true; } - if (pgd_changed) - sync_global_pgds(start, end - 1, 1); - flush_tlb_all(); } -- cgit v1.2.3 From 360cb4d15567a7eca07a5f3ade6de308bbfb4e70 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:50 -0700 Subject: x86/mm/cpa: In populate_pgd(), don't set the PGD entry until it's populated This avoids pointless races in which another CPU or task might see a partially populated global PGD entry. These races should normally be harmless, but, if another CPU propagates the entry via vmalloc_fault() and then populate_pgd() fails (due to memory allocation failure, for example), this prevents a use-after-free of the PGD entry. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/bf99df27eac6835f687005364bd1fbd89130946c.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 75142159b0a5..26aa487ae4ef 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1104,8 +1104,6 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); if (!pud) return -1; - - set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE)); } pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr); @@ -1113,11 +1111,16 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) ret = populate_pud(cpa, addr, pgd_entry, pgprot); if (ret < 0) { - unmap_pgd_range(cpa->pgd, addr, + if (pud) + free_page((unsigned long)pud); + unmap_pud_range(pgd_entry, addr, addr + (cpa->numpages << PAGE_SHIFT)); return ret; } + if (pud) + set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE)); + cpa->numpages = ret; return 0; } -- cgit v1.2.3 From d92fc69ccac4c0a20679fdbdc81b2010685a6f33 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:51 -0700 Subject: x86/mm: Remove kernel_unmap_pages_in_pgd() and efi_cleanup_page_tables() kernel_unmap_pages_in_pgd() is dangerous: if a PGD entry in init_mm.pgd were to be cleared, callers would need to ensure that the pgd entry hadn't been propagated to any other pgd. Its only caller was efi_cleanup_page_tables(), and that, in turn, was unused, so just delete both functions. This leaves a couple of other helpers unused, so delete them, too. Signed-off-by: Andy Lutomirski Reviewed-by: Matt Fleming Acked-by: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/77ff20fdde3b75cd393be5559ad8218870520248.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 1 - arch/x86/include/asm/pgtable_types.h | 2 -- arch/x86/mm/pageattr.c | 28 ---------------------------- arch/x86/platform/efi/efi.c | 2 -- arch/x86/platform/efi/efi_32.c | 3 --- arch/x86/platform/efi/efi_64.c | 5 ----- 6 files changed, 41 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 78d1e7467eae..45ea38df86d4 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -125,7 +125,6 @@ extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); extern int __init efi_alloc_page_tables(void); extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); -extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init old_map_region(efi_memory_desc_t *md); extern void __init runtime_code_page_mkexec(void); extern void __init efi_runtime_update_mappings(void); diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index d14d0a55322a..f1218f512f62 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -481,8 +481,6 @@ extern pmd_t *lookup_pmd_address(unsigned long address); extern phys_addr_t slow_virt_to_phys(void *__address); extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, unsigned numpages, unsigned long page_flags); -void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address, - unsigned numpages); #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_DEFS_H */ diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 26aa487ae4ef..26c93c6e04a0 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -746,18 +746,6 @@ static bool try_to_free_pmd_page(pmd_t *pmd) return true; } -static bool try_to_free_pud_page(pud_t *pud) -{ - int i; - - for (i = 0; i < PTRS_PER_PUD; i++) - if (!pud_none(pud[i])) - return false; - - free_page((unsigned long)pud); - return true; -} - static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) { pte_t *pte = pte_offset_kernel(pmd, start); @@ -871,16 +859,6 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) */ } -static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end) -{ - pgd_t *pgd_entry = root + pgd_index(addr); - - unmap_pud_range(pgd_entry, addr, end); - - if (try_to_free_pud_page((pud_t *)pgd_page_vaddr(*pgd_entry))) - pgd_clear(pgd_entry); -} - static int alloc_pte_page(pmd_t *pmd) { pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); @@ -1994,12 +1972,6 @@ out: return retval; } -void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address, - unsigned numpages) -{ - unmap_pgd_range(root, address, address + (numpages << PAGE_SHIFT)); -} - /* * The testcases use internal knowledge of the implementation that shouldn't * be exposed to the rest of the kernel. Include these directly here. diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index f93545e7dc54..62986e5fbdba 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -978,8 +978,6 @@ static void __init __efi_enter_virtual_mode(void) * EFI mixed mode we need all of memory to be accessible when * we pass parameters to the EFI runtime services in the * thunking code. - * - * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift); */ free_pages((unsigned long)new_memmap, pg_shift); diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 338402b91d2e..cef39b097649 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -49,9 +49,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { return 0; } -void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) -{ -} void __init efi_map_region(efi_memory_desc_t *md) { diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index b226b3f497f1..d288dcea1ffe 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -285,11 +285,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 0; } -void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) -{ - kernel_unmap_pages_in_pgd(efi_pgd, pa_memmap, num_pages); -} - static void __init __map_region(efi_memory_desc_t *md, u64 va) { unsigned long flags = _PAGE_RW; -- cgit v1.2.3 From 46aea3873401836abb7f01200e7946e7d518b359 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:54 -0700 Subject: x86/mm/64: In vmalloc_fault(), use CR3 instead of current->active_mm If we get a vmalloc fault while current->active_mm->pgd doesn't match CR3, we'll crash without this change. I've seen this failure mode on heavily instrumented kernels with virtually mapped stacks. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/4650d7674185f165ed8fdf9ac4c5c35c5c179ba8.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 7d1fa7cd2374..ca44e2e7fd00 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -439,7 +439,7 @@ static noinline int vmalloc_fault(unsigned long address) * happen within a race in page table update. In the later * case just flush: */ - pgd = pgd_offset(current->active_mm, address); + pgd = (pgd_t *)__va(read_cr3()) + pgd_index(address); pgd_ref = pgd_offset_k(address); if (pgd_none(*pgd_ref)) return -1; -- cgit v1.2.3 From dfa9a942fd7951c8f333cf3f377dde51ebd21685 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 14 Jul 2016 13:22:56 -0700 Subject: x86/uaccess: Move thread_info::uaccess_err and thread_info::sig_on_uaccess_err to thread_struct struct thread_info is a legacy mess. To prepare for its partial removal, move the uaccess control fields out -- they're straightforward. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/d0ac4d01c8e4d4d756264604e47445d5acc7900e.1468527351.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/vsyscall/vsyscall_64.c | 6 +++--- arch/x86/include/asm/processor.h | 3 +++ arch/x86/include/asm/thread_info.h | 2 -- arch/x86/include/asm/uaccess.h | 4 ++-- arch/x86/mm/extable.c | 2 +- arch/x86/mm/fault.c | 2 +- 6 files changed, 10 insertions(+), 9 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 174c2549939d..3aba2b043050 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -221,8 +221,8 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) * With a real vsyscall, page faults cause SIGSEGV. We want to * preserve that behavior to make writing exploits harder. */ - prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; - current_thread_info()->sig_on_uaccess_error = 1; + prev_sig_on_uaccess_error = current->thread.sig_on_uaccess_error; + current->thread.sig_on_uaccess_error = 1; ret = -EFAULT; switch (vsyscall_nr) { @@ -243,7 +243,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) break; } - current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; + current->thread.sig_on_uaccess_error = prev_sig_on_uaccess_error; check_fault: if (ret == -EFAULT) { diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 62c6cc3cc5d3..f53ae57bd985 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -419,6 +419,9 @@ struct thread_struct { /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; + unsigned int sig_on_uaccess_error:1; + unsigned int uaccess_err:1; /* uaccess failed */ + /* Floating point and extended processor state */ struct fpu fpu; /* diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 30c133ac05cd..7c47bb659ecd 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -58,8 +58,6 @@ struct thread_info { __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ mm_segment_t addr_limit; - unsigned int sig_on_uaccess_error:1; - unsigned int uaccess_err:1; /* uaccess failed */ }; #define INIT_THREAD_INFO(tsk) \ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index d40ec723f799..8f66e5655c23 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -487,13 +487,13 @@ struct __large_struct { unsigned long buf[100]; }; * uaccess_try and catch */ #define uaccess_try do { \ - current_thread_info()->uaccess_err = 0; \ + current->thread.uaccess_err = 0; \ __uaccess_begin(); \ barrier(); #define uaccess_catch(err) \ __uaccess_end(); \ - (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \ + (err) |= (current->thread.uaccess_err ? -EFAULT : 0); \ } while (0) /** diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 4bb53b89f3c5..0f90cc218d04 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -37,7 +37,7 @@ bool ex_handler_ext(const struct exception_table_entry *fixup, struct pt_regs *regs, int trapnr) { /* Special hack for uaccess_err */ - current_thread_info()->uaccess_err = 1; + current->thread.uaccess_err = 1; regs->ip = ex_fixup_addr(fixup); return true; } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ca44e2e7fd00..69be03d4aca6 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -737,7 +737,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, * In this case we need to make sure we're not recursively * faulting through the emulate_vsyscall() logic. */ - if (current_thread_info()->sig_on_uaccess_error && signal) { + if (current->thread.sig_on_uaccess_error && signal) { tsk->thread.trap_nr = X86_TRAP_PF; tsk->thread.error_code = error_code | PF_USER; tsk->thread.cr2 = address; -- cgit v1.2.3 From 2a53ccbc0de1b1950aeedd24680f7eca65c86ff5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 15 Jul 2016 10:21:11 +0200 Subject: x86/dumpstack: Rename thread_struct::sig_on_uaccess_error to sig_on_uaccess_err Rename it to match the thread_struct::uaccess_err pattern and also because it was too long. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/vsyscall/vsyscall_64.c | 10 +++++----- arch/x86/include/asm/processor.h | 2 +- arch/x86/mm/fault.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 3aba2b043050..75fc719b7f31 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -96,7 +96,7 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) { /* * XXX: if access_ok, get_user, and put_user handled - * sig_on_uaccess_error, this could go away. + * sig_on_uaccess_err, this could go away. */ if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) { @@ -125,7 +125,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) struct task_struct *tsk; unsigned long caller; int vsyscall_nr, syscall_nr, tmp; - int prev_sig_on_uaccess_error; + int prev_sig_on_uaccess_err; long ret; /* @@ -221,8 +221,8 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) * With a real vsyscall, page faults cause SIGSEGV. We want to * preserve that behavior to make writing exploits harder. */ - prev_sig_on_uaccess_error = current->thread.sig_on_uaccess_error; - current->thread.sig_on_uaccess_error = 1; + prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err; + current->thread.sig_on_uaccess_err = 1; ret = -EFAULT; switch (vsyscall_nr) { @@ -243,7 +243,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) break; } - current->thread.sig_on_uaccess_error = prev_sig_on_uaccess_error; + current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err; check_fault: if (ret == -EFAULT) { diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index f53ae57bd985..cbdfe5f76347 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -419,7 +419,7 @@ struct thread_struct { /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; - unsigned int sig_on_uaccess_error:1; + unsigned int sig_on_uaccess_err:1; unsigned int uaccess_err:1; /* uaccess failed */ /* Floating point and extended processor state */ diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 69be03d4aca6..d22161ab941d 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -737,7 +737,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, * In this case we need to make sure we're not recursively * faulting through the emulate_vsyscall() logic. */ - if (current->thread.sig_on_uaccess_error && signal) { + if (current->thread.sig_on_uaccess_err && signal) { tsk->thread.trap_nr = X86_TRAP_PF; tsk->thread.error_code = error_code | PF_USER; tsk->thread.cr2 = address; -- cgit v1.2.3 From 530dd8d4b9daf77e3e5d145a26210d91ced954c7 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 22 Jul 2016 21:58:08 -0700 Subject: x86/mm/cpa: Fix populate_pgd(): Stop trying to deallocate failed PUDs Valdis Kletnieks bisected a boot failure back to this recent commit: 360cb4d15567 ("x86/mm/cpa: In populate_pgd(), don't set the PGD entry until it's populated") I broke the case where a PUD table got allocated -- populate_pud() would wander off a pgd_none entry and get lost. I'm not sure how this survived my testing. Fix the original issue in a much simpler way. The problem was that, if we allocated a PUD table, failed to populate it, and freed it, another CPU could potentially keep using the PGD entry we installed (either by copying it via vmalloc_fault or by speculatively caching it). There's a straightforward fix: simply leave the top-level entry in place if this happens. This can't waste any significant amount of memory -- there are at most 256 entries like this systemwide and, as a practical matter, if we hit this failure path repeatedly, we're likely to reuse the same page anyway. For context, this is a reversion with this hunk added in: if (ret < 0) { + /* + * Leave the PUD page in place in case some other CPU or thread + * already found it, but remove any useless entries we just + * added to it. + */ - unmap_pgd_range(cpa->pgd, addr, + unmap_pud_range(pgd_entry, addr, addr + (cpa->numpages << PAGE_SHIFT)); return ret; } This effectively open-codes what the now-deleted unmap_pgd_range() function used to do except that unmap_pgd_range() used to try to free the page as well. Reported-by: Valdis Kletnieks Signed-off-by: Andy Lutomirski Cc: Andrew Morton Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Mike Krinkin Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Link: http://lkml.kernel.org/r/21cbc2822aa18aa812c0215f4231dbf5f65afa7f.1469249789.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 26c93c6e04a0..2bc6ea153f76 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1082,6 +1082,8 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); if (!pud) return -1; + + set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE)); } pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr); @@ -1089,16 +1091,11 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) ret = populate_pud(cpa, addr, pgd_entry, pgprot); if (ret < 0) { - if (pud) - free_page((unsigned long)pud); unmap_pud_range(pgd_entry, addr, addr + (cpa->numpages << PAGE_SHIFT)); return ret; } - if (pud) - set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE)); - cpa->numpages = ret; return 0; } -- cgit v1.2.3 From 55920d31f1e3fea06702c74271dd56c4fc9b70ca Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 23 Jul 2016 09:59:28 -0700 Subject: x86/mm/cpa: Add missing comment in populate_pdg() In commit: 21cbc2822aa1 ("x86/mm/cpa: Unbreak populate_pgd(): stop trying to deallocate failed PUDs") I intended to add this comment, but I failed at using git. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/242baf8612394f4e31216f96d13c4d2e9b90d1b7.1469293159.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 2bc6ea153f76..47870a534877 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1091,6 +1091,11 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) ret = populate_pud(cpa, addr, pgd_entry, pgprot); if (ret < 0) { + /* + * Leave the PUD page in place in case some other CPU or thread + * already found it, but remove any useless entries we just + * added to it. + */ unmap_pud_range(pgd_entry, addr, addr + (cpa->numpages << PAGE_SHIFT)); return ret; -- cgit v1.2.3