diff options
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/dump_pagetables.c | 21 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 6 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 105 | ||||
-rw-r--r-- | arch/x86/mm/pti.c | 92 |
5 files changed, 182 insertions, 44 deletions
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 2f3c9196b834..ccd92c4da57c 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -111,6 +111,8 @@ static struct addr_marker address_markers[] = { [END_OF_SPACE_NR] = { -1, NULL } }; +#define INIT_PGD ((pgd_t *) &init_top_pgt) + #else /* CONFIG_X86_64 */ enum address_markers_idx { @@ -121,6 +123,9 @@ enum address_markers_idx { #ifdef CONFIG_HIGHMEM PKMAP_BASE_NR, #endif +#ifdef CONFIG_MODIFY_LDT_SYSCALL + LDT_NR, +#endif CPU_ENTRY_AREA_NR, FIXADDR_START_NR, END_OF_SPACE_NR, @@ -134,11 +139,16 @@ static struct addr_marker address_markers[] = { #ifdef CONFIG_HIGHMEM [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" }, #endif +#ifdef CONFIG_MODIFY_LDT_SYSCALL + [LDT_NR] = { 0UL, "LDT remap" }, +#endif [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" }, [FIXADDR_START_NR] = { 0UL, "Fixmap area" }, [END_OF_SPACE_NR] = { -1, NULL } }; +#define INIT_PGD (swapper_pg_dir) + #endif /* !CONFIG_X86_64 */ /* Multipliers for offsets within the PTEs */ @@ -496,11 +506,7 @@ static inline bool is_hypervisor_range(int idx) static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, bool checkwx, bool dmesg) { -#ifdef CONFIG_X86_64 - pgd_t *start = (pgd_t *) &init_top_pgt; -#else - pgd_t *start = swapper_pg_dir; -#endif + pgd_t *start = INIT_PGD; pgprotval_t prot, eff; int i; struct pg_state st = {}; @@ -566,7 +572,7 @@ EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs); static void ptdump_walk_user_pgd_level_checkwx(void) { #ifdef CONFIG_PAGE_TABLE_ISOLATION - pgd_t *pgd = (pgd_t *) &init_top_pgt; + pgd_t *pgd = INIT_PGD; if (!static_cpu_has(X86_FEATURE_PTI)) return; @@ -609,6 +615,9 @@ static int __init pt_dump_init(void) # endif address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE; +# ifdef CONFIG_MODIFY_LDT_SYSCALL + address_markers[LDT_NR].start_address = LDT_BASE_ADDR; +# endif #endif return 0; } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 2aafa6ab6103..db1c042e9853 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -317,8 +317,6 @@ static noinline int vmalloc_fault(unsigned long address) if (!(address >= VMALLOC_START && address < VMALLOC_END)) return -1; - WARN_ON_ONCE(in_nmi()); - /* * Synchronize this task's top level page-table * with the 'reference' page table. diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 68c292cb1ebf..dd519f372169 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1287,12 +1287,6 @@ void mark_rodata_ro(void) free_kernel_image_pages((void *)rodata_end, (void *)_sdata); debug_checkwx(); - - /* - * Do this after all of the manipulation of the - * kernel text page tables are complete. - */ - pti_clone_kernel_text(); } int kern_addr_valid(unsigned long addr) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 47b5951e592b..8e4e63d46d81 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -182,6 +182,14 @@ static void pgd_dtor(pgd_t *pgd) */ #define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD +/* + * We allocate separate PMDs for the kernel part of the user page-table + * when PTI is enabled. We need them to map the per-process LDT into the + * user-space page-table. + */ +#define PREALLOCATED_USER_PMDS (static_cpu_has(X86_FEATURE_PTI) ? \ + KERNEL_PGD_PTRS : 0) + void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) { paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT); @@ -202,14 +210,14 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) /* No need to prepopulate any pagetable entries in non-PAE modes. */ #define PREALLOCATED_PMDS 0 - +#define PREALLOCATED_USER_PMDS 0 #endif /* CONFIG_X86_PAE */ -static void free_pmds(struct mm_struct *mm, pmd_t *pmds[]) +static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) { int i; - for(i = 0; i < PREALLOCATED_PMDS; i++) + for (i = 0; i < count; i++) if (pmds[i]) { pgtable_pmd_page_dtor(virt_to_page(pmds[i])); free_page((unsigned long)pmds[i]); @@ -217,7 +225,7 @@ static void free_pmds(struct mm_struct *mm, pmd_t *pmds[]) } } -static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[]) +static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) { int i; bool failed = false; @@ -226,7 +234,7 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[]) if (mm == &init_mm) gfp &= ~__GFP_ACCOUNT; - for(i = 0; i < PREALLOCATED_PMDS; i++) { + for (i = 0; i < count; i++) { pmd_t *pmd = (pmd_t *)__get_free_page(gfp); if (!pmd) failed = true; @@ -241,7 +249,7 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[]) } if (failed) { - free_pmds(mm, pmds); + free_pmds(mm, pmds, count); return -ENOMEM; } @@ -254,23 +262,38 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[]) * preallocate which never got a corresponding vma will need to be * freed manually. */ +static void mop_up_one_pmd(struct mm_struct *mm, pgd_t *pgdp) +{ + pgd_t pgd = *pgdp; + + if (pgd_val(pgd) != 0) { + pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); + + *pgdp = native_make_pgd(0); + + paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); + pmd_free(mm, pmd); + mm_dec_nr_pmds(mm); + } +} + static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) { int i; - for(i = 0; i < PREALLOCATED_PMDS; i++) { - pgd_t pgd = pgdp[i]; + for (i = 0; i < PREALLOCATED_PMDS; i++) + mop_up_one_pmd(mm, &pgdp[i]); - if (pgd_val(pgd) != 0) { - pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd); +#ifdef CONFIG_PAGE_TABLE_ISOLATION - pgdp[i] = native_make_pgd(0); + if (!static_cpu_has(X86_FEATURE_PTI)) + return; - paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); - pmd_free(mm, pmd); - mm_dec_nr_pmds(mm); - } - } + pgdp = kernel_to_user_pgdp(pgdp); + + for (i = 0; i < PREALLOCATED_USER_PMDS; i++) + mop_up_one_pmd(mm, &pgdp[i + KERNEL_PGD_BOUNDARY]); +#endif } static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) @@ -296,6 +319,38 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) } } +#ifdef CONFIG_PAGE_TABLE_ISOLATION +static void pgd_prepopulate_user_pmd(struct mm_struct *mm, + pgd_t *k_pgd, pmd_t *pmds[]) +{ + pgd_t *s_pgd = kernel_to_user_pgdp(swapper_pg_dir); + pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd); + p4d_t *u_p4d; + pud_t *u_pud; + int i; + + u_p4d = p4d_offset(u_pgd, 0); + u_pud = pud_offset(u_p4d, 0); + + s_pgd += KERNEL_PGD_BOUNDARY; + u_pud += KERNEL_PGD_BOUNDARY; + + for (i = 0; i < PREALLOCATED_USER_PMDS; i++, u_pud++, s_pgd++) { + pmd_t *pmd = pmds[i]; + + memcpy(pmd, (pmd_t *)pgd_page_vaddr(*s_pgd), + sizeof(pmd_t) * PTRS_PER_PMD); + + pud_populate(mm, u_pud, pmd); + } + +} +#else +static void pgd_prepopulate_user_pmd(struct mm_struct *mm, + pgd_t *k_pgd, pmd_t *pmds[]) +{ +} +#endif /* * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also * assumes that pgd should be in one page. @@ -343,7 +398,8 @@ static inline pgd_t *_pgd_alloc(void) * We allocate one page for pgd. */ if (!SHARED_KERNEL_PMD) - return (pgd_t *)__get_free_page(PGALLOC_GFP); + return (pgd_t *)__get_free_pages(PGALLOC_GFP, + PGD_ALLOCATION_ORDER); /* * Now PAE kernel is not running as a Xen domain. We can allocate @@ -355,7 +411,7 @@ static inline pgd_t *_pgd_alloc(void) static inline void _pgd_free(pgd_t *pgd) { if (!SHARED_KERNEL_PMD) - free_page((unsigned long)pgd); + free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER); else kmem_cache_free(pgd_cache, pgd); } @@ -375,6 +431,7 @@ static inline void _pgd_free(pgd_t *pgd) pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; + pmd_t *u_pmds[PREALLOCATED_USER_PMDS]; pmd_t *pmds[PREALLOCATED_PMDS]; pgd = _pgd_alloc(); @@ -384,12 +441,15 @@ pgd_t *pgd_alloc(struct mm_struct *mm) mm->pgd = pgd; - if (preallocate_pmds(mm, pmds) != 0) + if (preallocate_pmds(mm, pmds, PREALLOCATED_PMDS) != 0) goto out_free_pgd; - if (paravirt_pgd_alloc(mm) != 0) + if (preallocate_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS) != 0) goto out_free_pmds; + if (paravirt_pgd_alloc(mm) != 0) + goto out_free_user_pmds; + /* * Make sure that pre-populating the pmds is atomic with * respect to anything walking the pgd_list, so that they @@ -399,13 +459,16 @@ pgd_t *pgd_alloc(struct mm_struct *mm) pgd_ctor(mm, pgd); pgd_prepopulate_pmd(mm, pgd, pmds); + pgd_prepopulate_user_pmd(mm, pgd, u_pmds); spin_unlock(&pgd_lock); return pgd; +out_free_user_pmds: + free_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS); out_free_pmds: - free_pmds(mm, pmds); + free_pmds(mm, pmds, PREALLOCATED_PMDS); out_free_pgd: _pgd_free(pgd); out: diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 8d88d067b3d7..ef8db6ffc836 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -117,7 +117,7 @@ enable: setup_force_cpu_cap(X86_FEATURE_PTI); } -pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) +pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) { /* * Changes to the high (kernel) portion of the kernelmode page @@ -176,7 +176,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) if (pgd_none(*pgd)) { unsigned long new_p4d_page = __get_free_page(gfp); - if (!new_p4d_page) + if (WARN_ON_ONCE(!new_p4d_page)) return NULL; set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); @@ -195,13 +195,17 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) { gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); - p4d_t *p4d = pti_user_pagetable_walk_p4d(address); + p4d_t *p4d; pud_t *pud; + p4d = pti_user_pagetable_walk_p4d(address); + if (!p4d) + return NULL; + BUILD_BUG_ON(p4d_large(*p4d) != 0); if (p4d_none(*p4d)) { unsigned long new_pud_page = __get_free_page(gfp); - if (!new_pud_page) + if (WARN_ON_ONCE(!new_pud_page)) return NULL; set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page))); @@ -215,7 +219,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) } if (pud_none(*pud)) { unsigned long new_pmd_page = __get_free_page(gfp); - if (!new_pmd_page) + if (WARN_ON_ONCE(!new_pmd_page)) return NULL; set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); @@ -237,9 +241,13 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address) { gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); - pmd_t *pmd = pti_user_pagetable_walk_pmd(address); + pmd_t *pmd; pte_t *pte; + pmd = pti_user_pagetable_walk_pmd(address); + if (!pmd) + return NULL; + /* We can't do anything sensible if we hit a large mapping. */ if (pmd_large(*pmd)) { WARN_ON(1); @@ -297,6 +305,10 @@ pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear) p4d_t *p4d; pud_t *pud; + /* Overflow check */ + if (addr < start) + break; + pgd = pgd_offset_k(addr); if (WARN_ON(pgd_none(*pgd))) return; @@ -344,6 +356,7 @@ pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear) } } +#ifdef CONFIG_X86_64 /* * Clone a single p4d (i.e. a top-level entry on 4-level systems and a * next-level entry on 5-level systems. @@ -354,6 +367,9 @@ static void __init pti_clone_p4d(unsigned long addr) pgd_t *kernel_pgd; user_p4d = pti_user_pagetable_walk_p4d(addr); + if (!user_p4d) + return; + kernel_pgd = pgd_offset_k(addr); kernel_p4d = p4d_offset(kernel_pgd, addr); *user_p4d = *kernel_p4d; @@ -367,6 +383,25 @@ static void __init pti_clone_user_shared(void) pti_clone_p4d(CPU_ENTRY_AREA_BASE); } +#else /* CONFIG_X86_64 */ + +/* + * On 32 bit PAE systems with 1GB of Kernel address space there is only + * one pgd/p4d for the whole kernel. Cloning that would map the whole + * address space into the user page-tables, making PTI useless. So clone + * the page-table on the PMD level to prevent that. + */ +static void __init pti_clone_user_shared(void) +{ + unsigned long start, end; + + start = CPU_ENTRY_AREA_BASE; + end = start + (PAGE_SIZE * CPU_ENTRY_AREA_PAGES); + + pti_clone_pmds(start, end, 0); +} +#endif /* CONFIG_X86_64 */ + /* * Clone the ESPFIX P4D into the user space visible page table */ @@ -380,7 +415,7 @@ static void __init pti_setup_espfix64(void) /* * Clone the populated PMDs of the entry and irqentry text and force it RO. */ -static void __init pti_clone_entry_text(void) +static void pti_clone_entry_text(void) { pti_clone_pmds((unsigned long) __entry_text_start, (unsigned long) __irqentry_text_end, @@ -445,7 +480,7 @@ extern int set_memory_global(unsigned long addr, int numpages); * For some configurations, map all of kernel text into the user page * tables. This reduces TLB misses, especially on non-PCID systems. */ -void pti_clone_kernel_text(void) +static void pti_clone_kernel_text(void) { /* * rodata is part of the kernel image and is normally @@ -453,7 +488,7 @@ void pti_clone_kernel_text(void) * clone the areas past rodata, they might contain secrets. */ unsigned long start = PFN_ALIGN(_text); - unsigned long end_clone = (unsigned long)__end_rodata_hpage_align; + unsigned long end_clone = (unsigned long)__end_rodata_aligned; unsigned long end_global = PFN_ALIGN((unsigned long)__stop___ex_table); if (!pti_kernel_image_global_ok()) @@ -507,6 +542,28 @@ void __init pti_init(void) pr_info("enabled\n"); +#ifdef CONFIG_X86_32 + /* + * We check for X86_FEATURE_PCID here. But the init-code will + * clear the feature flag on 32 bit because the feature is not + * supported on 32 bit anyway. To print the warning we need to + * check with cpuid directly again. + */ + if (cpuid_ecx(0x1) && BIT(17)) { + /* Use printk to work around pr_fmt() */ + printk(KERN_WARNING "\n"); + printk(KERN_WARNING "************************************************************\n"); + printk(KERN_WARNING "** WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! **\n"); + printk(KERN_WARNING "** **\n"); + printk(KERN_WARNING "** You are using 32-bit PTI on a 64-bit PCID-capable CPU. **\n"); + printk(KERN_WARNING "** Your performance will increase dramatically if you **\n"); + printk(KERN_WARNING "** switch to a 64-bit kernel! **\n"); + printk(KERN_WARNING "** **\n"); + printk(KERN_WARNING "** WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! **\n"); + printk(KERN_WARNING "************************************************************\n"); + } +#endif + pti_clone_user_shared(); /* Undo all global bits from the init pagetables in head_64.S: */ @@ -516,3 +573,20 @@ void __init pti_init(void) pti_setup_espfix64(); pti_setup_vsyscall(); } + +/* + * Finalize the kernel mappings in the userspace page-table. Some of the + * mappings for the kernel image might have changed since pti_init() + * cloned them. This is because parts of the kernel image have been + * mapped RO and/or NX. These changes need to be cloned again to the + * userspace page-table. + */ +void pti_finalize(void) +{ + /* + * We need to clone everything (again) that maps parts of the + * kernel image. + */ + pti_clone_entry_text(); + pti_clone_kernel_text(); +} |