diff options
author | Hugh Dickins <hughd@google.com> | 2017-09-25 02:59:49 +0300 |
---|---|---|
committer | Ben Hutchings <ben@decadent.org.uk> | 2018-01-07 04:46:51 +0300 |
commit | c1d23a9da91c884a7cb13a5c238ac279f3635c58 (patch) | |
tree | 453aa0da0edf373d0aadd8f5a108032d1d488562 | |
parent | 2bf370c889e55dca0f673dd1ee688b242f2eb881 (diff) | |
download | linux-c1d23a9da91c884a7cb13a5c238ac279f3635c58.tar.xz |
kaiser: add "nokaiser" boot option, using ALTERNATIVE
Added "nokaiser" boot option: an early param like "noinvpcid".
Most places now check int kaiser_enabled (#defined 0 when not
CONFIG_KAISER) instead of #ifdef CONFIG_KAISER; but entry_64.S
and entry_64_compat.S are using the ALTERNATIVE technique, which
patches in the preferred instructions at runtime. That technique
is tied to x86 cpu features, so X86_FEATURE_KAISER fabricated
("" in its comment so "kaiser" not magicked into /proc/cpuinfo).
Prior to "nokaiser", Kaiser #defined _PAGE_GLOBAL 0: revert that,
but be careful with both _PAGE_GLOBAL and CR4.PGE: setting them when
nokaiser like when !CONFIG_KAISER, but not setting either when kaiser -
neither matters on its own, but it's hard to be sure that _PAGE_GLOBAL
won't get set in some obscure corner, or something add PGE into CR4.
By omitting _PAGE_GLOBAL from __supported_pte_mask when kaiser_enabled,
all page table setup which uses pte_pfn() masks it out of the ptes.
It's slightly shameful that the same declaration versus definition of
kaiser_enabled appears in not one, not two, but in three header files
(asm/kaiser.h, asm/pgtable.h, asm/tlbflush.h). I felt safer that way,
than with #including any of those in any of the others; and did not
feel it worth an asm/kaiser_enabled.h - kernel/cpu/common.c includes
them all, so we shall hear about it if they get out of synch.
Cleanups while in the area: removed the silly #ifdef CONFIG_KAISER
from kaiser.c; removed the unused native_get_normal_pgd(); removed
the spurious reg clutter from SWITCH_*_CR3 macro stubs; corrected some
comments. But more interestingly, set CR4.PSE in secondary_startup_64:
the manual is clear that it does not matter whether it's 0 or 1 when
4-level-pts are enabled, but I was distracted to find cr4 different on
BSP and auxiliaries - BSP alone was adding PSE, in init_memory_mapping().
(cherry picked from Change-Id: I8e5bec716944444359cbd19f6729311eff943e9a)
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
-rw-r--r-- | Documentation/kernel-parameters.txt | 2 | ||||
-rw-r--r-- | arch/x86/ia32/ia32entry.S | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/cpufeature.h | 3 | ||||
-rw-r--r-- | arch/x86/include/asm/kaiser.h | 27 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable.h | 19 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable_64.h | 13 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable_types.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/tlbflush.h | 35 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 30 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 6 | ||||
-rw-r--r-- | arch/x86/kernel/espfix_64.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/head_64.S | 4 | ||||
-rw-r--r-- | arch/x86/mm/init.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 10 | ||||
-rw-r--r-- | arch/x86/mm/kaiser.c | 26 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 8 | ||||
-rw-r--r-- | arch/x86/mm/tlb.c | 4 |
17 files changed, 139 insertions, 59 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4f43fd315ecd..28c23bdc1af4 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1803,6 +1803,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nojitter [IA-64] Disables jitter checking for ITC timers. + nokaiser [X86-64] Disable KAISER isolation of kernel from user. + no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 7eb0d4792800..130db0702d9f 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -13,6 +13,8 @@ #include <asm/thread_info.h> #include <asm/segment.h> #include <asm/pgtable_types.h> +#include <asm/alternative-asm.h> +#include <asm/cpufeature.h> #include <asm/kaiser.h> #include <asm/irqflags.h> #include <linux/linkage.h> diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index e65fb1220573..cb967c9db459 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -178,6 +178,9 @@ #define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */ #define X86_FEATURE_INVPCID_SINGLE (7*32+ 8) /* Effectively INVPCID && CR4.PCIDE=1 */ +/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ +#define X86_FEATURE_KAISER ( 7*32+31) /* "" CONFIG_KAISER w/o nokaiser */ + /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ #define X86_FEATURE_VNMI (8*32+ 1) /* Intel Virtual NMI */ diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h index 6f4c8ef46881..2fe06d06241c 100644 --- a/arch/x86/include/asm/kaiser.h +++ b/arch/x86/include/asm/kaiser.h @@ -46,28 +46,33 @@ movq \reg, %cr3 .endm .macro SWITCH_KERNEL_CR3 -pushq %rax +ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER _SWITCH_TO_KERNEL_CR3 %rax popq %rax +8: .endm .macro SWITCH_USER_CR3 -pushq %rax +ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER _SWITCH_TO_USER_CR3 %rax %al popq %rax +8: .endm .macro SWITCH_KERNEL_CR3_NO_STACK -movq %rax, PER_CPU_VAR(unsafe_stack_register_backup) +ALTERNATIVE "jmp 8f", \ + __stringify(movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)), \ + X86_FEATURE_KAISER _SWITCH_TO_KERNEL_CR3 %rax movq PER_CPU_VAR(unsafe_stack_register_backup), %rax +8: .endm #else /* CONFIG_KAISER */ -.macro SWITCH_KERNEL_CR3 reg +.macro SWITCH_KERNEL_CR3 .endm -.macro SWITCH_USER_CR3 reg regb +.macro SWITCH_USER_CR3 .endm .macro SWITCH_KERNEL_CR3_NO_STACK .endm @@ -90,6 +95,16 @@ DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user); extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[]; +extern int kaiser_enabled; +#else +#define kaiser_enabled 0 +#endif /* CONFIG_KAISER */ + +/* + * Kaiser function prototypes are needed even when CONFIG_KAISER is not set, + * so as to build with tests on kaiser_enabled instead of #ifdefs. + */ + /** * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping * @addr: the start address of the range @@ -119,8 +134,6 @@ extern void kaiser_remove_mapping(unsigned long start, unsigned long size); */ extern void kaiser_init(void); -#endif /* CONFIG_KAISER */ - #endif /* __ASSEMBLY */ #endif /* _ASM_X86_KAISER_H */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index b1c8b8d3b02a..a467c19e64b5 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -17,6 +17,11 @@ #ifndef __ASSEMBLY__ #include <asm/x86_init.h> +#ifdef CONFIG_KAISER +extern int kaiser_enabled; +#else +#define kaiser_enabled 0 +#endif /* * ZERO_PAGE is a global shared page that is always zero: used @@ -577,7 +582,7 @@ static inline int pgd_bad(pgd_t pgd) * page table by accident; it will fault on the first * instruction it tries to run. See native_set_pgd(). */ - if (IS_ENABLED(CONFIG_KAISER)) + if (kaiser_enabled) ignore_flags |= _PAGE_NX; return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE; @@ -780,12 +785,14 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, */ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) { - memcpy(dst, src, count * sizeof(pgd_t)); + memcpy(dst, src, count * sizeof(pgd_t)); #ifdef CONFIG_KAISER - /* Clone the shadow pgd part as well */ - memcpy(native_get_shadow_pgd(dst), - native_get_shadow_pgd(src), - count * sizeof(pgd_t)); + if (kaiser_enabled) { + /* Clone the shadow pgd part as well */ + memcpy(native_get_shadow_pgd(dst), + native_get_shadow_pgd(src), + count * sizeof(pgd_t)); + } #endif } diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index a3bf3de9893b..a4a2b700a4e6 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -110,13 +110,12 @@ extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd); static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) { +#ifdef CONFIG_DEBUG_VM + /* linux/mmdebug.h may not have been included at this point */ + BUG_ON(!kaiser_enabled); +#endif return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE); } - -static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) -{ - return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE); -} #else static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) { @@ -126,10 +125,6 @@ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp) { return NULL; } -static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp) -{ - return pgdp; -} #endif /* CONFIG_KAISER */ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 6e1315068a62..3a3c6d014696 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -39,11 +39,7 @@ #define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) -#ifdef CONFIG_KAISER -#define _PAGE_GLOBAL (_AT(pteval_t, 0)) -#else #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) -#endif #define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) #define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 288195901c8a..616eca18ed31 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -69,9 +69,11 @@ static inline void invpcid_flush_all_nonglobals(void) * to avoid the need for asm/kaiser.h in unexpected places. */ #ifdef CONFIG_KAISER +extern int kaiser_enabled; extern void kaiser_setup_pcid(void); extern void kaiser_flush_tlb_on_return_to_user(void); #else +#define kaiser_enabled 0 static inline void kaiser_setup_pcid(void) { } @@ -96,7 +98,7 @@ static inline void __native_flush_tlb(void) * back: */ preempt_disable(); - if (this_cpu_has(X86_FEATURE_PCID)) + if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) kaiser_flush_tlb_on_return_to_user(); native_write_cr3(native_read_cr3()); preempt_enable(); @@ -104,13 +106,15 @@ static inline void __native_flush_tlb(void) static inline void __native_flush_tlb_global(void) { -#ifdef CONFIG_KAISER - /* Globals are not used at all */ - __native_flush_tlb(); -#else unsigned long flags; unsigned long cr4; + if (kaiser_enabled) { + /* Globals are not used at all */ + __native_flush_tlb(); + return; + } + if (this_cpu_has(X86_FEATURE_INVPCID)) { /* * Using INVPCID is considerably faster than a pair of writes @@ -130,13 +134,16 @@ static inline void __native_flush_tlb_global(void) raw_local_irq_save(flags); cr4 = native_read_cr4(); - /* clear PGE */ - native_write_cr4(cr4 & ~X86_CR4_PGE); - /* write old PGE again and flush TLBs */ - native_write_cr4(cr4); + if (cr4 & X86_CR4_PGE) { + /* clear PGE and flush TLB of all entries */ + native_write_cr4(cr4 & ~X86_CR4_PGE); + /* restore PGE as it was before */ + native_write_cr4(cr4); + } else { + native_write_cr3(native_read_cr3()); + } raw_local_irq_restore(flags); -#endif } static inline void __native_flush_tlb_single(unsigned long addr) @@ -151,7 +158,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) */ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) { - if (this_cpu_has(X86_FEATURE_PCID)) + if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) kaiser_flush_tlb_on_return_to_user(); asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); return; @@ -166,9 +173,9 @@ static inline void __native_flush_tlb_single(unsigned long addr) * Make sure to do only a single invpcid when KAISER is * disabled and we have only a single ASID. */ - if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER) - invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr); - invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr); + if (kaiser_enabled) + invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr); + invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr); } static inline void __flush_tlb_all(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b567c89fc628..dbad6b1ee60e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -171,6 +171,20 @@ static int __init x86_pcid_setup(char *s) return 1; } __setup("nopcid", x86_pcid_setup); + +static int __init x86_nokaiser_setup(char *s) +{ + /* nokaiser doesn't accept parameters */ + if (s) + return -EINVAL; +#ifdef CONFIG_KAISER + kaiser_enabled = 0; + setup_clear_cpu_cap(X86_FEATURE_KAISER); + pr_info("nokaiser: KAISER feature disabled\n"); +#endif + return 0; +} +early_param("nokaiser", x86_nokaiser_setup); #endif static int __init x86_noinvpcid_setup(char *s) @@ -313,7 +327,8 @@ static __cpuinit void setup_smep(struct cpuinfo_x86 *c) static void setup_pcid(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_PCID)) { - if (cpu_has(c, X86_FEATURE_PGE) && IS_ENABLED(CONFIG_X86_64)) { + if (IS_ENABLED(CONFIG_X86_64) && + (cpu_has(c, X86_FEATURE_PGE) || kaiser_enabled)) { /* * Regardless of whether PCID is enumerated, the * SDM says that it can't be enabled in 32-bit mode. @@ -680,6 +695,10 @@ void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) c->x86_power = cpuid_edx(0x80000007); init_scattered_cpuid_features(c); +#ifdef CONFIG_KAISER + if (kaiser_enabled) + set_cpu_cap(c, X86_FEATURE_KAISER); +#endif } static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) @@ -1229,6 +1248,15 @@ void __cpuinit cpu_init(void) int cpu; int i; + if (!kaiser_enabled) { + /* + * secondary_startup_64() deferred setting PGE in cr4: + * init_memory_mapping() sets it on the boot cpu, + * but it needs to be set on each secondary cpu. + */ + set_in_cr4(X86_CR4_PGE); + } + cpu = stack_smp_processor_id(); t = &per_cpu(init_tss, cpu); oist = &per_cpu(orig_ist, cpu); diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 3a4356a2f156..eb6c31b6069c 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -56,6 +56,8 @@ #include <asm/ftrace.h> #include <asm/percpu.h> #include <asm/pgtable_types.h> +#include <asm/alternative-asm.h> +#include <asm/cpufeature.h> #include <asm/kaiser.h> /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ @@ -404,7 +406,7 @@ ENTRY(save_paranoid) * unconditionally, but we need to find out whether the reverse * should be done on return (conveyed to paranoid_exit in %ebx). */ - movq %cr3, %rax + ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER testl $KAISER_SHADOW_PGD_OFFSET, %eax jz 2f orl $2, %ebx @@ -1428,6 +1430,7 @@ paranoid_kernel: movq %r12, %rbx /* restore after paranoid_userspace */ TRACE_IRQS_IRETQ 0 #ifdef CONFIG_KAISER + /* No ALTERNATIVE for X86_FEATURE_KAISER: save_paranoid sets %ebx */ testl $2, %ebx /* SWITCH_USER_CR3 needed? */ jz paranoid_exit_no_switch SWITCH_USER_CR3 @@ -1597,6 +1600,7 @@ ENTRY(nmi) nmi_kernel: movq %r12, %rbx /* restore after nmi_userspace */ #ifdef CONFIG_KAISER + /* No ALTERNATIVE for X86_FEATURE_KAISER: save_paranoid sets %ebx */ testl $2, %ebx /* SWITCH_USER_CR3 needed? */ jz nmi_exit_no_switch SWITCH_USER_CR3 diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 14cd73b0e634..a1944faa739c 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -135,9 +135,10 @@ void __init init_espfix_bsp(void) * area to ensure it is mapped into the shadow user page * tables. */ - if (IS_ENABLED(CONFIG_KAISER)) + if (kaiser_enabled) { set_pgd(native_get_shadow_pgd(pgd_p), __pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page))); + } /* Randomize the locations */ init_espfix_random(); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 6e697ac3fb54..28aef29c42de 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -166,8 +166,8 @@ ENTRY(secondary_startup_64) /* Sanitize CPU configuration */ call verify_cpu - /* Enable PAE mode and PGE */ - movl $(X86_CR4_PAE | X86_CR4_PGE), %eax + /* Enable PAE and PSE, but defer PGE until kaiser_enabled is decided */ + movl $(X86_CR4_PAE | X86_CR4_PSE), %eax movq %rax, %cr4 /* Setup early boot stage 4 level pagetables. */ diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index dba71b25a546..f897b97ab897 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -161,7 +161,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, set_in_cr4(X86_CR4_PSE); /* Enable PGE if available */ - if (cpu_has_pge) { + if (cpu_has_pge && !kaiser_enabled) { set_in_cr4(X86_CR4_PGE); __supported_pte_mask |= _PAGE_GLOBAL; } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 44b93da18401..1f1b8ed9b06f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -312,6 +312,16 @@ void __init cleanup_highmap(void) continue; if (vaddr < (unsigned long) _text || vaddr > end) set_pmd(pmd, __pmd(0)); + else if (kaiser_enabled) { + /* + * level2_kernel_pgt is initialized with _PAGE_GLOBAL: + * clear that now. This is not important, so long as + * CR4.PGE remains clear, but it removes an anomaly. + * Physical mapping setup below avoids _PAGE_GLOBAL + * by use of massage_pgprot() inside pfn_pte() etc. + */ + set_pmd(pmd, pmd_clear_flags(*pmd, _PAGE_GLOBAL)); + } } } diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c index fb16d79fc07a..bef8a415048b 100644 --- a/arch/x86/mm/kaiser.c +++ b/arch/x86/mm/kaiser.c @@ -21,7 +21,9 @@ extern struct mm_struct init_mm; #include <asm/pgalloc.h> #include <asm/desc.h> -#ifdef CONFIG_KAISER +int kaiser_enabled __read_mostly = 1; +EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */ + DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup); /* @@ -165,8 +167,8 @@ static pte_t *kaiser_pagetable_walk(unsigned long address) return pte_offset_kernel(pmd, address); } -int kaiser_add_user_map(const void *__start_addr, unsigned long size, - unsigned long flags) +static int kaiser_add_user_map(const void *__start_addr, unsigned long size, + unsigned long flags) { int ret = 0; pte_t *pte; @@ -175,6 +177,15 @@ int kaiser_add_user_map(const void *__start_addr, unsigned long size, unsigned long end_addr = PAGE_ALIGN(start_addr + size); unsigned long target_address; + /* + * It is convenient for callers to pass in __PAGE_KERNEL etc, + * and there is no actual harm from setting _PAGE_GLOBAL, so + * long as CR4.PGE is not set. But it is nonetheless troubling + * to see Kaiser itself setting _PAGE_GLOBAL (now that "nokaiser" + * requires that not to be #defined to 0): so mask it off here. + */ + flags &= ~_PAGE_GLOBAL; + if (flags & _PAGE_USER) BUG_ON(address < FIXADDR_START || end_addr >= FIXADDR_TOP); @@ -264,6 +275,8 @@ void __init kaiser_init(void) { int cpu; + if (!kaiser_enabled) + return; kaiser_init_all_pgds(); for_each_possible_cpu(cpu) { @@ -303,6 +316,8 @@ void __init kaiser_init(void) /* Add a mapping to the shadow mapping, and synchronize the mappings */ int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags) { + if (!kaiser_enabled) + return 0; return kaiser_add_user_map((const void *)addr, size, flags); } @@ -312,6 +327,8 @@ void kaiser_remove_mapping(unsigned long start, unsigned long size) unsigned long addr; pte_t *pte; + if (!kaiser_enabled) + return; for (addr = start; addr < end; addr += PAGE_SIZE) { pte = kaiser_pagetable_walk(addr); if (pte) @@ -333,6 +350,8 @@ static inline bool is_userspace_pgd(pgd_t *pgdp) pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd) { + if (!kaiser_enabled) + return pgd; /* * Do we need to also populate the shadow pgd? Check _PAGE_USER to * skip cases like kexec and EFI which make temporary low mappings. @@ -389,4 +408,3 @@ void kaiser_flush_tlb_on_return_to_user(void) X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET); } EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user); -#endif /* CONFIG_KAISER */ diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 73285602c93f..0b246ef0fa48 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -253,16 +253,12 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) } } -#ifdef CONFIG_KAISER /* - * Instead of one pmd, we aquire two pmds. Being order-1, it is + * Instead of one pgd, Kaiser acquires two pgds. Being order-1, it is * both 8k in size and 8k-aligned. That lets us just flip bit 12 * in a pointer to swap between the two 4k halves. */ -#define PGD_ALLOCATION_ORDER 1 -#else -#define PGD_ALLOCATION_ORDER 0 -#endif +#define PGD_ALLOCATION_ORDER kaiser_enabled static inline pgd_t *_pgd_alloc(void) { diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 4078e3092b16..ebfae14eb40d 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -21,8 +21,7 @@ static void load_new_mm_cr3(pgd_t *pgdir) { unsigned long new_mm_cr3 = __pa(pgdir); -#ifdef CONFIG_KAISER - if (this_cpu_has(X86_FEATURE_PCID)) { + if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) { /* * We reuse the same PCID for different tasks, so we must * flush all the entries for the PCID out when we change tasks. @@ -39,7 +38,6 @@ static void load_new_mm_cr3(pgd_t *pgdir) new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH; kaiser_flush_tlb_on_return_to_user(); } -#endif /* CONFIG_KAISER */ /* * Caution: many callers of this function expect |