diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/Makefile | 18 | ||||
-rw-r--r-- | arch/x86/kernel/acpi/wakeup_64.S | 3 | ||||
-rw-r--r-- | arch/x86/kernel/amd_nb.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/aperture_64.c | 12 | ||||
-rw-r--r-- | arch/x86/kernel/apic/Makefile | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/Makefile | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 17 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 44 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/powerflags.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/dumpstack.c | 11 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/core.c | 63 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/xstate.c | 185 | ||||
-rw-r--r-- | arch/x86/kernel/ftrace.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes/core.c | 59 | ||||
-rw-r--r-- | arch/x86/kernel/kvm.c | 37 | ||||
-rw-r--r-- | arch/x86/kernel/ldt.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/stacktrace.c | 18 | ||||
-rw-r--r-- | arch/x86/kernel/tboot.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux.lds.S | 15 |
23 files changed, 424 insertions, 100 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b1b78ffe01d0..616ebd22ef9a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -16,9 +16,21 @@ CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg endif -KASAN_SANITIZE_head$(BITS).o := n -KASAN_SANITIZE_dumpstack.o := n -KASAN_SANITIZE_dumpstack_$(BITS).o := n +KASAN_SANITIZE_head$(BITS).o := n +KASAN_SANITIZE_dumpstack.o := n +KASAN_SANITIZE_dumpstack_$(BITS).o := n +KASAN_SANITIZE_stacktrace.o := n + +OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y +OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y +OBJECT_FILES_NON_STANDARD_mcount_$(BITS).o := y +OBJECT_FILES_NON_STANDARD_test_nx.o := y + +# If instrumentation of this dir is enabled, boot hangs during first second. +# Probably could be more selective here, but note that files related to irqs, +# boot, dumpstack/stacktrace, etc are either non-interesting or can lead to +# non-deterministic coverage. +KCOV_INSTRUMENT := n CFLAGS_irq.o := -I$(src)/../include/asm/trace diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index 8c35df468104..169963f471bb 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -5,6 +5,7 @@ #include <asm/page_types.h> #include <asm/msr.h> #include <asm/asm-offsets.h> +#include <asm/frame.h> # Copyright 2003 Pavel Machek <pavel@suse.cz>, distribute under GPLv2 @@ -39,6 +40,7 @@ bogus_64_magic: jmp bogus_64_magic ENTRY(do_suspend_lowlevel) + FRAME_BEGIN subq $8, %rsp xorl %eax, %eax call save_processor_state @@ -109,6 +111,7 @@ ENTRY(do_suspend_lowlevel) xorl %eax, %eax addq $8, %rsp + FRAME_END jmp restore_processor_state ENDPROC(do_suspend_lowlevel) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 29fa475ec518..a147e676fc7b 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -170,15 +170,13 @@ int amd_get_subcaches(int cpu) { struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link; unsigned int mask; - int cuid; if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) return 0; pci_read_config_dword(link, 0x1d4, &mask); - cuid = cpu_data(cpu).compute_unit_id; - return (mask >> (4 * cuid)) & 0xf; + return (mask >> (4 * cpu_data(cpu).cpu_core_id)) & 0xf; } int amd_set_subcaches(int cpu, unsigned long mask) @@ -204,7 +202,7 @@ int amd_set_subcaches(int cpu, unsigned long mask) pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000); } - cuid = cpu_data(cpu).compute_unit_id; + cuid = cpu_data(cpu).cpu_core_id; mask <<= 4 * cuid; mask |= (0xf ^ (1 << cuid)) << 26; diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 6e85f713641d..0a2bb1f62e72 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -227,19 +227,11 @@ static u32 __init search_agp_bridge(u32 *order, int *valid_agp) return 0; } -static int gart_fix_e820 __initdata = 1; +static bool gart_fix_e820 __initdata = true; static int __init parse_gart_mem(char *p) { - if (!p) - return -EINVAL; - - if (!strncmp(p, "off", 3)) - gart_fix_e820 = 0; - else if (!strncmp(p, "on", 2)) - gart_fix_e820 = 1; - - return 0; + return kstrtobool(p, &gart_fix_e820); } early_param("gart_fix_e820", parse_gart_mem); diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 8bb12ddc5db8..8e63ebdcbd0b 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -2,6 +2,10 @@ # Makefile for local APIC drivers and for the IO-APIC code # +# Leads to non-deterministic coverage that is not a function of syscall inputs. +# In particualr, smp_apic_timer_interrupt() is called in random places. +KCOV_INSTRUMENT := n + obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o vector.o obj-y += hw_nmi.o diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 0d373d7affc8..4a8697f7d4ef 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -8,6 +8,10 @@ CFLAGS_REMOVE_common.o = -pg CFLAGS_REMOVE_perf_event.o = -pg endif +# If these files are instrumented, boot hangs during the first second. +KCOV_INSTRUMENT_common.o := n +KCOV_INSTRUMENT_perf_event.o := n + # Make sure load_percpu_segment has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_common.o := $(nostackp) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 68fe8d3bed56..7b76eb67a9b3 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -75,7 +75,10 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) */ extern __visible void vide(void); -__asm__(".globl vide\n\t.align 4\nvide: ret"); +__asm__(".globl vide\n" + ".type vide, @function\n" + ".align 4\n" + "vide: ret\n"); static void init_amd_k5(struct cpuinfo_x86 *c) { @@ -297,7 +300,6 @@ static int nearby_node(int apicid) #ifdef CONFIG_SMP static void amd_get_topology(struct cpuinfo_x86 *c) { - u32 cores_per_cu = 1; u8 node_id; int cpu = smp_processor_id(); @@ -310,8 +312,8 @@ static void amd_get_topology(struct cpuinfo_x86 *c) /* get compute unit information */ smp_num_siblings = ((ebx >> 8) & 3) + 1; - c->compute_unit_id = ebx & 0xff; - cores_per_cu += ((ebx >> 8) & 3); + c->x86_max_cores /= smp_num_siblings; + c->cpu_core_id = ebx & 0xff; } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; @@ -322,19 +324,16 @@ static void amd_get_topology(struct cpuinfo_x86 *c) /* fixup multi-node processor information */ if (nodes_per_socket > 1) { - u32 cores_per_node; u32 cus_per_node; set_cpu_cap(c, X86_FEATURE_AMD_DCM); - cores_per_node = c->x86_max_cores / nodes_per_socket; - cus_per_node = cores_per_node / cores_per_cu; + cus_per_node = c->x86_max_cores / nodes_per_socket; /* store NodeID, use llc_shared_map to store sibling info */ per_cpu(cpu_llc_id, cpu) = node_id; /* core id has to be in the [0 .. cores_per_node - 1] range */ - c->cpu_core_id %= cores_per_node; - c->compute_unit_id %= cus_per_node; + c->cpu_core_id %= cus_per_node; } } #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e601c1286e29..8394b3d1f94f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -304,6 +304,48 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) } /* + * Protection Keys are not available in 32-bit mode. + */ +static bool pku_disabled; + +static __always_inline void setup_pku(struct cpuinfo_x86 *c) +{ + if (!cpu_has(c, X86_FEATURE_PKU)) + return; + if (pku_disabled) + return; + + cr4_set_bits(X86_CR4_PKE); + /* + * Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE + * cpuid bit to be set. We need to ensure that we + * update that bit in this CPU's "cpu_info". + */ + get_cpu_cap(c); +} + +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +static __init int setup_disable_pku(char *arg) +{ + /* + * Do not clear the X86_FEATURE_PKU bit. All of the + * runtime checks are against OSPKE so clearing the + * bit does nothing. + * + * This way, we will see "pku" in cpuinfo, but not + * "ospke", which is exactly what we want. It shows + * that the CPU has PKU, but the OS has not enabled it. + * This happens to be exactly how a system would look + * if we disabled the config option. + */ + pr_info("x86: 'nopku' specified, disabling Memory Protection Keys\n"); + pku_disabled = true; + return 1; +} +__setup("nopku", setup_disable_pku); +#endif /* CONFIG_X86_64 */ + +/* * Some CPU features depend on higher CPUID levels, which may not always * be available due to CPUID level capping or broken virtualization * software. Add those features to this table to auto-disable them. @@ -625,6 +667,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_7_0_EBX] = ebx; c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006); + c->x86_capability[CPUID_7_ECX] = ecx; } /* Extended state features: level 0x0000000d */ @@ -984,6 +1027,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) init_hypervisor(c); x86_init_rdrand(c); x86_init_cache_qos(c); + setup_pku(c); /* * Clear/Set all flags overridden by options, need do it diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 0b445c2ff735..ac780cad3b86 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -384,6 +384,9 @@ static void intel_thermal_interrupt(void) { __u64 msr_val; + if (static_cpu_has(X86_FEATURE_HWP)) + wrmsrl_safe(MSR_HWP_STATUS, 0); + rdmsrl(MSR_IA32_THERM_STATUS, msr_val); /* Check for violation of core thermal thresholds*/ diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c index 31f0f335ed22..1dd8294fd730 100644 --- a/arch/x86/kernel/cpu/powerflags.c +++ b/arch/x86/kernel/cpu/powerflags.c @@ -18,4 +18,6 @@ const char *const x86_power_flags[32] = { "", /* tsc invariant mapped to constant_tsc */ "cpb", /* core performance boost */ "eff_freq_ro", /* Readonly aperf/mperf */ + "proc_feedback", /* processor feedback interface */ + "acc_power", /* accumulated power mechanism */ }; diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 9c30acfadae2..8efa57a5f29e 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -135,7 +135,8 @@ print_context_stack_bp(struct thread_info *tinfo, if (!__kernel_text_address(addr)) break; - ops->address(data, addr, 1); + if (ops->address(data, addr, 1)) + break; frame = frame->next_frame; ret_addr = &frame->return_address; print_ftrace_graph_addr(addr, data, ops, tinfo, graph); @@ -154,10 +155,11 @@ static int print_trace_stack(void *data, char *name) /* * Print one address/symbol entries per line. */ -static void print_trace_address(void *data, unsigned long addr, int reliable) +static int print_trace_address(void *data, unsigned long addr, int reliable) { touch_nmi_watchdog(); printk_stack_address(addr, reliable, data); + return 0; } static const struct stacktrace_ops print_trace_ops = { @@ -265,9 +267,8 @@ int __die(const char *str, struct pt_regs *regs, long err) #ifdef CONFIG_SMP printk("SMP "); #endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC "); -#endif + if (debug_pagealloc_enabled()) + printk("DEBUG_PAGEALLOC "); #ifdef CONFIG_KASAN printk("KASAN"); #endif diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 0b1b9abd4d5f..8e37cc8a539a 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -354,6 +354,69 @@ void fpu__activate_fpstate_write(struct fpu *fpu) } /* + * This function must be called before we write the current + * task's fpstate. + * + * This call gets the current FPU register state and moves + * it in to the 'fpstate'. Preemption is disabled so that + * no writes to the 'fpstate' can occur from context + * swiches. + * + * Must be followed by a fpu__current_fpstate_write_end(). + */ +void fpu__current_fpstate_write_begin(void) +{ + struct fpu *fpu = ¤t->thread.fpu; + + /* + * Ensure that the context-switching code does not write + * over the fpstate while we are doing our update. + */ + preempt_disable(); + + /* + * Move the fpregs in to the fpu's 'fpstate'. + */ + fpu__activate_fpstate_read(fpu); + + /* + * The caller is about to write to 'fpu'. Ensure that no + * CPU thinks that its fpregs match the fpstate. This + * ensures we will not be lazy and skip a XRSTOR in the + * future. + */ + fpu->last_cpu = -1; +} + +/* + * This function must be paired with fpu__current_fpstate_write_begin() + * + * This will ensure that the modified fpstate gets placed back in + * the fpregs if necessary. + * + * Note: This function may be called whether or not an _actual_ + * write to the fpstate occurred. + */ +void fpu__current_fpstate_write_end(void) +{ + struct fpu *fpu = ¤t->thread.fpu; + + /* + * 'fpu' now has an updated copy of the state, but the + * registers may still be out of date. Update them with + * an XRSTOR if they are active. + */ + if (fpregs_active()) + copy_kernel_to_fpregs(&fpu->state); + + /* + * Our update is done and the fpregs/fpstate are in sync + * if necessary. Context switches can happen again. + */ + preempt_enable(); +} + +/* * 'fpu__restore()' is called to copy FPU registers from * the FPU fpstate to the live hw registers and to activate * access to the hardware registers, so that FPU instructions diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 6e8354f5a593..b48ef35b28d4 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -5,6 +5,7 @@ */ #include <linux/compat.h> #include <linux/cpu.h> +#include <linux/pkeys.h> #include <asm/fpu/api.h> #include <asm/fpu/internal.h> @@ -13,6 +14,11 @@ #include <asm/tlbflush.h> +/* + * Although we spell it out in here, the Processor Trace + * xfeature is completely unused. We use other mechanisms + * to save/restore PT state in Linux. + */ static const char *xfeature_names[] = { "x87 floating point registers" , @@ -23,6 +29,8 @@ static const char *xfeature_names[] = "AVX-512 opmask" , "AVX-512 Hi256" , "AVX-512 ZMM_Hi256" , + "Processor Trace (unused)" , + "Protection Keys User registers", "unknown xstate feature" , }; @@ -56,6 +64,7 @@ void fpu__xstate_clear_all_cpu_caps(void) setup_clear_cpu_cap(X86_FEATURE_AVX512VL); setup_clear_cpu_cap(X86_FEATURE_MPX); setup_clear_cpu_cap(X86_FEATURE_XGETBV1); + setup_clear_cpu_cap(X86_FEATURE_PKU); } /* @@ -234,7 +243,7 @@ static void __init print_xstate_feature(u64 xstate_mask) const char *feature_name; if (cpu_has_xfeatures(xstate_mask, &feature_name)) - pr_info("x86/fpu: Supporting XSAVE feature 0x%02Lx: '%s'\n", xstate_mask, feature_name); + pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name); } /* @@ -250,6 +259,7 @@ static void __init print_xstate_features(void) print_xstate_feature(XFEATURE_MASK_OPMASK); print_xstate_feature(XFEATURE_MASK_ZMM_Hi256); print_xstate_feature(XFEATURE_MASK_Hi16_ZMM); + print_xstate_feature(XFEATURE_MASK_PKRU); } /* @@ -466,6 +476,7 @@ static void check_xstate_against_struct(int nr) XCHECK_SZ(sz, nr, XFEATURE_OPMASK, struct avx_512_opmask_state); XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state); XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM, struct avx_512_hi16_state); + XCHECK_SZ(sz, nr, XFEATURE_PKRU, struct pkru_state); /* * Make *SURE* to add any feature numbers in below if @@ -473,7 +484,8 @@ static void check_xstate_against_struct(int nr) * numbers. */ if ((nr < XFEATURE_YMM) || - (nr >= XFEATURE_MAX)) { + (nr >= XFEATURE_MAX) || + (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR)) { WARN_ONCE(1, "no structure for xstate: %d\n", nr); XSTATE_WARN_ON(1); } @@ -671,6 +683,19 @@ void fpu__resume_cpu(void) } /* + * Given an xstate feature mask, calculate where in the xsave + * buffer the state is. Callers should ensure that the buffer + * is valid. + * + * Note: does not work for compacted buffers. + */ +void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask) +{ + int feature_nr = fls64(xstate_feature_mask) - 1; + + return (void *)xsave + xstate_comp_offsets[feature_nr]; +} +/* * Given the xsave area and a state inside, this function returns the * address of the state. * @@ -690,7 +715,6 @@ void fpu__resume_cpu(void) */ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) { - int feature_nr = fls64(xstate_feature) - 1; /* * Do we even *have* xsave state? */ @@ -718,7 +742,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) if (!(xsave->header.xfeatures & xstate_feature)) return NULL; - return (void *)xsave + xstate_comp_offsets[feature_nr]; + return __raw_xsave_addr(xsave, xstate_feature); } EXPORT_SYMBOL_GPL(get_xsave_addr); @@ -753,3 +777,156 @@ const void *get_xsave_field_ptr(int xsave_state) return get_xsave_addr(&fpu->state.xsave, xsave_state); } + + +/* + * Set xfeatures (aka XSTATE_BV) bit for a feature that we want + * to take out of its "init state". This will ensure that an + * XRSTOR actually restores the state. + */ +static void fpu__xfeature_set_non_init(struct xregs_state *xsave, + int xstate_feature_mask) +{ + xsave->header.xfeatures |= xstate_feature_mask; +} + +/* + * This function is safe to call whether the FPU is in use or not. + * + * Note that this only works on the current task. + * + * Inputs: + * @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP, + * XFEATURE_MASK_SSE, etc...) + * @xsave_state_ptr: a pointer to a copy of the state that you would + * like written in to the current task's FPU xsave state. This pointer + * must not be located in the current tasks's xsave area. + * Output: + * address of the state in the xsave area or NULL if the state + * is not present or is in its 'init state'. + */ +static void fpu__xfeature_set_state(int xstate_feature_mask, + void *xstate_feature_src, size_t len) +{ + struct xregs_state *xsave = ¤t->thread.fpu.state.xsave; + struct fpu *fpu = ¤t->thread.fpu; + void *dst; + + if (!boot_cpu_has(X86_FEATURE_XSAVE)) { + WARN_ONCE(1, "%s() attempted with no xsave support", __func__); + return; + } + + /* + * Tell the FPU code that we need the FPU state to be in + * 'fpu' (not in the registers), and that we need it to + * be stable while we write to it. + */ + fpu__current_fpstate_write_begin(); + + /* + * This method *WILL* *NOT* work for compact-format + * buffers. If the 'xstate_feature_mask' is unset in + * xcomp_bv then we may need to move other feature state + * "up" in the buffer. + */ + if (xsave->header.xcomp_bv & xstate_feature_mask) { + WARN_ON_ONCE(1); + goto out; + } + + /* find the location in the xsave buffer of the desired state */ + dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask); + + /* + * Make sure that the pointer being passed in did not + * come from the xsave buffer itself. + */ + WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself"); + + /* put the caller-provided data in the location */ + memcpy(dst, xstate_feature_src, len); + + /* + * Mark the xfeature so that the CPU knows there is state + * in the buffer now. + */ + fpu__xfeature_set_non_init(xsave, xstate_feature_mask); +out: + /* + * We are done writing to the 'fpu'. Reenable preeption + * and (possibly) move the fpstate back in to the fpregs. + */ + fpu__current_fpstate_write_end(); +} + +#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2) +#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1) + +/* + * This will go out and modify the XSAVE buffer so that PKRU is + * set to a particular state for access to 'pkey'. + * + * PKRU state does affect kernel access to user memory. We do + * not modfiy PKRU *itself* here, only the XSAVE state that will + * be restored in to PKRU when we return back to userspace. + */ +int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val) +{ + struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; + struct pkru_state *old_pkru_state; + struct pkru_state new_pkru_state; + int pkey_shift = (pkey * PKRU_BITS_PER_PKEY); + u32 new_pkru_bits = 0; + + /* + * This check implies XSAVE support. OSPKE only gets + * set if we enable XSAVE and we enable PKU in XCR0. + */ + if (!boot_cpu_has(X86_FEATURE_OSPKE)) + return -EINVAL; + + /* Set the bits we need in PKRU */ + if (init_val & PKEY_DISABLE_ACCESS) + new_pkru_bits |= PKRU_AD_BIT; + if (init_val & PKEY_DISABLE_WRITE) + new_pkru_bits |= PKRU_WD_BIT; + + /* Shift the bits in to the correct place in PKRU for pkey. */ + new_pkru_bits <<= pkey_shift; + + /* Locate old copy of the state in the xsave buffer */ + old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU); + + /* + * When state is not in the buffer, it is in the init + * state, set it manually. Otherwise, copy out the old + * state. + */ + if (!old_pkru_state) + new_pkru_state.pkru = 0; + else + new_pkru_state.pkru = old_pkru_state->pkru; + + /* mask off any old bits in place */ + new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift); + /* Set the newly-requested bits */ + new_pkru_state.pkru |= new_pkru_bits; + + /* + * We could theoretically live without zeroing pkru.pad. + * The current XSAVE feature state definition says that + * only bytes 0->3 are used. But we do not want to + * chance leaking kernel stack out to userspace in case a + * memcpy() of the whole xsave buffer was done. + * + * They're in the same cacheline anyway. + */ + new_pkru_state.pad = 0; + + fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, + sizeof(new_pkru_state)); + + return 0; +} diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 702547ce33c9..d036cfb4495d 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -1,5 +1,5 @@ /* - * Code for replacing ftrace calls with jumps. + * Dynamic function tracing support. * * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> * diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 0f05deeff5ce..ae703acb85c1 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -49,6 +49,7 @@ #include <linux/kdebug.h> #include <linux/kallsyms.h> #include <linux/ftrace.h> +#include <linux/frame.h> #include <asm/cacheflush.h> #include <asm/desc.h> @@ -671,39 +672,39 @@ NOKPROBE_SYMBOL(kprobe_int3_handler); * When a retprobed function returns, this code saves registers and * calls trampoline_handler() runs, which calls the kretprobe's handler. */ -static void __used kretprobe_trampoline_holder(void) -{ - asm volatile ( - ".global kretprobe_trampoline\n" - "kretprobe_trampoline: \n" +asm( + ".global kretprobe_trampoline\n" + ".type kretprobe_trampoline, @function\n" + "kretprobe_trampoline:\n" #ifdef CONFIG_X86_64 - /* We don't bother saving the ss register */ - " pushq %rsp\n" - " pushfq\n" - SAVE_REGS_STRING - " movq %rsp, %rdi\n" - " call trampoline_handler\n" - /* Replace saved sp with true return address. */ - " movq %rax, 152(%rsp)\n" - RESTORE_REGS_STRING - " popfq\n" + /* We don't bother saving the ss register */ + " pushq %rsp\n" + " pushfq\n" + SAVE_REGS_STRING + " movq %rsp, %rdi\n" + " call trampoline_handler\n" + /* Replace saved sp with true return address. */ + " movq %rax, 152(%rsp)\n" + RESTORE_REGS_STRING + " popfq\n" #else - " pushf\n" - SAVE_REGS_STRING - " movl %esp, %eax\n" - " call trampoline_handler\n" - /* Move flags to cs */ - " movl 56(%esp), %edx\n" - " movl %edx, 52(%esp)\n" - /* Replace saved flags with true return address. */ - " movl %eax, 56(%esp)\n" - RESTORE_REGS_STRING - " popf\n" + " pushf\n" + SAVE_REGS_STRING + " movl %esp, %eax\n" + " call trampoline_handler\n" + /* Move flags to cs */ + " movl 56(%esp), %edx\n" + " movl %edx, 52(%esp)\n" + /* Replace saved flags with true return address. */ + " movl %eax, 56(%esp)\n" + RESTORE_REGS_STRING + " popf\n" #endif - " ret\n"); -} -NOKPROBE_SYMBOL(kretprobe_trampoline_holder); + " ret\n" + ".size kretprobe_trampoline, .-kretprobe_trampoline\n" +); NOKPROBE_SYMBOL(kretprobe_trampoline); +STACK_FRAME_NON_STANDARD(kretprobe_trampoline); /* * Called from kretprobe_trampoline diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 47190bd399e7..807950860fb7 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -36,6 +36,7 @@ #include <linux/kprobes.h> #include <linux/debugfs.h> #include <linux/nmi.h> +#include <linux/swait.h> #include <asm/timer.h> #include <asm/cpu.h> #include <asm/traps.h> @@ -91,14 +92,14 @@ static void kvm_io_delay(void) struct kvm_task_sleep_node { struct hlist_node link; - wait_queue_head_t wq; + struct swait_queue_head wq; u32 token; int cpu; bool halted; }; static struct kvm_task_sleep_head { - spinlock_t lock; + raw_spinlock_t lock; struct hlist_head list; } async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE]; @@ -122,17 +123,17 @@ void kvm_async_pf_task_wait(u32 token) u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; struct kvm_task_sleep_node n, *e; - DEFINE_WAIT(wait); + DECLARE_SWAITQUEUE(wait); rcu_irq_enter(); - spin_lock(&b->lock); + raw_spin_lock(&b->lock); e = _find_apf_task(b, token); if (e) { /* dummy entry exist -> wake up was delivered ahead of PF */ hlist_del(&e->link); kfree(e); - spin_unlock(&b->lock); + raw_spin_unlock(&b->lock); rcu_irq_exit(); return; @@ -141,13 +142,13 @@ void kvm_async_pf_task_wait(u32 token) n.token = token; n.cpu = smp_processor_id(); n.halted = is_idle_task(current) || preempt_count() > 1; - init_waitqueue_head(&n.wq); + init_swait_queue_head(&n.wq); hlist_add_head(&n.link, &b->list); - spin_unlock(&b->lock); + raw_spin_unlock(&b->lock); for (;;) { if (!n.halted) - prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_swait(&n.wq, &wait, TASK_UNINTERRUPTIBLE); if (hlist_unhashed(&n.link)) break; @@ -166,7 +167,7 @@ void kvm_async_pf_task_wait(u32 token) } } if (!n.halted) - finish_wait(&n.wq, &wait); + finish_swait(&n.wq, &wait); rcu_irq_exit(); return; @@ -178,8 +179,8 @@ static void apf_task_wake_one(struct kvm_task_sleep_node *n) hlist_del_init(&n->link); if (n->halted) smp_send_reschedule(n->cpu); - else if (waitqueue_active(&n->wq)) - wake_up(&n->wq); + else if (swait_active(&n->wq)) + swake_up(&n->wq); } static void apf_task_wake_all(void) @@ -189,14 +190,14 @@ static void apf_task_wake_all(void) for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) { struct hlist_node *p, *next; struct kvm_task_sleep_head *b = &async_pf_sleepers[i]; - spin_lock(&b->lock); + raw_spin_lock(&b->lock); hlist_for_each_safe(p, next, &b->list) { struct kvm_task_sleep_node *n = hlist_entry(p, typeof(*n), link); if (n->cpu == smp_processor_id()) apf_task_wake_one(n); } - spin_unlock(&b->lock); + raw_spin_unlock(&b->lock); } } @@ -212,7 +213,7 @@ void kvm_async_pf_task_wake(u32 token) } again: - spin_lock(&b->lock); + raw_spin_lock(&b->lock); n = _find_apf_task(b, token); if (!n) { /* @@ -225,17 +226,17 @@ again: * Allocation failed! Busy wait while other cpu * handles async PF. */ - spin_unlock(&b->lock); + raw_spin_unlock(&b->lock); cpu_relax(); goto again; } n->token = token; n->cpu = smp_processor_id(); - init_waitqueue_head(&n->wq); + init_swait_queue_head(&n->wq); hlist_add_head(&n->link, &b->list); } else apf_task_wake_one(n); - spin_unlock(&b->lock); + raw_spin_unlock(&b->lock); return; } EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); @@ -486,7 +487,7 @@ void __init kvm_guest_init(void) paravirt_ops_setup(); register_reboot_notifier(&kvm_pv_reboot_nb); for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) - spin_lock_init(&async_pf_sleepers[i].lock); + raw_spin_lock_init(&async_pf_sleepers[i].lock); if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) x86_init.irqs.trap_init = kvm_apf_trap_init; diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 6acc9dd91f36..6707039b9032 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -103,7 +103,7 @@ static void free_ldt_struct(struct ldt_struct *ldt) * we do not have to muck with descriptors here, that is * done in switch_mm() as needed. */ -int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm) { struct ldt_struct *new_ldt; struct mm_struct *old_mm; @@ -144,7 +144,7 @@ out_unlock: * * 64bit: Don't touch the LDT register - we're already in the next thread. */ -void destroy_context(struct mm_struct *mm) +void destroy_context_ldt(struct mm_struct *mm) { free_ldt_struct(mm->context.ldt); mm->context.ldt = NULL; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 9f751876066f..6cbab31ac23a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -117,6 +117,8 @@ void __show_regs(struct pt_regs *regs, int all) printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); + if (boot_cpu_has(X86_FEATURE_OSPKE)) + printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru()); } void release_thread(struct task_struct *dead_task) @@ -488,7 +490,7 @@ void set_personality_ia32(bool x32) if (current->mm) current->mm->context.ia32_compat = TIF_X32; current->personality &= ~READ_IMPLIES_EXEC; - /* is_compat_task() uses the presence of the x32 + /* in_compat_syscall() uses the presence of the x32 syscall bit flag to determine compat status */ current_thread_info()->status &= ~TS_COMPAT; } else { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index aa52c1009475..2367ae07eb76 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -112,6 +112,7 @@ #include <asm/alternative.h> #include <asm/prom.h> #include <asm/microcode.h> +#include <asm/mmu_context.h> /* * max_low_pfn_mapped: highest direct mapped pfn under 4GB @@ -1282,3 +1283,11 @@ static int __init register_kernel_offset_dumper(void) return 0; } __initcall(register_kernel_offset_dumper); + +void arch_show_smap(struct seq_file *m, struct vm_area_struct *vma) +{ + if (!boot_cpu_has(X86_FEATURE_OSPKE)) + return; + + seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma)); +} diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index b2c99f811c3f..a2065d3b3b39 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -422,7 +422,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) if (c->phys_proc_id == o->phys_proc_id && per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) && - c->compute_unit_id == o->compute_unit_id) + c->cpu_core_id == o->cpu_core_id) return topology_sane(c, o, "smt"); } else if (c->phys_proc_id == o->phys_proc_id && diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index fdd0c6430e5a..9ee98eefc44d 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -14,30 +14,34 @@ static int save_stack_stack(void *data, char *name) return 0; } -static void +static int __save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched) { struct stack_trace *trace = data; #ifdef CONFIG_FRAME_POINTER if (!reliable) - return; + return 0; #endif if (nosched && in_sched_functions(addr)) - return; + return 0; if (trace->skip > 0) { trace->skip--; - return; + return 0; } - if (trace->nr_entries < trace->max_entries) + if (trace->nr_entries < trace->max_entries) { trace->entries[trace->nr_entries++] = addr; + return 0; + } else { + return -1; /* no more room, stop walking the stack */ + } } -static void save_stack_address(void *data, unsigned long addr, int reliable) +static int save_stack_address(void *data, unsigned long addr, int reliable) { return __save_stack_address(data, addr, reliable, false); } -static void +static int save_stack_address_nosched(void *data, unsigned long addr, int reliable) { return __save_stack_address(data, addr, reliable, true); diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 91a4496db434..e72a07f20b05 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -135,7 +135,7 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, pmd = pmd_alloc(&tboot_mm, pud, vaddr); if (!pmd) return -1; - pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr); + pte = pte_alloc_map(&tboot_mm, pmd, vaddr); if (!pte) return -1; set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 5af9958cbdb6..4c941f88d405 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -81,11 +81,11 @@ PHDRS { SECTIONS { #ifdef CONFIG_X86_32 - . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; - phys_startup_32 = startup_32 - LOAD_OFFSET; + . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; + phys_startup_32 = ABSOLUTE(startup_32 - LOAD_OFFSET); #else - . = __START_KERNEL; - phys_startup_64 = startup_64 - LOAD_OFFSET; + . = __START_KERNEL; + phys_startup_64 = ABSOLUTE(startup_64 - LOAD_OFFSET); #endif /* Text and read-only data */ @@ -101,6 +101,7 @@ SECTIONS KPROBES_TEXT ENTRY_TEXT IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) /* End of text section */ @@ -333,6 +334,7 @@ SECTIONS __brk_limit = .; } + . = ALIGN(PAGE_SIZE); _end = .; STABS_DEBUG @@ -340,7 +342,10 @@ SECTIONS /* Sections to be discarded */ DISCARDS - /DISCARD/ : { *(.eh_frame) } + /DISCARD/ : { + *(.eh_frame) + *(__func_stack_frame_non_standard) + } } |