summaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile18
-rw-r--r--arch/x86/kernel/acpi/wakeup_64.S3
-rw-r--r--arch/x86/kernel/amd_nb.c6
-rw-r--r--arch/x86/kernel/aperture_64.c12
-rw-r--r--arch/x86/kernel/apic/Makefile4
-rw-r--r--arch/x86/kernel/cpu/Makefile4
-rw-r--r--arch/x86/kernel/cpu/amd.c17
-rw-r--r--arch/x86/kernel/cpu/common.c44
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c3
-rw-r--r--arch/x86/kernel/cpu/powerflags.c2
-rw-r--r--arch/x86/kernel/dumpstack.c11
-rw-r--r--arch/x86/kernel/fpu/core.c63
-rw-r--r--arch/x86/kernel/fpu/xstate.c185
-rw-r--r--arch/x86/kernel/ftrace.c2
-rw-r--r--arch/x86/kernel/kprobes/core.c59
-rw-r--r--arch/x86/kernel/kvm.c37
-rw-r--r--arch/x86/kernel/ldt.c4
-rw-r--r--arch/x86/kernel/process_64.c4
-rw-r--r--arch/x86/kernel/setup.c9
-rw-r--r--arch/x86/kernel/smpboot.c2
-rw-r--r--arch/x86/kernel/stacktrace.c18
-rw-r--r--arch/x86/kernel/tboot.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S15
23 files changed, 424 insertions, 100 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index b1b78ffe01d0..616ebd22ef9a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -16,9 +16,21 @@ CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
endif
-KASAN_SANITIZE_head$(BITS).o := n
-KASAN_SANITIZE_dumpstack.o := n
-KASAN_SANITIZE_dumpstack_$(BITS).o := n
+KASAN_SANITIZE_head$(BITS).o := n
+KASAN_SANITIZE_dumpstack.o := n
+KASAN_SANITIZE_dumpstack_$(BITS).o := n
+KASAN_SANITIZE_stacktrace.o := n
+
+OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_mcount_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_test_nx.o := y
+
+# If instrumentation of this dir is enabled, boot hangs during first second.
+# Probably could be more selective here, but note that files related to irqs,
+# boot, dumpstack/stacktrace, etc are either non-interesting or can lead to
+# non-deterministic coverage.
+KCOV_INSTRUMENT := n
CFLAGS_irq.o := -I$(src)/../include/asm/trace
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 8c35df468104..169963f471bb 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -5,6 +5,7 @@
#include <asm/page_types.h>
#include <asm/msr.h>
#include <asm/asm-offsets.h>
+#include <asm/frame.h>
# Copyright 2003 Pavel Machek <pavel@suse.cz>, distribute under GPLv2
@@ -39,6 +40,7 @@ bogus_64_magic:
jmp bogus_64_magic
ENTRY(do_suspend_lowlevel)
+ FRAME_BEGIN
subq $8, %rsp
xorl %eax, %eax
call save_processor_state
@@ -109,6 +111,7 @@ ENTRY(do_suspend_lowlevel)
xorl %eax, %eax
addq $8, %rsp
+ FRAME_END
jmp restore_processor_state
ENDPROC(do_suspend_lowlevel)
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 29fa475ec518..a147e676fc7b 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -170,15 +170,13 @@ int amd_get_subcaches(int cpu)
{
struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
unsigned int mask;
- int cuid;
if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
return 0;
pci_read_config_dword(link, 0x1d4, &mask);
- cuid = cpu_data(cpu).compute_unit_id;
- return (mask >> (4 * cuid)) & 0xf;
+ return (mask >> (4 * cpu_data(cpu).cpu_core_id)) & 0xf;
}
int amd_set_subcaches(int cpu, unsigned long mask)
@@ -204,7 +202,7 @@ int amd_set_subcaches(int cpu, unsigned long mask)
pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000);
}
- cuid = cpu_data(cpu).compute_unit_id;
+ cuid = cpu_data(cpu).cpu_core_id;
mask <<= 4 * cuid;
mask |= (0xf ^ (1 << cuid)) << 26;
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 6e85f713641d..0a2bb1f62e72 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -227,19 +227,11 @@ static u32 __init search_agp_bridge(u32 *order, int *valid_agp)
return 0;
}
-static int gart_fix_e820 __initdata = 1;
+static bool gart_fix_e820 __initdata = true;
static int __init parse_gart_mem(char *p)
{
- if (!p)
- return -EINVAL;
-
- if (!strncmp(p, "off", 3))
- gart_fix_e820 = 0;
- else if (!strncmp(p, "on", 2))
- gart_fix_e820 = 1;
-
- return 0;
+ return kstrtobool(p, &gart_fix_e820);
}
early_param("gart_fix_e820", parse_gart_mem);
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 8bb12ddc5db8..8e63ebdcbd0b 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -2,6 +2,10 @@
# Makefile for local APIC drivers and for the IO-APIC code
#
+# Leads to non-deterministic coverage that is not a function of syscall inputs.
+# In particualr, smp_apic_timer_interrupt() is called in random places.
+KCOV_INSTRUMENT := n
+
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o ipi.o vector.o
obj-y += hw_nmi.o
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 0d373d7affc8..4a8697f7d4ef 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -8,6 +8,10 @@ CFLAGS_REMOVE_common.o = -pg
CFLAGS_REMOVE_perf_event.o = -pg
endif
+# If these files are instrumented, boot hangs during the first second.
+KCOV_INSTRUMENT_common.o := n
+KCOV_INSTRUMENT_perf_event.o := n
+
# Make sure load_percpu_segment has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_common.o := $(nostackp)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 68fe8d3bed56..7b76eb67a9b3 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -75,7 +75,10 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
*/
extern __visible void vide(void);
-__asm__(".globl vide\n\t.align 4\nvide: ret");
+__asm__(".globl vide\n"
+ ".type vide, @function\n"
+ ".align 4\n"
+ "vide: ret\n");
static void init_amd_k5(struct cpuinfo_x86 *c)
{
@@ -297,7 +300,6 @@ static int nearby_node(int apicid)
#ifdef CONFIG_SMP
static void amd_get_topology(struct cpuinfo_x86 *c)
{
- u32 cores_per_cu = 1;
u8 node_id;
int cpu = smp_processor_id();
@@ -310,8 +312,8 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
/* get compute unit information */
smp_num_siblings = ((ebx >> 8) & 3) + 1;
- c->compute_unit_id = ebx & 0xff;
- cores_per_cu += ((ebx >> 8) & 3);
+ c->x86_max_cores /= smp_num_siblings;
+ c->cpu_core_id = ebx & 0xff;
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
@@ -322,19 +324,16 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
/* fixup multi-node processor information */
if (nodes_per_socket > 1) {
- u32 cores_per_node;
u32 cus_per_node;
set_cpu_cap(c, X86_FEATURE_AMD_DCM);
- cores_per_node = c->x86_max_cores / nodes_per_socket;
- cus_per_node = cores_per_node / cores_per_cu;
+ cus_per_node = c->x86_max_cores / nodes_per_socket;
/* store NodeID, use llc_shared_map to store sibling info */
per_cpu(cpu_llc_id, cpu) = node_id;
/* core id has to be in the [0 .. cores_per_node - 1] range */
- c->cpu_core_id %= cores_per_node;
- c->compute_unit_id %= cus_per_node;
+ c->cpu_core_id %= cus_per_node;
}
}
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e601c1286e29..8394b3d1f94f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -304,6 +304,48 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
}
/*
+ * Protection Keys are not available in 32-bit mode.
+ */
+static bool pku_disabled;
+
+static __always_inline void setup_pku(struct cpuinfo_x86 *c)
+{
+ if (!cpu_has(c, X86_FEATURE_PKU))
+ return;
+ if (pku_disabled)
+ return;
+
+ cr4_set_bits(X86_CR4_PKE);
+ /*
+ * Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
+ * cpuid bit to be set. We need to ensure that we
+ * update that bit in this CPU's "cpu_info".
+ */
+ get_cpu_cap(c);
+}
+
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+static __init int setup_disable_pku(char *arg)
+{
+ /*
+ * Do not clear the X86_FEATURE_PKU bit. All of the
+ * runtime checks are against OSPKE so clearing the
+ * bit does nothing.
+ *
+ * This way, we will see "pku" in cpuinfo, but not
+ * "ospke", which is exactly what we want. It shows
+ * that the CPU has PKU, but the OS has not enabled it.
+ * This happens to be exactly how a system would look
+ * if we disabled the config option.
+ */
+ pr_info("x86: 'nopku' specified, disabling Memory Protection Keys\n");
+ pku_disabled = true;
+ return 1;
+}
+__setup("nopku", setup_disable_pku);
+#endif /* CONFIG_X86_64 */
+
+/*
* Some CPU features depend on higher CPUID levels, which may not always
* be available due to CPUID level capping or broken virtualization
* software. Add those features to this table to auto-disable them.
@@ -625,6 +667,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_capability[CPUID_7_0_EBX] = ebx;
c->x86_capability[CPUID_6_EAX] = cpuid_eax(0x00000006);
+ c->x86_capability[CPUID_7_ECX] = ecx;
}
/* Extended state features: level 0x0000000d */
@@ -984,6 +1027,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
init_hypervisor(c);
x86_init_rdrand(c);
x86_init_cache_qos(c);
+ setup_pku(c);
/*
* Clear/Set all flags overridden by options, need do it
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 0b445c2ff735..ac780cad3b86 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -384,6 +384,9 @@ static void intel_thermal_interrupt(void)
{
__u64 msr_val;
+ if (static_cpu_has(X86_FEATURE_HWP))
+ wrmsrl_safe(MSR_HWP_STATUS, 0);
+
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
/* Check for violation of core thermal thresholds*/
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
index 31f0f335ed22..1dd8294fd730 100644
--- a/arch/x86/kernel/cpu/powerflags.c
+++ b/arch/x86/kernel/cpu/powerflags.c
@@ -18,4 +18,6 @@ const char *const x86_power_flags[32] = {
"", /* tsc invariant mapped to constant_tsc */
"cpb", /* core performance boost */
"eff_freq_ro", /* Readonly aperf/mperf */
+ "proc_feedback", /* processor feedback interface */
+ "acc_power", /* accumulated power mechanism */
};
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 9c30acfadae2..8efa57a5f29e 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -135,7 +135,8 @@ print_context_stack_bp(struct thread_info *tinfo,
if (!__kernel_text_address(addr))
break;
- ops->address(data, addr, 1);
+ if (ops->address(data, addr, 1))
+ break;
frame = frame->next_frame;
ret_addr = &frame->return_address;
print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
@@ -154,10 +155,11 @@ static int print_trace_stack(void *data, char *name)
/*
* Print one address/symbol entries per line.
*/
-static void print_trace_address(void *data, unsigned long addr, int reliable)
+static int print_trace_address(void *data, unsigned long addr, int reliable)
{
touch_nmi_watchdog();
printk_stack_address(addr, reliable, data);
+ return 0;
}
static const struct stacktrace_ops print_trace_ops = {
@@ -265,9 +267,8 @@ int __die(const char *str, struct pt_regs *regs, long err)
#ifdef CONFIG_SMP
printk("SMP ");
#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC ");
-#endif
+ if (debug_pagealloc_enabled())
+ printk("DEBUG_PAGEALLOC ");
#ifdef CONFIG_KASAN
printk("KASAN");
#endif
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 0b1b9abd4d5f..8e37cc8a539a 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -354,6 +354,69 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
}
/*
+ * This function must be called before we write the current
+ * task's fpstate.
+ *
+ * This call gets the current FPU register state and moves
+ * it in to the 'fpstate'. Preemption is disabled so that
+ * no writes to the 'fpstate' can occur from context
+ * swiches.
+ *
+ * Must be followed by a fpu__current_fpstate_write_end().
+ */
+void fpu__current_fpstate_write_begin(void)
+{
+ struct fpu *fpu = &current->thread.fpu;
+
+ /*
+ * Ensure that the context-switching code does not write
+ * over the fpstate while we are doing our update.
+ */
+ preempt_disable();
+
+ /*
+ * Move the fpregs in to the fpu's 'fpstate'.
+ */
+ fpu__activate_fpstate_read(fpu);
+
+ /*
+ * The caller is about to write to 'fpu'. Ensure that no
+ * CPU thinks that its fpregs match the fpstate. This
+ * ensures we will not be lazy and skip a XRSTOR in the
+ * future.
+ */
+ fpu->last_cpu = -1;
+}
+
+/*
+ * This function must be paired with fpu__current_fpstate_write_begin()
+ *
+ * This will ensure that the modified fpstate gets placed back in
+ * the fpregs if necessary.
+ *
+ * Note: This function may be called whether or not an _actual_
+ * write to the fpstate occurred.
+ */
+void fpu__current_fpstate_write_end(void)
+{
+ struct fpu *fpu = &current->thread.fpu;
+
+ /*
+ * 'fpu' now has an updated copy of the state, but the
+ * registers may still be out of date. Update them with
+ * an XRSTOR if they are active.
+ */
+ if (fpregs_active())
+ copy_kernel_to_fpregs(&fpu->state);
+
+ /*
+ * Our update is done and the fpregs/fpstate are in sync
+ * if necessary. Context switches can happen again.
+ */
+ preempt_enable();
+}
+
+/*
* 'fpu__restore()' is called to copy FPU registers from
* the FPU fpstate to the live hw registers and to activate
* access to the hardware registers, so that FPU instructions
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 6e8354f5a593..b48ef35b28d4 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -5,6 +5,7 @@
*/
#include <linux/compat.h>
#include <linux/cpu.h>
+#include <linux/pkeys.h>
#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
@@ -13,6 +14,11 @@
#include <asm/tlbflush.h>
+/*
+ * Although we spell it out in here, the Processor Trace
+ * xfeature is completely unused. We use other mechanisms
+ * to save/restore PT state in Linux.
+ */
static const char *xfeature_names[] =
{
"x87 floating point registers" ,
@@ -23,6 +29,8 @@ static const char *xfeature_names[] =
"AVX-512 opmask" ,
"AVX-512 Hi256" ,
"AVX-512 ZMM_Hi256" ,
+ "Processor Trace (unused)" ,
+ "Protection Keys User registers",
"unknown xstate feature" ,
};
@@ -56,6 +64,7 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
setup_clear_cpu_cap(X86_FEATURE_MPX);
setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
+ setup_clear_cpu_cap(X86_FEATURE_PKU);
}
/*
@@ -234,7 +243,7 @@ static void __init print_xstate_feature(u64 xstate_mask)
const char *feature_name;
if (cpu_has_xfeatures(xstate_mask, &feature_name))
- pr_info("x86/fpu: Supporting XSAVE feature 0x%02Lx: '%s'\n", xstate_mask, feature_name);
+ pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name);
}
/*
@@ -250,6 +259,7 @@ static void __init print_xstate_features(void)
print_xstate_feature(XFEATURE_MASK_OPMASK);
print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
+ print_xstate_feature(XFEATURE_MASK_PKRU);
}
/*
@@ -466,6 +476,7 @@ static void check_xstate_against_struct(int nr)
XCHECK_SZ(sz, nr, XFEATURE_OPMASK, struct avx_512_opmask_state);
XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM, struct avx_512_hi16_state);
+ XCHECK_SZ(sz, nr, XFEATURE_PKRU, struct pkru_state);
/*
* Make *SURE* to add any feature numbers in below if
@@ -473,7 +484,8 @@ static void check_xstate_against_struct(int nr)
* numbers.
*/
if ((nr < XFEATURE_YMM) ||
- (nr >= XFEATURE_MAX)) {
+ (nr >= XFEATURE_MAX) ||
+ (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR)) {
WARN_ONCE(1, "no structure for xstate: %d\n", nr);
XSTATE_WARN_ON(1);
}
@@ -671,6 +683,19 @@ void fpu__resume_cpu(void)
}
/*
+ * Given an xstate feature mask, calculate where in the xsave
+ * buffer the state is. Callers should ensure that the buffer
+ * is valid.
+ *
+ * Note: does not work for compacted buffers.
+ */
+void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask)
+{
+ int feature_nr = fls64(xstate_feature_mask) - 1;
+
+ return (void *)xsave + xstate_comp_offsets[feature_nr];
+}
+/*
* Given the xsave area and a state inside, this function returns the
* address of the state.
*
@@ -690,7 +715,6 @@ void fpu__resume_cpu(void)
*/
void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
{
- int feature_nr = fls64(xstate_feature) - 1;
/*
* Do we even *have* xsave state?
*/
@@ -718,7 +742,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
if (!(xsave->header.xfeatures & xstate_feature))
return NULL;
- return (void *)xsave + xstate_comp_offsets[feature_nr];
+ return __raw_xsave_addr(xsave, xstate_feature);
}
EXPORT_SYMBOL_GPL(get_xsave_addr);
@@ -753,3 +777,156 @@ const void *get_xsave_field_ptr(int xsave_state)
return get_xsave_addr(&fpu->state.xsave, xsave_state);
}
+
+
+/*
+ * Set xfeatures (aka XSTATE_BV) bit for a feature that we want
+ * to take out of its "init state". This will ensure that an
+ * XRSTOR actually restores the state.
+ */
+static void fpu__xfeature_set_non_init(struct xregs_state *xsave,
+ int xstate_feature_mask)
+{
+ xsave->header.xfeatures |= xstate_feature_mask;
+}
+
+/*
+ * This function is safe to call whether the FPU is in use or not.
+ *
+ * Note that this only works on the current task.
+ *
+ * Inputs:
+ * @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
+ * XFEATURE_MASK_SSE, etc...)
+ * @xsave_state_ptr: a pointer to a copy of the state that you would
+ * like written in to the current task's FPU xsave state. This pointer
+ * must not be located in the current tasks's xsave area.
+ * Output:
+ * address of the state in the xsave area or NULL if the state
+ * is not present or is in its 'init state'.
+ */
+static void fpu__xfeature_set_state(int xstate_feature_mask,
+ void *xstate_feature_src, size_t len)
+{
+ struct xregs_state *xsave = &current->thread.fpu.state.xsave;
+ struct fpu *fpu = &current->thread.fpu;
+ void *dst;
+
+ if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
+ WARN_ONCE(1, "%s() attempted with no xsave support", __func__);
+ return;
+ }
+
+ /*
+ * Tell the FPU code that we need the FPU state to be in
+ * 'fpu' (not in the registers), and that we need it to
+ * be stable while we write to it.
+ */
+ fpu__current_fpstate_write_begin();
+
+ /*
+ * This method *WILL* *NOT* work for compact-format
+ * buffers. If the 'xstate_feature_mask' is unset in
+ * xcomp_bv then we may need to move other feature state
+ * "up" in the buffer.
+ */
+ if (xsave->header.xcomp_bv & xstate_feature_mask) {
+ WARN_ON_ONCE(1);
+ goto out;
+ }
+
+ /* find the location in the xsave buffer of the desired state */
+ dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask);
+
+ /*
+ * Make sure that the pointer being passed in did not
+ * come from the xsave buffer itself.
+ */
+ WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself");
+
+ /* put the caller-provided data in the location */
+ memcpy(dst, xstate_feature_src, len);
+
+ /*
+ * Mark the xfeature so that the CPU knows there is state
+ * in the buffer now.
+ */
+ fpu__xfeature_set_non_init(xsave, xstate_feature_mask);
+out:
+ /*
+ * We are done writing to the 'fpu'. Reenable preeption
+ * and (possibly) move the fpstate back in to the fpregs.
+ */
+ fpu__current_fpstate_write_end();
+}
+
+#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
+#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
+
+/*
+ * This will go out and modify the XSAVE buffer so that PKRU is
+ * set to a particular state for access to 'pkey'.
+ *
+ * PKRU state does affect kernel access to user memory. We do
+ * not modfiy PKRU *itself* here, only the XSAVE state that will
+ * be restored in to PKRU when we return back to userspace.
+ */
+int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+ unsigned long init_val)
+{
+ struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
+ struct pkru_state *old_pkru_state;
+ struct pkru_state new_pkru_state;
+ int pkey_shift = (pkey * PKRU_BITS_PER_PKEY);
+ u32 new_pkru_bits = 0;
+
+ /*
+ * This check implies XSAVE support. OSPKE only gets
+ * set if we enable XSAVE and we enable PKU in XCR0.
+ */
+ if (!boot_cpu_has(X86_FEATURE_OSPKE))
+ return -EINVAL;
+
+ /* Set the bits we need in PKRU */
+ if (init_val & PKEY_DISABLE_ACCESS)
+ new_pkru_bits |= PKRU_AD_BIT;
+ if (init_val & PKEY_DISABLE_WRITE)
+ new_pkru_bits |= PKRU_WD_BIT;
+
+ /* Shift the bits in to the correct place in PKRU for pkey. */
+ new_pkru_bits <<= pkey_shift;
+
+ /* Locate old copy of the state in the xsave buffer */
+ old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU);
+
+ /*
+ * When state is not in the buffer, it is in the init
+ * state, set it manually. Otherwise, copy out the old
+ * state.
+ */
+ if (!old_pkru_state)
+ new_pkru_state.pkru = 0;
+ else
+ new_pkru_state.pkru = old_pkru_state->pkru;
+
+ /* mask off any old bits in place */
+ new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
+ /* Set the newly-requested bits */
+ new_pkru_state.pkru |= new_pkru_bits;
+
+ /*
+ * We could theoretically live without zeroing pkru.pad.
+ * The current XSAVE feature state definition says that
+ * only bytes 0->3 are used. But we do not want to
+ * chance leaking kernel stack out to userspace in case a
+ * memcpy() of the whole xsave buffer was done.
+ *
+ * They're in the same cacheline anyway.
+ */
+ new_pkru_state.pad = 0;
+
+ fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state,
+ sizeof(new_pkru_state));
+
+ return 0;
+}
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 702547ce33c9..d036cfb4495d 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -1,5 +1,5 @@
/*
- * Code for replacing ftrace calls with jumps.
+ * Dynamic function tracing support.
*
* Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
*
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 0f05deeff5ce..ae703acb85c1 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -49,6 +49,7 @@
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
+#include <linux/frame.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
@@ -671,39 +672,39 @@ NOKPROBE_SYMBOL(kprobe_int3_handler);
* When a retprobed function returns, this code saves registers and
* calls trampoline_handler() runs, which calls the kretprobe's handler.
*/
-static void __used kretprobe_trampoline_holder(void)
-{
- asm volatile (
- ".global kretprobe_trampoline\n"
- "kretprobe_trampoline: \n"
+asm(
+ ".global kretprobe_trampoline\n"
+ ".type kretprobe_trampoline, @function\n"
+ "kretprobe_trampoline:\n"
#ifdef CONFIG_X86_64
- /* We don't bother saving the ss register */
- " pushq %rsp\n"
- " pushfq\n"
- SAVE_REGS_STRING
- " movq %rsp, %rdi\n"
- " call trampoline_handler\n"
- /* Replace saved sp with true return address. */
- " movq %rax, 152(%rsp)\n"
- RESTORE_REGS_STRING
- " popfq\n"
+ /* We don't bother saving the ss register */
+ " pushq %rsp\n"
+ " pushfq\n"
+ SAVE_REGS_STRING
+ " movq %rsp, %rdi\n"
+ " call trampoline_handler\n"
+ /* Replace saved sp with true return address. */
+ " movq %rax, 152(%rsp)\n"
+ RESTORE_REGS_STRING
+ " popfq\n"
#else
- " pushf\n"
- SAVE_REGS_STRING
- " movl %esp, %eax\n"
- " call trampoline_handler\n"
- /* Move flags to cs */
- " movl 56(%esp), %edx\n"
- " movl %edx, 52(%esp)\n"
- /* Replace saved flags with true return address. */
- " movl %eax, 56(%esp)\n"
- RESTORE_REGS_STRING
- " popf\n"
+ " pushf\n"
+ SAVE_REGS_STRING
+ " movl %esp, %eax\n"
+ " call trampoline_handler\n"
+ /* Move flags to cs */
+ " movl 56(%esp), %edx\n"
+ " movl %edx, 52(%esp)\n"
+ /* Replace saved flags with true return address. */
+ " movl %eax, 56(%esp)\n"
+ RESTORE_REGS_STRING
+ " popf\n"
#endif
- " ret\n");
-}
-NOKPROBE_SYMBOL(kretprobe_trampoline_holder);
+ " ret\n"
+ ".size kretprobe_trampoline, .-kretprobe_trampoline\n"
+);
NOKPROBE_SYMBOL(kretprobe_trampoline);
+STACK_FRAME_NON_STANDARD(kretprobe_trampoline);
/*
* Called from kretprobe_trampoline
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 47190bd399e7..807950860fb7 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -36,6 +36,7 @@
#include <linux/kprobes.h>
#include <linux/debugfs.h>
#include <linux/nmi.h>
+#include <linux/swait.h>
#include <asm/timer.h>
#include <asm/cpu.h>
#include <asm/traps.h>
@@ -91,14 +92,14 @@ static void kvm_io_delay(void)
struct kvm_task_sleep_node {
struct hlist_node link;
- wait_queue_head_t wq;
+ struct swait_queue_head wq;
u32 token;
int cpu;
bool halted;
};
static struct kvm_task_sleep_head {
- spinlock_t lock;
+ raw_spinlock_t lock;
struct hlist_head list;
} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];
@@ -122,17 +123,17 @@ void kvm_async_pf_task_wait(u32 token)
u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
struct kvm_task_sleep_node n, *e;
- DEFINE_WAIT(wait);
+ DECLARE_SWAITQUEUE(wait);
rcu_irq_enter();
- spin_lock(&b->lock);
+ raw_spin_lock(&b->lock);
e = _find_apf_task(b, token);
if (e) {
/* dummy entry exist -> wake up was delivered ahead of PF */
hlist_del(&e->link);
kfree(e);
- spin_unlock(&b->lock);
+ raw_spin_unlock(&b->lock);
rcu_irq_exit();
return;
@@ -141,13 +142,13 @@ void kvm_async_pf_task_wait(u32 token)
n.token = token;
n.cpu = smp_processor_id();
n.halted = is_idle_task(current) || preempt_count() > 1;
- init_waitqueue_head(&n.wq);
+ init_swait_queue_head(&n.wq);
hlist_add_head(&n.link, &b->list);
- spin_unlock(&b->lock);
+ raw_spin_unlock(&b->lock);
for (;;) {
if (!n.halted)
- prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
+ prepare_to_swait(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
if (hlist_unhashed(&n.link))
break;
@@ -166,7 +167,7 @@ void kvm_async_pf_task_wait(u32 token)
}
}
if (!n.halted)
- finish_wait(&n.wq, &wait);
+ finish_swait(&n.wq, &wait);
rcu_irq_exit();
return;
@@ -178,8 +179,8 @@ static void apf_task_wake_one(struct kvm_task_sleep_node *n)
hlist_del_init(&n->link);
if (n->halted)
smp_send_reschedule(n->cpu);
- else if (waitqueue_active(&n->wq))
- wake_up(&n->wq);
+ else if (swait_active(&n->wq))
+ swake_up(&n->wq);
}
static void apf_task_wake_all(void)
@@ -189,14 +190,14 @@ static void apf_task_wake_all(void)
for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
struct hlist_node *p, *next;
struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
- spin_lock(&b->lock);
+ raw_spin_lock(&b->lock);
hlist_for_each_safe(p, next, &b->list) {
struct kvm_task_sleep_node *n =
hlist_entry(p, typeof(*n), link);
if (n->cpu == smp_processor_id())
apf_task_wake_one(n);
}
- spin_unlock(&b->lock);
+ raw_spin_unlock(&b->lock);
}
}
@@ -212,7 +213,7 @@ void kvm_async_pf_task_wake(u32 token)
}
again:
- spin_lock(&b->lock);
+ raw_spin_lock(&b->lock);
n = _find_apf_task(b, token);
if (!n) {
/*
@@ -225,17 +226,17 @@ again:
* Allocation failed! Busy wait while other cpu
* handles async PF.
*/
- spin_unlock(&b->lock);
+ raw_spin_unlock(&b->lock);
cpu_relax();
goto again;
}
n->token = token;
n->cpu = smp_processor_id();
- init_waitqueue_head(&n->wq);
+ init_swait_queue_head(&n->wq);
hlist_add_head(&n->link, &b->list);
} else
apf_task_wake_one(n);
- spin_unlock(&b->lock);
+ raw_spin_unlock(&b->lock);
return;
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);
@@ -486,7 +487,7 @@ void __init kvm_guest_init(void)
paravirt_ops_setup();
register_reboot_notifier(&kvm_pv_reboot_nb);
for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
- spin_lock_init(&async_pf_sleepers[i].lock);
+ raw_spin_lock_init(&async_pf_sleepers[i].lock);
if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
x86_init.irqs.trap_init = kvm_apf_trap_init;
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 6acc9dd91f36..6707039b9032 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -103,7 +103,7 @@ static void free_ldt_struct(struct ldt_struct *ldt)
* we do not have to muck with descriptors here, that is
* done in switch_mm() as needed.
*/
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
{
struct ldt_struct *new_ldt;
struct mm_struct *old_mm;
@@ -144,7 +144,7 @@ out_unlock:
*
* 64bit: Don't touch the LDT register - we're already in the next thread.
*/
-void destroy_context(struct mm_struct *mm)
+void destroy_context_ldt(struct mm_struct *mm)
{
free_ldt_struct(mm->context.ldt);
mm->context.ldt = NULL;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9f751876066f..6cbab31ac23a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -117,6 +117,8 @@ void __show_regs(struct pt_regs *regs, int all)
printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
+ if (boot_cpu_has(X86_FEATURE_OSPKE))
+ printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
}
void release_thread(struct task_struct *dead_task)
@@ -488,7 +490,7 @@ void set_personality_ia32(bool x32)
if (current->mm)
current->mm->context.ia32_compat = TIF_X32;
current->personality &= ~READ_IMPLIES_EXEC;
- /* is_compat_task() uses the presence of the x32
+ /* in_compat_syscall() uses the presence of the x32
syscall bit flag to determine compat status */
current_thread_info()->status &= ~TS_COMPAT;
} else {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index aa52c1009475..2367ae07eb76 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -112,6 +112,7 @@
#include <asm/alternative.h>
#include <asm/prom.h>
#include <asm/microcode.h>
+#include <asm/mmu_context.h>
/*
* max_low_pfn_mapped: highest direct mapped pfn under 4GB
@@ -1282,3 +1283,11 @@ static int __init register_kernel_offset_dumper(void)
return 0;
}
__initcall(register_kernel_offset_dumper);
+
+void arch_show_smap(struct seq_file *m, struct vm_area_struct *vma)
+{
+ if (!boot_cpu_has(X86_FEATURE_OSPKE))
+ return;
+
+ seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
+}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index b2c99f811c3f..a2065d3b3b39 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -422,7 +422,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
if (c->phys_proc_id == o->phys_proc_id &&
per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) &&
- c->compute_unit_id == o->compute_unit_id)
+ c->cpu_core_id == o->cpu_core_id)
return topology_sane(c, o, "smt");
} else if (c->phys_proc_id == o->phys_proc_id &&
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index fdd0c6430e5a..9ee98eefc44d 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -14,30 +14,34 @@ static int save_stack_stack(void *data, char *name)
return 0;
}
-static void
+static int
__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
{
struct stack_trace *trace = data;
#ifdef CONFIG_FRAME_POINTER
if (!reliable)
- return;
+ return 0;
#endif
if (nosched && in_sched_functions(addr))
- return;
+ return 0;
if (trace->skip > 0) {
trace->skip--;
- return;
+ return 0;
}
- if (trace->nr_entries < trace->max_entries)
+ if (trace->nr_entries < trace->max_entries) {
trace->entries[trace->nr_entries++] = addr;
+ return 0;
+ } else {
+ return -1; /* no more room, stop walking the stack */
+ }
}
-static void save_stack_address(void *data, unsigned long addr, int reliable)
+static int save_stack_address(void *data, unsigned long addr, int reliable)
{
return __save_stack_address(data, addr, reliable, false);
}
-static void
+static int
save_stack_address_nosched(void *data, unsigned long addr, int reliable)
{
return __save_stack_address(data, addr, reliable, true);
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 91a4496db434..e72a07f20b05 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -135,7 +135,7 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
pmd = pmd_alloc(&tboot_mm, pud, vaddr);
if (!pmd)
return -1;
- pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr);
+ pte = pte_alloc_map(&tboot_mm, pmd, vaddr);
if (!pte)
return -1;
set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 5af9958cbdb6..4c941f88d405 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -81,11 +81,11 @@ PHDRS {
SECTIONS
{
#ifdef CONFIG_X86_32
- . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
- phys_startup_32 = startup_32 - LOAD_OFFSET;
+ . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
+ phys_startup_32 = ABSOLUTE(startup_32 - LOAD_OFFSET);
#else
- . = __START_KERNEL;
- phys_startup_64 = startup_64 - LOAD_OFFSET;
+ . = __START_KERNEL;
+ phys_startup_64 = ABSOLUTE(startup_64 - LOAD_OFFSET);
#endif
/* Text and read-only data */
@@ -101,6 +101,7 @@ SECTIONS
KPROBES_TEXT
ENTRY_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)
/* End of text section */
@@ -333,6 +334,7 @@ SECTIONS
__brk_limit = .;
}
+ . = ALIGN(PAGE_SIZE);
_end = .;
STABS_DEBUG
@@ -340,7 +342,10 @@ SECTIONS
/* Sections to be discarded */
DISCARDS
- /DISCARD/ : { *(.eh_frame) }
+ /DISCARD/ : {
+ *(.eh_frame)
+ *(__func_stack_frame_non_standard)
+ }
}