summaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/apic/apic.c35
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c13
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c42
-rw-r--r--arch/x86/kernel/cpu/amd.c12
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c18
-rw-r--r--arch/x86/kernel/fpu/xstate.c138
-rw-r--r--arch/x86/kernel/head32.c2
-rw-r--r--arch/x86/kernel/head64.c1
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/irq.c3
-rw-r--r--arch/x86/kernel/kvmclock.c1
-rw-r--r--arch/x86/kernel/paravirt.c4
-rw-r--r--arch/x86/kernel/setup.c27
-rw-r--r--arch/x86/kernel/smpboot.c25
-rw-r--r--arch/x86/kernel/tsc.c4
-rw-r--r--arch/x86/kernel/uprobes.c22
16 files changed, 159 insertions, 190 deletions
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 20abd912f0e4..f3e9b2df4b16 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -313,7 +313,7 @@ int lapic_get_maxlvt(void)
/* Clock divisor */
#define APIC_DIVISOR 16
-#define TSC_DIVISOR 32
+#define TSC_DIVISOR 8
/*
* This function sets up the local APIC timer, with a timeout of
@@ -565,13 +565,37 @@ static void setup_APIC_timer(void)
CLOCK_EVT_FEAT_DUMMY);
levt->set_next_event = lapic_next_deadline;
clockevents_config_and_register(levt,
- (tsc_khz / TSC_DIVISOR) * 1000,
+ tsc_khz * (1000 / TSC_DIVISOR),
0xF, ~0UL);
} else
clockevents_register_device(levt);
}
/*
+ * Install the updated TSC frequency from recalibration at the TSC
+ * deadline clockevent devices.
+ */
+static void __lapic_update_tsc_freq(void *info)
+{
+ struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
+
+ if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ return;
+
+ clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
+}
+
+void lapic_update_tsc_freq(void)
+{
+ /*
+ * The clockevent device's ->mult and ->shift can both be
+ * changed. In order to avoid races, schedule the frequency
+ * update code on each CPU.
+ */
+ on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
+}
+
+/*
* In this functions we calibrate APIC bus clocks to the external timer.
*
* We want to do the calibration only once since we want to have local timer
@@ -1599,6 +1623,9 @@ void __init enable_IR_x2apic(void)
unsigned long flags;
int ret, ir_stat;
+ if (skip_ioapic_setup)
+ return;
+
ir_stat = irq_remapping_prepare();
if (ir_stat < 0 && !x2apic_supported())
return;
@@ -2066,7 +2093,6 @@ int generic_processor_info(int apicid, int version)
return -EINVAL;
}
- num_processors++;
if (apicid == boot_cpu_physical_apicid) {
/*
* x86_bios_cpu_apicid is required to have processors listed
@@ -2089,10 +2115,13 @@ int generic_processor_info(int apicid, int version)
pr_warning("APIC: Package limit reached. Processor %d/0x%x ignored.\n",
thiscpu, apicid);
+
disabled_cpus++;
return -ENOSPC;
}
+ num_processors++;
+
/*
* Validate version
*/
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 6368fa69d2af..54f35d988025 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -155,7 +155,7 @@ static void init_x2apic_ldr(void)
/*
* At CPU state changes, update the x2apic cluster sibling info.
*/
-int x2apic_prepare_cpu(unsigned int cpu)
+static int x2apic_prepare_cpu(unsigned int cpu)
{
if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
return -ENOMEM;
@@ -168,7 +168,7 @@ int x2apic_prepare_cpu(unsigned int cpu)
return 0;
}
-int x2apic_dead_cpu(unsigned int this_cpu)
+static int x2apic_dead_cpu(unsigned int this_cpu)
{
int cpu;
@@ -186,13 +186,18 @@ int x2apic_dead_cpu(unsigned int this_cpu)
static int x2apic_cluster_probe(void)
{
int cpu = smp_processor_id();
+ int ret;
if (!x2apic_mode)
return 0;
+ ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
+ x2apic_prepare_cpu, x2apic_dead_cpu);
+ if (ret < 0) {
+ pr_err("Failed to register X2APIC_PREPARE\n");
+ return 0;
+ }
cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
- cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
- x2apic_prepare_cpu, x2apic_dead_cpu);
return 1;
}
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 09b59adaea3f..cb0673c1e940 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -223,6 +223,11 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
if (strncmp(oem_id, "SGI", 3) != 0)
return 0;
+ if (numa_off) {
+ pr_err("UV: NUMA is off, disabling UV support\n");
+ return 0;
+ }
+
/* Setup early hub type field in uv_hub_info for Node 0 */
uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
@@ -325,7 +330,7 @@ static __init void build_uv_gr_table(void)
struct uv_gam_range_entry *gre = uv_gre_table;
struct uv_gam_range_s *grt;
unsigned long last_limit = 0, ram_limit = 0;
- int bytes, i, sid, lsid = -1;
+ int bytes, i, sid, lsid = -1, indx = 0, lindx = -1;
if (!gre)
return;
@@ -356,11 +361,12 @@ static __init void build_uv_gr_table(void)
}
sid = gre->sockid - _min_socket;
if (lsid < sid) { /* new range */
- grt = &_gr_table[sid];
- grt->base = lsid;
+ grt = &_gr_table[indx];
+ grt->base = lindx;
grt->nasid = gre->nasid;
grt->limit = last_limit = gre->limit;
lsid = sid;
+ lindx = indx++;
continue;
}
if (lsid == sid && !ram_limit) { /* update range */
@@ -371,7 +377,7 @@ static __init void build_uv_gr_table(void)
}
if (!ram_limit) { /* non-contiguous ram range */
grt++;
- grt->base = sid - 1;
+ grt->base = lindx;
grt->nasid = gre->nasid;
grt->limit = last_limit = gre->limit;
continue;
@@ -1155,19 +1161,18 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
if (!index) {
pr_info("UV: GAM Range Table...\n");
- pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s %3s\n",
+ pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n",
"Range", "", "Size", "Type", "NASID",
- "SID", "PN", "PXM");
+ "SID", "PN");
}
pr_info(
- "UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x %3d\n",
+ "UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n",
index++,
(unsigned long)lgre << UV_GAM_RANGE_SHFT,
(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
((unsigned long)(gre->limit - lgre)) >>
(30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
- gre->type, gre->nasid, gre->sockid,
- gre->pnode, gre->pxm);
+ gre->type, gre->nasid, gre->sockid, gre->pnode);
lgre = gre->limit;
if (sock_min > gre->sockid)
@@ -1286,7 +1291,7 @@ static void __init build_socket_tables(void)
_pnode_to_socket[i] = SOCK_EMPTY;
/* fill in pnode/node/addr conversion list values */
- pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n");
+ pr_info("UV: GAM Building socket/pnode conversion tables\n");
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
continue;
@@ -1294,20 +1299,18 @@ static void __init build_socket_tables(void)
if (_socket_to_pnode[i] != SOCK_EMPTY)
continue; /* duplicate */
_socket_to_pnode[i] = gre->pnode;
- _socket_to_node[i] = gre->pxm;
i = gre->pnode - minpnode;
_pnode_to_socket[i] = gre->sockid;
pr_info(
- "UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n",
+ "UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
gre->sockid, gre->type, gre->nasid,
_socket_to_pnode[gre->sockid - minsock],
- _socket_to_node[gre->sockid - minsock],
_pnode_to_socket[gre->pnode - minpnode]);
}
- /* check socket -> node values */
+ /* Set socket -> node values */
lnid = -1;
for_each_present_cpu(cpu) {
int nid = cpu_to_node(cpu);
@@ -1318,14 +1321,9 @@ static void __init build_socket_tables(void)
lnid = nid;
apicid = per_cpu(x86_cpu_to_apicid, cpu);
sockid = apicid >> uv_cpuid.socketid_shift;
- i = sockid - minsock;
-
- if (nid != _socket_to_node[i]) {
- pr_warn(
- "UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n",
- i, sockid, gre->type, _socket_to_node[i], nid);
- _socket_to_node[i] = nid;
- }
+ _socket_to_node[sockid - minsock] = nid;
+ pr_info("UV: sid:%02x: apicid:%04x node:%2d\n",
+ sockid, apicid, nid);
}
/* Setup physical blade to pnode translation from GAM Range Table */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f5c69d8974e1..b81fe2d63e15 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -669,6 +669,17 @@ static void init_amd_gh(struct cpuinfo_x86 *c)
set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
}
+#define MSR_AMD64_DE_CFG 0xC0011029
+
+static void init_amd_ln(struct cpuinfo_x86 *c)
+{
+ /*
+ * Apply erratum 665 fix unconditionally so machines without a BIOS
+ * fix work.
+ */
+ msr_set_bit(MSR_AMD64_DE_CFG, 31);
+}
+
static void init_amd_bd(struct cpuinfo_x86 *c)
{
u64 value;
@@ -726,6 +737,7 @@ static void init_amd(struct cpuinfo_x86 *c)
case 6: init_amd_k7(c); break;
case 0xf: init_amd_k8(c); break;
case 0x10: init_amd_gh(c); break;
+ case 0x12: init_amd_ln(c); break;
case 0x15: init_amd_bd(c); break;
}
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 27a0228c9cae..620ab06bcf45 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -54,6 +54,7 @@ static LIST_HEAD(pcache);
*/
static u8 *container;
static size_t container_size;
+static bool ucode_builtin;
static u32 ucode_new_rev;
static u8 amd_ucode_patch[PATCH_MAX_SIZE];
@@ -281,18 +282,22 @@ static bool __init load_builtin_amd_microcode(struct cpio_data *cp,
void __init load_ucode_amd_bsp(unsigned int family)
{
struct cpio_data cp;
+ bool *builtin;
void **data;
size_t *size;
#ifdef CONFIG_X86_32
data = (void **)__pa_nodebug(&ucode_cpio.data);
size = (size_t *)__pa_nodebug(&ucode_cpio.size);
+ builtin = (bool *)__pa_nodebug(&ucode_builtin);
#else
data = &ucode_cpio.data;
size = &ucode_cpio.size;
+ builtin = &ucode_builtin;
#endif
- if (!load_builtin_amd_microcode(&cp, family))
+ *builtin = load_builtin_amd_microcode(&cp, family);
+ if (!*builtin)
cp = find_ucode_in_initrd();
if (!(cp.data && cp.size))
@@ -355,6 +360,7 @@ void load_ucode_amd_ap(void)
unsigned int cpu = smp_processor_id();
struct equiv_cpu_entry *eq;
struct microcode_amd *mc;
+ u8 *cont = container;
u32 rev, eax;
u16 eq_id;
@@ -371,8 +377,12 @@ void load_ucode_amd_ap(void)
if (check_current_patch_level(&rev, false))
return;
+ /* Add CONFIG_RANDOMIZE_MEMORY offset. */
+ if (!ucode_builtin)
+ cont += PAGE_OFFSET - __PAGE_OFFSET_BASE;
+
eax = cpuid_eax(0x00000001);
- eq = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ);
+ eq = (struct equiv_cpu_entry *)(cont + CONTAINER_HDR_SZ);
eq_id = find_equiv_id(eq, eax);
if (!eq_id)
@@ -434,6 +444,10 @@ int __init save_microcode_in_initrd_amd(void)
else
container = cont_va;
+ /* Add CONFIG_RANDOMIZE_MEMORY offset. */
+ if (!ucode_builtin)
+ container += PAGE_OFFSET - __PAGE_OFFSET_BASE;
+
eax = cpuid_eax(0x00000001);
eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 680049aa4593..01567aa87503 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -866,105 +866,17 @@ const void *get_xsave_field_ptr(int xsave_state)
return get_xsave_addr(&fpu->state.xsave, xsave_state);
}
-
-/*
- * Set xfeatures (aka XSTATE_BV) bit for a feature that we want
- * to take out of its "init state". This will ensure that an
- * XRSTOR actually restores the state.
- */
-static void fpu__xfeature_set_non_init(struct xregs_state *xsave,
- int xstate_feature_mask)
-{
- xsave->header.xfeatures |= xstate_feature_mask;
-}
-
-/*
- * This function is safe to call whether the FPU is in use or not.
- *
- * Note that this only works on the current task.
- *
- * Inputs:
- * @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
- * XFEATURE_MASK_SSE, etc...)
- * @xsave_state_ptr: a pointer to a copy of the state that you would
- * like written in to the current task's FPU xsave state. This pointer
- * must not be located in the current tasks's xsave area.
- * Output:
- * address of the state in the xsave area or NULL if the state
- * is not present or is in its 'init state'.
- */
-static void fpu__xfeature_set_state(int xstate_feature_mask,
- void *xstate_feature_src, size_t len)
-{
- struct xregs_state *xsave = &current->thread.fpu.state.xsave;
- struct fpu *fpu = &current->thread.fpu;
- void *dst;
-
- if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
- WARN_ONCE(1, "%s() attempted with no xsave support", __func__);
- return;
- }
-
- /*
- * Tell the FPU code that we need the FPU state to be in
- * 'fpu' (not in the registers), and that we need it to
- * be stable while we write to it.
- */
- fpu__current_fpstate_write_begin();
-
- /*
- * This method *WILL* *NOT* work for compact-format
- * buffers. If the 'xstate_feature_mask' is unset in
- * xcomp_bv then we may need to move other feature state
- * "up" in the buffer.
- */
- if (xsave->header.xcomp_bv & xstate_feature_mask) {
- WARN_ON_ONCE(1);
- goto out;
- }
-
- /* find the location in the xsave buffer of the desired state */
- dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask);
-
- /*
- * Make sure that the pointer being passed in did not
- * come from the xsave buffer itself.
- */
- WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself");
-
- /* put the caller-provided data in the location */
- memcpy(dst, xstate_feature_src, len);
-
- /*
- * Mark the xfeature so that the CPU knows there is state
- * in the buffer now.
- */
- fpu__xfeature_set_non_init(xsave, xstate_feature_mask);
-out:
- /*
- * We are done writing to the 'fpu'. Reenable preeption
- * and (possibly) move the fpstate back in to the fpregs.
- */
- fpu__current_fpstate_write_end();
-}
-
#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
/*
- * This will go out and modify the XSAVE buffer so that PKRU is
- * set to a particular state for access to 'pkey'.
- *
- * PKRU state does affect kernel access to user memory. We do
- * not modfiy PKRU *itself* here, only the XSAVE state that will
- * be restored in to PKRU when we return back to userspace.
+ * This will go out and modify PKRU register to set the access
+ * rights for @pkey to @init_val.
*/
int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
unsigned long init_val)
{
- struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
- struct pkru_state *old_pkru_state;
- struct pkru_state new_pkru_state;
+ u32 old_pkru;
int pkey_shift = (pkey * PKRU_BITS_PER_PKEY);
u32 new_pkru_bits = 0;
@@ -974,6 +886,15 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
*/
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return -EINVAL;
+ /*
+ * For most XSAVE components, this would be an arduous task:
+ * brining fpstate up to date with fpregs, updating fpstate,
+ * then re-populating fpregs. But, for components that are
+ * never lazily managed, we can just access the fpregs
+ * directly. PKRU is never managed lazily, so we can just
+ * manipulate it directly. Make sure it stays that way.
+ */
+ WARN_ON_ONCE(!use_eager_fpu());
/* Set the bits we need in PKRU: */
if (init_val & PKEY_DISABLE_ACCESS)
@@ -984,37 +905,12 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
/* Shift the bits in to the correct place in PKRU for pkey: */
new_pkru_bits <<= pkey_shift;
- /* Locate old copy of the state in the xsave buffer: */
- old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU);
-
- /*
- * When state is not in the buffer, it is in the init
- * state, set it manually. Otherwise, copy out the old
- * state.
- */
- if (!old_pkru_state)
- new_pkru_state.pkru = 0;
- else
- new_pkru_state.pkru = old_pkru_state->pkru;
-
- /* Mask off any old bits in place: */
- new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
-
- /* Set the newly-requested bits: */
- new_pkru_state.pkru |= new_pkru_bits;
-
- /*
- * We could theoretically live without zeroing pkru.pad.
- * The current XSAVE feature state definition says that
- * only bytes 0->3 are used. But we do not want to
- * chance leaking kernel stack out to userspace in case a
- * memcpy() of the whole xsave buffer was done.
- *
- * They're in the same cacheline anyway.
- */
- new_pkru_state.pad = 0;
+ /* Get old PKRU and mask off any old bits in place: */
+ old_pkru = read_pkru();
+ old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
- fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, sizeof(new_pkru_state));
+ /* Write old part along with new part: */
+ write_pkru(old_pkru | new_pkru_bits);
return 0;
}
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 2dda0bc4576e..f16c55bfc090 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -25,8 +25,6 @@ static void __init i386_default_early_setup(void)
/* Initialize 32bit specific setup functions */
x86_init.resources.reserve_resources = i386_reserve_resources;
x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
-
- reserve_bios_regions();
}
asmlinkage __visible void __init i386_start_kernel(void)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 99d48e7d2974..54a2372f5dbb 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -183,7 +183,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
copy_bootdata(__va(real_mode_data));
x86_early_init_platform_quirks();
- reserve_bios_regions();
switch (boot_params.hdr.hardware_subarch) {
case X86_SUBARCH_INTEL_MID:
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ed16e58658a4..c6dfd801df97 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1242,7 +1242,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
memset(&curr_time, 0, sizeof(struct rtc_time));
if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
- mc146818_set_time(&curr_time);
+ mc146818_get_time(&curr_time);
if (hpet_rtc_flags & RTC_UIE &&
curr_time.tm_sec != hpet_prev_update_sec) {
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 61521dc19c10..9f669fdd2010 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -102,8 +102,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_puts(p, " Rescheduling interrupts\n");
seq_printf(p, "%*s: ", prec, "CAL");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
- irq_stats(j)->irq_tlb_count);
+ seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
seq_puts(p, " Function call interrupts\n");
seq_printf(p, "%*s: ", prec, "TLB");
for_each_online_cpu(j)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1d39bfbd26bb..3692249a70f1 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -289,6 +289,7 @@ void __init kvmclock_init(void)
put_cpu();
x86_platform.calibrate_tsc = kvm_get_tsc_khz;
+ x86_platform.calibrate_cpu = kvm_get_tsc_khz;
x86_platform.get_wallclock = kvm_get_wallclock;
x86_platform.set_wallclock = kvm_set_wallclock;
#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index ad5bc9578a73..1acfd76e3e26 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -56,12 +56,12 @@ asm (".pushsection .entry.text, \"ax\"\n"
".popsection");
/* identity function, which can be inlined */
-u32 _paravirt_ident_32(u32 x)
+u32 notrace _paravirt_ident_32(u32 x)
{
return x;
}
-u64 _paravirt_ident_64(u64 x)
+u64 notrace _paravirt_ident_64(u64 x)
{
return x;
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 991b77986d57..0fa60f5f5a16 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -936,8 +936,6 @@ void __init setup_arch(char **cmdline_p)
x86_init.oem.arch_setup();
- kernel_randomize_memory();
-
iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
setup_memory_map();
parse_setup_data();
@@ -1055,6 +1053,12 @@ void __init setup_arch(char **cmdline_p)
max_possible_pfn = max_pfn;
+ /*
+ * Define random base addresses for memory sections after max_pfn is
+ * defined and before each memory section base is used.
+ */
+ kernel_randomize_memory();
+
#ifdef CONFIG_X86_32
/* max_low_pfn get updated here */
find_low_pfn_range();
@@ -1097,6 +1101,8 @@ void __init setup_arch(char **cmdline_p)
efi_find_mirror();
}
+ reserve_bios_regions();
+
/*
* The EFI specification says that boot service code won't be called
* after ExitBootServices(). This is, in fact, a lie.
@@ -1125,7 +1131,15 @@ void __init setup_arch(char **cmdline_p)
early_trap_pf_init();
- setup_real_mode();
+ /*
+ * Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)
+ * with the current CR4 value. This may not be necessary, but
+ * auditing all the early-boot CR4 manipulation would be needed to
+ * rule it out.
+ */
+ if (boot_cpu_data.cpuid_level >= 0)
+ /* A CPU has %cr4 if and only if it has CPUID. */
+ mmu_cr4_features = __read_cr4();
memblock_set_current_limit(get_max_mapped());
@@ -1174,13 +1188,6 @@ void __init setup_arch(char **cmdline_p)
kasan_init();
- if (boot_cpu_data.cpuid_level >= 0) {
- /* A CPU has %cr4 if and only if it has CPUID */
- mmu_cr4_features = __read_cr4();
- if (trampoline_cr4_features)
- *trampoline_cr4_features = mmu_cr4_features;
- }
-
#ifdef CONFIG_X86_32
/* sync back kernel address range */
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2a6e84a30a54..4296beb8fdd3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -100,10 +100,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
/* Logical package management. We might want to allocate that dynamically */
static int *physical_to_logical_pkg __read_mostly;
static unsigned long *physical_package_map __read_mostly;;
-static unsigned long *logical_package_map __read_mostly;
static unsigned int max_physical_pkg_id __read_mostly;
unsigned int __max_logical_packages __read_mostly;
EXPORT_SYMBOL(__max_logical_packages);
+static unsigned int logical_packages __read_mostly;
+static bool logical_packages_frozen __read_mostly;
/* Maximum number of SMT threads on any online core */
int __max_smt_threads __read_mostly;
@@ -277,14 +278,14 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu)
if (test_and_set_bit(pkg, physical_package_map))
goto found;
- new = find_first_zero_bit(logical_package_map, __max_logical_packages);
- if (new >= __max_logical_packages) {
+ if (logical_packages_frozen) {
physical_to_logical_pkg[pkg] = -1;
- pr_warn("APIC(%x) Package %u exceeds logical package map\n",
+ pr_warn("APIC(%x) Package %u exceeds logical package max\n",
apicid, pkg);
return -ENOSPC;
}
- set_bit(new, logical_package_map);
+
+ new = logical_packages++;
pr_info("APIC(%x) Converting physical %u to logical package %u\n",
apicid, pkg, new);
physical_to_logical_pkg[pkg] = new;
@@ -341,6 +342,7 @@ static void __init smp_init_package_map(void)
}
__max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
+ logical_packages = 0;
/*
* Possibly larger than what we need as the number of apic ids per
@@ -352,10 +354,6 @@ static void __init smp_init_package_map(void)
memset(physical_to_logical_pkg, 0xff, size);
size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
physical_package_map = kzalloc(size, GFP_KERNEL);
- size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long);
- logical_package_map = kzalloc(size, GFP_KERNEL);
-
- pr_info("Max logical packages: %u\n", __max_logical_packages);
for_each_present_cpu(cpu) {
unsigned int apicid = apic->cpu_present_to_apicid(cpu);
@@ -369,6 +367,15 @@ static void __init smp_init_package_map(void)
set_cpu_possible(cpu, false);
set_cpu_present(cpu, false);
}
+
+ if (logical_packages > __max_logical_packages) {
+ pr_warn("Detected more packages (%u), then computed by BIOS data (%u).\n",
+ logical_packages, __max_logical_packages);
+ logical_packages_frozen = true;
+ __max_logical_packages = logical_packages;
+ }
+
+ pr_info("Max logical packages: %u\n", __max_logical_packages);
}
void __init smp_store_boot_cpu_info(void)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 1ef87e887051..78b9cb5a26af 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -22,6 +22,7 @@
#include <asm/nmi.h>
#include <asm/x86_init.h>
#include <asm/geode.h>
+#include <asm/apic.h>
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@@ -1249,6 +1250,9 @@ static void tsc_refine_calibration_work(struct work_struct *work)
(unsigned long)tsc_khz / 1000,
(unsigned long)tsc_khz % 1000);
+ /* Inform the TSC deadline clockevent devices about the recalibration */
+ lapic_update_tsc_freq();
+
out:
if (boot_cpu_has(X86_FEATURE_ART))
art_related_clocksource = &clocksource_tsc;
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 6c1ff31d99ff..495c776de4b4 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -357,20 +357,22 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
*cursor &= 0xfe;
}
/*
- * Similar treatment for VEX3 prefix.
- * TODO: add XOP/EVEX treatment when insn decoder supports them
+ * Similar treatment for VEX3/EVEX prefix.
+ * TODO: add XOP treatment when insn decoder supports them
*/
- if (insn->vex_prefix.nbytes == 3) {
+ if (insn->vex_prefix.nbytes >= 3) {
/*
* vex2: c5 rvvvvLpp (has no b bit)
* vex3/xop: c4/8f rxbmmmmm wvvvvLpp
* evex: 62 rxbR00mm wvvvv1pp zllBVaaa
- * (evex will need setting of both b and x since
- * in non-sib encoding evex.x is 4th bit of MODRM.rm)
- * Setting VEX3.b (setting because it has inverted meaning):
+ * Setting VEX3.b (setting because it has inverted meaning).
+ * Setting EVEX.x since (in non-SIB encoding) EVEX.x
+ * is the 4th bit of MODRM.rm, and needs the same treatment.
+ * For VEX3-encoded insns, VEX3.x value has no effect in
+ * non-SIB encoding, the change is superfluous but harmless.
*/
cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
- *cursor |= 0x20;
+ *cursor |= 0x60;
}
/*
@@ -415,12 +417,10 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
reg = MODRM_REG(insn); /* Fetch modrm.reg */
reg2 = 0xff; /* Fetch vex.vvvv */
- if (insn->vex_prefix.nbytes == 2)
- reg2 = insn->vex_prefix.bytes[1];
- else if (insn->vex_prefix.nbytes == 3)
+ if (insn->vex_prefix.nbytes)
reg2 = insn->vex_prefix.bytes[2];
/*
- * TODO: add XOP, EXEV vvvv reading.
+ * TODO: add XOP vvvv reading.
*
* vex.vvvv field is in bits 6-3, bits are inverted.
* But in 32-bit mode, high-order bit may be ignored.