Diffstat (limited to 'arch/x86/kernel')
 arch/x86/kernel/acpi/boot.c           |  11
 arch/x86/kernel/acpi/sleep.c          |  23
 arch/x86/kernel/apic/apic.c           |   5
 arch/x86/kernel/apic/io_apic.c        |  24
 arch/x86/kernel/apic/x2apic_cluster.c | 126
 arch/x86/kernel/asm-offsets.c         |   1
 arch/x86/kernel/cpu/amd.c             |  15
 arch/x86/kernel/cpu/bugs.c            |  10
 arch/x86/kernel/cpu/common.c          |   1
 arch/x86/kernel/cpu/cpu.h             |   8
 arch/x86/kernel/cpu/intel.c           |  59
 arch/x86/kernel/cpu/mce/amd.c         |  16
 arch/x86/kernel/cpu/mce/internal.h    |  10
 arch/x86/kernel/cpu/microcode/amd.c   |   2
 arch/x86/kernel/cpu/mshyperv.c        |  15
 arch/x86/kernel/cpu/resctrl/monitor.c |   2
 arch/x86/kernel/cpu/sgx/main.c        |  11
 arch/x86/kernel/cpu/sgx/sgx.h         |   2
 arch/x86/kernel/ftrace_32.S           |   5
 arch/x86/kernel/ftrace_64.S           |   4
 arch/x86/kernel/head_64.S             |  72
 arch/x86/kernel/kexec-bzimage64.c     |   2
 arch/x86/kernel/paravirt.c            |  30
 arch/x86/kernel/pci-dma.c             |   2
 arch/x86/kernel/sev.c                 |  15
 arch/x86/kernel/smpboot.c             |  30
 arch/x86/kernel/x86_init.c            |   2
 27 files changed, 290 insertions(+), 213 deletions(-)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 0dac4ab5b55b..21b542a6866c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1858,13 +1858,18 @@ early_param("acpi_sci", setup_acpi_sci);
int __acpi_acquire_global_lock(unsigned int *lock)
{
- unsigned int old, new;
+ unsigned int old, new, val;
old = READ_ONCE(*lock);
do {
- new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1));
+ val = (old >> 1) & 0x1;
+ new = (old & ~0x3) + 2 + val;
} while (!try_cmpxchg(lock, &old, new));
- return ((new & 0x3) < 3) ? -1 : 0;
+
+ if (val)
+ return 0;
+
+ return -1;
}
int __acpi_release_global_lock(unsigned int *lock)
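The rewritten acquire path makes the ACPI global-lock protocol explicit: bit 1 of the lock word is "owned", bit 0 is "pending", and a contender that finds the lock owned marks itself pending rather than spinning. A minimal user-space sketch of the same algorithm, using C11 atomics in place of try_cmpxchg() (the function name is hypothetical):

    #include <stdatomic.h>

    /* Returns nonzero (-1) if the lock was acquired -- the callers
     * treat any nonzero value as success -- or 0 if the lock was
     * already owned, in which case the pending bit has been set and
     * the caller must wait for the owner's release notification.
     */
    static int acquire_global_lock(_Atomic unsigned int *lock)
    {
            unsigned int old, new, was_owned;

            old = atomic_load(lock);
            do {
                    was_owned = (old >> 1) & 0x1;
                    /* Clear pending, set owned; set pending iff it was owned. */
                    new = (old & ~0x3) + 2 + was_owned;
            } while (!atomic_compare_exchange_weak(lock, &old, new));

            return was_owned ? 0 : -1;
    }
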
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 3b7f4cdbf2e0..1328c221af30 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -111,13 +111,26 @@ int x86_acpi_suspend_lowlevel(void)
saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
#ifdef CONFIG_SMP
- initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
- early_gdt_descr.address =
- (unsigned long)get_cpu_gdt_rw(smp_processor_id());
- initial_gs = per_cpu_offset(smp_processor_id());
+ /*
+ * As each CPU starts up, it will find its own stack pointer
+ * from its current_task->thread.sp. Typically that will be
+ * the idle thread for a newly-started AP, or even the boot
+ * CPU which will find it set to &init_task in the static
+ * per-cpu data.
+ *
+ * Make the resuming CPU use the temporary stack at startup
+ * by setting current->thread.sp to point to that. The true
+ * %rsp will be restored with the rest of the CPU context,
+ * by do_suspend_lowlevel(). And unwinders don't care about
+ * the abuse of ->thread.sp because it's a dead variable
+ * while the thread is running on the CPU anyway; the true
+ * value is in the actual %rsp register.
+ */
+ current->thread.sp = (unsigned long)temp_stack + sizeof(temp_stack);
+ smpboot_control = smp_processor_id();
#endif
initial_code = (unsigned long)wakeup_long64;
- saved_magic = 0x123456789abcdef0L;
+ saved_magic = 0x123456789abcdef0L;
#endif /* CONFIG_64BIT */
/*
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 20d9a604da7c..770557110051 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -422,10 +422,9 @@ static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
if (vector && !eilvt_entry_is_changeable(vector, new))
/* may not change if vectors are different */
return rsvd;
- rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
- } while (rsvd != new);
+ } while (!atomic_try_cmpxchg(&eilvt_offsets[offset], &rsvd, new));
- rsvd &= ~APIC_EILVT_MASKED;
+ rsvd = new & ~APIC_EILVT_MASKED;
if (rsvd && rsvd != vector)
pr_info("LVT offset %d assigned for vector 0x%02x\n",
offset, rsvd);
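atomic_try_cmpxchg() returns a boolean and, on failure, writes the value it found back into the "expected" argument, so the conventional cmpxchg loop loses its explicit re-read and re-compare. A standalone sketch of the conversion pattern with C11 atomics (the slot-reservation logic is hypothetical, not the EILVT code itself):

    #include <stdatomic.h>

    static unsigned int reserve_slot(_Atomic unsigned int *slot, unsigned int new)
    {
            unsigned int old = atomic_load(slot);

            do {
                    if (old)        /* already reserved by someone else */
                            return old;
            } while (!atomic_compare_exchange_weak(slot, &old, new));
            /* On failure, 'old' was refreshed with the current value. */

            return new;
    }
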
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 1f83b052bb74..4241dc243aa8 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -66,6 +66,7 @@
#include <asm/hw_irq.h>
#include <asm/apic.h>
#include <asm/pgtable.h>
+#include <asm/x86_init.h>
#define for_each_ioapic(idx) \
for ((idx) = 0; (idx) < nr_ioapics; (idx)++)
@@ -2477,17 +2478,21 @@ static int io_apic_get_redir_entries(int ioapic)
unsigned int arch_dynirq_lower_bound(unsigned int from)
{
+ unsigned int ret;
+
/*
* dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use
* gsi_top if ioapic_dynirq_base hasn't been initialized yet.
*/
- if (!ioapic_initialized)
- return gsi_top;
+ ret = ioapic_dynirq_base ? : gsi_top;
+
/*
- * For DT enabled machines ioapic_dynirq_base is irrelevant and not
- * updated. So simply return @from if ioapic_dynirq_base == 0.
+ * For DT enabled machines ioapic_dynirq_base is irrelevant and
+ * always 0. gsi_top can be 0 if there is no IO/APIC registered.
+ * 0 is an invalid interrupt number for dynamic allocations. Return
+ * @from instead.
*/
- return ioapic_dynirq_base ? : from;
+ return ret ? : from;
}
#ifdef CONFIG_X86_32
@@ -2680,10 +2685,15 @@ static void io_apic_set_fixmap(enum fixed_addresses idx, phys_addr_t phys)
pgprot_t flags = FIXMAP_PAGE_NOCACHE;
/*
- * Ensure fixmaps for IOAPIC MMIO respect memory encryption pgprot
+ * Ensure fixmaps for IO-APIC MMIO respect memory encryption pgprot
* bits, just like normal ioremap():
*/
- flags = pgprot_decrypted(flags);
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
+ if (x86_platform.hyper.is_private_mmio(phys))
+ flags = pgprot_encrypted(flags);
+ else
+ flags = pgprot_decrypted(flags);
+ }
__set_fixmap(idx, phys, flags);
}
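arch_dynirq_lower_bound() above uses the GNU C conditional with an omitted middle operand: a ? : b evaluates a once and yields it when nonzero, otherwise b. A worked illustration with assumed values:

    /* GNU C extension: a ?: b == a ? a : b, with 'a' evaluated once. */
    unsigned int lower_bound(unsigned int ioapic_dynirq_base,
                             unsigned int gsi_top, unsigned int from)
    {
            unsigned int ret = ioapic_dynirq_base ? : gsi_top;

            return ret ? : from;
    }
    /* lower_bound(0, 24, 16) == 24; lower_bound(0, 0, 16) == 16 */
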
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index e696e22d0531..b2b2b7f3e03f 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -9,11 +9,7 @@
#include "local.h"
-struct cluster_mask {
- unsigned int clusterid;
- int node;
- struct cpumask mask;
-};
+#define apic_cluster(apicid) ((apicid) >> 4)
/*
* __x2apic_send_IPI_mask() possibly needs to read
@@ -23,8 +19,7 @@ struct cluster_mask {
static u32 *x86_cpu_to_logical_apicid __read_mostly;
static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
-static DEFINE_PER_CPU_READ_MOSTLY(struct cluster_mask *, cluster_masks);
-static struct cluster_mask *cluster_hotplug_mask;
+static DEFINE_PER_CPU_READ_MOSTLY(struct cpumask *, cluster_masks);
static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
@@ -60,10 +55,10 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
/* Collapse cpus in a cluster so a single IPI per cluster is sent */
for_each_cpu(cpu, tmpmsk) {
- struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu);
+ struct cpumask *cmsk = per_cpu(cluster_masks, cpu);
dest = 0;
- for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask)
+ for_each_cpu_and(clustercpu, tmpmsk, cmsk)
dest |= x86_cpu_to_logical_apicid[clustercpu];
if (!dest)
@@ -71,7 +66,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
__x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL);
/* Remove cluster CPUs from tmpmask */
- cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask);
+ cpumask_andnot(tmpmsk, tmpmsk, cmsk);
}
local_irq_restore(flags);
@@ -105,55 +100,98 @@ static u32 x2apic_calc_apicid(unsigned int cpu)
static void init_x2apic_ldr(void)
{
- struct cluster_mask *cmsk = this_cpu_read(cluster_masks);
- u32 cluster, apicid = apic_read(APIC_LDR);
- unsigned int cpu;
+ struct cpumask *cmsk = this_cpu_read(cluster_masks);
- x86_cpu_to_logical_apicid[smp_processor_id()] = apicid;
+ BUG_ON(!cmsk);
- if (cmsk)
- goto update;
-
- cluster = apicid >> 16;
- for_each_online_cpu(cpu) {
- cmsk = per_cpu(cluster_masks, cpu);
- /* Matching cluster found. Link and update it. */
- if (cmsk && cmsk->clusterid == cluster)
- goto update;
+ cpumask_set_cpu(smp_processor_id(), cmsk);
+}
+
+/*
+ * As an optimisation during boot, set the cluster_mask for all present
+ * CPUs at once, to prevent each of them having to iterate over the others
+ * to find the existing cluster_mask.
+ */
+static void prefill_clustermask(struct cpumask *cmsk, unsigned int cpu, u32 cluster)
+{
+ int cpu_i;
+
+ for_each_present_cpu(cpu_i) {
+ struct cpumask **cpu_cmsk = &per_cpu(cluster_masks, cpu_i);
+ u32 apicid = apic->cpu_present_to_apicid(cpu_i);
+
+ if (apicid == BAD_APICID || cpu_i == cpu || apic_cluster(apicid) != cluster)
+ continue;
+
+ if (WARN_ON_ONCE(*cpu_cmsk == cmsk))
+ continue;
+
+ BUG_ON(*cpu_cmsk);
+ *cpu_cmsk = cmsk;
}
- cmsk = cluster_hotplug_mask;
- cmsk->clusterid = cluster;
- cluster_hotplug_mask = NULL;
-update:
- this_cpu_write(cluster_masks, cmsk);
- cpumask_set_cpu(smp_processor_id(), &cmsk->mask);
}
-static int alloc_clustermask(unsigned int cpu, int node)
+static int alloc_clustermask(unsigned int cpu, u32 cluster, int node)
{
+ struct cpumask *cmsk = NULL;
+ unsigned int cpu_i;
+
+ /*
+ * At boot time, the CPU present mask is stable. The cluster mask is
+ * allocated for the first CPU in the cluster and propagated to all
+ * present siblings in the cluster. If the cluster mask is already set
+ * on entry to this function for a given CPU, there is nothing to do.
+ */
if (per_cpu(cluster_masks, cpu))
return 0;
+
+ if (system_state < SYSTEM_RUNNING)
+ goto alloc;
+
/*
- * If a hotplug spare mask exists, check whether it's on the right
- * node. If not, free it and allocate a new one.
+ * On post boot hotplug for a CPU which was not present at boot time,
+ * iterate over all possible CPUs (even those which are not present
+ * any more) to find any existing cluster mask.
*/
- if (cluster_hotplug_mask) {
- if (cluster_hotplug_mask->node == node)
- return 0;
- kfree(cluster_hotplug_mask);
+ for_each_possible_cpu(cpu_i) {
+ u32 apicid = apic->cpu_present_to_apicid(cpu_i);
+
+ if (apicid != BAD_APICID && apic_cluster(apicid) == cluster) {
+ cmsk = per_cpu(cluster_masks, cpu_i);
+ /*
+ * If the cluster is already initialized, just store
+ * the mask and return. There's no need to propagate.
+ */
+ if (cmsk) {
+ per_cpu(cluster_masks, cpu) = cmsk;
+ return 0;
+ }
+ }
}
-
- cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask),
- GFP_KERNEL, node);
- if (!cluster_hotplug_mask)
+ /*
+ * No CPU in the cluster has ever been initialized, so fall through to
+ * the boot time code which will also populate the cluster mask for any
+ * other CPU in the cluster which is (now) present.
+ */
+alloc:
+ cmsk = kzalloc_node(sizeof(*cmsk), GFP_KERNEL, node);
+ if (!cmsk)
return -ENOMEM;
- cluster_hotplug_mask->node = node;
+ per_cpu(cluster_masks, cpu) = cmsk;
+ prefill_clustermask(cmsk, cpu, cluster);
+
return 0;
}
static int x2apic_prepare_cpu(unsigned int cpu)
{
- if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0)
+ u32 phys_apicid = apic->cpu_present_to_apicid(cpu);
+ u32 cluster = apic_cluster(phys_apicid);
+ u32 logical_apicid = (cluster << 16) | (1 << (phys_apicid & 0xf));
+
+ x86_cpu_to_logical_apicid[cpu] = logical_apicid;
+
+ if (alloc_clustermask(cpu, cluster, cpu_to_node(cpu)) < 0)
return -ENOMEM;
if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL))
return -ENOMEM;
@@ -162,10 +200,10 @@ static int x2apic_prepare_cpu(unsigned int cpu)
static int x2apic_dead_cpu(unsigned int dead_cpu)
{
- struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu);
+ struct cpumask *cmsk = per_cpu(cluster_masks, dead_cpu);
if (cmsk)
- cpumask_clear_cpu(dead_cpu, &cmsk->mask);
+ cpumask_clear_cpu(dead_cpu, cmsk);
free_cpumask_var(per_cpu(ipi_mask, dead_cpu));
return 0;
}
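The open-coded constants in x2apic_prepare_cpu() follow the x2APIC logical-destination format: bits 31:16 of the logical APIC ID hold the cluster (physical APIC ID >> 4, per the new apic_cluster() macro) and bits 15:0 hold a one-hot position within the 16-CPU cluster. A worked example:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t phys_apicid = 0x23;
            uint32_t cluster = phys_apicid >> 4;   /* apic_cluster(): 0x2 */
            uint32_t logical = (cluster << 16) | (1u << (phys_apicid & 0xf));

            printf("logical APIC ID = 0x%08x\n", logical);   /* 0x00020008 */
            return 0;
    }
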
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 283dcd2f62c8..dc3576303f1a 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -115,6 +115,7 @@ static void __used common(void)
OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack);
+ OFFSET(X86_current_task, pcpu_hot, current_task);
#ifdef CONFIG_CALL_DEPTH_TRACKING
OFFSET(X86_call_depth, pcpu_hot, call_depth);
#endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 95cdd08c4cbb..571abf808ea3 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -929,6 +929,10 @@ static void init_amd(struct cpuinfo_x86 *c)
if (c->x86 >= 0x10)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+ /* AMD FSRM also implies FSRS */
+ if (cpu_has(c, X86_FEATURE_FSRM))
+ set_cpu_cap(c, X86_FEATURE_FSRS);
+
/* get apicid instead of initial apic id from cpuid */
c->apicid = hard_smp_processor_id();
@@ -1005,6 +1009,17 @@ static void init_amd(struct cpuinfo_x86 *c)
msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT);
check_null_seg_clears_base(c);
+
+ /*
+ * Make sure EFER[AIBRSE - Automatic IBRS Enable] is set. The APs are brought up
+ * using the trampoline code and as part of it, MSR_EFER gets prepared there in
+ * order to be replicated onto them. Regardless, set it here again, if not set,
+ * to protect against any future refactoring/code reorganization which might
+ * miss setting this important bit.
+ */
+ if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
+ cpu_has(c, X86_FEATURE_AUTOIBRS))
+ WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS));
}
#ifdef CONFIG_X86_32
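msr_set_bit() returns 0 when the bit was already set, 1 when this call had to flip it, and a negative errno on failure, so the WARN_ON_ONCE() above fires precisely when the trampoline path failed to propagate EFER.AUTOIBRS. A simplified model of that contract (not the real MSR accessor, which takes an MSR index):

    #include <stdint.h>

    static int msr_bit_was_missing(uint64_t *msr, int bit)
    {
            if (*msr & (1ULL << bit))
                    return 0;       /* already set: nothing to warn about */
            *msr |= 1ULL << bit;
            return 1;               /* had to set it: triggers the warning */
    }
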
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index f9d060e71c3e..182af64387d0 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -784,8 +784,7 @@ static int __init nospectre_v1_cmdline(char *str)
}
early_param("nospectre_v1", nospectre_v1_cmdline);
-static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
- SPECTRE_V2_NONE;
+enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE;
#undef pr_fmt
#define pr_fmt(fmt) "RETBleed: " fmt
@@ -1133,13 +1132,6 @@ spectre_v2_parse_user_cmdline(void)
return SPECTRE_V2_USER_CMD_AUTO;
}
-static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
-{
- return mode == SPECTRE_V2_EIBRS ||
- mode == SPECTRE_V2_EIBRS_RETPOLINE ||
- mode == SPECTRE_V2_EIBRS_LFENCE;
-}
-
static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
{
return spectre_v2_in_eibrs_mode(mode) || mode == SPECTRE_V2_IBRS;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8cd4126d8253..80710a68ef7d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -121,6 +121,7 @@ static const struct x86_cpu_id ppin_cpuids[] = {
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &ppin_info[X86_VENDOR_INTEL]),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &ppin_info[X86_VENDOR_INTEL]),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &ppin_info[X86_VENDOR_INTEL]),
+ X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &ppin_info[X86_VENDOR_INTEL]),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &ppin_info[X86_VENDOR_INTEL]),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &ppin_info[X86_VENDOR_INTEL]),
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 57a5349e6954..f97b0fe13da8 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -83,4 +83,12 @@ unsigned int aperfmperf_get_khz(int cpu);
extern void x86_spec_ctrl_setup_ap(void);
extern void update_srbds_msr(void);
+extern enum spectre_v2_mitigation spectre_v2_enabled;
+
+static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
+{
+ return mode == SPECTRE_V2_EIBRS ||
+ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
+ mode == SPECTRE_V2_EIBRS_LFENCE;
+}
#endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 291d4167fab8..1c648b09e053 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -1451,31 +1451,13 @@ void handle_bus_lock(struct pt_regs *regs)
}
/*
- * Bits in the IA32_CORE_CAPABILITIES are not architectural, so they should
- * only be trusted if it is confirmed that a CPU model implements a
- * specific feature at a particular bit position.
- *
- * The possible driver data field values:
- *
- * - 0: CPU models that are known to have the per-core split-lock detection
- * feature even though they do not enumerate IA32_CORE_CAPABILITIES.
- *
- * - 1: CPU models which may enumerate IA32_CORE_CAPABILITIES and if so use
- * bit 5 to enumerate the per-core split-lock detection feature.
+ * CPU models that are known to have the per-core split-lock detection
+ * feature even though they do not enumerate IA32_CORE_CAPABILITIES.
*/
static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0),
- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, 1),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, 1),
- X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, 1),
- X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, 1),
- X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1),
- X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 1),
- X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 1),
- X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 1),
- X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, 1),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0),
{}
};
@@ -1487,24 +1469,27 @@ static void __init split_lock_setup(struct cpuinfo_x86 *c)
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return;
+ /* Check for CPUs that have support but do not enumerate it: */
m = x86_match_cpu(split_lock_cpu_ids);
- if (!m)
- return;
+ if (m)
+ goto supported;
- switch (m->driver_data) {
- case 0:
- break;
- case 1:
- if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES))
- return;
- rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps);
- if (!(ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT))
- return;
- break;
- default:
+ if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES))
return;
- }
+ /*
+ * Not all bits in MSR_IA32_CORE_CAPS are architectural, but
+ * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is. All CPUs that set
+ * it have split lock detection.
+ */
+ rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps);
+ if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)
+ goto supported;
+
+ /* CPU is not in the model list and does not have the MSR bit: */
+ return;
+
+supported:
cpu_model_supports_sld = true;
__split_lock_setup();
}
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 23c5072fbbb7..0b971f974096 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -235,10 +235,10 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
* A list of the banks enabled on each logical CPU. Controls which respective
* descriptors to initialize later in mce_threshold_create_device().
*/
-static DEFINE_PER_CPU(unsigned int, bank_map);
+static DEFINE_PER_CPU(u64, bank_map);
/* Map of banks that have more than MCA_MISC0 available. */
-static DEFINE_PER_CPU(u32, smca_misc_banks_map);
+static DEFINE_PER_CPU(u64, smca_misc_banks_map);
static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);
@@ -267,7 +267,7 @@ static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
return;
if (low & MASK_BLKPTR_LO)
- per_cpu(smca_misc_banks_map, cpu) |= BIT(bank);
+ per_cpu(smca_misc_banks_map, cpu) |= BIT_ULL(bank);
}
@@ -530,7 +530,7 @@ static u32 smca_get_block_address(unsigned int bank, unsigned int block,
if (!block)
return MSR_AMD64_SMCA_MCx_MISC(bank);
- if (!(per_cpu(smca_misc_banks_map, cpu) & BIT(bank)))
+ if (!(per_cpu(smca_misc_banks_map, cpu) & BIT_ULL(bank)))
return 0;
return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
@@ -574,7 +574,7 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
int new;
if (!block)
- per_cpu(bank_map, cpu) |= (1 << bank);
+ per_cpu(bank_map, cpu) |= BIT_ULL(bank);
memset(&b, 0, sizeof(b));
b.cpu = cpu;
@@ -878,7 +878,7 @@ static void amd_threshold_interrupt(void)
return;
for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
- if (!(per_cpu(bank_map, cpu) & (1 << bank)))
+ if (!(per_cpu(bank_map, cpu) & BIT_ULL(bank)))
continue;
first_block = bp[bank]->blocks;
@@ -1029,7 +1029,7 @@ static const struct sysfs_ops threshold_ops = {
static void threshold_block_release(struct kobject *kobj);
-static struct kobj_type threshold_ktype = {
+static const struct kobj_type threshold_ktype = {
.sysfs_ops = &threshold_ops,
.default_groups = default_groups,
.release = threshold_block_release,
@@ -1356,7 +1356,7 @@ int mce_threshold_create_device(unsigned int cpu)
return -ENOMEM;
for (bank = 0; bank < numbanks; ++bank) {
- if (!(this_cpu_read(bank_map) & (1 << bank)))
+ if (!(this_cpu_read(bank_map) & BIT_ULL(bank)))
continue;
err = threshold_create_bank(bp, cpu, bank);
if (err) {
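The widening from BIT() to BIT_ULL() matters because SMCA systems can expose more than 32 MCA banks: "1 << bank" is a 32-bit shift, which is undefined behaviour for bank >= 32, while the map variables are now u64. A compact illustration:

    #include <stdint.h>

    static uint64_t bank_map;

    static void set_bank(unsigned int bank)
    {
            /* 1 << bank would be a 32-bit shift: UB for bank >= 32. */
            bank_map |= 1ULL << bank;       /* BIT_ULL(bank): OK up to 63 */
    }
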
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 91a415553c27..d2412ce2d312 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -244,11 +244,11 @@ noinstr void pentium_machine_check(struct pt_regs *regs);
noinstr void winchip_machine_check(struct pt_regs *regs);
static inline void enable_p5_mce(void) { mce_p5_enabled = 1; }
#else
-static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
-static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
-static inline void enable_p5_mce(void) {}
-static inline void pentium_machine_check(struct pt_regs *regs) {}
-static inline void winchip_machine_check(struct pt_regs *regs) {}
+static __always_inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
+static __always_inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
+static __always_inline void enable_p5_mce(void) {}
+static __always_inline void pentium_machine_check(struct pt_regs *regs) {}
+static __always_inline void winchip_machine_check(struct pt_regs *regs) {}
#endif
noinstr u64 mce_rdmsrl(u32 msr);
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 9eb457b10341..f5fdeb1e3606 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -61,7 +61,7 @@ static u8 amd_ucode_patch[MAX_NUMNODES][PATCH_MAX_SIZE];
/*
* Microcode patch container file is prepended to the initrd in cpio
- * format. See Documentation/x86/microcode.rst
+ * format. See Documentation/arch/x86/microcode.rst
*/
static const char
ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin";
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index f1197366a97d..315fc358e584 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -33,7 +33,6 @@
#include <asm/nmi.h>
#include <clocksource/hyperv_timer.h>
#include <asm/numa.h>
-#include <asm/coco.h>
/* Is Linux running as the root partition? */
bool hv_root_partition;
@@ -401,8 +400,10 @@ static void __init ms_hyperv_init_platform(void)
if (ms_hyperv.priv_high & HV_ISOLATION) {
ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG);
ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG);
- ms_hyperv.shared_gpa_boundary =
- BIT_ULL(ms_hyperv.shared_gpa_boundary_bits);
+
+ if (ms_hyperv.shared_gpa_boundary_active)
+ ms_hyperv.shared_gpa_boundary =
+ BIT_ULL(ms_hyperv.shared_gpa_boundary_bits);
pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n",
ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
@@ -413,11 +414,6 @@ static void __init ms_hyperv_init_platform(void)
swiotlb_unencrypted_base = ms_hyperv.shared_gpa_boundary;
#endif
}
- /* Isolation VMs are unenlightened SEV-based VMs, thus this check: */
- if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
- if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE)
- cc_set_vendor(CC_VENDOR_HYPERV);
- }
}
if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) {
@@ -486,6 +482,9 @@ static void __init ms_hyperv_init_platform(void)
i8253_clear_counter_on_shutdown = false;
#if IS_ENABLED(CONFIG_HYPERV)
+ if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) ||
+ (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP))
+ hv_vtom_init();
/*
* Setup the hook to get control post apic initialization.
*/
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 7fe51488e136..0e7b6afe2fa6 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -76,7 +76,7 @@ unsigned int resctrl_rmid_realloc_limit;
#define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5))
/*
- * The correction factor table is documented in Documentation/x86/resctrl.rst.
+ * The correction factor table is documented in Documentation/arch/x86/resctrl.rst.
* If rmid > rmid threshold, MBM total and local values should be multiplied
* by the correction factor.
*
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index e5a37b6e9aa5..166692f2d501 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -892,20 +892,19 @@ static struct miscdevice sgx_dev_provision = {
int sgx_set_attribute(unsigned long *allowed_attributes,
unsigned int attribute_fd)
{
- struct file *file;
+ struct fd f = fdget(attribute_fd);
- file = fget(attribute_fd);
- if (!file)
+ if (!f.file)
return -EINVAL;
- if (file->f_op != &sgx_provision_fops) {
- fput(file);
+ if (f.file->f_op != &sgx_provision_fops) {
+ fdput(f);
return -EINVAL;
}
*allowed_attributes |= SGX_ATTR_PROVISIONKEY;
- fput(file);
+ fdput(f);
return 0;
}
EXPORT_SYMBOL_GPL(sgx_set_attribute);
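fdget() differs from fget() in that it can hand back a borrowed reference without touching the file's refcount when the fd table is not shared; whether a real reference was taken is recorded inside struct fd, so the lookup must be paired with fdput(), not fput(), on every exit path. A kernel-context sketch of the general pattern (the permitted() check is hypothetical):

    static int use_fd(unsigned int some_fd)
    {
            struct fd f = fdget(some_fd);

            if (!f.file)
                    return -EINVAL;
            if (!permitted(f.file)) {       /* hypothetical check */
                    fdput(f);
                    return -EPERM;
            }
            /* ... use f.file ... */
            fdput(f);
            return 0;
    }
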
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
index 0f2020653fba..d2dad21259a8 100644
--- a/arch/x86/kernel/cpu/sgx/sgx.h
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ -15,7 +15,7 @@
#define EREMOVE_ERROR_MESSAGE \
"EREMOVE returned %d (0x%x) and an EPC page was leaked. SGX may become unusable. " \
- "Refer to Documentation/x86/sgx.rst for more information."
+ "Refer to Documentation/arch/x86/sgx.rst for more information."
#define SGX_MAX_EPC_SECTIONS 8
#define SGX_EEXTEND_BLOCK_SIZE 256
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index a0ed0e4a2c0c..0d9a14528176 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -163,6 +163,11 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
jmp .Lftrace_ret
SYM_CODE_END(ftrace_regs_caller)
+SYM_FUNC_START(ftrace_stub_direct_tramp)
+ CALL_DEPTH_ACCOUNT
+ RET
+SYM_FUNC_END(ftrace_stub_direct_tramp)
+
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
SYM_CODE_START(ftrace_graph_caller)
pushl %eax
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index fb4f1e01b64a..970d8445fdc4 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -309,6 +309,10 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
SYM_FUNC_END(ftrace_regs_caller)
STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller)
+SYM_FUNC_START(ftrace_stub_direct_tramp)
+ CALL_DEPTH_ACCOUNT
+ RET
+SYM_FUNC_END(ftrace_stub_direct_tramp)
#else /* ! CONFIG_DYNAMIC_FTRACE */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 222efd4a09bc..6a8238702eab 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -61,23 +61,15 @@ SYM_CODE_START_NOALIGN(startup_64)
* tables and then reload them.
*/
- /* Set up the stack for verify_cpu(), similar to initial_stack below */
- leaq (__end_init_task - FRAME_SIZE)(%rip), %rsp
+ /* Set up the stack for verify_cpu() */
+ leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp
leaq _text(%rip), %rdi
- /*
- * initial_gs points to initial fixed_percpu_data struct with storage for
- * the stack protector canary. Global pointer fixups are needed at this
- * stage, so apply them as is done in fixup_pointer(), and initialize %gs
- * such that the canary can be accessed at %gs:40 for subsequent C calls.
- */
+ /* Setup GSBASE to allow stack canary access for C code */
movl $MSR_GS_BASE, %ecx
- movq initial_gs(%rip), %rax
- movq $_text, %rdx
- subq %rdx, %rax
- addq %rdi, %rax
- movq %rax, %rdx
+ leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
+ movl %edx, %eax
shrq $32, %rdx
wrmsr
@@ -241,13 +233,36 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
UNWIND_HINT_EMPTY
ANNOTATE_NOENDBR // above
+#ifdef CONFIG_SMP
+ movl smpboot_control(%rip), %ecx
+
+ /* Get the per cpu offset for the given CPU# which is in ECX */
+ movq __per_cpu_offset(,%rcx,8), %rdx
+#else
+ xorl %edx, %edx /* zero-extended to clear all of RDX */
+#endif /* CONFIG_SMP */
+
+ /*
+ * Setup a boot time stack - Any secondary CPU will have lost its stack
+ * by now because the cr3-switch above unmaps the real-mode stack.
+ *
+ * RDX contains the per-cpu offset
+ */
+ movq pcpu_hot + X86_current_task(%rdx), %rax
+ movq TASK_threadsp(%rax), %rsp
+
/*
* We must switch to a new descriptor in kernel space for the GDT
* because soon the kernel won't have access anymore to the userspace
* addresses where we're currently running on. We have to do that here
* because in 32bit we couldn't load a 64bit linear address.
*/
- lgdt early_gdt_descr(%rip)
+ subq $16, %rsp
+ movw $(GDT_SIZE-1), (%rsp)
+ leaq gdt_page(%rdx), %rax
+ movq %rax, 2(%rsp)
+ lgdt (%rsp)
+ addq $16, %rsp
/* set up data segments */
xorl %eax,%eax
@@ -271,16 +286,13 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
* the per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
- movl initial_gs(%rip),%eax
- movl initial_gs+4(%rip),%edx
+#ifndef CONFIG_SMP
+ leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
+#endif
+ movl %edx, %eax
+ shrq $32, %rdx
wrmsr
- /*
- * Setup a boot time stack - Any secondary CPU will have lost its stack
- * by now because the cr3-switch above unmaps the real-mode stack
- */
- movq initial_stack(%rip), %rsp
-
/* Setup and Load IDT */
pushq %rsi
call early_setup_idt
@@ -372,7 +384,11 @@ SYM_CODE_END(secondary_startup_64)
SYM_CODE_START(start_cpu0)
ANNOTATE_NOENDBR
UNWIND_HINT_EMPTY
- movq initial_stack(%rip), %rsp
+
+ /* Find the idle task stack */
+ movq PER_CPU_VAR(pcpu_hot) + X86_current_task, %rcx
+ movq TASK_threadsp(%rcx), %rsp
+
jmp .Ljump_to_C_code
SYM_CODE_END(start_cpu0)
#endif
@@ -416,16 +432,9 @@ SYM_CODE_END(vc_boot_ghcb)
__REFDATA
.balign 8
SYM_DATA(initial_code, .quad x86_64_start_kernel)
-SYM_DATA(initial_gs, .quad INIT_PER_CPU_VAR(fixed_percpu_data))
#ifdef CONFIG_AMD_MEM_ENCRYPT
SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb)
#endif
-
-/*
- * The FRAME_SIZE gap is a convention which helps the in-kernel unwinder
- * reliably detect the end of the stack.
- */
-SYM_DATA(initial_stack, .quad init_thread_union + THREAD_SIZE - FRAME_SIZE)
__FINITDATA
__INIT
@@ -657,8 +666,7 @@ SYM_DATA_END(level1_fixmap_pgt)
.data
.align 16
-SYM_DATA(early_gdt_descr, .word GDT_ENTRIES*8-1)
-SYM_DATA_LOCAL(early_gdt_descr_base, .quad INIT_PER_CPU_VAR(gdt_page))
+SYM_DATA(smpboot_control, .long 0)
.align 16
/* This must match the first entry in level2_kernel_pgt */
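The lgdt in secondary_startup_64 now consumes a descriptor built on the stack instead of the static early_gdt_descr: in long mode its operand is a packed 10-byte structure, a 16-bit limit followed by a 64-bit linear base. Expressed as C for clarity:

    #include <stdint.h>

    /* Layout of the lgdt operand; the assembly above writes the two
     * fields with "movw $(GDT_SIZE-1), (%rsp)" and "movq %rax, 2(%rsp)".
     */
    struct __attribute__((packed)) gdt_descr {
            uint16_t limit;         /* GDT_SIZE - 1              */
            uint64_t base;          /* per-CPU gdt_page address  */
    };
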
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 6b58610a1552..a61c12c01270 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -476,7 +476,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
efi_map_offset = params_cmdline_sz;
efi_setup_data_offset = efi_map_offset + ALIGN(efi_map_sz, 16);
- /* Copy setup header onto bootparams. Documentation/x86/boot.rst */
+ /* Copy setup header onto bootparams. Documentation/arch/x86/boot.rst */
setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset;
/* Is there a limit on setup header size? */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 42e182868873..ac10b46c5832 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -64,11 +64,11 @@ static unsigned paravirt_patch_call(void *insn_buff, const void *target,
}
#ifdef CONFIG_PARAVIRT_XXL
-/* identity function, which can be inlined */
-u64 notrace _paravirt_ident_64(u64 x)
-{
- return x;
-}
+DEFINE_PARAVIRT_ASM(_paravirt_ident_64, "mov %rdi, %rax", .text);
+DEFINE_PARAVIRT_ASM(pv_native_save_fl, "pushf; pop %rax", .noinstr.text);
+DEFINE_PARAVIRT_ASM(pv_native_irq_disable, "cli", .noinstr.text);
+DEFINE_PARAVIRT_ASM(pv_native_irq_enable, "sti", .noinstr.text);
+DEFINE_PARAVIRT_ASM(pv_native_read_cr2, "mov %cr2, %rax", .noinstr.text);
#endif
DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);
@@ -197,11 +197,6 @@ void paravirt_end_context_switch(struct task_struct *next)
arch_enter_lazy_mmu_mode();
}
-static noinstr unsigned long pv_native_read_cr2(void)
-{
- return native_read_cr2();
-}
-
static noinstr void pv_native_write_cr2(unsigned long val)
{
native_write_cr2(val);
@@ -222,16 +217,6 @@ noinstr void pv_native_wbinvd(void)
native_wbinvd();
}
-static noinstr void pv_native_irq_enable(void)
-{
- native_irq_enable();
-}
-
-static noinstr void pv_native_irq_disable(void)
-{
- native_irq_disable();
-}
-
static noinstr void pv_native_safe_halt(void)
{
native_safe_halt();
@@ -298,7 +283,7 @@ struct paravirt_patch_template pv_ops = {
.cpu.end_context_switch = paravirt_nop,
/* Irq ops. */
- .irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
+ .irq.save_fl = __PV_IS_CALLEE_SAVE(pv_native_save_fl),
.irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable),
.irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable),
.irq.safe_halt = pv_native_safe_halt,
@@ -363,8 +348,7 @@ struct paravirt_patch_template pv_ops = {
.mmu.make_pte = PTE_IDENT,
.mmu.make_pgd = PTE_IDENT,
- .mmu.dup_mmap = paravirt_nop,
- .mmu.activate_mm = paravirt_nop,
+ .mmu.enter_mmap = paravirt_nop,
.mmu.lazy_mode = {
.enter = paravirt_nop,
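DEFINE_PARAVIRT_ASM() replaces the tiny noinstr C wrappers with hand-written assembly stubs so nothing the compiler might emit (prologue, instrumentation, stack protector) can end up in these paths. A simplified model of what such a macro expands to, assuming plain GNU toplevel asm (the real kernel macro additionally emits ENDBR, alignment and symbol-type/size directives):

    /* Emits a bare global function containing exactly 'instr' + ret. */
    #define DEFINE_PV_ASM(func, instr, sec)                 \
            asm(".pushsection " #sec ", \"ax\"\n"           \
                ".global " #func "\n"                       \
                #func ":\n\t"                               \
                instr "\n\t"                                \
                "ret\n\t"                                   \
                ".popsection")

    DEFINE_PV_ASM(my_save_fl, "pushf; pop %rax", .text);    /* hypothetical name */
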
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 30bbe4abb5d6..de6be0a3965e 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -124,7 +124,7 @@ void __init pci_iommu_alloc(void)
}
/*
- * See <Documentation/x86/x86_64/boot-options.rst> for the iommu kernel
+ * See <Documentation/arch/x86/x86_64/boot-options.rst> for the iommu kernel
* parameter documentation.
*/
static __init int iommu_setup(char *p)
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 3f664ab277c4..b031244d6d2d 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -22,6 +22,8 @@
#include <linux/efi.h>
#include <linux/platform_device.h>
#include <linux/io.h>
+#include <linux/psp-sev.h>
+#include <uapi/linux/sev-guest.h>
#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
@@ -2175,7 +2177,7 @@ static int __init init_sev_config(char *str)
}
__setup("sev=", init_sev_config);
-int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err)
+int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio)
{
struct ghcb_state state;
struct es_em_ctxt ctxt;
@@ -2183,8 +2185,7 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned
struct ghcb *ghcb;
int ret;
- if (!fw_err)
- return -EINVAL;
+ rio->exitinfo2 = SEV_RET_NO_FW_CALL;
/*
* __sev_get_ghcb() needs to run with IRQs disabled because it is using
@@ -2209,16 +2210,16 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned
if (ret)
goto e_put;
- *fw_err = ghcb->save.sw_exit_info_2;
- switch (*fw_err) {
+ rio->exitinfo2 = ghcb->save.sw_exit_info_2;
+ switch (rio->exitinfo2) {
case 0:
break;
- case SNP_GUEST_REQ_ERR_BUSY:
+ case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY):
ret = -EAGAIN;
break;
- case SNP_GUEST_REQ_INVALID_LEN:
+ case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN):
/* Number of expected pages are returned in RBX */
if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
input->data_npages = ghcb_get_rbx(ghcb);
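The reworked return channel packs both error sources into a single u64: assuming the SNP_GUEST_VMM_ERR() encoding used here shifts the VMM error into the upper 32 bits (matching the sev-guest UAPI constants this series introduces), the firmware error occupies the lower 32 bits, and SEV_RET_NO_FW_CALL is preseeded to signal that the firmware was never reached. A sketch of that layout with hypothetical helper names:

    #include <stdint.h>

    #define VMM_ERR_SHIFT   32
    #define VMM_ERR(x)      (((uint64_t)(x)) << VMM_ERR_SHIFT)

    static inline uint32_t vmm_err(uint64_t exitinfo2) { return exitinfo2 >> VMM_ERR_SHIFT; }
    static inline uint32_t fw_err(uint64_t exitinfo2)  { return (uint32_t)exitinfo2; }
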
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9013bb28255a..851477f7d728 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -121,17 +121,20 @@ int arch_update_cpu_topology(void)
return retval;
}
+
+static unsigned int smpboot_warm_reset_vector_count;
+
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
unsigned long flags;
spin_lock_irqsave(&rtc_lock, flags);
- CMOS_WRITE(0xa, 0xf);
+ if (!smpboot_warm_reset_vector_count++) {
+ CMOS_WRITE(0xa, 0xf);
+ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = start_eip >> 4;
+ *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = start_eip & 0xf;
+ }
spin_unlock_irqrestore(&rtc_lock, flags);
- *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
- start_eip >> 4;
- *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
- start_eip & 0xf;
}
static inline void smpboot_restore_warm_reset_vector(void)
@@ -143,10 +146,12 @@ static inline void smpboot_restore_warm_reset_vector(void)
* to default values.
*/
spin_lock_irqsave(&rtc_lock, flags);
- CMOS_WRITE(0, 0xf);
+ if (!--smpboot_warm_reset_vector_count) {
+ CMOS_WRITE(0, 0xf);
+ *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
+ }
spin_unlock_irqrestore(&rtc_lock, flags);
- *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
}
/*
@@ -1059,8 +1064,6 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
per_cpu(pcpu_hot.top_of_stack, cpu) = task_top_of_stack(idle);
-#else
- initial_gs = per_cpu_offset(cpu);
#endif
return 0;
}
@@ -1086,9 +1089,14 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
start_ip = real_mode_header->trampoline_start64;
#endif
idle->thread.sp = (unsigned long)task_pt_regs(idle);
- early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
initial_code = (unsigned long)start_secondary;
- initial_stack = idle->thread.sp;
+
+ if (IS_ENABLED(CONFIG_X86_32)) {
+ early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
+ initial_stack = idle->thread.sp;
+ } else {
+ smpboot_control = cpu;
+ }
/* Enable the espfix hack for this CPU */
init_espfix_ap(cpu);
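The warm-reset vector setup and teardown above are now reference counted: with smpboot_control steering each 64-bit AP to its own stack, bringups can overlap, and only the first caller should write the vector while only the last should clear it. A minimal model of that first/last discipline (helper names hypothetical):

    static unsigned int count;

    static void do_write_vector(void) { /* CMOS + trampoline writes */ }
    static void do_clear_vector(void) { /* CMOS + trampoline clear  */ }

    void vector_setup(void)   { if (!count++) do_write_vector(); }
    void vector_restore(void) { if (!--count) do_clear_vector(); }
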
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 10622cf2b30f..ecdeb0974a87 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -134,6 +134,7 @@ static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool
static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; }
static bool enc_tlb_flush_required_noop(bool enc) { return false; }
static bool enc_cache_flush_required_noop(void) { return false; }
+static bool is_private_mmio_noop(u64 addr) { return false; }
struct x86_platform_ops x86_platform __ro_after_init = {
.calibrate_cpu = native_calibrate_cpu_early,
@@ -149,6 +150,7 @@ struct x86_platform_ops x86_platform __ro_after_init = {
.realmode_reserve = reserve_real_mode,
.realmode_init = init_real_mode,
.hyper.pin_vcpu = x86_op_int_noop,
+ .hyper.is_private_mmio = is_private_mmio_noop,
.guest = {
.enc_status_change_prepare = enc_status_change_prepare_noop,