50 files changed, 789 insertions, 719 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1720eabc31ec..57a14206ad8f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -283,7 +283,6 @@ config X86
 	select RTC_LIB
 	select RTC_MC146818_LIB
 	select SPARSE_IRQ
-	select SRCU
 	select SYSCTL_EXCEPTION_TRACE
 	select THREAD_INFO_IN_TASK
 	select TRACE_IRQFLAGS_SUPPORT
@@ -1938,7 +1937,6 @@ config X86_SGX
 	depends on X86_64 && CPU_SUP_INTEL && X86_X2APIC
 	depends on CRYPTO=y
 	depends on CRYPTO_SHA256=y
-	select SRCU
 	select MMU_NOTIFIER
 	select NUMA_KEEP_MEMINFO if NUMA
 	select XARRAY_MULTI
diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um
index b70559b821df..2106a2bd152b 100644
--- a/arch/x86/Makefile.um
+++ b/arch/x86/Makefile.um
@@ -3,9 +3,14 @@ core-y += arch/x86/crypto/
 
 #
 # Disable SSE and other FP/SIMD instructions to match normal x86
+# This is required to work around issues in older LLVM versions, but breaks
+# GCC versions < 11. See:
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99652
 #
+ifeq ($(CONFIG_CC_IS_CLANG),y)
 KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
 KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2
+endif
 
 ifeq ($(CONFIG_X86_32),y)
 START := 0x8048000
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 8c45b198b62f..bccea57dee81 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -923,6 +923,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
 
 		/* Event overflow */
 		handled++;
+		status &= ~mask;
 		perf_sample_data_init(&data, 0, hwc->last_period);
 
 		if (!x86_perf_event_set_period(event))
@@ -933,8 +934,6 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
 
 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
-
-		status &= ~mask;
 	}
 
 	/*
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index e2975a32d443..d7da28fada87 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -8,7 +8,7 @@
 
 #define ALT_FLAGS_SHIFT		16
 
-#define ALT_FLAG_NOT		BIT(0)
+#define ALT_FLAG_NOT		(1 << 0)
 #define ALT_NOT(feature)	((ALT_FLAG_NOT << ALT_FLAGS_SHIFT) | (feature))
 
 #ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index cbaf174d8efd..b3af2d45bbbb 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -125,6 +125,8 @@
 
 #define INTEL_FAM6_LUNARLAKE_M		0xBD
 
+#define INTEL_FAM6_ARROWLAKE		0xC6
+
 /* "Small Core" Processors (Atom/E-Core) */
 
 #define INTEL_FAM6_ATOM_BONNELL		0x1C /* Diamondville, Pineview */
diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index 52788f79786f..255a78d9d906 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -49,7 +49,7 @@ DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
  *   simple as possible.
  * Must be called with preemption disabled.
  */
-static void __resctrl_sched_in(void)
+static inline void __resctrl_sched_in(struct task_struct *tsk)
 {
 	struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
 	u32 closid = state->default_closid;
@@ -61,13 +61,13 @@ static void __resctrl_sched_in(void)
 	 * Else use the closid/rmid assigned to this cpu.
 	 */
 	if (static_branch_likely(&rdt_alloc_enable_key)) {
-		tmp = READ_ONCE(current->closid);
+		tmp = READ_ONCE(tsk->closid);
 		if (tmp)
 			closid = tmp;
 	}
 
 	if (static_branch_likely(&rdt_mon_enable_key)) {
-		tmp = READ_ONCE(current->rmid);
+		tmp = READ_ONCE(tsk->rmid);
 		if (tmp)
 			rmid = tmp;
 	}
@@ -88,17 +88,17 @@ static inline unsigned int resctrl_arch_round_mon_val(unsigned int val)
 	return val * scale;
 }
 
-static inline void resctrl_sched_in(void)
+static inline void resctrl_sched_in(struct task_struct *tsk)
 {
 	if (static_branch_likely(&rdt_enable_key))
-		__resctrl_sched_in();
+		__resctrl_sched_in(tsk);
 }
 
 void resctrl_cpu_detect(struct cpuinfo_x86 *c);
 
 #else
 
-static inline void resctrl_sched_in(void) {}
+static inline void resctrl_sched_in(struct task_struct *tsk) {}
 static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {}
 
 #endif /* CONFIG_X86_CPU_RESCTRL */
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index b8357d6ecd47..b63be696b776 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -128,8 +128,9 @@ struct snp_psc_desc {
 	struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY];
 } __packed;
 
-/* Guest message request error code */
+/* Guest message request error codes */
 #define SNP_GUEST_REQ_INVALID_LEN	BIT_ULL(32)
+#define SNP_GUEST_REQ_ERR_BUSY		BIT_ULL(33)
 
 #define GHCB_MSR_TERM_REQ		0x100
 #define GHCB_MSR_TERM_REASON_SET_POS	12
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index cb1ee53ad3b1..770dcf75eaa9 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -261,20 +261,22 @@ enum avic_ipi_failure_cause {
 	AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
 };
 
-#define AVIC_PHYSICAL_MAX_INDEX_MASK	GENMASK_ULL(9, 0)
+#define AVIC_PHYSICAL_MAX_INDEX_MASK	GENMASK_ULL(8, 0)
 
 /*
- * For AVIC, the max index allowed for physical APIC ID
- * table is 0xff (255).
+ * For AVIC, the max index allowed for physical APIC ID table is 0xfe (254), as
+ * 0xff is a broadcast to all CPUs, i.e. can't be targeted individually.
  */
 #define AVIC_MAX_PHYSICAL_ID		0XFEULL
 
 /*
- * For x2AVIC, the max index allowed for physical APIC ID
- * table is 0x1ff (511).
+ * For x2AVIC, the max index allowed for physical APIC ID table is 0x1ff (511).
  */
 #define X2AVIC_MAX_PHYSICAL_ID		0x1FFUL
 
+static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID);
+static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
+
 #define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)
 #define VMCB_AVIC_APIC_BAR_MASK		0xFFFFFFFFFF000ULL
 
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index d13d71af5cf6..0a49a8de9f3c 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -18,32 +18,26 @@
 
 /* Handles exceptions in both to and from, but doesn't do access_ok */
 __must_check unsigned long
-copy_user_enhanced_fast_string(void *to, const void *from, unsigned len);
-__must_check unsigned long
-copy_user_generic_string(void *to, const void *from, unsigned len);
-__must_check unsigned long
-copy_user_generic_unrolled(void *to, const void *from, unsigned len);
+rep_movs_alternative(void *to, const void *from, unsigned len);
 
 static __always_inline __must_check unsigned long
-copy_user_generic(void *to, const void *from, unsigned len)
+copy_user_generic(void *to, const void *from, unsigned long len)
 {
-	unsigned ret;
-
+	stac();
 	/*
-	 * If CPU has ERMS feature, use copy_user_enhanced_fast_string.
-	 * Otherwise, if CPU has rep_good feature, use copy_user_generic_string.
-	 * Otherwise, use copy_user_generic_unrolled.
+	 * If CPU has FSRM feature, use 'rep movs'.
+	 * Otherwise, use rep_movs_alternative.
 	 */
-	alternative_call_2(copy_user_generic_unrolled,
-			 copy_user_generic_string,
-			 X86_FEATURE_REP_GOOD,
-			 copy_user_enhanced_fast_string,
-			 X86_FEATURE_ERMS,
-			 ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
-				     "=d" (len)),
-			 "1" (to), "2" (from), "3" (len)
-			 : "memory", "rcx", "r8", "r9", "r10", "r11");
-	return ret;
+	asm volatile(
+		"1:\n\t"
+		ALTERNATIVE("rep movsb",
+			    "call rep_movs_alternative", ALT_NOT(X86_FEATURE_FSRM))
+		"2:\n"
+		_ASM_EXTABLE_UA(1b, 2b)
+		:"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
+		: : "memory", "rax", "r8", "r9", "r10", "r11");
+	clac();
+	return len;
 }
 
 static __always_inline __must_check unsigned long
@@ -58,9 +52,7 @@ raw_copy_to_user(void __user *dst, const void *src, unsigned long size)
 	return copy_user_generic((__force void *)dst, src, size);
 }
 
-extern long __copy_user_nocache(void *dst, const void __user *src,
-				unsigned size, int zerorest);
-
+extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size);
 extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size);
 extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
 			   size_t len);
@@ -69,8 +61,12 @@ static inline int
 __copy_from_user_inatomic_nocache(void *dst, const void __user *src,
 				  unsigned size)
 {
+	long ret;
 	kasan_check_write(dst, size);
-	return __copy_user_nocache(dst, src, size, 0);
+	stac();
+	ret = __copy_user_nocache(dst, src, size);
+	clac();
+	return ret;
 }
 
 static inline int
@@ -85,11 +81,7 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
  */
 
 __must_check unsigned long
-clear_user_original(void __user *addr, unsigned long len);
-__must_check unsigned long
-clear_user_rep_good(void __user *addr, unsigned long len);
-__must_check unsigned long
-clear_user_erms(void __user *addr, unsigned long len);
+rep_stos_alternative(void __user *addr, unsigned long len);
 
 static __always_inline __must_check unsigned long __clear_user(void __user *addr, unsigned long size)
 {
@@ -102,16 +94,12 @@ static __always_inline __must_check unsigned long __clear_user(void __user *addr
 	 */
 	asm volatile(
 		"1:\n\t"
-		ALTERNATIVE_3("rep stosb",
-			      "call clear_user_erms",	  ALT_NOT(X86_FEATURE_FSRM),
-			      "call clear_user_rep_good", ALT_NOT(X86_FEATURE_ERMS),
-			      "call clear_user_original", ALT_NOT(X86_FEATURE_REP_GOOD))
+		ALTERNATIVE("rep stosb",
+			    "call rep_stos_alternative", ALT_NOT(X86_FEATURE_FSRS))
 		"2:\n"
 	       _ASM_EXTABLE_UA(1b, 2b)
 	       : "+c" (size), "+D" (addr), ASM_CALL_CONSTRAINT
-	       : "a" (0)
-		/* rep_good clobbers %rdx */
-	       : "rdx");
+	       : "a" (0));
 
 	clac();
 
diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h
index 6daa9b0c8d11..a3c29b1496c8 100644
--- a/arch/x86/include/asm/xen/cpuid.h
+++ b/arch/x86/include/asm/xen/cpuid.h
@@ -89,11 +89,21 @@
  * Sub-leaf 2: EAX: host tsc frequency in kHz
  */
 
+#define XEN_CPUID_TSC_EMULATED               (1u << 0)
+#define XEN_CPUID_HOST_TSC_RELIABLE          (1u << 1)
+#define XEN_CPUID_RDTSCP_INSTR_AVAIL         (1u << 2)
+
+#define XEN_CPUID_TSC_MODE_DEFAULT           (0)
+#define XEN_CPUID_TSC_MODE_ALWAYS_EMULATE    (1u)
+#define XEN_CPUID_TSC_MODE_NEVER_EMULATE     (2u)
+#define XEN_CPUID_TSC_MODE_PVRDTSCP          (3u)
+
 /*
  * Leaf 5 (0x40000x04)
  * HVM-specific features
  * Sub-leaf 0: EAX: Features
  * Sub-leaf 0: EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag)
+ * Sub-leaf 0: ECX: domain id (iff EAX has XEN_HVM_CPUID_DOMID_PRESENT flag)
  */
 #define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) /* Virtualized APIC registers */
 #define XEN_HVM_CPUID_X2APIC_VIRT      (1u << 1) /* Virtualized x2APIC accesses */
@@ -102,12 +112,16 @@
 #define XEN_HVM_CPUID_VCPU_ID_PRESENT  (1u << 3) /* vcpu id is present in EBX */
 #define XEN_HVM_CPUID_DOMID_PRESENT    (1u << 4) /* domid is present in ECX */
 /*
- * Bits 55:49 from the IO-APIC RTE and bits 11:5 from the MSI address can be
- * used to store high bits for the Destination ID. This expands the Destination
- * ID field from 8 to 15 bits, allowing to target APIC IDs up 32768.
+ * With interrupt format set to 0 (non-remappable) bits 55:49 from the
+ * IO-APIC RTE and bits 11:5 from the MSI address can be used to store
+ * high bits for the Destination ID. This expands the Destination ID
+ * field from 8 to 15 bits, allowing to target APIC IDs up 32768.
  */
 #define XEN_HVM_CPUID_EXT_DEST_ID      (1u << 5)
-/* Per-vCPU event channel upcalls */
+/*
+ * Per-vCPU event channel upcalls work correctly with physical IRQs
+ * bound to event channels.
+ */
 #define XEN_HVM_CPUID_UPCALL_VECTOR    (1u << 6)
 
 /*
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 1c38174b5f01..0dac4ab5b55b 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -146,7 +146,11 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
 
 		pr_debug("Local APIC address 0x%08x\n", madt->address);
 	}
-	if (madt->header.revision >= 5)
+
+	/* ACPI 6.3 and newer support the online capable bit. */
+	if (acpi_gbl_FADT.header.revision > 6 ||
+	    (acpi_gbl_FADT.header.revision == 6 &&
+	     acpi_gbl_FADT.minor_revision >= 3))
 		acpi_support_online_capable = true;
 
 	default_acpi_madt_oem_check(madt->header.oem_id,
@@ -193,7 +197,8 @@ static bool __init acpi_is_processor_usable(u32 lapic_flags)
 	if (lapic_flags & ACPI_MADT_ENABLED)
 		return true;
 
-	if (acpi_support_online_capable && (lapic_flags & ACPI_MADT_ONLINE_CAPABLE))
+	if (!acpi_support_online_capable ||
+	    (lapic_flags & ACPI_MADT_ONLINE_CAPABLE))
 		return true;
 
 	return false;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 380753b14cab..1547781e505b 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -880,6 +880,15 @@ void init_spectral_chicken(struct cpuinfo_x86 *c)
 		}
 	}
 #endif
+	/*
+	 * Work around Erratum 1386.  The XSAVES instruction malfunctions in
+	 * certain circumstances on Zen1/2 uarch, and not all parts have had
+	 * updated microcode at the time of writing (March 2023).
+	 *
+	 * Affected parts all have no supervisor XSAVE states, meaning that
+	 * the XSAVEC instruction (which works fine) is equivalent.
+	 */
+	clear_cpu_cap(c, X86_FEATURE_XSAVES);
 }
 
 static void init_amd_zn(struct cpuinfo_x86 *c)
@@ -920,6 +929,10 @@ static void init_amd(struct cpuinfo_x86 *c)
 	if (c->x86 >= 0x10)
 		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
 
+	/* AMD FSRM also implies FSRS */
+	if (cpu_has(c, X86_FEATURE_FSRM))
+		set_cpu_cap(c, X86_FEATURE_FSRS);
+
 	/* get apicid instead of initial apic id from cpuid */
 	c->apicid = hard_smp_processor_id();
 
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 7832a69d170e..2eec60f50057 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -2355,6 +2355,7 @@ static void mce_restart(void)
 {
 	mce_timer_delete_all();
 	on_each_cpu(mce_cpu_restart, NULL, 1);
+	mce_schedule_work();
 }
 
 /* Toggle features for corrected errors */
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index f36dc2f796c5..f1197366a97d 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -358,12 +358,16 @@ static void __init ms_hyperv_init_platform(void)
 	 * To mirror what Windows does we should extract CPU management
 	 * features and use the ReservedIdentityBit to detect if Linux is the
 	 * root partition. But that requires negotiating CPU management
-	 * interface (a process to be finalized).
+	 * interface (a process to be finalized). For now, use the privilege
+	 * flag as the indicator for running as root.
 	 *
-	 * For now, use the privilege flag as the indicator for running as
-	 * root.
+	 * Hyper-V should never specify running as root and as a Confidential
+	 * VM. But to protect against a compromised/malicious Hyper-V trying
+	 * to exploit root behavior to expose Confidential VM memory, ignore
+	 * the root partition setting if also a Confidential VM.
 	 */
-	if (cpuid_ebx(HYPERV_CPUID_FEATURES) & HV_CPU_MANAGEMENT) {
+	if ((ms_hyperv.priv_high & HV_CPU_MANAGEMENT) &&
+	    !(ms_hyperv.priv_high & HV_ISOLATION)) {
 		hv_root_partition = true;
 		pr_info("Hyper-V: running as root partition\n");
 	}
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index eb07d4435391..b44c487727d4 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -368,7 +368,6 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 {
 	struct resctrl_schema *s;
 	struct rdtgroup *rdtgrp;
-	struct rdt_domain *dom;
 	struct rdt_resource *r;
 	char *tok, *resname;
 	int ret = 0;
@@ -397,10 +396,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 		goto out;
 	}
 
-	list_for_each_entry(s, &resctrl_schema_all, list) {
-		list_for_each_entry(dom, &s->res->domains, list)
-			memset(dom->staged_config, 0, sizeof(dom->staged_config));
-	}
+	rdt_staged_configs_clear();
 
 	while ((tok = strsep(&buf, "\n")) != NULL) {
 		resname = strim(strsep(&tok, ":"));
@@ -445,6 +441,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 	}
 
 out:
+	rdt_staged_configs_clear();
 	rdtgroup_kn_unlock(of->kn);
 	cpus_read_unlock();
 	return ret ?: nbytes;
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 8edecc5763d8..85ceaf9a31ac 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -555,5 +555,6 @@ void __check_limbo(struct rdt_domain *d, bool force_free);
 void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
 void __init thread_throttle_mode_init(void);
 void __init mbm_config_rftype_init(const char *config);
+void rdt_staged_configs_clear(void);
 
 #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index e2c1599d1b37..6ad33f355861 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -78,6 +78,19 @@ void rdt_last_cmd_printf(const char *fmt, ...)
 	va_end(ap);
 }
 
+void rdt_staged_configs_clear(void)
+{
+	struct rdt_resource *r;
+	struct rdt_domain *dom;
+
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	for_each_alloc_capable_rdt_resource(r) {
+		list_for_each_entry(dom, &r->domains, list)
+			memset(dom->staged_config, 0, sizeof(dom->staged_config));
+	}
+}
+
 /*
  * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
  * we can keep a bitmap of free CLOSIDs in a single integer.
@@ -314,7 +327,7 @@ static void update_cpu_closid_rmid(void *info)
 	 * executing task might have its own closid selected. Just reuse
 	 * the context switch code.
 	 */
-	resctrl_sched_in();
+	resctrl_sched_in(current);
 }
 
 /*
@@ -530,7 +543,7 @@ static void _update_task_closid_rmid(void *task)
 	 * Otherwise, the MSR is updated when the task is scheduled in.
 	 */
 	if (task == current)
-		resctrl_sched_in();
+		resctrl_sched_in(task);
 }
 
 static void update_task_closid_rmid(struct task_struct *t)
@@ -3107,7 +3120,9 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
 {
 	struct resctrl_schema *s;
 	struct rdt_resource *r;
-	int ret;
+	int ret = 0;
+
+	rdt_staged_configs_clear();
 
 	list_for_each_entry(s, &resctrl_schema_all, list) {
 		r = s->res;
@@ -3119,20 +3134,22 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
 		} else {
 			ret = rdtgroup_init_cat(s, rdtgrp->closid);
 			if (ret < 0)
-				return ret;
+				goto out;
 		}
 
 		ret = resctrl_arch_update_domains(r, rdtgrp->closid);
 		if (ret < 0) {
 			rdt_last_cmd_puts("Failed to initialize allocations\n");
-			return ret;
+			goto out;
 		}
 
 	}
 
 	rdtgrp->mode = RDT_MODE_SHAREABLE;
 
-	return 0;
+out:
+	rdt_staged_configs_clear();
+	return ret;
 }
 
 static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 714166cc25f2..0bab497c9436 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1118,21 +1118,20 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
 	zerofrom = offsetof(struct xregs_state, extended_state_area);
 
 	/*
-	 * The ptrace buffer is in non-compacted XSAVE format.  In
-	 * non-compacted format disabled features still occupy state space,
-	 * but there is no state to copy from in the compacted
-	 * init_fpstate. The gap tracking will zero these states.
-	 */
-	mask = fpstate->user_xfeatures;
-
-	/*
-	 * Dynamic features are not present in init_fpstate. When they are
-	 * in an all zeros init state, remove those from 'mask' to zero
-	 * those features in the user buffer instead of retrieving them
-	 * from init_fpstate.
+	 * This 'mask' indicates which states to copy from fpstate.
+	 * Those extended states that are not present in fpstate are
+	 * either disabled or initialized:
+	 *
+	 * In non-compacted format, disabled features still occupy
+	 * state space but there is no state to copy from in the
+	 * compacted init_fpstate. The gap tracking will zero these
+	 * states.
+	 *
+	 * The extended features have an all zeroes init state. Thus,
+	 * remove them from 'mask' to zero those features in the user
+	 * buffer instead of retrieving them from init_fpstate.
 	 */
-	if (fpu_state_size_dynamic())
-		mask &= (header.xfeatures | xinit->header.xcomp_bv);
+	mask = header.xfeatures;
 
 	for_each_extended_xfeature(i, mask) {
 		/*
@@ -1151,9 +1150,8 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
 			pkru.pkru = pkru_val;
 			membuf_write(&to, &pkru, sizeof(pkru));
 		} else {
-			copy_feature(header.xfeatures & BIT_ULL(i), &to,
+			membuf_write(&to,
 				     __raw_xsave_addr(xsave, i),
-				     __raw_xsave_addr(xinit, i),
 				     xstate_sizes[i]);
 		}
 		/*
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 1265ad519249..fb4f1e01b64a 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -136,10 +136,12 @@ SYM_TYPED_FUNC_START(ftrace_stub)
 	RET
 SYM_FUNC_END(ftrace_stub)
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 SYM_TYPED_FUNC_START(ftrace_stub_graph)
 	CALL_DEPTH_ACCOUNT
 	RET
 SYM_FUNC_END(ftrace_stub_graph)
+#endif
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 470c128759ea..708c87b88cc1 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -212,7 +212,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	switch_fpu_finish();
 
 	/* Load the Intel cache allocation PQR MSR. */
-	resctrl_sched_in();
+	resctrl_sched_in(next_p);
 
 	return prev_p;
 }
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 4e34b3b68ebd..bb65a68b4b49 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -656,7 +656,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	}
 
 	/* Load the Intel cache allocation PQR MSR. */
-	resctrl_sched_in();
+	resctrl_sched_in(next_p);
 
 	return prev_p;
 }
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 679026a640ef..3f664ab277c4 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -2183,9 +2183,6 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned
 	struct ghcb *ghcb;
 	int ret;
 
-	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
-		return -ENODEV;
-
 	if (!fw_err)
 		return -EINVAL;
 
@@ -2212,15 +2209,26 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned
 	if (ret)
 		goto e_put;
 
-	if (ghcb->save.sw_exit_info_2) {
-		/* Number of expected pages are returned in RBX */
-		if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST &&
-		    ghcb->save.sw_exit_info_2 == SNP_GUEST_REQ_INVALID_LEN)
-			input->data_npages = ghcb_get_rbx(ghcb);
+	*fw_err = ghcb->save.sw_exit_info_2;
+	switch (*fw_err) {
+	case 0:
+		break;
 
-		*fw_err = ghcb->save.sw_exit_info_2;
+	case SNP_GUEST_REQ_ERR_BUSY:
+		ret = -EAGAIN;
+		break;
 
+	case SNP_GUEST_REQ_INVALID_LEN:
+		/* Number of expected pages are returned in RBX */
+		if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
+			input->data_npages = ghcb_get_rbx(ghcb);
+			ret = -ENOSPC;
+			break;
+		}
+		fallthrough;
+	default:
 		ret = -EIO;
+		break;
 	}
 
 e_put:
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ef80d361b463..10622cf2b30f 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -33,8 +33,8 @@ static int __init iommu_init_noop(void) { return 0; }
 static void iommu_shutdown_noop(void) { }
 bool __init bool_x86_init_noop(void) { return false; }
 void x86_op_int_noop(int cpu) { }
-static __init int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; }
-static __init void get_rtc_noop(struct timespec64 *now) { }
+static int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; }
+static void get_rtc_noop(struct timespec64 *now) { }
 
 static __initconst const struct of_device_id of_cmos_match[] = {
 	{ .compatible = "motorola,mc146818" },
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 8e578311ca9d..89ca7f4c1464 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -46,7 +46,6 @@ config KVM
 	select KVM_XFER_TO_GUEST_WORK
 	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
 	select KVM_VFIO
-	select SRCU
 	select INTERVAL_TREE
 	select HAVE_KVM_PM_NOTIFIER if PM
 	select KVM_GENERIC_HARDWARE_ENABLING
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 042dee556125..995eb5054360 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -368,9 +368,39 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 		mask_after = e->fields.mask;
 		if (mask_before != mask_after)
 			kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after);
-		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
-		    && ioapic->irr & (1 << index))
-			ioapic_service(ioapic, index, false);
+		if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG &&
+		    ioapic->irr & (1 << index) && !e->fields.mask && !e->fields.remote_irr) {
+			/*
+			 * Pending status in irr may be outdated: the IRQ line may have
+			 * already been deasserted by a device while the IRQ was masked.
+			 * This occurs, for instance, if the interrupt is handled in a
+			 * Linux guest as a oneshot interrupt (IRQF_ONESHOT). In this
+			 * case the guest acknowledges the interrupt to the device in
+			 * its threaded irq handler, i.e. after the EOI but before
+			 * unmasking, so at the time of unmasking the IRQ line is
+			 * already down but our pending irr bit is still set. In such
+			 * cases, injecting this pending interrupt to the guest is
+			 * buggy: the guest will receive an extra unwanted interrupt.
+			 *
+			 * So we need to check here if the IRQ is actually still pending.
+			 * As we are generally not able to probe the IRQ line status
+			 * directly, we do it through irqfd resampler. Namely, we clear
+			 * the pending status and notify the resampler that this interrupt
+			 * is done, without actually injecting it into the guest. If the
+			 * IRQ line is actually already deasserted, we are done. If it is
+			 * still asserted, a new interrupt will be shortly triggered
+			 * through irqfd and injected into the guest.
+			 *
+			 * If, however, it's not possible to resample (no irqfd resampler
+			 * registered for this irq), then unconditionally inject this
+			 * pending interrupt into the guest, so the guest will not miss
+			 * an interrupt, although may get an extra unwanted interrupt.
+			 */
+			if (kvm_notify_irqfd_resampler(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index))
+				ioapic->irr &= ~(1 << index);
+			else
+				ioapic_service(ioapic, index, false);
+		}
 		if (e->fields.delivery_mode == APIC_DM_FIXED) {
 			struct kvm_lapic_irq irq;
 
diff --git a/arch/x86/kvm/kvm_onhyperv.h b/arch/x86/kvm/kvm_onhyperv.h
index 287e98ef9df3..6272dabec02d 100644
--- a/arch/x86/kvm/kvm_onhyperv.h
+++ b/arch/x86/kvm/kvm_onhyperv.h
@@ -12,6 +12,11 @@ int hv_remote_flush_tlb_with_range(struct kvm *kvm,
 int hv_remote_flush_tlb(struct kvm *kvm);
 void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp);
 #else /* !CONFIG_HYPERV */
+static inline int hv_remote_flush_tlb(struct kvm *kvm)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp)
 {
 }
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index ca684979e90d..cfc8ab773025 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -27,19 +27,38 @@
 #include "irq.h"
 #include "svm.h"
 
-/* AVIC GATAG is encoded using VM and VCPU IDs */
-#define AVIC_VCPU_ID_BITS		8
-#define AVIC_VCPU_ID_MASK		((1 << AVIC_VCPU_ID_BITS) - 1)
+/*
+ * Encode the arbitrary VM ID and the vCPU's default APIC ID, i.e the vCPU ID,
+ * into the GATag so that KVM can retrieve the correct vCPU from a GALog entry
+ * if an interrupt can't be delivered, e.g. because the vCPU isn't running.
+ *
+ * For the vCPU ID, use however many bits are currently allowed for the max
+ * guest physical APIC ID (limited by the size of the physical ID table), and
+ * use whatever bits remain to assign arbitrary AVIC IDs to VMs.  Note, the
+ * size of the GATag is defined by hardware (32 bits), but is an opaque value
+ * as far as hardware is concerned.
+ */
+#define AVIC_VCPU_ID_MASK		AVIC_PHYSICAL_MAX_INDEX_MASK
 
-#define AVIC_VM_ID_BITS			24
-#define AVIC_VM_ID_NR			(1 << AVIC_VM_ID_BITS)
-#define AVIC_VM_ID_MASK			((1 << AVIC_VM_ID_BITS) - 1)
+#define AVIC_VM_ID_SHIFT		HWEIGHT32(AVIC_PHYSICAL_MAX_INDEX_MASK)
+#define AVIC_VM_ID_MASK			(GENMASK(31, AVIC_VM_ID_SHIFT) >> AVIC_VM_ID_SHIFT)
 
-#define AVIC_GATAG(x, y)		(((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
-						(y & AVIC_VCPU_ID_MASK))
-#define AVIC_GATAG_TO_VMID(x)		((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
+#define AVIC_GATAG_TO_VMID(x)		((x >> AVIC_VM_ID_SHIFT) & AVIC_VM_ID_MASK)
 #define AVIC_GATAG_TO_VCPUID(x)		(x & AVIC_VCPU_ID_MASK)
 
+#define __AVIC_GATAG(vm_id, vcpu_id)	((((vm_id) & AVIC_VM_ID_MASK) << AVIC_VM_ID_SHIFT) | \
+					 ((vcpu_id) & AVIC_VCPU_ID_MASK))
+#define AVIC_GATAG(vm_id, vcpu_id)					\
+({									\
+	u32 ga_tag = __AVIC_GATAG(vm_id, vcpu_id);			\
+									\
+	WARN_ON_ONCE(AVIC_GATAG_TO_VCPUID(ga_tag) != (vcpu_id));	\
+	WARN_ON_ONCE(AVIC_GATAG_TO_VMID(ga_tag) != (vm_id));		\
+	ga_tag;								\
+})
+
+static_assert(__AVIC_GATAG(AVIC_VM_ID_MASK, AVIC_VCPU_ID_MASK) == -1u);
+
 static bool force_avic;
 module_param_unsafe(force_avic, bool, 0444);
 
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 252e7f37e4e2..f25bc3cbb250 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3729,7 +3729,7 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
 	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
 }
 
-static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
+static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
@@ -3753,6 +3753,37 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
 		svm->current_vmcb->asid_generation--;
 }
 
+static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
+{
+	hpa_t root_tdp = vcpu->arch.mmu->root.hpa;
+
+	/*
+	 * When running on Hyper-V with EnlightenedNptTlb enabled, explicitly
+	 * flush the NPT mappings via hypercall as flushing the ASID only
+	 * affects virtual to physical mappings, it does not invalidate guest
+	 * physical to host physical mappings.
+	 */
+	if (svm_hv_is_enlightened_tlb_enabled(vcpu) && VALID_PAGE(root_tdp))
+		hyperv_flush_guest_mapping(root_tdp);
+
+	svm_flush_tlb_asid(vcpu);
+}
+
+static void svm_flush_tlb_all(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * When running on Hyper-V with EnlightenedNptTlb enabled, remote TLB
+	 * flushes should be routed to hv_remote_flush_tlb() without requesting
+	 * a "regular" remote flush.  Reaching this point means either there's
+	 * a KVM bug or a prior hv_remote_flush_tlb() call failed, both of
+	 * which might be fatal to the guest.  Yell, but try to recover.
+	 */
+	if (WARN_ON_ONCE(svm_hv_is_enlightened_tlb_enabled(vcpu)))
+		hv_remote_flush_tlb(vcpu->kvm);
+
+	svm_flush_tlb_asid(vcpu);
+}
+
 static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -4745,10 +4776,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.set_rflags = svm_set_rflags,
 	.get_if_flag = svm_get_if_flag,
 
-	.flush_tlb_all = svm_flush_tlb_current,
+	.flush_tlb_all = svm_flush_tlb_all,
 	.flush_tlb_current = svm_flush_tlb_current,
 	.flush_tlb_gva = svm_flush_tlb_gva,
-	.flush_tlb_guest = svm_flush_tlb_current,
+	.flush_tlb_guest = svm_flush_tlb_asid,
 
 	.vcpu_pre_run = svm_vcpu_pre_run,
 	.vcpu_run = svm_vcpu_run,
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index cff838f15db5..786d46d73a8e 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -6,6 +6,8 @@
 #ifndef __ARCH_X86_KVM_SVM_ONHYPERV_H__
 #define __ARCH_X86_KVM_SVM_ONHYPERV_H__
 
+#include <asm/mshyperv.h>
+
 #if IS_ENABLED(CONFIG_HYPERV)
 
 #include "kvm_onhyperv.h"
@@ -15,6 +17,14 @@ static struct kvm_x86_ops svm_x86_ops;
 
 int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu);
 
+static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu)
+{
+	struct hv_vmcb_enlightenments *hve = &to_svm(vcpu)->vmcb->control.hv_enlightenments;
+
+	return ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB &&
+	       !!hve->hv_enlightenments_control.enlightened_npt_tlb;
+}
+
 static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
 {
 	struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
@@ -80,6 +90,11 @@ static inline void svm_hv_update_vp_id(struct vmcb *vmcb, struct kvm_vcpu *vcpu)
 }
 #else
 
+static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu)
+{
+	return false;
+}
+
 static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
 {
 }
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 7c4f5ca405c7..768487611db7 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2903,7 +2903,7 @@ static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu,
 static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
 				       struct vmcs12 *vmcs12)
 {
-	bool ia32e;
+	bool ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
 
 	if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) ||
 	    CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) ||
@@ -2923,12 +2923,6 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
 					   vmcs12->host_ia32_perf_global_ctrl)))
 		return -EINVAL;
 
-#ifdef CONFIG_X86_64
-	ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
-#else
-	ia32e = false;
-#endif
-
 	if (ia32e) {
 		if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
 			return -EINVAL;
@@ -3022,7 +3016,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
 					struct vmcs12 *vmcs12,
 					enum vm_entry_failure_code *entry_failure_code)
 {
-	bool ia32e;
+	bool ia32e = !!(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE);
 
 	*entry_failure_code = ENTRY_FAIL_DEFAULT;
 
@@ -3048,6 +3042,13 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
 					   vmcs12->guest_ia32_perf_global_ctrl)))
 		return -EINVAL;
 
+	if (CC((vmcs12->guest_cr0 & (X86_CR0_PG | X86_CR0_PE)) == X86_CR0_PG))
+		return -EINVAL;
+
+	if (CC(ia32e && !(vmcs12->guest_cr4 & X86_CR4_PAE)) ||
+	    CC(ia32e && !(vmcs12->guest_cr0 & X86_CR0_PG)))
+		return -EINVAL;
+
 	/*
 	 * If the load IA32_EFER VM-entry control is 1, the following checks
 	 * are performed on the field for the IA32_EFER MSR:
@@ -3059,7 +3060,6 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
 	 */
 	if (to_vmx(vcpu)->nested.nested_run_pending &&
 	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
-		ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
 		if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
 		    CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
 		    CC(((vmcs12->guest_cr0 & X86_CR0_PG) &&
@@ -3868,7 +3868,12 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu)
 		exit_qual = 0;
 	}
 
-	if (ex->has_error_code) {
+	/*
+	 * Unlike AMD's Paged Real Mode, which reports an error code on #PF
+	 * VM-Exits even if the CPU is in Real Mode, Intel VMX never sets the
+	 * "has error code" flags on VM-Exit if the CPU is in Real Mode.
+	 */
+	if (ex->has_error_code && is_protmode(vcpu)) {
 		/*
 		 * Intel CPUs do not generate error codes with bits 31:16 set,
 		 * and more importantly VMX disallows setting bits 31:16 in the
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index f550540ed54e..631fd7da2bc3 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -262,7 +262,7 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
 	 * eIBRS has its own protection against poisoned RSB, so it doesn't
 	 * need the RSB filling sequence.  But it does need to be enabled, and a
 	 * single call to retire, before the first unbalanced RET.
-         */
+	 */
 
 	FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
 			   X86_FEATURE_RSB_VMEXIT_LITE
@@ -311,7 +311,7 @@ SYM_FUNC_END(vmx_do_nmi_irqoff)
  * vmread_error_trampoline - Trampoline from inline asm to vmread_error()
  * @field:	VMCS field encoding that failed
  * @fault:	%true if the VMREAD faulted, %false if it failed
-
+ *
  * Save and restore volatile registers across a call to vmread_error().  Note,
  * all parameters are passed on the stack.
  */
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index bcac3efcde41..d2d6e1b6c788 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -874,7 +874,7 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
 	 */
 	if (is_guest_mode(vcpu))
 		eb |= get_vmcs12(vcpu)->exception_bitmap;
-        else {
+	else {
 		int mask = 0, match = 0;
 
 		if (enable_ept && (eb & (1u << PF_VECTOR))) {
@@ -1282,7 +1282,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 		}
 	}
 
-    	if (vmx->nested.need_vmcs12_to_shadow_sync)
+	if (vmx->nested.need_vmcs12_to_shadow_sync)
 		nested_sync_vmcs12_to_shadow(vcpu);
 
 	if (vmx->guest_state_loaded)
@@ -5049,10 +5049,10 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
 	if (to_vmx(vcpu)->nested.nested_run_pending)
 		return -EBUSY;
 
-       /*
-        * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
-        * e.g. if the IRQ arrived asynchronously after checking nested events.
-        */
+	/*
+	 * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
+	 * e.g. if the IRQ arrived asynchronously after checking nested events.
+	 */
 	if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
 		return -EBUSY;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7713420abab0..3d852ce84920 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4432,6 +4432,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_VAPIC:
 	case KVM_CAP_ENABLE_CAP:
 	case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES:
+	case KVM_CAP_IRQFD_RESAMPLE:
 		r = 1;
 		break;
 	case KVM_CAP_EXIT_HYPERCALL:
@@ -8903,6 +8904,8 @@ restart:
 	}
 
 	if (ctxt->have_exception) {
+		WARN_ON_ONCE(vcpu->mmio_needed && !vcpu->mmio_is_write);
+		vcpu->mmio_needed = false;
 		r = 1;
 		inject_emulated_exception(vcpu);
 	} else if (vcpu->arch.pio.count) {
@@ -9906,13 +9909,20 @@ int kvm_check_nested_events(struct kvm_vcpu *vcpu)
 
 static void kvm_inject_exception(struct kvm_vcpu *vcpu)
 {
+	/*
+	 * Suppress the error code if the vCPU is in Real Mode, as Real Mode
+	 * exceptions don't report error codes.  The presence of an error code
+	 * is carried with the exception and only stripped when the exception
+	 * is injected as intercepted #PF VM-Exits for AMD's Paged Real Mode do
+	 * report an error code despite the CPU being in Real Mode.
+	 */
+	vcpu->arch.exception.has_error_code &= is_protmode(vcpu);
+
 	trace_kvm_inj_exception(vcpu->arch.exception.vector,
 				vcpu->arch.exception.has_error_code,
 				vcpu->arch.exception.error_code,
 				vcpu->arch.exception.injected);
 
-	if (vcpu->arch.exception.error_code && !is_protmode(vcpu))
-		vcpu->arch.exception.error_code = false;
 	static_call(kvm_x86_inject_exception)(vcpu);
 }
 
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 4f1a40a86534..01932af64193 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -71,6 +71,6 @@ ifneq ($(CONFIG_GENERIC_CSUM),y)
 endif
         lib-y += clear_page_64.o copy_page_64.o
         lib-y += memmove_64.o memset_64.o
-        lib-y += copy_user_64.o
+        lib-y += copy_user_64.o copy_user_uncached_64.o
 	lib-y += cmpxchg16b_emu.o
 endif
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index ecbfb4dd3b01..f74a3e704a1c 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -57,134 +57,85 @@ EXPORT_SYMBOL_GPL(clear_page_erms)
  * Input:
  * rdi destination
  * rcx count
+ * rax is zero
  *
  * Output:
  * rcx: uncleared bytes or 0 if successful.
  */
-SYM_FUNC_START(clear_user_original)
-	/*
-	 * Copy only the lower 32 bits of size as that is enough to handle the rest bytes,
-	 * i.e., no need for a 'q' suffix and thus a REX prefix.
-	 */
-	mov %ecx,%eax
-	shr $3,%rcx
-	jz .Lrest_bytes
+SYM_FUNC_START(rep_stos_alternative)
+	cmpq $64,%rcx
+	jae .Lunrolled
 
-	# do the qwords first
-	.p2align 4
-.Lqwords:
-	movq $0,(%rdi)
-	lea 8(%rdi),%rdi
-	dec %rcx
-	jnz .Lqwords
+	cmp $8,%ecx
+	jae .Lword
 
-.Lrest_bytes:
-	and $7,  %eax
-	jz .Lexit
+	testl %ecx,%ecx
+	je .Lexit
 
-	# now do the rest bytes
-.Lbytes:
-	movb $0,(%rdi)
+.Lclear_user_tail:
+0:	movb %al,(%rdi)
 	inc %rdi
-	dec %eax
-	jnz .Lbytes
-
+	dec %rcx
+	jnz .Lclear_user_tail
 .Lexit:
-	/*
-	 * %rax still needs to be cleared in the exception case because this function is called
-	 * from inline asm and the compiler expects %rax to be zero when exiting the inline asm,
-	 * in case it might reuse it somewhere.
-	 */
-        xor %eax,%eax
-        RET
-
-.Lqwords_exception:
-        # convert remaining qwords back into bytes to return to caller
-        shl $3, %rcx
-        and $7, %eax
-        add %rax,%rcx
-        jmp .Lexit
-
-.Lbytes_exception:
-        mov %eax,%ecx
-        jmp .Lexit
-
-        _ASM_EXTABLE_UA(.Lqwords, .Lqwords_exception)
-        _ASM_EXTABLE_UA(.Lbytes, .Lbytes_exception)
-SYM_FUNC_END(clear_user_original)
-EXPORT_SYMBOL(clear_user_original)
-
-/*
- * Alternative clear user-space when CPU feature X86_FEATURE_REP_GOOD is
- * present.
- * Input:
- * rdi destination
- * rcx count
- *
- * Output:
- * rcx: uncleared bytes or 0 if successful.
- */
-SYM_FUNC_START(clear_user_rep_good)
-	# call the original thing for less than a cacheline
-	cmp $64, %rcx
-	jb clear_user_original
-
-.Lprep:
-	# copy lower 32-bits for rest bytes
-	mov %ecx, %edx
-	shr $3, %rcx
-	jz .Lrep_good_rest_bytes
-
-.Lrep_good_qwords:
-	rep stosq
-
-.Lrep_good_rest_bytes:
-	and $7, %edx
-	jz .Lrep_good_exit
-
-.Lrep_good_bytes:
-	mov %edx, %ecx
-	rep stosb
-
-.Lrep_good_exit:
-	# see .Lexit comment above
-	xor %eax, %eax
 	RET
 
-.Lrep_good_qwords_exception:
-	# convert remaining qwords back into bytes to return to caller
-	shl $3, %rcx
-	and $7, %edx
-	add %rdx, %rcx
-	jmp .Lrep_good_exit
+	_ASM_EXTABLE_UA( 0b, .Lexit)
 
-	_ASM_EXTABLE_UA(.Lrep_good_qwords, .Lrep_good_qwords_exception)
-	_ASM_EXTABLE_UA(.Lrep_good_bytes, .Lrep_good_exit)
-SYM_FUNC_END(clear_user_rep_good)
-EXPORT_SYMBOL(clear_user_rep_good)
+.Lword:
+1:	movq %rax,(%rdi)
+	addq $8,%rdi
+	sub $8,%ecx
+	je .Lexit
+	cmp $8,%ecx
+	jae .Lword
+	jmp .Lclear_user_tail
 
-/*
- * Alternative clear user-space when CPU feature X86_FEATURE_ERMS is present.
- * Input:
- * rdi destination
- * rcx count
- *
- * Output:
- * rcx: uncleared bytes or 0 if successful.
- *
- */
-SYM_FUNC_START(clear_user_erms)
-	# call the original thing for less than a cacheline
-	cmp $64, %rcx
-	jb clear_user_original
-
-.Lerms_bytes:
-	rep stosb
-
-.Lerms_exit:
-	xorl %eax,%eax
+	.p2align 4
+.Lunrolled:
+10:	movq %rax,(%rdi)
+11:	movq %rax,8(%rdi)
+12:	movq %rax,16(%rdi)
+13:	movq %rax,24(%rdi)
+14:	movq %rax,32(%rdi)
+15:	movq %rax,40(%rdi)
+16:	movq %rax,48(%rdi)
+17:	movq %rax,56(%rdi)
+	addq $64,%rdi
+	subq $64,%rcx
+	cmpq $64,%rcx
+	jae .Lunrolled
+	cmpl $8,%ecx
+	jae .Lword
+	testl %ecx,%ecx
+	jne .Lclear_user_tail
 	RET
 
-	_ASM_EXTABLE_UA(.Lerms_bytes, .Lerms_exit)
-SYM_FUNC_END(clear_user_erms)
-EXPORT_SYMBOL(clear_user_erms)
+	/*
+	 * If we take an exception on any of the
+	 * word stores, we know that %rcx isn't zero,
+	 * so we can just go to the tail clearing to
+	 * get the exact count.
+	 *
+	 * The unrolled case might end up clearing
+	 * some bytes twice. Don't care.
+	 *
+	 * We could use the value in %rdi to avoid
+	 * a second fault on the exact count case,
+	 * but do we really care? No.
+	 *
+	 * Finally, we could try to align %rdi at the
+	 * top of the unrolling. But unaligned stores
+	 * just aren't that common or expensive.
+	 */
+	_ASM_EXTABLE_UA( 1b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(10b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(11b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(12b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(13b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(14b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(15b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(16b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(17b, .Lclear_user_tail)
+SYM_FUNC_END(rep_stos_alternative)
+EXPORT_SYMBOL(rep_stos_alternative)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 9dec1b38a98f..4fc5c2de2de4 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -7,404 +7,108 @@
  */
 
 #include <linux/linkage.h>
-#include <asm/current.h>
-#include <asm/asm-offsets.h>
-#include <asm/thread_info.h>
-#include <asm/cpufeatures.h>
-#include <asm/alternative.h>
 #include <asm/asm.h>
-#include <asm/smap.h>
 #include <asm/export.h>
-#include <asm/trapnr.h>
-
-.macro ALIGN_DESTINATION
-	/* check for bad alignment of destination */
-	movl %edi,%ecx
-	andl $7,%ecx
-	jz 102f				/* already aligned */
-	subl $8,%ecx
-	negl %ecx
-	subl %ecx,%edx
-100:	movb (%rsi),%al
-101:	movb %al,(%rdi)
-	incq %rsi
-	incq %rdi
-	decl %ecx
-	jnz 100b
-102:
-
-	_ASM_EXTABLE_CPY(100b, .Lcopy_user_handle_align)
-	_ASM_EXTABLE_CPY(101b, .Lcopy_user_handle_align)
-.endm
 
 /*
- * copy_user_generic_unrolled - memory copy with exception handling.
- * This version is for CPUs like P4 that don't have efficient micro
- * code for rep movsq
- *
- * Input:
- * rdi destination
- * rsi source
- * rdx count
- *
- * Output:
- * eax uncopied bytes or 0 if successful.
- */
-SYM_FUNC_START(copy_user_generic_unrolled)
-	ASM_STAC
-	cmpl $8,%edx
-	jb .Lcopy_user_short_string_bytes
-	ALIGN_DESTINATION
-	movl %edx,%ecx
-	andl $63,%edx
-	shrl $6,%ecx
-	jz copy_user_short_string
-1:	movq (%rsi),%r8
-2:	movq 1*8(%rsi),%r9
-3:	movq 2*8(%rsi),%r10
-4:	movq 3*8(%rsi),%r11
-5:	movq %r8,(%rdi)
-6:	movq %r9,1*8(%rdi)
-7:	movq %r10,2*8(%rdi)
-8:	movq %r11,3*8(%rdi)
-9:	movq 4*8(%rsi),%r8
-10:	movq 5*8(%rsi),%r9
-11:	movq 6*8(%rsi),%r10
-12:	movq 7*8(%rsi),%r11
-13:	movq %r8,4*8(%rdi)
-14:	movq %r9,5*8(%rdi)
-15:	movq %r10,6*8(%rdi)
-16:	movq %r11,7*8(%rdi)
-	leaq 64(%rsi),%rsi
-	leaq 64(%rdi),%rdi
-	decl %ecx
-	jnz 1b
-	jmp copy_user_short_string
-
-30:	shll $6,%ecx
-	addl %ecx,%edx
-	jmp .Lcopy_user_handle_tail
-
-	_ASM_EXTABLE_CPY(1b, 30b)
-	_ASM_EXTABLE_CPY(2b, 30b)
-	_ASM_EXTABLE_CPY(3b, 30b)
-	_ASM_EXTABLE_CPY(4b, 30b)
-	_ASM_EXTABLE_CPY(5b, 30b)
-	_ASM_EXTABLE_CPY(6b, 30b)
-	_ASM_EXTABLE_CPY(7b, 30b)
-	_ASM_EXTABLE_CPY(8b, 30b)
-	_ASM_EXTABLE_CPY(9b, 30b)
-	_ASM_EXTABLE_CPY(10b, 30b)
-	_ASM_EXTABLE_CPY(11b, 30b)
-	_ASM_EXTABLE_CPY(12b, 30b)
-	_ASM_EXTABLE_CPY(13b, 30b)
-	_ASM_EXTABLE_CPY(14b, 30b)
-	_ASM_EXTABLE_CPY(15b, 30b)
-	_ASM_EXTABLE_CPY(16b, 30b)
-SYM_FUNC_END(copy_user_generic_unrolled)
-EXPORT_SYMBOL(copy_user_generic_unrolled)
-
-/* Some CPUs run faster using the string copy instructions.
- * This is also a lot simpler. Use them when possible.
- *
- * Only 4GB of copy is supported. This shouldn't be a problem
- * because the kernel normally only writes from/to page sized chunks
- * even if user space passed a longer buffer.
- * And more would be dangerous because both Intel and AMD have
- * errata with rep movsq > 4GB. If someone feels the need to fix
- * this please consider this.
+ * rep_movs_alternative - memory copy with exception handling.
+ * This version is for CPUs that don't have FSRM (Fast Short Rep Movs)
  *
  * Input:
  * rdi destination
  * rsi source
- * rdx count
+ * rcx count
  *
  * Output:
- * eax uncopied bytes or 0 if successful.
- */
-SYM_FUNC_START(copy_user_generic_string)
-	ASM_STAC
-	cmpl $8,%edx
-	jb 2f		/* less than 8 bytes, go to byte copy loop */
-	ALIGN_DESTINATION
-	movl %edx,%ecx
-	shrl $3,%ecx
-	andl $7,%edx
-1:	rep movsq
-2:	movl %edx,%ecx
-3:	rep movsb
-	xorl %eax,%eax
-	ASM_CLAC
-	RET
-
-11:	leal (%rdx,%rcx,8),%ecx
-12:	movl %ecx,%edx		/* ecx is zerorest also */
-	jmp .Lcopy_user_handle_tail
-
-	_ASM_EXTABLE_CPY(1b, 11b)
-	_ASM_EXTABLE_CPY(3b, 12b)
-SYM_FUNC_END(copy_user_generic_string)
-EXPORT_SYMBOL(copy_user_generic_string)
-
-/*
- * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
- * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
- *
- * Input:
- * rdi destination
- * rsi source
- * rdx count
+ * rcx uncopied bytes or 0 if successful.
  *
- * Output:
- * eax uncopied bytes or 0 if successful.
+ * NOTE! The calling convention is very intentionally the same as
+ * for 'rep movs', so that we can rewrite the function call with
+ * just a plain 'rep movs' on machines that have FSRM.  But to make
+ * it simpler for us, we can clobber rsi/rdi and rax/r8-r11 freely.
  */
-SYM_FUNC_START(copy_user_enhanced_fast_string)
-	ASM_STAC
-	/* CPUs without FSRM should avoid rep movsb for short copies */
-	ALTERNATIVE "cmpl $64, %edx; jb copy_user_short_string", "", X86_FEATURE_FSRM
-	movl %edx,%ecx
-1:	rep movsb
-	xorl %eax,%eax
-	ASM_CLAC
+SYM_FUNC_START(rep_movs_alternative)
+	cmpq $64,%rcx
+	jae .Lunrolled
+
+	cmp $8,%ecx
+	jae .Lword
+
+	testl %ecx,%ecx
+	je .Lexit
+
+.Lcopy_user_tail:
+0:	movb (%rsi),%al
+1:	movb %al,(%rdi)
+	inc %rdi
+	inc %rsi
+	dec %rcx
+	jne .Lcopy_user_tail
+.Lexit:
 	RET
 
-12:	movl %ecx,%edx		/* ecx is zerorest also */
-	jmp .Lcopy_user_handle_tail
-
-	_ASM_EXTABLE_CPY(1b, 12b)
-SYM_FUNC_END(copy_user_enhanced_fast_string)
-EXPORT_SYMBOL(copy_user_enhanced_fast_string)
-
-/*
- * Try to copy last bytes and clear the rest if needed.
- * Since protection fault in copy_from/to_user is not a normal situation,
- * it is not necessary to optimize tail handling.
- * Don't try to copy the tail if machine check happened
- *
- * Input:
- * eax trap number written by ex_handler_copy()
- * rdi destination
- * rsi source
- * rdx count
- *
- * Output:
- * eax uncopied bytes or 0 if successful.
- */
-SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
-	cmp $X86_TRAP_MC,%eax
-	je 3f
-
-	movl %edx,%ecx
-1:	rep movsb
-2:	mov %ecx,%eax
-	ASM_CLAC
+	_ASM_EXTABLE_UA( 0b, .Lexit)
+	_ASM_EXTABLE_UA( 1b, .Lexit)
+
+	.p2align 4
+.Lword:
+2:	movq (%rsi),%rax
+3:	movq %rax,(%rdi)
+	addq $8,%rsi
+	addq $8,%rdi
+	sub $8,%ecx
+	je .Lexit
+	cmp $8,%ecx
+	jae .Lword
+	jmp .Lcopy_user_tail
+
+	_ASM_EXTABLE_UA( 2b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA( 3b, .Lcopy_user_tail)
+
+	.p2align 4
+.Lunrolled:
+10:	movq (%rsi),%r8
+11:	movq 8(%rsi),%r9
+12:	movq 16(%rsi),%r10
+13:	movq 24(%rsi),%r11
+14:	movq %r8,(%rdi)
+15:	movq %r9,8(%rdi)
+16:	movq %r10,16(%rdi)
+17:	movq %r11,24(%rdi)
+20:	movq 32(%rsi),%r8
+21:	movq 40(%rsi),%r9
+22:	movq 48(%rsi),%r10
+23:	movq 56(%rsi),%r11
+24:	movq %r8,32(%rdi)
+25:	movq %r9,40(%rdi)
+26:	movq %r10,48(%rdi)
+27:	movq %r11,56(%rdi)
+	addq $64,%rsi
+	addq $64,%rdi
+	subq $64,%rcx
+	cmpq $64,%rcx
+	jae .Lunrolled
+	cmpl $8,%ecx
+	jae .Lword
+	testl %ecx,%ecx
+	jne .Lcopy_user_tail
 	RET
 
-3:
-	movl %edx,%eax
-	ASM_CLAC
-	RET
-
-	_ASM_EXTABLE_CPY(1b, 2b)
-
-.Lcopy_user_handle_align:
-	addl %ecx,%edx			/* ecx is zerorest also */
-	jmp .Lcopy_user_handle_tail
-
-SYM_CODE_END(.Lcopy_user_handle_tail)
-
-/*
- * Finish memcpy of less than 64 bytes.  #AC should already be set.
- *
- * Input:
- * rdi destination
- * rsi source
- * rdx count (< 64)
- *
- * Output:
- * eax uncopied bytes or 0 if successful.
- */
-SYM_CODE_START_LOCAL(copy_user_short_string)
-	movl %edx,%ecx
-	andl $7,%edx
-	shrl $3,%ecx
-	jz .Lcopy_user_short_string_bytes
-18:	movq (%rsi),%r8
-19:	movq %r8,(%rdi)
-	leaq 8(%rsi),%rsi
-	leaq 8(%rdi),%rdi
-	decl %ecx
-	jnz 18b
-.Lcopy_user_short_string_bytes:
-	andl %edx,%edx
-	jz 23f
-	movl %edx,%ecx
-21:	movb (%rsi),%al
-22:	movb %al,(%rdi)
-	incq %rsi
-	incq %rdi
-	decl %ecx
-	jnz 21b
-23:	xor %eax,%eax
-	ASM_CLAC
-	RET
-
-40:	leal (%rdx,%rcx,8),%edx
-	jmp 60f
-50:	movl %ecx,%edx		/* ecx is zerorest also */
-60:	jmp .Lcopy_user_handle_tail
-
-	_ASM_EXTABLE_CPY(18b, 40b)
-	_ASM_EXTABLE_CPY(19b, 40b)
-	_ASM_EXTABLE_CPY(21b, 50b)
-	_ASM_EXTABLE_CPY(22b, 50b)
-SYM_CODE_END(copy_user_short_string)
-
-/*
- * copy_user_nocache - Uncached memory copy with exception handling
- * This will force destination out of cache for more performance.
- *
- * Note: Cached memory copy is used when destination or size is not
- * naturally aligned. That is:
- *  - Require 8-byte alignment when size is 8 bytes or larger.
- *  - Require 4-byte alignment when size is 4 bytes.
- */
-SYM_FUNC_START(__copy_user_nocache)
-	ASM_STAC
-
-	/* If size is less than 8 bytes, go to 4-byte copy */
-	cmpl $8,%edx
-	jb .L_4b_nocache_copy_entry
-
-	/* If destination is not 8-byte aligned, "cache" copy to align it */
-	ALIGN_DESTINATION
-
-	/* Set 4x8-byte copy count and remainder */
-	movl %edx,%ecx
-	andl $63,%edx
-	shrl $6,%ecx
-	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */
-
-	/* Perform 4x8-byte nocache loop-copy */
-.L_4x8b_nocache_copy_loop:
-1:	movq (%rsi),%r8
-2:	movq 1*8(%rsi),%r9
-3:	movq 2*8(%rsi),%r10
-4:	movq 3*8(%rsi),%r11
-5:	movnti %r8,(%rdi)
-6:	movnti %r9,1*8(%rdi)
-7:	movnti %r10,2*8(%rdi)
-8:	movnti %r11,3*8(%rdi)
-9:	movq 4*8(%rsi),%r8
-10:	movq 5*8(%rsi),%r9
-11:	movq 6*8(%rsi),%r10
-12:	movq 7*8(%rsi),%r11
-13:	movnti %r8,4*8(%rdi)
-14:	movnti %r9,5*8(%rdi)
-15:	movnti %r10,6*8(%rdi)
-16:	movnti %r11,7*8(%rdi)
-	leaq 64(%rsi),%rsi
-	leaq 64(%rdi),%rdi
-	decl %ecx
-	jnz .L_4x8b_nocache_copy_loop
-
-	/* Set 8-byte copy count and remainder */
-.L_8b_nocache_copy_entry:
-	movl %edx,%ecx
-	andl $7,%edx
-	shrl $3,%ecx
-	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */
-
-	/* Perform 8-byte nocache loop-copy */
-.L_8b_nocache_copy_loop:
-20:	movq (%rsi),%r8
-21:	movnti %r8,(%rdi)
-	leaq 8(%rsi),%rsi
-	leaq 8(%rdi),%rdi
-	decl %ecx
-	jnz .L_8b_nocache_copy_loop
-
-	/* If no byte left, we're done */
-.L_4b_nocache_copy_entry:
-	andl %edx,%edx
-	jz .L_finish_copy
-
-	/* If destination is not 4-byte aligned, go to byte copy: */
-	movl %edi,%ecx
-	andl $3,%ecx
-	jnz .L_1b_cache_copy_entry
-
-	/* Set 4-byte copy count (1 or 0) and remainder */
-	movl %edx,%ecx
-	andl $3,%edx
-	shrl $2,%ecx
-	jz .L_1b_cache_copy_entry	/* jump if count is 0 */
-
-	/* Perform 4-byte nocache copy: */
-30:	movl (%rsi),%r8d
-31:	movnti %r8d,(%rdi)
-	leaq 4(%rsi),%rsi
-	leaq 4(%rdi),%rdi
-
-	/* If no bytes left, we're done: */
-	andl %edx,%edx
-	jz .L_finish_copy
-
-	/* Perform byte "cache" loop-copy for the remainder */
-.L_1b_cache_copy_entry:
-	movl %edx,%ecx
-.L_1b_cache_copy_loop:
-40:	movb (%rsi),%al
-41:	movb %al,(%rdi)
-	incq %rsi
-	incq %rdi
-	decl %ecx
-	jnz .L_1b_cache_copy_loop
-
-	/* Finished copying; fence the prior stores */
-.L_finish_copy:
-	xorl %eax,%eax
-	ASM_CLAC
-	sfence
-	RET
-
-.L_fixup_4x8b_copy:
-	shll $6,%ecx
-	addl %ecx,%edx
-	jmp .L_fixup_handle_tail
-.L_fixup_8b_copy:
-	lea (%rdx,%rcx,8),%rdx
-	jmp .L_fixup_handle_tail
-.L_fixup_4b_copy:
-	lea (%rdx,%rcx,4),%rdx
-	jmp .L_fixup_handle_tail
-.L_fixup_1b_copy:
-	movl %ecx,%edx
-.L_fixup_handle_tail:
-	sfence
-	jmp .Lcopy_user_handle_tail
-
-	_ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
-	_ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
-	_ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
-	_ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
-	_ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
-	_ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
-SYM_FUNC_END(__copy_user_nocache)
-EXPORT_SYMBOL(__copy_user_nocache)
+	_ASM_EXTABLE_UA(10b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(11b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(12b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(13b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(14b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(15b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(16b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(17b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(20b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(21b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(22b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(23b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(24b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(25b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(26b, .Lcopy_user_tail)
+	_ASM_EXTABLE_UA(27b, .Lcopy_user_tail)
+SYM_FUNC_END(rep_movs_alternative)
+EXPORT_SYMBOL(rep_movs_alternative)
diff --git a/arch/x86/lib/copy_user_uncached_64.S b/arch/x86/lib/copy_user_uncached_64.S
new file mode 100644
index 000000000000..5c5f38d32672
--- /dev/null
+++ b/arch/x86/lib/copy_user_uncached_64.S
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2023 Linus Torvalds <torvalds@linux-foundation.org>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/export.h>
+
+/*
+ * copy_user_nocache - Uncached memory copy with exception handling
+ *
+ * This copies from user space into kernel space, but the kernel
+ * space accesses can take a machine check exception, so they too
+ * need exception handling.
+ *
+ * Note: only 32-bit and 64-bit stores have non-temporal versions,
+ * and we only use aligned versions. Any unaligned parts at the
+ * start or end of the copy will be done using normal cached stores.
+ *
+ * Input:
+ * rdi destination
+ * rsi source
+ * edx count
+ *
+ * Output:
+ * rax uncopied bytes or 0 if successful.
+ */
+SYM_FUNC_START(__copy_user_nocache)
+	/* If destination is not 7-byte aligned, we'll have to align it */
+	testb $7,%dil
+	jne .Lalign
+
+.Lis_aligned:
+	cmp $64,%edx
+	jb .Lquadwords
+
+	.p2align 4,0x90
+.Lunrolled:
+10:	movq (%rsi),%r8
+11:	movq 8(%rsi),%r9
+12:	movq 16(%rsi),%r10
+13:	movq 24(%rsi),%r11
+20:	movnti %r8,(%rdi)
+21:	movnti %r9,8(%rdi)
+22:	movnti %r10,16(%rdi)
+23:	movnti %r11,24(%rdi)
+30:	movq 32(%rsi),%r8
+31:	movq 40(%rsi),%r9
+32:	movq 48(%rsi),%r10
+33:	movq 56(%rsi),%r11
+40:	movnti %r8,32(%rdi)
+41:	movnti %r9,40(%rdi)
+42:	movnti %r10,48(%rdi)
+43:	movnti %r11,56(%rdi)
+
+	addq $64,%rsi
+	addq $64,%rdi
+	sub $64,%edx
+	cmp $64,%edx
+	jae .Lunrolled
+
+/*
+ * First set of user mode loads have been done
+ * without any stores, so if they fail, we can
+ * just try the non-unrolled loop.
+ */
+_ASM_EXTABLE_UA(10b, .Lquadwords)
+_ASM_EXTABLE_UA(11b, .Lquadwords)
+_ASM_EXTABLE_UA(12b, .Lquadwords)
+_ASM_EXTABLE_UA(13b, .Lquadwords)
+
+/*
+ * The second set of user mode loads have been
+ * done with 32 bytes stored to the destination,
+ * so we need to take that into account before
+ * falling back to the unrolled loop.
+ */
+_ASM_EXTABLE_UA(30b, .Lfixup32)
+_ASM_EXTABLE_UA(31b, .Lfixup32)
+_ASM_EXTABLE_UA(32b, .Lfixup32)
+_ASM_EXTABLE_UA(33b, .Lfixup32)
+
+/*
+ * An exception on a write means that we're
+ * done, but we need to update the count
+ * depending on where in the unrolled loop
+ * we were.
+ */
+_ASM_EXTABLE_UA(20b, .Ldone0)
+_ASM_EXTABLE_UA(21b, .Ldone8)
+_ASM_EXTABLE_UA(22b, .Ldone16)
+_ASM_EXTABLE_UA(23b, .Ldone24)
+_ASM_EXTABLE_UA(40b, .Ldone32)
+_ASM_EXTABLE_UA(41b, .Ldone40)
+_ASM_EXTABLE_UA(42b, .Ldone48)
+_ASM_EXTABLE_UA(43b, .Ldone56)
+
+.Lquadwords:
+	cmp $8,%edx
+	jb .Llong
+50:	movq (%rsi),%rax
+51:	movnti %rax,(%rdi)
+	addq $8,%rsi
+	addq $8,%rdi
+	sub $8,%edx
+	jmp .Lquadwords
+
+/*
+ * If we fail on the last full quadword, we will
+ * not try to do any byte-wise cached accesses.
+ * We will try to do one more 4-byte uncached
+ * one, though.
+ */
+_ASM_EXTABLE_UA(50b, .Llast4)
+_ASM_EXTABLE_UA(51b, .Ldone0)
+
+.Llong:
+	test $4,%dl
+	je .Lword
+60:	movl (%rsi),%eax
+61:	movnti %eax,(%rdi)
+	addq $4,%rsi
+	addq $4,%rdi
+	sub $4,%edx
+.Lword:
+	sfence
+	test $2,%dl
+	je .Lbyte
+70:	movw (%rsi),%ax
+71:	movw %ax,(%rdi)
+	addq $2,%rsi
+	addq $2,%rdi
+	sub $2,%edx
+.Lbyte:
+	test $1,%dl
+	je .Ldone
+80:	movb (%rsi),%al
+81:	movb %al,(%rdi)
+	dec %edx
+.Ldone:
+	mov %edx,%eax
+	RET
+
+/*
+ * If we fail on the last four bytes, we won't
+ * bother with any fixups. It's dead, Jim. Note
+ * that there's no need for 'sfence' for any
+ * of this, since the exception will have been
+ * serializing.
+ */
+_ASM_EXTABLE_UA(60b, .Ldone)
+_ASM_EXTABLE_UA(61b, .Ldone)
+_ASM_EXTABLE_UA(70b, .Ldone)
+_ASM_EXTABLE_UA(71b, .Ldone)
+_ASM_EXTABLE_UA(80b, .Ldone)
+_ASM_EXTABLE_UA(81b, .Ldone)
+
+/*
+ * This is the "head needs aliging" case when
+ * the destination isn't 8-byte aligned. The
+ * 4-byte case can be done uncached, but any
+ * smaller alignment is done with regular stores.
+ */
+.Lalign:
+	test $1,%dil
+	je .Lalign_word
+	test %edx,%edx
+	je .Ldone
+90:	movb (%rsi),%al
+91:	movb %al,(%rdi)
+	inc %rsi
+	inc %rdi
+	dec %edx
+.Lalign_word:
+	test $2,%dil
+	je .Lalign_long
+	cmp $2,%edx
+	jb .Lbyte
+92:	movw (%rsi),%ax
+93:	movw %ax,(%rdi)
+	addq $2,%rsi
+	addq $2,%rdi
+	sub $2,%edx
+.Lalign_long:
+	test $4,%dil
+	je .Lis_aligned
+	cmp $4,%edx
+	jb .Lword
+94:	movl (%rsi),%eax
+95:	movnti %eax,(%rdi)
+	addq $4,%rsi
+	addq $4,%rdi
+	sub $4,%edx
+	jmp .Lis_aligned
+
+/*
+ * If we fail on the initial alignment accesses,
+ * we're all done. Again, no point in trying to
+ * do byte-by-byte probing if the 4-byte load
+ * fails - we're not doing any uncached accesses
+ * any more.
+ */
+_ASM_EXTABLE_UA(90b, .Ldone)
+_ASM_EXTABLE_UA(91b, .Ldone)
+_ASM_EXTABLE_UA(92b, .Ldone)
+_ASM_EXTABLE_UA(93b, .Ldone)
+_ASM_EXTABLE_UA(94b, .Ldone)
+_ASM_EXTABLE_UA(95b, .Ldone)
+
+/*
+ * Exception table fixups for faults in the middle
+ */
+.Ldone56: sub $8,%edx
+.Ldone48: sub $8,%edx
+.Ldone40: sub $8,%edx
+.Ldone32: sub $8,%edx
+.Ldone24: sub $8,%edx
+.Ldone16: sub $8,%edx
+.Ldone8: sub $8,%edx
+.Ldone0:
+	mov %edx,%eax
+	RET
+
+.Lfixup32:
+	addq $32,%rsi
+	addq $32,%rdi
+	sub $32,%edx
+	jmp .Lquadwords
+
+.Llast4:
+52:	movl (%rsi),%eax
+53:	movnti %eax,(%rdi)
+	sfence
+	sub $4,%edx
+	mov %edx,%eax
+	RET
+_ASM_EXTABLE_UA(52b, .Ldone0)
+_ASM_EXTABLE_UA(53b, .Ldone0)
+
+SYM_FUNC_END(__copy_user_nocache)
+EXPORT_SYMBOL(__copy_user_nocache)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index a64017602010..8f95fb267caa 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -11,13 +11,6 @@
 .section .noinstr.text, "ax"
 
 /*
- * We build a jump to memcpy_orig by default which gets NOPped out on
- * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
- * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
- * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
- */
-
-/*
  * memcpy - Copy a memory block.
  *
  * Input:
@@ -27,17 +20,21 @@
  *
  * Output:
  * rax original destination
+ *
+ * The FSRM alternative should be done inline (avoiding the call and
+ * the disgusting return handling), but that would require some help
+ * from the compiler for better calling conventions.
+ *
+ * The 'rep movsb' itself is small enough to replace the call, but the
+ * two register moves blow up the code. And one of them is "needed"
+ * only for the return value that is the same as the source input,
+ * which the compiler could/should do much better anyway.
  */
 SYM_TYPED_FUNC_START(__memcpy)
-	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
-		      "jmp memcpy_erms", X86_FEATURE_ERMS
+	ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM
 
 	movq %rdi, %rax
 	movq %rdx, %rcx
-	shrq $3, %rcx
-	andl $7, %edx
-	rep movsq
-	movl %edx, %ecx
 	rep movsb
 	RET
 SYM_FUNC_END(__memcpy)
@@ -46,17 +43,6 @@ EXPORT_SYMBOL(__memcpy)
 SYM_FUNC_ALIAS(memcpy, __memcpy)
 EXPORT_SYMBOL(memcpy)
 
-/*
- * memcpy_erms() - enhanced fast string memcpy. This is faster and
- * simpler than memcpy. Use memcpy_erms when possible.
- */
-SYM_FUNC_START_LOCAL(memcpy_erms)
-	movq %rdi, %rax
-	movq %rdx, %rcx
-	rep movsb
-	RET
-SYM_FUNC_END(memcpy_erms)
-
 SYM_FUNC_START_LOCAL(memcpy_orig)
 	movq %rdi, %rax
 
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 6143b1a6fa2c..7c59a704c458 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -18,27 +18,22 @@
  * rdx   count (bytes)
  *
  * rax   original destination
+ *
+ * The FSRS alternative should be done inline (avoiding the call and
+ * the disgusting return handling), but that would require some help
+ * from the compiler for better calling conventions.
+ *
+ * The 'rep stosb' itself is small enough to replace the call, but all
+ * the register moves blow up the code. And two of them are "needed"
+ * only for the return value that is the same as the source input,
+ * which the compiler could/should do much better anyway.
  */
 SYM_FUNC_START(__memset)
-	/*
-	 * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
-	 * to use it when possible. If not available, use fast string instructions.
-	 *
-	 * Otherwise, use original memset function.
-	 */
-	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
-		      "jmp memset_erms", X86_FEATURE_ERMS
+	ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS
 
 	movq %rdi,%r9
+	movb %sil,%al
 	movq %rdx,%rcx
-	andl $7,%edx
-	shrq $3,%rcx
-	/* expand byte value  */
-	movzbl %sil,%esi
-	movabs $0x0101010101010101,%rax
-	imulq %rsi,%rax
-	rep stosq
-	movl %edx,%ecx
 	rep stosb
 	movq %r9,%rax
 	RET
@@ -48,26 +43,6 @@ EXPORT_SYMBOL(__memset)
 SYM_FUNC_ALIAS(memset, __memset)
 EXPORT_SYMBOL(memset)
 
-/*
- * ISO C memset - set a memory block to a byte value. This function uses
- * enhanced rep stosb to override the fast string function.
- * The code is simpler and shorter than the fast string function as well.
- *
- * rdi   destination
- * rsi   value (char)
- * rdx   count (bytes)
- *
- * rax   original destination
- */
-SYM_FUNC_START_LOCAL(memset_erms)
-	movq %rdi,%r9
-	movb %sil,%al
-	movq %rdx,%rcx
-	rep stosb
-	movq %r9,%rax
-	RET
-SYM_FUNC_END(memset_erms)
-
 SYM_FUNC_START_LOCAL(memset_orig)
 	movq %rdi,%r10
 
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 6c1f8ac5e721..c3a5bbc0b41e 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -45,7 +45,11 @@ EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
 long __copy_user_flushcache(void *dst, const void __user *src, unsigned size)
 {
 	unsigned long flushed, dest = (unsigned long) dst;
-	long rc = __copy_user_nocache(dst, src, size, 0);
+	long rc;
+
+	stac();
+	rc = __copy_user_nocache(dst, src, size);
+	clac();
 
 	/*
 	 * __copy_user_nocache() uses non-temporal stores for the bulk
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 7316a8224259..e91500a80963 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -10,6 +10,7 @@
 #include <asm/fixmap.h>
 #include <asm/desc.h>
 #include <asm/kasan.h>
+#include <asm/setup.h>
 
 static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
 
@@ -29,6 +30,12 @@ static __init void init_cea_offsets(void)
 	unsigned int max_cea;
 	unsigned int i, j;
 
+	if (!kaslr_enabled()) {
+		for_each_possible_cpu(i)
+			per_cpu(_cea_offset, i) = i;
+		return;
+	}
+
 	max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;
 
 	/* O(sodding terrible) */
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 88cccd65029d..c6efcf559d88 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -600,7 +600,8 @@ void __init sme_enable(struct boot_params *bp)
 	cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
 				     ((u64)bp->ext_cmd_line_ptr << 32));
 
-	cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
+	if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0)
+		return;
 
 	if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
 		sme_me_mask = me_mask;
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 615a76d70019..bf5161dcf89e 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -7,6 +7,7 @@
 #include <linux/dmi.h>
 #include <linux/pci.h>
 #include <linux/vgaarb.h>
+#include <asm/amd_nb.h>
 #include <asm/hpet.h>
 #include <asm/pci_x86.h>
 
@@ -824,3 +825,23 @@ static void rs690_fix_64bit_dma(struct pci_dev *pdev)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma);
 
 #endif
+
+#ifdef CONFIG_AMD_NB
+
+#define AMD_15B8_RCC_DEV2_EPF0_STRAP2                                  0x10136008
+#define AMD_15B8_RCC_DEV2_EPF0_STRAP2_NO_SOFT_RESET_DEV2_F0_MASK       0x00000080L
+
+static void quirk_clear_strap_no_soft_reset_dev2_f0(struct pci_dev *dev)
+{
+	u32 data;
+
+	if (!amd_smn_read(0, AMD_15B8_RCC_DEV2_EPF0_STRAP2, &data)) {
+		data &= ~AMD_15B8_RCC_DEV2_EPF0_STRAP2_NO_SOFT_RESET_DEV2_F0_MASK;
+		if (amd_smn_write(0, AMD_15B8_RCC_DEV2_EPF0_STRAP2, data))
+			pci_err(dev, "Failed to write data 0x%x\n", data);
+	} else {
+		pci_err(dev, "Failed to read data\n");
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b8, quirk_clear_strap_no_soft_reset_dev2_f0);
+#endif
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 17f09dc26381..82fec66d46d2 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -69,8 +69,7 @@ CFLAGS_sha256.o			+= $(PURGATORY_CFLAGS)
 CFLAGS_REMOVE_string.o		+= $(PURGATORY_CFLAGS_REMOVE)
 CFLAGS_string.o			+= $(PURGATORY_CFLAGS)
 
-AFLAGS_REMOVE_setup-x86_$(BITS).o	+= -Wa,-gdwarf-2
-AFLAGS_REMOVE_entry64.o			+= -Wa,-gdwarf-2
+asflags-remove-y		+= $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x))
 
 $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
 		$(call if_changed,ld)
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 3c5b52fbe4a7..a9ec8c9f5c5d 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -45,6 +45,6 @@ obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
 
 obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
 
-obj-$(CONFIG_XEN_PV_DOM0)	+= vga.o
+obj-$(CONFIG_XEN_DOM0)		+= vga.o
 
 obj-$(CONFIG_XEN_EFI)		+= efi.o
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index bb59cc6ddb2d..093b78c8bbec 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1390,7 +1390,8 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si)
 
 		x86_platform.set_legacy_features =
 				xen_dom0_set_legacy_features;
-		xen_init_vga(info, xen_start_info->console.dom0.info_size);
+		xen_init_vga(info, xen_start_info->console.dom0.info_size,
+			     &boot_params.screen_info);
 		xen_start_info->console.domU.mfn = 0;
 		xen_start_info->console.domU.evtchn = 0;
 
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index bcae606bbc5c..ada3868c02c2 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -43,6 +43,19 @@ void __init xen_pvh_init(struct boot_params *boot_params)
 	x86_init.oem.banner = xen_banner;
 
 	xen_efi_init(boot_params);
+
+	if (xen_initial_domain()) {
+		struct xen_platform_op op = {
+			.cmd = XENPF_get_dom0_console,
+		};
+		int ret = HYPERVISOR_platform_op(&op);
+
+		if (ret > 0)
+			xen_init_vga(&op.u.dom0_console,
+				     min(ret * sizeof(char),
+					 sizeof(op.u.dom0_console)),
+				     &boot_params->screen_info);
+	}
 }
 
 void __init mem_map_via_hcall(struct boot_params *boot_params_p)
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 1d597364b49d..b74ac2562cfb 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -20,6 +20,7 @@
 #include <asm/pvclock.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
+#include <asm/xen/cpuid.h>
 
 #include <xen/events.h>
 #include <xen/features.h>
@@ -503,11 +504,7 @@ static int __init xen_tsc_safe_clocksource(void)
 	/* Leaf 4, sub-leaf 0 (0x40000x03) */
 	cpuid_count(xen_cpuid_base() + 3, 0, &eax, &ebx, &ecx, &edx);
 
-	/* tsc_mode = no_emulate (2) */
-	if (ebx != 2)
-		return 0;
-
-	return 1;
+	return ebx == XEN_CPUID_TSC_MODE_NEVER_EMULATE;
 }
 
 static void __init xen_time_init(void)
diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
index 14ea32e734d5..d97adab8420f 100644
--- a/arch/x86/xen/vga.c
+++ b/arch/x86/xen/vga.c
@@ -9,10 +9,9 @@
 
 #include "xen-ops.h"
 
-void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
+void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size,
+			 struct screen_info *screen_info)
 {
-	struct screen_info *screen_info = &boot_params.screen_info;
-
 	/* This is drawn from a dump from vgacon:startup in
 	 * standard Linux. */
 	screen_info->orig_video_mode = 3;
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9a8bb972193d..a10903785a33 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -108,11 +108,12 @@ static inline void xen_uninit_lock_cpu(int cpu)
 
 struct dom0_vga_console_info;
 
-#ifdef CONFIG_XEN_PV_DOM0
-void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size);
+#ifdef CONFIG_XEN_DOM0
+void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size,
+			 struct screen_info *);
 #else
 static inline void __init xen_init_vga(const struct dom0_vga_console_info *info,
-				       size_t size)
+				       size_t size, struct screen_info *si)
 {
 }
 #endif