summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorMark Brown <broonie@kernel.org>2026-03-30 19:59:52 +0300
committerMark Brown <broonie@kernel.org>2026-03-30 19:59:52 +0300
commit2a740dc5892a0e90e32ddae4d0ece501ace2adfc (patch)
treea1a309c150404c63605140544f60eee9287adf26 /arch
parent1e28cdeec31333b165f72b6ad647652c4c6f6ff2 (diff)
parent8ec017cf31299c4b6287ebe27afe81c986aeef88 (diff)
downloadlinux-2a740dc5892a0e90e32ddae4d0ece501ace2adfc.tar.xz
ASoC: Merge up fixes
Merge branch 'for-7.0' of https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into asoc-7.1 for both ASoC and general bug fixes to support testing.
Diffstat (limited to 'arch')
-rw-r--r--arch/arm64/kvm/at.c2
-rw-r--r--arch/arm64/kvm/reset.c14
-rw-r--r--arch/loongarch/include/asm/linkage.h36
-rw-r--r--arch/loongarch/include/asm/sigframe.h9
-rw-r--r--arch/loongarch/kernel/asm-offsets.c2
-rw-r--r--arch/loongarch/kernel/env.c7
-rw-r--r--arch/loongarch/kernel/signal.c6
-rw-r--r--arch/loongarch/kvm/intc/eiointc.c16
-rw-r--r--arch/loongarch/kvm/vcpu.c3
-rw-r--r--arch/loongarch/pci/pci.c80
-rw-r--r--arch/loongarch/vdso/Makefile4
-rw-r--r--arch/loongarch/vdso/sigreturn.S6
-rw-r--r--arch/s390/include/asm/barrier.h4
-rw-r--r--arch/s390/include/asm/kvm_host.h3
-rw-r--r--arch/s390/include/asm/stacktrace.h2
-rw-r--r--arch/s390/kernel/asm-offsets.c2
-rw-r--r--arch/s390/kernel/entry.S7
-rw-r--r--arch/s390/kernel/nmi.c4
-rw-r--r--arch/s390/kernel/syscall.c5
-rw-r--r--arch/s390/kvm/dat.c100
-rw-r--r--arch/s390/kvm/dat.h23
-rw-r--r--arch/s390/kvm/gaccess.c77
-rw-r--r--arch/s390/kvm/gmap.c160
-rw-r--r--arch/s390/kvm/gmap.h33
-rw-r--r--arch/s390/kvm/interrupt.c18
-rw-r--r--arch/s390/kvm/kvm-s390.c34
-rw-r--r--arch/s390/kvm/vsie.c12
-rw-r--r--arch/s390/mm/fault.c11
-rw-r--r--arch/x86/coco/sev/noinstr.c6
-rw-r--r--arch/x86/entry/entry_fred.c14
-rw-r--r--arch/x86/kernel/cpu/common.c33
-rw-r--r--arch/x86/kvm/mmu/mmu.c17
-rw-r--r--arch/x86/platform/efi/quirks.c2
33 files changed, 512 insertions, 240 deletions
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index c5c5644b1878..a024d9a770dc 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -1753,7 +1753,7 @@ int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new)
if (!writable)
return -EPERM;
- ptep = (u64 __user *)hva + offset;
+ ptep = (void __user *)hva + offset;
if (cpus_have_final_cap(ARM64_HAS_LSE_ATOMICS))
r = __lse_swap_desc(ptep, old, new);
else
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 959532422d3a..b963fd975aac 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -247,6 +247,20 @@ void kvm_reset_vcpu(struct kvm_vcpu *vcpu)
kvm_vcpu_set_be(vcpu);
*vcpu_pc(vcpu) = target_pc;
+
+ /*
+ * We may come from a state where either a PC update was
+ * pending (SMC call resulting in PC being increpented to
+ * skip the SMC) or a pending exception. Make sure we get
+ * rid of all that, as this cannot be valid out of reset.
+ *
+ * Note that clearing the exception mask also clears PC
+ * updates, but that's an implementation detail, and we
+ * really want to make it explicit.
+ */
+ vcpu_clear_flag(vcpu, PENDING_EXCEPTION);
+ vcpu_clear_flag(vcpu, EXCEPT_MASK);
+ vcpu_clear_flag(vcpu, INCREMENT_PC);
vcpu_set_reg(vcpu, 0, reset_state.r0);
}
diff --git a/arch/loongarch/include/asm/linkage.h b/arch/loongarch/include/asm/linkage.h
index e2eca1a25b4e..a1bd6a3ee03a 100644
--- a/arch/loongarch/include/asm/linkage.h
+++ b/arch/loongarch/include/asm/linkage.h
@@ -41,4 +41,40 @@
.cfi_endproc; \
SYM_END(name, SYM_T_NONE)
+/*
+ * This is for the signal handler trampoline, which is used as the return
+ * address of the signal handlers in userspace instead of called normally.
+ * The long standing libgcc bug https://gcc.gnu.org/PR124050 requires a
+ * nop between .cfi_startproc and the actual address of the trampoline, so
+ * we cannot simply use SYM_FUNC_START.
+ *
+ * This wrapper also contains all the .cfi_* directives for recovering
+ * the content of the GPRs and the "return address" (where the rt_sigreturn
+ * syscall will jump to), assuming there is a struct rt_sigframe (where
+ * a struct sigcontext containing those information we need to recover) at
+ * $sp. The "DWARF for the LoongArch(TM) Architecture" manual states
+ * column 0 is for $zero, but it does not make too much sense to
+ * save/restore the hardware zero register. Repurpose this column here
+ * for the return address (here it's not the content of $ra we cannot use
+ * the default column 3).
+ */
+#define SYM_SIGFUNC_START(name) \
+ .cfi_startproc; \
+ .cfi_signal_frame; \
+ .cfi_def_cfa 3, RT_SIGFRAME_SC; \
+ .cfi_return_column 0; \
+ .cfi_offset 0, SC_PC; \
+ \
+ .irp num, 1, 2, 3, 4, 5, 6, 7, 8, \
+ 9, 10, 11, 12, 13, 14, 15, 16, \
+ 17, 18, 19, 20, 21, 22, 23, 24, \
+ 25, 26, 27, 28, 29, 30, 31; \
+ .cfi_offset \num, SC_REGS + \num * SZREG; \
+ .endr; \
+ \
+ nop; \
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
+
+#define SYM_SIGFUNC_END(name) SYM_FUNC_END(name)
+
#endif
diff --git a/arch/loongarch/include/asm/sigframe.h b/arch/loongarch/include/asm/sigframe.h
new file mode 100644
index 000000000000..109298b8d7e0
--- /dev/null
+++ b/arch/loongarch/include/asm/sigframe.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include <asm/siginfo.h>
+#include <asm/ucontext.h>
+
+struct rt_sigframe {
+ struct siginfo rs_info;
+ struct ucontext rs_uctx;
+};
diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c
index 3017c7157600..2cc953f113ac 100644
--- a/arch/loongarch/kernel/asm-offsets.c
+++ b/arch/loongarch/kernel/asm-offsets.c
@@ -16,6 +16,7 @@
#include <asm/ptrace.h>
#include <asm/processor.h>
#include <asm/ftrace.h>
+#include <asm/sigframe.h>
#include <vdso/datapage.h>
static void __used output_ptreg_defines(void)
@@ -220,6 +221,7 @@ static void __used output_sc_defines(void)
COMMENT("Linux sigcontext offsets.");
OFFSET(SC_REGS, sigcontext, sc_regs);
OFFSET(SC_PC, sigcontext, sc_pc);
+ OFFSET(RT_SIGFRAME_SC, rt_sigframe, rs_uctx.uc_mcontext);
BLANK();
}
diff --git a/arch/loongarch/kernel/env.c b/arch/loongarch/kernel/env.c
index 841206fde3ab..652456768b55 100644
--- a/arch/loongarch/kernel/env.c
+++ b/arch/loongarch/kernel/env.c
@@ -42,16 +42,15 @@ static int __init init_cpu_fullname(void)
int cpu, ret;
char *cpuname;
const char *model;
- struct device_node *root;
/* Parsing cpuname from DTS model property */
- root = of_find_node_by_path("/");
- ret = of_property_read_string(root, "model", &model);
+ ret = of_property_read_string(of_root, "model", &model);
if (ret == 0) {
cpuname = kstrdup(model, GFP_KERNEL);
+ if (!cpuname)
+ return -ENOMEM;
loongson_sysconf.cpuname = strsep(&cpuname, " ");
}
- of_node_put(root);
if (loongson_sysconf.cpuname && !strncmp(loongson_sysconf.cpuname, "Loongson", 8)) {
for (cpu = 0; cpu < NR_CPUS; cpu++)
diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c
index c9f7ca778364..d4151d2fb82e 100644
--- a/arch/loongarch/kernel/signal.c
+++ b/arch/loongarch/kernel/signal.c
@@ -35,6 +35,7 @@
#include <asm/cpu-features.h>
#include <asm/fpu.h>
#include <asm/lbt.h>
+#include <asm/sigframe.h>
#include <asm/ucontext.h>
#include <asm/vdso.h>
@@ -51,11 +52,6 @@
#define lock_lbt_owner() ({ preempt_disable(); pagefault_disable(); })
#define unlock_lbt_owner() ({ pagefault_enable(); preempt_enable(); })
-struct rt_sigframe {
- struct siginfo rs_info;
- struct ucontext rs_uctx;
-};
-
struct _ctx_layout {
struct sctx_info *addr;
unsigned int size;
diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c
index d2acb4d09e73..003bd773e11c 100644
--- a/arch/loongarch/kvm/intc/eiointc.c
+++ b/arch/loongarch/kvm/intc/eiointc.c
@@ -83,7 +83,7 @@ static inline void eiointc_update_sw_coremap(struct loongarch_eiointc *s,
if (!(s->status & BIT(EIOINTC_ENABLE_CPU_ENCODE))) {
cpuid = ffs(cpuid) - 1;
- cpuid = (cpuid >= 4) ? 0 : cpuid;
+ cpuid = ((cpuid < 0) || (cpuid >= 4)) ? 0 : cpuid;
}
vcpu = kvm_get_vcpu_by_cpuid(s->kvm, cpuid);
@@ -472,34 +472,34 @@ static int kvm_eiointc_regs_access(struct kvm_device *dev,
switch (addr) {
case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END:
offset = (addr - EIOINTC_NODETYPE_START) / 4;
- p = s->nodetype + offset * 4;
+ p = (void *)s->nodetype + offset * 4;
break;
case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END:
offset = (addr - EIOINTC_IPMAP_START) / 4;
- p = &s->ipmap + offset * 4;
+ p = (void *)&s->ipmap + offset * 4;
break;
case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END:
offset = (addr - EIOINTC_ENABLE_START) / 4;
- p = s->enable + offset * 4;
+ p = (void *)s->enable + offset * 4;
break;
case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
offset = (addr - EIOINTC_BOUNCE_START) / 4;
- p = s->bounce + offset * 4;
+ p = (void *)s->bounce + offset * 4;
break;
case EIOINTC_ISR_START ... EIOINTC_ISR_END:
offset = (addr - EIOINTC_ISR_START) / 4;
- p = s->isr + offset * 4;
+ p = (void *)s->isr + offset * 4;
break;
case EIOINTC_COREISR_START ... EIOINTC_COREISR_END:
if (cpu >= s->num_cpu)
return -EINVAL;
offset = (addr - EIOINTC_COREISR_START) / 4;
- p = s->coreisr[cpu] + offset * 4;
+ p = (void *)s->coreisr[cpu] + offset * 4;
break;
case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END:
offset = (addr - EIOINTC_COREMAP_START) / 4;
- p = s->coremap + offset * 4;
+ p = (void *)s->coremap + offset * 4;
break;
default:
kvm_err("%s: unknown eiointc register, addr = %d\n", __func__, addr);
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 8ffd50a470e6..831f381a8fd1 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -588,6 +588,9 @@ struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid)
{
struct kvm_phyid_map *map;
+ if (cpuid < 0)
+ return NULL;
+
if (cpuid >= KVM_MAX_PHYID)
return NULL;
diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c
index d923295ab8c6..d233ea2218fe 100644
--- a/arch/loongarch/pci/pci.c
+++ b/arch/loongarch/pci/pci.c
@@ -5,9 +5,11 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/acpi.h>
+#include <linux/delay.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/vgaarb.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
#include <asm/cacheflush.h>
#include <asm/loongson.h>
@@ -15,6 +17,9 @@
#define PCI_DEVICE_ID_LOONGSON_DC1 0x7a06
#define PCI_DEVICE_ID_LOONGSON_DC2 0x7a36
#define PCI_DEVICE_ID_LOONGSON_DC3 0x7a46
+#define PCI_DEVICE_ID_LOONGSON_GPU1 0x7a15
+#define PCI_DEVICE_ID_LOONGSON_GPU2 0x7a25
+#define PCI_DEVICE_ID_LOONGSON_GPU3 0x7a35
int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
int reg, int len, u32 *val)
@@ -99,3 +104,78 @@ static void pci_fixup_vgadev(struct pci_dev *pdev)
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC1, pci_fixup_vgadev);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC2, pci_fixup_vgadev);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_DC3, pci_fixup_vgadev);
+
+#define CRTC_NUM_MAX 2
+#define CRTC_OUTPUT_ENABLE 0x100
+
+static void loongson_gpu_fixup_dma_hang(struct pci_dev *pdev, bool on)
+{
+ u32 i, val, count, crtc_offset, device;
+ void __iomem *crtc_reg, *base, *regbase;
+ static u32 crtc_status[CRTC_NUM_MAX] = { 0 };
+
+ base = pdev->bus->ops->map_bus(pdev->bus, pdev->devfn + 1, 0);
+ device = readw(base + PCI_DEVICE_ID);
+
+ regbase = ioremap(readq(base + PCI_BASE_ADDRESS_0) & ~0xffull, SZ_64K);
+ if (!regbase) {
+ pci_err(pdev, "Failed to ioremap()\n");
+ return;
+ }
+
+ switch (device) {
+ case PCI_DEVICE_ID_LOONGSON_DC2:
+ crtc_reg = regbase + 0x1240;
+ crtc_offset = 0x10;
+ break;
+ case PCI_DEVICE_ID_LOONGSON_DC3:
+ crtc_reg = regbase;
+ crtc_offset = 0x400;
+ break;
+ }
+
+ for (i = 0; i < CRTC_NUM_MAX; i++, crtc_reg += crtc_offset) {
+ val = readl(crtc_reg);
+
+ if (!on)
+ crtc_status[i] = val;
+
+ /* No need to fixup if the status is off at startup. */
+ if (!(crtc_status[i] & CRTC_OUTPUT_ENABLE))
+ continue;
+
+ if (on)
+ val |= CRTC_OUTPUT_ENABLE;
+ else
+ val &= ~CRTC_OUTPUT_ENABLE;
+
+ mb();
+ writel(val, crtc_reg);
+
+ for (count = 0; count < 40; count++) {
+ val = readl(crtc_reg) & CRTC_OUTPUT_ENABLE;
+ if ((on && val) || (!on && !val))
+ break;
+ udelay(1000);
+ }
+
+ pci_info(pdev, "DMA hang fixup at reg[0x%lx]: 0x%x\n",
+ (unsigned long)crtc_reg & 0xffff, readl(crtc_reg));
+ }
+
+ iounmap(regbase);
+}
+
+static void pci_fixup_dma_hang_early(struct pci_dev *pdev)
+{
+ loongson_gpu_fixup_dma_hang(pdev, false);
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU2, pci_fixup_dma_hang_early);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU3, pci_fixup_dma_hang_early);
+
+static void pci_fixup_dma_hang_final(struct pci_dev *pdev)
+{
+ loongson_gpu_fixup_dma_hang(pdev, true);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU2, pci_fixup_dma_hang_final);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LOONGSON, PCI_DEVICE_ID_LOONGSON_GPU3, pci_fixup_dma_hang_final);
diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile
index 520f1513f07d..294c16b9517f 100644
--- a/arch/loongarch/vdso/Makefile
+++ b/arch/loongarch/vdso/Makefile
@@ -26,7 +26,7 @@ cflags-vdso := $(ccflags-vdso) \
$(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \
-std=gnu11 -fms-extensions -O2 -g -fno-strict-aliasing -fno-common -fno-builtin \
-fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \
- $(call cc-option, -fno-asynchronous-unwind-tables) \
+ $(call cc-option, -fasynchronous-unwind-tables) \
$(call cc-option, -fno-stack-protector)
aflags-vdso := $(ccflags-vdso) \
-D__ASSEMBLY__ -Wa,-gdwarf-2
@@ -41,7 +41,7 @@ endif
# VDSO linker flags.
ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \
- $(filter -E%,$(KBUILD_CFLAGS)) -shared --build-id -T
+ $(filter -E%,$(KBUILD_CFLAGS)) -shared --build-id --eh-frame-hdr -T
#
# Shared build commands.
diff --git a/arch/loongarch/vdso/sigreturn.S b/arch/loongarch/vdso/sigreturn.S
index 9cb3c58fad03..59f940d928de 100644
--- a/arch/loongarch/vdso/sigreturn.S
+++ b/arch/loongarch/vdso/sigreturn.S
@@ -12,13 +12,13 @@
#include <asm/regdef.h>
#include <asm/asm.h>
+#include <asm/asm-offsets.h>
.section .text
- .cfi_sections .debug_frame
-SYM_FUNC_START(__vdso_rt_sigreturn)
+SYM_SIGFUNC_START(__vdso_rt_sigreturn)
li.w a7, __NR_rt_sigreturn
syscall 0
-SYM_FUNC_END(__vdso_rt_sigreturn)
+SYM_SIGFUNC_END(__vdso_rt_sigreturn)
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index f3184073e754..dad02f5b3c8d 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -62,8 +62,8 @@ do { \
* @size: number of elements in array
*/
#define array_index_mask_nospec array_index_mask_nospec
-static inline unsigned long array_index_mask_nospec(unsigned long index,
- unsigned long size)
+static __always_inline unsigned long array_index_mask_nospec(unsigned long index,
+ unsigned long size)
{
unsigned long mask;
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 64a50f0862aa..3039c88daa63 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -710,6 +710,9 @@ void kvm_arch_crypto_clear_masks(struct kvm *kvm);
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
unsigned long *aqm, unsigned long *adm);
+#define SIE64_RETURN_NORMAL 0
+#define SIE64_RETURN_MCCK 1
+
int __sie64a(phys_addr_t sie_block_phys, struct kvm_s390_sie_block *sie_block, u64 *rsa,
unsigned long gasce);
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index c9ae680a28af..ac3606c3babe 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -62,7 +62,7 @@ struct stack_frame {
struct {
unsigned long sie_control_block;
unsigned long sie_savearea;
- unsigned long sie_reason;
+ unsigned long sie_return;
unsigned long sie_flags;
unsigned long sie_control_block_phys;
unsigned long sie_guest_asce;
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index e1a5b5b54e4f..fbd26f3e9f96 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -63,7 +63,7 @@ int main(void)
OFFSET(__SF_EMPTY, stack_frame, empty[0]);
OFFSET(__SF_SIE_CONTROL, stack_frame, sie_control_block);
OFFSET(__SF_SIE_SAVEAREA, stack_frame, sie_savearea);
- OFFSET(__SF_SIE_REASON, stack_frame, sie_reason);
+ OFFSET(__SF_SIE_RETURN, stack_frame, sie_return);
OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags);
OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys);
OFFSET(__SF_SIE_GUEST_ASCE, stack_frame, sie_guest_asce);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 4873fe9d891b..bb806d1ddae0 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -200,7 +200,7 @@ SYM_FUNC_START(__sie64a)
stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses
stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area
stg %r5,__SF_SIE_GUEST_ASCE(%r15) # save guest asce
- xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0
+ xc __SF_SIE_RETURN(8,%r15),__SF_SIE_RETURN(%r15) # return code = 0
mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags
lmg %r0,%r13,0(%r4) # load guest gprs 0-13
mvi __TI_sie(%r14),1
@@ -237,7 +237,7 @@ SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
xgr %r4,%r4
xgr %r5,%r5
lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
- lg %r2,__SF_SIE_REASON(%r15) # return exit reason code
+ lg %r2,__SF_SIE_RETURN(%r15) # return sie return code
BR_EX %r14
SYM_FUNC_END(__sie64a)
EXPORT_SYMBOL(__sie64a)
@@ -271,6 +271,7 @@ SYM_CODE_START(system_call)
xgr %r9,%r9
xgr %r10,%r10
xgr %r11,%r11
+ xgr %r12,%r12
la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
mvc __PT_R8(64,%r2),__LC_SAVE_AREA(%r13)
MBEAR %r2,%r13
@@ -407,6 +408,7 @@ SYM_CODE_START(\name)
xgr %r6,%r6
xgr %r7,%r7
xgr %r10,%r10
+ xgr %r12,%r12
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA(%r13)
MBEAR %r11,%r13
@@ -496,6 +498,7 @@ SYM_CODE_START(mcck_int_handler)
xgr %r6,%r6
xgr %r7,%r7
xgr %r10,%r10
+ xgr %r12,%r12
stmg %r8,%r9,__PT_PSW(%r11)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index a55abbf65333..94fbfad49f62 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -487,8 +487,8 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK);
if (test_cpu_flag(CIF_MCCK_GUEST) &&
(mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) {
- /* Set exit reason code for host's later handling */
- *((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR;
+ /* Set sie return code for host's later handling */
+ ((struct stack_frame *)regs->gprs[15])->sie_return = SIE64_RETURN_MCCK;
}
clear_cpu_flag(CIF_MCCK_GUEST);
diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c
index 795b6cca74c9..d103c853e120 100644
--- a/arch/s390/kernel/syscall.c
+++ b/arch/s390/kernel/syscall.c
@@ -13,6 +13,7 @@
*/
#include <linux/cpufeature.h>
+#include <linux/nospec.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/mm.h>
@@ -131,8 +132,10 @@ void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET)))
goto out;
regs->gprs[2] = -ENOSYS;
- if (likely(nr < NR_syscalls))
+ if (likely(nr < NR_syscalls)) {
+ nr = array_index_nospec(nr, NR_syscalls);
regs->gprs[2] = sys_call_table[nr](regs);
+ }
out:
syscall_exit_to_user_mode(regs);
}
diff --git a/arch/s390/kvm/dat.c b/arch/s390/kvm/dat.c
index 670404d4fa44..7b8d70fe406d 100644
--- a/arch/s390/kvm/dat.c
+++ b/arch/s390/kvm/dat.c
@@ -135,32 +135,6 @@ int dat_set_asce_limit(struct kvm_s390_mmu_cache *mc, union asce *asce, int newt
}
/**
- * dat_crstep_xchg() - Exchange a gmap CRSTE with another.
- * @crstep: Pointer to the CRST entry
- * @new: Replacement entry.
- * @gfn: The affected guest address.
- * @asce: The ASCE of the address space.
- *
- * Context: This function is assumed to be called with kvm->mmu_lock held.
- */
-void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce asce)
-{
- if (crstep->h.i) {
- WRITE_ONCE(*crstep, new);
- return;
- } else if (cpu_has_edat2()) {
- crdte_crste(crstep, *crstep, new, gfn, asce);
- return;
- }
-
- if (machine_has_tlb_guest())
- idte_crste(crstep, gfn, IDTE_GUEST_ASCE, asce, IDTE_GLOBAL);
- else
- idte_crste(crstep, gfn, 0, NULL_ASCE, IDTE_GLOBAL);
- WRITE_ONCE(*crstep, new);
-}
-
-/**
* dat_crstep_xchg_atomic() - Atomically exchange a gmap CRSTE with another.
* @crstep: Pointer to the CRST entry.
* @old: Expected old value.
@@ -175,8 +149,8 @@ void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce
*
* Return: %true if the exchange was successful.
*/
-bool dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new, gfn_t gfn,
- union asce asce)
+bool __must_check dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new,
+ gfn_t gfn, union asce asce)
{
if (old.h.i)
return arch_try_cmpxchg((long *)crstep, &old.val, new.val);
@@ -292,6 +266,7 @@ static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t g
pt->ptes[i].val = init.val | i * PAGE_SIZE;
/* No need to take locks as the page table is not installed yet. */
pgste_init.prefix_notif = old.s.fc1.prefix_notif;
+ pgste_init.vsie_notif = old.s.fc1.vsie_notif;
pgste_init.pcl = uses_skeys && init.h.i;
dat_init_pgstes(pt, pgste_init.val);
} else {
@@ -893,7 +868,8 @@ static long _dat_slot_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct d
/* This table entry needs to be updated. */
if (walk->start <= gfn && walk->end >= next) {
- dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce);
+ if (!dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce))
+ return -EINVAL;
/* A lower level table was present, needs to be freed. */
if (!crste.h.fc && !crste.h.i) {
if (is_pmd(crste))
@@ -1021,67 +997,21 @@ bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end)
return _dat_walk_gfn_range(start, end, asce, &test_age_ops, 0, NULL) > 0;
}
-int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
- bool uses_skeys, struct guest_fault *f)
-{
- union crste oldval, newval;
- union pte newpte, oldpte;
- union pgste pgste;
- int rc = 0;
-
- rc = dat_entry_walk(mc, f->gfn, asce, DAT_WALK_ALLOC_CONTINUE, level, &f->crstep, &f->ptep);
- if (rc == -EINVAL || rc == -ENOMEM)
- return rc;
- if (rc)
- return -EAGAIN;
-
- if (WARN_ON_ONCE(unlikely(get_level(f->crstep, f->ptep) > level)))
- return -EINVAL;
-
- if (f->ptep) {
- pgste = pgste_get_lock(f->ptep);
- oldpte = *f->ptep;
- newpte = _pte(f->pfn, f->writable, f->write_attempt | oldpte.s.d, !f->page);
- newpte.s.sd = oldpte.s.sd;
- oldpte.s.sd = 0;
- if (oldpte.val == _PTE_EMPTY.val || oldpte.h.pfra == f->pfn) {
- pgste = __dat_ptep_xchg(f->ptep, pgste, newpte, f->gfn, asce, uses_skeys);
- if (f->callback)
- f->callback(f);
- } else {
- rc = -EAGAIN;
- }
- pgste_set_unlock(f->ptep, pgste);
- } else {
- oldval = READ_ONCE(*f->crstep);
- newval = _crste_fc1(f->pfn, oldval.h.tt, f->writable,
- f->write_attempt | oldval.s.fc1.d);
- newval.s.fc1.sd = oldval.s.fc1.sd;
- if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val &&
- crste_origin_large(oldval) != crste_origin_large(newval))
- return -EAGAIN;
- if (!dat_crstep_xchg_atomic(f->crstep, oldval, newval, f->gfn, asce))
- return -EAGAIN;
- if (f->callback)
- f->callback(f);
- }
-
- return rc;
-}
-
static long dat_set_pn_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
- union crste crste = READ_ONCE(*crstep);
+ union crste newcrste, oldcrste;
int *n = walk->priv;
- if (!crste.h.fc || crste.h.i || crste.h.p)
- return 0;
-
+ do {
+ oldcrste = READ_ONCE(*crstep);
+ if (!oldcrste.h.fc || oldcrste.h.i || oldcrste.h.p)
+ return 0;
+ if (oldcrste.s.fc1.prefix_notif)
+ break;
+ newcrste = oldcrste;
+ newcrste.s.fc1.prefix_notif = 1;
+ } while (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, walk->asce));
*n = 2;
- if (crste.s.fc1.prefix_notif)
- return 0;
- crste.s.fc1.prefix_notif = 1;
- dat_crstep_xchg(crstep, crste, gfn, walk->asce);
return 0;
}
diff --git a/arch/s390/kvm/dat.h b/arch/s390/kvm/dat.h
index 123e11dcd70d..874cc962e196 100644
--- a/arch/s390/kvm/dat.h
+++ b/arch/s390/kvm/dat.h
@@ -160,14 +160,14 @@ union pmd {
unsigned long :44; /* HW */
unsigned long : 3; /* Unused */
unsigned long : 1; /* HW */
+ unsigned long s : 1; /* Special */
unsigned long w : 1; /* Writable soft-bit */
unsigned long r : 1; /* Readable soft-bit */
unsigned long d : 1; /* Dirty */
unsigned long y : 1; /* Young */
- unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
unsigned long : 3; /* HW */
+ unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
- unsigned long : 1; /* Unused */
unsigned long : 4; /* HW */
unsigned long sd : 1; /* Soft-Dirty */
unsigned long pr : 1; /* Present */
@@ -183,14 +183,14 @@ union pud {
unsigned long :33; /* HW */
unsigned long :14; /* Unused */
unsigned long : 1; /* HW */
+ unsigned long s : 1; /* Special */
unsigned long w : 1; /* Writable soft-bit */
unsigned long r : 1; /* Readable soft-bit */
unsigned long d : 1; /* Dirty */
unsigned long y : 1; /* Young */
- unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
unsigned long : 3; /* HW */
+ unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
- unsigned long : 1; /* Unused */
unsigned long : 4; /* HW */
unsigned long sd : 1; /* Soft-Dirty */
unsigned long pr : 1; /* Present */
@@ -254,14 +254,14 @@ union crste {
struct {
unsigned long :47;
unsigned long : 1; /* HW (should be 0) */
+ unsigned long s : 1; /* Special */
unsigned long w : 1; /* Writable */
unsigned long r : 1; /* Readable */
unsigned long d : 1; /* Dirty */
unsigned long y : 1; /* Young */
- unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
unsigned long : 3; /* HW */
+ unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
- unsigned long : 1;
unsigned long : 4; /* HW */
unsigned long sd : 1; /* Soft-Dirty */
unsigned long pr : 1; /* Present */
@@ -540,8 +540,6 @@ int dat_set_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start, gf
u16 type, u16 param);
int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn);
bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end);
-int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
- bool uses_skeys, struct guest_fault *f);
int dat_perform_essa(union asce asce, gfn_t gfn, int orc, union essa_state *state, bool *dirty);
long dat_reset_cmma(union asce asce, gfn_t start_gfn);
@@ -938,11 +936,14 @@ static inline bool dat_pudp_xchg_atomic(union pud *pudp, union pud old, union pu
return dat_crstep_xchg_atomic(_CRSTEP(pudp), _CRSTE(old), _CRSTE(new), gfn, asce);
}
-static inline void dat_crstep_clear(union crste *crstep, gfn_t gfn, union asce asce)
+static inline union crste dat_crstep_clear_atomic(union crste *crstep, gfn_t gfn, union asce asce)
{
- union crste newcrste = _CRSTE_EMPTY(crstep->h.tt);
+ union crste oldcrste, empty = _CRSTE_EMPTY(crstep->h.tt);
- dat_crstep_xchg(crstep, newcrste, gfn, asce);
+ do {
+ oldcrste = READ_ONCE(*crstep);
+ } while (!dat_crstep_xchg_atomic(crstep, oldcrste, empty, gfn, asce));
+ return oldcrste;
}
static inline int get_level(union crste *crstep, union pte *ptep)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 4630b2a067ea..53a8550e7102 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1434,17 +1434,27 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union
if (rc)
return rc;
- pgste = pgste_get_lock(ptep_h);
- newpte = _pte(f->pfn, f->writable, !p, 0);
- newpte.s.d |= ptep->s.d;
- newpte.s.sd |= ptep->s.sd;
- newpte.h.p &= ptep->h.p;
- pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false);
- pgste.vsie_notif = 1;
+ if (!pgste_get_trylock(ptep_h, &pgste))
+ return -EAGAIN;
+ newpte = _pte(f->pfn, f->writable, !p, ptep_h->s.s);
+ newpte.s.d |= ptep_h->s.d;
+ newpte.s.sd |= ptep_h->s.sd;
+ newpte.h.p &= ptep_h->h.p;
+ if (!newpte.h.p && !f->writable) {
+ rc = -EOPNOTSUPP;
+ } else {
+ pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false);
+ pgste.vsie_notif = 1;
+ }
pgste_set_unlock(ptep_h, pgste);
+ if (rc)
+ return rc;
+ if (!sg->parent)
+ return -EAGAIN;
newpte = _pte(f->pfn, 0, !p, 0);
- pgste = pgste_get_lock(ptep);
+ if (!pgste_get_trylock(ptep, &pgste))
+ return -EAGAIN;
pgste = __dat_ptep_xchg(ptep, pgste, newpte, gpa_to_gfn(raddr), sg->asce, uses_skeys(sg));
pgste_set_unlock(ptep, pgste);
@@ -1454,7 +1464,7 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union
static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, union crste *table,
struct guest_fault *f, bool p)
{
- union crste newcrste;
+ union crste newcrste, oldcrste;
gfn_t gfn;
int rc;
@@ -1467,16 +1477,28 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni
if (rc)
return rc;
- newcrste = _crste_fc1(f->pfn, host->h.tt, f->writable, !p);
- newcrste.s.fc1.d |= host->s.fc1.d;
- newcrste.s.fc1.sd |= host->s.fc1.sd;
- newcrste.h.p &= host->h.p;
- newcrste.s.fc1.vsie_notif = 1;
- newcrste.s.fc1.prefix_notif = host->s.fc1.prefix_notif;
- _gmap_crstep_xchg(sg->parent, host, newcrste, f->gfn, false);
+ do {
+ /* _gmap_crstep_xchg_atomic() could have unshadowed this shadow gmap */
+ if (!sg->parent)
+ return -EAGAIN;
+ oldcrste = READ_ONCE(*host);
+ newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, f->writable, !p);
+ newcrste.s.fc1.d |= oldcrste.s.fc1.d;
+ newcrste.s.fc1.sd |= oldcrste.s.fc1.sd;
+ newcrste.h.p &= oldcrste.h.p;
+ newcrste.s.fc1.vsie_notif = 1;
+ newcrste.s.fc1.prefix_notif = oldcrste.s.fc1.prefix_notif;
+ newcrste.s.fc1.s = oldcrste.s.fc1.s;
+ if (!newcrste.h.p && !f->writable)
+ return -EOPNOTSUPP;
+ } while (!_gmap_crstep_xchg_atomic(sg->parent, host, oldcrste, newcrste, f->gfn, false));
+ if (!sg->parent)
+ return -EAGAIN;
- newcrste = _crste_fc1(f->pfn, host->h.tt, 0, !p);
- dat_crstep_xchg(table, newcrste, gpa_to_gfn(raddr), sg->asce);
+ newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, 0, !p);
+ gfn = gpa_to_gfn(raddr);
+ while (!dat_crstep_xchg_atomic(table, READ_ONCE(*table), newcrste, gfn, sg->asce))
+ ;
return 0;
}
@@ -1500,21 +1522,31 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
if (rc)
return rc;
- /* A race occourred. The shadow mapping is already valid, nothing to do */
- if ((ptep && !ptep->h.i) || (!ptep && crste_leaf(*table)))
+ /* A race occurred. The shadow mapping is already valid, nothing to do */
+ if ((ptep && !ptep->h.i && ptep->h.p == w->p) ||
+ (!ptep && crste_leaf(*table) && !table->h.i && table->h.p == w->p))
return 0;
gl = get_level(table, ptep);
+ /* In case of a real address space */
+ if (w->level <= LEVEL_MEM) {
+ l = TABLE_TYPE_PAGE_TABLE;
+ hl = TABLE_TYPE_REGION1;
+ goto real_address_space;
+ }
+
/*
* Skip levels that are already protected. For each level, protect
* only the page containing the entry, not the whole table.
*/
for (i = gl ; i >= w->level; i--) {
- rc = gmap_protect_rmap(mc, sg, entries[i - 1].gfn, gpa_to_gfn(saddr),
- entries[i - 1].pfn, i, entries[i - 1].writable);
+ rc = gmap_protect_rmap(mc, sg, entries[i].gfn, gpa_to_gfn(saddr),
+ entries[i].pfn, i + 1, entries[i].writable);
if (rc)
return rc;
+ if (!sg->parent)
+ return -EAGAIN;
}
rc = dat_entry_walk(NULL, entries[LEVEL_MEM].gfn, sg->parent->asce, DAT_WALK_LEAF,
@@ -1526,6 +1558,7 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
/* Get the smallest granularity */
l = min3(gl, hl, w->level);
+real_address_space:
flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? DAT_WALK_USES_SKEYS : 0);
/* If necessary, create the shadow mapping */
if (l < gl) {
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
index ef0c6ebfdde2..645c32c767d2 100644
--- a/arch/s390/kvm/gmap.c
+++ b/arch/s390/kvm/gmap.c
@@ -313,13 +313,16 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st
struct clear_young_pte_priv *priv = walk->priv;
union crste crste, new;
- crste = READ_ONCE(*crstep);
+ do {
+ crste = READ_ONCE(*crstep);
+
+ if (!crste.h.fc)
+ return 0;
+ if (!crste.s.fc1.y && crste.h.i)
+ return 0;
+ if (crste_prefix(crste) && !gmap_mkold_prefix(priv->gmap, gfn, end))
+ break;
- if (!crste.h.fc)
- return 0;
- if (!crste.s.fc1.y && crste.h.i)
- return 0;
- if (!crste_prefix(crste) || gmap_mkold_prefix(priv->gmap, gfn, end)) {
new = crste;
new.h.i = 1;
new.s.fc1.y = 0;
@@ -328,8 +331,8 @@ static long gmap_clear_young_crste(union crste *crstep, gfn_t gfn, gfn_t end, st
folio_set_dirty(phys_to_folio(crste_origin_large(crste)));
new.s.fc1.d = 0;
new.h.p = 1;
- dat_crstep_xchg(crstep, new, gfn, walk->asce);
- }
+ } while (!dat_crstep_xchg_atomic(crstep, crste, new, gfn, walk->asce));
+
priv->young = 1;
return 0;
}
@@ -391,14 +394,18 @@ static long _gmap_unmap_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct
{
struct gmap_unmap_priv *priv = walk->priv;
struct folio *folio = NULL;
+ union crste old = *crstep;
- if (crstep->h.fc) {
- if (crstep->s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags))
- folio = phys_to_folio(crste_origin_large(*crstep));
- gmap_crstep_xchg(priv->gmap, crstep, _CRSTE_EMPTY(crstep->h.tt), gfn);
- if (folio)
- uv_convert_from_secure_folio(folio);
- }
+ if (!old.h.fc)
+ return 0;
+
+ if (old.s.fc1.pr && test_bit(GMAP_FLAG_EXPORT_ON_UNMAP, &priv->gmap->flags))
+ folio = phys_to_folio(crste_origin_large(old));
+ /* No races should happen because kvm->mmu_lock is held in write mode */
+ KVM_BUG_ON(!gmap_crstep_xchg_atomic(priv->gmap, crstep, old, _CRSTE_EMPTY(old.h.tt), gfn),
+ priv->gmap->kvm);
+ if (folio)
+ uv_convert_from_secure_folio(folio);
return 0;
}
@@ -474,23 +481,24 @@ static long _crste_test_and_clear_softdirty(union crste *table, gfn_t gfn, gfn_t
if (fatal_signal_pending(current))
return 1;
- crste = READ_ONCE(*table);
- if (!crste.h.fc)
- return 0;
- if (crste.h.p && !crste.s.fc1.sd)
- return 0;
+ do {
+ crste = READ_ONCE(*table);
+ if (!crste.h.fc)
+ return 0;
+ if (crste.h.p && !crste.s.fc1.sd)
+ return 0;
- /*
- * If this large page contains one or more prefixes of vCPUs that are
- * currently running, do not reset the protection, leave it marked as
- * dirty.
- */
- if (!crste.s.fc1.prefix_notif || gmap_mkold_prefix(gmap, gfn, end)) {
+ /*
+ * If this large page contains one or more prefixes of vCPUs that are
+ * currently running, do not reset the protection, leave it marked as
+ * dirty.
+ */
+ if (crste.s.fc1.prefix_notif && !gmap_mkold_prefix(gmap, gfn, end))
+ break;
new = crste;
new.h.p = 1;
new.s.fc1.sd = 0;
- gmap_crstep_xchg(gmap, table, new, gfn);
- }
+ } while (!gmap_crstep_xchg_atomic(gmap, table, crste, new, gfn));
for ( ; gfn < end; gfn++)
mark_page_dirty(gmap->kvm, gfn);
@@ -511,7 +519,7 @@ void gmap_sync_dirty_log(struct gmap *gmap, gfn_t start, gfn_t end)
_dat_walk_gfn_range(start, end, gmap->asce, &walk_ops, 0, gmap);
}
-static int gmap_handle_minor_crste_fault(union asce asce, struct guest_fault *f)
+static int gmap_handle_minor_crste_fault(struct gmap *gmap, struct guest_fault *f)
{
union crste newcrste, oldcrste = READ_ONCE(*f->crstep);
@@ -536,10 +544,8 @@ static int gmap_handle_minor_crste_fault(union asce asce, struct guest_fault *f)
newcrste.s.fc1.d = 1;
newcrste.s.fc1.sd = 1;
}
- if (!oldcrste.s.fc1.d && newcrste.s.fc1.d)
- SetPageDirty(phys_to_page(crste_origin_large(newcrste)));
/* In case of races, let the slow path deal with it. */
- return !dat_crstep_xchg_atomic(f->crstep, oldcrste, newcrste, f->gfn, asce);
+ return !gmap_crstep_xchg_atomic(gmap, f->crstep, oldcrste, newcrste, f->gfn);
}
/* Trying to write on a read-only page, let the slow path deal with it. */
return 1;
@@ -568,8 +574,6 @@ static int _gmap_handle_minor_pte_fault(struct gmap *gmap, union pgste *pgste,
newpte.s.d = 1;
newpte.s.sd = 1;
}
- if (!oldpte.s.d && newpte.s.d)
- SetPageDirty(pfn_to_page(newpte.h.pfra));
*pgste = gmap_ptep_xchg(gmap, f->ptep, newpte, *pgste, f->gfn);
return 0;
@@ -606,7 +610,7 @@ int gmap_try_fixup_minor(struct gmap *gmap, struct guest_fault *fault)
fault->callback(fault);
pgste_set_unlock(fault->ptep, pgste);
} else {
- rc = gmap_handle_minor_crste_fault(gmap->asce, fault);
+ rc = gmap_handle_minor_crste_fault(gmap, fault);
if (!rc && fault->callback)
fault->callback(fault);
}
@@ -623,10 +627,61 @@ static inline bool gmap_1m_allowed(struct gmap *gmap, gfn_t gfn)
return test_bit(GMAP_FLAG_ALLOW_HPAGE_1M, &gmap->flags);
}
+static int _gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, int level,
+ struct guest_fault *f)
+{
+ union crste oldval, newval;
+ union pte newpte, oldpte;
+ union pgste pgste;
+ int rc = 0;
+
+ rc = dat_entry_walk(mc, f->gfn, gmap->asce, DAT_WALK_ALLOC_CONTINUE, level,
+ &f->crstep, &f->ptep);
+ if (rc == -ENOMEM)
+ return rc;
+ if (KVM_BUG_ON(rc == -EINVAL, gmap->kvm))
+ return rc;
+ if (rc)
+ return -EAGAIN;
+ if (KVM_BUG_ON(get_level(f->crstep, f->ptep) > level, gmap->kvm))
+ return -EINVAL;
+
+ if (f->ptep) {
+ pgste = pgste_get_lock(f->ptep);
+ oldpte = *f->ptep;
+ newpte = _pte(f->pfn, f->writable, f->write_attempt | oldpte.s.d, !f->page);
+ newpte.s.sd = oldpte.s.sd;
+ oldpte.s.sd = 0;
+ if (oldpte.val == _PTE_EMPTY.val || oldpte.h.pfra == f->pfn) {
+ pgste = gmap_ptep_xchg(gmap, f->ptep, newpte, pgste, f->gfn);
+ if (f->callback)
+ f->callback(f);
+ } else {
+ rc = -EAGAIN;
+ }
+ pgste_set_unlock(f->ptep, pgste);
+ } else {
+ do {
+ oldval = READ_ONCE(*f->crstep);
+ newval = _crste_fc1(f->pfn, oldval.h.tt, f->writable,
+ f->write_attempt | oldval.s.fc1.d);
+ newval.s.fc1.s = !f->page;
+ newval.s.fc1.sd = oldval.s.fc1.sd;
+ if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val &&
+ crste_origin_large(oldval) != crste_origin_large(newval))
+ return -EAGAIN;
+ } while (!gmap_crstep_xchg_atomic(gmap, f->crstep, oldval, newval, f->gfn));
+ if (f->callback)
+ f->callback(f);
+ }
+
+ return rc;
+}
+
int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fault *f)
{
unsigned int order;
- int rc, level;
+ int level;
lockdep_assert_held(&gmap->kvm->mmu_lock);
@@ -638,16 +693,14 @@ int gmap_link(struct kvm_s390_mmu_cache *mc, struct gmap *gmap, struct guest_fau
else if (order >= get_order(_SEGMENT_SIZE) && gmap_1m_allowed(gmap, f->gfn))
level = TABLE_TYPE_SEGMENT;
}
- rc = dat_link(mc, gmap->asce, level, uses_skeys(gmap), f);
- KVM_BUG_ON(rc == -EINVAL, gmap->kvm);
- return rc;
+ return _gmap_link(mc, gmap, level, f);
}
static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap,
gfn_t p_gfn, gfn_t c_gfn, bool force_alloc)
{
+ union crste newcrste, oldcrste;
struct page_table *pt;
- union crste newcrste;
union crste *crstep;
union pte *ptep;
int rc;
@@ -673,7 +726,11 @@ static int gmap_ucas_map_one(struct kvm_s390_mmu_cache *mc, struct gmap *gmap,
&crstep, &ptep);
if (rc)
return rc;
- dat_crstep_xchg(crstep, newcrste, c_gfn, gmap->asce);
+ do {
+ oldcrste = READ_ONCE(*crstep);
+ if (oldcrste.val == newcrste.val)
+ break;
+ } while (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, c_gfn, gmap->asce));
return 0;
}
@@ -777,8 +834,10 @@ static void gmap_ucas_unmap_one(struct gmap *gmap, gfn_t c_gfn)
int rc;
rc = dat_entry_walk(NULL, c_gfn, gmap->asce, 0, TABLE_TYPE_SEGMENT, &crstep, &ptep);
- if (!rc)
- dat_crstep_xchg(crstep, _PMD_EMPTY, c_gfn, gmap->asce);
+ if (rc)
+ return;
+ while (!dat_crstep_xchg_atomic(crstep, READ_ONCE(*crstep), _PMD_EMPTY, c_gfn, gmap->asce))
+ ;
}
void gmap_ucas_unmap(struct gmap *gmap, gfn_t c_gfn, unsigned long count)
@@ -1017,8 +1076,8 @@ static void gmap_unshadow_level(struct gmap *sg, gfn_t r_gfn, int level)
dat_ptep_xchg(ptep, _PTE_EMPTY, r_gfn, sg->asce, uses_skeys(sg));
return;
}
- crste = READ_ONCE(*crstep);
- dat_crstep_clear(crstep, r_gfn, sg->asce);
+
+ crste = dat_crstep_clear_atomic(crstep, r_gfn, sg->asce);
if (crste_leaf(crste) || crste.h.i)
return;
if (is_pmd(crste))
@@ -1101,6 +1160,7 @@ struct gmap_protect_asce_top_level {
static inline int __gmap_protect_asce_top_level(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
struct gmap_protect_asce_top_level *context)
{
+ struct gmap *parent;
int rc, i;
guard(write_lock)(&sg->kvm->mmu_lock);
@@ -1108,7 +1168,12 @@ static inline int __gmap_protect_asce_top_level(struct kvm_s390_mmu_cache *mc, s
if (kvm_s390_array_needs_retry_safe(sg->kvm, context->seq, context->f))
return -EAGAIN;
- scoped_guard(spinlock, &sg->parent->children_lock) {
+ parent = READ_ONCE(sg->parent);
+ if (!parent)
+ return -EAGAIN;
+ scoped_guard(spinlock, &parent->children_lock) {
+ if (READ_ONCE(sg->parent) != parent)
+ return -EAGAIN;
for (i = 0; i < CRST_TABLE_PAGES; i++) {
if (!context->f[i].valid)
continue;
@@ -1191,6 +1256,9 @@ struct gmap *gmap_create_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *pare
struct gmap *sg, *new;
int rc;
+ if (WARN_ON(!parent))
+ return ERR_PTR(-EINVAL);
+
scoped_guard(spinlock, &parent->children_lock) {
sg = gmap_find_shadow(parent, asce, edat_level);
if (sg) {
diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h
index ccb5cd751e31..579399ef5480 100644
--- a/arch/s390/kvm/gmap.h
+++ b/arch/s390/kvm/gmap.h
@@ -185,6 +185,8 @@ static inline union pgste _gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, un
else
_gmap_handle_vsie_unshadow_event(gmap, gfn);
}
+ if (!ptep->s.d && newpte.s.d && !newpte.s.s)
+ SetPageDirty(pfn_to_page(newpte.h.pfra));
return __dat_ptep_xchg(ptep, pgste, newpte, gfn, gmap->asce, uses_skeys(gmap));
}
@@ -194,35 +196,42 @@ static inline union pgste gmap_ptep_xchg(struct gmap *gmap, union pte *ptep, uni
return _gmap_ptep_xchg(gmap, ptep, newpte, pgste, gfn, true);
}
-static inline void _gmap_crstep_xchg(struct gmap *gmap, union crste *crstep, union crste ne,
- gfn_t gfn, bool needs_lock)
+static inline bool __must_check _gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
+ union crste oldcrste, union crste newcrste,
+ gfn_t gfn, bool needs_lock)
{
- unsigned long align = 8 + (is_pmd(*crstep) ? 0 : 11);
+ unsigned long align = is_pmd(newcrste) ? _PAGE_ENTRIES : _PAGE_ENTRIES * _CRST_ENTRIES;
+
+ if (KVM_BUG_ON(crstep->h.tt != oldcrste.h.tt || newcrste.h.tt != oldcrste.h.tt, gmap->kvm))
+ return true;
lockdep_assert_held(&gmap->kvm->mmu_lock);
if (!needs_lock)
lockdep_assert_held(&gmap->children_lock);
gfn = ALIGN_DOWN(gfn, align);
- if (crste_prefix(*crstep) && (ne.h.p || ne.h.i || !crste_prefix(ne))) {
- ne.s.fc1.prefix_notif = 0;
+ if (crste_prefix(oldcrste) && (newcrste.h.p || newcrste.h.i || !crste_prefix(newcrste))) {
+ newcrste.s.fc1.prefix_notif = 0;
gmap_unmap_prefix(gmap, gfn, gfn + align);
}
- if (crste_leaf(*crstep) && crstep->s.fc1.vsie_notif &&
- (ne.h.p || ne.h.i || !ne.s.fc1.vsie_notif)) {
- ne.s.fc1.vsie_notif = 0;
+ if (crste_leaf(oldcrste) && oldcrste.s.fc1.vsie_notif &&
+ (newcrste.h.p || newcrste.h.i || !newcrste.s.fc1.vsie_notif)) {
+ newcrste.s.fc1.vsie_notif = 0;
if (needs_lock)
gmap_handle_vsie_unshadow_event(gmap, gfn);
else
_gmap_handle_vsie_unshadow_event(gmap, gfn);
}
- dat_crstep_xchg(crstep, ne, gfn, gmap->asce);
+ if (!oldcrste.s.fc1.d && newcrste.s.fc1.d && !newcrste.s.fc1.s)
+ SetPageDirty(phys_to_page(crste_origin_large(newcrste)));
+ return dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, gmap->asce);
}
-static inline void gmap_crstep_xchg(struct gmap *gmap, union crste *crstep, union crste ne,
- gfn_t gfn)
+static inline bool __must_check gmap_crstep_xchg_atomic(struct gmap *gmap, union crste *crstep,
+ union crste oldcrste, union crste newcrste,
+ gfn_t gfn)
{
- return _gmap_crstep_xchg(gmap, crstep, ne, gfn, true);
+ return _gmap_crstep_xchg_atomic(gmap, crstep, oldcrste, newcrste, gfn, true);
}
/**
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 18932a65ca68..7cb8ce833b62 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2724,6 +2724,9 @@ static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
bit = bit_nr + (addr % PAGE_SIZE) * 8;
+ /* kvm_set_routing_entry() should never allow this to happen */
+ WARN_ON_ONCE(bit > (PAGE_SIZE * BITS_PER_BYTE - 1));
+
return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit;
}
@@ -2824,6 +2827,12 @@ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu,
int rc;
mci.val = mcck_info->mcic;
+
+ /* log machine checks being reinjected on all debugs */
+ VCPU_EVENT(vcpu, 2, "guest machine check %lx", mci.val);
+ KVM_EVENT(2, "guest machine check %lx", mci.val);
+ pr_info("guest machine check pid %d: %lx", current->pid, mci.val);
+
if (mci.sr)
cr14 |= CR14_RECOVERY_SUBMASK;
if (mci.dg)
@@ -2852,6 +2861,7 @@ int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
+ const struct kvm_irq_routing_s390_adapter *adapter;
u64 uaddr_s, uaddr_i;
int idx;
@@ -2862,6 +2872,14 @@ int kvm_set_routing_entry(struct kvm *kvm,
return -EINVAL;
e->set = set_adapter_int;
+ adapter = &ue->u.adapter;
+ if (adapter->summary_addr + (adapter->summary_offset / 8) >=
+ (adapter->summary_addr & PAGE_MASK) + PAGE_SIZE)
+ return -EINVAL;
+ if (adapter->ind_addr + (adapter->ind_offset / 8) >=
+ (adapter->ind_addr & PAGE_MASK) + PAGE_SIZE)
+ return -EINVAL;
+
idx = srcu_read_lock(&kvm->srcu);
uaddr_s = gpa_to_hva(kvm, ue->u.adapter.summary_addr);
uaddr_i = gpa_to_hva(kvm, ue->u.adapter.ind_addr);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3eb60aa932ec..d7838334a338 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -4617,7 +4617,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
return 0;
}
-static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
+static int vcpu_post_run(struct kvm_vcpu *vcpu, int sie_return)
{
struct mcck_volatile_info *mcck_info;
struct sie_page *sie_page;
@@ -4633,14 +4633,14 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
- if (exit_reason == -EINTR) {
- VCPU_EVENT(vcpu, 3, "%s", "machine check");
+ if (sie_return == SIE64_RETURN_MCCK) {
sie_page = container_of(vcpu->arch.sie_block,
struct sie_page, sie_block);
mcck_info = &sie_page->mcck_info;
kvm_s390_reinject_machine_check(vcpu, mcck_info);
return 0;
}
+ WARN_ON_ONCE(sie_return != SIE64_RETURN_NORMAL);
if (vcpu->arch.sie_block->icptcode > 0) {
rc = kvm_handle_sie_intercept(vcpu);
@@ -4679,7 +4679,7 @@ int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
- int rc, exit_reason;
+ int rc, sie_return;
struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
/*
@@ -4719,9 +4719,9 @@ xfer_to_guest_mode_check:
guest_timing_enter_irqoff();
__disable_cpu_timer_accounting(vcpu);
- exit_reason = kvm_s390_enter_exit_sie(vcpu->arch.sie_block,
- vcpu->run->s.regs.gprs,
- vcpu->arch.gmap->asce.val);
+ sie_return = kvm_s390_enter_exit_sie(vcpu->arch.sie_block,
+ vcpu->run->s.regs.gprs,
+ vcpu->arch.gmap->asce.val);
__enable_cpu_timer_accounting(vcpu);
guest_timing_exit_irqoff();
@@ -4744,7 +4744,7 @@ xfer_to_guest_mode_check:
}
kvm_vcpu_srcu_read_lock(vcpu);
- rc = vcpu_post_run(vcpu, exit_reason);
+ rc = vcpu_post_run(vcpu, sie_return);
if (rc || guestdbg_exit_pending(vcpu)) {
kvm_vcpu_srcu_read_unlock(vcpu);
break;
@@ -5520,9 +5520,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
#endif
case KVM_S390_VCPU_FAULT: {
- idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = vcpu_dat_fault_handler(vcpu, arg, 0);
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ gpa_t gaddr = arg;
+
+ scoped_guard(srcu, &vcpu->kvm->srcu) {
+ r = vcpu_ucontrol_translate(vcpu, &gaddr);
+ if (r)
+ break;
+
+ r = kvm_s390_faultin_gfn_simple(vcpu, NULL, gpa_to_gfn(gaddr), false);
+ if (r == PGM_ADDRESSING)
+ r = -EFAULT;
+ if (r <= 0)
+ break;
+ r = -EIO;
+ KVM_BUG_ON(r, vcpu->kvm);
+ }
break;
}
case KVM_ENABLE_CAP:
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index d249b10044eb..72895dddc39a 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1122,6 +1122,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, struc
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
+ unsigned long sie_return = SIE64_RETURN_NORMAL;
int guest_bp_isolation;
int rc = 0;
@@ -1163,7 +1164,7 @@ xfer_to_guest_mode_check:
goto xfer_to_guest_mode_check;
}
guest_timing_enter_irqoff();
- rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, sg->asce.val);
+ sie_return = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs, sg->asce.val);
guest_timing_exit_irqoff();
local_irq_enable();
}
@@ -1178,12 +1179,13 @@ skip_sie:
kvm_vcpu_srcu_read_lock(vcpu);
- if (rc == -EINTR) {
- VCPU_EVENT(vcpu, 3, "%s", "machine check");
+ if (sie_return == SIE64_RETURN_MCCK) {
kvm_s390_reinject_machine_check(vcpu, &vsie_page->mcck_info);
return 0;
}
+ WARN_ON_ONCE(sie_return != SIE64_RETURN_NORMAL);
+
if (rc > 0)
rc = 0; /* we could still have an icpt */
else if (current->thread.gmap_int_code)
@@ -1326,7 +1328,7 @@ static void unregister_shadow_scb(struct kvm_vcpu *vcpu)
static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
- struct gmap *sg;
+ struct gmap *sg = NULL;
int rc = 0;
while (1) {
@@ -1366,6 +1368,8 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
sg = gmap_put(sg);
cond_resched();
}
+ if (sg)
+ sg = gmap_put(sg);
if (rc == -EFAULT) {
/*
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index a52aa7a99b6b..191cc53caead 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -441,10 +441,17 @@ void do_secure_storage_access(struct pt_regs *regs)
folio = phys_to_folio(addr);
if (unlikely(!folio_try_get(folio)))
return;
- rc = arch_make_folio_accessible(folio);
+ rc = uv_convert_from_secure(folio_to_phys(folio));
+ if (!rc)
+ clear_bit(PG_arch_1, &folio->flags.f);
folio_put(folio);
+ /*
+ * There are some valid fixup types for kernel
+ * accesses to donated secure memory. zeropad is one
+ * of them.
+ */
if (rc)
- BUG();
+ return handle_fault_error_nolock(regs, 0);
} else {
if (faulthandler_disabled())
return handle_fault_error_nolock(regs, 0);
diff --git a/arch/x86/coco/sev/noinstr.c b/arch/x86/coco/sev/noinstr.c
index 9d94aca4a698..5afd663a1c21 100644
--- a/arch/x86/coco/sev/noinstr.c
+++ b/arch/x86/coco/sev/noinstr.c
@@ -121,6 +121,9 @@ noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
WARN_ON(!irqs_disabled());
+ if (!sev_cfg.ghcbs_initialized)
+ return boot_ghcb;
+
data = this_cpu_read(runtime_data);
ghcb = &data->ghcb_page;
@@ -164,6 +167,9 @@ noinstr void __sev_put_ghcb(struct ghcb_state *state)
WARN_ON(!irqs_disabled());
+ if (!sev_cfg.ghcbs_initialized)
+ return;
+
data = this_cpu_read(runtime_data);
ghcb = &data->ghcb_page;
diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
index 88c757ac8ccd..fbe2d10dd737 100644
--- a/arch/x86/entry/entry_fred.c
+++ b/arch/x86/entry/entry_fred.c
@@ -177,6 +177,16 @@ static noinstr void fred_extint(struct pt_regs *regs)
}
}
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+noinstr void exc_vmm_communication(struct pt_regs *regs, unsigned long error_code)
+{
+ if (user_mode(regs))
+ return user_exc_vmm_communication(regs, error_code);
+ else
+ return kernel_exc_vmm_communication(regs, error_code);
+}
+#endif
+
static noinstr void fred_hwexc(struct pt_regs *regs, unsigned long error_code)
{
/* Optimize for #PF. That's the only exception which matters performance wise */
@@ -207,6 +217,10 @@ static noinstr void fred_hwexc(struct pt_regs *regs, unsigned long error_code)
#ifdef CONFIG_X86_CET
case X86_TRAP_CP: return exc_control_protection(regs, error_code);
#endif
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ case X86_TRAP_VC: return exc_vmm_communication(regs, error_code);
+#endif
+
default: return fred_bad_type(regs, error_code);
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a8ff4376c286..ec0670114efa 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -433,7 +433,20 @@ static __always_inline void setup_lass(struct cpuinfo_x86 *c)
/* These bits should not change their value after CPU init is finished. */
static const unsigned long cr4_pinned_mask = X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP |
- X86_CR4_FSGSBASE | X86_CR4_CET | X86_CR4_FRED;
+ X86_CR4_FSGSBASE | X86_CR4_CET;
+
+/*
+ * The CR pinning protects against ROP on the 'mov %reg, %CRn' instruction(s).
+ * Since you can ROP directly to these instructions (barring shadow stack),
+ * any protection must follow immediately and unconditionally after that.
+ *
+ * Specifically, the CR[04] write functions below will have the value
+ * validation controlled by the @cr_pinning static_branch which is
+ * __ro_after_init, just like the cr4_pinned_bits value.
+ *
+ * Once set, an attacker will have to defeat page-tables to get around these
+ * restrictions. Which is a much bigger ask than 'simple' ROP.
+ */
static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
static unsigned long cr4_pinned_bits __ro_after_init;
@@ -2050,12 +2063,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
setup_umip(c);
setup_lass(c);
- /* Enable FSGSBASE instructions if available. */
- if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
- cr4_set_bits(X86_CR4_FSGSBASE);
- elf_hwcap2 |= HWCAP2_FSGSBASE;
- }
-
/*
* The vendor-specific functions might have changed features.
* Now we do "generic changes."
@@ -2416,6 +2423,18 @@ void cpu_init_exception_handling(bool boot_cpu)
/* GHCB needs to be setup to handle #VC. */
setup_ghcb();
+ /*
+ * On CPUs with FSGSBASE support, paranoid_entry() uses
+ * ALTERNATIVE-patched RDGSBASE/WRGSBASE instructions. Secondary CPUs
+ * boot after alternatives are patched globally, so early exceptions
+ * execute patched code that depends on FSGSBASE. Enable the feature
+ * before any exceptions occur.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_FSGSBASE)) {
+ cr4_set_bits(X86_CR4_FSGSBASE);
+ elf_hwcap2 |= HWCAP2_FSGSBASE;
+ }
+
if (cpu_feature_enabled(X86_FEATURE_FRED)) {
/* The boot CPU has enabled FRED during early boot */
if (!boot_cpu)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b922a8b00057..dd06453d5b72 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3044,12 +3044,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
bool prefetch = !fault || fault->prefetch;
bool write_fault = fault && fault->write;
- if (unlikely(is_noslot_pfn(pfn))) {
- vcpu->stat.pf_mmio_spte_created++;
- mark_mmio_spte(vcpu, sptep, gfn, pte_access);
- return RET_PF_EMULATE;
- }
-
if (is_shadow_present_pte(*sptep)) {
if (prefetch && is_last_spte(*sptep, level) &&
pfn == spte_to_pfn(*sptep))
@@ -3066,13 +3060,22 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
child = spte_to_child_sp(pte);
drop_parent_pte(vcpu->kvm, child, sptep);
flush = true;
- } else if (WARN_ON_ONCE(pfn != spte_to_pfn(*sptep))) {
+ } else if (pfn != spte_to_pfn(*sptep)) {
+ WARN_ON_ONCE(vcpu->arch.mmu->root_role.direct);
drop_spte(vcpu->kvm, sptep);
flush = true;
} else
was_rmapped = 1;
}
+ if (unlikely(is_noslot_pfn(pfn))) {
+ vcpu->stat.pf_mmio_spte_created++;
+ mark_mmio_spte(vcpu, sptep, gfn, pte_access);
+ if (flush)
+ kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level);
+ return RET_PF_EMULATE;
+ }
+
wrprot = make_spte(vcpu, sp, slot, pte_access, gfn, pfn, *sptep, prefetch,
false, host_writable, &spte);
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 35caa5746115..79f0818131e8 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -424,7 +424,7 @@ void __init efi_unmap_boot_services(void)
if (efi_enabled(EFI_DBG))
return;
- sz = sizeof(*ranges_to_free) * efi.memmap.nr_map + 1;
+ sz = sizeof(*ranges_to_free) * (efi.memmap.nr_map + 1);
ranges_to_free = kzalloc(sz, GFP_KERNEL);
if (!ranges_to_free) {
pr_err("Failed to allocate storage for freeable EFI regions\n");