Diffstat (limited to 'arch')
-rw-r--r--  arch/alpha/include/asm/elf.h | 6
-rw-r--r--  arch/alpha/include/asm/pgtable.h | 2
-rw-r--r--  arch/alpha/include/asm/processor.h | 8
-rw-r--r--  arch/alpha/kernel/osf_sys.c | 11
-rw-r--r--  arch/arm/Makefile | 2
-rw-r--r--  arch/arm/boot/dts/am335x-bone-common.dtsi | 8
-rw-r--r--  arch/arm/boot/dts/am335x-boneblack.dts | 2
-rw-r--r--  arch/arm/boot/dts/at91sam9263ek.dts | 2
-rw-r--r--  arch/arm/boot/dts/bcm2711.dtsi | 11
-rw-r--r--  arch/arm/boot/dts/imx6qdl-apalis.dtsi | 10
-rw-r--r--  arch/arm/boot/dts/imx6ul-imx6ull-opos6ul.dtsi | 3
-rw-r--r--  arch/arm/boot/dts/imx6ul-kontron-bl-common.dtsi | 1
-rw-r--r--  arch/arm/boot/dts/qcom-apq8064.dtsi | 13
-rw-r--r--  arch/arm/boot/dts/tegra114.dtsi | 2
-rw-r--r--  arch/arm/boot/dts/tny_a9263.dts | 2
-rw-r--r--  arch/arm/boot/dts/usb_a9263.dts | 4
-rw-r--r--  arch/arm/boot/dts/vfxxx.dtsi | 2
-rw-r--r--  arch/arm/crypto/aes-neonbs-glue.c | 2
-rw-r--r--  arch/arm/include/asm/ptrace.h | 5
-rw-r--r--  arch/arm/mach-aspeed/Kconfig | 1
-rw-r--r--  arch/arm/mach-at91/pm.c | 21
-rw-r--r--  arch/arm/mach-omap1/Kconfig | 1
-rw-r--r--  arch/arm/mach-omap2/clockdomain.h | 1
-rw-r--r--  arch/arm/mach-omap2/clockdomains33xx_data.c | 2
-rw-r--r--  arch/arm/mach-omap2/cm33xx.c | 14
-rw-r--r--  arch/arm/mach-omap2/pmic-cpcap.c | 6
-rw-r--r--  arch/arm/mach-rockchip/platsmp.c | 15
-rw-r--r--  arch/arm/mach-shmobile/headsmp.S | 1
-rw-r--r--  arch/arm/mach-tegra/reset.c | 2
-rw-r--r--  arch/arm/mm/fault.c | 8
-rw-r--r--  arch/arm/mm/ioremap.c | 4
-rw-r--r--  arch/arm64/Kconfig | 6
-rw-r--r--  arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts | 38
-rw-r--r--  arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts | 14
-rw-r--r--  arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi | 22
-rw-r--r--  arch/arm64/boot/dts/apple/t8103-jxxx.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi | 3
-rw-r--r--  arch/arm64/boot/dts/freescale/imx8mm-verdin-dahlia.dtsi | 6
-rw-r--r--  arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi | 27
-rw-r--r--  arch/arm64/boot/dts/freescale/imx8mn-beacon-som.dtsi | 3
-rw-r--r--  arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql.dtsi | 16
-rw-r--r--  arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi | 8
-rw-r--r--  arch/arm64/boot/dts/mediatek/mt6359.dtsi | 4
-rw-r--r--  arch/arm64/boot/dts/mediatek/mt8173.dtsi | 6
-rw-r--r--  arch/arm64/boot/dts/mediatek/mt8195.dtsi | 50
-rw-r--r--  arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/qcom/sc7180.dtsi | 10
-rw-r--r--  arch/arm64/boot/dts/qcom/sda660-inforce-ifc6560.dts | 2
-rw-r--r--  arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts | 3
-rw-r--r--  arch/arm64/boot/dts/qcom/sdm845.dtsi | 10
-rw-r--r--  arch/arm64/boot/dts/qcom/sm8250.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/qcom/sm8350.dtsi | 2
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3399-nanopi-r4s.dts | 2
-rw-r--r--  arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts | 8
-rw-r--r--  arch/arm64/boot/dts/ti/k3-am62-main.dtsi | 1
-rw-r--r--  arch/arm64/boot/dts/ti/k3-am65-main.dtsi | 20
-rw-r--r--  arch/arm64/boot/dts/ti/k3-j721e-sk.dts | 31
-rw-r--r--  arch/arm64/configs/defconfig | 3
-rw-r--r--  arch/arm64/include/asm/acpi.h | 2
-rw-r--r--  arch/arm64/include/asm/cputype.h | 8
-rw-r--r--  arch/arm64/include/asm/fpsimd.h | 4
-rw-r--r--  arch/arm64/include/asm/insn.h | 1
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 19
-rw-r--r--  arch/arm64/include/asm/kvm_hyp.h | 1
-rw-r--r--  arch/arm64/include/asm/pgtable.h | 3
-rw-r--r--  arch/arm64/include/asm/processor.h | 7
-rw-r--r--  arch/arm64/include/asm/spectre.h | 4
-rw-r--r--  arch/arm64/kernel/compat_alignment.c | 2
-rw-r--r--  arch/arm64/kernel/cpufeature.c | 1
-rw-r--r--  arch/arm64/kernel/entry.S | 6
-rw-r--r--  arch/arm64/kernel/fpsimd.c | 83
-rw-r--r--  arch/arm64/kernel/process.c | 2
-rw-r--r--  arch/arm64/kernel/proton-pack.c | 232
-rw-r--r--  arch/arm64/kernel/ptrace.c | 5
-rw-r--r--  arch/arm64/kernel/signal.c | 7
-rw-r--r--  arch/arm64/kernel/traps.c | 1
-rw-r--r--  arch/arm64/kvm/arm.c | 7
-rw-r--r--  arch/arm64/kvm/fpsimd.c | 92
-rw-r--r--  arch/arm64/kvm/hyp/entry.S | 5
-rw-r--r--  arch/arm64/kvm/hyp/include/hyp/switch.h | 106
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/hyp-main.c | 8
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/pkvm.c | 17
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/switch.c | 91
-rw-r--r--  arch/arm64/kvm/hyp/vhe/switch.c | 12
-rw-r--r--  arch/arm64/kvm/reset.c | 3
-rw-r--r--  arch/arm64/lib/insn.c | 60
-rw-r--r--  arch/arm64/mm/fault.c | 1
-rw-r--r--  arch/arm64/mm/mmu.c | 11
-rw-r--r--  arch/arm64/mm/ptdump_debugfs.c | 3
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c | 69
-rw-r--r--  arch/arm64/xen/hypercall.S | 21
-rw-r--r--  arch/loongarch/Kconfig | 5
-rw-r--r--  arch/loongarch/Makefile | 2
-rw-r--r--  arch/loongarch/include/asm/cache.h | 2
-rw-r--r--  arch/loongarch/include/asm/irqflags.h | 16
-rw-r--r--  arch/loongarch/include/asm/ptrace.h | 6
-rw-r--r--  arch/loongarch/kernel/acpi.c | 12
-rw-r--r--  arch/loongarch/kernel/machine_kexec.c | 4
-rw-r--r--  arch/loongarch/mm/hugetlbpage.c | 2
-rw-r--r--  arch/loongarch/mm/init.c | 3
-rw-r--r--  arch/loongarch/net/bpf_jit.c | 26
-rw-r--r--  arch/m68k/Kconfig.debug | 2
-rw-r--r--  arch/m68k/kernel/early_printk.c | 42
-rw-r--r--  arch/m68k/kernel/head.S | 39
-rw-r--r--  arch/m68k/mac/config.c | 2
-rw-r--r--  arch/mips/Makefile | 2
-rw-r--r--  arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts | 1
-rw-r--r--  arch/mips/crypto/chacha-core.S | 20
-rw-r--r--  arch/mips/dec/prom/init.c | 2
-rw-r--r--  arch/mips/include/asm/ds1287.h | 2
-rw-r--r--  arch/mips/include/asm/ftrace.h | 16
-rw-r--r--  arch/mips/include/asm/mips-cm.h | 22
-rw-r--r--  arch/mips/include/asm/ptrace.h | 3
-rw-r--r--  arch/mips/include/asm/vpe.h | 8
-rw-r--r--  arch/mips/kernel/cevt-ds1287.c | 1
-rw-r--r--  arch/mips/kernel/mips-cm.c | 14
-rw-r--r--  arch/mips/kernel/pm-cps.c | 30
-rw-r--r--  arch/mips/kernel/process.c | 16
-rw-r--r--  arch/mips/lantiq/falcon/sysctrl.c | 23
-rw-r--r--  arch/mips/mm/tlb-r4k.c | 56
-rw-r--r--  arch/mips/vdso/Makefile | 1
-rw-r--r--  arch/parisc/Makefile | 6
-rw-r--r--  arch/parisc/boot/compressed/Makefile | 1
-rw-r--r--  arch/parisc/include/asm/special_insns.h | 28
-rw-r--r--  arch/parisc/include/asm/uaccess.h | 21
-rw-r--r--  arch/parisc/kernel/entry.S | 17
-rw-r--r--  arch/parisc/kernel/pdt.c | 2
-rw-r--r--  arch/parisc/kernel/syscall.S | 30
-rw-r--r--  arch/parisc/kernel/unaligned.c | 2
-rw-r--r--  arch/parisc/lib/memcpy.c | 19
-rw-r--r--  arch/parisc/math-emu/driver.c | 16
-rw-r--r--  arch/parisc/mm/fault.c | 4
-rw-r--r--  arch/powerpc/configs/mpc885_ads_defconfig | 2
-rw-r--r--  arch/powerpc/configs/ppc6xx_defconfig | 1
-rw-r--r--  arch/powerpc/include/asm/floppy.h | 5
-rw-r--r--  arch/powerpc/include/uapi/asm/ioctls.h | 8
-rw-r--r--  arch/powerpc/kernel/eeh.c | 3
-rw-r--r--  arch/powerpc/kernel/eeh_driver.c | 48
-rw-r--r--  arch/powerpc/kernel/eeh_pe.c | 11
-rw-r--r--  arch/powerpc/kernel/pci-hotplug.c | 3
-rw-r--r--  arch/powerpc/kernel/prom_init.c | 4
-rw-r--r--  arch/powerpc/kernel/rtas.c | 4
-rw-r--r--  arch/powerpc/kexec/crash.c | 5
-rw-r--r--  arch/powerpc/kvm/e500_mmu_host.c | 21
-rw-r--r--  arch/powerpc/perf/core-book3s.c | 20
-rw-r--r--  arch/powerpc/perf/isa207-common.c | 4
-rw-r--r--  arch/powerpc/platforms/512x/mpc512x_lpbfifo.c | 6
-rw-r--r--  arch/powerpc/platforms/book3s/vas-api.c | 9
-rw-r--r--  arch/powerpc/platforms/cell/spufs/gang.c | 1
-rw-r--r--  arch/powerpc/platforms/cell/spufs/inode.c | 63
-rw-r--r--  arch/powerpc/platforms/cell/spufs/spufs.h | 2
-rw-r--r--  arch/powerpc/platforms/powernv/memtrace.c | 8
-rw-r--r--  arch/powerpc/platforms/pseries/msi.c | 7
-rw-r--r--  arch/riscv/include/asm/ftrace.h | 4
-rw-r--r--  arch/riscv/include/asm/kgdb.h | 9
-rw-r--r--  arch/riscv/include/asm/page.h | 1
-rw-r--r--  arch/riscv/include/asm/pgtable.h | 2
-rw-r--r--  arch/riscv/include/asm/syscall.h | 7
-rw-r--r--  arch/riscv/kernel/kgdb.c | 6
-rw-r--r--  arch/riscv/kernel/setup.c | 36
-rw-r--r--  arch/riscv/kvm/vcpu_sbi_replace.c | 8
-rw-r--r--  arch/riscv/mm/init.c | 17
-rw-r--r--  arch/s390/hypfs/hypfs_dbfs.c | 19
-rw-r--r--  arch/s390/include/asm/timex.h | 13
-rw-r--r--  arch/s390/kernel/entry.S | 2
-rw-r--r--  arch/s390/kernel/time.c | 2
-rw-r--r--  arch/s390/kernel/traps.c | 6
-rw-r--r--  arch/s390/kvm/gaccess.c | 8
-rw-r--r--  arch/s390/kvm/trace-s390.h | 4
-rw-r--r--  arch/s390/mm/dump_pagetables.c | 2
-rw-r--r--  arch/s390/net/bpf_jit_comp.c | 12
-rw-r--r--  arch/s390/pci/pci_event.c | 4
-rw-r--r--  arch/s390/pci/pci_mmio.c | 2
-rw-r--r--  arch/sh/Makefile | 10
-rw-r--r--  arch/sh/boot/compressed/Makefile | 4
-rw-r--r--  arch/sh/boot/romimage/Makefile | 4
-rw-r--r--  arch/sparc/mm/tlb.c | 5
-rw-r--r--  arch/um/Makefile | 1
-rw-r--r--  arch/um/drivers/rtc_user.c | 2
-rw-r--r--  arch/um/drivers/ubd_user.c | 2
-rw-r--r--  arch/um/drivers/vector_kern.c | 42
-rw-r--r--  arch/um/include/asm/asm-prototypes.h | 5
-rw-r--r--  arch/um/include/asm/thread_info.h | 4
-rw-r--r--  arch/um/include/shared/os.h | 1
-rw-r--r--  arch/um/kernel/Makefile | 2
-rw-r--r--  arch/um/kernel/maccess.c | 19
-rw-r--r--  arch/um/kernel/mem.c | 1
-rw-r--r--  arch/um/kernel/process.c | 20
-rw-r--r--  arch/um/kernel/trap.c | 129
-rw-r--r--  arch/um/os-Linux/process.c | 51
-rw-r--r--  arch/x86/Kconfig | 24
-rw-r--r--  arch/x86/Makefile | 2
-rw-r--r--  arch/x86/boot/compressed/sev.c | 7
-rw-r--r--  arch/x86/boot/cpuflags.c | 13
-rw-r--r--  arch/x86/boot/genimage.sh | 5
-rw-r--r--  arch/x86/entry/calling.h | 2
-rw-r--r--  arch/x86/entry/entry.S | 10
-rw-r--r--  arch/x86/entry/entry_64.S | 20
-rw-r--r--  arch/x86/events/amd/ibs.c | 3
-rw-r--r--  arch/x86/events/core.c | 2
-rw-r--r--  arch/x86/events/intel/core.c | 130
-rw-r--r--  arch/x86/events/intel/ds.c | 21
-rw-r--r--  arch/x86/events/intel/uncore_snbep.c | 107
-rw-r--r--  arch/x86/events/perf_event.h | 3
-rw-r--r--  arch/x86/hyperv/irqdomain.c | 4
-rw-r--r--  arch/x86/include/asm/alternative.h | 32
-rw-r--r--  arch/x86/include/asm/cpu.h | 12
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 10
-rw-r--r--  arch/x86/include/asm/irqflags.h | 4
-rw-r--r--  arch/x86/include/asm/kexec.h | 18
-rw-r--r--  arch/x86/include/asm/kvm-x86-ops.h | 1
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 23
-rw-r--r--  arch/x86/include/asm/msr-index.h | 9
-rw-r--r--  arch/x86/include/asm/mwait.h | 19
-rw-r--r--  arch/x86/include/asm/nmi.h | 2
-rw-r--r--  arch/x86/include/asm/nospec-branch.h | 77
-rw-r--r--  arch/x86/include/asm/perf_event.h | 1
-rw-r--r--  arch/x86/include/asm/reboot.h | 5
-rw-r--r--  arch/x86/include/asm/spec-ctrl.h | 11
-rw-r--r--  arch/x86/include/asm/tlbflush.h | 2
-rw-r--r--  arch/x86/include/asm/virtext.h | 10
-rw-r--r--  arch/x86/include/asm/xen/hypercall.h | 5
-rw-r--r--  arch/x86/kernel/alternative.c | 248
-rw-r--r--  arch/x86/kernel/amd_nb.c | 9
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 63
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 308
-rw-r--r--  arch/x86/kernel/cpu/cacheinfo.c | 2
-rw-r--r--  arch/x86/kernel/cpu/common.c | 103
-rw-r--r--  arch/x86/kernel/cpu/hygon.c | 3
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 72
-rw-r--r--  arch/x86/kernel/cpu/mce/amd.c | 28
-rw-r--r--  arch/x86/kernel/cpu/mce/core.c | 8
-rw-r--r--  arch/x86/kernel/cpu/mce/intel.c | 1
-rw-r--r--  arch/x86/kernel/cpu/microcode/amd.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mshyperv.c | 11
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c | 2
-rw-r--r--  arch/x86/kernel/cpu/scattered.c | 3
-rw-r--r--  arch/x86/kernel/cpu/sgx/driver.c | 10
-rw-r--r--  arch/x86/kernel/cpu/sgx/ioctl.c | 7
-rw-r--r--  arch/x86/kernel/cpu/sgx/main.c | 2
-rw-r--r--  arch/x86/kernel/dumpstack.c | 5
-rw-r--r--  arch/x86/kernel/e820.c | 17
-rw-r--r--  arch/x86/kernel/fpu/core.c | 6
-rw-r--r--  arch/x86/kernel/ftrace.c | 4
-rw-r--r--  arch/x86/kernel/i8253.c | 3
-rw-r--r--  arch/x86/kernel/ioport.c | 13
-rw-r--r--  arch/x86/kernel/irq.c | 2
-rw-r--r--  arch/x86/kernel/kprobes/core.c | 1
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 45
-rw-r--r--  arch/x86/kernel/module.c | 16
-rw-r--r--  arch/x86/kernel/nmi.c | 42
-rw-r--r--  arch/x86/kernel/process.c | 28
-rw-r--r--  arch/x86/kernel/reboot.c | 53
-rw-r--r--  arch/x86/kernel/sev-shared.c | 18
-rw-r--r--  arch/x86/kernel/sev.c | 11
-rw-r--r--  arch/x86/kernel/static_call.c | 2
-rw-r--r--  arch/x86/kernel/tsc.c | 4
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S | 10
-rw-r--r--  arch/x86/kvm/cpuid.c | 9
-rw-r--r--  arch/x86/kvm/hyperv.c | 10
-rw-r--r--  arch/x86/kvm/lapic.c | 61
-rw-r--r--  arch/x86/kvm/lapic.h | 1
-rw-r--r--  arch/x86/kvm/reverse_cpuid.h | 8
-rw-r--r--  arch/x86/kvm/svm/avic.c | 60
-rw-r--r--  arch/x86/kvm/svm/sev.c | 4
-rw-r--r--  arch/x86/kvm/svm/svm.c | 72
-rw-r--r--  arch/x86/kvm/svm/svm.h | 2
-rw-r--r--  arch/x86/kvm/svm/vmenter.S | 15
-rw-r--r--  arch/x86/kvm/trace.h | 9
-rw-r--r--  arch/x86/kvm/vmx/nested.c | 26
-rw-r--r--  arch/x86/kvm/vmx/pmu_intel.c | 8
-rw-r--r--  arch/x86/kvm/vmx/posted_intr.c | 28
-rw-r--r--  arch/x86/kvm/vmx/vmx.c | 182
-rw-r--r--  arch/x86/kvm/vmx/vmx.h | 31
-rw-r--r--  arch/x86/kvm/x86.c | 77
-rw-r--r--  arch/x86/kvm/x86.h | 12
-rw-r--r--  arch/x86/kvm/xen.c | 15
-rw-r--r--  arch/x86/lib/retpoline.S | 39
-rw-r--r--  arch/x86/mm/extable.c | 5
-rw-r--r--  arch/x86/mm/init.c | 32
-rw-r--r--  arch/x86/mm/init_64.c | 15
-rw-r--r--  arch/x86/mm/kaslr.c | 10
-rw-r--r--  arch/x86/mm/mem_encrypt_identity.c | 4
-rw-r--r--  arch/x86/mm/pat/cpa-test.c | 2
-rw-r--r--  arch/x86/mm/tlb.c | 29
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 60
-rw-r--r--  arch/x86/platform/pvh/head.S | 7
-rw-r--r--  arch/x86/tools/insn_decoder_test.c | 5
-rw-r--r--  arch/x86/um/asm/checksum.h | 3
-rw-r--r--  arch/x86/um/os-Linux/mcontext.c | 3
290 files changed, 3587 insertions(+), 1607 deletions(-)
diff --git a/arch/alpha/include/asm/elf.h b/arch/alpha/include/asm/elf.h
index 8049997fa372..2039a8c8d547 100644
--- a/arch/alpha/include/asm/elf.h
+++ b/arch/alpha/include/asm/elf.h
@@ -74,7 +74,7 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
/*
* This is used to ensure we don't load something for the wrong architecture.
*/
-#define elf_check_arch(x) ((x)->e_machine == EM_ALPHA)
+#define elf_check_arch(x) (((x)->e_machine == EM_ALPHA) && !((x)->e_flags & EF_ALPHA_32BIT))
/*
* These are used to set parameters in the core dumps.
@@ -145,10 +145,6 @@ extern int dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task);
: amask (AMASK_CIX) ? "ev6" : "ev67"); \
})
-#define SET_PERSONALITY(EX) \
- set_personality(((EX).e_flags & EF_ALPHA_32BIT) \
- ? PER_LINUX_32BIT : PER_LINUX)
-
extern int alpha_l1i_cacheshape;
extern int alpha_l1d_cacheshape;
extern int alpha_l2_cacheshape;
diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 9e45f6735d5d..6fe4e9deeb5e 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -322,7 +322,7 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
extern void paging_init(void);
-/* We have our own get_unmapped_area to cope with ADDR_LIMIT_32BIT. */
+/* We have our own get_unmapped_area */
#define HAVE_ARCH_UNMAPPED_AREA
#endif /* _ALPHA_PGTABLE_H */
diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h
index 714abe494e5f..916a2dd782c1 100644
--- a/arch/alpha/include/asm/processor.h
+++ b/arch/alpha/include/asm/processor.h
@@ -8,23 +8,19 @@
#ifndef __ASM_ALPHA_PROCESSOR_H
#define __ASM_ALPHA_PROCESSOR_H
-#include <linux/personality.h> /* for ADDR_LIMIT_32BIT */
-
/*
* We have a 42-bit user address space: 4TB user VM...
*/
#define TASK_SIZE (0x40000000000UL)
-#define STACK_TOP \
- (current->personality & ADDR_LIMIT_32BIT ? 0x80000000 : 0x00120000000UL)
+#define STACK_TOP (0x00120000000UL)
#define STACK_TOP_MAX 0x00120000000UL
/* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
*/
-#define TASK_UNMAPPED_BASE \
- ((current->personality & ADDR_LIMIT_32BIT) ? 0x40000000 : TASK_SIZE / 2)
+#define TASK_UNMAPPED_BASE (TASK_SIZE / 2)
/* This is dead. Everything has been moved to thread_info. */
struct thread_struct { };
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index c54469b369cb..03b70b6c9250 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1213,8 +1213,7 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p)
return ret;
}
-/* Get an address range which is currently unmapped. Similar to the
- generic version except that we know how to honor ADDR_LIMIT_32BIT. */
+/* Get an address range which is currently unmapped. */
static unsigned long
arch_get_unmapped_area_1(unsigned long addr, unsigned long len,
@@ -1236,13 +1235,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags)
{
- unsigned long limit;
-
- /* "32 bit" actually means 31 bit, since pointers sign extend. */
- if (current->personality & ADDR_LIMIT_32BIT)
- limit = 0x80000000;
- else
- limit = TASK_SIZE;
+ unsigned long limit = TASK_SIZE;
if (len > limit)
return -ENOMEM;
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index c846119c448f..590617240947 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -133,7 +133,7 @@ endif
# Need -Uarm for gcc < 3.x
KBUILD_CFLAGS +=$(CFLAGS_ABI) $(CFLAGS_ISA) $(arch-y) $(tune-y) $(call cc-option,-mshort-load-bytes,$(call cc-option,-malignment-traps,)) -msoft-float -Uarm
-KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_ISA) $(arch-y) $(tune-y) -include asm/unified.h -msoft-float
+KBUILD_AFLAGS +=$(CFLAGS_ABI) $(AFLAGS_ISA) $(arch-y) $(tune-y) -include $(srctree)/arch/arm/include/asm/unified.h -msoft-float
CHECKFLAGS += -D__arm__
diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi
index 02e04a12a270..bfd26ebd7b9e 100644
--- a/arch/arm/boot/dts/am335x-bone-common.dtsi
+++ b/arch/arm/boot/dts/am335x-bone-common.dtsi
@@ -145,6 +145,8 @@
/* MDIO */
AM33XX_PADCONF(AM335X_PIN_MDIO, PIN_INPUT_PULLUP | SLEWCTRL_FAST, MUX_MODE0)
AM33XX_PADCONF(AM335X_PIN_MDC, PIN_OUTPUT_PULLUP, MUX_MODE0)
+ /* Added to support GPIO controlled PHY reset */
+ AM33XX_PADCONF(AM335X_PIN_UART0_CTSN, PIN_OUTPUT_PULLUP, MUX_MODE7)
>;
};
@@ -153,6 +155,8 @@
/* MDIO reset value */
AM33XX_PADCONF(AM335X_PIN_MDIO, PIN_INPUT_PULLDOWN, MUX_MODE7)
AM33XX_PADCONF(AM335X_PIN_MDC, PIN_INPUT_PULLDOWN, MUX_MODE7)
+ /* Added to support GPIO controlled PHY reset */
+ AM33XX_PADCONF(AM335X_PIN_UART0_CTSN, PIN_INPUT_PULLDOWN, MUX_MODE7)
>;
};
@@ -377,6 +381,10 @@
ethphy0: ethernet-phy@0 {
reg = <0>;
+ /* Support GPIO reset on revision C3 boards */
+ reset-gpios = <&gpio1 8 GPIO_ACTIVE_LOW>;
+ reset-assert-us = <300>;
+ reset-deassert-us = <50000>;
};
};
diff --git a/arch/arm/boot/dts/am335x-boneblack.dts b/arch/arm/boot/dts/am335x-boneblack.dts
index b956e2f60fe0..d20b935c0b69 100644
--- a/arch/arm/boot/dts/am335x-boneblack.dts
+++ b/arch/arm/boot/dts/am335x-boneblack.dts
@@ -34,7 +34,7 @@
"P9_18 [spi0_d1]",
"P9_17 [spi0_cs0]",
"[mmc0_cd]",
- "P8_42A [ecappwm0]",
+ "P9_42A [ecappwm0]",
"P8_35 [lcd d12]",
"P8_33 [lcd d13]",
"P8_31 [lcd d14]",
diff --git a/arch/arm/boot/dts/at91sam9263ek.dts b/arch/arm/boot/dts/at91sam9263ek.dts
index ce8baff6a9f4..e42e1a75a715 100644
--- a/arch/arm/boot/dts/at91sam9263ek.dts
+++ b/arch/arm/boot/dts/at91sam9263ek.dts
@@ -152,7 +152,7 @@
nand@3 {
reg = <0x3 0x0 0x800000>;
rb-gpios = <&pioA 22 GPIO_ACTIVE_HIGH>;
- cs-gpios = <&pioA 15 GPIO_ACTIVE_HIGH>;
+ cs-gpios = <&pioD 15 GPIO_ACTIVE_HIGH>;
nand-bus-width = <8>;
nand-ecc-mode = "soft";
nand-on-flash-bbt;
diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi
index 941c4d16791b..27b467219a40 100644
--- a/arch/arm/boot/dts/bcm2711.dtsi
+++ b/arch/arm/boot/dts/bcm2711.dtsi
@@ -134,7 +134,7 @@
clocks = <&clocks BCM2835_CLOCK_UART>,
<&clocks BCM2835_CLOCK_VPU>;
clock-names = "uartclk", "apb_pclk";
- arm,primecell-periphid = <0x00241011>;
+ arm,primecell-periphid = <0x00341011>;
status = "disabled";
};
@@ -145,7 +145,7 @@
clocks = <&clocks BCM2835_CLOCK_UART>,
<&clocks BCM2835_CLOCK_VPU>;
clock-names = "uartclk", "apb_pclk";
- arm,primecell-periphid = <0x00241011>;
+ arm,primecell-periphid = <0x00341011>;
status = "disabled";
};
@@ -156,7 +156,7 @@
clocks = <&clocks BCM2835_CLOCK_UART>,
<&clocks BCM2835_CLOCK_VPU>;
clock-names = "uartclk", "apb_pclk";
- arm,primecell-periphid = <0x00241011>;
+ arm,primecell-periphid = <0x00341011>;
status = "disabled";
};
@@ -167,7 +167,7 @@
clocks = <&clocks BCM2835_CLOCK_UART>,
<&clocks BCM2835_CLOCK_VPU>;
clock-names = "uartclk", "apb_pclk";
- arm,primecell-periphid = <0x00241011>;
+ arm,primecell-periphid = <0x00341011>;
status = "disabled";
};
@@ -451,8 +451,6 @@
IRQ_TYPE_LEVEL_LOW)>,
<GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) |
IRQ_TYPE_LEVEL_LOW)>;
- /* This only applies to the ARMv7 stub */
- arm,cpu-registers-not-fw-configured;
};
cpus: cpus {
@@ -1154,6 +1152,7 @@
};
&uart0 {
+ arm,primecell-periphid = <0x00341011>;
interrupts = <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;
};
diff --git a/arch/arm/boot/dts/imx6qdl-apalis.dtsi b/arch/arm/boot/dts/imx6qdl-apalis.dtsi
index 7c17b91f0965..dc170a68c42f 100644
--- a/arch/arm/boot/dts/imx6qdl-apalis.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-apalis.dtsi
@@ -101,6 +101,11 @@
};
};
+ poweroff {
+ compatible = "regulator-poweroff";
+ cpu-supply = <&vgen2_reg>;
+ };
+
reg_module_3v3: regulator-module-3v3 {
compatible = "regulator-fixed";
regulator-always-on;
@@ -220,10 +225,6 @@
status = "disabled";
};
-&clks {
- fsl,pmic-stby-poweroff;
-};
-
/* Apalis SPI1 */
&ecspi1 {
cs-gpios = <&gpio5 25 GPIO_ACTIVE_LOW>;
@@ -511,7 +512,6 @@
pmic: pmic@8 {
compatible = "fsl,pfuze100";
- fsl,pmic-stby-poweroff;
reg = <0x08>;
regulators {
diff --git a/arch/arm/boot/dts/imx6ul-imx6ull-opos6ul.dtsi b/arch/arm/boot/dts/imx6ul-imx6ull-opos6ul.dtsi
index f2386dcb9ff2..dda4fa91b2f2 100644
--- a/arch/arm/boot/dts/imx6ul-imx6ull-opos6ul.dtsi
+++ b/arch/arm/boot/dts/imx6ul-imx6ull-opos6ul.dtsi
@@ -40,6 +40,9 @@
reg = <1>;
interrupt-parent = <&gpio4>;
interrupts = <16 IRQ_TYPE_LEVEL_LOW>;
+ micrel,led-mode = <1>;
+ clocks = <&clks IMX6UL_CLK_ENET_REF>;
+ clock-names = "rmii-ref";
status = "okay";
};
};
diff --git a/arch/arm/boot/dts/imx6ul-kontron-bl-common.dtsi b/arch/arm/boot/dts/imx6ul-kontron-bl-common.dtsi
index 43868311f48a..bb324725411c 100644
--- a/arch/arm/boot/dts/imx6ul-kontron-bl-common.dtsi
+++ b/arch/arm/boot/dts/imx6ul-kontron-bl-common.dtsi
@@ -169,7 +169,6 @@
pinctrl-0 = <&pinctrl_uart2>;
linux,rs485-enabled-at-boot-time;
rs485-rx-during-tx;
- rs485-rts-active-low;
uart-has-rtscts;
status = "okay";
};
diff --git a/arch/arm/boot/dts/qcom-apq8064.dtsi b/arch/arm/boot/dts/qcom-apq8064.dtsi
index 2b3927a829b7..da7e780dc335 100644
--- a/arch/arm/boot/dts/qcom-apq8064.dtsi
+++ b/arch/arm/boot/dts/qcom-apq8064.dtsi
@@ -212,12 +212,6 @@
};
};
- sfpb_mutex: hwmutex {
- compatible = "qcom,sfpb-mutex";
- syscon = <&sfpb_wrapper_mutex 0x604 0x4>;
- #hwlock-cells = <1>;
- };
-
smem {
compatible = "qcom,smem";
memory-region = <&smem_region>;
@@ -361,9 +355,10 @@
pinctrl-0 = <&ps_hold>;
};
- sfpb_wrapper_mutex: syscon@1200000 {
- compatible = "syscon";
- reg = <0x01200000 0x8000>;
+ sfpb_mutex: hwmutex@1200600 {
+ compatible = "qcom,sfpb-mutex";
+ reg = <0x01200600 0x100>;
+ #hwlock-cells = <1>;
};
intc: interrupt-controller@2000000 {
diff --git a/arch/arm/boot/dts/tegra114.dtsi b/arch/arm/boot/dts/tegra114.dtsi
index 09996acad639..bb23bb39dd5b 100644
--- a/arch/arm/boot/dts/tegra114.dtsi
+++ b/arch/arm/boot/dts/tegra114.dtsi
@@ -139,7 +139,7 @@
reg = <0x54400000 0x00040000>;
clocks = <&tegra_car TEGRA114_CLK_DSIB>,
<&tegra_car TEGRA114_CLK_DSIBLP>,
- <&tegra_car TEGRA114_CLK_PLL_D2_OUT0>;
+ <&tegra_car TEGRA114_CLK_PLL_D_OUT0>;
clock-names = "dsi", "lp", "parent";
resets = <&tegra_car 82>;
reset-names = "dsi";
diff --git a/arch/arm/boot/dts/tny_a9263.dts b/arch/arm/boot/dts/tny_a9263.dts
index 62b7d9f9a926..c8b6318aaa83 100644
--- a/arch/arm/boot/dts/tny_a9263.dts
+++ b/arch/arm/boot/dts/tny_a9263.dts
@@ -64,7 +64,7 @@
nand@3 {
reg = <0x3 0x0 0x800000>;
rb-gpios = <&pioA 22 GPIO_ACTIVE_HIGH>;
- cs-gpios = <&pioA 15 GPIO_ACTIVE_HIGH>;
+ cs-gpios = <&pioD 15 GPIO_ACTIVE_HIGH>;
nand-bus-width = <8>;
nand-ecc-mode = "soft";
nand-on-flash-bbt;
diff --git a/arch/arm/boot/dts/usb_a9263.dts b/arch/arm/boot/dts/usb_a9263.dts
index b6cb9cdf8197..83d0b98dd287 100644
--- a/arch/arm/boot/dts/usb_a9263.dts
+++ b/arch/arm/boot/dts/usb_a9263.dts
@@ -58,7 +58,7 @@
};
spi0: spi@fffa4000 {
- cs-gpios = <&pioB 15 GPIO_ACTIVE_HIGH>;
+ cs-gpios = <&pioA 5 GPIO_ACTIVE_LOW>;
status = "okay";
flash@0 {
compatible = "atmel,at45", "atmel,dataflash";
@@ -84,7 +84,7 @@
nand@3 {
reg = <0x3 0x0 0x800000>;
rb-gpios = <&pioA 22 GPIO_ACTIVE_HIGH>;
- cs-gpios = <&pioA 15 GPIO_ACTIVE_HIGH>;
+ cs-gpios = <&pioD 15 GPIO_ACTIVE_HIGH>;
nand-bus-width = <8>;
nand-ecc-mode = "soft";
nand-on-flash-bbt;
diff --git a/arch/arm/boot/dts/vfxxx.dtsi b/arch/arm/boot/dts/vfxxx.dtsi
index d53f9c9db8bf..eb7973fb4713 100644
--- a/arch/arm/boot/dts/vfxxx.dtsi
+++ b/arch/arm/boot/dts/vfxxx.dtsi
@@ -617,7 +617,7 @@
ftm: ftm@400b8000 {
compatible = "fsl,ftm-timer";
- reg = <0x400b8000 0x1000 0x400b9000 0x1000>;
+ reg = <0x400b8000 0x1000>, <0x400b9000 0x1000>;
interrupts = <44 IRQ_TYPE_LEVEL_HIGH>;
clock-names = "ftm-evt", "ftm-src",
"ftm-evt-counter-en", "ftm-src-counter-en";
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
index 0ca94b90bc4e..ba98daeb119c 100644
--- a/arch/arm/crypto/aes-neonbs-glue.c
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -245,7 +245,7 @@ static int ctr_encrypt(struct skcipher_request *req)
while (walk.nbytes > 0) {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
- int bytes = walk.nbytes;
+ unsigned int bytes = walk.nbytes;
if (unlikely(bytes < AES_BLOCK_SIZE))
src = dst = memcpy(buf + sizeof(buf) - bytes,
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index 1408a6a15d0e..fa622b77c785 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -10,6 +10,7 @@
#include <uapi/asm/ptrace.h>
#ifndef __ASSEMBLY__
+#include <linux/bitfield.h>
#include <linux/types.h>
struct pt_regs {
@@ -35,8 +36,8 @@ struct svc_pt_regs {
#ifndef CONFIG_CPU_V7M
#define isa_mode(regs) \
- ((((regs)->ARM_cpsr & PSR_J_BIT) >> (__ffs(PSR_J_BIT) - 1)) | \
- (((regs)->ARM_cpsr & PSR_T_BIT) >> (__ffs(PSR_T_BIT))))
+ (FIELD_GET(PSR_J_BIT, (regs)->ARM_cpsr) << 1 | \
+ FIELD_GET(PSR_T_BIT, (regs)->ARM_cpsr))
#else
#define isa_mode(regs) 1 /* Thumb */
#endif
diff --git a/arch/arm/mach-aspeed/Kconfig b/arch/arm/mach-aspeed/Kconfig
index 080019aa6fcd..fcf287edd0e5 100644
--- a/arch/arm/mach-aspeed/Kconfig
+++ b/arch/arm/mach-aspeed/Kconfig
@@ -2,7 +2,6 @@
menuconfig ARCH_ASPEED
bool "Aspeed BMC architectures"
depends on (CPU_LITTLE_ENDIAN && ARCH_MULTI_V5) || ARCH_MULTI_V6 || ARCH_MULTI_V7
- select SRAM
select WATCHDOG
select ASPEED_WATCHDOG
select MFD_SYSCON
diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 4d0d0d49a744..77aec670f635 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -537,11 +537,12 @@ extern u32 at91_pm_suspend_in_sram_sz;
static int at91_suspend_finish(unsigned long val)
{
- unsigned char modified_gray_code[] = {
- 0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x04, 0x05, 0x0c, 0x0d,
- 0x0e, 0x0f, 0x0a, 0x0b, 0x08, 0x09, 0x18, 0x19, 0x1a, 0x1b,
- 0x1e, 0x1f, 0x1c, 0x1d, 0x14, 0x15, 0x16, 0x17, 0x12, 0x13,
- 0x10, 0x11,
+ /* SYNOPSYS workaround to fix a bug in the calibration logic */
+ unsigned char modified_fix_code[] = {
+ 0x00, 0x01, 0x01, 0x06, 0x07, 0x0c, 0x06, 0x07, 0x0b, 0x18,
+ 0x0a, 0x0b, 0x0c, 0x0d, 0x0d, 0x0a, 0x13, 0x13, 0x12, 0x13,
+ 0x14, 0x15, 0x15, 0x12, 0x18, 0x19, 0x19, 0x1e, 0x1f, 0x14,
+ 0x1e, 0x1f,
};
unsigned int tmp, index;
int i;
@@ -552,25 +553,25 @@ static int at91_suspend_finish(unsigned long val)
* restore the ZQ0SR0 with the value saved here. But the
* calibration is buggy and restoring some values from ZQ0SR0
* is forbidden and risky thus we need to provide processed
- * values for these (modified gray code values).
+ * values for these.
*/
tmp = readl(soc_pm.data.ramc_phy + DDR3PHY_ZQ0SR0);
/* Store pull-down output impedance select. */
index = (tmp >> DDR3PHY_ZQ0SR0_PDO_OFF) & 0x1f;
- soc_pm.bu->ddr_phy_calibration[0] = modified_gray_code[index];
+ soc_pm.bu->ddr_phy_calibration[0] = modified_fix_code[index] << DDR3PHY_ZQ0SR0_PDO_OFF;
/* Store pull-up output impedance select. */
index = (tmp >> DDR3PHY_ZQ0SR0_PUO_OFF) & 0x1f;
- soc_pm.bu->ddr_phy_calibration[0] |= modified_gray_code[index];
+ soc_pm.bu->ddr_phy_calibration[0] |= modified_fix_code[index] << DDR3PHY_ZQ0SR0_PUO_OFF;
/* Store pull-down on-die termination impedance select. */
index = (tmp >> DDR3PHY_ZQ0SR0_PDODT_OFF) & 0x1f;
- soc_pm.bu->ddr_phy_calibration[0] |= modified_gray_code[index];
+ soc_pm.bu->ddr_phy_calibration[0] |= modified_fix_code[index] << DDR3PHY_ZQ0SR0_PDODT_OFF;
/* Store pull-up on-die termination impedance select. */
index = (tmp >> DDR3PHY_ZQ0SRO_PUODT_OFF) & 0x1f;
- soc_pm.bu->ddr_phy_calibration[0] |= modified_gray_code[index];
+ soc_pm.bu->ddr_phy_calibration[0] |= modified_fix_code[index] << DDR3PHY_ZQ0SRO_PUODT_OFF;
/*
* The 1st 8 words of memory might get corrupted in the process
diff --git a/arch/arm/mach-omap1/Kconfig b/arch/arm/mach-omap1/Kconfig
index 7ec7ada287e0..16bf3bed38fe 100644
--- a/arch/arm/mach-omap1/Kconfig
+++ b/arch/arm/mach-omap1/Kconfig
@@ -9,6 +9,7 @@ menuconfig ARCH_OMAP1
select ARCH_OMAP
select CLKSRC_MMIO
select FORCE_PCI if PCCARD
+ select GENERIC_IRQ_CHIP
select GPIOLIB
help
Support for older TI OMAP1 (omap7xx, omap15xx or omap16xx)
diff --git a/arch/arm/mach-omap2/clockdomain.h b/arch/arm/mach-omap2/clockdomain.h
index 68550b23c938..eb6ca2ea8067 100644
--- a/arch/arm/mach-omap2/clockdomain.h
+++ b/arch/arm/mach-omap2/clockdomain.h
@@ -48,6 +48,7 @@
#define CLKDM_NO_AUTODEPS (1 << 4)
#define CLKDM_ACTIVE_WITH_MPU (1 << 5)
#define CLKDM_MISSING_IDLE_REPORTING (1 << 6)
+#define CLKDM_STANDBY_FORCE_WAKEUP BIT(7)
#define CLKDM_CAN_HWSUP (CLKDM_CAN_ENABLE_AUTO | CLKDM_CAN_DISABLE_AUTO)
#define CLKDM_CAN_SWSUP (CLKDM_CAN_FORCE_SLEEP | CLKDM_CAN_FORCE_WAKEUP)
diff --git a/arch/arm/mach-omap2/clockdomains33xx_data.c b/arch/arm/mach-omap2/clockdomains33xx_data.c
index 87f4e927eb18..c05a3c07d448 100644
--- a/arch/arm/mach-omap2/clockdomains33xx_data.c
+++ b/arch/arm/mach-omap2/clockdomains33xx_data.c
@@ -19,7 +19,7 @@ static struct clockdomain l4ls_am33xx_clkdm = {
.pwrdm = { .name = "per_pwrdm" },
.cm_inst = AM33XX_CM_PER_MOD,
.clkdm_offs = AM33XX_CM_PER_L4LS_CLKSTCTRL_OFFSET,
- .flags = CLKDM_CAN_SWSUP,
+ .flags = CLKDM_CAN_SWSUP | CLKDM_STANDBY_FORCE_WAKEUP,
};
static struct clockdomain l3s_am33xx_clkdm = {
diff --git a/arch/arm/mach-omap2/cm33xx.c b/arch/arm/mach-omap2/cm33xx.c
index d61fa06117b4..5c833dec6352 100644
--- a/arch/arm/mach-omap2/cm33xx.c
+++ b/arch/arm/mach-omap2/cm33xx.c
@@ -20,6 +20,9 @@
#include "cm-regbits-34xx.h"
#include "cm-regbits-33xx.h"
#include "prm33xx.h"
+#if IS_ENABLED(CONFIG_SUSPEND)
+#include <linux/suspend.h>
+#endif
/*
* CLKCTRL_IDLEST_*: possible values for the CM_*_CLKCTRL.IDLEST bitfield:
@@ -328,8 +331,17 @@ static int am33xx_clkdm_clk_disable(struct clockdomain *clkdm)
{
bool hwsup = false;
+#if IS_ENABLED(CONFIG_SUSPEND)
+ /*
+ * In case of standby, don't put the l4ls clk domain to sleep.
+ * Since CM3 PM FW doesn't wake up/enable the l4ls clk domain
+ * upon wake-up, CM3 PM FW fails to wake up the MPU.
+ */
+ if (pm_suspend_target_state == PM_SUSPEND_STANDBY &&
+ (clkdm->flags & CLKDM_STANDBY_FORCE_WAKEUP))
+ return 0;
+#endif
hwsup = am33xx_cm_is_clkdm_in_hwsup(clkdm->cm_inst, clkdm->clkdm_offs);
-
if (!hwsup && (clkdm->flags & CLKDM_CAN_FORCE_SLEEP))
am33xx_clkdm_sleep(clkdm);
diff --git a/arch/arm/mach-omap2/pmic-cpcap.c b/arch/arm/mach-omap2/pmic-cpcap.c
index 668dc84fd31e..527cf4b7e378 100644
--- a/arch/arm/mach-omap2/pmic-cpcap.c
+++ b/arch/arm/mach-omap2/pmic-cpcap.c
@@ -264,7 +264,11 @@ int __init omap4_cpcap_init(void)
static int __init cpcap_late_init(void)
{
- omap4_vc_set_pmic_signaling(PWRDM_POWER_RET);
+ if (!of_find_compatible_node(NULL, NULL, "motorola,cpcap"))
+ return 0;
+
+ if (soc_is_omap443x() || soc_is_omap446x() || soc_is_omap447x())
+ omap4_vc_set_pmic_signaling(PWRDM_POWER_RET);
return 0;
}
diff --git a/arch/arm/mach-rockchip/platsmp.c b/arch/arm/mach-rockchip/platsmp.c
index 36915a073c23..f432d22bfed8 100644
--- a/arch/arm/mach-rockchip/platsmp.c
+++ b/arch/arm/mach-rockchip/platsmp.c
@@ -279,11 +279,6 @@ static void __init rockchip_smp_prepare_cpus(unsigned int max_cpus)
}
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) {
- if (rockchip_smp_prepare_sram(node)) {
- of_node_put(node);
- return;
- }
-
/* enable the SCU power domain */
pmu_set_power_domain(PMU_PWRDN_SCU, true);
@@ -316,11 +311,19 @@ static void __init rockchip_smp_prepare_cpus(unsigned int max_cpus)
asm ("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
ncores = ((l2ctlr >> 24) & 0x3) + 1;
}
- of_node_put(node);
/* Make sure that all cores except the first are really off */
for (i = 1; i < ncores; i++)
pmu_set_power_domain(0 + i, false);
+
+ if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) {
+ if (rockchip_smp_prepare_sram(node)) {
+ of_node_put(node);
+ return;
+ }
+ }
+
+ of_node_put(node);
}
static void __init rk3036_smp_prepare_cpus(unsigned int max_cpus)
diff --git a/arch/arm/mach-shmobile/headsmp.S b/arch/arm/mach-shmobile/headsmp.S
index 9466ae61f56a..b45c68d88275 100644
--- a/arch/arm/mach-shmobile/headsmp.S
+++ b/arch/arm/mach-shmobile/headsmp.S
@@ -136,6 +136,7 @@ ENDPROC(shmobile_smp_sleep)
.long shmobile_smp_arg - 1b
.bss
+ .align 2
.globl shmobile_smp_mpidr
shmobile_smp_mpidr:
.space NR_CPUS * 4
diff --git a/arch/arm/mach-tegra/reset.c b/arch/arm/mach-tegra/reset.c
index d5c805adf7a8..ea706fac6358 100644
--- a/arch/arm/mach-tegra/reset.c
+++ b/arch/arm/mach-tegra/reset.c
@@ -63,7 +63,7 @@ static void __init tegra_cpu_reset_handler_enable(void)
BUG_ON(is_enabled);
BUG_ON(tegra_cpu_reset_handler_size > TEGRA_IRAM_RESET_HANDLER_SIZE);
- memcpy(iram_base, (void *)__tegra_cpu_reset_handler_start,
+ memcpy_toio(iram_base, (void *)__tegra_cpu_reset_handler_start,
tegra_cpu_reset_handler_size);
err = call_firmware_op(set_cpu_boot_addr, 0, reset_address);
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index b0db85310331..16a7765511f8 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -27,6 +27,13 @@
#ifdef CONFIG_MMU
+bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
+{
+ unsigned long addr = (unsigned long)unsafe_src;
+
+ return addr >= TASK_SIZE && ULONG_MAX - addr >= size;
+}
+
/*
* This is useful to dump out the page tables associated with
* 'addr' in mm 'mm'.
@@ -552,6 +559,7 @@ do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
return;
+ pr_alert("8<--- cut here ---\n");
pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
inf->name, ifsr, addr);
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 1c5aeba9bc27..7801c5bb775d 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -515,7 +515,5 @@ void __init early_ioremap_init(void)
bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size,
unsigned long flags)
{
- unsigned long pfn = PHYS_PFN(offset);
-
- return memblock_is_map_memory(pfn);
+ return memblock_is_map_memory(offset);
}
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 57b437ed0974..6bb23a041e32 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -280,9 +280,9 @@ config ARCH_MMAP_RND_BITS_MAX
default 24 if ARM64_VA_BITS=39
default 27 if ARM64_VA_BITS=42
default 30 if ARM64_VA_BITS=47
- default 29 if ARM64_VA_BITS=48 && ARM64_64K_PAGES
- default 31 if ARM64_VA_BITS=48 && ARM64_16K_PAGES
- default 33 if ARM64_VA_BITS=48
+ default 29 if (ARM64_VA_BITS=48 || ARM64_VA_BITS=52) && ARM64_64K_PAGES
+ default 31 if (ARM64_VA_BITS=48 || ARM64_VA_BITS=52) && ARM64_16K_PAGES
+ default 33 if (ARM64_VA_BITS=48 || ARM64_VA_BITS=52)
default 14 if ARM64_64K_PAGES
default 16 if ARM64_16K_PAGES
default 18
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts
index 381d58cea092..c854c7e31051 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts
@@ -151,28 +151,12 @@
vcc-pg-supply = <&reg_aldo1>;
};
-&r_ir {
- linux,rc-map-name = "rc-beelink-gs1";
- status = "okay";
-};
-
-&r_pio {
- /*
- * FIXME: We can't add that supply for now since it would
- * create a circular dependency between pinctrl, the regulator
- * and the RSB Bus.
- *
- * vcc-pl-supply = <&reg_aldo1>;
- */
- vcc-pm-supply = <&reg_aldo1>;
-};
-
-&r_rsb {
+&r_i2c {
status = "okay";
- axp805: pmic@745 {
+ axp805: pmic@36 {
compatible = "x-powers,axp805", "x-powers,axp806";
- reg = <0x745>;
+ reg = <0x36>;
interrupt-parent = <&r_intc>;
interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_LOW>;
interrupt-controller;
@@ -290,6 +274,22 @@
};
};
+&r_ir {
+ linux,rc-map-name = "rc-beelink-gs1";
+ status = "okay";
+};
+
+&r_pio {
+ /*
+ * PL0 and PL1 are used for PMIC I2C;
+ * don't enable the pl-supply, else
+ * it will fail at boot.
+ *
+ * vcc-pl-supply = <&reg_aldo1>;
+ */
+ vcc-pm-supply = <&reg_aldo1>;
+};
+
&spdif {
pinctrl-names = "default";
pinctrl-0 = <&spdif_tx_pin>;
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts
index 6fc65e8db220..8c476e089185 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-3.dts
@@ -175,16 +175,12 @@
vcc-pg-supply = <&reg_vcc_wifi_io>;
};
-&r_ir {
- status = "okay";
-};
-
-&r_rsb {
+&r_i2c {
status = "okay";
- axp805: pmic@745 {
+ axp805: pmic@36 {
compatible = "x-powers,axp805", "x-powers,axp806";
- reg = <0x745>;
+ reg = <0x36>;
interrupt-parent = <&r_intc>;
interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_LOW>;
interrupt-controller;
@@ -295,6 +291,10 @@
};
};
+&r_ir {
+ status = "okay";
+};
+
&rtc {
clocks = <&ext_osc32k>;
};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi
index 92745128fcfe..4ec4996592be 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi.dtsi
@@ -112,20 +112,12 @@
vcc-pg-supply = <&reg_aldo1>;
};
-&r_ir {
- status = "okay";
-};
-
-&r_pio {
- vcc-pm-supply = <&reg_bldo3>;
-};
-
-&r_rsb {
+&r_i2c {
status = "okay";
- axp805: pmic@745 {
+ axp805: pmic@36 {
compatible = "x-powers,axp805", "x-powers,axp806";
- reg = <0x745>;
+ reg = <0x36>;
interrupt-parent = <&r_intc>;
interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_LOW>;
interrupt-controller;
@@ -240,6 +232,14 @@
};
};
+&r_ir {
+ status = "okay";
+};
+
+&r_pio {
+ vcc-pm-supply = <&reg_bldo3>;
+};
+
&rtc {
clocks = <&ext_osc32k>;
};
diff --git a/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi b/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi
index 3d15b8e2a6c1..6d78f623e6df 100644
--- a/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi
+++ b/arch/arm64/boot/dts/apple/t8103-jxxx.dtsi
@@ -70,7 +70,7 @@
*/
&port00 {
bus-range = <1 1>;
- wifi0: network@0,0 {
+ wifi0: wifi@0,0 {
compatible = "pci14e4,4425";
reg = <0x10000 0x0 0x0 0x0 0x0>;
/* To be filled by the loader */
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi
index cf07987ccc10..94bec023868c 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi
@@ -231,6 +231,7 @@
rtc: rtc@51 {
compatible = "nxp,pcf85263";
reg = <0x51>;
+ quartz-load-femtofarads = <12500>;
};
};
@@ -283,6 +284,8 @@
pinctrl-0 = <&pinctrl_usdhc3>;
pinctrl-1 = <&pinctrl_usdhc3_100mhz>;
pinctrl-2 = <&pinctrl_usdhc3_200mhz>;
+ assigned-clocks = <&clk IMX8MM_CLK_USDHC3>;
+ assigned-clock-rates = <400000000>;
bus-width = <8>;
non-removable;
status = "okay";
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin-dahlia.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin-dahlia.dtsi
index 7c3f5c54f040..d84ae7571d23 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-verdin-dahlia.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin-dahlia.dtsi
@@ -16,10 +16,10 @@
"Headphone Jack", "HPOUTR",
"IN2L", "Line In Jack",
"IN2R", "Line In Jack",
- "Headphone Jack", "MICBIAS",
- "IN1L", "Headphone Jack";
+ "Microphone Jack", "MICBIAS",
+ "IN1L", "Microphone Jack";
simple-audio-card,widgets =
- "Microphone", "Headphone Jack",
+ "Microphone", "Microphone Jack",
"Headphone", "Headphone Jack",
"Line", "Line In Jack";
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
index a9bfcdf378ce..13301a479506 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi
@@ -144,6 +144,19 @@
startup-delay-us = <20000>;
};
+ reg_usdhc2_vqmmc: regulator-usdhc2-vqmmc {
+ compatible = "regulator-gpio";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usdhc2_vsel>;
+ gpios = <&gpio1 4 GPIO_ACTIVE_HIGH>;
+ regulator-max-microvolt = <3300000>;
+ regulator-min-microvolt = <1800000>;
+ states = <1800000 0x1>,
+ <3300000 0x0>;
+ regulator-name = "PMIC_USDHC_VSELECT";
+ vin-supply = <&reg_nvcc_sd>;
+ };
+
reserved-memory {
#address-cells = <2>;
#size-cells = <2>;
@@ -262,7 +275,7 @@
"SODIMM_19",
"",
"",
- "",
+ "PMIC_USDHC_VSELECT",
"",
"",
"",
@@ -359,7 +372,6 @@
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_pmic>;
reg = <0x25>;
- sd-vsel-gpios = <&gpio1 4 GPIO_ACTIVE_HIGH>;
/*
* The bootloader is expected to switch on the I2C level shifter for the TLA2024 ADC
@@ -454,6 +466,7 @@
};
reg_nvcc_sd: LDO5 {
+ regulator-always-on;
regulator-max-microvolt = <3300000>;
regulator-min-microvolt = <1800000>;
regulator-name = "On-module +V3.3_1.8_SD (LDO5)";
@@ -788,6 +801,7 @@
pinctrl-2 = <&pinctrl_usdhc2_200mhz>, <&pinctrl_usdhc2_cd>;
pinctrl-3 = <&pinctrl_usdhc2_sleep>, <&pinctrl_usdhc2_cd_sleep>;
vmmc-supply = <&reg_usdhc2_vmmc>;
+ vqmmc-supply = <&reg_usdhc2_vqmmc>;
};
&wdog1 {
@@ -1210,13 +1224,17 @@
<MX8MM_IOMUXC_NAND_CLE_GPIO3_IO5 0x6>; /* SODIMM 76 */
};
+ pinctrl_usdhc2_vsel: usdhc2vselgrp {
+ fsl,pins =
+ <MX8MM_IOMUXC_GPIO1_IO04_GPIO1_IO4 0x10>; /* PMIC_USDHC_VSELECT */
+ };
+
/*
* Note: Due to ERR050080 we use discrete external on-module resistors pulling-up to the
* on-module +V3.3_1.8_SD (LDO5) rail and explicitly disable the internal pull-ups here.
*/
pinctrl_usdhc2: usdhc2grp {
fsl,pins =
- <MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x10>,
<MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x90>, /* SODIMM 78 */
<MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x90>, /* SODIMM 74 */
<MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x90>, /* SODIMM 80 */
@@ -1227,7 +1245,6 @@
pinctrl_usdhc2_100mhz: usdhc2-100mhzgrp {
fsl,pins =
- <MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x10>,
<MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x94>,
<MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x94>,
<MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x94>,
@@ -1238,7 +1255,6 @@
pinctrl_usdhc2_200mhz: usdhc2-200mhzgrp {
fsl,pins =
- <MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x10>,
<MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x96>,
<MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x96>,
<MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x96>,
@@ -1250,7 +1266,6 @@
/* Avoid backfeeding with removed card power */
pinctrl_usdhc2_sleep: usdhc2slpgrp {
fsl,pins =
- <MX8MM_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x0>,
<MX8MM_IOMUXC_SD2_CLK_USDHC2_CLK 0x0>,
<MX8MM_IOMUXC_SD2_CMD_USDHC2_CMD 0x0>,
<MX8MM_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x0>,
diff --git a/arch/arm64/boot/dts/freescale/imx8mn-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-beacon-som.dtsi
index 1133cded9be2..ef138c867fc8 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn-beacon-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mn-beacon-som.dtsi
@@ -240,6 +240,7 @@
rtc: rtc@51 {
compatible = "nxp,pcf85263";
reg = <0x51>;
+ quartz-load-femtofarads = <12500>;
};
};
@@ -294,6 +295,8 @@
pinctrl-0 = <&pinctrl_usdhc3>;
pinctrl-1 = <&pinctrl_usdhc3_100mhz>;
pinctrl-2 = <&pinctrl_usdhc3_200mhz>;
+ assigned-clocks = <&clk IMX8MN_CLK_USDHC3>;
+ assigned-clock-rates = <400000000>;
bus-width = <8>;
non-removable;
status = "okay";
diff --git a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql.dtsi
index 7bd680a926ce..c63144f2456e 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp-tqma8mpql.dtsi
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: GPL-2.0-or-later OR MIT
/*
- * Copyright 2021-2022 TQ-Systems GmbH
- * Author: Alexander Stein <alexander.stein@tq-group.com>
+ * Copyright 2021-2025 TQ-Systems GmbH <linux@ew.tq-group.com>,
+ * D-82229 Seefeld, Germany.
+ * Author: Alexander Stein
*/
#include "imx8mp.dtsi"
@@ -23,15 +24,6 @@
regulator-max-microvolt = <3300000>;
regulator-always-on;
};
-
- /* e-MMC IO, needed for HS modes */
- reg_vcc1v8: regulator-vcc1v8 {
- compatible = "regulator-fixed";
- regulator-name = "VCC1V8";
- regulator-min-microvolt = <1800000>;
- regulator-max-microvolt = <1800000>;
- regulator-always-on;
- };
};
&A53_0 {
@@ -194,7 +186,7 @@
no-sd;
no-sdio;
vmmc-supply = <&reg_vcc3v3>;
- vqmmc-supply = <&reg_vcc1v8>;
+ vqmmc-supply = <&buck5_reg>;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi
index 3f79923376fb..37244e8816d9 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi
@@ -26,6 +26,8 @@
leds {
compatible = "gpio-leds";
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi_quad_pins>;
led-power1 {
label = "udpu:green:power";
@@ -82,8 +84,6 @@
&spi0 {
status = "okay";
- pinctrl-names = "default";
- pinctrl-0 = <&spi_quad_pins>;
flash@0 {
compatible = "jedec,spi-nor";
@@ -108,6 +108,10 @@
};
};
+&spi_quad_pins {
+ function = "gpio";
+};
+
&pinctrl_nb {
i2c2_recovery_pins: i2c2-recovery-pins {
groups = "i2c2";
diff --git a/arch/arm64/boot/dts/mediatek/mt6359.dtsi b/arch/arm64/boot/dts/mediatek/mt6359.dtsi
index df3e822232d3..29e784bebb69 100644
--- a/arch/arm64/boot/dts/mediatek/mt6359.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt6359.dtsi
@@ -13,6 +13,8 @@
};
regulators {
+ compatible = "mediatek,mt6359-regulator";
+
mt6359_vs1_buck_reg: buck_vs1 {
regulator-name = "vs1";
regulator-min-microvolt = <800000>;
@@ -291,7 +293,7 @@
};
};
- mt6359rtc: mt6359rtc {
+ mt6359rtc: rtc {
compatible = "mediatek,mt6358-rtc";
};
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
index 7640b5158ff9..256df3a2d823 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
@@ -1247,8 +1247,7 @@
};
pwm0: pwm@1401e000 {
- compatible = "mediatek,mt8173-disp-pwm",
- "mediatek,mt6595-disp-pwm";
+ compatible = "mediatek,mt8173-disp-pwm";
reg = <0 0x1401e000 0 0x1000>;
#pwm-cells = <2>;
clocks = <&mmsys CLK_MM_DISP_PWM026M>,
@@ -1258,8 +1257,7 @@
};
pwm1: pwm@1401f000 {
- compatible = "mediatek,mt8173-disp-pwm",
- "mediatek,mt6595-disp-pwm";
+ compatible = "mediatek,mt8173-disp-pwm";
reg = <0 0x1401f000 0 0x1000>;
#pwm-cells = <2>;
clocks = <&mmsys CLK_MM_DISP_PWM126M>,
diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
index 274edce5d5e6..6f9245167135 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi
@@ -461,22 +461,6 @@
#size-cells = <0>;
#power-domain-cells = <1>;
- power-domain@MT8195_POWER_DOMAIN_VDEC1 {
- reg = <MT8195_POWER_DOMAIN_VDEC1>;
- clocks = <&vdecsys CLK_VDEC_LARB1>;
- clock-names = "vdec1-0";
- mediatek,infracfg = <&infracfg_ao>;
- #power-domain-cells = <0>;
- };
-
- power-domain@MT8195_POWER_DOMAIN_VENC_CORE1 {
- reg = <MT8195_POWER_DOMAIN_VENC_CORE1>;
- clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>;
- clock-names = "venc1-larb";
- mediatek,infracfg = <&infracfg_ao>;
- #power-domain-cells = <0>;
- };
-
power-domain@MT8195_POWER_DOMAIN_VDOSYS0 {
reg = <MT8195_POWER_DOMAIN_VDOSYS0>;
clocks = <&topckgen CLK_TOP_CFG_VDO0>,
@@ -522,15 +506,25 @@
clocks = <&vdecsys_soc CLK_VDEC_SOC_LARB1>;
clock-names = "vdec0-0";
mediatek,infracfg = <&infracfg_ao>;
+ #address-cells = <1>;
+ #size-cells = <0>;
#power-domain-cells = <0>;
- };
- power-domain@MT8195_POWER_DOMAIN_VDEC2 {
- reg = <MT8195_POWER_DOMAIN_VDEC2>;
- clocks = <&vdecsys_core1 CLK_VDEC_CORE1_LARB1>;
- clock-names = "vdec2-0";
- mediatek,infracfg = <&infracfg_ao>;
- #power-domain-cells = <0>;
+ power-domain@MT8195_POWER_DOMAIN_VDEC1 {
+ reg = <MT8195_POWER_DOMAIN_VDEC1>;
+ clocks = <&vdecsys CLK_VDEC_LARB1>;
+ clock-names = "vdec1-0";
+ mediatek,infracfg = <&infracfg_ao>;
+ #power-domain-cells = <0>;
+ };
+
+ power-domain@MT8195_POWER_DOMAIN_VDEC2 {
+ reg = <MT8195_POWER_DOMAIN_VDEC2>;
+ clocks = <&vdecsys_core1 CLK_VDEC_CORE1_LARB1>;
+ clock-names = "vdec2-0";
+ mediatek,infracfg = <&infracfg_ao>;
+ #power-domain-cells = <0>;
+ };
};
power-domain@MT8195_POWER_DOMAIN_VENC {
@@ -538,7 +532,17 @@
clocks = <&vencsys CLK_VENC_LARB>;
clock-names = "venc0-larb";
mediatek,infracfg = <&infracfg_ao>;
+ #address-cells = <1>;
+ #size-cells = <0>;
#power-domain-cells = <0>;
+
+ power-domain@MT8195_POWER_DOMAIN_VENC_CORE1 {
+ reg = <MT8195_POWER_DOMAIN_VENC_CORE1>;
+ clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>;
+ clock-names = "venc1-larb";
+ mediatek,infracfg = <&infracfg_ao>;
+ #power-domain-cells = <0>;
+ };
};
power-domain@MT8195_POWER_DOMAIN_VDOSYS1 {
diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi
index 634373a423ef..481a88d83a65 100644
--- a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi
+++ b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi
@@ -1631,7 +1631,7 @@
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
regulator-always-on;
- gpio = <&exp1 14 GPIO_ACTIVE_HIGH>;
+ gpio = <&exp1 9 GPIO_ACTIVE_HIGH>;
enable-active-high;
vin-supply = <&vdd_1v8>;
};
diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index a9f937b06847..41f6f9abf52f 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -3244,18 +3244,18 @@
cell-index = <0>;
};
- sram@146aa000 {
+ sram@14680000 {
compatible = "qcom,sc7180-imem", "syscon", "simple-mfd";
- reg = <0 0x146aa000 0 0x2000>;
+ reg = <0 0x14680000 0 0x2e000>;
#address-cells = <1>;
#size-cells = <1>;
- ranges = <0 0 0x146aa000 0x2000>;
+ ranges = <0 0 0x14680000 0x2e000>;
- pil-reloc@94c {
+ pil-reloc@2a94c {
compatible = "qcom,pil-reloc-info";
- reg = <0x94c 0xc8>;
+ reg = <0x2a94c 0xc8>;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sda660-inforce-ifc6560.dts b/arch/arm64/boot/dts/qcom/sda660-inforce-ifc6560.dts
index 28050bc5f081..502a3481ba28 100644
--- a/arch/arm64/boot/dts/qcom/sda660-inforce-ifc6560.dts
+++ b/arch/arm64/boot/dts/qcom/sda660-inforce-ifc6560.dts
@@ -155,6 +155,7 @@
* BAM DMA interconnects support is in place.
*/
/delete-property/ clocks;
+ /delete-property/ clock-names;
};
&blsp1_uart2 {
@@ -167,6 +168,7 @@
* BAM DMA interconnects support is in place.
*/
/delete-property/ clocks;
+ /delete-property/ clock-names;
};
&blsp2_uart1 {
diff --git a/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts b/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts
index a3559f6e34a5..6166099aa0c3 100644
--- a/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts
+++ b/arch/arm64/boot/dts/qcom/sdm660-xiaomi-lavender.dts
@@ -107,6 +107,7 @@
status = "okay";
vdd-supply = <&vreg_l1b_0p925>;
+ vdda-pll-supply = <&vreg_l10a_1p8>;
vdda-phy-dpdm-supply = <&vreg_l7b_3p125>;
};
@@ -402,6 +403,8 @@
&sdhc_2 {
status = "okay";
+ cd-gpios = <&tlmm 54 GPIO_ACTIVE_HIGH>;
+
vmmc-supply = <&vreg_l5b_2p95>;
vqmmc-supply = <&vreg_l2b_2p95>;
};
diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index a5df310ce7f3..b77f65a612a1 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -4915,18 +4915,18 @@
cell-index = <0>;
};
- sram@146bf000 {
+ sram@14680000 {
compatible = "qcom,sdm845-imem", "syscon", "simple-mfd";
- reg = <0 0x146bf000 0 0x1000>;
+ reg = <0 0x14680000 0 0x40000>;
#address-cells = <1>;
#size-cells = <1>;
- ranges = <0 0 0x146bf000 0x1000>;
+ ranges = <0 0 0x14680000 0x40000>;
- pil-reloc@94c {
+ pil-reloc@3f94c {
compatible = "qcom,pil-reloc-info";
- reg = <0x94c 0xc8>;
+ reg = <0x3f94c 0xc8>;
};
};
diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi
index eb500cb67c86..72ab4ca12945 100644
--- a/arch/arm64/boot/dts/qcom/sm8250.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi
@@ -569,7 +569,7 @@
};
cpu7_opp9: opp-1747200000 {
- opp-hz = /bits/ 64 <1708800000>;
+ opp-hz = /bits/ 64 <1747200000>;
opp-peak-kBps = <5412000 42393600>;
};
diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi
index 5a4972afc977..75292ec3ee77 100644
--- a/arch/arm64/boot/dts/qcom/sm8350.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi
@@ -421,7 +421,7 @@
no-map;
};
- pil_camera_mem: mmeory@85200000 {
+ pil_camera_mem: memory@85200000 {
reg = <0x0 0x85200000 0x0 0x500000>;
no-map;
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopi-r4s.dts b/arch/arm64/boot/dts/rockchip/rk3399-nanopi-r4s.dts
index fe5b52610010..6a6b36c36ce2 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-nanopi-r4s.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopi-r4s.dts
@@ -117,7 +117,7 @@
};
&u2phy1_host {
- status = "disabled";
+ phy-supply = <&vdd_5v>;
};
&uart0 {
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
index 115c14c0a3c6..396a6636073b 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
@@ -251,14 +251,6 @@
status = "okay";
};
-&usb_host0_ehci {
- status = "okay";
-};
-
-&usb_host0_ohci {
- status = "okay";
-};
-
&vopb {
status = "okay";
};
diff --git a/arch/arm64/boot/dts/ti/k3-am62-main.dtsi b/arch/arm64/boot/dts/ti/k3-am62-main.dtsi
index 04222028e53e..c05efc0f0ce7 100644
--- a/arch/arm64/boot/dts/ti/k3-am62-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am62-main.dtsi
@@ -386,7 +386,6 @@
clock-names = "clk_ahb", "clk_xin";
assigned-clocks = <&k3_clks 57 6>;
assigned-clock-parents = <&k3_clks 57 8>;
- mmc-ddr-1_8v;
mmc-hs200-1_8v;
ti,trm-icp = <0x2>;
bus-width = <8>;
diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
index 83dd8993027a..ebd8434c6b60 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
@@ -262,6 +262,8 @@
interrupts = <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>;
mmc-ddr-1_8v;
mmc-hs200-1_8v;
+ ti,clkbuf-sel = <0x7>;
+ ti,trm-icp = <0x8>;
ti,otap-del-sel-legacy = <0x0>;
ti,otap-del-sel-mmc-hs = <0x0>;
ti,otap-del-sel-sd-hs = <0x0>;
@@ -272,8 +274,9 @@
ti,otap-del-sel-ddr50 = <0x5>;
ti,otap-del-sel-ddr52 = <0x5>;
ti,otap-del-sel-hs200 = <0x5>;
- ti,otap-del-sel-hs400 = <0x0>;
- ti,trm-icp = <0x8>;
+ ti,itap-del-sel-legacy = <0xa>;
+ ti,itap-del-sel-mmc-hs = <0x1>;
+ ti,itap-del-sel-ddr52 = <0x0>;
dma-coherent;
};
@@ -284,19 +287,22 @@
clocks = <&k3_clks 48 0>, <&k3_clks 48 1>;
clock-names = "clk_ahb", "clk_xin";
interrupts = <GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>;
+ ti,clkbuf-sel = <0x7>;
+ ti,trm-icp = <0x8>;
ti,otap-del-sel-legacy = <0x0>;
ti,otap-del-sel-mmc-hs = <0x0>;
ti,otap-del-sel-sd-hs = <0x0>;
- ti,otap-del-sel-sdr12 = <0x0>;
- ti,otap-del-sel-sdr25 = <0x0>;
+ ti,otap-del-sel-sdr12 = <0xf>;
+ ti,otap-del-sel-sdr25 = <0xf>;
ti,otap-del-sel-sdr50 = <0x8>;
ti,otap-del-sel-sdr104 = <0x7>;
ti,otap-del-sel-ddr50 = <0x4>;
ti,otap-del-sel-ddr52 = <0x4>;
ti,otap-del-sel-hs200 = <0x7>;
- ti,clkbuf-sel = <0x7>;
- ti,otap-del-sel = <0x2>;
- ti,trm-icp = <0x8>;
+ ti,itap-del-sel-legacy = <0xa>;
+ ti,itap-del-sel-sd-hs = <0x1>;
+ ti,itap-del-sel-sdr12 = <0xa>;
+ ti,itap-del-sel-sdr25 = <0x1>;
dma-coherent;
};
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-sk.dts b/arch/arm64/boot/dts/ti/k3-j721e-sk.dts
index f4a76926c4e6..d06266610d5e 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-sk.dts
+++ b/arch/arm64/boot/dts/ti/k3-j721e-sk.dts
@@ -175,6 +175,17 @@
regulator-boot-on;
};
+ vsys_5v0: fixedregulator-vsys5v0 {
+ /* Output of LM61460 */
+ compatible = "regulator-fixed";
+ regulator-name = "vsys_5v0";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ vin-supply = <&vusb_main>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+
vdd_mmc1: fixedregulator-sd {
compatible = "regulator-fixed";
pinctrl-names = "default";
@@ -202,6 +213,20 @@
<3300000 0x1>;
};
+ vdd_sd_dv: gpio-regulator-TLV71033 {
+ compatible = "regulator-gpio";
+ pinctrl-names = "default";
+ pinctrl-0 = <&vdd_sd_dv_pins_default>;
+ regulator-name = "tlv71033";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-boot-on;
+ vin-supply = <&vsys_5v0>;
+ gpios = <&main_gpio0 118 GPIO_ACTIVE_HIGH>;
+ states = <1800000 0x0>,
+ <3300000 0x1>;
+ };
+
dp_pwr_3v3: fixedregulator-dp-prw {
compatible = "regulator-fixed";
regulator-name = "dp-pwr";
@@ -455,6 +480,12 @@
>;
};
+ vdd_sd_dv_pins_default: vdd-sd-dv-default-pins {
+ pinctrl-single,pins = <
+ J721E_IOPAD(0x1dc, PIN_OUTPUT, 7) /* (Y1) SPI1_CLK.GPIO0_118 */
+ >;
+ };
+
wkup_i2c0_pins_default: wkup-i2c0-pins-default {
pinctrl-single,pins = <
J721E_WKUP_IOPAD(0xf8, PIN_INPUT_PULLUP, 0) /* (J25) WKUP_I2C0_SCL */
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 623e9f308f38..4543b292b50b 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1230,6 +1230,9 @@ CONFIG_PHY_HISTB_COMBPHY=y
CONFIG_PHY_HISI_INNO_USB2=y
CONFIG_PHY_MVEBU_CP110_COMPHY=y
CONFIG_PHY_MTK_TPHY=y
+CONFIG_PHY_MTK_HDMI=m
+CONFIG_PHY_MTK_MIPI_DSI=m
+CONFIG_PHY_MTK_DP=m
CONFIG_PHY_QCOM_EDP=m
CONFIG_PHY_QCOM_PCIE2=m
CONFIG_PHY_QCOM_QMP=m
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 702587fda70c..8cbbd08cc8c5 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -128,7 +128,7 @@ acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor)
{}
#endif
-static inline const char *acpi_get_enable_method(int cpu)
+static __always_inline const char *acpi_get_enable_method(int cpu)
{
if (acpi_psci_present())
return "psci";
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 8efc3302bf96..41612b03af63 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -75,11 +75,13 @@
#define ARM_CPU_PART_CORTEX_A76 0xD0B
#define ARM_CPU_PART_NEOVERSE_N1 0xD0C
#define ARM_CPU_PART_CORTEX_A77 0xD0D
+#define ARM_CPU_PART_CORTEX_A76AE 0xD0E
#define ARM_CPU_PART_NEOVERSE_V1 0xD40
#define ARM_CPU_PART_CORTEX_A78 0xD41
#define ARM_CPU_PART_CORTEX_A78AE 0xD42
#define ARM_CPU_PART_CORTEX_X1 0xD44
#define ARM_CPU_PART_CORTEX_A510 0xD46
+#define ARM_CPU_PART_CORTEX_X1C 0xD4C
#define ARM_CPU_PART_CORTEX_A520 0xD80
#define ARM_CPU_PART_CORTEX_A710 0xD47
#define ARM_CPU_PART_CORTEX_A715 0xD4D
@@ -119,6 +121,7 @@
#define QCOM_CPU_PART_KRYO 0x200
#define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800
#define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801
+#define QCOM_CPU_PART_KRYO_3XX_GOLD 0x802
#define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803
#define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804
#define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805
@@ -129,6 +132,7 @@
#define FUJITSU_CPU_PART_A64FX 0x001
#define HISI_CPU_PART_TSV110 0xD01
+#define HISI_CPU_PART_HIP09 0xD02
#define APPLE_CPU_PART_M1_ICESTORM 0x022
#define APPLE_CPU_PART_M1_FIRESTORM 0x023
@@ -151,11 +155,13 @@
#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
#define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1)
#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
+#define MIDR_CORTEX_A76AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76AE)
#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1)
#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
+#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C)
#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520)
#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
#define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715)
@@ -188,6 +194,7 @@
#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
#define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD)
#define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER)
+#define MIDR_QCOM_KRYO_3XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_GOLD)
#define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER)
#define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD)
#define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER)
@@ -195,6 +202,7 @@
#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
+#define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09)
#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
#define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
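For reference, the MIDR_CPU_MODEL() values above pack the implementer code into bits [31:24] of MIDR_EL1, force the architecture field (bits [19:16]) to 0xf, and place the part number in bits [15:4]. A minimal standalone sketch of that composition (the helper below is illustrative, not the kernel macro itself):

#include <stdint.h>
#include <stdio.h>

/* Illustrative re-derivation of MIDR_CPU_MODEL(): implementer in
 * bits [31:24], architecture field forced to 0xf, part number in [15:4]. */
static uint32_t midr_cpu_model(uint32_t imp, uint32_t partnum)
{
	return (imp << 24) | (0xfu << 16) | (partnum << 4);
}

int main(void)
{
	/* ARM ('A' == 0x41), Cortex-X1C part 0xD4C as added above */
	printf("MIDR_CORTEX_X1C: 0x%08x\n", midr_cpu_model(0x41, 0xd4c));
	/* prints 0x410fd4c0 */
	return 0;
}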
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 930b0e6c9462..7622782d0bb9 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -56,11 +56,13 @@ extern void fpsimd_signal_preserve_current_state(void);
extern void fpsimd_preserve_current_state(void);
extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
+extern void fpsimd_kvm_prepare(void);
extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
void *sve_state, unsigned int sve_vl,
void *za_state, unsigned int sme_vl,
- u64 *svcr);
+ u64 *svcr, enum fp_type *type,
+ enum fp_type to_save);
extern void fpsimd_flush_task_state(struct task_struct *target);
extern void fpsimd_save_and_flush_cpu_state(void);
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 834bff720582..0075dc5d8238 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -619,6 +619,7 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
}
#endif
u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type);
+u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type);
s32 aarch64_get_branch_offset(u32 insn);
u32 aarch64_set_branch_offset(u32 insn, s32 offset);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 577cf444c113..0935f9849510 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -67,6 +67,7 @@ enum kvm_mode kvm_get_mode(void);
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
extern unsigned int kvm_sve_max_vl;
+extern unsigned int kvm_host_sve_max_vl;
int kvm_arm_init_sve(void);
u32 __attribute_const__ kvm_target_cpu(void);
@@ -309,8 +310,18 @@ struct vcpu_reset_state {
struct kvm_vcpu_arch {
struct kvm_cpu_context ctxt;
- /* Guest floating point state */
+ /*
+ * Guest floating point state
+ *
+ * The architecture has two main floating point extensions,
+ * the original FPSIMD and SVE. These have overlapping
+ * register views, with the FPSIMD V registers occupying the
+ * low 128 bits of the SVE Z registers. When the core
+ * floating point code saves the register state of a task it
+ * records which view it saved in fp_type.
+ */
void *sve_state;
+ enum fp_type fp_type;
unsigned int sve_max_vl;
u64 svcr;
@@ -320,7 +331,6 @@ struct kvm_vcpu_arch {
/* Values of trap registers for the guest. */
u64 hcr_el2;
u64 mdcr_el2;
- u64 cptr_el2;
/* Values of trap registers for the host before guest entry. */
u64 mdcr_el2_host;
@@ -370,7 +380,6 @@ struct kvm_vcpu_arch {
struct kvm_guest_debug_arch vcpu_debug_state;
struct kvm_guest_debug_arch external_debug_state;
- struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */
struct task_struct *parent_task;
struct {
@@ -547,10 +556,6 @@ struct kvm_vcpu_arch {
/* Save TRBE context if active */
#define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6))
-/* SVE enabled for host EL0 */
-#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0))
-/* SME enabled for EL0 */
-#define HOST_SME_ENABLED __vcpu_single_flag(sflags, BIT(1))
/* Physical CPU not in supported_cpus */
#define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(2))
/* WFIT instruction trapped */
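The fp_type field added above leans on the architectural overlap between the two register files: each FPSIMD V register is exactly the low 128 bits of the corresponding SVE Z register. A hedged illustration of that layout (sizes only; these are not the kernel's storage types):

#include <stdint.h>
#include <string.h>

/* Illustrative only: a Z register at the architectural maximum vector
 * length of 256 bytes. Bytes [0..15] are the corresponding V register,
 * so saving in FPSIMD format keeps all V data but discards the upper
 * Z bits plus P0-P15 and FFR -- hence the need to record which view
 * (fp_type) was actually saved. */
struct z_reg {
	uint8_t bytes[256];
};

static void v_from_z(uint8_t v[16], const struct z_reg *z)
{
	memcpy(v, z->bytes, 16);	/* V == Z[127:0] */
}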
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index aa7fa2a08f06..1d0bb7624a1c 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -122,5 +122,6 @@ extern u64 kvm_nvhe_sym(id_aa64isar2_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val);
+extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl);
#endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 1d713cfb0af1..426c3cb3e3bb 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -677,7 +677,8 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))
#define pud_none(pud) (!pud_val(pud))
-#define pud_bad(pud) (!pud_table(pud))
+#define pud_bad(pud) ((pud_val(pud) & PUD_TYPE_MASK) != \
+ PUD_TYPE_TABLE)
#define pud_present(pud) pte_present(pud_pte(pud))
#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
#define pud_valid(pud) pte_valid(pud_pte(pud))
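The reworked pud_bad() inspects the descriptor type bits directly: a pud is acceptable only if bits [1:0] encode a next-level table (0b11). A small sketch of the check, assuming the usual arm64 values PUD_TYPE_MASK == 0x3 and PUD_TYPE_TABLE == 0x3:

#include <stdint.h>
#include <stdbool.h>

#define PUD_TYPE_MASK	((uint64_t)3)
#define PUD_TYPE_TABLE	((uint64_t)3)	/* bits[1:0] == 0b11: table descriptor */

/* Sketch: anything that is not a table descriptor -- invalid entries
 * (bit 0 clear) as well as block mappings (bits[1:0] == 0b01) -- is bad. */
static bool pud_bad(uint64_t pud)
{
	return (pud & PUD_TYPE_MASK) != PUD_TYPE_TABLE;
}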
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 400f8956328b..1b822e618bb4 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -122,6 +122,12 @@ enum vec_type {
ARM64_VEC_MAX,
};
+enum fp_type {
+ FP_STATE_CURRENT, /* Save based on current task state. */
+ FP_STATE_FPSIMD,
+ FP_STATE_SVE,
+};
+
struct cpu_context {
unsigned long x19;
unsigned long x20;
@@ -152,6 +158,7 @@ struct thread_struct {
struct user_fpsimd_state fpsimd_state;
} uw;
+ enum fp_type fp_type; /* registers FPSIMD or SVE? */
unsigned int fpsimd_cpu;
void *sve_state; /* SVE registers, if any */
void *za_state; /* ZA register, if any */
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
index aa3d3607d5c8..9de17d4bb036 100644
--- a/arch/arm64/include/asm/spectre.h
+++ b/arch/arm64/include/asm/spectre.h
@@ -96,7 +96,9 @@ enum mitigation_state arm64_get_meltdown_state(void);
enum mitigation_state arm64_get_spectre_bhb_state(void);
bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope);
-u8 spectre_bhb_loop_affected(int scope);
+extern bool __nospectre_bhb;
+u8 get_spectre_bhb_loop_value(void);
+bool is_spectre_bhb_fw_mitigated(void);
void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
#endif /* __ASSEMBLY__ */
#endif /* __ASM_SPECTRE_H */
diff --git a/arch/arm64/kernel/compat_alignment.c b/arch/arm64/kernel/compat_alignment.c
index deff21bfa680..b68e1d328d4c 100644
--- a/arch/arm64/kernel/compat_alignment.c
+++ b/arch/arm64/kernel/compat_alignment.c
@@ -368,6 +368,8 @@ int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs)
return 1;
}
+ if (!handler)
+ return 1;
type = handler(addr, instr, regs);
if (type == TYPE_ERROR || type == TYPE_FAULT)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 840cc48b5147..5d2322eeee47 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -343,6 +343,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ECBHB_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ETS_SHIFT, 4, 0),
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 62146d48dba7..4b4a9aa76e1a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -827,6 +827,7 @@ SYM_CODE_END(__bp_harden_el1_vectors)
*
*/
SYM_FUNC_START(cpu_switch_to)
+ save_and_disable_daif x11
mov x10, #THREAD_CPU_CONTEXT
add x8, x0, x10
mov x9, sp
@@ -850,6 +851,7 @@ SYM_FUNC_START(cpu_switch_to)
ptrauth_keys_install_kernel x1, x8, x9, x10
scs_save x0
scs_load_current
+ restore_irq x11
ret
SYM_FUNC_END(cpu_switch_to)
NOKPROBE(cpu_switch_to)
@@ -876,6 +878,7 @@ NOKPROBE(ret_from_fork)
* Calls func(regs) using this CPU's irq stack and shadow irq stack.
*/
SYM_FUNC_START(call_on_irq_stack)
+ save_and_disable_daif x9
#ifdef CONFIG_SHADOW_CALL_STACK
get_current_task x16
scs_save x16
@@ -890,8 +893,10 @@ SYM_FUNC_START(call_on_irq_stack)
/* Move to the new stack and call the function there */
add sp, x16, #IRQ_STACK_SIZE
+ restore_irq x9
blr x1
+ save_and_disable_daif x9
/*
* Restore the SP from the FP, and restore the FP and LR from the frame
* record.
@@ -899,6 +904,7 @@ SYM_FUNC_START(call_on_irq_stack)
mov sp, x29
ldp x29, x30, [sp], #16
scs_load_current
+ restore_irq x9
ret
SYM_FUNC_END(call_on_irq_stack)
NOKPROBE(call_on_irq_stack)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 43afe07c74fd..bc42163a7fd1 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -125,6 +125,8 @@ struct fpsimd_last_state_struct {
u64 *svcr;
unsigned int sve_vl;
unsigned int sme_vl;
+ enum fp_type *fp_type;
+ enum fp_type to_save;
};
static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
@@ -330,15 +332,6 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
* The task can execute SVE instructions while in userspace without
* trapping to the kernel.
*
- * When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the
- * corresponding Zn), P0-P15 and FFR are encoded in
- * task->thread.sve_state, formatted appropriately for vector
- * length task->thread.sve_vl or, if SVCR.SM is set,
- * task->thread.sme_vl.
- *
- * task->thread.sve_state must point to a valid buffer at least
- * sve_state_size(task) bytes in size.
- *
* During any syscall, the kernel may optionally clear TIF_SVE and
* discard the vector state except for the FPSIMD subset.
*
@@ -348,7 +341,15 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
* do_sve_acc() to be called, which does some preparation and then
* sets TIF_SVE.
*
- * When stored, FPSIMD registers V0-V31 are encoded in
+ * The data will be stored in one of two formats:
+ *
+ * * FPSIMD only - FP_STATE_FPSIMD:
+ *
+ * When only the FPSIMD state is stored, task->thread.fp_type is set
+ * to FP_STATE_FPSIMD and the FPSIMD registers V0-V31 are encoded in
* task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
* logically zero but not stored anywhere; P0-P15 and FFR are not
* stored and have unspecified values from userspace's point of
@@ -356,7 +357,23 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
* but userspace is discouraged from relying on this.
*
* task->thread.sve_state does not need to be non-NULL, valid or any
- * particular size: it must not be dereferenced.
+ * particular size: it must not be dereferenced and any data stored
+ * there should be considered stale and not referenced.
+ *
+ * * SVE state - FP_STATE_SVE:
+ *
+ * When the full SVE state is stored, task->thread.fp_type is set to
+ * FP_STATE_SVE and Z0-Z31 (incorporating Vn in bits[127:0] or the
+ * corresponding Zn), P0-P15 and FFR are encoded in
+ * task->thread.sve_state, formatted appropriately for vector
+ * length task->thread.sve_vl or, if SVCR.SM is set,
+ * task->thread.sme_vl. The storage for the vector registers in
+ * task->thread.uw.fpsimd_state should be ignored.
+ *
+ * task->thread.sve_state must point to a valid buffer at least
+ * sve_state_size(task) bytes in size. The data stored in
+ * task->thread.uw.fpsimd_state.vregs should be considered stale
+ * and not referenced.
*
* * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
* irrespective of whether TIF_SVE is clear or set, since these are
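As a rough model of the two formats the comment above describes, the save path can be thought of as choosing a destination buffer and recording the choice, after which the other buffer's vector data is stale (a userspace sketch; names mirror the comment, not the kernel structures):

enum fp_type { FP_STATE_CURRENT, FP_STATE_FPSIMD, FP_STATE_SVE };

struct thread_model {
	unsigned char uw_fpsimd_state[528];	/* V0-V31 plus FPSR/FPCR */
	unsigned char *sve_state;		/* Z/P/FFR buffer, sized per VL */
	enum fp_type fp_type;			/* which view holds live data */
};

/* Sketch: whichever view is written is recorded in fp_type; readers
 * must consult fp_type before trusting either buffer. */
static void save_regs(struct thread_model *t, int sve_view_live)
{
	if (sve_view_live) {
		/* ... store Z0-Z31, P0-P15, FFR into t->sve_state ... */
		t->fp_type = FP_STATE_SVE;
	} else {
		/* ... store V0-V31 into t->uw_fpsimd_state ... */
		t->fp_type = FP_STATE_FPSIMD;
	}
}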
@@ -404,12 +421,15 @@ static void task_fpsimd_load(void)
}
}
- if (restore_sve_regs)
+ if (restore_sve_regs) {
+ WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
sve_load_state(sve_pffr(&current->thread),
&current->thread.uw.fpsimd_state.fpsr,
restore_ffr);
- else
+ } else {
+ WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD);
fpsimd_load_state(&current->thread.uw.fpsimd_state);
+ }
}
/*
@@ -419,8 +439,8 @@ static void task_fpsimd_load(void)
* last, if KVM is involved this may be the guest VM context rather
* than the host thread for the VM pointed to by current. This means
* that we must always reference the state storage via last rather
- * than via current, other than the TIF_ flags which KVM will
- * carefully maintain for us.
+ * than via current; if we are saving KVM state then KVM will have
+ * ensured that the type of registers to save is set in last->to_save.
*/
static void fpsimd_save(void)
{
@@ -437,7 +457,8 @@ static void fpsimd_save(void)
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
return;
- if (test_thread_flag(TIF_SVE)) {
+ if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE)) ||
+ last->to_save == FP_STATE_SVE) {
save_sve_regs = true;
save_ffr = true;
vl = last->sve_vl;
@@ -474,8 +495,10 @@ static void fpsimd_save(void)
sve_save_state((char *)last->sve_state +
sve_ffr_offset(vl),
&last->st->fpsr, save_ffr);
+ *last->fp_type = FP_STATE_SVE;
} else {
fpsimd_save_state(last->st);
+ *last->fp_type = FP_STATE_FPSIMD;
}
}
@@ -851,8 +874,10 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
fpsimd_flush_task_state(task);
if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
- thread_sm_enabled(&task->thread))
+ thread_sm_enabled(&task->thread)) {
sve_to_fpsimd(task);
+ task->thread.fp_type = FP_STATE_FPSIMD;
+ }
if (system_supports_sme()) {
if (type == ARM64_VEC_SME ||
@@ -1234,8 +1259,10 @@ void fpsimd_release_task(struct task_struct *dead_task)
*/
void sme_alloc(struct task_struct *task, bool flush)
{
- if (task->thread.za_state && flush) {
- memset(task->thread.za_state, 0, za_state_size(task));
+ if (task->thread.za_state) {
+ if (flush)
+ memset(task->thread.za_state, 0,
+ za_state_size(task));
return;
}
@@ -1383,6 +1410,7 @@ static void sve_init_regs(void)
fpsimd_bind_task_to_cpu();
} else {
fpsimd_to_sve(current);
+ current->thread.fp_type = FP_STATE_SVE;
fpsimd_flush_task_state(current);
}
}
@@ -1476,6 +1504,8 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
sme_set_vq(vq_minus_one);
fpsimd_bind_task_to_cpu();
+ } else {
+ fpsimd_flush_task_state(current);
}
put_cpu_fpsimd_context();
@@ -1612,6 +1642,8 @@ void fpsimd_flush_thread(void)
current->thread.svcr = 0;
}
+ current->thread.fp_type = FP_STATE_FPSIMD;
+
put_cpu_fpsimd_context();
kfree(sve_state);
kfree(za_state);
@@ -1660,6 +1692,8 @@ static void fpsimd_bind_task_to_cpu(void)
last->sve_vl = task_get_sve_vl(current);
last->sme_vl = task_get_sme_vl(current);
last->svcr = &current->thread.svcr;
+ last->fp_type = &current->thread.fp_type;
+ last->to_save = FP_STATE_CURRENT;
current->thread.fpsimd_cpu = smp_processor_id();
/*
@@ -1683,7 +1717,8 @@ static void fpsimd_bind_task_to_cpu(void)
void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
unsigned int sve_vl, void *za_state,
- unsigned int sme_vl, u64 *svcr)
+ unsigned int sme_vl, u64 *svcr,
+ enum fp_type *type, enum fp_type to_save)
{
struct fpsimd_last_state_struct *last =
this_cpu_ptr(&fpsimd_last_state);
@@ -1697,6 +1732,8 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
last->za_state = za_state;
last->sve_vl = sve_vl;
last->sme_vl = sme_vl;
+ last->fp_type = type;
+ last->to_save = to_save;
}
/*
@@ -1746,7 +1783,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
get_cpu_fpsimd_context();
current->thread.uw.fpsimd_state = *state;
- if (test_thread_flag(TIF_SVE))
+ if (current->thread.fp_type == FP_STATE_SVE)
fpsimd_to_sve(current);
task_fpsimd_load();
@@ -1814,10 +1851,10 @@ void fpsimd_save_and_flush_cpu_state(void)
if (!system_supports_fpsimd())
return;
WARN_ON(preemptible());
- __get_cpu_fpsimd_context();
+ get_cpu_fpsimd_context();
fpsimd_save();
fpsimd_flush_cpu_state();
- __put_cpu_fpsimd_context();
+ put_cpu_fpsimd_context();
}
#ifdef CONFIG_KERNEL_MODE_NEON
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 3f06e9d45271..7092840deb5c 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -331,6 +331,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
clear_tsk_thread_flag(dst, TIF_SME);
}
+ dst->thread.fp_type = FP_STATE_FPSIMD;
+
/* clear any pending asynchronous tag fault raised by the parent */
clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index 2df5e43ae4d1..4978c466e325 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -857,52 +857,90 @@ static unsigned long system_bhb_mitigations;
* This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any
* SCOPE_SYSTEM call will give the right answer.
*/
-u8 spectre_bhb_loop_affected(int scope)
+static bool is_spectre_bhb_safe(int scope)
+{
+ static const struct midr_range spectre_bhb_safe_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A510),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A520),
+ MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),
+ {},
+ };
+ static bool all_safe = true;
+
+ if (scope != SCOPE_LOCAL_CPU)
+ return all_safe;
+
+ if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_safe_list))
+ return true;
+
+ all_safe = false;
+
+ return false;
+}
+
+static u8 spectre_bhb_loop_affected(void)
{
u8 k = 0;
- static u8 max_bhb_k;
-
- if (scope == SCOPE_LOCAL_CPU) {
- static const struct midr_range spectre_bhb_k32_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
- MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
- MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
- {},
- };
- static const struct midr_range spectre_bhb_k24_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
- MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
- {},
- };
- static const struct midr_range spectre_bhb_k11_list[] = {
- MIDR_ALL_VERSIONS(MIDR_AMPERE1),
- {},
- };
- static const struct midr_range spectre_bhb_k8_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
- {},
- };
-
- if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list))
- k = 32;
- else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list))
- k = 24;
- else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list))
- k = 11;
- else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list))
- k = 8;
-
- max_bhb_k = max(max_bhb_k, k);
- } else {
- k = max_bhb_k;
- }
+
+ static const struct midr_range spectre_bhb_k132_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k38_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A715),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k32_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A710),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_X2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k24_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A76),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A76AE),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A77),
+ MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
+ MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_GOLD),
+ MIDR_ALL_VERSIONS(MIDR_HISI_HIP09),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k11_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_AMPERE1),
+ {},
+ };
+ static const struct midr_range spectre_bhb_k8_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ {},
+ };
+
+ if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k132_list))
+ k = 132;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k38_list))
+ k = 38;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list))
+ k = 32;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list))
+ k = 24;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list))
+ k = 11;
+ else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list))
+ k = 8;
return k;
}
@@ -928,29 +966,13 @@ static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void)
}
}
-static bool is_spectre_bhb_fw_affected(int scope)
+static bool has_spectre_bhb_fw_mitigation(void)
{
- static bool system_affected;
enum mitigation_state fw_state;
bool has_smccc = arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_NONE;
- static const struct midr_range spectre_bhb_firmware_mitigated_list[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
- {},
- };
- bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(),
- spectre_bhb_firmware_mitigated_list);
-
- if (scope != SCOPE_LOCAL_CPU)
- return system_affected;
fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
- if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) {
- system_affected = true;
- return true;
- }
-
- return false;
+ return has_smccc && fw_state == SPECTRE_MITIGATED;
}
static bool supports_ecbhb(int scope)
@@ -966,6 +988,8 @@ static bool supports_ecbhb(int scope)
ID_AA64MMFR1_EL1_ECBHB_SHIFT);
}
+static u8 max_bhb_k;
+
bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
int scope)
{
@@ -974,16 +998,23 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
if (supports_csv2p3(scope))
return false;
- if (supports_clearbhb(scope))
- return true;
+ if (is_spectre_bhb_safe(scope))
+ return false;
- if (spectre_bhb_loop_affected(scope))
- return true;
+ /*
+ * At this point the core isn't known to be "safe" so we're going to
+ * assume it's vulnerable. We still need to update `max_bhb_k`,
+ * but only if we aren't mitigating with clearbhb.
+ */
+ if (scope == SCOPE_LOCAL_CPU && !supports_clearbhb(SCOPE_LOCAL_CPU))
+ max_bhb_k = max(max_bhb_k, spectre_bhb_loop_affected());
- if (is_spectre_bhb_fw_affected(scope))
- return true;
+ return true;
+}
- return false;
+u8 get_spectre_bhb_loop_value(void)
+{
+ return max_bhb_k;
}
static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
@@ -1006,7 +1037,7 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
isb();
}
-static bool __read_mostly __nospectre_bhb;
+bool __read_mostly __nospectre_bhb;
static int __init parse_spectre_bhb_param(char *str)
{
__nospectre_bhb = true;
@@ -1017,7 +1048,7 @@ early_param("nospectre_bhb", parse_spectre_bhb_param);
void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
{
bp_hardening_cb_t cpu_cb;
- enum mitigation_state fw_state, state = SPECTRE_VULNERABLE;
+ enum mitigation_state state = SPECTRE_VULNERABLE;
struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
if (!is_spectre_bhb_affected(entry, SCOPE_LOCAL_CPU))
@@ -1043,7 +1074,7 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN);
state = SPECTRE_MITIGATED;
set_bit(BHB_INSN, &system_bhb_mitigations);
- } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) {
+ } else if (spectre_bhb_loop_affected()) {
/*
* Ensure KVM uses the indirect vector which will have the
* branchy-loop added. A57/A72-r0 will already have selected
@@ -1056,37 +1087,39 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP);
state = SPECTRE_MITIGATED;
set_bit(BHB_LOOP, &system_bhb_mitigations);
- } else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) {
- fw_state = spectre_bhb_get_cpu_fw_mitigation_state();
- if (fw_state == SPECTRE_MITIGATED) {
- /*
- * Ensure KVM uses one of the spectre bp_hardening
- * vectors. The indirect vector doesn't include the EL3
- * call, so needs upgrading to
- * HYP_VECTOR_SPECTRE_INDIRECT.
- */
- if (!data->slot || data->slot == HYP_VECTOR_INDIRECT)
- data->slot += 1;
-
- this_cpu_set_vectors(EL1_VECTOR_BHB_FW);
-
- /*
- * The WA3 call in the vectors supersedes the WA1 call
- * made during context-switch. Uninstall any firmware
- * bp_hardening callback.
- */
- cpu_cb = spectre_v2_get_sw_mitigation_cb();
- if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb)
- __this_cpu_write(bp_hardening_data.fn, NULL);
-
- state = SPECTRE_MITIGATED;
- set_bit(BHB_FW, &system_bhb_mitigations);
- }
+ } else if (has_spectre_bhb_fw_mitigation()) {
+ /*
+ * Ensure KVM uses one of the spectre bp_hardening
+ * vectors. The indirect vector doesn't include the EL3
+ * call, so needs upgrading to
+ * HYP_VECTOR_SPECTRE_INDIRECT.
+ */
+ if (!data->slot || data->slot == HYP_VECTOR_INDIRECT)
+ data->slot += 1;
+
+ this_cpu_set_vectors(EL1_VECTOR_BHB_FW);
+
+ /*
+ * The WA3 call in the vectors supersedes the WA1 call
+ * made during context-switch. Uninstall any firmware
+ * bp_hardening callback.
+ */
+ cpu_cb = spectre_v2_get_sw_mitigation_cb();
+ if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb)
+ __this_cpu_write(bp_hardening_data.fn, NULL);
+
+ state = SPECTRE_MITIGATED;
+ set_bit(BHB_FW, &system_bhb_mitigations);
}
update_mitigation_state(&spectre_bhb_state, state);
}
+bool is_spectre_bhb_fw_mitigated(void)
+{
+ return test_bit(BHB_FW, &system_bhb_mitigations);
+}
+
/* Patched to NOP when enabled */
void noinstr spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt,
__le32 *origptr,
@@ -1115,7 +1148,6 @@ void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt,
{
u8 rd;
u32 insn;
- u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM);
BUG_ON(nr_inst != 1); /* MOV -> MOV */
@@ -1124,7 +1156,7 @@ void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt,
insn = le32_to_cpu(*origptr);
rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn);
- insn = aarch64_insn_gen_movewide(rd, loop_count, 0,
+ insn = aarch64_insn_gen_movewide(rd, max_bhb_k, 0,
AARCH64_INSN_VARIANT_64BIT,
AARCH64_INSN_MOVEWIDE_ZERO);
*updptr++ = cpu_to_le32(insn);
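Putting the reworked pieces together, mitigation selection now resolves in a fixed order per CPU: known-safe cores need nothing, the CLRBHB instruction is preferred where present, then the branchy loop sized by max_bhb_k, then the ARCH_WORKAROUND_3 firmware call. A condensed sketch of that ordering (illustrative, not the kernel control flow verbatim):

#include <stdbool.h>

enum bhb_mitigation { BHB_NONE, BHB_CLRBHB, BHB_LOOP, BHB_FIRMWARE };

/* k is what get_spectre_bhb_loop_value() reports; fw_mitigated is what
 * has_spectre_bhb_fw_mitigation() reports for this CPU. */
static enum bhb_mitigation pick_bhb(bool safe, bool has_clrbhb,
				    unsigned int k, bool fw_mitigated)
{
	if (safe)
		return BHB_NONE;
	if (has_clrbhb)
		return BHB_CLRBHB;
	if (k)
		return BHB_LOOP;
	if (fw_mitigated)
		return BHB_FIRMWARE;
	return BHB_NONE;	/* remains vulnerable */
}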
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index b178bbdc1c3b..3025d14c253d 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -139,7 +139,7 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
addr += n;
if (regs_within_kernel_stack(regs, (unsigned long)addr))
- return *addr;
+ return READ_ONCE_NOCHECK(*addr);
else
return 0;
}
@@ -917,6 +917,7 @@ static int sve_set_common(struct task_struct *target,
clear_tsk_thread_flag(target, TIF_SVE);
if (type == ARM64_VEC_SME)
fpsimd_force_sync_to_sve(target);
+ target->thread.fp_type = FP_STATE_FPSIMD;
goto out;
}
@@ -939,6 +940,7 @@ static int sve_set_common(struct task_struct *target,
if (!target->thread.sve_state) {
ret = -ENOMEM;
clear_tsk_thread_flag(target, TIF_SVE);
+ target->thread.fp_type = FP_STATE_FPSIMD;
goto out;
}
@@ -952,6 +954,7 @@ static int sve_set_common(struct task_struct *target,
fpsimd_sync_to_sve(target);
if (type == ARM64_VEC_SVE)
set_tsk_thread_flag(target, TIF_SVE);
+ target->thread.fp_type = FP_STATE_SVE;
BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
start = SVE_PT_SVE_OFFSET;
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 82f4572c8ddf..2461bbffe7d4 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -207,6 +207,7 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
__get_user_error(fpsimd.fpcr, &ctx->fpcr, err);
clear_thread_flag(TIF_SVE);
+ current->thread.fp_type = FP_STATE_FPSIMD;
/* load the hardware registers from the fpsimd_state structure */
if (!err)
@@ -297,6 +298,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
if (sve.head.size <= sizeof(*user->sve)) {
clear_thread_flag(TIF_SVE);
current->thread.svcr &= ~SVCR_SM_MASK;
+ current->thread.fp_type = FP_STATE_FPSIMD;
goto fpsimd_only;
}
@@ -332,6 +334,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
current->thread.svcr |= SVCR_SM_MASK;
else
set_thread_flag(TIF_SVE);
+ current->thread.fp_type = FP_STATE_SVE;
fpsimd_only:
/* copy the FP and status/control registers */
@@ -937,9 +940,11 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
* FPSIMD register state - flush the saved FPSIMD
* register state in case it gets loaded.
*/
- if (current->thread.svcr & SVCR_SM_MASK)
+ if (current->thread.svcr & SVCR_SM_MASK) {
memset(&current->thread.uw.fpsimd_state, 0,
sizeof(current->thread.uw.fpsimd_state));
+ current->thread.fp_type = FP_STATE_FPSIMD;
+ }
current->thread.svcr &= ~(SVCR_ZA_MASK |
SVCR_SM_MASK);
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 23d281ed7621..09489e92ff94 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -911,6 +911,7 @@ void panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far)
void __noreturn arm64_serror_panic(struct pt_regs *regs, unsigned long esr)
{
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK);
console_verbose();
pr_crit("SError Interrupt on CPU%d, code 0x%016lx -- %s\n",
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 3a05f364b4b6..6eb992056c67 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -371,7 +371,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (err)
return err;
- return kvm_share_hyp(vcpu, vcpu + 1);
+ err = kvm_share_hyp(vcpu, vcpu + 1);
+ if (err)
+ kvm_vgic_vcpu_destroy(vcpu);
+
+ return err;
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
@@ -1230,7 +1234,6 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
}
vcpu_reset_hcr(vcpu);
- vcpu->arch.cptr_el2 = CPTR_EL2_DEFAULT;
/*
* Handle the "start in power-off" case.
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index ec8e4494873d..3fd86b71ee37 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -49,8 +49,6 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
if (ret)
return ret;
- vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
-
/*
* We need to keep current's task_struct pinned until its data has been
* unshared with the hypervisor to make sure it is not re-used by the
@@ -75,36 +73,20 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
{
BUG_ON(!current->mm);
- BUG_ON(test_thread_flag(TIF_SVE));
if (!system_supports_fpsimd())
return;
- vcpu->arch.fp_state = FP_STATE_HOST_OWNED;
-
- vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
- if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
- vcpu_set_flag(vcpu, HOST_SVE_ENABLED);
-
/*
- * We don't currently support SME guests but if we leave
- * things in streaming mode then when the guest starts running
- * FPSIMD or SVE code it may generate SME traps so as a
- * special case if we are in streaming mode we force the host
- * state to be saved now and exit streaming mode so that we
- * don't have to handle any SME traps for valid guest
- * operations. Do this for ZA as well for now for simplicity.
+ * Ensure that any host FPSIMD/SVE/SME state is saved and unbound such
+ * that the host kernel is responsible for restoring this state upon
+ * return to userspace, and the hyp code doesn't need to save anything.
+ *
+ * When the host may use SME, fpsimd_save_and_flush_cpu_state() ensures
+ * that PSTATE.{SM,ZA} == {0,0}.
*/
- if (system_supports_sme()) {
- vcpu_clear_flag(vcpu, HOST_SME_ENABLED);
- if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
- vcpu_set_flag(vcpu, HOST_SME_ENABLED);
-
- if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) {
- vcpu->arch.fp_state = FP_STATE_FREE;
- fpsimd_save_and_flush_cpu_state();
- }
- }
+ fpsimd_save_and_flush_cpu_state();
+ vcpu->arch.fp_state = FP_STATE_FREE;
}
/*
@@ -129,9 +111,16 @@ void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu)
*/
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
{
+ enum fp_type fp_type;
+
WARN_ON_ONCE(!irqs_disabled());
if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
+ if (vcpu_has_sve(vcpu))
+ fp_type = FP_STATE_SVE;
+ else
+ fp_type = FP_STATE_FPSIMD;
+
/*
* Currently we do not support SME guests so SVCR is
* always 0 and we just need a variable to point to.
@@ -139,10 +128,10 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs,
vcpu->arch.sve_state,
vcpu->arch.sve_max_vl,
- NULL, 0, &vcpu->arch.svcr);
+ NULL, 0, &vcpu->arch.svcr,
+ &vcpu->arch.fp_type, fp_type);
clear_thread_flag(TIF_FOREIGN_FPSTATE);
- update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));
}
}
@@ -158,48 +147,19 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
local_irq_save(flags);
- /*
- * If we have VHE then the Hyp code will reset CPACR_EL1 to
- * CPACR_EL1_DEFAULT and we need to reenable SME.
- */
- if (has_vhe() && system_supports_sme()) {
- /* Also restore EL0 state seen on entry */
- if (vcpu_get_flag(vcpu, HOST_SME_ENABLED))
- sysreg_clear_set(CPACR_EL1, 0,
- CPACR_EL1_SMEN_EL0EN |
- CPACR_EL1_SMEN_EL1EN);
- else
- sysreg_clear_set(CPACR_EL1,
- CPACR_EL1_SMEN_EL0EN,
- CPACR_EL1_SMEN_EL1EN);
- }
-
if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
- if (vcpu_has_sve(vcpu)) {
- __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
-
- /* Restore the VL that was saved when bound to the CPU */
- if (!has_vhe())
- sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1,
- SYS_ZCR_EL1);
- }
-
- fpsimd_save_and_flush_cpu_state();
- } else if (has_vhe() && system_supports_sve()) {
/*
- * The FPSIMD/SVE state in the CPU has not been touched, and we
- * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
- * reset to CPACR_EL1_DEFAULT by the Hyp code, disabling SVE
- * for EL0. To avoid spurious traps, restore the trap state
- * seen by kvm_arch_vcpu_load_fp():
+ * Flush (save and invalidate) the fpsimd/sve state so that if
+ * the host tries to use fpsimd/sve, it's not using stale data
+ * from the guest.
+ *
+ * Flushing the state sets the TIF_FOREIGN_FPSTATE bit for the
+ * context unconditionally, in both nVHE and VHE. This allows
+ * the kernel to restore the fpsimd/sve state, including ZCR_EL1
+ * when needed.
*/
- if (vcpu_get_flag(vcpu, HOST_SVE_ENABLED))
- sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
- else
- sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
+ fpsimd_save_and_flush_cpu_state();
}
- update_thread_flag(TIF_SVE, 0);
-
local_irq_restore(flags);
}
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 435346ea1504..d8c94c45cb2f 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -44,6 +44,11 @@ alternative_if ARM64_HAS_RAS_EXTN
alternative_else_nop_endif
mrs x1, isr_el1
cbz x1, 1f
+
+ // Ensure that __guest_enter() always provides a context
+ // synchronization event so that callers don't need ISBs for anything
+ // that would usually be synchronized by the ERET.
+ isb
mov x0, #ARM_EXCEPTION_IRQ
ret
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 081aca8f432e..275176e61d74 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -167,13 +167,68 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
}
+static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu)
+{
+ u64 zcr_el1, zcr_el2;
+
+ if (!guest_owns_fp_regs(vcpu))
+ return;
+
+ if (vcpu_has_sve(vcpu)) {
+ zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
+
+ write_sysreg_el2(zcr_el2, SYS_ZCR);
+
+ zcr_el1 = __vcpu_sys_reg(vcpu, ZCR_EL1);
+ write_sysreg_el1(zcr_el1, SYS_ZCR);
+ }
+}
+
+static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu)
+{
+ u64 zcr_el1, zcr_el2;
+
+ if (!guest_owns_fp_regs(vcpu))
+ return;
+
+ /*
+ * When the guest owns the FP regs, we know that guest+hyp traps for
+ * any FPSIMD/SVE/SME features exposed to the guest have been disabled
+ * by either fpsimd_lazy_switch_to_guest() or kvm_hyp_handle_fpsimd()
+ * prior to __guest_enter(). As __guest_enter() guarantees a context
+ * synchronization event, we don't need an ISB here to avoid taking
+ * traps for anything that was exposed to the guest.
+ */
+ if (vcpu_has_sve(vcpu)) {
+ zcr_el1 = read_sysreg_el1(SYS_ZCR);
+ __vcpu_sys_reg(vcpu, ZCR_EL1) = zcr_el1;
+
+ /*
+ * The guest's state is always saved using the guest's max VL.
+ * Ensure that the host has the guest's max VL active such that
+ * the host can save the guest's state lazily, but don't
+ * artificially restrict the host to the guest's max VL.
+ */
+ if (has_vhe()) {
+ zcr_el2 = vcpu_sve_max_vq(vcpu) - 1;
+ write_sysreg_el2(zcr_el2, SYS_ZCR);
+ } else {
+ zcr_el2 = sve_vq_from_vl(kvm_host_sve_max_vl) - 1;
+ write_sysreg_el2(zcr_el2, SYS_ZCR);
+
+ zcr_el1 = vcpu_sve_max_vq(vcpu) - 1;
+ write_sysreg_el1(zcr_el1, SYS_ZCR);
+ }
+ }
+}
+
/*
* We trap the first access to the FP/SIMD to save the host context and
* restore the guest context lazily.
* If FP/SIMD is not implemented, handle the trap and inject an undefined
* instruction exception to the guest. Similarly for trapped SVE accesses.
*/
-static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{
bool sve_guest;
u8 esr_ec;
@@ -207,10 +262,6 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
}
isb();
- /* Write out the host state if it's in the registers */
- if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED)
- __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
-
/* Restore the guest state */
if (sve_guest)
__hyp_sve_restore_guest(vcpu);
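The ZCR programming above relies on the LEN field encoding: ZCR_ELx.LEN holds the vector length in 128-bit quadwords, minus one, so vcpu_sve_max_vq(vcpu) - 1 and sve_vq_from_vl(kvm_host_sve_max_vl) - 1 are both "quadwords minus one" values. A short sketch of the conversions assumed here:

/* Sketch of the VL <-> ZCR_ELx.LEN relationship used above. */
static inline unsigned int sve_vq_from_vl(unsigned int vl)
{
	return vl / 16;			/* bytes -> 128-bit quadwords */
}

static inline unsigned long vl_to_zcr_len(unsigned int vl)
{
	return sve_vq_from_vl(vl) - 1;	/* e.g. a VL of 32 bytes -> LEN 1 */
}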
@@ -335,7 +386,7 @@ static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code)
return true;
}
-static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
handle_tx2_tvm(vcpu))
@@ -351,7 +402,7 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
return false;
}
-static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
__vgic_v3_perform_cpuif_access(vcpu) == 1)
@@ -360,19 +411,18 @@ static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
return false;
}
-static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu,
+ u64 *exit_code)
{
if (!__populate_fault_info(vcpu))
return true;
return false;
}
-static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
- __alias(kvm_hyp_handle_memory_fault);
-static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
- __alias(kvm_hyp_handle_memory_fault);
+#define kvm_hyp_handle_iabt_low kvm_hyp_handle_memory_fault
+#define kvm_hyp_handle_watchpt_low kvm_hyp_handle_memory_fault
-static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (kvm_hyp_handle_memory_fault(vcpu, exit_code))
return true;
@@ -402,23 +452,16 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *);
-static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);
-
-static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code);
-
/*
* Allow the hypervisor to handle the exit with an exit handler if it has one.
*
* Returns true if the hypervisor handled the exit, and control should go back
* to the guest, or false if it hasn't.
*/
-static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code,
+ const exit_handler_fn *handlers)
{
- const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);
- exit_handler_fn fn;
-
- fn = handlers[kvm_vcpu_trap_get_class(vcpu)];
-
+ exit_handler_fn fn = handlers[kvm_vcpu_trap_get_class(vcpu)];
if (fn)
return fn(vcpu, exit_code);
@@ -448,20 +491,9 @@ static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code
* the guest, false when we should restore the host state and return to the
* main run loop.
*/
-static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool __fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code,
+ const exit_handler_fn *handlers)
{
- /*
- * Save PSTATE early so that we can evaluate the vcpu mode
- * early on.
- */
- synchronize_vcpu_pstate(vcpu, exit_code);
-
- /*
- * Check whether we want to repaint the state one way or
- * another.
- */
- early_exit_filter(vcpu, exit_code);
-
if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
@@ -491,7 +523,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
goto exit;
/* Check if there's an exit handler and allow it to handle the exit. */
- if (kvm_hyp_handle_exit(vcpu, exit_code))
+ if (kvm_hyp_handle_exit(vcpu, exit_code, handlers))
goto guest;
exit:
/* Return to the host kernel and handle the exit */
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 3cea4b6ac23e..b183cc866404 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -5,6 +5,7 @@
*/
#include <hyp/adjust_pc.h>
+#include <hyp/switch.h>
#include <asm/pgtable-types.h>
#include <asm/kvm_asm.h>
@@ -25,7 +26,9 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
+ fpsimd_lazy_switch_to_guest(kern_hyp_va(vcpu));
cpu_reg(host_ctxt, 1) = __kvm_vcpu_run(kern_hyp_va(vcpu));
+ fpsimd_lazy_switch_to_host(kern_hyp_va(vcpu));
}
static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt)
@@ -285,11 +288,6 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
case ESR_ELx_EC_SMC64:
handle_host_smc(host_ctxt);
break;
- case ESR_ELx_EC_SVE:
- sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0);
- isb();
- sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
- break;
case ESR_ELx_EC_IABT_LOW:
case ESR_ELx_EC_DABT_LOW:
handle_host_mem_abort(host_ctxt);
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 85d3b7ae720f..6042cdd3d887 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -9,6 +9,8 @@
#include <nvhe/fixed_config.h>
#include <nvhe/trap_handler.h>
+unsigned int kvm_host_sve_max_vl;
+
/*
* Set trap register values based on features in ID_AA64PFR0.
*/
@@ -17,7 +19,6 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1);
u64 hcr_set = HCR_RW;
u64 hcr_clear = 0;
- u64 cptr_set = 0;
/* Protected KVM does not support AArch32 guests. */
BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0),
@@ -44,16 +45,10 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
/* Trap AMU */
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU), feature_ids)) {
hcr_clear |= HCR_AMVOFFEN;
- cptr_set |= CPTR_EL2_TAM;
}
- /* Trap SVE */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids))
- cptr_set |= CPTR_EL2_TZ;
-
vcpu->arch.hcr_el2 |= hcr_set;
vcpu->arch.hcr_el2 &= ~hcr_clear;
- vcpu->arch.cptr_el2 |= cptr_set;
}
/*
@@ -83,7 +78,6 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1);
u64 mdcr_set = 0;
u64 mdcr_clear = 0;
- u64 cptr_set = 0;
/* Trap/constrain PMU */
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), feature_ids)) {
@@ -110,13 +104,8 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceFilt), feature_ids))
mdcr_set |= MDCR_EL2_TTRF;
- /* Trap Trace */
- if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceVer), feature_ids))
- cptr_set |= CPTR_EL2_TTA;
-
vcpu->arch.mdcr_el2 |= mdcr_set;
vcpu->arch.mdcr_el2 &= ~mdcr_clear;
- vcpu->arch.cptr_el2 |= cptr_set;
}
/*
@@ -167,8 +156,6 @@ static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
/* Clear res0 and set res1 bits to trap potential new features. */
vcpu->arch.hcr_el2 &= ~(HCR_RES0);
vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_RES0);
- vcpu->arch.cptr_el2 |= CPTR_NVHE_EL2_RES1;
- vcpu->arch.cptr_el2 &= ~(CPTR_NVHE_EL2_RES0);
}
/*
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 895fb3200076..47c7f3a675ae 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -36,23 +36,54 @@ DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
-static void __activate_traps(struct kvm_vcpu *vcpu)
+static void __activate_cptr_traps(struct kvm_vcpu *vcpu)
{
- u64 val;
+ u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */
- ___activate_traps(vcpu);
- __activate_traps_common(vcpu);
-
- val = vcpu->arch.cptr_el2;
- val |= CPTR_EL2_TTA | CPTR_EL2_TAM;
- if (!guest_owns_fp_regs(vcpu)) {
- val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
+ if (!guest_owns_fp_regs(vcpu))
__activate_traps_fpsimd32(vcpu);
- }
- if (cpus_have_final_cap(ARM64_SME))
+
+ /* !hVHE case upstream */
+ if (1) {
+ val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1;
+
+ /*
+ * Always trap SME since it's not supported in KVM.
+ * TSM is RES1 if SME isn't implemented.
+ */
val |= CPTR_EL2_TSM;
- write_sysreg(val, cptr_el2);
+ if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs(vcpu))
+ val |= CPTR_EL2_TZ;
+
+ if (!guest_owns_fp_regs(vcpu))
+ val |= CPTR_EL2_TFP;
+
+ write_sysreg(val, cptr_el2);
+ }
+}
+
+static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu)
+{
+ /* !hVHE case upstream */
+ if (1) {
+ u64 val = CPTR_NVHE_EL2_RES1;
+
+ if (!cpus_have_final_cap(ARM64_SVE))
+ val |= CPTR_EL2_TZ;
+ if (!cpus_have_final_cap(ARM64_SME))
+ val |= CPTR_EL2_TSM;
+
+ write_sysreg(val, cptr_el2);
+ }
+}
+
+static void __activate_traps(struct kvm_vcpu *vcpu)
+{
+ ___activate_traps(vcpu);
+ __activate_traps_common(vcpu);
+ __activate_cptr_traps(vcpu);
+
write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
@@ -73,7 +104,6 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
static void __deactivate_traps(struct kvm_vcpu *vcpu)
{
extern char __kvm_hyp_host_vector[];
- u64 cptr;
___deactivate_traps(vcpu);
@@ -98,13 +128,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);
- cptr = CPTR_EL2_DEFAULT;
- if (vcpu_has_sve(vcpu) && (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED))
- cptr |= CPTR_EL2_TZ;
- if (cpus_have_final_cap(ARM64_SME))
- cptr &= ~CPTR_EL2_TSM;
-
- write_sysreg(cptr, cptr_el2);
+ __deactivate_cptr_traps(vcpu);
write_sysreg(__kvm_hyp_host_vector, vbar_el2);
}
@@ -209,21 +233,22 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
return hyp_exit_handlers;
}
-/*
- * Some guests (e.g., protected VMs) are not be allowed to run in AArch32.
- * The ARMv8 architecture does not give the hypervisor a mechanism to prevent a
- * guest from dropping to AArch32 EL0 if implemented by the CPU. If the
- * hypervisor spots a guest in such a state ensure it is handled, and don't
- * trust the host to spot or fix it. The check below is based on the one in
- * kvm_arch_vcpu_ioctl_run().
- *
- * Returns false if the guest ran in AArch32 when it shouldn't have, and
- * thus should exit to the host, or true if a the guest run loop can continue.
- */
-static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
+static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
+ const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+ synchronize_vcpu_pstate(vcpu, exit_code);
+
+ /*
+ * Some guests (e.g., protected VMs) are not allowed to run in
+ * AArch32. The ARMv8 architecture does not give the hypervisor a
+ * mechanism to prevent a guest from dropping to AArch32 EL0 if
+ * implemented by the CPU. If the hypervisor spots a guest in such a
+ * state ensure it is handled, and don't trust the host to spot or fix
+ * it. The check below is based on the one in
+ * kvm_arch_vcpu_ioctl_run().
+ */
if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) {
/*
* As we have caught the guest red-handed, decide that it isn't
@@ -236,6 +261,8 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
*exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
*exit_code |= ARM_EXCEPTION_IL;
}
+
+ return __fixup_guest_exit(vcpu, exit_code, handlers);
}
/* Switch to the guest for legacy non-VHE systems */
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 45ac4a59cc2c..179152bb9e42 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -114,13 +114,11 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
};
-static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
+static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
- return hyp_exit_handlers;
-}
+ synchronize_vcpu_pstate(vcpu, exit_code);
-static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
-{
+ return __fixup_guest_exit(vcpu, exit_code, hyp_exit_handlers);
}
/* Switch to the guest for VHE systems running in EL2 */
@@ -136,6 +134,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
sysreg_save_host_state_vhe(host_ctxt);
+ fpsimd_lazy_switch_to_guest(vcpu);
+
/*
* ARM erratum 1165522 requires us to configure both stage 1 and
* stage 2 translation for the guest context before we clear
@@ -166,6 +166,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
__deactivate_traps(vcpu);
+ fpsimd_lazy_switch_to_host(vcpu);
+
sysreg_restore_host_state_vhe(host_ctxt);
if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index f9d070473614..54e00ee631a0 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -42,11 +42,14 @@ static u32 kvm_ipa_limit;
PSR_AA32_I_BIT | PSR_AA32_F_BIT)
unsigned int kvm_sve_max_vl;
+unsigned int kvm_host_sve_max_vl;
int kvm_arm_init_sve(void)
{
if (system_supports_sve()) {
kvm_sve_max_vl = sve_max_virtualisable_vl();
+ kvm_host_sve_max_vl = sve_max_vl();
+ kvm_nvhe_sym(kvm_host_sve_max_vl) = kvm_host_sve_max_vl;
/*
* The get_sve_reg()/set_sve_reg() ioctl interface will need
diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c
index 49e972beeac7..44bb90ee2f41 100644
--- a/arch/arm64/lib/insn.c
+++ b/arch/arm64/lib/insn.c
@@ -5,6 +5,7 @@
*
* Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
*/
+#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/printk.h>
@@ -1630,43 +1631,41 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm);
}
-u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type)
+static u32 __get_barrier_crm_val(enum aarch64_insn_mb_type type)
{
- u32 opt;
- u32 insn;
-
switch (type) {
case AARCH64_INSN_MB_SY:
- opt = 0xf;
- break;
+ return 0xf;
case AARCH64_INSN_MB_ST:
- opt = 0xe;
- break;
+ return 0xe;
case AARCH64_INSN_MB_LD:
- opt = 0xd;
- break;
+ return 0xd;
case AARCH64_INSN_MB_ISH:
- opt = 0xb;
- break;
+ return 0xb;
case AARCH64_INSN_MB_ISHST:
- opt = 0xa;
- break;
+ return 0xa;
case AARCH64_INSN_MB_ISHLD:
- opt = 0x9;
- break;
+ return 0x9;
case AARCH64_INSN_MB_NSH:
- opt = 0x7;
- break;
+ return 0x7;
case AARCH64_INSN_MB_NSHST:
- opt = 0x6;
- break;
+ return 0x6;
case AARCH64_INSN_MB_NSHLD:
- opt = 0x5;
- break;
+ return 0x5;
default:
- pr_err("%s: unknown dmb type %d\n", __func__, type);
+ pr_err("%s: unknown barrier type %d\n", __func__, type);
return AARCH64_BREAK_FAULT;
}
+}
+
+u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type)
+{
+ u32 opt;
+ u32 insn;
+
+ opt = __get_barrier_crm_val(type);
+ if (opt == AARCH64_BREAK_FAULT)
+ return AARCH64_BREAK_FAULT;
insn = aarch64_insn_get_dmb_value();
insn &= ~GENMASK(11, 8);
@@ -1674,3 +1673,18 @@ u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type)
return insn;
}
+
+u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type)
+{
+ u32 opt, insn;
+
+ opt = __get_barrier_crm_val(type);
+ if (opt == AARCH64_BREAK_FAULT)
+ return AARCH64_BREAK_FAULT;
+
+ insn = aarch64_insn_get_dsb_base_value();
+ insn &= ~GENMASK(11, 8);
+ insn |= (opt << 8);
+
+ return insn;
+}
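Both generators share the CRm lookup and differ only in the base opcode. As a sanity check, assuming the architectural base encodings DSB == 0xd503309f and DMB == 0xd50330bf with CRm in bits [11:8], a standalone sketch reproduces the expected barrier encodings:

#include <stdint.h>
#include <stdio.h>

#define DSB_BASE 0xd503309fu	/* assumed base values; CRm occupies bits [11:8] */
#define DMB_BASE 0xd50330bfu

static uint32_t gen_barrier(uint32_t base, uint32_t crm)
{
	return (base & ~(0xfu << 8)) | (crm << 8);
}

int main(void)
{
	/* ISH is CRm == 0xb per __get_barrier_crm_val() above */
	printf("DSB ISH: 0x%08x\n", gen_barrier(DSB_BASE, 0xb));	/* 0xd5033b9f */
	printf("DMB ISH: 0x%08x\n", gen_barrier(DMB_BASE, 0xb));	/* 0xd5033bbf */
	return 0;
}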
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 6b6b8a82f294..0776c98ad27f 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -710,6 +710,7 @@ static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs)
*/
siaddr = untagged_addr(far);
}
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK);
arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);
return 0;
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 6a4f118fb25f..e9288b28cb1e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1209,8 +1209,11 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
pmd_t *pmdp;
WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
+ /* [start, end] should be within one section */
+ WARN_ON_ONCE(end - start > PAGES_PER_SECTION * sizeof(struct page));
- if (!ARM64_KERNEL_USES_PMD_MAPS)
+ if (!ARM64_KERNEL_USES_PMD_MAPS ||
+ (end - start < PAGES_PER_SECTION * sizeof(struct page)))
return vmemmap_populate_basepages(start, end, node, altmap);
do {
@@ -1500,7 +1503,8 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
next = addr;
end = addr + PUD_SIZE;
do {
- pmd_free_pte_page(pmdp, next);
+ if (pmd_present(READ_ONCE(*pmdp)))
+ pmd_free_pte_page(pmdp, next);
} while (pmdp++, next += PMD_SIZE, next != end);
pud_clear(pudp);
@@ -1575,7 +1579,8 @@ int arch_add_memory(int nid, u64 start, u64 size,
__remove_pgd_mapping(swapper_pg_dir,
__phys_to_virt(start), size);
else {
- max_pfn = PFN_UP(start + size);
+ /* Address of hotplugged memory can be smaller than max_pfn */
+ max_pfn = max(max_pfn, PFN_UP(start + size));
max_low_pfn = max_pfn;
}
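
A worked case of why the max() matters (hypothetical PFNs): if boot memory already ends at PFN 0x100000 and a later-hotplugged region ends at PFN 0x80000, the old plain assignment would shrink max_pfn to 0x80000 and make still-present higher pages appear invalid; with max() the watermark only ever grows.

	/* hypothetical: max_pfn == 0x100000, hotplug ends at PFN 0x80000 */
	max_pfn = max(max_pfn, PFN_UP(start + size));	/* stays 0x100000 */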
diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
index 68bf1a125502..1e308328c079 100644
--- a/arch/arm64/mm/ptdump_debugfs.c
+++ b/arch/arm64/mm/ptdump_debugfs.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/debugfs.h>
-#include <linux/memory_hotplug.h>
#include <linux/seq_file.h>
#include <asm/ptdump.h>
@@ -9,9 +8,7 @@ static int ptdump_show(struct seq_file *m, void *v)
{
struct ptdump_info *info = m->private;
- get_online_mems();
ptdump_walk(m, info);
- put_online_mems();
return 0;
}
DEFINE_SHOW_ATTRIBUTE(ptdump);
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index c04ace8f4843..3dd23050a6c8 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -7,6 +7,7 @@
#define pr_fmt(fmt) "bpf_jit: " fmt
+#include <linux/arm-smccc.h>
#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/filter.h>
@@ -17,6 +18,7 @@
#include <asm/asm-extable.h>
#include <asm/byteorder.h>
#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
#include <asm/debug-monitors.h>
#include <asm/insn.h>
#include <asm/patching.h>
@@ -653,7 +655,51 @@ static void build_plt(struct jit_ctx *ctx)
plt->target = (u64)&dummy_tramp;
}
-static void build_epilogue(struct jit_ctx *ctx)
+/* Clobbers BPF registers 1-4, aka x0-x3 */
+static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx)
+{
+ const u8 r1 = bpf2a64[BPF_REG_1]; /* aka x0 */
+ u8 k = get_spectre_bhb_loop_value();
+
+ if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) ||
+ cpu_mitigations_off() || __nospectre_bhb ||
+ arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE)
+ return;
+
+ if (capable(CAP_SYS_ADMIN))
+ return;
+
+ if (supports_clearbhb(SCOPE_SYSTEM)) {
+ emit(aarch64_insn_gen_hint(AARCH64_INSN_HINT_CLEARBHB), ctx);
+ return;
+ }
+
+ if (k) {
+ emit_a64_mov_i64(r1, k, ctx);
+ emit(A64_B(1), ctx);
+ emit(A64_SUBS_I(true, r1, r1, 1), ctx);
+ emit(A64_B_(A64_COND_NE, -2), ctx);
+ emit(aarch64_insn_gen_dsb(AARCH64_INSN_MB_ISH), ctx);
+ emit(aarch64_insn_get_isb_value(), ctx);
+ }
+
+ if (is_spectre_bhb_fw_mitigated()) {
+ emit(A64_ORR_I(false, r1, AARCH64_INSN_REG_ZR,
+ ARM_SMCCC_ARCH_WORKAROUND_3), ctx);
+ switch (arm_smccc_1_1_get_conduit()) {
+ case SMCCC_CONDUIT_HVC:
+ emit(aarch64_insn_get_hvc_value(), ctx);
+ break;
+ case SMCCC_CONDUIT_SMC:
+ emit(aarch64_insn_get_smc_value(), ctx);
+ break;
+ default:
+ pr_err_once("Firmware mitigation enabled with unknown conduit\n");
+ }
+ }
+}
+
+static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
{
const u8 r0 = bpf2a64[BPF_REG_0];
const u8 r6 = bpf2a64[BPF_REG_6];
@@ -675,10 +721,13 @@ static void build_epilogue(struct jit_ctx *ctx)
emit(A64_POP(r8, r9, A64_SP), ctx);
emit(A64_POP(r6, r7, A64_SP), ctx);
+ if (was_classic)
+ build_bhb_mitigation(ctx);
+
/* Restore FP/LR registers */
emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
- /* Set return value */
+ /* Move the return value from bpf:r0 (aka x7) to x0 */
emit(A64_MOV(1, A64_R(0), r0), ctx);
/* Authenticate lr */
@@ -1527,7 +1576,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
}
ctx.epilogue_offset = ctx.idx;
- build_epilogue(&ctx);
+ build_epilogue(&ctx, was_classic);
build_plt(&ctx);
extable_align = __alignof__(struct exception_table_entry);
@@ -1563,7 +1612,7 @@ skip_init_ctx:
goto out_off;
}
- build_epilogue(&ctx);
+ build_epilogue(&ctx, was_classic);
build_plt(&ctx);
/* 3. Extra pass to validate JITed code. */
@@ -1893,7 +1942,11 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx);
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
+ /* for the first pass, assume the worst case */
+ if (!ctx->image)
+ ctx->idx += 4;
+ else
+ emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
emit_call((const u64)__bpf_tramp_enter, ctx);
}
@@ -1937,7 +1990,11 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
if (flags & BPF_TRAMP_F_CALL_ORIG) {
im->ip_epilogue = ctx->image + ctx->idx;
- emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
+ /* for the first pass, assume the worst case */
+ if (!ctx->image)
+ ctx->idx += 4;
+ else
+ emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
emit_call((const u64)__bpf_tramp_exit, ctx);
}
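
For the firmware-mitigated case the JIT bakes the SMCCC conduit into the program epilogue. A rough C-level sketch of what those emitted instructions do at runtime (arm_smccc_1_1_invoke() resolves HVC versus SMC from the same conduit query):

	#include <linux/arm-smccc.h>

	static void bhb_fw_workaround(void)
	{
		struct arm_smccc_res res;

		/* call ARCH_WORKAROUND_3 through the registered conduit */
		arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_3, &res);
	}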
diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S
index 9d01361696a1..ae551b857137 100644
--- a/arch/arm64/xen/hypercall.S
+++ b/arch/arm64/xen/hypercall.S
@@ -83,7 +83,26 @@ HYPERCALL3(vcpu_op);
HYPERCALL1(platform_op_raw);
HYPERCALL2(multicall);
HYPERCALL2(vm_assist);
-HYPERCALL3(dm_op);
+
+SYM_FUNC_START(HYPERVISOR_dm_op)
+ mov x16, #__HYPERVISOR_dm_op; \
+ /*
+ * dm_op hypercalls are issued by userspace. The kernel needs to
+ * enable access to TTBR0_EL1 as the hypervisor would issue stage 1
+ * translations to user memory via AT instructions. Since AT
+ * instructions are not affected by the PAN bit (ARMv8.1), we only
+ * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation
+ * is enabled (it implies that hardware UAO and PAN are disabled).
+ */
+ uaccess_ttbr0_enable x6, x7, x8
+ hvc XEN_IMM
+
+ /*
+ * Disable userspace access from the kernel once the hypercall has completed.
+ */
+ uaccess_ttbr0_disable x6, x7
+ ret
+SYM_FUNC_END(HYPERVISOR_dm_op);
SYM_FUNC_START(privcmd_call)
mov x16, x0
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index f4ba3638b76a..6c55d85b1c76 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -51,6 +51,7 @@ config LOONGARCH
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
+ select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
@@ -310,8 +311,8 @@ config CMDLINE_BOOTLOADER
config CMDLINE_EXTEND
bool "Use built-in to extend bootloader kernel arguments"
help
- The command-line arguments provided during boot will be
- appended to the built-in command line. This is useful in
+ The built-in command line will be appended to the command-
+ line arguments provided during boot. This is useful in
cases where the provided arguments are insufficient and
you don't want to or cannot modify them.
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 275d4d5260c7..9d4a6ab7bad9 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -38,7 +38,7 @@ endif
ifdef CONFIG_64BIT
ld-emul = $(64bit-emul)
-cflags-y += -mabi=lp64s
+cflags-y += -mabi=lp64s -mcmodel=normal
endif
cflags-y += -pipe -msoft-float
diff --git a/arch/loongarch/include/asm/cache.h b/arch/loongarch/include/asm/cache.h
index 1b6d09617199..aa622c754414 100644
--- a/arch/loongarch/include/asm/cache.h
+++ b/arch/loongarch/include/asm/cache.h
@@ -8,6 +8,8 @@
#define L1_CACHE_SHIFT CONFIG_L1_CACHE_SHIFT
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
+#define ARCH_DMA_MINALIGN (16)
+
#define __read_mostly __section(".data..read_mostly")
#endif /* _ASM_CACHE_H */
diff --git a/arch/loongarch/include/asm/irqflags.h b/arch/loongarch/include/asm/irqflags.h
index 319a8c616f1f..003172b8406b 100644
--- a/arch/loongarch/include/asm/irqflags.h
+++ b/arch/loongarch/include/asm/irqflags.h
@@ -14,40 +14,48 @@
static inline void arch_local_irq_enable(void)
{
u32 flags = CSR_CRMD_IE;
+ register u32 mask asm("t0") = CSR_CRMD_IE;
+
__asm__ __volatile__(
"csrxchg %[val], %[mask], %[reg]\n\t"
: [val] "+r" (flags)
- : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
: "memory");
}
static inline void arch_local_irq_disable(void)
{
u32 flags = 0;
+ register u32 mask asm("t0") = CSR_CRMD_IE;
+
__asm__ __volatile__(
"csrxchg %[val], %[mask], %[reg]\n\t"
: [val] "+r" (flags)
- : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
: "memory");
}
static inline unsigned long arch_local_irq_save(void)
{
u32 flags = 0;
+ register u32 mask asm("t0") = CSR_CRMD_IE;
+
__asm__ __volatile__(
"csrxchg %[val], %[mask], %[reg]\n\t"
: [val] "+r" (flags)
- : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
: "memory");
return flags;
}
static inline void arch_local_irq_restore(unsigned long flags)
{
+ register u32 mask asm("t0") = CSR_CRMD_IE;
+
__asm__ __volatile__(
"csrxchg %[val], %[mask], %[reg]\n\t"
: [val] "+r" (flags)
- : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD)
+ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD)
: "memory");
}
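
All four helpers now funnel the CSR_CRMD_IE mask through a pinned GPR, since csrxchg takes its mask from a register operand. A rough C model of the instruction's effect (a sketch, not the architectural definition; csr_read()/csr_write() are illustrative stand-ins for the CSR access):

	/* csrxchg val, mask, csr: swap only the masked CSR bits */
	old = csr_read(LOONGARCH_CSR_CRMD);
	csr_write(LOONGARCH_CSR_CRMD, (old & ~mask) | (val & mask));
	val = old;	/* arch_local_irq_save() derives its flags from this */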
diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h
index 59c4608de91d..af86241ae08f 100644
--- a/arch/loongarch/include/asm/ptrace.h
+++ b/arch/loongarch/include/asm/ptrace.h
@@ -32,9 +32,9 @@ struct pt_regs {
unsigned long __last[];
} __aligned(8);
-static inline int regs_irqs_disabled(struct pt_regs *regs)
+static __always_inline bool regs_irqs_disabled(struct pt_regs *regs)
{
- return arch_irqs_disabled_flags(regs->csr_prmd);
+ return !(regs->csr_prmd & CSR_PRMD_PIE);
}
static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
@@ -54,7 +54,7 @@ static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long v
/* Query offset/name of register from its name/offset */
extern int regs_query_register_offset(const char *name);
-#define MAX_REG_OFFSET (offsetof(struct pt_regs, __last))
+#define MAX_REG_OFFSET (offsetof(struct pt_regs, __last) - sizeof(unsigned long))
/**
* regs_get_register() - get register value from its offset
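
Because __last[] is a flexible array member, offsetof(struct pt_regs, __last) names the first byte past the last saved register, so the old bound let a maximal offset read one word beyond pt_regs. A sketch of the check this limit feeds, assuming the usual regs_get_register() shape:

	static unsigned long example_get(struct pt_regs *regs, unsigned int offset)
	{
		if (unlikely(offset > MAX_REG_OFFSET))
			return 0;
		return *(unsigned long *)((unsigned long)regs + offset);
	}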
diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c
index 8319cc409009..4dd55a50d89e 100644
--- a/arch/loongarch/kernel/acpi.c
+++ b/arch/loongarch/kernel/acpi.c
@@ -173,18 +173,6 @@ static __init int setup_node(int pxm)
return acpi_map_pxm_to_node(pxm);
}
-/*
- * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for
- * I/O localities since SRAT does not list them. I/O localities are
- * not supported at this point.
- */
-unsigned int numa_distance_cnt;
-
-static inline unsigned int get_numa_distances_cnt(struct acpi_table_slit *slit)
-{
- return slit->locality_count;
-}
-
void __init numa_set_distance(int from, int to, int distance)
{
if ((u8)distance != distance || (from == to && distance != LOCAL_DISTANCE)) {
diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
index 2dcb9e003657..30aa420610a0 100644
--- a/arch/loongarch/kernel/machine_kexec.c
+++ b/arch/loongarch/kernel/machine_kexec.c
@@ -126,14 +126,14 @@ void kexec_reboot(void)
/* All secondary cpus go to kexec_smp_wait */
if (smp_processor_id() > 0) {
relocated_kexec_smp_wait(NULL);
- unreachable();
+ BUG();
}
#endif
do_kexec = (void *)reboot_code_buffer;
do_kexec(efi_boot, cmdline_ptr, systable_ptr, start_addr, first_ind_entry);
- unreachable();
+ BUG();
}
diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c
index ba138117b124..70b4a51885c2 100644
--- a/arch/loongarch/mm/hugetlbpage.c
+++ b/arch/loongarch/mm/hugetlbpage.c
@@ -47,7 +47,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
pmd = pmd_offset(pud, addr);
}
}
- return (pte_t *) pmd;
+ return pmd_none(*pmd) ? NULL : (pte_t *) pmd;
}
/*
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index f42a3be5f28d..3861ae6942a0 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -89,9 +89,6 @@ void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
-#ifdef CONFIG_ZONE_DMA
- max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
-#endif
#ifdef CONFIG_ZONE_DMA32
max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
#endif
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 2567916370b4..ec5123c19612 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -201,11 +201,9 @@ bool bpf_jit_supports_kfunc_call(void)
return true;
}
-/* initialized on the first pass of build_body() */
-static int out_offset = -1;
-static int emit_bpf_tail_call(struct jit_ctx *ctx)
+static int emit_bpf_tail_call(struct jit_ctx *ctx, int insn)
{
- int off;
+ int off, tc_ninsn = 0;
u8 tcc = tail_call_reg(ctx);
u8 a1 = LOONGARCH_GPR_A1;
u8 a2 = LOONGARCH_GPR_A2;
@@ -215,7 +213,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
const int idx0 = ctx->idx;
#define cur_offset (ctx->idx - idx0)
-#define jmp_offset (out_offset - (cur_offset))
+#define jmp_offset (tc_ninsn - (cur_offset))
/*
* a0: &ctx
@@ -225,6 +223,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
* if (index >= array->map.max_entries)
* goto out;
*/
+ tc_ninsn = insn ? ctx->offset[insn+1] - ctx->offset[insn] : ctx->offset[0];
off = offsetof(struct bpf_array, map.max_entries);
emit_insn(ctx, ldwu, t1, a1, off);
/* bgeu $a2, $t1, jmp_offset */
@@ -256,15 +255,6 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
emit_insn(ctx, ldd, t3, t2, off);
__build_epilogue(ctx, true);
- /* out: */
- if (out_offset == -1)
- out_offset = cur_offset;
- if (cur_offset != out_offset) {
- pr_err_once("tail_call out_offset = %d, expected %d!\n",
- cur_offset, out_offset);
- return -1;
- }
-
return 0;
toofar:
@@ -789,7 +779,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
/* tail call */
case BPF_JMP | BPF_TAIL_CALL:
mark_tail_call(ctx);
- if (emit_bpf_tail_call(ctx) < 0)
+ if (emit_bpf_tail_call(ctx, i) < 0)
return -EINVAL;
break;
@@ -808,7 +798,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
{
const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;
- move_imm(ctx, dst, imm64, is32);
+ if (bpf_pseudo_func(insn))
+ move_addr(ctx, dst, imm64);
+ else
+ move_imm(ctx, dst, imm64, is32);
return 1;
}
@@ -1167,7 +1160,6 @@ out:
if (tmp_blinded)
bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);
- out_offset = -1;
return prog;
}
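
ctx->offset[i] records the JITed instruction index at which BPF insn i begins, so the per-call length replaces the old global out_offset. The arithmetic, spelled out (insn 0 is preceded by the prologue, whose length is ctx->offset[0]):

	tc_ninsn = insn ? ctx->offset[insn + 1] - ctx->offset[insn]
			: ctx->offset[0];
	/* a forward branch of (tc_ninsn - cur_offset) instructions lands
	 * just past this tail-call sequence, i.e. on the old "out:" label */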
diff --git a/arch/m68k/Kconfig.debug b/arch/m68k/Kconfig.debug
index 465e28be0ce4..6a6f0ed7713f 100644
--- a/arch/m68k/Kconfig.debug
+++ b/arch/m68k/Kconfig.debug
@@ -10,7 +10,7 @@ config BOOTPARAM_STRING
config EARLY_PRINTK
bool "Early printk"
- depends on !(SUN3 || M68000 || COLDFIRE)
+ depends on MMU_MOTOROLA
help
Write kernel log output directly to a serial port.
Where implemented, output goes to the framebuffer as well.
diff --git a/arch/m68k/kernel/early_printk.c b/arch/m68k/kernel/early_printk.c
index f11ef9f1f56f..521cbb8a150c 100644
--- a/arch/m68k/kernel/early_printk.c
+++ b/arch/m68k/kernel/early_printk.c
@@ -16,25 +16,10 @@
#include "../mvme147/mvme147.h"
#include "../mvme16x/mvme16x.h"
-asmlinkage void __init debug_cons_nputs(const char *s, unsigned n);
-
-static void __ref debug_cons_write(struct console *c,
- const char *s, unsigned n)
-{
-#if !(defined(CONFIG_SUN3) || defined(CONFIG_M68000) || \
- defined(CONFIG_COLDFIRE))
- if (MACH_IS_MVME147)
- mvme147_scc_write(c, s, n);
- else if (MACH_IS_MVME16x)
- mvme16x_cons_write(c, s, n);
- else
- debug_cons_nputs(s, n);
-#endif
-}
+asmlinkage void __init debug_cons_nputs(struct console *c, const char *s, unsigned int n);
static struct console early_console_instance = {
.name = "debug",
- .write = debug_cons_write,
.flags = CON_PRINTBUFFER | CON_BOOT,
.index = -1
};
@@ -44,6 +29,12 @@ static int __init setup_early_printk(char *buf)
if (early_console || buf)
return 0;
+ if (MACH_IS_MVME147)
+ early_console_instance.write = mvme147_scc_write;
+ else if (MACH_IS_MVME16x)
+ early_console_instance.write = mvme16x_cons_write;
+ else
+ early_console_instance.write = debug_cons_nputs;
early_console = &early_console_instance;
register_console(early_console);
@@ -51,20 +42,15 @@ static int __init setup_early_printk(char *buf)
}
early_param("earlyprintk", setup_early_printk);
-/*
- * debug_cons_nputs() defined in arch/m68k/kernel/head.S cannot be called
- * after init sections are discarded (for platforms that use it).
- */
-#if !(defined(CONFIG_SUN3) || defined(CONFIG_M68000) || \
- defined(CONFIG_COLDFIRE))
-
static int __init unregister_early_console(void)
{
- if (!early_console || MACH_IS_MVME16x)
- return 0;
+ /*
+ * debug_cons_nputs() defined in arch/m68k/kernel/head.S cannot be
+ * called after init sections are discarded (for platforms that use it).
+ */
+ if (early_console && early_console->write == debug_cons_nputs)
+ return unregister_console(early_console);
- return unregister_console(early_console);
+ return 0;
}
late_initcall(unregister_early_console);
-
-#endif
diff --git a/arch/m68k/kernel/head.S b/arch/m68k/kernel/head.S
index 9e812d8606be..10f6aa4d8f05 100644
--- a/arch/m68k/kernel/head.S
+++ b/arch/m68k/kernel/head.S
@@ -3267,8 +3267,8 @@ func_return putn
* turns around and calls the internal routines. This routine
* is used by the boot console.
*
- * The calling parameters are:
- * void debug_cons_nputs(const char *str, unsigned length)
+ * The function signature is:
+ * void debug_cons_nputs(struct console *c, const char *s, unsigned int n)
*
* This routine does NOT understand variable arguments only
* simple strings!
@@ -3277,8 +3277,8 @@ ENTRY(debug_cons_nputs)
moveml %d0/%d1/%a0,%sp@-
movew %sr,%sp@-
ori #0x0700,%sr
- movel %sp@(18),%a0 /* fetch parameter */
- movel %sp@(22),%d1 /* fetch parameter */
+ movel %sp@(22),%a0 /* char *s */
+ movel %sp@(26),%d1 /* unsigned int n */
jra 2f
1:
#ifdef CONSOLE_DEBUG
@@ -3404,6 +3404,7 @@ L(console_clear_loop):
movel %d4,%d1 /* screen height in pixels */
divul %a0@(FONT_DESC_HEIGHT),%d1 /* d1 = max num rows */
+ subql #1,%d1 /* row range is 0 to num - 1 */
movel %d0,%a2@(Lconsole_struct_num_columns)
movel %d1,%a2@(Lconsole_struct_num_rows)
@@ -3550,15 +3551,14 @@ func_start console_putc,%a0/%a1/%d0-%d7
cmpib #10,%d7
jne L(console_not_lf)
movel %a0@(Lconsole_struct_cur_row),%d0
- addil #1,%d0
- movel %d0,%a0@(Lconsole_struct_cur_row)
movel %a0@(Lconsole_struct_num_rows),%d1
cmpl %d1,%d0
jcs 1f
- subil #1,%d0
- movel %d0,%a0@(Lconsole_struct_cur_row)
console_scroll
+ jra L(console_exit)
1:
+ addql #1,%d0
+ movel %d0,%a0@(Lconsole_struct_cur_row)
jra L(console_exit)
L(console_not_lf):
@@ -3585,12 +3585,6 @@ L(console_not_cr):
*/
L(console_not_home):
movel %a0@(Lconsole_struct_cur_column),%d0
- addql #1,%a0@(Lconsole_struct_cur_column)
- movel %a0@(Lconsole_struct_num_columns),%d1
- cmpl %d1,%d0
- jcs 1f
- console_putc #'\n' /* recursion is OK! */
-1:
movel %a0@(Lconsole_struct_cur_row),%d1
/*
@@ -3637,6 +3631,23 @@ L(console_do_font_scanline):
addq #1,%d1
dbra %d7,L(console_read_char_scanline)
+ /*
+ * Register usage in the code below:
+ * a0 = pointer to console globals
+ * d0 = cursor column
+ * d1 = cursor column limit
+ */
+
+ lea %pc@(L(console_globals)),%a0
+
+ movel %a0@(Lconsole_struct_cur_column),%d0
+ addql #1,%d0
+ movel %d0,%a0@(Lconsole_struct_cur_column) /* Update cursor pos */
+ movel %a0@(Lconsole_struct_num_columns),%d1
+ cmpl %d1,%d0
+ jcs L(console_exit)
+ console_putc #'\n' /* Line wrap using tail recursion */
+
L(console_exit):
func_return console_putc
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index 382f656c29ea..9f5603e01a68 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -801,7 +801,7 @@ static void __init mac_identify(void)
}
macintosh_config = mac_data_table;
- for (m = macintosh_config; m->ident != -1; m++) {
+ for (m = &mac_data_table[1]; m->ident != -1; m++) {
if (m->ident == model) {
macintosh_config = m;
break;
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 6468f1eb39f3..94080a507348 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -351,7 +351,7 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
KBUILD_LDFLAGS += -m $(ld-emul)
ifdef need-compiler
-CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \
+CHECKFLAGS += $(shell $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \
grep -E -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \
sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g')
endif
diff --git a/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts b/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts
index c7ea4f1c0bb2..6c277ab83d4b 100644
--- a/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts
+++ b/arch/mips/boot/dts/loongson/loongson64c_4core_ls7a.dts
@@ -29,6 +29,7 @@
compatible = "loongson,pch-msi-1.0";
reg = <0 0x2ff00000 0 0x8>;
interrupt-controller;
+ #interrupt-cells = <1>;
msi-controller;
loongson,msi-base-vec = <64>;
loongson,msi-num-vecs = <64>;
diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/crypto/chacha-core.S
index 5755f69cfe00..706aeb850fb0 100644
--- a/arch/mips/crypto/chacha-core.S
+++ b/arch/mips/crypto/chacha-core.S
@@ -55,17 +55,13 @@
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define MSB 0
#define LSB 3
-#define ROTx rotl
-#define ROTR(n) rotr n, 24
#define CPU_TO_LE32(n) \
- wsbh n; \
+ wsbh n, n; \
rotr n, 16;
#else
#define MSB 3
#define LSB 0
-#define ROTx rotr
#define CPU_TO_LE32(n)
-#define ROTR(n)
#endif
#define FOR_EACH_WORD(x) \
@@ -192,10 +188,10 @@ CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
xor X(W), X(B); \
xor X(Y), X(C); \
xor X(Z), X(D); \
- rotl X(V), S; \
- rotl X(W), S; \
- rotl X(Y), S; \
- rotl X(Z), S;
+ rotr X(V), 32 - S; \
+ rotr X(W), 32 - S; \
+ rotr X(Y), 32 - S; \
+ rotr X(Z), 32 - S;
.text
.set reorder
@@ -372,21 +368,19 @@ chacha_crypt_arch:
/* First byte */
lbu T1, 0(IN)
addiu $at, BYTES, 1
- CPU_TO_LE32(SAVED_X)
- ROTR(SAVED_X)
xor T1, SAVED_X
sb T1, 0(OUT)
beqz $at, .Lchacha_mips_xor_done
/* Second byte */
lbu T1, 1(IN)
addiu $at, BYTES, 2
- ROTx SAVED_X, 8
+ rotr SAVED_X, 8
xor T1, SAVED_X
sb T1, 1(OUT)
beqz $at, .Lchacha_mips_xor_done
/* Third byte */
lbu T1, 2(IN)
- ROTx SAVED_X, 8
+ rotr SAVED_X, 8
xor T1, SAVED_X
sb T1, 2(OUT)
b .Lchacha_mips_xor_done
diff --git a/arch/mips/dec/prom/init.c b/arch/mips/dec/prom/init.c
index cb12eb211a49..8d74d7d6c05b 100644
--- a/arch/mips/dec/prom/init.c
+++ b/arch/mips/dec/prom/init.c
@@ -42,7 +42,7 @@ int (*__pmax_close)(int);
* Detect which PROM the DECSTATION has, and set the callback vectors
* appropriately.
*/
-void __init which_prom(s32 magic, s32 *prom_vec)
+static void __init which_prom(s32 magic, s32 *prom_vec)
{
/*
* No sign of the REX PROM's magic number means we assume a non-REX
diff --git a/arch/mips/include/asm/ds1287.h b/arch/mips/include/asm/ds1287.h
index 46cfb01f9a14..51cb61fd4c03 100644
--- a/arch/mips/include/asm/ds1287.h
+++ b/arch/mips/include/asm/ds1287.h
@@ -8,7 +8,7 @@
#define __ASM_DS1287_H
extern int ds1287_timer_state(void);
-extern void ds1287_set_base_clock(unsigned int clock);
+extern int ds1287_set_base_clock(unsigned int hz);
extern int ds1287_clockevent_init(int irq);
#endif
diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h
index db497a8167da..e3212f44446f 100644
--- a/arch/mips/include/asm/ftrace.h
+++ b/arch/mips/include/asm/ftrace.h
@@ -87,4 +87,20 @@ struct dyn_arch_ftrace {
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_FUNCTION_TRACER */
+
+#ifdef CONFIG_FTRACE_SYSCALLS
+#ifndef __ASSEMBLY__
+/*
+ * Some syscall entry functions on mips start with "__sys_" (fork and clone,
+ * for instance). We should also match the sys_ variants against those.
+ */
+#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
+static inline bool arch_syscall_match_sym_name(const char *sym,
+ const char *name)
+{
+ return !strcmp(sym, name) ||
+ (!strncmp(sym, "__sys_", 6) && !strcmp(sym + 6, name + 4));
+}
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_FTRACE_SYSCALLS */
#endif /* _ASM_MIPS_FTRACE_H */
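
A quick check of what the predicate accepts (hypothetical call sites):

	arch_syscall_match_sym_name("sys_read", "sys_read");	/* true: exact match */
	arch_syscall_match_sym_name("__sys_clone", "sys_clone");
		/* true: sym + 6 ("clone") equals name + 4 ("clone") */
	arch_syscall_match_sym_name("__sys_clone", "sys_fork");	/* false */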
diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h
index 696b40beb774..8494466740cc 100644
--- a/arch/mips/include/asm/mips-cm.h
+++ b/arch/mips/include/asm/mips-cm.h
@@ -47,6 +47,16 @@ extern phys_addr_t __mips_cm_phys_base(void);
*/
extern int mips_cm_is64;
+/*
+ * mips_cm_is_l2_hci_broken - determine if HCI is broken
+ *
+ * Some CM reports show that Hardware Cache Initialization is
+ * complete, but in reality it's not the case. They also incorrectly
+ * indicate that Hardware Cache Initialization is supported. This
+ * flag allows warning about this broken feature.
+ */
+extern bool mips_cm_is_l2_hci_broken;
+
/**
* mips_cm_error_report - Report CM cache errors
*/
@@ -86,6 +96,18 @@ static inline bool mips_cm_present(void)
}
/**
+ * mips_cm_update_property - update property from the device tree
+ *
+ * Retrieve the properties from the device tree if a CM node exist and
+ * update the internal variable based on this.
+ */
+#ifdef CONFIG_MIPS_CM
+extern void mips_cm_update_property(void);
+#else
+static inline void mips_cm_update_property(void) {}
+#endif
+
+/**
* mips_cm_has_l2sync - determine whether an L2-only sync region is present
*
* Returns true if the system implements an L2-only sync region, else false.
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index 4a2b40ce39e0..841612913f0d 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -65,7 +65,8 @@ static inline void instruction_pointer_set(struct pt_regs *regs,
/* Query offset/name of register from its name/offset */
extern int regs_query_register_offset(const char *name);
-#define MAX_REG_OFFSET (offsetof(struct pt_regs, __last))
+#define MAX_REG_OFFSET \
+ (offsetof(struct pt_regs, __last) - sizeof(unsigned long))
/**
* regs_get_register() - get register value from its offset
diff --git a/arch/mips/include/asm/vpe.h b/arch/mips/include/asm/vpe.h
index baa949a744cb..babbe8742b81 100644
--- a/arch/mips/include/asm/vpe.h
+++ b/arch/mips/include/asm/vpe.h
@@ -124,4 +124,12 @@ void cleanup_tc(struct tc *tc);
int __init vpe_module_init(void);
void __exit vpe_module_exit(void);
+
+#ifdef CONFIG_MIPS_VPE_LOADER_MT
+void *vpe_alloc(void);
+int vpe_start(void *vpe, unsigned long start);
+int vpe_stop(void *vpe);
+int vpe_free(void *vpe);
+#endif /* CONFIG_MIPS_VPE_LOADER_MT */
+
#endif /* _ASM_VPE_H */
diff --git a/arch/mips/kernel/cevt-ds1287.c b/arch/mips/kernel/cevt-ds1287.c
index 9a47fbcd4638..de64d6bb7ba3 100644
--- a/arch/mips/kernel/cevt-ds1287.c
+++ b/arch/mips/kernel/cevt-ds1287.c
@@ -10,6 +10,7 @@
#include <linux/mc146818rtc.h>
#include <linux/irq.h>
+#include <asm/ds1287.h>
#include <asm/time.h>
int ds1287_timer_state(void)
diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c
index b4f7d950c846..e21c2fd76167 100644
--- a/arch/mips/kernel/mips-cm.c
+++ b/arch/mips/kernel/mips-cm.c
@@ -5,6 +5,7 @@
*/
#include <linux/errno.h>
+#include <linux/of.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>
@@ -14,6 +15,7 @@
void __iomem *mips_gcr_base;
void __iomem *mips_cm_l2sync_base;
int mips_cm_is64;
+bool mips_cm_is_l2_hci_broken;
static char *cm2_tr[8] = {
"mem", "gcr", "gic", "mmio",
@@ -238,6 +240,18 @@ static void mips_cm_probe_l2sync(void)
mips_cm_l2sync_base = ioremap(addr, MIPS_CM_L2SYNC_SIZE);
}
+void mips_cm_update_property(void)
+{
+ struct device_node *cm_node;
+
+ cm_node = of_find_compatible_node(of_root, NULL, "mobileye,eyeq6-cm");
+ if (!cm_node)
+ return;
+ pr_info("HCI (Hardware Cache Init for the L2 cache) in GCR_L2_RAM_CONFIG from the CM3 is broken");
+ mips_cm_is_l2_hci_broken = true;
+ of_node_put(cm_node);
+}
+
int mips_cm_probe(void)
{
phys_addr_t addr;
diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c
index 9bf60d7d44d3..a7bcf2b814c8 100644
--- a/arch/mips/kernel/pm-cps.c
+++ b/arch/mips/kernel/pm-cps.c
@@ -56,10 +56,7 @@ static DEFINE_PER_CPU_ALIGNED(u32*, ready_count);
/* Indicates online CPUs coupled with the current CPU */
static DEFINE_PER_CPU_ALIGNED(cpumask_t, online_coupled);
-/*
- * Used to synchronize entry to deep idle states. Actually per-core rather
- * than per-CPU.
- */
+/* Used to synchronize entry to deep idle states */
static DEFINE_PER_CPU_ALIGNED(atomic_t, pm_barrier);
/* Saved CPU state across the CPS_PM_POWER_GATED state */
@@ -118,9 +115,10 @@ int cps_pm_enter_state(enum cps_pm_state state)
cps_nc_entry_fn entry;
struct core_boot_config *core_cfg;
struct vpe_boot_config *vpe_cfg;
+ atomic_t *barrier;
/* Check that there is an entry function for this state */
- entry = per_cpu(nc_asm_enter, core)[state];
+ entry = per_cpu(nc_asm_enter, cpu)[state];
if (!entry)
return -EINVAL;
@@ -156,7 +154,7 @@ int cps_pm_enter_state(enum cps_pm_state state)
smp_mb__after_atomic();
/* Create a non-coherent mapping of the core ready_count */
- core_ready_count = per_cpu(ready_count, core);
+ core_ready_count = per_cpu(ready_count, cpu);
nc_addr = kmap_noncoherent(virt_to_page(core_ready_count),
(unsigned long)core_ready_count);
nc_addr += ((unsigned long)core_ready_count & ~PAGE_MASK);
@@ -164,7 +162,8 @@ int cps_pm_enter_state(enum cps_pm_state state)
/* Ensure ready_count is zero-initialised before the assembly runs */
WRITE_ONCE(*nc_core_ready_count, 0);
- coupled_barrier(&per_cpu(pm_barrier, core), online);
+ barrier = &per_cpu(pm_barrier, cpumask_first(&cpu_sibling_map[cpu]));
+ coupled_barrier(barrier, online);
/* Run the generated entry code */
left = entry(online, nc_core_ready_count);
@@ -635,12 +634,14 @@ out_err:
static int cps_pm_online_cpu(unsigned int cpu)
{
- enum cps_pm_state state;
- unsigned core = cpu_core(&cpu_data[cpu]);
+ unsigned int sibling, core;
void *entry_fn, *core_rc;
+ enum cps_pm_state state;
+
+ core = cpu_core(&cpu_data[cpu]);
for (state = CPS_PM_NC_WAIT; state < CPS_PM_STATE_COUNT; state++) {
- if (per_cpu(nc_asm_enter, core)[state])
+ if (per_cpu(nc_asm_enter, cpu)[state])
continue;
if (!test_bit(state, state_support))
continue;
@@ -652,16 +653,19 @@ static int cps_pm_online_cpu(unsigned int cpu)
clear_bit(state, state_support);
}
- per_cpu(nc_asm_enter, core)[state] = entry_fn;
+ for_each_cpu(sibling, &cpu_sibling_map[cpu])
+ per_cpu(nc_asm_enter, sibling)[state] = entry_fn;
}
- if (!per_cpu(ready_count, core)) {
+ if (!per_cpu(ready_count, cpu)) {
core_rc = kmalloc(sizeof(u32), GFP_KERNEL);
if (!core_rc) {
pr_err("Failed allocate core %u ready_count\n", core);
return -ENOMEM;
}
- per_cpu(ready_count, core) = core_rc;
+
+ for_each_cpu(sibling, &cpu_sibling_map[cpu])
+ per_cpu(ready_count, sibling) = core_rc;
}
return 0;
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 17d80e2f2e4c..86deab01c578 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -690,18 +690,20 @@ unsigned long mips_stack_top(void)
}
/* Space for the VDSO, data page & GIC user page */
- top -= PAGE_ALIGN(current->thread.abi->vdso->size);
- top -= PAGE_SIZE;
- top -= mips_gic_present() ? PAGE_SIZE : 0;
+ if (current->thread.abi) {
+ top -= PAGE_ALIGN(current->thread.abi->vdso->size);
+ top -= PAGE_SIZE;
+ top -= mips_gic_present() ? PAGE_SIZE : 0;
+
+ /* Space to randomize the VDSO base */
+ if (current->flags & PF_RANDOMIZE)
+ top -= VDSO_RANDOMIZE_SIZE;
+ }
/* Space for cache colour alignment */
if (cpu_has_dc_aliases)
top -= shm_align_mask + 1;
- /* Space to randomize the VDSO base */
- if (current->flags & PF_RANDOMIZE)
- top -= VDSO_RANDOMIZE_SIZE;
-
return top;
}
diff --git a/arch/mips/lantiq/falcon/sysctrl.c b/arch/mips/lantiq/falcon/sysctrl.c
index 1187729d8cbb..357543996ee6 100644
--- a/arch/mips/lantiq/falcon/sysctrl.c
+++ b/arch/mips/lantiq/falcon/sysctrl.c
@@ -214,19 +214,16 @@ void __init ltq_soc_init(void)
of_node_put(np_syseth);
of_node_put(np_sysgpe);
- if ((request_mem_region(res_status.start, resource_size(&res_status),
- res_status.name) < 0) ||
- (request_mem_region(res_ebu.start, resource_size(&res_ebu),
- res_ebu.name) < 0) ||
- (request_mem_region(res_sys[0].start,
- resource_size(&res_sys[0]),
- res_sys[0].name) < 0) ||
- (request_mem_region(res_sys[1].start,
- resource_size(&res_sys[1]),
- res_sys[1].name) < 0) ||
- (request_mem_region(res_sys[2].start,
- resource_size(&res_sys[2]),
- res_sys[2].name) < 0))
+ if ((!request_mem_region(res_status.start, resource_size(&res_status),
+ res_status.name)) ||
+ (!request_mem_region(res_ebu.start, resource_size(&res_ebu),
+ res_ebu.name)) ||
+ (!request_mem_region(res_sys[0].start, resource_size(&res_sys[0]),
+ res_sys[0].name)) ||
+ (!request_mem_region(res_sys[1].start, resource_size(&res_sys[1]),
+ res_sys[1].name)) ||
+ (!request_mem_region(res_sys[2].start, resource_size(&res_sys[2]),
+ res_sys[2].name)))
pr_err("Failed to request core resources");
status_membase = ioremap(res_status.start,
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index 1b939abbe4ca..2e987b6e42bc 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -498,6 +498,60 @@ static int __init set_ntlb(char *str)
__setup("ntlb=", set_ntlb);
+/* Initialise all TLB entries with unique values */
+static void r4k_tlb_uniquify(void)
+{
+ int entry = num_wired_entries();
+
+ htw_stop();
+ write_c0_entrylo0(0);
+ write_c0_entrylo1(0);
+
+ while (entry < current_cpu_data.tlbsize) {
+ unsigned long asid_mask = cpu_asid_mask(&current_cpu_data);
+ unsigned long asid = 0;
+ int idx;
+
+ /* Skip wired MMID to make ginvt_mmid work */
+ if (cpu_has_mmid)
+ asid = MMID_KERNEL_WIRED + 1;
+
+ /* Check for match before using UNIQUE_ENTRYHI */
+ do {
+ if (cpu_has_mmid) {
+ write_c0_memorymapid(asid);
+ write_c0_entryhi(UNIQUE_ENTRYHI(entry));
+ } else {
+ write_c0_entryhi(UNIQUE_ENTRYHI(entry) | asid);
+ }
+ mtc0_tlbw_hazard();
+ tlb_probe();
+ tlb_probe_hazard();
+ idx = read_c0_index();
+ /* No match or match is on current entry */
+ if (idx < 0 || idx == entry)
+ break;
+ /*
+ * If we hit a match, we need to try again with
+ * a different ASID.
+ */
+ asid++;
+ } while (asid < asid_mask);
+
+ if (idx >= 0 && idx != entry)
+ panic("Unable to uniquify TLB entry %d", idx);
+
+ write_c0_index(entry);
+ mtc0_tlbw_hazard();
+ tlb_write_indexed();
+ entry++;
+ }
+
+ tlbw_use_hazard();
+ htw_start();
+ flush_micro_tlb();
+}
+
/*
* Configure TLB (for init or after a CPU has been powered off).
*/
@@ -537,7 +591,7 @@ static void r4k_tlb_configure(void)
temp_tlb_entry = current_cpu_data.tlbsize - 1;
/* From this point on the ARC firmware is dead. */
- local_flush_tlb_all();
+ r4k_tlb_uniquify();
/* Did I tell you that ARC SUCKS? */
}
diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile
index 1f7d5c6c10b0..e2e62b750fba 100644
--- a/arch/mips/vdso/Makefile
+++ b/arch/mips/vdso/Makefile
@@ -32,6 +32,7 @@ endif
# offsets.
cflags-vdso := $(ccflags-vdso) \
$(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \
+ $(filter -std=%,$(KBUILD_CFLAGS)) \
-O3 -g -fPIC -fno-strict-aliasing -fno-common -fno-builtin -G 0 \
-mrelax-pic-calls $(call cc-option, -mexplicit-relocs) \
-fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index a2d8600521f9..cc2a91b74f19 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -39,7 +39,9 @@ endif
export LD_BFD
-# Set default 32 bits cross compilers for vdso
+# Set default 32 bits cross compilers for vdso.
+# This means that for 64BIT, both the 64-bit tools and the 32-bit tools
+# need to be in the path.
CC_ARCHES_32 = hppa hppa2.0 hppa1.1
CC_SUFFIXES = linux linux-gnu unknown-linux-gnu suse-linux
CROSS32_COMPILE := $(call cc-cross-prefix, \
@@ -137,7 +139,7 @@ palo lifimage: vmlinuz
fi
@if test ! -f "$(PALOCONF)"; then \
cp $(srctree)/arch/parisc/defpalo.conf $(objtree)/palo.conf; \
- echo 'A generic palo config file ($(objree)/palo.conf) has been created for you.'; \
+ echo 'A generic palo config file ($(objtree)/palo.conf) has been created for you.'; \
echo 'You should check it and re-run "make palo".'; \
echo 'WARNING: the "lifimage" file is now placed in this directory by default!'; \
false; \
diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile
index a294a1b58ee7..9fba12b70d1b 100644
--- a/arch/parisc/boot/compressed/Makefile
+++ b/arch/parisc/boot/compressed/Makefile
@@ -22,6 +22,7 @@ KBUILD_CFLAGS += -fno-PIE -mno-space-regs -mdisable-fpregs -Os
ifndef CONFIG_64BIT
KBUILD_CFLAGS += -mfast-indirect-calls
endif
+KBUILD_CFLAGS += -std=gnu11
LDFLAGS_vmlinux := -X -e startup --as-needed -T
$(obj)/vmlinux: $(obj)/vmlinux.lds $(addprefix $(obj)/, $(OBJECTS)) $(LIBGCC) FORCE
diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h
index 51f40eaf7780..1013eeba31e5 100644
--- a/arch/parisc/include/asm/special_insns.h
+++ b/arch/parisc/include/asm/special_insns.h
@@ -32,6 +32,34 @@
pa; \
})
+/**
+ * prober_user() - Probe user read access
+ * @sr: Space regster.
+ * @va: Virtual address.
+ *
+ * Return: Non-zero if address is accessible.
+ *
+ * Due to the way _PAGE_READ is handled in TLB entries, we need
+ * a special check to determine whether a user address is accessible.
+ * The ldb instruction does the initial access check. If it is
+ * successful, the probe instruction checks user access rights.
+ */
+#define prober_user(sr, va) ({ \
+ unsigned long read_allowed; \
+ __asm__ __volatile__( \
+ "copy %%r0,%0\n" \
+ "8:\tldb 0(%%sr%1,%2),%%r0\n" \
+ "\tproberi (%%sr%1,%2),%3,%0\n" \
+ "9:\n" \
+ ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \
+ "or %%r0,%%r0,%%r0") \
+ : "=&r" (read_allowed) \
+ : "i" (sr), "r" (va), "i" (PRIV_USER) \
+ : "memory" \
+ ); \
+ read_allowed; \
+})
+
#define CR_EIEM 15 /* External Interrupt Enable Mask */
#define CR_CR16 16 /* CR16 Interval Timer */
#define CR_EIRR 23 /* External Interrupt Request Register */
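
A hedged sketch of a call site: the ldb either succeeds or is rewritten by the exception table to fall through, and proberi then reports whether the page is readable at user privilege.

	if (prober_user(SR_USER, (unsigned long)uaddr))
		/* user may read from uaddr */;
	else
		/* unmapped or not user-readable: treat as -EFAULT */;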
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 88d0ae5769dd..6c531d2c847e 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -42,9 +42,24 @@
__gu_err; \
})
-#define __get_user(val, ptr) \
-({ \
- __get_user_internal(SR_USER, val, ptr); \
+#define __probe_user_internal(sr, error, ptr) \
+({ \
+ __asm__("\tproberi (%%sr%1,%2),%3,%0\n" \
+ "\tcmpiclr,= 1,%0,%0\n" \
+ "\tldi %4,%0\n" \
+ : "=r"(error) \
+ : "i"(sr), "r"(ptr), "i"(PRIV_USER), \
+ "i"(-EFAULT)); \
+})
+
+#define __get_user(val, ptr) \
+({ \
+ register long __gu_err; \
+ \
+ __gu_err = __get_user_internal(SR_USER, val, ptr); \
+ if (likely(!__gu_err)) \
+ __probe_user_internal(SR_USER, __gu_err, ptr); \
+ __gu_err; \
})
#define __get_user_asm(sr, val, ldx, ptr) \
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 1ff54f4dbedb..0cc81d76d6c0 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -486,6 +486,12 @@
* this happens is quite subtle, read below */
.macro make_insert_tlb spc,pte,prot,tmp
space_to_prot \spc \prot /* create prot id from space */
+
+#if _PAGE_SPECIAL_BIT == _PAGE_DMB_BIT
+ /* need to drop DMB bit, as it's used as SPECIAL flag */
+ depi 0,_PAGE_SPECIAL_BIT,1,\pte
+#endif
+
/* The following is the real subtlety. This is depositing
* T <-> _PAGE_REFTRAP
* D <-> _PAGE_DIRTY
@@ -498,17 +504,18 @@
* Finally, _PAGE_READ goes in the top bit of PL1 (so we
* trigger an access rights trap in user space if the user
* tries to read an unreadable page */
-#if _PAGE_SPECIAL_BIT == _PAGE_DMB_BIT
- /* need to drop DMB bit, as it's used as SPECIAL flag */
- depi 0,_PAGE_SPECIAL_BIT,1,\pte
-#endif
depd \pte,8,7,\prot
/* PAGE_USER indicates the page can be read with user privileges,
* so deposit X1|11 to PL1|PL2 (remember the upper bit of PL1
- * contains _PAGE_READ) */
+ * contains _PAGE_READ). While the kernel can't directly write
+ * user pages which have _PAGE_WRITE zero, it can read pages
+ * which have _PAGE_READ zero (PL <= PL1). Thus, the kernel
+ * exception fault handler doesn't trigger when reading pages
+ * that aren't user read accessible */
extrd,u,*= \pte,_PAGE_USER_BIT+32,1,%r0
depdi 7,11,3,\prot
+
/* If we're a gateway page, drop PL2 back to zero for promotion
* to kernel privilege (so we can execute the page as kernel).
* Any privilege promotion page always denys read and write */
diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c
index e391b175f5ec..7dbb241a9fb7 100644
--- a/arch/parisc/kernel/pdt.c
+++ b/arch/parisc/kernel/pdt.c
@@ -62,6 +62,7 @@ static unsigned long pdt_entry[MAX_PDT_ENTRIES] __page_aligned_bss;
#define PDT_ADDR_PERM_ERR (pdt_type != PDT_PDC ? 2UL : 0UL)
#define PDT_ADDR_SINGLE_ERR 1UL
+#ifdef CONFIG_PROC_FS
/* report PDT entries via /proc/meminfo */
void arch_report_meminfo(struct seq_file *m)
{
@@ -73,6 +74,7 @@ void arch_report_meminfo(struct seq_file *m)
seq_printf(m, "PDT_cur_entries: %7lu\n",
pdt_status.pdt_entries);
}
+#endif
static int get_info_pat_new(void)
{
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index f0f49ca70362..670aed0dea37 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -600,6 +600,9 @@ lws_compare_and_swap32:
lws_compare_and_swap:
/* Trigger memory reference interruptions without writing to memory */
1: ldw 0(%r26), %r28
+ proberi (%r26), PRIV_USER, %r28
+ comb,=,n %r28, %r0, lws_fault /* backwards, likely not taken */
+ nop
2: stbys,e %r0, 0(%r26)
/* Calculate 8-bit hash index from virtual address */
@@ -753,6 +756,9 @@ cas2_lock_start:
copy %r26, %r28
depi_safe 0, 31, 2, %r28
10: ldw 0(%r28), %r1
+ proberi (%r28), PRIV_USER, %r1
+ comb,=,n %r1, %r0, lws_fault /* backwards, likely not taken */
+ nop
11: stbys,e %r0, 0(%r28)
/* Calculate 8-bit hash index from virtual address */
@@ -936,41 +942,47 @@ atomic_xchg_begin:
/* 8-bit exchange */
1: ldb 0(%r24), %r20
+ proberi (%r24), PRIV_USER, %r20
+ comb,=,n %r20, %r0, lws_fault /* backwards, likely not taken */
+ nop
copy %r23, %r20
depi_safe 0, 31, 2, %r20
b atomic_xchg_start
2: stbys,e %r0, 0(%r20)
- nop
- nop
- nop
/* 16-bit exchange */
3: ldh 0(%r24), %r20
+ proberi (%r24), PRIV_USER, %r20
+ comb,=,n %r20, %r0, lws_fault /* backwards, likely not taken */
+ nop
copy %r23, %r20
depi_safe 0, 31, 2, %r20
b atomic_xchg_start
4: stbys,e %r0, 0(%r20)
- nop
- nop
- nop
/* 32-bit exchange */
5: ldw 0(%r24), %r20
+ proberi (%r24), PRIV_USER, %r20
+ comb,=,n %r20, %r0, lws_fault /* backwards, likely not taken */
+ nop
b atomic_xchg_start
6: stbys,e %r0, 0(%r23)
nop
nop
- nop
- nop
- nop
/* 64-bit exchange */
#ifdef CONFIG_64BIT
7: ldd 0(%r24), %r20
+ proberi (%r24), PRIV_USER, %r20
+ comb,=,n %r20, %r0, lws_fault /* backwards, likely not taken */
+ nop
8: stdby,e %r0, 0(%r23)
#else
7: ldw 0(%r24), %r20
8: ldw 4(%r24), %r20
+ proberi (%r24), PRIV_USER, %r20
+ comb,=,n %r20, %r0, lws_fault /* backwards, likely not taken */
+ nop
copy %r23, %r20
depi_safe 0, 31, 2, %r20
9: stbys,e %r0, 0(%r20)
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 782ee05e2088..71b03db60201 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -22,7 +22,7 @@
#define DPRINTF(fmt, args...)
#endif
-#define RFMT "%#08lx"
+#define RFMT "0x%08lx"
/* 1111 1100 0000 0000 0001 0011 1100 0000 */
#define OPCODE1(a,b,c) ((a)<<26|(b)<<12|(c)<<6)
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
index 5fc0c852c84c..69d65ffab312 100644
--- a/arch/parisc/lib/memcpy.c
+++ b/arch/parisc/lib/memcpy.c
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/compiler.h>
#include <linux/uaccess.h>
+#include <linux/mm.h>
#define get_user_space() mfsp(SR_USER)
#define get_kernel_space() SR_KERNEL
@@ -32,9 +33,25 @@ EXPORT_SYMBOL(raw_copy_to_user);
unsigned long raw_copy_from_user(void *dst, const void __user *src,
unsigned long len)
{
+ unsigned long start = (unsigned long) src;
+ unsigned long end = start + len;
+ unsigned long newlen = len;
+
mtsp(get_user_space(), SR_TEMP1);
mtsp(get_kernel_space(), SR_TEMP2);
- return pa_memcpy(dst, (void __force *)src, len);
+
+ /* Check region is user accessible */
+ if (start)
+ while (start < end) {
+ if (!prober_user(SR_TEMP1, start)) {
+ newlen = (start - (unsigned long) src);
+ break;
+ }
+ start += PAGE_SIZE;
+ /* align to page boundary which may have different permissions */
+ start = PAGE_ALIGN_DOWN(start);
+ }
+ return len - newlen + pa_memcpy(dst, (void __force *)src, newlen);
}
EXPORT_SYMBOL(raw_copy_from_user);
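
A worked case of the stepping (hypothetical addresses, 4k pages): for src = 0x1000ff8 and len = 16, the first probe covers 0x1000ff8; start then advances by PAGE_SIZE to 0x1001ff8 and PAGE_ALIGN_DOWN() pulls it back to 0x1001000, the boundary where permissions can change. If that second probe fails, newlen becomes 0x1001000 - 0x1000ff8 = 8, so only the accessible bytes are copied and the rest is reported as uncopied.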
diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c
index 6ce427b58836..ecd27b48d61f 100644
--- a/arch/parisc/math-emu/driver.c
+++ b/arch/parisc/math-emu/driver.c
@@ -103,9 +103,19 @@ handle_fpe(struct pt_regs *regs)
memcpy(regs->fr, frcopy, sizeof regs->fr);
if (signalcode != 0) {
- force_sig_fault(signalcode >> 24, signalcode & 0xffffff,
- (void __user *) regs->iaoq[0]);
- return -1;
+ int sig = signalcode >> 24;
+
+ if (sig == SIGFPE) {
+ /*
+ * Clear floating point trap bit to avoid trapping
+ * again on the first floating-point instruction in
+ * the userspace signal handler.
+ */
+ regs->fr[0] &= ~(1ULL << 38);
+ }
+ force_sig_fault(sig, signalcode & 0xffffff,
+ (void __user *) regs->iaoq[0]);
+ return -1;
}
return signalcode ? -1 : 0;
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index fbd9ada5e527..331f32d6234e 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -358,6 +358,10 @@ bad_area:
mmap_read_unlock(mm);
bad_area_nosemaphore:
+ if (!user_mode(regs) && fixup_exception(regs)) {
+ return;
+ }
+
if (user_mode(regs)) {
int signo, si_code;
diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig
index 56b876e418e9..6b998cb57255 100644
--- a/arch/powerpc/configs/mpc885_ads_defconfig
+++ b/arch/powerpc/configs/mpc885_ads_defconfig
@@ -78,4 +78,4 @@ CONFIG_DEBUG_VM_PGTABLE=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_BDI_SWITCH=y
CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_GENERIC_PTDUMP=y
+CONFIG_PTDUMP_DEBUGFS=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index d23deb94b36e..4a1c19cb6ea8 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -263,7 +263,6 @@ CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCH_NETEM=m
CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_CLS_BASIC=m
-CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_ROUTE4=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_U32=m
diff --git a/arch/powerpc/include/asm/floppy.h b/arch/powerpc/include/asm/floppy.h
index f8ce178b43b7..34abf8bea2cc 100644
--- a/arch/powerpc/include/asm/floppy.h
+++ b/arch/powerpc/include/asm/floppy.h
@@ -144,9 +144,12 @@ static int hard_dma_setup(char *addr, unsigned long size, int mode, int io)
bus_addr = 0;
}
- if (!bus_addr) /* need to map it */
+ if (!bus_addr) { /* need to map it */
bus_addr = dma_map_single(&isa_bridge_pcidev->dev, addr, size,
dir);
+ if (dma_mapping_error(&isa_bridge_pcidev->dev, bus_addr))
+ return -ENOMEM;
+ }
/* remember this one as prev */
prev_addr = addr;
diff --git a/arch/powerpc/include/uapi/asm/ioctls.h b/arch/powerpc/include/uapi/asm/ioctls.h
index 2c145da3b774..b5211e413829 100644
--- a/arch/powerpc/include/uapi/asm/ioctls.h
+++ b/arch/powerpc/include/uapi/asm/ioctls.h
@@ -23,10 +23,10 @@
#define TCSETSW _IOW('t', 21, struct termios)
#define TCSETSF _IOW('t', 22, struct termios)
-#define TCGETA _IOR('t', 23, struct termio)
-#define TCSETA _IOW('t', 24, struct termio)
-#define TCSETAW _IOW('t', 25, struct termio)
-#define TCSETAF _IOW('t', 28, struct termio)
+#define TCGETA 0x40147417 /* _IOR('t', 23, struct termio) */
+#define TCSETA 0x80147418 /* _IOW('t', 24, struct termio) */
+#define TCSETAW 0x80147419 /* _IOW('t', 25, struct termio) */
+#define TCSETAF 0x8014741c /* _IOW('t', 28, struct termio) */
#define TCSBRK _IO('t', 29)
#define TCXONC _IO('t', 30)
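
These constants are just the frozen expansions of the old macros. On powerpc the ioctl word packs dir (3 bits) << 29 | size (13 bits) << 16 | type << 8 | nr, with _IOC_READ = 2 and _IOC_WRITE = 4; checking TCGETA against that layout, assuming sizeof(struct termio) == 20:

	(2 << 29) | (20 << 16) | ('t' << 8) | 23
	  = 0x40000000 | 0x00140000 | 0x7400 | 0x17
	  = 0x40147417	/* matches TCGETA above */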
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index ab316e155ea9..82626363a309 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1130,6 +1130,7 @@ int eeh_unfreeze_pe(struct eeh_pe *pe)
return ret;
}
+EXPORT_SYMBOL_GPL(eeh_unfreeze_pe);
static struct pci_device_id eeh_reset_ids[] = {
@@ -1516,6 +1517,8 @@ int eeh_pe_configure(struct eeh_pe *pe)
/* Invalid PE ? */
if (!pe)
return -ENODEV;
+ else
+ ret = eeh_ops->configure_bridge(pe);
return ret;
}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index f279295179bd..429abaecad41 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -257,13 +257,12 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
struct pci_driver *driver;
enum pci_ers_result new_result;
- pci_lock_rescan_remove();
pdev = edev->pdev;
if (pdev)
get_device(&pdev->dev);
- pci_unlock_rescan_remove();
if (!pdev) {
eeh_edev_info(edev, "no device");
+ *result = PCI_ERS_RESULT_DISCONNECT;
return;
}
device_lock(&pdev->dev);
@@ -304,8 +303,9 @@ static void eeh_pe_report(const char *name, struct eeh_pe *root,
struct eeh_dev *edev, *tmp;
pr_info("EEH: Beginning: '%s'\n", name);
- eeh_for_each_pe(root, pe) eeh_pe_for_each_dev(pe, edev, tmp)
- eeh_pe_report_edev(edev, fn, result);
+ eeh_for_each_pe(root, pe)
+ eeh_pe_for_each_dev(pe, edev, tmp)
+ eeh_pe_report_edev(edev, fn, result);
if (result)
pr_info("EEH: Finished:'%s' with aggregate recovery state:'%s'\n",
name, pci_ers_result_name(*result));
@@ -383,6 +383,8 @@ static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
if (!edev)
return;
+ pci_lock_rescan_remove();
+
/*
* The content in the config space isn't saved because
* the blocked config space on some adapters. We have
@@ -393,14 +395,19 @@ static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
if (list_is_last(&edev->entry, &edev->pe->edevs))
eeh_pe_restore_bars(edev->pe);
+ pci_unlock_rescan_remove();
return;
}
pdev = eeh_dev_to_pci_dev(edev);
- if (!pdev)
+ if (!pdev) {
+ pci_unlock_rescan_remove();
return;
+ }
pci_restore_state(pdev);
+
+ pci_unlock_rescan_remove();
}
/**
@@ -647,9 +654,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
if (any_passed || driver_eeh_aware || (pe->type & EEH_PE_VF)) {
eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
} else {
- pci_lock_rescan_remove();
pci_hp_remove_devices(bus);
- pci_unlock_rescan_remove();
}
/*
@@ -665,8 +670,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
if (rc)
return rc;
- pci_lock_rescan_remove();
-
/* Restore PE */
eeh_ops->configure_bridge(pe);
eeh_pe_restore_bars(pe);
@@ -674,7 +677,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
/* Clear frozen state */
rc = eeh_clear_pe_frozen_state(pe, false);
if (rc) {
- pci_unlock_rescan_remove();
return rc;
}
@@ -709,7 +711,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
pe->tstamp = tstamp;
pe->freeze_count = cnt;
- pci_unlock_rescan_remove();
return 0;
}
@@ -843,10 +844,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
{LIST_HEAD_INIT(rmv_data.removed_vf_list), 0};
int devices = 0;
+ pci_lock_rescan_remove();
+
bus = eeh_pe_bus_get(pe);
if (!bus) {
pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
__func__, pe->phb->global_number, pe->addr);
+ pci_unlock_rescan_remove();
return;
}
@@ -1085,10 +1089,15 @@ recover_failed:
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
- pci_lock_rescan_remove();
- pci_hp_remove_devices(bus);
- pci_unlock_rescan_remove();
+ bus = eeh_pe_bus_get(pe);
+ if (bus)
+ pci_hp_remove_devices(bus);
+ else
+ pr_err("%s: PCI bus for PHB#%x-PE#%x disappeared\n",
+ __func__, pe->phb->global_number, pe->addr);
+
/* The passed PE should no longer be used */
+ pci_unlock_rescan_remove();
return;
}
@@ -1105,6 +1114,8 @@ out:
eeh_clear_slot_attention(edev->pdev);
eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
+
+ pci_unlock_rescan_remove();
}
/**
@@ -1123,6 +1134,7 @@ void eeh_handle_special_event(void)
unsigned long flags;
int rc;
+ pci_lock_rescan_remove();
do {
rc = eeh_ops->next_error(&pe);
@@ -1162,10 +1174,12 @@ void eeh_handle_special_event(void)
break;
case EEH_NEXT_ERR_NONE:
+ pci_unlock_rescan_remove();
return;
default:
pr_warn("%s: Invalid value %d from next_error()\n",
__func__, rc);
+ pci_unlock_rescan_remove();
return;
}
@@ -1177,7 +1191,9 @@ void eeh_handle_special_event(void)
if (rc == EEH_NEXT_ERR_FROZEN_PE ||
rc == EEH_NEXT_ERR_FENCED_PHB) {
eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ pci_unlock_rescan_remove();
eeh_handle_normal_event(pe);
+ pci_lock_rescan_remove();
} else {
eeh_for_each_pe(pe, tmp_pe)
eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
@@ -1190,7 +1206,6 @@ void eeh_handle_special_event(void)
"error_detected(permanent failure)", pe,
eeh_report_failure, NULL);
- pci_lock_rescan_remove();
list_for_each_entry(hose, &hose_list, list_node) {
phb_pe = eeh_phb_pe_get(hose);
if (!phb_pe ||
@@ -1209,7 +1224,6 @@ void eeh_handle_special_event(void)
}
pci_hp_remove_devices(bus);
}
- pci_unlock_rescan_remove();
}
/*
@@ -1219,4 +1233,6 @@ void eeh_handle_special_event(void)
if (rc == EEH_NEXT_ERR_DEAD_IOC)
break;
} while (rc != EEH_NEXT_ERR_NONE);
+
+ pci_unlock_rescan_remove();
}
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index e4624d789629..08095aeba5c9 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -671,11 +671,12 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
eeh_ops->write_config(edev, cap + PCI_EXP_LNKCTL, 2, val);
/* Check link */
- eeh_ops->read_config(edev, cap + PCI_EXP_LNKCAP, 4, &val);
- if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
- eeh_edev_dbg(edev, "No link reporting capability (0x%08x) \n", val);
- msleep(1000);
- return;
+ if (edev->pdev) {
+ if (!edev->pdev->link_active_reporting) {
+ eeh_edev_dbg(edev, "No link reporting capability\n");
+ msleep(1000);
+ return;
+ }
}
/* Wait the link is up until timeout (5s) */
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index 0fe251c6ac2c..ac70e85b0df8 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -111,6 +111,9 @@ void pci_hp_add_devices(struct pci_bus *bus)
struct pci_controller *phb;
struct device_node *dn = pci_bus_to_OF_node(bus);
+ if (!dn)
+ return;
+
phb = pci_bus_to_host(bus);
mode = PCI_PROBE_NORMAL;
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index a6090896f749..ac669e58e202 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -2974,11 +2974,11 @@ static void __init fixup_device_tree_pmac(void)
char type[8];
phandle node;
- // Some pmacs are missing #size-cells on escc nodes
+ // Some pmacs are missing #size-cells on escc or i2s nodes
for (node = 0; prom_next_node(&node); ) {
type[0] = '\0';
prom_getprop(node, "device_type", type, sizeof(type));
- if (prom_strcmp(type, "escc"))
+ if (prom_strcmp(type, "escc") && prom_strcmp(type, "i2s"))
continue;
if (prom_getproplen(node, "#size-cells") != PROM_ERROR)
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index f8d3caad4cf3..3c06c8389e05 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -25,6 +25,7 @@
#include <linux/reboot.h>
#include <linux/security.h>
#include <linux/syscalls.h>
+#include <linux/nospec.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
@@ -1178,6 +1179,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
|| nargs + nret > ARRAY_SIZE(args.args))
return -EINVAL;
+ nargs = array_index_nospec(nargs, ARRAY_SIZE(args.args));
+ nret = array_index_nospec(nret, ARRAY_SIZE(args.args) - nargs);
+
/* Copy in args. */
if (copy_from_user(args.args, uargs->args,
nargs * sizeof(rtas_arg_t)) != 0)
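
The preceding range check does not stop a mispredicted branch from indexing args.args out of bounds speculatively; array_index_nospec() clamps the index through a data dependency the CPU cannot speculate past. The general pattern, as a minimal sketch:

	if (idx >= ARRAY_SIZE(arr))
		return -EINVAL;
	idx = array_index_nospec(idx, ARRAY_SIZE(arr));
	val = arr[idx];	/* in-bounds even on a speculative path */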
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index 252724ed666a..14abe7046cd7 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -356,7 +356,10 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
is_via_system_reset = 1;
- crash_smp_send_stop();
+ if (IS_ENABLED(CONFIG_SMP))
+ crash_smp_send_stop();
+ else
+ crash_kexec_prepare();
crash_save_cpu(regs, crashing_cpu);
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 138fe5eb3801..05668e964140 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -242,7 +242,7 @@ static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
return tlbe->mas7_3 & (MAS3_SW|MAS3_UW);
}
-static inline bool kvmppc_e500_ref_setup(struct tlbe_ref *ref,
+static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
struct kvm_book3e_206_tlb_entry *gtlbe,
kvm_pfn_t pfn, unsigned int wimg)
{
@@ -252,7 +252,11 @@ static inline bool kvmppc_e500_ref_setup(struct tlbe_ref *ref,
/* Use guest supplied MAS2_G and MAS2_E */
ref->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg;
- return tlbe_is_writable(gtlbe);
+ /* Mark the page accessed */
+ kvm_set_pfn_accessed(pfn);
+
+ if (tlbe_is_writable(gtlbe))
+ kvm_set_pfn_dirty(pfn);
}
static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
@@ -322,7 +326,6 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
{
struct kvm_memory_slot *slot;
unsigned long pfn = 0; /* silence GCC warning */
- struct page *page = NULL;
unsigned long hva;
int pfnmap = 0;
int tsize = BOOK3E_PAGESZ_4K;
@@ -334,7 +337,6 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
unsigned int wimg = 0;
pgd_t *pgdir;
unsigned long flags;
- bool writable = false;
/* used to check for invalidations in progress */
mmu_seq = kvm->mmu_invalidate_seq;
@@ -444,7 +446,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
if (likely(!pfnmap)) {
tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT);
- pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, NULL, &page);
+ pfn = gfn_to_pfn_memslot(slot, gfn);
if (is_error_noslot_pfn(pfn)) {
if (printk_ratelimit())
pr_err("%s: real page not found for gfn %lx\n",
@@ -479,6 +481,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
if (pte_present(pte)) {
wimg = (pte_val(pte) >> PTE_WIMGE_SHIFT) &
MAS2_WIMGE_MASK;
+ local_irq_restore(flags);
} else {
local_irq_restore(flags);
pr_err_ratelimited("%s: pte not present: gfn %lx,pfn %lx\n",
@@ -487,9 +490,8 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
goto out;
}
}
- local_irq_restore(flags);
+ kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
- writable = kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
ref, gvaddr, stlbe);
@@ -497,8 +499,11 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
kvmppc_mmu_flush_icache(pfn);
out:
- kvm_release_faultin_page(kvm, page, !!ret, writable);
spin_unlock(&kvm->mmu_lock);
+
+ /* Drop refcount on page, so that mmu notifiers can clear it */
+ kvm_release_pfn_clean(pfn);
+
return ret;
}
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index e3c31c771ce9..470d7715ecf4 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2229,6 +2229,10 @@ static struct pmu power_pmu = {
#define PERF_SAMPLE_ADDR_TYPE (PERF_SAMPLE_ADDR | \
PERF_SAMPLE_PHYS_ADDR | \
PERF_SAMPLE_DATA_PAGE_SIZE)
+
+#define SIER_TYPE_SHIFT 15
+#define SIER_TYPE_MASK (0x7ull << SIER_TYPE_SHIFT)
+
/*
* A counter has overflowed; update its count and record
* things if requested. Note that interrupts are hard-disabled
@@ -2298,6 +2302,22 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
record = 0;
/*
+	 * SIER[46-48] present the instruction type of the sampled instruction.
+	 * In ISA v3.0 and earlier, values "0" and "7" are considered reserved.
+	 * In ISA v3.1, value "7" has been used to indicate "larx/stcx".
+	 * Drop the sample if "type" holds a reserved value for this field,
+	 * using an ISA version check.
+ */
+ if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
+ ppmu->get_mem_data_src) {
+ val = (regs->dar & SIER_TYPE_MASK) >> SIER_TYPE_SHIFT;
+ if (val == 0 || (val == 7 && !cpu_has_feature(CPU_FTR_ARCH_31))) {
+ record = 0;
+ atomic64_inc(&event->lost_samples);
+ }
+ }
+
+ /*
* Finally record data if requested.
*/
if (record) {
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 56301b2bc8ae..031a2b63c171 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -321,8 +321,10 @@ void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
sier = mfspr(SPRN_SIER);
val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
- if (val != 1 && val != 2 && !(val == 7 && cpu_has_feature(CPU_FTR_ARCH_31)))
+ if (val != 1 && val != 2 && !(val == 7 && cpu_has_feature(CPU_FTR_ARCH_31))) {
+ dsrc->val = 0;
return;
+ }
idx = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT;
sub_idx = (sier & ISA207_SIER_DATA_SRC_MASK) >> ISA207_SIER_DATA_SRC_SHIFT;
diff --git a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
index 04bf6ecf7d55..85e0fa7d902b 100644
--- a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
+++ b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
@@ -240,10 +240,8 @@ static int mpc512x_lpbfifo_kick(void)
dma_conf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
/* Make DMA channel work with LPB FIFO data register */
- if (dma_dev->device_config(lpbfifo.chan, &dma_conf)) {
- ret = -EINVAL;
- goto err_dma_prep;
- }
+ if (dma_dev->device_config(lpbfifo.chan, &dma_conf))
+ return -EINVAL;
sg_init_table(&sg, 1);
diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
index d954ddf7f059..0f73554c6ae9 100644
--- a/arch/powerpc/platforms/book3s/vas-api.c
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -521,6 +521,15 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
return -EINVAL;
}
+ /*
+	 * The complete page is mapped to the paste address, so user
+	 * space should pass 0ULL as the offset parameter.
+ */
+ if (vma->vm_pgoff) {
+ pr_debug("Page offset unsupported to map paste address\n");
+ return -EINVAL;
+ }
+
/* Ensure instance has an open send window */
if (!txwin) {
pr_err("No send window open?\n");
diff --git a/arch/powerpc/platforms/cell/spufs/gang.c b/arch/powerpc/platforms/cell/spufs/gang.c
index 827d338deaf4..2c2999de6bfa 100644
--- a/arch/powerpc/platforms/cell/spufs/gang.c
+++ b/arch/powerpc/platforms/cell/spufs/gang.c
@@ -25,6 +25,7 @@ struct spu_gang *alloc_spu_gang(void)
mutex_init(&gang->aff_mutex);
INIT_LIST_HEAD(&gang->list);
INIT_LIST_HEAD(&gang->aff_list_head);
+ gang->alive = 1;
out:
return gang;
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index dbcfe361831a..d11951586e1f 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -191,13 +191,32 @@ static int spufs_fill_dir(struct dentry *dir,
return -ENOMEM;
ret = spufs_new_file(dir->d_sb, dentry, files->ops,
files->mode & mode, files->size, ctx);
- if (ret)
+ if (ret) {
+ dput(dentry);
return ret;
+ }
files++;
}
return 0;
}
+static void unuse_gang(struct dentry *dir)
+{
+ struct inode *inode = dir->d_inode;
+ struct spu_gang *gang = SPUFS_I(inode)->i_gang;
+
+ if (gang) {
+ bool dead;
+
+ inode_lock(inode); // exclusion with spufs_create_context()
+ dead = !--gang->alive;
+ inode_unlock(inode);
+
+ if (dead)
+ simple_recursive_removal(dir, NULL);
+ }
+}
+
static int spufs_dir_close(struct inode *inode, struct file *file)
{
struct inode *parent;
@@ -212,6 +231,7 @@ static int spufs_dir_close(struct inode *inode, struct file *file)
inode_unlock(parent);
WARN_ON(ret);
+ unuse_gang(dir->d_parent);
return dcache_dir_close(inode, file);
}
@@ -404,7 +424,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
{
int ret;
int affinity;
- struct spu_gang *gang;
+ struct spu_gang *gang = SPUFS_I(inode)->i_gang;
struct spu_context *neighbor;
struct path path = {.mnt = mnt, .dentry = dentry};
@@ -419,11 +439,15 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader)
return -ENODEV;
- gang = NULL;
+ if (gang) {
+ if (!gang->alive)
+ return -ENOENT;
+ gang->alive++;
+ }
+
neighbor = NULL;
affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU);
if (affinity) {
- gang = SPUFS_I(inode)->i_gang;
if (!gang)
return -EINVAL;
mutex_lock(&gang->aff_mutex);
@@ -435,8 +459,11 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
}
ret = spufs_mkdir(inode, dentry, flags, mode & 0777);
- if (ret)
+ if (ret) {
+ if (neighbor)
+ put_spu_context(neighbor);
goto out_aff_unlock;
+ }
if (affinity) {
spufs_set_affinity(flags, SPUFS_I(d_inode(dentry))->i_ctx,
@@ -452,6 +479,8 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
out_aff_unlock:
if (affinity)
mutex_unlock(&gang->aff_mutex);
+ if (ret && gang)
+ gang->alive--; // can't reach 0
return ret;
}
@@ -481,6 +510,7 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode)
inode->i_fop = &simple_dir_operations;
d_instantiate(dentry, inode);
+ dget(dentry);
inc_nlink(dir);
inc_nlink(d_inode(dentry));
return ret;
@@ -491,6 +521,21 @@ out:
return ret;
}
+static int spufs_gang_close(struct inode *inode, struct file *file)
+{
+ unuse_gang(file->f_path.dentry);
+ return dcache_dir_close(inode, file);
+}
+
+static const struct file_operations spufs_gang_fops = {
+ .open = dcache_dir_open,
+ .release = spufs_gang_close,
+ .llseek = dcache_dir_lseek,
+ .read = generic_read_dir,
+ .iterate_shared = dcache_readdir,
+ .fsync = noop_fsync,
+};
+
static int spufs_gang_open(const struct path *path)
{
int ret;
@@ -510,7 +555,7 @@ static int spufs_gang_open(const struct path *path)
return PTR_ERR(filp);
}
- filp->f_op = &simple_dir_operations;
+ filp->f_op = &spufs_gang_fops;
fd_install(ret, filp);
return ret;
}
@@ -525,10 +570,8 @@ static int spufs_create_gang(struct inode *inode,
ret = spufs_mkgang(inode, dentry, mode & 0777);
if (!ret) {
ret = spufs_gang_open(&path);
- if (ret < 0) {
- int err = simple_rmdir(inode, dentry);
- WARN_ON(err);
- }
+ if (ret < 0)
+ unuse_gang(dentry);
}
return ret;
}
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 84958487f696..d33787c57c39 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -151,6 +151,8 @@ struct spu_gang {
int aff_flags;
struct spu *aff_ref_spu;
atomic_t aff_sched_count;
+
+ int alive;
};
/* Flag bits for spu_gang aff_flags */
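Taken together, these hunks give the gang directory a reference-counted lifetime: alive starts at 1 for the creating fd, each context created in the gang bumps it, and unuse_gang() removes the whole tree on the final drop. A rough sketch of the acquire side, assuming the inode lock is what serializes creators against unuse_gang() (as the comment in unuse_gang() states):

/* Sketch: only take a reference while the gang is still alive; the
 * matching drop is unuse_gang(), whose final decrement triggers
 * simple_recursive_removal(). */
static int gang_try_get(struct spu_gang *gang, struct inode *dir)
{
	int ret = 0;

	inode_lock(dir);		/* excludes unuse_gang() */
	if (gang->alive)
		gang->alive++;
	else
		ret = -ENOENT;		/* already being torn down */
	inode_unlock(dir);
	return ret;
}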
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 877720c64515..35471b679638 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -48,11 +48,15 @@ static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct memtrace_entry *ent = filp->private_data;
+ unsigned long ent_nrpages = ent->size >> PAGE_SHIFT;
+ unsigned long vma_nrpages = vma_pages(vma);
- if (ent->size < vma->vm_end - vma->vm_start)
+	/* The requested page offset should be within the object's page count */
+ if (vma->vm_pgoff >= ent_nrpages)
return -EINVAL;
- if (vma->vm_pgoff << PAGE_SHIFT >= ent->size)
+ /* The requested mapping range should remain within the bounds */
+ if (vma_nrpages > ent_nrpages - vma->vm_pgoff)
return -EINVAL;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
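The rewritten checks avoid two pitfalls of the originals: vm_pgoff << PAGE_SHIFT can overflow on a hostile offset, and comparing lengths alone misses a mapping whose offset plus length runs past the object. Validating the offset first, then bounding the length by subtraction on the validated side, cannot wrap. A standalone sketch (names illustrative):

/* Sketch: overflow-safe "does [pgoff, pgoff + nr_pages) fit within
 * obj_pages?" — the subtraction is safe because pgoff < obj_pages. */
static int mmap_range_ok(unsigned long obj_pages, unsigned long pgoff,
			 unsigned long nr_pages)
{
	if (pgoff >= obj_pages)
		return -EINVAL;		/* offset beyond the object */
	if (nr_pages > obj_pages - pgoff)
		return -EINVAL;		/* range runs past the end */
	return 0;
}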
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index a3a71d37cb9a..e10ac4887823 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -522,7 +522,12 @@ static struct msi_domain_info pseries_msi_domain_info = {
static void pseries_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
{
- __pci_read_msi_msg(irq_data_get_msi_desc(data), msg);
+ struct pci_dev *dev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));
+
+ if (dev->current_state == PCI_D0)
+ __pci_read_msi_msg(irq_data_get_msi_desc(data), msg);
+ else
+ get_cached_msi_msg(data->irq, msg);
}
static struct irq_chip pseries_msi_irq_chip = {
diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
index dcf1bc9de584..3d97c951a384 100644
--- a/arch/riscv/include/asm/ftrace.h
+++ b/arch/riscv/include/asm/ftrace.h
@@ -82,7 +82,7 @@ struct dyn_arch_ftrace {
#define make_call_t0(caller, callee, call) \
do { \
unsigned int offset = \
- (unsigned long) callee - (unsigned long) caller; \
+ (unsigned long) (callee) - (unsigned long) (caller); \
call[0] = to_auipc_t0(offset); \
call[1] = to_jalr_t0(offset); \
} while (0)
@@ -98,7 +98,7 @@ do { \
#define make_call_ra(caller, callee, call) \
do { \
unsigned int offset = \
- (unsigned long) callee - (unsigned long) caller; \
+ (unsigned long) (callee) - (unsigned long) (caller); \
call[0] = to_auipc_ra(offset); \
call[1] = to_jalr_ra(offset); \
} while (0)
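Parenthesizing caller and callee matters as soon as a macro argument is itself an expression; without the parentheses, C operator precedence splits the argument across the cast and the subtraction. A hedged illustration (macro names invented for the example):

#define OFF_BAD(caller, callee) \
	((unsigned long) callee - (unsigned long) caller)
#define OFF_GOOD(caller, callee) \
	((unsigned long) (callee) - (unsigned long) (caller))

/* With callee passed as "cond ? f1 : f2", OFF_BAD expands to
 *	(unsigned long) cond ? f1 : f2 - (unsigned long) caller
 * which parses as ((unsigned long)cond) ? f1 : (f2 - ...): a wrong
 * offset that still compiles silently. OFF_GOOD keeps the intended
 * grouping. */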
diff --git a/arch/riscv/include/asm/kgdb.h b/arch/riscv/include/asm/kgdb.h
index 46677daf708b..cc11c4544cff 100644
--- a/arch/riscv/include/asm/kgdb.h
+++ b/arch/riscv/include/asm/kgdb.h
@@ -19,16 +19,9 @@
#ifndef __ASSEMBLY__
+void arch_kgdb_breakpoint(void);
extern unsigned long kgdb_compiled_break;
-static inline void arch_kgdb_breakpoint(void)
-{
- asm(".global kgdb_compiled_break\n"
- ".option norvc\n"
- "kgdb_compiled_break: ebreak\n"
- ".option rvc\n");
-}
-
#endif /* !__ASSEMBLY__ */
#define DBG_REG_ZERO "zero"
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 86048c60f700..fd861ac47a89 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -115,6 +115,7 @@ struct kernel_mapping {
extern struct kernel_mapping kernel_map;
extern phys_addr_t phys_ram_base;
+extern unsigned long vmemmap_start_pfn;
#define is_kernel_mapping(x) \
((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size))
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 7d1688f850c3..bb19a643c5c2 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -79,7 +79,7 @@
* Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
* is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
*/
-#define vmemmap ((struct page *)VMEMMAP_START - (phys_ram_base >> PAGE_SHIFT))
+#define vmemmap ((struct page *)VMEMMAP_START - vmemmap_start_pfn)
#define PCI_IO_SIZE SZ_16M
#define PCI_IO_END VMEMMAP_START
diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
index 384a63b86420..8426c4510d31 100644
--- a/arch/riscv/include/asm/syscall.h
+++ b/arch/riscv/include/asm/syscall.h
@@ -61,8 +61,11 @@ static inline void syscall_get_arguments(struct task_struct *task,
unsigned long *args)
{
args[0] = regs->orig_a0;
- args++;
- memcpy(args, &regs->a1, 5 * sizeof(args[0]));
+ args[1] = regs->a1;
+ args[2] = regs->a2;
+ args[3] = regs->a3;
+ args[4] = regs->a4;
+ args[5] = regs->a5;
}
static inline int syscall_get_arch(struct task_struct *task)
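Writing out the six assignments makes the copy depend only on field names rather than on a1–a5 being contiguous in struct pt_regs, and it keeps the one special case visible: slot 0 comes from orig_a0 because a0 may already have been overwritten with the syscall return value by the time a tracer reads the arguments. The shape, as a sketch:

/* Sketch: per-register copy; only orig_a0 is special. */
static void get_syscall_args(const struct pt_regs *regs,
			     unsigned long args[6])
{
	args[0] = regs->orig_a0;	/* a0 is clobbered by the return value */
	args[1] = regs->a1;
	args[2] = regs->a2;
	args[3] = regs->a3;
	args[4] = regs->a4;
	args[5] = regs->a5;
}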
diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c
index 963ed7edcff2..1d83b3696721 100644
--- a/arch/riscv/kernel/kgdb.c
+++ b/arch/riscv/kernel/kgdb.c
@@ -273,6 +273,12 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
regs->epc = pc;
}
+noinline void arch_kgdb_breakpoint(void)
+{
+ asm(".global kgdb_compiled_break\n"
+ "kgdb_compiled_break: ebreak\n");
+}
+
void kgdb_arch_handle_qxfer_pkt(char *remcom_in_buffer,
char *remcom_out_buffer)
{
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 2acf51c23567..f4c3252abce4 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -76,6 +76,9 @@ static struct resource bss_res = { .name = "Kernel bss", };
static struct resource elfcorehdr_res = { .name = "ELF Core hdr", };
#endif
+static int num_standard_resources;
+static struct resource *standard_resources;
+
static int __init add_resource(struct resource *parent,
struct resource *res)
{
@@ -149,7 +152,7 @@ static void __init init_resources(void)
struct resource *res = NULL;
struct resource *mem_res = NULL;
size_t mem_res_sz = 0;
- int num_resources = 0, res_idx = 0;
+ int num_resources = 0, res_idx = 0, non_resv_res = 0;
int ret = 0;
/* + 1 as memblock_alloc() might increase memblock.reserved.cnt */
@@ -213,6 +216,7 @@ static void __init init_resources(void)
/* Add /memory regions to the resource tree */
for_each_mem_region(region) {
res = &mem_res[res_idx--];
+ non_resv_res++;
if (unlikely(memblock_is_nomap(region))) {
res->name = "Reserved";
@@ -230,6 +234,9 @@ static void __init init_resources(void)
goto error;
}
+ num_standard_resources = non_resv_res;
+ standard_resources = &mem_res[res_idx + 1];
+
/* Clean-up any unused pre-allocated resources */
if (res_idx >= 0)
memblock_free(mem_res, (res_idx + 1) * sizeof(*mem_res));
@@ -241,6 +248,33 @@ static void __init init_resources(void)
memblock_free(mem_res, mem_res_sz);
}
+static int __init reserve_memblock_reserved_regions(void)
+{
+ u64 i, j;
+
+ for (i = 0; i < num_standard_resources; i++) {
+ struct resource *mem = &standard_resources[i];
+ phys_addr_t r_start, r_end, mem_size = resource_size(mem);
+
+ if (!memblock_is_region_reserved(mem->start, mem_size))
+ continue;
+
+ for_each_reserved_mem_range(j, &r_start, &r_end) {
+ resource_size_t start, end;
+
+ start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start);
+ end = min(PFN_PHYS(PFN_UP(r_end)) - 1, mem->end);
+
+ if (start > mem->end || end < mem->start)
+ continue;
+
+ reserve_region_with_split(mem, start, end, "Reserved");
+ }
+ }
+
+ return 0;
+}
+arch_initcall(reserve_memblock_reserved_regions);
static void __init parse_dtb(void)
{
diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c
index 4c034d8a606a..1b8b3fa5f74a 100644
--- a/arch/riscv/kvm/vcpu_sbi_replace.c
+++ b/arch/riscv/kvm/vcpu_sbi_replace.c
@@ -91,14 +91,14 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
kvm_riscv_fence_i(vcpu->kvm, hbase, hmask);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
- if (cp->a2 == 0 && cp->a3 == 0)
+ if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL)
kvm_riscv_hfence_vvma_all(vcpu->kvm, hbase, hmask);
else
kvm_riscv_hfence_vvma_gva(vcpu->kvm, hbase, hmask,
cp->a2, cp->a3, PAGE_SHIFT);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
- if (cp->a2 == 0 && cp->a3 == 0)
+ if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL)
kvm_riscv_hfence_vvma_asid_all(vcpu->kvm,
hbase, hmask, cp->a4);
else
@@ -113,9 +113,9 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
/*
* Until nested virtualization is implemented, the
- * SBI HFENCE calls should be treated as NOPs
+			 * SBI HFENCE calls should return "not supported";
+			 * hence, fall through to the default case.
*/
- break;
default:
ret = -EOPNOTSUPP;
}
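The widened condition implements the SBI convention that a size of all ones (2^XLEN - 1) requests a fence over the entire address space, in addition to the start == 0 && size == 0 form. Distilled into a predicate (name invented; here a2 is the start and a3 the size):

static bool fence_covers_all(unsigned long start, unsigned long size)
{
	return (start == 0 && size == 0) || size == -1UL;
}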
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index ba2210b553f9..72b3462babbf 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -22,6 +22,7 @@
#include <linux/hugetlb.h>
#include <asm/fixmap.h>
+#include <asm/sparsemem.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/soc.h>
@@ -52,6 +53,13 @@ EXPORT_SYMBOL(pgtable_l5_enabled);
phys_addr_t phys_ram_base __ro_after_init;
EXPORT_SYMBOL(phys_ram_base);
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+#define VMEMMAP_ADDR_ALIGN (1ULL << SECTION_SIZE_BITS)
+
+unsigned long vmemmap_start_pfn __ro_after_init;
+EXPORT_SYMBOL(vmemmap_start_pfn);
+#endif
+
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
__page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
@@ -210,8 +218,12 @@ static void __init setup_bootmem(void)
memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
phys_ram_end = memblock_end_of_DRAM();
- if (!IS_ENABLED(CONFIG_XIP_KERNEL))
+ if (!IS_ENABLED(CONFIG_XIP_KERNEL)) {
phys_ram_base = memblock_start_of_DRAM();
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
+#endif
+}
/*
* Reserve physical address space that would be mapped to virtual
* addresses greater than (void *)(-PAGE_SIZE) because:
@@ -946,6 +958,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);
phys_ram_base = CONFIG_PHYS_RAM_BASE;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ vmemmap_start_pfn = round_down(phys_ram_base, VMEMMAP_ADDR_ALIGN) >> PAGE_SHIFT;
+#endif
kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE;
kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start);
diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c
index f4c7dbfaf8ee..5848f2e374a6 100644
--- a/arch/s390/hypfs/hypfs_dbfs.c
+++ b/arch/s390/hypfs/hypfs_dbfs.c
@@ -6,6 +6,7 @@
* Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
*/
+#include <linux/security.h>
#include <linux/slab.h>
#include "hypfs.h"
@@ -64,24 +65,28 @@ static long dbfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
long rc;
mutex_lock(&df->lock);
- if (df->unlocked_ioctl)
- rc = df->unlocked_ioctl(file, cmd, arg);
- else
- rc = -ENOTTY;
+ rc = df->unlocked_ioctl(file, cmd, arg);
mutex_unlock(&df->lock);
return rc;
}
-static const struct file_operations dbfs_ops = {
+static const struct file_operations dbfs_ops_ioctl = {
.read = dbfs_read,
.llseek = no_llseek,
.unlocked_ioctl = dbfs_ioctl,
};
+static const struct file_operations dbfs_ops = {
+ .read = dbfs_read,
+};
+
void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df)
{
- df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df,
- &dbfs_ops);
+ const struct file_operations *fops = &dbfs_ops;
+
+ if (df->unlocked_ioctl && !security_locked_down(LOCKDOWN_DEBUGFS))
+ fops = &dbfs_ops_ioctl;
+ df->dentry = debugfs_create_file(df->name, 0400, dbfs_dir, df, fops);
mutex_init(&df->lock);
}
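Splitting the fops moves the policy decision to file-creation time: a file only advertises .unlocked_ioctl when a handler exists and the kernel is not locked down, which is also why dbfs_ioctl() could drop its NULL check — the VFS never dispatches an ioctl through the plain dbfs_ops. The selection, compressed into a sketch:

/* Sketch: pick the fops up front; lockdown or a missing handler yields
 * the read-only variant with no ioctl entry point at all. */
static const struct file_operations *
dbfs_pick_fops(const struct hypfs_dbfs_file *df)
{
	if (df->unlocked_ioctl && !security_locked_down(LOCKDOWN_DEBUGFS))
		return &dbfs_ops_ioctl;
	return &dbfs_ops;
}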
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index ce878e85b6e4..d0255aa5b36e 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -192,13 +192,6 @@ static inline unsigned long get_tod_clock_fast(void)
asm volatile("stckf %0" : "=Q" (clk) : : "cc");
return clk;
}
-
-static inline cycles_t get_cycles(void)
-{
- return (cycles_t) get_tod_clock() >> 2;
-}
-#define get_cycles get_cycles
-
int get_phys_clock(unsigned long *clock);
void init_cpu_timer(void);
@@ -221,6 +214,12 @@ static inline unsigned long get_tod_clock_monotonic(void)
return tod;
}
+static inline cycles_t get_cycles(void)
+{
+ return (cycles_t)get_tod_clock_monotonic() >> 2;
+}
+#define get_cycles get_cycles
+
/**
* tod_to_ns - convert a TOD format value to nanoseconds
* @todval: to be converted TOD format value
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 62b80616ca72..576457915625 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -690,7 +690,7 @@ ENTRY(stack_overflow)
stmg %r0,%r7,__PT_R0(%r11)
stmg %r8,%r9,__PT_PSW(%r11)
mvc __PT_R8(64,%r11),0(%r14)
- stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2
+ mvc __PT_ORIG_GPR2(8,%r11),__LC_PGM_LAST_BREAK
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
jg kernel_stack_overflow
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 6b7b6d5e3632..add2862e835b 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -574,7 +574,7 @@ static int stp_sync_clock(void *data)
atomic_dec(&sync->cpus);
/* Wait for in_sync to be set. */
while (READ_ONCE(sync->in_sync) == 0)
- __udelay(1);
+ ;
}
if (sync->in_sync != 1)
/* Didn't work. Clear per-cpu in sync bit again. */
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 1d2aa448d103..bfedbd7fae3b 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -276,10 +276,10 @@ static void __init test_monitor_call(void)
return;
asm volatile(
" mc 0,0\n"
- "0: xgr %0,%0\n"
+ "0: lhi %[val],0\n"
"1:\n"
- EX_TABLE(0b,1b)
- : "+d" (val));
+ EX_TABLE(0b, 1b)
+ : [val] "+d" (val));
if (!val)
panic("Monitor call doesn't work!\n");
}
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index e88803e3f271..1a55952fe308 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -490,7 +490,7 @@ enum prot_type {
PROT_TYPE_DAT = 3,
PROT_TYPE_IEP = 4,
/* Dummy value for passing an initialized value when code != PGM_PROTECTION */
- PROT_NONE,
+ PROT_TYPE_DUMMY,
};
static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
@@ -506,7 +506,7 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva,
switch (code) {
case PGM_PROTECTION:
switch (prot) {
- case PROT_NONE:
+ case PROT_TYPE_DUMMY:
/* We should never get here, acts like termination */
WARN_ON_ONCE(1);
break;
@@ -976,7 +976,7 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
gpa = kvm_s390_real_to_abs(vcpu, ga);
if (kvm_is_error_gpa(vcpu->kvm, gpa)) {
rc = PGM_ADDRESSING;
- prot = PROT_NONE;
+ prot = PROT_TYPE_DUMMY;
}
}
if (rc)
@@ -1134,7 +1134,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
if (rc == PGM_PROTECTION)
prot = PROT_TYPE_KEYC;
else
- prot = PROT_NONE;
+ prot = PROT_TYPE_DUMMY;
rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
}
out_unlock:
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 6f0209d45164..9c5f546a2e1a 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -56,7 +56,7 @@ TRACE_EVENT(kvm_s390_create_vcpu,
__entry->sie_block = sie_block;
),
- TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK",
+ TP_printk("create cpu %d at 0x%p, sie block at 0x%p",
__entry->id, __entry->vcpu, __entry->sie_block)
);
@@ -255,7 +255,7 @@ TRACE_EVENT(kvm_s390_enable_css,
__entry->kvm = kvm;
),
- TP_printk("enabling channel I/O support (kvm @ %pK)\n",
+ TP_printk("enabling channel I/O support (kvm @ %p)\n",
__entry->kvm)
);
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index ba5f80268878..65243e1d0c8e 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -249,11 +249,9 @@ static int ptdump_show(struct seq_file *m, void *v)
.marker = address_markers,
};
- get_online_mems();
mutex_lock(&cpa_mutex);
ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
mutex_unlock(&cpa_mutex);
- put_online_mems();
return 0;
}
DEFINE_SHOW_ATTRIBUTE(ptdump);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 862386393557..a9dbf7475258 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -543,17 +543,15 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
}
/* Setup stack and backchain */
if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
- if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
- /* lgr %w1,%r15 (backchain) */
- EMIT4(0xb9040000, REG_W1, REG_15);
+ /* lgr %w1,%r15 (backchain) */
+ EMIT4(0xb9040000, REG_W1, REG_15);
/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
/* aghi %r15,-STK_OFF */
EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
- if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
- /* stg %w1,152(%r15) (backchain) */
- EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
- REG_15, 152);
+ /* stg %w1,152(%r15) (backchain) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
+ REG_15, 152);
}
}
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index b3961f1016ea..d969f36bf186 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -98,6 +98,10 @@ static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
struct zpci_dev *zdev = to_zpci(pdev);
int rc;
+ /* The underlying device may have been disabled by the event */
+ if (!zdev_enabled(zdev))
+ return PCI_ERS_RESULT_NEED_RESET;
+
pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
rc = zpci_reset_load_store_blocked(zdev);
if (rc) {
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index a90499c087f0..2ee7b5a5016c 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -223,7 +223,7 @@ static inline int __pcilg_mio_inuser(
[ioaddr_len] "+&d" (ioaddr_len.pair),
[cc] "+d" (cc), [val] "=d" (val),
[dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp),
- [shift] "+d" (shift)
+ [shift] "+a" (shift)
:: "cc", "memory");
/* did we write everything to the user space buffer? */
diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index 5c8776482530..22c47e4ad572 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -103,16 +103,16 @@ UTS_MACHINE := sh
LDFLAGS_vmlinux += -e _stext
ifdef CONFIG_CPU_LITTLE_ENDIAN
-ld-bfd := elf32-sh-linux
-LDFLAGS_vmlinux += --defsym jiffies=jiffies_64 --oformat $(ld-bfd)
+ld_bfd := elf32-sh-linux
+LDFLAGS_vmlinux += --defsym jiffies=jiffies_64 --oformat $(ld_bfd)
KBUILD_LDFLAGS += -EL
else
-ld-bfd := elf32-shbig-linux
-LDFLAGS_vmlinux += --defsym jiffies=jiffies_64+4 --oformat $(ld-bfd)
+ld_bfd := elf32-shbig-linux
+LDFLAGS_vmlinux += --defsym jiffies=jiffies_64+4 --oformat $(ld_bfd)
KBUILD_LDFLAGS += -EB
endif
-export ld-bfd
+export ld_bfd
# Mach groups
machdir-$(CONFIG_SOLUTION_ENGINE) += mach-se
diff --git a/arch/sh/boot/compressed/Makefile b/arch/sh/boot/compressed/Makefile
index 591125c42d49..05542eb20136 100644
--- a/arch/sh/boot/compressed/Makefile
+++ b/arch/sh/boot/compressed/Makefile
@@ -36,7 +36,7 @@ endif
ccflags-remove-$(CONFIG_MCOUNT) += -pg
-LDFLAGS_vmlinux := --oformat $(ld-bfd) -Ttext $(IMAGE_OFFSET) -e startup \
+LDFLAGS_vmlinux := --oformat $(ld_bfd) -Ttext $(IMAGE_OFFSET) -e startup \
-T $(obj)/../../kernel/vmlinux.lds
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
@@ -60,7 +60,7 @@ $(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE
OBJCOPYFLAGS += -R .empty_zero_page
-LDFLAGS_piggy.o := -r --format binary --oformat $(ld-bfd) -T
+LDFLAGS_piggy.o := -r --format binary --oformat $(ld_bfd) -T
$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix_y) FORCE
$(call if_changed,ld)
diff --git a/arch/sh/boot/romimage/Makefile b/arch/sh/boot/romimage/Makefile
index c7c8be58400c..17b03df0a8de 100644
--- a/arch/sh/boot/romimage/Makefile
+++ b/arch/sh/boot/romimage/Makefile
@@ -13,7 +13,7 @@ mmcif-obj-$(CONFIG_CPU_SUBTYPE_SH7724) := $(obj)/mmcif-sh7724.o
load-$(CONFIG_ROMIMAGE_MMCIF) := $(mmcif-load-y)
obj-$(CONFIG_ROMIMAGE_MMCIF) := $(mmcif-obj-y)
-LDFLAGS_vmlinux := --oformat $(ld-bfd) -Ttext $(load-y) -e romstart \
+LDFLAGS_vmlinux := --oformat $(ld_bfd) -Ttext $(load-y) -e romstart \
-T $(obj)/../../kernel/vmlinux.lds
$(obj)/vmlinux: $(obj)/head.o $(obj-y) $(obj)/piggy.o FORCE
@@ -24,7 +24,7 @@ OBJCOPYFLAGS += -j .empty_zero_page
$(obj)/zeropage.bin: vmlinux FORCE
$(call if_changed,objcopy)
-LDFLAGS_piggy.o := -r --format binary --oformat $(ld-bfd) -T
+LDFLAGS_piggy.o := -r --format binary --oformat $(ld_bfd) -T
$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/zeropage.bin arch/sh/boot/zImage FORCE
$(call if_changed,ld)
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index 946f33c1b032..4b762c35b608 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -52,8 +52,10 @@ out:
void arch_enter_lazy_mmu_mode(void)
{
- struct tlb_batch *tb = this_cpu_ptr(&tlb_batch);
+ struct tlb_batch *tb;
+ preempt_disable();
+ tb = this_cpu_ptr(&tlb_batch);
tb->active = 1;
}
@@ -64,6 +66,7 @@ void arch_leave_lazy_mmu_mode(void)
if (tb->tlb_nr)
flush_tlb_pending();
tb->active = 0;
+ preempt_enable();
}
static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr,
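The preempt_disable()/preempt_enable() pair pins the task to one CPU for the whole lazy-MMU window, so the this_cpu_ptr(&tlb_batch) taken at enter time still refers to the batch being flushed at leave time. The general pattern, as a sketch with a stand-in batch type:

struct batch { int active; unsigned int nr; };
static DEFINE_PER_CPU(struct batch, batch);

static void lazy_enter(void)
{
	struct batch *b;

	preempt_disable();		/* pin to this CPU */
	b = this_cpu_ptr(&batch);
	b->active = 1;
}

static void lazy_leave(void)
{
	struct batch *b = this_cpu_ptr(&batch);

	if (b->nr)
		b->nr = 0;		/* flush pending entries here */
	b->active = 0;
	preempt_enable();		/* matches lazy_enter() */
}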
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 778c50f27399..25d0501549f5 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -155,5 +155,6 @@ MRPROPER_FILES += $(HOST_DIR)/include/generated
archclean:
@find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \
-o -name '*.gcov' \) -type f -print | xargs rm -f
+ $(Q)$(MAKE) -f $(srctree)/Makefile ARCH=$(HEADER_ARCH) clean
export HEADER_ARCH SUBARCH USER_CFLAGS CFLAGS_NO_HARDENING OS DEV_NULL_PATH
diff --git a/arch/um/drivers/rtc_user.c b/arch/um/drivers/rtc_user.c
index 7c3cec4c68cf..006a5a164ea9 100644
--- a/arch/um/drivers/rtc_user.c
+++ b/arch/um/drivers/rtc_user.c
@@ -28,7 +28,7 @@ int uml_rtc_start(bool timetravel)
int err;
if (timetravel) {
- int err = os_pipe(uml_rtc_irq_fds, 1, 1);
+ err = os_pipe(uml_rtc_irq_fds, 1, 1);
if (err)
goto fail;
} else {
diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c
index a1afe414ce48..fb5b1e7c133d 100644
--- a/arch/um/drivers/ubd_user.c
+++ b/arch/um/drivers/ubd_user.c
@@ -41,7 +41,7 @@ int start_io_thread(unsigned long sp, int *fd_out)
*fd_out = fds[1];
err = os_set_fd_block(*fd_out, 0);
- err = os_set_fd_block(kernel_fd, 0);
+ err |= os_set_fd_block(kernel_fd, 0);
if (err) {
printk("start_io_thread - failed to set nonblocking I/O.\n");
goto out_close;
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 2baa8d4a33ed..1a068859a418 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -1600,35 +1600,19 @@ static void vector_eth_configure(
device->dev = dev;
- *vp = ((struct vector_private)
- {
- .list = LIST_HEAD_INIT(vp->list),
- .dev = dev,
- .unit = n,
- .options = get_transport_options(def),
- .rx_irq = 0,
- .tx_irq = 0,
- .parsed = def,
- .max_packet = get_mtu(def) + ETH_HEADER_OTHER,
- /* TODO - we need to calculate headroom so that ip header
- * is 16 byte aligned all the time
- */
- .headroom = get_headroom(def),
- .form_header = NULL,
- .verify_header = NULL,
- .header_rxbuffer = NULL,
- .header_txbuffer = NULL,
- .header_size = 0,
- .rx_header_size = 0,
- .rexmit_scheduled = false,
- .opened = false,
- .transport_data = NULL,
- .in_write_poll = false,
- .coalesce = 2,
- .req_size = get_req_size(def),
- .in_error = false,
- .bpf = NULL
- });
+ INIT_LIST_HEAD(&vp->list);
+ vp->dev = dev;
+ vp->unit = n;
+ vp->options = get_transport_options(def);
+ vp->parsed = def;
+ vp->max_packet = get_mtu(def) + ETH_HEADER_OTHER;
+ /*
+	 * TODO - we need to calculate headroom so that the IP header
+	 * is 16-byte aligned all the time
+ */
+ vp->headroom = get_headroom(def);
+ vp->coalesce = 2;
+ vp->req_size = get_req_size(def);
dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
INIT_WORK(&vp->reset_tx, vector_reset_tx);
diff --git a/arch/um/include/asm/asm-prototypes.h b/arch/um/include/asm/asm-prototypes.h
index 5898a26daa0d..408b31d59127 100644
--- a/arch/um/include/asm/asm-prototypes.h
+++ b/arch/um/include/asm/asm-prototypes.h
@@ -1 +1,6 @@
#include <asm-generic/asm-prototypes.h>
+#include <asm/checksum.h>
+
+#ifdef CONFIG_UML_X86
+extern void cmpxchg8b_emu(void);
+#endif
diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h
index c7b4b49826a2..40d823f36c09 100644
--- a/arch/um/include/asm/thread_info.h
+++ b/arch/um/include/asm/thread_info.h
@@ -68,7 +68,11 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
#define _TIF_MEMDIE (1 << TIF_MEMDIE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
+#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
+#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \
+ _TIF_NOTIFY_RESUME)
+
#endif
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 0df646c6651e..3b382da2996f 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -211,7 +211,6 @@ extern int os_protect_memory(void *addr, unsigned long len,
extern int os_unmap_memory(void *addr, int len);
extern int os_drop_memory(void *addr, int length);
extern int can_drop_memory(void);
-extern int os_mincore(void *addr, unsigned long len);
/* execvp.c */
extern int execvp_noalloc(char *buf, const char *file, char *const argv[]);
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index 1c2d4b29a3d4..6407fc911007 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -17,7 +17,7 @@ extra-y := vmlinux.lds
obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
physmem.o process.o ptrace.o reboot.o sigio.o \
signal.o sysrq.o time.o tlb.o trap.o \
- um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/
+ um_arch.o umid.o kmsg_dump.o capflags.o skas/
obj-y += load_file.o
obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
diff --git a/arch/um/kernel/maccess.c b/arch/um/kernel/maccess.c
deleted file mode 100644
index 8ccd56813f68..000000000000
--- a/arch/um/kernel/maccess.c
+++ /dev/null
@@ -1,19 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2013 Richard Weinberger <richrd@nod.at>
- */
-
-#include <linux/uaccess.h>
-#include <linux/kernel.h>
-#include <os.h>
-
-bool copy_from_kernel_nofault_allowed(const void *src, size_t size)
-{
- void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE);
-
- if ((unsigned long)src < PAGE_SIZE || size <= 0)
- return false;
- if (os_mincore(psrc, size + src - psrc) <= 0)
- return false;
- return true;
-}
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 38d5a71a579b..f6c766b2bdf5 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -68,6 +68,7 @@ void __init mem_init(void)
map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0);
memblock_free((void *)brk_end, uml_reserved - brk_end);
uml_reserved = brk_end;
+ min_low_pfn = PFN_UP(__pa(uml_reserved));
/* this will put all low memory onto the freelists */
memblock_free_all();
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index c5281ce31685..d8c274d99390 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -97,14 +97,18 @@ void *__switch_to(struct task_struct *from, struct task_struct *to)
void interrupt_end(void)
{
struct pt_regs *regs = &current->thread.regs;
-
- if (need_resched())
- schedule();
- if (test_thread_flag(TIF_SIGPENDING) ||
- test_thread_flag(TIF_NOTIFY_SIGNAL))
- do_signal(regs);
- if (test_thread_flag(TIF_NOTIFY_RESUME))
- resume_user_mode_work(regs);
+ unsigned long thread_flags;
+
+ thread_flags = read_thread_flags();
+ while (thread_flags & _TIF_WORK_MASK) {
+ if (thread_flags & _TIF_NEED_RESCHED)
+ schedule();
+ if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
+ do_signal(regs);
+ if (thread_flags & _TIF_NOTIFY_RESUME)
+ resume_user_mode_work(regs);
+ thread_flags = read_thread_flags();
+ }
}
int get_current_pid(void)
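Folding the three independent checks into a loop over read_thread_flags() closes a race: if a signal arrives while schedule() runs, the old straight-line code could return to user space without handling it. Re-reading the flags after each pass guarantees the function only returns with _TIF_WORK_MASK clear, which is also why _TIF_WORK_MASK was introduced in thread_info.h above. A sketch of the standard exit-to-user loop:

/* Sketch: keep servicing work until no work bits remain set. */
static void exit_to_user_sketch(struct pt_regs *regs)
{
	unsigned long flags = read_thread_flags();

	while (flags & _TIF_WORK_MASK) {
		if (flags & _TIF_NEED_RESCHED)
			schedule();
		if (flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
			do_signal(regs);
		if (flags & _TIF_NOTIFY_RESUME)
			resume_user_mode_work(regs);
		flags = read_thread_flags();	/* pick up new work */
	}
}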
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 6d8ae86ae978..c16b80011ada 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -18,6 +18,122 @@
#include <skas.h>
/*
+ * NOTE: UML does not have exception tables. As such, this is almost a copy
+ * of the code in mm/memory.c, only adjusting the logic to simply check whether
+ * we are coming from the kernel instead of doing an additional lookup in the
+ * exception table.
+ * We can do this simplification because we never get here if the exception was
+ * fixable.
+ */
+static inline bool get_mmap_lock_carefully(struct mm_struct *mm, bool is_user)
+{
+ if (likely(mmap_read_trylock(mm)))
+ return true;
+
+ if (!is_user)
+ return false;
+
+ return !mmap_read_lock_killable(mm);
+}
+
+static inline bool mmap_upgrade_trylock(struct mm_struct *mm)
+{
+ /*
+ * We don't have this operation yet.
+ *
+	 * It should be easy enough to do: it's basically an
+ * atomic_long_try_cmpxchg_acquire()
+ * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but
+ * it also needs the proper lockdep magic etc.
+ */
+ return false;
+}
+
+static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, bool is_user)
+{
+ mmap_read_unlock(mm);
+ if (!is_user)
+ return false;
+
+ return !mmap_write_lock_killable(mm);
+}
+
+/*
+ * Helper for page fault handling.
+ *
+ * This is kind of equivalent to "mmap_read_lock()" followed
+ * by "find_extend_vma()", except it's a lot more careful about
+ * the locking (and will drop the lock on failure).
+ *
+ * For example, if we have a kernel bug that causes a page
+ * fault, we don't want to just use mmap_read_lock() to get
+ * the mm lock, because that would deadlock if the bug were
+ * to happen while we're holding the mm lock for writing.
+ *
+ * In the generic code this consults the exception tables on kernel
+ * faults so that all of this is only done for instructions that are
+ * actually expected to fault (see the NOTE at the top of this file).
+ *
+ * We can also actually take the mm lock for writing if we
+ * need to extend the vma, which helps the VM layer a lot.
+ */
+static struct vm_area_struct *
+um_lock_mm_and_find_vma(struct mm_struct *mm,
+ unsigned long addr, bool is_user)
+{
+ struct vm_area_struct *vma;
+
+ if (!get_mmap_lock_carefully(mm, is_user))
+ return NULL;
+
+ vma = find_vma(mm, addr);
+ if (likely(vma && (vma->vm_start <= addr)))
+ return vma;
+
+ /*
+ * Well, dang. We might still be successful, but only
+ * if we can extend a vma to do so.
+ */
+ if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) {
+ mmap_read_unlock(mm);
+ return NULL;
+ }
+
+ /*
+ * We can try to upgrade the mmap lock atomically,
+ * in which case we can continue to use the vma
+ * we already looked up.
+ *
+ * Otherwise we'll have to drop the mmap lock and
+ * re-take it, and also look up the vma again,
+ * re-checking it.
+ */
+ if (!mmap_upgrade_trylock(mm)) {
+ if (!upgrade_mmap_lock_carefully(mm, is_user))
+ return NULL;
+
+ vma = find_vma(mm, addr);
+ if (!vma)
+ goto fail;
+ if (vma->vm_start <= addr)
+ goto success;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto fail;
+ }
+
+ if (expand_stack_locked(vma, addr))
+ goto fail;
+
+success:
+ mmap_write_downgrade(mm);
+ return vma;
+
+fail:
+ mmap_write_unlock(mm);
+ return NULL;
+}
+
+/*
* Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by
* segv().
*/
@@ -43,21 +159,10 @@ int handle_page_fault(unsigned long address, unsigned long ip,
if (is_user)
flags |= FAULT_FLAG_USER;
retry:
- mmap_read_lock(mm);
- vma = find_vma(mm, address);
- if (!vma)
- goto out;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto out;
- if (is_user && !ARCH_IS_STACKGROW(address))
- goto out;
- vma = expand_stack(mm, address);
+ vma = um_lock_mm_and_find_vma(mm, address, is_user);
if (!vma)
goto out_nosemaphore;
-good_area:
*code_out = SEGV_ACCERR;
if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index e52dd37ddadc..2686120ab232 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -223,57 +223,6 @@ out:
return ok;
}
-static int os_page_mincore(void *addr)
-{
- char vec[2];
- int ret;
-
- ret = mincore(addr, UM_KERN_PAGE_SIZE, vec);
- if (ret < 0) {
- if (errno == ENOMEM || errno == EINVAL)
- return 0;
- else
- return -errno;
- }
-
- return vec[0] & 1;
-}
-
-int os_mincore(void *addr, unsigned long len)
-{
- char *vec;
- int ret, i;
-
- if (len <= UM_KERN_PAGE_SIZE)
- return os_page_mincore(addr);
-
- vec = calloc(1, (len + UM_KERN_PAGE_SIZE - 1) / UM_KERN_PAGE_SIZE);
- if (!vec)
- return -ENOMEM;
-
- ret = mincore(addr, UM_KERN_PAGE_SIZE, vec);
- if (ret < 0) {
- if (errno == ENOMEM || errno == EINVAL)
- ret = 0;
- else
- ret = -errno;
-
- goto out;
- }
-
- for (i = 0; i < ((len + UM_KERN_PAGE_SIZE - 1) / UM_KERN_PAGE_SIZE); i++) {
- if (!(vec[i] & 1)) {
- ret = 0;
- goto out;
- }
- }
-
- ret = 1;
-out:
- free(vec);
- return ret;
-}
-
void init_new_thread_signals(void)
{
set_handler(SIGSEGV);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ca8dd7e5585f..f23510275076 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -124,7 +124,7 @@ config X86
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_WANTS_NO_INSTR
select ARCH_WANT_GENERAL_HUGETLB
- select ARCH_WANT_HUGE_PMD_SHARE
+ select ARCH_WANT_HUGE_PMD_SHARE if X86_64
select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP if X86_64
select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WANTS_THP_SWAP if X86_64
@@ -208,7 +208,7 @@ config X86
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI if X86_64
select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS
- select HAVE_EISA
+ select HAVE_EISA if X86_32
select HAVE_EXIT_THREAD
select HAVE_FAST_GUP
select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE
@@ -2575,6 +2575,26 @@ config MITIGATION_SPECTRE_BHI
indirect branches.
See <file:Documentation/admin-guide/hw-vuln/spectre.rst>
+config MITIGATION_ITS
+ bool "Enable Indirect Target Selection mitigation"
+ depends on CPU_SUP_INTEL && X86_64
+ depends on RETPOLINE && RETHUNK
+ default y
+ help
+	  Enable the Indirect Target Selection (ITS) mitigation. ITS is a bug
+	  in the branch prediction unit (BPU) of some Intel CPUs that may
+	  allow Spectre-v2-style attacks. If disabled, the mitigation cannot
+	  be enabled via the command line.
+ See <file:Documentation/admin-guide/hw-vuln/indirect-target-selection.rst>
+
+config MITIGATION_TSA
+ bool "Mitigate Transient Scheduler Attacks"
+ depends on CPU_SUP_AMD
+ default y
+ help
+ Enable mitigation for Transient Scheduler Attacks. TSA is a hardware
+ security vulnerability on AMD CPUs which can lead to forwarding of
+	  invalid information to subsequent instructions and can thus affect
+	  their timing and thereby cause leakage.
endif
config ARCH_HAS_ADD_PAGES
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 3419ffa2a350..a4764ec92373 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -43,7 +43,7 @@ endif
# How to compile the 16-bit code. Note we always compile for -march=i386;
# that way we can complain to the user if the CPU is insufficient.
-REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING -D__DISABLE_EXPORTS \
+REALMODE_CFLAGS := -std=gnu11 -m16 -g -Os -DDISABLE_BRANCH_PROFILING -D__DISABLE_EXPORTS \
-Wall -Wstrict-prototypes -march=i386 -mregparm=3 \
-fno-strict-aliasing -fomit-frame-pointer -fno-pic \
-mno-mmx -mno-sse $(call cc-option,-fcf-protection=none)
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index 3c5d5c97f8f7..4f61d48f2575 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -164,6 +164,13 @@ static void __page_state_change(unsigned long paddr, enum psc_op op)
*/
if (op == SNP_PAGE_STATE_PRIVATE && pvalidate(paddr, RMP_PG_SIZE_4K, 1))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
+
+ /*
+ * If validating memory (making it private) and affected by the
+ * cache-coherency vulnerability, perform the cache eviction mitigation.
+ */
+ if (op == SNP_PAGE_STATE_PRIVATE && !has_cpuflag(X86_FEATURE_COHERENCY_SFW_NO))
+ sev_evict_cache((void *)paddr, 1);
}
void snp_set_page_private(unsigned long paddr)
diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
index a83d67ec627d..aa4943432dcf 100644
--- a/arch/x86/boot/cpuflags.c
+++ b/arch/x86/boot/cpuflags.c
@@ -124,5 +124,18 @@ void get_cpuflags(void)
cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6],
&cpu.flags[1]);
}
+
+ if (max_amd_level >= 0x8000001f) {
+ u32 ebx;
+
+ /*
+		 * The X86_FEATURE_COHERENCY_SFW_NO feature bit lives in
+		 * the virtualization flags entry (word 8) and is normally
+		 * populated by scattered.c, so set it explicitly here.
+ */
+ cpuid(0x8000001f, &ignored, &ebx, &ignored, &ignored);
+ if (ebx & BIT(31))
+ set_bit(X86_FEATURE_COHERENCY_SFW_NO, cpu.flags);
+ }
}
}
diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh
index c9299aeb7333..3882ead513f7 100644
--- a/arch/x86/boot/genimage.sh
+++ b/arch/x86/boot/genimage.sh
@@ -22,6 +22,7 @@
# This script requires:
# bash
# syslinux
+# genisoimage
# mtools (for fdimage* and hdimage)
# edk2/OVMF (for hdimage)
#
@@ -251,7 +252,9 @@ geniso() {
cp "$isolinux" "$ldlinux" "$tmp_dir"
cp "$FBZIMAGE" "$tmp_dir"/linux
echo default linux "$KCMDLINE" > "$tmp_dir"/isolinux.cfg
- cp "${FDINITRDS[@]}" "$tmp_dir"/
+ if [ ${#FDINITRDS[@]} -gt 0 ]; then
+ cp "${FDINITRDS[@]}" "$tmp_dir"/
+ fi
genisoimage -J -r -appid 'LINUX_BOOT' -input-charset=utf-8 \
-quiet -o "$FIMAGE" -b isolinux.bin \
-c boot.cat -no-emul-boot -boot-load-size 4 \
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index f6907627172b..01e9593e2bd9 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -70,6 +70,8 @@ For 32-bit we have the following conventions - kernel is built with
pushq %rsi /* pt_regs->si */
movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
movq %rdi, 8(%rsp) /* pt_regs->di (overwriting original return address) */
+ /* We just clobbered the return address - use the IRET frame for unwinding: */
+ UNWIND_HINT_IRET_REGS offset=3*8
.else
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index f4419afc7147..057eeb4eda4e 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -16,7 +16,7 @@
SYM_FUNC_START(entry_ibpb)
movl $MSR_IA32_PRED_CMD, %ecx
- movl $PRED_CMD_IBPB, %eax
+ movl _ASM_RIP(x86_pred_cmd), %eax
xorl %edx, %edx
wrmsr
@@ -31,20 +31,20 @@ EXPORT_SYMBOL_GPL(entry_ibpb);
/*
* Define the VERW operand that is disguised as entry code so that
- * it can be referenced with KPTI enabled. This ensure VERW can be
+ * it can be referenced with KPTI enabled. This ensures VERW can be
* used late in exit-to-user path after page tables are switched.
*/
.pushsection .entry.text, "ax"
.align L1_CACHE_BYTES, 0xcc
-SYM_CODE_START_NOALIGN(mds_verw_sel)
+SYM_CODE_START_NOALIGN(x86_verw_sel)
UNWIND_HINT_EMPTY
ANNOTATE_NOENDBR
.word __KERNEL_DS
.align L1_CACHE_BYTES, 0xcc
-SYM_CODE_END(mds_verw_sel);
+SYM_CODE_END(x86_verw_sel);
/* For KVM */
-EXPORT_SYMBOL_GPL(mds_verw_sel);
+EXPORT_SYMBOL_GPL(x86_verw_sel);
.popsection
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index a114338380a6..4f5968810450 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1559,7 +1559,9 @@ SYM_CODE_END(rewind_stack_and_make_dead)
* ORC to unwind properly.
*
* The alignment is for performance and not for safety, and may be safely
- * refactored in the future if needed.
+ * refactored in the future if needed. The .skips are for safety, to ensure
+ * that all RETs are in the second half of a cacheline to mitigate Indirect
+ * Target Selection, rather than taking the slowpath via its_return_thunk.
*/
SYM_FUNC_START(clear_bhb_loop)
push %rbp
@@ -1569,10 +1571,22 @@ SYM_FUNC_START(clear_bhb_loop)
call 1f
jmp 5f
.align 64, 0xcc
+ /*
+ * Shift instructions so that the RET is in the upper half of the
+	 * cacheline so it doesn't take the slowpath to its_return_thunk.
+ */
+ .skip 32 - (.Lret1 - 1f), 0xcc
ANNOTATE_INTRA_FUNCTION_CALL
1: call 2f
- RET
+.Lret1: RET
.align 64, 0xcc
+ /*
+	 * As above, shift instructions so the RET at .Lret2 lands in the
+	 * upper half of the cacheline as well.
+	 *
+	 * This should ideally be: .skip 32 - (.Lret2 - 2f), 0xcc
+ * but some Clang versions (e.g. 18) don't like this.
+ */
+ .skip 32 - 18, 0xcc
2: movl $5, %eax
3: jmp 4f
nop
@@ -1580,7 +1594,7 @@ SYM_FUNC_START(clear_bhb_loop)
jnz 3b
sub $1, %ecx
jnz 1b
- RET
+.Lret2: RET
5: lfence
pop %rbp
RET
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 37cbbc5c659a..8c385e231e07 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -1216,7 +1216,8 @@ static __init int perf_ibs_op_init(void)
if (ibs_caps & IBS_CAPS_OPCNTEXT) {
perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK;
perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK;
- perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK;
+ perf_ibs_op.cnt_mask |= (IBS_OP_MAX_CNT_EXT_MASK |
+ IBS_OP_CUR_CNT_EXT_MASK);
}
if (ibs_caps & IBS_CAPS_ZEN4)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index fa0744732444..a8732dd9fedb 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -623,7 +623,7 @@ int x86_pmu_hw_config(struct perf_event *event)
if (event->attr.type == event->pmu->type)
event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
- if (!event->attr.freq && x86_pmu.limit_period) {
+ if (is_sampling_event(event) && !event->attr.freq && x86_pmu.limit_period) {
s64 left = event->attr.sample_period;
x86_pmu.limit_period(event, &left);
if (left > event->attr.sample_period)
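The is_sampling_event() guard matters because attr.sample_period shares a union with attr.sample_freq: for a pure counting event neither field is meaningful, so running limit_period() on it could spuriously reject a valid counting event. For reference, the predicate in the perf core reduces to a non-zero-period check (sketch):

/* Sketch of is_sampling_event(): a sampling event is one with a
 * non-zero sample_period (which also overlays sample_freq). */
static bool is_sampling_event_sketch(const struct perf_event *event)
{
	return event->attr.sample_period != 0;
}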
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 117f86283183..2cb5b1f715b6 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2689,28 +2689,33 @@ static u64 adl_update_topdown_event(struct perf_event *event)
DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update);
-static void intel_pmu_read_topdown_event(struct perf_event *event)
+static void intel_pmu_read_event(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ if (event->hw.flags & (PERF_X86_EVENT_AUTO_RELOAD | PERF_X86_EVENT_TOPDOWN)) {
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ bool pmu_enabled = cpuc->enabled;
- /* Only need to call update_topdown_event() once for group read. */
- if ((cpuc->txn_flags & PERF_PMU_TXN_READ) &&
- !is_slots_event(event))
- return;
+ /* Only need to call update_topdown_event() once for group read. */
+ if (is_metric_event(event) && (cpuc->txn_flags & PERF_PMU_TXN_READ))
+ return;
- perf_pmu_disable(event->pmu);
- static_call(intel_pmu_update_topdown_event)(event);
- perf_pmu_enable(event->pmu);
-}
+ cpuc->enabled = 0;
+ if (pmu_enabled)
+ intel_pmu_disable_all();
-static void intel_pmu_read_event(struct perf_event *event)
-{
- if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
- intel_pmu_auto_reload_read(event);
- else if (is_topdown_count(event))
- intel_pmu_read_topdown_event(event);
- else
- x86_perf_event_update(event);
+ if (is_topdown_count(event))
+ static_call(intel_pmu_update_topdown_event)(event);
+ else
+ intel_pmu_drain_pebs_buffer();
+
+ cpuc->enabled = pmu_enabled;
+ if (pmu_enabled)
+ intel_pmu_enable_all(0);
+
+ return;
+ }
+
+ x86_perf_event_update(event);
}
static void intel_pmu_enable_fixed(struct perf_event *event)
@@ -2975,7 +2980,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
handled++;
x86_pmu_handle_guest_pebs(regs, &data);
- x86_pmu.drain_pebs(regs, &data);
+ static_call(x86_pmu_drain_pebs)(regs, &data);
status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
/*
@@ -3852,6 +3857,85 @@ static inline bool intel_pmu_has_cap(struct perf_event *event, int idx)
return test_bit(idx, (unsigned long *)&intel_cap->capabilities);
}
+static u64 intel_pmu_freq_start_period(struct perf_event *event)
+{
+ int type = event->attr.type;
+ u64 config, factor;
+ s64 start;
+
+ /*
+	 * 127 is the lowest recommended SAV (sample after value) for a
+	 * 4000 Hz freq (the default), according to the event list JSON file.
+	 * Also assume the workload is idle 50% of the time, which halves
+	 * that to roughly 64.
+ */
+ factor = 64 * 4000;
+ if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE)
+ goto end;
+
+ /*
+	 * The estimation of the start period in freq mode is based
+	 * on the assumptions below.
+	 *
+	 * For a cycles or instructions event, assume a 1 GHz platform
+	 * and 1 IPC, with the workload idle 50% of the time.
+	 * The start period = 1,000,000,000 * 1 / freq / 2
+	 *                  = 500,000,000 / freq
+	 *
+	 * Branch-related events usually occur less often than the
+	 * instructions event. According to the Intel event list JSON
+	 * file, the SAV (sample after value) of a branch-related event
+	 * is usually 1/4 that of an instructions event.
+	 * The start period of branch-related events = 125,000,000 / freq.
+	 *
+	 * Cache-related events occur even less often. The SAV is usually
+	 * 1/20 that of an instructions event.
+	 * The start period of cache-related events = 25,000,000 / freq.
+ */
+ config = event->attr.config & PERF_HW_EVENT_MASK;
+ if (type == PERF_TYPE_HARDWARE) {
+ switch (config) {
+ case PERF_COUNT_HW_CPU_CYCLES:
+ case PERF_COUNT_HW_INSTRUCTIONS:
+ case PERF_COUNT_HW_BUS_CYCLES:
+ case PERF_COUNT_HW_STALLED_CYCLES_FRONTEND:
+ case PERF_COUNT_HW_STALLED_CYCLES_BACKEND:
+ case PERF_COUNT_HW_REF_CPU_CYCLES:
+ factor = 500000000;
+ break;
+ case PERF_COUNT_HW_BRANCH_INSTRUCTIONS:
+ case PERF_COUNT_HW_BRANCH_MISSES:
+ factor = 125000000;
+ break;
+ case PERF_COUNT_HW_CACHE_REFERENCES:
+ case PERF_COUNT_HW_CACHE_MISSES:
+ factor = 25000000;
+ break;
+ default:
+ goto end;
+ }
+ }
+
+ if (type == PERF_TYPE_HW_CACHE)
+ factor = 25000000;
+end:
+ /*
+	 * Usually a prime, or a number with few factors (close to prime),
+	 * is chosen as the SAV, which makes it less likely that the sampling
+	 * period synchronizes with some periodic event in the workload.
+	 * Subtract 1 so the result at least avoids values near powers of
+	 * two for the default freq.
+ */
+ start = DIV_ROUND_UP_ULL(factor, event->attr.sample_freq) - 1;
+
+ if (start > x86_pmu.max_period)
+ start = x86_pmu.max_period;
+
+ if (x86_pmu.limit_period)
+ x86_pmu.limit_period(event, &start);
+
+ return start;
+}
+
static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
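A worked instance of the estimate above, at the default 4000 Hz: a cycles event starts at 500,000,000 / 4000 - 1 = 124,999; a branch event at 125,000,000 / 4000 - 1 = 31,249; a cache event at 25,000,000 / 4000 - 1 = 6,249; and any other event type falls back to 64 * 4000 / 4000 - 1 = 63. Each result is then clamped to max_period and passed through limit_period() if one is registered.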
@@ -3863,6 +3947,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (ret)
return ret;
+ if (event->attr.freq && event->attr.sample_freq) {
+ event->hw.sample_period = intel_pmu_freq_start_period(event);
+ event->hw.last_period = event->hw.sample_period;
+ local64_set(&event->hw.period_left, event->hw.sample_period);
+ }
+
if (event->attr.precise_ip) {
if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT)
return -EINVAL;
@@ -4087,7 +4177,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
arr[pebs_enable] = (struct perf_guest_switch_msr){
.msr = MSR_IA32_PEBS_ENABLE,
.host = cpuc->pebs_enabled & ~cpuc->intel_ctrl_guest_mask,
- .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask,
+ .guest = pebs_mask & ~cpuc->intel_ctrl_host_mask & kvm_pmu->pebs_enable,
};
if (arr[pebs_enable].host) {
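
A quick sanity check of the start-period arithmetic in the intel_pmu_freq_start_period() hunk above. This is a minimal userspace sketch, not kernel code: it mirrors the DIV_ROUND_UP_ULL(factor, freq) - 1 computation for the factor values in the hunk at the default 4000 Hz freq, and omits the max_period clamp and limit_period callback.

#include <stdio.h>
#include <stdint.h>

/* Mirror of the estimate: start = ceil(factor / freq) - 1 */
static uint64_t start_period(uint64_t factor, uint64_t freq)
{
	return (factor + freq - 1) / freq - 1;
}

int main(void)
{
	uint64_t freq = 4000;	/* default freq from the comment above */

	printf("cycles/instructions: %llu\n",
	       (unsigned long long)start_period(500000000ULL, freq)); /* 124999 */
	printf("branches:            %llu\n",
	       (unsigned long long)start_period(125000000ULL, freq)); /* 31249 */
	printf("cache events:        %llu\n",
	       (unsigned long long)start_period(25000000ULL, freq));  /* 6249 */
	printf("other events:        %llu\n",
	       (unsigned long long)start_period(64ULL * 4000, freq)); /* 63 */
	return 0;
}
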
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 3ff38e7409e3..4b7faa3180e5 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -789,11 +789,11 @@ unlock:
return 1;
}
-static inline void intel_pmu_drain_pebs_buffer(void)
+void intel_pmu_drain_pebs_buffer(void)
{
struct perf_sample_data data;
- x86_pmu.drain_pebs(NULL, &data);
+ static_call(x86_pmu_drain_pebs)(NULL, &data);
}
/*
@@ -1149,8 +1149,10 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event)
* + precise_ip < 2 for the non event IP
* + For RTM TSX weight we need GPRs for the abort code.
*/
- gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
- (attr->sample_regs_intr & PEBS_GP_REGS);
+ gprs = ((sample_type & PERF_SAMPLE_REGS_INTR) &&
+ (attr->sample_regs_intr & PEBS_GP_REGS)) ||
+ ((sample_type & PERF_SAMPLE_REGS_USER) &&
+ (attr->sample_regs_user & PEBS_GP_REGS));
tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
((attr->config & INTEL_ARCH_EVENT_MASK) ==
@@ -1792,7 +1794,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
regs->flags &= ~PERF_EFLAGS_EXACT;
}
- if (sample_type & PERF_SAMPLE_REGS_INTR)
+ if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER))
adaptive_pebs_save_regs(regs, gprs);
}
@@ -1902,15 +1904,6 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
return NULL;
}
-void intel_pmu_auto_reload_read(struct perf_event *event)
-{
- WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
-
- perf_pmu_disable(event->pmu);
- intel_pmu_drain_pebs_buffer();
- perf_pmu_enable(event->pmu);
-}
-
/*
* Special variant of intel_pmu_save_and_restart() for auto-reload.
*/
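
The widened gprs condition in the pebs_update_adaptive_cfg() hunk can be exercised in isolation. A hypothetical userspace sketch with stand-in flag and mask values (not the kernel's real constants), showing that a user-regs-only request now also forces GP register capture:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in values for illustration only */
#define PERF_SAMPLE_REGS_INTR  (1u << 0)
#define PERF_SAMPLE_REGS_USER  (1u << 1)
#define PEBS_GP_REGS           0xffffu

/* Mirrors the patched predicate: GPRs are needed when either the
 * interrupt-state or the user-state register sample wants GP regs. */
static bool needs_gprs(uint64_t sample_type, uint64_t regs_intr, uint64_t regs_user)
{
	return ((sample_type & PERF_SAMPLE_REGS_INTR) && (regs_intr & PEBS_GP_REGS)) ||
	       ((sample_type & PERF_SAMPLE_REGS_USER) && (regs_user & PEBS_GP_REGS));
}

int main(void)
{
	/* Before the patch, the first case would not have set gprs */
	printf("%d\n", needs_gprs(PERF_SAMPLE_REGS_USER, 0, 0x3)); /* 1 */
	printf("%d\n", needs_gprs(PERF_SAMPLE_REGS_INTR, 0x3, 0)); /* 1 */
	printf("%d\n", needs_gprs(0, 0x3, 0x3));                   /* 0 */
	return 0;
}
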
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index d081eb89ba12..831c4e3f371a 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -4656,28 +4656,28 @@ static struct uncore_event_desc snr_uncore_iio_freerunning_events[] = {
INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"),
/* Free-Running IIO BANDWIDTH IN Counters */
INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.0517578125e-5"),
INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.0517578125e-5"),
INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.0517578125e-5"),
INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.0517578125e-5"),
INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.0517578125e-5"),
INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.0517578125e-5"),
INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.0517578125e-5"),
INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.0517578125e-5"),
INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"),
{ /* end: all zeroes */ },
};
@@ -5250,37 +5250,6 @@ static struct freerunning_counters icx_iio_freerunning[] = {
[ICX_IIO_MSR_BW_IN] = { 0xaa0, 0x1, 0x10, 8, 48, icx_iio_bw_freerunning_box_offsets },
};
-static struct uncore_event_desc icx_uncore_iio_freerunning_events[] = {
- /* Free-Running IIO CLOCKS Counter */
- INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"),
- /* Free-Running IIO BANDWIDTH IN Counters */
- INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"),
- { /* end: all zeroes */ },
-};
-
static struct intel_uncore_type icx_uncore_iio_free_running = {
.name = "iio_free_running",
.num_counters = 9,
@@ -5288,7 +5257,7 @@ static struct intel_uncore_type icx_uncore_iio_free_running = {
.num_freerunning_types = ICX_IIO_FREERUNNING_TYPE_MAX,
.freerunning = icx_iio_freerunning,
.ops = &skx_uncore_iio_freerunning_ops,
- .event_descs = icx_uncore_iio_freerunning_events,
+ .event_descs = snr_uncore_iio_freerunning_events,
.format_group = &skx_uncore_iio_freerunning_format_group,
};
@@ -5857,69 +5826,13 @@ static struct freerunning_counters spr_iio_freerunning[] = {
[SPR_IIO_MSR_BW_OUT] = { 0x3808, 0x1, 0x10, 8, 48 },
};
-static struct uncore_event_desc spr_uncore_iio_freerunning_events[] = {
- /* Free-Running IIO CLOCKS Counter */
- INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"),
- /* Free-Running IIO BANDWIDTH IN Counters */
- INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"),
- /* Free-Running IIO BANDWIDTH OUT Counters */
- INTEL_UNCORE_EVENT_DESC(bw_out_port0, "event=0xff,umask=0x30"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port0.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port0.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port1, "event=0xff,umask=0x31"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port1.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port1.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port2, "event=0xff,umask=0x32"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port2.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port2.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port3, "event=0xff,umask=0x33"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port3.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port3.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port4, "event=0xff,umask=0x34"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port4.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port4.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port5, "event=0xff,umask=0x35"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port5.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port5.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port6, "event=0xff,umask=0x36"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port6.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port6.unit, "MiB"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port7, "event=0xff,umask=0x37"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port7.scale, "3.814697266e-6"),
- INTEL_UNCORE_EVENT_DESC(bw_out_port7.unit, "MiB"),
- { /* end: all zeroes */ },
-};
-
static struct intel_uncore_type spr_uncore_iio_free_running = {
.name = "iio_free_running",
.num_counters = 17,
.num_freerunning_types = SPR_IIO_FREERUNNING_TYPE_MAX,
.freerunning = spr_iio_freerunning,
.ops = &skx_uncore_iio_freerunning_ops,
- .event_descs = spr_uncore_iio_freerunning_events,
+ .event_descs = snr_uncore_iio_freerunning_events,
.format_group = &skx_uncore_iio_freerunning_format_group,
};
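
The old and new IIO free-running scale strings differ by exactly a factor of 8, and both are exact binary fractions of a MiB. A small check; reading the values as bytes per counter increment is an inference from the numbers, not something stated in the hunk:

#include <stdio.h>

int main(void)
{
	double mib = 1 << 20;	/* 1 MiB = 2^20 bytes */

	printf("%.9e\n", 4.0 / mib);   /* 3.814697266e-06: old scale, 4 bytes/tick  */
	printf("%.10e\n", 32.0 / mib); /* 3.0517578125e-05: new scale, 32 bytes/tick */
	return 0;
}
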
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 332d2e6d8ae4..a3e1da86181f 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1047,6 +1047,7 @@ extern struct x86_pmu x86_pmu __read_mostly;
DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period);
DECLARE_STATIC_CALL(x86_pmu_update, *x86_pmu.update);
+DECLARE_STATIC_CALL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
{
@@ -1535,7 +1536,7 @@ void intel_pmu_pebs_disable_all(void);
void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
-void intel_pmu_auto_reload_read(struct perf_event *event);
+void intel_pmu_drain_pebs_buffer(void);
void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c
index 42c70d28ef27..865ae4be233b 100644
--- a/arch/x86/hyperv/irqdomain.c
+++ b/arch/x86/hyperv/irqdomain.c
@@ -192,7 +192,6 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
struct pci_dev *dev;
struct hv_interrupt_entry out_entry, *stored_entry;
struct irq_cfg *cfg = irqd_cfg(data);
- const cpumask_t *affinity;
int cpu;
u64 status;
@@ -204,8 +203,7 @@ static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
return;
}
- affinity = irq_data_get_effective_affinity_mask(data);
- cpu = cpumask_first_and(affinity, cpu_online_mask);
+ cpu = cpumask_first(irq_data_get_effective_affinity_mask(data));
if (data->chip_data) {
/*
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 9542c582d546..fd3b73017738 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/stringify.h>
#include <asm/asm.h>
+#include <asm/bug.h>
#define ALTINSTR_FLAG_INV (1 << 15)
#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV)
@@ -81,6 +82,37 @@ extern void apply_ibt_endbr(s32 *start, s32 *end);
struct module;
+#ifdef CONFIG_MITIGATION_ITS
+extern void its_init_mod(struct module *mod);
+extern void its_fini_mod(struct module *mod);
+extern void its_free_mod(struct module *mod);
+extern u8 *its_static_thunk(int reg);
+#else /* CONFIG_MITIGATION_ITS */
+static inline void its_init_mod(struct module *mod) { }
+static inline void its_fini_mod(struct module *mod) { }
+static inline void its_free_mod(struct module *mod) { }
+static inline u8 *its_static_thunk(int reg)
+{
+ WARN_ONCE(1, "ITS not compiled in");
+
+ return NULL;
+}
+#endif
+
+#if defined(CONFIG_RETHUNK) && defined(CONFIG_OBJTOOL)
+extern bool cpu_wants_rethunk(void);
+extern bool cpu_wants_rethunk_at(void *addr);
+#else
+static __always_inline bool cpu_wants_rethunk(void)
+{
+ return false;
+}
+static __always_inline bool cpu_wants_rethunk_at(void *addr)
+{
+ return false;
+}
+#endif
+
#ifdef CONFIG_SMP
extern void alternatives_smp_module_add(struct module *mod, char *name,
void *locks, void *locks_end,
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 37639a2d9c34..c976bbf909e0 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -98,4 +98,16 @@ extern u64 x86_read_arch_cap_msr(void);
extern struct cpumask cpus_stop_mask;
+union zen_patch_rev {
+ struct {
+ __u32 rev : 8,
+ stepping : 4,
+ model : 4,
+ __reserved : 4,
+ ext_model : 4,
+ ext_fam : 8;
+ };
+ __u32 ucode_rev;
+};
+
#endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 5709cbba28e7..c48a9733e906 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -230,6 +230,7 @@
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
+#define X86_FEATURE_COHERENCY_SFW_NO ( 8*32+ 5) /* "" SNP cache coherency software work around not needed */
#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
@@ -429,6 +430,7 @@
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
+#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* "" The memory form of VERW mitigates TSA */
#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */
#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
@@ -445,6 +447,11 @@
#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */
#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */
#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
+#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32 + 5) /* "" Use thunk for indirect branches in lower half of cacheline */
+
+#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* "" AMD CPU not vulnerable to TSA-SQ */
+#define X86_FEATURE_TSA_L1_NO (21*32+12) /* "" AMD CPU not vulnerable to TSA-L1 */
+#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* "" Clear CPU buffers using VERW before VMRUN */
/*
* BUG word(s)
@@ -495,4 +502,7 @@
#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
#define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */
+#define X86_BUG_ITS X86_BUG(1*32 + 5) /* CPU is affected by Indirect Target Selection */
+#define X86_BUG_ITS_NATIVE_ONLY X86_BUG(1*32 + 6) /* CPU is affected by ITS, VMX is not affected */
+#define X86_BUG_TSA X86_BUG(1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 7793e52d6237..b8ec2be2bbe4 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -47,13 +47,13 @@ static __always_inline void native_irq_enable(void)
static inline __cpuidle void native_safe_halt(void)
{
- mds_idle_clear_cpu_buffers();
+ x86_idle_clear_cpu_buffers();
asm volatile("sti; hlt": : :"memory");
}
static inline __cpuidle void native_halt(void)
{
- mds_idle_clear_cpu_buffers();
+ x86_idle_clear_cpu_buffers();
asm volatile("hlt": : :"memory");
}
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index e2e1ec99c999..256eee99afc8 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -16,7 +16,6 @@
# define PAGES_NR 4
#endif
-# define KEXEC_CONTROL_PAGE_SIZE 4096
# define KEXEC_CONTROL_CODE_MAX_SIZE 2048
#ifndef __ASSEMBLY__
@@ -45,6 +44,7 @@ struct kimage;
/* Maximum address we can use for the control code buffer */
# define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+# define KEXEC_CONTROL_PAGE_SIZE 4096
/* The native architecture */
# define KEXEC_ARCH KEXEC_ARCH_386
@@ -59,6 +59,9 @@ struct kimage;
/* Maximum address we can use for the control pages */
# define KEXEC_CONTROL_MEMORY_LIMIT (MAXMEM-1)
+/* Allocate one page for the pdp and the second for the code */
+# define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL)
+
/* The native architecture */
# define KEXEC_ARCH KEXEC_ARCH_X86_64
#endif
@@ -143,19 +146,6 @@ struct kimage_arch {
};
#else
struct kimage_arch {
- /*
- * This is a kimage control page, as it must not overlap with either
- * source or destination address ranges.
- */
- pgd_t *pgd;
- /*
- * The virtual mapping of the control code page itself is used only
- * during the transition, while the current kernel's pages are all
- * in place. Thus the intermediate page table pages used to map it
- * are not control pages, but instead just normal pages obtained
- * with get_zeroed_page(). And have to be tracked (below) so that
- * they can be freed.
- */
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index abc07d004589..c068565fe954 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -99,7 +99,6 @@ KVM_X86_OP(write_tsc_multiplier)
KVM_X86_OP(get_exit_info)
KVM_X86_OP(check_intercept)
KVM_X86_OP(handle_exit_irqoff)
-KVM_X86_OP(request_immediate_exit)
KVM_X86_OP(sched_in)
KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
KVM_X86_OP_OPTIONAL(vcpu_blocking)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9de3db4a32f8..d0229323ca63 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -677,6 +677,7 @@ struct kvm_vcpu_arch {
u32 pkru;
u32 hflags;
u64 efer;
+ u64 host_debugctl;
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
bool load_eoi_exitmap_pending;
@@ -1455,6 +1456,12 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
}
+enum kvm_x86_run_flags {
+ KVM_RUN_FORCE_IMMEDIATE_EXIT = BIT(0),
+ KVM_RUN_LOAD_GUEST_DR6 = BIT(1),
+ KVM_RUN_LOAD_DEBUGCTL = BIT(2),
+};
+
struct kvm_x86_ops {
const char *name;
@@ -1478,6 +1485,12 @@ struct kvm_x86_ops {
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
+ /*
+ * Mask of DEBUGCTL bits that are owned by the host, i.e. that need to
+ * match the host's value even while the guest is active.
+ */
+ const u64 HOST_OWNED_DEBUGCTL;
+
void (*update_exception_bitmap)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
@@ -1526,7 +1539,8 @@ struct kvm_x86_ops {
void (*flush_tlb_guest)(struct kvm_vcpu *vcpu);
int (*vcpu_pre_run)(struct kvm_vcpu *vcpu);
- enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu);
+ enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu,
+ u64 run_flags);
int (*handle_exit)(struct kvm_vcpu *vcpu,
enum exit_fastpath_completion exit_fastpath);
int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
@@ -1546,10 +1560,12 @@ struct kvm_x86_ops {
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
+
+ const bool x2apic_icr_is_split;
bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason);
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
- void (*hwapic_isr_update)(int isr);
+ void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
@@ -1585,8 +1601,6 @@ struct kvm_x86_ops {
struct x86_exception *exception);
void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
- void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
-
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
/*
@@ -2054,7 +2068,6 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
-void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
u32 size);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 5ffcfcc41282..afd65c815043 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -185,6 +185,14 @@
* VERW clears CPU Register
* File.
*/
+#define ARCH_CAP_ITS_NO BIT_ULL(62) /*
+ * Not susceptible to
+ * Indirect Target Selection.
+ * This bit is not set by
+ * HW, but is synthesized by
+ * VMMs for guests to know
+ * their affected status.
+ */
#define ARCH_CAP_XAPIC_DISABLE BIT(21) /*
* IA32_XAPIC_DISABLE_STATUS MSR
@@ -371,6 +379,7 @@
#define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1UL << 12)
#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
+#define DEBUGCTLMSR_RTM_DEBUG BIT(15)
#define MSR_PEBS_FRONTEND 0x000003f7
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 3a8fdf881313..d938792c11d9 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -44,8 +44,6 @@ static inline void __monitorx(const void *eax, unsigned long ecx,
static inline void __mwait(unsigned long eax, unsigned long ecx)
{
- mds_idle_clear_cpu_buffers();
-
/* "mwait %eax, %ecx;" */
asm volatile(".byte 0x0f, 0x01, 0xc9;"
:: "a" (eax), "c" (ecx));
@@ -80,7 +78,7 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
static inline void __mwaitx(unsigned long eax, unsigned long ebx,
unsigned long ecx)
{
- /* No MDS buffer clear as this is AMD/HYGON only */
+ /* No need for TSA buffer clearing on AMD */
/* "mwaitx %eax, %ebx, %ecx;" */
asm volatile(".byte 0x0f, 0x01, 0xfb;"
@@ -89,7 +87,7 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx,
static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
{
- mds_idle_clear_cpu_buffers();
+
/* "mwait %eax, %ecx;" */
asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
:: "a" (eax), "c" (ecx));
@@ -107,6 +105,11 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
*/
static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
{
+ if (need_resched())
+ return;
+
+ x86_idle_clear_cpu_buffers();
+
if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) {
if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) {
mb();
@@ -115,9 +118,13 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
}
__monitor((void *)&current_thread_info()->flags, 0, 0);
- if (!need_resched())
- __mwait(eax, ecx);
+ if (need_resched())
+ goto out;
+
+ __mwait(eax, ecx);
}
+
+out:
current_clr_polling();
}
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 5c5f1e56c404..6f3d145670a9 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -59,6 +59,8 @@ int __register_nmi_handler(unsigned int, struct nmiaction *);
void unregister_nmi_handler(unsigned int, const char *);
+void set_emergency_nmi_handler(unsigned int type, nmi_handler_t handler);
+
void stop_nmi(void);
void restart_nmi(void);
void local_touch_nmi(void);
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index daf58a96e0a7..c77a65a3e5f1 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -119,9 +119,8 @@
.endm
/*
- * Equivalent to -mindirect-branch-cs-prefix; emit the 5 byte jmp/call
- * to the retpoline thunk with a CS prefix when the register requires
- * a RAX prefix byte to encode. Also see apply_retpolines().
+ * Emits a conditional CS prefix that is compatible with
+ * -mindirect-branch-cs-prefix.
*/
.macro __CS_PREFIX reg:req
.irp rs,r8,r9,r10,r11,r12,r13,r14,r15
@@ -203,27 +202,33 @@
.endm
/*
- * Macro to execute VERW instruction that mitigate transient data sampling
- * attacks such as MDS. On affected systems a microcode update overloaded VERW
- * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
- *
+ * Macro to execute VERW insns that mitigate transient data sampling
+ * attacks such as MDS or TSA. On affected systems a microcode update
+ * overloaded VERW insns to also clear the CPU buffers. VERW clobbers
+ * CFLAGS.ZF.
* Note: Only the memory operand variant of VERW clears the CPU buffers.
*/
-.macro CLEAR_CPU_BUFFERS
- ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF
+.macro __CLEAR_CPU_BUFFERS feature
+ ALTERNATIVE "jmp .Lskip_verw_\@", "", \feature
#ifdef CONFIG_X86_64
- verw mds_verw_sel(%rip)
+ verw x86_verw_sel(%rip)
#else
/*
* In 32bit mode, the memory operand must be a %cs reference. The data
* segments may not be usable (vm86 mode), and the stack segment may not
* be flat (ESPFIX32).
*/
- verw %cs:mds_verw_sel
+ verw %cs:x86_verw_sel
#endif
.Lskip_verw_\@:
.endm
+#define CLEAR_CPU_BUFFERS \
+ __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF
+
+#define VM_CLEAR_CPU_BUFFERS \
+ __CLEAR_CPU_BUFFERS X86_FEATURE_CLEAR_CPU_BUF_VM
+
#ifdef CONFIG_X86_64
.macro CLEAR_BRANCH_HISTORY
ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
@@ -245,8 +250,12 @@
_ASM_PTR " 999b\n\t" \
".popsection\n\t"
+#define ITS_THUNK_SIZE 64
+
typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
+typedef u8 its_thunk_t[ITS_THUNK_SIZE];
extern retpoline_thunk_t __x86_indirect_thunk_array[];
+extern its_thunk_t __x86_indirect_its_thunk_array[];
#ifdef CONFIG_RETHUNK
extern void __x86_return_thunk(void);
@@ -254,6 +263,12 @@ extern void __x86_return_thunk(void);
static inline void __x86_return_thunk(void) {}
#endif
+#ifdef CONFIG_MITIGATION_ITS
+extern void its_return_thunk(void);
+#else
+static inline void its_return_thunk(void) {}
+#endif
+
extern void retbleed_return_thunk(void);
extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);
@@ -281,19 +296,22 @@ extern void (*x86_return_thunk)(void);
#ifdef CONFIG_X86_64
/*
+ * Emits a conditional CS prefix that is compatible with
+ * -mindirect-branch-cs-prefix.
+ */
+#define __CS_PREFIX(reg) \
+ ".irp rs,r8,r9,r10,r11,r12,r13,r14,r15\n" \
+ ".ifc \\rs," reg "\n" \
+ ".byte 0x2e\n" \
+ ".endif\n" \
+ ".endr\n"
+
+/*
* Inline asm uses the %V modifier which is only in newer GCC
* which is ensured when CONFIG_RETPOLINE is defined.
*/
-# define CALL_NOSPEC \
- ALTERNATIVE_2( \
- ANNOTATE_RETPOLINE_SAFE \
- "call *%[thunk_target]\n", \
- "call __x86_indirect_thunk_%V[thunk_target]\n", \
- X86_FEATURE_RETPOLINE, \
- "lfence;\n" \
- ANNOTATE_RETPOLINE_SAFE \
- "call *%[thunk_target]\n", \
- X86_FEATURE_RETPOLINE_LFENCE)
+#define CALL_NOSPEC __CS_PREFIX("%V[thunk_target]") \
+ "call __x86_indirect_thunk_%V[thunk_target]\n"
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
@@ -415,24 +433,24 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
-DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
+DECLARE_STATIC_KEY_FALSE(cpu_buf_idle_clear);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
-extern u16 mds_verw_sel;
+extern u16 x86_verw_sel;
#include <asm/segment.h>
/**
- * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
+ * x86_clear_cpu_buffers - Buffer clearing support for different x86 CPU vulns
*
* This uses the otherwise unused and obsolete VERW instruction in
* combination with microcode which triggers a CPU buffer flush when the
* instruction is executed.
*/
-static __always_inline void mds_clear_cpu_buffers(void)
+static __always_inline void x86_clear_cpu_buffers(void)
{
static const u16 ds = __KERNEL_DS;
@@ -449,14 +467,15 @@ static __always_inline void mds_clear_cpu_buffers(void)
}
/**
- * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
+ * x86_idle_clear_cpu_buffers - Buffer clearing support in idle for the MDS
+ * and TSA vulnerabilities.
*
* Clear CPU buffers if the corresponding static key is enabled
*/
-static inline void mds_idle_clear_cpu_buffers(void)
+static __always_inline void x86_idle_clear_cpu_buffers(void)
{
- if (static_branch_likely(&mds_idle_clear))
- mds_clear_cpu_buffers();
+ if (static_branch_likely(&cpu_buf_idle_clear))
+ x86_clear_cpu_buffers();
}
#endif /* __ASSEMBLY__ */
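
The conditional CS prefix emitted by __CS_PREFIX is easiest to see by printing the assembler template it expands to. A userspace sketch: the kernel passes "%V[thunk_target]", which the compiler substitutes with the register name; a literal "r11" stands in here. The .irp/.ifc loop emits the 0x2e byte only when the register is one of r8-r15:

#include <stdio.h>

#define __CS_PREFIX(reg) \
	".irp rs,r8,r9,r10,r11,r12,r13,r14,r15\n" \
	".ifc \\rs," reg "\n" \
	".byte 0x2e\n" \
	".endif\n" \
	".endr\n"

#define CALL_NOSPEC(reg) __CS_PREFIX(reg) "call __x86_indirect_thunk_" reg "\n"

int main(void)
{
	/* Prints the template the assembler would see for an r11 target:
	 * the loop matches "r11", so a lone 0x2e (CS) prefix byte is
	 * emitted immediately before the call. */
	puts(CALL_NOSPEC("r11"));
	return 0;
}
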
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 4d810b9478a4..fa3576ef19da 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -456,6 +456,7 @@ struct pebs_xmm {
*/
#define IBS_OP_CUR_CNT (0xFFF80ULL<<32)
#define IBS_OP_CUR_CNT_RAND (0x0007FULL<<32)
+#define IBS_OP_CUR_CNT_EXT_MASK (0x7FULL<<52)
#define IBS_OP_CNT_CTL (1ULL<<19)
#define IBS_OP_VAL (1ULL<<18)
#define IBS_OP_ENABLE (1ULL<<17)
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 2551baec927d..d9a38d379d18 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -25,8 +25,9 @@ void __noreturn machine_real_restart(unsigned int type);
#define MRR_BIOS 0
#define MRR_APM 1
-typedef void crash_vmclear_fn(void);
-extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
+typedef void (cpu_emergency_virt_cb)(void);
+void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback);
+void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback);
void cpu_emergency_disable_virtualization(void);
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index cb0386fc4dc3..c648502e4535 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -4,6 +4,7 @@
#include <linux/thread_info.h>
#include <asm/nospec-branch.h>
+#include <asm/msr.h>
/*
* On VMENTER we must preserve whatever view of the SPEC_CTRL MSR
@@ -76,6 +77,16 @@ static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
}
+/*
+ * This can be used in noinstr functions & should only be called in bare
+ * metal context.
+ */
+static __always_inline void __update_spec_ctrl(u64 val)
+{
+ __this_cpu_write(x86_spec_ctrl_current, val);
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, val);
+}
+
#ifdef CONFIG_SMP
extern void speculative_store_bypass_ht_init(void);
#else
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index d1eb6bbfd39e..5abe6c3d982a 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -228,7 +228,7 @@ void flush_tlb_multi(const struct cpumask *cpumask,
flush_tlb_mm_range((vma)->vm_mm, start, end, \
((vma)->vm_flags & VM_HUGETLB) \
? huge_page_shift(hstate_vma(vma)) \
- : PAGE_SHIFT, false)
+ : PAGE_SHIFT, true)
extern void flush_tlb_all(void);
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 724ce44809ed..1b683bf71d14 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -70,16 +70,6 @@ static inline void __cpu_emergency_vmxoff(void)
cpu_vmxoff();
}
-/** Disable VMX if it is supported and enabled on the current CPU
- */
-static inline void cpu_emergency_vmxoff(void)
-{
- if (cpu_has_vmx())
- __cpu_emergency_vmxoff();
-}
-
-
-
/*
* SVM functions:
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 9b4eddd5833a..a54c4d3633e9 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -94,12 +94,13 @@ DECLARE_STATIC_CALL(xen_hypercall, xen_hypercall_func);
#ifdef MODULE
#define __ADDRESSABLE_xen_hypercall
#else
-#define __ADDRESSABLE_xen_hypercall __ADDRESSABLE_ASM_STR(__SCK__xen_hypercall)
+#define __ADDRESSABLE_xen_hypercall \
+ __stringify(.global STATIC_CALL_KEY(xen_hypercall);)
#endif
#define __HYPERCALL \
__ADDRESSABLE_xen_hypercall \
- "call __SCT__xen_hypercall"
+ __stringify(call STATIC_CALL_TRAMP(xen_hypercall))
#define __HYPERCALL_ENTRY(x) "a" (x)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 69f85e274611..3bc6be6404d2 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -18,6 +18,8 @@
#include <linux/mmu_context.h>
#include <linux/bsearch.h>
#include <linux/sync_core.h>
+#include <linux/moduleloader.h>
+#include <linux/cleanup.h>
#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
@@ -30,6 +32,8 @@
#include <asm/fixmap.h>
#include <asm/paravirt.h>
#include <asm/asm-prototypes.h>
+#include <asm/cfi.h>
+#include <asm/set_memory.h>
int __read_mostly alternatives_patched;
@@ -396,6 +400,212 @@ static int emit_indirect(int op, int reg, u8 *bytes)
return i;
}
+#ifdef CONFIG_MITIGATION_ITS
+
+#ifdef CONFIG_MODULES
+static struct module *its_mod;
+static void *its_page;
+static unsigned int its_offset;
+
+/* Initialize a thunk with the "jmp *reg; int3" instructions. */
+static void *its_init_thunk(void *thunk, int reg)
+{
+ u8 *bytes = thunk;
+ int i = 0;
+
+ if (reg >= 8) {
+ bytes[i++] = 0x41; /* REX.B prefix */
+ reg -= 8;
+ }
+ bytes[i++] = 0xff;
+ bytes[i++] = 0xe0 + reg; /* jmp *reg */
+ bytes[i++] = 0xcc;
+
+ return thunk;
+}
+
+void its_init_mod(struct module *mod)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS))
+ return;
+
+ mutex_lock(&text_mutex);
+ its_mod = mod;
+ its_page = NULL;
+}
+
+void its_fini_mod(struct module *mod)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS))
+ return;
+
+ WARN_ON_ONCE(its_mod != mod);
+
+ its_mod = NULL;
+ its_page = NULL;
+ mutex_unlock(&text_mutex);
+
+ for (int i = 0; i < mod->its_num_pages; i++) {
+ void *page = mod->its_page_array[i];
+ set_memory_ro((unsigned long)page, 1);
+ set_memory_x((unsigned long)page, 1);
+ }
+}
+
+void its_free_mod(struct module *mod)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS))
+ return;
+
+ for (int i = 0; i < mod->its_num_pages; i++) {
+ void *page = mod->its_page_array[i];
+ module_memfree(page);
+ }
+ kfree(mod->its_page_array);
+}
+
+DEFINE_FREE(its_execmem, void *, if (_T) module_memfree(_T));
+
+static void *its_alloc(void)
+{
+ void *page __free(its_execmem) = module_alloc(PAGE_SIZE);
+
+ if (!page)
+ return NULL;
+
+ if (its_mod) {
+ void *tmp = krealloc(its_mod->its_page_array,
+ (its_mod->its_num_pages+1) * sizeof(void *),
+ GFP_KERNEL);
+ if (!tmp)
+ return NULL;
+
+ its_mod->its_page_array = tmp;
+ its_mod->its_page_array[its_mod->its_num_pages++] = page;
+ }
+
+ return no_free_ptr(page);
+}
+
+static void *its_allocate_thunk(int reg)
+{
+ int size = 3 + (reg / 8);
+ void *thunk;
+
+ if (!its_page || (its_offset + size - 1) >= PAGE_SIZE) {
+ its_page = its_alloc();
+ if (!its_page) {
+ pr_err("ITS page allocation failed\n");
+ return NULL;
+ }
+ memset(its_page, INT3_INSN_OPCODE, PAGE_SIZE);
+ its_offset = 32;
+ }
+
+ /*
+ * If the indirect branch instruction will be in the lower half
+ * of a cacheline, then update the offset to reach the upper half.
+ */
+ if ((its_offset + size - 1) % 64 < 32)
+ its_offset = ((its_offset - 1) | 0x3F) + 33;
+
+ thunk = its_page + its_offset;
+ its_offset += size;
+
+ set_memory_rw((unsigned long)its_page, 1);
+ thunk = its_init_thunk(thunk, reg);
+ set_memory_ro((unsigned long)its_page, 1);
+ set_memory_x((unsigned long)its_page, 1);
+
+ return thunk;
+}
+#else /* CONFIG_MODULES */
+
+static void *its_allocate_thunk(int reg)
+{
+ return NULL;
+}
+
+#endif /* CONFIG_MODULES */
+
+static int __emit_trampoline(void *addr, struct insn *insn, u8 *bytes,
+ void *call_dest, void *jmp_dest)
+{
+ u8 op = insn->opcode.bytes[0];
+ int i = 0;
+
+ /*
+ * Clang does 'weird' Jcc __x86_indirect_thunk_r11 conditional
+ * tail-calls. Deal with them.
+ */
+ if (is_jcc32(insn)) {
+ bytes[i++] = op;
+ op = insn->opcode.bytes[1];
+ goto clang_jcc;
+ }
+
+ if (insn->length == 6)
+ bytes[i++] = 0x2e; /* CS-prefix */
+
+ switch (op) {
+ case CALL_INSN_OPCODE:
+ __text_gen_insn(bytes+i, op, addr+i,
+ call_dest,
+ CALL_INSN_SIZE);
+ i += CALL_INSN_SIZE;
+ break;
+
+ case JMP32_INSN_OPCODE:
+clang_jcc:
+ __text_gen_insn(bytes+i, op, addr+i,
+ jmp_dest,
+ JMP32_INSN_SIZE);
+ i += JMP32_INSN_SIZE;
+ break;
+
+ default:
+ WARN(1, "%pS %px %*ph\n", addr, addr, 6, addr);
+ return -1;
+ }
+
+ WARN_ON_ONCE(i != insn->length);
+
+ return i;
+}
+
+static int emit_its_trampoline(void *addr, struct insn *insn, int reg, u8 *bytes)
+{
+ u8 *thunk = __x86_indirect_its_thunk_array[reg];
+ u8 *tmp = its_allocate_thunk(reg);
+
+ if (tmp)
+ thunk = tmp;
+
+ return __emit_trampoline(addr, insn, bytes, thunk, thunk);
+}
+
+/* Check if an indirect branch is at an ITS-unsafe address */
+static bool cpu_wants_indirect_its_thunk_at(unsigned long addr, int reg)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS))
+ return false;
+
+ /* Indirect branch opcode is 2 or 3 bytes depending on reg */
+ addr += 1 + reg / 8;
+
+ /* Lower-half of the cacheline? */
+ return !(addr & 0x20);
+}
+
+u8 *its_static_thunk(int reg)
+{
+ u8 *thunk = __x86_indirect_its_thunk_array[reg];
+
+ return thunk;
+}
+
+#endif
+
/*
* Rewrite the compiler generated retpoline thunk calls.
*
@@ -466,6 +676,15 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
bytes[i++] = 0xe8; /* LFENCE */
}
+#ifdef CONFIG_MITIGATION_ITS
+ /*
+ * Check if the address of the last byte of the emitted indirect
+ * branch is in the lower half of the cacheline. Such branches
+ * need the ITS mitigation.
+ */
+ if (cpu_wants_indirect_its_thunk_at((unsigned long)addr + i, reg))
+ return emit_its_trampoline(addr, insn, reg, bytes);
+#endif
+
ret = emit_indirect(op, reg, bytes + i);
if (ret < 0)
return ret;
@@ -537,6 +756,21 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
#ifdef CONFIG_RETHUNK
+bool cpu_wants_rethunk(void)
+{
+ return cpu_feature_enabled(X86_FEATURE_RETHUNK);
+}
+
+bool cpu_wants_rethunk_at(void *addr)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ return false;
+ if (x86_return_thunk != its_return_thunk)
+ return true;
+
+ return !((unsigned long)addr & 0x20);
+}
+
/*
* Rewrite the compiler generated return thunk tail-calls.
*
@@ -552,13 +786,12 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes)
{
int i = 0;
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
- if (x86_return_thunk == __x86_return_thunk)
- return -1;
-
+ /* Patch the custom return thunks... */
+ if (cpu_wants_rethunk_at(addr)) {
i = JMP32_INSN_SIZE;
__text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i);
} else {
+ /* ... or patch them out if not needed. */
bytes[i++] = RET_INSN_OPCODE;
}
@@ -914,6 +1147,8 @@ static noinline void __init int3_selftest(void)
void __init alternative_instructions(void)
{
+ u64 ibt;
+
int3_selftest();
/*
@@ -951,6 +1186,9 @@ void __init alternative_instructions(void)
*/
paravirt_set_cap();
+ /* Keep CET-IBT disabled until caller/callee are patched */
+ ibt = ibt_save();
+
/*
* First patch paravirt functions, such that we overwrite the indirect
* call with the direct call.
@@ -972,6 +1210,8 @@ void __init alternative_instructions(void)
apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end);
+ ibt_restore(ibt);
+
#ifdef CONFIG_SMP
/* Patch to UP if other cpus not imminent. */
if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
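
The thunk encoding in its_init_thunk() and the cacheline-placement fixup in its_allocate_thunk() are both easy to verify in isolation. A userspace sketch of the same math, not the kernel allocator itself:

#include <stdio.h>
#include <stdint.h>

/* Mirror of its_init_thunk(): emit "jmp *reg; int3" into bytes[] */
static int emit_jmp_reg(uint8_t *bytes, int reg)
{
	int i = 0;

	if (reg >= 8) {
		bytes[i++] = 0x41;	/* REX.B prefix for r8-r15 */
		reg -= 8;
	}
	bytes[i++] = 0xff;
	bytes[i++] = 0xe0 + reg;	/* jmp *reg */
	bytes[i++] = 0xcc;		/* int3 */
	return i;
}

/* Mirror of the offset fixup: if the thunk's last byte would land in
 * the lower half of a 64-byte cacheline, bump the offset to the start
 * of the next upper half. */
static unsigned int fixup_offset(unsigned int offset, int size)
{
	if ((offset + size - 1) % 64 < 32)
		offset = ((offset - 1) | 0x3F) + 33;
	return offset;
}

int main(void)
{
	uint8_t buf[4];
	int len = emit_jmp_reg(buf, 11);	/* r11 */

	printf("thunk bytes:");
	for (int i = 0; i < len; i++)
		printf(" %02x", buf[i]);	/* 41 ff e3 cc */
	printf("\n");

	/* 73 % 64 = 9 is in the lower half, so 70 moves to 160, which is
	 * 32 mod 64, i.e. the start of an upper half; 32 stays put. */
	printf("fixup(70, 4) = %u\n", fixup_offset(70, 4));	/* 160 */
	printf("fixup(32, 4) = %u\n", fixup_offset(32, 4));	/* 32 */
	return 0;
}
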
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 8992a6bce9f0..e5e7c43bf67b 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -342,7 +342,6 @@ bool __init early_is_amd_nb(u32 device)
struct resource *amd_get_mmconfig_range(struct resource *res)
{
- u32 address;
u64 base, msr;
unsigned int segn_busn_bits;
@@ -350,13 +349,11 @@ struct resource *amd_get_mmconfig_range(struct resource *res)
boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
return NULL;
- /* assume all cpus from fam10h have mmconfig */
- if (boot_cpu_data.x86 < 0x10)
+ /* Assume CPUs from Fam10h have mmconfig, although not all VMs do */
+ if (boot_cpu_data.x86 < 0x10 ||
+ rdmsrl_safe(MSR_FAM10H_MMIO_CONF_BASE, &msr))
return NULL;
- address = MSR_FAM10H_MMIO_CONF_BASE;
- rdmsrl(address, msr);
-
/* mmconfig is not enabled */
if (!(msr & FAM10H_MMIO_CONF_ENABLE))
return NULL;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 37796a1d0715..2d71c329b347 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -553,6 +553,64 @@ static void early_init_amd_mc(struct cpuinfo_x86 *c)
#endif
}
+static bool amd_check_tsa_microcode(void)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+ union zen_patch_rev p;
+ u32 min_rev = 0;
+
+ p.ext_fam = c->x86 - 0xf;
+ p.model = c->x86_model;
+ p.ext_model = c->x86_model >> 4;
+ p.stepping = c->x86_stepping;
+ /* Reserved bits are expected to be 0 in the test below */
+ p.__reserved = 0;
+
+ if (c->x86 == 0x19) {
+ switch (p.ucode_rev >> 8) {
+ case 0xa0011: min_rev = 0x0a0011d7; break;
+ case 0xa0012: min_rev = 0x0a00123b; break;
+ case 0xa0082: min_rev = 0x0a00820d; break;
+ case 0xa1011: min_rev = 0x0a10114c; break;
+ case 0xa1012: min_rev = 0x0a10124c; break;
+ case 0xa1081: min_rev = 0x0a108109; break;
+ case 0xa2010: min_rev = 0x0a20102e; break;
+ case 0xa2012: min_rev = 0x0a201211; break;
+ case 0xa4041: min_rev = 0x0a404108; break;
+ case 0xa5000: min_rev = 0x0a500012; break;
+ case 0xa6012: min_rev = 0x0a60120a; break;
+ case 0xa7041: min_rev = 0x0a704108; break;
+ case 0xa7052: min_rev = 0x0a705208; break;
+ case 0xa7080: min_rev = 0x0a708008; break;
+ case 0xa70c0: min_rev = 0x0a70c008; break;
+ case 0xaa002: min_rev = 0x0aa00216; break;
+ default:
+ pr_debug("%s: ucode_rev: 0x%x, current revision: 0x%x\n",
+ __func__, p.ucode_rev, c->microcode);
+ return false;
+ }
+ }
+
+ if (!min_rev)
+ return false;
+
+ return c->microcode >= min_rev;
+}
+
+static void tsa_init(struct cpuinfo_x86 *c)
+{
+ if (cpu_has(c, X86_FEATURE_HYPERVISOR))
+ return;
+
+ if (c->x86 == 0x19) {
+ if (amd_check_tsa_microcode())
+ setup_force_cpu_cap(X86_FEATURE_VERW_CLEAR);
+ } else {
+ setup_force_cpu_cap(X86_FEATURE_TSA_SQ_NO);
+ setup_force_cpu_cap(X86_FEATURE_TSA_L1_NO);
+ }
+}
+
static void bsp_init_amd(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
@@ -620,6 +678,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
}
resctrl_cpu_detect(c);
+
+ tsa_init(c);
}
static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
@@ -663,6 +723,7 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
if (!(msr & MSR_K7_HWCR_SMMLOCK))
goto clear_sev;
+
return;
clear_all:
@@ -787,7 +848,7 @@ static void init_amd_k8(struct cpuinfo_x86 *c)
* (model = 0x14) and later actually support it.
* (AMD Erratum #110, docId: 25759).
*/
- if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) {
+ if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM) && !cpu_has(c, X86_FEATURE_HYPERVISOR)) {
clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
if (!rdmsrl_amd_safe(0xc001100d, &value)) {
value &= ~BIT_64(32);
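
How the zen_patch_rev union feeds the switch in amd_check_tsa_microcode() can be checked with a standalone build. A sketch assuming GCC's LSB-first bitfield layout on little-endian x86, which is what the kernel relies on here; the family/model/stepping values are an illustrative pick, not taken from the patch:

#include <stdio.h>
#include <stdint.h>

union zen_patch_rev {
	struct {
		uint32_t rev	    : 8,
			 stepping   : 4,
			 model	    : 4,
			 __reserved : 4,
			 ext_model  : 4,
			 ext_fam    : 8;
	};
	uint32_t ucode_rev;
};

int main(void)
{
	union zen_patch_rev p = { .ucode_rev = 0 };
	uint32_t min_rev = 0;

	/* Illustrative CPU: family 0x19, model 0x01, stepping 0x1 */
	p.ext_fam   = 0x19 - 0xf;	/* 0x0a */
	p.model     = 0x01 & 0xf;
	p.ext_model = 0x01 >> 4;
	p.stepping  = 0x1;

	printf("ucode_rev = 0x%08x\n", (unsigned)p.ucode_rev);	/* 0x0a001100 */
	printf("prefix    = 0x%x\n", (unsigned)(p.ucode_rev >> 8)); /* 0xa0011 */

	/* Matches the 0xa0011 case in the hunk above */
	if (p.ucode_rev >> 8 == 0xa0011)
		min_rev = 0x0a0011d7;
	printf("min_rev   = 0x%08x\n", (unsigned)min_rev);
	return 0;
}
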
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 03221a060ae7..4fbb5b15ab75 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -48,6 +48,8 @@ static void __init srbds_select_mitigation(void);
static void __init l1d_flush_select_mitigation(void);
static void __init gds_select_mitigation(void);
static void __init srso_select_mitigation(void);
+static void __init its_select_mitigation(void);
+static void __init tsa_select_mitigation(void);
/* The base value of the SPEC_CTRL MSR without task-specific bits set */
u64 x86_spec_ctrl_base;
@@ -66,6 +68,13 @@ static DEFINE_MUTEX(spec_ctrl_mutex);
void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk;
+static void __init set_return_thunk(void *thunk)
+{
+ x86_return_thunk = thunk;
+
+ pr_info("active return thunk: %ps\n", thunk);
+}
+
/* Update SPEC_CTRL MSR and its cached copy unconditionally */
static void update_spec_ctrl(u64 val)
{
@@ -92,7 +101,7 @@ void update_spec_ctrl_cond(u64 val)
wrmsrl(MSR_IA32_SPEC_CTRL, val);
}
-u64 spec_ctrl_current(void)
+noinstr u64 spec_ctrl_current(void)
{
return this_cpu_read(x86_spec_ctrl_current);
}
@@ -112,9 +121,9 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
/* Control unconditional IBPB in switch_mm() */
DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
-/* Control MDS CPU buffer clear before idling (halt, mwait) */
-DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
-EXPORT_SYMBOL_GPL(mds_idle_clear);
+/* Control CPU buffer clear before idling (halt, mwait) */
+DEFINE_STATIC_KEY_FALSE(cpu_buf_idle_clear);
+EXPORT_SYMBOL_GPL(cpu_buf_idle_clear);
/*
* Controls whether l1d flush based mitigations are enabled,
@@ -174,6 +183,8 @@ void __init cpu_select_mitigations(void)
*/
srso_select_mitigation();
gds_select_mitigation();
+ its_select_mitigation();
+ tsa_select_mitigation();
}
/*
@@ -434,7 +445,7 @@ static void __init mmio_select_mitigation(void)
* is required irrespective of SMT state.
*/
if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO))
- static_branch_enable(&mds_idle_clear);
+ static_branch_enable(&cpu_buf_idle_clear);
/*
* Check if the system has the right microcode.
@@ -1081,7 +1092,7 @@ do_cmd_auto:
setup_force_cpu_cap(X86_FEATURE_UNRET);
if (IS_ENABLED(CONFIG_RETHUNK))
- x86_return_thunk = retbleed_return_thunk;
+ set_return_thunk(retbleed_return_thunk);
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
@@ -1143,6 +1154,116 @@ do_cmd_auto:
}
#undef pr_fmt
+#define pr_fmt(fmt) "ITS: " fmt
+
+enum its_mitigation_cmd {
+ ITS_CMD_OFF,
+ ITS_CMD_ON,
+ ITS_CMD_VMEXIT,
+};
+
+enum its_mitigation {
+ ITS_MITIGATION_OFF,
+ ITS_MITIGATION_VMEXIT_ONLY,
+ ITS_MITIGATION_ALIGNED_THUNKS,
+};
+
+static const char * const its_strings[] = {
+ [ITS_MITIGATION_OFF] = "Vulnerable",
+ [ITS_MITIGATION_VMEXIT_ONLY] = "Mitigation: Vulnerable, KVM: Not affected",
+ [ITS_MITIGATION_ALIGNED_THUNKS] = "Mitigation: Aligned branch/return thunks",
+};
+
+static enum its_mitigation its_mitigation __ro_after_init = ITS_MITIGATION_ALIGNED_THUNKS;
+
+static enum its_mitigation_cmd its_cmd __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_ITS) ? ITS_CMD_ON : ITS_CMD_OFF;
+
+static int __init its_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!IS_ENABLED(CONFIG_MITIGATION_ITS)) {
+ pr_err("Mitigation disabled at compile time, ignoring option (%s)", str);
+ return 0;
+ }
+
+ if (!strcmp(str, "off")) {
+ its_cmd = ITS_CMD_OFF;
+ } else if (!strcmp(str, "on")) {
+ its_cmd = ITS_CMD_ON;
+ } else if (!strcmp(str, "force")) {
+ its_cmd = ITS_CMD_ON;
+ setup_force_cpu_bug(X86_BUG_ITS);
+ } else if (!strcmp(str, "vmexit")) {
+ its_cmd = ITS_CMD_VMEXIT;
+ } else {
+ pr_err("Ignoring unknown indirect_target_selection option (%s).", str);
+ }
+
+ return 0;
+}
+early_param("indirect_target_selection", its_parse_cmdline);
+
+static void __init its_select_mitigation(void)
+{
+ enum its_mitigation_cmd cmd = its_cmd;
+
+ if (!boot_cpu_has_bug(X86_BUG_ITS) || cpu_mitigations_off()) {
+ its_mitigation = ITS_MITIGATION_OFF;
+ return;
+ }
+
+ /* Exit early to avoid irrelevant warnings */
+ if (cmd == ITS_CMD_OFF) {
+ its_mitigation = ITS_MITIGATION_OFF;
+ goto out;
+ }
+ if (spectre_v2_enabled == SPECTRE_V2_NONE) {
+ pr_err("WARNING: Spectre-v2 mitigation is off, disabling ITS\n");
+ its_mitigation = ITS_MITIGATION_OFF;
+ goto out;
+ }
+ if (!IS_ENABLED(CONFIG_RETPOLINE) || !IS_ENABLED(CONFIG_RETHUNK)) {
+ pr_err("WARNING: ITS mitigation depends on retpoline and rethunk support\n");
+ its_mitigation = ITS_MITIGATION_OFF;
+ goto out;
+ }
+ if (IS_ENABLED(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B)) {
+ pr_err("WARNING: ITS mitigation is not compatible with CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B\n");
+ its_mitigation = ITS_MITIGATION_OFF;
+ goto out;
+ }
+ if (boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE)) {
+ pr_err("WARNING: ITS mitigation is not compatible with lfence mitigation\n");
+ its_mitigation = ITS_MITIGATION_OFF;
+ goto out;
+ }
+
+ switch (cmd) {
+ case ITS_CMD_OFF:
+ its_mitigation = ITS_MITIGATION_OFF;
+ break;
+ case ITS_CMD_VMEXIT:
+ if (boot_cpu_has_bug(X86_BUG_ITS_NATIVE_ONLY)) {
+ its_mitigation = ITS_MITIGATION_VMEXIT_ONLY;
+ goto out;
+ }
+ fallthrough;
+ case ITS_CMD_ON:
+ its_mitigation = ITS_MITIGATION_ALIGNED_THUNKS;
+ if (!boot_cpu_has(X86_FEATURE_RETPOLINE))
+ setup_force_cpu_cap(X86_FEATURE_INDIRECT_THUNK_ITS);
+ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+ set_return_thunk(its_return_thunk);
+ break;
+ }
+out:
+ pr_info("%s\n", its_strings[its_mitigation]);
+}
+
+#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt
static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
@@ -1553,7 +1674,7 @@ static void __init spec_ctrl_disable_kernel_rrsba(void)
rrsba_disabled = true;
}
-static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
+static void __init spectre_v2_select_rsb_mitigation(enum spectre_v2_mitigation mode)
{
/*
* Similar to context switches, there are two types of RSB attacks
@@ -1577,27 +1698,30 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_
*/
switch (mode) {
case SPECTRE_V2_NONE:
- return;
+ break;
- case SPECTRE_V2_EIBRS_LFENCE:
case SPECTRE_V2_EIBRS:
+ case SPECTRE_V2_EIBRS_LFENCE:
+ case SPECTRE_V2_EIBRS_RETPOLINE:
if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
- setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE);
pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n");
+ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE);
}
- return;
+ break;
- case SPECTRE_V2_EIBRS_RETPOLINE:
case SPECTRE_V2_RETPOLINE:
case SPECTRE_V2_LFENCE:
case SPECTRE_V2_IBRS:
+ pr_info("Spectre v2 / SpectreRSB: Filling RSB on context switch and VMEXIT\n");
+ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
- pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n");
- return;
- }
+ break;
- pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit");
- dump_stack();
+ default:
+ pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation\n");
+ dump_stack();
+ break;
+ }
}
/*
@@ -1653,10 +1777,11 @@ static void __init bhi_select_mitigation(void)
return;
}
- if (spec_ctrl_bhi_dis())
+ if (!IS_ENABLED(CONFIG_X86_64))
return;
- if (!IS_ENABLED(CONFIG_X86_64))
+ /* Mitigate in hardware if supported */
+ if (spec_ctrl_bhi_dis())
return;
/* Mitigate KVM by default */
@@ -1822,10 +1947,7 @@ static void __init spectre_v2_select_mitigation(void)
*
* FIXME: Is this pointless for retbleed-affected AMD?
*/
- setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
- pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
-
- spectre_v2_determine_rsb_fill_type_at_vmexit(mode);
+ spectre_v2_select_rsb_mitigation(mode);
/*
* Retpoline protects the kernel, but doesn't protect firmware. IBRS
@@ -1907,10 +2029,10 @@ static void update_mds_branch_idle(void)
return;
if (sched_smt_active()) {
- static_branch_enable(&mds_idle_clear);
+ static_branch_enable(&cpu_buf_idle_clear);
} else if (mmio_mitigation == MMIO_MITIGATION_OFF ||
(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) {
- static_branch_disable(&mds_idle_clear);
+ static_branch_disable(&cpu_buf_idle_clear);
}
}
@@ -1918,6 +2040,94 @@ static void update_mds_branch_idle(void)
#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n"
+#undef pr_fmt
+#define pr_fmt(fmt) "Transient Scheduler Attacks: " fmt
+
+enum tsa_mitigations {
+ TSA_MITIGATION_NONE,
+ TSA_MITIGATION_UCODE_NEEDED,
+ TSA_MITIGATION_USER_KERNEL,
+ TSA_MITIGATION_VM,
+ TSA_MITIGATION_FULL,
+};
+
+static const char * const tsa_strings[] = {
+ [TSA_MITIGATION_NONE] = "Vulnerable",
+ [TSA_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode",
+ [TSA_MITIGATION_USER_KERNEL] = "Mitigation: Clear CPU buffers: user/kernel boundary",
+ [TSA_MITIGATION_VM] = "Mitigation: Clear CPU buffers: VM",
+ [TSA_MITIGATION_FULL] = "Mitigation: Clear CPU buffers",
+};
+
+static enum tsa_mitigations tsa_mitigation __ro_after_init =
+ IS_ENABLED(CONFIG_MITIGATION_TSA) ? TSA_MITIGATION_FULL : TSA_MITIGATION_NONE;
+
+static int __init tsa_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off"))
+ tsa_mitigation = TSA_MITIGATION_NONE;
+ else if (!strcmp(str, "on"))
+ tsa_mitigation = TSA_MITIGATION_FULL;
+ else if (!strcmp(str, "user"))
+ tsa_mitigation = TSA_MITIGATION_USER_KERNEL;
+ else if (!strcmp(str, "vm"))
+ tsa_mitigation = TSA_MITIGATION_VM;
+ else
+ pr_err("Ignoring unknown tsa=%s option.\n", str);
+
+ return 0;
+}
+early_param("tsa", tsa_parse_cmdline);
+
+static void __init tsa_select_mitigation(void)
+{
+ if (tsa_mitigation == TSA_MITIGATION_NONE)
+ return;
+
+ if (cpu_mitigations_off() || !boot_cpu_has_bug(X86_BUG_TSA)) {
+ tsa_mitigation = TSA_MITIGATION_NONE;
+ return;
+ }
+
+ if (!boot_cpu_has(X86_FEATURE_VERW_CLEAR))
+ tsa_mitigation = TSA_MITIGATION_UCODE_NEEDED;
+
+ switch (tsa_mitigation) {
+ case TSA_MITIGATION_USER_KERNEL:
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+ break;
+
+ case TSA_MITIGATION_VM:
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM);
+ break;
+
+ case TSA_MITIGATION_UCODE_NEEDED:
+ if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ goto out;
+
+ pr_notice("Forcing mitigation on in a VM\n");
+
+ /*
+ * On the off-chance that microcode has been updated
+ * on the host, enable the mitigation in the guest just
+ * in case.
+ */
+ fallthrough;
+ case TSA_MITIGATION_FULL:
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF_VM);
+ break;
+ default:
+ break;
+ }
+
+out:
+ pr_info("%s\n", tsa_strings[tsa_mitigation]);
+}
+
void cpu_bugs_smt_update(void)
{
mutex_lock(&spec_ctrl_mutex);
@@ -1971,6 +2181,24 @@ void cpu_bugs_smt_update(void)
break;
}
+ switch (tsa_mitigation) {
+ case TSA_MITIGATION_USER_KERNEL:
+ case TSA_MITIGATION_VM:
+ case TSA_MITIGATION_FULL:
+ case TSA_MITIGATION_UCODE_NEEDED:
+ /*
+ * TSA-SQ can potentially lead to info leakage between
+ * SMT threads.
+ */
+ if (sched_smt_active())
+ static_branch_enable(&cpu_buf_idle_clear);
+ else
+ static_branch_disable(&cpu_buf_idle_clear);
+ break;
+ case TSA_MITIGATION_NONE:
+ break;
+ }
+
mutex_unlock(&spec_ctrl_mutex);
}
@@ -2591,10 +2819,10 @@ static void __init srso_select_mitigation(void)
if (boot_cpu_data.x86 == 0x19) {
setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS);
- x86_return_thunk = srso_alias_return_thunk;
+ set_return_thunk(srso_alias_return_thunk);
} else {
setup_force_cpu_cap(X86_FEATURE_SRSO);
- x86_return_thunk = srso_return_thunk;
+ set_return_thunk(srso_return_thunk);
}
srso_mitigation = SRSO_MITIGATION_SAFE_RET;
} else {
@@ -2774,6 +3002,11 @@ static ssize_t rfds_show_state(char *buf)
return sysfs_emit(buf, "%s\n", rfds_strings[rfds_mitigation]);
}
+static ssize_t its_show_state(char *buf)
+{
+ return sysfs_emit(buf, "%s\n", its_strings[its_mitigation]);
+}
+
static char *stibp_state(void)
{
if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
@@ -2900,6 +3133,11 @@ static ssize_t srso_show_state(char *buf)
boot_cpu_has(X86_FEATURE_IBPB_BRTYPE) ? "" : ", no microcode");
}
+static ssize_t tsa_show_state(char *buf)
+{
+ return sysfs_emit(buf, "%s\n", tsa_strings[tsa_mitigation]);
+}
+
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
@@ -2958,6 +3196,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_RFDS:
return rfds_show_state(buf);
+ case X86_BUG_ITS:
+ return its_show_state(buf);
+
+ case X86_BUG_TSA:
+ return tsa_show_state(buf);
+
default:
break;
}
@@ -3037,4 +3281,14 @@ ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attrib
{
return cpu_show_common(dev, attr, buf, X86_BUG_RFDS);
}
+
+ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_ITS);
+}
+
+ssize_t cpu_show_tsa(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_TSA);
+}
#endif
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 66556833d7af..7cf43c6fb59d 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -801,7 +801,7 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
/* If bit 31 is set, this is an unknown format */
- for (j = 0 ; j < 3 ; j++)
+ for (j = 0 ; j < 4 ; j++)
if (regs[j] & (1 << 31))
regs[j] = 0;
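The off-by-one fixed above had been skipping EDX: CPUID leaf 0x2 returns descriptor bytes in all four registers, and bit 31 of each register flags an unknown format for that register. A small userspace sketch of the same masking, assuming x86 and GCC/Clang's <cpuid.h>:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int regs[4];
	int j;

	/* Leaf 0x2 packs TLB/cache descriptor bytes into EAX..EDX. */
	if (!__get_cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]))
		return 1;

	/* If bit 31 is set, that register holds no valid descriptors. */
	for (j = 0; j < 4; j++)
		if (regs[j] & (1u << 31))
			regs[j] = 0;

	for (j = 0; j < 4; j++)
		printf("reg[%d] = %#010x\n", j, regs[j]);
	return 0;
}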
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 62aaabea7e33..9c849a4160cd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1045,17 +1045,18 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_capability[CPUID_D_1_EAX] = eax;
}
- /* AMD-defined flags: level 0x80000001 */
+ /*
+ * Check if extended CPUID leaves are implemented: Max extended
+ * CPUID leaf must be in the 0x80000001-0x8000ffff range.
+ */
eax = cpuid_eax(0x80000000);
- c->extended_cpuid_level = eax;
+ c->extended_cpuid_level = ((eax & 0xffff0000) == 0x80000000) ? eax : 0;
- if ((eax & 0xffff0000) == 0x80000000) {
- if (eax >= 0x80000001) {
- cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+ if (c->extended_cpuid_level >= 0x80000001) {
+ cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
- c->x86_capability[CPUID_8000_0001_ECX] = ecx;
- c->x86_capability[CPUID_8000_0001_EDX] = edx;
- }
+ c->x86_capability[CPUID_8000_0001_ECX] = ecx;
+ c->x86_capability[CPUID_8000_0001_EDX] = edx;
}
if (c->extended_cpuid_level >= 0x80000007) {
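The rewrite collapses the range test into the assignment: CPUID 0x80000000 reports the maximum extended leaf, and the value is only trustworthy when it lies in 0x8000xxxx. A minimal userspace sketch of the same validation (again assuming <cpuid.h>):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx, max_ext;

	if (!__get_cpuid(0x80000000, &eax, &ebx, &ecx, &edx))
		return 1;

	/* Only trust eax if the CPU really implements 0x8000xxxx leaves. */
	max_ext = ((eax & 0xffff0000) == 0x80000000) ? eax : 0;

	if (max_ext >= 0x80000001) {
		__cpuid(0x80000001, eax, ebx, ecx, edx);
		printf("0x80000001: ecx=%#010x edx=%#010x\n", ecx, edx);
	} else {
		puts("no extended leaves");
	}
	return 0;
}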
@@ -1251,6 +1252,12 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
#define GDS BIT(6)
/* CPU is affected by Register File Data Sampling */
#define RFDS BIT(7)
+/* CPU is affected by Indirect Target Selection */
+#define ITS BIT(8)
+/* CPU is affected by Indirect Target Selection, but guest-host isolation is not affected */
+#define ITS_NATIVE_ONLY BIT(9)
+/* CPU is affected by Transient Scheduler Attacks */
+#define TSA BIT(10)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
@@ -1262,22 +1269,25 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x0, 0x5), MMIO | RETBLEED | GDS),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | ITS),
VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS),
VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS),
- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS),
- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0xb), MMIO | RETBLEED | GDS | SRBDS),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS | ITS),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0xc), MMIO | RETBLEED | GDS | SRBDS),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS | ITS),
VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED),
- VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
- VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS),
- VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS),
- VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED),
- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
- VULNBL_INTEL_STEPPINGS(TIGERLAKE_L, X86_STEPPING_ANY, GDS),
- VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS | ITS | ITS_NATIVE_ONLY),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS | ITS | ITS_NATIVE_ONLY),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED | ITS),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS | ITS),
+ VULNBL_INTEL_STEPPINGS(TIGERLAKE_L, X86_STEPPING_ANY, GDS | ITS | ITS_NATIVE_ONLY),
+ VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS | ITS | ITS_NATIVE_ONLY),
VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
- VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS),
+ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | ITS | ITS_NATIVE_ONLY),
VULNBL_INTEL_STEPPINGS(ALDERLAKE, X86_STEPPING_ANY, RFDS),
VULNBL_INTEL_STEPPINGS(ALDERLAKE_L, X86_STEPPING_ANY, RFDS),
VULNBL_INTEL_STEPPINGS(RAPTORLAKE, X86_STEPPING_ANY, RFDS),
@@ -1295,7 +1305,7 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_AMD(0x16, RETBLEED),
VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO),
VULNBL_HYGON(0x18, RETBLEED | SMT_RSB | SRSO),
- VULNBL_AMD(0x19, SRSO),
+ VULNBL_AMD(0x19, SRSO | TSA),
{}
};
@@ -1341,6 +1351,32 @@ static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr)
return cpu_matches(cpu_vuln_blacklist, RFDS);
}
+static bool __init vulnerable_to_its(u64 x86_arch_cap_msr)
+{
+ /* The "immunity" bit trumps everything else: */
+ if (x86_arch_cap_msr & ARCH_CAP_ITS_NO)
+ return false;
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return false;
+
+ /* None of the affected CPUs have BHI_CTRL */
+ if (boot_cpu_has(X86_FEATURE_BHI_CTRL))
+ return false;
+
+ /*
+ * If a VMM did not expose ITS_NO, assume that a guest could
+ * be running on vulnerable hardware or may migrate to such
+ * hardware.
+ */
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return true;
+
+ if (cpu_matches(cpu_vuln_blacklist, ITS))
+ return true;
+
+ return false;
+}
+
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 x86_arch_cap_msr = x86_read_arch_cap_msr();
@@ -1455,9 +1491,12 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
if (vulnerable_to_rfds(x86_arch_cap_msr))
setup_force_cpu_bug(X86_BUG_RFDS);
- /* When virtualized, eIBRS could be hidden, assume vulnerable */
- if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) &&
- !cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
+ /*
+ * Intel parts with eIBRS are vulnerable to BHI attacks. Parts with
+ * BHI_NO still need to use the BHI mitigation to prevent Intra-mode
+ * attacks. When virtualized, eIBRS could be hidden, assume vulnerable.
+ */
+ if (!cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
(boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
boot_cpu_has(X86_FEATURE_HYPERVISOR)))
setup_force_cpu_bug(X86_BUG_BHI);
@@ -1465,6 +1504,22 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_AMD_IBPB) && !cpu_has(c, X86_FEATURE_AMD_IBPB_RET))
setup_force_cpu_bug(X86_BUG_IBPB_NO_RET);
+ if (vulnerable_to_its(x86_arch_cap_msr)) {
+ setup_force_cpu_bug(X86_BUG_ITS);
+ if (cpu_matches(cpu_vuln_blacklist, ITS_NATIVE_ONLY))
+ setup_force_cpu_bug(X86_BUG_ITS_NATIVE_ONLY);
+ }
+
+ if (c->x86_vendor == X86_VENDOR_AMD) {
+ if (!cpu_has(c, X86_FEATURE_TSA_SQ_NO) ||
+ !cpu_has(c, X86_FEATURE_TSA_L1_NO)) {
+ if (cpu_matches(cpu_vuln_blacklist, TSA) ||
+ /* Enable bug on Zen guests to allow for live migration. */
+ (cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_ZEN)))
+ setup_force_cpu_bug(X86_BUG_TSA);
+ }
+ }
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
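The TSA enumeration above reduces to a small predicate: an AMD part is marked affected when either TSA_*_NO bit is missing and it is either blacklisted or a Zen guest (guests stay marked so live migration to affected hosts remains safe). A hedged restatement as a pure function, with all inputs standing in for the CPUID-derived flags:

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the X86_BUG_TSA condition in the hunk above. */
static bool tsa_affected(bool amd, bool sq_no, bool l1_no,
			 bool blacklisted, bool hypervisor, bool zen)
{
	if (!amd)
		return false;
	if (sq_no && l1_no)	/* both "not affected" bits enumerated */
		return false;
	return blacklisted || (hypervisor && zen);
}

int main(void)
{
	/* Zen guest without the TSA_*_NO bits: affected, for migration safety. */
	printf("%d\n", tsa_affected(true, false, false, false, true, true));
	/* Host with both bits set: not affected. */
	printf("%d\n", tsa_affected(true, true, true, true, false, false));
	return 0;
}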
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index 8a80d5343f3a..4f69e85bc255 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -14,6 +14,7 @@
#include <asm/cacheinfo.h>
#include <asm/spec-ctrl.h>
#include <asm/delay.h>
+#include <asm/resctrl.h>
#include "cpu.h"
@@ -239,6 +240,8 @@ static void bsp_init_hygon(struct cpuinfo_x86 *c)
x86_amd_ls_cfg_ssbd_mask = 1ULL << 10;
}
}
+
+ resctrl_cpu_detect(c);
}
static void early_init_hygon(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 32bd64017047..2c43a1423b09 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -784,26 +784,37 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
}
#endif
-#define TLB_INST_4K 0x01
-#define TLB_INST_4M 0x02
-#define TLB_INST_2M_4M 0x03
+#define TLB_INST_4K 0x01
+#define TLB_INST_4M 0x02
+#define TLB_INST_2M_4M 0x03
-#define TLB_INST_ALL 0x05
-#define TLB_INST_1G 0x06
+#define TLB_INST_ALL 0x05
+#define TLB_INST_1G 0x06
-#define TLB_DATA_4K 0x11
-#define TLB_DATA_4M 0x12
-#define TLB_DATA_2M_4M 0x13
-#define TLB_DATA_4K_4M 0x14
+#define TLB_DATA_4K 0x11
+#define TLB_DATA_4M 0x12
+#define TLB_DATA_2M_4M 0x13
+#define TLB_DATA_4K_4M 0x14
-#define TLB_DATA_1G 0x16
+#define TLB_DATA_1G 0x16
+#define TLB_DATA_1G_2M_4M 0x17
-#define TLB_DATA0_4K 0x21
-#define TLB_DATA0_4M 0x22
-#define TLB_DATA0_2M_4M 0x23
+#define TLB_DATA0_4K 0x21
+#define TLB_DATA0_4M 0x22
+#define TLB_DATA0_2M_4M 0x23
-#define STLB_4K 0x41
-#define STLB_4K_2M 0x42
+#define STLB_4K 0x41
+#define STLB_4K_2M 0x42
+
+/*
+ * Each of leaf 0x2's one-byte TLB descriptors implies the same number of
+ * entries for their respective TLB types. The 0x63 descriptor is an
+ * exception: it implies 4 dTLB entries for 1GB pages and 32 dTLB entries
+ * for 2MB or 4MB pages. Encode descriptor 0x63's dTLB entry count for
+ * 2MB/4MB pages here, as its count for dTLB 1GB pages is already in the
+ * intel_tlb_table[] mapping.
+ */
+#define TLB_0x63_2M_4M_ENTRIES 32
static const struct _tlb_table intel_tlb_table[] = {
{ 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" },
@@ -825,7 +836,8 @@ static const struct _tlb_table intel_tlb_table[] = {
{ 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" },
{ 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" },
{ 0x61, TLB_INST_4K, 48, " TLB_INST 4 KByte pages, full associative" },
- { 0x63, TLB_DATA_1G, 4, " TLB_DATA 1 GByte pages, 4-way set associative" },
+ { 0x63, TLB_DATA_1G_2M_4M, 4, " TLB_DATA 1 GByte pages, 4-way set associative"
+ " (plus 32 entries TLB_DATA 2 MByte or 4 MByte pages, not encoded here)" },
{ 0x6b, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 8-way associative" },
{ 0x6c, TLB_DATA_2M_4M, 128, " TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" },
{ 0x6d, TLB_DATA_1G, 16, " TLB_DATA 1 GByte pages, fully associative" },
@@ -925,6 +937,12 @@ static void intel_tlb_lookup(const unsigned char desc)
if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries)
tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries;
break;
+ case TLB_DATA_1G_2M_4M:
+ if (tlb_lld_2m[ENTRIES] < TLB_0x63_2M_4M_ENTRIES)
+ tlb_lld_2m[ENTRIES] = TLB_0x63_2M_4M_ENTRIES;
+ if (tlb_lld_4m[ENTRIES] < TLB_0x63_2M_4M_ENTRIES)
+ tlb_lld_4m[ENTRIES] = TLB_0x63_2M_4M_ENTRIES;
+ fallthrough;
case TLB_DATA_1G:
if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries)
tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries;
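Descriptor 0x63 is the only leaf 0x2 descriptor that feeds two TLB types, so the new case bumps the 2M/4M counts with the hardcoded 32 and then falls through to the generic 1G handling, which takes the 4 from the table. A standalone sketch of that max-update logic:

#include <stdio.h>

#define TLB_0x63_2M_4M_ENTRIES 32

static unsigned short tlb_lld_2m, tlb_lld_4m, tlb_lld_1g;

static void max_update(unsigned short *slot, unsigned short entries)
{
	if (*slot < entries)
		*slot = entries;
}

/* What the TLB_DATA_1G_2M_4M case above does for descriptor 0x63. */
static void apply_desc_0x63(unsigned short table_entries /* 4 for 0x63 */)
{
	max_update(&tlb_lld_2m, TLB_0x63_2M_4M_ENTRIES);
	max_update(&tlb_lld_4m, TLB_0x63_2M_4M_ENTRIES);
	/* fallthrough target: generic 1G handling uses the table count */
	max_update(&tlb_lld_1g, table_entries);
}

int main(void)
{
	apply_desc_0x63(4);
	printf("2M:%u 4M:%u 1G:%u\n", tlb_lld_2m, tlb_lld_4m, tlb_lld_1g);
	return 0;
}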
@@ -948,7 +966,7 @@ static void intel_detect_tlb(struct cpuinfo_x86 *c)
cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
/* If bit 31 is set, this is an unknown format */
- for (j = 0 ; j < 3 ; j++)
+ for (j = 0 ; j < 4 ; j++)
if (regs[j] & (1 << 31))
regs[j] = 0;
@@ -1186,7 +1204,13 @@ static void __split_lock_reenable(struct work_struct *work)
{
sld_update_msr(true);
}
-static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable);
+/*
+ * In order for each CPU to schedule its delayed work independently of the
+ * others, the delayed work struct must be per-CPU. This is not required
+ * when sysctl_sld_mitigate is enabled, because the semaphore limits the
+ * number of simultaneously scheduled delayed works to 1.
+ */
+static DEFINE_PER_CPU(struct delayed_work, sl_reenable);
/*
* If a CPU goes offline with pending delayed work to re-enable split lock
@@ -1207,7 +1231,7 @@ static int splitlock_cpu_offline(unsigned int cpu)
static void split_lock_warn(unsigned long ip)
{
- struct delayed_work *work;
+ struct delayed_work *work = NULL;
int cpu;
if (!current->reported_split_lock)
@@ -1229,11 +1253,17 @@ static void split_lock_warn(unsigned long ip)
if (down_interruptible(&buslock_sem) == -EINTR)
return;
work = &sl_reenable_unlock;
- } else {
- work = &sl_reenable;
}
cpu = get_cpu();
+
+ if (!work) {
+ work = this_cpu_ptr(&sl_reenable);
+ /* Deferred initialization of per-CPU struct */
+ if (!work->work.func)
+ INIT_DELAYED_WORK(work, __split_lock_reenable);
+ }
+
schedule_delayed_work_on(cpu, work, 2);
/* Disable split lock detection on this CPU to make progress */
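A condensed sketch of the lazy per-CPU delayed-work pattern used above (kernel-style; the my_* names are illustrative, not from this patch). Per-CPU storage starts zeroed, so a NULL work function doubles as the "not yet initialized" marker:

#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/workqueue.h>

static void my_reenable(struct work_struct *work)
{
	/* Runs later, on the CPU the work was queued on. */
}

static DEFINE_PER_CPU(struct delayed_work, my_work);

static void my_arm_on_this_cpu(void)
{
	int cpu = get_cpu();
	struct delayed_work *work = this_cpu_ptr(&my_work);

	/* Deferred one-time initialization of this CPU's instance. */
	if (!work->work.func)
		INIT_DELAYED_WORK(work, my_reenable);

	schedule_delayed_work_on(cpu, work, 2);
	put_cpu();
}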
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 991f38f57caf..9939b984bceb 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -333,7 +333,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
struct thresh_restart {
struct threshold_block *b;
- int reset;
int set_lvt_off;
int lvt_off;
u16 old_limit;
@@ -428,13 +427,13 @@ static void threshold_restart_bank(void *_tr)
rdmsr(tr->b->address, lo, hi);
- if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
- tr->reset = 1; /* limit cannot be lower than err count */
-
- if (tr->reset) { /* reset err count and overflow bit */
- hi =
- (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
- (THRESHOLD_MAX - tr->b->threshold_limit);
+ /*
+ * Reset error count and overflow bit.
+ * This is done during init or after handling an interrupt.
+ */
+ if (hi & MASK_OVERFLOW_HI || tr->set_lvt_off) {
+ hi &= ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI);
+ hi |= THRESHOLD_MAX - tr->b->threshold_limit;
} else if (tr->old_limit) { /* change limit w/o reset */
int new_count = (hi & THRESHOLD_MAX) +
(tr->old_limit - tr->b->threshold_limit);
@@ -1053,13 +1052,20 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol
}
bank_type = smca_get_bank_type(cpu, bank);
- if (bank_type >= N_SMCA_BANK_TYPES)
- return NULL;
if (b && bank_type == SMCA_UMC) {
if (b->block < ARRAY_SIZE(smca_umc_block_names))
return smca_umc_block_names[b->block];
- return NULL;
+ }
+
+ if (b && b->block) {
+ snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_block_%u", b->block);
+ return buf_mcatype;
+ }
+
+ if (bank_type >= N_SMCA_BANK_TYPES) {
+ snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_bank_%u", bank);
+ return buf_mcatype;
}
if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 359218bc1b34..e65dfc88b925 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -2707,15 +2707,9 @@ static int mce_cpu_dead(unsigned int cpu)
static int mce_cpu_online(unsigned int cpu)
{
struct timer_list *t = this_cpu_ptr(&mce_timer);
- int ret;
mce_device_create(cpu);
-
- ret = mce_threshold_create_device(cpu);
- if (ret) {
- mce_device_remove(cpu);
- return ret;
- }
+ mce_threshold_create_device(cpu);
mce_reenable_cpu();
mce_start_timer(t);
return 0;
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index 95275a5e57e0..7d0fb5e0b32f 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -500,6 +500,7 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c)
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
intel_clear_lmce();
+ cmci_clear();
}
bool intel_filter_mce(struct mce *m)
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 9a3092ec9b27..b8d6e616fc50 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -867,7 +867,7 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz
return ret;
}
- for_each_node(nid) {
+ for_each_node_with_cpus(nid) {
cpu = cpumask_first(cpumask_of_node(nid));
c = &cpu_data(cpu);
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 6090fe513d40..100fed2afbe7 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -16,7 +16,6 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kexec.h>
-#include <linux/i8253.h>
#include <linux/random.h>
#include <linux/swiotlb.h>
#include <asm/processor.h>
@@ -461,16 +460,6 @@ static void __init ms_hyperv_init_platform(void)
if (efi_enabled(EFI_BOOT))
x86_platform.get_nmi_reason = hv_get_nmi_reason;
- /*
- * Hyper-V VMs have a PIT emulation quirk such that zeroing the
- * counter register during PIT shutdown restarts the PIT. So it
- * continues to interrupt @18.2 HZ. Setting i8253_clear_counter
- * to false tells pit_shutdown() not to zero the counter so that
- * the PIT really is shutdown. Generation 2 VMs don't have a PIT,
- * and setting this value has no effect.
- */
- i8253_clear_counter_on_shutdown = false;
-
#if IS_ENABLED(CONFIG_HYPERV)
/*
* Setup the hook to get control post apic initialization.
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 558108296f3c..31549e7f6b7c 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -349,7 +349,7 @@ static void get_fixed_ranges(mtrr_type *frs)
void mtrr_save_fixed_ranges(void *info)
{
- if (boot_cpu_has(X86_FEATURE_MTRR))
+ if (mtrr_state.have_fixed)
get_fixed_ranges(mtrr_state.fixed_ranges);
}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 28c357cf7c75..0d019e6972be 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -45,6 +45,9 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
{ X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
+ { X86_FEATURE_COHERENCY_SFW_NO, CPUID_EBX, 31, 0x8000001f, 0 },
+ { X86_FEATURE_TSA_SQ_NO, CPUID_ECX, 1, 0x80000021, 0 },
+ { X86_FEATURE_TSA_L1_NO, CPUID_ECX, 2, 0x80000021, 0 },
{ X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 },
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
index aa9b8b868867..afccb69cd9a2 100644
--- a/arch/x86/kernel/cpu/sgx/driver.c
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -150,13 +150,15 @@ int __init sgx_drv_init(void)
u64 xfrm_mask;
int ret;
- if (!cpu_feature_enabled(X86_FEATURE_SGX_LC))
+ if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) {
+ pr_info("SGX disabled: SGX launch control CPU feature is not available, /dev/sgx_enclave disabled.\n");
return -ENODEV;
+ }
cpuid_count(SGX_CPUID, 0, &eax, &ebx, &ecx, &edx);
if (!(eax & 1)) {
- pr_err("SGX disabled: SGX1 instruction support not available.\n");
+ pr_info("SGX disabled: SGX1 instruction support not available, /dev/sgx_enclave disabled.\n");
return -ENODEV;
}
@@ -173,8 +175,10 @@ int __init sgx_drv_init(void)
}
ret = misc_register(&sgx_dev_enclave);
- if (ret)
+ if (ret) {
+ pr_info("SGX disabled: Unable to register the /dev/sgx_enclave driver (%d).\n", ret);
return ret;
+ }
return 0;
}
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index da8b8ea6b063..9634ac0fef1d 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -64,6 +64,13 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
struct file *backing;
long ret;
+ /*
+ * ECREATE would detect this too, but checking here also ensures
+ * that the 'encl_size' calculations below can never overflow.
+ */
+ if (!is_power_of_2(secs->size))
+ return -EINVAL;
+
va_page = sgx_encl_grow(encl, true);
if (IS_ERR(va_page))
return PTR_ERR(va_page);
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index c4960b8e5195..b86eb601827b 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -718,6 +718,8 @@ int arch_memory_failure(unsigned long pfn, int flags)
goto out;
}
+ sgx_unmark_page_reclaimable(page);
+
/*
* TBD: Add additional plumbing to enable pre-emptive
* action for asynchronous poison notification. Until
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index f18ca44c904b..52dc5839d1e8 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -195,6 +195,7 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
printk("%sCall Trace:\n", log_lvl);
unwind_start(&state, task, regs, stack);
+ stack = stack ?: get_stack_pointer(task, regs);
regs = unwind_get_entry_regs(&state, &partial);
/*
@@ -213,9 +214,7 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
* - hardirq stack
* - entry stack
*/
- for (stack = stack ?: get_stack_pointer(task, regs);
- stack;
- stack = stack_info.next_sp) {
+ for (; stack; stack = stack_info.next_sp) {
const char *stack_name;
stack = PTR_ALIGN(stack, sizeof(long));
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 993734e96615..b87a3f8ccb32 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -753,22 +753,21 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len)
void __init e820__register_nosave_regions(unsigned long limit_pfn)
{
int i;
- unsigned long pfn = 0;
+ u64 last_addr = 0;
for (i = 0; i < e820_table->nr_entries; i++) {
struct e820_entry *entry = &e820_table->entries[i];
- if (pfn < PFN_UP(entry->addr))
- register_nosave_region(pfn, PFN_UP(entry->addr));
-
- pfn = PFN_DOWN(entry->addr + entry->size);
-
if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN)
- register_nosave_region(PFN_UP(entry->addr), pfn);
+ continue;
- if (pfn >= limit_pfn)
- break;
+ if (last_addr < entry->addr)
+ register_nosave_region(PFN_DOWN(last_addr), PFN_UP(entry->addr));
+
+ last_addr = entry->addr + entry->size;
}
+
+ register_nosave_region(PFN_DOWN(last_addr), limit_pfn);
}
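The rewrite above tracks a byte address instead of a PFN so each gap is rounded exactly once: PFN_DOWN on the end of the last usable region, PFN_UP on the start of the next. A userspace sketch over a toy e820 table, with the PFN macros restated locally:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PFN_UP(x)   (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)

struct entry { unsigned long addr, size; int ram; };

int main(void)
{
	/* Toy layout: RAM, hole, RAM; holes must not be saved on hibernate. */
	struct entry tab[] = {
		{ 0x00000, 0x9f000, 1 },
		{ 0x100000, 0x100000, 1 },
	};
	unsigned long last_addr = 0, limit_pfn = 0x1000;
	int i;

	for (i = 0; i < 2; i++) {
		if (!tab[i].ram)
			continue;
		if (last_addr < tab[i].addr)
			printf("nosave: pfn %lu..%lu\n",
			       PFN_DOWN(last_addr), PFN_UP(tab[i].addr));
		last_addr = tab[i].addr + tab[i].size;
	}
	printf("nosave: pfn %lu..%lu\n", PFN_DOWN(last_addr), limit_pfn);
	return 0;
}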
#ifdef CONFIG_ACPI
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index f1446f532b17..80ba0f81a44a 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -220,7 +220,7 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
struct fpstate *fpstate;
unsigned int size;
- size = fpu_user_cfg.default_size + ALIGN(offsetof(struct fpstate, regs), 64);
+ size = fpu_kernel_cfg.default_size + ALIGN(offsetof(struct fpstate, regs), 64);
fpstate = vzalloc(size);
if (!fpstate)
return false;
@@ -232,8 +232,8 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
fpstate->is_guest = true;
gfpu->fpstate = fpstate;
- gfpu->xfeatures = fpu_user_cfg.default_features;
- gfpu->perm = fpu_user_cfg.default_features;
+ gfpu->xfeatures = fpu_kernel_cfg.default_features;
+ gfpu->perm = fpu_kernel_cfg.default_features;
/*
* KVM sets the FP+SSE bits in the XSAVE header when copying FPU state
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index ec51ce713dea..48cf91625a16 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -360,7 +360,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
goto fail;
ip = trampoline + size;
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ if (cpu_wants_rethunk_at(ip))
__text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE);
else
memcpy(ip, retq, sizeof(retq));
@@ -415,8 +415,6 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
/* ALLOC_TRAMP flags lets us know we created it */
ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;
- set_vm_flush_reset_perms(trampoline);
-
if (likely(system_state != SYSTEM_BOOTING))
set_memory_ro((unsigned long)trampoline, npages);
set_memory_x((unsigned long)trampoline, npages);
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 80e262bb627f..cb9852ad6098 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -46,7 +46,8 @@ bool __init pit_timer_init(void)
* VMMs otherwise steal CPU time just to pointlessly waggle
* the (masked) IRQ.
*/
- clockevent_i8253_disable();
+ scoped_guard(irq)
+ clockevent_i8253_disable();
return false;
}
clockevent_i8253_init(true);
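scoped_guard(irq) from <linux/cleanup.h> wraps the statement in a local_irq_disable()/local_irq_enable() pair that cannot be leaked by an early return. Open-coded, the guarded call above is roughly equivalent to this sketch (assuming the plain irq guard, which takes no flags):

	local_irq_disable();
	clockevent_i8253_disable();
	local_irq_enable();

The guard form is preferred because the re-enable happens automatically when the scope exits, whichever path the code takes.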
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index e2fab3ceb09f..9a101150376d 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -33,8 +33,9 @@ void io_bitmap_share(struct task_struct *tsk)
set_tsk_thread_flag(tsk, TIF_IO_BITMAP);
}
-static void task_update_io_bitmap(struct task_struct *tsk)
+static void task_update_io_bitmap(void)
{
+ struct task_struct *tsk = current;
struct thread_struct *t = &tsk->thread;
if (t->iopl_emul == 3 || t->io_bitmap) {
@@ -54,7 +55,12 @@ void io_bitmap_exit(struct task_struct *tsk)
struct io_bitmap *iobm = tsk->thread.io_bitmap;
tsk->thread.io_bitmap = NULL;
- task_update_io_bitmap(tsk);
+ /*
+ * Don't touch the TSS when invoked on a failed fork(). TSS
+ * reflects the state of @current and not the state of @tsk.
+ */
+ if (tsk == current)
+ task_update_io_bitmap();
if (iobm && refcount_dec_and_test(&iobm->refcnt))
kfree(iobm);
}
@@ -192,8 +198,7 @@ SYSCALL_DEFINE1(iopl, unsigned int, level)
}
t->iopl_emul = level;
- task_update_io_bitmap(current);
-
+ task_update_io_bitmap();
return 0;
}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 766ffe3ba313..439fdb3f5fdf 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -23,8 +23,10 @@
#include <asm/traps.h>
#include <asm/thermal.h>
+#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_THERMAL_VECTOR)
#define CREATE_TRACE_POINTS
#include <asm/trace/irq_vectors.h>
+#endif
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 991f00c817e6..180c708879d2 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -427,7 +427,6 @@ void *alloc_insn_page(void)
if (!page)
return NULL;
- set_vm_flush_reset_perms(page);
/*
* First make the page read-only, and only then make it executable to
* prevent it from being W+X in between.
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 5d61a342871b..24b6eaacc81e 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -149,8 +149,7 @@ static void free_transition_pgtable(struct kimage *image)
image->arch.pte = NULL;
}
-static int init_transition_pgtable(struct kimage *image, pgd_t *pgd,
- unsigned long control_page)
+static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
{
pgprot_t prot = PAGE_KERNEL_EXEC_NOENC;
unsigned long vaddr, paddr;
@@ -161,7 +160,7 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd,
pte_t *pte;
vaddr = (unsigned long)relocate_kernel;
- paddr = control_page;
+ paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
pgd += pgd_index(vaddr);
if (!pgd_present(*pgd)) {
p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL);
@@ -220,7 +219,7 @@ static void *alloc_pgt_page(void *data)
return p;
}
-static int init_pgtable(struct kimage *image, unsigned long control_page)
+static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
{
struct x86_mapping_info info = {
.alloc_pgt_page = alloc_pgt_page,
@@ -229,12 +228,12 @@ static int init_pgtable(struct kimage *image, unsigned long control_page)
.kernpg_flag = _KERNPG_TABLE_NOENC,
};
unsigned long mstart, mend;
+ pgd_t *level4p;
int result;
int i;
- image->arch.pgd = alloc_pgt_page(image);
- if (!image->arch.pgd)
- return -ENOMEM;
+ level4p = (pgd_t *)__va(start_pgtable);
+ clear_page(level4p);
if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
info.page_flag |= _PAGE_ENC;
@@ -248,8 +247,8 @@ static int init_pgtable(struct kimage *image, unsigned long control_page)
mstart = pfn_mapped[i].start << PAGE_SHIFT;
mend = pfn_mapped[i].end << PAGE_SHIFT;
- result = kernel_ident_mapping_init(&info, image->arch.pgd,
- mstart, mend);
+ result = kernel_ident_mapping_init(&info,
+ level4p, mstart, mend);
if (result)
return result;
}
@@ -264,8 +263,8 @@ static int init_pgtable(struct kimage *image, unsigned long control_page)
mstart = image->segment[i].mem;
mend = mstart + image->segment[i].memsz;
- result = kernel_ident_mapping_init(&info, image->arch.pgd,
- mstart, mend);
+ result = kernel_ident_mapping_init(&info,
+ level4p, mstart, mend);
if (result)
return result;
@@ -275,19 +274,15 @@ static int init_pgtable(struct kimage *image, unsigned long control_page)
* Prepare EFI systab and ACPI tables for kexec kernel since they are
* not covered by pfn_mapped.
*/
- result = map_efi_systab(&info, image->arch.pgd);
+ result = map_efi_systab(&info, level4p);
if (result)
return result;
- result = map_acpi_tables(&info, image->arch.pgd);
+ result = map_acpi_tables(&info, level4p);
if (result)
return result;
- /*
- * This must be last because the intermediate page table pages it
- * allocates will not be control pages and may overlap the image.
- */
- return init_transition_pgtable(image, image->arch.pgd, control_page);
+ return init_transition_pgtable(image, level4p);
}
static void load_segments(void)
@@ -304,14 +299,14 @@ static void load_segments(void)
int machine_kexec_prepare(struct kimage *image)
{
- unsigned long control_page;
+ unsigned long start_pgtable;
int result;
/* Calculate the offsets */
- control_page = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
+ start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
/* Setup the identity mapped 64bit page table */
- result = init_pgtable(image, control_page);
+ result = init_pgtable(image, start_pgtable);
if (result)
return result;
@@ -358,12 +353,13 @@ void machine_kexec(struct kimage *image)
#endif
}
- control_page = page_address(image->control_code_page);
+ control_page = page_address(image->control_code_page) + PAGE_SIZE;
__memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
- page_list[PA_TABLE_PAGE] = (unsigned long)__pa(image->arch.pgd);
+ page_list[PA_TABLE_PAGE] =
+ (unsigned long)__pa(page_address(image->control_code_page));
if (image->type == KEXEC_TYPE_DEFAULT)
page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
@@ -582,7 +578,8 @@ static void kexec_mark_crashkres(bool protect)
/* Don't touch the control code page used in crash_kexec().*/
control = PFN_PHYS(page_to_pfn(kexec_crash_image->control_code_page));
- kexec_mark_range(crashk_res.start, control - 1, protect);
+ /* Control code page is located in the 2nd page. */
+ kexec_mark_range(crashk_res.start, control + PAGE_SIZE - 1, protect);
control += KEXEC_CONTROL_PAGE_SIZE;
kexec_mark_range(control, crashk_res.end, protect);
}
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index c032edcd3d95..c34ea5e028c4 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -74,10 +74,11 @@ void *module_alloc(unsigned long size)
return NULL;
p = __vmalloc_node_range(size, MODULE_ALIGN,
- MODULES_VADDR + get_module_load_offset(),
- MODULES_END, gfp_mask,
- PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
- __builtin_return_address(0));
+ MODULES_VADDR + get_module_load_offset(),
+ MODULES_END, gfp_mask, PAGE_KERNEL,
+ VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK,
+ NUMA_NO_NODE, __builtin_return_address(0));
+
if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
vfree(p);
return NULL;
@@ -285,10 +286,16 @@ int module_finalize(const Elf_Ehdr *hdr,
void *pseg = (void *)para->sh_addr;
apply_paravirt(pseg, pseg + para->sh_size);
}
+
+ its_init_mod(me);
+
if (retpolines) {
void *rseg = (void *)retpolines->sh_addr;
apply_retpolines(rseg, rseg + retpolines->sh_size);
}
+
+ its_fini_mod(me);
+
if (returns) {
void *rseg = (void *)returns->sh_addr;
apply_returns(rseg, rseg + returns->sh_size);
@@ -320,4 +327,5 @@ int module_finalize(const Elf_Ehdr *hdr,
void module_arch_cleanup(struct module *mod)
{
alternatives_smp_module_del(mod);
+ its_free_mod(mod);
}
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index ed6cce6c3950..b9a128546970 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -38,8 +38,12 @@
#define CREATE_TRACE_POINTS
#include <trace/events/nmi.h>
+/*
+ * An emergency handler can be set in any context, including NMI.
+ */
struct nmi_desc {
raw_spinlock_t lock;
+ nmi_handler_t emerg_handler;
struct list_head head;
};
@@ -121,9 +125,22 @@ static void nmi_check_duration(struct nmiaction *action, u64 duration)
static int nmi_handle(unsigned int type, struct pt_regs *regs)
{
struct nmi_desc *desc = nmi_to_desc(type);
+ nmi_handler_t ehandler;
struct nmiaction *a;
int handled=0;
+ /*
+ * Call the emergency handler, if set.
+ *
+ * In the case of the crash_nmi_callback() emergency handler, it
+ * returns early on the crashing CPU so that the CPU can complete
+ * the remaining necessary crash actions ASAP. None of the other
+ * handlers in the linked list need to be run.
+ */
+ ehandler = desc->emerg_handler;
+ if (ehandler)
+ return ehandler(type, regs);
+
rcu_read_lock();
/*
@@ -213,6 +230,31 @@ void unregister_nmi_handler(unsigned int type, const char *name)
}
EXPORT_SYMBOL_GPL(unregister_nmi_handler);
+/**
+ * set_emergency_nmi_handler - Set emergency handler
+ * @type: NMI type
+ * @handler: the emergency handler to be stored
+ *
+ * Set an emergency NMI handler which preempts all the other handlers in
+ * the linked list. Passing a NULL handler clears it. Concurrent calls to
+ * this function are not expected to happen; if they do, the system is
+ * screwed beyond repair anyway.
+ */
+void set_emergency_nmi_handler(unsigned int type, nmi_handler_t handler)
+{
+ struct nmi_desc *desc = nmi_to_desc(type);
+
+ if (WARN_ON_ONCE(desc->emerg_handler == handler))
+ return;
+ desc->emerg_handler = handler;
+
+ /*
+ * Ensure the emergency handler is visible to other CPUs before
+ * this function returns.
+ */
+ smp_wmb();
+}
+
static void
pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
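Shape of a consumer, mirroring the nmi_shootdown_cpus() change later in this patch: install the callback, fire the NMI IPI, and never unregister, since the emergency path does not return to normal operation. A hedged sketch with hypothetical my_* names:

#include <asm/apic.h>
#include <asm/nmi.h>

/* Hypothetical emergency handler: preempts the whole NMI_LOCAL chain. */
static int my_crash_handler(unsigned int type, struct pt_regs *regs)
{
	/* Bare minimum work; the system is going down. */
	return NMI_HANDLED;
}

static void my_shootdown(void)
{
	set_emergency_nmi_handler(NMI_LOCAL, my_crash_handler);
	apic_send_IPI_allbutself(NMI_VECTOR);
	/* No unregister: the emergency path never resumes normal operation. */
}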
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index acc83738bf5b..1138b999cb14 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -87,7 +87,12 @@ EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
*/
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
- memcpy(dst, src, arch_task_struct_size);
+ /* init_task is not dynamically sized (incomplete FPU state) */
+ if (unlikely(src == &init_task))
+ memcpy_and_pad(dst, arch_task_struct_size, src, sizeof(init_task), 0);
+ else
+ memcpy(dst, src, arch_task_struct_size);
+
#ifdef CONFIG_VM86
dst->thread.vm86 = NULL;
#endif
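memcpy_and_pad() copies sizeof(init_task) bytes and zero-fills the remainder up to arch_task_struct_size, so the dynamically sized FPU tail never contains stale source bytes. Its semantics, restated as a userspace sketch (the kernel helper lives in <linux/string.h>; this local version only illustrates the behavior):

#include <stdio.h>
#include <string.h>

static void my_memcpy_and_pad(void *dst, size_t dst_len,
			      const void *src, size_t src_len, int pad)
{
	size_t copy = src_len < dst_len ? src_len : dst_len;

	memcpy(dst, src, copy);
	if (copy < dst_len)
		memset((char *)dst + copy, pad, dst_len - copy);
}

int main(void)
{
	char src[4] = { 1, 2, 3, 4 };
	char dst[8];
	int i;

	my_memcpy_and_pad(dst, sizeof(dst), src, sizeof(src), 0);
	for (i = 0; i < 8; i++)
		printf("%d ", dst[i]);	/* 1 2 3 4 0 0 0 0 */
	putchar('\n');
	return 0;
}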
@@ -149,6 +154,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
frame->ret_addr = (unsigned long) ret_from_fork;
p->thread.sp = (unsigned long) fork_frame;
p->thread.io_bitmap = NULL;
+ clear_tsk_thread_flag(p, TIF_IO_BITMAP);
p->thread.iopl_warn = 0;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
@@ -423,6 +429,11 @@ void native_tss_update_io_bitmap(void)
} else {
struct io_bitmap *iobm = t->io_bitmap;
+ if (WARN_ON_ONCE(!iobm)) {
+ clear_thread_flag(TIF_IO_BITMAP);
+ native_tss_invalidate_io_bitmap();
+ }
+
/*
* Only copy bitmap data when the sequence number differs. The
* update time is accounted to the incoming task.
@@ -876,6 +887,11 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
*/
static __cpuidle void mwait_idle(void)
{
+ if (need_resched())
+ return;
+
+ x86_idle_clear_cpu_buffers();
+
if (!current_set_polling_and_test()) {
if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
mb(); /* quirk */
@@ -884,13 +900,17 @@ static __cpuidle void mwait_idle(void)
}
__monitor((void *)&current_thread_info()->flags, 0, 0);
- if (!need_resched())
- __sti_mwait(0, 0);
- else
+ if (need_resched()) {
raw_local_irq_enable();
+ goto out;
+ }
+
+ __sti_mwait(0, 0);
} else {
raw_local_irq_enable();
}
+
+out:
__current_clr_polling();
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 299b970e5f82..79e1ac3d0625 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -787,26 +787,27 @@ void machine_crash_shutdown(struct pt_regs *regs)
}
#endif
-/*
- * This is used to VMCLEAR all VMCSs loaded on the
- * processor. And when loading kvm_intel module, the
- * callback function pointer will be assigned.
- *
- * protected by rcu.
- */
-crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
-EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
+/* RCU-protected callback to disable virtualization prior to reboot. */
+static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback;
-static inline void cpu_crash_vmclear_loaded_vmcss(void)
+void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback)
{
- crash_vmclear_fn *do_vmclear_operation = NULL;
+ if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback)))
+ return;
- rcu_read_lock();
- do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
- if (do_vmclear_operation)
- do_vmclear_operation();
- rcu_read_unlock();
+ rcu_assign_pointer(cpu_emergency_virt_callback, callback);
}
+EXPORT_SYMBOL_GPL(cpu_emergency_register_virt_callback);
+
+void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback)
+{
+ if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback) != callback))
+ return;
+
+ rcu_assign_pointer(cpu_emergency_virt_callback, NULL);
+ synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback);
/* This is the CPU performing the emergency shutdown work. */
int crashing_cpu = -1;
@@ -818,9 +819,15 @@ int crashing_cpu = -1;
*/
void cpu_emergency_disable_virtualization(void)
{
- cpu_crash_vmclear_loaded_vmcss();
+ cpu_emergency_virt_cb *callback;
- cpu_emergency_vmxoff();
+ rcu_read_lock();
+ callback = rcu_dereference(cpu_emergency_virt_callback);
+ if (callback)
+ callback();
+ rcu_read_unlock();
+
+ /* KVM_AMD doesn't yet utilize the common callback. */
cpu_emergency_svm_disable();
}
@@ -896,15 +903,11 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
shootdown_callback = callback;
atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
- /* Would it be better to replace the trap vector here? */
- if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback,
- NMI_FLAG_FIRST, "crash"))
- return; /* Return what? */
+
/*
- * Ensure the new callback function is set before sending
- * out the NMI
+ * Set emergency handler to preempt other handlers.
*/
- wmb();
+ set_emergency_nmi_handler(NMI_LOCAL, crash_nmi_callback);
apic_send_IPI_allbutself(NMI_VECTOR);
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index 3fe76bf17d95..e658e83c62ae 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -1064,3 +1064,21 @@ static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
RIP_REL_REF(cpuid_ext_range_max) = fn->eax;
}
}
+
+static inline void sev_evict_cache(void *va, int npages)
+{
+ volatile u8 val __always_unused;
+ u8 *bytes = va;
+ int page_idx;
+
+ /*
+ * For SEV guests, a read from the first/last cache-lines of a 4K page
+ * using the guest key is sufficient to cause a flush of all cache-lines
+ * associated with that 4K page without incurring all the overhead of a
+ * full CLFLUSH sequence.
+ */
+ for (page_idx = 0; page_idx < npages; page_idx++) {
+ val = bytes[page_idx * PAGE_SIZE];
+ val = bytes[page_idx * PAGE_SIZE + PAGE_SIZE - 1];
+ }
+}
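The eviction helper touches exactly two bytes per 4K page, one in the first cache line and one in the last. The cache-eviction side effect only exists inside an SEV guest, but the access pattern itself is easy to see in isolation (userspace illustration only):

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096

static void touch_first_last(void *va, int npages)
{
	volatile unsigned char val;
	unsigned char *bytes = va;
	int i;

	for (i = 0; i < npages; i++) {
		val = bytes[i * PAGE_SIZE];			/* first cache line */
		val = bytes[i * PAGE_SIZE + PAGE_SIZE - 1];	/* last cache line */
	}
	(void)val;
}

int main(void)
{
	int npages = 2;
	unsigned char *buf = calloc(npages, PAGE_SIZE);

	if (!buf)
		return 1;
	touch_first_last(buf, npages);	/* two reads per page, four total */
	free(buf);
	return 0;
}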
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index f8a8249ae117..7b7fa85d1547 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -676,10 +676,12 @@ static u64 __init get_jump_table_addr(void)
static void pvalidate_pages(unsigned long vaddr, unsigned long npages, bool validate)
{
- unsigned long vaddr_end;
+ unsigned long vaddr_begin, vaddr_end;
int rc;
vaddr = vaddr & PAGE_MASK;
+
+ vaddr_begin = vaddr;
vaddr_end = vaddr + (npages << PAGE_SHIFT);
while (vaddr < vaddr_end) {
@@ -689,6 +691,13 @@ static void pvalidate_pages(unsigned long vaddr, unsigned long npages, bool vali
vaddr = vaddr + PAGE_SIZE;
}
+
+ /*
+ * If validating memory (making it private) and affected by the
+ * cache-coherency vulnerability, perform the cache eviction mitigation.
+ */
+ if (validate && !cpu_feature_enabled(X86_FEATURE_COHERENCY_SFW_NO))
+ sev_evict_cache((void *)vaddr_begin, npages);
}
static void __head early_set_pages_state(unsigned long paddr, unsigned long npages, enum psc_op op)
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index ca8dfc0c9edb..93809a4dc793 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -79,7 +79,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
break;
case RET:
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ if (cpu_wants_rethunk_at(insn))
code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk);
else
code = &retinsn;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index cafacb2e58cc..e5ec68ce938a 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -920,7 +920,7 @@ static unsigned long long cyc2ns_suspend;
void tsc_save_sched_clock_state(void)
{
- if (!sched_clock_stable())
+ if (!static_branch_likely(&__use_tsc) && !sched_clock_stable())
return;
cyc2ns_suspend = sched_clock();
@@ -940,7 +940,7 @@ void tsc_restore_sched_clock_state(void)
unsigned long flags;
int cpu;
- if (!sched_clock_stable())
+ if (!static_branch_likely(&__use_tsc) && !sched_clock_stable())
return;
local_irq_save(flags);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 78ccb5ec3c0e..35aeec9d2267 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -528,6 +528,16 @@ INIT_PER_CPU(irq_stack_backing_store);
"SRSO function pair won't alias");
#endif
+#if defined(CONFIG_MITIGATION_ITS) && !defined(CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B)
+. = ASSERT(__x86_indirect_its_thunk_rax & 0x20, "__x86_indirect_its_thunk_rax not in second half of cacheline");
+. = ASSERT(((__x86_indirect_its_thunk_rcx - __x86_indirect_its_thunk_rax) % 64) == 0, "Indirect thunks are not cacheline apart");
+. = ASSERT(__x86_indirect_its_thunk_array == __x86_indirect_its_thunk_rax, "Gap in ITS thunk array");
+. = ASSERT(its_return_thunk & 0x20, "its_return_thunk not in second half of cacheline");
+#endif
+
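The "& 0x20" assertions encode "address lies in the second half of a 64-byte cache line", keeping the ITS thunks in the upper half, which is not affected by the mispredicted-target issue. The arithmetic, spelled out as a small check:

#include <stdio.h>

/* Bit 5 of the address selects the upper 32 bytes of a 64-byte line. */
static int in_second_half_of_cacheline(unsigned long addr)
{
	return (addr & 0x20) != 0;
}

int main(void)
{
	printf("%d %d %d\n",
	       in_second_half_of_cacheline(0x1000),   /* 0: first half  */
	       in_second_half_of_cacheline(0x1020),   /* 1: second half */
	       in_second_half_of_cacheline(0x103f));  /* 1: last byte   */
	return 0;
}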
#endif /* CONFIG_X86_64 */
#ifdef CONFIG_KEXEC_CORE
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 43b1df7ec183..1bb5e8f6c63e 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -758,6 +758,12 @@ void kvm_set_cpu_caps(void)
if (cpu_feature_enabled(X86_FEATURE_SRSO_NO))
kvm_cpu_cap_set(X86_FEATURE_SRSO_NO);
+ kvm_cpu_cap_mask(CPUID_8000_0021_EAX, F(VERW_CLEAR));
+
+ kvm_cpu_cap_init_kvm_defined(CPUID_8000_0021_ECX,
+ F(TSA_SQ_NO) | F(TSA_L1_NO)
+ );
+
/*
* Hide RDTSCP and RDPID if either feature is reported as supported but
* probing MSR_TSC_AUX failed. This is purely a sanity check and
@@ -1243,7 +1249,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
break;
case 0x80000021:
- entry->ebx = entry->ecx = entry->edx = 0;
+ entry->ebx = entry->edx = 0;
/*
* Pass down these bits:
* EAX 0 NNDBP, Processor ignores nested data breakpoints
@@ -1259,6 +1265,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->eax |= BIT(2);
if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
entry->eax |= BIT(6);
+ cpuid_entry_override(entry, CPUID_8000_0021_ECX);
break;
/*Add support for Centaur's CPUID instruction*/
case 0xC0000000:
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 28555bbd52e8..cb0a531e13c5 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1406,8 +1406,7 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
return syndbg_set_msr(vcpu, msr, data, host);
default:
- vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
- msr, data);
+ kvm_pr_unimpl_wrmsr(vcpu, msr, data);
return 1;
}
return 0;
@@ -1528,8 +1527,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return 1;
break;
default:
- vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
- msr, data);
+ kvm_pr_unimpl_wrmsr(vcpu, msr, data);
return 1;
}
@@ -1581,7 +1579,7 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
return syndbg_get_msr(vcpu, msr, pdata, host);
default:
- vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+ kvm_pr_unimpl_rdmsr(vcpu, msr);
return 1;
}
@@ -1646,7 +1644,7 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
data = APIC_BUS_FREQUENCY;
break;
default:
- vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+ kvm_pr_unimpl_rdmsr(vcpu, msr);
return 1;
}
*pdata = data;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 7f57dce5c828..9aae76b74417 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -587,7 +587,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
* just set SVI.
*/
if (unlikely(apic->apicv_active))
- static_call_cond(kvm_x86_hwapic_isr_update)(vec);
+ static_call_cond(kvm_x86_hwapic_isr_update)(apic->vcpu, vec);
else {
++apic->isr_count;
BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
@@ -632,7 +632,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
* and must be left alone.
*/
if (unlikely(apic->apicv_active))
- static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic));
+ static_call_cond(kvm_x86_hwapic_isr_update)(apic->vcpu, apic_find_highest_isr(apic));
else {
--apic->isr_count;
BUG_ON(apic->isr_count < 0);
@@ -640,6 +640,17 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
}
}
+void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ if (WARN_ON_ONCE(!lapic_in_kernel(vcpu)) || !apic->apicv_active)
+ return;
+
+ static_call(kvm_x86_hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
+}
+EXPORT_SYMBOL_GPL(kvm_apic_update_hwapic_isr);
+
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
/* This may race with setting of irr in __apic_accept_irq() and
@@ -2315,11 +2326,25 @@ int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
data &= ~APIC_ICR_BUSY;
kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
- kvm_lapic_set_reg64(apic, APIC_ICR, data);
+ if (kvm_x86_ops.x2apic_icr_is_split) {
+ kvm_lapic_set_reg(apic, APIC_ICR, data);
+ kvm_lapic_set_reg(apic, APIC_ICR2, data >> 32);
+ } else {
+ kvm_lapic_set_reg64(apic, APIC_ICR, data);
+ }
trace_kvm_apic_write(APIC_ICR, data);
return 0;
}
+static u64 kvm_x2apic_icr_read(struct kvm_lapic *apic)
+{
+ if (kvm_x86_ops.x2apic_icr_is_split)
+ return (u64)kvm_lapic_get_reg(apic, APIC_ICR) |
+ (u64)kvm_lapic_get_reg(apic, APIC_ICR2) << 32;
+
+ return kvm_lapic_get_reg64(apic, APIC_ICR);
+}
+
/* emulate APIC access in a trap manner */
void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
{
@@ -2337,7 +2362,7 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
* maybe-unnecessary write, and both are in the noise anyway.
*/
if (apic_x2apic_mode(apic) && offset == APIC_ICR)
- WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_lapic_get_reg64(apic, APIC_ICR)));
+ WARN_ON_ONCE(kvm_x2apic_icr_write(apic, kvm_x2apic_icr_read(apic)));
else
kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
}
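Whether the ICR is split decides how the 64-bit value round-trips through the register page: split hardware keeps ICR and ICR2 as two 32-bit registers, otherwise a single 64-bit access suffices. The pack/unpack itself is plain bit arithmetic, shown here as a standalone sketch:

#include <stdint.h>
#include <stdio.h>

/* Split a 64-bit ICR value into the legacy ICR/ICR2 32-bit halves. */
static void icr_split(uint64_t data, uint32_t *icr, uint32_t *icr2)
{
	*icr = (uint32_t)data;
	*icr2 = (uint32_t)(data >> 32);
}

/* Rebuild the 64-bit value, as kvm_x2apic_icr_read() does above. */
static uint64_t icr_join(uint32_t icr, uint32_t icr2)
{
	return (uint64_t)icr | ((uint64_t)icr2 << 32);
}

int main(void)
{
	uint32_t icr, icr2;
	uint64_t data = 0x12345678000000fdULL;

	icr_split(data, &icr, &icr2);
	printf("round-trip ok: %d\n", icr_join(icr, icr2) == data);
	return 0;
}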
@@ -2540,7 +2565,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
if (apic->apicv_active) {
static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, -1);
- static_call_cond(kvm_x86_hwapic_isr_update)(-1);
+ static_call_cond(kvm_x86_hwapic_isr_update)(vcpu, -1);
}
vcpu->arch.apic_arb_prio = 0;
@@ -2760,18 +2785,22 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
/*
* In x2APIC mode, the LDR is fixed and based on the id. And
- * ICR is internally a single 64-bit register, but needs to be
- * split to ICR+ICR2 in userspace for backwards compatibility.
+ * if the ICR is _not_ split, ICR is internally a single 64-bit
+ * register, but needs to be split to ICR+ICR2 in userspace for
+ * backwards compatibility.
*/
- if (set) {
+ if (set)
*ldr = kvm_apic_calc_x2apic_ldr(*id);
- icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
- (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
- __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
- } else {
- icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
- __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
+ if (!kvm_x86_ops.x2apic_icr_is_split) {
+ if (set) {
+ icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
+ (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
+ __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
+ } else {
+ icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
+ __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
+ }
}
}
@@ -2829,7 +2858,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
if (apic->apicv_active) {
static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic));
- static_call_cond(kvm_x86_hwapic_isr_update)(apic_find_highest_isr(apic));
+ static_call_cond(kvm_x86_hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
}
kvm_make_request(KVM_REQ_EVENT, vcpu);
if (ioapic_in_kernel(vcpu->kvm))
@@ -2971,7 +3000,7 @@ static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data)
u32 low;
if (reg == APIC_ICR) {
- *data = kvm_lapic_get_reg64(apic, APIC_ICR);
+ *data = kvm_x2apic_icr_read(apic);
return 0;
}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index a5ac4a5a5179..e5d2dc58fcf8 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -122,6 +122,7 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu);
+void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index e43909d6504a..7fbd24fbf363 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -14,6 +14,7 @@
enum kvm_only_cpuid_leafs {
CPUID_12_EAX = NCAPINTS,
CPUID_7_2_EDX,
+ CPUID_8000_0021_ECX,
NR_KVM_CPU_CAPS,
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
@@ -45,6 +46,10 @@ enum kvm_only_cpuid_leafs {
#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
#define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5)
+/* CPUID level 0x80000021 (ECX) */
+#define KVM_X86_FEATURE_TSA_SQ_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 1)
+#define KVM_X86_FEATURE_TSA_L1_NO KVM_X86_FEATURE(CPUID_8000_0021_ECX, 2)
+
struct cpuid_reg {
u32 function;
u32 index;
@@ -71,6 +76,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX},
[CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX},
[CPUID_7_2_EDX] = { 7, 2, CPUID_EDX},
+ [CPUID_8000_0021_ECX] = {0x80000021, 0, CPUID_ECX},
};
/*
@@ -107,6 +113,8 @@ static __always_inline u32 __feature_translate(int x86_feature)
KVM_X86_TRANSLATE_FEATURE(SGX2);
KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
+ KVM_X86_TRANSLATE_FEATURE(TSA_SQ_NO);
+ KVM_X86_TRANSLATE_FEATURE(TSA_L1_NO);
default:
return x86_feature;
}
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index fb125b54ee68..0adbf0677b7c 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -839,7 +839,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
* Allocating new amd_iommu_pi_data, which will get
* added to the per-vcpu ir_list.
*/
- ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
+ ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_ATOMIC | __GFP_ACCOUNT);
if (!ir) {
ret = -ENOMEM;
goto out;
@@ -915,6 +915,7 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
{
struct kvm_kernel_irq_routing_entry *e;
struct kvm_irq_routing_table *irq_rt;
+ bool enable_remapped_mode = true;
int idx, ret = 0;
if (!kvm_arch_has_assigned_device(kvm) ||
@@ -952,6 +953,8 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
kvm_vcpu_apicv_active(&svm->vcpu)) {
struct amd_iommu_pi_data pi;
+ enable_remapped_mode = false;
+
/* Try to enable guest_mode in IRTE */
pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
AVIC_HPA_MASK);
@@ -970,33 +973,6 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
*/
if (!ret && pi.is_guest_mode)
svm_ir_list_add(svm, &pi);
- } else {
- /* Use legacy mode in IRTE */
- struct amd_iommu_pi_data pi;
-
- /**
- * Here, pi is used to:
- * - Tell IOMMU to use legacy mode for this interrupt.
- * - Retrieve ga_tag of prior interrupt remapping data.
- */
- pi.prev_ga_tag = 0;
- pi.is_guest_mode = false;
- ret = irq_set_vcpu_affinity(host_irq, &pi);
-
- /**
- * Check if the posted interrupt was previously
- * setup with the guest_mode by checking if the ga_tag
- * was cached. If so, we need to clean up the per-vcpu
- * ir_list.
- */
- if (!ret && pi.prev_ga_tag) {
- int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
- struct kvm_vcpu *vcpu;
-
- vcpu = kvm_get_vcpu_by_id(kvm, id);
- if (vcpu)
- svm_ir_list_del(to_svm(vcpu), &pi);
- }
}
if (!ret && svm) {
@@ -1012,6 +988,34 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
}
ret = 0;
+ if (enable_remapped_mode) {
+ /* Use legacy mode in IRTE */
+ struct amd_iommu_pi_data pi;
+
+ /**
+ * Here, pi is used to:
+ * - Tell IOMMU to use legacy mode for this interrupt.
+ * - Retrieve ga_tag of prior interrupt remapping data.
+ */
+ pi.prev_ga_tag = 0;
+ pi.is_guest_mode = false;
+ ret = irq_set_vcpu_affinity(host_irq, &pi);
+
+ /**
+ * Check if the posted interrupt was previously
+ * set up with guest_mode by checking if the ga_tag
+ * was cached. If so, we need to clean up the per-vcpu
+ * ir_list.
+ */
+ if (!ret && pi.prev_ga_tag) {
+ int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
+ struct kvm_vcpu *vcpu;
+
+ vcpu = kvm_get_vcpu_by_id(kvm, id);
+ if (vcpu)
+ svm_ir_list_del(to_svm(vcpu), &pi);
+ }
+ }
out:
srcu_read_unlock(&kvm->irq_srcu, idx);
return ret;
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index d8e192ad5953..0e0bc2c46b51 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1755,6 +1755,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
struct kvm_vcpu *src_vcpu;
unsigned long i;
+ if (src->created_vcpus != atomic_read(&src->online_vcpus) ||
+ dst->created_vcpus != atomic_read(&dst->online_vcpus))
+ return -EBUSY;
+
if (!sev_es_guest(src))
return 0;
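
The new guard refuses intra-host migration while either VM still has vCPUs mid-creation, i.e. while created_vcpus has run ahead of online_vcpus. A minimal userspace sketch of that check, with -EBUSY hardcoded and the kvm structures reduced to plain counters (illustrative only, not the kernel types):

#include <stdio.h>

/* Toy model of the sev_check_source_vcpus() guard: migration is refused
 * while any vCPU is created but not yet online on either side. */
struct vm { int created_vcpus, online_vcpus; };

static int check_vcpus_settled(const struct vm *dst, const struct vm *src)
{
	if (src->created_vcpus != src->online_vcpus ||
	    dst->created_vcpus != dst->online_vcpus)
		return -16; /* -EBUSY */
	return 0;
}

int main(void)
{
	struct vm src = { .created_vcpus = 2, .online_vcpus = 1 };
	struct vm dst = { .created_vcpus = 2, .online_vcpus = 2 };

	printf("%d\n", check_vcpus_settled(&dst, &src)); /* -16 */
	return 0;
}
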
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index a96facc05139..5a6bd9d5cceb 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1426,7 +1426,7 @@ static void svm_clear_current_vmcb(struct vmcb *vmcb)
{
int i;
- for_each_online_cpu(i)
+ for_each_possible_cpu(i)
cmpxchg(per_cpu_ptr(&svm_data.current_vmcb, i), vmcb, NULL);
}
@@ -1920,11 +1920,11 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
svm->asid = sd->next_asid++;
}
-static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value)
+static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
{
- struct vmcb *vmcb = svm->vmcb;
+ struct vmcb *vmcb = to_svm(vcpu)->vmcb;
- if (svm->vcpu.arch.guest_state_protected)
+ if (vcpu->arch.guest_state_protected)
return;
if (unlikely(value != vmcb->save.dr6)) {
@@ -3035,10 +3035,21 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
break;
case MSR_IA32_DEBUGCTLMSR:
if (!lbrv) {
- vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
- __func__, data);
+ kvm_pr_unimpl_wrmsr(vcpu, ecx, data);
break;
}
+
+ /*
+ * AMD changed the architectural behavior of bits 5:2. On CPUs
+ * without BusLockTrap, bits 5:2 control "external pins", but
+ * on CPUs that support BusLockDetect, bit 2 enables BusLockTrap
+ * and bits 5:3 are reserved-to-zero. Sadly, old KVM allowed
+ * the guest to set bits 5:2 despite not actually virtualizing
+ * Performance-Monitoring/Breakpoint external pins. Drop bits
+ * 5:2 for backwards compatibility.
+ */
+ data &= ~GENMASK(5, 2);
+
if (data & DEBUGCTL_RESERVED_BITS)
return 1;
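
To see what the new masking does, here is a standalone sketch with a local GENMASK_ULL definition standing in for the kernel macro; a guest write of 0x3f keeps only LBR (bit 0) and BTF (bit 1):

#include <stdint.h>
#include <stdio.h>

/* Local stand-in for the kernel's GENMASK_ULL(); bits h..l inclusive. */
#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

int main(void)
{
	uint64_t data = 0x3f;		/* LBR | BTF | legacy "external pins" bits 5:2 */

	data &= ~GENMASK_ULL(5, 2);	/* drop bits 5:2 for backwards compatibility */
	printf("DEBUGCTL after masking: %#llx\n", (unsigned long long)data); /* 0x3 */
	return 0;
}
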
@@ -3065,7 +3076,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_VM_CR:
return svm_set_vm_cr(vcpu, data);
case MSR_VM_IGNNE:
- vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
+ kvm_pr_unimpl_wrmsr(vcpu, ecx, data);
break;
case MSR_AMD64_DE_CFG: {
struct kvm_msr_entry msr_entry;
@@ -3953,6 +3964,9 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{
struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
+ if (is_guest_mode(vcpu))
+ return EXIT_FASTPATH_NONE;
+
/*
* Note, the next RIP must be provided as SRCU isn't held, i.e. KVM
* can't read guest memory (dereference memslots) to decode the WRMSR.
@@ -3970,6 +3984,18 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
guest_state_enter_irqoff();
+ /*
+ * Set RFLAGS.IF prior to VMRUN, as the host's RFLAGS.IF at the time of
+ * VMRUN controls whether or not physical IRQs are masked (KVM always
+ * runs with V_INTR_MASKING_MASK). Toggle RFLAGS.IF here to avoid the
+ * temptation to do STI+VMRUN+CLI, as AMD CPUs bleed the STI shadow
+ * into guest state if delivery of an event during VMRUN triggers a
+ * #VMEXIT, and the guest_state transitions already tell lockdep that
+ * IRQs are being enabled/disabled. Note! GIF=0 for the entirety of
+ * this path, so IRQs aren't actually unmasked while running host code.
+ */
+ raw_local_irq_enable();
+
amd_clear_divider();
if (sev_es_guest(vcpu->kvm))
@@ -3977,15 +4003,18 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
else
__svm_vcpu_run(svm, spec_ctrl_intercepted);
+ raw_local_irq_disable();
+
guest_state_exit_irqoff();
}
-static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
+static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
{
+ bool force_immediate_exit = run_flags & KVM_RUN_FORCE_IMMEDIATE_EXIT;
struct vcpu_svm *svm = to_svm(vcpu);
bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL);
- trace_kvm_entry(vcpu);
+ trace_kvm_entry(vcpu, force_immediate_exit);
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
@@ -4004,9 +4033,12 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
* is enough to force an immediate vmexit.
*/
disable_nmi_singlestep(svm);
- smp_send_reschedule(vcpu->cpu);
+ force_immediate_exit = true;
}
+ if (force_immediate_exit)
+ smp_send_reschedule(vcpu->cpu);
+
pre_svm_run(vcpu);
sync_lapic_to_cr8(vcpu);
@@ -4020,13 +4052,14 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
svm_hv_update_vp_id(svm->vmcb, vcpu);
/*
- * Run with all-zero DR6 unless needed, so that we can get the exact cause
- * of a #DB.
+ * Run with all-zero DR6 unless the guest can write DR6 freely, so that
+ * KVM can get the exact cause of a #DB. Note, loading guest DR6 from
+ * KVM's snapshot is only necessary when DR accesses won't exit.
*/
- if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
- svm_set_dr6(svm, vcpu->arch.dr6);
- else
- svm_set_dr6(svm, DR6_ACTIVE_LOW);
+ if (unlikely(run_flags & KVM_RUN_LOAD_GUEST_DR6))
+ svm_set_dr6(vcpu, vcpu->arch.dr6);
+ else if (likely(!(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)))
+ svm_set_dr6(vcpu, DR6_ACTIVE_LOW);
clgi();
kvm_load_guest_xsave_state(vcpu);
@@ -4103,9 +4136,6 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
svm_complete_interrupts(vcpu);
- if (is_guest_mode(vcpu))
- return EXIT_FASTPATH_NONE;
-
return svm_exit_handlers_fastpath(vcpu);
}
@@ -4826,6 +4856,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.enable_nmi_window = svm_enable_nmi_window,
.enable_irq_window = svm_enable_irq_window,
.update_cr8_intercept = svm_update_cr8_intercept,
+
+ .x2apic_icr_is_split = true,
.set_virtual_apic_mode = avic_refresh_virtual_apic_mode,
.refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl,
.check_apicv_inhibit_reasons = avic_check_apicv_inhibit_reasons,
@@ -4847,8 +4879,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.check_intercept = svm_check_intercept,
.handle_exit_irqoff = svm_handle_exit_irqoff,
- .request_immediate_exit = __kvm_request_immediate_exit,
-
.sched_in = svm_sched_in,
.nested_ops = &svm_nested_ops,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 4cb1425900c6..a7f2faea8858 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -539,7 +539,7 @@ static inline bool is_x2apic_msrpm_offset(u32 offset)
/* svm.c */
#define MSR_INVALID 0xffffffffU
-#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
+#define DEBUGCTL_RESERVED_BITS (~(DEBUGCTLMSR_BTF | DEBUGCTLMSR_LBR))
extern bool dump_invalid_vmcb;
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index 5be9a63f09ff..48b72625cc45 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -166,13 +166,12 @@ SYM_FUNC_START(__svm_vcpu_run)
#endif
mov VCPU_RDI(%_ASM_DI), %_ASM_DI
- /* Enter guest mode */
- sti
+ /* Clobbers EFLAGS.ZF */
+ VM_CLEAR_CPU_BUFFERS
+ /* Enter guest mode */
3: vmrun %_ASM_AX
4:
- cli
-
/* Pop @svm to RAX while it's the only available register. */
pop %_ASM_AX
@@ -336,12 +335,12 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX
mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX
- /* Enter guest mode */
- sti
+ /* Clobbers EFLAGS.ZF */
+ VM_CLEAR_CPU_BUFFERS
+ /* Enter guest mode */
1: vmrun %_ASM_AX
-
-2: cli
+2:
/* Pop @svm to RDI, guest registers have been saved already. */
pop %_ASM_DI
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 6c1dcf44c4fa..ab407bc00d84 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -15,20 +15,23 @@
* Tracepoint for guest mode entry.
*/
TRACE_EVENT(kvm_entry,
- TP_PROTO(struct kvm_vcpu *vcpu),
- TP_ARGS(vcpu),
+ TP_PROTO(struct kvm_vcpu *vcpu, bool force_immediate_exit),
+ TP_ARGS(vcpu, force_immediate_exit),
TP_STRUCT__entry(
__field( unsigned int, vcpu_id )
__field( unsigned long, rip )
+ __field( bool, immediate_exit )
),
TP_fast_assign(
__entry->vcpu_id = vcpu->vcpu_id;
__entry->rip = kvm_rip_read(vcpu);
+ __entry->immediate_exit = force_immediate_exit;
),
- TP_printk("vcpu %u, rip 0x%lx", __entry->vcpu_id, __entry->rip)
+ TP_printk("vcpu %u, rip 0x%lx%s", __entry->vcpu_id, __entry->rip,
+ __entry->immediate_exit ? "[immediate exit]" : "")
);
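
For reference, the extended TP_printk() renders like this (plain printf model of the format string above; note it emits no space before the bracket):

#include <stdbool.h>
#include <stdio.h>

static void print_kvm_entry(unsigned int vcpu_id, unsigned long rip, bool imm)
{
	printf("vcpu %u, rip 0x%lx%s\n", vcpu_id, rip,
	       imm ? "[immediate exit]" : "");
}

int main(void)
{
	print_kvm_entry(0, 0xfff0, false);	/* vcpu 0, rip 0xfff0 */
	print_kvm_entry(1, 0xfff0, true);	/* vcpu 1, rip 0xfff0[immediate exit] */
	return 0;
}
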
/*
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 8052f8b7d8e1..2c3cf4351c4c 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2532,10 +2532,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
if (vmx->nested.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
- vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
+ vmx_guest_debugctl_write(vcpu, vmcs12->guest_ia32_debugctl &
+ vmx_get_supported_debugctl(vcpu, false));
} else {
kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
- vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.pre_vmenter_debugctl);
+ vmx_guest_debugctl_write(vcpu, vmx->nested.pre_vmenter_debugctl);
}
if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
@@ -3022,7 +3023,8 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
return -EINVAL;
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
- CC(!kvm_dr7_valid(vmcs12->guest_dr7)))
+ (CC(!kvm_dr7_valid(vmcs12->guest_dr7)) ||
+ CC(!vmx_is_valid_debugctl(vcpu, vmcs12->guest_ia32_debugctl, false))))
return -EINVAL;
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
@@ -3402,7 +3404,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
if (!vmx->nested.nested_run_pending ||
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
- vmx->nested.pre_vmenter_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+ vmx->nested.pre_vmenter_debugctl = vmx_guest_debugctl_read();
if (kvm_mpx_supported() &&
(!vmx->nested.nested_run_pending ||
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
@@ -4374,6 +4376,12 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
(vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
+ /*
+ * Note! Save DR7, but intentionally don't grab DEBUGCTL from vmcs02.
+ * Writes to DEBUGCTL that aren't intercepted by L1 are immediately
+ * propagated to vmcs12 (see vmx_set_msr()), as the value loaded into
+ * vmcs02 doesn't strictly track vmcs12.
+ */
if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS)
kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
@@ -4564,7 +4572,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
__vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR);
kvm_set_dr(vcpu, 7, 0x400);
- vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+ vmx_guest_debugctl_write(vcpu, 0);
if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
vmcs12->vm_exit_msr_load_count))
@@ -4619,6 +4627,9 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7)));
}
+ /* Reload DEBUGCTL to ensure vmcs01 has a fresh FREEZE_IN_SMM value. */
+ vmx_reload_guest_debugctl(vcpu);
+
/*
* Note that calling vmx_set_{efer,cr0,cr4} is important as they
* handle a variety of side effects to KVM's software model.
@@ -4839,6 +4850,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
}
+ if (vmx->nested.update_vmcs01_hwapic_isr) {
+ vmx->nested.update_vmcs01_hwapic_isr = false;
+ kvm_apic_update_hwapic_isr(vcpu);
+ }
+
if ((vm_exit_reason != -1) &&
(enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)))
vmx->nested.need_vmcs12_to_shadow_sync = true;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 220cdbe1e286..76d3ed8abf6a 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -672,11 +672,11 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
*/
static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
{
- u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);
+ u64 data = vmx_guest_debugctl_read();
if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
data &= ~DEBUGCTLMSR_LBR;
- vmcs_write64(GUEST_IA32_DEBUGCTL, data);
+ vmx_guest_debugctl_write(vcpu, data);
}
}
@@ -746,7 +746,7 @@ void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
if (!lbr_desc->event) {
vmx_disable_lbr_msrs_passthrough(vcpu);
- if (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)
+ if (vmx_guest_debugctl_read() & DEBUGCTLMSR_LBR)
goto warn;
if (test_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use))
goto warn;
@@ -769,7 +769,7 @@ warn:
static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
{
- if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR))
+ if (!(vmx_guest_debugctl_read() & DEBUGCTLMSR_LBR))
intel_pmu_release_guest_lbr_event(vcpu);
}
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index 1b56c5e5c9fb..0d04382f37af 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -272,6 +272,7 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
{
struct kvm_kernel_irq_routing_entry *e;
struct kvm_irq_routing_table *irq_rt;
+ bool enable_remapped_mode = true;
struct kvm_lapic_irq irq;
struct kvm_vcpu *vcpu;
struct vcpu_data vcpu_info;
@@ -310,21 +311,8 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
kvm_set_msi_irq(kvm, e, &irq);
if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
- !kvm_irq_is_postable(&irq)) {
- /*
- * Make sure the IRTE is in remapped mode if
- * we don't handle it in posted mode.
- */
- ret = irq_set_vcpu_affinity(host_irq, NULL);
- if (ret < 0) {
- printk(KERN_INFO
- "failed to back to remapped mode, irq: %u\n",
- host_irq);
- goto out;
- }
-
+ !kvm_irq_is_postable(&irq))
continue;
- }
vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
vcpu_info.vector = irq.vector;
@@ -332,11 +320,12 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
vcpu_info.vector, vcpu_info.pi_desc_addr, set);
- if (set)
- ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
- else
- ret = irq_set_vcpu_affinity(host_irq, NULL);
+ if (!set)
+ continue;
+ enable_remapped_mode = false;
+
+ ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
if (ret < 0) {
printk(KERN_INFO "%s: failed to update PI IRTE\n",
__func__);
@@ -344,6 +333,9 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
}
}
+ if (enable_remapped_mode)
+ ret = irq_set_vcpu_affinity(host_irq, NULL);
+
ret = 0;
out:
srcu_read_unlock(&kvm->irq_srcu, idx);
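
Both this hunk and the AVIC counterpart above move the revert-to-remapped-mode step out of the scan loop: the loop only remembers whether any entry was actually put in posted mode, and the IRTE is reverted exactly once afterwards. A toy model of the reworked control flow (the entries and the notion of "postable" are stand-ins):

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	bool postable[] = { false, false, true };
	bool enable_remapped_mode = true;

	for (unsigned int i = 0; i < sizeof(postable) / sizeof(postable[0]); i++) {
		if (!postable[i])
			continue;		/* no per-entry revert anymore */
		enable_remapped_mode = false;	/* at least one entry is posted */
		printf("entry %u: posted mode\n", i);
	}
	if (enable_remapped_mode)		/* revert once, after the scan */
		printf("IRTE back to remapped mode\n");
	return 0;
}
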
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 460ba48eb66c..22e4c9bbbcb4 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -49,6 +49,8 @@
#include <asm/virtext.h>
#include <asm/vmx.h>
+#include <trace/events/ipi.h>
+
#include "capabilities.h"
#include "cpuid.h"
#include "evmcs.h"
@@ -705,14 +707,19 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
return ret;
}
-static void crash_vmclear_local_loaded_vmcss(void)
+static void vmx_emergency_disable(void)
{
int cpu = raw_smp_processor_id();
struct loaded_vmcs *v;
list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
- loaded_vmcss_on_cpu_link)
+ loaded_vmcss_on_cpu_link) {
vmcs_clear(v->vmcs);
+ if (v->shadow_vmcs)
+ vmcs_clear(v->shadow_vmcs);
+ }
+
+ __cpu_emergency_vmxoff();
}
static void __loaded_vmcs_clear(void *arg)
@@ -1223,8 +1230,6 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
u16 fs_sel, gs_sel;
int i;
- vmx->req_immediate_exit = false;
-
/*
* Note that guest MSRs to be saved/restored can also be changed
* when guest state is loaded. This happens when guest transitions
@@ -1418,13 +1423,9 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
*/
static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
vmx_vcpu_load_vmcs(vcpu, cpu, NULL);
vmx_vcpu_pi_load(vcpu, cpu);
-
- vmx->host_debugctlmsr = get_debugctlmsr();
}
static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
@@ -2031,7 +2032,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
break;
case MSR_IA32_DEBUGCTLMSR:
- msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
+ msr_info->data = vmx_guest_debugctl_read();
break;
default:
find_uret_msr:
@@ -2056,7 +2057,7 @@ static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu,
return (unsigned long)data;
}
-static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated)
+u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated)
{
u64 debugctl = 0;
@@ -2068,9 +2069,25 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated
(host_initiated || intel_pmu_lbr_is_enabled(vcpu)))
debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
+ if (boot_cpu_has(X86_FEATURE_RTM) &&
+ (host_initiated || guest_cpuid_has(vcpu, X86_FEATURE_RTM)))
+ debugctl |= DEBUGCTLMSR_RTM_DEBUG;
+
return debugctl;
}
+bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated)
+{
+ u64 invalid;
+
+ invalid = data & ~vmx_get_supported_debugctl(vcpu, host_initiated);
+ if (invalid & (DEBUGCTLMSR_BTF | DEBUGCTLMSR_LBR)) {
+ kvm_pr_unimpl_wrmsr(vcpu, MSR_IA32_DEBUGCTLMSR, data);
+ invalid &= ~(DEBUGCTLMSR_BTF | DEBUGCTLMSR_LBR);
+ }
+ return !invalid;
+}
+
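
A compilable model of the new helper's tolerance rule: unsupported BTF/LBR writes are forgiven (the caller's "data &= supported" step then drops them), while any other unsupported bit fails the write. The bit positions are the architectural ones; everything else is a stand-in:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DEBUGCTLMSR_LBR (1ULL << 0)
#define DEBUGCTLMSR_BTF (1ULL << 1)

static bool debugctl_is_valid(uint64_t data, uint64_t supported)
{
	uint64_t invalid = data & ~supported;

	invalid &= ~(DEBUGCTLMSR_BTF | DEBUGCTLMSR_LBR);	/* tolerated */
	return !invalid;
}

int main(void)
{
	printf("%d %d\n", debugctl_is_valid(0x03, 0),	/* 1: BTF|LBR only */
	       debugctl_is_valid(0x40, 0));		/* 0: unknown bit 6 */
	return 0;
}
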
/*
* Writes msr value into the appropriate "register".
* Returns 0 on success, non-0 otherwise.
@@ -2139,31 +2156,22 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
- case MSR_IA32_DEBUGCTLMSR: {
- u64 invalid;
-
- invalid = data & ~vmx_get_supported_debugctl(vcpu, msr_info->host_initiated);
- if (invalid & (DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR)) {
- if (report_ignored_msrs)
- vcpu_unimpl(vcpu, "%s: BTF|LBR in IA32_DEBUGCTLMSR 0x%llx, nop\n",
- __func__, data);
- data &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
- invalid &= ~(DEBUGCTLMSR_BTF|DEBUGCTLMSR_LBR);
- }
-
- if (invalid)
+ case MSR_IA32_DEBUGCTLMSR:
+ if (!vmx_is_valid_debugctl(vcpu, data, msr_info->host_initiated))
return 1;
+ data &= vmx_get_supported_debugctl(vcpu, msr_info->host_initiated);
+
if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls &
VM_EXIT_SAVE_DEBUG_CONTROLS)
get_vmcs12(vcpu)->guest_ia32_debugctl = data;
- vmcs_write64(GUEST_IA32_DEBUGCTL, data);
+ vmx_guest_debugctl_write(vcpu, data);
+
if (intel_pmu_lbr_is_enabled(vcpu) && !to_vmx(vcpu)->lbr_desc.event &&
(data & DEBUGCTLMSR_LBR))
intel_pmu_create_guest_lbr_event(vcpu);
return 0;
- }
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported() ||
(!msr_info->host_initiated &&
@@ -4749,7 +4757,8 @@ static void init_vmcs(struct vcpu_vmx *vmx)
vmcs_write32(GUEST_SYSENTER_CS, 0);
vmcs_writel(GUEST_SYSENTER_ESP, 0);
vmcs_writel(GUEST_SYSENTER_EIP, 0);
- vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
+
+ vmx_guest_debugctl_write(&vmx->vcpu, 0);
if (cpu_has_vmx_tpr_shadow()) {
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
@@ -5929,22 +5938,46 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
return 1;
}
-static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu)
+static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu,
+ bool force_immediate_exit)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (!vmx->req_immediate_exit &&
- !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) {
- kvm_lapic_expired_hv_timer(vcpu);
+ /*
+ * In the *extremely* unlikely scenario that this is a spurious VM-Exit
+ * due to the timer expiring while it was "soft" disabled, just eat the
+ * exit and re-enter the guest.
+ */
+ if (unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled))
return EXIT_FASTPATH_REENTER_GUEST;
- }
- return EXIT_FASTPATH_NONE;
+ /*
+ * If the timer expired because KVM used it to force an immediate exit,
+ * then mission accomplished.
+ */
+ if (force_immediate_exit)
+ return EXIT_FASTPATH_EXIT_HANDLED;
+
+ /*
+ * If L2 is active, go down the slow path as emulating the guest timer
+ * expiration likely requires synthesizing a nested VM-Exit.
+ */
+ if (is_guest_mode(vcpu))
+ return EXIT_FASTPATH_NONE;
+
+ kvm_lapic_expired_hv_timer(vcpu);
+ return EXIT_FASTPATH_REENTER_GUEST;
}
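
The rewritten handler is a four-way decision ladder; modeled here as a pure function, with the enum names shortened and the inputs reduced to booleans (a sketch of the logic above, not the kernel signature):

#include <stdio.h>

enum fastpath { FP_NONE, FP_EXIT_HANDLED, FP_REENTER_GUEST };

static enum fastpath timer_fastpath(int soft_disabled, int force_exit, int l2)
{
	if (soft_disabled)
		return FP_REENTER_GUEST;	/* spurious exit, just re-enter */
	if (force_exit)
		return FP_EXIT_HANDLED;		/* timer was armed on purpose */
	if (l2)
		return FP_NONE;			/* may need a nested VM-Exit */
	return FP_REENTER_GUEST;		/* emulated guest timer expired */
}

int main(void)
{
	printf("%d %d\n", timer_fastpath(0, 1, 0), timer_fastpath(0, 0, 1)); /* 1 0 */
	return 0;
}
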
static int handle_preemption_timer(struct kvm_vcpu *vcpu)
{
- handle_fastpath_preemption_timer(vcpu);
+ /*
+ * This non-fastpath handler is reached if and only if the preemption
+ * timer was being used to emulate a guest timer while L2 is active.
+ * All other scenarios are supposed to be handled in the fastpath.
+ */
+ WARN_ON_ONCE(!is_guest_mode(vcpu));
+ kvm_lapic_expired_hv_timer(vcpu);
return 1;
}
@@ -6702,11 +6735,27 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
put_page(page);
}
-static void vmx_hwapic_isr_update(int max_isr)
+static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
{
u16 status;
u8 old;
+ /*
+ * If L2 is active, defer the SVI update until vmcs01 is loaded, as SVI
+ * is relevant if and only if Virtual Interrupt Delivery is
+ * enabled in vmcs12, and if VID is enabled then L2 EOIs affect L2's
+ * vAPIC, not L1's vAPIC. KVM must update vmcs01 on the next nested
+ * VM-Exit, otherwise L1 will run with a stale SVI.
+ */
+ if (is_guest_mode(vcpu)) {
+ /*
+ * KVM is supposed to forward intercepted L2 EOIs to L1 if VID
+ * is enabled in vmcs12; as above, the EOIs affect L2's vAPIC.
+ */
+ to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true;
+ return;
+ }
+
if (max_isr == -1)
max_isr = 0;
@@ -7051,13 +7100,13 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
msrs[i].host, false);
}
-static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
+static void vmx_update_hv_timer(struct kvm_vcpu *vcpu, bool force_immediate_exit)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u64 tscl;
u32 delta_tsc;
- if (vmx->req_immediate_exit) {
+ if (force_immediate_exit) {
vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
vmx->loaded_vmcs->hv_timer_soft_disabled = false;
} else if (vmx->hv_deadline_tsc != -1) {
@@ -7110,13 +7159,22 @@ void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
barrier_nospec();
}
-static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu,
+ bool force_immediate_exit)
{
+ /*
+ * Even if L2 is active, some VMX preemption timer exits can be handled
+ * in the fastpath; all other exits must use the slow path.
+ */
+ if (is_guest_mode(vcpu) &&
+ to_vmx(vcpu)->exit_reason.basic != EXIT_REASON_PREEMPTION_TIMER)
+ return EXIT_FASTPATH_NONE;
+
switch (to_vmx(vcpu)->exit_reason.basic) {
case EXIT_REASON_MSR_WRITE:
return handle_fastpath_set_msr_irqoff(vcpu);
case EXIT_REASON_PREEMPTION_TIMER:
- return handle_fastpath_preemption_timer(vcpu);
+ return handle_fastpath_preemption_timer(vcpu, force_immediate_exit);
default:
return EXIT_FASTPATH_NONE;
}
@@ -7138,7 +7196,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vmx_l1d_flush(vcpu);
else if (static_branch_unlikely(&mmio_stale_data_clear) &&
kvm_arch_has_assigned_device(vcpu->kvm))
- mds_clear_cpu_buffers();
+ x86_clear_cpu_buffers();
vmx_disable_fb_clear(vmx);
@@ -7155,8 +7213,9 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
guest_state_exit_irqoff();
}
-static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
+static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
{
+ bool force_immediate_exit = run_flags & KVM_RUN_FORCE_IMMEDIATE_EXIT;
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long cr3, cr4;
@@ -7182,7 +7241,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
return EXIT_FASTPATH_NONE;
}
- trace_kvm_entry(vcpu);
+ trace_kvm_entry(vcpu, force_immediate_exit);
if (vmx->ple_window_dirty) {
vmx->ple_window_dirty = false;
@@ -7201,6 +7260,12 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
vcpu->arch.regs_dirty = 0;
+ if (run_flags & KVM_RUN_LOAD_GUEST_DR6)
+ set_debugreg(vcpu->arch.dr6, 6);
+
+ if (run_flags & KVM_RUN_LOAD_DEBUGCTL)
+ vmx_reload_guest_debugctl(vcpu);
+
/*
* Refresh vmcs.HOST_CR3 if necessary. This must be done immediately
* prior to VM-Enter, as the kernel may load a new ASID (PCID) any time
@@ -7220,10 +7285,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->loaded_vmcs->host_state.cr4 = cr4;
}
- /* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */
- if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
- set_debugreg(vcpu->arch.dr6, 6);
-
/* When single-stepping over STI and MOV SS, we must clear the
* corresponding interruptibility bits in the guest state. Otherwise
* vmentry fails as it then expects bit 14 (BS) in pending debug
@@ -7241,7 +7302,9 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx_passthrough_lbr_msrs(vcpu);
if (enable_preemption_timer)
- vmx_update_hv_timer(vcpu);
+ vmx_update_hv_timer(vcpu, force_immediate_exit);
+ else if (force_immediate_exit)
+ smp_send_reschedule(vcpu->cpu);
kvm_wait_lapic_expire(vcpu);
@@ -7257,8 +7320,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
}
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
- if (vmx->host_debugctlmsr)
- update_debugctlmsr(vmx->host_debugctlmsr);
+ if (vcpu->arch.host_debugctl)
+ update_debugctlmsr(vcpu->arch.host_debugctl);
#ifndef CONFIG_X86_64
/*
@@ -7315,10 +7378,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx_recover_nmi_blocking(vmx);
vmx_complete_interrupts(vmx);
- if (is_guest_mode(vcpu))
- return EXIT_FASTPATH_NONE;
-
- return vmx_exit_handlers_fastpath(vcpu);
+ return vmx_exit_handlers_fastpath(vcpu, force_immediate_exit);
}
static void vmx_vcpu_free(struct kvm_vcpu *vcpu)
@@ -7825,11 +7885,6 @@ static __init void vmx_set_cpu_caps(void)
kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
}
-static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
-{
- to_vmx(vcpu)->req_immediate_exit = true;
-}
-
static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info)
{
@@ -8150,6 +8205,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.vcpu_load = vmx_vcpu_load,
.vcpu_put = vmx_vcpu_put,
+ .HOST_OWNED_DEBUGCTL = DEBUGCTLMSR_FREEZE_IN_SMM,
+
.update_exception_bitmap = vmx_update_exception_bitmap,
.get_msr_feature = vmx_get_msr_feature,
.get_msr = vmx_get_msr,
@@ -8199,6 +8256,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.enable_nmi_window = vmx_enable_nmi_window,
.enable_irq_window = vmx_enable_irq_window,
.update_cr8_intercept = vmx_update_cr8_intercept,
+
+ .x2apic_icr_is_split = false,
.set_virtual_apic_mode = vmx_set_virtual_apic_mode,
.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
.refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
@@ -8232,8 +8291,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.check_intercept = vmx_check_intercept,
.handle_exit_irqoff = vmx_handle_exit_irqoff,
- .request_immediate_exit = vmx_request_immediate_exit,
-
.sched_in = vmx_sched_in,
.cpu_dirty_log_size = PML_ENTITY_NUM,
@@ -8490,7 +8547,6 @@ static __init int hardware_setup(void)
if (!enable_preemption_timer) {
vmx_x86_ops.set_hv_timer = NULL;
vmx_x86_ops.cancel_hv_timer = NULL;
- vmx_x86_ops.request_immediate_exit = __kvm_request_immediate_exit;
}
kvm_caps.supported_mce_cap |= MCG_LMCE_P;
@@ -8551,8 +8607,7 @@ static void __vmx_exit(void)
{
allow_smaller_maxphyaddr = false;
- RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
- synchronize_rcu();
+ cpu_emergency_unregister_virt_callback(vmx_emergency_disable);
vmx_cleanup_l1d_flush();
}
@@ -8626,8 +8681,7 @@ static int __init vmx_init(void)
pi_init_cpu(cpu);
}
- rcu_assign_pointer(crash_vmclear_loaded_vmcss,
- crash_vmclear_local_loaded_vmcss);
+ cpu_emergency_register_virt_callback(vmx_emergency_disable);
vmx_check_vmcs12_offsets();
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 9e0bb98b116d..dc6f06326648 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -189,6 +189,7 @@ struct nested_vmx {
bool reload_vmcs01_apic_access_page;
bool update_vmcs01_cpu_dirty_logging;
bool update_vmcs01_apicv_status;
+ bool update_vmcs01_hwapic_isr;
/*
* Enlightened VMCS has been enabled. It does not mean that L1 has to
@@ -342,8 +343,6 @@ struct vcpu_vmx {
unsigned int ple_window;
bool ple_window_dirty;
- bool req_immediate_exit;
-
/* Support for PML */
#define PML_ENTITY_NUM 512
struct page *pml_pg;
@@ -351,8 +350,6 @@ struct vcpu_vmx {
/* apic deadline value in host tsc */
u64 hv_deadline_tsc;
- unsigned long host_debugctlmsr;
-
/*
* Only bits masked by msr_ia32_feature_control_valid_bits can be set in
* msr_ia32_feature_control. FEAT_CTL_LOCKED is always included
@@ -445,6 +442,32 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
+u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated);
+bool vmx_is_valid_debugctl(struct kvm_vcpu *vcpu, u64 data, bool host_initiated);
+
+static inline void vmx_guest_debugctl_write(struct kvm_vcpu *vcpu, u64 val)
+{
+ WARN_ON_ONCE(val & DEBUGCTLMSR_FREEZE_IN_SMM);
+
+ val |= vcpu->arch.host_debugctl & DEBUGCTLMSR_FREEZE_IN_SMM;
+ vmcs_write64(GUEST_IA32_DEBUGCTL, val);
+}
+
+static inline u64 vmx_guest_debugctl_read(void)
+{
+ return vmcs_read64(GUEST_IA32_DEBUGCTL) & ~DEBUGCTLMSR_FREEZE_IN_SMM;
+}
+
+static inline void vmx_reload_guest_debugctl(struct kvm_vcpu *vcpu)
+{
+ u64 val = vmcs_read64(GUEST_IA32_DEBUGCTL);
+
+ if (!((val ^ vcpu->arch.host_debugctl) & DEBUGCTLMSR_FREEZE_IN_SMM))
+ return;
+
+ vmx_guest_debugctl_write(vcpu, val & ~DEBUGCTLMSR_FREEZE_IN_SMM);
+}
+
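
A userspace model of the ownership rule these helpers enforce: the vmcs field carries the guest's value plus the host-owned FREEZE_IN_SMM bit, and reads strip that bit back out so the guest never observes it (a plain variable stands in for GUEST_IA32_DEBUGCTL):

#include <stdint.h>
#include <stdio.h>

#define DEBUGCTLMSR_FREEZE_IN_SMM (1ULL << 14)

static uint64_t vmcs_debugctl;	/* stand-in for GUEST_IA32_DEBUGCTL */

static void guest_debugctl_write(uint64_t host_debugctl, uint64_t val)
{
	vmcs_debugctl = val | (host_debugctl & DEBUGCTLMSR_FREEZE_IN_SMM);
}

static uint64_t guest_debugctl_read(void)
{
	return vmcs_debugctl & ~DEBUGCTLMSR_FREEZE_IN_SMM;
}

int main(void)
{
	guest_debugctl_write(DEBUGCTLMSR_FREEZE_IN_SMM, 0x1);	/* guest sets LBR */
	printf("vmcs=%#llx guest=%#llx\n",
	       (unsigned long long)vmcs_debugctl,		/* 0x4001 */
	       (unsigned long long)guest_debugctl_read());	/* 0x1 */
	return 0;
}
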
/*
* Note, early Intel manuals have the write-low and read-high bitmap offsets
* the wrong way round. The bitmaps control MSRs 0x00000000-0x00001fff and
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 19ce8199f347..f542ab5e8698 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1614,7 +1614,7 @@ static unsigned int num_msr_based_features;
ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \
- ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO)
+ ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO | ARCH_CAP_ITS_NO)
static u64 kvm_get_arch_capabilities(void)
{
@@ -1653,6 +1653,8 @@ static u64 kvm_get_arch_capabilities(void)
data |= ARCH_CAP_MDS_NO;
if (!boot_cpu_has_bug(X86_BUG_RFDS))
data |= ARCH_CAP_RFDS_NO;
+ if (!boot_cpu_has_bug(X86_BUG_ITS))
+ data |= ARCH_CAP_ITS_NO;
if (!boot_cpu_has(X86_FEATURE_RTM)) {
/*
@@ -3571,7 +3573,6 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
- bool pr = false;
u32 msr = msr_info->index;
u64 data = msr_info->data;
@@ -3623,15 +3624,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (data == BIT_ULL(18)) {
vcpu->arch.msr_hwcr = data;
} else if (data != 0) {
- vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
- data);
+ kvm_pr_unimpl_wrmsr(vcpu, msr, data);
return 1;
}
break;
case MSR_FAM10H_MMIO_CONF_BASE:
if (data != 0) {
- vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
- "0x%llx\n", data);
+ kvm_pr_unimpl_wrmsr(vcpu, msr, data);
return 1;
}
break;
@@ -3811,16 +3810,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
- pr = true;
- fallthrough;
case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
if (kvm_pmu_is_valid_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
- if (pr || data != 0)
- vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
- "0x%x data 0x%llx\n", msr, data);
+ if (data)
+ kvm_pr_unimpl_wrmsr(vcpu, msr, data);
break;
case MSR_K7_CLK_CTL:
/*
@@ -3847,9 +3843,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
/* Drop writes to this legacy MSR -- see rdmsr
* counterpart for further detail.
*/
- if (report_ignored_msrs)
- vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
- msr, data);
+ kvm_pr_unimpl_wrmsr(vcpu, msr, data);
break;
case MSR_AMD64_OSVW_ID_LENGTH:
if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
@@ -10584,12 +10578,6 @@ static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
static_call_cond(kvm_x86_set_apic_access_page_addr)(vcpu);
}
-void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
-{
- smp_send_reschedule(vcpu->cpu);
-}
-EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
-
/*
* Called within kvm->srcu read side.
* Returns 1 to let vcpu_run() continue the guest execution loop without
@@ -10603,6 +10591,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
dm_request_for_irq_injection(vcpu) &&
kvm_cpu_accept_dm_intr(vcpu);
fastpath_t exit_fastpath;
+ u64 run_flags, debug_ctl;
bool req_immediate_exit = false;
@@ -10823,9 +10812,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto cancel_injection;
}
+ run_flags = 0;
if (req_immediate_exit) {
+ run_flags |= KVM_RUN_FORCE_IMMEDIATE_EXIT;
kvm_make_request(KVM_REQ_EVENT, vcpu);
- static_call(kvm_x86_request_immediate_exit)(vcpu);
}
fpregs_assert_state_consistent();
@@ -10841,10 +10831,25 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
set_debugreg(vcpu->arch.eff_db[1], 1);
set_debugreg(vcpu->arch.eff_db[2], 2);
set_debugreg(vcpu->arch.eff_db[3], 3);
+ /* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */
+ if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
+ run_flags |= KVM_RUN_LOAD_GUEST_DR6;
} else if (unlikely(hw_breakpoint_active())) {
set_debugreg(0, 7);
}
+ /*
+ * Refresh the host DEBUGCTL snapshot after disabling IRQs, as DEBUGCTL
+ * can be modified in IRQ context, e.g. via SMP function calls. Inform
+ * vendor code if any host-owned bits were changed, e.g. so that the
+ * value loaded into hardware while running the guest can be updated.
+ */
+ debug_ctl = get_debugctlmsr();
+ if ((debug_ctl ^ vcpu->arch.host_debugctl) & kvm_x86_ops.HOST_OWNED_DEBUGCTL &&
+ !vcpu->arch.guest_state_protected)
+ run_flags |= KVM_RUN_LOAD_DEBUGCTL;
+ vcpu->arch.host_debugctl = debug_ctl;
+
guest_timing_enter_irqoff();
for (;;) {
@@ -10857,7 +10862,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
WARN_ON_ONCE((kvm_vcpu_apicv_activated(vcpu) != kvm_vcpu_apicv_active(vcpu)) &&
(kvm_get_apic_mode(vcpu) != LAPIC_MODE_DISABLED));
- exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu);
+ exit_fastpath = static_call(kvm_x86_vcpu_run)(vcpu, run_flags);
if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
break;
@@ -10869,6 +10874,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
break;
}
+ run_flags = 0;
+
/* Note, VM-Exits that go down the "slow" path are accounted below. */
++vcpu->stat.exits;
}
@@ -11460,6 +11467,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
if (kvm_mpx_supported())
kvm_load_guest_fpu(vcpu);
+ kvm_vcpu_srcu_read_lock(vcpu);
+
r = kvm_apic_accept_events(vcpu);
if (r < 0)
goto out;
@@ -11473,6 +11482,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
mp_state->mp_state = vcpu->arch.mp_state;
out:
+ kvm_vcpu_srcu_read_unlock(vcpu);
+
if (kvm_mpx_supported())
kvm_put_guest_fpu(vcpu);
vcpu_put(vcpu);
@@ -13376,16 +13387,22 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
{
struct kvm_kernel_irqfd *irqfd =
container_of(cons, struct kvm_kernel_irqfd, consumer);
+ struct kvm *kvm = irqfd->kvm;
int ret;
- irqfd->producer = prod;
kvm_arch_start_assignment(irqfd->kvm);
+
+ spin_lock_irq(&kvm->irqfds.lock);
+ irqfd->producer = prod;
+
ret = static_call(kvm_x86_pi_update_irte)(irqfd->kvm,
prod->irq, irqfd->gsi, 1);
-
if (ret)
kvm_arch_end_assignment(irqfd->kvm);
+ spin_unlock_irq(&kvm->irqfds.lock);
+
return ret;
}
@@ -13395,9 +13412,9 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
int ret;
struct kvm_kernel_irqfd *irqfd =
container_of(cons, struct kvm_kernel_irqfd, consumer);
+ struct kvm *kvm = irqfd->kvm;
WARN_ON(irqfd->producer != prod);
- irqfd->producer = NULL;
/*
* When the producer of a consumer is unregistered, we change back to
@@ -13405,11 +13422,18 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
* when the irq is masked/disabled or the consumer side (KVM
* in this case) doesn't want to receive the interrupts.
*/
+ spin_lock_irq(&kvm->irqfds.lock);
+ irqfd->producer = NULL;
+
ret = static_call(kvm_x86_pi_update_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 0);
if (ret)
printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
" fails: %d\n", irqfd->consumer.token, ret);
+ spin_unlock_irq(&kvm->irqfds.lock);
+
kvm_arch_end_assignment(irqfd->kvm);
}
@@ -13422,7 +13446,8 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
struct kvm_kernel_irq_routing_entry *new)
{
- if (new->type != KVM_IRQ_ROUTING_MSI)
+ if (old->type != KVM_IRQ_ROUTING_MSI ||
+ new->type != KVM_IRQ_ROUTING_MSI)
return true;
return !!memcmp(&old->msi, &new->msi, sizeof(new->msi));
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 9de72586f406..f3554bf05201 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -331,6 +331,18 @@ extern bool report_ignored_msrs;
extern bool eager_page_split;
+static inline void kvm_pr_unimpl_wrmsr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+{
+ if (report_ignored_msrs)
+ vcpu_unimpl(vcpu, "Unhandled WRMSR(0x%x) = 0x%llx\n", msr, data);
+}
+
+static inline void kvm_pr_unimpl_rdmsr(struct kvm_vcpu *vcpu, u32 msr)
+{
+ if (report_ignored_msrs)
+ vcpu_unimpl(vcpu, "Unhandled RDMSR(0x%x)\n", msr);
+}
+
static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
{
return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 684a39df60d9..8e38f976feb4 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -1536,8 +1536,19 @@ int kvm_xen_setup_evtchn(struct kvm *kvm,
{
struct kvm_vcpu *vcpu;
- if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm))
- return -EINVAL;
+ /*
+ * Don't check for the port being within range of max_evtchn_port().
+ * Userspace can configure whatever targets it likes; events just won't
+ * be delivered if/while the target is invalid, just like userspace can
+ * configure MSIs which target non-existent APICs.
+ *
+ * This allows the IRQ routing table to be restored on Live Migration
+ * and Live Update *independently* of other things like creating vCPUs,
+ * without imposing an ordering dependency on userspace. In this
+ * particular case, the problematic ordering would be with setting the
+ * Xen 'long mode' flag, which changes max_evtchn_port() to allow 4096
+ * instead of 1024 event channels.
+ */
/* We only support 2 level event channels for now */
if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 7880e2a7ec6a..e42661500094 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -258,6 +258,45 @@ SYM_FUNC_START(entry_untrain_ret)
SYM_FUNC_END(entry_untrain_ret)
__EXPORT_THUNK(entry_untrain_ret)
+#ifdef CONFIG_MITIGATION_ITS
+
+.macro ITS_THUNK reg
+
+SYM_INNER_LABEL(__x86_indirect_its_thunk_\reg, SYM_L_GLOBAL)
+ UNWIND_HINT_EMPTY
+ ANNOTATE_NOENDBR
+ ANNOTATE_RETPOLINE_SAFE
+ jmp *%\reg
+ int3
+ .align 32, 0xcc /* fill to the end of the line */
+ .skip 32, 0xcc /* skip to the next upper half */
+.endm
+
+/* ITS mitigation requires thunks be aligned to upper half of cacheline */
+.align 64, 0xcc
+.skip 32, 0xcc
+SYM_CODE_START(__x86_indirect_its_thunk_array)
+
+#define GEN(reg) ITS_THUNK reg
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+
+ .align 64, 0xcc
+SYM_CODE_END(__x86_indirect_its_thunk_array)
+
+.align 64, 0xcc
+.skip 32, 0xcc
+SYM_CODE_START(its_return_thunk)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+SYM_CODE_END(its_return_thunk)
+EXPORT_SYMBOL(its_return_thunk)
+
+#endif /* CONFIG_MITIGATION_ITS */
+
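
The .align 64 / .skip 32 pairs place every thunk in the upper half of its cache line, which is the property the ITS mitigation requires of an indirect-branch target. A one-line check of that invariant, as a standalone sketch:

#include <stdint.h>
#include <stdio.h>

/* An ITS-safe branch target sits in the upper 32 bytes of a 64-byte line. */
static int its_safe_target(uintptr_t addr)
{
	return (addr % 64) >= 32;
}

int main(void)
{
	printf("%d %d\n", its_safe_target(0x40), its_safe_target(0x60)); /* 0 1 */
	return 0;
}
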
SYM_CODE_START(__x86_return_thunk)
UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 60814e110a54..fcb8e7af5b4d 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -121,13 +121,12 @@ static bool ex_handler_sgx(const struct exception_table_entry *fixup,
static bool ex_handler_fprestore(const struct exception_table_entry *fixup,
struct pt_regs *regs)
{
- regs->ip = ex_fixup_addr(fixup);
-
WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
(void *)instruction_pointer(regs));
fpu_reset_from_exception_fixup();
- return true;
+
+ return ex_handler_default(fixup, regs);
}
static bool ex_handler_uaccess(const struct exception_table_entry *fixup,
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index ed861ef33f80..446bf7fbc325 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -263,28 +263,33 @@ static void __init probe_page_size_mask(void)
}
/*
- * INVLPG may not properly flush Global entries
- * on these CPUs when PCIDs are enabled.
+ * INVLPG may not properly flush Global entries on
+ * these CPUs. New microcode fixes the issue.
*/
static const struct x86_cpu_id invlpg_miss_ids[] = {
- X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 0),
- X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 0),
- X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, 0),
- X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, 0),
- X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, 0),
- X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, 0),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 0x2e),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 0x42c),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, 0x11),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, 0x118),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, 0x4117),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, 0x2e),
{}
};
static void setup_pcid(void)
{
+ const struct x86_cpu_id *invlpg_miss_match;
+
if (!IS_ENABLED(CONFIG_X86_64))
return;
if (!boot_cpu_has(X86_FEATURE_PCID))
return;
- if (x86_match_cpu(invlpg_miss_ids)) {
+ invlpg_miss_match = x86_match_cpu(invlpg_miss_ids);
+
+ if (invlpg_miss_match &&
+ boot_cpu_data.microcode < invlpg_miss_match->driver_data) {
pr_info("Incomplete global flushes, disabling PCID");
setup_clear_cpu_cap(X86_FEATURE_PCID);
return;
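
The table change turns the match from "model is affected" into "model is affected and still runs pre-fix microcode", using driver_data as the fixed revision. The gating condition, reduced to a sketch with the Alder Lake revision from the table (the helper name is made up):

#include <stdio.h>

struct cpu_fix { const char *model; unsigned int fixed_rev; };

/* PCID stays enabled once the running microcode reaches the fix revision. */
static int pcid_ok(const struct cpu_fix *match, unsigned int ucode_rev)
{
	return !match || ucode_rev >= match->fixed_rev;
}

int main(void)
{
	struct cpu_fix adl = { "ALDERLAKE", 0x2e };

	printf("%d %d\n", pcid_ok(&adl, 0x2c), pcid_ok(&adl, 0x2e)); /* 0 1 */
	return 0;
}
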
@@ -649,8 +654,13 @@ static void __init memory_map_top_down(unsigned long map_start,
*/
addr = memblock_phys_alloc_range(PMD_SIZE, PMD_SIZE, map_start,
map_end);
- memblock_phys_free(addr, PMD_SIZE);
- real_end = addr + PMD_SIZE;
+ if (!addr) {
+ pr_warn("Failed to release memory for alloc_low_pages()");
+ real_end = max(map_start, ALIGN_DOWN(map_end, PMD_SIZE));
+ } else {
+ memblock_phys_free(addr, PMD_SIZE);
+ real_end = addr + PMD_SIZE;
+ }
/* step_size need to be small so pgt_buf from BRK could cover it */
step_size = PMD_SIZE;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 851711509d38..9d75b89fa257 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -959,9 +959,18 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
ret = __add_pages(nid, start_pfn, nr_pages, params);
WARN_ON_ONCE(ret);
- /* update max_pfn, max_low_pfn and high_memory */
- update_end_of_memory_vars(start_pfn << PAGE_SHIFT,
- nr_pages << PAGE_SHIFT);
+ /*
+ * Special case: add_pages() is called by memremap_pages() for adding device
+ * private pages. Do not bump up max_pfn in the device private path,
+ * because max_pfn changes affect dma_addressing_limited().
+ *
+ * dma_addressing_limited() returning true when max_pfn exceeds the
+ * device's addressable memory can force device drivers to use bounce
+ * buffers and negatively impact their performance.
+ */
+ if (!params->pgmap)
+ /* update max_pfn, max_low_pfn and high_memory */
+ update_end_of_memory_vars(start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
return ret;
}
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 230f1dee4f09..e0b0ec0f8245 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -109,8 +109,14 @@ void __init kernel_randomize_memory(void)
memory_tb = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, 1UL << TB_SHIFT) +
CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;
- /* Adapt physical memory region size based on available memory */
- if (memory_tb < kaslr_regions[0].size_tb)
+ /*
+ * Adapt physical memory region size based on available memory,
+ * except when CONFIG_PCI_P2PDMA is enabled. P2PDMA exposes the
+ * device BAR space assuming the direct map space is large enough
+ * for creating a ZONE_DEVICE mapping in the direct map corresponding
+ * to the physical BAR address.
+ */
+ if (!IS_ENABLED(CONFIG_PCI_P2PDMA) && (memory_tb < kaslr_regions[0].size_tb))
kaslr_regions[0].size_tb = memory_tb;
/*
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index f17609884874..839d8a4a1cd0 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -588,7 +588,7 @@ void __head sme_enable(struct boot_params *bp)
out:
RIP_REL_REF(sme_me_mask) = me_mask;
- physical_mask &= ~me_mask;
- cc_vendor = CC_VENDOR_AMD;
+ RIP_REL_REF(physical_mask) &= ~me_mask;
+ RIP_REL_REF(cc_vendor) = CC_VENDOR_AMD;
cc_set_mask(me_mask);
}
diff --git a/arch/x86/mm/pat/cpa-test.c b/arch/x86/mm/pat/cpa-test.c
index 423b21e80929..55ce39b6b1b0 100644
--- a/arch/x86/mm/pat/cpa-test.c
+++ b/arch/x86/mm/pat/cpa-test.c
@@ -183,7 +183,7 @@ static int pageattr_test(void)
break;
case 1:
- err = change_page_attr_set(addrs, len[1], PAGE_CPA_TEST, 1);
+ err = change_page_attr_set(addrs, len[i], PAGE_CPA_TEST, 1);
break;
case 2:
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index b07e2167fceb..4918268d2007 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -385,9 +385,9 @@ static void cond_mitigation(struct task_struct *next)
prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_spec);
/*
- * Avoid user/user BTB poisoning by flushing the branch predictor
- * when switching between processes. This stops one process from
- * doing Spectre-v2 attacks on another.
+ * Avoid user->user BTB/RSB poisoning by flushing them when switching
+ * between processes. This stops one process from doing Spectre-v2
+ * attacks on another.
*
* Both, the conditional and the always IBPB mode use the mm
* pointer to avoid the IBPB when switching between tasks of the
@@ -617,7 +617,11 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
- /* Let nmi_uaccess_okay() know that we're changing CR3. */
+ /*
+ * Indicate that CR3 is about to change. nmi_uaccess_okay()
+ * and others are sensitive to the window where mm_cpumask(),
+ * CR3 and cpu_tlbstate.loaded_mm are not all in sync.
+ */
this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
barrier();
}
@@ -880,8 +884,16 @@ done:
static bool should_flush_tlb(int cpu, void *data)
{
+ struct mm_struct *loaded_mm = per_cpu(cpu_tlbstate.loaded_mm, cpu);
struct flush_tlb_info *info = data;
+ /*
+ * Order the 'loaded_mm' and 'is_lazy' against their
+ * write ordering in switch_mm_irqs_off(). Ensure
+ * 'is_lazy' is at least as new as 'loaded_mm'.
+ */
+ smp_rmb();
+
/* Lazy TLB will get flushed at the next context switch. */
if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu))
return false;
@@ -890,8 +902,15 @@ static bool should_flush_tlb(int cpu, void *data)
if (!info->mm)
return true;
+ /*
+ * While switching, the remote CPU could have state from
+ * either the prev or next mm. Assume the worst and flush.
+ */
+ if (loaded_mm == LOADED_MM_SWITCHING)
+ return true;
+
/* The target mm is loaded, and the CPU is not lazy. */
- if (per_cpu(cpu_tlbstate.loaded_mm, cpu) == info->mm)
+ if (loaded_mm == info->mm)
return true;
/* In cpumask, but not the loaded mm? Periodically remove by flushing. */
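
A C11-atomics sketch of the read side added above: loaded_mm is sampled first, a read barrier (here an acquire fence standing in for smp_rmb()) orders it against the is_lazy load, and a mid-switch CPU is flushed unconditionally. All names are local stand-ins, not the kernel's:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define LOADED_MM_SWITCHING ((void *)1)

static _Atomic(void *) loaded_mm;
static atomic_bool is_lazy;

static bool should_flush(void *info_mm)
{
	void *mm = atomic_load_explicit(&loaded_mm, memory_order_relaxed);

	atomic_thread_fence(memory_order_acquire);	/* smp_rmb() stand-in */

	if (atomic_load_explicit(&is_lazy, memory_order_relaxed))
		return false;			/* flushed at next context switch */
	if (!info_mm)
		return true;			/* flush-all request */
	if (mm == LOADED_MM_SWITCHING)
		return true;			/* mid-switch: assume the worst */
	return mm == info_mm;
}

int main(void)
{
	atomic_store(&loaded_mm, LOADED_MM_SWITCHING);
	printf("%d\n", should_flush((void *)2));	/* 1 */
	return 0;
}
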
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 92db785a0a8e..f3068bb53c4d 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -36,6 +36,8 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
+#define EMIT5(b1, b2, b3, b4, b5) \
+ do { EMIT1(b1); EMIT4(b2, b3, b4, b5); } while (0)
#define EMIT1_off32(b1, off) \
do { EMIT1(b1); EMIT(off, 4); } while (0)
@@ -462,7 +464,11 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
{
u8 *prog = *pprog;
- if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
+ if (IS_ENABLED(CONFIG_MITIGATION_ITS) &&
+ cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) {
+ OPTIMIZER_HIDE_VAR(reg);
+ emit_jump(&prog, its_static_thunk(reg), ip);
+ } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
EMIT_LFENCE();
EMIT2(0xFF, 0xE0 + reg);
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
@@ -481,7 +487,7 @@ static void emit_return(u8 **pprog, u8 *ip)
{
u8 *prog = *pprog;
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
+ if (cpu_wants_rethunk()) {
emit_jump(&prog, x86_return_thunk, ip);
} else {
EMIT1(0xC3); /* ret */
@@ -947,6 +953,47 @@ static void emit_nops(u8 **pprog, int len)
#define RESTORE_TAIL_CALL_CNT(stack) \
EMIT3_off32(0x48, 0x8B, 0x85, -round_up(stack, 8) - 8)
+static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip,
+ struct bpf_prog *bpf_prog)
+{
+ u8 *prog = *pprog;
+ u8 *func;
+
+ if (cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP)) {
+ /* The clearing sequence clobbers eax and ecx. */
+ EMIT1(0x50); /* push rax */
+ EMIT1(0x51); /* push rcx */
+ ip += 2;
+
+ func = (u8 *)clear_bhb_loop;
+
+ if (emit_call(&prog, func, ip))
+ return -EINVAL;
+ EMIT1(0x59); /* pop rcx */
+ EMIT1(0x58); /* pop rax */
+ }
+ /* Insert IBHF instruction */
+ if ((cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP) &&
+ cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) ||
+ cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_HW)) {
+ /*
+ * Add an Indirect Branch History Fence (IBHF). IBHF acts as a
+ * fence preventing branch history from before the fence from
+ * affecting indirect branches after the fence. This is
+ * specifically used in cBPF jitted code to prevent Intra-mode
+ * BHI attacks. The IBHF instruction is designed to be a NOP on
+ * hardware that doesn't need or support it. The REP and REX.W
+ * prefixes are required by the microcode, and they also ensure
+ * that the NOP is unlikely to be used in existing code.
+ *
+ * IBHF is not a valid instruction in 32-bit mode.
+ */
+ EMIT5(0xF3, 0x48, 0x0F, 0x1E, 0xF8); /* ibhf */
+ }
+ *pprog = prog;
+ return 0;
+}
+
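
The EMIT5() above encodes the fence as five literal bytes. Dumping them from a standalone program makes the encoding explicit (REP + REX.W + 0F 1E F8, architected to behave as a NOP where IBHF is not implemented):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint8_t ibhf[] = { 0xF3, 0x48, 0x0F, 0x1E, 0xF8 };	/* ibhf */

	for (unsigned int i = 0; i < sizeof(ibhf); i++)
		printf("%02X ", ibhf[i]);
	printf("\n");	/* F3 48 0F 1E F8 */
	return 0;
}
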
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
int oldproglen, struct jit_context *ctx, bool jmp_padding)
{
@@ -1760,6 +1807,15 @@ emit_jmp:
seen_exit = true;
/* Update cleanup_addr */
ctx->cleanup_addr = proglen;
+
+ if (bpf_prog_was_classic(bpf_prog) &&
+ !capable(CAP_SYS_ADMIN)) {
+ u8 *ip = image + addrs[i - 1];
+
+ if (emit_spectre_bhb_barrier(&prog, ip, bpf_prog))
+ return -EINVAL;
+ }
+
pop_callee_regs(&prog, callee_regs_used);
EMIT1(0xC9); /* leave */
emit_return(&prog, image + addrs[i - 1] + (prog - temp));
diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
index 7fe564eaf228..9e6a27018d0f 100644
--- a/arch/x86/platform/pvh/head.S
+++ b/arch/x86/platform/pvh/head.S
@@ -100,7 +100,12 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
xor %edx, %edx
wrmsr
- call xen_prepare_pvh
+ /* Call xen_prepare_pvh() via the kernel virtual mapping */
+ leaq xen_prepare_pvh(%rip), %rax
+ subq phys_base(%rip), %rax
+ addq $__START_KERNEL_map, %rax
+ ANNOTATE_RETPOLINE_SAFE
+ call *%rax
/* startup_64 expects boot_params in %rsi. */
mov $_pa(pvh_bootparams), %rsi
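
The three-instruction fix-up converts xen_prepare_pvh's RIP-relative (physical) address into its alias in the kernel virtual mapping. The same arithmetic in C, with an illustrative __START_KERNEL_map value and made-up addresses:

#include <stdint.h>
#include <stdio.h>

#define START_KERNEL_MAP 0xffffffff80000000ULL	/* stand-in for __START_KERNEL_map */

static uint64_t virt_call_target(uint64_t rip_rel_addr, uint64_t phys_base)
{
	return rip_rel_addr - phys_base + START_KERNEL_MAP;
}

int main(void)
{
	printf("%#llx\n", (unsigned long long)
	       virt_call_target(0x1001000, 0x1000000));	/* 0xffffffff80001000 */
	return 0;
}
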
diff --git a/arch/x86/tools/insn_decoder_test.c b/arch/x86/tools/insn_decoder_test.c
index 472540aeabc2..08cd913cbd4e 100644
--- a/arch/x86/tools/insn_decoder_test.c
+++ b/arch/x86/tools/insn_decoder_test.c
@@ -10,8 +10,7 @@
#include <assert.h>
#include <unistd.h>
#include <stdarg.h>
-
-#define unlikely(cond) (cond)
+#include <linux/kallsyms.h>
#include <asm/insn.h>
#include <inat.c>
@@ -106,7 +105,7 @@ static void parse_args(int argc, char **argv)
}
}
-#define BUFSIZE 256
+#define BUFSIZE (256 + KSYM_NAME_LEN)
int main(int argc, char **argv)
{
diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h
index b07824500363..ddc144657efa 100644
--- a/arch/x86/um/asm/checksum.h
+++ b/arch/x86/um/asm/checksum.h
@@ -20,6 +20,9 @@
*/
extern __wsum csum_partial(const void *buff, int len, __wsum sum);
+/* Do not call this directly. Declared for export type visibility. */
+extern __visible __wsum csum_partial_copy_generic(const void *src, void *dst, int len);
+
/**
* csum_fold - Fold and invert a 32bit checksum.
* sum: 32bit unfolded sum
diff --git a/arch/x86/um/os-Linux/mcontext.c b/arch/x86/um/os-Linux/mcontext.c
index 49c3744cac37..81b9d1f9f4e6 100644
--- a/arch/x86/um/os-Linux/mcontext.c
+++ b/arch/x86/um/os-Linux/mcontext.c
@@ -26,7 +26,6 @@ void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc)
COPY(RIP);
COPY2(EFLAGS, EFL);
COPY2(CS, CSGSFS);
- regs->gp[CS / sizeof(unsigned long)] &= 0xffff;
- regs->gp[CS / sizeof(unsigned long)] |= 3;
+ regs->gp[SS / sizeof(unsigned long)] = mc->gregs[REG_CSGSFS] >> 48;
#endif
}