summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2021-10-31 09:28:48 +0300
committerPaolo Bonzini <pbonzini@redhat.com>2021-10-31 09:28:48 +0300
commit4e3386843325299df13069a1c94e27237b12be51 (patch)
treeb1cf4c009b01eee0d017e3c01acc7a7495adcc46 /arch
parente59f3e5d4521cb95233e03ece48772e9161cbfd3 (diff)
parent5a2acbbb0179a7ffbb5440b9fa46689f619705ac (diff)
downloadlinux-4e3386843325299df13069a1c94e27237b12be51.tar.xz
Merge tag 'kvmarm-5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for Linux 5.16 - More progress on the protected VM front, now with the full fixed feature set as well as the limitation of some hypercalls after initialisation. - Cleanup of the RAZ/WI sysreg handling, which was pointlessly complicated - Fixes for the vgic placement in the IPA space, together with a bunch of selftests - More memcg accounting of the memory allocated on behalf of a guest - Timer and vgic selftests - Workarounds for the Apple M1 broken vgic implementation - KConfig cleanups - New kvmarm.mode=none option, for those who really dislike us
Diffstat (limited to 'arch')
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/arm64/boot/dts/qcom/ipq8074.dtsi2
-rw-r--r--arch/arm64/include/asm/acpi.h3
-rw-r--r--arch/arm64/include/asm/assembler.h5
-rw-r--r--arch/arm64/include/asm/kvm_arm.h1
-rw-r--r--arch/arm64/include/asm/kvm_asm.h48
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h5
-rw-r--r--arch/arm64/include/asm/kvm_host.h3
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h5
-rw-r--r--arch/arm64/include/asm/mte.h6
-rw-r--r--arch/arm64/include/asm/string.h2
-rw-r--r--arch/arm64/include/asm/sysreg.h3
-rw-r--r--arch/arm64/kernel/acpi.c19
-rw-r--r--arch/arm64/kernel/cpufeature.c8
-rw-r--r--arch/arm64/kernel/mte.c10
-rw-r--r--arch/arm64/kernel/smp.c3
-rw-r--r--arch/arm64/kvm/Kconfig10
-rw-r--r--arch/arm64/kvm/arm.c94
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/fault.h75
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h235
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/fixed_config.h200
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/trap_handler.h2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/Makefile2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/host.S26
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c48
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c11
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c185
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c3
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c99
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sys_regs.c487
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c22
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c16
-rw-r--r--arch/arm64/kvm/mmu.c2
-rw-r--r--arch/arm64/kvm/pmu-emul.c2
-rw-r--r--arch/arm64/kvm/reset.c2
-rw-r--r--arch/arm64/kvm/sys_regs.c41
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-irqfd.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c18
-rw-r--r--arch/arm64/kvm/vgic/vgic-kvm-device.c25
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c8
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c27
-rw-r--r--arch/arm64/kvm/vgic/vgic-v4.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic.h5
-rw-r--r--arch/arm64/lib/strcmp.S2
-rw-r--r--arch/arm64/lib/strncmp.S2
-rw-r--r--arch/m68k/68000/entry.S4
-rw-r--r--arch/m68k/Kconfig1
-rw-r--r--arch/m68k/coldfire/entry.S4
-rw-r--r--arch/m68k/include/asm/processor.h31
-rw-r--r--arch/m68k/include/asm/segment.h59
-rw-r--r--arch/m68k/include/asm/thread_info.h3
-rw-r--r--arch/m68k/include/asm/tlbflush.h11
-rw-r--r--arch/m68k/include/asm/traps.h4
-rw-r--r--arch/m68k/include/asm/uaccess.h215
-rw-r--r--arch/m68k/kernel/asm-offsets.c2
-rw-r--r--arch/m68k/kernel/entry.S58
-rw-r--r--arch/m68k/kernel/process.c4
-rw-r--r--arch/m68k/kernel/signal.c199
-rw-r--r--arch/m68k/kernel/traps.c13
-rw-r--r--arch/m68k/mac/misc.c1
-rw-r--r--arch/m68k/mm/cache.c25
-rw-r--r--arch/m68k/mm/init.c6
-rw-r--r--arch/m68k/mm/kmap.c1
-rw-r--r--arch/m68k/mm/memory.c1
-rw-r--r--arch/m68k/mm/motorola.c2
-rw-r--r--arch/m68k/sun3/config.c3
-rw-r--r--arch/m68k/sun3/mmu_emu.c6
-rw-r--r--arch/m68k/sun3/sun3ints.c1
-rw-r--r--arch/m68k/sun3x/prom.c1
-rw-r--r--arch/mips/net/bpf_jit.c57
-rw-r--r--arch/nios2/Kconfig.debug3
-rw-r--r--arch/nios2/kernel/setup.c2
-rw-r--r--arch/s390/include/asm/ccwgroup.h2
-rw-r--r--arch/s390/net/bpf_jit_comp.c70
-rw-r--r--arch/sh/include/asm/pgtable-3level.h2
-rw-r--r--arch/sparc/lib/iomap.c2
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/crypto/sm4-aesni-avx-asm_64.S5
-rw-r--r--arch/x86/events/core.c1
-rw-r--r--arch/x86/events/intel/core.c1
-rw-r--r--arch/x86/include/asm/pkeys.h2
-rw-r--r--arch/x86/include/asm/special_insns.h2
-rw-r--r--arch/x86/include/asm/xen/swiotlb-xen.h6
-rw-r--r--arch/x86/kernel/setup.c26
-rw-r--r--arch/x86/kvm/emulate.c1
-rw-r--r--arch/x86/lib/insn.c4
-rw-r--r--arch/x86/mm/fault.c26
-rw-r--r--arch/x86/net/bpf_jit_comp.c66
-rw-r--r--arch/x86/xen/enlighten_pv.c15
-rw-r--r--arch/x86/xen/pci-swiotlb-xen.c4
-rw-r--r--arch/x86/xen/smp_pv.c4
92 files changed, 1953 insertions, 783 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5c7ae4c3954b..0d921d21fd35 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -186,6 +186,7 @@ config ARM64
select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_IRQ_TIME_ACCOUNTING
+ select HAVE_KVM
select HAVE_NMI
select HAVE_PATA_PLATFORM
select HAVE_PERF_EVENTS
diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi
index a620ac0d0b19..db333001df4d 100644
--- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi
+++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi
@@ -487,7 +487,6 @@
interrupts = <GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>;
phys = <&qusb_phy_0>, <&usb0_ssphy>;
phy-names = "usb2-phy", "usb3-phy";
- tx-fifo-resize;
snps,is-utmi-l1-suspend;
snps,hird-threshold = /bits/ 8 <0x0>;
snps,dis_u2_susphy_quirk;
@@ -528,7 +527,6 @@
interrupts = <GIC_SPI 99 IRQ_TYPE_LEVEL_HIGH>;
phys = <&qusb_phy_1>, <&usb1_ssphy>;
phy-names = "usb2-phy", "usb3-phy";
- tx-fifo-resize;
snps,is-utmi-l1-suspend;
snps,hird-threshold = /bits/ 8 <0x0>;
snps,dis_u2_susphy_quirk;
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 7535dc7cc5aa..bd68e1b7f29f 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -50,9 +50,6 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr);
void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size);
#define acpi_os_ioremap acpi_os_ioremap
-void __iomem *acpi_os_memmap(acpi_physical_address phys, acpi_size size);
-#define acpi_os_memmap acpi_os_memmap
-
typedef u64 phys_cpuid_t;
#define PHYS_CPUID_INVALID INVALID_HWID
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 89faca0e740d..bfa58409a4d4 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -525,6 +525,11 @@ alternative_endif
#define EXPORT_SYMBOL_NOKASAN(name) EXPORT_SYMBOL(name)
#endif
+#ifdef CONFIG_KASAN_HW_TAGS
+#define EXPORT_SYMBOL_NOHWKASAN(name)
+#else
+#define EXPORT_SYMBOL_NOHWKASAN(name) EXPORT_SYMBOL_NOKASAN(name)
+#endif
/*
* Emit a 64-bit absolute little endian symbol reference in a way that
* ensures that it will be resolved at build time, even when building a
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 327120c0089f..a39fcf318c77 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -295,6 +295,7 @@
#define MDCR_EL2_HPMFZO (UL(1) << 29)
#define MDCR_EL2_MTPME (UL(1) << 28)
#define MDCR_EL2_TDCC (UL(1) << 27)
+#define MDCR_EL2_HLP (UL(1) << 26)
#define MDCR_EL2_HCCD (UL(1) << 23)
#define MDCR_EL2_TTRF (UL(1) << 19)
#define MDCR_EL2_HPMD (UL(1) << 17)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index e86045ac43ba..0167f3702c61 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -44,31 +44,39 @@
#define KVM_HOST_SMCCC_FUNC(name) KVM_HOST_SMCCC_ID(__KVM_HOST_SMCCC_FUNC_##name)
#define __KVM_HOST_SMCCC_FUNC___kvm_hyp_init 0
-#define __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run 1
-#define __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context 2
-#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa 3
-#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid 4
-#define __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context 5
-#define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff 6
-#define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs 7
-#define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config 8
-#define __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr 9
-#define __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr 10
-#define __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs 11
-#define __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 12
-#define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs 13
-#define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs 14
-#define __KVM_HOST_SMCCC_FUNC___pkvm_init 15
-#define __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp 16
-#define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping 17
-#define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18
-#define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19
-#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc 20
#ifndef __ASSEMBLY__
#include <linux/mm.h>
+enum __kvm_host_smccc_func {
+ /* Hypercalls available only prior to pKVM finalisation */
+ /* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */
+ __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1,
+ __KVM_HOST_SMCCC_FUNC___pkvm_init,
+ __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping,
+ __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector,
+ __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config,
+ __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize,
+
+ /* Hypercalls available after pKVM finalisation */
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
+ __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
+ __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
+ __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
+ __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
+ __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
+ __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
+ __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
+ __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs,
+ __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps,
+};
+
#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
#define DECLARE_KVM_NVHE_SYM(sym) extern char kvm_nvhe_sym(sym)[]
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index fd418955e31e..f4871e47b2d0 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -396,7 +396,10 @@ static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
if (vcpu_mode_is_32bit(vcpu))
return !!(*vcpu_cpsr(vcpu) & PSR_AA32_E_BIT);
- return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
+ if (vcpu_mode_priv(vcpu))
+ return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_EE);
+ else
+ return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_EL1_E0E);
}
static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 369c30e28301..4be8486042a7 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -58,6 +58,7 @@
enum kvm_mode {
KVM_MODE_DEFAULT,
KVM_MODE_PROTECTED,
+ KVM_MODE_NONE,
};
enum kvm_mode kvm_get_mode(void);
@@ -779,6 +780,8 @@ static inline bool kvm_vm_is_protected(struct kvm *kvm)
return false;
}
+void kvm_init_protected_traps(struct kvm_vcpu *vcpu);
+
int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 657d0c94cf82..5afd14ab15b9 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -115,7 +115,12 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
#endif
+extern u64 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64pfr1_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64isar0_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64isar1_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
+extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val);
#endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 3f93b9e0b339..02511650cffe 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -99,11 +99,17 @@ void mte_check_tfsr_el1(void);
static inline void mte_check_tfsr_entry(void)
{
+ if (!system_supports_mte())
+ return;
+
mte_check_tfsr_el1();
}
static inline void mte_check_tfsr_exit(void)
{
+ if (!system_supports_mte())
+ return;
+
/*
* The asynchronous faults are sync'ed automatically with
* TFSR_EL1 on kernel entry but for exit an explicit dsb()
diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h
index 3a3264ff47b9..95f7686b728d 100644
--- a/arch/arm64/include/asm/string.h
+++ b/arch/arm64/include/asm/string.h
@@ -12,11 +12,13 @@ extern char *strrchr(const char *, int c);
#define __HAVE_ARCH_STRCHR
extern char *strchr(const char *, int c);
+#ifndef CONFIG_KASAN_HW_TAGS
#define __HAVE_ARCH_STRCMP
extern int strcmp(const char *, const char *);
#define __HAVE_ARCH_STRNCMP
extern int strncmp(const char *, const char *, __kernel_size_t);
+#endif
#define __HAVE_ARCH_STRLEN
extern __kernel_size_t strlen(const char *);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index b268082d67ed..9412a645a1c0 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1152,6 +1152,7 @@
#define ICH_HCR_TC (1 << 10)
#define ICH_HCR_TALL0 (1 << 11)
#define ICH_HCR_TALL1 (1 << 12)
+#define ICH_HCR_TDIR (1 << 14)
#define ICH_HCR_EOIcount_SHIFT 27
#define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT)
@@ -1184,6 +1185,8 @@
#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT)
#define ICH_VTR_A3V_SHIFT 21
#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT)
+#define ICH_VTR_TDS_SHIFT 19
+#define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT)
#define ARM64_FEATURE_FIELD_BITS 4
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index 1c9c2f7a1c04..f3851724fe35 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -273,8 +273,7 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr)
return __pgprot(PROT_DEVICE_nGnRnE);
}
-static void __iomem *__acpi_os_ioremap(acpi_physical_address phys,
- acpi_size size, bool memory)
+void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
{
efi_memory_desc_t *md, *region = NULL;
pgprot_t prot;
@@ -300,11 +299,9 @@ static void __iomem *__acpi_os_ioremap(acpi_physical_address phys,
* It is fine for AML to remap regions that are not represented in the
* EFI memory map at all, as it only describes normal memory, and MMIO
* regions that require a virtual mapping to make them accessible to
- * the EFI runtime services. Determine the region default
- * attributes by checking the requested memory semantics.
+ * the EFI runtime services.
*/
- prot = memory ? __pgprot(PROT_NORMAL_NC) :
- __pgprot(PROT_DEVICE_nGnRnE);
+ prot = __pgprot(PROT_DEVICE_nGnRnE);
if (region) {
switch (region->type) {
case EFI_LOADER_CODE:
@@ -364,16 +361,6 @@ static void __iomem *__acpi_os_ioremap(acpi_physical_address phys,
return __ioremap(phys, size, prot);
}
-void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
-{
- return __acpi_os_ioremap(phys, size, false);
-}
-
-void __iomem *acpi_os_memmap(acpi_physical_address phys, acpi_size size)
-{
- return __acpi_os_ioremap(phys, size, true);
-}
-
/*
* Claim Synchronous External Aborts as a firmware first notification.
*
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index f8a3067d10c6..6ec7036ef7e1 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1526,9 +1526,13 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
/*
* For reasons that aren't entirely clear, enabling KPTI on Cavium
* ThunderX leads to apparent I-cache corruption of kernel text, which
- * ends as well as you might imagine. Don't even try.
+ * ends as well as you might imagine. Don't even try. We cannot rely
+ * on the cpus_have_*cap() helpers here to detect the CPU erratum
+ * because cpucap detection order may change. However, since we know
+ * affected CPUs are always in a homogeneous configuration, it is
+ * safe to rely on this_cpu_has_cap() here.
*/
- if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_27456)) {
+ if (this_cpu_has_cap(ARM64_WORKAROUND_CAVIUM_27456)) {
str = "ARM64_WORKAROUND_CAVIUM_27456";
__kpti_forced = -1;
}
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 9d314a3bad3b..e5e801bc5312 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -142,12 +142,7 @@ void mte_enable_kernel_async(void)
#ifdef CONFIG_KASAN_HW_TAGS
void mte_check_tfsr_el1(void)
{
- u64 tfsr_el1;
-
- if (!system_supports_mte())
- return;
-
- tfsr_el1 = read_sysreg_s(SYS_TFSR_EL1);
+ u64 tfsr_el1 = read_sysreg_s(SYS_TFSR_EL1);
if (unlikely(tfsr_el1 & SYS_TFSR_EL1_TF1)) {
/*
@@ -199,6 +194,9 @@ void mte_thread_init_user(void)
void mte_thread_switch(struct task_struct *next)
{
+ if (!system_supports_mte())
+ return;
+
mte_update_sctlr_user(next);
/*
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 6f6ff072acbd..44369b99a57e 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -1128,5 +1128,6 @@ bool cpus_are_stuck_in_kernel(void)
{
bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die());
- return !!cpus_stuck_in_kernel || smp_spin_tables;
+ return !!cpus_stuck_in_kernel || smp_spin_tables ||
+ is_protected_kvm_enabled();
}
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index d7eec0b43744..8ffcbe29395e 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -4,6 +4,7 @@
#
source "virt/lib/Kconfig"
+source "virt/kvm/Kconfig"
menuconfig VIRTUALIZATION
bool "Virtualization"
@@ -19,7 +20,7 @@ if VIRTUALIZATION
menuconfig KVM
bool "Kernel-based Virtual Machine (KVM) support"
- depends on OF
+ depends on HAVE_KVM
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select HAVE_KVM_CPU_RELAX_INTERCEPT
@@ -43,12 +44,9 @@ menuconfig KVM
If unsure, say N.
-if KVM
-
-source "virt/kvm/Kconfig"
-
config NVHE_EL2_DEBUG
bool "Debug mode for non-VHE EL2 object"
+ depends on KVM
help
Say Y here to enable the debug mode for the non-VHE KVM EL2 object.
Failure reports will BUG() in the hypervisor. This is intended for
@@ -56,6 +54,4 @@ config NVHE_EL2_DEBUG
If unsure, say N.
-endif # KVM
-
endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 7838e9fb693e..f5490afe1ebf 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -291,10 +291,12 @@ long kvm_arch_dev_ioctl(struct file *filp,
struct kvm *kvm_arch_alloc_vm(void)
{
+ size_t sz = sizeof(struct kvm);
+
if (!has_vhe())
- return kzalloc(sizeof(struct kvm), GFP_KERNEL);
+ return kzalloc(sz, GFP_KERNEL_ACCOUNT);
- return vzalloc(sizeof(struct kvm));
+ return __vmalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_HIGHMEM | __GFP_ZERO);
}
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
@@ -612,6 +614,14 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
ret = kvm_arm_pmu_v3_enable(vcpu);
+ /*
+ * Initialize traps for protected VMs.
+ * NOTE: Move to run in EL2 directly, rather than via a hypercall, once
+ * the code is in place for first run initialization at EL2.
+ */
+ if (kvm_vm_is_protected(kvm))
+ kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu);
+
return ret;
}
@@ -1571,25 +1581,33 @@ static void cpu_set_hyp_vector(void)
kvm_call_hyp_nvhe(__pkvm_cpu_set_vector, data->slot);
}
-static void cpu_hyp_reinit(void)
+static void cpu_hyp_init_context(void)
{
kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt);
- cpu_hyp_reset();
-
- if (is_kernel_in_hyp_mode())
- kvm_timer_init_vhe();
- else
+ if (!is_kernel_in_hyp_mode())
cpu_init_hyp_mode();
+}
+static void cpu_hyp_init_features(void)
+{
cpu_set_hyp_vector();
-
kvm_arm_init_debug();
+ if (is_kernel_in_hyp_mode())
+ kvm_timer_init_vhe();
+
if (vgic_present)
kvm_vgic_init_cpu_hardware();
}
+static void cpu_hyp_reinit(void)
+{
+ cpu_hyp_reset();
+ cpu_hyp_init_context();
+ cpu_hyp_init_features();
+}
+
static void _kvm_arch_hardware_enable(void *discard)
{
if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
@@ -1780,10 +1798,17 @@ static int do_pkvm_init(u32 hyp_va_bits)
int ret;
preempt_disable();
- hyp_install_host_vector();
+ cpu_hyp_init_context();
ret = kvm_call_hyp_nvhe(__pkvm_init, hyp_mem_base, hyp_mem_size,
num_possible_cpus(), kern_hyp_va(per_cpu_base),
hyp_va_bits);
+ cpu_hyp_init_features();
+
+ /*
+ * The stub hypercalls are now disabled, so set our local flag to
+ * prevent a later re-init attempt in kvm_arch_hardware_enable().
+ */
+ __this_cpu_write(kvm_arm_hardware_enabled, 1);
preempt_enable();
return ret;
@@ -1794,8 +1819,13 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits)
void *addr = phys_to_virt(hyp_mem_base);
int ret;
+ kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+ kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
+ kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1);
+ kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+ kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1);
ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP);
if (ret)
@@ -1963,9 +1993,25 @@ out_err:
return err;
}
-static void _kvm_host_prot_finalize(void *discard)
+static void _kvm_host_prot_finalize(void *arg)
{
- WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize));
+ int *err = arg;
+
+ if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
+ WRITE_ONCE(*err, -EINVAL);
+}
+
+static int pkvm_drop_host_privileges(void)
+{
+ int ret = 0;
+
+ /*
+ * Flip the static key upfront as that may no longer be possible
+ * once the host stage 2 is installed.
+ */
+ static_branch_enable(&kvm_protected_mode_initialized);
+ on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
+ return ret;
}
static int finalize_hyp_mode(void)
@@ -1979,15 +2025,7 @@ static int finalize_hyp_mode(void)
* None of other sections should ever be introspected.
*/
kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
-
- /*
- * Flip the static key upfront as that may no longer be possible
- * once the host stage 2 is installed.
- */
- static_branch_enable(&kvm_protected_mode_initialized);
- on_each_cpu(_kvm_host_prot_finalize, NULL, 1);
-
- return 0;
+ return pkvm_drop_host_privileges();
}
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
@@ -2056,6 +2094,11 @@ int kvm_arch_init(void *opaque)
return -ENODEV;
}
+ if (kvm_get_mode() == KVM_MODE_NONE) {
+ kvm_info("KVM disabled from command line\n");
+ return -ENODEV;
+ }
+
in_hyp_mode = is_kernel_in_hyp_mode();
if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) ||
@@ -2129,8 +2172,15 @@ static int __init early_kvm_mode_cfg(char *arg)
return 0;
}
- if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode()))
+ if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode())) {
+ kvm_mode = KVM_MODE_DEFAULT;
+ return 0;
+ }
+
+ if (strcmp(arg, "none") == 0) {
+ kvm_mode = KVM_MODE_NONE;
return 0;
+ }
return -EINVAL;
}
diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h
new file mode 100644
index 000000000000..1b8a2dcd712f
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/hyp/fault.h
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ */
+
+#ifndef __ARM64_KVM_HYP_FAULT_H__
+#define __ARM64_KVM_HYP_FAULT_H__
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+
+static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
+{
+ u64 par, tmp;
+
+ /*
+ * Resolve the IPA the hard way using the guest VA.
+ *
+ * Stage-1 translation already validated the memory access
+ * rights. As such, we can use the EL1 translation regime, and
+ * don't have to distinguish between EL0 and EL1 access.
+ *
+ * We do need to save/restore PAR_EL1 though, as we haven't
+ * saved the guest context yet, and we may return early...
+ */
+ par = read_sysreg_par();
+ if (!__kvm_at("s1e1r", far))
+ tmp = read_sysreg_par();
+ else
+ tmp = SYS_PAR_EL1_F; /* back to the guest */
+ write_sysreg(par, par_el1);
+
+ if (unlikely(tmp & SYS_PAR_EL1_F))
+ return false; /* Translation failed, back to guest */
+
+ /* Convert PAR to HPFAR format */
+ *hpfar = PAR_TO_HPFAR(tmp);
+ return true;
+}
+
+static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
+{
+ u64 hpfar, far;
+
+ far = read_sysreg_el2(SYS_FAR);
+
+ /*
+ * The HPFAR can be invalid if the stage 2 fault did not
+ * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
+ * bit is clear) and one of the two following cases are true:
+ * 1. The fault was due to a permission fault
+ * 2. The processor carries errata 834220
+ *
+ * Therefore, for all non S1PTW faults where we either have a
+ * permission fault or the errata workaround is enabled, we
+ * resolve the IPA using the AT instruction.
+ */
+ if (!(esr & ESR_ELx_S1PTW) &&
+ (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
+ (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
+ if (!__translate_far_to_hpfar(far, &hpfar))
+ return false;
+ } else {
+ hpfar = read_sysreg(hpfar_el2);
+ }
+
+ fault->far_el2 = far;
+ fault->hpfar_el2 = hpfar;
+ return true;
+}
+
+#endif
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index a0e78a6027be..c6e98c7e918b 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -8,6 +8,7 @@
#define __ARM64_KVM_HYP_SWITCH_H__
#include <hyp/adjust_pc.h>
+#include <hyp/fault.h>
#include <linux/arm-smccc.h>
#include <linux/kvm_host.h>
@@ -133,78 +134,9 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
}
}
-static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
-{
- u64 par, tmp;
-
- /*
- * Resolve the IPA the hard way using the guest VA.
- *
- * Stage-1 translation already validated the memory access
- * rights. As such, we can use the EL1 translation regime, and
- * don't have to distinguish between EL0 and EL1 access.
- *
- * We do need to save/restore PAR_EL1 though, as we haven't
- * saved the guest context yet, and we may return early...
- */
- par = read_sysreg_par();
- if (!__kvm_at("s1e1r", far))
- tmp = read_sysreg_par();
- else
- tmp = SYS_PAR_EL1_F; /* back to the guest */
- write_sysreg(par, par_el1);
-
- if (unlikely(tmp & SYS_PAR_EL1_F))
- return false; /* Translation failed, back to guest */
-
- /* Convert PAR to HPFAR format */
- *hpfar = PAR_TO_HPFAR(tmp);
- return true;
-}
-
-static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
-{
- u64 hpfar, far;
-
- far = read_sysreg_el2(SYS_FAR);
-
- /*
- * The HPFAR can be invalid if the stage 2 fault did not
- * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
- * bit is clear) and one of the two following cases are true:
- * 1. The fault was due to a permission fault
- * 2. The processor carries errata 834220
- *
- * Therefore, for all non S1PTW faults where we either have a
- * permission fault or the errata workaround is enabled, we
- * resolve the IPA using the AT instruction.
- */
- if (!(esr & ESR_ELx_S1PTW) &&
- (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
- (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
- if (!__translate_far_to_hpfar(far, &hpfar))
- return false;
- } else {
- hpfar = read_sysreg(hpfar_el2);
- }
-
- fault->far_el2 = far;
- fault->hpfar_el2 = hpfar;
- return true;
-}
-
static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
{
- u8 ec;
- u64 esr;
-
- esr = vcpu->arch.fault.esr_el2;
- ec = ESR_ELx_EC(esr);
-
- if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
- return true;
-
- return __get_fault_info(esr, &vcpu->arch.fault);
+ return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault);
}
static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu)
@@ -225,8 +157,13 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
}
-/* Check for an FPSIMD/SVE trap and handle as appropriate */
-static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
+/*
+ * We trap the first access to the FP/SIMD to save the host context and
+ * restore the guest context lazily.
+ * If FP/SIMD is not implemented, handle the trap and inject an undefined
+ * instruction exception to the guest. Similarly for trapped SVE accesses.
+ */
+static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{
bool sve_guest, sve_host;
u8 esr_ec;
@@ -244,9 +181,6 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
}
esr_ec = kvm_vcpu_trap_get_class(vcpu);
- if (esr_ec != ESR_ELx_EC_FP_ASIMD &&
- esr_ec != ESR_ELx_EC_SVE)
- return false;
/* Don't handle SVE traps for non-SVE vcpus here: */
if (!sve_guest && esr_ec != ESR_ELx_EC_FP_ASIMD)
@@ -348,14 +282,6 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
static inline bool esr_is_ptrauth_trap(u32 esr)
{
- u32 ec = ESR_ELx_EC(esr);
-
- if (ec == ESR_ELx_EC_PAC)
- return true;
-
- if (ec != ESR_ELx_EC_SYS64)
- return false;
-
switch (esr_sys64_to_sysreg(esr)) {
case SYS_APIAKEYLO_EL1:
case SYS_APIAKEYHI_EL1:
@@ -384,13 +310,12 @@ static inline bool esr_is_ptrauth_trap(u32 esr)
DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
-static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
+static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code)
{
struct kvm_cpu_context *ctxt;
u64 val;
- if (!vcpu_has_ptrauth(vcpu) ||
- !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
+ if (!vcpu_has_ptrauth(vcpu))
return false;
ctxt = this_cpu_ptr(&kvm_hyp_ctxt);
@@ -409,6 +334,90 @@ static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu)
return true;
}
+static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
+ handle_tx2_tvm(vcpu))
+ return true;
+
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
+ __vgic_v3_perform_cpuif_access(vcpu) == 1)
+ return true;
+
+ if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
+ return kvm_hyp_handle_ptrauth(vcpu, exit_code);
+
+ return false;
+}
+
+static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
+ __vgic_v3_perform_cpuif_access(vcpu) == 1)
+ return true;
+
+ return false;
+}
+
+static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ if (!__populate_fault_info(vcpu))
+ return true;
+
+ return false;
+}
+
+static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ if (!__populate_fault_info(vcpu))
+ return true;
+
+ if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
+ bool valid;
+
+ valid = kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
+ kvm_vcpu_dabt_isvalid(vcpu) &&
+ !kvm_vcpu_abt_issea(vcpu) &&
+ !kvm_vcpu_abt_iss1tw(vcpu);
+
+ if (valid) {
+ int ret = __vgic_v2_perform_cpuif_access(vcpu);
+
+ if (ret == 1)
+ return true;
+
+ /* Promote an illegal access to an SError.*/
+ if (ret == -1)
+ *exit_code = ARM_EXCEPTION_EL1_SERROR;
+ }
+ }
+
+ return false;
+}
+
+typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *);
+
+static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);
+
+/*
+ * Allow the hypervisor to handle the exit with an exit handler if it has one.
+ *
+ * Returns true if the hypervisor handled the exit, and control should go back
+ * to the guest, or false if it hasn't.
+ */
+static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu);
+ exit_handler_fn fn;
+
+ fn = handlers[kvm_vcpu_trap_get_class(vcpu)];
+
+ if (fn)
+ return fn(vcpu, exit_code);
+
+ return false;
+}
+
/*
* Return true when we were able to fixup the guest exit and should return to
* the guest, false when we should restore the host state and return to the
@@ -443,59 +452,9 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
if (*exit_code != ARM_EXCEPTION_TRAP)
goto exit;
- if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
- kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
- handle_tx2_tvm(vcpu))
+ /* Check if there's an exit handler and allow it to handle the exit. */
+ if (kvm_hyp_handle_exit(vcpu, exit_code))
goto guest;
-
- /*
- * We trap the first access to the FP/SIMD to save the host context
- * and restore the guest context lazily.
- * If FP/SIMD is not implemented, handle the trap and inject an
- * undefined instruction exception to the guest.
- * Similarly for trapped SVE accesses.
- */
- if (__hyp_handle_fpsimd(vcpu))
- goto guest;
-
- if (__hyp_handle_ptrauth(vcpu))
- goto guest;
-
- if (!__populate_fault_info(vcpu))
- goto guest;
-
- if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
- bool valid;
-
- valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
- kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
- kvm_vcpu_dabt_isvalid(vcpu) &&
- !kvm_vcpu_abt_issea(vcpu) &&
- !kvm_vcpu_abt_iss1tw(vcpu);
-
- if (valid) {
- int ret = __vgic_v2_perform_cpuif_access(vcpu);
-
- if (ret == 1)
- goto guest;
-
- /* Promote an illegal access to an SError.*/
- if (ret == -1)
- *exit_code = ARM_EXCEPTION_EL1_SERROR;
-
- goto exit;
- }
- }
-
- if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
- (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
- kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
- int ret = __vgic_v3_perform_cpuif_access(vcpu);
-
- if (ret == 1)
- goto guest;
- }
-
exit:
/* Return to the host kernel and handle the exit */
return false;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
new file mode 100644
index 000000000000..eea1f6a53723
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
@@ -0,0 +1,200 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Google LLC
+ * Author: Fuad Tabba <tabba@google.com>
+ */
+
+#ifndef __ARM64_KVM_FIXED_CONFIG_H__
+#define __ARM64_KVM_FIXED_CONFIG_H__
+
+#include <asm/sysreg.h>
+
+/*
+ * This file contains definitions for features to be allowed or restricted for
+ * guest virtual machines, depending on the mode KVM is running in and on the
+ * type of guest that is running.
+ *
+ * The ALLOW masks represent a bitmask of feature fields that are allowed
+ * without any restrictions as long as they are supported by the system.
+ *
+ * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for
+ * features that are restricted to support at most the specified feature.
+ *
+ * If a feature field is not present in either, than it is not supported.
+ *
+ * The approach taken for protected VMs is to allow features that are:
+ * - Needed by common Linux distributions (e.g., floating point)
+ * - Trivial to support, e.g., supporting the feature does not introduce or
+ * require tracking of additional state in KVM
+ * - Cannot be trapped or prevent the guest from using anyway
+ */
+
+/*
+ * Allow for protected VMs:
+ * - Floating-point and Advanced SIMD
+ * - Data Independent Timing
+ */
+#define PVM_ID_AA64PFR0_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \
+ ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \
+ ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \
+ )
+
+/*
+ * Restrict to the following *unsigned* features for protected VMs:
+ * - AArch64 guests only (no support for AArch32 guests):
+ * AArch32 adds complexity in trap handling, emulation, condition codes,
+ * etc...
+ * - RAS (v1)
+ * Supported by KVM
+ */
+#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \
+ )
+
+/*
+ * Allow for protected VMs:
+ * - Branch Target Identification
+ * - Speculative Store Bypassing
+ */
+#define PVM_ID_AA64PFR1_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \
+ ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \
+ )
+
+/*
+ * Allow for protected VMs:
+ * - Mixed-endian
+ * - Distinction between Secure and Non-secure Memory
+ * - Mixed-endian at EL0 only
+ * - Non-context synchronizing exception entry and exit
+ */
+#define PVM_ID_AA64MMFR0_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \
+ )
+
+/*
+ * Restrict to the following *unsigned* features for protected VMs:
+ * - 40-bit IPA
+ * - 16-bit ASID
+ */
+#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \
+ FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \
+ )
+
+/*
+ * Allow for protected VMs:
+ * - Hardware translation table updates to Access flag and Dirty state
+ * - Number of VMID bits from CPU
+ * - Hierarchical Permission Disables
+ * - Privileged Access Never
+ * - SError interrupt exceptions from speculative reads
+ * - Enhanced Translation Synchronization
+ */
+#define PVM_ID_AA64MMFR1_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \
+ )
+
+/*
+ * Allow for protected VMs:
+ * - Common not Private translations
+ * - User Access Override
+ * - IESB bit in the SCTLR_ELx registers
+ * - Unaligned single-copy atomicity and atomic functions
+ * - ESR_ELx.EC value on an exception by read access to feature ID space
+ * - TTL field in address operations.
+ * - Break-before-make sequences when changing translation block size
+ * - E0PDx mechanism
+ */
+#define PVM_ID_AA64MMFR2_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \
+ ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \
+ )
+
+/*
+ * No support for Scalable Vectors for protected VMs:
+ * Requires additional support from KVM, e.g., context-switching and
+ * trapping at EL2
+ */
+#define PVM_ID_AA64ZFR0_ALLOW (0ULL)
+
+/*
+ * No support for debug, including breakpoints, and watchpoints for protected
+ * VMs:
+ * The Arm architecture mandates support for at least the Armv8 debug
+ * architecture, which would include at least 2 hardware breakpoints and
+ * watchpoints. Providing that support to protected guests adds
+ * considerable state and complexity. Therefore, the reserved value of 0 is
+ * used for debug-related fields.
+ */
+#define PVM_ID_AA64DFR0_ALLOW (0ULL)
+#define PVM_ID_AA64DFR1_ALLOW (0ULL)
+
+/*
+ * No support for implementation defined features.
+ */
+#define PVM_ID_AA64AFR0_ALLOW (0ULL)
+#define PVM_ID_AA64AFR1_ALLOW (0ULL)
+
+/*
+ * No restrictions on instructions implemented in AArch64.
+ */
+#define PVM_ID_AA64ISAR0_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \
+ )
+
+#define PVM_ID_AA64ISAR1_ALLOW (\
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \
+ )
+
+u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
+bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code);
+bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code);
+int kvm_check_pvm_sysreg_table(void);
+
+#endif /* __ARM64_KVM_FIXED_CONFIG_H__ */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
index 1e6d995968a1..45a84f0ade04 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
@@ -15,4 +15,6 @@
#define DECLARE_REG(type, name, ctxt, reg) \
type name = (type)cpu_reg(ctxt, (reg))
+void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu);
+
#endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 8d741f71377f..c3c11974fa3b 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -14,7 +14,7 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs))
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \
- cache.o setup.o mm.o mem_protect.o
+ cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
obj-y += $(lib-objs)
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index 4b652ffb591d..0c6116d34e18 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -110,17 +110,14 @@ SYM_FUNC_START(__hyp_do_panic)
b __host_enter_for_panic
SYM_FUNC_END(__hyp_do_panic)
-.macro host_el1_sync_vect
- .align 7
-.L__vect_start\@:
- stp x0, x1, [sp, #-16]!
- mrs x0, esr_el2
- lsr x0, x0, #ESR_ELx_EC_SHIFT
- cmp x0, #ESR_ELx_EC_HVC64
- b.ne __host_exit
-
+SYM_FUNC_START(__host_hvc)
ldp x0, x1, [sp] // Don't fixup the stack yet
+ /* No stub for you, sonny Jim */
+alternative_if ARM64_KVM_PROTECTED_MODE
+ b __host_exit
+alternative_else_nop_endif
+
/* Check for a stub HVC call */
cmp x0, #HVC_STUB_HCALL_NR
b.hs __host_exit
@@ -137,6 +134,17 @@ SYM_FUNC_END(__hyp_do_panic)
ldr x5, =__kvm_handle_stub_hvc
hyp_pa x5, x6
br x5
+SYM_FUNC_END(__host_hvc)
+
+.macro host_el1_sync_vect
+ .align 7
+.L__vect_start\@:
+ stp x0, x1, [sp, #-16]!
+ mrs x0, esr_el2
+ lsr x0, x0, #ESR_ELx_EC_SHIFT
+ cmp x0, #ESR_ELx_EC_HVC64
+ b.eq __host_hvc
+ b __host_exit
.L__vect_end\@:
.if ((.L__vect_end\@ - .L__vect_start\@) > 0x80)
.error "host_el1_sync_vect larger than vector entry"
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 2da6aa8da868..b096bf009144 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -4,7 +4,7 @@
* Author: Andrew Scull <ascull@google.com>
*/
-#include <hyp/switch.h>
+#include <hyp/adjust_pc.h>
#include <asm/pgtable-types.h>
#include <asm/kvm_asm.h>
@@ -160,41 +160,65 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
{
cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize();
}
+
+static void handle___pkvm_vcpu_init_traps(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
+
+ __pkvm_vcpu_init_traps(kern_hyp_va(vcpu));
+}
+
typedef void (*hcall_t)(struct kvm_cpu_context *);
#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
static const hcall_t host_hcall[] = {
- HANDLE_FUNC(__kvm_vcpu_run),
+ /* ___kvm_hyp_init */
+ HANDLE_FUNC(__kvm_get_mdcr_el2),
+ HANDLE_FUNC(__pkvm_init),
+ HANDLE_FUNC(__pkvm_create_private_mapping),
+ HANDLE_FUNC(__pkvm_cpu_set_vector),
+ HANDLE_FUNC(__kvm_enable_ssbs),
+ HANDLE_FUNC(__vgic_v3_init_lrs),
+ HANDLE_FUNC(__vgic_v3_get_gic_config),
+ HANDLE_FUNC(__pkvm_prot_finalize),
+
+ HANDLE_FUNC(__pkvm_host_share_hyp),
HANDLE_FUNC(__kvm_adjust_pc),
+ HANDLE_FUNC(__kvm_vcpu_run),
HANDLE_FUNC(__kvm_flush_vm_context),
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
HANDLE_FUNC(__kvm_tlb_flush_vmid),
HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff),
- HANDLE_FUNC(__kvm_enable_ssbs),
- HANDLE_FUNC(__vgic_v3_get_gic_config),
HANDLE_FUNC(__vgic_v3_read_vmcr),
HANDLE_FUNC(__vgic_v3_write_vmcr),
- HANDLE_FUNC(__vgic_v3_init_lrs),
- HANDLE_FUNC(__kvm_get_mdcr_el2),
HANDLE_FUNC(__vgic_v3_save_aprs),
HANDLE_FUNC(__vgic_v3_restore_aprs),
- HANDLE_FUNC(__pkvm_init),
- HANDLE_FUNC(__pkvm_cpu_set_vector),
- HANDLE_FUNC(__pkvm_host_share_hyp),
- HANDLE_FUNC(__pkvm_create_private_mapping),
- HANDLE_FUNC(__pkvm_prot_finalize),
+ HANDLE_FUNC(__pkvm_vcpu_init_traps),
};
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(unsigned long, id, host_ctxt, 0);
+ unsigned long hcall_min = 0;
hcall_t hfn;
+ /*
+ * If pKVM has been initialised then reject any calls to the
+ * early "privileged" hypercalls. Note that we cannot reject
+ * calls to __pkvm_prot_finalize for two reasons: (1) The static
+ * key used to determine initialisation must be toggled prior to
+ * finalisation and (2) finalisation is performed on a per-CPU
+ * basis. This is all fine, however, since __pkvm_prot_finalize
+ * returns -EPERM after the first call for a given CPU.
+ */
+ if (static_branch_unlikely(&kvm_protected_mode_initialized))
+ hcall_min = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize;
+
id -= KVM_HOST_SMCCC_ID(0);
- if (unlikely(id >= ARRAY_SIZE(host_hcall)))
+ if (unlikely(id < hcall_min || id >= ARRAY_SIZE(host_hcall)))
goto inval;
hfn = host_hcall[id];
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 34eeb524b686..c1a90dd022b8 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -11,7 +11,7 @@
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>
-#include <hyp/switch.h>
+#include <hyp/fault.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
@@ -25,12 +25,6 @@ struct host_kvm host_kvm;
static struct hyp_pool host_s2_pool;
-/*
- * Copies of the host's CPU features registers holding sanitized values.
- */
-u64 id_aa64mmfr0_el1_sys_val;
-u64 id_aa64mmfr1_el1_sys_val;
-
const u8 pkvm_hyp_id = 1;
static void *host_s2_zalloc_pages_exact(size_t size)
@@ -134,6 +128,9 @@ int __pkvm_prot_finalize(void)
struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
+ if (params->hcr_el2 & HCR_VM)
+ return -EPERM;
+
params->vttbr = kvm_get_vttbr(mmu);
params->vtcr = host_kvm.arch.vtcr;
params->hcr_el2 |= HCR_VM;
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
new file mode 100644
index 000000000000..99c8d8b73e70
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 Google LLC
+ * Author: Fuad Tabba <tabba@google.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/mm.h>
+#include <nvhe/fixed_config.h>
+#include <nvhe/trap_handler.h>
+
+/*
+ * Set trap register values based on features in ID_AA64PFR0.
+ */
+static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
+{
+ const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1);
+ u64 hcr_set = HCR_RW;
+ u64 hcr_clear = 0;
+ u64 cptr_set = 0;
+
+ /* Protected KVM does not support AArch32 guests. */
+ BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0),
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
+ BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1),
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
+
+ /*
+ * Linux guests assume support for floating-point and Advanced SIMD. Do
+ * not change the trapping behavior for these from the KVM default.
+ */
+ BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP),
+ PVM_ID_AA64PFR0_ALLOW));
+ BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD),
+ PVM_ID_AA64PFR0_ALLOW));
+
+ /* Trap RAS unless all current versions are supported */
+ if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), feature_ids) <
+ ID_AA64PFR0_RAS_V1P1) {
+ hcr_set |= HCR_TERR | HCR_TEA;
+ hcr_clear |= HCR_FIEN;
+ }
+
+ /* Trap AMU */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_AMU), feature_ids)) {
+ hcr_clear |= HCR_AMVOFFEN;
+ cptr_set |= CPTR_EL2_TAM;
+ }
+
+ /* Trap SVE */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_SVE), feature_ids))
+ cptr_set |= CPTR_EL2_TZ;
+
+ vcpu->arch.hcr_el2 |= hcr_set;
+ vcpu->arch.hcr_el2 &= ~hcr_clear;
+ vcpu->arch.cptr_el2 |= cptr_set;
+}
+
+/*
+ * Set trap register values based on features in ID_AA64PFR1.
+ */
+static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu)
+{
+ const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR1_EL1);
+ u64 hcr_set = 0;
+ u64 hcr_clear = 0;
+
+ /* Memory Tagging: Trap and Treat as Untagged if not supported. */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), feature_ids)) {
+ hcr_set |= HCR_TID5;
+ hcr_clear |= HCR_DCT | HCR_ATA;
+ }
+
+ vcpu->arch.hcr_el2 |= hcr_set;
+ vcpu->arch.hcr_el2 &= ~hcr_clear;
+}
+
+/*
+ * Set trap register values based on features in ID_AA64DFR0.
+ */
+static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
+{
+ const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1);
+ u64 mdcr_set = 0;
+ u64 mdcr_clear = 0;
+ u64 cptr_set = 0;
+
+ /* Trap/constrain PMU */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVER), feature_ids)) {
+ mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
+ mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME |
+ MDCR_EL2_HPMN_MASK;
+ }
+
+ /* Trap Debug */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), feature_ids))
+ mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE;
+
+ /* Trap OS Double Lock */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DOUBLELOCK), feature_ids))
+ mdcr_set |= MDCR_EL2_TDOSA;
+
+ /* Trap SPE */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER), feature_ids)) {
+ mdcr_set |= MDCR_EL2_TPMS;
+ mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
+ }
+
+ /* Trap Trace Filter */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACE_FILT), feature_ids))
+ mdcr_set |= MDCR_EL2_TTRF;
+
+ /* Trap Trace */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACEVER), feature_ids))
+ cptr_set |= CPTR_EL2_TTA;
+
+ vcpu->arch.mdcr_el2 |= mdcr_set;
+ vcpu->arch.mdcr_el2 &= ~mdcr_clear;
+ vcpu->arch.cptr_el2 |= cptr_set;
+}
+
+/*
+ * Set trap register values based on features in ID_AA64MMFR0.
+ */
+static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu)
+{
+ const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR0_EL1);
+ u64 mdcr_set = 0;
+
+ /* Trap Debug Communications Channel registers */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_FGT), feature_ids))
+ mdcr_set |= MDCR_EL2_TDCC;
+
+ vcpu->arch.mdcr_el2 |= mdcr_set;
+}
+
+/*
+ * Set trap register values based on features in ID_AA64MMFR1.
+ */
+static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu)
+{
+ const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR1_EL1);
+ u64 hcr_set = 0;
+
+ /* Trap LOR */
+ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_LOR), feature_ids))
+ hcr_set |= HCR_TLOR;
+
+ vcpu->arch.hcr_el2 |= hcr_set;
+}
+
+/*
+ * Set baseline trap register values.
+ */
+static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
+{
+ const u64 hcr_trap_feat_regs = HCR_TID3;
+ const u64 hcr_trap_impdef = HCR_TACR | HCR_TIDCP | HCR_TID1;
+
+ /*
+ * Always trap:
+ * - Feature id registers: to control features exposed to guests
+ * - Implementation-defined features
+ */
+ vcpu->arch.hcr_el2 |= hcr_trap_feat_regs | hcr_trap_impdef;
+
+ /* Clear res0 and set res1 bits to trap potential new features. */
+ vcpu->arch.hcr_el2 &= ~(HCR_RES0);
+ vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_RES0);
+ vcpu->arch.cptr_el2 |= CPTR_NVHE_EL2_RES1;
+ vcpu->arch.cptr_el2 &= ~(CPTR_NVHE_EL2_RES0);
+}
+
+/*
+ * Initialize trap register values for protected VMs.
+ */
+void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
+{
+ pvm_init_trap_regs(vcpu);
+ pvm_init_traps_aa64pfr0(vcpu);
+ pvm_init_traps_aa64pfr1(vcpu);
+ pvm_init_traps_aa64dfr0(vcpu);
+ pvm_init_traps_aa64mmfr0(vcpu);
+ pvm_init_traps_aa64mmfr1(vcpu);
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 57c27846320f..862c7b514e20 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -10,6 +10,7 @@
#include <asm/kvm_pgtable.h>
#include <nvhe/early_alloc.h>
+#include <nvhe/fixed_config.h>
#include <nvhe/gfp.h>
#include <nvhe/memory.h>
#include <nvhe/mem_protect.h>
@@ -260,6 +261,8 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
void (*fn)(phys_addr_t params_pa, void *finalize_fn_va);
int ret;
+ BUG_ON(kvm_check_pvm_sysreg_table());
+
if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size))
return -EINVAL;
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index a34b01cc8ab9..c0e3fed26d93 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -27,6 +27,7 @@
#include <asm/processor.h>
#include <asm/thread_info.h>
+#include <nvhe/fixed_config.h>
#include <nvhe/mem_protect.h>
/* Non-VHE specific context */
@@ -158,6 +159,101 @@ static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
write_sysreg(pmu->events_host, pmcntenset_el0);
}
+/**
+ * Handler for protected VM MSR, MRS or System instruction execution in AArch64.
+ *
+ * Returns true if the hypervisor has handled the exit, and control should go
+ * back to the guest, or false if it hasn't.
+ */
+static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ /*
+ * Make sure we handle the exit for workarounds and ptrauth
+ * before the pKVM handling, as the latter could decide to
+ * UNDEF.
+ */
+ return (kvm_hyp_handle_sysreg(vcpu, exit_code) ||
+ kvm_handle_pvm_sysreg(vcpu, exit_code));
+}
+
+/**
+ * Handler for protected floating-point and Advanced SIMD accesses.
+ *
+ * Returns true if the hypervisor has handled the exit, and control should go
+ * back to the guest, or false if it hasn't.
+ */
+static bool kvm_handle_pvm_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ /* Linux guests assume support for floating-point and Advanced SIMD. */
+ BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP),
+ PVM_ID_AA64PFR0_ALLOW));
+ BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD),
+ PVM_ID_AA64PFR0_ALLOW));
+
+ return kvm_hyp_handle_fpsimd(vcpu, exit_code);
+}
+
+static const exit_handler_fn hyp_exit_handlers[] = {
+ [0 ... ESR_ELx_EC_MAX] = NULL,
+ [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
+ [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg,
+ [ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd,
+ [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
+ [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
+ [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
+ [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+};
+
+static const exit_handler_fn pvm_exit_handlers[] = {
+ [0 ... ESR_ELx_EC_MAX] = NULL,
+ [ESR_ELx_EC_SYS64] = kvm_handle_pvm_sys64,
+ [ESR_ELx_EC_SVE] = kvm_handle_pvm_restricted,
+ [ESR_ELx_EC_FP_ASIMD] = kvm_handle_pvm_fpsimd,
+ [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
+ [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
+ [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+};
+
+static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
+{
+ if (unlikely(kvm_vm_is_protected(kern_hyp_va(vcpu->kvm))))
+ return pvm_exit_handlers;
+
+ return hyp_exit_handlers;
+}
+
+/*
+ * Some guests (e.g., protected VMs) are not be allowed to run in AArch32.
+ * The ARMv8 architecture does not give the hypervisor a mechanism to prevent a
+ * guest from dropping to AArch32 EL0 if implemented by the CPU. If the
+ * hypervisor spots a guest in such a state ensure it is handled, and don't
+ * trust the host to spot or fix it. The check below is based on the one in
+ * kvm_arch_vcpu_ioctl_run().
+ *
+ * Returns false if the guest ran in AArch32 when it shouldn't have, and
+ * thus should exit to the host, or true if a the guest run loop can continue.
+ */
+static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+
+ if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) {
+ /*
+ * As we have caught the guest red-handed, decide that it isn't
+ * fit for purpose anymore by making the vcpu invalid. The VMM
+ * can try and fix it by re-initializing the vcpu with
+ * KVM_ARM_VCPU_INIT, however, this is likely not possible for
+ * protected VMs.
+ */
+ vcpu->arch.target = -1;
+ *exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
+ *exit_code |= ARM_EXCEPTION_IL;
+ return false;
+ }
+
+ return true;
+}
+
/* Switch to the guest for legacy non-VHE systems */
int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
{
@@ -220,6 +316,9 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
/* Jump in the fire! */
exit_code = __guest_enter(vcpu);
+ if (unlikely(!handle_aarch32_guest(vcpu, &exit_code)))
+ break;
+
/* And we're baaack! */
} while (fixup_guest_exit(vcpu, &exit_code));
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
new file mode 100644
index 000000000000..3787ee6fb1a2
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 Google LLC
+ * Author: Fuad Tabba <tabba@google.com>
+ */
+
+#include <linux/irqchip/arm-gic-v3.h>
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+
+#include <hyp/adjust_pc.h>
+
+#include <nvhe/fixed_config.h>
+
+#include "../../sys_regs.h"
+
+/*
+ * Copies of the host's CPU features registers holding sanitized values at hyp.
+ */
+u64 id_aa64pfr0_el1_sys_val;
+u64 id_aa64pfr1_el1_sys_val;
+u64 id_aa64isar0_el1_sys_val;
+u64 id_aa64isar1_el1_sys_val;
+u64 id_aa64mmfr0_el1_sys_val;
+u64 id_aa64mmfr1_el1_sys_val;
+u64 id_aa64mmfr2_el1_sys_val;
+
+/*
+ * Inject an unknown/undefined exception to an AArch64 guest while most of its
+ * sysregs are live.
+ */
+static void inject_undef64(struct kvm_vcpu *vcpu)
+{
+ u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
+
+ *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
+ *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR);
+
+ vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 |
+ KVM_ARM64_EXCEPT_AA64_ELx_SYNC |
+ KVM_ARM64_PENDING_EXCEPTION);
+
+ __kvm_adjust_pc(vcpu);
+
+ write_sysreg_el1(esr, SYS_ESR);
+ write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR);
+ write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
+ write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR);
+}
+
+/*
+ * Returns the restricted features values of the feature register based on the
+ * limitations in restrict_fields.
+ * A feature id field value of 0b0000 does not impose any restrictions.
+ * Note: Use only for unsigned feature field values.
+ */
+static u64 get_restricted_features_unsigned(u64 sys_reg_val,
+ u64 restrict_fields)
+{
+ u64 value = 0UL;
+ u64 mask = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+
+ /*
+ * According to the Arm Architecture Reference Manual, feature fields
+ * use increasing values to indicate increases in functionality.
+ * Iterate over the restricted feature fields and calculate the minimum
+ * unsigned value between the one supported by the system, and what the
+ * value is being restricted to.
+ */
+ while (sys_reg_val && restrict_fields) {
+ value |= min(sys_reg_val & mask, restrict_fields & mask);
+ sys_reg_val &= ~mask;
+ restrict_fields &= ~mask;
+ mask <<= ARM64_FEATURE_FIELD_BITS;
+ }
+
+ return value;
+}
+
+/*
+ * Functions that return the value of feature id registers for protected VMs
+ * based on allowed features, system features, and KVM support.
+ */
+
+static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu)
+{
+ const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm);
+ u64 set_mask = 0;
+ u64 allow_mask = PVM_ID_AA64PFR0_ALLOW;
+
+ if (!vcpu_has_sve(vcpu))
+ allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE);
+
+ set_mask |= get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val,
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);
+
+ /* Spectre and Meltdown mitigation in KVM */
+ set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2),
+ (u64)kvm->arch.pfr0_csv2);
+ set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3),
+ (u64)kvm->arch.pfr0_csv3);
+
+ return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask;
+}
+
+static u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu)
+{
+ const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm);
+ u64 allow_mask = PVM_ID_AA64PFR1_ALLOW;
+
+ if (!kvm_has_mte(kvm))
+ allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
+
+ return id_aa64pfr1_el1_sys_val & allow_mask;
+}
+
+static u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * No support for Scalable Vectors, therefore, hyp has no sanitized
+ * copy of the feature id register.
+ */
+ BUILD_BUG_ON(PVM_ID_AA64ZFR0_ALLOW != 0ULL);
+ return 0;
+}
+
+static u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * No support for debug, including breakpoints, and watchpoints,
+ * therefore, pKVM has no sanitized copy of the feature id register.
+ */
+ BUILD_BUG_ON(PVM_ID_AA64DFR0_ALLOW != 0ULL);
+ return 0;
+}
+
+static u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * No support for debug, therefore, hyp has no sanitized copy of the
+ * feature id register.
+ */
+ BUILD_BUG_ON(PVM_ID_AA64DFR1_ALLOW != 0ULL);
+ return 0;
+}
+
+static u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * No support for implementation defined features, therefore, hyp has no
+ * sanitized copy of the feature id register.
+ */
+ BUILD_BUG_ON(PVM_ID_AA64AFR0_ALLOW != 0ULL);
+ return 0;
+}
+
+static u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu)
+{
+ /*
+ * No support for implementation defined features, therefore, hyp has no
+ * sanitized copy of the feature id register.
+ */
+ BUILD_BUG_ON(PVM_ID_AA64AFR1_ALLOW != 0ULL);
+ return 0;
+}
+
+static u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu)
+{
+ return id_aa64isar0_el1_sys_val & PVM_ID_AA64ISAR0_ALLOW;
+}
+
+static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu)
+{
+ u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW;
+
+ if (!vcpu_has_ptrauth(vcpu))
+ allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_API) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) |
+ ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI));
+
+ return id_aa64isar1_el1_sys_val & allow_mask;
+}
+
+static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu)
+{
+ u64 set_mask;
+
+ set_mask = get_restricted_features_unsigned(id_aa64mmfr0_el1_sys_val,
+ PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED);
+
+ return (id_aa64mmfr0_el1_sys_val & PVM_ID_AA64MMFR0_ALLOW) | set_mask;
+}
+
+static u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu)
+{
+ return id_aa64mmfr1_el1_sys_val & PVM_ID_AA64MMFR1_ALLOW;
+}
+
+static u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu)
+{
+ return id_aa64mmfr2_el1_sys_val & PVM_ID_AA64MMFR2_ALLOW;
+}
+
+/* Read a sanitized cpufeature ID register by its encoding */
+u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id)
+{
+ switch (id) {
+ case SYS_ID_AA64PFR0_EL1:
+ return get_pvm_id_aa64pfr0(vcpu);
+ case SYS_ID_AA64PFR1_EL1:
+ return get_pvm_id_aa64pfr1(vcpu);
+ case SYS_ID_AA64ZFR0_EL1:
+ return get_pvm_id_aa64zfr0(vcpu);
+ case SYS_ID_AA64DFR0_EL1:
+ return get_pvm_id_aa64dfr0(vcpu);
+ case SYS_ID_AA64DFR1_EL1:
+ return get_pvm_id_aa64dfr1(vcpu);
+ case SYS_ID_AA64AFR0_EL1:
+ return get_pvm_id_aa64afr0(vcpu);
+ case SYS_ID_AA64AFR1_EL1:
+ return get_pvm_id_aa64afr1(vcpu);
+ case SYS_ID_AA64ISAR0_EL1:
+ return get_pvm_id_aa64isar0(vcpu);
+ case SYS_ID_AA64ISAR1_EL1:
+ return get_pvm_id_aa64isar1(vcpu);
+ case SYS_ID_AA64MMFR0_EL1:
+ return get_pvm_id_aa64mmfr0(vcpu);
+ case SYS_ID_AA64MMFR1_EL1:
+ return get_pvm_id_aa64mmfr1(vcpu);
+ case SYS_ID_AA64MMFR2_EL1:
+ return get_pvm_id_aa64mmfr2(vcpu);
+ default:
+ /*
+ * Should never happen because all cases are covered in
+ * pvm_sys_reg_descs[].
+ */
+ WARN_ON(1);
+ break;
+ }
+
+ return 0;
+}
+
+static u64 read_id_reg(const struct kvm_vcpu *vcpu,
+ struct sys_reg_desc const *r)
+{
+ return pvm_read_id_reg(vcpu, reg_to_encoding(r));
+}
+
+/* Handler to RAZ/WI sysregs */
+static bool pvm_access_raz_wi(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (!p->is_write)
+ p->regval = 0;
+
+ return true;
+}
+
+/*
+ * Accessor for AArch32 feature id registers.
+ *
+ * The value of these registers is "unknown" according to the spec if AArch32
+ * isn't supported.
+ */
+static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ inject_undef64(vcpu);
+ return false;
+ }
+
+ /*
+ * No support for AArch32 guests, therefore, pKVM has no sanitized copy
+ * of AArch32 feature id registers.
+ */
+ BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1),
+ PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_ELx_64BIT_ONLY);
+
+ return pvm_access_raz_wi(vcpu, p, r);
+}
+
+/*
+ * Accessor for AArch64 feature id registers.
+ *
+ * If access is allowed, set the regval to the protected VM's view of the
+ * register and return true.
+ * Otherwise, inject an undefined exception and return false.
+ */
+static bool pvm_access_id_aarch64(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ inject_undef64(vcpu);
+ return false;
+ }
+
+ p->regval = read_id_reg(vcpu, r);
+ return true;
+}
+
+static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ /* pVMs only support GICv3. 'nuf said. */
+ if (!p->is_write)
+ p->regval = ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB | ICC_SRE_EL1_SRE;
+
+ return true;
+}
+
+/* Mark the specified system register as an AArch32 feature id register. */
+#define AARCH32(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch32 }
+
+/* Mark the specified system register as an AArch64 feature id register. */
+#define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 }
+
+/* Mark the specified system register as Read-As-Zero/Write-Ignored */
+#define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi }
+
+/* Mark the specified system register as not being handled in hyp. */
+#define HOST_HANDLED(REG) { SYS_DESC(REG), .access = NULL }
+
+/*
+ * Architected system registers.
+ * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ *
+ * NOTE: Anything not explicitly listed here is *restricted by default*, i.e.,
+ * it will lead to injecting an exception into the guest.
+ */
+static const struct sys_reg_desc pvm_sys_reg_descs[] = {
+ /* Cache maintenance by set/way operations are restricted. */
+
+ /* Debug and Trace Registers are restricted. */
+
+ /* AArch64 mappings of the AArch32 ID registers */
+ /* CRm=1 */
+ AARCH32(SYS_ID_PFR0_EL1),
+ AARCH32(SYS_ID_PFR1_EL1),
+ AARCH32(SYS_ID_DFR0_EL1),
+ AARCH32(SYS_ID_AFR0_EL1),
+ AARCH32(SYS_ID_MMFR0_EL1),
+ AARCH32(SYS_ID_MMFR1_EL1),
+ AARCH32(SYS_ID_MMFR2_EL1),
+ AARCH32(SYS_ID_MMFR3_EL1),
+
+ /* CRm=2 */
+ AARCH32(SYS_ID_ISAR0_EL1),
+ AARCH32(SYS_ID_ISAR1_EL1),
+ AARCH32(SYS_ID_ISAR2_EL1),
+ AARCH32(SYS_ID_ISAR3_EL1),
+ AARCH32(SYS_ID_ISAR4_EL1),
+ AARCH32(SYS_ID_ISAR5_EL1),
+ AARCH32(SYS_ID_MMFR4_EL1),
+ AARCH32(SYS_ID_ISAR6_EL1),
+
+ /* CRm=3 */
+ AARCH32(SYS_MVFR0_EL1),
+ AARCH32(SYS_MVFR1_EL1),
+ AARCH32(SYS_MVFR2_EL1),
+ AARCH32(SYS_ID_PFR2_EL1),
+ AARCH32(SYS_ID_DFR1_EL1),
+ AARCH32(SYS_ID_MMFR5_EL1),
+
+ /* AArch64 ID registers */
+ /* CRm=4 */
+ AARCH64(SYS_ID_AA64PFR0_EL1),
+ AARCH64(SYS_ID_AA64PFR1_EL1),
+ AARCH64(SYS_ID_AA64ZFR0_EL1),
+ AARCH64(SYS_ID_AA64DFR0_EL1),
+ AARCH64(SYS_ID_AA64DFR1_EL1),
+ AARCH64(SYS_ID_AA64AFR0_EL1),
+ AARCH64(SYS_ID_AA64AFR1_EL1),
+ AARCH64(SYS_ID_AA64ISAR0_EL1),
+ AARCH64(SYS_ID_AA64ISAR1_EL1),
+ AARCH64(SYS_ID_AA64MMFR0_EL1),
+ AARCH64(SYS_ID_AA64MMFR1_EL1),
+ AARCH64(SYS_ID_AA64MMFR2_EL1),
+
+ /* Scalable Vector Registers are restricted. */
+
+ RAZ_WI(SYS_ERRIDR_EL1),
+ RAZ_WI(SYS_ERRSELR_EL1),
+ RAZ_WI(SYS_ERXFR_EL1),
+ RAZ_WI(SYS_ERXCTLR_EL1),
+ RAZ_WI(SYS_ERXSTATUS_EL1),
+ RAZ_WI(SYS_ERXADDR_EL1),
+ RAZ_WI(SYS_ERXMISC0_EL1),
+ RAZ_WI(SYS_ERXMISC1_EL1),
+
+ /* Performance Monitoring Registers are restricted. */
+
+ /* Limited Ordering Regions Registers are restricted. */
+
+ HOST_HANDLED(SYS_ICC_SGI1R_EL1),
+ HOST_HANDLED(SYS_ICC_ASGI1R_EL1),
+ HOST_HANDLED(SYS_ICC_SGI0R_EL1),
+ { SYS_DESC(SYS_ICC_SRE_EL1), .access = pvm_gic_read_sre, },
+
+ HOST_HANDLED(SYS_CCSIDR_EL1),
+ HOST_HANDLED(SYS_CLIDR_EL1),
+ HOST_HANDLED(SYS_CSSELR_EL1),
+ HOST_HANDLED(SYS_CTR_EL0),
+
+ /* Performance Monitoring Registers are restricted. */
+
+ /* Activity Monitoring Registers are restricted. */
+
+ HOST_HANDLED(SYS_CNTP_TVAL_EL0),
+ HOST_HANDLED(SYS_CNTP_CTL_EL0),
+ HOST_HANDLED(SYS_CNTP_CVAL_EL0),
+
+ /* Performance Monitoring Registers are restricted. */
+};
+
+/*
+ * Checks that the sysreg table is unique and in-order.
+ *
+ * Returns 0 if the table is consistent, or 1 otherwise.
+ */
+int kvm_check_pvm_sysreg_table(void)
+{
+ unsigned int i;
+
+ for (i = 1; i < ARRAY_SIZE(pvm_sys_reg_descs); i++) {
+ if (cmp_sys_reg(&pvm_sys_reg_descs[i-1], &pvm_sys_reg_descs[i]) >= 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Handler for protected VM MSR, MRS or System instruction execution.
+ *
+ * Returns true if the hypervisor has handled the exit, and control should go
+ * back to the guest, or false if it hasn't, to be handled by the host.
+ */
+bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ const struct sys_reg_desc *r;
+ struct sys_reg_params params;
+ unsigned long esr = kvm_vcpu_get_esr(vcpu);
+ int Rt = kvm_vcpu_sys_get_rt(vcpu);
+
+ params = esr_sys64_to_params(esr);
+ params.regval = vcpu_get_reg(vcpu, Rt);
+
+ r = find_reg(&params, pvm_sys_reg_descs, ARRAY_SIZE(pvm_sys_reg_descs));
+
+ /* Undefined (RESTRICTED). */
+ if (r == NULL) {
+ inject_undef64(vcpu);
+ return true;
+ }
+
+ /* Handled by the host (HOST_HANDLED) */
+ if (r->access == NULL)
+ return false;
+
+ /* Handled by hyp: skip instruction if instructed to do so. */
+ if (r->access(vcpu, &params, r))
+ __kvm_skip_instr(vcpu);
+
+ if (!params.is_write)
+ vcpu_set_reg(vcpu, Rt, params.regval);
+
+ return true;
+}
+
+/**
+ * Handler for protected VM restricted exceptions.
+ *
+ * Inject an undefined exception into the guest and return true to indicate that
+ * the hypervisor has handled the exit, and control should go back to the guest.
+ */
+bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ inject_undef64(vcpu);
+ return true;
+}
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 39f8f7f9227c..20db2f281cf2 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -695,9 +695,7 @@ static void __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
goto spurious;
lr_val &= ~ICH_LR_STATE;
- /* No active state for LPIs */
- if ((lr_val & ICH_LR_VIRTUAL_ID_MASK) <= VGIC_MAX_SPI)
- lr_val |= ICH_LR_ACTIVE_BIT;
+ lr_val |= ICH_LR_ACTIVE_BIT;
__gic_v3_set_lr(lr_val, lr);
__vgic_v3_set_active_priority(lr_prio, vmcr, grp);
vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK);
@@ -764,20 +762,18 @@ static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
/* Drop priority in any case */
act_prio = __vgic_v3_clear_highest_active_priority();
- /* If EOIing an LPI, no deactivate to be performed */
- if (vid >= VGIC_MIN_LPI)
- return;
-
- /* EOImode == 1, nothing to be done here */
- if (vmcr & ICH_VMCR_EOIM_MASK)
- return;
-
lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val);
if (lr == -1) {
- __vgic_v3_bump_eoicount();
+ /* Do not bump EOIcount for LPIs that aren't in the LRs */
+ if (!(vid >= VGIC_MIN_LPI))
+ __vgic_v3_bump_eoicount();
return;
}
+ /* EOImode == 1 and not an LPI, nothing to be done here */
+ if ((vmcr & ICH_VMCR_EOIM_MASK) && !(vid >= VGIC_MIN_LPI))
+ return;
+
lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT;
/* If priorities or group do not match, the guest has fscked-up. */
@@ -987,8 +983,6 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT;
/* IDbits */
val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT;
- /* SEIS */
- val |= ((vtr >> 22) & 1) << ICC_CTLR_EL1_SEIS_SHIFT;
/* A3V */
val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT;
/* EOImode */
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index ded2c66675f0..5a2cb5d9bc4b 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -96,6 +96,22 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu)
__deactivate_traps_common(vcpu);
}
+static const exit_handler_fn hyp_exit_handlers[] = {
+ [0 ... ESR_ELx_EC_MAX] = NULL,
+ [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
+ [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg,
+ [ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd,
+ [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
+ [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
+ [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
+ [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+};
+
+static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
+{
+ return hyp_exit_handlers;
+}
+
/* Switch to the guest for VHE systems running in EL2 */
static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
{
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 69bd1732a299..326cdfec74a1 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -512,7 +512,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
return -EINVAL;
}
- pgt = kzalloc(sizeof(*pgt), GFP_KERNEL);
+ pgt = kzalloc(sizeof(*pgt), GFP_KERNEL_ACCOUNT);
if (!pgt)
return -ENOMEM;
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 2af3c37445e0..a5e4bbf5e68f 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -978,7 +978,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
mutex_lock(&vcpu->kvm->lock);
if (!vcpu->kvm->arch.pmu_filter) {
- vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL);
+ vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
if (!vcpu->kvm->arch.pmu_filter) {
mutex_unlock(&vcpu->kvm->lock);
return -ENOMEM;
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 5ce36b0a3343..acad22ebac12 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -106,7 +106,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
vl > SVE_VL_ARCH_MAX))
return -EIO;
- buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL);
+ buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL_ACCOUNT);
if (!buf)
return -ENOMEM;
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1d46e185f31e..e3ec1a44f94d 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1064,7 +1064,12 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
struct sys_reg_desc const *r, bool raz)
{
u32 id = reg_to_encoding(r);
- u64 val = raz ? 0 : read_sanitised_ftr_reg(id);
+ u64 val;
+
+ if (raz)
+ return 0;
+
+ val = read_sanitised_ftr_reg(id);
switch (id) {
case SYS_ID_AA64PFR0_EL1:
@@ -1075,16 +1080,15 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3);
val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
+ if (irqchip_in_kernel(vcpu->kvm) &&
+ vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_GIC);
+ val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_GIC), 1);
+ }
break;
case SYS_ID_AA64PFR1_EL1:
- val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
- if (kvm_has_mte(vcpu->kvm)) {
- u64 pfr, mte;
-
- pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
- mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT);
- val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), mte);
- }
+ if (!kvm_has_mte(vcpu->kvm))
+ val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
@@ -1268,16 +1272,19 @@ static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
return __set_id_reg(vcpu, rd, uaddr, raz);
}
-static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- return __get_id_reg(vcpu, rd, uaddr, true);
+ return __set_id_reg(vcpu, rd, uaddr, true);
}
-static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
- const struct kvm_one_reg *reg, void __user *uaddr)
+static int get_raz_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
{
- return __set_id_reg(vcpu, rd, uaddr, true);
+ const u64 id = sys_reg_to_index(rd);
+ const u64 val = 0;
+
+ return reg_to_user(uaddr, &val, id);
}
static int set_wi_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
@@ -1388,7 +1395,7 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
#define ID_UNALLOCATED(crm, op2) { \
Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \
.access = access_raz_id_reg, \
- .get_user = get_raz_id_reg, \
+ .get_user = get_raz_reg, \
.set_user = set_raz_id_reg, \
}
@@ -1400,7 +1407,7 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
#define ID_HIDDEN(name) { \
SYS_DESC(SYS_##name), \
.access = access_raz_id_reg, \
- .get_user = get_raz_id_reg, \
+ .get_user = get_raz_reg, \
.set_user = set_raz_id_reg, \
}
@@ -1642,7 +1649,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
* previously (and pointlessly) advertised in the past...
*/
{ PMU_SYS_REG(SYS_PMSWINC_EL0),
- .get_user = get_raz_id_reg, .set_user = set_wi_reg,
+ .get_user = get_raz_reg, .set_user = set_wi_reg,
.access = access_pmswinc, .reset = NULL },
{ PMU_SYS_REG(SYS_PMSELR_EL0),
.access = access_pmselr, .reset = reset_pmselr, .reg = PMSELR_EL0 },
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 340c51d87677..0a06d0648970 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -134,7 +134,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0);
int i;
- dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL);
+ dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT);
if (!dist->spis)
return -ENOMEM;
diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c
index 79f8899b234c..475059bacedf 100644
--- a/arch/arm64/kvm/vgic/vgic-irqfd.c
+++ b/arch/arm64/kvm/vgic/vgic-irqfd.c
@@ -139,7 +139,7 @@ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm)
u32 nr = dist->nr_spis;
int i, ret;
- entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL);
+ entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL_ACCOUNT);
if (!entries)
return -ENOMEM;
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 61728c543eb9..089fc2ffcb43 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -48,7 +48,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
if (irq)
return irq;
- irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL);
+ irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT);
if (!irq)
return ERR_PTR(-ENOMEM);
@@ -332,7 +332,7 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
* we must be careful not to overrun the array.
*/
irq_count = READ_ONCE(dist->lpi_list_count);
- intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL);
+ intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL_ACCOUNT);
if (!intids)
return -ENOMEM;
@@ -985,7 +985,7 @@ static int vgic_its_alloc_collection(struct vgic_its *its,
if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL))
return E_ITS_MAPC_COLLECTION_OOR;
- collection = kzalloc(sizeof(*collection), GFP_KERNEL);
+ collection = kzalloc(sizeof(*collection), GFP_KERNEL_ACCOUNT);
if (!collection)
return -ENOMEM;
@@ -1029,7 +1029,7 @@ static struct its_ite *vgic_its_alloc_ite(struct its_device *device,
{
struct its_ite *ite;
- ite = kzalloc(sizeof(*ite), GFP_KERNEL);
+ ite = kzalloc(sizeof(*ite), GFP_KERNEL_ACCOUNT);
if (!ite)
return ERR_PTR(-ENOMEM);
@@ -1150,7 +1150,7 @@ static struct its_device *vgic_its_alloc_device(struct vgic_its *its,
{
struct its_device *device;
- device = kzalloc(sizeof(*device), GFP_KERNEL);
+ device = kzalloc(sizeof(*device), GFP_KERNEL_ACCOUNT);
if (!device)
return ERR_PTR(-ENOMEM);
@@ -1847,7 +1847,7 @@ void vgic_lpi_translation_cache_init(struct kvm *kvm)
struct vgic_translation_cache_entry *cte;
/* An allocation failure is not fatal */
- cte = kzalloc(sizeof(*cte), GFP_KERNEL);
+ cte = kzalloc(sizeof(*cte), GFP_KERNEL_ACCOUNT);
if (WARN_ON(!cte))
break;
@@ -1888,7 +1888,7 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
if (type != KVM_DEV_TYPE_ARM_VGIC_ITS)
return -ENODEV;
- its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL);
+ its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL_ACCOUNT);
if (!its)
return -ENOMEM;
@@ -2710,8 +2710,8 @@ static int vgic_its_set_attr(struct kvm_device *dev,
if (copy_from_user(&addr, uaddr, sizeof(addr)))
return -EFAULT;
- ret = vgic_check_ioaddr(dev->kvm, &its->vgic_its_base,
- addr, SZ_64K);
+ ret = vgic_check_iorange(dev->kvm, its->vgic_its_base,
+ addr, SZ_64K, KVM_VGIC_V3_ITS_SIZE);
if (ret)
return ret;
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index 7740995de982..0d000d2fe8d2 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -14,17 +14,21 @@
/* common helpers */
-int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
- phys_addr_t addr, phys_addr_t alignment)
+int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr,
+ phys_addr_t addr, phys_addr_t alignment,
+ phys_addr_t size)
{
- if (addr & ~kvm_phys_mask(kvm))
- return -E2BIG;
+ if (!IS_VGIC_ADDR_UNDEF(ioaddr))
+ return -EEXIST;
- if (!IS_ALIGNED(addr, alignment))
+ if (!IS_ALIGNED(addr, alignment) || !IS_ALIGNED(size, alignment))
return -EINVAL;
- if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
- return -EEXIST;
+ if (addr + size < addr)
+ return -EINVAL;
+
+ if (addr & ~kvm_phys_mask(kvm) || addr + size > kvm_phys_size(kvm))
+ return -E2BIG;
return 0;
}
@@ -57,7 +61,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
{
int r = 0;
struct vgic_dist *vgic = &kvm->arch.vgic;
- phys_addr_t *addr_ptr, alignment;
+ phys_addr_t *addr_ptr, alignment, size;
u64 undef_value = VGIC_ADDR_UNDEF;
mutex_lock(&kvm->lock);
@@ -66,16 +70,19 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
addr_ptr = &vgic->vgic_dist_base;
alignment = SZ_4K;
+ size = KVM_VGIC_V2_DIST_SIZE;
break;
case KVM_VGIC_V2_ADDR_TYPE_CPU:
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
addr_ptr = &vgic->vgic_cpu_base;
alignment = SZ_4K;
+ size = KVM_VGIC_V2_CPU_SIZE;
break;
case KVM_VGIC_V3_ADDR_TYPE_DIST:
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3);
addr_ptr = &vgic->vgic_dist_base;
alignment = SZ_64K;
+ size = KVM_VGIC_V3_DIST_SIZE;
break;
case KVM_VGIC_V3_ADDR_TYPE_REDIST: {
struct vgic_redist_region *rdreg;
@@ -140,7 +147,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
goto out;
if (write) {
- r = vgic_check_ioaddr(kvm, addr_ptr, *addr, alignment);
+ r = vgic_check_iorange(kvm, *addr_ptr, *addr, alignment, size);
if (!r)
*addr_ptr = *addr;
} else {
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index a09cdc0b953c..bf7ec4a78497 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -796,7 +796,9 @@ static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index,
struct vgic_dist *d = &kvm->arch.vgic;
struct vgic_redist_region *rdreg;
struct list_head *rd_regions = &d->rd_regions;
- size_t size = count * KVM_VGIC_V3_REDIST_SIZE;
+ int nr_vcpus = atomic_read(&kvm->online_vcpus);
+ size_t size = count ? count * KVM_VGIC_V3_REDIST_SIZE
+ : nr_vcpus * KVM_VGIC_V3_REDIST_SIZE;
int ret;
/* cross the end of memory ? */
@@ -834,13 +836,13 @@ static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index,
if (vgic_v3_rdist_overlap(kvm, base, size))
return -EINVAL;
- rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL);
+ rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL_ACCOUNT);
if (!rdreg)
return -ENOMEM;
rdreg->base = VGIC_ADDR_UNDEF;
- ret = vgic_check_ioaddr(kvm, &rdreg->base, base, SZ_64K);
+ ret = vgic_check_iorange(kvm, rdreg->base, base, SZ_64K, size);
if (ret)
goto free;
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 21a6207fb2ee..04f62c4b07fb 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -15,6 +15,7 @@
static bool group0_trap;
static bool group1_trap;
static bool common_trap;
+static bool dir_trap;
static bool gicv4_enable;
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
@@ -296,6 +297,8 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
vgic_v3->vgic_hcr |= ICH_HCR_TALL1;
if (common_trap)
vgic_v3->vgic_hcr |= ICH_HCR_TC;
+ if (dir_trap)
+ vgic_v3->vgic_hcr |= ICH_HCR_TDIR;
}
int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
@@ -483,8 +486,10 @@ bool vgic_v3_check_base(struct kvm *kvm)
return false;
list_for_each_entry(rdreg, &d->rd_regions, list) {
- if (rdreg->base + vgic_v3_rd_region_size(kvm, rdreg) <
- rdreg->base)
+ size_t sz = vgic_v3_rd_region_size(kvm, rdreg);
+
+ if (vgic_check_iorange(kvm, VGIC_ADDR_UNDEF,
+ rdreg->base, SZ_64K, sz))
return false;
}
@@ -671,11 +676,23 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
group1_trap = true;
}
- if (group0_trap || group1_trap || common_trap) {
- kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n",
+ if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) {
+ kvm_info("GICv3 with locally generated SEI\n");
+
+ group0_trap = true;
+ group1_trap = true;
+ if (ich_vtr_el2 & ICH_VTR_TDS_MASK)
+ dir_trap = true;
+ else
+ common_trap = true;
+ }
+
+ if (group0_trap || group1_trap || common_trap | dir_trap) {
+ kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n",
group0_trap ? "G0" : "",
group1_trap ? "G1" : "",
- common_trap ? "C" : "");
+ common_trap ? "C" : "",
+ dir_trap ? "D" : "");
static_branch_enable(&vgic_v3_cpuif_trap);
}
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index c1845d8f5f7e..772dd15a22c7 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -246,7 +246,7 @@ int vgic_v4_init(struct kvm *kvm)
nr_vcpus = atomic_read(&kvm->online_vcpus);
dist->its_vm.vpes = kcalloc(nr_vcpus, sizeof(*dist->its_vm.vpes),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!dist->its_vm.vpes)
return -ENOMEM;
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 14a9218641f5..3fd6c86a7ef3 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -172,8 +172,9 @@ void vgic_kick_vcpus(struct kvm *kvm);
void vgic_irq_handle_resampling(struct vgic_irq *irq,
bool lr_deactivated, bool lr_pending);
-int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
- phys_addr_t addr, phys_addr_t alignment);
+int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr,
+ phys_addr_t addr, phys_addr_t alignment,
+ phys_addr_t size);
void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
index d7bee210a798..83bcad72ec97 100644
--- a/arch/arm64/lib/strcmp.S
+++ b/arch/arm64/lib/strcmp.S
@@ -173,4 +173,4 @@ L(done):
ret
SYM_FUNC_END_PI(strcmp)
-EXPORT_SYMBOL_NOKASAN(strcmp)
+EXPORT_SYMBOL_NOHWKASAN(strcmp)
diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
index 48d44f7fddb1..e42bcfcd37e6 100644
--- a/arch/arm64/lib/strncmp.S
+++ b/arch/arm64/lib/strncmp.S
@@ -258,4 +258,4 @@ L(ret0):
ret
SYM_FUNC_END_PI(strncmp)
-EXPORT_SYMBOL_NOKASAN(strncmp)
+EXPORT_SYMBOL_NOHWKASAN(strncmp)
diff --git a/arch/m68k/68000/entry.S b/arch/m68k/68000/entry.S
index 259b3661b614..997b54933015 100644
--- a/arch/m68k/68000/entry.S
+++ b/arch/m68k/68000/entry.S
@@ -15,7 +15,6 @@
#include <asm/unistd.h>
#include <asm/errno.h>
#include <asm/setup.h>
-#include <asm/segment.h>
#include <asm/traps.h>
#include <asm/asm-offsets.h>
#include <asm/entry.h>
@@ -25,7 +24,6 @@
.globl system_call
.globl resume
.globl ret_from_exception
-.globl ret_from_signal
.globl sys_call_table
.globl bad_interrupt
.globl inthandler1
@@ -59,8 +57,6 @@ do_trace:
subql #4,%sp /* dummy return address */
SAVE_SWITCH_STACK
jbsr syscall_trace_leave
-
-ret_from_signal:
RESTORE_SWITCH_STACK
addql #4,%sp
jra ret_from_exception
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 774c35f47eea..0b50da08a9c5 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -29,7 +29,6 @@ config M68K
select NO_DMA if !MMU && !COLDFIRE
select OLD_SIGACTION
select OLD_SIGSUSPEND3
- select SET_FS
select UACCESS_MEMCPY if !MMU
select VIRT_TO_BUS
select ZONE_DMA
diff --git a/arch/m68k/coldfire/entry.S b/arch/m68k/coldfire/entry.S
index d43a02795a4a..9f337c70243a 100644
--- a/arch/m68k/coldfire/entry.S
+++ b/arch/m68k/coldfire/entry.S
@@ -31,7 +31,6 @@
#include <asm/thread_info.h>
#include <asm/errno.h>
#include <asm/setup.h>
-#include <asm/segment.h>
#include <asm/asm-offsets.h>
#include <asm/entry.h>
@@ -51,7 +50,6 @@ sw_usp:
.globl system_call
.globl resume
.globl ret_from_exception
-.globl ret_from_signal
.globl sys_call_table
.globl inthandler
@@ -98,8 +96,6 @@ ENTRY(system_call)
subql #4,%sp /* dummy return address */
SAVE_SWITCH_STACK
jbsr syscall_trace_leave
-
-ret_from_signal:
RESTORE_SWITCH_STACK
addql #4,%sp
diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h
index 3750819ac5a1..f4d82c619a5c 100644
--- a/arch/m68k/include/asm/processor.h
+++ b/arch/m68k/include/asm/processor.h
@@ -9,7 +9,6 @@
#define __ASM_M68K_PROCESSOR_H
#include <linux/thread_info.h>
-#include <asm/segment.h>
#include <asm/fpu.h>
#include <asm/ptrace.h>
@@ -75,11 +74,37 @@ static inline void wrusp(unsigned long usp)
#define TASK_UNMAPPED_BASE 0
#endif
+/* Address spaces (or Function Codes in Motorola lingo) */
+#define USER_DATA 1
+#define USER_PROGRAM 2
+#define SUPER_DATA 5
+#define SUPER_PROGRAM 6
+#define CPU_SPACE 7
+
+#ifdef CONFIG_CPU_HAS_ADDRESS_SPACES
+/*
+ * Set the SFC/DFC registers for special MM operations. For most normal
+ * operation these remain set to USER_DATA for the uaccess routines.
+ */
+static inline void set_fc(unsigned long val)
+{
+ WARN_ON_ONCE(in_interrupt());
+
+ __asm__ __volatile__ ("movec %0,%/sfc\n\t"
+ "movec %0,%/dfc\n\t"
+ : /* no outputs */ : "r" (val) : "memory");
+}
+#else
+static inline void set_fc(unsigned long val)
+{
+}
+#endif /* CONFIG_CPU_HAS_ADDRESS_SPACES */
+
struct thread_struct {
unsigned long ksp; /* kernel stack pointer */
unsigned long usp; /* user stack pointer */
unsigned short sr; /* saved status register */
- unsigned short fs; /* saved fs (sfc, dfc) */
+ unsigned short fc; /* saved fc (sfc, dfc) */
unsigned long crp[2]; /* cpu root pointer */
unsigned long esp0; /* points to SR of stack frame */
unsigned long faddr; /* info about last fault */
@@ -92,7 +117,7 @@ struct thread_struct {
#define INIT_THREAD { \
.ksp = sizeof(init_stack) + (unsigned long) init_stack, \
.sr = PS_S, \
- .fs = __KERNEL_DS, \
+ .fc = USER_DATA, \
}
/*
diff --git a/arch/m68k/include/asm/segment.h b/arch/m68k/include/asm/segment.h
deleted file mode 100644
index 2b5e68a71ef7..000000000000
--- a/arch/m68k/include/asm/segment.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _M68K_SEGMENT_H
-#define _M68K_SEGMENT_H
-
-/* define constants */
-/* Address spaces (FC0-FC2) */
-#define USER_DATA (1)
-#ifndef __USER_DS
-#define __USER_DS (USER_DATA)
-#endif
-#define USER_PROGRAM (2)
-#define SUPER_DATA (5)
-#ifndef __KERNEL_DS
-#define __KERNEL_DS (SUPER_DATA)
-#endif
-#define SUPER_PROGRAM (6)
-#define CPU_SPACE (7)
-
-#ifndef __ASSEMBLY__
-
-typedef struct {
- unsigned long seg;
-} mm_segment_t;
-
-#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
-
-#ifdef CONFIG_CPU_HAS_ADDRESS_SPACES
-/*
- * Get/set the SFC/DFC registers for MOVES instructions
- */
-#define USER_DS MAKE_MM_SEG(__USER_DS)
-#define KERNEL_DS MAKE_MM_SEG(__KERNEL_DS)
-
-static inline mm_segment_t get_fs(void)
-{
- mm_segment_t _v;
- __asm__ ("movec %/dfc,%0":"=r" (_v.seg):);
- return _v;
-}
-
-static inline void set_fs(mm_segment_t val)
-{
- __asm__ __volatile__ ("movec %0,%/sfc\n\t"
- "movec %0,%/dfc\n\t"
- : /* no outputs */ : "r" (val.seg) : "memory");
-}
-
-#else
-#define USER_DS MAKE_MM_SEG(TASK_SIZE)
-#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF)
-#define get_fs() (current_thread_info()->addr_limit)
-#define set_fs(x) (current_thread_info()->addr_limit = (x))
-#endif
-
-#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _M68K_SEGMENT_H */
diff --git a/arch/m68k/include/asm/thread_info.h b/arch/m68k/include/asm/thread_info.h
index 15a757073fa5..c952658ba792 100644
--- a/arch/m68k/include/asm/thread_info.h
+++ b/arch/m68k/include/asm/thread_info.h
@@ -4,7 +4,6 @@
#include <asm/types.h>
#include <asm/page.h>
-#include <asm/segment.h>
/*
* On machines with 4k pages we default to an 8k thread size, though we
@@ -27,7 +26,6 @@
struct thread_info {
struct task_struct *task; /* main task structure */
unsigned long flags;
- mm_segment_t addr_limit; /* thread address space */
int preempt_count; /* 0 => preemptable, <0 => BUG */
__u32 cpu; /* should always be 0 on m68k */
unsigned long tp_value; /* thread pointer */
@@ -37,7 +35,6 @@ struct thread_info {
#define INIT_THREAD_INFO(tsk) \
{ \
.task = &tsk, \
- .addr_limit = KERNEL_DS, \
.preempt_count = INIT_PREEMPT_COUNT, \
}
diff --git a/arch/m68k/include/asm/tlbflush.h b/arch/m68k/include/asm/tlbflush.h
index a6318ccd308f..b882e2f4f551 100644
--- a/arch/m68k/include/asm/tlbflush.h
+++ b/arch/m68k/include/asm/tlbflush.h
@@ -13,13 +13,12 @@ static inline void flush_tlb_kernel_page(void *addr)
if (CPU_IS_COLDFIRE) {
mmu_write(MMUOR, MMUOR_CNL);
} else if (CPU_IS_040_OR_060) {
- mm_segment_t old_fs = get_fs();
- set_fs(KERNEL_DS);
+ set_fc(SUPER_DATA);
__asm__ __volatile__(".chip 68040\n\t"
"pflush (%0)\n\t"
".chip 68k"
: : "a" (addr));
- set_fs(old_fs);
+ set_fc(USER_DATA);
} else if (CPU_IS_020_OR_030)
__asm__ __volatile__("pflush #4,#4,(%0)" : : "a" (addr));
}
@@ -84,12 +83,8 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
{
- if (vma->vm_mm == current->active_mm) {
- mm_segment_t old_fs = force_uaccess_begin();
-
+ if (vma->vm_mm == current->active_mm)
__flush_tlb_one(addr);
- force_uaccess_end(old_fs);
- }
}
static inline void flush_tlb_range(struct vm_area_struct *vma,
diff --git a/arch/m68k/include/asm/traps.h b/arch/m68k/include/asm/traps.h
index 4aff3358fbaf..a9d5c1c870d3 100644
--- a/arch/m68k/include/asm/traps.h
+++ b/arch/m68k/include/asm/traps.h
@@ -267,6 +267,10 @@ struct frame {
} un;
};
+#ifdef CONFIG_M68040
+asmlinkage void berr_040cleanup(struct frame *fp);
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* _M68K_TRAPS_H */
diff --git a/arch/m68k/include/asm/uaccess.h b/arch/m68k/include/asm/uaccess.h
index f98208ccbbcd..ba670523885c 100644
--- a/arch/m68k/include/asm/uaccess.h
+++ b/arch/m68k/include/asm/uaccess.h
@@ -9,13 +9,16 @@
*/
#include <linux/compiler.h>
#include <linux/types.h>
-#include <asm/segment.h>
#include <asm/extable.h>
/* We let the MMU do all checking */
static inline int access_ok(const void __user *addr,
unsigned long size)
{
+ /*
+ * XXX: for !CONFIG_CPU_HAS_ADDRESS_SPACES this really needs to check
+ * for TASK_SIZE!
+ */
return 1;
}
@@ -35,12 +38,9 @@ static inline int access_ok(const void __user *addr,
#define MOVES "move"
#endif
-extern int __put_user_bad(void);
-extern int __get_user_bad(void);
-
-#define __put_user_asm(res, x, ptr, bwl, reg, err) \
+#define __put_user_asm(inst, res, x, ptr, bwl, reg, err) \
asm volatile ("\n" \
- "1: "MOVES"."#bwl" %2,%1\n" \
+ "1: "inst"."#bwl" %2,%1\n" \
"2:\n" \
" .section .fixup,\"ax\"\n" \
" .even\n" \
@@ -56,6 +56,31 @@ asm volatile ("\n" \
: "+d" (res), "=m" (*(ptr)) \
: #reg (x), "i" (err))
+#define __put_user_asm8(inst, res, x, ptr) \
+do { \
+ const void *__pu_ptr = (const void __force *)(ptr); \
+ \
+ asm volatile ("\n" \
+ "1: "inst".l %2,(%1)+\n" \
+ "2: "inst".l %R2,(%1)\n" \
+ "3:\n" \
+ " .section .fixup,\"ax\"\n" \
+ " .even\n" \
+ "10: movel %3,%0\n" \
+ " jra 3b\n" \
+ " .previous\n" \
+ "\n" \
+ " .section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,10b\n" \
+ " .long 2b,10b\n" \
+ " .long 3b,10b\n" \
+ " .previous" \
+ : "+d" (res), "+a" (__pu_ptr) \
+ : "r" (x), "i" (-EFAULT) \
+ : "memory"); \
+} while (0)
+
/*
* These are the main single-value transfer routines. They automatically
* use the right size if we just have the right pointer type.
@@ -68,51 +93,29 @@ asm volatile ("\n" \
__chk_user_ptr(ptr); \
switch (sizeof (*(ptr))) { \
case 1: \
- __put_user_asm(__pu_err, __pu_val, ptr, b, d, -EFAULT); \
+ __put_user_asm(MOVES, __pu_err, __pu_val, ptr, b, d, -EFAULT); \
break; \
case 2: \
- __put_user_asm(__pu_err, __pu_val, ptr, w, r, -EFAULT); \
+ __put_user_asm(MOVES, __pu_err, __pu_val, ptr, w, r, -EFAULT); \
break; \
case 4: \
- __put_user_asm(__pu_err, __pu_val, ptr, l, r, -EFAULT); \
+ __put_user_asm(MOVES, __pu_err, __pu_val, ptr, l, r, -EFAULT); \
break; \
case 8: \
- { \
- const void __user *__pu_ptr = (ptr); \
- asm volatile ("\n" \
- "1: "MOVES".l %2,(%1)+\n" \
- "2: "MOVES".l %R2,(%1)\n" \
- "3:\n" \
- " .section .fixup,\"ax\"\n" \
- " .even\n" \
- "10: movel %3,%0\n" \
- " jra 3b\n" \
- " .previous\n" \
- "\n" \
- " .section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 1b,10b\n" \
- " .long 2b,10b\n" \
- " .long 3b,10b\n" \
- " .previous" \
- : "+d" (__pu_err), "+a" (__pu_ptr) \
- : "r" (__pu_val), "i" (-EFAULT) \
- : "memory"); \
+ __put_user_asm8(MOVES, __pu_err, __pu_val, ptr); \
break; \
- } \
default: \
- __pu_err = __put_user_bad(); \
- break; \
+ BUILD_BUG(); \
} \
__pu_err; \
})
#define put_user(x, ptr) __put_user(x, ptr)
-#define __get_user_asm(res, x, ptr, type, bwl, reg, err) ({ \
+#define __get_user_asm(inst, res, x, ptr, type, bwl, reg, err) ({ \
type __gu_val; \
asm volatile ("\n" \
- "1: "MOVES"."#bwl" %2,%1\n" \
+ "1: "inst"."#bwl" %2,%1\n" \
"2:\n" \
" .section .fixup,\"ax\"\n" \
" .even\n" \
@@ -130,53 +133,57 @@ asm volatile ("\n" \
(x) = (__force typeof(*(ptr)))(__force unsigned long)__gu_val; \
})
+#define __get_user_asm8(inst, res, x, ptr) \
+do { \
+ const void *__gu_ptr = (const void __force *)(ptr); \
+ union { \
+ u64 l; \
+ __typeof__(*(ptr)) t; \
+ } __gu_val; \
+ \
+ asm volatile ("\n" \
+ "1: "inst".l (%2)+,%1\n" \
+ "2: "inst".l (%2),%R1\n" \
+ "3:\n" \
+ " .section .fixup,\"ax\"\n" \
+ " .even\n" \
+ "10: move.l %3,%0\n" \
+ " sub.l %1,%1\n" \
+ " sub.l %R1,%R1\n" \
+ " jra 3b\n" \
+ " .previous\n" \
+ "\n" \
+ " .section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,10b\n" \
+ " .long 2b,10b\n" \
+ " .previous" \
+ : "+d" (res), "=&r" (__gu_val.l), \
+ "+a" (__gu_ptr) \
+ : "i" (-EFAULT) \
+ : "memory"); \
+ (x) = __gu_val.t; \
+} while (0)
+
#define __get_user(x, ptr) \
({ \
int __gu_err = 0; \
__chk_user_ptr(ptr); \
switch (sizeof(*(ptr))) { \
case 1: \
- __get_user_asm(__gu_err, x, ptr, u8, b, d, -EFAULT); \
+ __get_user_asm(MOVES, __gu_err, x, ptr, u8, b, d, -EFAULT); \
break; \
case 2: \
- __get_user_asm(__gu_err, x, ptr, u16, w, r, -EFAULT); \
+ __get_user_asm(MOVES, __gu_err, x, ptr, u16, w, r, -EFAULT); \
break; \
case 4: \
- __get_user_asm(__gu_err, x, ptr, u32, l, r, -EFAULT); \
+ __get_user_asm(MOVES, __gu_err, x, ptr, u32, l, r, -EFAULT); \
break; \
- case 8: { \
- const void __user *__gu_ptr = (ptr); \
- union { \
- u64 l; \
- __typeof__(*(ptr)) t; \
- } __gu_val; \
- asm volatile ("\n" \
- "1: "MOVES".l (%2)+,%1\n" \
- "2: "MOVES".l (%2),%R1\n" \
- "3:\n" \
- " .section .fixup,\"ax\"\n" \
- " .even\n" \
- "10: move.l %3,%0\n" \
- " sub.l %1,%1\n" \
- " sub.l %R1,%R1\n" \
- " jra 3b\n" \
- " .previous\n" \
- "\n" \
- " .section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 1b,10b\n" \
- " .long 2b,10b\n" \
- " .previous" \
- : "+d" (__gu_err), "=&r" (__gu_val.l), \
- "+a" (__gu_ptr) \
- : "i" (-EFAULT) \
- : "memory"); \
- (x) = __gu_val.t; \
+ case 8: \
+ __get_user_asm8(MOVES, __gu_err, x, ptr); \
break; \
- } \
default: \
- __gu_err = __get_user_bad(); \
- break; \
+ BUILD_BUG(); \
} \
__gu_err; \
})
@@ -322,16 +329,19 @@ __constant_copy_to_user(void __user *to, const void *from, unsigned long n)
switch (n) {
case 1:
- __put_user_asm(res, *(u8 *)from, (u8 __user *)to, b, d, 1);
+ __put_user_asm(MOVES, res, *(u8 *)from, (u8 __user *)to,
+ b, d, 1);
break;
case 2:
- __put_user_asm(res, *(u16 *)from, (u16 __user *)to, w, r, 2);
+ __put_user_asm(MOVES, res, *(u16 *)from, (u16 __user *)to,
+ w, r, 2);
break;
case 3:
__constant_copy_to_user_asm(res, to, from, tmp, 3, w, b,);
break;
case 4:
- __put_user_asm(res, *(u32 *)from, (u32 __user *)to, l, r, 4);
+ __put_user_asm(MOVES, res, *(u32 *)from, (u32 __user *)to,
+ l, r, 4);
break;
case 5:
__constant_copy_to_user_asm(res, to, from, tmp, 5, l, b,);
@@ -380,8 +390,65 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
#define INLINE_COPY_FROM_USER
#define INLINE_COPY_TO_USER
-#define user_addr_max() \
- (uaccess_kernel() ? ~0UL : TASK_SIZE)
+#define HAVE_GET_KERNEL_NOFAULT
+
+#define __get_kernel_nofault(dst, src, type, err_label) \
+do { \
+ type *__gk_dst = (type *)(dst); \
+ type *__gk_src = (type *)(src); \
+ int __gk_err = 0; \
+ \
+ switch (sizeof(type)) { \
+ case 1: \
+ __get_user_asm("move", __gk_err, *__gk_dst, __gk_src, \
+ u8, b, d, -EFAULT); \
+ break; \
+ case 2: \
+ __get_user_asm("move", __gk_err, *__gk_dst, __gk_src, \
+ u16, w, r, -EFAULT); \
+ break; \
+ case 4: \
+ __get_user_asm("move", __gk_err, *__gk_dst, __gk_src, \
+ u32, l, r, -EFAULT); \
+ break; \
+ case 8: \
+ __get_user_asm8("move", __gk_err, *__gk_dst, __gk_src); \
+ break; \
+ default: \
+ BUILD_BUG(); \
+ } \
+ if (unlikely(__gk_err)) \
+ goto err_label; \
+} while (0)
+
+#define __put_kernel_nofault(dst, src, type, err_label) \
+do { \
+ type __pk_src = *(type *)(src); \
+ type *__pk_dst = (type *)(dst); \
+ int __pk_err = 0; \
+ \
+ switch (sizeof(type)) { \
+ case 1: \
+ __put_user_asm("move", __pk_err, __pk_src, __pk_dst, \
+ b, d, -EFAULT); \
+ break; \
+ case 2: \
+ __put_user_asm("move", __pk_err, __pk_src, __pk_dst, \
+ w, r, -EFAULT); \
+ break; \
+ case 4: \
+ __put_user_asm("move", __pk_err, __pk_src, __pk_dst, \
+ l, r, -EFAULT); \
+ break; \
+ case 8: \
+ __put_user_asm8("move", __pk_err, __pk_src, __pk_dst); \
+ break; \
+ default: \
+ BUILD_BUG(); \
+ } \
+ if (unlikely(__pk_err)) \
+ goto err_label; \
+} while (0)
extern long strncpy_from_user(char *dst, const char __user *src, long count);
extern __must_check long strnlen_user(const char __user *str, long n);
diff --git a/arch/m68k/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets.c
index ccea355052ef..906d73230537 100644
--- a/arch/m68k/kernel/asm-offsets.c
+++ b/arch/m68k/kernel/asm-offsets.c
@@ -31,7 +31,7 @@ int main(void)
DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp));
DEFINE(THREAD_USP, offsetof(struct thread_struct, usp));
DEFINE(THREAD_SR, offsetof(struct thread_struct, sr));
- DEFINE(THREAD_FS, offsetof(struct thread_struct, fs));
+ DEFINE(THREAD_FC, offsetof(struct thread_struct, fc));
DEFINE(THREAD_CRP, offsetof(struct thread_struct, crp));
DEFINE(THREAD_ESP0, offsetof(struct thread_struct, esp0));
DEFINE(THREAD_FPREG, offsetof(struct thread_struct, fp));
diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
index 9dd76fbb7c6b..9434fca68de5 100644
--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S
@@ -36,7 +36,6 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/setup.h>
-#include <asm/segment.h>
#include <asm/traps.h>
#include <asm/unistd.h>
#include <asm/asm-offsets.h>
@@ -78,20 +77,38 @@ ENTRY(__sys_clone3)
ENTRY(sys_sigreturn)
SAVE_SWITCH_STACK
- movel %sp,%sp@- | switch_stack pointer
- pea %sp@(SWITCH_STACK_SIZE+4) | pt_regs pointer
+ movel %sp,%a1 | switch_stack pointer
+ lea %sp@(SWITCH_STACK_SIZE),%a0 | pt_regs pointer
+ lea %sp@(-84),%sp | leave a gap
+ movel %a1,%sp@-
+ movel %a0,%sp@-
jbsr do_sigreturn
- addql #8,%sp
- RESTORE_SWITCH_STACK
- rts
+ jra 1f | shared with rt_sigreturn()
ENTRY(sys_rt_sigreturn)
SAVE_SWITCH_STACK
- movel %sp,%sp@- | switch_stack pointer
- pea %sp@(SWITCH_STACK_SIZE+4) | pt_regs pointer
+ movel %sp,%a1 | switch_stack pointer
+ lea %sp@(SWITCH_STACK_SIZE),%a0 | pt_regs pointer
+ lea %sp@(-84),%sp | leave a gap
+ movel %a1,%sp@-
+ movel %a0,%sp@-
+ | stack contents:
+ | [original pt_regs address] [original switch_stack address]
+ | [gap] [switch_stack] [pt_regs] [exception frame]
jbsr do_rt_sigreturn
- addql #8,%sp
+
+1:
+ | stack contents now:
+ | [original pt_regs address] [original switch_stack address]
+ | [unused part of the gap] [moved switch_stack] [moved pt_regs]
+ | [replacement exception frame]
+ | return value of do_{rt_,}sigreturn() points to moved switch_stack.
+
+ movel %d0,%sp | discard the leftover junk
RESTORE_SWITCH_STACK
+ | stack contents now is just [syscall return address] [pt_regs] [frame]
+ | return pt_regs.d0
+ movel %sp@(PT_OFF_D0+4),%d0
rts
ENTRY(buserr)
@@ -182,25 +199,6 @@ do_trace_exit:
addql #4,%sp
jra .Lret_from_exception
-ENTRY(ret_from_signal)
- movel %curptr@(TASK_STACK),%a1
- tstb %a1@(TINFO_FLAGS+2)
- jge 1f
- jbsr syscall_trace
-1: RESTORE_SWITCH_STACK
- addql #4,%sp
-/* on 68040 complete pending writebacks if any */
-#ifdef CONFIG_M68040
- bfextu %sp@(PT_OFF_FORMATVEC){#0,#4},%d0
- subql #7,%d0 | bus error frame ?
- jbne 1f
- movel %sp,%sp@-
- jbsr berr_040cleanup
- addql #4,%sp
-1:
-#endif
- jra .Lret_from_exception
-
ENTRY(system_call)
SAVE_ALL_SYS
@@ -338,7 +336,7 @@ resume:
/* save fs (sfc,%dfc) (may be pointing to kernel memory) */
movec %sfc,%d0
- movew %d0,%a0@(TASK_THREAD+THREAD_FS)
+ movew %d0,%a0@(TASK_THREAD+THREAD_FC)
/* save usp */
/* it is better to use a movel here instead of a movew 8*) */
@@ -424,7 +422,7 @@ resume:
movel %a0,%usp
/* restore fs (sfc,%dfc) */
- movew %a1@(TASK_THREAD+THREAD_FS),%a0
+ movew %a1@(TASK_THREAD+THREAD_FC),%a0
movec %a0,%sfc
movec %a0,%dfc
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index db49f9091711..1ab692b952cd 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -92,7 +92,7 @@ void show_regs(struct pt_regs * regs)
void flush_thread(void)
{
- current->thread.fs = __USER_DS;
+ current->thread.fc = USER_DATA;
#ifdef CONFIG_FPU
if (!FPU_IS_EMU) {
unsigned long zero = 0;
@@ -155,7 +155,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
* Must save the current SFC/DFC value, NOT the value when
* the parent was last descheduled - RGH 10-08-96
*/
- p->thread.fs = get_fs().seg;
+ p->thread.fc = USER_DATA;
if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
/* kernel thread */
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index 8f215e79e70e..338817d0cb3f 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -447,7 +447,7 @@ static inline void save_fpu_state(struct sigcontext *sc, struct pt_regs *regs)
if (CPU_IS_060 ? sc->sc_fpstate[2] : sc->sc_fpstate[0]) {
fpu_version = sc->sc_fpstate[0];
- if (CPU_IS_020_OR_030 &&
+ if (CPU_IS_020_OR_030 && !regs->stkadj &&
regs->vector >= (VEC_FPBRUC * 4) &&
regs->vector <= (VEC_FPNAN * 4)) {
/* Clear pending exception in 68882 idle frame */
@@ -510,7 +510,7 @@ static inline int rt_save_fpu_state(struct ucontext __user *uc, struct pt_regs *
if (!(CPU_IS_060 || CPU_IS_COLDFIRE))
context_size = fpstate[1];
fpu_version = fpstate[0];
- if (CPU_IS_020_OR_030 &&
+ if (CPU_IS_020_OR_030 && !regs->stkadj &&
regs->vector >= (VEC_FPBRUC * 4) &&
regs->vector <= (VEC_FPNAN * 4)) {
/* Clear pending exception in 68882 idle frame */
@@ -641,56 +641,35 @@ static inline void siginfo_build_tests(void)
static int mangle_kernel_stack(struct pt_regs *regs, int formatvec,
void __user *fp)
{
- int fsize = frame_extra_sizes(formatvec >> 12);
- if (fsize < 0) {
+ int extra = frame_extra_sizes(formatvec >> 12);
+ char buf[sizeof_field(struct frame, un)];
+
+ if (extra < 0) {
/*
* user process trying to return with weird frame format
*/
pr_debug("user process returning with weird frame format\n");
- return 1;
+ return -1;
}
- if (!fsize) {
- regs->format = formatvec >> 12;
- regs->vector = formatvec & 0xfff;
- } else {
- struct switch_stack *sw = (struct switch_stack *)regs - 1;
- /* yes, twice as much as max(sizeof(frame.un.fmt<x>)) */
- unsigned long buf[sizeof_field(struct frame, un) / 2];
-
- /* that'll make sure that expansion won't crap over data */
- if (copy_from_user(buf + fsize / 4, fp, fsize))
- return 1;
-
- /* point of no return */
- regs->format = formatvec >> 12;
- regs->vector = formatvec & 0xfff;
-#define frame_offset (sizeof(struct pt_regs)+sizeof(struct switch_stack))
- __asm__ __volatile__ (
-#ifdef CONFIG_COLDFIRE
- " movel %0,%/sp\n\t"
- " bra ret_from_signal\n"
-#else
- " movel %0,%/a0\n\t"
- " subl %1,%/a0\n\t" /* make room on stack */
- " movel %/a0,%/sp\n\t" /* set stack pointer */
- /* move switch_stack and pt_regs */
- "1: movel %0@+,%/a0@+\n\t"
- " dbra %2,1b\n\t"
- " lea %/sp@(%c3),%/a0\n\t" /* add offset of fmt */
- " lsrl #2,%1\n\t"
- " subql #1,%1\n\t"
- /* copy to the gap we'd made */
- "2: movel %4@+,%/a0@+\n\t"
- " dbra %1,2b\n\t"
- " bral ret_from_signal\n"
+ if (extra && copy_from_user(buf, fp, extra))
+ return -1;
+ regs->format = formatvec >> 12;
+ regs->vector = formatvec & 0xfff;
+ if (extra) {
+ void *p = (struct switch_stack *)regs - 1;
+ struct frame *new = (void *)regs - extra;
+ int size = sizeof(struct pt_regs)+sizeof(struct switch_stack);
+
+ memmove(p - extra, p, size);
+ memcpy(p - extra + size, buf, extra);
+ current->thread.esp0 = (unsigned long)&new->ptregs;
+#ifdef CONFIG_M68040
+ /* on 68040 complete pending writebacks if any */
+ if (new->ptregs.format == 7) // bus error frame
+ berr_040cleanup(new);
#endif
- : /* no outputs, it doesn't ever return */
- : "a" (sw), "d" (fsize), "d" (frame_offset/4-1),
- "n" (frame_offset), "a" (buf + fsize/4)
- : "a0");
-#undef frame_offset
}
- return 0;
+ return extra;
}
static inline int
@@ -698,7 +677,6 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *usc, void __u
{
int formatvec;
struct sigcontext context;
- int err = 0;
siginfo_build_tests();
@@ -707,7 +685,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *usc, void __u
/* get previous context */
if (copy_from_user(&context, usc, sizeof(context)))
- goto badframe;
+ return -1;
/* restore passed registers */
regs->d0 = context.sc_d0;
@@ -720,15 +698,10 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *usc, void __u
wrusp(context.sc_usp);
formatvec = context.sc_formatvec;
- err = restore_fpu_state(&context);
+ if (restore_fpu_state(&context))
+ return -1;
- if (err || mangle_kernel_stack(regs, formatvec, fp))
- goto badframe;
-
- return 0;
-
-badframe:
- return 1;
+ return mangle_kernel_stack(regs, formatvec, fp);
}
static inline int
@@ -745,7 +718,7 @@ rt_restore_ucontext(struct pt_regs *regs, struct switch_stack *sw,
err = __get_user(temp, &uc->uc_mcontext.version);
if (temp != MCONTEXT_VERSION)
- goto badframe;
+ return -1;
/* restore passed registers */
err |= __get_user(regs->d0, &gregs[0]);
err |= __get_user(regs->d1, &gregs[1]);
@@ -774,22 +747,17 @@ rt_restore_ucontext(struct pt_regs *regs, struct switch_stack *sw,
err |= restore_altstack(&uc->uc_stack);
if (err)
- goto badframe;
+ return -1;
- if (mangle_kernel_stack(regs, temp, &uc->uc_extra))
- goto badframe;
-
- return 0;
-
-badframe:
- return 1;
+ return mangle_kernel_stack(regs, temp, &uc->uc_extra);
}
-asmlinkage int do_sigreturn(struct pt_regs *regs, struct switch_stack *sw)
+asmlinkage void *do_sigreturn(struct pt_regs *regs, struct switch_stack *sw)
{
unsigned long usp = rdusp();
struct sigframe __user *frame = (struct sigframe __user *)(usp - 4);
sigset_t set;
+ int size;
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
@@ -801,20 +769,22 @@ asmlinkage int do_sigreturn(struct pt_regs *regs, struct switch_stack *sw)
set_current_blocked(&set);
- if (restore_sigcontext(regs, &frame->sc, frame + 1))
+ size = restore_sigcontext(regs, &frame->sc, frame + 1);
+ if (size < 0)
goto badframe;
- return regs->d0;
+ return (void *)sw - size;
badframe:
force_sig(SIGSEGV);
- return 0;
+ return sw;
}
-asmlinkage int do_rt_sigreturn(struct pt_regs *regs, struct switch_stack *sw)
+asmlinkage void *do_rt_sigreturn(struct pt_regs *regs, struct switch_stack *sw)
{
unsigned long usp = rdusp();
struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(usp - 4);
sigset_t set;
+ int size;
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
@@ -823,27 +793,34 @@ asmlinkage int do_rt_sigreturn(struct pt_regs *regs, struct switch_stack *sw)
set_current_blocked(&set);
- if (rt_restore_ucontext(regs, sw, &frame->uc))
+ size = rt_restore_ucontext(regs, sw, &frame->uc);
+ if (size < 0)
goto badframe;
- return regs->d0;
+ return (void *)sw - size;
badframe:
force_sig(SIGSEGV);
- return 0;
+ return sw;
+}
+
+static inline struct pt_regs *rte_regs(struct pt_regs *regs)
+{
+ return (void *)regs + regs->stkadj;
}
static void setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs,
unsigned long mask)
{
+ struct pt_regs *tregs = rte_regs(regs);
sc->sc_mask = mask;
sc->sc_usp = rdusp();
sc->sc_d0 = regs->d0;
sc->sc_d1 = regs->d1;
sc->sc_a0 = regs->a0;
sc->sc_a1 = regs->a1;
- sc->sc_sr = regs->sr;
- sc->sc_pc = regs->pc;
- sc->sc_formatvec = regs->format << 12 | regs->vector;
+ sc->sc_sr = tregs->sr;
+ sc->sc_pc = tregs->pc;
+ sc->sc_formatvec = tregs->format << 12 | tregs->vector;
save_a5_state(sc, regs);
save_fpu_state(sc, regs);
}
@@ -851,6 +828,7 @@ static void setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs,
static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs *regs)
{
struct switch_stack *sw = (struct switch_stack *)regs - 1;
+ struct pt_regs *tregs = rte_regs(regs);
greg_t __user *gregs = uc->uc_mcontext.gregs;
int err = 0;
@@ -871,9 +849,9 @@ static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs *
err |= __put_user(sw->a5, &gregs[13]);
err |= __put_user(sw->a6, &gregs[14]);
err |= __put_user(rdusp(), &gregs[15]);
- err |= __put_user(regs->pc, &gregs[16]);
- err |= __put_user(regs->sr, &gregs[17]);
- err |= __put_user((regs->format << 12) | regs->vector, &uc->uc_formatvec);
+ err |= __put_user(tregs->pc, &gregs[16]);
+ err |= __put_user(tregs->sr, &gregs[17]);
+ err |= __put_user((tregs->format << 12) | tregs->vector, &uc->uc_formatvec);
err |= rt_save_fpu_state(uc, regs);
return err;
}
@@ -890,13 +868,14 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
struct pt_regs *regs)
{
struct sigframe __user *frame;
- int fsize = frame_extra_sizes(regs->format);
+ struct pt_regs *tregs = rte_regs(regs);
+ int fsize = frame_extra_sizes(tregs->format);
struct sigcontext context;
int err = 0, sig = ksig->sig;
if (fsize < 0) {
pr_debug("setup_frame: Unknown frame format %#x\n",
- regs->format);
+ tregs->format);
return -EFAULT;
}
@@ -907,7 +886,7 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
err |= __put_user(sig, &frame->sig);
- err |= __put_user(regs->vector, &frame->code);
+ err |= __put_user(tregs->vector, &frame->code);
err |= __put_user(&frame->sc, &frame->psc);
if (_NSIG_WORDS > 1)
@@ -934,33 +913,27 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
push_cache ((unsigned long) &frame->retcode);
/*
- * Set up registers for signal handler. All the state we are about
- * to destroy is successfully copied to sigframe.
- */
- wrusp ((unsigned long) frame);
- regs->pc = (unsigned long) ksig->ka.sa.sa_handler;
- adjustformat(regs);
-
- /*
* This is subtle; if we build more than one sigframe, all but the
* first one will see frame format 0 and have fsize == 0, so we won't
* screw stkadj.
*/
- if (fsize)
+ if (fsize) {
regs->stkadj = fsize;
-
- /* Prepare to skip over the extra stuff in the exception frame. */
- if (regs->stkadj) {
- struct pt_regs *tregs =
- (struct pt_regs *)((ulong)regs + regs->stkadj);
+ tregs = rte_regs(regs);
pr_debug("Performing stackadjust=%04lx\n", regs->stkadj);
- /* This must be copied with decreasing addresses to
- handle overlaps. */
tregs->vector = 0;
tregs->format = 0;
- tregs->pc = regs->pc;
tregs->sr = regs->sr;
}
+
+ /*
+ * Set up registers for signal handler. All the state we are about
+ * to destroy is successfully copied to sigframe.
+ */
+ wrusp ((unsigned long) frame);
+ tregs->pc = (unsigned long) ksig->ka.sa.sa_handler;
+ adjustformat(regs);
+
return 0;
}
@@ -968,7 +941,8 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
- int fsize = frame_extra_sizes(regs->format);
+ struct pt_regs *tregs = rte_regs(regs);
+ int fsize = frame_extra_sizes(tregs->format);
int err = 0, sig = ksig->sig;
if (fsize < 0) {
@@ -1019,33 +993,26 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
push_cache ((unsigned long) &frame->retcode);
/*
- * Set up registers for signal handler. All the state we are about
- * to destroy is successfully copied to sigframe.
- */
- wrusp ((unsigned long) frame);
- regs->pc = (unsigned long) ksig->ka.sa.sa_handler;
- adjustformat(regs);
-
- /*
* This is subtle; if we build more than one sigframe, all but the
* first one will see frame format 0 and have fsize == 0, so we won't
* screw stkadj.
*/
- if (fsize)
+ if (fsize) {
regs->stkadj = fsize;
-
- /* Prepare to skip over the extra stuff in the exception frame. */
- if (regs->stkadj) {
- struct pt_regs *tregs =
- (struct pt_regs *)((ulong)regs + regs->stkadj);
+ tregs = rte_regs(regs);
pr_debug("Performing stackadjust=%04lx\n", regs->stkadj);
- /* This must be copied with decreasing addresses to
- handle overlaps. */
tregs->vector = 0;
tregs->format = 0;
- tregs->pc = regs->pc;
tregs->sr = regs->sr;
}
+
+ /*
+ * Set up registers for signal handler. All the state we are about
+ * to destroy is successfully copied to sigframe.
+ */
+ wrusp ((unsigned long) frame);
+ tregs->pc = (unsigned long) ksig->ka.sa.sa_handler;
+ adjustformat(regs);
return 0;
}
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index 5b19fcdcd69e..9718ce94cc84 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -181,9 +181,8 @@ static inline void access_error060 (struct frame *fp)
static inline unsigned long probe040(int iswrite, unsigned long addr, int wbs)
{
unsigned long mmusr;
- mm_segment_t old_fs = get_fs();
- set_fs(MAKE_MM_SEG(wbs));
+ set_fc(wbs);
if (iswrite)
asm volatile (".chip 68040; ptestw (%0); .chip 68k" : : "a" (addr));
@@ -192,7 +191,7 @@ static inline unsigned long probe040(int iswrite, unsigned long addr, int wbs)
asm volatile (".chip 68040; movec %%mmusr,%0; .chip 68k" : "=r" (mmusr));
- set_fs(old_fs);
+ set_fc(USER_DATA);
return mmusr;
}
@@ -201,10 +200,8 @@ static inline int do_040writeback1(unsigned short wbs, unsigned long wba,
unsigned long wbd)
{
int res = 0;
- mm_segment_t old_fs = get_fs();
- /* set_fs can not be moved, otherwise put_user() may oops */
- set_fs(MAKE_MM_SEG(wbs));
+ set_fc(wbs);
switch (wbs & WBSIZ_040) {
case BA_SIZE_BYTE:
@@ -218,9 +215,7 @@ static inline int do_040writeback1(unsigned short wbs, unsigned long wba,
break;
}
- /* set_fs can not be moved, otherwise put_user() may oops */
- set_fs(old_fs);
-
+ set_fc(USER_DATA);
pr_debug("do_040writeback1, res=%d\n", res);
diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c
index 90f4e9ca1276..4fab34791758 100644
--- a/arch/m68k/mac/misc.c
+++ b/arch/m68k/mac/misc.c
@@ -18,7 +18,6 @@
#include <linux/uaccess.h>
#include <asm/io.h>
-#include <asm/segment.h>
#include <asm/setup.h>
#include <asm/macintosh.h>
#include <asm/mac_via.h>
diff --git a/arch/m68k/mm/cache.c b/arch/m68k/mm/cache.c
index b486c0889eec..dde978e66f14 100644
--- a/arch/m68k/mm/cache.c
+++ b/arch/m68k/mm/cache.c
@@ -49,24 +49,7 @@ static unsigned long virt_to_phys_slow(unsigned long vaddr)
if (mmusr & MMU_R_040)
return (mmusr & PAGE_MASK) | (vaddr & ~PAGE_MASK);
} else {
- unsigned short mmusr;
- unsigned long *descaddr;
-
- asm volatile ("ptestr %3,%2@,#7,%0\n\t"
- "pmove %%psr,%1"
- : "=a&" (descaddr), "=m" (mmusr)
- : "a" (vaddr), "d" (get_fs().seg));
- if (mmusr & (MMU_I|MMU_B|MMU_L))
- return 0;
- descaddr = phys_to_virt((unsigned long)descaddr);
- switch (mmusr & MMU_NUM) {
- case 1:
- return (*descaddr & 0xfe000000) | (vaddr & 0x01ffffff);
- case 2:
- return (*descaddr & 0xfffc0000) | (vaddr & 0x0003ffff);
- case 3:
- return (*descaddr & PAGE_MASK) | (vaddr & ~PAGE_MASK);
- }
+ WARN_ON_ONCE(!CPU_IS_040_OR_060);
}
return 0;
}
@@ -107,11 +90,9 @@ void flush_icache_user_range(unsigned long address, unsigned long endaddr)
void flush_icache_range(unsigned long address, unsigned long endaddr)
{
- mm_segment_t old_fs = get_fs();
-
- set_fs(KERNEL_DS);
+ set_fc(SUPER_DATA);
flush_icache_user_range(address, endaddr);
- set_fs(old_fs);
+ set_fc(USER_DATA);
}
EXPORT_SYMBOL(flush_icache_range);
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index 5d749e188246..1b47bec15832 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -72,12 +72,6 @@ void __init paging_init(void)
if (!empty_zero_page)
panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
__func__, PAGE_SIZE, PAGE_SIZE);
-
- /*
- * Set up SFC/DFC registers (user data space).
- */
- set_fs (USER_DS);
-
max_zone_pfn[ZONE_DMA] = end_mem >> PAGE_SHIFT;
free_area_init(max_zone_pfn);
}
diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c
index 1269d513b221..20ddf71b43d0 100644
--- a/arch/m68k/mm/kmap.c
+++ b/arch/m68k/mm/kmap.c
@@ -17,7 +17,6 @@
#include <linux/vmalloc.h>
#include <asm/setup.h>
-#include <asm/segment.h>
#include <asm/page.h>
#include <asm/io.h>
#include <asm/tlbflush.h>
diff --git a/arch/m68k/mm/memory.c b/arch/m68k/mm/memory.c
index fe75aecfb238..c2c03b0a1567 100644
--- a/arch/m68k/mm/memory.c
+++ b/arch/m68k/mm/memory.c
@@ -15,7 +15,6 @@
#include <linux/gfp.h>
#include <asm/setup.h>
-#include <asm/segment.h>
#include <asm/page.h>
#include <asm/traps.h>
#include <asm/machdep.h>
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 3a653f0a4188..9f3f77785aa7 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -467,7 +467,7 @@ void __init paging_init(void)
/*
* Set up SFC/DFC registers
*/
- set_fs(KERNEL_DS);
+ set_fc(USER_DATA);
#ifdef DEBUG
printk ("before free_area_init\n");
diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c
index f7dd47232b6c..203f428a0344 100644
--- a/arch/m68k/sun3/config.c
+++ b/arch/m68k/sun3/config.c
@@ -31,7 +31,6 @@
#include <asm/intersil.h>
#include <asm/irq.h>
#include <asm/sections.h>
-#include <asm/segment.h>
#include <asm/sun3ints.h>
char sun3_reserved_pmeg[SUN3_PMEGS_NUM];
@@ -89,7 +88,7 @@ void __init sun3_init(void)
sun3_reserved_pmeg[249] = 1;
sun3_reserved_pmeg[252] = 1;
sun3_reserved_pmeg[253] = 1;
- set_fs(KERNEL_DS);
+ set_fc(USER_DATA);
}
/* Without this, Bad Things happen when something calls arch_reset. */
diff --git a/arch/m68k/sun3/mmu_emu.c b/arch/m68k/sun3/mmu_emu.c
index 7aa879b7c7ff..7ec20817c0c9 100644
--- a/arch/m68k/sun3/mmu_emu.c
+++ b/arch/m68k/sun3/mmu_emu.c
@@ -23,7 +23,6 @@
#include <linux/uaccess.h>
#include <asm/page.h>
#include <asm/sun3mmu.h>
-#include <asm/segment.h>
#include <asm/oplib.h>
#include <asm/mmu_context.h>
#include <asm/dvma.h>
@@ -191,14 +190,13 @@ void __init mmu_emu_init(unsigned long bootmem_end)
for(seg = 0; seg < PAGE_OFFSET; seg += SUN3_PMEG_SIZE)
sun3_put_segmap(seg, SUN3_INVALID_PMEG);
- set_fs(MAKE_MM_SEG(3));
+ set_fc(3);
for(seg = 0; seg < 0x10000000; seg += SUN3_PMEG_SIZE) {
i = sun3_get_segmap(seg);
for(j = 1; j < CONTEXTS_NUM; j++)
(*(romvec->pv_setctxt))(j, (void *)seg, i);
}
- set_fs(KERNEL_DS);
-
+ set_fc(USER_DATA);
}
/* erase the mappings for a dead context. Uses the pg_dir for hints
diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c
index 41ae422119d3..36cc280a4505 100644
--- a/arch/m68k/sun3/sun3ints.c
+++ b/arch/m68k/sun3/sun3ints.c
@@ -11,7 +11,6 @@
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
-#include <asm/segment.h>
#include <asm/intersil.h>
#include <asm/oplib.h>
#include <asm/sun3ints.h>
diff --git a/arch/m68k/sun3x/prom.c b/arch/m68k/sun3x/prom.c
index 74d2fe57524b..64c23bfaa90c 100644
--- a/arch/m68k/sun3x/prom.c
+++ b/arch/m68k/sun3x/prom.c
@@ -14,7 +14,6 @@
#include <asm/traps.h>
#include <asm/sun3xprom.h>
#include <asm/idprom.h>
-#include <asm/segment.h>
#include <asm/sun3ints.h>
#include <asm/openprom.h>
#include <asm/machines.h>
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index 0af88622c619..cb6d22439f71 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -662,6 +662,11 @@ static void build_epilogue(struct jit_ctx *ctx)
((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative : func) : \
func##_positive)
+static bool is_bad_offset(int b_off)
+{
+ return b_off > 0x1ffff || b_off < -0x20000;
+}
+
static int build_body(struct jit_ctx *ctx)
{
const struct bpf_prog *prog = ctx->skf;
@@ -728,7 +733,10 @@ load_common:
/* Load return register on DS for failures */
emit_reg_move(r_ret, r_zero, ctx);
/* Return with error */
- emit_b(b_imm(prog->len, ctx), ctx);
+ b_off = b_imm(prog->len, ctx);
+ if (is_bad_offset(b_off))
+ return -E2BIG;
+ emit_b(b_off, ctx);
emit_nop(ctx);
break;
case BPF_LD | BPF_W | BPF_IND:
@@ -775,8 +783,10 @@ load_ind:
emit_jalr(MIPS_R_RA, r_s0, ctx);
emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */
/* Check the error value */
- emit_bcond(MIPS_COND_NE, r_ret, 0,
- b_imm(prog->len, ctx), ctx);
+ b_off = b_imm(prog->len, ctx);
+ if (is_bad_offset(b_off))
+ return -E2BIG;
+ emit_bcond(MIPS_COND_NE, r_ret, 0, b_off, ctx);
emit_reg_move(r_ret, r_zero, ctx);
/* We are good */
/* X <- P[1:K] & 0xf */
@@ -855,8 +865,10 @@ load_ind:
/* A /= X */
ctx->flags |= SEEN_X | SEEN_A;
/* Check if r_X is zero */
- emit_bcond(MIPS_COND_EQ, r_X, r_zero,
- b_imm(prog->len, ctx), ctx);
+ b_off = b_imm(prog->len, ctx);
+ if (is_bad_offset(b_off))
+ return -E2BIG;
+ emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx);
emit_load_imm(r_ret, 0, ctx); /* delay slot */
emit_div(r_A, r_X, ctx);
break;
@@ -864,8 +876,10 @@ load_ind:
/* A %= X */
ctx->flags |= SEEN_X | SEEN_A;
/* Check if r_X is zero */
- emit_bcond(MIPS_COND_EQ, r_X, r_zero,
- b_imm(prog->len, ctx), ctx);
+ b_off = b_imm(prog->len, ctx);
+ if (is_bad_offset(b_off))
+ return -E2BIG;
+ emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx);
emit_load_imm(r_ret, 0, ctx); /* delay slot */
emit_mod(r_A, r_X, ctx);
break;
@@ -926,7 +940,10 @@ load_ind:
break;
case BPF_JMP | BPF_JA:
/* pc += K */
- emit_b(b_imm(i + k + 1, ctx), ctx);
+ b_off = b_imm(i + k + 1, ctx);
+ if (is_bad_offset(b_off))
+ return -E2BIG;
+ emit_b(b_off, ctx);
emit_nop(ctx);
break;
case BPF_JMP | BPF_JEQ | BPF_K:
@@ -1056,12 +1073,16 @@ jmp_cmp:
break;
case BPF_RET | BPF_A:
ctx->flags |= SEEN_A;
- if (i != prog->len - 1)
+ if (i != prog->len - 1) {
/*
* If this is not the last instruction
* then jump to the epilogue
*/
- emit_b(b_imm(prog->len, ctx), ctx);
+ b_off = b_imm(prog->len, ctx);
+ if (is_bad_offset(b_off))
+ return -E2BIG;
+ emit_b(b_off, ctx);
+ }
emit_reg_move(r_ret, r_A, ctx); /* delay slot */
break;
case BPF_RET | BPF_K:
@@ -1075,7 +1096,10 @@ jmp_cmp:
* If this is not the last instruction
* then jump to the epilogue
*/
- emit_b(b_imm(prog->len, ctx), ctx);
+ b_off = b_imm(prog->len, ctx);
+ if (is_bad_offset(b_off))
+ return -E2BIG;
+ emit_b(b_off, ctx);
emit_nop(ctx);
}
break;
@@ -1133,8 +1157,10 @@ jmp_cmp:
/* Load *dev pointer */
emit_load_ptr(r_s0, r_skb, off, ctx);
/* error (0) in the delay slot */
- emit_bcond(MIPS_COND_EQ, r_s0, r_zero,
- b_imm(prog->len, ctx), ctx);
+ b_off = b_imm(prog->len, ctx);
+ if (is_bad_offset(b_off))
+ return -E2BIG;
+ emit_bcond(MIPS_COND_EQ, r_s0, r_zero, b_off, ctx);
emit_reg_move(r_ret, r_zero, ctx);
if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4);
@@ -1244,7 +1270,10 @@ void bpf_jit_compile(struct bpf_prog *fp)
/* Generate the actual JIT code */
build_prologue(&ctx);
- build_body(&ctx);
+ if (build_body(&ctx)) {
+ module_memfree(ctx.target);
+ goto out;
+ }
build_epilogue(&ctx);
/* Update the icache */
diff --git a/arch/nios2/Kconfig.debug b/arch/nios2/Kconfig.debug
index a8bc06e96ef5..ca1beb87f987 100644
--- a/arch/nios2/Kconfig.debug
+++ b/arch/nios2/Kconfig.debug
@@ -3,9 +3,10 @@
config EARLY_PRINTK
bool "Activate early kernel debugging"
default y
+ depends on TTY
select SERIAL_CORE_CONSOLE
help
- Enable early printk on console
+ Enable early printk on console.
This is useful for kernel debugging when your machine crashes very
early before the console code is initialized.
You should normally say N here, unless you want to debug such a crash.
diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c
index cf8d687a2644..40bc8fb75e0b 100644
--- a/arch/nios2/kernel/setup.c
+++ b/arch/nios2/kernel/setup.c
@@ -149,8 +149,6 @@ static void __init find_limits(unsigned long *min, unsigned long *max_low,
void __init setup_arch(char **cmdline_p)
{
- int dram_start;
-
console_verbose();
memory_start = memblock_start_of_DRAM();
diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h
index 36dbf5043fc0..aa995d91cd1d 100644
--- a/arch/s390/include/asm/ccwgroup.h
+++ b/arch/s390/include/asm/ccwgroup.h
@@ -55,7 +55,7 @@ int ccwgroup_create_dev(struct device *root, struct ccwgroup_driver *gdrv,
int num_devices, const char *buf);
extern int ccwgroup_set_online(struct ccwgroup_device *gdev);
-extern int ccwgroup_set_offline(struct ccwgroup_device *gdev);
+int ccwgroup_set_offline(struct ccwgroup_device *gdev, bool call_gdrv);
extern int ccwgroup_probe_ccwdev(struct ccw_device *cdev);
extern void ccwgroup_remove_ccwdev(struct ccw_device *cdev);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 88419263a89a..840d8594437d 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -248,8 +248,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \
({ \
- /* Branch instruction needs 6 bytes */ \
- int rel = (addrs[(i) + (off) + 1] - (addrs[(i) + 1] - 6)) / 2;\
+ int rel = (addrs[(i) + (off) + 1] - jit->prg) / 2; \
_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
@@ -761,10 +760,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4(0xb9080000, dst_reg, src_reg);
break;
case BPF_ALU | BPF_ADD | BPF_K: /* dst = (u32) dst + (u32) imm */
- if (!imm)
- break;
- /* alfi %dst,imm */
- EMIT6_IMM(0xc20b0000, dst_reg, imm);
+ if (imm != 0) {
+ /* alfi %dst,imm */
+ EMIT6_IMM(0xc20b0000, dst_reg, imm);
+ }
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_ADD | BPF_K: /* dst = dst + imm */
@@ -786,17 +785,22 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4(0xb9090000, dst_reg, src_reg);
break;
case BPF_ALU | BPF_SUB | BPF_K: /* dst = (u32) dst - (u32) imm */
- if (!imm)
- break;
- /* alfi %dst,-imm */
- EMIT6_IMM(0xc20b0000, dst_reg, -imm);
+ if (imm != 0) {
+ /* alfi %dst,-imm */
+ EMIT6_IMM(0xc20b0000, dst_reg, -imm);
+ }
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */
if (!imm)
break;
- /* agfi %dst,-imm */
- EMIT6_IMM(0xc2080000, dst_reg, -imm);
+ if (imm == -0x80000000) {
+ /* algfi %dst,0x80000000 */
+ EMIT6_IMM(0xc20a0000, dst_reg, 0x80000000);
+ } else {
+ /* agfi %dst,-imm */
+ EMIT6_IMM(0xc2080000, dst_reg, -imm);
+ }
break;
/*
* BPF_MUL
@@ -811,10 +815,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4(0xb90c0000, dst_reg, src_reg);
break;
case BPF_ALU | BPF_MUL | BPF_K: /* dst = (u32) dst * (u32) imm */
- if (imm == 1)
- break;
- /* msfi %r5,imm */
- EMIT6_IMM(0xc2010000, dst_reg, imm);
+ if (imm != 1) {
+ /* msfi %r5,imm */
+ EMIT6_IMM(0xc2010000, dst_reg, imm);
+ }
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_MUL | BPF_K: /* dst = dst * imm */
@@ -867,6 +871,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
if (BPF_OP(insn->code) == BPF_MOD)
/* lhgi %dst,0 */
EMIT4_IMM(0xa7090000, dst_reg, 0);
+ else
+ EMIT_ZERO(dst_reg);
break;
}
/* lhi %w0,0 */
@@ -999,10 +1005,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4(0xb9820000, dst_reg, src_reg);
break;
case BPF_ALU | BPF_XOR | BPF_K: /* dst = (u32) dst ^ (u32) imm */
- if (!imm)
- break;
- /* xilf %dst,imm */
- EMIT6_IMM(0xc0070000, dst_reg, imm);
+ if (imm != 0) {
+ /* xilf %dst,imm */
+ EMIT6_IMM(0xc0070000, dst_reg, imm);
+ }
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
@@ -1033,10 +1039,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, src_reg, 0);
break;
case BPF_ALU | BPF_LSH | BPF_K: /* dst = (u32) dst << (u32) imm */
- if (imm == 0)
- break;
- /* sll %dst,imm(%r0) */
- EMIT4_DISP(0x89000000, dst_reg, REG_0, imm);
+ if (imm != 0) {
+ /* sll %dst,imm(%r0) */
+ EMIT4_DISP(0x89000000, dst_reg, REG_0, imm);
+ }
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_LSH | BPF_K: /* dst = dst << imm */
@@ -1058,10 +1064,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, src_reg, 0);
break;
case BPF_ALU | BPF_RSH | BPF_K: /* dst = (u32) dst >> (u32) imm */
- if (imm == 0)
- break;
- /* srl %dst,imm(%r0) */
- EMIT4_DISP(0x88000000, dst_reg, REG_0, imm);
+ if (imm != 0) {
+ /* srl %dst,imm(%r0) */
+ EMIT4_DISP(0x88000000, dst_reg, REG_0, imm);
+ }
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_RSH | BPF_K: /* dst = dst >> imm */
@@ -1083,10 +1089,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, src_reg, 0);
break;
case BPF_ALU | BPF_ARSH | BPF_K: /* ((s32) dst >> imm */
- if (imm == 0)
- break;
- /* sra %dst,imm(%r0) */
- EMIT4_DISP(0x8a000000, dst_reg, REG_0, imm);
+ if (imm != 0) {
+ /* sra %dst,imm(%r0) */
+ EMIT4_DISP(0x8a000000, dst_reg, REG_0, imm);
+ }
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_ARSH | BPF_K: /* ((s64) dst) >>= imm */
diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h
index 56bf35c2f29c..cdced80a7ffa 100644
--- a/arch/sh/include/asm/pgtable-3level.h
+++ b/arch/sh/include/asm/pgtable-3level.h
@@ -34,7 +34,7 @@ typedef struct { unsigned long long pmd; } pmd_t;
static inline pmd_t *pud_pgtable(pud_t pud)
{
- return (pmd_t *)pud_val(pud);
+ return (pmd_t *)(unsigned long)pud_val(pud);
}
/* only used by the stubbed out hugetlb gup code, should never be called */
diff --git a/arch/sparc/lib/iomap.c b/arch/sparc/lib/iomap.c
index c9da9f139694..f3a8cd491ce0 100644
--- a/arch/sparc/lib/iomap.c
+++ b/arch/sparc/lib/iomap.c
@@ -19,8 +19,10 @@ void ioport_unmap(void __iomem *addr)
EXPORT_SYMBOL(ioport_map);
EXPORT_SYMBOL(ioport_unmap);
+#ifdef CONFIG_PCI
void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
{
/* nothing to do */
}
EXPORT_SYMBOL(pci_iounmap);
+#endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index dad7f85dcdea..ab83c22d274e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2610,7 +2610,6 @@ config PCI_OLPC
config PCI_XEN
def_bool y
depends on PCI && XEN
- select SWIOTLB_XEN
config MMCONF_FAM10H
def_bool y
diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
index fa2c3f50aecb..18d2f5199194 100644
--- a/arch/x86/crypto/sm4-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
@@ -367,10 +367,11 @@ SYM_FUNC_START(sm4_aesni_avx_crypt8)
* %rdx: src (1..8 blocks)
* %rcx: num blocks (1..8)
*/
- FRAME_BEGIN
-
cmpq $5, %rcx;
jb sm4_aesni_avx_crypt4;
+
+ FRAME_BEGIN
+
vmovdqu (0 * 16)(%rdx), RA0;
vmovdqu (1 * 16)(%rdx), RA1;
vmovdqu (2 * 16)(%rdx), RA2;
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 2a57dbed4894..6dfa8ddaa60f 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2465,6 +2465,7 @@ static int x86_pmu_event_init(struct perf_event *event)
if (err) {
if (event->destroy)
event->destroy(event);
+ event->destroy = NULL;
}
if (READ_ONCE(x86_pmu.attr_rdpmc) &&
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 7011e87be6d0..9a044438072b 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -263,6 +263,7 @@ static struct event_constraint intel_icl_event_constraints[] = {
INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf),
INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf),
INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf),
+ INTEL_EVENT_CONSTRAINT(0xef, 0xf),
INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf),
EVENT_CONSTRAINT_END
};
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index 5c7bcaa79623..1d5f14aff5f6 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -2,8 +2,6 @@
#ifndef _ASM_X86_PKEYS_H
#define _ASM_X86_PKEYS_H
-#define ARCH_DEFAULT_PKEY 0
-
/*
* If more than 16 keys are ever supported, a thorough audit
* will be necessary to ensure that the types that store key
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index f3fbb84ff8a7..68c257a3de0d 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -275,7 +275,7 @@ static inline int enqcmds(void __iomem *dst, const void *src)
{
const struct { char _[64]; } *__src = src;
struct { char _[64]; } __iomem *__dst = dst;
- int zf;
+ bool zf;
/*
* ENQCMDS %(rdx), rax
diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h
index 6b56d0d45d15..66b4ddde7743 100644
--- a/arch/x86/include/asm/xen/swiotlb-xen.h
+++ b/arch/x86/include/asm/xen/swiotlb-xen.h
@@ -3,14 +3,10 @@
#define _ASM_X86_SWIOTLB_XEN_H
#ifdef CONFIG_SWIOTLB_XEN
-extern int xen_swiotlb;
extern int __init pci_xen_swiotlb_detect(void);
-extern void __init pci_xen_swiotlb_init(void);
extern int pci_xen_swiotlb_init_late(void);
#else
-#define xen_swiotlb (0)
-static inline int __init pci_xen_swiotlb_detect(void) { return 0; }
-static inline void __init pci_xen_swiotlb_init(void) { }
+#define pci_xen_swiotlb_detect NULL
static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; }
#endif
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 79f164141116..40ed44ead063 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -830,6 +830,20 @@ void __init setup_arch(char **cmdline_p)
x86_init.oem.arch_setup();
+ /*
+ * Do some memory reservations *before* memory is added to memblock, so
+ * memblock allocations won't overwrite it.
+ *
+ * After this point, everything still needed from the boot loader or
+ * firmware or kernel text should be early reserved or marked not RAM in
+ * e820. All other memory is free game.
+ *
+ * This call needs to happen before e820__memory_setup() which calls the
+ * xen_memory_setup() on Xen dom0 which relies on the fact that those
+ * early reservations have happened already.
+ */
+ early_reserve_memory();
+
iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
e820__memory_setup();
parse_setup_data();
@@ -876,18 +890,6 @@ void __init setup_arch(char **cmdline_p)
parse_early_param();
- /*
- * Do some memory reservations *before* memory is added to
- * memblock, so memblock allocations won't overwrite it.
- * Do it after early param, so we could get (unlikely) panic from
- * serial.
- *
- * After this point everything still needed from the boot loader or
- * firmware or kernel text should be early reserved or marked not
- * RAM in e820. All other memory is free game.
- */
- early_reserve_memory();
-
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Memory used by the kernel cannot be hot-removed because Linux
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 532791ffcbb9..28b1a4e57827 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -435,7 +435,6 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
__FOP_RET(#op)
asm(".pushsection .fixup, \"ax\"\n"
- ".global kvm_fastop_exception \n"
"kvm_fastop_exception: xor %esi, %esi; ret\n"
".popsection");
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 058f19b20465..c565def611e2 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -37,10 +37,10 @@
((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
#define __get_next(t, insn) \
- ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); leXX_to_cpu(t, r); })
+ ({ t r; memcpy(&r, insn->next_byte, sizeof(t)); insn->next_byte += sizeof(t); leXX_to_cpu(t, r); })
#define __peek_nbyte_next(t, insn, n) \
- ({ t r = *(t*)((insn)->next_byte + n); leXX_to_cpu(t, r); })
+ ({ t r; memcpy(&r, (insn)->next_byte + n, sizeof(t)); leXX_to_cpu(t, r); })
#define get_next(t, insn) \
({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b2eefdefc108..84a2c8c4af73 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -710,7 +710,8 @@ oops:
static noinline void
kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, int signal, int si_code)
+ unsigned long address, int signal, int si_code,
+ u32 pkey)
{
WARN_ON_ONCE(user_mode(regs));
@@ -735,8 +736,12 @@ kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code,
set_signal_archinfo(address, error_code);
- /* XXX: hwpoison faults will set the wrong code. */
- force_sig_fault(signal, si_code, (void __user *)address);
+ if (si_code == SEGV_PKUERR) {
+ force_sig_pkuerr((void __user *)address, pkey);
+ } else {
+ /* XXX: hwpoison faults will set the wrong code. */
+ force_sig_fault(signal, si_code, (void __user *)address);
+ }
}
/*
@@ -798,7 +803,8 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
struct task_struct *tsk = current;
if (!user_mode(regs)) {
- kernelmode_fixup_or_oops(regs, error_code, address, pkey, si_code);
+ kernelmode_fixup_or_oops(regs, error_code, address,
+ SIGSEGV, si_code, pkey);
return;
}
@@ -930,7 +936,8 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
{
/* Kernel mode? Handle exceptions or die: */
if (!user_mode(regs)) {
- kernelmode_fixup_or_oops(regs, error_code, address, SIGBUS, BUS_ADRERR);
+ kernelmode_fixup_or_oops(regs, error_code, address,
+ SIGBUS, BUS_ADRERR, ARCH_DEFAULT_PKEY);
return;
}
@@ -1396,7 +1403,8 @@ good_area:
*/
if (!user_mode(regs))
kernelmode_fixup_or_oops(regs, error_code, address,
- SIGBUS, BUS_ADRERR);
+ SIGBUS, BUS_ADRERR,
+ ARCH_DEFAULT_PKEY);
return;
}
@@ -1416,7 +1424,8 @@ good_area:
return;
if (fatal_signal_pending(current) && !user_mode(regs)) {
- kernelmode_fixup_or_oops(regs, error_code, address, 0, 0);
+ kernelmode_fixup_or_oops(regs, error_code, address,
+ 0, 0, ARCH_DEFAULT_PKEY);
return;
}
@@ -1424,7 +1433,8 @@ good_area:
/* Kernel mode? Handle exceptions or die: */
if (!user_mode(regs)) {
kernelmode_fixup_or_oops(regs, error_code, address,
- SIGSEGV, SEGV_MAPERR);
+ SIGSEGV, SEGV_MAPERR,
+ ARCH_DEFAULT_PKEY);
return;
}
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 0fe6aacef3db..9ea57389c554 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1341,9 +1341,10 @@ st: if (is_imm8(insn->off))
if (insn->imm == (BPF_AND | BPF_FETCH) ||
insn->imm == (BPF_OR | BPF_FETCH) ||
insn->imm == (BPF_XOR | BPF_FETCH)) {
- u8 *branch_target;
bool is64 = BPF_SIZE(insn->code) == BPF_DW;
u32 real_src_reg = src_reg;
+ u32 real_dst_reg = dst_reg;
+ u8 *branch_target;
/*
* Can't be implemented with a single x86 insn.
@@ -1354,11 +1355,13 @@ st: if (is_imm8(insn->off))
emit_mov_reg(&prog, true, BPF_REG_AX, BPF_REG_0);
if (src_reg == BPF_REG_0)
real_src_reg = BPF_REG_AX;
+ if (dst_reg == BPF_REG_0)
+ real_dst_reg = BPF_REG_AX;
branch_target = prog;
/* Load old value */
emit_ldx(&prog, BPF_SIZE(insn->code),
- BPF_REG_0, dst_reg, insn->off);
+ BPF_REG_0, real_dst_reg, insn->off);
/*
* Perform the (commutative) operation locally,
* put the result in the AUX_REG.
@@ -1369,7 +1372,8 @@ st: if (is_imm8(insn->off))
add_2reg(0xC0, AUX_REG, real_src_reg));
/* Attempt to swap in new value */
err = emit_atomic(&prog, BPF_CMPXCHG,
- dst_reg, AUX_REG, insn->off,
+ real_dst_reg, AUX_REG,
+ insn->off,
BPF_SIZE(insn->code));
if (WARN_ON(err))
return err;
@@ -1383,11 +1387,10 @@ st: if (is_imm8(insn->off))
/* Restore R0 after clobbering RAX */
emit_mov_reg(&prog, true, BPF_REG_0, BPF_REG_AX);
break;
-
}
err = emit_atomic(&prog, insn->imm, dst_reg, src_reg,
- insn->off, BPF_SIZE(insn->code));
+ insn->off, BPF_SIZE(insn->code));
if (err)
return err;
break;
@@ -1744,7 +1747,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
}
static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
- struct bpf_prog *p, int stack_size, bool mod_ret)
+ struct bpf_prog *p, int stack_size, bool save_ret)
{
u8 *prog = *pprog;
u8 *jmp_insn;
@@ -1777,11 +1780,15 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
if (emit_call(&prog, p->bpf_func, prog))
return -EINVAL;
- /* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
+ /*
+ * BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
* of the previous call which is then passed on the stack to
* the next BPF program.
+ *
+ * BPF_TRAMP_FENTRY trampoline may need to return the return
+ * value of BPF_PROG_TYPE_STRUCT_OPS prog.
*/
- if (mod_ret)
+ if (save_ret)
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
/* replace 2 nops with JE insn, since jmp target is known */
@@ -1828,13 +1835,15 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
}
static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
- struct bpf_tramp_progs *tp, int stack_size)
+ struct bpf_tramp_progs *tp, int stack_size,
+ bool save_ret)
{
int i;
u8 *prog = *pprog;
for (i = 0; i < tp->nr_progs; i++) {
- if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false))
+ if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size,
+ save_ret))
return -EINVAL;
}
*pprog = prog;
@@ -1877,6 +1886,23 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
return 0;
}
+static bool is_valid_bpf_tramp_flags(unsigned int flags)
+{
+ if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
+ (flags & BPF_TRAMP_F_SKIP_FRAME))
+ return false;
+
+ /*
+ * BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops,
+ * and it must be used alone.
+ */
+ if ((flags & BPF_TRAMP_F_RET_FENTRY_RET) &&
+ (flags & ~BPF_TRAMP_F_RET_FENTRY_RET))
+ return false;
+
+ return true;
+}
+
/* Example:
* __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
* its 'struct btf_func_model' will be nr_args=2
@@ -1949,17 +1975,19 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN];
u8 **branches = NULL;
u8 *prog;
+ bool save_ret;
/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
if (nr_args > 6)
return -ENOTSUPP;
- if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
- (flags & BPF_TRAMP_F_SKIP_FRAME))
+ if (!is_valid_bpf_tramp_flags(flags))
return -EINVAL;
- if (flags & BPF_TRAMP_F_CALL_ORIG)
- stack_size += 8; /* room for return value of orig_call */
+ /* room for return value of orig_call or fentry prog */
+ save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
+ if (save_ret)
+ stack_size += 8;
if (flags & BPF_TRAMP_F_IP_ARG)
stack_size += 8; /* room for IP address argument */
@@ -2005,7 +2033,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
}
if (fentry->nr_progs)
- if (invoke_bpf(m, &prog, fentry, stack_size))
+ if (invoke_bpf(m, &prog, fentry, stack_size,
+ flags & BPF_TRAMP_F_RET_FENTRY_RET))
return -EINVAL;
if (fmod_ret->nr_progs) {
@@ -2052,7 +2081,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
}
if (fexit->nr_progs)
- if (invoke_bpf(m, &prog, fexit, stack_size)) {
+ if (invoke_bpf(m, &prog, fexit, stack_size, false)) {
ret = -EINVAL;
goto cleanup;
}
@@ -2072,9 +2101,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
ret = -EINVAL;
goto cleanup;
}
- /* restore original return value back into RAX */
- emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
}
+ /* restore return value of orig_call or fentry prog back into RAX */
+ if (save_ret)
+ emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
EMIT1(0x5B); /* pop rbx */
EMIT1(0xC9); /* leave */
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 349f780a1567..6e0d0754f94f 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -755,8 +755,8 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
preempt_enable();
}
-static void xen_convert_trap_info(const struct desc_ptr *desc,
- struct trap_info *traps)
+static unsigned xen_convert_trap_info(const struct desc_ptr *desc,
+ struct trap_info *traps, bool full)
{
unsigned in, out, count;
@@ -766,17 +766,18 @@ static void xen_convert_trap_info(const struct desc_ptr *desc,
for (in = out = 0; in < count; in++) {
gate_desc *entry = (gate_desc *)(desc->address) + in;
- if (cvt_gate_to_trap(in, entry, &traps[out]))
+ if (cvt_gate_to_trap(in, entry, &traps[out]) || full)
out++;
}
- traps[out].address = 0;
+
+ return out;
}
void xen_copy_trap_info(struct trap_info *traps)
{
const struct desc_ptr *desc = this_cpu_ptr(&idt_desc);
- xen_convert_trap_info(desc, traps);
+ xen_convert_trap_info(desc, traps, true);
}
/* Load a new IDT into Xen. In principle this can be per-CPU, so we
@@ -786,6 +787,7 @@ static void xen_load_idt(const struct desc_ptr *desc)
{
static DEFINE_SPINLOCK(lock);
static struct trap_info traps[257];
+ unsigned out;
trace_xen_cpu_load_idt(desc);
@@ -793,7 +795,8 @@ static void xen_load_idt(const struct desc_ptr *desc)
memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc));
- xen_convert_trap_info(desc, traps);
+ out = xen_convert_trap_info(desc, traps, false);
+ memset(&traps[out], 0, sizeof(traps[0]));
xen_mc_flush();
if (HYPERVISOR_set_trap_table(traps))
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 54f9aa7e8457..46df59aeaa06 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -18,7 +18,7 @@
#endif
#include <linux/export.h>
-int xen_swiotlb __read_mostly;
+static int xen_swiotlb __read_mostly;
/*
* pci_xen_swiotlb_detect - set xen_swiotlb to 1 if necessary
@@ -56,7 +56,7 @@ int __init pci_xen_swiotlb_detect(void)
return xen_swiotlb;
}
-void __init pci_xen_swiotlb_init(void)
+static void __init pci_xen_swiotlb_init(void)
{
if (xen_swiotlb) {
xen_swiotlb_init_early();
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 96afadf9878e..7ed56c6075b0 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -290,8 +290,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
gdt = get_cpu_gdt_rw(cpu);
- memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
-
/*
* Bring up the CPU in cpu_bringup_and_idle() with the stack
* pointing just below where pt_regs would be if it were a normal
@@ -308,8 +306,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
xen_copy_trap_info(ctxt->trap_ctxt);
- ctxt->ldt_ents = 0;
-
BUG_ON((unsigned long)gdt & ~PAGE_MASK);
gdt_mfn = arbitrary_virt_to_mfn(gdt);