summaryrefslogtreecommitdiff
path: root/arch/arm64/include/asm
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2022-05-20 14:16:27 +0300
committerPaolo Bonzini <pbonzini@redhat.com>2022-05-25 12:09:23 +0300
commit47e8eec83262083c7da220446551eaad614218ea (patch)
tree1bcdf6cb6541441d1042fdf68c2f7982d80a9178 /arch/arm64/include/asm
parent825be3b5abae1e67db45ff7d4b9a7764a2419bd9 (diff)
parent5c0ad551e9aa6188f2bda0977c1cb6768a2b74ef (diff)
downloadlinux-47e8eec83262083c7da220446551eaad614218ea.tar.xz
Merge tag 'kvmarm-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for 5.19 - Add support for the ARMv8.6 WFxT extension - Guard pages for the EL2 stacks - Trap and emulate AArch32 ID registers to hide unsupported features - Ability to select and save/restore the set of hypercalls exposed to the guest - Support for PSCI-initiated suspend in collaboration with userspace - GICv3 register-based LPI invalidation support - Move host PMU event merging into the vcpu data structure - GICv3 ITS save/restore fixes - The usual set of small-scale cleanups and fixes [Due to the conflict, KVM_SYSTEM_EVENT_SEV_TERM is relocated from 4 to 6. - Paolo]
Diffstat (limited to 'arch/arm64/include/asm')
-rw-r--r--arch/arm64/include/asm/barrier.h4
-rw-r--r--arch/arm64/include/asm/cpu.h4
-rw-r--r--arch/arm64/include/asm/cpufeature.h24
-rw-r--r--arch/arm64/include/asm/cputype.h10
-rw-r--r--arch/arm64/include/asm/el2_setup.h66
-rw-r--r--arch/arm64/include/asm/esr.h21
-rw-r--r--arch/arm64/include/asm/exception.h1
-rw-r--r--arch/arm64/include/asm/fpsimd.h123
-rw-r--r--arch/arm64/include/asm/fpsimdmacros.h87
-rw-r--r--arch/arm64/include/asm/hwcap.h9
-rw-r--r--arch/arm64/include/asm/kvm_arm.h4
-rw-r--r--arch/arm64/include/asm/kvm_asm.h1
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h8
-rw-r--r--arch/arm64/include/asm/kvm_host.h45
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h3
-rw-r--r--arch/arm64/include/asm/pgtable.h4
-rw-r--r--arch/arm64/include/asm/processor.h26
-rw-r--r--arch/arm64/include/asm/sysreg.h67
-rw-r--r--arch/arm64/include/asm/thread_info.h2
19 files changed, 474 insertions, 35 deletions
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 62217be36217..9f3e2c3d2ca0 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -16,7 +16,11 @@
#define sev() asm volatile("sev" : : : "memory")
#define wfe() asm volatile("wfe" : : : "memory")
+#define wfet(val) asm volatile("msr s0_3_c1_c0_0, %0" \
+ : : "r" (val) : "memory")
#define wfi() asm volatile("wfi" : : : "memory")
+#define wfit(val) asm volatile("msr s0_3_c1_c0_1, %0" \
+ : : "r" (val) : "memory")
#define isb() asm volatile("isb" : : : "memory")
#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index a58e366f0b07..115cdec1ae87 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -58,11 +58,15 @@ struct cpuinfo_arm64 {
u64 reg_id_aa64pfr0;
u64 reg_id_aa64pfr1;
u64 reg_id_aa64zfr0;
+ u64 reg_id_aa64smfr0;
struct cpuinfo_32bit aarch32;
/* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */
u64 reg_zcr;
+
+ /* pseudo-SMCR for recording maximum SMCR_EL1 LEN value: */
+ u64 reg_smcr;
};
DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index c62e7e5e2f0c..14a8f3d93add 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -622,6 +622,13 @@ static inline bool id_aa64pfr0_sve(u64 pfr0)
return val > 0;
}
+static inline bool id_aa64pfr1_sme(u64 pfr1)
+{
+ u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_SME_SHIFT);
+
+ return val > 0;
+}
+
static inline bool id_aa64pfr1_mte(u64 pfr1)
{
u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_MTE_SHIFT);
@@ -759,6 +766,23 @@ static __always_inline bool system_supports_sve(void)
cpus_have_const_cap(ARM64_SVE);
}
+static __always_inline bool system_supports_sme(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_SME) &&
+ cpus_have_const_cap(ARM64_SME);
+}
+
+static __always_inline bool system_supports_fa64(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_SME) &&
+ cpus_have_const_cap(ARM64_SME_FA64);
+}
+
+static __always_inline bool system_supports_tpidr2(void)
+{
+ return system_supports_sme();
+}
+
static __always_inline bool system_supports_cnp(void)
{
return IS_ENABLED(CONFIG_ARM64_CNP) &&
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 232b439cbaf3..60647bdc0b09 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -75,6 +75,7 @@
#define ARM_CPU_PART_CORTEX_A77 0xD0D
#define ARM_CPU_PART_NEOVERSE_V1 0xD40
#define ARM_CPU_PART_CORTEX_A78 0xD41
+#define ARM_CPU_PART_CORTEX_A78AE 0xD42
#define ARM_CPU_PART_CORTEX_X1 0xD44
#define ARM_CPU_PART_CORTEX_A510 0xD46
#define ARM_CPU_PART_CORTEX_A710 0xD47
@@ -117,6 +118,10 @@
#define APPLE_CPU_PART_M1_ICESTORM 0x022
#define APPLE_CPU_PART_M1_FIRESTORM 0x023
+#define APPLE_CPU_PART_M1_ICESTORM_PRO 0x024
+#define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025
+#define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028
+#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
@@ -130,6 +135,7 @@
#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1)
#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
+#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
@@ -162,6 +168,10 @@
#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
+#define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
+#define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO)
+#define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX)
+#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
#define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 7f3c87f7a0ce..fabdbde0fe02 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -107,7 +107,7 @@
isb // Make sure SRE is now set
mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back,
tbz x0, #0, .Lskip_gicv3_\@ // and check that it sticks
- msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults
+ msr_s SYS_ICH_HCR_EL2, xzr // Reset ICH_HCR_EL2 to defaults
.Lskip_gicv3_\@:
.endm
@@ -143,6 +143,50 @@
.Lskip_sve_\@:
.endm
+/* SME register access and priority mapping */
+.macro __init_el2_nvhe_sme
+ mrs x1, id_aa64pfr1_el1
+ ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4
+ cbz x1, .Lskip_sme_\@
+
+ bic x0, x0, #CPTR_EL2_TSM // Also disable SME traps
+ msr cptr_el2, x0 // Disable copro. traps to EL2
+ isb
+
+ mrs x1, sctlr_el2
+ orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps
+ msr sctlr_el2, x1
+ isb
+
+ mov x1, #0 // SMCR controls
+
+ mrs_s x2, SYS_ID_AA64SMFR0_EL1
+ ubfx x2, x2, #ID_AA64SMFR0_FA64_SHIFT, #1 // Full FP in SM?
+ cbz x2, .Lskip_sme_fa64_\@
+
+ orr x1, x1, SMCR_ELx_FA64_MASK
+.Lskip_sme_fa64_\@:
+
+ orr x1, x1, #SMCR_ELx_LEN_MASK // Enable full SME vector
+ msr_s SYS_SMCR_EL2, x1 // length for EL1.
+
+ mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported?
+ ubfx x1, x1, #SYS_SMIDR_EL1_SMPS_SHIFT, #1
+ cbz x1, .Lskip_sme_\@
+
+ msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal
+
+ mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present?
+ ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4
+ cbz x1, .Lskip_sme_\@
+
+ mrs_s x1, SYS_HCRX_EL2
+ orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping
+ msr_s SYS_HCRX_EL2, x1
+
+.Lskip_sme_\@:
+.endm
+
/* Disable any fine grained traps */
.macro __init_el2_fgt
mrs x1, id_aa64mmfr0_el1
@@ -153,15 +197,26 @@
mrs x1, id_aa64dfr0_el1
ubfx x1, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
cmp x1, #3
- b.lt .Lset_fgt_\@
+ b.lt .Lset_debug_fgt_\@
/* Disable PMSNEVFR_EL1 read and write traps */
orr x0, x0, #(1 << 62)
-.Lset_fgt_\@:
+.Lset_debug_fgt_\@:
msr_s SYS_HDFGRTR_EL2, x0
msr_s SYS_HDFGWTR_EL2, x0
- msr_s SYS_HFGRTR_EL2, xzr
- msr_s SYS_HFGWTR_EL2, xzr
+
+ mov x0, xzr
+ mrs x1, id_aa64pfr1_el1
+ ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4
+ cbz x1, .Lset_fgt_\@
+
+ /* Disable nVHE traps of TPIDR2 and SMPRI */
+ orr x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK
+ orr x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK
+
+.Lset_fgt_\@:
+ msr_s SYS_HFGRTR_EL2, x0
+ msr_s SYS_HFGWTR_EL2, x0
msr_s SYS_HFGITR_EL2, xzr
mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU
@@ -196,6 +251,7 @@
__init_el2_nvhe_idregs
__init_el2_nvhe_cptr
__init_el2_nvhe_sve
+ __init_el2_nvhe_sme
__init_el2_fgt
__init_el2_nvhe_prepare_eret
.endm
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index d52a0b269ee8..0bc9941b412e 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -37,7 +37,8 @@
#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */
/* Unallocated EC: 0x1B */
#define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */
-/* Unallocated EC: 0x1D - 0x1E */
+#define ESR_ELx_EC_SME (0x1D)
+/* Unallocated EC: 0x1E */
#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */
#define ESR_ELx_EC_IABT_LOW (0x20)
#define ESR_ELx_EC_IABT_CUR (0x21)
@@ -75,6 +76,7 @@
#define ESR_ELx_IL_SHIFT (25)
#define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT)
#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1)
+#define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK)
/* ISS field definitions shared by different classes */
#define ESR_ELx_WNR_SHIFT (6)
@@ -133,7 +135,10 @@
#define ESR_ELx_CV (UL(1) << 24)
#define ESR_ELx_COND_SHIFT (20)
#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
-#define ESR_ELx_WFx_ISS_TI (UL(1) << 0)
+#define ESR_ELx_WFx_ISS_RN (UL(0x1F) << 5)
+#define ESR_ELx_WFx_ISS_RV (UL(1) << 2)
+#define ESR_ELx_WFx_ISS_TI (UL(3) << 0)
+#define ESR_ELx_WFx_ISS_WFxT (UL(2) << 0)
#define ESR_ELx_WFx_ISS_WFI (UL(0) << 0)
#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0)
#define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1)
@@ -146,7 +151,8 @@
#define DISR_EL1_ESR_MASK (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC)
/* ESR value templates for specific events */
-#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | ESR_ELx_WFx_ISS_TI)
+#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | \
+ (ESR_ELx_WFx_ISS_TI & ~ESR_ELx_WFx_ISS_WFxT))
#define ESR_ELx_WFx_WFI_VAL ((ESR_ELx_EC_WFx << ESR_ELx_EC_SHIFT) | \
ESR_ELx_WFx_ISS_WFI)
@@ -327,6 +333,15 @@
#define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\
ESR_ELx_CP15_32_ISS_DIR_READ)
+/*
+ * ISS values for SME traps
+ */
+
+#define ESR_ELx_SME_ISS_SME_DISABLED 0
+#define ESR_ELx_SME_ISS_ILL 1
+#define ESR_ELx_SME_ISS_SM_DISABLED 2
+#define ESR_ELx_SME_ISS_ZA_DISABLED 3
+
#ifndef __ASSEMBLY__
#include <asm/types.h>
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 339477dca551..2add7f33b7c2 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -64,6 +64,7 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
struct pt_regs *regs);
void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs);
void do_sve_acc(unsigned int esr, struct pt_regs *regs);
+void do_sme_acc(unsigned int esr, struct pt_regs *regs);
void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs);
void do_sysinstr(unsigned int esr, struct pt_regs *regs);
void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index cb24385e3632..5afcd0709aae 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -46,11 +46,23 @@ extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
- void *sve_state, unsigned int sve_vl);
+ void *sve_state, unsigned int sve_vl,
+ void *za_state, unsigned int sme_vl,
+ u64 *svcr);
extern void fpsimd_flush_task_state(struct task_struct *target);
extern void fpsimd_save_and_flush_cpu_state(void);
+static inline bool thread_sm_enabled(struct thread_struct *thread)
+{
+ return system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_SM_MASK);
+}
+
+static inline bool thread_za_enabled(struct thread_struct *thread)
+{
+ return system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_ZA_MASK);
+}
+
/* Maximum VL that SVE/SME VL-agnostic software can transparently support */
#define VL_ARCH_MAX 0x100
@@ -62,7 +74,14 @@ static inline size_t sve_ffr_offset(int vl)
static inline void *sve_pffr(struct thread_struct *thread)
{
- return (char *)thread->sve_state + sve_ffr_offset(thread_get_sve_vl(thread));
+ unsigned int vl;
+
+ if (system_supports_sme() && thread_sm_enabled(thread))
+ vl = thread_get_sme_vl(thread);
+ else
+ vl = thread_get_sve_vl(thread);
+
+ return (char *)thread->sve_state + sve_ffr_offset(vl);
}
extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
@@ -71,11 +90,17 @@ extern void sve_load_state(void const *state, u32 const *pfpsr,
extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
extern unsigned int sve_get_vl(void);
extern void sve_set_vq(unsigned long vq_minus_1);
+extern void sme_set_vq(unsigned long vq_minus_1);
+extern void za_save_state(void *state);
+extern void za_load_state(void const *state);
struct arm64_cpu_capabilities;
extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
+extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
+extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
extern u64 read_zcr_features(void);
+extern u64 read_smcr_features(void);
/*
* Helpers to translate bit indices in sve_vq_map to VQ values (and
@@ -119,6 +144,7 @@ struct vl_info {
extern void sve_alloc(struct task_struct *task);
extern void fpsimd_release_task(struct task_struct *task);
extern void fpsimd_sync_to_sve(struct task_struct *task);
+extern void fpsimd_force_sync_to_sve(struct task_struct *task);
extern void sve_sync_to_fpsimd(struct task_struct *task);
extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task);
@@ -171,6 +197,12 @@ static inline void write_vl(enum vec_type type, u64 val)
write_sysreg_s(tmp | val, SYS_ZCR_EL1);
break;
#endif
+#ifdef CONFIG_ARM64_SME
+ case ARM64_VEC_SME:
+ tmp = read_sysreg_s(SYS_SMCR_EL1) & ~SMCR_ELx_LEN_MASK;
+ write_sysreg_s(tmp | val, SYS_SMCR_EL1);
+ break;
+#endif
default:
WARN_ON_ONCE(1);
break;
@@ -208,6 +240,8 @@ static inline bool sve_vq_available(unsigned int vq)
return vq_available(ARM64_VEC_SVE, vq);
}
+size_t sve_state_size(struct task_struct const *task);
+
#else /* ! CONFIG_ARM64_SVE */
static inline void sve_alloc(struct task_struct *task) { }
@@ -247,8 +281,93 @@ static inline void vec_update_vq_map(enum vec_type t) { }
static inline int vec_verify_vq_map(enum vec_type t) { return 0; }
static inline void sve_setup(void) { }
+static inline size_t sve_state_size(struct task_struct const *task)
+{
+ return 0;
+}
+
#endif /* ! CONFIG_ARM64_SVE */
+#ifdef CONFIG_ARM64_SME
+
+static inline void sme_user_disable(void)
+{
+ sysreg_clear_set(cpacr_el1, CPACR_EL1_SMEN_EL0EN, 0);
+}
+
+static inline void sme_user_enable(void)
+{
+ sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_SMEN_EL0EN);
+}
+
+static inline void sme_smstart_sm(void)
+{
+ asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr"));
+}
+
+static inline void sme_smstop_sm(void)
+{
+ asm volatile(__msr_s(SYS_SVCR_SMSTOP_SM_EL0, "xzr"));
+}
+
+static inline void sme_smstop(void)
+{
+ asm volatile(__msr_s(SYS_SVCR_SMSTOP_SMZA_EL0, "xzr"));
+}
+
+extern void __init sme_setup(void);
+
+static inline int sme_max_vl(void)
+{
+ return vec_max_vl(ARM64_VEC_SME);
+}
+
+static inline int sme_max_virtualisable_vl(void)
+{
+ return vec_max_virtualisable_vl(ARM64_VEC_SME);
+}
+
+extern void sme_alloc(struct task_struct *task);
+extern unsigned int sme_get_vl(void);
+extern int sme_set_current_vl(unsigned long arg);
+extern int sme_get_current_vl(void);
+
+/*
+ * Return how many bytes of memory are required to store the full SME
+ * specific state (currently just ZA) for task, given task's currently
+ * configured vector length.
+ */
+static inline size_t za_state_size(struct task_struct const *task)
+{
+ unsigned int vl = task_get_sme_vl(task);
+
+ return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+}
+
+#else
+
+static inline void sme_user_disable(void) { BUILD_BUG(); }
+static inline void sme_user_enable(void) { BUILD_BUG(); }
+
+static inline void sme_smstart_sm(void) { }
+static inline void sme_smstop_sm(void) { }
+static inline void sme_smstop(void) { }
+
+static inline void sme_alloc(struct task_struct *task) { }
+static inline void sme_setup(void) { }
+static inline unsigned int sme_get_vl(void) { return 0; }
+static inline int sme_max_vl(void) { return 0; }
+static inline int sme_max_virtualisable_vl(void) { return 0; }
+static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
+static inline int sme_get_current_vl(void) { return -EINVAL; }
+
+static inline size_t za_state_size(struct task_struct const *task)
+{
+ return 0;
+}
+
+#endif /* ! CONFIG_ARM64_SME */
+
/* For use by EFI runtime services calls only */
extern void __efi_fpsimd_begin(void);
extern void __efi_fpsimd_end(void);
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index 2509d7dde55a..5e0910cf4832 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -93,6 +93,12 @@
.endif
.endm
+.macro _sme_check_wv v
+ .if (\v) < 12 || (\v) > 15
+ .error "Bad vector select register \v."
+ .endif
+.endm
+
/* SVE instruction encodings for non-SVE-capable assemblers */
/* (pre binutils 2.28, all kernel capable clang versions support SVE) */
@@ -174,6 +180,54 @@
| (\np)
.endm
+/* SME instruction encodings for non-SME-capable assemblers */
+/* (pre binutils 2.38/LLVM 13) */
+
+/* RDSVL X\nx, #\imm */
+.macro _sme_rdsvl nx, imm
+ _check_general_reg \nx
+ _check_num (\imm), -0x20, 0x1f
+ .inst 0x04bf5800 \
+ | (\nx) \
+ | (((\imm) & 0x3f) << 5)
+.endm
+
+/*
+ * STR (vector from ZA array):
+ * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _sme_str_zav nw, nxbase, offset=0
+ _sme_check_wv \nw
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0xe1200000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * LDR (vector to ZA array):
+ * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _sme_ldr_zav nw, nxbase, offset=0
+ _sme_check_wv \nw
+ _check_general_reg \nxbase
+ _check_num (\offset), -0x100, 0xff
+ .inst 0xe1000000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * Zero the entire ZA array
+ * ZERO ZA
+ */
+.macro zero_za
+ .inst 0xc00800ff
+.endm
+
.macro __for from:req, to:req
.if (\from) == (\to)
_for__body %\from
@@ -208,6 +262,17 @@
921:
.endm
+/* Update SMCR_EL1.LEN with the new VQ */
+.macro sme_load_vq xvqminus1, xtmp, xtmp2
+ mrs_s \xtmp, SYS_SMCR_EL1
+ bic \xtmp2, \xtmp, SMCR_ELx_LEN_MASK
+ orr \xtmp2, \xtmp2, \xvqminus1
+ cmp \xtmp2, \xtmp
+ b.eq 921f
+ msr_s SYS_SMCR_EL1, \xtmp2 //self-synchronising
+921:
+.endm
+
/* Preserve the first 128-bits of Znz and zero the rest. */
.macro _sve_flush_z nz
_sve_check_zreg \nz
@@ -254,3 +319,25 @@
ldr w\nxtmp, [\xpfpsr, #4]
msr fpcr, x\nxtmp
.endm
+
+.macro sme_save_za nxbase, xvl, nw
+ mov w\nw, #0
+
+423:
+ _sme_str_zav \nw, \nxbase
+ add x\nxbase, x\nxbase, \xvl
+ add x\nw, x\nw, #1
+ cmp \xvl, x\nw
+ bne 423b
+.endm
+
+.macro sme_load_za nxbase, xvl, nw
+ mov w\nw, #0
+
+423:
+ _sme_ldr_zav \nw, \nxbase
+ add x\nxbase, x\nxbase, \xvl
+ add x\nw, x\nw, #1
+ cmp \xvl, x\nw
+ bne 423b
+.endm
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 8db5ec0089db..aa443d8f8cfb 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -109,6 +109,15 @@
#define KERNEL_HWCAP_AFP __khwcap2_feature(AFP)
#define KERNEL_HWCAP_RPRES __khwcap2_feature(RPRES)
#define KERNEL_HWCAP_MTE3 __khwcap2_feature(MTE3)
+#define KERNEL_HWCAP_SME __khwcap2_feature(SME)
+#define KERNEL_HWCAP_SME_I16I64 __khwcap2_feature(SME_I16I64)
+#define KERNEL_HWCAP_SME_F64F64 __khwcap2_feature(SME_F64F64)
+#define KERNEL_HWCAP_SME_I8I32 __khwcap2_feature(SME_I8I32)
+#define KERNEL_HWCAP_SME_F16F32 __khwcap2_feature(SME_F16F32)
+#define KERNEL_HWCAP_SME_B16F32 __khwcap2_feature(SME_B16F32)
+#define KERNEL_HWCAP_SME_F32F32 __khwcap2_feature(SME_F32F32)
+#define KERNEL_HWCAP_SME_FA64 __khwcap2_feature(SME_FA64)
+#define KERNEL_HWCAP_WFXT __khwcap2_feature(WFXT)
/*
* This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 1767ded83888..8aa8492dafc0 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -80,11 +80,12 @@
* FMO: Override CPSR.F and enable signaling with VF
* SWIO: Turn set/way invalidates into set/way clean+invalidate
* PTW: Take a stage2 fault if a stage1 walk steps in device memory
+ * TID3: Trap EL1 reads of group 3 ID registers
*/
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
HCR_BSU_IS | HCR_FB | HCR_TACR | \
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
- HCR_FMO | HCR_IMO | HCR_PTW )
+ HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 )
#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
@@ -279,6 +280,7 @@
#define CPTR_EL2_TCPAC (1U << 31)
#define CPTR_EL2_TAM (1 << 30)
#define CPTR_EL2_TTA (1 << 20)
+#define CPTR_EL2_TSM (1 << 12)
#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
#define CPTR_EL2_TZ (1 << 8)
#define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index d5b0386ef765..2e277f2ed671 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -169,6 +169,7 @@ struct kvm_nvhe_init_params {
unsigned long tcr_el2;
unsigned long tpidr_el2;
unsigned long stack_hyp_va;
+ unsigned long stack_pa;
phys_addr_t pgd_pa;
unsigned long hcr_el2;
unsigned long vttbr;
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 7496deab025a..07812680fcaf 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -40,6 +40,7 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu);
void kvm_inject_vabt(struct kvm_vcpu *vcpu);
void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+void kvm_inject_size_fault(struct kvm_vcpu *vcpu);
void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
@@ -86,13 +87,6 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
if (vcpu_el1_is_32bit(vcpu))
vcpu->arch.hcr_el2 &= ~HCR_RW;
- else
- /*
- * TID3: trap feature register accesses that we virtualise.
- * For now this is conditional, since no AArch32 feature regs
- * are currently virtualised.
- */
- vcpu->arch.hcr_el2 |= HCR_TID3;
if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
vcpu_el1_is_32bit(vcpu))
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 27ebb2929e0c..78e718a1b23d 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -46,6 +46,7 @@
#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5)
+#define KVM_REQ_SUSPEND KVM_ARCH_REQ(6)
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
KVM_DIRTY_LOG_INITIALLY_SET)
@@ -101,6 +102,19 @@ struct kvm_s2_mmu {
struct kvm_arch_memory_slot {
};
+/**
+ * struct kvm_smccc_features: Descriptor of the hypercall services exposed to the guests
+ *
+ * @std_bmap: Bitmap of standard secure service calls
+ * @std_hyp_bmap: Bitmap of standard hypervisor service calls
+ * @vendor_hyp_bmap: Bitmap of vendor specific hypervisor service calls
+ */
+struct kvm_smccc_features {
+ unsigned long std_bmap;
+ unsigned long std_hyp_bmap;
+ unsigned long vendor_hyp_bmap;
+};
+
struct kvm_arch {
struct kvm_s2_mmu mmu;
@@ -133,6 +147,8 @@ struct kvm_arch {
*/
#define KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED 3
#define KVM_ARCH_FLAG_EL1_32BIT 4
+ /* PSCI SYSTEM_SUSPEND enabled for the guest */
+#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5
unsigned long flags;
@@ -147,6 +163,9 @@ struct kvm_arch {
u8 pfr0_csv2;
u8 pfr0_csv3;
+
+ /* Hypercall features firmware registers' descriptor */
+ struct kvm_smccc_features smccc_feat;
};
struct kvm_vcpu_fault_info {
@@ -251,14 +270,8 @@ struct kvm_cpu_context {
struct kvm_vcpu *__hyp_running_vcpu;
};
-struct kvm_pmu_events {
- u32 events_host;
- u32 events_guest;
-};
-
struct kvm_host_data {
struct kvm_cpu_context host_ctxt;
- struct kvm_pmu_events pmu_events;
};
struct kvm_host_psci_config {
@@ -292,8 +305,11 @@ struct vcpu_reset_state {
struct kvm_vcpu_arch {
struct kvm_cpu_context ctxt;
+
+ /* Guest floating point state */
void *sve_state;
unsigned int sve_max_vl;
+ u64 svcr;
/* Stage 2 paging state used by the hardware on next switch */
struct kvm_s2_mmu *hw_mmu;
@@ -362,8 +378,8 @@ struct kvm_vcpu_arch {
u32 mdscr_el1;
} guest_debug_preserved;
- /* vcpu power-off state */
- bool power_off;
+ /* vcpu power state */
+ struct kvm_mp_state mp_state;
/* Don't run the guest (internal implementation need) */
bool pause;
@@ -448,6 +464,8 @@ struct kvm_vcpu_arch {
#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
#define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14)
#define KVM_ARM64_ON_UNSUPPORTED_CPU (1 << 15) /* Physical CPU not in supported_cpus */
+#define KVM_ARM64_HOST_SME_ENABLED (1 << 16) /* SME enabled for EL0 */
+#define KVM_ARM64_WFIT (1 << 17) /* WFIT instruction trapped */
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
KVM_GUESTDBG_USE_SW_BP | \
@@ -680,10 +698,11 @@ int kvm_handle_cp14_64(struct kvm_vcpu *vcpu);
int kvm_handle_cp15_32(struct kvm_vcpu *vcpu);
int kvm_handle_cp15_64(struct kvm_vcpu *vcpu);
int kvm_handle_sys_reg(struct kvm_vcpu *vcpu);
+int kvm_handle_cp10_id(struct kvm_vcpu *vcpu);
void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
-void kvm_sys_reg_table_init(void);
+int kvm_sys_reg_table_init(void);
/* MMIO helpers */
void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
@@ -792,9 +811,6 @@ void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr);
void kvm_clr_pmu_events(u32 clr);
-
-void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu);
-void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
#else
static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
static inline void kvm_clr_pmu_events(u32 clr) {}
@@ -826,8 +842,6 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
#define kvm_has_mte(kvm) \
(system_supports_mte() && \
test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags))
-#define kvm_vcpu_has_pmu(vcpu) \
- (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
int kvm_trng_call(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
@@ -838,4 +852,7 @@ void __init kvm_hyp_reserve(void);
static inline void kvm_hyp_reserve(void) { }
#endif
+void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu);
+bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu);
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 74735a864eee..b208da3bebec 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -154,6 +154,9 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
int kvm_share_hyp(void *from, void *to);
void kvm_unshare_hyp(void *from, void *to);
int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
+int __create_hyp_mappings(unsigned long start, unsigned long size,
+ unsigned long phys, enum kvm_pgtable_prot prot);
+int hyp_alloc_private_va_range(size_t size, unsigned long *haddr);
int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
void __iomem **kaddr,
void __iomem **haddr);
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 94e147e5456c..dff2b483ea50 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -535,7 +535,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
PMD_TYPE_TABLE)
#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
PMD_TYPE_SECT)
-#define pmd_leaf(pmd) pmd_sect(pmd)
+#define pmd_leaf(pmd) (pmd_present(pmd) && !pmd_table(pmd))
#define pmd_bad(pmd) (!pmd_table(pmd))
#define pmd_leaf_size(pmd) (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
@@ -625,7 +625,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) (!pud_table(pud))
#define pud_present(pud) pte_present(pud_pte(pud))
-#define pud_leaf(pud) pud_sect(pud)
+#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
#define pud_valid(pud) pte_valid(pud_pte(pud))
static inline void set_pud(pud_t *pudp, pud_t pud)
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 73e38d9a540c..1d2ca4870b84 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -118,6 +118,7 @@ struct debug_info {
enum vec_type {
ARM64_VEC_SVE = 0,
+ ARM64_VEC_SME,
ARM64_VEC_MAX,
};
@@ -153,6 +154,7 @@ struct thread_struct {
unsigned int fpsimd_cpu;
void *sve_state; /* SVE registers, if any */
+ void *za_state; /* ZA register, if any */
unsigned int vl[ARM64_VEC_MAX]; /* vector length */
unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */
unsigned long fault_address; /* fault info */
@@ -168,6 +170,8 @@ struct thread_struct {
u64 mte_ctrl;
#endif
u64 sctlr_user;
+ u64 svcr;
+ u64 tpidr2_el0;
};
static inline unsigned int thread_get_vl(struct thread_struct *thread,
@@ -181,6 +185,19 @@ static inline unsigned int thread_get_sve_vl(struct thread_struct *thread)
return thread_get_vl(thread, ARM64_VEC_SVE);
}
+static inline unsigned int thread_get_sme_vl(struct thread_struct *thread)
+{
+ return thread_get_vl(thread, ARM64_VEC_SME);
+}
+
+static inline unsigned int thread_get_cur_vl(struct thread_struct *thread)
+{
+ if (system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_SM_MASK))
+ return thread_get_sme_vl(thread);
+ else
+ return thread_get_sve_vl(thread);
+}
+
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type);
void task_set_vl(struct task_struct *task, enum vec_type type,
unsigned long vl);
@@ -194,6 +211,11 @@ static inline unsigned int task_get_sve_vl(const struct task_struct *task)
return task_get_vl(task, ARM64_VEC_SVE);
}
+static inline unsigned int task_get_sme_vl(const struct task_struct *task)
+{
+ return task_get_vl(task, ARM64_VEC_SME);
+}
+
static inline void task_set_sve_vl(struct task_struct *task, unsigned long vl)
{
task_set_vl(task, ARM64_VEC_SVE, vl);
@@ -354,9 +376,11 @@ extern void __init minsigstksz_setup(void);
*/
#include <asm/fpsimd.h>
-/* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */
+/* Userspace interface for PR_S[MV]E_{SET,GET}_VL prctl()s: */
#define SVE_SET_VL(arg) sve_set_current_vl(arg)
#define SVE_GET_VL() sve_get_current_vl()
+#define SME_SET_VL(arg) sme_set_current_vl(arg)
+#define SME_GET_VL() sme_get_current_vl()
/* PR_PAC_RESET_KEYS prctl */
#define PAC_RESET_KEYS(tsk, arg) ptrauth_prctl_reset_keys(tsk, arg)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index fbf5f8bb9055..bebfdd27296a 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -118,6 +118,10 @@
* System registers, organised loosely by encoding but grouped together
* where the architected name contains an index. e.g. ID_MMFR<n>_EL1.
*/
+#define SYS_SVCR_SMSTOP_SM_EL0 sys_reg(0, 3, 4, 2, 3)
+#define SYS_SVCR_SMSTART_SM_EL0 sys_reg(0, 3, 4, 3, 3)
+#define SYS_SVCR_SMSTOP_SMZA_EL0 sys_reg(0, 3, 4, 6, 3)
+
#define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2)
#define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0)
#define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2)
@@ -181,6 +185,7 @@
#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0)
#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1)
#define SYS_ID_AA64ZFR0_EL1 sys_reg(3, 0, 0, 4, 4)
+#define SYS_ID_AA64SMFR0_EL1 sys_reg(3, 0, 0, 4, 5)
#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0)
#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1)
@@ -204,6 +209,8 @@
#define SYS_ZCR_EL1 sys_reg(3, 0, 1, 2, 0)
#define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1)
+#define SYS_SMPRI_EL1 sys_reg(3, 0, 1, 2, 4)
+#define SYS_SMCR_EL1 sys_reg(3, 0, 1, 2, 6)
#define SYS_TTBR0_EL1 sys_reg(3, 0, 2, 0, 0)
#define SYS_TTBR1_EL1 sys_reg(3, 0, 2, 0, 1)
@@ -396,6 +403,8 @@
#define TRBIDR_ALIGN_MASK GENMASK(3, 0)
#define TRBIDR_ALIGN_SHIFT 0
+#define SMPRI_EL1_PRIORITY_MASK 0xf
+
#define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1)
#define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2)
@@ -451,8 +460,13 @@
#define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0)
#define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1)
#define SYS_GMID_EL1 sys_reg(3, 1, 0, 0, 4)
+#define SYS_SMIDR_EL1 sys_reg(3, 1, 0, 0, 6)
#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7)
+#define SYS_SMIDR_EL1_IMPLEMENTER_SHIFT 24
+#define SYS_SMIDR_EL1_SMPS_SHIFT 15
+#define SYS_SMIDR_EL1_AFFINITY_SHIFT 0
+
#define SYS_CSSELR_EL1 sys_reg(3, 2, 0, 0, 0)
#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1)
@@ -461,6 +475,10 @@
#define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0)
#define SYS_RNDRRS_EL0 sys_reg(3, 3, 2, 4, 1)
+#define SYS_SVCR_EL0 sys_reg(3, 3, 4, 2, 2)
+#define SYS_SVCR_EL0_ZA_MASK 2
+#define SYS_SVCR_EL0_SM_MASK 1
+
#define SYS_PMCR_EL0 sys_reg(3, 3, 9, 12, 0)
#define SYS_PMCNTENSET_EL0 sys_reg(3, 3, 9, 12, 1)
#define SYS_PMCNTENCLR_EL0 sys_reg(3, 3, 9, 12, 2)
@@ -477,6 +495,7 @@
#define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2)
#define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3)
+#define SYS_TPIDR2_EL0 sys_reg(3, 3, 13, 0, 5)
#define SYS_SCXTNUM_EL0 sys_reg(3, 3, 13, 0, 7)
@@ -546,6 +565,9 @@
#define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6)
#define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0)
#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1)
+#define SYS_HCRX_EL2 sys_reg(3, 4, 1, 2, 2)
+#define SYS_SMPRIMAP_EL2 sys_reg(3, 4, 1, 2, 5)
+#define SYS_SMCR_EL2 sys_reg(3, 4, 1, 2, 6)
#define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0)
#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4)
#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5)
@@ -605,6 +627,7 @@
#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0)
#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2)
#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0)
+#define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6)
#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0)
#define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1)
#define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2)
@@ -628,6 +651,7 @@
#define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2)
/* Common SCTLR_ELx flags. */
+#define SCTLR_ELx_ENTP2 (BIT(60))
#define SCTLR_ELx_DSSBS (BIT(44))
#define SCTLR_ELx_ATA (BIT(43))
@@ -836,6 +860,7 @@
#define ID_AA64PFR0_ELx_32BIT_64BIT 0x2
/* id_aa64pfr1 */
+#define ID_AA64PFR1_SME_SHIFT 24
#define ID_AA64PFR1_MPAMFRAC_SHIFT 16
#define ID_AA64PFR1_RASFRAC_SHIFT 12
#define ID_AA64PFR1_MTE_SHIFT 8
@@ -846,6 +871,7 @@
#define ID_AA64PFR1_SSBS_PSTATE_ONLY 1
#define ID_AA64PFR1_SSBS_PSTATE_INSNS 2
#define ID_AA64PFR1_BT_BTI 0x1
+#define ID_AA64PFR1_SME 1
#define ID_AA64PFR1_MTE_NI 0x0
#define ID_AA64PFR1_MTE_EL0 0x1
@@ -874,6 +900,23 @@
#define ID_AA64ZFR0_AES_PMULL 0x2
#define ID_AA64ZFR0_SVEVER_SVE2 0x1
+/* id_aa64smfr0 */
+#define ID_AA64SMFR0_FA64_SHIFT 63
+#define ID_AA64SMFR0_I16I64_SHIFT 52
+#define ID_AA64SMFR0_F64F64_SHIFT 48
+#define ID_AA64SMFR0_I8I32_SHIFT 36
+#define ID_AA64SMFR0_F16F32_SHIFT 35
+#define ID_AA64SMFR0_B16F32_SHIFT 34
+#define ID_AA64SMFR0_F32F32_SHIFT 32
+
+#define ID_AA64SMFR0_FA64 0x1
+#define ID_AA64SMFR0_I16I64 0x4
+#define ID_AA64SMFR0_F64F64 0x1
+#define ID_AA64SMFR0_I8I32 0x4
+#define ID_AA64SMFR0_F16F32 0x1
+#define ID_AA64SMFR0_B16F32 0x1
+#define ID_AA64SMFR0_F32F32 0x1
+
/* id_aa64mmfr0 */
#define ID_AA64MMFR0_ECV_SHIFT 60
#define ID_AA64MMFR0_FGT_SHIFT 56
@@ -926,6 +969,7 @@
/* id_aa64mmfr1 */
#define ID_AA64MMFR1_ECBHB_SHIFT 60
+#define ID_AA64MMFR1_HCX_SHIFT 40
#define ID_AA64MMFR1_AFP_SHIFT 44
#define ID_AA64MMFR1_ETS_SHIFT 36
#define ID_AA64MMFR1_TWED_SHIFT 32
@@ -1119,9 +1163,24 @@
#define ZCR_ELx_LEN_SIZE 9
#define ZCR_ELx_LEN_MASK 0x1ff
+#define SMCR_ELx_FA64_SHIFT 31
+#define SMCR_ELx_FA64_MASK (1 << SMCR_ELx_FA64_SHIFT)
+
+/*
+ * The SMCR_ELx_LEN_* definitions intentionally include bits [8:4] which
+ * are reserved by the SME architecture for future expansion of the LEN
+ * field, with compatible semantics.
+ */
+#define SMCR_ELx_LEN_SHIFT 0
+#define SMCR_ELx_LEN_SIZE 9
+#define SMCR_ELx_LEN_MASK 0x1ff
+
#define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */
#define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */
+#define CPACR_EL1_SMEN_EL1EN (BIT(24)) /* enable EL1 access */
+#define CPACR_EL1_SMEN_EL0EN (BIT(25)) /* enable EL0 access, if EL1EN set */
+
#define CPACR_EL1_ZEN_EL1EN (BIT(16)) /* enable EL1 access */
#define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */
@@ -1170,6 +1229,8 @@
#define TRFCR_ELx_ExTRE BIT(1)
#define TRFCR_ELx_E0TRE BIT(0)
+/* HCRX_EL2 definitions */
+#define HCRX_EL2_SMPME_MASK (1 << 5)
/* GIC Hypervisor interface registers */
/* ICH_MISR_EL2 bit definitions */
@@ -1233,6 +1294,12 @@
#define ICH_VTR_TDS_SHIFT 19
#define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT)
+/* HFG[WR]TR_EL2 bit definitions */
+#define HFGxTR_EL2_nTPIDR2_EL0_SHIFT 55
+#define HFGxTR_EL2_nTPIDR2_EL0_MASK BIT_MASK(HFGxTR_EL2_nTPIDR2_EL0_SHIFT)
+#define HFGxTR_EL2_nSMPRI_EL1_SHIFT 54
+#define HFGxTR_EL2_nSMPRI_EL1_MASK BIT_MASK(HFGxTR_EL2_nSMPRI_EL1_SHIFT)
+
#define ARM64_FEATURE_FIELD_BITS 4
/* Create a mask for the feature bits of the specified feature. */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index e1317b7c4525..848739c15de8 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -82,6 +82,8 @@ int arch_dup_task_struct(struct task_struct *dst,
#define TIF_SVE_VL_INHERIT 24 /* Inherit SVE vl_onexec across exec */
#define TIF_SSBD 25 /* Wants SSB mitigation */
#define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */
+#define TIF_SME 27 /* SME in use */
+#define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)