summaryrefslogtreecommitdiff
path: root/arch/arm64/include
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/include')
-rw-r--r--arch/arm64/include/asm/Kbuild1
-rw-r--r--arch/arm64/include/asm/apple_m1_pmu.h1
-rw-r--r--arch/arm64/include/asm/asm-extable.h10
-rw-r--r--arch/arm64/include/asm/asm-uaccess.h4
-rw-r--r--arch/arm64/include/asm/assembler.h5
-rw-r--r--arch/arm64/include/asm/cache.h4
-rw-r--r--arch/arm64/include/asm/cpu.h1
-rw-r--r--arch/arm64/include/asm/cpucaps.h4
-rw-r--r--arch/arm64/include/asm/cpufeature.h32
-rw-r--r--arch/arm64/include/asm/cputype.h62
-rw-r--r--arch/arm64/include/asm/el2_setup.h125
-rw-r--r--arch/arm64/include/asm/esr.h73
-rw-r--r--arch/arm64/include/asm/extable.h4
-rw-r--r--arch/arm64/include/asm/fixmap.h6
-rw-r--r--arch/arm64/include/asm/fpsimd.h65
-rw-r--r--arch/arm64/include/asm/ftrace.h51
-rw-r--r--arch/arm64/include/asm/hardirq.h4
-rw-r--r--arch/arm64/include/asm/hugetlb.h35
-rw-r--r--arch/arm64/include/asm/hwcap.h15
-rw-r--r--arch/arm64/include/asm/hyperv-tlfs.h71
-rw-r--r--arch/arm64/include/asm/hypervisor.h1
-rw-r--r--arch/arm64/include/asm/insn.h13
-rw-r--r--arch/arm64/include/asm/io.h6
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h10
-rw-r--r--arch/arm64/include/asm/kvm_arm.h199
-rw-r--r--arch/arm64/include/asm/kvm_asm.h14
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h185
-rw-r--r--arch/arm64/include/asm/kvm_host.h360
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h2
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h18
-rw-r--r--arch/arm64/include/asm/kvm_nested.h112
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h43
-rw-r--r--arch/arm64/include/asm/kvm_pkvm.h60
-rw-r--r--arch/arm64/include/asm/kvm_ras.h2
-rw-r--r--arch/arm64/include/asm/mem_encrypt.h13
-rw-r--r--arch/arm64/include/asm/memory.h5
-rw-r--r--arch/arm64/include/asm/mmu.h15
-rw-r--r--arch/arm64/include/asm/mmu_context.h14
-rw-r--r--arch/arm64/include/asm/mshyperv.h20
-rw-r--r--arch/arm64/include/asm/pgalloc.h18
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h41
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h43
-rw-r--r--arch/arm64/include/asm/pgtable-types.h20
-rw-r--r--arch/arm64/include/asm/pgtable.h330
-rw-r--r--arch/arm64/include/asm/por.h11
-rw-r--r--arch/arm64/include/asm/ptdump.h28
-rw-r--r--arch/arm64/include/asm/rqspinlock.h93
-rw-r--r--arch/arm64/include/asm/rsi.h2
-rw-r--r--arch/arm64/include/asm/rsi_cmds.h2
-rw-r--r--arch/arm64/include/asm/rwonce.h4
-rw-r--r--arch/arm64/include/asm/seccomp.h1
-rw-r--r--arch/arm64/include/asm/sections.h1
-rw-r--r--arch/arm64/include/asm/sparsemem.h5
-rw-r--r--arch/arm64/include/asm/spectre.h4
-rw-r--r--arch/arm64/include/asm/stacktrace/nvhe.h2
-rw-r--r--arch/arm64/include/asm/syscall.h29
-rw-r--r--arch/arm64/include/asm/sysreg.h115
-rw-r--r--arch/arm64/include/asm/thread_info.h18
-rw-r--r--arch/arm64/include/asm/tlb.h21
-rw-r--r--arch/arm64/include/asm/tlbflush.h54
-rw-r--r--arch/arm64/include/asm/traps.h4
-rw-r--r--arch/arm64/include/asm/vdso.h2
-rw-r--r--arch/arm64/include/asm/vdso/compat_gettimeofday.h38
-rw-r--r--arch/arm64/include/asm/vdso/getrandom.h12
-rw-r--r--arch/arm64/include/asm/vdso/gettimeofday.h43
-rw-r--r--arch/arm64/include/asm/vdso/vsyscall.h29
-rw-r--r--arch/arm64/include/asm/virt.h3
-rw-r--r--arch/arm64/include/asm/vmalloc.h45
-rw-r--r--arch/arm64/include/asm/vncr_mapping.h5
-rw-r--r--arch/arm64/include/uapi/asm/hwcap.h15
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h26
71 files changed, 1707 insertions, 1022 deletions
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 4e350df9a02d..d2ff8f6c3231 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -8,6 +8,7 @@ syscall-y += unistd_32.h
syscall-y += unistd_compat_32.h
generic-y += early_ioremap.h
+generic-y += fprobe.h
generic-y += mcs_spinlock.h
generic-y += mmzone.h
generic-y += qrwlock.h
diff --git a/arch/arm64/include/asm/apple_m1_pmu.h b/arch/arm64/include/asm/apple_m1_pmu.h
index 99483b19b99f..02e05d05851f 100644
--- a/arch/arm64/include/asm/apple_m1_pmu.h
+++ b/arch/arm64/include/asm/apple_m1_pmu.h
@@ -37,6 +37,7 @@
#define PMCR0_PMI_ENABLE_8_9 GENMASK(45, 44)
#define SYS_IMP_APL_PMCR1_EL1 sys_reg(3, 1, 15, 1, 0)
+#define SYS_IMP_APL_PMCR1_EL12 sys_reg(3, 1, 15, 7, 2)
#define PMCR1_COUNT_A64_EL0_0_7 GENMASK(15, 8)
#define PMCR1_COUNT_A64_EL1_0_7 GENMASK(23, 16)
#define PMCR1_COUNT_A64_EL0_8_9 GENMASK(41, 40)
diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h
index b8a5861dc7b7..292f2687a12e 100644
--- a/arch/arm64/include/asm/asm-extable.h
+++ b/arch/arm64/include/asm/asm-extable.h
@@ -9,7 +9,8 @@
#define EX_TYPE_BPF 1
#define EX_TYPE_UACCESS_ERR_ZERO 2
#define EX_TYPE_KACCESS_ERR_ZERO 3
-#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4
+#define EX_TYPE_UACCESS_CPY 4
+#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 5
/* Data fields for EX_TYPE_UACCESS_ERR_ZERO */
#define EX_DATA_REG_ERR_SHIFT 0
@@ -23,6 +24,9 @@
#define EX_DATA_REG_ADDR_SHIFT 5
#define EX_DATA_REG_ADDR GENMASK(9, 5)
+/* Data fields for EX_TYPE_UACCESS_CPY */
+#define EX_DATA_UACCESS_WRITE BIT(0)
+
#ifdef __ASSEMBLY__
#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \
@@ -69,6 +73,10 @@
.endif
.endm
+ .macro _asm_extable_uaccess_cpy, insn, fixup, uaccess_is_write
+ __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_UACCESS_CPY, \uaccess_is_write)
+ .endm
+
#else /* __ASSEMBLY__ */
#include <linux/stringify.h>
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index 5b6efe8abeeb..9148f5a31968 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -61,6 +61,10 @@ alternative_else_nop_endif
9999: x; \
_asm_extable_uaccess 9999b, l
+#define USER_CPY(l, uaccess_is_write, x...) \
+9999: x; \
+ _asm_extable_uaccess_cpy 9999b, l, uaccess_is_write
+
/*
* Generate the assembly for LDTR/STTR with exception table entries.
* This is complicated as there is no post-increment or pair versions of the
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 3d8d534a7a77..ad63457a05c5 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -343,6 +343,11 @@ alternative_cb_end
// Narrow PARange to fit the PS field in TCR_ELx
ubfx \tmp0, \tmp0, #ID_AA64MMFR0_EL1_PARANGE_SHIFT, #3
mov \tmp1, #ID_AA64MMFR0_EL1_PARANGE_MAX
+#ifdef CONFIG_ARM64_LPA2
+alternative_if_not ARM64_HAS_VA52
+ mov \tmp1, #ID_AA64MMFR0_EL1_PARANGE_48
+alternative_else_nop_endif
+#endif
cmp \tmp0, \tmp1
csel \tmp0, \tmp1, \tmp0, hi
bfi \tcr, \tmp0, \pos, #3
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 06a4670bdb0b..99cd6546e72e 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -35,7 +35,7 @@
#define ARCH_DMA_MINALIGN (128)
#define ARCH_KMALLOC_MINALIGN (8)
-#ifndef __ASSEMBLY__
+#if !defined(__ASSEMBLY__) && !defined(BUILD_VDSO)
#include <linux/bitops.h>
#include <linux/kasan-enabled.h>
@@ -118,6 +118,6 @@ static inline u32 __attribute_const__ read_cpuid_effective_cachetype(void)
return ctr;
}
-#endif /* __ASSEMBLY__ */
+#endif /* !defined(__ASSEMBLY__) && !defined(BUILD_VDSO) */
#endif
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 81e4157f92b7..71493b760b83 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -44,6 +44,7 @@ struct cpuinfo_arm64 {
u64 reg_dczid;
u64 reg_midr;
u64 reg_revidr;
+ u64 reg_aidr;
u64 reg_gmid;
u64 reg_smidr;
u64 reg_mpamidr;
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index cbbf70e0f204..9d769291a306 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -46,6 +46,8 @@ cpucap_is_possible(const unsigned int cap)
return IS_ENABLED(CONFIG_ARM64_POE);
case ARM64_HAS_GCS:
return IS_ENABLED(CONFIG_ARM64_GCS);
+ case ARM64_HAFT:
+ return IS_ENABLED(CONFIG_ARM64_HAFT);
case ARM64_UNMAP_KERNEL_AT_EL0:
return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0);
case ARM64_WORKAROUND_843419:
@@ -69,6 +71,8 @@ cpucap_is_possible(const unsigned int cap)
* KVM MPAM support doesn't rely on the host kernel supporting MPAM.
*/
return true;
+ case ARM64_HAS_PMUV3:
+ return IS_ENABLED(CONFIG_HW_PERF_EVENTS);
}
return true;
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 8b4e5a3cd24c..c4326f1cb917 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -525,29 +525,6 @@ cpuid_feature_extract_unsigned_field(u64 features, int field)
return cpuid_feature_extract_unsigned_field_width(features, field, 4);
}
-/*
- * Fields that identify the version of the Performance Monitors Extension do
- * not follow the standard ID scheme. See ARM DDI 0487E.a page D13-2825,
- * "Alternative ID scheme used for the Performance Monitors Extension version".
- */
-static inline u64 __attribute_const__
-cpuid_feature_cap_perfmon_field(u64 features, int field, u64 cap)
-{
- u64 val = cpuid_feature_extract_unsigned_field(features, field);
- u64 mask = GENMASK_ULL(field + 3, field);
-
- /* Treat IMPLEMENTATION DEFINED functionality as unimplemented */
- if (val == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
- val = 0;
-
- if (val > cap) {
- features &= ~mask;
- features |= (cap << field) & mask;
- }
-
- return features;
-}
-
static inline u64 arm64_ftr_mask(const struct arm64_ftr_bits *ftrp)
{
return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
@@ -671,6 +648,7 @@ static inline bool supports_clearbhb(int scope)
}
const struct cpumask *system_32bit_el0_cpumask(void);
+const struct cpumask *fallback_32bit_el0_cpumask(void);
DECLARE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0);
static inline bool system_supports_32bit_el0(void)
@@ -852,8 +830,7 @@ static inline bool system_supports_gcs(void)
static inline bool system_supports_haft(void)
{
- return IS_ENABLED(CONFIG_ARM64_HAFT) &&
- cpus_have_final_cap(ARM64_HAFT);
+ return cpus_have_final_cap(ARM64_HAFT);
}
static __always_inline bool system_supports_mpam(void)
@@ -866,6 +843,11 @@ static __always_inline bool system_supports_mpam_hcr(void)
return alternative_has_cap_unlikely(ARM64_MPAM_HCR);
}
+static inline bool system_supports_pmuv3(void)
+{
+ return cpus_have_final_cap(ARM64_HAS_PMUV3);
+}
+
int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
bool try_emulate_mrs(struct pt_regs *regs, u32 isn);
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 488f8e751349..661735616787 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -75,11 +75,13 @@
#define ARM_CPU_PART_CORTEX_A76 0xD0B
#define ARM_CPU_PART_NEOVERSE_N1 0xD0C
#define ARM_CPU_PART_CORTEX_A77 0xD0D
+#define ARM_CPU_PART_CORTEX_A76AE 0xD0E
#define ARM_CPU_PART_NEOVERSE_V1 0xD40
#define ARM_CPU_PART_CORTEX_A78 0xD41
#define ARM_CPU_PART_CORTEX_A78AE 0xD42
#define ARM_CPU_PART_CORTEX_X1 0xD44
#define ARM_CPU_PART_CORTEX_A510 0xD46
+#define ARM_CPU_PART_CORTEX_X1C 0xD4C
#define ARM_CPU_PART_CORTEX_A520 0xD80
#define ARM_CPU_PART_CORTEX_A710 0xD47
#define ARM_CPU_PART_CORTEX_A715 0xD4D
@@ -119,9 +121,11 @@
#define QCOM_CPU_PART_KRYO 0x200
#define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800
#define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801
+#define QCOM_CPU_PART_KRYO_3XX_GOLD 0x802
#define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803
#define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804
#define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805
+#define QCOM_CPU_PART_ORYON_X1 0x001
#define NVIDIA_CPU_PART_DENVER 0x003
#define NVIDIA_CPU_PART_CARMEL 0x004
@@ -129,6 +133,8 @@
#define FUJITSU_CPU_PART_A64FX 0x001
#define HISI_CPU_PART_TSV110 0xD01
+#define HISI_CPU_PART_HIP09 0xD02
+#define HISI_CPU_PART_HIP12 0xD06
#define APPLE_CPU_PART_M1_ICESTORM 0x022
#define APPLE_CPU_PART_M1_FIRESTORM 0x023
@@ -158,11 +164,13 @@
#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
#define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1)
#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
+#define MIDR_CORTEX_A76AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76AE)
#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1)
#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
+#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C)
#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520)
#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
#define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715)
@@ -195,13 +203,27 @@
#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
#define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD)
#define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER)
+#define MIDR_QCOM_KRYO_3XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_GOLD)
#define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER)
#define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD)
#define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER)
+#define MIDR_QCOM_ORYON_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_ORYON_X1)
+
+/*
+ * NOTES:
+ * - Qualcomm Kryo 5XX Prime / Gold ID themselves as MIDR_CORTEX_A77
+ * - Qualcomm Kryo 5XX Silver IDs itself as MIDR_QCOM_KRYO_4XX_SILVER
+ * - Qualcomm Kryo 6XX Prime IDs itself as MIDR_CORTEX_X1
+ * - Qualcomm Kryo 6XX Gold IDs itself as ARM_CPU_PART_CORTEX_A78
+ * - Qualcomm Kryo 6XX Silver IDs itself as MIDR_CORTEX_A55
+ */
+
#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
+#define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09)
+#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
#define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
@@ -230,6 +252,16 @@
#define read_cpuid(reg) read_sysreg_s(SYS_ ## reg)
/*
+ * The CPU ID never changes at run time, so we might as well tell the
+ * compiler that it's constant. Use this function to read the CPU ID
+ * rather than directly reading processor_id or read_cpuid() directly.
+ */
+static inline u32 __attribute_const__ read_cpuid_id(void)
+{
+ return read_cpuid(MIDR_EL1);
+}
+
+/*
* Represent a range of MIDR values for a given CPU model and a
* range of variant/revision values.
*
@@ -264,30 +296,14 @@ static inline bool midr_is_cpu_model_range(u32 midr, u32 model, u32 rv_min,
return _model == model && rv >= rv_min && rv <= rv_max;
}
-static inline bool is_midr_in_range(u32 midr, struct midr_range const *range)
-{
- return midr_is_cpu_model_range(midr, range->model,
- range->rv_min, range->rv_max);
-}
-
-static inline bool
-is_midr_in_range_list(u32 midr, struct midr_range const *ranges)
-{
- while (ranges->model)
- if (is_midr_in_range(midr, ranges++))
- return true;
- return false;
-}
+struct target_impl_cpu {
+ u64 midr;
+ u64 revidr;
+ u64 aidr;
+};
-/*
- * The CPU ID never changes at run time, so we might as well tell the
- * compiler that it's constant. Use this function to read the CPU ID
- * rather than directly reading processor_id or read_cpuid() directly.
- */
-static inline u32 __attribute_const__ read_cpuid_id(void)
-{
- return read_cpuid(MIDR_EL1);
-}
+bool cpu_errata_set_target_impl(u64 num, void *impl_cpus);
+bool is_midr_in_range_list(struct midr_range const *ranges);
static inline u64 __attribute_const__ read_cpuid_mpidr(void)
{
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 4ef52d7245bb..9f38340d24c2 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -16,6 +16,32 @@
#include <asm/sysreg.h>
#include <linux/irqchip/arm-gic-v3.h>
+.macro init_el2_hcr val
+ mov_q x0, \val
+
+ /*
+ * Compliant CPUs advertise their VHE-onlyness with
+ * ID_AA64MMFR4_EL1.E2H0 < 0. On such CPUs HCR_EL2.E2H is RES1, but it
+ * can reset into an UNKNOWN state and might not read as 1 until it has
+ * been initialized explicitly.
+ *
+ * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but
+ * don't advertise it (they predate this relaxation).
+ *
+ * Initalize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H
+ * indicating whether the CPU is running in E2H mode.
+ */
+ mrs_s x1, SYS_ID_AA64MMFR4_EL1
+ sbfx x1, x1, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH
+ cmp x1, #0
+ b.ge .LnVHE_\@
+
+ orr x0, x0, #HCR_E2H
+.LnVHE_\@:
+ msr_hcr_el2 x0
+ isb
+.endm
+
.macro __init_el2_sctlr
mov_q x0, INIT_SCTLR_EL2_MMU_OFF
msr sctlr_el2, x0
@@ -26,7 +52,7 @@
mrs x0, id_aa64mmfr1_el1
ubfx x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
cbz x0, .Lskip_hcrx_\@
- mov_q x0, HCRX_HOST_FLAGS
+ mov_q x0, (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)
/* Enable GCS if supported */
mrs_s x1, SYS_ID_AA64PFR1_EL1
@@ -154,7 +180,7 @@
/* Coprocessor traps */
.macro __init_el2_cptr
__check_hvhe .LnVHE_\@, x1
- mov x0, #CPACR_ELx_FPEN
+ mov x0, #CPACR_EL1_FPEN
msr cpacr_el1, x0
b .Lskip_set_cptr_\@
.LnVHE_\@:
@@ -178,26 +204,28 @@
orr x0, x0, #(1 << 62)
.Lskip_spe_fgt_\@:
+
+.Lset_debug_fgt_\@:
msr_s SYS_HDFGRTR_EL2, x0
msr_s SYS_HDFGWTR_EL2, x0
mov x0, xzr
mrs x1, id_aa64pfr1_el1
ubfx x1, x1, #ID_AA64PFR1_EL1_SME_SHIFT, #4
- cbz x1, .Lskip_debug_fgt_\@
+ cbz x1, .Lskip_sme_fgt_\@
/* Disable nVHE traps of TPIDR2 and SMPRI */
- orr x0, x0, #HFGxTR_EL2_nSMPRI_EL1_MASK
- orr x0, x0, #HFGxTR_EL2_nTPIDR2_EL0_MASK
+ orr x0, x0, #HFGRTR_EL2_nSMPRI_EL1_MASK
+ orr x0, x0, #HFGRTR_EL2_nTPIDR2_EL0_MASK
-.Lskip_debug_fgt_\@:
+.Lskip_sme_fgt_\@:
mrs_s x1, SYS_ID_AA64MMFR3_EL1
ubfx x1, x1, #ID_AA64MMFR3_EL1_S1PIE_SHIFT, #4
cbz x1, .Lskip_pie_fgt_\@
/* Disable trapping of PIR_EL1 / PIRE0_EL1 */
- orr x0, x0, #HFGxTR_EL2_nPIR_EL1
- orr x0, x0, #HFGxTR_EL2_nPIRE0_EL1
+ orr x0, x0, #HFGRTR_EL2_nPIR_EL1
+ orr x0, x0, #HFGRTR_EL2_nPIRE0_EL1
.Lskip_pie_fgt_\@:
mrs_s x1, SYS_ID_AA64MMFR3_EL1
@@ -205,17 +233,19 @@
cbz x1, .Lskip_poe_fgt_\@
/* Disable trapping of POR_EL0 */
- orr x0, x0, #HFGxTR_EL2_nPOR_EL0
+ orr x0, x0, #HFGRTR_EL2_nPOR_EL0
.Lskip_poe_fgt_\@:
/* GCS depends on PIE so we don't check it if PIE is absent */
mrs_s x1, SYS_ID_AA64PFR1_EL1
ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4
- cbz x1, .Lset_fgt_\@
+ cbz x1, .Lskip_gce_fgt_\@
/* Disable traps of access to GCS registers at EL0 and EL1 */
- orr x0, x0, #HFGxTR_EL2_nGCS_EL1_MASK
- orr x0, x0, #HFGxTR_EL2_nGCS_EL0_MASK
+ orr x0, x0, #HFGRTR_EL2_nGCS_EL1_MASK
+ orr x0, x0, #HFGRTR_EL2_nGCS_EL0_MASK
+
+.Lskip_gce_fgt_\@:
.Lset_fgt_\@:
msr_s SYS_HFGRTR_EL2, x0
@@ -233,33 +263,28 @@
.Lskip_fgt_\@:
.endm
-.macro __init_el2_gcs
- mrs_s x1, SYS_ID_AA64PFR1_EL1
- ubfx x1, x1, #ID_AA64PFR1_EL1_GCS_SHIFT, #4
- cbz x1, .Lskip_gcs_\@
-
- /* Ensure GCS is not enabled when we start trying to do BLs */
- msr_s SYS_GCSCR_EL1, xzr
- msr_s SYS_GCSCRE0_EL1, xzr
-.Lskip_gcs_\@:
-.endm
-
-.macro __init_el2_nvhe_prepare_eret
- mov x0, #INIT_PSTATE_EL1
- msr spsr_el2, x0
-.endm
+.macro __init_el2_fgt2
+ mrs x1, id_aa64mmfr0_el1
+ ubfx x1, x1, #ID_AA64MMFR0_EL1_FGT_SHIFT, #4
+ cmp x1, #ID_AA64MMFR0_EL1_FGT_FGT2
+ b.lt .Lskip_fgt2_\@
-.macro __init_el2_mpam
- /* Memory Partitioning And Monitoring: disable EL2 traps */
- mrs x1, id_aa64pfr0_el1
- ubfx x0, x1, #ID_AA64PFR0_EL1_MPAM_SHIFT, #4
- cbz x0, .Lskip_mpam_\@ // skip if no MPAM
- msr_s SYS_MPAM2_EL2, xzr // use the default partition
- // and disable lower traps
- mrs_s x0, SYS_MPAMIDR_EL1
- tbz x0, #MPAMIDR_EL1_HAS_HCR_SHIFT, .Lskip_mpam_\@ // skip if no MPAMHCR reg
- msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2
-.Lskip_mpam_\@:
+ mov x0, xzr
+ mrs x1, id_aa64dfr0_el1
+ ubfx x1, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
+ cmp x1, #ID_AA64DFR0_EL1_PMUVer_V3P9
+ b.lt .Lskip_pmuv3p9_\@
+
+ orr x0, x0, #HDFGRTR2_EL2_nPMICNTR_EL0
+ orr x0, x0, #HDFGRTR2_EL2_nPMICFILTR_EL0
+ orr x0, x0, #HDFGRTR2_EL2_nPMUACR_EL1
+.Lskip_pmuv3p9_\@:
+ msr_s SYS_HDFGRTR2_EL2, x0
+ msr_s SYS_HDFGWTR2_EL2, x0
+ msr_s SYS_HFGRTR2_EL2, xzr
+ msr_s SYS_HFGWTR2_EL2, xzr
+ msr_s SYS_HFGITR2_EL2, xzr
+.Lskip_fgt2_\@:
.endm
/**
@@ -279,11 +304,10 @@
__init_el2_stage2
__init_el2_gicv3
__init_el2_hstr
- __init_el2_mpam
__init_el2_nvhe_idregs
__init_el2_cptr
__init_el2_fgt
- __init_el2_gcs
+ __init_el2_fgt2
.endm
#ifndef __KVM_NVHE_HYPERVISOR__
@@ -325,6 +349,23 @@
#endif
.macro finalise_el2_state
+ check_override id_aa64pfr0, ID_AA64PFR0_EL1_MPAM_SHIFT, .Linit_mpam_\@, .Lskip_mpam_\@, x1, x2
+
+.Linit_mpam_\@:
+ msr_s SYS_MPAM2_EL2, xzr // use the default partition
+ // and disable lower traps
+ mrs_s x0, SYS_MPAMIDR_EL1
+ tbz x0, #MPAMIDR_EL1_HAS_HCR_SHIFT, .Lskip_mpam_\@ // skip if no MPAMHCR reg
+ msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2
+
+.Lskip_mpam_\@:
+ check_override id_aa64pfr1, ID_AA64PFR1_EL1_GCS_SHIFT, .Linit_gcs_\@, .Lskip_gcs_\@, x1, x2
+
+.Linit_gcs_\@:
+ msr_s SYS_GCSCR_EL1, xzr
+ msr_s SYS_GCSCRE0_EL1, xzr
+
+.Lskip_gcs_\@:
check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2
.Linit_sve_\@: /* SVE register access */
@@ -332,7 +373,7 @@
// (h)VHE case
mrs x0, cpacr_el1 // Disable SVE traps
- orr x0, x0, #CPACR_ELx_ZEN
+ orr x0, x0, #CPACR_EL1_ZEN
msr cpacr_el1, x0
b .Lskip_set_cptr_\@
@@ -353,7 +394,7 @@
// (h)VHE case
mrs x0, cpacr_el1 // Disable SME traps
- orr x0, x0, #CPACR_ELx_SMEN
+ orr x0, x0, #CPACR_EL1_SMEN
msr cpacr_el1, x0
b .Lskip_set_cptr_sme_\@
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index d1b1a33f9a8b..e1deed824464 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -20,7 +20,8 @@
#define ESR_ELx_EC_FP_ASIMD UL(0x07)
#define ESR_ELx_EC_CP10_ID UL(0x08) /* EL2 only */
#define ESR_ELx_EC_PAC UL(0x09) /* EL2 and above */
-/* Unallocated EC: 0x0A - 0x0B */
+#define ESR_ELx_EC_OTHER UL(0x0A)
+/* Unallocated EC: 0x0B */
#define ESR_ELx_EC_CP14_64 UL(0x0C)
#define ESR_ELx_EC_BTI UL(0x0D)
#define ESR_ELx_EC_ILL UL(0x0E)
@@ -99,6 +100,8 @@
#define ESR_ELx_AET_CE (UL(6) << ESR_ELx_AET_SHIFT)
/* Shared ISS field definitions for Data/Instruction aborts */
+#define ESR_ELx_VNCR_SHIFT (13)
+#define ESR_ELx_VNCR (UL(1) << ESR_ELx_VNCR_SHIFT)
#define ESR_ELx_SET_SHIFT (11)
#define ESR_ELx_SET_MASK (UL(3) << ESR_ELx_SET_SHIFT)
#define ESR_ELx_FnV_SHIFT (10)
@@ -121,6 +124,15 @@
#define ESR_ELx_FSC_SEA_TTW(n) (0x14 + (n))
#define ESR_ELx_FSC_SECC (0x18)
#define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n))
+#define ESR_ELx_FSC_ADDRSZ (0x00)
+
+/*
+ * Annoyingly, the negative levels for Address size faults aren't laid out
+ * contiguously (or in the desired order)
+ */
+#define ESR_ELx_FSC_ADDRSZ_nL(n) ((n) == -1 ? 0x25 : 0x2C)
+#define ESR_ELx_FSC_ADDRSZ_L(n) ((n) < 0 ? ESR_ELx_FSC_ADDRSZ_nL(n) : \
+ (ESR_ELx_FSC_ADDRSZ + (n)))
/* Status codes for individual page table levels */
#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + (n))
@@ -161,8 +173,6 @@
#define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0))
/* ISS field definitions for exceptions taken in to Hyp */
-#define ESR_ELx_FSC_ADDRSZ (0x00)
-#define ESR_ELx_FSC_ADDRSZ_L(n) (ESR_ELx_FSC_ADDRSZ + (n))
#define ESR_ELx_CV (UL(1) << 24)
#define ESR_ELx_COND_SHIFT (20)
#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
@@ -174,6 +184,13 @@
#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0)
#define ESR_ELx_xVC_IMM_MASK ((UL(1) << 16) - 1)
+/* ISS definitions for LD64B/ST64B/{T,P}SBCSYNC instructions */
+#define ESR_ELx_ISS_OTHER_ST64BV (0)
+#define ESR_ELx_ISS_OTHER_ST64BV0 (1)
+#define ESR_ELx_ISS_OTHER_LDST64B (2)
+#define ESR_ELx_ISS_OTHER_TSBCSYNC (3)
+#define ESR_ELx_ISS_OTHER_PSBCSYNC (4)
+
#define DISR_EL1_IDS (UL(1) << 24)
/*
* DISR_EL1 and ESR_ELx share the bottom 13 bits, but the RES0 bits may mean
@@ -371,12 +388,14 @@
/*
* ISS values for SME traps
*/
+#define ESR_ELx_SME_ISS_SMTC_MASK GENMASK(2, 0)
+#define ESR_ELx_SME_ISS_SMTC(esr) ((esr) & ESR_ELx_SME_ISS_SMTC_MASK)
-#define ESR_ELx_SME_ISS_SME_DISABLED 0
-#define ESR_ELx_SME_ISS_ILL 1
-#define ESR_ELx_SME_ISS_SM_DISABLED 2
-#define ESR_ELx_SME_ISS_ZA_DISABLED 3
-#define ESR_ELx_SME_ISS_ZT_DISABLED 4
+#define ESR_ELx_SME_ISS_SMTC_SME_DISABLED 0
+#define ESR_ELx_SME_ISS_SMTC_ILL 1
+#define ESR_ELx_SME_ISS_SMTC_SM_DISABLED 2
+#define ESR_ELx_SME_ISS_SMTC_ZA_DISABLED 3
+#define ESR_ELx_SME_ISS_SMTC_ZT_DISABLED 4
/* ISS field definitions for MOPS exceptions */
#define ESR_ELx_MOPS_ISS_MEM_INST (UL(1) << 24)
@@ -433,6 +452,11 @@ static inline bool esr_is_cfi_brk(unsigned long esr)
(esr_brk_comment(esr) & ~CFI_BRK_IMM_MASK) == CFI_BRK_IMM_BASE;
}
+static inline bool esr_is_ubsan_brk(unsigned long esr)
+{
+ return (esr_brk_comment(esr) & ~UBSAN_BRK_MASK) == UBSAN_BRK_IMM;
+}
+
static inline bool esr_fsc_is_translation_fault(unsigned long esr)
{
esr = esr & ESR_ELx_FSC;
@@ -464,6 +488,39 @@ static inline bool esr_fsc_is_access_flag_fault(unsigned long esr)
(esr == ESR_ELx_FSC_ACCESS_L(0));
}
+static inline bool esr_fsc_is_addr_sz_fault(unsigned long esr)
+{
+ esr &= ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_ADDRSZ_L(3)) ||
+ (esr == ESR_ELx_FSC_ADDRSZ_L(2)) ||
+ (esr == ESR_ELx_FSC_ADDRSZ_L(1)) ||
+ (esr == ESR_ELx_FSC_ADDRSZ_L(0)) ||
+ (esr == ESR_ELx_FSC_ADDRSZ_L(-1));
+}
+
+static inline bool esr_fsc_is_sea_ttw(unsigned long esr)
+{
+ esr = esr & ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_SEA_TTW(3)) ||
+ (esr == ESR_ELx_FSC_SEA_TTW(2)) ||
+ (esr == ESR_ELx_FSC_SEA_TTW(1)) ||
+ (esr == ESR_ELx_FSC_SEA_TTW(0)) ||
+ (esr == ESR_ELx_FSC_SEA_TTW(-1));
+}
+
+static inline bool esr_fsc_is_secc_ttw(unsigned long esr)
+{
+ esr = esr & ESR_ELx_FSC;
+
+ return (esr == ESR_ELx_FSC_SECC_TTW(3)) ||
+ (esr == ESR_ELx_FSC_SECC_TTW(2)) ||
+ (esr == ESR_ELx_FSC_SECC_TTW(1)) ||
+ (esr == ESR_ELx_FSC_SECC_TTW(0)) ||
+ (esr == ESR_ELx_FSC_SECC_TTW(-1));
+}
+
/* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */
static inline bool esr_iss_is_eretax(unsigned long esr)
{
diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h
index 72b0e71cc3de..9dc39612bdf5 100644
--- a/arch/arm64/include/asm/extable.h
+++ b/arch/arm64/include/asm/extable.h
@@ -33,6 +33,8 @@ do { \
(b)->data = (tmp).data; \
} while (0)
+bool insn_may_access_user(unsigned long addr, unsigned long esr);
+
#ifdef CONFIG_BPF_JIT
bool ex_handler_bpf(const struct exception_table_entry *ex,
struct pt_regs *regs);
@@ -45,5 +47,5 @@ bool ex_handler_bpf(const struct exception_table_entry *ex,
}
#endif /* !CONFIG_BPF_JIT */
-bool fixup_exception(struct pt_regs *regs);
+bool fixup_exception(struct pt_regs *regs, unsigned long esr);
#endif
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 87e307804b99..635a43c4ec85 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -48,6 +48,12 @@ enum fixed_addresses {
FIX_EARLYCON_MEM_BASE,
FIX_TEXT_POKE0,
+#ifdef CONFIG_KVM
+ /* One slot per CPU, mapping the guest's VNCR page at EL2. */
+ FIX_VNCR_END,
+ FIX_VNCR = FIX_VNCR_END + NR_CPUS,
+#endif
+
#ifdef CONFIG_ACPI_APEI_GHES
/* Used for GHES mapping from assorted contexts */
FIX_APEI_GHES_IRQ,
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index f2a84efc3618..b8cf0ea43cc0 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -6,6 +6,7 @@
#define __ASM_FP_H
#include <asm/errno.h>
+#include <asm/percpu.h>
#include <asm/ptrace.h>
#include <asm/processor.h>
#include <asm/sigcontext.h>
@@ -76,11 +77,9 @@ extern void fpsimd_load_state(struct user_fpsimd_state *state);
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);
-extern void fpsimd_signal_preserve_current_state(void);
extern void fpsimd_preserve_current_state(void);
extern void fpsimd_restore_current_state(void);
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
-extern void fpsimd_kvm_prepare(void);
struct cpu_fp_state {
struct user_fpsimd_state *st;
@@ -94,9 +93,12 @@ struct cpu_fp_state {
enum fp_type to_save;
};
+DECLARE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);
+
extern void fpsimd_bind_state_to_cpu(struct cpu_fp_state *fp_state);
extern void fpsimd_flush_task_state(struct task_struct *target);
+extern void fpsimd_save_and_flush_current_state(void);
extern void fpsimd_save_and_flush_cpu_state(void);
static inline bool thread_sm_enabled(struct thread_struct *thread)
@@ -109,6 +111,8 @@ static inline bool thread_za_enabled(struct thread_struct *thread)
return system_supports_sme() && (thread->svcr & SVCR_ZA_MASK);
}
+extern void task_smstop_sm(struct task_struct *task);
+
/* Maximum VL that SVE/SME VL-agnostic software can transparently support */
#define VL_ARCH_MAX 0x100
@@ -196,10 +200,8 @@ struct vl_info {
extern void sve_alloc(struct task_struct *task, bool flush);
extern void fpsimd_release_task(struct task_struct *task);
-extern void fpsimd_sync_to_sve(struct task_struct *task);
-extern void fpsimd_force_sync_to_sve(struct task_struct *task);
-extern void sve_sync_to_fpsimd(struct task_struct *task);
-extern void sve_sync_from_fpsimd_zeropad(struct task_struct *task);
+extern void fpsimd_sync_from_effective_state(struct task_struct *task);
+extern void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task);
extern int vec_set_vector_length(struct task_struct *task, enum vec_type type,
unsigned long vl, unsigned long flags);
@@ -293,14 +295,29 @@ static inline bool sve_vq_available(unsigned int vq)
return vq_available(ARM64_VEC_SVE, vq);
}
-size_t sve_state_size(struct task_struct const *task);
+static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl)
+{
+ unsigned int vl = max(sve_vl, sme_vl);
+ return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
+}
+
+/*
+ * Return how many bytes of memory are required to store the full SVE
+ * state for task, given task's currently configured vector length.
+ */
+static inline size_t sve_state_size(struct task_struct const *task)
+{
+ unsigned int sve_vl = task_get_sve_vl(task);
+ unsigned int sme_vl = task_get_sme_vl(task);
+ return __sve_state_size(sve_vl, sme_vl);
+}
#else /* ! CONFIG_ARM64_SVE */
static inline void sve_alloc(struct task_struct *task, bool flush) { }
static inline void fpsimd_release_task(struct task_struct *task) { }
-static inline void sve_sync_to_fpsimd(struct task_struct *task) { }
-static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { }
+static inline void fpsimd_sync_from_effective_state(struct task_struct *task) { }
+static inline void fpsimd_sync_to_effective_state_zeropad(struct task_struct *task) { }
static inline int sve_max_virtualisable_vl(void)
{
@@ -334,6 +351,11 @@ static inline void vec_update_vq_map(enum vec_type t) { }
static inline int vec_verify_vq_map(enum vec_type t) { return 0; }
static inline void sve_setup(void) { }
+static inline size_t __sve_state_size(unsigned int sve_vl, unsigned int sme_vl)
+{
+ return 0;
+}
+
static inline size_t sve_state_size(struct task_struct const *task)
{
return 0;
@@ -386,6 +408,16 @@ extern int sme_set_current_vl(unsigned long arg);
extern int sme_get_current_vl(void);
extern void sme_suspend_exit(void);
+static inline size_t __sme_state_size(unsigned int sme_vl)
+{
+ size_t size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(sme_vl));
+
+ if (system_supports_sme2())
+ size += ZT_SIG_REG_SIZE;
+
+ return size;
+}
+
/*
* Return how many bytes of memory are required to store the full SME
* specific state for task, given task's currently configured vector
@@ -393,15 +425,7 @@ extern void sme_suspend_exit(void);
*/
static inline size_t sme_state_size(struct task_struct const *task)
{
- unsigned int vl = task_get_sme_vl(task);
- size_t size;
-
- size = ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
-
- if (system_supports_sme2())
- size += ZT_SIG_REG_SIZE;
-
- return size;
+ return __sme_state_size(task_get_sme_vl(task));
}
#else
@@ -422,6 +446,11 @@ static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
static inline int sme_get_current_vl(void) { return -EINVAL; }
static inline void sme_suspend_exit(void) { }
+static inline size_t __sme_state_size(unsigned int sme_vl)
+{
+ return 0;
+}
+
static inline size_t sme_state_size(struct task_struct const *task)
{
return 0;
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index 5ccff4de7f09..bfe3ce9df197 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -52,6 +52,8 @@ extern unsigned long ftrace_graph_call;
extern void return_to_handler(void);
unsigned long ftrace_call_adjust(unsigned long addr);
+unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip);
+#define ftrace_get_symaddr(fentry_ip) arch_ftrace_get_symaddr(fentry_ip)
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
#define HAVE_ARCH_FTRACE_REGS
@@ -129,6 +131,38 @@ ftrace_override_function_with_return(struct ftrace_regs *fregs)
arch_ftrace_regs(fregs)->pc = arch_ftrace_regs(fregs)->lr;
}
+static __always_inline unsigned long
+ftrace_regs_get_frame_pointer(const struct ftrace_regs *fregs)
+{
+ return arch_ftrace_regs(fregs)->fp;
+}
+
+static __always_inline unsigned long
+ftrace_regs_get_return_address(const struct ftrace_regs *fregs)
+{
+ return arch_ftrace_regs(fregs)->lr;
+}
+
+static __always_inline struct pt_regs *
+ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+ struct __arch_ftrace_regs *afregs = arch_ftrace_regs(fregs);
+
+ memcpy(regs->regs, afregs->regs, sizeof(afregs->regs));
+ regs->sp = afregs->sp;
+ regs->pc = afregs->pc;
+ regs->regs[29] = afregs->fp;
+ regs->regs[30] = afregs->lr;
+ return regs;
+}
+
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \
+ (_regs)->pc = arch_ftrace_regs(fregs)->pc; \
+ (_regs)->regs[29] = arch_ftrace_regs(fregs)->fp; \
+ (_regs)->sp = arch_ftrace_regs(fregs)->sp; \
+ (_regs)->pstate = PSR_MODE_EL1h; \
+ } while (0)
+
int ftrace_regs_query_register_offset(const char *name);
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
@@ -186,23 +220,6 @@ static inline bool arch_syscall_match_sym_name(const char *sym,
#ifndef __ASSEMBLY__
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-struct fgraph_ret_regs {
- /* x0 - x7 */
- unsigned long regs[8];
-
- unsigned long fp;
- unsigned long __unused;
-};
-
-static inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs)
-{
- return ret_regs->regs[0];
-}
-
-static inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs)
-{
- return ret_regs->fp;
-}
void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
unsigned long frame_pointer);
diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index cbfa7b6f2e09..77d6b8c63d4e 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -41,7 +41,7 @@ do { \
\
___hcr = read_sysreg(hcr_el2); \
if (!(___hcr & HCR_TGE)) { \
- write_sysreg(___hcr | HCR_TGE, hcr_el2); \
+ write_sysreg_hcr(___hcr | HCR_TGE); \
isb(); \
} \
/* \
@@ -82,7 +82,7 @@ do { \
*/ \
barrier(); \
if (!___ctx->cnt && !(___hcr & HCR_TGE)) \
- write_sysreg(___hcr, hcr_el2); \
+ write_sysreg_hcr(___hcr); \
} while (0)
static inline void ack_bad_irq(unsigned int irq)
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index c6dff3e69539..2a8155c4a882 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -42,8 +42,8 @@ extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty);
#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
-extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep);
+extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long sz);
#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pte_t *ptep);
@@ -69,6 +69,30 @@ extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
#include <asm-generic/hugetlb.h>
+static inline void __flush_hugetlb_tlb_range(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end,
+ unsigned long stride,
+ bool last_level)
+{
+ switch (stride) {
+#ifndef __PAGETABLE_PMD_FOLDED
+ case PUD_SIZE:
+ __flush_tlb_range(vma, start, end, PUD_SIZE, last_level, 1);
+ break;
+#endif
+ case CONT_PMD_SIZE:
+ case PMD_SIZE:
+ __flush_tlb_range(vma, start, end, PMD_SIZE, last_level, 2);
+ break;
+ case CONT_PTE_SIZE:
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, last_level, 3);
+ break;
+ default:
+ __flush_tlb_range(vma, start, end, PAGE_SIZE, last_level, TLBI_TTL_UNKNOWN);
+ }
+}
+
#define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
unsigned long start,
@@ -76,12 +100,7 @@ static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
{
unsigned long stride = huge_page_size(hstate_vma(vma));
- if (stride == PMD_SIZE)
- __flush_tlb_range(vma, start, end, stride, false, 2);
- else if (stride == PUD_SIZE)
- __flush_tlb_range(vma, start, end, stride, false, 1);
- else
- __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
+ __flush_hugetlb_tlb_range(vma, start, end, stride, false);
}
#endif /* __ASM_HUGETLB_H */
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 2b6c61c608e2..1c3f9617d54f 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -93,6 +93,21 @@
#define KERNEL_HWCAP_PACA __khwcap_feature(PACA)
#define KERNEL_HWCAP_PACG __khwcap_feature(PACG)
#define KERNEL_HWCAP_GCS __khwcap_feature(GCS)
+#define KERNEL_HWCAP_CMPBR __khwcap_feature(CMPBR)
+#define KERNEL_HWCAP_FPRCVT __khwcap_feature(FPRCVT)
+#define KERNEL_HWCAP_F8MM8 __khwcap_feature(F8MM8)
+#define KERNEL_HWCAP_F8MM4 __khwcap_feature(F8MM4)
+#define KERNEL_HWCAP_SVE_F16MM __khwcap_feature(SVE_F16MM)
+#define KERNEL_HWCAP_SVE_ELTPERM __khwcap_feature(SVE_ELTPERM)
+#define KERNEL_HWCAP_SVE_AES2 __khwcap_feature(SVE_AES2)
+#define KERNEL_HWCAP_SVE_BFSCALE __khwcap_feature(SVE_BFSCALE)
+#define KERNEL_HWCAP_SVE2P2 __khwcap_feature(SVE2P2)
+#define KERNEL_HWCAP_SME2P2 __khwcap_feature(SME2P2)
+#define KERNEL_HWCAP_SME_SBITPERM __khwcap_feature(SME_SBITPERM)
+#define KERNEL_HWCAP_SME_AES __khwcap_feature(SME_AES)
+#define KERNEL_HWCAP_SME_SFEXPA __khwcap_feature(SME_SFEXPA)
+#define KERNEL_HWCAP_SME_STMOP __khwcap_feature(SME_STMOP)
+#define KERNEL_HWCAP_SME_SMOP4 __khwcap_feature(SME_SMOP4)
#define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 64)
#define KERNEL_HWCAP_DCPODP __khwcap2_feature(DCPODP)
diff --git a/arch/arm64/include/asm/hyperv-tlfs.h b/arch/arm64/include/asm/hyperv-tlfs.h
deleted file mode 100644
index bc30aadedfe9..000000000000
--- a/arch/arm64/include/asm/hyperv-tlfs.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-/*
- * This file contains definitions from the Hyper-V Hypervisor Top-Level
- * Functional Specification (TLFS):
- * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs
- *
- * Copyright (C) 2021, Microsoft, Inc.
- *
- * Author : Michael Kelley <mikelley@microsoft.com>
- */
-
-#ifndef _ASM_HYPERV_TLFS_H
-#define _ASM_HYPERV_TLFS_H
-
-#include <linux/types.h>
-
-/*
- * All data structures defined in the TLFS that are shared between Hyper-V
- * and a guest VM use Little Endian byte ordering. This matches the default
- * byte ordering of Linux running on ARM64, so no special handling is required.
- */
-
-/*
- * Group C Features. See the asm-generic version of hyperv-tlfs.h
- * for a description of Feature Groups.
- */
-
-/* Crash MSRs available */
-#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(8)
-
-/* STIMER direct mode is available */
-#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(13)
-
-/*
- * To support arch-generic code calling hv_set/get_register:
- * - On x86, HV_MSR_ indicates an MSR accessed via rdmsrl/wrmsrl
- * - On ARM, HV_MSR_ indicates a VP register accessed via hypercall
- */
-#define HV_MSR_CRASH_P0 (HV_REGISTER_GUEST_CRASH_P0)
-#define HV_MSR_CRASH_P1 (HV_REGISTER_GUEST_CRASH_P1)
-#define HV_MSR_CRASH_P2 (HV_REGISTER_GUEST_CRASH_P2)
-#define HV_MSR_CRASH_P3 (HV_REGISTER_GUEST_CRASH_P3)
-#define HV_MSR_CRASH_P4 (HV_REGISTER_GUEST_CRASH_P4)
-#define HV_MSR_CRASH_CTL (HV_REGISTER_GUEST_CRASH_CTL)
-
-#define HV_MSR_VP_INDEX (HV_REGISTER_VP_INDEX)
-#define HV_MSR_TIME_REF_COUNT (HV_REGISTER_TIME_REF_COUNT)
-#define HV_MSR_REFERENCE_TSC (HV_REGISTER_REFERENCE_TSC)
-
-#define HV_MSR_SINT0 (HV_REGISTER_SINT0)
-#define HV_MSR_SCONTROL (HV_REGISTER_SCONTROL)
-#define HV_MSR_SIEFP (HV_REGISTER_SIEFP)
-#define HV_MSR_SIMP (HV_REGISTER_SIMP)
-#define HV_MSR_EOM (HV_REGISTER_EOM)
-
-#define HV_MSR_STIMER0_CONFIG (HV_REGISTER_STIMER0_CONFIG)
-#define HV_MSR_STIMER0_COUNT (HV_REGISTER_STIMER0_COUNT)
-
-union hv_msi_entry {
- u64 as_uint64[2];
- struct {
- u64 address;
- u32 data;
- u32 reserved;
- } __packed;
-};
-
-#include <asm-generic/hyperv-tlfs.h>
-
-#endif
diff --git a/arch/arm64/include/asm/hypervisor.h b/arch/arm64/include/asm/hypervisor.h
index 409e239834d1..a12fd897c877 100644
--- a/arch/arm64/include/asm/hypervisor.h
+++ b/arch/arm64/include/asm/hypervisor.h
@@ -6,6 +6,7 @@
void kvm_init_hyp_services(void);
bool kvm_arm_hyp_service_available(u32 func_id);
+void kvm_arm_target_impl_cpu_init(void);
#ifdef CONFIG_ARM_PKVM_GUEST
void pkvm_init_hyp_services(void);
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index e390c432f546..18c7811774d3 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -188,8 +188,10 @@ enum aarch64_insn_ldst_type {
AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX,
AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX,
AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX,
+ AARCH64_INSN_LDST_LOAD_ACQ,
AARCH64_INSN_LDST_LOAD_EX,
AARCH64_INSN_LDST_LOAD_ACQ_EX,
+ AARCH64_INSN_LDST_STORE_REL,
AARCH64_INSN_LDST_STORE_EX,
AARCH64_INSN_LDST_STORE_REL_EX,
AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET,
@@ -351,8 +353,10 @@ __AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000)
__AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000)
__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
__AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000)
-__AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000)
-__AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000)
+__AARCH64_INSN_FUNCS(load_acq, 0x3FDFFC00, 0x08DFFC00)
+__AARCH64_INSN_FUNCS(store_rel, 0x3FDFFC00, 0x089FFC00)
+__AARCH64_INSN_FUNCS(load_ex, 0x3FC00000, 0x08400000)
+__AARCH64_INSN_FUNCS(store_ex, 0x3FC00000, 0x08000000)
__AARCH64_INSN_FUNCS(mops, 0x3B200C00, 0x19000400)
__AARCH64_INSN_FUNCS(stp, 0x7FC00000, 0x29000000)
__AARCH64_INSN_FUNCS(ldp, 0x7FC00000, 0x29400000)
@@ -602,6 +606,10 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
int offset,
enum aarch64_insn_variant variant,
enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg,
+ enum aarch64_insn_register base,
+ enum aarch64_insn_size_type size,
+ enum aarch64_insn_ldst_type type);
u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
enum aarch64_insn_register base,
enum aarch64_insn_register state,
@@ -698,6 +706,7 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
}
#endif
u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type);
+u32 aarch64_insn_gen_dsb(enum aarch64_insn_mb_type type);
u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result,
enum aarch64_insn_system_register sysreg);
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 76ebbdc6ffdd..9b96840fb979 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -270,9 +270,9 @@ int arm64_ioremap_prot_hook_register(const ioremap_prot_hook_t hook);
#define _PAGE_IOREMAP PROT_DEVICE_nGnRE
#define ioremap_wc(addr, size) \
- ioremap_prot((addr), (size), PROT_NORMAL_NC)
+ ioremap_prot((addr), (size), __pgprot(PROT_NORMAL_NC))
#define ioremap_np(addr, size) \
- ioremap_prot((addr), (size), PROT_DEVICE_nGnRnE)
+ ioremap_prot((addr), (size), __pgprot(PROT_DEVICE_nGnRnE))
/*
* io{read,write}{16,32,64}be() macros
@@ -293,7 +293,7 @@ static inline void __iomem *ioremap_cache(phys_addr_t addr, size_t size)
if (pfn_is_map_memory(__phys_to_pfn(addr)))
return (void __iomem *)__phys_to_virt(addr);
- return ioremap_prot(addr, size, PROT_NORMAL);
+ return ioremap_prot(addr, size, __pgprot(PROT_NORMAL));
}
/*
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index fd5a08450b12..74a4f738c5f5 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -45,11 +45,11 @@
#define SPAN_NR_ENTRIES(vstart, vend, shift) \
((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1)
-#define EARLY_ENTRIES(vstart, vend, shift, add) \
- (SPAN_NR_ENTRIES(vstart, vend, shift) + (add))
+#define EARLY_ENTRIES(lvl, vstart, vend) \
+ SPAN_NR_ENTRIES(vstart, vend, SWAPPER_BLOCK_SHIFT + lvl * PTDESC_TABLE_SHIFT)
-#define EARLY_LEVEL(lvl, lvls, vstart, vend, add) \
- (lvls > lvl ? EARLY_ENTRIES(vstart, vend, SWAPPER_BLOCK_SHIFT + lvl * (PAGE_SHIFT - 3), add) : 0)
+#define EARLY_LEVEL(lvl, lvls, vstart, vend, add) \
+ ((lvls) > (lvl) ? EARLY_ENTRIES(lvl, vstart, vend) + (add) : 0)
#define EARLY_PAGES(lvls, vstart, vend, add) (1 /* PGDIR page */ \
+ EARLY_LEVEL(3, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \
@@ -58,7 +58,7 @@
#define INIT_DIR_SIZE (PAGE_SIZE * (EARLY_PAGES(SWAPPER_PGTABLE_LEVELS, KIMAGE_VADDR, _end, EXTRA_PAGE) \
+ EARLY_SEGMENT_EXTRA_PAGES))
-#define INIT_IDMAP_DIR_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, KIMAGE_VADDR, _end, 1))
+#define INIT_IDMAP_DIR_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, KIMAGE_VADDR, kimage_limit, 1))
#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + EARLY_IDMAP_EXTRA_PAGES) * PAGE_SIZE)
#define INIT_IDMAP_FDT_PAGES (EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, 0UL, UL(MAX_FDT_SIZE), 1) - 1)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 3e0f0de1d2da..1da290aeedce 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -12,67 +12,70 @@
#include <asm/sysreg.h>
#include <asm/types.h>
-/* Hyp Configuration Register (HCR) bits */
-
-#define HCR_TID5 (UL(1) << 58)
-#define HCR_DCT (UL(1) << 57)
-#define HCR_ATA_SHIFT 56
-#define HCR_ATA (UL(1) << HCR_ATA_SHIFT)
-#define HCR_TTLBOS (UL(1) << 55)
-#define HCR_TTLBIS (UL(1) << 54)
-#define HCR_ENSCXT (UL(1) << 53)
-#define HCR_TOCU (UL(1) << 52)
-#define HCR_AMVOFFEN (UL(1) << 51)
-#define HCR_TICAB (UL(1) << 50)
-#define HCR_TID4 (UL(1) << 49)
-#define HCR_FIEN (UL(1) << 47)
-#define HCR_FWB (UL(1) << 46)
-#define HCR_NV2 (UL(1) << 45)
-#define HCR_AT (UL(1) << 44)
-#define HCR_NV1 (UL(1) << 43)
-#define HCR_NV (UL(1) << 42)
-#define HCR_API (UL(1) << 41)
-#define HCR_APK (UL(1) << 40)
-#define HCR_TEA (UL(1) << 37)
-#define HCR_TERR (UL(1) << 36)
-#define HCR_TLOR (UL(1) << 35)
-#define HCR_E2H (UL(1) << 34)
-#define HCR_ID (UL(1) << 33)
-#define HCR_CD (UL(1) << 32)
-#define HCR_RW_SHIFT 31
-#define HCR_RW (UL(1) << HCR_RW_SHIFT)
-#define HCR_TRVM (UL(1) << 30)
-#define HCR_HCD (UL(1) << 29)
-#define HCR_TDZ (UL(1) << 28)
-#define HCR_TGE (UL(1) << 27)
-#define HCR_TVM (UL(1) << 26)
-#define HCR_TTLB (UL(1) << 25)
-#define HCR_TPU (UL(1) << 24)
-#define HCR_TPC (UL(1) << 23) /* HCR_TPCP if FEAT_DPB */
-#define HCR_TSW (UL(1) << 22)
-#define HCR_TACR (UL(1) << 21)
-#define HCR_TIDCP (UL(1) << 20)
-#define HCR_TSC (UL(1) << 19)
-#define HCR_TID3 (UL(1) << 18)
-#define HCR_TID2 (UL(1) << 17)
-#define HCR_TID1 (UL(1) << 16)
-#define HCR_TID0 (UL(1) << 15)
-#define HCR_TWE (UL(1) << 14)
-#define HCR_TWI (UL(1) << 13)
-#define HCR_DC (UL(1) << 12)
-#define HCR_BSU (3 << 10)
-#define HCR_BSU_IS (UL(1) << 10)
-#define HCR_FB (UL(1) << 9)
-#define HCR_VSE (UL(1) << 8)
-#define HCR_VI (UL(1) << 7)
-#define HCR_VF (UL(1) << 6)
-#define HCR_AMO (UL(1) << 5)
-#define HCR_IMO (UL(1) << 4)
-#define HCR_FMO (UL(1) << 3)
-#define HCR_PTW (UL(1) << 2)
-#define HCR_SWIO (UL(1) << 1)
-#define HCR_VM (UL(1) << 0)
-#define HCR_RES0 ((UL(1) << 48) | (UL(1) << 39))
+/*
+ * Because I'm terribly lazy and that repainting the whole of the KVM
+ * code with the proper names is a pain, use a helper to map the names
+ * inherited from AArch32 with the new fancy nomenclature. One day...
+ */
+#define __HCR(x) HCR_EL2_##x
+
+#define HCR_TID5 __HCR(TID5)
+#define HCR_DCT __HCR(DCT)
+#define HCR_ATA_SHIFT __HCR(ATA_SHIFT)
+#define HCR_ATA __HCR(ATA)
+#define HCR_TTLBOS __HCR(TTLBOS)
+#define HCR_TTLBIS __HCR(TTLBIS)
+#define HCR_ENSCXT __HCR(EnSCXT)
+#define HCR_TOCU __HCR(TOCU)
+#define HCR_AMVOFFEN __HCR(AMVOFFEN)
+#define HCR_TICAB __HCR(TICAB)
+#define HCR_TID4 __HCR(TID4)
+#define HCR_FIEN __HCR(FIEN)
+#define HCR_FWB __HCR(FWB)
+#define HCR_NV2 __HCR(NV2)
+#define HCR_AT __HCR(AT)
+#define HCR_NV1 __HCR(NV1)
+#define HCR_NV __HCR(NV)
+#define HCR_API __HCR(API)
+#define HCR_APK __HCR(APK)
+#define HCR_TEA __HCR(TEA)
+#define HCR_TERR __HCR(TERR)
+#define HCR_TLOR __HCR(TLOR)
+#define HCR_E2H __HCR(E2H)
+#define HCR_ID __HCR(ID)
+#define HCR_CD __HCR(CD)
+#define HCR_RW __HCR(RW)
+#define HCR_TRVM __HCR(TRVM)
+#define HCR_HCD __HCR(HCD)
+#define HCR_TDZ __HCR(TDZ)
+#define HCR_TGE __HCR(TGE)
+#define HCR_TVM __HCR(TVM)
+#define HCR_TTLB __HCR(TTLB)
+#define HCR_TPU __HCR(TPU)
+#define HCR_TPC __HCR(TPCP)
+#define HCR_TSW __HCR(TSW)
+#define HCR_TACR __HCR(TACR)
+#define HCR_TIDCP __HCR(TIDCP)
+#define HCR_TSC __HCR(TSC)
+#define HCR_TID3 __HCR(TID3)
+#define HCR_TID2 __HCR(TID2)
+#define HCR_TID1 __HCR(TID1)
+#define HCR_TID0 __HCR(TID0)
+#define HCR_TWE __HCR(TWE)
+#define HCR_TWI __HCR(TWI)
+#define HCR_DC __HCR(DC)
+#define HCR_BSU __HCR(BSU)
+#define HCR_BSU_IS __HCR(BSU_IS)
+#define HCR_FB __HCR(FB)
+#define HCR_VSE __HCR(VSE)
+#define HCR_VI __HCR(VI)
+#define HCR_VF __HCR(VF)
+#define HCR_AMO __HCR(AMO)
+#define HCR_IMO __HCR(IMO)
+#define HCR_FMO __HCR(FMO)
+#define HCR_PTW __HCR(PTW)
+#define HCR_SWIO __HCR(SWIO)
+#define HCR_VM __HCR(VM)
/*
* The bits we set in HCR:
@@ -92,17 +95,16 @@
* SWIO: Turn set/way invalidates into set/way clean+invalidate
* PTW: Take a stage2 fault if a stage1 walk steps in device memory
* TID3: Trap EL1 reads of group 3 ID registers
- * TID2: Trap CTR_EL0, CCSIDR2_EL1, CLIDR_EL1, and CSSELR_EL1
+ * TID1: Trap REVIDR_EL1, AIDR_EL1, and SMIDR_EL1
*/
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
HCR_BSU_IS | HCR_FB | HCR_TACR | \
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
- HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3)
+ HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID1)
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
-#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
+#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H | HCR_AMO | HCR_IMO | HCR_FMO)
-#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)
#define MPAMHCR_HOST_FLAGS 0
/* TCR_EL2 Registers bits */
@@ -119,7 +121,7 @@
#define TCR_EL2_IRGN0_MASK TCR_IRGN0_MASK
#define TCR_EL2_T0SZ_MASK 0x3f
#define TCR_EL2_MASK (TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \
- TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK)
+ TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK)
/* VTCR_EL2 Registers bits */
#define VTCR_EL2_DS TCR_EL2_DS
@@ -300,7 +302,7 @@
#define CPTR_EL2_TSM (1 << 12)
#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
#define CPTR_EL2_TZ (1 << 8)
-#define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */
+#define CPTR_NVHE_EL2_RES1 (BIT(13) | BIT(9) | GENMASK(7, 0))
#define CPTR_NVHE_EL2_RES0 (GENMASK(63, 32) | \
GENMASK(29, 21) | \
GENMASK(19, 14) | \
@@ -313,56 +315,19 @@
GENMASK(15, 0))
/*
- * FGT register definitions
- *
- * RES0 and polarity masks as of DDI0487J.a, to be updated as needed.
- * We're not using the generated masks as they are usually ahead of
- * the published ARM ARM, which we use as a reference.
- *
- * Once we get to a point where the two describe the same thing, we'll
- * merge the definitions. One day.
- */
-#define __HFGRTR_EL2_RES0 HFGxTR_EL2_RES0
-#define __HFGRTR_EL2_MASK GENMASK(49, 0)
-#define __HFGRTR_EL2_nMASK ~(__HFGRTR_EL2_RES0 | __HFGRTR_EL2_MASK)
-
-/*
- * The HFGWTR bits are a subset of HFGRTR bits. To ensure we don't miss any
- * future additions, define __HFGWTR* macros relative to __HFGRTR* ones.
+ * Polarity masks for HCRX_EL2, limited to the bits that we know about
+ * at this point in time. It doesn't mean that we actually *handle*
+ * them, but that at least those that are not advertised to a guest
+ * will be RES0 for that guest.
*/
-#define __HFGRTR_ONLY_MASK (BIT(46) | BIT(42) | BIT(40) | BIT(28) | \
- GENMASK(26, 25) | BIT(21) | BIT(18) | \
- GENMASK(15, 14) | GENMASK(10, 9) | BIT(2))
-#define __HFGWTR_EL2_RES0 (__HFGRTR_EL2_RES0 | __HFGRTR_ONLY_MASK)
-#define __HFGWTR_EL2_MASK (__HFGRTR_EL2_MASK & ~__HFGRTR_ONLY_MASK)
-#define __HFGWTR_EL2_nMASK ~(__HFGWTR_EL2_RES0 | __HFGWTR_EL2_MASK)
-
-#define __HFGITR_EL2_RES0 HFGITR_EL2_RES0
-#define __HFGITR_EL2_MASK (BIT(62) | BIT(60) | GENMASK(54, 0))
-#define __HFGITR_EL2_nMASK ~(__HFGITR_EL2_RES0 | __HFGITR_EL2_MASK)
-
-#define __HDFGRTR_EL2_RES0 HDFGRTR_EL2_RES0
-#define __HDFGRTR_EL2_MASK (BIT(63) | GENMASK(58, 50) | GENMASK(48, 43) | \
- GENMASK(41, 40) | GENMASK(37, 22) | \
- GENMASK(19, 9) | GENMASK(7, 0))
-#define __HDFGRTR_EL2_nMASK ~(__HDFGRTR_EL2_RES0 | __HDFGRTR_EL2_MASK)
-
-#define __HDFGWTR_EL2_RES0 HDFGWTR_EL2_RES0
-#define __HDFGWTR_EL2_MASK (GENMASK(57, 52) | GENMASK(50, 48) | \
- GENMASK(46, 44) | GENMASK(42, 41) | \
- GENMASK(37, 35) | GENMASK(33, 31) | \
- GENMASK(29, 23) | GENMASK(21, 10) | \
- GENMASK(8, 7) | GENMASK(5, 0))
-#define __HDFGWTR_EL2_nMASK ~(__HDFGWTR_EL2_RES0 | __HDFGWTR_EL2_MASK)
-
-#define __HAFGRTR_EL2_RES0 HAFGRTR_EL2_RES0
-#define __HAFGRTR_EL2_MASK (GENMASK(49, 17) | GENMASK(4, 0))
-#define __HAFGRTR_EL2_nMASK ~(__HAFGRTR_EL2_RES0 | __HAFGRTR_EL2_MASK)
-
-/* Similar definitions for HCRX_EL2 */
-#define __HCRX_EL2_RES0 HCRX_EL2_RES0
-#define __HCRX_EL2_MASK (BIT(6))
-#define __HCRX_EL2_nMASK ~(__HCRX_EL2_RES0 | __HCRX_EL2_MASK)
+#define __HCRX_EL2_MASK (BIT_ULL(6))
+#define __HCRX_EL2_nMASK (GENMASK_ULL(24, 14) | \
+ GENMASK_ULL(11, 7) | \
+ GENMASK_ULL(5, 0))
+#define __HCRX_EL2_RES0 ~(__HCRX_EL2_nMASK | __HCRX_EL2_MASK)
+#define __HCRX_EL2_RES1 ~(__HCRX_EL2_nMASK | \
+ __HCRX_EL2_MASK | \
+ __HCRX_EL2_RES0)
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~UL(0xf))
@@ -391,8 +356,6 @@
ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
ECN(BKPT32), ECN(VECTOR32), ECN(BRK64), ECN(ERET)
-#define CPACR_EL1_TTA (1 << 28)
-
#define kvm_mode_names \
{ PSR_MODE_EL0t, "EL0t" }, \
{ PSR_MODE_EL1t, "EL1t" }, \
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index ca2590344313..bec227f9500a 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -53,8 +53,7 @@
enum __kvm_host_smccc_func {
/* Hypercalls available only prior to pKVM finalisation */
/* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */
- __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1,
- __KVM_HOST_SMCCC_FUNC___pkvm_init,
+ __KVM_HOST_SMCCC_FUNC___pkvm_init = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1,
__KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping,
__KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector,
__KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs,
@@ -65,6 +64,12 @@ enum __kvm_host_smccc_func {
/* Hypercalls available after pKVM finalisation */
__KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_share_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_relax_perms_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_wrprotect_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_test_clear_young_guest,
+ __KVM_HOST_SMCCC_FUNC___pkvm_host_mkyoung_guest,
__KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
@@ -79,6 +84,9 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
__KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
+ __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
+ __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
+ __KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid,
};
#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
@@ -247,8 +255,6 @@ extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu);
extern u64 __vgic_v3_get_gic_config(void);
extern void __vgic_v3_init_lrs(void);
-extern u64 __kvm_get_mdcr_el2(void);
-
#define __KVM_EXTABLE(from, to) \
" .pushsection __kvm_ex_table, \"a\"\n" \
" .align 3\n" \
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index cf811009a33c..0720898f563e 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -184,29 +184,30 @@ static inline bool vcpu_is_el2(const struct kvm_vcpu *vcpu)
return vcpu_is_el2_ctxt(&vcpu->arch.ctxt);
}
-static inline bool __vcpu_el2_e2h_is_set(const struct kvm_cpu_context *ctxt)
+static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu)
{
return (!cpus_have_final_cap(ARM64_HAS_HCR_NV1) ||
- (ctxt_sys_reg(ctxt, HCR_EL2) & HCR_E2H));
+ (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_E2H));
}
-static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu)
+static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu)
{
- return __vcpu_el2_e2h_is_set(&vcpu->arch.ctxt);
+ return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_TGE;
}
-static inline bool __vcpu_el2_tge_is_set(const struct kvm_cpu_context *ctxt)
+static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
{
- return ctxt_sys_reg(ctxt, HCR_EL2) & HCR_TGE;
-}
+ bool e2h, tge;
+ u64 hcr;
-static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu)
-{
- return __vcpu_el2_tge_is_set(&vcpu->arch.ctxt);
-}
+ if (!vcpu_has_nv(vcpu))
+ return false;
+
+ hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
+
+ e2h = (hcr & HCR_E2H);
+ tge = (hcr & HCR_TGE);
-static inline bool __is_hyp_ctxt(const struct kvm_cpu_context *ctxt)
-{
/*
* We are in a hypervisor context if the vcpu mode is EL2 or
* E2H and TGE bits are set. The latter means we are in the user space
@@ -215,14 +216,7 @@ static inline bool __is_hyp_ctxt(const struct kvm_cpu_context *ctxt)
* Note that the HCR_EL2.{E2H,TGE}={0,1} isn't really handled in the
* rest of the KVM code, and will result in a misbehaving guest.
*/
- return vcpu_is_el2_ctxt(ctxt) ||
- (__vcpu_el2_e2h_is_set(ctxt) && __vcpu_el2_tge_is_set(ctxt)) ||
- __vcpu_el2_tge_is_set(ctxt);
-}
-
-static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
-{
- return vcpu_has_nv(vcpu) && __is_hyp_ctxt(&vcpu->arch.ctxt);
+ return vcpu_is_el2(vcpu) || (e2h && tge) || tge;
}
static inline bool vcpu_is_host_el0(const struct kvm_vcpu *vcpu)
@@ -281,6 +275,19 @@ static __always_inline u64 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu)
return vcpu->arch.fault.esr_el2;
}
+static inline bool guest_hyp_wfx_traps_enabled(const struct kvm_vcpu *vcpu)
+{
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+ bool is_wfe = !!(esr & ESR_ELx_WFx_ISS_WFE);
+ u64 hcr_el2 = __vcpu_sys_reg(vcpu, HCR_EL2);
+
+ if (!vcpu_has_nv(vcpu) || vcpu_is_el2(vcpu))
+ return false;
+
+ return ((is_wfe && (hcr_el2 & HCR_TWE)) ||
+ (!is_wfe && (hcr_el2 & HCR_TWI)));
+}
+
static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
{
u64 esr = kvm_vcpu_get_esr(vcpu);
@@ -298,7 +305,12 @@ static __always_inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vc
static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
{
- return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
+ u64 hpfar = vcpu->arch.fault.hpfar_el2;
+
+ if (unlikely(!(hpfar & HPFAR_EL2_NS)))
+ return INVALID_GPA;
+
+ return FIELD_GET(HPFAR_EL2_FIPA, hpfar) << 12;
}
static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu)
@@ -549,110 +561,6 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
vcpu_set_flag((v), e); \
} while (0)
-#define __build_check_all_or_none(r, bits) \
- BUILD_BUG_ON(((r) & (bits)) && ((r) & (bits)) != (bits))
-
-#define __cpacr_to_cptr_clr(clr, set) \
- ({ \
- u64 cptr = 0; \
- \
- if ((set) & CPACR_ELx_FPEN) \
- cptr |= CPTR_EL2_TFP; \
- if ((set) & CPACR_ELx_ZEN) \
- cptr |= CPTR_EL2_TZ; \
- if ((set) & CPACR_ELx_SMEN) \
- cptr |= CPTR_EL2_TSM; \
- if ((clr) & CPACR_ELx_TTA) \
- cptr |= CPTR_EL2_TTA; \
- if ((clr) & CPTR_EL2_TAM) \
- cptr |= CPTR_EL2_TAM; \
- if ((clr) & CPTR_EL2_TCPAC) \
- cptr |= CPTR_EL2_TCPAC; \
- \
- cptr; \
- })
-
-#define __cpacr_to_cptr_set(clr, set) \
- ({ \
- u64 cptr = 0; \
- \
- if ((clr) & CPACR_ELx_FPEN) \
- cptr |= CPTR_EL2_TFP; \
- if ((clr) & CPACR_ELx_ZEN) \
- cptr |= CPTR_EL2_TZ; \
- if ((clr) & CPACR_ELx_SMEN) \
- cptr |= CPTR_EL2_TSM; \
- if ((set) & CPACR_ELx_TTA) \
- cptr |= CPTR_EL2_TTA; \
- if ((set) & CPTR_EL2_TAM) \
- cptr |= CPTR_EL2_TAM; \
- if ((set) & CPTR_EL2_TCPAC) \
- cptr |= CPTR_EL2_TCPAC; \
- \
- cptr; \
- })
-
-#define cpacr_clear_set(clr, set) \
- do { \
- BUILD_BUG_ON((set) & CPTR_VHE_EL2_RES0); \
- BUILD_BUG_ON((clr) & CPACR_ELx_E0POE); \
- __build_check_all_or_none((clr), CPACR_ELx_FPEN); \
- __build_check_all_or_none((set), CPACR_ELx_FPEN); \
- __build_check_all_or_none((clr), CPACR_ELx_ZEN); \
- __build_check_all_or_none((set), CPACR_ELx_ZEN); \
- __build_check_all_or_none((clr), CPACR_ELx_SMEN); \
- __build_check_all_or_none((set), CPACR_ELx_SMEN); \
- \
- if (has_vhe() || has_hvhe()) \
- sysreg_clear_set(cpacr_el1, clr, set); \
- else \
- sysreg_clear_set(cptr_el2, \
- __cpacr_to_cptr_clr(clr, set), \
- __cpacr_to_cptr_set(clr, set));\
- } while (0)
-
-static __always_inline void kvm_write_cptr_el2(u64 val)
-{
- if (has_vhe() || has_hvhe())
- write_sysreg(val, cpacr_el1);
- else
- write_sysreg(val, cptr_el2);
-}
-
-static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
-{
- u64 val;
-
- if (has_vhe()) {
- val = (CPACR_ELx_FPEN | CPACR_EL1_ZEN_EL1EN);
- if (cpus_have_final_cap(ARM64_SME))
- val |= CPACR_EL1_SMEN_EL1EN;
- } else if (has_hvhe()) {
- val = CPACR_ELx_FPEN;
-
- if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
- val |= CPACR_ELx_ZEN;
- if (cpus_have_final_cap(ARM64_SME))
- val |= CPACR_ELx_SMEN;
- } else {
- val = CPTR_NVHE_EL2_RES1;
-
- if (vcpu_has_sve(vcpu) && guest_owns_fp_regs())
- val |= CPTR_EL2_TZ;
- if (cpus_have_final_cap(ARM64_SME))
- val &= ~CPTR_EL2_TSM;
- }
-
- return val;
-}
-
-static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu)
-{
- u64 val = kvm_get_reset_cptr_el2(vcpu);
-
- kvm_write_cptr_el2(val);
-}
-
/*
* Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE
* format if E2H isn't set.
@@ -685,7 +593,7 @@ static inline bool ____cptr_xen_trap_enabled(const struct kvm_vcpu *vcpu,
#define __guest_hyp_cptr_xen_trap_enabled(vcpu, xen) \
(!vcpu_has_nv(vcpu) ? false : \
____cptr_xen_trap_enabled(vcpu, \
- SYS_FIELD_GET(CPACR_ELx, xen, \
+ SYS_FIELD_GET(CPACR_EL1, xen, \
vcpu_sanitised_cptr_el2(vcpu))))
static inline bool guest_hyp_fpsimd_traps_enabled(const struct kvm_vcpu *vcpu)
@@ -698,8 +606,27 @@ static inline bool guest_hyp_sve_traps_enabled(const struct kvm_vcpu *vcpu)
return __guest_hyp_cptr_xen_trap_enabled(vcpu, ZEN);
}
-static inline void kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
+static inline void vcpu_set_hcrx(struct kvm_vcpu *vcpu)
{
- vcpu_set_flag(vcpu, GUEST_HAS_PTRAUTH);
+ struct kvm *kvm = vcpu->kvm;
+
+ if (cpus_have_final_cap(ARM64_HAS_HCX)) {
+ /*
+ * In general, all HCRX_EL2 bits are gated by a feature.
+ * The only reason we can set SMPME without checking any
+ * feature is that its effects are not directly observable
+ * from the guest.
+ */
+ vcpu->arch.hcrx_el2 = HCRX_EL2_SMPME;
+
+ if (kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP))
+ vcpu->arch.hcrx_el2 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2);
+
+ if (kvm_has_tcr2(kvm))
+ vcpu->arch.hcrx_el2 |= HCRX_EL2_TCR2En;
+
+ if (kvm_has_fpmr(kvm))
+ vcpu->arch.hcrx_el2 |= HCRX_EL2_EnFPM;
+ }
}
#endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e18e9244d17a..3e41a880b062 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -39,19 +39,21 @@
#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
-#define KVM_VCPU_MAX_FEATURES 7
+#define KVM_VCPU_MAX_FEATURES 9
#define KVM_VCPU_VALID_FEATURES (BIT(KVM_VCPU_MAX_FEATURES) - 1)
#define KVM_REQ_SLEEP \
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
-#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
-#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
-#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
-#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5)
-#define KVM_REQ_SUSPEND KVM_ARCH_REQ(6)
-#define KVM_REQ_RESYNC_PMU_EL0 KVM_ARCH_REQ(7)
-#define KVM_REQ_NESTED_S2_UNMAP KVM_ARCH_REQ(8)
+#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
+#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
+#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
+#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
+#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5)
+#define KVM_REQ_SUSPEND KVM_ARCH_REQ(6)
+#define KVM_REQ_RESYNC_PMU_EL0 KVM_ARCH_REQ(7)
+#define KVM_REQ_NESTED_S2_UNMAP KVM_ARCH_REQ(8)
+#define KVM_REQ_GUEST_HYP_IRQ_PENDING KVM_ARCH_REQ(9)
+#define KVM_REQ_MAP_L1_VNCR_EL2 KVM_ARCH_REQ(10)
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
KVM_DIRTY_LOG_INITIALLY_SET)
@@ -85,6 +87,10 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
struct kvm_hyp_memcache {
phys_addr_t head;
unsigned long nr_pages;
+ struct pkvm_mapping *mapping; /* only used from EL1 */
+
+#define HYP_MEMCACHE_ACCOUNT_STAGE2 BIT(1)
+ unsigned long flags;
};
static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
@@ -99,7 +105,7 @@ static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc,
void *(*to_va)(phys_addr_t phys))
{
- phys_addr_t *p = to_va(mc->head);
+ phys_addr_t *p = to_va(mc->head & PAGE_MASK);
if (!mc->nr_pages)
return NULL;
@@ -236,7 +242,8 @@ struct kvm_arch_memory_slot {
struct kvm_smccc_features {
unsigned long std_bmap;
unsigned long std_hyp_bmap;
- unsigned long vendor_hyp_bmap;
+ unsigned long vendor_hyp_bmap; /* Function numbers 0-63 */
+ unsigned long vendor_hyp_bmap_2; /* Function numbers 64-127 */
};
typedef unsigned int pkvm_handle_t;
@@ -244,6 +251,7 @@ typedef unsigned int pkvm_handle_t;
struct kvm_protected_vm {
pkvm_handle_t handle;
struct kvm_hyp_memcache teardown_mc;
+ struct kvm_hyp_memcache stage2_teardown_mc;
bool enabled;
};
@@ -266,11 +274,17 @@ struct kvm_sysreg_masks;
enum fgt_group_id {
__NO_FGT_GROUP__,
- HFGxTR_GROUP,
+ HFGRTR_GROUP,
+ HFGWTR_GROUP = HFGRTR_GROUP,
HDFGRTR_GROUP,
HDFGWTR_GROUP = HDFGRTR_GROUP,
HFGITR_GROUP,
HAFGRTR_GROUP,
+ HFGRTR2_GROUP,
+ HFGWTR2_GROUP = HFGRTR2_GROUP,
+ HDFGRTR2_GROUP,
+ HDFGWTR2_GROUP = HDFGRTR2_GROUP,
+ HFGITR2_GROUP,
/* Must be last */
__NR_FGT_GROUP_IDS__
@@ -331,6 +345,10 @@ struct kvm_arch {
#define KVM_ARCH_FLAG_ID_REGS_INITIALIZED 7
/* Fine-Grained UNDEF initialised */
#define KVM_ARCH_FLAG_FGU_INITIALIZED 8
+ /* SVE exposed to guest */
+#define KVM_ARCH_FLAG_GUEST_HAS_SVE 9
+ /* MIDR_EL1, REVIDR_EL1, and AIDR_EL1 are writable from userspace */
+#define KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS 10
unsigned long flags;
/* VM-wide vCPU feature set */
@@ -348,8 +366,8 @@ struct kvm_arch {
cpumask_var_t supported_cpus;
- /* PMCR_EL0.N value for the guest */
- u8 pmcr_n;
+ /* Maximum number of counters for the guest */
+ u8 nr_pmu_counters;
/* Iterator for idreg debugfs */
u8 idreg_debugfs_iter;
@@ -370,11 +388,17 @@ struct kvm_arch {
#define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1)
u64 id_regs[KVM_ARM_ID_REG_NUM];
+ u64 midr_el1;
+ u64 revidr_el1;
+ u64 aidr_el1;
u64 ctr_el0;
/* Masks for VNCR-backed and general EL2 sysregs */
struct kvm_sysreg_masks *sysreg_masks;
+ /* Count the number of VNCR_EL2 currently mapped */
+ atomic_t vncr_map_count;
+
/*
* For an untrusted host VM, 'pkvm.handle' is used to lookup
* the associated pKVM instance in the hypervisor.
@@ -490,7 +514,6 @@ enum vcpu_sysreg {
VBAR_EL2, /* Vector Base Address Register (EL2) */
RVBAR_EL2, /* Reset Vector Base Address Register */
CONTEXTIDR_EL2, /* Context ID Register (EL2) */
- CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
SP_EL2, /* EL2 Stack Pointer */
CNTHP_CTL_EL2,
CNTHP_CVAL_EL2,
@@ -501,6 +524,7 @@ enum vcpu_sysreg {
MARKER(__SANITISED_REG_START__),
TCR2_EL2, /* Extended Translation Control Register (EL2) */
MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */
+ CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
/* Any VNCR-capable reg goes after this point */
MARKER(__VNCR_START__),
@@ -547,6 +571,13 @@ enum vcpu_sysreg {
VNCR(HDFGRTR_EL2),
VNCR(HDFGWTR_EL2),
VNCR(HAFGRTR_EL2),
+ VNCR(HFGRTR2_EL2),
+ VNCR(HFGWTR2_EL2),
+ VNCR(HFGITR2_EL2),
+ VNCR(HDFGRTR2_EL2),
+ VNCR(HDFGWTR2_EL2),
+
+ VNCR(VNCR_EL2),
VNCR(CNTVOFF_EL2),
VNCR(CNTV_CVAL_EL0),
@@ -554,7 +585,33 @@ enum vcpu_sysreg {
VNCR(CNTP_CVAL_EL0),
VNCR(CNTP_CTL_EL0),
+ VNCR(ICH_LR0_EL2),
+ VNCR(ICH_LR1_EL2),
+ VNCR(ICH_LR2_EL2),
+ VNCR(ICH_LR3_EL2),
+ VNCR(ICH_LR4_EL2),
+ VNCR(ICH_LR5_EL2),
+ VNCR(ICH_LR6_EL2),
+ VNCR(ICH_LR7_EL2),
+ VNCR(ICH_LR8_EL2),
+ VNCR(ICH_LR9_EL2),
+ VNCR(ICH_LR10_EL2),
+ VNCR(ICH_LR11_EL2),
+ VNCR(ICH_LR12_EL2),
+ VNCR(ICH_LR13_EL2),
+ VNCR(ICH_LR14_EL2),
+ VNCR(ICH_LR15_EL2),
+
+ VNCR(ICH_AP0R0_EL2),
+ VNCR(ICH_AP0R1_EL2),
+ VNCR(ICH_AP0R2_EL2),
+ VNCR(ICH_AP0R3_EL2),
+ VNCR(ICH_AP1R0_EL2),
+ VNCR(ICH_AP1R1_EL2),
+ VNCR(ICH_AP1R2_EL2),
+ VNCR(ICH_AP1R3_EL2),
VNCR(ICH_HCR_EL2),
+ VNCR(ICH_VMCR_EL2),
NR_SYS_REGS /* Nothing after this line! */
};
@@ -566,6 +623,37 @@ struct kvm_sysreg_masks {
} mask[NR_SYS_REGS - __SANITISED_REG_START__];
};
+struct fgt_masks {
+ const char *str;
+ u64 mask;
+ u64 nmask;
+ u64 res0;
+};
+
+extern struct fgt_masks hfgrtr_masks;
+extern struct fgt_masks hfgwtr_masks;
+extern struct fgt_masks hfgitr_masks;
+extern struct fgt_masks hdfgrtr_masks;
+extern struct fgt_masks hdfgwtr_masks;
+extern struct fgt_masks hafgrtr_masks;
+extern struct fgt_masks hfgrtr2_masks;
+extern struct fgt_masks hfgwtr2_masks;
+extern struct fgt_masks hfgitr2_masks;
+extern struct fgt_masks hdfgrtr2_masks;
+extern struct fgt_masks hdfgwtr2_masks;
+
+extern struct fgt_masks kvm_nvhe_sym(hfgrtr_masks);
+extern struct fgt_masks kvm_nvhe_sym(hfgwtr_masks);
+extern struct fgt_masks kvm_nvhe_sym(hfgitr_masks);
+extern struct fgt_masks kvm_nvhe_sym(hdfgrtr_masks);
+extern struct fgt_masks kvm_nvhe_sym(hdfgwtr_masks);
+extern struct fgt_masks kvm_nvhe_sym(hafgrtr_masks);
+extern struct fgt_masks kvm_nvhe_sym(hfgrtr2_masks);
+extern struct fgt_masks kvm_nvhe_sym(hfgwtr2_masks);
+extern struct fgt_masks kvm_nvhe_sym(hfgitr2_masks);
+extern struct fgt_masks kvm_nvhe_sym(hdfgrtr2_masks);
+extern struct fgt_masks kvm_nvhe_sym(hdfgwtr2_masks);
+
struct kvm_cpu_context {
struct user_pt_regs regs; /* sp = sp_el0 */
@@ -610,26 +698,24 @@ struct cpu_sve_state {
* field.
*/
struct kvm_host_data {
+#define KVM_HOST_DATA_FLAG_HAS_SPE 0
+#define KVM_HOST_DATA_FLAG_HAS_TRBE 1
+#define KVM_HOST_DATA_FLAG_TRBE_ENABLED 4
+#define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED 5
+#define KVM_HOST_DATA_FLAG_VCPU_IN_HYP_CONTEXT 6
+#define KVM_HOST_DATA_FLAG_L1_VNCR_MAPPED 7
+ unsigned long flags;
+
struct kvm_cpu_context host_ctxt;
/*
- * All pointers in this union are hyp VA.
+ * Hyp VA.
* sve_state is only used in pKVM and if system_supports_sve().
*/
- union {
- struct user_fpsimd_state *fpsimd_state;
- struct cpu_sve_state *sve_state;
- };
-
- union {
- /* HYP VA pointer to the host storage for FPMR */
- u64 *fpmr_ptr;
- /*
- * Used by pKVM only, as it needs to provide storage
- * for the host
- */
- u64 fpmr;
- };
+ struct cpu_sve_state *sve_state;
+
+ /* Used by pKVM only. */
+ u64 fpmr;
/* Ownership of the FP regs */
enum {
@@ -642,7 +728,7 @@ struct kvm_host_data {
* host_debug_state contains the host registers which are
* saved and restored during world switches.
*/
- struct {
+ struct {
/* {Break,watch}point registers */
struct kvm_guest_debug_arch regs;
/* Statistical profiling extension */
@@ -652,6 +738,16 @@ struct kvm_host_data {
/* Values of trap registers for the host before guest entry. */
u64 mdcr_el2;
} host_debug_state;
+
+ /* Guest trace filter value */
+ u64 trfcr_while_in_guest;
+
+ /* Number of programmable event counters (PMCR_EL0.N) for this CPU */
+ unsigned int nr_event_counters;
+
+ /* Number of debug breakpoints/watchpoints for this CPU (minus 1) */
+ unsigned int debug_brps;
+ unsigned int debug_wrps;
};
struct kvm_host_psci_config {
@@ -684,6 +780,8 @@ struct vcpu_reset_state {
bool reset;
};
+struct vncr_tlb;
+
struct kvm_vcpu_arch {
struct kvm_cpu_context ctxt;
@@ -708,7 +806,6 @@ struct kvm_vcpu_arch {
u64 hcr_el2;
u64 hcrx_el2;
u64 mdcr_el2;
- u64 cptr_el2;
/* Exception Information */
struct kvm_vcpu_fault_info fault;
@@ -739,31 +836,22 @@ struct kvm_vcpu_arch {
*
* external_debug_state contains the debug values we want to debug the
* guest. This is set via the KVM_SET_GUEST_DEBUG ioctl.
- *
- * debug_ptr points to the set of debug registers that should be loaded
- * onto the hardware when running the guest.
*/
- struct kvm_guest_debug_arch *debug_ptr;
struct kvm_guest_debug_arch vcpu_debug_state;
struct kvm_guest_debug_arch external_debug_state;
+ u64 external_mdscr_el1;
+
+ enum {
+ VCPU_DEBUG_FREE,
+ VCPU_DEBUG_HOST_OWNED,
+ VCPU_DEBUG_GUEST_OWNED,
+ } debug_owner;
/* VGIC state */
struct vgic_cpu vgic_cpu;
struct arch_timer_cpu timer_cpu;
struct kvm_pmu pmu;
- /*
- * Guest registers we preserve during guest debugging.
- *
- * These shadow registers are updated by the kvm_handle_sys_reg
- * trap handler if the guest accesses or updates them while we
- * are using guest debug.
- */
- struct {
- u32 mdscr_el1;
- bool pstate_ss;
- } guest_debug_preserved;
-
/* vcpu power state */
struct kvm_mp_state mp_state;
spinlock_t mp_state_lock;
@@ -771,6 +859,9 @@ struct kvm_vcpu_arch {
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
+ /* Pages to top-up the pKVM/EL2 guest pool */
+ struct kvm_hyp_memcache pkvm_memcache;
+
/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
u64 vsesr_el2;
@@ -785,6 +876,9 @@ struct kvm_vcpu_arch {
/* Per-vcpu CCSIDR override or NULL */
u32 *ccsidr;
+
+ /* Per-vcpu TLB for VNCR_EL2 -- NULL when !NV */
+ struct vncr_tlb *vncr_tlb;
};
/*
@@ -863,14 +957,12 @@ struct kvm_vcpu_arch {
#define vcpu_set_flag(v, ...) __vcpu_set_flag((v), __VA_ARGS__)
#define vcpu_clear_flag(v, ...) __vcpu_clear_flag((v), __VA_ARGS__)
-/* SVE exposed to guest */
-#define GUEST_HAS_SVE __vcpu_single_flag(cflags, BIT(0))
+/* KVM_ARM_VCPU_INIT completed */
+#define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(0))
/* SVE config completed */
#define VCPU_SVE_FINALIZED __vcpu_single_flag(cflags, BIT(1))
-/* PTRAUTH exposed to guest */
-#define GUEST_HAS_PTRAUTH __vcpu_single_flag(cflags, BIT(2))
-/* KVM_ARM_VCPU_INIT completed */
-#define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(3))
+/* pKVM VCPU setup completed */
+#define VCPU_PKVM_FINALIZED __vcpu_single_flag(cflags, BIT(2))
/* Exception pending */
#define PENDING_EXCEPTION __vcpu_single_flag(iflags, BIT(0))
@@ -906,29 +998,23 @@ struct kvm_vcpu_arch {
#define EXCEPT_AA64_EL2_IRQ __vcpu_except_flags(5)
#define EXCEPT_AA64_EL2_FIQ __vcpu_except_flags(6)
#define EXCEPT_AA64_EL2_SERR __vcpu_except_flags(7)
-/* Guest debug is live */
-#define DEBUG_DIRTY __vcpu_single_flag(iflags, BIT(4))
-/* Save SPE context if active */
-#define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5))
-/* Save TRBE context if active */
-#define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6))
-
-/* SVE enabled for host EL0 */
-#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0))
-/* SME enabled for EL0 */
-#define HOST_SME_ENABLED __vcpu_single_flag(sflags, BIT(1))
+
/* Physical CPU not in supported_cpus */
-#define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(2))
+#define ON_UNSUPPORTED_CPU __vcpu_single_flag(sflags, BIT(0))
/* WFIT instruction trapped */
-#define IN_WFIT __vcpu_single_flag(sflags, BIT(3))
+#define IN_WFIT __vcpu_single_flag(sflags, BIT(1))
/* vcpu system registers loaded on physical CPU */
-#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4))
-/* Software step state is Active-pending */
-#define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5))
+#define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(2))
+/* Software step state is Active-pending for external debug */
+#define HOST_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(3))
+/* Software step state is Active pending for guest debug */
+#define GUEST_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(4))
/* PMUSERENR for the guest EL0 is on physical CPU */
-#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(6))
+#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(5))
/* WFI instruction trapped */
-#define IN_WFI __vcpu_single_flag(sflags, BIT(7))
+#define IN_WFI __vcpu_single_flag(sflags, BIT(6))
+/* KVM is currently emulating a nested ERET */
+#define IN_NESTED_ERET __vcpu_single_flag(sflags, BIT(7))
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
@@ -940,33 +1026,42 @@ struct kvm_vcpu_arch {
#define vcpu_sve_zcr_elx(vcpu) \
(unlikely(is_hyp_ctxt(vcpu)) ? ZCR_EL2 : ZCR_EL1)
-#define vcpu_sve_state_size(vcpu) ({ \
+#define sve_state_size_from_vl(sve_max_vl) ({ \
size_t __size_ret; \
- unsigned int __vcpu_vq; \
+ unsigned int __vq; \
\
- if (WARN_ON(!sve_vl_valid((vcpu)->arch.sve_max_vl))) { \
+ if (WARN_ON(!sve_vl_valid(sve_max_vl))) { \
__size_ret = 0; \
} else { \
- __vcpu_vq = vcpu_sve_max_vq(vcpu); \
- __size_ret = SVE_SIG_REGS_SIZE(__vcpu_vq); \
+ __vq = sve_vq_from_vl(sve_max_vl); \
+ __size_ret = SVE_SIG_REGS_SIZE(__vq); \
} \
\
__size_ret; \
})
+#define vcpu_sve_state_size(vcpu) sve_state_size_from_vl((vcpu)->arch.sve_max_vl)
+
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
KVM_GUESTDBG_USE_SW_BP | \
KVM_GUESTDBG_USE_HW | \
KVM_GUESTDBG_SINGLESTEP)
-#define vcpu_has_sve(vcpu) (system_supports_sve() && \
- vcpu_get_flag(vcpu, GUEST_HAS_SVE))
+#define kvm_has_sve(kvm) (system_supports_sve() && \
+ test_bit(KVM_ARCH_FLAG_GUEST_HAS_SVE, &(kvm)->arch.flags))
+
+#ifdef __KVM_NVHE_HYPERVISOR__
+#define vcpu_has_sve(vcpu) kvm_has_sve(kern_hyp_va((vcpu)->kvm))
+#else
+#define vcpu_has_sve(vcpu) kvm_has_sve((vcpu)->kvm)
+#endif
#ifdef CONFIG_ARM64_PTR_AUTH
#define vcpu_has_ptrauth(vcpu) \
((cpus_have_final_cap(ARM64_HAS_ADDRESS_AUTH) || \
cpus_have_final_cap(ARM64_HAS_GENERIC_AUTH)) && \
- vcpu_get_flag(vcpu, GUEST_HAS_PTRAUTH))
+ (vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_ADDRESS) || \
+ vcpu_has_feature(vcpu, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
#else
#define vcpu_has_ptrauth(vcpu) false
#endif
@@ -1012,14 +1107,36 @@ static inline u64 *___ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r)
#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r))
u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64);
+
+#define __vcpu_assign_sys_reg(v, r, val) \
+ do { \
+ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \
+ u64 __v = (val); \
+ if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \
+ __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \
+ \
+ ctxt_sys_reg(ctxt, (r)) = __v; \
+ } while (0)
+
+#define __vcpu_rmw_sys_reg(v, r, op, val) \
+ do { \
+ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \
+ u64 __v = ctxt_sys_reg(ctxt, (r)); \
+ __v op (val); \
+ if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \
+ __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \
+ \
+ ctxt_sys_reg(ctxt, (r)) = __v; \
+ } while (0)
+
#define __vcpu_sys_reg(v,r) \
- (*({ \
+ ({ \
const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \
- u64 *__r = __ctxt_sys_reg(ctxt, (r)); \
+ u64 __v = ctxt_sys_reg(ctxt, (r)); \
if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \
- *__r = kvm_vcpu_apply_reg_masks((v), (r), *__r);\
- __r; \
- }))
+ __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \
+ __v; \
+ })
u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg);
void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
@@ -1172,9 +1289,8 @@ void kvm_arm_resume_guest(struct kvm *kvm);
})
/*
- * The couple of isb() below are there to guarantee the same behaviour
- * on VHE as on !VHE, where the eret to EL1 acts as a context
- * synchronization event.
+ * The isb() below is there to guarantee the same behaviour on VHE as on !VHE,
+ * where the eret to EL1 acts as a context synchronization event.
*/
#define kvm_call_hyp(f, ...) \
do { \
@@ -1192,7 +1308,6 @@ void kvm_arm_resume_guest(struct kvm *kvm);
\
if (has_vhe()) { \
ret = f(__VA_ARGS__); \
- isb(); \
} else { \
ret = kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \
} \
@@ -1225,9 +1340,6 @@ int __init populate_sysreg_config(const struct sys_reg_desc *sr,
unsigned int idx);
int __init populate_nv_trap_config(void);
-bool lock_all_vcpus(struct kvm *kvm);
-void unlock_all_vcpus(struct kvm *kvm);
-
void kvm_calculate_traps(struct kvm_vcpu *vcpu);
/* MMIO helpers */
@@ -1262,7 +1374,7 @@ int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
extern unsigned int __ro_after_init kvm_arm_vmid_bits;
int __init kvm_arm_vmid_alloc_init(void);
void __init kvm_arm_vmid_alloc_free(void);
-bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
+void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
void kvm_arm_vmid_clear_active(void);
static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
@@ -1307,6 +1419,13 @@ DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data);
&this_cpu_ptr_hyp_sym(kvm_host_data)->f)
#endif
+#define host_data_test_flag(flag) \
+ (test_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags)))
+#define host_data_set_flag(flag) \
+ set_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags))
+#define host_data_clear_flag(flag) \
+ clear_bit(KVM_HOST_DATA_FLAG_##flag, host_data_ptr(flags))
+
/* Check whether the FP regs are owned by the guest */
static inline bool guest_owns_fp_regs(void)
{
@@ -1330,17 +1449,22 @@ static inline bool kvm_system_needs_idmapped_vectors(void)
return cpus_have_final_cap(ARM64_SPECTRE_V3A);
}
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-
-void kvm_arm_init_debug(void);
-void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu);
-void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
-void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
-void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
+void kvm_init_host_debug_data(void);
+void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu);
+void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu);
+void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu);
+void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val);
#define kvm_vcpu_os_lock_enabled(vcpu) \
(!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & OSLSR_EL1_OSLK))
+#define kvm_debug_regs_in_use(vcpu) \
+ ((vcpu)->arch.debug_owner != VCPU_DEBUG_FREE)
+#define kvm_host_owns_debug_regs(vcpu) \
+ ((vcpu)->arch.debug_owner == VCPU_DEBUG_HOST_OWNED)
+#define kvm_guest_owns_debug_regs(vcpu) \
+ ((vcpu)->arch.debug_owner == VCPU_DEBUG_GUEST_OWNED)
+
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
@@ -1356,7 +1480,6 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm,
struct reg_mask_range *range);
/* Guest/host FPSIMD coordination helpers */
-int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
@@ -1367,14 +1490,13 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
return (!has_vhe() && attr->exclude_host);
}
-/* Flags for host debug state */
-void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu);
-void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
-
#ifdef CONFIG_KVM
void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr);
void kvm_clr_pmu_events(u64 clr);
bool kvm_set_pmuserenr(u64 val);
+void kvm_enable_trbe(void);
+void kvm_disable_trbe(void);
+void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest);
#else
static inline void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr) {}
static inline void kvm_clr_pmu_events(u64 clr) {}
@@ -1382,6 +1504,9 @@ static inline bool kvm_set_pmuserenr(u64 val)
{
return false;
}
+static inline void kvm_enable_trbe(void) {}
+static inline void kvm_disable_trbe(void) {}
+static inline void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest) {}
#endif
void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
@@ -1422,6 +1547,7 @@ static inline bool __vcpu_has_feature(const struct kvm_arch *ka, int feature)
return test_bit(feature, ka->vcpu_features);
}
+#define kvm_vcpu_has_feature(k, f) __vcpu_has_feature(&(k)->arch, (f))
#define vcpu_has_feature(v, f) __vcpu_has_feature(&(v)->kvm->arch, (f))
#define kvm_vcpu_initialized(v) vcpu_get_flag(vcpu, VCPU_INITIALIZED)
@@ -1445,6 +1571,12 @@ static inline u64 *__vm_id_reg(struct kvm_arch *ka, u32 reg)
return &ka->id_regs[IDREG_IDX(reg)];
case SYS_CTR_EL0:
return &ka->ctr_el0;
+ case SYS_MIDR_EL1:
+ return &ka->midr_el1;
+ case SYS_REVIDR_EL1:
+ return &ka->revidr_el1;
+ case SYS_AIDR_EL1:
+ return &ka->aidr_el1;
default:
WARN_ON_ONCE(1);
return NULL;
@@ -1491,12 +1623,16 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
kvm_cmp_feat_signed(kvm, id, fld, op, limit) : \
kvm_cmp_feat_unsigned(kvm, id, fld, op, limit))
-#define kvm_has_feat(kvm, id, fld, limit) \
+#define __kvm_has_feat(kvm, id, fld, limit) \
kvm_cmp_feat(kvm, id, fld, >=, limit)
-#define kvm_has_feat_enum(kvm, id, fld, val) \
+#define kvm_has_feat(kvm, ...) __kvm_has_feat(kvm, __VA_ARGS__)
+
+#define __kvm_has_feat_enum(kvm, id, fld, val) \
kvm_cmp_feat_unsigned(kvm, id, fld, ==, val)
+#define kvm_has_feat_enum(kvm, ...) __kvm_has_feat_enum(kvm, __VA_ARGS__)
+
#define kvm_has_feat_range(kvm, id, fld, min, max) \
(kvm_cmp_feat(kvm, id, fld, >=, min) && \
kvm_cmp_feat(kvm, id, fld, <=, max))
@@ -1529,4 +1665,14 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
#define kvm_has_s1poe(k) \
(kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP))
+static inline bool kvm_arch_has_irq_bypass(void)
+{
+ return true;
+}
+
+void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt);
+void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *res1);
+void check_feature_map(void);
+
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index c838309e4ec4..e6be1f5d0967 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -76,6 +76,8 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
+u64 __gic_v3_get_lr(unsigned int lr);
+
void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 66d93e320ec8..b98ac6aa631f 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -139,6 +139,8 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
#define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
+extern u32 __hyp_va_bits;
+
/*
* We currently support using a VM-specified IPA size. For backward
* compatibility, the default IPA size is fixed to 40bits.
@@ -353,6 +355,22 @@ static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
return &kvm->arch.mmu != mmu;
}
+static inline void kvm_fault_lock(struct kvm *kvm)
+{
+ if (is_protected_kvm_enabled())
+ write_lock(&kvm->mmu_lock);
+ else
+ read_lock(&kvm->mmu_lock);
+}
+
+static inline void kvm_fault_unlock(struct kvm *kvm)
+{
+ if (is_protected_kvm_enabled())
+ write_unlock(&kvm->mmu_lock);
+ else
+ read_unlock(&kvm->mmu_lock);
+}
+
#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
void kvm_s2_ptdump_create_debugfs(struct kvm *kvm);
#else
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 233e65522716..0bd07ea068a1 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -33,14 +33,14 @@ static inline u64 translate_tcr_el2_to_tcr_el1(u64 tcr)
static inline u64 translate_cptr_el2_to_cpacr_el1(u64 cptr_el2)
{
- u64 cpacr_el1 = CPACR_ELx_RES1;
+ u64 cpacr_el1 = CPACR_EL1_RES1;
if (cptr_el2 & CPTR_EL2_TTA)
- cpacr_el1 |= CPACR_ELx_TTA;
+ cpacr_el1 |= CPACR_EL1_TTA;
if (!(cptr_el2 & CPTR_EL2_TFP))
- cpacr_el1 |= CPACR_ELx_FPEN;
+ cpacr_el1 |= CPACR_EL1_FPEN;
if (!(cptr_el2 & CPTR_EL2_TZ))
- cpacr_el1 |= CPACR_ELx_ZEN;
+ cpacr_el1 |= CPACR_EL1_ZEN;
cpacr_el1 |= cptr_el2 & (CPTR_EL2_TCPAC | CPTR_EL2_TAM);
@@ -64,6 +64,7 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0)
}
extern bool forward_smc_trap(struct kvm_vcpu *vcpu);
+extern bool forward_debug_exception(struct kvm_vcpu *vcpu);
extern void kvm_init_nested(struct kvm *kvm);
extern int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu);
extern void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu);
@@ -186,7 +187,8 @@ static inline bool kvm_supported_tlbi_s1e2_op(struct kvm_vcpu *vpcu, u32 instr)
return true;
}
-int kvm_init_nv_sysregs(struct kvm *kvm);
+int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu);
+u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val);
#ifdef CONFIG_ARM64_PTR_AUTH
bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr);
@@ -229,6 +231,38 @@ static inline u64 kvm_encode_nested_level(struct kvm_s2_trans *trans)
shift; \
})
+static inline u64 decode_range_tlbi(u64 val, u64 *range, u16 *asid)
+{
+ u64 base, tg, num, scale;
+ int shift;
+
+ tg = FIELD_GET(GENMASK(47, 46), val);
+
+ switch(tg) {
+ case 1:
+ shift = 12;
+ break;
+ case 2:
+ shift = 14;
+ break;
+ case 3:
+ default: /* IMPDEF: handle tg==0 as 64k */
+ shift = 16;
+ break;
+ }
+
+ base = (val & GENMASK(36, 0)) << shift;
+
+ if (asid)
+ *asid = FIELD_GET(TLBIR_ASID_MASK, val);
+
+ scale = FIELD_GET(GENMASK(45, 44), val);
+ num = FIELD_GET(GENMASK(43, 39), val);
+ *range = __TLBI_RANGE_PAGES(num, scale) << shift;
+
+ return base;
+}
+
static inline unsigned int ps_to_output_size(unsigned int ps)
{
switch (ps) {
@@ -243,4 +277,72 @@ static inline unsigned int ps_to_output_size(unsigned int ps)
}
}
+enum trans_regime {
+ TR_EL10,
+ TR_EL20,
+ TR_EL2,
+};
+
+struct s1_walk_info {
+ u64 baddr;
+ enum trans_regime regime;
+ unsigned int max_oa_bits;
+ unsigned int pgshift;
+ unsigned int txsz;
+ int sl;
+ bool as_el0;
+ bool hpd;
+ bool e0poe;
+ bool poe;
+ bool pan;
+ bool be;
+ bool s2;
+};
+
+struct s1_walk_result {
+ union {
+ struct {
+ u64 desc;
+ u64 pa;
+ s8 level;
+ u8 APTable;
+ bool nG;
+ u16 asid;
+ bool UXNTable;
+ bool PXNTable;
+ bool uwxn;
+ bool uov;
+ bool ur;
+ bool uw;
+ bool ux;
+ bool pwxn;
+ bool pov;
+ bool pr;
+ bool pw;
+ bool px;
+ };
+ struct {
+ u8 fst;
+ bool ptw;
+ bool s2;
+ };
+ };
+ bool failed;
+};
+
+int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
+ struct s1_walk_result *wr, u64 va);
+
+/* VNCR management */
+int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);
+int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu);
+void kvm_handle_s1e2_tlbi(struct kvm_vcpu *vcpu, u32 inst, u64 val);
+
+#define vncr_fixmap(c) \
+ ({ \
+ u32 __c = (c); \
+ BUG_ON(__c >= NR_CPUS); \
+ (FIX_VNCR - __c); \
+ })
+
#endif /* __ARM64_KVM_NESTED_H */
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index aab04097b505..2888b5d03757 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -59,6 +59,11 @@ typedef u64 kvm_pte_t;
#define KVM_PHYS_INVALID (-1ULL)
+#define KVM_PTE_TYPE BIT(1)
+#define KVM_PTE_TYPE_BLOCK 0
+#define KVM_PTE_TYPE_PAGE 1
+#define KVM_PTE_TYPE_TABLE 1
+
#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
@@ -412,15 +417,20 @@ static inline bool kvm_pgtable_walk_lock_held(void)
* be used instead of block mappings.
*/
struct kvm_pgtable {
- u32 ia_bits;
- s8 start_level;
- kvm_pteref_t pgd;
- struct kvm_pgtable_mm_ops *mm_ops;
-
- /* Stage-2 only */
- struct kvm_s2_mmu *mmu;
- enum kvm_pgtable_stage2_flags flags;
- kvm_pgtable_force_pte_cb_t force_pte_cb;
+ union {
+ struct rb_root_cached pkvm_mappings;
+ struct {
+ u32 ia_bits;
+ s8 start_level;
+ kvm_pteref_t pgd;
+ struct kvm_pgtable_mm_ops *mm_ops;
+
+ /* Stage-2 only */
+ enum kvm_pgtable_stage2_flags flags;
+ kvm_pgtable_force_pte_cb_t force_pte_cb;
+ };
+ };
+ struct kvm_s2_mmu *mmu;
};
/**
@@ -526,8 +536,11 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
enum kvm_pgtable_stage2_flags flags,
kvm_pgtable_force_pte_cb_t force_pte_cb);
-#define kvm_pgtable_stage2_init(pgt, mmu, mm_ops) \
- __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL)
+static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
+ struct kvm_pgtable_mm_ops *mm_ops)
+{
+ return __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL);
+}
/**
* kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
@@ -669,13 +682,15 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
* kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
* @addr: Intermediate physical address to identify the page-table entry.
+ * @flags: Flags to control the page-table walk (ex. a shared walk)
*
* The offset of @addr within a page is ignored.
*
* If there is a valid, leaf page-table entry used to translate @addr, then
* set the access flag in that entry.
*/
-void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr);
+void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
+ enum kvm_pgtable_walk_flags flags);
/**
* kvm_pgtable_stage2_test_clear_young() - Test and optionally clear the access
@@ -705,6 +720,7 @@ bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
* @addr: Intermediate physical address to identify the page-table entry.
* @prot: Additional permissions to grant for the mapping.
+ * @flags: Flags to control the page-table walk (ex. a shared walk)
*
* The offset of @addr within a page is ignored.
*
@@ -717,7 +733,8 @@ bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
* Return: 0 on success, negative error code on failure.
*/
int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
- enum kvm_pgtable_prot prot);
+ enum kvm_pgtable_prot prot,
+ enum kvm_pgtable_walk_flags flags);
/**
* kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index cd56acd9a842..ea58282f59bb 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -19,6 +19,32 @@
int pkvm_init_host_vm(struct kvm *kvm);
int pkvm_create_hyp_vm(struct kvm *kvm);
void pkvm_destroy_hyp_vm(struct kvm *kvm);
+int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu);
+
+/*
+ * This functions as an allow-list of protected VM capabilities.
+ * Features not explicitly allowed by this function are denied.
+ */
+static inline bool kvm_pvm_ext_allowed(long ext)
+{
+ switch (ext) {
+ case KVM_CAP_IRQCHIP:
+ case KVM_CAP_ARM_PSCI:
+ case KVM_CAP_ARM_PSCI_0_2:
+ case KVM_CAP_NR_VCPUS:
+ case KVM_CAP_MAX_VCPUS:
+ case KVM_CAP_MAX_VCPU_ID:
+ case KVM_CAP_MSI_DEVID:
+ case KVM_CAP_ARM_VM_IPA_SIZE:
+ case KVM_CAP_ARM_PMU_V3:
+ case KVM_CAP_ARM_SVE:
+ case KVM_CAP_ARM_PTRAUTH_ADDRESS:
+ case KVM_CAP_ARM_PTRAUTH_GENERIC:
+ return true;
+ default:
+ return false;
+ }
+}
extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
@@ -109,6 +135,12 @@ static inline unsigned long host_s2_pgtable_pages(void)
return res;
}
+#ifdef CONFIG_NVHE_EL2_DEBUG
+static inline unsigned long pkvm_selftest_pages(void) { return 32; }
+#else
+static inline unsigned long pkvm_selftest_pages(void) { return 0; }
+#endif
+
#define KVM_FFA_MBOX_NR_PAGES 1
static inline unsigned long hyp_ffa_proxy_pages(void)
@@ -137,4 +169,32 @@ static inline size_t pkvm_host_sve_state_size(void)
SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
}
+struct pkvm_mapping {
+ struct rb_node node;
+ u64 gfn;
+ u64 pfn;
+ u64 nr_pages;
+ u64 __subtree_last; /* Internal member for interval tree */
+};
+
+int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
+ struct kvm_pgtable_mm_ops *mm_ops);
+void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
+int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
+ enum kvm_pgtable_prot prot, void *mc,
+ enum kvm_pgtable_walk_flags flags);
+int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
+int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
+int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
+bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold);
+int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
+ enum kvm_pgtable_walk_flags flags);
+void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
+ enum kvm_pgtable_walk_flags flags);
+int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ struct kvm_mmu_memory_cache *mc);
+void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level);
+kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
+ enum kvm_pgtable_prot prot, void *mc,
+ bool force_pte);
#endif /* __ARM64_KVM_PKVM_H__ */
diff --git a/arch/arm64/include/asm/kvm_ras.h b/arch/arm64/include/asm/kvm_ras.h
index 87e10d9a635b..9398ade632aa 100644
--- a/arch/arm64/include/asm/kvm_ras.h
+++ b/arch/arm64/include/asm/kvm_ras.h
@@ -14,7 +14,7 @@
* Was this synchronous external abort a RAS notification?
* Returns '0' for errors handled by some RAS subsystem, or -ENOENT.
*/
-static inline int kvm_handle_guest_sea(phys_addr_t addr, u64 esr)
+static inline int kvm_handle_guest_sea(void)
{
/* apei_claim_sea(NULL) expects to mask interrupts itself */
lockdep_assert_irqs_enabled();
diff --git a/arch/arm64/include/asm/mem_encrypt.h b/arch/arm64/include/asm/mem_encrypt.h
index f8f78f622dd2..314b2b52025f 100644
--- a/arch/arm64/include/asm/mem_encrypt.h
+++ b/arch/arm64/include/asm/mem_encrypt.h
@@ -4,6 +4,8 @@
#include <asm/rsi.h>
+struct device;
+
struct arm64_mem_crypt_ops {
int (*encrypt)(unsigned long addr, int numpages);
int (*decrypt)(unsigned long addr, int numpages);
@@ -21,4 +23,15 @@ static inline bool force_dma_unencrypted(struct device *dev)
return is_realm_world();
}
+/*
+ * For Arm CCA guests, canonical addresses are "encrypted", so no changes
+ * required for dma_addr_encrypted().
+ * The unencrypted DMA buffers must be accessed via the unprotected IPA,
+ * "top IPA bit" set.
+ */
+#define dma_addr_unencrypted(x) ((x) | PROT_NS_SHARED)
+
+/* Clear the "top" IPA bit while converting back */
+#define dma_addr_canonical(x) ((x) & ~PROT_NS_SHARED)
+
#endif /* __ASM_MEM_ENCRYPT_H */
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 8b9f33cf561b..717829df294e 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -145,13 +145,16 @@
#define OVERFLOW_STACK_SIZE SZ_4K
+#define NVHE_STACK_SHIFT PAGE_SHIFT
+#define NVHE_STACK_SIZE (UL(1) << NVHE_STACK_SHIFT)
+
/*
* With the minimum frame size of [x29, x30], exactly half the combined
* sizes of the hyp and overflow stacks is the maximum size needed to
* save the unwinded stacktrace; plus an additional entry to delimit the
* end.
*/
-#define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + PAGE_SIZE) / 2 + sizeof(long))
+#define NVHE_STACKTRACE_SIZE ((OVERFLOW_STACK_SIZE + NVHE_STACK_SIZE) / 2 + sizeof(long))
/*
* Alignment of kernel segments (e.g. .text, .data).
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 2ec96d91acc6..6e8aa8e72601 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -94,23 +94,8 @@ static inline bool kaslr_requires_kpti(void)
return false;
}
- /*
- * Systems affected by Cavium erratum 24756 are incompatible
- * with KPTI.
- */
- if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) {
- extern const struct midr_range cavium_erratum_27456_cpus[];
-
- if (is_midr_in_range_list(read_cpuid_id(),
- cavium_erratum_27456_cpus))
- return false;
- }
-
return true;
}
-#define INIT_MM_CONTEXT(name) \
- .pgd = swapper_pg_dir,
-
#endif /* !__ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 48b3d9553b67..0dbe3b29049b 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -271,18 +271,26 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
}
static inline const struct cpumask *
-task_cpu_possible_mask(struct task_struct *p)
+__task_cpu_possible_mask(struct task_struct *p, const struct cpumask *mask)
{
if (!static_branch_unlikely(&arm64_mismatched_32bit_el0))
- return cpu_possible_mask;
+ return mask;
if (!is_compat_thread(task_thread_info(p)))
- return cpu_possible_mask;
+ return mask;
return system_32bit_el0_cpumask();
}
+
+static inline const struct cpumask *
+task_cpu_possible_mask(struct task_struct *p)
+{
+ return __task_cpu_possible_mask(p, cpu_possible_mask);
+}
#define task_cpu_possible_mask task_cpu_possible_mask
+const struct cpumask *task_cpu_fallback_mask(struct task_struct *p);
+
void verify_cpu_asid_bits(void);
void post_ttbr_update_workaround(void);
diff --git a/arch/arm64/include/asm/mshyperv.h b/arch/arm64/include/asm/mshyperv.h
index a975e1a689dd..b721d3134ab6 100644
--- a/arch/arm64/include/asm/mshyperv.h
+++ b/arch/arm64/include/asm/mshyperv.h
@@ -6,9 +6,8 @@
* the ARM64 architecture. See include/asm-generic/mshyperv.h for
* definitions are that architecture independent.
*
- * Definitions that are specified in the Hyper-V Top Level Functional
- * Spec (TLFS) should not go in this file, but should instead go in
- * hyperv-tlfs.h.
+ * Definitions that are derived from Hyper-V code or headers should not go in
+ * this file, but should instead go in the relevant files in include/hyperv.
*
* Copyright (C) 2021, Microsoft, Inc.
*
@@ -20,7 +19,7 @@
#include <linux/types.h>
#include <linux/arm-smccc.h>
-#include <asm/hyperv-tlfs.h>
+#include <hyperv/hvhdk.h>
/*
* Declare calls to get and set Hyper-V VP register values on ARM64, which
@@ -41,6 +40,19 @@ static inline u64 hv_get_msr(unsigned int reg)
return hv_get_vpreg(reg);
}
+/*
+ * Nested is not supported on arm64
+ */
+static inline void hv_set_non_nested_msr(unsigned int reg, u64 value)
+{
+ hv_set_msr(reg, value);
+}
+
+static inline u64 hv_get_non_nested_msr(unsigned int reg)
+{
+ return hv_get_msr(reg);
+}
+
/* SMCCC hypercall parameters */
#define HV_SMCCC_FUNC_NUMBER 1
#define HV_FUNC_ID ARM_SMCCC_CALL_VAL( \
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index e75422864d1b..1b4509d3382c 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -85,24 +85,6 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, p4d_t *p4dp)
__pgd_populate(pgdp, __pa(p4dp), pgdval);
}
-static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
- gfp_t gfp = GFP_PGTABLE_USER;
-
- if (mm == &init_mm)
- gfp = GFP_PGTABLE_KERNEL;
- return (p4d_t *)get_zeroed_page(gfp);
-}
-
-static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
-{
- if (!pgtable_l5_enabled())
- return;
- BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
- free_page((unsigned long)p4d);
-}
-
-#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d)
#else
static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t p4dp, pgdval_t prot)
{
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index c78a988cca93..f3b77deedfa2 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -7,40 +7,46 @@
#include <asm/memory.h>
+#define PTDESC_ORDER 3
+
+/* Number of VA bits resolved by a single translation table level */
+#define PTDESC_TABLE_SHIFT (PAGE_SHIFT - PTDESC_ORDER)
+
/*
* Number of page-table levels required to address 'va_bits' wide
* address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT)
- * bits with (PAGE_SHIFT - 3) bits at each page table level. Hence:
+ * bits with PTDESC_TABLE_SHIFT bits at each page table level. Hence:
*
- * levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), (PAGE_SHIFT - 3))
+ * levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), PTDESC_TABLE_SHIFT)
*
* where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d))
*
* We cannot include linux/kernel.h which defines DIV_ROUND_UP here
* due to build issues. So we open code DIV_ROUND_UP here:
*
- * ((((va_bits) - PAGE_SHIFT) + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3))
+ * ((((va_bits) - PAGE_SHIFT) + PTDESC_TABLE_SHIFT - 1) / PTDESC_TABLE_SHIFT)
*
* which gets simplified as :
*/
-#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3))
+#define ARM64_HW_PGTABLE_LEVELS(va_bits) \
+ (((va_bits) - PTDESC_ORDER - 1) / PTDESC_TABLE_SHIFT)
/*
* Size mapped by an entry at level n ( -1 <= n <= 3)
- * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
+ * We map PTDESC_TABLE_SHIFT at all translation levels and PAGE_SHIFT bits
* in the final page. The maximum number of translation levels supported by
* the architecture is 5. Hence, starting at level n, we have further
* ((4 - n) - 1) levels of translation excluding the offset within the page.
* So, the total number of bits mapped by an entry at level n is :
*
- * ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT
+ * ((4 - n) - 1) * PTDESC_TABLE_SHIFT + PAGE_SHIFT
*
* Rearranging it a bit we get :
- * (4 - n) * (PAGE_SHIFT - 3) + 3
+ * (4 - n) * PTDESC_TABLE_SHIFT + PTDESC_ORDER
*/
-#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3)
+#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) (PTDESC_TABLE_SHIFT * (4 - (n)) + PTDESC_ORDER)
-#define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3))
+#define PTRS_PER_PTE (1 << PTDESC_TABLE_SHIFT)
/*
* PMD_SHIFT determines the size a level 2 page table entry can map.
@@ -49,7 +55,7 @@
#define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
-#define PTRS_PER_PMD (1 << (PAGE_SHIFT - 3))
+#define PTRS_PER_PMD (1 << PTDESC_TABLE_SHIFT)
#endif
/*
@@ -59,14 +65,14 @@
#define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
#define PUD_MASK (~(PUD_SIZE-1))
-#define PTRS_PER_PUD (1 << (PAGE_SHIFT - 3))
+#define PTRS_PER_PUD (1 << PTDESC_TABLE_SHIFT)
#endif
#if CONFIG_PGTABLE_LEVELS > 4
#define P4D_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(0)
#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
#define P4D_MASK (~(P4D_SIZE-1))
-#define PTRS_PER_P4D (1 << (PAGE_SHIFT - 3))
+#define PTRS_PER_P4D (1 << PTDESC_TABLE_SHIFT)
#endif
/*
@@ -97,7 +103,6 @@
* Level -1 descriptor (PGD).
*/
#define PGD_TYPE_TABLE (_AT(pgdval_t, 3) << 0)
-#define PGD_TABLE_BIT (_AT(pgdval_t, 1) << 1)
#define PGD_TYPE_MASK (_AT(pgdval_t, 3) << 0)
#define PGD_TABLE_AF (_AT(pgdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */
#define PGD_TABLE_PXN (_AT(pgdval_t, 1) << 59)
@@ -107,7 +112,6 @@
* Level 0 descriptor (P4D).
*/
#define P4D_TYPE_TABLE (_AT(p4dval_t, 3) << 0)
-#define P4D_TABLE_BIT (_AT(p4dval_t, 1) << 1)
#define P4D_TYPE_MASK (_AT(p4dval_t, 3) << 0)
#define P4D_TYPE_SECT (_AT(p4dval_t, 1) << 0)
#define P4D_SECT_RDONLY (_AT(p4dval_t, 1) << 7) /* AP[2] */
@@ -119,7 +123,6 @@
* Level 1 descriptor (PUD).
*/
#define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0)
-#define PUD_TABLE_BIT (_AT(pudval_t, 1) << 1)
#define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0)
#define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0)
#define PUD_SECT_RDONLY (_AT(pudval_t, 1) << 7) /* AP[2] */
@@ -133,7 +136,6 @@
#define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0)
-#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1)
#define PMD_TABLE_AF (_AT(pmdval_t, 1) << 10) /* Ignored if no FEAT_HAFT */
/*
@@ -162,7 +164,6 @@
#define PTE_VALID (_AT(pteval_t, 1) << 0)
#define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0)
#define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0)
-#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1)
#define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
#define PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */
#define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
@@ -222,12 +223,6 @@
*/
#define S1_TABLE_AP (_AT(pmdval_t, 3) << 61)
-/*
- * Highest possible physical address supported.
- */
-#define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS)
-#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1)
-
#define TTBR_CNP_BIT (UL(1) << 0)
/*
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 9f9cf13bbd95..7830d031742e 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -81,6 +81,7 @@ extern unsigned long prot_ns_shared;
#define lpa2_is_enabled() false
#define PTE_MAYBE_SHARED PTE_SHARED
#define PMD_MAYBE_SHARED PMD_SECT_S
+#define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS)
#else
static inline bool __pure lpa2_is_enabled(void)
{
@@ -89,9 +90,15 @@ static inline bool __pure lpa2_is_enabled(void)
#define PTE_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PTE_SHARED)
#define PMD_MAYBE_SHARED (lpa2_is_enabled() ? 0 : PMD_SECT_S)
+#define PHYS_MASK_SHIFT (lpa2_is_enabled() ? CONFIG_ARM64_PA_BITS : 48)
#endif
/*
+ * Highest possible physical address supported.
+ */
+#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1)
+
+/*
* If we have userspace only BTI we don't want to mark kernel pages
* guarded even if the system does support BTI.
*/
@@ -162,25 +169,25 @@ static inline bool __pure lpa2_is_enabled(void)
#define PAGE_GCS_RO __pgprot(_PAGE_GCS_RO)
#define PIE_E0 ( \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS), PIE_GCS) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX_O) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX_O) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R_O) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW_O))
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_GCS) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_R) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_EXECONLY), PIE_X_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY_EXEC), PIE_RX_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RWX_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY), PIE_R_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED), PIE_RW_O))
#define PIE_E1 ( \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS), PIE_NONE_O) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_GCS_RO), PIE_NONE_O) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY_EXEC), PIE_R) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RW) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_READONLY), PIE_R) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_SHARED), PIE_RW) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_ROX), PIE_RX) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_EXEC), PIE_RWX) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL_RO), PIE_R) | \
- PIRx_ELx_PERM(pte_pi_index(_PAGE_KERNEL), PIE_RW))
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS), PIE_NONE_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_GCS_RO), PIE_NONE_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_EXECONLY), PIE_NONE_O) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY_EXEC), PIE_R) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED_EXEC), PIE_RW) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_READONLY), PIE_R) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_SHARED), PIE_RW) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL_ROX), PIE_RX) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL_EXEC), PIE_RWX) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL_RO), PIE_R) | \
+ PIRx_ELx_PERM_PREP(pte_pi_index(_PAGE_KERNEL), PIE_RW))
#endif /* __ASM_PGTABLE_PROT_H */
diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h
index 6d6d4065b0cb..265e8301d7ba 100644
--- a/arch/arm64/include/asm/pgtable-types.h
+++ b/arch/arm64/include/asm/pgtable-types.h
@@ -11,11 +11,19 @@
#include <asm/types.h>
-typedef u64 pteval_t;
-typedef u64 pmdval_t;
-typedef u64 pudval_t;
-typedef u64 p4dval_t;
-typedef u64 pgdval_t;
+/*
+ * Page Table Descriptor
+ *
+ * Generic page table descriptor format from which
+ * all level specific descriptors can be derived.
+ */
+typedef u64 ptdesc_t;
+
+typedef ptdesc_t pteval_t;
+typedef ptdesc_t pmdval_t;
+typedef ptdesc_t pudval_t;
+typedef ptdesc_t p4dval_t;
+typedef ptdesc_t pgdval_t;
/*
* These are used to make use of C type-checking..
@@ -46,7 +54,7 @@ typedef struct { pgdval_t pgd; } pgd_t;
#define pgd_val(x) ((x).pgd)
#define __pgd(x) ((pgd_t) { (x) } )
-typedef struct { pteval_t pgprot; } pgprot_t;
+typedef struct { ptdesc_t pgprot; } pgprot_t;
#define pgprot_val(x) ((x).pgprot)
#define __pgprot(x) ((pgprot_t) { (x) } )
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 6986345b537a..192d86e1cc76 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -40,6 +40,85 @@
#include <linux/sched.h>
#include <linux/page_table_check.h>
+static inline void emit_pte_barriers(void)
+{
+ /*
+ * These barriers are emitted under certain conditions after a pte entry
+ * was modified (see e.g. __set_pte_complete()). The dsb makes the store
+ * visible to the table walker. The isb ensures that any previous
+ * speculative "invalid translation" marker that is in the CPU's
+ * pipeline gets cleared, so that any access to that address after
+ * setting the pte to valid won't cause a spurious fault. If the thread
+ * gets preempted after storing to the pgtable but before emitting these
+ * barriers, __switch_to() emits a dsb which ensure the walker gets to
+ * see the store. There is no guarantee of an isb being issued though.
+ * This is safe because it will still get issued (albeit on a
+ * potentially different CPU) when the thread starts running again,
+ * before any access to the address.
+ */
+ dsb(ishst);
+ isb();
+}
+
+static inline void queue_pte_barriers(void)
+{
+ unsigned long flags;
+
+ if (in_interrupt()) {
+ emit_pte_barriers();
+ return;
+ }
+
+ flags = read_thread_flags();
+
+ if (flags & BIT(TIF_LAZY_MMU)) {
+ /* Avoid the atomic op if already set. */
+ if (!(flags & BIT(TIF_LAZY_MMU_PENDING)))
+ set_thread_flag(TIF_LAZY_MMU_PENDING);
+ } else {
+ emit_pte_barriers();
+ }
+}
+
+#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+static inline void arch_enter_lazy_mmu_mode(void)
+{
+ /*
+ * lazy_mmu_mode is not supposed to permit nesting. But in practice this
+ * does happen with CONFIG_DEBUG_PAGEALLOC, where a page allocation
+ * inside a lazy_mmu_mode section (such as zap_pte_range()) will change
+ * permissions on the linear map with apply_to_page_range(), which
+ * re-enters lazy_mmu_mode. So we tolerate nesting in our
+ * implementation. The first call to arch_leave_lazy_mmu_mode() will
+ * flush and clear the flag such that the remainder of the work in the
+ * outer nest behaves as if outside of lazy mmu mode. This is safe and
+ * keeps tracking simple.
+ */
+
+ if (in_interrupt())
+ return;
+
+ set_thread_flag(TIF_LAZY_MMU);
+}
+
+static inline void arch_flush_lazy_mmu_mode(void)
+{
+ if (in_interrupt())
+ return;
+
+ if (test_and_clear_thread_flag(TIF_LAZY_MMU_PENDING))
+ emit_pte_barriers();
+}
+
+static inline void arch_leave_lazy_mmu_mode(void)
+{
+ if (in_interrupt())
+ return;
+
+ arch_flush_lazy_mmu_mode();
+ clear_thread_flag(TIF_LAZY_MMU);
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
@@ -68,10 +147,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#define pte_ERROR(e) \
pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e))
-/*
- * Macros to convert between a physical address and its placement in a
- * page table entry, taking care of 52-bit addresses.
- */
#ifdef CONFIG_ARM64_PA_BITS_52
static inline phys_addr_t __pte_to_phys(pte_t pte)
{
@@ -84,8 +159,15 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
return (phys | (phys >> PTE_ADDR_HIGH_SHIFT)) & PHYS_TO_PTE_ADDR_MASK;
}
#else
-#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_LOW)
-#define __phys_to_pte_val(phys) (phys)
+static inline phys_addr_t __pte_to_phys(pte_t pte)
+{
+ return pte_val(pte) & PTE_ADDR_LOW;
+}
+
+static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
+{
+ return phys;
+}
#endif
#define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT)
@@ -273,7 +355,7 @@ static inline pte_t pte_mknoncont(pte_t pte)
return clear_pte_bit(pte, __pgprot(PTE_CONT));
}
-static inline pte_t pte_mkpresent(pte_t pte)
+static inline pte_t pte_mkvalid(pte_t pte)
{
return set_pte_bit(pte, __pgprot(PTE_VALID));
}
@@ -317,18 +399,20 @@ static inline void __set_pte_nosync(pte_t *ptep, pte_t pte)
WRITE_ONCE(*ptep, pte);
}
-static inline void __set_pte(pte_t *ptep, pte_t pte)
+static inline void __set_pte_complete(pte_t pte)
{
- __set_pte_nosync(ptep, pte);
-
/*
* Only if the new pte is valid and kernel, otherwise TLB maintenance
- * or update_mmu_cache() have the necessary barriers.
+ * has the necessary barriers.
*/
- if (pte_valid_not_user(pte)) {
- dsb(ishst);
- isb();
- }
+ if (pte_valid_not_user(pte))
+ queue_pte_barriers();
+}
+
+static inline void __set_pte(pte_t *ptep, pte_t pte)
+{
+ __set_pte_nosync(ptep, pte);
+ __set_pte_complete(pte);
}
static inline pte_t __ptep_get(pte_t *ptep)
@@ -420,23 +504,6 @@ static inline pte_t pte_advance_pfn(pte_t pte, unsigned long nr)
return pfn_pte(pte_pfn(pte) + nr, pte_pgprot(pte));
}
-static inline void __set_ptes(struct mm_struct *mm,
- unsigned long __always_unused addr,
- pte_t *ptep, pte_t pte, unsigned int nr)
-{
- page_table_check_ptes_set(mm, ptep, pte, nr);
- __sync_cache_and_tags(pte, nr);
-
- for (;;) {
- __check_safe_pte_update(mm, ptep, pte);
- __set_pte(ptep, pte);
- if (--nr == 0)
- break;
- ptep++;
- pte = pte_advance_pfn(pte, 1);
- }
-}
-
/*
* Hugetlb definitions.
*/
@@ -483,12 +550,12 @@ static inline pmd_t pte_pmd(pte_t pte)
static inline pgprot_t mk_pud_sect_prot(pgprot_t prot)
{
- return __pgprot((pgprot_val(prot) & ~PUD_TABLE_BIT) | PUD_TYPE_SECT);
+ return __pgprot((pgprot_val(prot) & ~PUD_TYPE_MASK) | PUD_TYPE_SECT);
}
static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
{
- return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT);
+ return __pgprot((pgprot_val(prot) & ~PMD_TYPE_MASK) | PMD_TYPE_SECT);
}
static inline pte_t pte_swp_mkexclusive(pte_t pte)
@@ -496,7 +563,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte)
return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE));
}
-static inline int pte_swp_exclusive(pte_t pte)
+static inline bool pte_swp_exclusive(pte_t pte)
{
return pte_val(pte) & PTE_SWP_EXCLUSIVE;
}
@@ -548,18 +615,6 @@ static inline int pmd_protnone(pmd_t pmd)
#endif
#define pmd_present(pmd) pte_present(pmd_pte(pmd))
-
-/*
- * THP definitions.
- */
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline int pmd_trans_huge(pmd_t pmd)
-{
- return pmd_val(pmd) && pmd_present(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
#define pmd_valid(pmd) pte_valid(pmd_pte(pmd))
@@ -585,7 +640,18 @@ static inline int pmd_trans_huge(pmd_t pmd)
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
-#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+ /*
+ * It's possible that the pmd is present-invalid on entry
+ * and in that case it needs to remain present-invalid on
+ * exit. So ensure the VALID bit does not get modified.
+ */
+ pmdval_t mask = PMD_TYPE_MASK & ~PTE_VALID;
+ pmdval_t val = PMD_TYPE_SECT & ~PTE_VALID;
+
+ return __pmd((pmd_val(pmd) & ~mask) | val);
+}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_devmap(pmd) pte_devmap(pmd_pte(pmd))
@@ -607,24 +673,29 @@ static inline pmd_t pmd_mkspecial(pmd_t pmd)
#define __phys_to_pmd_val(phys) __phys_to_pte_val(phys)
#define pmd_pfn(pmd) ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT)
#define pfn_pmd(pfn,prot) __pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
-#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
#define pud_young(pud) pte_young(pud_pte(pud))
#define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud)))
#define pud_write(pud) pte_write(pud_pte(pud))
-#define pud_mkhuge(pud) (__pud(pud_val(pud) & ~PUD_TABLE_BIT))
+static inline pud_t pud_mkhuge(pud_t pud)
+{
+ /*
+ * It's possible that the pud is present-invalid on entry
+ * and in that case it needs to remain present-invalid on
+ * exit. So ensure the VALID bit does not get modified.
+ */
+ pudval_t mask = PUD_TYPE_MASK & ~PTE_VALID;
+ pudval_t val = PUD_TYPE_SECT & ~PTE_VALID;
+
+ return __pud((pud_val(pud) & ~mask) | val);
+}
#define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud))
#define __phys_to_pud_val(phys) __phys_to_pte_val(phys)
#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
#define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
-#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
-#define pud_special(pte) pte_special(pud_pte(pud))
-#define pud_mkspecial(pte) pte_pud(pte_mkspecial(pud_pte(pud)))
-#endif
-
#define pmd_pgprot pmd_pgprot
static inline pgprot_t pmd_pgprot(pmd_t pmd)
{
@@ -641,30 +712,64 @@ static inline pgprot_t pud_pgprot(pud_t pud)
return __pgprot(pud_val(pfn_pud(pfn, __pgprot(0))) ^ pud_val(pud));
}
-static inline void __set_pte_at(struct mm_struct *mm,
- unsigned long __always_unused addr,
- pte_t *ptep, pte_t pte, unsigned int nr)
+static inline void __set_ptes_anysz(struct mm_struct *mm, pte_t *ptep,
+ pte_t pte, unsigned int nr,
+ unsigned long pgsize)
+{
+ unsigned long stride = pgsize >> PAGE_SHIFT;
+
+ switch (pgsize) {
+ case PAGE_SIZE:
+ page_table_check_ptes_set(mm, ptep, pte, nr);
+ break;
+ case PMD_SIZE:
+ page_table_check_pmds_set(mm, (pmd_t *)ptep, pte_pmd(pte), nr);
+ break;
+#ifndef __PAGETABLE_PMD_FOLDED
+ case PUD_SIZE:
+ page_table_check_puds_set(mm, (pud_t *)ptep, pte_pud(pte), nr);
+ break;
+#endif
+ default:
+ VM_WARN_ON(1);
+ }
+
+ __sync_cache_and_tags(pte, nr * stride);
+
+ for (;;) {
+ __check_safe_pte_update(mm, ptep, pte);
+ __set_pte_nosync(ptep, pte);
+ if (--nr == 0)
+ break;
+ ptep++;
+ pte = pte_advance_pfn(pte, stride);
+ }
+
+ __set_pte_complete(pte);
+}
+
+static inline void __set_ptes(struct mm_struct *mm,
+ unsigned long __always_unused addr,
+ pte_t *ptep, pte_t pte, unsigned int nr)
{
- __sync_cache_and_tags(pte, nr);
- __check_safe_pte_update(mm, ptep, pte);
- __set_pte(ptep, pte);
+ __set_ptes_anysz(mm, ptep, pte, nr, PAGE_SIZE);
}
-static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t pmd)
+static inline void __set_pmds(struct mm_struct *mm,
+ unsigned long __always_unused addr,
+ pmd_t *pmdp, pmd_t pmd, unsigned int nr)
{
- page_table_check_pmd_set(mm, pmdp, pmd);
- return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd),
- PMD_SIZE >> PAGE_SHIFT);
+ __set_ptes_anysz(mm, (pte_t *)pmdp, pmd_pte(pmd), nr, PMD_SIZE);
}
+#define set_pmd_at(mm, addr, pmdp, pmd) __set_pmds(mm, addr, pmdp, pmd, 1)
-static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
- pud_t *pudp, pud_t pud)
+static inline void __set_puds(struct mm_struct *mm,
+ unsigned long __always_unused addr,
+ pud_t *pudp, pud_t pud, unsigned int nr)
{
- page_table_check_pud_set(mm, pudp, pud);
- return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud),
- PUD_SIZE >> PAGE_SHIFT);
+ __set_ptes_anysz(mm, (pte_t *)pudp, pud_pte(pud), nr, PUD_SIZE);
}
+#define set_pud_at(mm, addr, pudp, pud) __set_puds(mm, addr, pudp, pud, 1)
#define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d))
#define __phys_to_p4d_val(phys) __phys_to_pte_val(phys)
@@ -724,6 +829,17 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
#define pmd_leaf_size(pmd) (pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
#define pte_leaf_size(pte) (pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+ /*
+ * If pmd is present-invalid, pmd_table() won't detect it
+ * as a table, so force the valid bit for the comparison.
+ */
+ return pmd_present(pmd) && !pmd_table(__pmd(pmd_val(pmd) | PTE_VALID));
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
static inline bool pud_sect(pud_t pud) { return false; }
static inline bool pud_table(pud_t pud) { return true; }
@@ -734,8 +850,6 @@ static inline bool pud_table(pud_t pud) { return true; }
PUD_TYPE_TABLE)
#endif
-extern pgd_t init_pg_dir[];
-extern pgd_t init_pg_end[];
extern pgd_t swapper_pg_dir[];
extern pgd_t idmap_pg_dir[];
extern pgd_t tramp_pg_dir[];
@@ -760,10 +874,8 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
WRITE_ONCE(*pmdp, pmd);
- if (pmd_valid(pmd)) {
- dsb(ishst);
- isb();
- }
+ if (pmd_valid(pmd))
+ queue_pte_barriers();
}
static inline void pmd_clear(pmd_t *pmdp)
@@ -793,19 +905,14 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
/* use ONLY for statically allocated translation tables */
#define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))
-/*
- * Conversion functions: convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
- */
-#define mk_pte(page,prot) pfn_pte(page_to_pfn(page),prot)
-
#if CONFIG_PGTABLE_LEVELS > 2
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))
#define pud_none(pud) (!pud_val(pud))
-#define pud_bad(pud) (!pud_table(pud))
+#define pud_bad(pud) ((pud_val(pud) & PUD_TYPE_MASK) != \
+ PUD_TYPE_TABLE)
#define pud_present(pud) pte_present(pud_pte(pud))
#ifndef __PAGETABLE_PMD_FOLDED
#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
@@ -827,10 +934,8 @@ static inline void set_pud(pud_t *pudp, pud_t pud)
WRITE_ONCE(*pudp, pud);
- if (pud_valid(pud)) {
- dsb(ishst);
- isb();
- }
+ if (pud_valid(pud))
+ queue_pte_barriers();
}
static inline void pud_clear(pud_t *pudp)
@@ -896,7 +1001,9 @@ static inline bool mm_pud_folded(const struct mm_struct *mm)
pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e))
#define p4d_none(p4d) (pgtable_l4_enabled() && !p4d_val(p4d))
-#define p4d_bad(p4d) (pgtable_l4_enabled() && !(p4d_val(p4d) & 2))
+#define p4d_bad(p4d) (pgtable_l4_enabled() && \
+ ((p4d_val(p4d) & P4D_TYPE_MASK) != \
+ P4D_TYPE_TABLE))
#define p4d_present(p4d) (!p4d_none(p4d))
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
@@ -907,8 +1014,7 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
}
WRITE_ONCE(*p4dp, p4d);
- dsb(ishst);
- isb();
+ queue_pte_barriers();
}
static inline void p4d_clear(p4d_t *p4dp)
@@ -1023,7 +1129,9 @@ static inline bool mm_p4d_folded(const struct mm_struct *mm)
pr_err("%s:%d: bad p4d %016llx.\n", __FILE__, __LINE__, p4d_val(e))
#define pgd_none(pgd) (pgtable_l5_enabled() && !pgd_val(pgd))
-#define pgd_bad(pgd) (pgtable_l5_enabled() && !(pgd_val(pgd) & 2))
+#define pgd_bad(pgd) (pgtable_l5_enabled() && \
+ ((pgd_val(pgd) & PGD_TYPE_MASK) != \
+ PGD_TYPE_TABLE))
#define pgd_present(pgd) (!pgd_none(pgd))
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
@@ -1034,8 +1142,7 @@ static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
}
WRITE_ONCE(*pgdp, pgd);
- dsb(ishst);
- isb();
+ queue_pte_barriers();
}
static inline void pgd_clear(pgd_t *pgdp)
@@ -1276,16 +1383,37 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */
-static inline pte_t __ptep_get_and_clear(struct mm_struct *mm,
- unsigned long address, pte_t *ptep)
+static inline pte_t __ptep_get_and_clear_anysz(struct mm_struct *mm,
+ pte_t *ptep,
+ unsigned long pgsize)
{
pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0));
- page_table_check_pte_clear(mm, pte);
+ switch (pgsize) {
+ case PAGE_SIZE:
+ page_table_check_pte_clear(mm, pte);
+ break;
+ case PMD_SIZE:
+ page_table_check_pmd_clear(mm, pte_pmd(pte));
+ break;
+#ifndef __PAGETABLE_PMD_FOLDED
+ case PUD_SIZE:
+ page_table_check_pud_clear(mm, pte_pud(pte));
+ break;
+#endif
+ default:
+ VM_WARN_ON(1);
+ }
return pte;
}
+static inline pte_t __ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep)
+{
+ return __ptep_get_and_clear_anysz(mm, ptep, PAGE_SIZE);
+}
+
static inline void __clear_full_ptes(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned int nr, int full)
{
@@ -1322,11 +1450,7 @@ static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm,
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp)
{
- pmd_t pmd = __pmd(xchg_relaxed(&pmd_val(*pmdp), 0));
-
- page_table_check_pmd_clear(mm, pmd);
-
- return pmd;
+ return pte_pmd(__ptep_get_and_clear_anysz(mm, (pte_t *)pmdp, PMD_SIZE));
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -1345,7 +1469,7 @@ static inline void ___ptep_set_wrprotect(struct mm_struct *mm,
}
/*
- * __ptep_set_wrprotect - mark read-only while trasferring potential hardware
+ * __ptep_set_wrprotect - mark read-only while transferring potential hardware
* dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
*/
static inline void __ptep_set_wrprotect(struct mm_struct *mm,
diff --git a/arch/arm64/include/asm/por.h b/arch/arm64/include/asm/por.h
index e06e9f473675..d913d5b529e4 100644
--- a/arch/arm64/include/asm/por.h
+++ b/arch/arm64/include/asm/por.h
@@ -6,26 +6,27 @@
#ifndef _ASM_ARM64_POR_H
#define _ASM_ARM64_POR_H
-#define POR_BITS_PER_PKEY 4
-#define POR_ELx_IDX(por_elx, idx) (((por_elx) >> ((idx) * POR_BITS_PER_PKEY)) & 0xf)
+#include <asm/sysreg.h>
+
+#define POR_EL0_INIT POR_ELx_PERM_PREP(0, POE_RWX)
static inline bool por_elx_allows_read(u64 por, u8 pkey)
{
- u8 perm = POR_ELx_IDX(por, pkey);
+ u8 perm = POR_ELx_PERM_GET(pkey, por);
return perm & POE_R;
}
static inline bool por_elx_allows_write(u64 por, u8 pkey)
{
- u8 perm = POR_ELx_IDX(por, pkey);
+ u8 perm = POR_ELx_PERM_GET(pkey, por);
return perm & POE_W;
}
static inline bool por_elx_allows_exec(u64 por, u8 pkey)
{
- u8 perm = POR_ELx_IDX(por, pkey);
+ u8 perm = POR_ELx_PERM_GET(pkey, por);
return perm & POE_X;
}
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index 6cf4aae05219..fded5358641f 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -7,7 +7,7 @@
#include <linux/ptdump.h>
-#ifdef CONFIG_PTDUMP_CORE
+#ifdef CONFIG_PTDUMP
#include <linux/mm_types.h>
#include <linux/seq_file.h>
@@ -24,8 +24,8 @@ struct ptdump_info {
};
struct ptdump_prot_bits {
- u64 mask;
- u64 val;
+ ptdesc_t mask;
+ ptdesc_t val;
const char *set;
const char *clear;
};
@@ -34,7 +34,7 @@ struct ptdump_pg_level {
const struct ptdump_prot_bits *bits;
char name[4];
int num;
- u64 mask;
+ ptdesc_t mask;
};
/*
@@ -51,7 +51,7 @@ struct ptdump_pg_state {
const struct mm_struct *mm;
unsigned long start_address;
int level;
- u64 current_prot;
+ ptdesc_t current_prot;
bool check_wx;
unsigned long wx_pages;
unsigned long uxn_pages;
@@ -59,7 +59,13 @@ struct ptdump_pg_state {
void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
- u64 val);
+ pteval_t val);
+void note_page_pte(struct ptdump_state *st, unsigned long addr, pte_t pte);
+void note_page_pmd(struct ptdump_state *st, unsigned long addr, pmd_t pmd);
+void note_page_pud(struct ptdump_state *st, unsigned long addr, pud_t pud);
+void note_page_p4d(struct ptdump_state *st, unsigned long addr, p4d_t p4d);
+void note_page_pgd(struct ptdump_state *st, unsigned long addr, pgd_t pgd);
+void note_page_flush(struct ptdump_state *st);
#ifdef CONFIG_PTDUMP_DEBUGFS
#define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64
void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
@@ -69,7 +75,13 @@ static inline void ptdump_debugfs_register(struct ptdump_info *info,
#endif /* CONFIG_PTDUMP_DEBUGFS */
#else
static inline void note_page(struct ptdump_state *pt_st, unsigned long addr,
- int level, u64 val) { }
-#endif /* CONFIG_PTDUMP_CORE */
+ int level, pteval_t val) { }
+static inline void note_page_pte(struct ptdump_state *st, unsigned long addr, pte_t pte) { }
+static inline void note_page_pmd(struct ptdump_state *st, unsigned long addr, pmd_t pmd) { }
+static inline void note_page_pud(struct ptdump_state *st, unsigned long addr, pud_t pud) { }
+static inline void note_page_p4d(struct ptdump_state *st, unsigned long addr, p4d_t p4d) { }
+static inline void note_page_pgd(struct ptdump_state *st, unsigned long addr, pgd_t pgd) { }
+static inline void note_page_flush(struct ptdump_state *st) { }
+#endif /* CONFIG_PTDUMP */
#endif /* __ASM_PTDUMP_H */
diff --git a/arch/arm64/include/asm/rqspinlock.h b/arch/arm64/include/asm/rqspinlock.h
new file mode 100644
index 000000000000..9ea0a74e5892
--- /dev/null
+++ b/arch/arm64/include/asm/rqspinlock.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RQSPINLOCK_H
+#define _ASM_RQSPINLOCK_H
+
+#include <asm/barrier.h>
+
+/*
+ * Hardcode res_smp_cond_load_acquire implementations for arm64 to a custom
+ * version based on [0]. In rqspinlock code, our conditional expression involves
+ * checking the value _and_ additionally a timeout. However, on arm64, the
+ * WFE-based implementation may never spin again if no stores occur to the
+ * locked byte in the lock word. As such, we may be stuck forever if
+ * event-stream based unblocking is not available on the platform for WFE spin
+ * loops (arch_timer_evtstrm_available).
+ *
+ * Once support for smp_cond_load_acquire_timewait [0] lands, we can drop this
+ * copy-paste.
+ *
+ * While we rely on the implementation to amortize the cost of sampling
+ * cond_expr for us, it will not happen when event stream support is
+ * unavailable, time_expr check is amortized. This is not the common case, and
+ * it would be difficult to fit our logic in the time_expr_ns >= time_limit_ns
+ * comparison, hence just let it be. In case of event-stream, the loop is woken
+ * up at microsecond granularity.
+ *
+ * [0]: https://lore.kernel.org/lkml/20250203214911.898276-1-ankur.a.arora@oracle.com
+ */
+
+#ifndef smp_cond_load_acquire_timewait
+
+#define smp_cond_time_check_count 200
+
+#define __smp_cond_load_relaxed_spinwait(ptr, cond_expr, time_expr_ns, \
+ time_limit_ns) ({ \
+ typeof(ptr) __PTR = (ptr); \
+ __unqual_scalar_typeof(*ptr) VAL; \
+ unsigned int __count = 0; \
+ for (;;) { \
+ VAL = READ_ONCE(*__PTR); \
+ if (cond_expr) \
+ break; \
+ cpu_relax(); \
+ if (__count++ < smp_cond_time_check_count) \
+ continue; \
+ if ((time_expr_ns) >= (time_limit_ns)) \
+ break; \
+ __count = 0; \
+ } \
+ (typeof(*ptr))VAL; \
+})
+
+#define __smp_cond_load_acquire_timewait(ptr, cond_expr, \
+ time_expr_ns, time_limit_ns) \
+({ \
+ typeof(ptr) __PTR = (ptr); \
+ __unqual_scalar_typeof(*ptr) VAL; \
+ for (;;) { \
+ VAL = smp_load_acquire(__PTR); \
+ if (cond_expr) \
+ break; \
+ __cmpwait_relaxed(__PTR, VAL); \
+ if ((time_expr_ns) >= (time_limit_ns)) \
+ break; \
+ } \
+ (typeof(*ptr))VAL; \
+})
+
+#define smp_cond_load_acquire_timewait(ptr, cond_expr, \
+ time_expr_ns, time_limit_ns) \
+({ \
+ __unqual_scalar_typeof(*ptr) _val; \
+ int __wfe = arch_timer_evtstrm_available(); \
+ \
+ if (likely(__wfe)) { \
+ _val = __smp_cond_load_acquire_timewait(ptr, cond_expr, \
+ time_expr_ns, \
+ time_limit_ns); \
+ } else { \
+ _val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr, \
+ time_expr_ns, \
+ time_limit_ns); \
+ smp_acquire__after_ctrl_dep(); \
+ } \
+ (typeof(*ptr))_val; \
+})
+
+#endif
+
+#define res_smp_cond_load_acquire(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1)
+
+#include <asm-generic/rqspinlock.h>
+
+#endif /* _ASM_RQSPINLOCK_H */
diff --git a/arch/arm64/include/asm/rsi.h b/arch/arm64/include/asm/rsi.h
index 188cbb9b23f5..b42aeac05340 100644
--- a/arch/arm64/include/asm/rsi.h
+++ b/arch/arm64/include/asm/rsi.h
@@ -10,6 +10,8 @@
#include <linux/jump_label.h>
#include <asm/rsi_cmds.h>
+#define RSI_PDEV_NAME "arm-cca-dev"
+
DECLARE_STATIC_KEY_FALSE(rsi_present);
void __init arm64_rsi_init(void);
diff --git a/arch/arm64/include/asm/rsi_cmds.h b/arch/arm64/include/asm/rsi_cmds.h
index e6a211001bd3..2c8763876dfb 100644
--- a/arch/arm64/include/asm/rsi_cmds.h
+++ b/arch/arm64/include/asm/rsi_cmds.h
@@ -7,6 +7,8 @@
#define __ASM_RSI_CMDS_H
#include <linux/arm-smccc.h>
+#include <linux/string.h>
+#include <asm/memory.h>
#include <asm/rsi_smc.h>
diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h
index 56f7b1d4d54b..97d9256d33c9 100644
--- a/arch/arm64/include/asm/rwonce.h
+++ b/arch/arm64/include/asm/rwonce.h
@@ -12,16 +12,12 @@
#ifndef BUILD_VDSO
-#ifdef CONFIG_AS_HAS_LDAPR
#define __LOAD_RCPC(sfx, regs...) \
ALTERNATIVE( \
"ldar" #sfx "\t" #regs, \
".arch_extension rcpc\n" \
"ldapr" #sfx "\t" #regs, \
ARM64_HAS_LDAPR)
-#else
-#define __LOAD_RCPC(sfx, regs...) "ldar" #sfx "\t" #regs
-#endif /* CONFIG_AS_HAS_LDAPR */
/*
* When building with LTO, there is an increased risk of the compiler
diff --git a/arch/arm64/include/asm/seccomp.h b/arch/arm64/include/asm/seccomp.h
index b83975555314..bf6bf40bc5ab 100644
--- a/arch/arm64/include/asm/seccomp.h
+++ b/arch/arm64/include/asm/seccomp.h
@@ -23,7 +23,6 @@
#define SECCOMP_ARCH_NATIVE_NR NR_syscalls
#define SECCOMP_ARCH_NATIVE_NAME "aarch64"
#ifdef CONFIG_COMPAT
-#include <asm/unistd_compat_32.h>
# define SECCOMP_ARCH_COMPAT AUDIT_ARCH_ARM
# define SECCOMP_ARCH_COMPAT_NR __NR_compat32_syscalls
# define SECCOMP_ARCH_COMPAT_NAME "arm"
diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
index 40971ac1303f..51b0d594239e 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -11,6 +11,7 @@ extern char __alt_instructions[], __alt_instructions_end[];
extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
extern char __hyp_text_start[], __hyp_text_end[];
+extern char __hyp_data_start[], __hyp_data_end[];
extern char __hyp_rodata_start[], __hyp_rodata_end[];
extern char __hyp_reloc_begin[], __hyp_reloc_end[];
extern char __hyp_bss_start[], __hyp_bss_end[];
diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h
index 8a8acc220371..84783efdc9d1 100644
--- a/arch/arm64/include/asm/sparsemem.h
+++ b/arch/arm64/include/asm/sparsemem.h
@@ -5,7 +5,10 @@
#ifndef __ASM_SPARSEMEM_H
#define __ASM_SPARSEMEM_H
-#define MAX_PHYSMEM_BITS CONFIG_ARM64_PA_BITS
+#include <asm/pgtable-prot.h>
+
+#define MAX_PHYSMEM_BITS PHYS_MASK_SHIFT
+#define MAX_POSSIBLE_PHYSMEM_BITS (52)
/*
* Section size must be at least 512MB for 64K base
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
index 0c4d9045c31f..8fef12626090 100644
--- a/arch/arm64/include/asm/spectre.h
+++ b/arch/arm64/include/asm/spectre.h
@@ -97,7 +97,9 @@ enum mitigation_state arm64_get_meltdown_state(void);
enum mitigation_state arm64_get_spectre_bhb_state(void);
bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope);
-u8 spectre_bhb_loop_affected(int scope);
+extern bool __nospectre_bhb;
+u8 get_spectre_bhb_loop_value(void);
+bool is_spectre_bhb_fw_mitigated(void);
void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
bool try_emulate_el1_ssbs(struct pt_regs *regs, u32 instr);
diff --git a/arch/arm64/include/asm/stacktrace/nvhe.h b/arch/arm64/include/asm/stacktrace/nvhe.h
index 44759281d0d4..171f9edef49f 100644
--- a/arch/arm64/include/asm/stacktrace/nvhe.h
+++ b/arch/arm64/include/asm/stacktrace/nvhe.h
@@ -47,7 +47,7 @@ static inline void kvm_nvhe_unwind_init(struct unwind_state *state,
DECLARE_KVM_NVHE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
-DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
+DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_base);
void kvm_nvhe_dump_backtrace(unsigned long hyp_offset);
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index ab8e14b96f68..712daa90e643 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -61,6 +61,22 @@ static inline void syscall_set_return_value(struct task_struct *task,
regs->regs[0] = val;
}
+static inline void syscall_set_nr(struct task_struct *task,
+ struct pt_regs *regs,
+ int nr)
+{
+ regs->syscallno = nr;
+ if (nr == -1) {
+ /*
+ * When the syscall number is set to -1, the syscall will be
+ * skipped. In this case the syscall return value has to be
+ * set explicitly, otherwise the first syscall argument is
+ * returned as the syscall return value.
+ */
+ syscall_set_return_value(task, regs, -ENOSYS, 0);
+ }
+}
+
#define SYSCALL_MAX_ARGS 6
static inline void syscall_get_arguments(struct task_struct *task,
@@ -73,6 +89,19 @@ static inline void syscall_get_arguments(struct task_struct *task,
memcpy(args, &regs->regs[1], 5 * sizeof(args[0]));
}
+static inline void syscall_set_arguments(struct task_struct *task,
+ struct pt_regs *regs,
+ const unsigned long *args)
+{
+ memcpy(&regs->regs[0], args, 6 * sizeof(args[0]));
+ /*
+ * Also copy the first argument into orig_x0
+ * so that syscall_get_arguments() would return it
+ * instead of the previous value.
+ */
+ regs->orig_x0 = regs->regs[0];
+}
+
/*
* We don't care about endianness (__AUDIT_ARCH_LE bit) here because
* AArch64 has the same system calls both on little- and big- endian.
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index b8303a83c0bf..f1bb0d10c39a 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -12,6 +12,7 @@
#include <linux/bits.h>
#include <linux/stringify.h>
#include <linux/kasan-tags.h>
+#include <linux/kconfig.h>
#include <asm/gpr-num.h>
@@ -117,6 +118,7 @@
#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31)
+/* Data cache zero operations */
#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2)
#define SYS_DC_IGSW sys_insn(1, 0, 7, 6, 4)
#define SYS_DC_IGDSW sys_insn(1, 0, 7, 6, 6)
@@ -153,11 +155,13 @@
#define SYS_DC_CIGVAC sys_insn(1, 3, 7, 14, 3)
#define SYS_DC_CIGDVAC sys_insn(1, 3, 7, 14, 5)
-/* Data cache zero operations */
#define SYS_DC_ZVA sys_insn(1, 3, 7, 4, 1)
#define SYS_DC_GVA sys_insn(1, 3, 7, 4, 3)
#define SYS_DC_GZVA sys_insn(1, 3, 7, 4, 4)
+#define SYS_DC_CIVAPS sys_insn(1, 0, 7, 15, 1)
+#define SYS_DC_CIGDVAPS sys_insn(1, 0, 7, 15, 5)
+
/*
* Automatically generated definitions for system registers, the
* manual encodings below are in the process of being converted to
@@ -283,8 +287,6 @@
#define SYS_RGSR_EL1 sys_reg(3, 0, 1, 0, 5)
#define SYS_GCR_EL1 sys_reg(3, 0, 1, 0, 6)
-#define SYS_TRFCR_EL1 sys_reg(3, 0, 1, 2, 1)
-
#define SYS_TCR_EL1 sys_reg(3, 0, 2, 0, 2)
#define SYS_APIAKEYLO_EL1 sys_reg(3, 0, 2, 1, 0)
@@ -477,6 +479,7 @@
#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0)
#define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1)
+#define SYS_CNTVCT_EL0 sys_reg(3, 3, 14, 0, 2)
#define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5)
#define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6)
@@ -484,23 +487,36 @@
#define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1)
#define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2)
+#define SYS_CNTV_TVAL_EL0 sys_reg(3, 3, 14, 3, 0)
#define SYS_CNTV_CTL_EL0 sys_reg(3, 3, 14, 3, 1)
#define SYS_CNTV_CVAL_EL0 sys_reg(3, 3, 14, 3, 2)
#define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0)
#define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1)
#define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0)
+#define SYS_AARCH32_CNTVCT sys_reg(0, 1, 0, 14, 0)
#define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0)
#define SYS_AARCH32_CNTPCTSS sys_reg(0, 8, 0, 14, 0)
+#define SYS_AARCH32_CNTVCTSS sys_reg(0, 9, 0, 14, 0)
#define __PMEV_op2(n) ((n) & 0x7)
#define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3))
+#define SYS_PMEVCNTSVRn_EL1(n) sys_reg(2, 0, 14, __CNTR_CRm(n), __PMEV_op2(n))
#define SYS_PMEVCNTRn_EL0(n) sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n))
#define __TYPER_CRm(n) (0xc | (((n) >> 3) & 0x3))
#define SYS_PMEVTYPERn_EL0(n) sys_reg(3, 3, 14, __TYPER_CRm(n), __PMEV_op2(n))
#define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7)
+#define SYS_SPMCGCRn_EL1(n) sys_reg(2, 0, 9, 13, ((n) & 1))
+
+#define __SPMEV_op2(n) ((n) & 0x7)
+#define __SPMEV_crm(p, n) ((((p) & 7) << 1) | (((n) >> 3) & 1))
+#define SYS_SPMEVCNTRn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b000, n), __SPMEV_op2(n))
+#define SYS_SPMEVFILT2Rn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b011, n), __SPMEV_op2(n))
+#define SYS_SPMEVFILTRn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b010, n), __SPMEV_op2(n))
+#define SYS_SPMEVTYPERn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b001, n), __SPMEV_op2(n))
+
#define SYS_VPIDR_EL2 sys_reg(3, 4, 0, 0, 0)
#define SYS_VMPIDR_EL2 sys_reg(3, 4, 0, 0, 5)
@@ -519,8 +535,6 @@
#define SYS_VTTBR_EL2 sys_reg(3, 4, 2, 1, 0)
#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2)
-#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1)
-#define SYS_VNCR_EL2 sys_reg(3, 4, 2, 2, 0)
#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6)
#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1)
@@ -561,9 +575,6 @@
#define SYS_ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4)
#define SYS_ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5)
-#define SYS_ICH_HCR_EL2 sys_reg(3, 4, 12, 11, 0)
-#define SYS_ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1)
-#define SYS_ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2)
#define SYS_ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3)
#define SYS_ICH_ELRSR_EL2 sys_reg(3, 4, 12, 11, 5)
#define SYS_ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7)
@@ -610,28 +621,18 @@
/* VHE encodings for architectural EL0/1 system registers */
#define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0)
-#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0)
-#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2)
-#define SYS_SCTLR2_EL12 sys_reg(3, 5, 1, 0, 3)
-#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0)
-#define SYS_TRFCR_EL12 sys_reg(3, 5, 1, 2, 1)
-#define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6)
#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0)
#define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1)
-#define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2)
-#define SYS_TCR2_EL12 sys_reg(3, 5, 2, 0, 3)
#define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0)
#define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1)
#define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0)
#define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1)
#define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0)
#define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0)
-#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0)
#define SYS_PMSCR_EL12 sys_reg(3, 5, 9, 9, 0)
#define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0)
#define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0)
#define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0)
-#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1)
#define SYS_SCXTNUM_EL12 sys_reg(3, 5, 13, 0, 7)
#define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0)
#define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0)
@@ -983,20 +984,7 @@
/* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
#define SYS_MPIDR_SAFE_VAL (BIT(31))
-#define TRFCR_ELx_TS_SHIFT 5
-#define TRFCR_ELx_TS_MASK ((0x3UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_ELx_TS_VIRTUAL ((0x1UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_ELx_TS_GUEST_PHYSICAL ((0x2UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_ELx_TS_PHYSICAL ((0x3UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_EL2_CX BIT(3)
-#define TRFCR_ELx_ExTRE BIT(1)
-#define TRFCR_ELx_E0TRE BIT(0)
-
/* GIC Hypervisor interface registers */
-/* ICH_MISR_EL2 bit definitions */
-#define ICH_MISR_EOI (1 << 0)
-#define ICH_MISR_U (1 << 1)
-
/* ICH_LR*_EL2 bit definitions */
#define ICH_LR_VIRTUAL_ID_MASK ((1ULL << 32) - 1)
@@ -1011,17 +999,6 @@
#define ICH_LR_PRIORITY_SHIFT 48
#define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT)
-/* ICH_HCR_EL2 bit definitions */
-#define ICH_HCR_EN (1 << 0)
-#define ICH_HCR_UIE (1 << 1)
-#define ICH_HCR_NPIE (1 << 3)
-#define ICH_HCR_TC (1 << 10)
-#define ICH_HCR_TALL0 (1 << 11)
-#define ICH_HCR_TALL1 (1 << 12)
-#define ICH_HCR_TDIR (1 << 14)
-#define ICH_HCR_EOIcount_SHIFT 27
-#define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT)
-
/* ICH_VMCR_EL2 bit definitions */
#define ICH_VMCR_ACK_CTL_SHIFT 2
#define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT)
@@ -1042,18 +1019,6 @@
#define ICH_VMCR_ENG1_SHIFT 1
#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT)
-/* ICH_VTR_EL2 bit definitions */
-#define ICH_VTR_PRI_BITS_SHIFT 29
-#define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT)
-#define ICH_VTR_ID_BITS_SHIFT 23
-#define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT)
-#define ICH_VTR_SEIS_SHIFT 22
-#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT)
-#define ICH_VTR_A3V_SHIFT 21
-#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT)
-#define ICH_VTR_TDS_SHIFT 19
-#define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT)
-
/*
* Permission Indirection Extension (PIE) permission encodings.
* Encodings with the _O suffix, have overlays applied (Permission Overlay Extension).
@@ -1070,8 +1035,11 @@
#define PIE_RX UL(0xa)
#define PIE_RW UL(0xc)
#define PIE_RWX UL(0xe)
+#define PIE_MASK UL(0xf)
-#define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4))
+#define PIRx_ELx_BITS_PER_IDX 4
+#define PIRx_ELx_PERM_SHIFT(idx) ((idx) * PIRx_ELx_BITS_PER_IDX)
+#define PIRx_ELx_PERM_PREP(idx, perm) (((perm) & PIE_MASK) << PIRx_ELx_PERM_SHIFT(idx))
/*
* Permission Overlay Extension (POE) permission encodings.
@@ -1082,12 +1050,14 @@
#define POE_RX UL(0x3)
#define POE_W UL(0x4)
#define POE_RW UL(0x5)
-#define POE_XW UL(0x6)
-#define POE_RXW UL(0x7)
+#define POE_WX UL(0x6)
+#define POE_RWX UL(0x7)
#define POE_MASK UL(0xf)
-/* Initial value for Permission Overlay Extension for EL0 */
-#define POR_EL0_INIT POE_RXW
+#define POR_ELx_BITS_PER_IDX 4
+#define POR_ELx_PERM_SHIFT(idx) ((idx) * POR_ELx_BITS_PER_IDX)
+#define POR_ELx_PERM_GET(idx, reg) (((reg) >> POR_ELx_PERM_SHIFT(idx)) & POE_MASK)
+#define POR_ELx_PERM_PREP(idx, perm) (((perm) & POE_MASK) << POR_ELx_PERM_SHIFT(idx))
/*
* Definitions for Guarded Control Stack
@@ -1124,6 +1094,15 @@
__emit_inst(0xd5000000|(\sreg)|(.L__gpr_num_\rt))
.endm
+ .macro msr_hcr_el2, reg
+#if IS_ENABLED(CONFIG_AMPERE_ERRATUM_AC04_CPU_23)
+ dsb nsh
+ msr hcr_el2, \reg
+ isb
+#else
+ msr hcr_el2, \reg
+#endif
+ .endm
#else
#include <linux/bitfield.h>
@@ -1211,6 +1190,13 @@
write_sysreg(__scs_new, sysreg); \
} while (0)
+#define sysreg_clear_set_hcr(clear, set) do { \
+ u64 __scs_val = read_sysreg(hcr_el2); \
+ u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \
+ if (__scs_new != __scs_val) \
+ write_sysreg_hcr(__scs_new); \
+} while (0)
+
#define sysreg_clear_set_s(sysreg, clear, set) do { \
u64 __scs_val = read_sysreg_s(sysreg); \
u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \
@@ -1218,6 +1204,17 @@
write_sysreg_s(__scs_new, sysreg); \
} while (0)
+#define write_sysreg_hcr(__val) do { \
+ if (IS_ENABLED(CONFIG_AMPERE_ERRATUM_AC04_CPU_23) && \
+ (!system_capabilities_finalized() || \
+ alternative_has_cap_unlikely(ARM64_WORKAROUND_AMPERE_AC04_CPU_23))) \
+ asm volatile("dsb nsh; msr hcr_el2, %x0; isb" \
+ : : "rZ" (__val)); \
+ else \
+ asm volatile("msr hcr_el2, %x0" \
+ : : "rZ" (__val)); \
+} while (0)
+
#define read_sysreg_par() ({ \
u64 par; \
asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 1114c1c3300a..1269c2487574 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -59,11 +59,12 @@ void arch_setup_new_exec(void);
#define TIF_SIGPENDING 0 /* signal pending */
#define TIF_NEED_RESCHED 1 /* rescheduling necessary */
-#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
-#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
-#define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */
-#define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */
-#define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */
+#define TIF_NEED_RESCHED_LAZY 2 /* Lazy rescheduling needed */
+#define TIF_NOTIFY_RESUME 3 /* callback before returning to user */
+#define TIF_FOREIGN_FPSTATE 4 /* CPU's FP state is not current's */
+#define TIF_UPROBE 5 /* uprobe breakpoint or singlestep */
+#define TIF_MTE_ASYNC_FAULT 6 /* MTE Asynchronous Tag Check Fault */
+#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */
#define TIF_SYSCALL_TRACE 8 /* syscall trace active */
#define TIF_SYSCALL_AUDIT 9 /* syscall auditing */
#define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */
@@ -82,9 +83,12 @@ void arch_setup_new_exec(void);
#define TIF_SME_VL_INHERIT 28 /* Inherit SME vl_onexec across exec */
#define TIF_KERNEL_FPSTATE 29 /* Task is in a kernel mode FPSIMD section */
#define TIF_TSC_SIGSEGV 30 /* SIGSEGV on counter-timer access */
+#define TIF_LAZY_MMU 31 /* Task in lazy mmu mode */
+#define TIF_LAZY_MMU_PENDING 32 /* Ops pending for lazy mmu mode exit */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
@@ -100,10 +104,10 @@ void arch_setup_new_exec(void);
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
#define _TIF_TSC_SIGSEGV (1 << TIF_TSC_SIGSEGV)
-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
_TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
_TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \
- _TIF_NOTIFY_SIGNAL)
+ _TIF_NOTIFY_SIGNAL | _TIF_SIGPENDING)
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index a947c6e784ed..8d762607285c 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -9,12 +9,7 @@
#define __ASM_TLB_H
#include <linux/pagemap.h>
-#include <linux/swap.h>
-static inline void __tlb_remove_table(void *_table)
-{
- free_page_and_swap_cache((struct page *)_table);
-}
#define tlb_flush tlb_flush
static void tlb_flush(struct mmu_gather *tlb);
@@ -82,7 +77,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
{
struct ptdesc *ptdesc = page_ptdesc(pte);
- pagetable_pte_dtor(ptdesc);
tlb_remove_ptdesc(tlb, ptdesc);
}
@@ -92,7 +86,6 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
{
struct ptdesc *ptdesc = virt_to_ptdesc(pmdp);
- pagetable_pmd_dtor(ptdesc);
tlb_remove_ptdesc(tlb, ptdesc);
}
#endif
@@ -106,7 +99,19 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
if (!pgtable_l4_enabled())
return;
- pagetable_pud_dtor(ptdesc);
+ tlb_remove_ptdesc(tlb, ptdesc);
+}
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 4
+static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4dp,
+ unsigned long addr)
+{
+ struct ptdesc *ptdesc = virt_to_ptdesc(p4dp);
+
+ if (!pgtable_l5_enabled())
+ return;
+
tlb_remove_ptdesc(tlb, ptdesc);
}
#endif
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index bc94e036a26b..aa9efee17277 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -322,21 +322,15 @@ static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
return true;
}
-static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
- struct mm_struct *mm,
- unsigned long uaddr)
-{
- __flush_tlb_page_nosync(mm, uaddr);
-}
-
/*
- * If mprotect/munmap/etc occurs during TLB batched flushing, we need to
- * synchronise all the TLBI issued with a DSB to avoid the race mentioned in
- * flush_tlb_batched_pending().
+ * If mprotect/munmap/etc occurs during TLB batched flushing, we need to ensure
+ * all the previously issued TLBIs targeting mm have completed. But since we
+ * can be executing on a remote CPU, a DSB cannot guarantee this like it can
+ * for arch_tlbbatch_flush(). Our only option is to flush the entire mm.
*/
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
- dsb(ish);
+ flush_tlb_mm(mm);
}
/*
@@ -396,33 +390,35 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
#define __flush_tlb_range_op(op, start, pages, stride, \
asid, tlb_level, tlbi_user, lpa2) \
do { \
+ typeof(start) __flush_start = start; \
+ typeof(pages) __flush_pages = pages; \
int num = 0; \
int scale = 3; \
int shift = lpa2 ? 16 : PAGE_SHIFT; \
unsigned long addr; \
\
- while (pages > 0) { \
+ while (__flush_pages > 0) { \
if (!system_supports_tlb_range() || \
- pages == 1 || \
- (lpa2 && start != ALIGN(start, SZ_64K))) { \
- addr = __TLBI_VADDR(start, asid); \
+ __flush_pages == 1 || \
+ (lpa2 && __flush_start != ALIGN(__flush_start, SZ_64K))) { \
+ addr = __TLBI_VADDR(__flush_start, asid); \
__tlbi_level(op, addr, tlb_level); \
if (tlbi_user) \
__tlbi_user_level(op, addr, tlb_level); \
- start += stride; \
- pages -= stride >> PAGE_SHIFT; \
+ __flush_start += stride; \
+ __flush_pages -= stride >> PAGE_SHIFT; \
continue; \
} \
\
- num = __TLBI_RANGE_NUM(pages, scale); \
+ num = __TLBI_RANGE_NUM(__flush_pages, scale); \
if (num >= 0) { \
- addr = __TLBI_VADDR_RANGE(start >> shift, asid, \
+ addr = __TLBI_VADDR_RANGE(__flush_start >> shift, asid, \
scale, num, tlb_level); \
__tlbi(r##op, addr); \
if (tlbi_user) \
__tlbi_user(r##op, addr); \
- start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
- pages -= __TLBI_RANGE_PAGES(num, scale); \
+ __flush_start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
+ __flush_pages -= __TLBI_RANGE_PAGES(num, scale);\
} \
scale--; \
} \
@@ -448,7 +444,7 @@ static inline bool __flush_tlb_range_limit_excess(unsigned long start,
return false;
}
-static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
+static inline void __flush_tlb_range_nosync(struct mm_struct *mm,
unsigned long start, unsigned long end,
unsigned long stride, bool last_level,
int tlb_level)
@@ -460,12 +456,12 @@ static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
pages = (end - start) >> PAGE_SHIFT;
if (__flush_tlb_range_limit_excess(start, end, pages, stride)) {
- flush_tlb_mm(vma->vm_mm);
+ flush_tlb_mm(mm);
return;
}
dsb(ishst);
- asid = ASID(vma->vm_mm);
+ asid = ASID(mm);
if (last_level)
__flush_tlb_range_op(vale1is, start, pages, stride, asid,
@@ -474,7 +470,7 @@ static inline void __flush_tlb_range_nosync(struct vm_area_struct *vma,
__flush_tlb_range_op(vae1is, start, pages, stride, asid,
tlb_level, true, lpa2_is_enabled());
- mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
}
static inline void __flush_tlb_range(struct vm_area_struct *vma,
@@ -482,7 +478,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long stride, bool last_level,
int tlb_level)
{
- __flush_tlb_range_nosync(vma, start, end, stride,
+ __flush_tlb_range_nosync(vma->vm_mm, start, end, stride,
last_level, tlb_level);
dsb(ish);
}
@@ -533,6 +529,12 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
dsb(ish);
isb();
}
+
+static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+ struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ __flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, true, 3);
+}
#endif
#endif
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index d780d1bd2eac..82cf1f879c61 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -109,10 +109,9 @@ static inline void arm64_mops_reset_regs(struct user_pt_regs *regs, unsigned lon
int dstreg = ESR_ELx_MOPS_ISS_DESTREG(esr);
int srcreg = ESR_ELx_MOPS_ISS_SRCREG(esr);
int sizereg = ESR_ELx_MOPS_ISS_SIZEREG(esr);
- unsigned long dst, src, size;
+ unsigned long dst, size;
dst = regs->regs[dstreg];
- src = regs->regs[srcreg];
size = regs->regs[sizereg];
/*
@@ -129,6 +128,7 @@ static inline void arm64_mops_reset_regs(struct user_pt_regs *regs, unsigned lon
}
} else {
/* CPY* instruction */
+ unsigned long src = regs->regs[srcreg];
if (!(option_a ^ wrong_option)) {
/* Format is from Option B */
if (regs->pstate & PSR_N_BIT) {
diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h
index 3e3c3fdb1842..61679070f595 100644
--- a/arch/arm64/include/asm/vdso.h
+++ b/arch/arm64/include/asm/vdso.h
@@ -5,7 +5,7 @@
#ifndef __ASM_VDSO_H
#define __ASM_VDSO_H
-#define __VVAR_PAGES 2
+#define __VDSO_PAGES 4
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
index 778c1202bbbf..d60ea7a72a9c 100644
--- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h
@@ -104,7 +104,7 @@ int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
}
static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
- const struct vdso_data *vd)
+ const struct vdso_time_data *vd)
{
u64 res;
@@ -131,45 +131,33 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
return res;
}
-static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void)
{
- const struct vdso_data *ret;
+ const struct vdso_time_data *ret;
/*
- * This simply puts &_vdso_data into ret. The reason why we don't use
- * `ret = _vdso_data` is that the compiler tends to optimise this in a
- * very suboptimal way: instead of keeping &_vdso_data in a register,
- * it goes through a relocation almost every time _vdso_data must be
+ * This simply puts &_vdso_time_data into ret. The reason why we don't use
+ * `ret = _vdso_time_data` is that the compiler tends to optimise this in a
+ * very suboptimal way: instead of keeping &_vdso_time_data in a register,
+ * it goes through a relocation almost every time _vdso_time_data must be
* accessed (even in subfunctions). This is both time and space
* consuming: each relocation uses a word in the code section, and it
* has to be loaded at runtime.
*
* This trick hides the assignment from the compiler. Since it cannot
* track where the pointer comes from, it will only use one relocation
- * where __arch_get_vdso_data() is called, and then keep the result in
- * a register.
+ * where __aarch64_get_vdso_u_time_data() is called, and then keep the
+ * result in a register.
*/
- asm volatile("mov %0, %1" : "=r"(ret) : "r"(_vdso_data));
+ asm volatile("mov %0, %1" : "=r"(ret) : "r"(&vdso_u_time_data));
return ret;
}
+#define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data
-#ifdef CONFIG_TIME_NS
-static __always_inline
-const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
-{
- const struct vdso_data *ret;
-
- /* See __arch_get_vdso_data(). */
- asm volatile("mov %0, %1" : "=r"(ret) : "r"(_timens_data));
-
- return ret;
-}
-#endif
-
-static inline bool vdso_clocksource_ok(const struct vdso_data *vd)
+static inline bool vdso_clocksource_ok(const struct vdso_clock *vc)
{
- return vd->clock_mode == VDSO_CLOCKMODE_ARCHTIMER;
+ return vc->clock_mode == VDSO_CLOCKMODE_ARCHTIMER;
}
#define vdso_clocksource_ok vdso_clocksource_ok
diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h
index 342f807e2044..a2197da1951b 100644
--- a/arch/arm64/include/asm/vdso/getrandom.h
+++ b/arch/arm64/include/asm/vdso/getrandom.h
@@ -33,18 +33,6 @@ static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, uns
return ret;
}
-static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void)
-{
- /*
- * The RNG data is in the real VVAR data page, but if a task belongs to a time namespace
- * then VVAR_DATA_PAGE_OFFSET points to the namespace-specific VVAR page and VVAR_TIMENS_
- * PAGE_OFFSET points to the real VVAR page.
- */
- if (IS_ENABLED(CONFIG_TIME_NS) && _vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS)
- return (void *)&_vdso_rng_data + VVAR_TIMENS_PAGE_OFFSET * (1UL << CONFIG_PAGE_SHIFT);
- return &_vdso_rng_data;
-}
-
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_VDSO_GETRANDOM_H */
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
index 764d13e2916c..da1ab8759592 100644
--- a/arch/arm64/include/asm/vdso/gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -8,6 +8,7 @@
#ifndef __ASSEMBLY__
#include <asm/alternative.h>
+#include <asm/arch_timer.h>
#include <asm/barrier.h>
#include <asm/unistd.h>
#include <asm/sysreg.h>
@@ -67,10 +68,8 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
}
static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
- const struct vdso_data *vd)
+ const struct vdso_time_data *vd)
{
- u64 res;
-
/*
* Core checks for mode already, so this raced against a concurrent
* update. Return something. Core will do another round and then
@@ -79,39 +78,21 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
if (clock_mode == VDSO_CLOCKMODE_NONE)
return 0;
- /*
- * If FEAT_ECV is available, use the self-synchronizing counter.
- * Otherwise the isb is required to prevent that the counter value
- * is speculated.
- */
- asm volatile(
- ALTERNATIVE("isb\n"
- "mrs %0, cntvct_el0",
- "nop\n"
- __mrs_s("%0", SYS_CNTVCTSS_EL0),
- ARM64_HAS_ECV)
- : "=r" (res)
- :
- : "memory");
-
- arch_counter_enforce_ordering(res);
-
- return res;
+ return __arch_counter_get_cntvct();
}
-static __always_inline
-const struct vdso_data *__arch_get_vdso_data(void)
+#if IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB)
+static __always_inline const struct vdso_time_data *__arch_get_vdso_u_time_data(void)
{
- return _vdso_data;
-}
+ const struct vdso_time_data *ret = &vdso_u_time_data;
-#ifdef CONFIG_TIME_NS
-static __always_inline
-const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
-{
- return _timens_data;
+ /* Work around invalid absolute relocations */
+ OPTIMIZER_HIDE_VAR(ret);
+
+ return ret;
}
-#endif
+#define __arch_get_vdso_u_time_data __arch_get_vdso_u_time_data
+#endif /* IS_ENABLED(CONFIG_CC_IS_GCC) && IS_ENABLED(CONFIG_PAGE_SIZE_64KB) */
#endif /* !__ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
index eea51946d45a..de58951b8df6 100644
--- a/arch/arm64/include/asm/vdso/vsyscall.h
+++ b/arch/arm64/include/asm/vdso/vsyscall.h
@@ -2,44 +2,21 @@
#ifndef __ASM_VDSO_VSYSCALL_H
#define __ASM_VDSO_VSYSCALL_H
-#define __VDSO_RND_DATA_OFFSET 480
-
#ifndef __ASSEMBLY__
#include <vdso/datapage.h>
-enum vvar_pages {
- VVAR_DATA_PAGE_OFFSET,
- VVAR_TIMENS_PAGE_OFFSET,
- VVAR_NR_PAGES,
-};
-
#define VDSO_PRECISION_MASK ~(0xFF00ULL<<48)
-extern struct vdso_data *vdso_data;
/*
* Update the vDSO data page to keep in sync with kernel timekeeping.
*/
static __always_inline
-struct vdso_data *__arm64_get_k_vdso_data(void)
-{
- return vdso_data;
-}
-#define __arch_get_k_vdso_data __arm64_get_k_vdso_data
-
-static __always_inline
-struct vdso_rng_data *__arm64_get_k_vdso_rnd_data(void)
-{
- return (void *)vdso_data + __VDSO_RND_DATA_OFFSET;
-}
-#define __arch_get_k_vdso_rng_data __arm64_get_k_vdso_rnd_data
-
-static __always_inline
-void __arm64_update_vsyscall(struct vdso_data *vdata)
+void __arm64_update_vsyscall(struct vdso_time_data *vdata)
{
- vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK;
- vdata[CS_RAW].mask = VDSO_PRECISION_MASK;
+ vdata->clock_data[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK;
+ vdata->clock_data[CS_RAW].mask = VDSO_PRECISION_MASK;
}
#define __arch_update_vsyscall __arm64_update_vsyscall
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index ebf4a9f943ed..aa280f356b96 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -67,7 +67,8 @@
* __boot_cpu_mode records what mode CPUs were booted in.
* A correctly-implemented bootloader must start all CPUs in the same mode:
* In this case, both 32bit halves of __boot_cpu_mode will contain the
- * same value (either 0 if booted in EL1, BOOT_CPU_MODE_EL2 if booted in EL2).
+ * same value (either BOOT_CPU_MODE_EL1 if booted in EL1, BOOT_CPU_MODE_EL2 if
+ * booted in EL2).
*
* Should the bootloader fail to do this, the two values will be different.
* This allows the kernel to flag an error when the secondaries have come up.
diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
index 38fafffe699f..12f534e8f3ed 100644
--- a/arch/arm64/include/asm/vmalloc.h
+++ b/arch/arm64/include/asm/vmalloc.h
@@ -23,6 +23,51 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot)
return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
}
+#define arch_vmap_pte_range_map_size arch_vmap_pte_range_map_size
+static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr,
+ unsigned long end, u64 pfn,
+ unsigned int max_page_shift)
+{
+ /*
+ * If the block is at least CONT_PTE_SIZE in size, and is naturally
+ * aligned in both virtual and physical space, then we can pte-map the
+ * block using the PTE_CONT bit for more efficient use of the TLB.
+ */
+ if (max_page_shift < CONT_PTE_SHIFT)
+ return PAGE_SIZE;
+
+ if (end - addr < CONT_PTE_SIZE)
+ return PAGE_SIZE;
+
+ if (!IS_ALIGNED(addr, CONT_PTE_SIZE))
+ return PAGE_SIZE;
+
+ if (!IS_ALIGNED(PFN_PHYS(pfn), CONT_PTE_SIZE))
+ return PAGE_SIZE;
+
+ return CONT_PTE_SIZE;
+}
+
+#define arch_vmap_pte_range_unmap_size arch_vmap_pte_range_unmap_size
+static inline unsigned long arch_vmap_pte_range_unmap_size(unsigned long addr,
+ pte_t *ptep)
+{
+ /*
+ * The caller handles alignment so it's sufficient just to check
+ * PTE_CONT.
+ */
+ return pte_valid_cont(__ptep_get(ptep)) ? CONT_PTE_SIZE : PAGE_SIZE;
+}
+
+#define arch_vmap_pte_supported_shift arch_vmap_pte_supported_shift
+static inline int arch_vmap_pte_supported_shift(unsigned long size)
+{
+ if (size >= CONT_PTE_SIZE)
+ return CONT_PTE_SHIFT;
+
+ return PAGE_SHIFT;
+}
+
#endif
#define arch_vmap_pgprot_tagged arch_vmap_pgprot_tagged
diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h
index 4f9bbd4d6c26..6f556e993644 100644
--- a/arch/arm64/include/asm/vncr_mapping.h
+++ b/arch/arm64/include/asm/vncr_mapping.h
@@ -35,6 +35,8 @@
#define VNCR_CNTP_CTL_EL0 0x180
#define VNCR_SCXTNUM_EL1 0x188
#define VNCR_TFSR_EL1 0x190
+#define VNCR_HDFGRTR2_EL2 0x1A0
+#define VNCR_HDFGWTR2_EL2 0x1B0
#define VNCR_HFGRTR_EL2 0x1B8
#define VNCR_HFGWTR_EL2 0x1C0
#define VNCR_HFGITR_EL2 0x1C8
@@ -52,6 +54,9 @@
#define VNCR_PIRE0_EL1 0x290
#define VNCR_PIR_EL1 0x2A0
#define VNCR_POR_EL1 0x2A8
+#define VNCR_HFGRTR2_EL2 0x2C0
+#define VNCR_HFGWTR2_EL2 0x2C8
+#define VNCR_HFGITR2_EL2 0x310
#define VNCR_ICH_LR0_EL2 0x400
#define VNCR_ICH_LR1_EL2 0x408
#define VNCR_ICH_LR2_EL2 0x410
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 48d46b768eae..705a7afa8e58 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -56,6 +56,21 @@
#define HWCAP_PACA (1 << 30)
#define HWCAP_PACG (1UL << 31)
#define HWCAP_GCS (1UL << 32)
+#define HWCAP_CMPBR (1UL << 33)
+#define HWCAP_FPRCVT (1UL << 34)
+#define HWCAP_F8MM8 (1UL << 35)
+#define HWCAP_F8MM4 (1UL << 36)
+#define HWCAP_SVE_F16MM (1UL << 37)
+#define HWCAP_SVE_ELTPERM (1UL << 38)
+#define HWCAP_SVE_AES2 (1UL << 39)
+#define HWCAP_SVE_BFSCALE (1UL << 40)
+#define HWCAP_SVE2P2 (1UL << 41)
+#define HWCAP_SME2P2 (1UL << 42)
+#define HWCAP_SME_SBITPERM (1UL << 43)
+#define HWCAP_SME_AES (1UL << 44)
+#define HWCAP_SME_SFEXPA (1UL << 45)
+#define HWCAP_SME_STMOP (1UL << 46)
+#define HWCAP_SME_SMOP4 (1UL << 47)
/*
* HWCAP2 flags - for AT_HWCAP2
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 66736ff04011..ed5f3892674c 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -43,9 +43,6 @@
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_DIRTY_LOG_PAGE_OFFSET 64
-#define KVM_REG_SIZE(id) \
- (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
-
struct kvm_regs {
struct user_pt_regs regs; /* sp = sp_el0 */
@@ -108,6 +105,7 @@ struct kvm_regs {
#define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */
#define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */
#define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */
+#define KVM_ARM_VCPU_HAS_EL2_E2H0 8 /* Limit NV support to E2H RES0 */
struct kvm_vcpu_init {
__u32 target;
@@ -374,6 +372,7 @@ enum {
#endif
};
+/* Vendor hyper call function numbers 0-63 */
#define KVM_REG_ARM_VENDOR_HYP_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(2)
enum {
@@ -384,6 +383,17 @@ enum {
#endif
};
+/* Vendor hyper call function numbers 64-127 */
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_2 KVM_REG_ARM_FW_FEAT_BMAP_REG(3)
+
+enum {
+ KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_VER = 0,
+ KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_CPUS = 1,
+#ifdef __KERNEL__
+ KVM_REG_ARM_VENDOR_HYP_BMAP_2_BIT_COUNT,
+#endif
+};
+
/* Device Control API on vm fd */
#define KVM_ARM_VM_SMCCC_CTRL 0
#define KVM_ARM_VM_SMCCC_FILTER 0
@@ -406,6 +416,7 @@ enum {
#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
+#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
@@ -420,10 +431,11 @@ enum {
/* Device Control API on vcpu fd */
#define KVM_ARM_VCPU_PMU_V3_CTRL 0
-#define KVM_ARM_VCPU_PMU_V3_IRQ 0
-#define KVM_ARM_VCPU_PMU_V3_INIT 1
-#define KVM_ARM_VCPU_PMU_V3_FILTER 2
-#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3
+#define KVM_ARM_VCPU_PMU_V3_IRQ 0
+#define KVM_ARM_VCPU_PMU_V3_INIT 1
+#define KVM_ARM_VCPU_PMU_V3_FILTER 2
+#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3
+#define KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS 4
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1